go-zero/core/filex/lookup.go
2020-07-26 17:09:05 +08:00

106 lines
1.8 KiB
Go

package filex
import (
"io"
"os"
)
// OffsetRange describes a contiguous byte range inside a file.
type OffsetRange struct {
	// File is the name of the file the range belongs to.
	File string
	// Start is the byte offset at which the range begins; Stop is the byte
	// offset at which it ends. SplitLineChunks sets Stop to the file size for
	// the last range and reuses each Stop as the next range's Start.
	Start, Stop int64
}
// SplitLineChunks partitions the file named filename into consecutive byte
// ranges whose boundaries are placed just after line breaks, so that no line
// is shared between two ranges.
//
// When chunks is less than or equal to 1, a single range covering the whole
// file is returned and the file is not opened. Otherwise each range spans
// roughly size/chunks bytes, extended forward to the end of the line that the
// nominal boundary lands in. The ranges are contiguous: every Start equals the
// previous Stop, the first Start is 0 and the last Stop is the file size.
//
// Errors from os.Stat, os.Open and from reading the file are returned as is.
// A file whose tail contains no line break (for example a last line without a
// trailing newline) is not an error: the tail becomes the final range.
func SplitLineChunks(filename string, chunks int) ([]OffsetRange, error) {
	info, err := os.Stat(filename)
	if err != nil {
		return nil, err
	}

	size := info.Size()
	if chunks <= 1 {
		return []OffsetRange{{File: filename, Start: 0, Stop: size}}, nil
	}

	file, err := os.Open(filename)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	var ranges []OffsetRange
	var offset int64
	// The +1 rounds the chunk size up so the last chunk is not left with only
	// a few remainder bytes.
	preferSize := size/int64(chunks) + 1

	// Keep cutting ranges while a full preferred-size chunk still fits
	// strictly before the end of the file.
	for offset+preferSize < size {
		offsetRange, err := nextRange(file, offset, offset+preferSize)
		if err == io.EOF {
			// No line break between the nominal boundary and the end of the
			// file: everything from offset onward forms the last range.
			break
		}
		if err != nil {
			return nil, err
		}

		ranges = append(ranges, offsetRange)
		if offsetRange.Stop >= size {
			// The range was extended all the way to the end of the file.
			return ranges, nil
		}
		offset = offsetRange.Stop
	}

	// Whatever remains is shorter than a preferred chunk (or has no line
	// break left), so it is emitted as one final range.
	return append(ranges, OffsetRange{
		File:  filename,
		Start: offset,
		Stop:  size,
	}), nil
}
// nextRange returns the range of file that begins at start and whose end is
// the offset skipPartialLine reports when scanning forward from stop.
//
// On failure the zero OffsetRange is returned together with the error from
// skipPartialLine.
func nextRange(file *os.File, start, stop int64) (OffsetRange, error) {
	var result OffsetRange

	end, err := skipPartialLine(file, stop)
	if err == nil {
		result = OffsetRange{File: file.Name(), Start: start, Stop: end}
	}

	return result, err
}
// skipPartialLine scans file forward from offset and returns the offset of
// the first byte that follows the next run of line-break bytes ('\r' or
// '\n'). Bytes before the run (the remainder of the line that offset points
// into) and the run itself are skipped.
//
// If the run of line-break bytes extends to the end of the file, the offset
// of the end of the file is returned. If the end of the file is reached
// before any line-break byte is seen, it returns 0 and io.EOF. Any other read
// error is returned as is, with a 0 offset.
func skipPartialLine(file *os.File, offset int64) (int64, error) {
	// A single scratch buffer is reused for every read.
	buf := make([]byte, bufSize)
	// inBreak records that at least one line-break byte has been consumed. It
	// is kept across reads so that a run such as "\r\n" that straddles two
	// buffers is skipped as a whole instead of being cut in the middle.
	inBreak := false

	for {
		n, err := file.ReadAt(buf, offset)
		if err != nil && err != io.EOF {
			return 0, err
		}

		for _, b := range buf[:n] {
			isBreak := b == '\r' || b == '\n'
			if inBreak && !isBreak {
				// First byte of the next line.
				return offset, nil
			}
			if isBreak {
				inBreak = true
			}
			offset++
		}

		// Nothing more to read: either the break run ended the file, or no
		// line break exists after the starting offset.
		if n == 0 || err == io.EOF {
			if inBreak {
				return offset, nil
			}
			return 0, io.EOF
		}
	}
}