diff --git a/README.md b/README.md index 5ed2b58..4474fec 100644 --- a/README.md +++ b/README.md @@ -4,17 +4,17 @@ Simple key-value store for single-user applications. ## Pros -* Simple one file structure +* Simple two file structure (data file and index file) * Internal Zstandard compression by [klauspost/compress/zstd](https://github.com/klauspost/compress/tree/master/zstd) * Threadsafe operations through `sync.RWMutex` ## Cons * Index stored in memory (`map[key hash (28 bytes)]file offset (int64)`) -* Need to read the whole file on store open to create file index (you can use index file options to avoid this) +* No transaction system +* Index file is fully rewrited on every store commit * No way to recover disk space from deleted records * Write/Delete operations block Read and each other operations -* Need to decode whole file until stored value ## Usage @@ -63,9 +63,6 @@ type Options struct { // Disk write buffer size in bytes DiskBufferSize int - - // Use index file - UseIndexFile bool } ``` @@ -87,6 +84,17 @@ File is log stuctured list of commands: | Length | Record body bytes length | int64 | | Body | Gob-encoded record | variable | +Index file is simple gob-encoded map: + +```go +map[string]struct { + BlockOffset int64 + RecordOffset int64 +} +``` + +where map key is data key hash and value - data offset in data file. + ## Resource consumption Store requirements: @@ -97,4 +105,4 @@ Store requirements: ## TODO - [ ] Add recovery previous state of store file on write error -- [ ] Add fast file seek to value (add compressed block start position) +- [ ] Add method for index rebuild diff --git a/TestBufferRead.zkv.idx b/TestBufferRead.zkv.idx deleted file mode 100644 index ced4e78..0000000 Binary files a/TestBufferRead.zkv.idx and /dev/null differ diff --git a/TestDeleteBasic.zkv.idx b/TestDeleteBasic.zkv.idx new file mode 100644 index 0000000..b341db7 Binary files /dev/null and b/TestDeleteBasic.zkv.idx differ diff --git a/TestSmallWrites.zkv.idx b/TestSmallWrites.zkv.idx index 15a6d38..6de70da 100644 Binary files a/TestSmallWrites.zkv.idx and b/TestSmallWrites.zkv.idx differ diff --git a/go.mod b/go.mod index 392f6ec..4dc95a4 100644 --- a/go.mod +++ b/go.mod @@ -3,8 +3,8 @@ module github.com/nxshock/zkv go 1.19 require ( - github.com/klauspost/compress v1.15.12 - github.com/stretchr/testify v1.8.1 + github.com/klauspost/compress v1.16.4 + github.com/stretchr/testify v1.8.2 ) require ( diff --git a/go.sum b/go.sum index 8609118..abc35b1 100644 --- a/go.sum +++ b/go.sum @@ -1,8 +1,8 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/klauspost/compress v1.15.12 h1:YClS/PImqYbn+UILDnqxQCZ3RehC9N318SU3kElDUEM= -github.com/klauspost/compress v1.15.12/go.mod h1:QPwzmACJjUTFsnSHH934V6woptycfrDDJnH7hvFVbGM= +github.com/klauspost/compress v1.16.4 h1:91KN02FnsOYhuunwU4ssRe8lc2JosWmizWa91B5v1PU= +github.com/klauspost/compress v1.16.4/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -10,8 +10,8 @@ github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSS github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= -github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= -github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ8= +github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/record_test.go b/record_test.go new file mode 100644 index 0000000..1e69354 --- /dev/null +++ b/record_test.go @@ -0,0 +1,35 @@ +package zkv + +import ( + "bytes" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestRecord(t *testing.T) { + buf := new(bytes.Buffer) + + var records []Record + + for i := 0; i < 10; i++ { + record, err := newRecord(RecordTypeSet, i, i) + assert.NoError(t, err) + + records = append(records, *record) + + b, err := record.Marshal() + assert.NoError(t, err) + + _, err = buf.Write(b) + assert.NoError(t, err) + } + + for i := 0; i < 10; i++ { + _, record, err := readRecord(buf) + assert.NoError(t, err) + + assert.Equal(t, record.KeyHash, records[i].KeyHash) + assert.Equal(t, record.ValueBytes, records[i].ValueBytes) + } +} diff --git a/testdata/TestReadBlock.zkv b/testdata/TestReadBlock.zkv new file mode 100644 index 0000000..7916c5b Binary files /dev/null and b/testdata/TestReadBlock.zkv differ diff --git a/utils.go b/utils.go index 97d63be..ae570b2 100644 --- a/utils.go +++ b/utils.go @@ -4,7 +4,9 @@ import ( "bytes" "crypto/sha256" "encoding/gob" + "errors" "io" + "os" ) func encode(value interface{}) ([]byte, error) { @@ -40,3 +42,13 @@ func skip(r io.Reader, count int64) (err error) { return err } + +func isFileExists(filePath string) (bool, error) { + if _, err := os.Stat(filePath); err == nil { + return true, nil + } else if errors.Is(err, os.ErrNotExist) { + return false, nil + } else { + return false, err + } +} diff --git a/zkv.go b/zkv.go index 1c4d543..8a2b7d6 100644 --- a/zkv.go +++ b/zkv.go @@ -37,7 +37,7 @@ type Store struct { func OpenWithOptions(filePath string, options Options) (*Store, error) { options.setDefaults() - database := &Store{ + store := &Store{ dataOffset: make(map[string]Offsets), bufferDataOffset: make(map[string]int64), buffer: new(bytes.Buffer), @@ -48,50 +48,30 @@ func OpenWithOptions(filePath string, options Options) (*Store, error) { if options.useIndexFile { idxFile, err := os.Open(filePath + indexFileExt) if err == nil { - err = gob.NewDecoder(idxFile).Decode(&database.dataOffset) - if err == nil { - return database, nil + err = gob.NewDecoder(idxFile).Decode(&store.dataOffset) + if err != nil { + return nil, err } + + return store, nil } } - // restore file data - readF, err := os.Open(filePath) - if os.IsNotExist(err) { - // Empty datebase - return database, nil - } else if err != nil { - return nil, fmt.Errorf("open file for indexing: %v", err) - } - defer readF.Close() - - decompressor, err := zstd.NewReader(readF) + exists, err := isFileExists(filePath) if err != nil { - return nil, fmt.Errorf("decompressor initialization: %v", err) - } - defer decompressor.Close() - - offset := int64(0) - for { - n, record, err := readRecord(decompressor) - if err == io.EOF { - break - } - if err != nil { - return nil, fmt.Errorf("read record error: %v", err) - } - - switch record.Type { - case RecordTypeSet: - database.dataOffset[string(record.KeyHash[:])] = Offsets{} // offset - case RecordTypeDelete: - delete(database.dataOffset, string(record.KeyHash[:])) - } - - offset += n + return nil, err } - return database, nil + if !exists { + return store, nil + } + + err = store.rebuildIndex() + if err != nil { + return nil, err + } + + return store, nil } func Open(filePath string) (*Store, error) { @@ -395,14 +375,7 @@ func (s *Store) flush() error { // Update index file only on data update if s.options.useIndexFile && l > 0 { - idxBuf := new(bytes.Buffer) - - err = gob.NewEncoder(idxBuf).Encode(s.dataOffset) - if err != nil { - return err - } - - err = os.WriteFile(s.filePath+indexFileExt, idxBuf.Bytes(), 0644) + err = s.saveIndex() if err != nil { return err } @@ -410,3 +383,126 @@ func (s *Store) flush() error { return nil } + +func readBlock(r *bufio.Reader) (line []byte, n int, err error) { + delim := []byte{0x28, 0xb5, 0x2f, 0xfd} + + line = make([]byte, len(delim)) + copy(line, delim) + + for { + s, err := r.ReadBytes(delim[len(delim)-1]) + line = append(line, []byte(s)...) + if err != nil { + if bytes.Equal(line, delim) { // contains only magic number + return []byte{}, 0, err + } else { + return line, len(s), err + } + } + + if bytes.Equal(line, append(delim, delim...)) { // first block + line = make([]byte, len(delim)) + copy(line, delim) + continue + } + + if bytes.HasSuffix(line, delim) { + return line[:len(line)-len(delim)], len(s), nil + } + } +} + +// RebuildIndex renews index from store file +func (s *Store) RebuildIndex() error { + s.mu.Lock() + defer s.mu.Unlock() + + err := s.rebuildIndex() + if err != nil { + return err + } + + if s.options.useIndexFile { + return s.saveIndex() + } + + return nil +} + +func (s *Store) rebuildIndex() error { + f, err := os.Open(s.filePath) + if err != nil { + return err + } + defer f.Close() + + r := bufio.NewReader(f) + + var blockOffset int64 + + s.dataOffset = make(map[string]Offsets) + + for { + l, n, err := readBlock(r) + if err != nil { + if err != io.EOF { + return err + } else if err == io.EOF && len(l) == 0 { + break + } + } + + dec, err := zstd.NewReader(bytes.NewReader(l)) + + var recordOffset int64 + for { + n, record, err := readRecord(dec) + if err != nil { + if err == io.EOF { + break + } else { + return err + } + } + + switch record.Type { + case RecordTypeSet: + s.dataOffset[string(record.KeyHash[:])] = Offsets{BlockOffset: blockOffset, RecordOffset: recordOffset} + case RecordTypeDelete: + delete(s.dataOffset, string(record.KeyHash[:])) + } + recordOffset += n + } + + blockOffset += int64(n) + } + + idxBuf := new(bytes.Buffer) + + err = gob.NewEncoder(idxBuf).Encode(s.dataOffset) + if err != nil { + return err + } + + err = os.WriteFile(s.filePath+indexFileExt, idxBuf.Bytes(), 0644) + if err != nil { + return err + } + + return nil +} + +func (s *Store) saveIndex() error { + f, err := os.OpenFile(s.filePath+indexFileExt, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644) + if err != nil { + return err + } + + err = gob.NewEncoder(f).Encode(s.dataOffset) + if err != nil { + return err + } + + return f.Close() +} diff --git a/zkv_test.go b/zkv_test.go index 4ec0d41..ec2d2de 100644 --- a/zkv_test.go +++ b/zkv_test.go @@ -1,40 +1,14 @@ package zkv import ( - "bytes" + "bufio" + "io" "os" "testing" "github.com/stretchr/testify/assert" ) -func TestRecord(t *testing.T) { - buf := new(bytes.Buffer) - - var records []Record - - for i := 0; i < 10; i++ { - record, err := newRecord(RecordTypeSet, i, i) - assert.NoError(t, err) - - records = append(records, *record) - - b, err := record.Marshal() - assert.NoError(t, err) - - _, err = buf.Write(b) - assert.NoError(t, err) - } - - for i := 0; i < 10; i++ { - _, record, err := readRecord(buf) - assert.NoError(t, err) - - assert.Equal(t, record.KeyHash, records[i].KeyHash) - assert.Equal(t, record.ValueBytes, records[i].ValueBytes) - } -} - func TestReadWriteBasic(t *testing.T) { const filePath = "TestReadWriteBasic.zkv" const recordCount = 100 @@ -383,3 +357,56 @@ func TestIndexFileBasic(t *testing.T) { err = db.Close() assert.NoError(t, err) } + +func TestReadBlock(t *testing.T) { + file, err := os.Open("testdata/TestReadBlock.zkv") + assert.NoError(t, err) + defer file.Close() + + r := bufio.NewReader(file) + + line, _, err := readBlock(r) + assert.Equal(t, []byte{0x28, 0xb5, 0x2f, 0xfd, 0x04, 0x00, 0x99, 0x04, 0x00, 0x8b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x39, 0xff, 0x81, 0x03, 0x01, 0x01, 0x06, 0x52, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x01, 0xff, 0x82, 0x00, 0x01, 0x03, 0x01, 0x04, 0x54, 0x79, 0x70, 0x65, 0x01, 0x06, 0x00, 0x01, 0x07, 0x4b, 0x65, 0x79, 0x48, 0x61, 0x73, 0x68, 0x01, 0xff, 0x84, 0x00, 0x01, 0x0a, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x42, 0x79, 0x74, 0x65, 0x73, 0x01, 0x0a, 0x00, 0x00, 0x00, 0x19, 0xff, 0x83, 0x01, 0x01, 0x01, 0x09, 0x5b, 0x32, 0x38, 0x5d, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x01, 0xff, 0x84, 0x00, 0x01, 0x06, 0x01, 0x38, 0x00, 0x00, 0x36, 0xff, 0x82, 0x01, 0x01, 0x01, 0x1c, 0xff, 0x90, 0xff, 0xf4, 0x25, 0x15, 0x70, 0x75, 0x5c, 0xff, 0xf4, 0xff, 0xbc, 0xff, 0xf9, 0xff, 0xde, 0xff, 0x93, 0xff, 0xf8, 0x0d, 0x0e, 0x78, 0x5b, 0xff, 0x81, 0xff, 0x95, 0x6e, 0xff, 0xab, 0x4b, 0xff, 0xe8, 0x37, 0xff, 0x97, 0x68, 0x41, 0x3d, 0x01, 0x04, 0x03, 0x04, 0x00, 0x02, 0x00, 0x25, 0xd5, 0x63, 0x21}, line) + line, _, err = readBlock(r) + assert.Equal(t, []byte{0x28, 0xb5, 0x2f, 0xfd, 0x04, 0x00, 0x89, 0x04, 0x00, 0x89, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x39, 0xff, 0x81, 0x03, 0x01, 0x01, 0x06, 0x52, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x01, 0xff, 0x82, 0x00, 0x01, 0x03, 0x01, 0x04, 0x54, 0x79, 0x70, 0x65, 0x01, 0x06, 0x00, 0x01, 0x07, 0x4b, 0x65, 0x79, 0x48, 0x61, 0x73, 0x68, 0x01, 0xff, 0x84, 0x00, 0x01, 0x0a, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x42, 0x79, 0x74, 0x65, 0x73, 0x01, 0x0a, 0x00, 0x00, 0x00, 0x19, 0xff, 0x83, 0x01, 0x01, 0x01, 0x09, 0x5b, 0x32, 0x38, 0x5d, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x01, 0xff, 0x84, 0x00, 0x01, 0x06, 0x01, 0x38, 0x00, 0x00, 0x34, 0xff, 0x82, 0x01, 0x01, 0x01, 0x1c, 0xff, 0x84, 0xff, 0x84, 0xff, 0xc1, 0x21, 0x02, 0xff, 0x8b, 0xff, 0xd7, 0x6d, 0xff, 0xd0, 0xff, 0xad, 0x1a, 0x55, 0x14, 0x5c, 0xff, 0xb1, 0x04, 0x37, 0x29, 0x2f, 0x78, 0x18, 0xff, 0xb5, 0xff, 0xe4, 0x56, 0x4e, 0xff, 0x8d, 0x19, 0x46, 0x01, 0x04, 0x03, 0x04, 0x00, 0x04, 0x00, 0x0c, 0x3b, 0xbf, 0x39}, line) + line, _, err = readBlock(r) + assert.Equal(t, []byte{0x28, 0xb5, 0x2f, 0xfd, 0x04, 0x00, 0x99, 0x04, 0x00, 0x8b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x39, 0xff, 0x81, 0x03, 0x01, 0x01, 0x06, 0x52, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x01, 0xff, 0x82, 0x00, 0x01, 0x03, 0x01, 0x04, 0x54, 0x79, 0x70, 0x65, 0x01, 0x06, 0x00, 0x01, 0x07, 0x4b, 0x65, 0x79, 0x48, 0x61, 0x73, 0x68, 0x01, 0xff, 0x84, 0x00, 0x01, 0x0a, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x42, 0x79, 0x74, 0x65, 0x73, 0x01, 0x0a, 0x00, 0x00, 0x00, 0x19, 0xff, 0x83, 0x01, 0x01, 0x01, 0x09, 0x5b, 0x32, 0x38, 0x5d, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x01, 0xff, 0x84, 0x00, 0x01, 0x06, 0x01, 0x38, 0x00, 0x00, 0x36, 0xff, 0x82, 0x01, 0x01, 0x01, 0x1c, 0x25, 0x79, 0x3e, 0x46, 0x4e, 0xff, 0xac, 0x06, 0x27, 0xff, 0xb1, 0xff, 0xa3, 0xff, 0xaa, 0xff, 0xe3, 0xff, 0xde, 0x37, 0x71, 0x63, 0x72, 0xff, 0x89, 0x0d, 0xff, 0x85, 0x39, 0xff, 0xb5, 0xff, 0xb9, 0xff, 0x8a, 0xff, 0x9e, 0x60, 0xff, 0xad, 0x17, 0x01, 0x04, 0x03, 0x04, 0x00, 0x06, 0x00, 0x52, 0x08, 0x3e, 0x26}, line) + line, _, err = readBlock(r) + assert.Equal(t, []byte{0x28, 0xb5, 0x2f, 0xfd, 0x04, 0x00, 0xc9, 0x04, 0x00, 0x91, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x39, 0xff, 0x81, 0x03, 0x01, 0x01, 0x06, 0x52, 0x65, 0x63, 0x6f, 0x72, 0x64, 0x01, 0xff, 0x82, 0x00, 0x01, 0x03, 0x01, 0x04, 0x54, 0x79, 0x70, 0x65, 0x01, 0x06, 0x00, 0x01, 0x07, 0x4b, 0x65, 0x79, 0x48, 0x61, 0x73, 0x68, 0x01, 0xff, 0x84, 0x00, 0x01, 0x0a, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x42, 0x79, 0x74, 0x65, 0x73, 0x01, 0x0a, 0x00, 0x00, 0x00, 0x19, 0xff, 0x83, 0x01, 0x01, 0x01, 0x09, 0x5b, 0x32, 0x38, 0x5d, 0x75, 0x69, 0x6e, 0x74, 0x38, 0x01, 0xff, 0x84, 0x00, 0x01, 0x06, 0x01, 0x38, 0x00, 0x00, 0x3c, 0xff, 0x82, 0x01, 0x01, 0x01, 0x1c, 0xff, 0xbf, 0x25, 0xff, 0xef, 0xff, 0xc8, 0xff, 0x85, 0x2c, 0xff, 0xbf, 0xff, 0xb5, 0xff, 0xad, 0xff, 0xfa, 0xff, 0xaf, 0x1c, 0xff, 0xe7, 0x71, 0xff, 0xfa, 0x36, 0xff, 0x95, 0x1b, 0xff, 0x91, 0xff, 0xab, 0x36, 0xff, 0xcd, 0x7a, 0x33, 0xff, 0xf7, 0xff, 0xec, 0xff, 0xee, 0xff, 0xc1, 0x01, 0x04, 0x03, 0x04, 0x00, 0x08, 0x00, 0xa5, 0x0e, 0x62, 0x53}, line) + + line, _, err = readBlock(r) + assert.Equal(t, line, []byte{}) + assert.Equal(t, io.EOF, err) +} + +func TestRebuildIndex(t *testing.T) { + const filePath = "TestRebuiltIndex.zkv" + const recordCount = 4 + defer os.Remove(filePath) + defer os.Remove(filePath + indexFileExt) + + for i := 1; i <= recordCount; i++ { + db, err := Open(filePath) + assert.NoError(t, err) + + err = db.Set(i, i) + assert.NoError(t, err) + + err = db.Close() + assert.NoError(t, err) + } + + db, err := Open(filePath) + assert.NoError(t, err) + + err = db.RebuildIndex() + assert.NoError(t, err) + + for i := 1; i <= recordCount; i++ { + var gotValue int + + err = db.Get(i, &gotValue) + assert.NoError(t, err) + assert.Equal(t, i, gotValue) + } +}