From 533eddaed4a2504a0a68387d8df296598008e1b1 Mon Sep 17 00:00:00 2001 From: nxshock Date: Sat, 10 Dec 2022 21:34:16 +0500 Subject: [PATCH 1/4] Fix typo --- options.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/options.go b/options.go index 1471014..103fe4e 100644 --- a/options.go +++ b/options.go @@ -12,7 +12,7 @@ type Options struct { // Memory write buffer size in bytes MemoryBufferSize int - // Diwk write buffer size in bytes + // Disk write buffer size in bytes DiskBufferSize int } From 82a36a1b9eb7aa87926df69b6a88bf7360804d94 Mon Sep 17 00:00:00 2001 From: nxshock Date: Sat, 10 Dec 2022 21:39:24 +0500 Subject: [PATCH 2/4] Add separate index file option --- README.md | 25 +++++++++++++++++++++++-- defaults.go | 3 +++ options.go | 11 +++++++++++ zkv.go | 29 ++++++++++++++++++++++++++++- zkv_test.go | 47 ++++++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 111 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 4db5c20..f27c5e2 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ Simple key-value store for single-user applications. 
## Cons * Index stored in memory (`map[key hash (28 bytes)]file offset (int64)`) - average 200-250 Mb of RAM per 1M keys -* Need to read the whole file on store open to create file index +* Need to read the whole file on store open to create file index (you can use index file options to avoid this) * No way to recover disk space from deleted records * Write/Delete operations block Read and each other operations @@ -47,6 +47,28 @@ err = db.Flush() err = db.Backup("new/file/path") ``` +## Store options + +```go +type Options struct { + // Maximum number of concurrent reads + MaxParallelReads int + + // Compression level + CompressionLevel zstd.EncoderLevel + + // Memory write buffer size in bytes + MemoryBufferSize int + + // Disk write buffer size in bytes + DiskBufferSize int + + // Use index file + UseIndexFile bool +} + +``` + ## File structure Record is `encoding/gob` structure: @@ -66,5 +88,4 @@ File is log stuctured list of commands: ## TODO -- [ ] Implement optional separate index file to speedup store initialization - [ ] Add recovery previous state of store file on write error diff --git a/defaults.go b/defaults.go index f22add0..d263685 100644 --- a/defaults.go +++ b/defaults.go @@ -11,4 +11,7 @@ var defaultOptions = Options{ CompressionLevel: zstd.SpeedDefault, MemoryBufferSize: 4 * 1024 * 1024, DiskBufferSize: 1 * 1024 * 1024, + UseIndexFile: false, } + +const indexFileExt = ".idx" diff --git a/options.go b/options.go index 103fe4e..f42f14f 100644 --- a/options.go +++ b/options.go @@ -14,6 +14,9 @@ type Options struct { // Disk write buffer size in bytes DiskBufferSize int + + // Use index file + UseIndexFile bool } func (o *Options) setDefaults() { @@ -24,4 +27,12 @@ func (o *Options) setDefaults() { if o.CompressionLevel == 0 { o.CompressionLevel = defaultOptions.CompressionLevel } + + if o.MemoryBufferSize == 0 { + o.MemoryBufferSize = defaultOptions.MemoryBufferSize + } + + if o.DiskBufferSize == 0 { + o.DiskBufferSize = defaultOptions.DiskBufferSize 
+ } } diff --git a/zkv.go b/zkv.go index 11d2cee..4706c28 100644 --- a/zkv.go +++ b/zkv.go @@ -5,6 +5,7 @@ import ( "bytes" "crypto/sha256" "encoding/base64" + "encoding/gob" "fmt" "io" "os" @@ -41,6 +42,16 @@ func OpenWithOptions(filePath string, options Options) (*Store, error) { options: options, readOrderChan: make(chan struct{}, int(options.MaxParallelReads))} + if options.UseIndexFile { + idxFile, err := os.Open(filePath + indexFileExt) + if err == nil { + err = gob.NewDecoder(idxFile).Decode(&database.dataOffset) + if err == nil { + return database, nil + } + } + } + // restore file data readF, err := os.Open(filePath) if os.IsNotExist(err) { @@ -81,7 +92,8 @@ func OpenWithOptions(filePath string, options Options) (*Store, error) { } func Open(filePath string) (*Store, error) { - return OpenWithOptions(filePath, defaultOptions) + options := defaultOptions + return OpenWithOptions(filePath, options) } func (s *Store) Set(key, value interface{}) error { @@ -416,5 +428,20 @@ func (s *Store) flush() error { return err } + // Update index file only on data update + if s.options.UseIndexFile && l > 0 { + idxBuf := new(bytes.Buffer) + + err = gob.NewEncoder(idxBuf).Encode(s.dataOffset) + if err != nil { + return err + } + + err = os.WriteFile(s.filePath+indexFileExt, idxBuf.Bytes(), 0644) + if err != nil { + return err + } + } + return nil } diff --git a/zkv_test.go b/zkv_test.go index 2b8005f..b10a2a2 100644 --- a/zkv_test.go +++ b/zkv_test.go @@ -327,5 +327,50 @@ func TestBackupWithDeletedRecords(t *testing.T) { err = db.Close() assert.NoError(t, err) - +} + +func TestIndexFileBasic(t *testing.T) { + const filePath = "TestReadWriteBasic.zkv" + const recordCount = 100 + defer os.Remove(filePath) + defer os.Remove(filePath + indexFileExt) + + db, err := OpenWithOptions(filePath, Options{UseIndexFile: true}) + assert.NoError(t, err) + + for i := 1; i <= recordCount; i++ { + err = db.Set(i, i) + assert.NoError(t, err) + } + + assert.Len(t, db.dataOffset, 0) + 
assert.Len(t, db.bufferDataOffset, recordCount) + + for i := 1; i <= recordCount; i++ { + var gotValue int + + err = db.Get(i, &gotValue) + assert.NoError(t, err) + assert.Equal(t, i, gotValue) + } + + err = db.Close() + assert.NoError(t, err) + + // try to read + db, err = OpenWithOptions(filePath, Options{UseIndexFile: true}) + assert.NoError(t, err) + + assert.Len(t, db.dataOffset, recordCount) + + for i := 1; i <= recordCount; i++ { + var gotValue int + + err = db.Get(i, &gotValue) + assert.NoError(t, err) + assert.Equal(t, i, gotValue) + } + + err = db.Close() + assert.NoError(t, err) } From 28f43e56d5abc320dfeaf00294bd6433fdfe6133 Mon Sep 17 00:00:00 2001 From: nxshock Date: Sat, 10 Dec 2022 22:00:08 +0500 Subject: [PATCH 3/4] Add resource consumption block --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index f27c5e2..cfa473f 100644 --- a/README.md +++ b/README.md @@ -86,6 +86,13 @@ File is log stuctured list of commands: | Length | Record body bytes length | int64 | | Body | Gob-encoded record | variable | +## Resource consumption + +Store requirements: + +* around 300 Mb of RAM per 1 million of keys +* around 34 Mb of disk space for index file per 1 million of keys + ## TODO - [ ] Add recovery previous state of store file on write error From 0458ac515222c3c841b3370dff03e89565042a18 Mon Sep 17 00:00:00 2001 From: nxshock Date: Sat, 10 Dec 2022 22:00:42 +0500 Subject: [PATCH 4/4] Add info about read value issue --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index cfa473f..5ed2b58 100644 --- a/README.md +++ b/README.md @@ -10,10 +10,11 @@ Simple key-value store for single-user applications. 
## Cons -* Index stored in memory (`map[key hash (28 bytes)]file offset (int64)`) - average 200-250 Mb of RAM per 1M keys +* Index stored in memory (`map[key hash (28 bytes)]file offset (int64)`) * Need to read the whole file on store open to create file index (you can use index file options to avoid this) * No way to recover disk space from deleted records * Write/Delete operations block Read and each other operations +* Reading a value requires decoding the whole file up to the position where that value is stored ## Usage @@ -96,3 +97,4 @@ Store requirements: ## TODO - [ ] Add recovery previous state of store file on write error +- [ ] Add fast file seek to a value (store the compressed block start position)