mirror of https://github.com/nxshock/zkv.git synced 2025-04-20 09:21:50 +05:00

Compare commits


3 Commits

Author SHA1 Message Date
d950b6546c Add notes about current store state 2022-12-11 21:33:51 +05:00
5f0d33828f Improve store read speed by skipping store blocks 2022-12-11 21:00:36 +05:00
412ddb11a8 Remove duplicated code 2022-12-11 18:16:23 +05:00
7 changed files with 57 additions and 73 deletions

View File

@ -4,17 +4,17 @@ Simple key-value store for single-user applications.
## Pros ## Pros
* Simple one file structure * Simple two file structure (data file and index file)
* Internal Zstandard compression by [klauspost/compress/zstd](https://github.com/klauspost/compress/tree/master/zstd) * Internal Zstandard compression by [klauspost/compress/zstd](https://github.com/klauspost/compress/tree/master/zstd)
* Threadsafe operations through `sync.RWMutex` * Threadsafe operations through `sync.RWMutex`
## Cons ## Cons
* Index stored in memory (`map[key hash (28 bytes)]file offset (int64)`) * Index stored in memory (`map[key hash (28 bytes)]file offset (int64)`)
* Need to read the whole file on store open to create file index (you can use index file options to avoid this) * No transaction system
* Index file is fully rewritten on every store commit
* No way to recover disk space from deleted records * No way to recover disk space from deleted records
* Write/Delete operations block Read and each other operations * Write/Delete operations block Read and each other operations
* Need to decode whole file until stored value
## Usage ## Usage
@ -63,9 +63,6 @@ type Options struct {
// Disk write buffer size in bytes // Disk write buffer size in bytes
DiskBufferSize int DiskBufferSize int
// Use index file
UseIndexFile bool
} }
``` ```
@ -87,6 +84,17 @@ File is a log-structured list of commands:
| Length | Record body bytes length | int64 | | Length | Record body bytes length | int64 |
| Body | Gob-encoded record | variable | | Body | Gob-encoded record | variable |
The index file is a simple gob-encoded map:
```go
map[string]struct {
BlockOffset int64
RecordOffset int64
}
```
where the map key is the data key hash and the value is the record's offset within the data file.
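
For illustration, a hedged sketch of decoding such an index file by hand; the standalone `Offsets` definition mirrors the struct added in `zkv.go`, and the store path is a made-up example (the store itself performs this decode in `OpenWithOptions`):

```go
package main

import (
	"encoding/gob"
	"fmt"
	"log"
	"os"
)

// Offsets mirrors the value type from zkv.go: where the compressed
// block starts in the data file, and where the record starts inside
// the decompressed block.
type Offsets struct {
	BlockOffset  int64
	RecordOffset int64
}

func main() {
	// "store.zkv" is a hypothetical store path; indexFileExt is ".idx".
	f, err := os.Open("store.zkv" + ".idx")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// The map key is the 28-byte SHA-224 key hash stored as a string.
	index := make(map[string]Offsets)
	if err := gob.NewDecoder(f).Decode(&index); err != nil {
		log.Fatal(err)
	}
	fmt.Printf("index entries: %d\n", len(index))
}
```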
## Resource consumption ## Resource consumption
Store requirements: Store requirements:
@ -97,4 +105,4 @@ Store requirements:
## TODO ## TODO
- [ ] Add recovery of previous store file state on write error - [ ] Add recovery of previous store file state on write error
- [ ] Add fast file seek to value (add compressed block start position) - [ ] Add method for index rebuild

BIN
TestBufferRead.zkv.idx Normal file

Binary file not shown.

BIN
TestSmallWrites.zkv.idx Normal file

Binary file not shown.

View File

@ -11,7 +11,7 @@ var defaultOptions = Options{
CompressionLevel: zstd.SpeedDefault, CompressionLevel: zstd.SpeedDefault,
MemoryBufferSize: 4 * 1024 * 1024, MemoryBufferSize: 4 * 1024 * 1024,
DiskBufferSize: 1 * 1024 * 1024, DiskBufferSize: 1 * 1024 * 1024,
UseIndexFile: false, useIndexFile: true,
} }
const indexFileExt = ".idx" const indexFileExt = ".idx"

View File

@ -16,10 +16,12 @@ type Options struct {
DiskBufferSize int DiskBufferSize int
// Use index file // Use index file
UseIndexFile bool useIndexFile bool
} }
func (o *Options) setDefaults() { func (o *Options) setDefaults() {
o.useIndexFile = true // TODO: implement database search without index
if o.MaxParallelReads == 0 { if o.MaxParallelReads == 0 {
o.MaxParallelReads = defaultOptions.MaxParallelReads o.MaxParallelReads = defaultOptions.MaxParallelReads
} }
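
Since `useIndexFile` is now private and forced on in `setDefaults`, callers simply stop passing the old flag. A minimal usage sketch, assuming the import path from the repository URL and that the store exposes a `Close` method:

```go
package main

import (
	"log"

	"github.com/nxshock/zkv"
)

func main() {
	// UseIndexFile is no longer part of the public Options; the
	// ".idx" file is now always maintained next to the data file.
	db, err := zkv.OpenWithOptions("store.zkv", zkv.Options{
		MemoryBufferSize: 4 * 1024 * 1024,
	})
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close() // assumed API; expected to flush buffered writes
}
```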

zkv.go (87 lines changed)
View File

@ -14,11 +14,15 @@ import (
"github.com/klauspost/compress/zstd" "github.com/klauspost/compress/zstd"
) )
type Offsets struct {
BlockOffset int64
RecordOffset int64
}
type Store struct { type Store struct {
dataOffset map[string]int64 dataOffset map[string]Offsets
filePath string filePath string
offset int64
buffer *bytes.Buffer buffer *bytes.Buffer
bufferDataOffset map[string]int64 bufferDataOffset map[string]int64
@ -34,15 +38,14 @@ func OpenWithOptions(filePath string, options Options) (*Store, error) {
options.setDefaults() options.setDefaults()
database := &Store{ database := &Store{
dataOffset: make(map[string]int64), dataOffset: make(map[string]Offsets),
bufferDataOffset: make(map[string]int64), bufferDataOffset: make(map[string]int64),
offset: 0,
buffer: new(bytes.Buffer), buffer: new(bytes.Buffer),
filePath: filePath, filePath: filePath,
options: options, options: options,
readOrderChan: make(chan struct{}, int(options.MaxParallelReads))} readOrderChan: make(chan struct{}, int(options.MaxParallelReads))}
if options.UseIndexFile { if options.useIndexFile {
idxFile, err := os.Open(filePath + indexFileExt) idxFile, err := os.Open(filePath + indexFileExt)
if err == nil { if err == nil {
err = gob.NewDecoder(idxFile).Decode(&database.dataOffset) err = gob.NewDecoder(idxFile).Decode(&database.dataOffset)
@ -80,7 +83,7 @@ func OpenWithOptions(filePath string, options Options) (*Store, error) {
switch record.Type { switch record.Type {
case RecordTypeSet: case RecordTypeSet:
database.dataOffset[string(record.KeyHash[:])] = offset database.dataOffset[string(record.KeyHash[:])] = Offsets{} // offset
case RecordTypeDelete: case RecordTypeDelete:
delete(database.dataOffset, string(record.KeyHash[:])) delete(database.dataOffset, string(record.KeyHash[:]))
} }
@ -283,7 +286,7 @@ func (s *Store) getGobBytes(keyHash [sha256.Size224]byte) ([]byte, error) {
return record.ValueBytes, nil return record.ValueBytes, nil
} }
offset, exists = s.dataOffset[string(keyHash[:])] offsets, exists := s.dataOffset[string(keyHash[:])]
if !exists { if !exists {
return nil, ErrNotExists return nil, ErrNotExists
} }
@ -294,13 +297,18 @@ func (s *Store) getGobBytes(keyHash [sha256.Size224]byte) ([]byte, error) {
} }
defer readF.Close() defer readF.Close()
_, err = readF.Seek(offsets.BlockOffset, io.SeekStart)
if err != nil {
return nil, err
}
decompressor, err := zstd.NewReader(readF) decompressor, err := zstd.NewReader(readF)
if err != nil { if err != nil {
return nil, err return nil, err
} }
defer decompressor.Close() defer decompressor.Close()
err = skip(decompressor, offset) err = skip(decompressor, offsets.RecordOffset)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -317,7 +325,6 @@ func (s *Store) getGobBytes(keyHash [sha256.Size224]byte) ([]byte, error) {
} }
return record.ValueBytes, nil return record.ValueBytes, nil
} }
func (s *Store) get(key, value interface{}) error { func (s *Store) get(key, value interface{}) error {
@ -329,57 +336,12 @@ func (s *Store) get(key, value interface{}) error {
return err return err
} }
offset, exists := s.bufferDataOffset[string(hashToFind[:])] b, err := s.getGobBytes(hashToFind)
if exists {
reader := bytes.NewReader(s.buffer.Bytes())
err = skip(reader, offset)
if err != nil { if err != nil {
return err return err
} }
_, record, err := readRecord(reader) return decode(b, value)
if err != nil {
return err
}
return decode(record.ValueBytes, value)
}
offset, exists = s.dataOffset[string(hashToFind[:])]
if !exists {
return ErrNotExists
}
readF, err := os.Open(s.filePath)
if err != nil {
return err
}
defer readF.Close()
decompressor, err := zstd.NewReader(readF)
if err != nil {
return err
}
defer decompressor.Close()
err = skip(decompressor, offset)
if err != nil {
return err
}
_, record, err := readRecord(decompressor)
if err != nil {
return err
}
if !bytes.Equal(record.KeyHash[:], hashToFind[:]) {
expectedHashStr := base64.StdEncoding.EncodeToString(hashToFind[:])
gotHashStr := base64.StdEncoding.EncodeToString(record.KeyHash[:])
return fmt.Errorf("wrong hash of offset %d: expected %s, got %s", offset, expectedHashStr, gotHashStr)
}
return decode(record.ValueBytes, value)
} }
func (s *Store) flush() error { func (s *Store) flush() error {
@ -389,13 +351,18 @@ func (s *Store) flush() error {
if err != nil { if err != nil {
return fmt.Errorf("open store file: %v", err) return fmt.Errorf("open store file: %v", err)
} }
stat, err := f.Stat()
if err != nil {
f.Close()
return fmt.Errorf("stat store file: %v", err)
}
diskWriteBuffer := bufio.NewWriterSize(f, s.options.DiskBufferSize) diskWriteBuffer := bufio.NewWriterSize(f, s.options.DiskBufferSize)
encoder, err := zstd.NewWriter(diskWriteBuffer, zstd.WithEncoderLevel(s.options.CompressionLevel)) encoder, err := zstd.NewWriter(diskWriteBuffer, zstd.WithEncoderLevel(s.options.CompressionLevel))
if err != nil { if err != nil {
f.Close() f.Close()
return fmt.Errorf("open store file: %v", err) return fmt.Errorf("init encoder: %v", err)
} }
_, err = s.buffer.WriteTo(encoder) _, err = s.buffer.WriteTo(encoder)
@ -404,13 +371,11 @@ func (s *Store) flush() error {
} }
for key, val := range s.bufferDataOffset { for key, val := range s.bufferDataOffset {
s.dataOffset[key] = val + s.offset s.dataOffset[key] = Offsets{BlockOffset: stat.Size(), RecordOffset: val}
} }
s.bufferDataOffset = make(map[string]int64) s.bufferDataOffset = make(map[string]int64)
s.offset += l
err = encoder.Close() err = encoder.Close()
if err != nil { if err != nil {
// TODO: truncate file to previous state // TODO: truncate file to previous state
@ -429,7 +394,7 @@ func (s *Store) flush() error {
} }
// Update index file only on data update // Update index file only on data update
if s.options.UseIndexFile && l > 0 { if s.options.useIndexFile && l > 0 {
idxBuf := new(bytes.Buffer) idxBuf := new(bytes.Buffer)
err = gob.NewEncoder(idxBuf).Encode(s.dataOffset) err = gob.NewEncoder(idxBuf).Encode(s.dataOffset)
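
Taken together, the new read path is: seek the raw data file to `BlockOffset`, open a zstd stream there, discard `RecordOffset` bytes of decompressed output, then decode the record. Below is a condensed sketch of that pattern under stated assumptions: `readBody` is a hypothetical helper, and the byte order of the length prefix is a guess; the real code uses its own `skip` and `readRecord` helpers.

```go
package sketch

import (
	"encoding/binary"
	"io"
	"os"

	"github.com/klauspost/compress/zstd"
)

// Offsets matches the struct introduced in this commit.
type Offsets struct {
	BlockOffset  int64
	RecordOffset int64
}

// readBody is a hypothetical helper showing the two-level seek:
// BlockOffset addresses the compressed file, RecordOffset addresses
// the decompressed stream inside that block.
func readBody(path string, off Offsets) ([]byte, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	// Jump straight to the start of the compressed block instead of
	// decoding the whole file from the beginning.
	if _, err := f.Seek(off.BlockOffset, io.SeekStart); err != nil {
		return nil, err
	}
	dec, err := zstd.NewReader(f)
	if err != nil {
		return nil, err
	}
	defer dec.Close()

	// Skip only the records that precede ours within this block.
	if _, err := io.CopyN(io.Discard, dec, off.RecordOffset); err != nil {
		return nil, err
	}

	// Record layout per the README: int64 length, then gob-encoded
	// body. Little-endian is an assumption for illustration.
	var n int64
	if err := binary.Read(dec, binary.LittleEndian, &n); err != nil {
		return nil, err
	}
	body := make([]byte, n)
	if _, err := io.ReadFull(dec, body); err != nil {
		return nil, err
	}
	return body, nil
}
```

This two-level addressing works because `flush` closes the zstd encoder on every commit, so each flushed block begins a fresh, independently decodable zstd frame, and `stat.Size()` taken before the write is exactly that frame's start.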

View File

@ -39,6 +39,7 @@ func TestReadWriteBasic(t *testing.T) {
const filePath = "TestReadWriteBasic.zkv" const filePath = "TestReadWriteBasic.zkv"
const recordCount = 100 const recordCount = 100
defer os.Remove(filePath) defer os.Remove(filePath)
defer os.Remove(filePath + indexFileExt)
db, err := Open(filePath) db, err := Open(filePath)
assert.NoError(t, err) assert.NoError(t, err)
@ -84,6 +85,7 @@ func TestSmallWrites(t *testing.T) {
const filePath = "TestSmallWrites.zkv" const filePath = "TestSmallWrites.zkv"
const recordCount = 100 const recordCount = 100
defer os.Remove(filePath) defer os.Remove(filePath)
defer os.Remove(filePath + indexFileExt)
for i := 1; i <= recordCount; i++ { for i := 1; i <= recordCount; i++ {
db, err := Open(filePath) db, err := Open(filePath)
@ -119,6 +121,7 @@ func TestDeleteBasic(t *testing.T) {
const filePath = "TestDeleteBasic.zkv" const filePath = "TestDeleteBasic.zkv"
const recordCount = 100 const recordCount = 100
defer os.Remove(filePath) defer os.Remove(filePath)
defer os.Remove(filePath + indexFileExt)
db, err := Open(filePath) db, err := Open(filePath)
assert.NoError(t, err) assert.NoError(t, err)
@ -164,6 +167,7 @@ func TestDeleteBasic(t *testing.T) {
func TestBufferBasic(t *testing.T) { func TestBufferBasic(t *testing.T) {
const filePath = "TestBuffer.zkv" const filePath = "TestBuffer.zkv"
defer os.Remove(filePath) defer os.Remove(filePath)
defer os.Remove(filePath + indexFileExt)
db, err := OpenWithOptions(filePath, Options{MemoryBufferSize: 100}) db, err := OpenWithOptions(filePath, Options{MemoryBufferSize: 100})
assert.NoError(t, err) assert.NoError(t, err)
@ -187,8 +191,9 @@ func TestBufferBasic(t *testing.T) {
func TestBufferRead(t *testing.T) { func TestBufferRead(t *testing.T) {
const filePath = "TestBufferRead.zkv" const filePath = "TestBufferRead.zkv"
const recordCount = 100 const recordCount = 2
defer os.Remove(filePath) defer os.Remove(filePath)
defer os.Remove(filePath + indexFileExt)
db, err := OpenWithOptions(filePath, Options{MemoryBufferSize: 100}) db, err := OpenWithOptions(filePath, Options{MemoryBufferSize: 100})
assert.NoError(t, err) assert.NoError(t, err)
@ -241,7 +246,9 @@ func TestBackupBasic(t *testing.T) {
const newFilePath = "TestBackupBasic2.zkv" const newFilePath = "TestBackupBasic2.zkv"
const recordCount = 100 const recordCount = 100
defer os.Remove(filePath) defer os.Remove(filePath)
defer os.Remove(filePath + indexFileExt)
defer os.Remove(newFilePath) defer os.Remove(newFilePath)
defer os.Remove(newFilePath + indexFileExt)
db, err := Open(filePath) db, err := Open(filePath)
assert.NoError(t, err) assert.NoError(t, err)
@ -280,7 +287,9 @@ func TestBackupWithDeletedRecords(t *testing.T) {
const newFilePath = "TestBackupWithDeletedRecords2.zkv" const newFilePath = "TestBackupWithDeletedRecords2.zkv"
const recordCount = 100 const recordCount = 100
defer os.Remove(filePath) defer os.Remove(filePath)
defer os.Remove(filePath + indexFileExt)
defer os.Remove(newFilePath) defer os.Remove(newFilePath)
defer os.Remove(newFilePath + indexFileExt)
db, err := Open(filePath) db, err := Open(filePath)
assert.NoError(t, err) assert.NoError(t, err)
@ -335,7 +344,7 @@ func TestIndexFileBasic(t *testing.T) {
defer os.Remove(filePath) defer os.Remove(filePath)
defer os.Remove(filePath + indexFileExt) defer os.Remove(filePath + indexFileExt)
db, err := OpenWithOptions(filePath, Options{UseIndexFile: true}) db, err := Open(filePath)
assert.NoError(t, err) assert.NoError(t, err)
for i := 1; i <= recordCount; i++ { for i := 1; i <= recordCount; i++ {
@ -358,7 +367,7 @@ func TestIndexFileBasic(t *testing.T) {
assert.NoError(t, err) assert.NoError(t, err)
// try to read // try to read
db, err = OpenWithOptions(filePath, Options{UseIndexFile: true}) db, err = Open(filePath)
assert.NoError(t, err) assert.NoError(t, err)
assert.Len(t, db.dataOffset, recordCount) assert.Len(t, db.dataOffset, recordCount)