1
0
mirror of https://github.com/nxshock/zkv.git synced 2024-11-27 11:21:02 +05:00
zkv/zkv.go

509 lines
9.0 KiB
Go
Raw Permalink Normal View History

2022-02-16 16:16:29 +05:00
package zkv
2022-02-16 16:08:20 +05:00
import (
"bufio"
2022-02-16 16:08:20 +05:00
"bytes"
2022-12-05 21:26:54 +05:00
"crypto/sha256"
2022-02-16 16:08:20 +05:00
"encoding/base64"
2022-12-10 21:39:24 +05:00
"encoding/gob"
2022-02-16 16:08:20 +05:00
"fmt"
"io"
"os"
"sync"
"github.com/klauspost/compress/zstd"
)
// Offsets locates a single record inside the store file.
type Offsets struct {
	// BlockOffset is the byte position in the store file at which the
	// compressed block containing the record starts.
	BlockOffset int64

	// RecordOffset is the number of decompressed bytes that precede the
	// record inside that block.
	RecordOffset int64
}
2022-12-03 20:59:17 +05:00
type Store struct {
dataOffset map[string]Offsets
2022-12-03 20:59:17 +05:00
filePath string
buffer *bytes.Buffer
bufferDataOffset map[string]int64
2022-02-16 16:08:20 +05:00
options Options
readOrderChan chan struct{}
2022-12-02 21:37:15 +05:00
mu sync.RWMutex
2022-02-16 16:08:20 +05:00
}
2022-12-03 20:59:17 +05:00
func OpenWithOptions(filePath string, options Options) (*Store, error) {
options.setDefaults()
2022-02-16 16:08:20 +05:00
2023-04-16 10:32:58 +05:00
store := &Store{
dataOffset: make(map[string]Offsets),
2022-12-03 20:59:17 +05:00
bufferDataOffset: make(map[string]int64),
buffer: new(bytes.Buffer),
filePath: filePath,
options: options,
readOrderChan: make(chan struct{}, int(options.MaxParallelReads))}
if options.useIndexFile {
2022-12-10 21:39:24 +05:00
idxFile, err := os.Open(filePath + indexFileExt)
if err == nil {
2023-04-16 10:32:58 +05:00
err = gob.NewDecoder(idxFile).Decode(&store.dataOffset)
if err != nil {
return nil, err
2022-12-10 21:39:24 +05:00
}
2023-04-16 10:32:58 +05:00
return store, nil
}
2022-12-03 20:59:17 +05:00
}
2023-04-16 10:32:58 +05:00
exists, err := isFileExists(filePath)
2022-12-03 20:59:17 +05:00
if err != nil {
2023-04-16 10:32:58 +05:00
return nil, err
2022-12-03 20:59:17 +05:00
}
2023-04-16 10:32:58 +05:00
if !exists {
return store, nil
}
2022-12-03 20:59:17 +05:00
2023-04-16 10:32:58 +05:00
err = store.rebuildIndex()
if err != nil {
return nil, err
2022-02-16 16:08:20 +05:00
}
2023-04-16 10:32:58 +05:00
return store, nil
2022-02-16 16:08:20 +05:00
}
2022-12-03 20:59:17 +05:00
func Open(filePath string) (*Store, error) {
2022-12-10 21:39:24 +05:00
options := defaultOptions
return OpenWithOptions(filePath, options)
2022-12-03 20:59:17 +05:00
}
2022-02-16 16:08:20 +05:00
2022-12-03 20:59:17 +05:00
// Set stores value under key. It holds the store's write lock for the
// duration of the call.
func (s *Store) Set(key, value interface{}) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	err := s.set(key, value)
	return err
}
// Get looks up key and decodes the stored data into value. It holds the
// store's read lock for the duration of the call, so several Get calls may
// run concurrently.
func (s *Store) Get(key, value interface{}) error {
	s.mu.RLock()
	defer s.mu.RUnlock()

	err := s.get(key, value)
	return err
}
func (s *Store) Delete(key interface{}) error {
s.mu.Lock()
defer s.mu.Unlock()
keyHash, err := hashInterface(key)
2022-02-16 16:08:20 +05:00
if err != nil {
return err
}
2022-12-03 20:59:17 +05:00
record := &Record{
Type: RecordTypeDelete,
KeyHash: keyHash,
}
2022-02-16 16:08:20 +05:00
b, err := record.Marshal()
if err != nil {
return err
}
2022-12-03 20:59:17 +05:00
delete(s.dataOffset, string(record.KeyHash[:]))
delete(s.bufferDataOffset, string(record.KeyHash[:]))
2022-02-16 16:08:20 +05:00
2022-12-03 20:59:17 +05:00
_, err = s.buffer.Write(b)
2022-02-16 16:08:20 +05:00
if err != nil {
return err
}
if s.buffer.Len() > s.options.MemoryBufferSize {
2022-12-03 20:59:17 +05:00
err = s.flush()
if err != nil {
return err
}
}
2022-02-16 16:08:20 +05:00
return nil
}
2022-12-03 20:59:17 +05:00
// Flush writes the buffered records to the store file while holding the
// store's write lock.
func (s *Store) Flush() error {
	s.mu.Lock()
	defer s.mu.Unlock()

	err := s.flush()
	return err
}
2022-02-16 16:08:20 +05:00
2022-12-05 21:26:54 +05:00
// BackupWithOptions copies every key currently present in the index into a
// store at filePath opened with newFileOptions.
//
// The source store is flushed first and stays write-locked for the whole
// copy. If reading or writing a record fails, the new store is closed (its
// close error is discarded) and the original error is returned.
func (s *Store) BackupWithOptions(filePath string, newFileOptions Options) error {
	s.mu.Lock()
	defer s.mu.Unlock()

	if err := s.flush(); err != nil {
		return err
	}

	newStore, err := OpenWithOptions(filePath, newFileOptions)
	if err != nil {
		return err
	}

	// abort closes the destination store and reports the original failure.
	abort := func(cause error) error {
		newStore.Close()
		return cause
	}

	for hashStr := range s.dataOffset {
		// Index keys are the raw hash bytes stored as a string; turn them
		// back into the fixed-size array the record API expects.
		var keyHash [sha256.Size224]byte
		copy(keyHash[:], hashStr)

		valueBytes, err := s.getGobBytes(keyHash)
		if err != nil {
			return abort(err)
		}

		if err = newStore.setBytes(keyHash, valueBytes); err != nil {
			return abort(err)
		}
	}

	return newStore.Close()
}
// Backup copies the store into a new store at filePath that is opened with
// defaultOptions.
func (s *Store) Backup(filePath string) error {
	backupOptions := defaultOptions

	return s.BackupWithOptions(filePath, backupOptions)
}
2022-12-03 20:59:17 +05:00
func (s *Store) Close() error {
s.mu.Lock()
defer s.mu.Unlock()
2022-12-03 20:59:17 +05:00
err := s.flush()
2022-02-16 16:08:20 +05:00
if err != nil {
return err
}
2022-12-07 21:06:36 +05:00
return nil
2022-12-03 20:59:17 +05:00
}
2022-12-05 21:26:54 +05:00
func (s *Store) setBytes(keyHash [sha256.Size224]byte, valueBytes []byte) error {
record, err := newRecordBytes(RecordTypeSet, keyHash, valueBytes)
if err != nil {
return err
}
b, err := record.Marshal()
if err != nil {
return err
}
s.bufferDataOffset[string(record.KeyHash[:])] = int64(s.buffer.Len())
_, err = s.buffer.Write(b)
if err != nil {
return err
}
if s.buffer.Len() > s.options.MemoryBufferSize {
2022-12-05 21:26:54 +05:00
err = s.flush()
if err != nil {
return err
}
}
return nil
}
2022-12-03 20:59:17 +05:00
func (s *Store) set(key, value interface{}) error {
record, err := newRecord(RecordTypeSet, key, value)
2022-02-16 16:08:20 +05:00
if err != nil {
return err
}
2022-12-03 20:59:17 +05:00
b, err := record.Marshal()
2022-02-16 16:08:20 +05:00
if err != nil {
return err
}
2022-12-03 20:59:17 +05:00
s.bufferDataOffset[string(record.KeyHash[:])] = int64(s.buffer.Len())
_, err = s.buffer.Write(b)
2022-02-16 16:08:20 +05:00
if err != nil {
return err
}
if s.buffer.Len() > s.options.MemoryBufferSize {
2022-12-03 20:59:17 +05:00
err = s.flush()
if err != nil {
return err
}
2022-02-16 16:08:20 +05:00
}
2022-12-03 20:59:17 +05:00
return nil
2022-02-16 16:08:20 +05:00
}
2022-12-05 21:26:54 +05:00
func (s *Store) getGobBytes(keyHash [sha256.Size224]byte) ([]byte, error) {
s.readOrderChan <- struct{}{}
defer func() { <-s.readOrderChan }()
offset, exists := s.bufferDataOffset[string(keyHash[:])]
if exists {
reader := bytes.NewReader(s.buffer.Bytes())
err := skip(reader, offset)
if err != nil {
return nil, err
}
_, record, err := readRecord(reader)
if err != nil {
return nil, err
}
return record.ValueBytes, nil
}
offsets, exists := s.dataOffset[string(keyHash[:])]
2022-12-05 21:26:54 +05:00
if !exists {
return nil, ErrNotExists
}
readF, err := os.Open(s.filePath)
if err != nil {
return nil, err
}
defer readF.Close()
_, err = readF.Seek(offsets.BlockOffset, io.SeekStart)
if err != nil {
return nil, err
}
2022-12-05 21:26:54 +05:00
decompressor, err := zstd.NewReader(readF)
if err != nil {
return nil, err
}
defer decompressor.Close()
err = skip(decompressor, offsets.RecordOffset)
2022-12-05 21:26:54 +05:00
if err != nil {
return nil, err
}
_, record, err := readRecord(decompressor)
if err != nil {
return nil, err
}
if !bytes.Equal(record.KeyHash[:], keyHash[:]) {
expectedHashStr := base64.StdEncoding.EncodeToString(keyHash[:])
gotHashStr := base64.StdEncoding.EncodeToString(record.KeyHash[:])
return nil, fmt.Errorf("wrong hash of offset %d: expected %s, got %s", offset, expectedHashStr, gotHashStr)
}
return record.ValueBytes, nil
}
2022-12-03 20:59:17 +05:00
func (s *Store) get(key, value interface{}) error {
s.readOrderChan <- struct{}{}
defer func() { <-s.readOrderChan }()
2022-12-03 12:55:42 +05:00
2022-12-03 20:59:17 +05:00
hashToFind, err := hashInterface(key)
2022-02-16 16:08:20 +05:00
if err != nil {
2022-12-03 20:59:17 +05:00
return err
2022-02-16 16:08:20 +05:00
}
2022-12-11 18:16:23 +05:00
b, err := s.getGobBytes(hashToFind)
2022-02-16 16:08:20 +05:00
if err != nil {
2022-12-03 20:59:17 +05:00
return err
2022-02-16 16:08:20 +05:00
}
2022-12-11 18:16:23 +05:00
return decode(b, value)
}
2022-12-03 20:59:17 +05:00
func (s *Store) flush() error {
l := int64(s.buffer.Len())
2022-12-07 21:06:36 +05:00
f, err := os.OpenFile(s.filePath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
return fmt.Errorf("open store file: %v", err)
}
stat, err := f.Stat()
if err != nil {
f.Close()
return fmt.Errorf("stat store file: %v", err)
}
2022-12-07 21:06:36 +05:00
diskWriteBuffer := bufio.NewWriterSize(f, s.options.DiskBufferSize)
encoder, err := zstd.NewWriter(diskWriteBuffer, zstd.WithEncoderLevel(s.options.CompressionLevel))
2022-12-07 21:06:36 +05:00
if err != nil {
f.Close()
return fmt.Errorf("init encoder: %v", err)
2022-12-07 21:06:36 +05:00
}
_, err = s.buffer.WriteTo(encoder)
if err != nil {
return err
}
2022-12-03 20:59:17 +05:00
for key, val := range s.bufferDataOffset {
s.dataOffset[key] = Offsets{BlockOffset: stat.Size(), RecordOffset: val}
}
2022-12-03 20:59:17 +05:00
s.bufferDataOffset = make(map[string]int64)
2022-12-07 21:06:36 +05:00
err = encoder.Close()
if err != nil {
// TODO: truncate file to previous state
return err
}
err = diskWriteBuffer.Flush()
if err != nil {
// TODO: truncate file to previous state
return err
}
2022-12-07 21:06:36 +05:00
err = f.Close()
if err != nil {
return err
}
2022-12-10 21:39:24 +05:00
// Update index file only on data update
if s.options.useIndexFile && l > 0 {
2023-04-16 10:32:58 +05:00
err = s.saveIndex()
2022-12-10 21:39:24 +05:00
if err != nil {
return err
}
2023-04-16 10:32:58 +05:00
}
2022-12-10 21:39:24 +05:00
2023-04-16 10:32:58 +05:00
return nil
}
// readBlock reads the next block from r, where blocks are delimited by the
// 4-byte magic number 0x28 0xb5 0x2f 0xfd.
//
// The returned line is the complete block including its leading magic
// number, and n is the length of that block in bytes (the amount by which
// the caller should advance its file offset). Because the magic number that
// terminates a block is consumed together with it, every call starts with
// the magic number already "read"; on the very first call the magic number
// at the start of the stream is recognised and skipped.
//
// When the underlying reader fails (including io.EOF) the bytes collected so
// far are returned together with the error; if nothing but the magic number
// was collected, an empty slice and n == 0 are returned.
//
// NOTE(review): a block is split wherever the magic byte sequence occurs, so
// a payload that happens to contain these four bytes would be cut in two.
func readBlock(r *bufio.Reader) (line []byte, n int, err error) {
	delim := []byte{0x28, 0xb5, 0x2f, 0xfd}

	line = make([]byte, len(delim))
	copy(line, delim)

	for {
		// Read up to the next occurrence of the last magic byte; a block may
		// take several such chunks when its payload contains that byte.
		chunk, readErr := r.ReadBytes(delim[len(delim)-1])
		line = append(line, chunk...)
		if readErr != nil {
			if bytes.Equal(line, delim) { // contains only magic number
				return []byte{}, 0, readErr
			}
			return line, len(line), readErr
		}

		// line always starts with the magic number, so "magic followed by
		// magic" means the magic number at the very start of the stream was
		// just read: drop it and keep reading the first block.
		if len(line) == 2*len(delim) && bytes.HasSuffix(line, delim) { // first block
			line = line[:len(delim)]
			continue
		}

		if bytes.HasSuffix(line, delim) {
			block := line[:len(line)-len(delim)]
			// Report the size of the whole block, not of the last chunk.
			return block, len(block), nil
		}
	}
}
// RebuildIndex recreates the in-memory index by scanning the store file
// while holding the store's write lock. When options.useIndexFile is set,
// the index file is saved afterwards.
func (s *Store) RebuildIndex() error {
	s.mu.Lock()
	defer s.mu.Unlock()

	if err := s.rebuildIndex(); err != nil {
		return err
	}

	if !s.options.useIndexFile {
		return nil
	}

	return s.saveIndex()
}
func (s *Store) rebuildIndex() error {
f, err := os.Open(s.filePath)
if err != nil {
return err
}
defer f.Close()
r := bufio.NewReader(f)
var blockOffset int64
s.dataOffset = make(map[string]Offsets)
for {
l, n, err := readBlock(r)
if err != nil {
if err != io.EOF {
return err
} else if err == io.EOF && len(l) == 0 {
break
}
}
dec, err := zstd.NewReader(bytes.NewReader(l))
var recordOffset int64
for {
n, record, err := readRecord(dec)
if err != nil {
if err == io.EOF {
break
} else {
return err
}
}
switch record.Type {
case RecordTypeSet:
s.dataOffset[string(record.KeyHash[:])] = Offsets{BlockOffset: blockOffset, RecordOffset: recordOffset}
case RecordTypeDelete:
delete(s.dataOffset, string(record.KeyHash[:]))
}
recordOffset += n
2022-12-10 21:39:24 +05:00
}
2023-04-16 10:32:58 +05:00
blockOffset += int64(n)
}
idxBuf := new(bytes.Buffer)
err = gob.NewEncoder(idxBuf).Encode(s.dataOffset)
if err != nil {
return err
}
err = os.WriteFile(s.filePath+indexFileExt, idxBuf.Bytes(), 0644)
if err != nil {
return err
2022-12-10 21:39:24 +05:00
}
return nil
}
2023-04-16 10:32:58 +05:00
// saveIndex gob-encodes the in-memory index (s.dataOffset) into the index
// file (filePath + indexFileExt), creating the file if needed and
// truncating any previous content. The caller is responsible for locking.
func (s *Store) saveIndex() error {
	f, err := os.OpenFile(s.filePath+indexFileExt, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
	if err != nil {
		return err
	}

	if err = gob.NewEncoder(f).Encode(s.dataOffset); err != nil {
		// Do not leak the descriptor; the encode error is the one to report.
		f.Close()
		return err
	}

	return f.Close()
}