Working put test, passing, but not complete
This commit is contained in:
parent
e10a9e311f
commit
30352fd304
@ -4,4 +4,8 @@ Do it simply and do it well.
|
||||
|
||||
## The Bitcask paper
|
||||
|
||||
https://riak.com/assets/bitcask-intro.pdf
|
||||
https://riak.com/assets/bitcask-intro.pdf
|
||||
|
||||
## Implementation in Ruby
|
||||
|
||||
https://dinesh.wiki/posts/build-your-own-persistent-kv-store/
|
||||
|
@ -1,17 +1,64 @@
|
||||
const std = @import("std");
|
||||
const expect = std.testing.expect;
|
||||
const Dir = std.fs.Dir;
|
||||
const mem = std.mem;
|
||||
|
||||
pub const BitcaskFileError = error{
|
||||
/// Errors reported by the Bitcask file layer.
const BitcaskFileError = error{
    /// Returned by `open` when the data file cannot be opened.
    CannotInitialize,
    AccessDenied,
    OutOfMemory,
    FileNotFound,
    /// Catch-all used when an underlying failure is not mapped to a specific error.
    Unknown,
};
|
||||
|
||||
pub const BitCask = struct {
|
||||
/// A single key/value record together with its size and type metadata.
const CaskEntry = struct {
    // presumably the byte length of `key` — confirm against the serializer
    keySize: u32,
    // presumably the byte length of the value behind `val` — confirm against the serializer
    valSize: u32,
    /// Tag describing how the data behind `val` should be interpreted.
    valType: EntryType,
    key: []const u8,
    /// Untyped pointer to the value; `valType` says what it points at.
    val: *const anyopaque,
};
|
||||
|
||||
/// Key/value pair of raw bytes, as accepted by `BitCask.put`.
const KV = struct {
    key: []const u8,
    val: []const u8,
};
|
||||
|
||||
/// One-byte tag naming the kind of value stored in an entry.
const EntryType = enum(u8) {
    /// String / raw bytes.
    str = 0,
    /// Integer (sized as `i32` by `getEntrySize`).
    int = 1,
    /// Floating point (sized as `f64` by `getEntrySize`).
    flt = 2,
};
|
||||
|
||||
|
||||
const BitCask = struct {
|
||||
|
||||
/// Value type of the in-memory key directory (`keyDir`): where a record lives on disk.
const KeyDirEntry = struct {
    /// Name of the data file that holds the record.
    file_name: []const u8,
    /// Position of the record inside `file_name`.
    /// `u32` covers files up to ~4.29 GB; the previous `u4` could only hold 0–15.
    offset: u32,
    /// Size of the record; same range reasoning as `offset`.
    size: u32,
};
|
||||
|
||||
|
||||
const FILE_THRESHOLD_SIZE = 1000;
|
||||
|
||||
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
|
||||
const allocator = arena.allocator();
|
||||
|
||||
|
||||
// std.StringArrayHashMap
|
||||
// TODO fix to a file descriptor or reference for fixed-size; u32 may be enough, depending on max file size I suppose
|
||||
// u32 = max offset/size of 4.29GB (a u4 is a 4-bit integer and only holds 0-15)
|
||||
|
||||
var keyDir: std.StringArrayHashMap(KeyDirEntry) = undefined;
|
||||
var current_file: std.fs.File = undefined;
|
||||
var current_file_name: [*]const u8 = "";
|
||||
|
||||
// bitcask:close(BitCaskHandle) Close a Bitcask data store and flush all pending writes (if any) to disk
|
||||
/// Closes the current data file and frees everything allocated from the module arena.
///
/// The arena is re-initialised afterwards so that `open` can be called again in the
/// same process (the tests in this file open and close the store repeatedly).
pub fn close() void {
    std.debug.print("Closing bitcask...\n", .{});
    arena.deinit();
    // `ArenaAllocator.deinit` takes the arena by value, so the global keeps its old
    // (now freed) buffer list. Start over with a fresh arena to avoid a use-after-free
    // on the next allocation.
    arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    current_file.close();
}
|
||||
|
||||
// From the Bitcask paper, the API should look something like this
|
||||
|
||||
@ -29,66 +76,187 @@ pub const BitCask = struct {
|
||||
// writer and not just a reader) and sync on put (if this writer would
|
||||
// prefer to sync the write file after every write operation).
|
||||
// The directory must be readable and writable by this process, and
|
||||
// only one process may open a Bitcask with read write at a time.
|
||||
// only one process may open a Bitcask with read-write access at a time.
|
||||
// bitcask:open(DirectoryName) Open a new or existing Bitcask datastore for read-only access.
|
||||
// → BitCaskHandle | {error, any()} The directory and all files in it must be readable by this process.
|
||||
fn open(directory_name: []const u8) BitcaskFileError!void {
|
||||
std.debug.print("Opening bitcask in dir {s}\n", .{directory_name});
|
||||
return error.FileNotFound;
|
||||
/// Opens the data file `cask1.db` inside `directory_name` (resolved against the current
/// working directory) for reading and writing, creating the file if it does not exist.
///
/// Resets `keyDir` to an empty map and leaves `current_file` positioned at end-of-file so
/// that subsequent writes append instead of overwriting earlier records.
///
/// Errors:
/// - any error from opening the directory or creating the file is propagated as-is;
/// - `error.CannotInitialize` when the existing file cannot be opened or positioned.
pub fn open(directory_name: []const u8) (std.fs.File.OpenError || BitcaskFileError)!void {
    // init keyDir
    keyDir = std.StringArrayHashMap(KeyDirEntry).init(allocator);

    const file_name = "cask1.db";
    current_file_name = file_name;

    // Open file
    std.debug.print("\nOpening bitcask in dir {s}/\n", .{directory_name});
    var bitcask_dir = try std.fs.Dir.openDir(std.fs.cwd(), directory_name, .{ .access_sub_paths = false });
    // The directory handle is only needed to locate the data file.
    defer bitcask_dir.close();

    // TODO get all files in cask and have some order to open the latest one
    if (bitcask_dir.openFile(file_name, .{ .mode = .read_write })) |existing| {
        std.debug.print("File {s} found\n", .{file_name});
        current_file = existing;
    } else |err| switch (err) {
        // If the file does not exist, create it
        error.FileNotFound => {
            std.debug.print("No file found, creating new one\n", .{});
            // `.read = true` yields a read-write handle, so no second open is needed.
            current_file = try bitcask_dir.createFile(file_name, .{ .read = true });
        },
        else => {
            std.debug.print("Encountered unknown error while opening bitcask: {any}\n", .{err});
            return error.CannotInitialize;
        },
    }
    errdefer current_file.close();

    // Records are appended; start writing after whatever the file already contains.
    current_file.seekFromEnd(0) catch |err| {
        std.debug.print("Encountered unknown error while opening bitcask: {any}\n", .{err});
        return error.CannotInitialize;
    };
}
|
||||
|
||||
// bitcask:get(BitCaskHandle, Key) Retrieve a value by key from a Bitcask datastore.
|
||||
// → not found | {ok, Value}
|
||||
fn get(key: []const u8) error{NotImplemented}!void {
|
||||
/// Looks `key` up in the in-memory key directory.
///
/// Returns the directory entry (file name, offset, size) for the key, or
/// `error.NoCaskFound` when the key is not present.
///
/// NOTE(review): the declared return type was `CaskEntry`, but the body returns the
/// `keyDir` value (a `KeyDirEntry`) and the tests read `offset`/`size` from it, so the
/// signature now states what is actually returned.
pub fn get(key: []const u8) error{ OutOfMemory, NoCaskFound, Unknown }!KeyDirEntry {
    std.debug.print("Getting value with key {s}\n", .{key});
    const cask = keyDir.get(key) orelse return error.NoCaskFound;
    std.debug.print("Got cask from file {s}\n", .{cask.file_name});
    // TODO get entry from db rather than return the keyDir entry
    return cask;
}
|
||||
|
||||
// bitcask:put(BitCaskHandle, Key, Value) Store a key and value in a Bitcask datastore.
|
||||
// → ok | {error, any()}
|
||||
fn put() error{NotImplemented}!void {
|
||||
return error.NotImplemented;
|
||||
/// Serializes `data` and writes the resulting record to the current data file.
///
/// Errors:
/// - `error.OutOfMemory` when the record buffer cannot be allocated;
/// - `error.Unknown` when writing to the file fails.
pub fn put(data: KV) BitcaskFileError!void {
    const entry_bytes = serializeCaskEntry(&allocator, data) catch |err| {
        std.debug.print("Encountered unknown error while serializing bitcask: {any}\n", .{err});
        // The serializer can only fail with OutOfMemory; report it as such instead of
        // hiding it behind `Unknown`.
        return err;
    };

    std.debug.print("Writing data to cask: {}\n", .{data});
    // `write` may stop after a partial write; `writeAll` keeps going until the whole
    // record is on the file or an error occurs.
    current_file.writeAll(entry_bytes) catch |err| {
        std.debug.print("Encountered unknown error while writing to bitcask: {any}\n", .{err});
        return BitcaskFileError.Unknown;
    };
    std.debug.print("After write: {any}\n", .{entry_bytes.len});

    // TODO record the written entry in keyDir so that `get` can find it
}
|
||||
|
||||
// bitcask:delete(BitCaskHandle, Key) Delete a key from a Bitcask datastore.
|
||||
// bitcask:delete(BitCaskHandle, Key) Delete a key from a Bitcask datastore. Note that deletion is simply a write of a special tombstone value, which will be removed on the next merge
|
||||
// → ok | {error, any()}
|
||||
fn delete() error{NotImplemented}!void {
|
||||
/// Placeholder for `bitcask:delete`.
///
/// Not implemented yet: always fails with `error.NotImplemented`.
pub fn delete() error{NotImplemented}!void {
    return error.NotImplemented;
}
|
||||
|
||||
// bitcask:list_keys(BitCaskHandle) List all keys in a Bitcask datastore.
|
||||
// → [Key] | {error, any()}
|
||||
fn list() error{NotImplemented}!void {
|
||||
/// Placeholder for listing all keys.
///
/// Not implemented yet: always fails with `error.NotImplemented`.
pub fn list() error{NotImplemented}!void {
    return error.NotImplemented;
}
|
||||
|
||||
// bitcask:fold(BitCaskHandle,Fun,Acc0) Fold over all K/V pairs in a Bitcask datastore.
|
||||
// → Acc Fun is expected to be of the form: F(K,V,Acc0) → Acc.
|
||||
fn fold() error{NotImplemented}!void {
|
||||
/// Placeholder for `bitcask:fold`.
///
/// Not implemented yet: always fails with `error.NotImplemented`.
pub fn fold() error{NotImplemented}!void {
    return error.NotImplemented;
}
|
||||
|
||||
// bitcask:merge(DirectoryName) Merge several data files within a Bitcask datastore into a more
|
||||
// → ok | {error, any()} compact form. Also, produce hintfiles for faster startup.
|
||||
fn merge() error{NotImplemented}!void {
|
||||
/// Placeholder for `bitcask:merge`.
///
/// Not implemented yet: always fails with `error.NotImplemented`.
pub fn merge() error{NotImplemented}!void {
    return error.NotImplemented;
}
|
||||
|
||||
// bitcask:sync(BitCaskHandle) Force any writes to sync to disk.
|
||||
// → ok
|
||||
fn sync() error{NotImplemented}!void {
|
||||
/// Placeholder for `bitcask:sync`.
///
/// Not implemented yet: always fails with `error.NotImplemented`.
pub fn sync() error{NotImplemented}!void {
    return error.NotImplemented;
}
|
||||
|
||||
// bitcask:close(BitCaskHandle) Close a Bitcask data store and flush all pending writes (if any) to disk
|
||||
fn close() error{NotImplemented}!void {
|
||||
return error.NotImplemented;
|
||||
/// Returns the number of bytes a value of kind `t` occupies.
///
/// - `.str`: `data` must point at a `[]const u8`; the slice length is returned,
///   saturated to the `u32` range.
///   NOTE(review): the pointee type is an assumption — the previous code cast to a
///   many-item pointer, which carries no length; confirm against the first caller.
/// - `.int`: size of an `i32`; `data` is not read.
/// - `.flt`: size of an `f64`; `data` is not read.
fn getEntrySize(data: *anyopaque, t: EntryType) u32 {
    return switch (t) {
        .str => blk: {
            const text = @ptrCast(*const []const u8, @alignCast(@alignOf([]const u8), data));
            break :blk std.math.lossyCast(u32, text.*.len);
        },
        .int => @sizeOf(i32),
        .flt => @sizeOf(f64),
    };
}
|
||||
|
||||
/// Serializes `entry` into a newly allocated record buffer.
///
/// Layout (all lengths little-endian):
///   [key length: u32][value length: u32][key bytes][value bytes]
///
/// The length prefixes are fixed at 4 bytes each so the on-disk format does not depend
/// on the pointer width or byte order of the machine that wrote it.
///
/// Caller owns the returned slice (allocated from `aloc`).
///
/// Errors: `error.OutOfMemory` when allocation fails, and also when the key or value is
/// too long for a `u32` length prefix (the only error this signature can report).
fn serializeCaskEntry(aloc: *const mem.Allocator, entry: KV) error{OutOfMemory}![]u8 {
    const key_len = std.math.cast(u32, entry.key.len) orelse return error.OutOfMemory;
    const val_len = std.math.cast(u32, entry.val.len) orelse return error.OutOfMemory;

    const header_len = 2 * @sizeOf(u32);
    var serialized_data = try std.ArrayList(u8).initCapacity(
        aloc.*,
        header_len + entry.key.len + entry.val.len,
    );
    errdefer serialized_data.deinit();

    // Serialize each u32 length into 4 bytes, least significant byte first
    const key_len_bytes = std.mem.toBytes(std.mem.nativeToLittle(u32, key_len));
    const val_len_bytes = std.mem.toBytes(std.mem.nativeToLittle(u32, val_len));

    // Capacity was reserved for the exact record size above, so these cannot fail.
    serialized_data.appendSliceAssumeCapacity(&key_len_bytes);
    serialized_data.appendSliceAssumeCapacity(&val_len_bytes);
    serialized_data.appendSliceAssumeCapacity(entry.key);
    serialized_data.appendSliceAssumeCapacity(entry.val);

    // TODO once typed values are supported, write the value type tag as part of the header

    return try serialized_data.toOwnedSlice();
}
|
||||
|
||||
// fn getValType(caskVal: any) u8 {
|
||||
// switch (@TypeOf(caskVal)) {
|
||||
// u8 =>
|
||||
// }
|
||||
// }
|
||||
|
||||
};
|
||||
|
||||
// pub fn main() !void {
|
||||
// const bc = BitCask;
|
||||
// try bc.open("data");
|
||||
// defer bc.close();
|
||||
//
|
||||
// try bc.put(.{.key = "id", .val = "abc123"});
|
||||
// }
|
||||
|
||||
test "Bitcask spec implementation: open" {
|
||||
const bc = BitCask;
|
||||
try bc.open("File");
|
||||
try bc.open("data");
|
||||
defer bc.close();
|
||||
// bc.open("File") catch |err| {
|
||||
// try expect(err == error.FileNotFound);
|
||||
// };
|
||||
@ -97,12 +265,33 @@ test "Bitcask spec implementation: open" {
|
||||
|
||||
test "Bitcask spec implementation: get" {
    const bc = BitCask;
    try bc.open("data");
    defer bc.close();

    // `open` starts with an empty key directory, so the lookup must report a miss.
    try std.testing.expectError(error.NoCaskFound, bc.get("key"));

    // TODO once `put` records entries in the key directory, store a value here and
    // assert on the returned file name, offset and size.
}
|
||||
|
||||
test "Bitcask spec implementation: put" {
    const bc = BitCask;
    // The store must be opened before anything is written to it.
    try bc.open("data");
    defer bc.close();

    const first: KV = .{ .key = "id", .val = "1" };
    try bc.put(first);

    try bc.put(.{ .key = "user1", .val = "likes apples so much" });
}
|
||||
|
||||
test "Bitcask spec implementation: delete" {
|
||||
|
Loading…
Reference in New Issue
Block a user