From 30352fd304acd9ed30ce538a4455a14531b4ea01 Mon Sep 17 00:00:00 2001 From: Nathan Anderson Date: Tue, 11 Apr 2023 16:31:46 -0600 Subject: [PATCH] Working put test, passing, but not complete --- README.md | 6 +- src/Bitcask/bitcask.zig | 235 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 217 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 32609ba..55fccb7 100644 --- a/README.md +++ b/README.md @@ -4,4 +4,8 @@ Do it simply and do it well. ## The Bitcask paper -https://riak.com/assets/bitcask-intro.pdf \ No newline at end of file +https://riak.com/assets/bitcask-intro.pdf + +## Implementation in Ruby + +https://dinesh.wiki/posts/build-your-own-persistent-kv-store/ diff --git a/src/Bitcask/bitcask.zig b/src/Bitcask/bitcask.zig index 491e87f..813b0b7 100644 --- a/src/Bitcask/bitcask.zig +++ b/src/Bitcask/bitcask.zig @@ -1,17 +1,64 @@ const std = @import("std"); const expect = std.testing.expect; +const Dir = std.fs.Dir; +const mem = std.mem; -pub const BitcaskFileError = error{ +const BitcaskFileError = error{ + CannotInitialize, AccessDenied, OutOfMemory, FileNotFound, + Unknown }; -pub const BitCask = struct { +const CaskEntry = struct { + keySize: u32, + valSize: u32, + valType: EntryType, + key: []const u8, + val: *const anyopaque, +}; + +const KV = struct { + key: []const u8, + val: []const u8 +}; + +const EntryType = enum(u8) { + str = 0, + int = 1, + flt = 2, +}; + + +const BitCask = struct { + + const KeyDirEntry = struct { + file_name: []const u8, + offset: u4, + size: u4, + }; + + const FILE_THRESHOLD_SIZE = 1000; + var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator); + const allocator = arena.allocator(); + - // std.StringArrayHashMap + // TODO fix to a file descriptor or reference for fixed-size, may only need u4? depends on max file size I suppose + // u4 = key size of 4.29GB + + var keyDir: std.StringArrayHashMap(KeyDirEntry) = undefined; + var current_file: std.fs.File = undefined; + var current_file_name: [*]const u8 = ""; + + // bitcask:close(BitCaskHandle) Close a Bitcask data store and flush all pending writes (if any) to disk + pub fn close() void { + std.debug.print("Closing bitcask...\n", .{}); + arena.deinit(); + current_file.close(); + } // From the Bitcask paper, the API should look something like this @@ -29,66 +76,187 @@ pub const BitCask = struct { // writer and not just a reader) and sync on put (if this writer would // prefer to sync the write file after every write operation). // The directory must be readable and writable by this process, and - // only one process may open a Bitcask with read write at a time. + // only one process may open a Bitcask with read or write at a time. // bitcask:open(DirectoryName) Open a new or existing Bitcask datastore for read-only access. // → BitCaskHandle | {error, any()} The directory and all files in it must be readable by this process. - fn open(directory_name: []const u8) BitcaskFileError!void { - std.debug.print("Opening bitcask in dir {s}\n", .{directory_name}); - return error.FileNotFound; + pub fn open(directory_name: []const u8) (std.fs.File.OpenError || BitcaskFileError)!void { + // init keyDir + keyDir = std.StringArrayHashMap(KeyDirEntry).init( + allocator, + ); + const file_name = "cask1.db"; + current_file_name = file_name; + // Open file + std.debug.print("\nOpening bitcask in dir {s}/\n", .{directory_name}); + var bitcask_dir = try std.fs.Dir.openDir(std.fs.cwd(), directory_name, .{.access_sub_paths=false}); + // TODO get all files in cask and have some order to open the latest one + var err_or_file = bitcask_dir.openFile(file_name, .{.mode=.read_write}); + + // If the file does not exist, create it + if (err_or_file == std.fs.File.OpenError.FileNotFound) { + std.debug.print("No file found, creating new one\n", .{}); + _ = try bitcask_dir.createFile(file_name, .{}); // catch this error, then I can remove error union + current_file = try bitcask_dir.openFile(file_name, .{.mode=.read_write}); + } else if (@TypeOf(err_or_file) != std.fs.File.OpenError) { + std.debug.print("File {s} found\n", .{file_name}); + current_file = try err_or_file; + } else { + std.debug.print("Encountered unknown error while opening bitcask: {any}\n", .{err_or_file}); + return error.CannotInitialize; + } + +// std.debug.print("Current File open: {}\n", .{current_file}); + return; } // bitcask:get(BitCaskHandle, Key) Retrieve a value by key from a Bitcask datastore. // → not found | {ok, Value} - fn get(key: []const u8) error{NotImplemented}!void { + pub fn get(key: []const u8) error{OutOfMemory, NoCaskFound, Unknown}!CaskEntry { std.debug.print("Getting value with key {s}\n", .{key}); - return error.NotImplemented; + const optional_cask = keyDir.get(key); + if (optional_cask == null) { + return error.NoCaskFound; + } + const cask = optional_cask.?; // shorthand for 'optional_cask orelse unreachable' + std.debug.print("Got cask from file {s}\n", .{cask.file}); + return cask; + // TODO get entry from db rather than return the keyDir entry } // bitcask:put(BitCaskHandle, Key, Value) Store a key and value in a Bitcask datastore. // → ok | {error, any()} - fn put() error{NotImplemented}!void { - return error.NotImplemented; + pub fn put(data: KV) BitcaskFileError!void { + +// const key_type: EntryType = EntryType.str; +// const key_size = getEntrySize(data.key, key_type); +// const val_type: EntryType = EntryType.str; +// const val_size = getEntrySize(data.val, val_type); +// + + + const entry_bytes = serializeCaskEntry(&allocator, data) catch |err| { + std.debug.print("Encountered unknown error while serializing bitcask: {any}\n", .{err}); + return BitcaskFileError.Unknown; + }; + std.debug.print("Writing data to cask: {}\n", .{data}); + const i = current_file.write(entry_bytes) catch |err| { + std.debug.print("Encountered unknown error while writing to bitcask: {any}\n", .{err}); + return BitcaskFileError.Unknown; + }; + std.debug.print("After write: {any}\n", .{i}); + + + //try keyDir.put("key", data.val); + + return; } - // bitcask:delete(BitCaskHandle, Key) Delete a key from a Bitcask datastore. + // bitcask:delete(BitCaskHandle, Key) Delete a key from a Bitcask datastore. Note that deletion is simply a write of a special tombstone value, which will be removed on the next merge // → ok | {error, any()} - fn delete() error{NotImplemented}!void { + pub fn delete() error{NotImplemented}!void { return error.NotImplemented; } // bitcask:list keys(BitCaskHandle) List all keys in a Bitcask datastore. // → [Key] | {error, any()} - fn list() error{NotImplemented}!void { + pub fn list() error{NotImplemented}!void { return error.NotImplemented; } // bitcask:fold(BitCaskHandle,Fun,Acc0) Fold over all K/V pairs in a Bitcask datastore. // → Acc Fun is expected to be of the form: F(K,V,Acc0) → Acc. - fn fold() error{NotImplemented}!void { + pub fn fold() error{NotImplemented}!void { return error.NotImplemented; } // bitcask:merge(DirectoryName) Merge several data files within a Bitcask datastore into a more // → ok | {error, any()} compact form. Also, produce hintfiles for faster startup. - fn merge() error{NotImplemented}!void { + pub fn merge() error{NotImplemented}!void { return error.NotImplemented; } // bitcask:sync(BitCaskHandle) Force any writes to sync to disk. // → ok - fn sync() error{NotImplemented}!void { + pub fn sync() error{NotImplemented}!void { return error.NotImplemented; } - // bitcask:close(BitCaskHandle) Close a Bitcask data store and flush all pending writes (if any) to disk - fn close() error{NotImplemented}!void { - return error.NotImplemented; + fn getEntrySize(data: *anyopaque, t: EntryType) u32 { + switch (t) { + .str => { + const ptr = @ptrCast([*]const u8, data); + return ptr.len; + }, + .int => { + return @sizeOf(i32); + }, + .flt => { + return @sizeOf(f64); + } + } } + + fn serializeCaskEntry(aloc: *const mem.Allocator, entry: KV) error{OutOfMemory}![]u8 { + + var serialized_data = try std.ArrayList(u8).initCapacity(aloc.*, entry.key.len + entry.val.len + 8); + + // Serialize u32 into 4 u8 bytes + const keyBytes = std.mem.toBytes(entry.key.len); + try serialized_data.appendSlice(&keyBytes); // Serialize u32 into 4 u8 bytes + // Serialize u32 into 4 u8 bytes + const valBytes = std.mem.toBytes(entry.val.len); + try serialized_data.appendSlice(&valBytes); + //try serialized_data.append(@enumToInt(entry.valType)); + + for (entry.key) |byte| { + try serialized_data.append(byte); + } + + for (entry.val) |byte| { + try serialized_data.append(byte); + } + + // Convert the val any type to the type specified +// switch (entry.valType) { +// .str => { +// const str_val = @ptrCast([*]const u8, entry.val); +// for (str_val) |byte| { +// try serialized_data.append(byte); +// } +// }, +// .int => { +// const int_val = @ptrCast(*const i32, entry.val); +// try serialized_data.appendSlice(mem.asBytes(int_val)); +// }, +// .flt => { +// const flt_val = @ptrCast(*const f64, entry.val); +// try serialized_data.appendSlice(mem.asBytes(flt_val)); +// } +// } + + return try serialized_data.toOwnedSlice(); + } + +// fn getValType(caskVal: any) u8 { +// switch (@TypeOf(caskVal)) { +// u8 => +// } +// } + }; +// pub fn main() !void { +// const bc = BitCask; +// try bc.open("data"); +// defer bc.close(); +// +// try bc.put(.{.key = "id", .val = "abc123"}); +// } + test "Bitcask spec implementation: open" { const bc = BitCask; - try bc.open("File"); + try bc.open("data"); + defer bc.close(); // bc.open("File") catch |err| { // try expect(err == error.FileNotFound); // }; @@ -97,12 +265,33 @@ test "Bitcask spec implementation: open" { test "Bitcask spec implementation: get" { const bc = BitCask; - try bc.get("key"); + //bc.init(); + try bc.open("data"); + defer bc.close(); + _ = bc.get("key") catch |err| { + try expect(err == error.NoCaskFound); + }; + //try bc.put(); + const cask = try bc.get("key"); + try expect(std.mem.eql(u8, cask.file, "file.txt")); + try expect(cask.offset == 0); + try expect(cask.size == 0); } test "Bitcask spec implementation: put" { const bc = BitCask; - try bc.put(); + try bc.open("data"); + defer bc.close(); + + const key = "id"; + const val = "1"; + + try bc.put(.{ + .key = key, + .val = val + }); + + try bc.put(.{.key = "user1", .val = "likes apples so much"}); } test "Bitcask spec implementation: delete" {