From 4fb99346037009793633144f67c3537d9a3fe776 Mon Sep 17 00:00:00 2001 From: Nathan Anderson Date: Wed, 12 Apr 2023 17:04:27 -0600 Subject: [PATCH] edits --- src/Bitcask/bitcask.zig | 301 ++++++++++++++++++++++++++++------------ 1 file changed, 214 insertions(+), 87 deletions(-) diff --git a/src/Bitcask/bitcask.zig b/src/Bitcask/bitcask.zig index 813b0b7..d3f9281 100644 --- a/src/Bitcask/bitcask.zig +++ b/src/Bitcask/bitcask.zig @@ -11,9 +11,14 @@ const BitcaskFileError = error{ Unknown }; +const BitCaskError = error{ + KeyNotFound, + InternalError, +}; + const CaskEntry = struct { - keySize: u32, - valSize: u32, + keySize: usize, + valSize: usize, valType: EntryType, key: []const u8, val: *const anyopaque, @@ -35,8 +40,8 @@ const BitCask = struct { const KeyDirEntry = struct { file_name: []const u8, - offset: u4, - size: u4, + offset: usize, + size: usize, }; @@ -49,9 +54,10 @@ const BitCask = struct { // TODO fix to a file descriptor or reference for fixed-size, may only need u4? depends on max file size I suppose // u4 = key size of 4.29GB - var keyDir: std.StringArrayHashMap(KeyDirEntry) = undefined; + var key_dir_map: std.StringArrayHashMap(KeyDirEntry) = undefined; var current_file: std.fs.File = undefined; - var current_file_name: [*]const u8 = ""; + var current_file_name: []const u8 = ""; + var data_dir_name: []const u8 = ""; // bitcask:close(BitCaskHandle) Close a Bitcask data store and flush all pending writes (if any) to disk pub fn close() void { @@ -60,6 +66,19 @@ const BitCask = struct { current_file.close(); } + pub fn drain() BitcaskFileError!void { + std.debug.print("Draining bitcask...", .{}); + var bitcask_dir = std.fs.Dir.openDir(std.fs.cwd(), data_dir_name, .{.access_sub_paths=false}) catch |err| { + std.debug.print("Encountered error while opening data directory /{s}:\n\t{any}", .{data_dir_name, err}); + return BitcaskFileError.Unknown; + }; + + bitcask_dir.deleteTree("/") catch |err| { + std.debug.print("Encountered error while deleting data tree at /{s}:\n\t{any}", .{data_dir_name, err}); + return BitcaskFileError.Unknown; + }; + } + // From the Bitcask paper, the API should look something like this //** TODO @@ -81,14 +100,17 @@ const BitCask = struct { // → BitCaskHandle | {error, any()} The directory and all files in it must be readable by this process. pub fn open(directory_name: []const u8) (std.fs.File.OpenError || BitcaskFileError)!void { // init keyDir - keyDir = std.StringArrayHashMap(KeyDirEntry).init( + key_dir_map = std.StringArrayHashMap(KeyDirEntry).init( allocator, ); - const file_name = "cask1.db"; + const file_name = "cask1.db"; // TODO come up with a naming convention, like date format? or fun names :) current_file_name = file_name; // Open file std.debug.print("\nOpening bitcask in dir {s}/\n", .{directory_name}); var bitcask_dir = try std.fs.Dir.openDir(std.fs.cwd(), directory_name, .{.access_sub_paths=false}); + //if (bitcask_dir == std.fs.Dir.OpenError) + + data_dir_name = directory_name; // TODO get all files in cask and have some order to open the latest one var err_or_file = bitcask_dir.openFile(file_name, .{.mode=.read_write}); @@ -105,28 +127,53 @@ const BitCask = struct { return error.CannotInitialize; } -// std.debug.print("Current File open: {}\n", .{current_file}); + initKeyDir() catch |err| { + std.debug.print("\nEncountered error while loading bitcask file: {}\n", .{err}); + return BitcaskFileError.CannotInitialize; + }; + return; } // bitcask:get(BitCaskHandle, Key) Retrieve a value by key from a Bitcask datastore. // → not found | {ok, Value} - pub fn get(key: []const u8) error{OutOfMemory, NoCaskFound, Unknown}!CaskEntry { - std.debug.print("Getting value with key {s}\n", .{key}); - const optional_cask = keyDir.get(key); + pub fn get(key: []const u8) BitCaskError!struct{KV, []u8} { +// std.debug.print("Getting value with key {s}\n", .{key}); + const optional_cask = key_dir_map.get(key); if (optional_cask == null) { - return error.NoCaskFound; + return BitCaskError.KeyNotFound; } const cask = optional_cask.?; // shorthand for 'optional_cask orelse unreachable' - std.debug.print("Got cask from file {s}\n", .{cask.file}); - return cask; +// std.debug.print("Got cask from file {s}\n", .{cask.file_name}); +// std.debug.print("Current position: {any}\n", .{current_file.getPos()}); + const offset = @intCast(u64, cask.offset); +// std.debug.print("Seeking to {}\n", .{offset}); + current_file.seekTo(offset) catch |err| { + std.debug.print("Unexpected error while seeking to offset {any}\n\t{any}\n", .{offset, err}); + return BitCaskError.InternalError; + }; + var buffer = allocator.alloc(u8, cask.size) catch |err| { + std.debug.print("Unexpected error while allocating buffer\n\t{any}\n", .{err}); + return BitCaskError.InternalError; + }; + //defer allocator.free(buffer); + var cask_bytes: []u8 = buffer[0..cask.size]; + _ = current_file.read(cask_bytes) catch |err| { + std.debug.print("Unexpected error while reading file\n\t{any}\n", .{err}); + return BitCaskError.InternalError; + }; + const kv = deserializeCaskEntry(cask_bytes) catch |err| { + std.debug.print("Unexpected error while deserializing cask_bytes {s}\n\t{any}\n", .{cask_bytes, err}); + return BitCaskError.InternalError; + }; + + return .{kv, buffer}; // TODO get entry from db rather than return the keyDir entry } // bitcask:put(BitCaskHandle, Key, Value) Store a key and value in a Bitcask datastore. // → ok | {error, any()} - pub fn put(data: KV) BitcaskFileError!void { - + pub fn put(data: KV) !void { // const key_type: EntryType = EntryType.str; // const key_size = getEntrySize(data.key, key_type); // const val_type: EntryType = EntryType.str; @@ -134,19 +181,26 @@ const BitCask = struct { // - const entry_bytes = serializeCaskEntry(&allocator, data) catch |err| { + const entry_bytes = serializeCaskEntry(allocator, data) catch |err| { std.debug.print("Encountered unknown error while serializing bitcask: {any}\n", .{err}); return BitcaskFileError.Unknown; }; + const end = try current_file.getEndPos(); + try current_file.seekTo(end); std.debug.print("Writing data to cask: {}\n", .{data}); - const i = current_file.write(entry_bytes) catch |err| { + _ = current_file.write(entry_bytes) catch |err| { std.debug.print("Encountered unknown error while writing to bitcask: {any}\n", .{err}); return BitcaskFileError.Unknown; }; - std.debug.print("After write: {any}\n", .{i}); + //std.debug.print("After write: {any}\n", .{i}); + const key_dir: KeyDirEntry = .{ + .file_name = current_file_name, + .offset = end, + .size = entry_bytes.len + }; - //try keyDir.put("key", data.val); + try key_dir_map.put(data.key, key_dir); return; } @@ -181,32 +235,79 @@ const BitCask = struct { return error.NotImplemented; } - fn getEntrySize(data: *anyopaque, t: EntryType) u32 { - switch (t) { - .str => { - const ptr = @ptrCast([*]const u8, data); - return ptr.len; - }, - .int => { - return @sizeOf(i32); - }, - .flt => { - return @sizeOf(f64); - } +// fn getEntrySize(data: *anyopaque, t: EntryType) u32 { +// switch (t) { +// .str => { +// const ptr = @ptrCast([*]const u8, data); +// return ptr.len; +// }, +// .int => { +// return @sizeOf(i32); +// }, +// .flt => { +// return @sizeOf(f64); +// } +// } +// } + + fn initKeyDir() !void { + var offset: u64 = 0; + const end = try current_file.getEndPos(); + const len_2usize = @sizeOf(usize) * 2; + + while (offset < end) { + try current_file.seekTo(offset); + std.debug.print("At offset {any}\t->\t", .{offset}); + // create buffer for getting usize of key and val + var size_buffer = try allocator.alloc(u8, len_2usize); // SEGFAULT + defer allocator.free(size_buffer); + var size_bytes: []u8 = size_buffer[0..len_2usize]; + _ = try current_file.read(size_bytes); + + const kv_sizes = try getKeyAndValSize(size_bytes); + const key_size = kv_sizes[0]; + const val_size = kv_sizes[1]; + + var key_buffer = try allocator.alloc(u8, key_size); + defer allocator.free(key_buffer); + var key: []u8 = key_buffer[0..key_size]; + _ = try current_file.read(key); + + std.debug.print("Got key {s}\n", .{key}); + // Add keyDir to hash map + const key_dir: KeyDirEntry = .{ + .file_name = current_file_name, + .offset = offset, + .size = key_size + val_size + len_2usize + }; + try key_dir_map.put(key, key_dir); + + offset += key_dir.size; } } - fn serializeCaskEntry(aloc: *const mem.Allocator, entry: KV) error{OutOfMemory}![]u8 { + fn getKey() !void { - var serialized_data = try std.ArrayList(u8).initCapacity(aloc.*, entry.key.len + entry.val.len + 8); + } + + fn getKeyAndValSize(size_bytes: []u8) !struct{usize, usize}{ + const size_len = @sizeOf(usize); + const key_size = std.mem.bytesToValue(usize, size_bytes[0..size_len]); + const val_size = std.mem.bytesToValue(usize, size_bytes[size_len..size_len*2]); + return .{key_size, val_size}; + } + + fn serializeCaskEntry(aloc: mem.Allocator, entry: KV) error{OutOfMemory}![]u8 { + + var serialized_data = try std.ArrayList(u8).initCapacity(aloc, entry.key.len + entry.val.len + @sizeOf(usize) * 2); // Serialize u32 into 4 u8 bytes - const keyBytes = std.mem.toBytes(entry.key.len); - try serialized_data.appendSlice(&keyBytes); // Serialize u32 into 4 u8 bytes + var key_len_bytes = std.mem.toBytes(entry.key.len); + try serialized_data.appendSlice(&key_len_bytes); // Serialize u32 into 4 u8 bytes // Serialize u32 into 4 u8 bytes - const valBytes = std.mem.toBytes(entry.val.len); - try serialized_data.appendSlice(&valBytes); - //try serialized_data.append(@enumToInt(entry.valType)); + var val_len_bytes = std.mem.toBytes(entry.val.len); + try serialized_data.appendSlice(&val_len_bytes); + //try serialized_data.append(@enumToInt(entry.valTypkeye)); for (entry.key) |byte| { try serialized_data.append(byte); @@ -216,43 +317,25 @@ const BitCask = struct { try serialized_data.append(byte); } - // Convert the val any type to the type specified -// switch (entry.valType) { -// .str => { -// const str_val = @ptrCast([*]const u8, entry.val); -// for (str_val) |byte| { -// try serialized_data.append(byte); -// } -// }, -// .int => { -// const int_val = @ptrCast(*const i32, entry.val); -// try serialized_data.appendSlice(mem.asBytes(int_val)); -// }, -// .flt => { -// const flt_val = @ptrCast(*const f64, entry.val); -// try serialized_data.appendSlice(mem.asBytes(flt_val)); -// } -// } - return try serialized_data.toOwnedSlice(); } -// fn getValType(caskVal: any) u8 { -// switch (@TypeOf(caskVal)) { -// u8 => -// } -// } + fn deserializeCaskEntry(cask_bytes: []u8) !KV { + const size_len = @sizeOf(usize); + const key_size = std.mem.bytesToValue(usize, cask_bytes[0..size_len]); + const val_size = std.mem.bytesToValue(usize, cask_bytes[size_len..size_len*2]); +// std.debug.print("Got key size [{}] and val size [{}]\n", .{key_size, val_size}); + const key_bytes = cask_bytes[size_len*2..size_len*2 + key_size]; + const val_bytes = cask_bytes[size_len*2 + key_size..size_len*2 + key_size + val_size]; + +// kv.* = KV{.key = key_bytes, .val = val_bytes}; +// +// std.debug.print("{any}\n", .{kv}); + return .{.key = key_bytes, .val = val_bytes}; + } }; -// pub fn main() !void { -// const bc = BitCask; -// try bc.open("data"); -// defer bc.close(); -// -// try bc.put(.{.key = "id", .val = "abc123"}); -// } - test "Bitcask spec implementation: open" { const bc = BitCask; try bc.open("data"); @@ -265,17 +348,28 @@ test "Bitcask spec implementation: open" { test "Bitcask spec implementation: get" { const bc = BitCask; - //bc.init(); try bc.open("data"); - defer bc.close(); - _ = bc.get("key") catch |err| { - try expect(err == error.NoCaskFound); + + _ = bc.get("2") catch |err| { + try expect(err == error.KeyNotFound); }; - //try bc.put(); - const cask = try bc.get("key"); - try expect(std.mem.eql(u8, cask.file, "file.txt")); - try expect(cask.offset == 0); - try expect(cask.size == 0); + + //std.debug.print("On load: got kv back\nKey: {s}\tVal: {s}\n", .{res[0].key, res[0].val}); + + try bc.put(.{.key = "1", .val = "This is easy"}); + try bc.put(.{.key = "2", .val = "secret"}); + try bc.put(.{.key = "3", .val = "{\"name\":\"charlie\"}"}); + + const res_1 = try bc.get("1"); + + try expect(std.mem.eql(u8, res_1[0].key, "1")); + try expect(std.mem.eql(u8, res_1[0].val, "This is easy")); + + bc.close(); + + try bc.open("data"); + const res = try bc.get("2"); + std.debug.print("Got kv back\nKey: {s}\tVal: {s}\n", .{res[0].key, res[0].val}); } test "Bitcask spec implementation: put" { @@ -283,15 +377,22 @@ test "Bitcask spec implementation: put" { try bc.open("data"); defer bc.close(); - const key = "id"; - const val = "1"; - try bc.put(.{ - .key = key, - .val = val + .key = "id", + .val = "1" }); try bc.put(.{.key = "user1", .val = "likes apples so much"}); + + const res = bc.key_dir_map.get("user1"); + const key_dir = res[0]; + const buffer = res[1]; + defer bc.allocator.free(buffer); + const val = key_dir.?.size; + + std.debug.print("user1 entry has size {}\n", .{val}); + try expect(std.mem.eql(u8, key_dir.?.file_name, bc.current_file_name)); + try expect(val == 41); } test "Bitcask spec implementation: delete" { @@ -316,10 +417,36 @@ test "Bitcask spec implementation: merge" { test "Bitcask spec implementation: sync" { const bc = BitCask; - try bc.merge(); + try bc.sync(); } test "Bitcask spec implementation: close" { const bc = BitCask; - try bc.close(); + bc.close(); } + +test "Bitcask drain" { + const bc = BitCask; + try bc.open("data"); + try bc.put(.{.key = "id", .val = "1"}); + bc.close(); + try bc.drain(); +} +// Convert the val any type to the type specified +// switch (entry.valType) { +// .str => { +// const str_val = @ptrCast([*]const u8, entry.val); +// for (str_val) |byte| { +// try serialized_data.append(byte); +// } +// }, +// .int => { +// const int_val = @ptrCast(*const i32, entry.val); +// try serialized_data.appendSlice(mem.asBytes(int_val)); +// }, +// .flt => { +// const flt_val = @ptrCast(*const f64, entry.val); +// try serialized_data.appendSlice(mem.asBytes(flt_val)); +// } +// } +