This commit is contained in:
Nathan Anderson 2023-04-12 17:04:27 -06:00
parent 30352fd304
commit 4fb9934603

View File

@ -11,9 +11,14 @@ const BitcaskFileError = error{
Unknown Unknown
}; };
/// Errors returned by the key/value API of the store.
///
/// - `KeyNotFound`: `get` was asked for a key that is not in `key_dir_map`.
/// - `InternalError`: `get` failed while seeking, allocating, reading or
///   deserializing; the underlying cause is printed before this is returned.
const BitCaskError = error{ KeyNotFound, InternalError };
const CaskEntry = struct { const CaskEntry = struct {
keySize: u32, keySize: usize,
valSize: u32, valSize: usize,
valType: EntryType, valType: EntryType,
key: []const u8, key: []const u8,
val: *const anyopaque, val: *const anyopaque,
@ -35,8 +40,8 @@ const BitCask = struct {
const KeyDirEntry = struct { const KeyDirEntry = struct {
file_name: []const u8, file_name: []const u8,
offset: u4, offset: usize,
size: u4, size: usize,
}; };
@ -49,9 +54,10 @@ const BitCask = struct {
// TODO fix to a file descriptor or reference for fixed-size, may only need u4? depends on max file size I suppose // TODO fix to a file descriptor or reference for fixed-size, may only need u4? depends on max file size I suppose
// u4 = key size of 4.29GB // u4 = key size of 4.29GB
var keyDir: std.StringArrayHashMap(KeyDirEntry) = undefined; var key_dir_map: std.StringArrayHashMap(KeyDirEntry) = undefined;
var current_file: std.fs.File = undefined; var current_file: std.fs.File = undefined;
var current_file_name: [*]const u8 = ""; var current_file_name: []const u8 = "";
var data_dir_name: []const u8 = "";
// bitcask:close(BitCaskHandle) Close a Bitcask data store and flush all pending writes (if any) to disk // bitcask:close(BitCaskHandle) Close a Bitcask data store and flush all pending writes (if any) to disk
pub fn close() void { pub fn close() void {
@ -60,6 +66,19 @@ const BitCask = struct {
current_file.close(); current_file.close();
} }
/// Removes every file stored under the data directory recorded by `open`
/// (`data_dir_name`), leaving an empty directory behind.
///
/// The previous implementation called `deleteTree("/")` on the directory
/// handle. An absolute sub-path makes the OS ignore the handle, so that call
/// targeted the filesystem root rather than the data directory. The tree is
/// now deleted by its own name, relative to the current working directory,
/// and then recreated empty so a later `open` on the same name still works.
///
/// Callers should `close()` the store first; this function does not touch
/// `current_file`.
///
/// Returns `BitcaskFileError.Unknown` when no data directory has been
/// recorded yet, when the directory cannot be opened, or when deleting or
/// recreating it fails. The underlying error is printed first.
pub fn drain() BitcaskFileError!void {
    std.debug.print("Draining bitcask...", .{});

    // An empty name means `open` never ran; refuse rather than guess a path.
    if (data_dir_name.len == 0) {
        std.debug.print("Encountered error while opening data directory /{s}:\n\t{any}", .{data_dir_name, error.FileNotFound});
        return BitcaskFileError.Unknown;
    }

    // Keep the original precondition that the directory must exist, but
    // release the handle right away instead of leaking it.
    var bitcask_dir = std.fs.Dir.openDir(std.fs.cwd(), data_dir_name, .{.access_sub_paths=false}) catch |err| {
        std.debug.print("Encountered error while opening data directory /{s}:\n\t{any}", .{data_dir_name, err});
        return BitcaskFileError.Unknown;
    };
    bitcask_dir.close();

    std.fs.cwd().deleteTree(data_dir_name) catch |err| {
        std.debug.print("Encountered error while deleting data tree at /{s}:\n\t{any}", .{data_dir_name, err});
        return BitcaskFileError.Unknown;
    };

    // Recreate the (now empty) directory so the store can be reopened.
    std.fs.cwd().makePath(data_dir_name) catch |err| {
        std.debug.print("Encountered error while recreating data directory /{s}:\n\t{any}", .{data_dir_name, err});
        return BitcaskFileError.Unknown;
    };
}
// From the Bitcask paper, the API should look something like this // From the Bitcask paper, the API should look something like this
//** TODO //** TODO
@ -81,14 +100,17 @@ const BitCask = struct {
// BitCaskHandle | {error, any()} The directory and all files in it must be readable by this process. // BitCaskHandle | {error, any()} The directory and all files in it must be readable by this process.
pub fn open(directory_name: []const u8) (std.fs.File.OpenError || BitcaskFileError)!void { pub fn open(directory_name: []const u8) (std.fs.File.OpenError || BitcaskFileError)!void {
// init keyDir // init keyDir
keyDir = std.StringArrayHashMap(KeyDirEntry).init( key_dir_map = std.StringArrayHashMap(KeyDirEntry).init(
allocator, allocator,
); );
const file_name = "cask1.db"; const file_name = "cask1.db"; // TODO come up with a naming convention, like date format? or fun names :)
current_file_name = file_name; current_file_name = file_name;
// Open file // Open file
std.debug.print("\nOpening bitcask in dir {s}/\n", .{directory_name}); std.debug.print("\nOpening bitcask in dir {s}/\n", .{directory_name});
var bitcask_dir = try std.fs.Dir.openDir(std.fs.cwd(), directory_name, .{.access_sub_paths=false}); var bitcask_dir = try std.fs.Dir.openDir(std.fs.cwd(), directory_name, .{.access_sub_paths=false});
//if (bitcask_dir == std.fs.Dir.OpenError)
data_dir_name = directory_name;
// TODO get all files in cask and have some order to open the latest one // TODO get all files in cask and have some order to open the latest one
var err_or_file = bitcask_dir.openFile(file_name, .{.mode=.read_write}); var err_or_file = bitcask_dir.openFile(file_name, .{.mode=.read_write});
@ -105,28 +127,53 @@ const BitCask = struct {
return error.CannotInitialize; return error.CannotInitialize;
} }
// std.debug.print("Current File open: {}\n", .{current_file}); initKeyDir() catch |err| {
std.debug.print("\nEncountered error while loading bitcask file: {}\n", .{err});
return BitcaskFileError.CannotInitialize;
};
return; return;
} }
// bitcask:get(BitCaskHandle, Key) Retrieve a value by key from a Bitcask datastore. // bitcask:get(BitCaskHandle, Key) Retrieve a value by key from a Bitcask datastore.
// not found | {ok, Value} // not found | {ok, Value}
/// bitcask:get(BitCaskHandle, Key) Retrieve a value by key from a Bitcask datastore.
///
/// Looks `key` up in `key_dir_map`, reads the whole serialized entry from
/// `current_file` at the recorded offset and deserializes it.
///
/// Returns a tuple of the decoded `KV` and the heap buffer backing it. The
/// key and value slices inside the `KV` point into that buffer, so the caller
/// owns the buffer and must free it with the same `allocator` once it is done
/// with the `KV`.
///
/// Errors:
/// - `BitCaskError.KeyNotFound` when the key is not in the key directory.
/// - `BitCaskError.InternalError` on seek, allocation, read (including a
///   short read) or deserialization failure. The buffer is released on all
///   of these paths.
///
/// NOTE(review): the entry is always read from `current_file`, regardless of
/// `cask.file_name` — fine while only one data file exists; confirm before
/// adding more.
pub fn get(key: []const u8) BitCaskError!struct{KV, []u8} {
    const cask = key_dir_map.get(key) orelse return BitCaskError.KeyNotFound;

    const offset: u64 = cask.offset;
    current_file.seekTo(offset) catch |err| {
        std.debug.print("Unexpected error while seeking to offset {any}\n\t{any}\n", .{offset, err});
        return BitCaskError.InternalError;
    };

    const buffer = allocator.alloc(u8, cask.size) catch |err| {
        std.debug.print("Unexpected error while allocating buffer\n\t{any}\n", .{err});
        return BitCaskError.InternalError;
    };
    // Ownership only passes to the caller on success.
    errdefer allocator.free(buffer);

    // readAll keeps reading until the buffer is full or EOF, so a partial
    // read cannot leave uninitialized bytes at the tail of the entry.
    const bytes_read = current_file.readAll(buffer) catch |err| {
        std.debug.print("Unexpected error while reading file\n\t{any}\n", .{err});
        return BitCaskError.InternalError;
    };
    if (bytes_read != buffer.len) {
        std.debug.print("Unexpected error while reading file\n\t{any}\n", .{error.EndOfStream});
        return BitCaskError.InternalError;
    }

    const kv = deserializeCaskEntry(buffer) catch |err| {
        std.debug.print("Unexpected error while deserializing cask_bytes {s}\n\t{any}\n", .{buffer, err});
        return BitCaskError.InternalError;
    };
    return .{kv, buffer};
}
// bitcask:put(BitCaskHandle, Key, Value) Store a key and value in a Bitcask datastore. // bitcask:put(BitCaskHandle, Key, Value) Store a key and value in a Bitcask datastore.
// ok | {error, any()} // ok | {error, any()}
pub fn put(data: KV) BitcaskFileError!void { pub fn put(data: KV) !void {
// const key_type: EntryType = EntryType.str; // const key_type: EntryType = EntryType.str;
// const key_size = getEntrySize(data.key, key_type); // const key_size = getEntrySize(data.key, key_type);
// const val_type: EntryType = EntryType.str; // const val_type: EntryType = EntryType.str;
@ -134,19 +181,26 @@ const BitCask = struct {
// //
const entry_bytes = serializeCaskEntry(&allocator, data) catch |err| { const entry_bytes = serializeCaskEntry(allocator, data) catch |err| {
std.debug.print("Encountered unknown error while serializing bitcask: {any}\n", .{err}); std.debug.print("Encountered unknown error while serializing bitcask: {any}\n", .{err});
return BitcaskFileError.Unknown; return BitcaskFileError.Unknown;
}; };
const end = try current_file.getEndPos();
try current_file.seekTo(end);
std.debug.print("Writing data to cask: {}\n", .{data}); std.debug.print("Writing data to cask: {}\n", .{data});
const i = current_file.write(entry_bytes) catch |err| { _ = current_file.write(entry_bytes) catch |err| {
std.debug.print("Encountered unknown error while writing to bitcask: {any}\n", .{err}); std.debug.print("Encountered unknown error while writing to bitcask: {any}\n", .{err});
return BitcaskFileError.Unknown; return BitcaskFileError.Unknown;
}; };
std.debug.print("After write: {any}\n", .{i}); //std.debug.print("After write: {any}\n", .{i});
const key_dir: KeyDirEntry = .{
.file_name = current_file_name,
.offset = end,
.size = entry_bytes.len
};
//try keyDir.put("key", data.val); try key_dir_map.put(data.key, key_dir);
return; return;
} }
@ -181,32 +235,79 @@ const BitCask = struct {
return error.NotImplemented; return error.NotImplemented;
} }
fn getEntrySize(data: *anyopaque, t: EntryType) u32 { // fn getEntrySize(data: *anyopaque, t: EntryType) u32 {
switch (t) { // switch (t) {
.str => { // .str => {
const ptr = @ptrCast([*]const u8, data); // const ptr = @ptrCast([*]const u8, data);
return ptr.len; // return ptr.len;
}, // },
.int => { // .int => {
return @sizeOf(i32); // return @sizeOf(i32);
}, // },
.flt => { // .flt => {
return @sizeOf(f64); // return @sizeOf(f64);
} // }
// }
// }
/// Rebuilds `key_dir_map` by scanning `current_file` from the start.
///
/// Each entry on disk is laid out as
/// `[key_len: usize][val_len: usize][key bytes][val bytes]`. For every entry
/// the header and key are read, and a `KeyDirEntry` recording the entry's
/// offset and total size is stored under that key. When a key occurs more
/// than once, the entry closest to the end of the file wins.
///
/// Key ownership: the key bytes are heap-allocated and handed to the map,
/// which stores the slice without copying it. The previous version freed the
/// key buffer at the end of each loop iteration, leaving the map with
/// dangling keys (the likely source of the "SEGFAULT" note that used to sit
/// here). The keys are therefore intentionally NOT freed in this function.
/// NOTE(review): whoever tears down `key_dir_map` must release them; `put`
/// stores caller-owned keys, so teardown needs a consistent policy.
///
/// Errors: any seek/read/allocation error is propagated;
/// `error.TruncatedEntry` is returned when the file ends in the middle of a
/// header or key.
fn initKeyDir() !void {
    const header_len = @sizeOf(usize) * 2;
    const end = try current_file.getEndPos();

    var offset: u64 = 0;
    while (offset < end) {
        try current_file.seekTo(offset);
        std.debug.print("At offset {any}\t->\t", .{offset});

        // The header has a fixed size, so a stack buffer is enough.
        var size_bytes: [header_len]u8 = undefined;
        if ((try current_file.readAll(&size_bytes)) != header_len) return error.TruncatedEntry;
        const kv_sizes = try getKeyAndValSize(&size_bytes);
        const key_size = kv_sizes[0];
        const val_size = kv_sizes[1];

        const key = try allocator.alloc(u8, key_size);
        // Only released here if this iteration fails before the map takes it.
        errdefer allocator.free(key);
        if ((try current_file.readAll(key)) != key_size) return error.TruncatedEntry;
        std.debug.print("Got key {s}\n", .{key});

        // Add keyDir to hash map
        const key_dir: KeyDirEntry = .{
            .file_name = current_file_name,
            .offset = offset,
            .size = key_size + val_size + header_len,
        };
        const slot = try key_dir_map.getOrPut(key);
        // On a repeated key the map keeps its existing key slice, so the
        // fresh copy is redundant and can be dropped immediately.
        if (slot.found_existing) allocator.free(key);
        slot.value_ptr.* = key_dir;

        offset += key_dir.size;
    }
}
fn serializeCaskEntry(aloc: *const mem.Allocator, entry: KV) error{OutOfMemory}![]u8 { fn getKey() !void {
var serialized_data = try std.ArrayList(u8).initCapacity(aloc.*, entry.key.len + entry.val.len + 8); }
/// Decodes the entry header: two consecutive `usize` values (key length,
/// then value length) stored in the machine's native byte layout, as written
/// by `std.mem.toBytes`.
///
/// `size_bytes` must hold at least `2 * @sizeOf(usize)` bytes; any extra
/// bytes are ignored. Returns `.{ key_size, val_size }`, or
/// `error.TruncatedHeader` when the slice is too short to contain both
/// fields (previously this indexed out of bounds).
fn getKeyAndValSize(size_bytes: []const u8) !struct{usize, usize} {
    const size_len = @sizeOf(usize);
    if (size_bytes.len < size_len * 2) return error.TruncatedHeader;

    const key_size = std.mem.bytesToValue(usize, size_bytes[0..size_len]);
    const val_size = std.mem.bytesToValue(usize, size_bytes[size_len .. size_len * 2]);
    return .{ key_size, val_size };
}
fn serializeCaskEntry(aloc: mem.Allocator, entry: KV) error{OutOfMemory}![]u8 {
var serialized_data = try std.ArrayList(u8).initCapacity(aloc, entry.key.len + entry.val.len + @sizeOf(usize) * 2);
// Serialize u32 into 4 u8 bytes // Serialize u32 into 4 u8 bytes
const keyBytes = std.mem.toBytes(entry.key.len); var key_len_bytes = std.mem.toBytes(entry.key.len);
try serialized_data.appendSlice(&keyBytes); // Serialize u32 into 4 u8 bytes try serialized_data.appendSlice(&key_len_bytes); // Serialize u32 into 4 u8 bytes
// Serialize u32 into 4 u8 bytes // Serialize u32 into 4 u8 bytes
const valBytes = std.mem.toBytes(entry.val.len); var val_len_bytes = std.mem.toBytes(entry.val.len);
try serialized_data.appendSlice(&valBytes); try serialized_data.appendSlice(&val_len_bytes);
//try serialized_data.append(@enumToInt(entry.valType)); //try serialized_data.append(@enumToInt(entry.valTypkeye));
for (entry.key) |byte| { for (entry.key) |byte| {
try serialized_data.append(byte); try serialized_data.append(byte);
@ -216,43 +317,25 @@ const BitCask = struct {
try serialized_data.append(byte); try serialized_data.append(byte);
} }
// Convert the val any type to the type specified
// switch (entry.valType) {
// .str => {
// const str_val = @ptrCast([*]const u8, entry.val);
// for (str_val) |byte| {
// try serialized_data.append(byte);
// }
// },
// .int => {
// const int_val = @ptrCast(*const i32, entry.val);
// try serialized_data.appendSlice(mem.asBytes(int_val));
// },
// .flt => {
// const flt_val = @ptrCast(*const f64, entry.val);
// try serialized_data.appendSlice(mem.asBytes(flt_val));
// }
// }
return try serialized_data.toOwnedSlice(); return try serialized_data.toOwnedSlice();
} }
/// Decodes one serialized entry laid out as
/// `[key_len: usize][val_len: usize][key bytes][val bytes]`.
///
/// The returned `KV` does not copy anything: its `key` and `val` slices
/// point into `cask_bytes`, so the caller must keep that buffer alive for as
/// long as the `KV` is used.
///
/// Returns `error.TruncatedEntry` when `cask_bytes` is too short for the
/// header, or when the lengths in the header exceed the bytes that follow it
/// (previously such input indexed out of bounds).
fn deserializeCaskEntry(cask_bytes: []u8) !KV {
    const size_len = @sizeOf(usize);
    const header_len = size_len * 2;
    if (cask_bytes.len < header_len) return error.TruncatedEntry;

    const key_size = std.mem.bytesToValue(usize, cask_bytes[0..size_len]);
    const val_size = std.mem.bytesToValue(usize, cask_bytes[size_len..header_len]);

    const body = cask_bytes[header_len..];
    // Written as two comparisons so that key_size + val_size cannot overflow.
    if (key_size > body.len or val_size > body.len - key_size) return error.TruncatedEntry;

    return .{
        .key = body[0..key_size],
        .val = body[key_size .. key_size + val_size],
    };
}
}; };
// pub fn main() !void {
// const bc = BitCask;
// try bc.open("data");
// defer bc.close();
//
// try bc.put(.{.key = "id", .val = "abc123"});
// }
test "Bitcask spec implementation: open" { test "Bitcask spec implementation: open" {
const bc = BitCask; const bc = BitCask;
try bc.open("data"); try bc.open("data");
@ -265,17 +348,28 @@ test "Bitcask spec implementation: open" {
test "Bitcask spec implementation: get" { test "Bitcask spec implementation: get" {
const bc = BitCask; const bc = BitCask;
//bc.init();
try bc.open("data"); try bc.open("data");
defer bc.close();
_ = bc.get("key") catch |err| { _ = bc.get("2") catch |err| {
try expect(err == error.NoCaskFound); try expect(err == error.KeyNotFound);
}; };
//try bc.put();
const cask = try bc.get("key"); //std.debug.print("On load: got kv back\nKey: {s}\tVal: {s}\n", .{res[0].key, res[0].val});
try expect(std.mem.eql(u8, cask.file, "file.txt"));
try expect(cask.offset == 0); try bc.put(.{.key = "1", .val = "This is easy"});
try expect(cask.size == 0); try bc.put(.{.key = "2", .val = "secret"});
try bc.put(.{.key = "3", .val = "{\"name\":\"charlie\"}"});
const res_1 = try bc.get("1");
try expect(std.mem.eql(u8, res_1[0].key, "1"));
try expect(std.mem.eql(u8, res_1[0].val, "This is easy"));
bc.close();
try bc.open("data");
const res = try bc.get("2");
std.debug.print("Got kv back\nKey: {s}\tVal: {s}\n", .{res[0].key, res[0].val});
} }
test "Bitcask spec implementation: put" { test "Bitcask spec implementation: put" {
@ -283,15 +377,22 @@ test "Bitcask spec implementation: put" {
try bc.open("data"); try bc.open("data");
defer bc.close(); defer bc.close();
const key = "id";
const val = "1";
try bc.put(.{ try bc.put(.{
.key = key, .key = "id",
.val = val .val = "1"
}); });
try bc.put(.{.key = "user1", .val = "likes apples so much"}); try bc.put(.{.key = "user1", .val = "likes apples so much"});
const res = bc.key_dir_map.get("user1");
const key_dir = res[0];
const buffer = res[1];
defer bc.allocator.free(buffer);
const val = key_dir.?.size;
std.debug.print("user1 entry has size {}\n", .{val});
try expect(std.mem.eql(u8, key_dir.?.file_name, bc.current_file_name));
try expect(val == 41);
} }
test "Bitcask spec implementation: delete" { test "Bitcask spec implementation: delete" {
@ -316,10 +417,36 @@ test "Bitcask spec implementation: merge" {
test "Bitcask spec implementation: sync" { test "Bitcask spec implementation: sync" {
const bc = BitCask; const bc = BitCask;
try bc.merge(); try bc.sync();
} }
test "Bitcask spec implementation: close" { test "Bitcask spec implementation: close" {
const bc = BitCask; const bc = BitCask;
try bc.close(); bc.close();
} }
// Smoke test for `drain`: open the store in "data", write a single entry,
// close it, then drain. Passes when none of the calls returns an error; the
// directory contents are not inspected afterwards.
test "Bitcask drain" {
const bc = BitCask;
// `open` records "data" as the data directory that `drain` later acts on.
try bc.open("data");
try bc.put(.{.key = "id", .val = "1"});
// Close the data file before draining so no handle to it is still open.
bc.close();
try bc.drain();
}
// Convert the val any type to the type specified
// switch (entry.valType) {
// .str => {
// const str_val = @ptrCast([*]const u8, entry.val);
// for (str_val) |byte| {
// try serialized_data.append(byte);
// }
// },
// .int => {
// const int_val = @ptrCast(*const i32, entry.val);
// try serialized_data.appendSlice(mem.asBytes(int_val));
// },
// .flt => {
// const flt_val = @ptrCast(*const f64, entry.val);
// try serialized_data.appendSlice(mem.asBytes(flt_val));
// }
// }