This commit is contained in:
Nathan Anderson 2023-04-12 17:04:27 -06:00
parent 30352fd304
commit 4fb9934603

View File

@ -11,9 +11,14 @@ const BitcaskFileError = error{
Unknown
};
// Errors reported by the key/value operations of the store.
// KeyNotFound:   get() found no key directory entry for the requested key.
// InternalError: get() failed while seeking, allocating, reading or deserializing a record.
const BitCaskError = error{ KeyNotFound, InternalError };
const CaskEntry = struct {
keySize: u32,
valSize: u32,
keySize: usize,
valSize: usize,
valType: EntryType,
key: []const u8,
val: *const anyopaque,
@ -35,8 +40,8 @@ const BitCask = struct {
// Location of one serialized record on disk. The key directory maps every key to one of these
// so a lookup can seek straight to the record instead of scanning the data file.
const KeyDirEntry = struct {
    // Name of the data file that holds the record.
    file_name: []const u8,
    // Byte offset of the record's first byte inside that file. This must be a full-width
    // integer: it is assigned from file positions, and a u4 can only represent 0..15.
    offset: usize,
    // Length of the whole serialized record in bytes.
    size: usize,
};
@ -49,9 +54,10 @@ const BitCask = struct {
// TODO: reference the data file through a file descriptor or another fixed-size handle instead
// of a name. A narrower integer might be enough for offsets, depending on the maximum file size.
// u32 = max offset of 4.29GB (a u4 only holds 0..15)

// In-memory key directory: maps a key to the location of its record on disk.
var key_dir_map: std.StringArrayHashMap(KeyDirEntry) = undefined;
// Handle of the data file that get() reads from and put() appends to; released by close().
var current_file: std.fs.File = undefined;
// Name of the data file, recorded by open() and copied into every KeyDirEntry.
var current_file_name: []const u8 = "";
// Directory passed to the last open() call; drain() deletes it.
var data_dir_name: []const u8 = "";
// bitcask:close(BitCaskHandle) Close a Bitcask data store and flush all pending writes (if any) to disk
pub fn close() void {
@ -60,6 +66,19 @@ const BitCask = struct {
current_file.close();
}
// Deletes the data directory recorded by the last open() call, together with everything in it.
//
// The directory itself is removed as well, so it has to exist again before open() can be
// called on it (open() opens the directory and fails when it is missing).
//
// Returns BitcaskFileError.Unknown when no directory has been recorded yet, when the directory
// cannot be opened, or when the deletion fails. The underlying error is printed to stderr.
pub fn drain() BitcaskFileError!void {
    std.debug.print("Draining bitcask...", .{});

    // An empty name means open() never ran; refuse to delete anything in that case.
    if (data_dir_name.len == 0) {
        std.debug.print("No data directory recorded; call open() before drain()\n", .{});
        return BitcaskFileError.Unknown;
    }

    const cwd = std.fs.cwd();

    // Probe the directory first so that a missing or unreadable directory is still reported as
    // an error, then release the handle right away: it is not needed for the deletion.
    var bitcask_dir = std.fs.Dir.openDir(cwd, data_dir_name, .{ .access_sub_paths = false }) catch |err| {
        std.debug.print("Encountered error while opening data directory /{s}:\n\t{any}", .{ data_dir_name, err });
        return BitcaskFileError.Unknown;
    };
    bitcask_dir.close();

    // Delete by the directory's own relative path. Passing "/" here would be an absolute path,
    // which ignores the directory handle and targets the filesystem root instead.
    cwd.deleteTree(data_dir_name) catch |err| {
        std.debug.print("Encountered error while deleting data tree at /{s}:\n\t{any}", .{ data_dir_name, err });
        return BitcaskFileError.Unknown;
    };
}
// From the Bitcask paper, the API should look something like this
//** TODO
@ -81,14 +100,17 @@ const BitCask = struct {
// BitCaskHandle | {error, any()} The directory and all files in it must be readable by this process.
pub fn open(directory_name: []const u8) (std.fs.File.OpenError || BitcaskFileError)!void {
// init keyDir
keyDir = std.StringArrayHashMap(KeyDirEntry).init(
key_dir_map = std.StringArrayHashMap(KeyDirEntry).init(
allocator,
);
const file_name = "cask1.db";
const file_name = "cask1.db"; // TODO come up with a naming convention, like date format? or fun names :)
current_file_name = file_name;
// Open file
std.debug.print("\nOpening bitcask in dir {s}/\n", .{directory_name});
var bitcask_dir = try std.fs.Dir.openDir(std.fs.cwd(), directory_name, .{.access_sub_paths=false});
//if (bitcask_dir == std.fs.Dir.OpenError)
data_dir_name = directory_name;
// TODO get all files in cask and have some order to open the latest one
var err_or_file = bitcask_dir.openFile(file_name, .{.mode=.read_write});
@ -105,28 +127,53 @@ const BitCask = struct {
return error.CannotInitialize;
}
// std.debug.print("Current File open: {}\n", .{current_file});
initKeyDir() catch |err| {
std.debug.print("\nEncountered error while loading bitcask file: {}\n", .{err});
return BitcaskFileError.CannotInitialize;
};
return;
}
// bitcask:get(BitCaskHandle, Key) Retrieve a value by key from a Bitcask datastore.
// not found | {ok, Value}
//
// Looks the key up in key_dir_map, seeks current_file to the recorded offset, reads the whole
// record into a freshly allocated buffer and deserializes it.
//
// Returns a tuple of the decoded KV and the backing buffer. The key/value slices of the KV
// point into that buffer, so the caller owns the buffer and must release it with
// `allocator.free` once it is done with the KV.
//
// Errors:
//   BitCaskError.KeyNotFound   - the key has no entry in key_dir_map.
//   BitCaskError.InternalError - seeking, allocating, reading or deserializing failed (the
//                                cause is printed to stderr); the buffer is freed in that case.
pub fn get(key: []const u8) BitCaskError!struct{KV, []u8} {
    const cask = key_dir_map.get(key) orelse return BitCaskError.KeyNotFound;

    const offset: u64 = cask.offset;
    current_file.seekTo(offset) catch |err| {
        std.debug.print("Unexpected error while seeking to offset {any}\n\t{any}\n", .{ offset, err });
        return BitCaskError.InternalError;
    };

    const buffer = allocator.alloc(u8, cask.size) catch |err| {
        std.debug.print("Unexpected error while allocating buffer\n\t{any}\n", .{err});
        return BitCaskError.InternalError;
    };
    // Ownership only moves to the caller on success; every error return below frees the buffer.
    errdefer allocator.free(buffer);

    // readAll keeps reading until the buffer is full or the file ends, whereas a single read()
    // is allowed to return fewer bytes than requested.
    const bytes_read = current_file.readAll(buffer) catch |err| {
        std.debug.print("Unexpected error while reading file\n\t{any}\n", .{err});
        return BitCaskError.InternalError;
    };
    if (bytes_read != buffer.len) {
        std.debug.print("Unexpected end of file: read {d} of {d} bytes\n", .{ bytes_read, buffer.len });
        return BitCaskError.InternalError;
    }

    const kv = deserializeCaskEntry(buffer) catch |err| {
        std.debug.print("Unexpected error while deserializing cask_bytes {s}\n\t{any}\n", .{ buffer, err });
        return BitCaskError.InternalError;
    };
    return .{ kv, buffer };
}
// bitcask:put(BitCaskHandle, Key, Value) Store a key and value in a Bitcask datastore.
// ok | {error, any()}
pub fn put(data: KV) BitcaskFileError!void {
pub fn put(data: KV) !void {
// const key_type: EntryType = EntryType.str;
// const key_size = getEntrySize(data.key, key_type);
// const val_type: EntryType = EntryType.str;
@ -134,19 +181,26 @@ const BitCask = struct {
//
const entry_bytes = serializeCaskEntry(&allocator, data) catch |err| {
const entry_bytes = serializeCaskEntry(allocator, data) catch |err| {
std.debug.print("Encountered unknown error while serializing bitcask: {any}\n", .{err});
return BitcaskFileError.Unknown;
};
const end = try current_file.getEndPos();
try current_file.seekTo(end);
std.debug.print("Writing data to cask: {}\n", .{data});
const i = current_file.write(entry_bytes) catch |err| {
_ = current_file.write(entry_bytes) catch |err| {
std.debug.print("Encountered unknown error while writing to bitcask: {any}\n", .{err});
return BitcaskFileError.Unknown;
};
std.debug.print("After write: {any}\n", .{i});
//std.debug.print("After write: {any}\n", .{i});
const key_dir: KeyDirEntry = .{
.file_name = current_file_name,
.offset = end,
.size = entry_bytes.len
};
//try keyDir.put("key", data.val);
try key_dir_map.put(data.key, key_dir);
return;
}
@ -181,32 +235,79 @@ const BitCask = struct {
return error.NotImplemented;
}
fn getEntrySize(data: *anyopaque, t: EntryType) u32 {
switch (t) {
.str => {
const ptr = @ptrCast([*]const u8, data);
return ptr.len;
},
.int => {
return @sizeOf(i32);
},
.flt => {
return @sizeOf(f64);
}
// fn getEntrySize(data: *anyopaque, t: EntryType) u32 {
// switch (t) {
// .str => {
// const ptr = @ptrCast([*]const u8, data);
// return ptr.len;
// },
// .int => {
// return @sizeOf(i32);
// },
// .flt => {
// return @sizeOf(f64);
// }
// }
// }
// Rebuilds key_dir_map by scanning current_file from offset 0 to the end of the file.
//
// Every record starts with a header of two usize values (key length, value length) followed
// by the key bytes and the value bytes. Only the header and the key are read; the value is
// skipped by advancing the offset. When a key occurs more than once, the record closest to
// the end of the file wins.
//
// Ownership: keys inserted here are heap-allocated with `allocator` and are owned by
// key_dir_map afterwards, because the map stores the key slice without copying it.
// NOTE(review): whoever tears the map down has to free these keys; put() by contrast stores
// the caller's key slice as-is - confirm the intended ownership model.
//
// Errors: any seek/read/allocation error, error.TruncatedEntry when the file ends in the
// middle of a record, and error.CorruptEntry when the recorded sizes do not fit in the file.
fn initKeyDir() !void {
    const header_len = @sizeOf(usize) * 2;
    const end = try current_file.getEndPos();
    var offset: u64 = 0;

    while (offset < end) {
        const remaining = end - offset;
        if (remaining < header_len) return error.TruncatedEntry;

        try current_file.seekTo(offset);
        std.debug.print("At offset {any}\t->\t", .{offset});

        // The header has a fixed size, so it lives on the stack instead of the heap.
        var header: [header_len]u8 = undefined;
        if ((try current_file.readAll(&header)) != header_len) return error.TruncatedEntry;

        const kv_sizes = try getKeyAndValSize(&header);
        const key_size = kv_sizes[0];
        const val_size = kv_sizes[1];

        // Sizes come from disk: reject values that overflow or point past the end of the file
        // before they are used for an allocation or for advancing the offset.
        const payload_len = std.math.add(usize, key_size, val_size) catch return error.CorruptEntry;
        if (payload_len > remaining - header_len) return error.CorruptEntry;

        const key = try allocator.alloc(u8, key_size);
        // The key must outlive this iteration once the map references it, so it is only freed
        // when the map did not take it (error exit, or the key was already present).
        var key_is_owned_by_map = false;
        defer if (!key_is_owned_by_map) allocator.free(key);

        if ((try current_file.readAll(key)) != key_size) return error.TruncatedEntry;
        std.debug.print("Got key {s}\n", .{key});

        const entry_size = header_len + payload_len;
        const slot = try key_dir_map.getOrPut(key);
        slot.value_ptr.* = .{
            .file_name = current_file_name,
            .offset = offset,
            .size = entry_size,
        };
        key_is_owned_by_map = !slot.found_existing;

        offset += entry_size;
    }
}
fn serializeCaskEntry(aloc: *const mem.Allocator, entry: KV) error{OutOfMemory}![]u8 {
fn getKey() !void {
var serialized_data = try std.ArrayList(u8).initCapacity(aloc.*, entry.key.len + entry.val.len + 8);
}
// Decodes a record header: the first @sizeOf(usize) bytes hold the key length and the next
// @sizeOf(usize) bytes hold the value length, both in the in-memory representation of usize.
//
// Returns the tuple .{ key_size, val_size }. Bytes after the header are ignored.
// Fails with error.InvalidSizeHeader when `size_bytes` is shorter than two usize values.
fn getKeyAndValSize(size_bytes: []u8) !struct{usize, usize}{
    const size_len = @sizeOf(usize);
    // Guard the fixed-width slices below against a short input.
    if (size_bytes.len < size_len * 2) return error.InvalidSizeHeader;

    const key_size = std.mem.bytesToValue(usize, size_bytes[0..size_len]);
    const val_size = std.mem.bytesToValue(usize, size_bytes[size_len .. size_len * 2]);
    return .{ key_size, val_size };
}
fn serializeCaskEntry(aloc: mem.Allocator, entry: KV) error{OutOfMemory}![]u8 {
var serialized_data = try std.ArrayList(u8).initCapacity(aloc, entry.key.len + entry.val.len + @sizeOf(usize) * 2);
// Serialize u32 into 4 u8 bytes
const keyBytes = std.mem.toBytes(entry.key.len);
try serialized_data.appendSlice(&keyBytes); // Serialize u32 into 4 u8 bytes
var key_len_bytes = std.mem.toBytes(entry.key.len);
try serialized_data.appendSlice(&key_len_bytes); // Serialize u32 into 4 u8 bytes
// Serialize u32 into 4 u8 bytes
const valBytes = std.mem.toBytes(entry.val.len);
try serialized_data.appendSlice(&valBytes);
//try serialized_data.append(@enumToInt(entry.valType));
var val_len_bytes = std.mem.toBytes(entry.val.len);
try serialized_data.appendSlice(&val_len_bytes);
//try serialized_data.append(@enumToInt(entry.valTypkeye));
for (entry.key) |byte| {
try serialized_data.append(byte);
@ -216,43 +317,25 @@ const BitCask = struct {
try serialized_data.append(byte);
}
// Convert the val any type to the type specified
// switch (entry.valType) {
// .str => {
// const str_val = @ptrCast([*]const u8, entry.val);
// for (str_val) |byte| {
// try serialized_data.append(byte);
// }
// },
// .int => {
// const int_val = @ptrCast(*const i32, entry.val);
// try serialized_data.appendSlice(mem.asBytes(int_val));
// },
// .flt => {
// const flt_val = @ptrCast(*const f64, entry.val);
// try serialized_data.appendSlice(mem.asBytes(flt_val));
// }
// }
return try serialized_data.toOwnedSlice();
}
// fn getValType(caskVal: any) u8 {
// switch (@TypeOf(caskVal)) {
// u8 =>
// }
// }
// Decodes one serialized record: a header of two usize values (key length, value length)
// followed by the key bytes and then the value bytes.
//
// The returned KV does not copy anything: its key and val are sub-slices of `cask_bytes`,
// so they are only valid for as long as that buffer is.
//
// Errors:
//   error.TruncatedEntry - `cask_bytes` is too short to contain the header.
//   error.CorruptEntry   - the recorded lengths overflow or extend past the end of `cask_bytes`.
fn deserializeCaskEntry(cask_bytes: []u8) !KV {
    const size_len = @sizeOf(usize);
    const header_len = size_len * 2;
    if (cask_bytes.len < header_len) return error.TruncatedEntry;

    const key_size = std.mem.bytesToValue(usize, cask_bytes[0..size_len]);
    const val_size = std.mem.bytesToValue(usize, cask_bytes[size_len..header_len]);

    // The lengths come from stored data; validate them before slicing with them.
    const payload_len = std.math.add(usize, key_size, val_size) catch return error.CorruptEntry;
    if (payload_len > cask_bytes.len - header_len) return error.CorruptEntry;

    const key_end = header_len + key_size;
    const val_end = key_end + val_size;
    return .{ .key = cask_bytes[header_len..key_end], .val = cask_bytes[key_end..val_end] };
}
};
// pub fn main() !void {
// const bc = BitCask;
// try bc.open("data");
// defer bc.close();
//
// try bc.put(.{.key = "id", .val = "abc123"});
// }
test "Bitcask spec implementation: open" {
const bc = BitCask;
try bc.open("data");
@ -265,17 +348,28 @@ test "Bitcask spec implementation: open" {
// Exercises BitCask.get against the "data" directory: error checks on lookups, a put/get round
// trip for key "1", and a lookup of key "2" after the store is closed and opened again.
// NOTE(review): two generations of assertions appear side by side in this block (the
// NoCaskFound / cask.file checks and the KeyNotFound / res[0] checks) - confirm which set
// belongs to the current get() signature.
test "Bitcask spec implementation: get" {
const bc = BitCask;
//bc.init();
try bc.open("data");
// Runs at scope exit, i.e. it closes the store opened by the second open() call below.
defer bc.close();
_ = bc.get("key") catch |err| {
try expect(err == error.NoCaskFound);
_ = bc.get("2") catch |err| {
try expect(err == error.KeyNotFound);
};
//try bc.put();
const cask = try bc.get("key");
try expect(std.mem.eql(u8, cask.file, "file.txt"));
try expect(cask.offset == 0);
try expect(cask.size == 0);
//std.debug.print("On load: got kv back\nKey: {s}\tVal: {s}\n", .{res[0].key, res[0].val});
// Append three records, then read the first one back through the key directory.
try bc.put(.{.key = "1", .val = "This is easy"});
try bc.put(.{.key = "2", .val = "secret"});
try bc.put(.{.key = "3", .val = "{\"name\":\"charlie\"}"});
// NOTE(review): get() also returns the buffer backing the KV (tuple element 1); it is never
// freed in this test - confirm whether that is intended.
const res_1 = try bc.get("1");
try expect(std.mem.eql(u8, res_1[0].key, "1"));
try expect(std.mem.eql(u8, res_1[0].val, "This is easy"));
// Close and open the store again so the next lookup goes through a freshly opened store.
bc.close();
try bc.open("data");
const res = try bc.get("2");
std.debug.print("Got kv back\nKey: {s}\tVal: {s}\n", .{res[0].key, res[0].val});
}
test "Bitcask spec implementation: put" {
@ -283,15 +377,22 @@ test "Bitcask spec implementation: put" {
try bc.open("data");
defer bc.close();
const key = "id";
const val = "1";
try bc.put(.{
.key = key,
.val = val
.key = "id",
.val = "1"
});
try bc.put(.{.key = "user1", .val = "likes apples so much"});
const res = bc.key_dir_map.get("user1");
const key_dir = res[0];
const buffer = res[1];
defer bc.allocator.free(buffer);
const val = key_dir.?.size;
std.debug.print("user1 entry has size {}\n", .{val});
try expect(std.mem.eql(u8, key_dir.?.file_name, bc.current_file_name));
try expect(val == 41);
}
test "Bitcask spec implementation: delete" {
@ -316,10 +417,36 @@ test "Bitcask spec implementation: merge" {
// Calls BitCask.sync() and fails the test when it returns an error.
// The merge() call that used to sit here belongs to the separate "merge" test.
test "Bitcask spec implementation: sync" {
    const bc = BitCask;
    try bc.sync();
}
// Opens the store and closes it again.
// close() returns void, so it must not be wrapped in `try`; it is also only called on a store
// this test opened itself, never on a file handle that is unset or already closed.
test "Bitcask spec implementation: close" {
    const bc = BitCask;
    try bc.open("data");
    bc.close();
}
// Writes one record into the "data" store, closes the store, then drains it.
test "Bitcask drain" {
    // Populate the store so that there is something on disk to remove.
    try BitCask.open("data");
    try BitCask.put(.{ .key = "id", .val = "1" });

    // The data file is closed before the data tree is deleted.
    BitCask.close();
    try BitCask.drain();
}
// Convert the val any type to the type specified
// switch (entry.valType) {
// .str => {
// const str_val = @ptrCast([*]const u8, entry.val);
// for (str_val) |byte| {
// try serialized_data.append(byte);
// }
// },
// .int => {
// const int_val = @ptrCast(*const i32, entry.val);
// try serialized_data.appendSlice(mem.asBytes(int_val));
// },
// .flt => {
// const flt_val = @ptrCast(*const f64, entry.val);
// try serialized_data.appendSlice(mem.asBytes(flt_val));
// }
// }