commit 4fb9934603 (parent 30352fd304): edits
@@ -11,9 +11,14 @@ const BitcaskFileError = error{
     Unknown
 };
 
+const BitCaskError = error{
+    KeyNotFound,
+    InternalError,
+};
+
 const CaskEntry = struct {
-    keySize: u32,
-    valSize: u32,
+    keySize: usize,
+    valSize: usize,
     valType: EntryType,
     key: []const u8,
     val: *const anyopaque,
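The new BitCaskError set separates key-level failures from the file-level BitcaskFileError. Zig error sets can be merged with the || operator, which is how open() below combines std.fs.File.OpenError with BitcaskFileError. A minimal sketch of that behavior (the names here are illustrative, not from the commit):

    const LookupError = error{KeyNotFound} || error{Unknown};

    fn demoLookup(found: bool) LookupError!u8 {
        // A member of either constituent set can be returned from the merged set.
        if (!found) return error.KeyNotFound;
        return 42;
    }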
@@ -35,8 +40,8 @@ const BitCask = struct {
 
     const KeyDirEntry = struct {
         file_name: []const u8,
-        offset: u4,
-        size: u4,
+        offset: usize,
+        size: usize,
     };
 
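KeyDirEntry is the in-memory index record: each key maps to the file holding its latest value plus the byte offset and total size of that record, so a lookup costs one seek and one read of size bytes. A hypothetical helper sketching that read path (not part of the commit; assumes buf.len >= entry.size):

    fn readEntryBytes(dir: std.fs.Dir, entry: KeyDirEntry, buf: []u8) !usize {
        const file = try dir.openFile(entry.file_name, .{});
        defer file.close();
        try file.seekTo(entry.offset);
        // Read exactly the record's bytes into the caller's buffer.
        return try file.read(buf[0..entry.size]);
    }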
@@ -49,9 +54,10 @@ const BitCask = struct {
     // TODO fix to a file descriptor or reference for fixed-size, may only need u4? depends on max file size I suppose
     // u4 = key size of 4.29GB
 
-    var keyDir: std.StringArrayHashMap(KeyDirEntry) = undefined;
+    var key_dir_map: std.StringArrayHashMap(KeyDirEntry) = undefined;
     var current_file: std.fs.File = undefined;
-    var current_file_name: [*]const u8 = "";
+    var current_file_name: []const u8 = "";
+    var data_dir_name: []const u8 = "";
 
     // bitcask:close(BitCaskHandle) Close a Bitcask data store and flush all pending writes (if any) to disk
     pub fn close() void {
@@ -60,6 +66,19 @@ const BitCask = struct {
         current_file.close();
     }
 
+    pub fn drain() BitcaskFileError!void {
+        std.debug.print("Draining bitcask...", .{});
+        var bitcask_dir = std.fs.Dir.openDir(std.fs.cwd(), data_dir_name, .{.access_sub_paths=false}) catch |err| {
+            std.debug.print("Encountered error while opening data directory /{s}:\n\t{any}", .{data_dir_name, err});
+            return BitcaskFileError.Unknown;
+        };
+
+        bitcask_dir.deleteTree("/") catch |err| {
+            std.debug.print("Encountered error while deleting data tree at /{s}:\n\t{any}", .{data_dir_name, err});
+            return BitcaskFileError.Unknown;
+        };
+    }
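Note that drain() opens data_dir_name and then calls deleteTree("/"); Dir.deleteTree resolves its argument relative to the opened directory, so "/" may not target the data directory itself. A simpler teardown, assuming data_dir_name is a path relative to the current working directory, would be:

    fn drainAlt() !void {
        // Delete the whole data directory tree by name from its parent.
        try std.fs.cwd().deleteTree(data_dir_name);
    }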
+
     // From the Bitcask paper, the API should look something like this
 
     //** TODO
@@ -81,14 +100,17 @@ const BitCask = struct {
     // → BitCaskHandle | {error, any()} The directory and all files in it must be readable by this process.
     pub fn open(directory_name: []const u8) (std.fs.File.OpenError || BitcaskFileError)!void {
         // init keyDir
-        keyDir = std.StringArrayHashMap(KeyDirEntry).init(
+        key_dir_map = std.StringArrayHashMap(KeyDirEntry).init(
             allocator,
         );
-        const file_name = "cask1.db";
+        const file_name = "cask1.db"; // TODO come up with a naming convention, like date format? or fun names :)
         current_file_name = file_name;
         // Open file
         std.debug.print("\nOpening bitcask in dir {s}/\n", .{directory_name});
         var bitcask_dir = try std.fs.Dir.openDir(std.fs.cwd(), directory_name, .{.access_sub_paths=false});
         //if (bitcask_dir == std.fs.Dir.OpenError)
 
+        data_dir_name = directory_name;
         // TODO get all files in cask and have some order to open the latest one
         var err_or_file = bitcask_dir.openFile(file_name, .{.mode=.read_write});
 
@@ -105,28 +127,53 @@ const BitCask = struct {
             return error.CannotInitialize;
         }
 
         // std.debug.print("Current File open: {}\n", .{current_file});
+        initKeyDir() catch |err| {
+            std.debug.print("\nEncountered error while loading bitcask file: {}\n", .{err});
+            return BitcaskFileError.CannotInitialize;
+        };
+
         return;
     }
 
     // bitcask:get(BitCaskHandle, Key) Retrieve a value by key from a Bitcask datastore.
     // → not found | {ok, Value}
-    pub fn get(key: []const u8) error{OutOfMemory, NoCaskFound, Unknown}!CaskEntry {
-        std.debug.print("Getting value with key {s}\n", .{key});
-        const optional_cask = keyDir.get(key);
+    pub fn get(key: []const u8) BitCaskError!struct{KV, []u8} {
+        // std.debug.print("Getting value with key {s}\n", .{key});
+        const optional_cask = key_dir_map.get(key);
         if (optional_cask == null) {
-            return error.NoCaskFound;
+            return BitCaskError.KeyNotFound;
         }
         const cask = optional_cask.?; // shorthand for 'optional_cask orelse unreachable'
-        std.debug.print("Got cask from file {s}\n", .{cask.file});
-        return cask;
+        // std.debug.print("Got cask from file {s}\n", .{cask.file_name});
+        // std.debug.print("Current position: {any}\n", .{current_file.getPos()});
+        const offset = @intCast(u64, cask.offset);
+        // std.debug.print("Seeking to {}\n", .{offset});
+        current_file.seekTo(offset) catch |err| {
+            std.debug.print("Unexpected error while seeking to offset {any}\n\t{any}\n", .{offset, err});
+            return BitCaskError.InternalError;
+        };
+        var buffer = allocator.alloc(u8, cask.size) catch |err| {
+            std.debug.print("Unexpected error while allocating buffer\n\t{any}\n", .{err});
+            return BitCaskError.InternalError;
+        };
+        //defer allocator.free(buffer);
+        var cask_bytes: []u8 = buffer[0..cask.size];
+        _ = current_file.read(cask_bytes) catch |err| {
+            std.debug.print("Unexpected error while reading file\n\t{any}\n", .{err});
+            return BitCaskError.InternalError;
+        };
+        const kv = deserializeCaskEntry(cask_bytes) catch |err| {
+            std.debug.print("Unexpected error while deserializing cask_bytes {s}\n\t{any}\n", .{cask_bytes, err});
+            return BitCaskError.InternalError;
+        };
+
+        return .{kv, buffer};
         // TODO get entry from db rather than return the keyDir entry
     }
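The rewritten get() returns both the decoded KV and the backing buffer; the defer free inside get() is commented out precisely because ownership of the bytes moves to the caller. Caller-side usage, assuming the allocator declared on BitCask (a sketch, not from the commit):

    const res = try BitCask.get("1");
    defer BitCask.allocator.free(res[1]); // res[1] is the heap buffer backing res[0]
    std.debug.print("key={s} val={s}\n", .{ res[0].key, res[0].val });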
 
     // bitcask:put(BitCaskHandle, Key, Value) Store a key and value in a Bitcask datastore.
     // → ok | {error, any()}
-    pub fn put(data: KV) BitcaskFileError!void {
-
+    pub fn put(data: KV) !void {
         // const key_type: EntryType = EntryType.str;
         // const key_size = getEntrySize(data.key, key_type);
         // const val_type: EntryType = EntryType.str;
@@ -134,19 +181,26 @@ const BitCask = struct {
         //
 
 
-        const entry_bytes = serializeCaskEntry(&allocator, data) catch |err| {
+        const entry_bytes = serializeCaskEntry(allocator, data) catch |err| {
            std.debug.print("Encountered unknown error while serializing bitcask: {any}\n", .{err});
            return BitcaskFileError.Unknown;
         };
         const end = try current_file.getEndPos();
         try current_file.seekTo(end);
         std.debug.print("Writing data to cask: {}\n", .{data});
-        const i = current_file.write(entry_bytes) catch |err| {
+        _ = current_file.write(entry_bytes) catch |err| {
            std.debug.print("Encountered unknown error while writing to bitcask: {any}\n", .{err});
            return BitcaskFileError.Unknown;
         };
-        std.debug.print("After write: {any}\n", .{i});
+        //std.debug.print("After write: {any}\n", .{i});
 
+        const key_dir: KeyDirEntry = .{
+            .file_name = current_file_name,
+            .offset = end,
+            .size = entry_bytes.len
+        };
+
         //try keyDir.put("key", data.val);
+        try key_dir_map.put(data.key, key_dir);
 
         return;
     }
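put() is append-only: it seeks to the end of the current file, writes the serialized record, then overwrites the key's keydir entry with (file, offset = end, size = entry_bytes.len). The newest record therefore wins on reads, while stale bytes stay on disk until a future merge. Illustrative usage (a sketch; assumes the allocator declared on BitCask):

    try BitCask.put(.{ .key = "id", .val = "1" });
    try BitCask.put(.{ .key = "id", .val = "2" }); // same key: keydir entry is replaced
    const latest = try BitCask.get("id");          // served from the second record's offset
    defer BitCask.allocator.free(latest[1]);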
@@ -181,32 +235,79 @@ const BitCask = struct {
         return error.NotImplemented;
     }
 
-    fn getEntrySize(data: *anyopaque, t: EntryType) u32 {
-        switch (t) {
-            .str => {
-                const ptr = @ptrCast([*]const u8, data);
-                return ptr.len;
-            },
-            .int => {
-                return @sizeOf(i32);
-            },
-            .flt => {
-                return @sizeOf(f64);
-            }
-        }
-    }
+    // fn getEntrySize(data: *anyopaque, t: EntryType) u32 {
+    //     switch (t) {
+    //         .str => {
+    //             const ptr = @ptrCast([*]const u8, data);
+    //             return ptr.len;
+    //         },
+    //         .int => {
+    //             return @sizeOf(i32);
+    //         },
+    //         .flt => {
+    //             return @sizeOf(f64);
+    //         }
+    //     }
+    // }
 
+    fn initKeyDir() !void {
+        var offset: u64 = 0;
+        const end = try current_file.getEndPos();
+        const len_2usize = @sizeOf(usize) * 2;
+
+        while (offset < end) {
+            try current_file.seekTo(offset);
+            std.debug.print("At offset {any}\t->\t", .{offset});
+            // create buffer for getting usize of key and val
+            var size_buffer = try allocator.alloc(u8, len_2usize); // SEGFAULT
+            defer allocator.free(size_buffer);
+            var size_bytes: []u8 = size_buffer[0..len_2usize];
+            _ = try current_file.read(size_bytes);
+
+            const kv_sizes = try getKeyAndValSize(size_bytes);
+            const key_size = kv_sizes[0];
+            const val_size = kv_sizes[1];
+
+            var key_buffer = try allocator.alloc(u8, key_size);
+            defer allocator.free(key_buffer);
+            var key: []u8 = key_buffer[0..key_size];
+            _ = try current_file.read(key);
+
+            std.debug.print("Got key {s}\n", .{key});
+            // Add keyDir to hash map
+            const key_dir: KeyDirEntry = .{
+                .file_name = current_file_name,
+                .offset = offset,
+                .size = key_size + val_size + len_2usize
+            };
+            try key_dir_map.put(key, key_dir);
+
+            offset += key_dir.size;
+        }
+    }
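initKeyDir() rebuilds the index on open by walking the file record by record: read the two usize length prefixes, read the key, then advance offset by header + key + value. The per-iteration heap allocation for the fixed-size header (the line tagged SEGFAULT) could instead use a stack buffer, since its size is known at compile time. A sketch under the same surrounding declarations:

    // Fixed-size header: no allocation needed per iteration.
    var size_bytes: [@sizeOf(usize) * 2]u8 = undefined;
    _ = try current_file.read(size_bytes[0..]);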
 
-    fn serializeCaskEntry(aloc: *const mem.Allocator, entry: KV) error{OutOfMemory}![]u8 {
-
-        var serialized_data = try std.ArrayList(u8).initCapacity(aloc.*, entry.key.len + entry.val.len + 8);
-
-        // Serialize u32 into 4 u8 bytes
-        const keyBytes = std.mem.toBytes(entry.key.len);
-        try serialized_data.appendSlice(&keyBytes); // Serialize u32 into 4 u8 bytes
-        // Serialize u32 into 4 u8 bytes
-        const valBytes = std.mem.toBytes(entry.val.len);
-        try serialized_data.appendSlice(&valBytes);
-        //try serialized_data.append(@enumToInt(entry.valType));
+    fn getKey() !void {
+
+    }
+
+    fn getKeyAndValSize(size_bytes: []u8) !struct{usize, usize} {
+        const size_len = @sizeOf(usize);
+        const key_size = std.mem.bytesToValue(usize, size_bytes[0..size_len]);
+        const val_size = std.mem.bytesToValue(usize, size_bytes[size_len..size_len*2]);
+        return .{key_size, val_size};
+    }
+
+    fn serializeCaskEntry(aloc: mem.Allocator, entry: KV) error{OutOfMemory}![]u8 {
+
+        var serialized_data = try std.ArrayList(u8).initCapacity(aloc, entry.key.len + entry.val.len + @sizeOf(usize) * 2);
+
+        var key_len_bytes = std.mem.toBytes(entry.key.len);
+        try serialized_data.appendSlice(&key_len_bytes); // serialize the usize key-length prefix
+        var val_len_bytes = std.mem.toBytes(entry.val.len);
+        try serialized_data.appendSlice(&val_len_bytes); // serialize the usize val-length prefix
+        //try serialized_data.append(@enumToInt(entry.valType));
 
         for (entry.key) |byte| {
             try serialized_data.append(byte);
@@ -216,43 +317,25 @@ const BitCask = struct {
             try serialized_data.append(byte);
         }
 
-        // Convert the val any type to the type specified
-        // switch (entry.valType) {
-        //     .str => {
-        //         const str_val = @ptrCast([*]const u8, entry.val);
-        //         for (str_val) |byte| {
-        //             try serialized_data.append(byte);
-        //         }
-        //     },
-        //     .int => {
-        //         const int_val = @ptrCast(*const i32, entry.val);
-        //         try serialized_data.appendSlice(mem.asBytes(int_val));
-        //     },
-        //     .flt => {
-        //         const flt_val = @ptrCast(*const f64, entry.val);
-        //         try serialized_data.appendSlice(mem.asBytes(flt_val));
-        //     }
-        // }
-
         return try serialized_data.toOwnedSlice();
     }
 
-    // fn getValType(caskVal: any) u8 {
-    //     switch (@TypeOf(caskVal)) {
-    //         u8 =>
-    //     }
-    // }
+    fn deserializeCaskEntry(cask_bytes: []u8) !KV {
+        const size_len = @sizeOf(usize);
+        const key_size = std.mem.bytesToValue(usize, cask_bytes[0..size_len]);
+        const val_size = std.mem.bytesToValue(usize, cask_bytes[size_len..size_len*2]);
+        // std.debug.print("Got key size [{}] and val size [{}]\n", .{key_size, val_size});
+        const key_bytes = cask_bytes[size_len*2..size_len*2 + key_size];
+        const val_bytes = cask_bytes[size_len*2 + key_size..size_len*2 + key_size + val_size];
+
+        // kv.* = KV{.key = key_bytes, .val = val_bytes};
+        // std.debug.print("{any}\n", .{kv});
+        return .{.key = key_bytes, .val = val_bytes};
+    }
 };
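The on-disk record layout implied by serializeCaskEntry/deserializeCaskEntry is [key_len: usize][val_len: usize][key bytes][val bytes], native-endian via std.mem.toBytes. A self-contained round-trip check using the same std.mem primitives (a sketch only; the key and value here are arbitrary):

    const std = @import("std");

    test "record layout round trip (sketch)" {
        const n = @sizeOf(usize);
        const key = "id";
        const val = "abc123";

        // Build [key_len][val_len][key][val] exactly as serializeCaskEntry does.
        var buf: [n * 2 + key.len + val.len]u8 = undefined;
        const key_len_bytes = std.mem.toBytes(@as(usize, key.len));
        const val_len_bytes = std.mem.toBytes(@as(usize, val.len));
        std.mem.copy(u8, buf[0..n], &key_len_bytes);
        std.mem.copy(u8, buf[n .. n * 2], &val_len_bytes);
        std.mem.copy(u8, buf[n * 2 .. n * 2 + key.len], key);
        std.mem.copy(u8, buf[n * 2 + key.len ..], val);

        // Decode the prefixes back, as getKeyAndValSize/deserializeCaskEntry do.
        try std.testing.expect(std.mem.bytesToValue(usize, buf[0..n]) == key.len);
        try std.testing.expect(std.mem.eql(u8, buf[n * 2 .. n * 2 + key.len], key));
    }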
 
 // pub fn main() !void {
 //     const bc = BitCask;
 //     try bc.open("data");
 //     defer bc.close();
 //
 //     try bc.put(.{.key = "id", .val = "abc123"});
 // }
 
 test "Bitcask spec implementation: open" {
     const bc = BitCask;
     try bc.open("data");
@@ -265,17 +348,28 @@ test "Bitcask spec implementation: open" {
 
 test "Bitcask spec implementation: get" {
     const bc = BitCask;
     //bc.init();
     try bc.open("data");
     defer bc.close();
-    _ = bc.get("key") catch |err| {
-        try expect(err == error.NoCaskFound);
+
+    _ = bc.get("2") catch |err| {
+        try expect(err == error.KeyNotFound);
     };
-    //try bc.put();
-    const cask = try bc.get("key");
-    try expect(std.mem.eql(u8, cask.file, "file.txt"));
-    try expect(cask.offset == 0);
-    try expect(cask.size == 0);
+
+    //std.debug.print("On load: got kv back\nKey: {s}\tVal: {s}\n", .{res[0].key, res[0].val});
+
+    try bc.put(.{.key = "1", .val = "This is easy"});
+    try bc.put(.{.key = "2", .val = "secret"});
+    try bc.put(.{.key = "3", .val = "{\"name\":\"charlie\"}"});
+
+    const res_1 = try bc.get("1");
+
+    try expect(std.mem.eql(u8, res_1[0].key, "1"));
+    try expect(std.mem.eql(u8, res_1[0].val, "This is easy"));
+
+    bc.close();
+
+    try bc.open("data");
+    const res = try bc.get("2");
+    std.debug.print("Got kv back\nKey: {s}\tVal: {s}\n", .{res[0].key, res[0].val});
 }
 
 test "Bitcask spec implementation: put" {
@@ -283,15 +377,22 @@ test "Bitcask spec implementation: put" {
     try bc.open("data");
     defer bc.close();
 
-    const key = "id";
-    const val = "1";
-
     try bc.put(.{
-        .key = key,
-        .val = val
+        .key = "id",
+        .val = "1"
     });
 
+    try bc.put(.{.key = "user1", .val = "likes apples so much"});
+
+    const res = bc.key_dir_map.get("user1");
+    const key_dir = res[0];
+    const buffer = res[1];
+    defer bc.allocator.free(buffer);
+    const val = key_dir.?.size;
+
+    std.debug.print("user1 entry has size {}\n", .{val});
+    try expect(std.mem.eql(u8, key_dir.?.file_name, bc.current_file_name));
+    try expect(val == 41);
 }
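Why 41: an entry is two usize length prefixes plus the key plus the value, so on a 64-bit target the size is 16 + "user1".len (5) + "likes apples so much".len (20) = 41. As a standalone check (a sketch; holds only where usize is 8 bytes):

    const std = @import("std");

    test "user1 entry size arithmetic (sketch)" {
        // header (two usize prefixes) + key + value
        const expected = @sizeOf(usize) * 2 + "user1".len + "likes apples so much".len;
        try std.testing.expect(expected == 41);
    }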
 
 test "Bitcask spec implementation: delete" {
@@ -316,10 +417,36 @@ test "Bitcask spec implementation: merge" {
 
 test "Bitcask spec implementation: sync" {
     const bc = BitCask;
-    try bc.merge();
+    try bc.sync();
 }
 
 test "Bitcask spec implementation: close" {
     const bc = BitCask;
-    try bc.close();
+    bc.close();
 }
+
+test "Bitcask drain" {
+    const bc = BitCask;
+    try bc.open("data");
+    try bc.put(.{.key = "id", .val = "1"});
+    bc.close();
+    try bc.drain();
+}
+// Convert the val any type to the type specified
+// switch (entry.valType) {
+//     .str => {
+//         const str_val = @ptrCast([*]const u8, entry.val);
+//         for (str_val) |byte| {
+//             try serialized_data.append(byte);
+//         }
+//     },
+//     .int => {
+//         const int_val = @ptrCast(*const i32, entry.val);
+//         try serialized_data.appendSlice(mem.asBytes(int_val));
+//     },
+//     .flt => {
+//         const flt_val = @ptrCast(*const f64, entry.val);
+//         try serialized_data.appendSlice(mem.asBytes(flt_val));
+//     }
+// }