Working put test, passing, but not complete

This commit is contained in:
Nathan Anderson 2023-04-11 16:31:46 -06:00
parent e10a9e311f
commit 30352fd304
2 changed files with 217 additions and 24 deletions

View File

@ -4,4 +4,8 @@ Do it simply and do it well.
## The Bitcask paper
https://riak.com/assets/bitcask-intro.pdf
https://riak.com/assets/bitcask-intro.pdf
## Implementation in Ruby
https://dinesh.wiki/posts/build-your-own-persistent-kv-store/

View File

@ -1,17 +1,64 @@
const std = @import("std");
const expect = std.testing.expect;
const Dir = std.fs.Dir;
const mem = std.mem;
pub const BitcaskFileError = error{
// Error set used by the Bitcask file layer in this file.
const BitcaskFileError = error{
// Returned by `open` from its fallback branch when the data file could not be opened.
CannotInitialize,
AccessDenied,
OutOfMemory,
FileNotFound,
// Returned by `put` when serializing the entry or writing it to the file fails.
Unknown
};
pub const BitCask = struct {
// A key/value record together with its size and type metadata.
// NOTE(review): nothing visible in this file constructs a CaskEntry yet; it only
// appears as the declared return type of `get`.
const CaskEntry = struct {
// Size of the key — presumably in bytes; TODO confirm against the serializer.
keySize: u32,
// Size of the value — presumably in bytes; TODO confirm against the serializer.
valSize: u32,
// Tag saying how the type-erased `val` pointer should be interpreted.
valType: EntryType,
// The key bytes.
key: []const u8,
// Type-erased pointer to the value; `valType` is expected to say what it points at.
val: *const anyopaque,
};
// A key/value pair of byte strings; this is the argument type of `put` and
// `serializeCaskEntry`.
const KV = struct {
// Key bytes.
key: []const u8,
// Value bytes.
val: []const u8
};
// Tag describing the kind of a stored value; backed by a u8 so it fits in one byte.
const EntryType = enum(u8) {
// String value.
str = 0,
// Integer value (`getEntrySize` sizes it as an i32).
int = 1,
// Floating-point value (`getEntrySize` sizes it as an f64).
flt = 2,
};
const BitCask = struct {
/// Value type of the in-memory key directory (`keyDir`): where the record for
/// a key can be found.
const KeyDirEntry = struct {
    /// Name of the data file that holds the record.
    file_name: []const u8,
    /// Position of the record inside that file (presumably a byte offset).
    /// Declared as `u32`: Zig's `u4` is a 4-bit integer that can only hold
    /// 0..15, whereas a 32-bit field covers files up to ~4.29 GB.
    offset: u32,
    /// Size of the record (presumably in bytes); `u32` for the same reason.
    size: u32,
};
// NOTE(review): presumably the size limit at which a data file should be
// rotated; nothing visible in this file reads it yet.
const FILE_THRESHOLD_SIZE = 1000;

// Arena that backs every allocation made by the store; `close` releases it.
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
const allocator = arena.allocator();

// In-memory key directory (a std.StringArrayHashMap), initialised by `open`.
// TODO: refer to the data file by a file descriptor or another fixed-size
// reference instead of a name.
// Note: Zig's `u4` is a 4-bit integer (0..15); it is a 32-bit `u32` that can
// address a file of up to ~4.29 GB.
var keyDir: std.StringArrayHashMap(KeyDirEntry) = undefined;

// Handle of the data file currently in use; only valid after a successful `open`.
var current_file: std.fs.File = undefined;

// Name of the data file currently in use. Stored as a slice rather than a
// many-item pointer so that the length travels with the pointer and the name
// can be printed or compared.
var current_file_name: []const u8 = "";
// bitcask:close(BitCaskHandle) Close a Bitcask data store and flush all pending writes (if any) to disk
/// Closes the current data file and releases every allocation made through
/// the store's arena.
///
/// Must only be called after a successful `open`; `current_file` is
/// undefined before that.
pub fn close() void {
    std.debug.print("Closing bitcask...\n", .{});
    current_file.close();
    arena.deinit();
    // `deinit` frees the arena's buffers but takes the arena by value, so the
    // global still points at the freed buffers afterwards. Start again from a
    // fresh arena so that allocations made after a later `open` do not touch
    // freed memory.
    arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
}
// From the Bitcask paper, the API should look something like this
@ -29,66 +76,187 @@ pub const BitCask = struct {
// writer and not just a reader) and sync on put (if this writer would
// prefer to sync the write file after every write operation).
// The directory must be readable and writable by this process, and
// only one process may open a Bitcask with read write at a time.
// only one process may open a Bitcask with read or write at a time.
// bitcask:open(DirectoryName) Open a new or existing Bitcask datastore for read-only access.
// BitCaskHandle | {error, any()} The directory and all files in it must be readable by this process.
fn open(directory_name: []const u8) BitcaskFileError!void {
std.debug.print("Opening bitcask in dir {s}\n", .{directory_name});
return error.FileNotFound;
/// Opens the Bitcask stored in `directory_name` (resolved against the current
/// working directory) for reading and writing.
///
/// Resets the key directory, then opens the data file `cask1.db`, creating it
/// when it does not exist yet, and positions the file at its end so that
/// subsequent writes append instead of overwriting existing records.
///
/// Errors: any error from opening the directory or the file is returned
/// unchanged; `error.CannotInitialize` is returned when the file cannot be
/// positioned at its end.
pub fn open(directory_name: []const u8) (std.fs.File.OpenError || BitcaskFileError)!void {
    // init keyDir
    keyDir = std.StringArrayHashMap(KeyDirEntry).init(allocator);

    const file_name = "cask1.db";
    current_file_name = file_name;

    // Open file
    std.debug.print("\nOpening bitcask in dir {s}/\n", .{directory_name});
    var bitcask_dir = try std.fs.cwd().openDir(directory_name, .{ .access_sub_paths = false });
    // The directory handle is only needed to open the file; the file handle
    // stays valid after the directory is closed.
    defer bitcask_dir.close();

    // TODO get all files in cask and have some order to open the latest one
    if (bitcask_dir.openFile(file_name, .{ .mode = .read_write })) |existing| {
        std.debug.print("File {s} found\n", .{file_name});
        current_file = existing;
    } else |err| switch (err) {
        error.FileNotFound => {
            // If the file does not exist, create it. `.read = true` yields a
            // read/write handle, so the new file does not have to be reopened.
            std.debug.print("No file found, creating new one\n", .{});
            current_file = try bitcask_dir.createFile(file_name, .{ .read = true, .truncate = false });
        },
        else => {
            std.debug.print("Encountered unknown error while opening bitcask: {any}\n", .{err});
            return err;
        },
    }

    // Data files are append-only: continue after the last record.
    current_file.seekFromEnd(0) catch |err| {
        std.debug.print("Encountered unknown error while opening bitcask: {any}\n", .{err});
        current_file.close();
        return error.CannotInitialize;
    };
}
// bitcask:get(BitCaskHandle, Key) Retrieve a value by key from a Bitcask datastore.
// not found | {ok, Value}
fn get(key: []const u8) error{NotImplemented}!void {
/// Looks `key` up in the in-memory key directory.
///
/// Returns the key directory entry (file name, offset, size) recorded for
/// `key`, or `error.NoCaskFound` when the key is not present. The declared
/// return type is the key directory entry type because that is what the
/// lookup produces; the value itself is not read from disk yet.
pub fn get(key: []const u8) error{OutOfMemory, NoCaskFound, Unknown}!KeyDirEntry {
    std.debug.print("Getting value with key {s}\n", .{key});
    const cask = keyDir.get(key) orelse return error.NoCaskFound;
    std.debug.print("Got cask from file {s}\n", .{cask.file_name});
    // TODO get entry from db rather than return the keyDir entry
    return cask;
}
// bitcask:put(BitCaskHandle, Key, Value) Store a key and value in a Bitcask datastore.
// ok | {error, any()}
fn put() error{NotImplemented}!void {
return error.NotImplemented;
/// Serializes `data` and writes the resulting record to the current data file
/// at the file's current position.
///
/// Requires a successful `open` beforehand. Returns `error.OutOfMemory` when
/// the record cannot be serialized and `error.Unknown` when the write fails.
pub fn put(data: KV) BitcaskFileError!void {
    const entry_bytes = serializeCaskEntry(&allocator, data) catch |err| {
        std.debug.print("Encountered unknown error while serializing bitcask: {any}\n", .{err});
        // Report allocation failure as what it is instead of masking it.
        return err;
    };
    // The bytes are only needed until they are on disk; hand them back so the
    // arena does not grow with every put.
    defer allocator.free(entry_bytes);

    std.debug.print("Writing data to cask: {}\n", .{data});
    // `write` may stop after a partial write; `writeAll` keeps going until
    // the whole record has been written.
    current_file.writeAll(entry_bytes) catch |err| {
        std.debug.print("Encountered unknown error while writing to bitcask: {any}\n", .{err});
        return BitcaskFileError.Unknown;
    };
    std.debug.print("After write: {any}\n", .{entry_bytes.len});
    // TODO record the entry's file name, offset and size in keyDir
}
// bitcask:delete(BitCaskHandle, Key) Delete a key from a Bitcask datastore.
// bitcask:delete(BitCaskHandle, Key) Delete a key from a Bitcask datastore. Note that deletion is simply a write of a special tombstone value, which will be removed on the next merge
// ok | {error, any()}
fn delete() error{NotImplemented}!void {
// Placeholder for bitcask:delete — not implemented yet; every call fails with
// error.NotImplemented.
pub fn delete() error{NotImplemented}!void {
return error.NotImplemented;
}
// bitcask:list_keys(BitCaskHandle) List all keys in a Bitcask datastore.
// [Key] | {error, any()}
fn list() error{NotImplemented}!void {
// Placeholder for listing all keys — not implemented yet; every call fails
// with error.NotImplemented.
pub fn list() error{NotImplemented}!void {
return error.NotImplemented;
}
// bitcask:fold(BitCaskHandle,Fun,Acc0) Fold over all K/V pairs in a Bitcask datastore.
// → Acc Fun is expected to be of the form: F(K, V, Acc0) → Acc.
fn fold() error{NotImplemented}!void {
// Placeholder for bitcask:fold — not implemented yet; every call fails with
// error.NotImplemented.
pub fn fold() error{NotImplemented}!void {
return error.NotImplemented;
}
// bitcask:merge(DirectoryName) Merge several data files within a Bitcask datastore into a more
// ok | {error, any()} compact form. Also, produce hintfiles for faster startup.
fn merge() error{NotImplemented}!void {
// Placeholder for bitcask:merge — not implemented yet; every call fails with
// error.NotImplemented.
pub fn merge() error{NotImplemented}!void {
return error.NotImplemented;
}
// bitcask:sync(BitCaskHandle) Force any writes to sync to disk.
// ok
fn sync() error{NotImplemented}!void {
// Placeholder for bitcask:sync — not implemented yet; every call fails with
// error.NotImplemented.
pub fn sync() error{NotImplemented}!void {
return error.NotImplemented;
}
// bitcask:close(BitCaskHandle) Close a Bitcask data store and flush all pending writes (if any) to disk
fn close() error{NotImplemented}!void {
return error.NotImplemented;
/// Returns the number of bytes a value of kind `t` occupies.
///
/// `data` is only inspected for `.str`: it must then point at a
/// NUL-terminated byte string, because a type-erased pointer carries no
/// length of its own. The terminator is not counted, and the length must fit
/// in a u32. `.int` values are sized as an i32 and `.flt` values as an f64.
fn getEntrySize(data: *anyopaque, t: EntryType) u32 {
    return switch (t) {
        .str => blk: {
            const text = @ptrCast([*:0]const u8, data);
            break :blk @intCast(u32, std.mem.len(text));
        },
        .int => @sizeOf(i32),
        .flt => @sizeOf(f64),
    };
}
/// Encodes `entry` as one contiguous record:
///
///     key length (u32, little endian) | value length (u32, little endian) | key bytes | value bytes
///
/// The two length fields are always 4 bytes each, independent of the
/// platform's `usize` width and byte order, so a file written on one machine
/// has the same layout on another.
///
/// The returned slice is allocated with `aloc`; the caller owns it.
/// Returns `error.OutOfMemory` when allocation fails. A key or value whose
/// length does not fit in a u32 cannot be encoded and is reported with the
/// same error, since the error set has no dedicated member for it.
fn serializeCaskEntry(aloc: *const mem.Allocator, entry: KV) error{OutOfMemory}![]u8 {
    const key_len = std.math.cast(u32, entry.key.len) orelse return error.OutOfMemory;
    const val_len = std.math.cast(u32, entry.val.len) orelse return error.OutOfMemory;

    const header_len = 2 * @sizeOf(u32);
    var serialized_data = try std.ArrayList(u8).initCapacity(
        aloc.*,
        header_len + entry.key.len + entry.val.len,
    );
    // Only reached if something below fails; on success the caller owns the bytes.
    errdefer serialized_data.deinit();

    // Header: each length as 4 little-endian bytes.
    const key_len_bytes = std.mem.toBytes(std.mem.nativeToLittle(u32, key_len));
    const val_len_bytes = std.mem.toBytes(std.mem.nativeToLittle(u32, val_len));
    // The exact capacity was reserved above, so none of these can allocate.
    serialized_data.appendSliceAssumeCapacity(&key_len_bytes);
    serialized_data.appendSliceAssumeCapacity(&val_len_bytes);

    // Payload: key bytes followed by value bytes.
    serialized_data.appendSliceAssumeCapacity(entry.key);
    serialized_data.appendSliceAssumeCapacity(entry.val);

    // TODO serialize the value's EntryType tag and non-string values
    return try serialized_data.toOwnedSlice();
}
// fn getValType(caskVal: any) u8 {
// switch (@TypeOf(caskVal)) {
// u8 =>
// }
// }
};
// pub fn main() !void {
// const bc = BitCask;
// try bc.open("data");
// defer bc.close();
//
// try bc.put(.{.key = "id", .val = "abc123"});
// }
test "Bitcask spec implementation: open" {
const bc = BitCask;
try bc.open("File");
try bc.open("data");
defer bc.close();
// bc.open("File") catch |err| {
// try expect(err == error.FileNotFound);
// };
@ -97,12 +265,33 @@ test "Bitcask spec implementation: open" {
// Checks that looking up a key in a freshly opened store reports a miss.
test "Bitcask spec implementation: get" {
    const bc = BitCask;
    try bc.open("data");
    defer bc.close();

    // Nothing has been stored, so the key directory cannot contain the key.
    try std.testing.expectError(error.NoCaskFound, bc.get("key"));

    // TODO: once `put` records entries in the key directory, store a value
    // here and assert on the file name, offset and size that `get` returns.
}
// Checks that two key/value pairs can be written to an opened store without error.
test "Bitcask spec implementation: put" {
    const bc = BitCask;
    try bc.open("data");
    defer bc.close();

    try bc.put(.{ .key = "id", .val = "1" });
    try bc.put(.{ .key = "user1", .val = "likes apples so much" });
}
test "Bitcask spec implementation: delete" {