Working put test, passing, but not complete
This commit is contained in:
parent
e10a9e311f
commit
30352fd304
@ -4,4 +4,8 @@ Do it simply and do it well.
|
|||||||
|
|
||||||
## The Bitcask paper
|
## The Bitcask paper
|
||||||
|
|
||||||
https://riak.com/assets/bitcask-intro.pdf
|
https://riak.com/assets/bitcask-intro.pdf
|
||||||
|
|
||||||
|
## Implementation in Ruby
|
||||||
|
|
||||||
|
https://dinesh.wiki/posts/build-your-own-persistent-kv-store/
|
||||||
|
@ -1,17 +1,64 @@
|
|||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const expect = std.testing.expect;
|
const expect = std.testing.expect;
|
||||||
|
const Dir = std.fs.Dir;
|
||||||
|
const mem = std.mem;
|
||||||
|
|
||||||
pub const BitcaskFileError = error{
|
const BitcaskFileError = error{
|
||||||
|
CannotInitialize,
|
||||||
AccessDenied,
|
AccessDenied,
|
||||||
OutOfMemory,
|
OutOfMemory,
|
||||||
FileNotFound,
|
FileNotFound,
|
||||||
|
Unknown
|
||||||
};
|
};
|
||||||
|
|
||||||
pub const BitCask = struct {
|
/// One cask record: two size fields, a value-type tag, the key bytes and an
/// untyped pointer to the value.
const CaskEntry = struct {
    /// NOTE(review): presumably the byte length of `key` — confirm against the writer.
    keySize: u32,
    /// NOTE(review): presumably the byte length of the data behind `val` — confirm.
    valSize: u32,
    /// Tag that tells how the bytes behind `val` are meant to be interpreted.
    valType: EntryType,
    /// Key bytes.
    key: []const u8,
    /// Type-erased pointer to the value; `valType` is the only hint to its real type.
    val: *const anyopaque,
};
|
||||||
|
|
||||||
|
/// A key/value pair of raw byte slices, as handed to `put`.
const KV = struct {
    /// Key bytes.
    key: []const u8,
    /// Value bytes.
    val: []const u8,
};
|
||||||
|
|
||||||
|
/// Value-type tag backed by a `u8`.
/// Tags are numbered implicitly from zero in declaration order
/// (str = 0, int = 1, flt = 2), so the order below must not change.
const EntryType = enum(u8) {
    str,
    int,
    flt,
};
|
||||||
|
|
||||||
|
|
||||||
|
const BitCask = struct {
|
||||||
|
|
||||||
|
/// In-memory index record stored in `keyDir`: which file holds an entry,
/// and the entry's offset and size.
const KeyDirEntry = struct {
    /// Name of the data file that holds the entry.
    file_name: []const u8,
    /// Offset of the entry. `u32` is a 4-byte integer (max ~4.29 GB);
    /// the previous `u4` was a 4-bit integer and could only hold 0..15.
    offset: u32,
    /// Size of the entry. Same width as `offset`.
    size: u32,
};
|
||||||
|
|
||||||
|
|
||||||
const FILE_THRESHOLD_SIZE = 1000;
|
const FILE_THRESHOLD_SIZE = 1000;
|
||||||
|
|
||||||
|
var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
|
||||||
|
const allocator = arena.allocator();
|
||||||
|
|
||||||
|
|
||||||
// std.StringArrayHashMap
|
// TODO: switch to a file descriptor or another fixed-size reference; a 4-byte u32 may be enough, depending on the maximum file size
|
||||||
|
// u32 (4 bytes) can address up to ~4.29 GB; note that Zig's u4 is a 4-bit integer (max 15)
|
||||||
|
|
||||||
|
var keyDir: std.StringArrayHashMap(KeyDirEntry) = undefined;
|
||||||
|
var current_file: std.fs.File = undefined;
|
||||||
|
var current_file_name: [*]const u8 = "";
|
||||||
|
|
||||||
|
// bitcask:close(BitCaskHandle) Close a Bitcask data store and flush all pending writes (if any) to disk
|
||||||
|
/// Close the data store: close the active data file and release every
/// allocation made through the module-level arena.
///
/// The arena is re-initialised afterwards so that a later `open()` can
/// allocate again.
pub fn close() void {
    std.debug.print("Closing bitcask...\n", .{});
    current_file.close();

    // `ArenaAllocator.deinit` receives the arena by value, so the global
    // `arena` would keep pointing at the buffers it just freed. Resetting it
    // in place keeps `allocator` (which points at `arena`) usable after the
    // next `open()`.
    arena.deinit();
    arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
}
|
||||||
|
|
||||||
// From the Bitcask paper, the API should look something like this
|
// From the Bitcask paper, the API should look something like this
|
||||||
|
|
||||||
@ -29,66 +76,187 @@ pub const BitCask = struct {
|
|||||||
// writer and not just a reader) and sync on put (if this writer would
|
// writer and not just a reader) and sync on put (if this writer would
|
||||||
// prefer to sync the write file after every write operation).
|
// prefer to sync the write file after every write operation).
|
||||||
// The directory must be readable and writable by this process, and
|
// The directory must be readable and writable by this process, and
|
||||||
// only one process may open a Bitcask with read write at a time.
|
// only one process may open a Bitcask with read or write at a time.
|
||||||
// bitcask:open(DirectoryName) Open a new or existing Bitcask datastore for read-only access.
|
// bitcask:open(DirectoryName) Open a new or existing Bitcask datastore for read-only access.
|
||||||
// → BitCaskHandle | {error, any()} The directory and all files in it must be readable by this process.
|
// → BitCaskHandle | {error, any()} The directory and all files in it must be readable by this process.
|
||||||
/// Open the data file `cask1.db` inside `directory_name` (resolved against the
/// current working directory), creating the file when it does not exist, and
/// reset `keyDir` to an empty map.
///
/// Errors:
/// - any error from opening the directory or creating the file is propagated;
/// - any error other than `FileNotFound` while opening an existing file is
///   logged and reported as `error.CannotInitialize`.
///
/// NOTE(review): the file position is not moved after opening, so writes to an
/// existing file start at offset 0 — confirm whether appending is intended.
pub fn open(directory_name: []const u8) (std.fs.File.OpenError || BitcaskFileError)!void {
    // init keyDir
    keyDir = std.StringArrayHashMap(KeyDirEntry).init(allocator);

    const file_name = "cask1.db";
    current_file_name = file_name;

    std.debug.print("\nOpening bitcask in dir {s}/\n", .{directory_name});
    var bitcask_dir = try std.fs.Dir.openDir(std.fs.cwd(), directory_name, .{ .access_sub_paths = false });
    // The directory handle is only needed to resolve the data file; release it on every path.
    defer bitcask_dir.close();

    // TODO get all files in cask and have some order to open the latest one
    if (bitcask_dir.openFile(file_name, .{ .mode = .read_write })) |file| {
        std.debug.print("File {s} found\n", .{file_name});
        current_file = file;
    } else |err| switch (err) {
        error.FileNotFound => {
            std.debug.print("No file found, creating new one\n", .{});
            // `.read = true` yields a read-write handle directly, so the new
            // file does not have to be opened a second time.
            current_file = try bitcask_dir.createFile(file_name, .{ .read = true });
        },
        else => {
            std.debug.print("Encountered unknown error while opening bitcask: {any}\n", .{err});
            return error.CannotInitialize;
        },
    }
}
|
||||||
|
|
||||||
// bitcask:get(BitCaskHandle, Key) Retrieve a value by key from a Bitcask datastore.
|
// bitcask:get(BitCaskHandle, Key) Retrieve a value by key from a Bitcask datastore.
|
||||||
// → not found | {ok, Value}
|
// → not found | {ok, Value}
|
||||||
/// Look up `key` in `keyDir` and return its index record.
///
/// Returns `error.NoCaskFound` when the key is not present.
/// The declared return type is `KeyDirEntry`, because that is what `keyDir`
/// stores and what this function actually returns.
pub fn get(key: []const u8) error{ OutOfMemory, NoCaskFound, Unknown }!KeyDirEntry {
    std.debug.print("Getting value with key {s}\n", .{key});

    const cask = keyDir.get(key) orelse return error.NoCaskFound;
    std.debug.print("Got cask from file {s}\n", .{cask.file_name});

    // TODO get entry from db rather than return the keyDir entry
    return cask;
}
|
||||||
|
|
||||||
// bitcask:put(BitCaskHandle, Key, Value) Store a key and value in a Bitcask datastore.
|
// bitcask:put(BitCaskHandle, Key, Value) Store a key and value in a Bitcask datastore.
|
||||||
// → ok | {error, any()}
|
// → ok | {error, any()}
|
||||||
/// Serialize `data` and write it to the currently open data file.
///
/// Errors:
/// - `error.OutOfMemory` when the serialization buffer cannot be allocated;
/// - `error.Unknown` when the write fails (the cause is logged).
///
/// TODO: record the written entry in `keyDir`; nothing is indexed yet.
pub fn put(data: KV) BitcaskFileError!void {
    // Allocation failure is part of `BitcaskFileError`, so it is propagated
    // rather than being folded into `Unknown`.
    const entry_bytes = try serializeCaskEntry(&allocator, data);

    std.debug.print("Writing data to cask: {}\n", .{data});

    // `write` may stop after a partial write; `writeAll` loops until every
    // byte of the entry has been written.
    current_file.writeAll(entry_bytes) catch |err| {
        std.debug.print("Encountered unknown error while writing to bitcask: {any}\n", .{err});
        return BitcaskFileError.Unknown;
    };
    std.debug.print("After write: {any}\n", .{entry_bytes.len});
}
|
||||||
|
|
||||||
// bitcask:delete(BitCaskHandle, Key) Delete a key from a Bitcask datastore.
|
// bitcask:delete(BitCaskHandle, Key) Delete a key from a Bitcask datastore. Note that deletion is simply a write of a special tombstone value, which will be removed on the next merge
|
||||||
// → ok | {error, any()}
|
// → ok | {error, any()}
|
||||||
fn delete() error{NotImplemented}!void {
|
pub fn delete() error{NotImplemented}!void {
|
||||||
return error.NotImplemented;
|
return error.NotImplemented;
|
||||||
}
|
}
|
||||||
|
|
||||||
// bitcask:list keys(BitCaskHandle) List all keys in a Bitcask datastore.
|
// bitcask:list keys(BitCaskHandle) List all keys in a Bitcask datastore.
|
||||||
// → [Key] | {error, any()}
|
// → [Key] | {error, any()}
|
||||||
fn list() error{NotImplemented}!void {
|
pub fn list() error{NotImplemented}!void {
|
||||||
return error.NotImplemented;
|
return error.NotImplemented;
|
||||||
}
|
}
|
||||||
|
|
||||||
// bitcask:fold(BitCaskHandle,Fun,Acc0) Fold over all K/V pairs in a Bitcask datastore.
|
// bitcask:fold(BitCaskHandle,Fun,Acc0) Fold over all K/V pairs in a Bitcask datastore.
|
||||||
// → Acc Fun is expected to be of the form: F(K,V,Acc0) → Acc.
|
// → Acc Fun is expected to be of the form: F(K,V,Acc0) → Acc.
|
||||||
fn fold() error{NotImplemented}!void {
|
pub fn fold() error{NotImplemented}!void {
|
||||||
return error.NotImplemented;
|
return error.NotImplemented;
|
||||||
}
|
}
|
||||||
|
|
||||||
// bitcask:merge(DirectoryName) Merge several data files within a Bitcask datastore into a more
|
// bitcask:merge(DirectoryName) Merge several data files within a Bitcask datastore into a more
|
||||||
// → ok | {error, any()} compact form. Also, produce hintfiles for faster startup.
|
// → ok | {error, any()} compact form. Also, produce hintfiles for faster startup.
|
||||||
fn merge() error{NotImplemented}!void {
|
pub fn merge() error{NotImplemented}!void {
|
||||||
return error.NotImplemented;
|
return error.NotImplemented;
|
||||||
}
|
}
|
||||||
|
|
||||||
// bitcask:sync(BitCaskHandle) Force any writes to sync to disk.
|
// bitcask:sync(BitCaskHandle) Force any writes to sync to disk.
|
||||||
// → ok
|
// → ok
|
||||||
fn sync() error{NotImplemented}!void {
|
pub fn sync() error{NotImplemented}!void {
|
||||||
return error.NotImplemented;
|
return error.NotImplemented;
|
||||||
}
|
}
|
||||||
|
|
||||||
// bitcask:close(BitCaskHandle) Close a Bitcask data store and flush all pending writes (if any) to disk
|
fn getEntrySize(data: *anyopaque, t: EntryType) u32 {
|
||||||
fn close() error{NotImplemented}!void {
|
switch (t) {
|
||||||
return error.NotImplemented;
|
.str => {
|
||||||
|
const ptr = @ptrCast([*]const u8, data);
|
||||||
|
return ptr.len;
|
||||||
|
},
|
||||||
|
.int => {
|
||||||
|
return @sizeOf(i32);
|
||||||
|
},
|
||||||
|
.flt => {
|
||||||
|
return @sizeOf(f64);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Serialize `entry` into a freshly allocated byte buffer laid out as
///
///     [key length: u32 LE][value length: u32 LE][key bytes][value bytes]
///
/// The two lengths are always 4 bytes each and little-endian, independent of
/// the target's pointer width and byte order.
///
/// The caller owns the returned slice, which is allocated with `aloc`.
/// Returns `error.OutOfMemory` when the buffer cannot be allocated.
/// Panics when the key or the value is longer than a `u32` can describe.
///
/// TODO: the value type tag is not written yet.
fn serializeCaskEntry(aloc: *const mem.Allocator, entry: KV) error{OutOfMemory}![]u8 {
    const header_len = 2 * @sizeOf(u32);

    // `len` is a `usize`; narrow it explicitly so the header is always 4 bytes wide.
    const key_len = std.math.cast(u32, entry.key.len) orelse
        @panic("bitcask key is longer than a u32 length header can describe");
    const val_len = std.math.cast(u32, entry.val.len) orelse
        @panic("bitcask value is longer than a u32 length header can describe");

    var serialized_data = try std.ArrayList(u8).initCapacity(
        aloc.*,
        header_len + entry.key.len + entry.val.len,
    );
    errdefer serialized_data.deinit();

    const key_len_bytes = std.mem.toBytes(std.mem.nativeToLittle(u32, key_len));
    const val_len_bytes = std.mem.toBytes(std.mem.nativeToLittle(u32, val_len));

    // The exact capacity was reserved above, so none of these appends can allocate.
    serialized_data.appendSliceAssumeCapacity(&key_len_bytes);
    serialized_data.appendSliceAssumeCapacity(&val_len_bytes);
    serialized_data.appendSliceAssumeCapacity(entry.key);
    serialized_data.appendSliceAssumeCapacity(entry.val);

    return try serialized_data.toOwnedSlice();
}
|
||||||
|
|
||||||
|
// fn getValType(caskVal: any) u8 {
|
||||||
|
// switch (@TypeOf(caskVal)) {
|
||||||
|
// u8 =>
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// pub fn main() !void {
|
||||||
|
// const bc = BitCask;
|
||||||
|
// try bc.open("data");
|
||||||
|
// defer bc.close();
|
||||||
|
//
|
||||||
|
// try bc.put(.{.key = "id", .val = "abc123"});
|
||||||
|
// }
|
||||||
|
|
||||||
test "Bitcask spec implementation: open" {
|
test "Bitcask spec implementation: open" {
|
||||||
const bc = BitCask;
|
const bc = BitCask;
|
||||||
try bc.open("File");
|
try bc.open("data");
|
||||||
|
defer bc.close();
|
||||||
// bc.open("File") catch |err| {
|
// bc.open("File") catch |err| {
|
||||||
// try expect(err == error.FileNotFound);
|
// try expect(err == error.FileNotFound);
|
||||||
// };
|
// };
|
||||||
@ -97,12 +265,33 @@ test "Bitcask spec implementation: open" {
|
|||||||
|
|
||||||
test "Bitcask spec implementation: get" {
    const bc = BitCask;
    try bc.open("data");
    defer bc.close();

    // Nothing has been stored since `open`, so the lookup must fail.
    try std.testing.expectError(error.NoCaskFound, bc.get("key"));

    // TODO: once `put` records entries in the key directory, store a value
    // here and assert on the returned entry's file name, offset and size.
}
|
||||||
|
|
||||||
test "Bitcask spec implementation: put" {
|
test "Bitcask spec implementation: put" {
|
||||||
const bc = BitCask;
|
const bc = BitCask;
|
||||||
try bc.put();
|
try bc.open("data");
|
||||||
|
defer bc.close();
|
||||||
|
|
||||||
|
const key = "id";
|
||||||
|
const val = "1";
|
||||||
|
|
||||||
|
try bc.put(.{
|
||||||
|
.key = key,
|
||||||
|
.val = val
|
||||||
|
});
|
||||||
|
|
||||||
|
try bc.put(.{.key = "user1", .val = "likes apples so much"});
|
||||||
}
|
}
|
||||||
|
|
||||||
test "Bitcask spec implementation: delete" {
|
test "Bitcask spec implementation: delete" {
|
||||||
|
Loading…
Reference in New Issue
Block a user