Working put test, passing, but not complete
This commit is contained in:
		
							parent
							
								
									e10a9e311f
								
							
						
					
					
						commit
						30352fd304
					
				@ -5,3 +5,7 @@ Do it simply and do it well.
 | 
			
		||||
## The Bitcask paper
 | 
			
		||||
 | 
			
		||||
https://riak.com/assets/bitcask-intro.pdf
 | 
			
		||||
 | 
			
		||||
## Implementation in Ruby
 | 
			
		||||
 | 
			
		||||
https://dinesh.wiki/posts/build-your-own-persistent-kv-store/
 | 
			
		||||
 | 
			
		||||
@ -1,17 +1,64 @@
 | 
			
		||||
const std = @import("std");
 | 
			
		||||
const expect = std.testing.expect;
 | 
			
		||||
const Dir = std.fs.Dir;
 | 
			
		||||
const mem = std.mem;
 | 
			
		||||
 | 
			
		||||
pub const BitcaskFileError = error{
 | 
			
		||||
/// Error set shared by the Bitcask file operations in this module.
const BitcaskFileError = error{
    CannotInitialize,
    AccessDenied,
    OutOfMemory,
    FileNotFound,
    Unknown,
};
 | 
			
		||||
 | 
			
		||||
pub const BitCask = struct {
 | 
			
		||||
/// A typed datastore record: two size fields, a value type tag, the key
/// bytes and a type-erased pointer to the value.
const CaskEntry = struct {
    // presumably the byte length of `key` — nothing in this struct enforces it
    keySize: u32,
    // presumably the byte length of the value behind `val` — TODO confirm
    valSize: u32,
    // tag describing how `val` should be interpreted
    valType: EntryType,
    key: []const u8,
    // type-erased; the pointee type is not recorded here beyond `valType`
    val: *const anyopaque,
};
 | 
			
		||||
 | 
			
		||||
/// A key/value pair of raw byte slices, as accepted by `put`.
const KV = struct {
    key: []const u8,
    val: []const u8,
};
 | 
			
		||||
 | 
			
		||||
/// Value type tag, stored as a single byte with explicit, stable numbering.
const EntryType = enum(u8) {
    /// String value.
    str = 0,
    /// Integer value.
    int = 1,
    /// Floating-point value.
    flt = 2,
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
const BitCask = struct {
 | 
			
		||||
 | 
			
		||||
    /// Value type of the in-memory key directory: names a data file plus an
    /// offset and a size.
    ///
    /// `offset` and `size` are `u32` (range up to ~4.29 GB). They were `u4`,
    /// which is a 4-bit integer and can only hold 0..15.
    const KeyDirEntry = struct {
        /// Name of the data file.
        file_name: []const u8,
        // presumably the byte offset of the record inside `file_name` — TODO confirm once put() fills it in
        offset: u32,
        // presumably the byte length of the record — TODO confirm once put() fills it in
        size: u32,
    };
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    // presumably the size at which a data file should be rotated; not referenced yet — TODO confirm unit
    const FILE_THRESHOLD_SIZE = 1000;

    /// Arena backing every allocation made by this datastore; released in `close`.
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    const allocator = arena.allocator();

    // std.StringArrayHashMap

    // TODO fix to a file descriptor or reference for fixed-size; a u32 may be enough, depends on max file size I suppose
    // u32 = offsets/sizes of up to 4.29GB (a u4 is a 4-bit integer and only holds 0..15)

    /// In-memory key directory; (re)initialised by `open`.
    var keyDir: std.StringArrayHashMap(KeyDirEntry) = undefined;
    /// Handle of the data file currently written to; assigned by `open`.
    var current_file: std.fs.File = undefined;
    /// Name of the current data file. Kept as a slice (not a many-item
    /// pointer) so its length stays available for printing and comparison.
    var current_file_name: []const u8 = "";
 | 
			
		||||
 | 
			
		||||
    // bitcask:close(BitCaskHandle) Close a Bitcask data store and flush all pending writes (if any) to disk
 | 
			
		||||
    /// Shuts the datastore down: frees everything allocated from the arena,
    /// then closes the current data file.
    pub fn close() void {
        // Deferred so the file handle is released last, after the arena.
        defer current_file.close();

        std.debug.print("Closing bitcask...\n", .{});
        arena.deinit();
    }
 | 
			
		||||
 | 
			
		||||
    // From the Bitcask paper, the API should look something like this
 | 
			
		||||
 | 
			
		||||
@ -29,66 +76,187 @@ pub const BitCask = struct {
 | 
			
		||||
    // writer and not just a reader) and sync on put (if this writer would
 | 
			
		||||
    // prefer to sync the write file after every write operation).
 | 
			
		||||
    // The directory must be readable and writable by this process, and
 | 
			
		||||
    // only one process may open a Bitcask with read write at a time.
 | 
			
		||||
    // only one process may open a Bitcask with read or write at a time.
 | 
			
		||||
    // bitcask:open(DirectoryName) Open a new or existing Bitcask datastore for read-only access.
 | 
			
		||||
    // → BitCaskHandle | {error, any()} The directory and all files in it must be readable by this process.
 | 
			
		||||
    fn open(directory_name: []const u8) BitcaskFileError!void {
 | 
			
		||||
        std.debug.print("Opening bitcask in dir {s}\n", .{directory_name});
 | 
			
		||||
        return error.FileNotFound;
 | 
			
		||||
    /// Opens the data file `cask1.db` inside `directory_name` (resolved
    /// against the current working directory) for reading and writing,
    /// creating it when it does not exist, and resets the in-memory key
    /// directory.
    ///
    /// Errors: any failure to open the directory or to open/create the data
    /// file is propagated unchanged; unexpected open failures are logged first.
    pub fn open(directory_name: []const u8) (std.fs.File.OpenError || BitcaskFileError)!void {
        // init keyDir
        keyDir = std.StringArrayHashMap(KeyDirEntry).init(allocator);

        const file_name = "cask1.db";
        current_file_name = file_name;

        // Open file
        std.debug.print("\nOpening bitcask in dir {s}/\n", .{directory_name});
        var bitcask_dir = try std.fs.cwd().openDir(directory_name, .{ .access_sub_paths = false });
        // The directory handle is only needed to resolve the data file.
        defer bitcask_dir.close();

        // TODO get all files in cask and have some order to open the latest one
        if (bitcask_dir.openFile(file_name, .{ .mode = .read_write })) |file| {
            std.debug.print("File {s} found\n", .{file_name});
            current_file = file;
        } else |err| switch (err) {
            // If the file does not exist, create it
            error.FileNotFound => {
                std.debug.print("No file found, creating new one\n", .{});
                // Create it readable and writable in one step so no extra
                // handle is opened and leaked; never truncate, in case another
                // writer created the file in the meantime.
                current_file = try bitcask_dir.createFile(file_name, .{ .read = true, .truncate = false });
            },
            else => {
                std.debug.print("Encountered unknown error while opening bitcask: {any}\n", .{err});
                return err;
            },
        }
    }
 | 
			
		||||
 | 
			
		||||
    // bitcask:get(BitCaskHandle, Key) Retrieve a value by key from a Bitcask datastore.
 | 
			
		||||
    // → not found | {ok, Value}
 | 
			
		||||
    fn get(key: []const u8) error{NotImplemented}!void {
 | 
			
		||||
    /// Looks `key` up in the in-memory key directory.
    ///
    /// Returns the key directory entry stored for `key`, or
    /// `error.NoCaskFound` when the key is not present.
    ///
    /// The return type is `KeyDirEntry` because that is what the key
    /// directory holds; the previous `CaskEntry` return type did not match
    /// the returned value.
    /// TODO get entry from db rather than return the keyDir entry
    pub fn get(key: []const u8) error{ OutOfMemory, NoCaskFound, Unknown }!KeyDirEntry {
        std.debug.print("Getting value with key {s}\n", .{key});

        const entry = keyDir.get(key) orelse return error.NoCaskFound;

        std.debug.print("Got cask from file {s}\n", .{entry.file_name});
        return entry;
    }
 | 
			
		||||
 | 
			
		||||
    // bitcask:put(BitCaskHandle, Key, Value) Store a key and value in a Bitcask datastore.
 | 
			
		||||
    // → ok | {error, any()}
 | 
			
		||||
    fn put() error{NotImplemented}!void {
 | 
			
		||||
        return error.NotImplemented;
 | 
			
		||||
    /// Serializes `data` and appends the resulting record to the current
    /// data file.
    ///
    /// Errors: `error.OutOfMemory` when the record cannot be allocated;
    /// `error.Unknown` when positioning or writing the file fails (the
    /// underlying error is logged).
    pub fn put(data: KV) BitcaskFileError!void {
        const entry_bytes = serializeCaskEntry(&allocator, data) catch |err| {
            std.debug.print("Encountered unknown error while serializing bitcask: {any}\n", .{err});
            // Allocation failure is part of BitcaskFileError; report it as such.
            return err;
        };

        std.debug.print("Writing data to cask: {}\n", .{data});

        // Records are appended: a freshly opened file starts at offset 0, so
        // without this seek existing records would be overwritten.
        current_file.seekFromEnd(0) catch |err| {
            std.debug.print("Encountered unknown error while writing to bitcask: {any}\n", .{err});
            return BitcaskFileError.Unknown;
        };

        // writeAll loops until every byte is written; a single write() may
        // stop short and leave a truncated record.
        current_file.writeAll(entry_bytes) catch |err| {
            std.debug.print("Encountered unknown error while writing to bitcask: {any}\n", .{err});
            return BitcaskFileError.Unknown;
        };
        std.debug.print("After write: {any}\n", .{entry_bytes.len});

        // TODO: record the entry in keyDir so that get() can find it.
    }
 | 
			
		||||
 | 
			
		||||
    // bitcask:delete(BitCaskHandle, Key) Delete a key from a Bitcask datastore.
 | 
			
		||||
    // bitcask:delete(BitCaskHandle, Key) Delete a key from a Bitcask datastore.  Note that deletion is simply a write of a special tombstone value, which will be removed on the next merge
 | 
			
		||||
    // → ok | {error, any()}
 | 
			
		||||
    fn delete() error{NotImplemented}!void {
 | 
			
		||||
    /// Placeholder for key deletion; always fails with `error.NotImplemented`.
    pub fn delete() error{NotImplemented}!void {
        return error.NotImplemented;
    }
 | 
			
		||||
 | 
			
		||||
    // bitcask:list_keys(BitCaskHandle) List all keys in a Bitcask datastore.
 | 
			
		||||
    // → [Key] | {error, any()}
 | 
			
		||||
    fn list() error{NotImplemented}!void {
 | 
			
		||||
    /// Placeholder for key listing; always fails with `error.NotImplemented`.
    pub fn list() error{NotImplemented}!void {
        return error.NotImplemented;
    }
 | 
			
		||||
 | 
			
		||||
    // bitcask:fold(BitCaskHandle,Fun,Acc0) Fold over all K/V pairs in a Bitcask datastore.
 | 
			
		||||
    // → Acc Fun is expected to be of the form: F(K,V,Acc0) → Acc.
 | 
			
		||||
    fn fold() error{NotImplemented}!void {
 | 
			
		||||
    /// Placeholder for folding over all pairs; always fails with `error.NotImplemented`.
    pub fn fold() error{NotImplemented}!void {
        return error.NotImplemented;
    }
 | 
			
		||||
 | 
			
		||||
    // bitcask:merge(DirectoryName) Merge several data files within a Bitcask datastore into a more
    // compact form. Also, produce hintfiles for faster startup.
    // → ok | {error, any()}
 | 
			
		||||
    fn merge() error{NotImplemented}!void {
 | 
			
		||||
    /// Placeholder for data file merging; always fails with `error.NotImplemented`.
    pub fn merge() error{NotImplemented}!void {
        return error.NotImplemented;
    }
 | 
			
		||||
 | 
			
		||||
    // bitcask:sync(BitCaskHandle) Force any writes to sync to disk.
 | 
			
		||||
    // → ok
 | 
			
		||||
    fn sync() error{NotImplemented}!void {
 | 
			
		||||
    /// Placeholder for forcing writes to disk; always fails with `error.NotImplemented`.
    pub fn sync() error{NotImplemented}!void {
        return error.NotImplemented;
    }
 | 
			
		||||
 | 
			
		||||
    // bitcask:close(BitCaskHandle) Close a Bitcask data store and flush all pending writes (if any) to disk
 | 
			
		||||
    fn close() error{NotImplemented}!void {
 | 
			
		||||
        return error.NotImplemented;
 | 
			
		||||
    /// Returns the number of bytes a value of kind `t` occupies.
    ///
    /// `.int` and `.flt` are fixed width (`i32` and `f64`); `data` is not
    /// read for them. For `.str` the bytes behind `data` are scanned up to
    /// the first 0 byte.
    ///
    /// NOTE(review): an opaque pointer carries no length (the previous
    /// `.len` on a many-item pointer did not compile), so `.str` assumes a
    /// 0-terminated payload such as a string literal — confirm against the
    /// eventual callers.
    fn getEntrySize(data: *anyopaque, t: EntryType) u32 {
        return switch (t) {
            .str => blk: {
                const text = @ptrCast([*:0]const u8, data);
                break :blk @intCast(u32, std.mem.len(text));
            },
            .int => @sizeOf(i32),
            .flt => @sizeOf(f64),
        };
    }
 | 
			
		||||
 | 
			
		||||
    /// Encodes `entry` as one record:
    ///
    ///     [key length: u32][value length: u32][key bytes][value bytes]
    ///
    /// Both lengths are written as 4-byte little-endian integers, so the
    /// layout does not depend on the host's pointer width or byte order.
    /// (`usize` lengths were previously written as-is: 8 bytes each on
    /// 64-bit targets.)
    ///
    /// The returned slice is allocated with `aloc` and owned by the caller.
    ///
    /// Errors: `error.OutOfMemory` when allocation fails, and also when a
    /// key or value is too long for its u32 length field, since that is the
    /// only error this signature can report.
    fn serializeCaskEntry(aloc: *const mem.Allocator, entry: KV) error{OutOfMemory}![]u8 {
        const key_len = std.math.cast(u32, entry.key.len) orelse return error.OutOfMemory;
        const val_len = std.math.cast(u32, entry.val.len) orelse return error.OutOfMemory;

        const header_len = 2 * @sizeOf(u32);
        var serialized_data = try std.ArrayList(u8).initCapacity(
            aloc.*,
            header_len + entry.key.len + entry.val.len,
        );
        errdefer serialized_data.deinit();

        // The capacity reserved above is exact, so none of these appends can
        // reallocate or fail.
        const key_len_bytes = std.mem.toBytes(std.mem.nativeToLittle(u32, key_len));
        serialized_data.appendSliceAssumeCapacity(&key_len_bytes);
        const val_len_bytes = std.mem.toBytes(std.mem.nativeToLittle(u32, val_len));
        serialized_data.appendSliceAssumeCapacity(&val_len_bytes);

        // TODO: write a value type tag once typed values are supported.
        serialized_data.appendSliceAssumeCapacity(entry.key);
        serialized_data.appendSliceAssumeCapacity(entry.val);

        return try serialized_data.toOwnedSlice();
    }
 | 
			
		||||
 | 
			
		||||
//     fn getValType(caskVal: any) u8 {
 | 
			
		||||
//         switch (@TypeOf(caskVal)) {
 | 
			
		||||
//             u8 =>
 | 
			
		||||
//         }
 | 
			
		||||
//     }
 | 
			
		||||
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// pub fn main() !void {
 | 
			
		||||
//     const bc = BitCask;
 | 
			
		||||
//     try bc.open("data");
 | 
			
		||||
//     defer bc.close();
 | 
			
		||||
//
 | 
			
		||||
//     try bc.put(.{.key = "id", .val = "abc123"});
 | 
			
		||||
// }
 | 
			
		||||
 | 
			
		||||
test "Bitcask spec implementation: open" {
 | 
			
		||||
    const bc = BitCask;
 | 
			
		||||
    try bc.open("File");
 | 
			
		||||
    try bc.open("data");
 | 
			
		||||
    defer bc.close();
 | 
			
		||||
    // bc.open("File") catch |err| {
 | 
			
		||||
    //     try expect(err == error.FileNotFound);
 | 
			
		||||
    // };
 | 
			
		||||
@ -97,12 +265,33 @@ test "Bitcask spec implementation: open" {
 | 
			
		||||
 | 
			
		||||
test "Bitcask spec implementation: get" {
    const bc = BitCask;
    try bc.open("data");
    defer bc.close();

    // Nothing has been stored under "key" in this session, so the lookup
    // must fail. Unlike a bare `catch`, expectError also fails the test when
    // the call unexpectedly succeeds.
    try std.testing.expectError(error.NoCaskFound, bc.get("key"));

    // TODO: once put() records entries in the key directory, store "key"
    // first and assert on the returned entry's file name, offset and size.
}
 | 
			
		||||
 | 
			
		||||
test "Bitcask spec implementation: put" {
    const bc = BitCask;
    try bc.open("data");
    defer bc.close();

    // Two writes in a row: a short pair, then a longer one.
    try bc.put(.{ .key = "id", .val = "1" });
    try bc.put(.{ .key = "user1", .val = "likes apples so much" });
}
 | 
			
		||||
 | 
			
		||||
test "Bitcask spec implementation: delete" {
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user