Working put test, passing, but not complete
This commit is contained in:
		
							parent
							
								
									e10a9e311f
								
							
						
					
					
						commit
						30352fd304
					
				@ -5,3 +5,7 @@ Do it simply and do it well.
 | 
				
			|||||||
## The Bitcask paper
 | 
					## The Bitcask paper
 | 
				
			||||||
 | 
					
 | 
				
			||||||
https://riak.com/assets/bitcask-intro.pdf
 | 
					https://riak.com/assets/bitcask-intro.pdf
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					## Implementation in Ruby
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					https://dinesh.wiki/posts/build-your-own-persistent-kv-store/
 | 
				
			||||||
 | 
				
			|||||||
@ -1,17 +1,64 @@
 | 
				
			|||||||
const std = @import("std");
 | 
					const std = @import("std");
 | 
				
			||||||
const expect = std.testing.expect;
 | 
					const expect = std.testing.expect;
 | 
				
			||||||
 | 
					const Dir = std.fs.Dir;
 | 
				
			||||||
 | 
					const mem = std.mem;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub const BitcaskFileError = error{
 | 
					const BitcaskFileError = error{
 | 
				
			||||||
 | 
					    CannotInitialize,
 | 
				
			||||||
    AccessDenied,
 | 
					    AccessDenied,
 | 
				
			||||||
    OutOfMemory,
 | 
					    OutOfMemory,
 | 
				
			||||||
    FileNotFound,
 | 
					    FileNotFound,
 | 
				
			||||||
 | 
					    Unknown
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub const BitCask = struct {
 | 
					const CaskEntry = struct {
 | 
				
			||||||
 | 
					    keySize: u32,
 | 
				
			||||||
 | 
					    valSize: u32,
 | 
				
			||||||
 | 
					    valType: EntryType,
 | 
				
			||||||
 | 
					    key:     []const u8,
 | 
				
			||||||
 | 
					    val:     *const anyopaque,
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					const KV = struct {
 | 
				
			||||||
 | 
					    key: []const u8,
 | 
				
			||||||
 | 
					    val: []const u8
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					const EntryType = enum(u8) {
 | 
				
			||||||
 | 
					    str = 0,
 | 
				
			||||||
 | 
					    int = 1,
 | 
				
			||||||
 | 
					    flt = 2,
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					const BitCask = struct {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const KeyDirEntry = struct {
 | 
				
			||||||
 | 
					        file_name: []const u8,
 | 
				
			||||||
 | 
					        offset: u4,
 | 
				
			||||||
 | 
					        size: u4,
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    const FILE_THRESHOLD_SIZE = 1000;
 | 
					    const FILE_THRESHOLD_SIZE = 1000;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
 | 
				
			||||||
 | 
					    const allocator = arena.allocator();
 | 
				
			||||||
    
 | 
					    
 | 
				
			||||||
    // std.StringArrayHashMap
 | 
					
 | 
				
			||||||
 | 
					    // TODO: replace file_name with a file descriptor or other fixed-size reference; offset/size may only need u32 (not u4), depending on the max file size
 | 
				
			||||||
 | 
					    // u32 (4 bytes) = max offset/size of ~4.29GB; note that Zig's u4 is a 4-bit integer (max value 15)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    var keyDir: std.StringArrayHashMap(KeyDirEntry) = undefined;
 | 
				
			||||||
 | 
					    var current_file: std.fs.File = undefined;
 | 
				
			||||||
 | 
					    var current_file_name: [*]const u8 = "";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // bitcask:close(BitCaskHandle) Close a Bitcask data store and flush all pending writes (if any) to disk
 | 
				
			||||||
 | 
					    pub fn close() void {
 | 
				
			||||||
 | 
					        std.debug.print("Closing bitcask...\n", .{});
 | 
				
			||||||
 | 
					        arena.deinit();
 | 
				
			||||||
 | 
					        current_file.close();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // From the Bitcask paper, the API should look something like this
 | 
					    // From the Bitcask paper, the API should look something like this
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -29,66 +76,187 @@ pub const BitCask = struct {
 | 
				
			|||||||
    // writer and not just a reader) and sync on put (if this writer would
 | 
					    // writer and not just a reader) and sync on put (if this writer would
 | 
				
			||||||
    // prefer to sync the write file after every write operation).
 | 
					    // prefer to sync the write file after every write operation).
 | 
				
			||||||
    // The directory must be readable and writable by this process, and
 | 
					    // The directory must be readable and writable by this process, and
 | 
				
			||||||
    // only one process may open a Bitcask with read write at a time.
 | 
					    // only one process may open a Bitcask in read-write mode at a time.
 | 
				
			||||||
    // bitcask:open(DirectoryName) Open a new or existing Bitcask datastore for read-only access.
 | 
					    // bitcask:open(DirectoryName) Open a new or existing Bitcask datastore for read-only access.
 | 
				
			||||||
    // → BitCaskHandle | {error, any()} The directory and all files in it must be readable by this process.
 | 
					    // → BitCaskHandle | {error, any()} The directory and all files in it must be readable by this process.
 | 
				
			||||||
    fn open(directory_name: []const u8) BitcaskFileError!void {
 | 
					    pub fn open(directory_name: []const u8) (std.fs.File.OpenError || BitcaskFileError)!void {
 | 
				
			||||||
        std.debug.print("Opening bitcask in dir {s}\n", .{directory_name});
 | 
					        // init keyDir
 | 
				
			||||||
        return error.FileNotFound;
 | 
					        keyDir = std.StringArrayHashMap(KeyDirEntry).init(
 | 
				
			||||||
 | 
					            allocator,
 | 
				
			||||||
 | 
					        );
 | 
				
			||||||
 | 
					        const file_name = "cask1.db";
 | 
				
			||||||
 | 
					        current_file_name = file_name;
 | 
				
			||||||
 | 
					        // Open file
 | 
				
			||||||
 | 
					        std.debug.print("\nOpening bitcask in dir {s}/\n", .{directory_name});
 | 
				
			||||||
 | 
					        var bitcask_dir = try std.fs.Dir.openDir(std.fs.cwd(), directory_name, .{.access_sub_paths=false});
 | 
				
			||||||
 | 
					        // TODO get all files in cask and have some order to open the latest one
 | 
				
			||||||
 | 
					        var err_or_file = bitcask_dir.openFile(file_name, .{.mode=.read_write});
 | 
				
			||||||
 | 
					        
 | 
				
			||||||
 | 
					        // If the file does not exist, create it
 | 
				
			||||||
 | 
					        if (err_or_file == std.fs.File.OpenError.FileNotFound) {
 | 
				
			||||||
 | 
					            std.debug.print("No file found, creating new one\n", .{});
 | 
				
			||||||
 | 
					            _ = try bitcask_dir.createFile(file_name, .{});  // catch this error, then I can remove error union
 | 
				
			||||||
 | 
					            current_file = try bitcask_dir.openFile(file_name, .{.mode=.read_write});
 | 
				
			||||||
 | 
					        } else if (@TypeOf(err_or_file) != std.fs.File.OpenError) {
 | 
				
			||||||
 | 
					            std.debug.print("File {s} found\n", .{file_name});
 | 
				
			||||||
 | 
					            current_file = try err_or_file;
 | 
				
			||||||
 | 
					        } else {
 | 
				
			||||||
 | 
					            std.debug.print("Encountered unknown error while opening bitcask: {any}\n", .{err_or_file});
 | 
				
			||||||
 | 
					            return error.CannotInitialize;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					//         std.debug.print("Current File open: {}\n", .{current_file});
 | 
				
			||||||
 | 
					        return;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // bitcask:get(BitCaskHandle, Key) Retrieve a value by key from a Bitcask datastore.
 | 
					    // bitcask:get(BitCaskHandle, Key) Retrieve a value by key from a Bitcask datastore.
 | 
				
			||||||
    // → not found | {ok, Value}
 | 
					    // → not found | {ok, Value}
 | 
				
			||||||
    fn get(key: []const u8) error{NotImplemented}!void {
 | 
					    pub fn get(key: []const u8) error{OutOfMemory, NoCaskFound, Unknown}!CaskEntry {
 | 
				
			||||||
        std.debug.print("Getting value with key {s}\n", .{key});
 | 
					        std.debug.print("Getting value with key {s}\n", .{key});
 | 
				
			||||||
        return error.NotImplemented;
 | 
					        const optional_cask = keyDir.get(key);
 | 
				
			||||||
 | 
					        if (optional_cask == null) {
 | 
				
			||||||
 | 
					            return error.NoCaskFound;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        const cask = optional_cask.?; // shorthand for 'optional_cask orelse unreachable'
 | 
				
			||||||
 | 
					        std.debug.print("Got cask from file {s}\n", .{cask.file});
 | 
				
			||||||
 | 
					        return cask;
 | 
				
			||||||
 | 
					        // TODO get entry from db rather than return the keyDir entry
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // bitcask:put(BitCaskHandle, Key, Value) Store a key and value in a Bitcask datastore.
 | 
					    // bitcask:put(BitCaskHandle, Key, Value) Store a key and value in a Bitcask datastore.
 | 
				
			||||||
    // → ok | {error, any()}
 | 
					    // → ok | {error, any()}
 | 
				
			||||||
    fn put() error{NotImplemented}!void {
 | 
					    pub fn put(data: KV) BitcaskFileError!void {
 | 
				
			||||||
        return error.NotImplemented;
 | 
					
 | 
				
			||||||
 | 
					//         const key_type: EntryType = EntryType.str;
 | 
				
			||||||
 | 
					//         const key_size = getEntrySize(data.key, key_type);
 | 
				
			||||||
 | 
					//         const val_type: EntryType = EntryType.str;
 | 
				
			||||||
 | 
					//         const val_size = getEntrySize(data.val, val_type);
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        const entry_bytes = serializeCaskEntry(&allocator, data) catch |err| {
 | 
				
			||||||
 | 
					            std.debug.print("Encountered unknown error while serializing bitcask: {any}\n", .{err});
 | 
				
			||||||
 | 
					            return BitcaskFileError.Unknown;
 | 
				
			||||||
 | 
					        };
 | 
				
			||||||
 | 
					        std.debug.print("Writing data to cask: {}\n", .{data});
 | 
				
			||||||
 | 
					        const i = current_file.write(entry_bytes) catch |err| {
 | 
				
			||||||
 | 
					            std.debug.print("Encountered unknown error while writing to bitcask: {any}\n", .{err});
 | 
				
			||||||
 | 
					            return BitcaskFileError.Unknown;
 | 
				
			||||||
 | 
					        };
 | 
				
			||||||
 | 
					        std.debug.print("After write: {any}\n", .{i});
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        //try keyDir.put("key", data.val);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        return;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // bitcask:delete(BitCaskHandle, Key) Delete a key from a Bitcask datastore.
 | 
					    // bitcask:delete(BitCaskHandle, Key) Delete a key from a Bitcask datastore.  Note that deletion is simply a write of a special tombstone value, which will be removed on the next merge
 | 
				
			||||||
    // → ok | {error, any()}
 | 
					    // → ok | {error, any()}
 | 
				
			||||||
    fn delete() error{NotImplemented}!void {
 | 
					    pub fn delete() error{NotImplemented}!void {
 | 
				
			||||||
        return error.NotImplemented;
 | 
					        return error.NotImplemented;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // bitcask:list keys(BitCaskHandle) List all keys in a Bitcask datastore.
 | 
					    // bitcask:list_keys(BitCaskHandle) List all keys in a Bitcask datastore.
 | 
				
			||||||
    // → [Key] | {error, any()}
 | 
					    // → [Key] | {error, any()}
 | 
				
			||||||
    fn list() error{NotImplemented}!void {
 | 
					    pub fn list() error{NotImplemented}!void {
 | 
				
			||||||
        return error.NotImplemented;
 | 
					        return error.NotImplemented;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // bitcask:fold(BitCaskHandle,Fun,Acc0) Fold over all K/V pairs in a Bitcask datastore.
 | 
					    // bitcask:fold(BitCaskHandle,Fun,Acc0) Fold over all K/V pairs in a Bitcask datastore.
 | 
				
			||||||
    // → Acc Fun is expected to be of the form: F(K,V,Acc0) → Acc.
 | 
					    // → Acc. Fun is expected to be of the form: F(K,V,Acc0) → Acc.
 | 
				
			||||||
    fn fold() error{NotImplemented}!void {
 | 
					    pub fn fold() error{NotImplemented}!void {
 | 
				
			||||||
        return error.NotImplemented;
 | 
					        return error.NotImplemented;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // bitcask:merge(DirectoryName) Merge several data files within a Bitcask datastore into a more
 | 
					    // bitcask:merge(DirectoryName) Merge several data files within a Bitcask datastore into a more compact form. Also, produce hintfiles for faster startup.
 | 
				
			||||||
    // → ok | {error, any()} compact form. Also, produce hintfiles for faster startup.
 | 
					    // → ok | {error, any()}
 | 
				
			||||||
    fn merge() error{NotImplemented}!void {
 | 
					    pub fn merge() error{NotImplemented}!void {
 | 
				
			||||||
        return error.NotImplemented;
 | 
					        return error.NotImplemented;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // bitcask:sync(BitCaskHandle) Force any writes to sync to disk.
 | 
					    // bitcask:sync(BitCaskHandle) Force any writes to sync to disk.
 | 
				
			||||||
    // → ok
 | 
					    // → ok
 | 
				
			||||||
    fn sync() error{NotImplemented}!void {
 | 
					    pub fn sync() error{NotImplemented}!void {
 | 
				
			||||||
        return error.NotImplemented;
 | 
					        return error.NotImplemented;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // bitcask:close(BitCaskHandle) Close a Bitcask data store and flush all pending writes (if any) to disk
 | 
					    fn getEntrySize(data: *anyopaque, t: EntryType) u32 {
 | 
				
			||||||
    fn close() error{NotImplemented}!void {
 | 
					        switch (t) {
 | 
				
			||||||
        return error.NotImplemented;
 | 
					            .str => {
 | 
				
			||||||
 | 
					                const ptr = @ptrCast([*]const u8, data);
 | 
				
			||||||
 | 
					                return ptr.len;
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					            .int => {
 | 
				
			||||||
 | 
					                return @sizeOf(i32);
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					            .flt => {
 | 
				
			||||||
 | 
					                return @sizeOf(f64);
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn serializeCaskEntry(aloc: *const mem.Allocator, entry: KV) error{OutOfMemory}![]u8 {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        var serialized_data = try std.ArrayList(u8).initCapacity(aloc.*, entry.key.len + entry.val.len + 8);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
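					        // Serialize the key length into raw bytes. entry.key.len is a usize, so this emits @sizeOf(usize) bytes (8 on 64-bit targets), not 4.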
 | 
				
			||||||
 | 
					        const keyBytes = std.mem.toBytes(entry.key.len);
 | 
				
			||||||
 | 
					        try serialized_data.appendSlice(&keyBytes);        // Serialize u32 into 4 u8 bytes
 | 
				
			||||||
 | 
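					        // Serialize the value length into raw bytes. entry.val.len is a usize, so this emits @sizeOf(usize) bytes (8 on 64-bit targets), not 4.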
 | 
				
			||||||
 | 
					        const valBytes = std.mem.toBytes(entry.val.len);
 | 
				
			||||||
 | 
					        try serialized_data.appendSlice(&valBytes);
 | 
				
			||||||
 | 
					        //try serialized_data.append(@enumToInt(entry.valType));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for (entry.key) |byte| {
 | 
				
			||||||
 | 
					            try serialized_data.append(byte);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for (entry.val) |byte| {
 | 
				
			||||||
 | 
					            try serialized_data.append(byte);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // Convert the val any type to the type specified
 | 
				
			||||||
 | 
					//         switch (entry.valType) {
 | 
				
			||||||
 | 
					//             .str => {
 | 
				
			||||||
 | 
					//                 const str_val = @ptrCast([*]const u8, entry.val);
 | 
				
			||||||
 | 
					//                 for (str_val) |byte| {
 | 
				
			||||||
 | 
					//                     try serialized_data.append(byte);
 | 
				
			||||||
 | 
					//                 }
 | 
				
			||||||
 | 
					//             },
 | 
				
			||||||
 | 
					//             .int => {
 | 
				
			||||||
 | 
					//                 const int_val = @ptrCast(*const i32, entry.val);
 | 
				
			||||||
 | 
					//                 try serialized_data.appendSlice(mem.asBytes(int_val));
 | 
				
			||||||
 | 
					//             },
 | 
				
			||||||
 | 
					//             .flt => {
 | 
				
			||||||
 | 
					//                 const flt_val = @ptrCast(*const f64, entry.val);
 | 
				
			||||||
 | 
					//                 try serialized_data.appendSlice(mem.asBytes(flt_val));
 | 
				
			||||||
 | 
					//             }
 | 
				
			||||||
 | 
					//         }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        return try serialized_data.toOwnedSlice();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					//     fn getValType(caskVal: any) u8 {
 | 
				
			||||||
 | 
					//         switch (@TypeOf(caskVal)) {
 | 
				
			||||||
 | 
					//             u8 =>
 | 
				
			||||||
 | 
					//         }
 | 
				
			||||||
 | 
					//     }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// pub fn main() !void {
 | 
				
			||||||
 | 
					//     const bc = BitCask;
 | 
				
			||||||
 | 
					//     try bc.open("data");
 | 
				
			||||||
 | 
					//     defer bc.close();
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					//     try bc.put(.{.key = "id", .val = "abc123"});
 | 
				
			||||||
 | 
					// }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
test "Bitcask spec implementation: open" {
 | 
					test "Bitcask spec implementation: open" {
 | 
				
			||||||
    const bc = BitCask;
 | 
					    const bc = BitCask;
 | 
				
			||||||
    try bc.open("File");
 | 
					    try bc.open("data");
 | 
				
			||||||
 | 
					    defer bc.close();
 | 
				
			||||||
    // bc.open("File") catch |err| {
 | 
					    // bc.open("File") catch |err| {
 | 
				
			||||||
    //     try expect(err == error.FileNotFound);
 | 
					    //     try expect(err == error.FileNotFound);
 | 
				
			||||||
    // };
 | 
					    // };
 | 
				
			||||||
@ -97,12 +265,33 @@ test "Bitcask spec implementation: open" {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
test "Bitcask spec implementation: get" {
 | 
					test "Bitcask spec implementation: get" {
 | 
				
			||||||
    const bc = BitCask;
 | 
					    const bc = BitCask;
 | 
				
			||||||
    try bc.get("key");
 | 
					    //bc.init();
 | 
				
			||||||
 | 
					    try bc.open("data");
 | 
				
			||||||
 | 
					    defer bc.close();
 | 
				
			||||||
 | 
					    _ = bc.get("key") catch |err| {
 | 
				
			||||||
 | 
					        try expect(err == error.NoCaskFound);
 | 
				
			||||||
 | 
					    };
 | 
				
			||||||
 | 
					    //try bc.put();
 | 
				
			||||||
 | 
					    const cask = try bc.get("key");
 | 
				
			||||||
 | 
					    try expect(std.mem.eql(u8, cask.file, "file.txt"));
 | 
				
			||||||
 | 
					    try expect(cask.offset == 0);
 | 
				
			||||||
 | 
					    try expect(cask.size == 0);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
test "Bitcask spec implementation: put" {
 | 
					test "Bitcask spec implementation: put" {
 | 
				
			||||||
    const bc = BitCask;
 | 
					    const bc = BitCask;
 | 
				
			||||||
    try bc.put();
 | 
					    try bc.open("data");
 | 
				
			||||||
 | 
					    defer bc.close();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const key = "id";
 | 
				
			||||||
 | 
					    const val = "1";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    try bc.put(.{
 | 
				
			||||||
 | 
					        .key = key,
 | 
				
			||||||
 | 
					        .val = val
 | 
				
			||||||
 | 
					    });
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    try bc.put(.{.key = "user1", .val = "likes apples so much"});
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
test "Bitcask spec implementation: delete" {
 | 
					test "Bitcask spec implementation: delete" {
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
		Reference in New Issue
	
	Block a user