WIP parsing
parent 38eab409c6
commit 95d94c1e23
build.zig
@@ -80,12 +80,12 @@ pub fn build(b: *std.Build) void {
        .optimize = optimize,
    });
    const check = b.step("check", "Check if project compiles and tests pass, used by Zig Language Server");
    const check_unit_tests = b.addTest(.{
        .root_module = exe_mod,
    });
    // add unit tests to check step (gives lsp semantics in tests, but can result in no semantics)
    // const check_unit_tests = b.addTest(.{
    //     .root_module = exe_mod,
    // });
    // const run_check_unit_tests = b.addRunArtifact(check_unit_tests);
    // check.dependOn(&run_check_unit_tests.step);

    const run_check_unit_tests = b.addRunArtifact(check_unit_tests);

    check.dependOn(&run_check_unit_tests.step);
    check.dependOn(&exe_check.step);
}

@@ -1,30 +1,178 @@
const std = @import("std");
const helpers = @import("parser_helpers.zig");
const CyoContent = @import("../cyo/cyo.zig").content;

const DEFAULT_CYO_SOURCE_PATH = "./cyo";

//
// Lexical Types
//
const Token = struct { line: u32, pos: u32, lexeme: Lexeme, contents: []const u8 };
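// Token.line and Token.pos are 1-based (the lexer starts both counters at 1);
// contents is the slice of source text the token covers.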

const Lexeme = enum {
    Lt, // <
    Gt, // >
    LeftParen, // (
    RightParen, // )
    LeftBracket, // [
    RightBracket, // ]
    Text, // Foo bar blah. Another.
    Dollar, // $
    Period, // .
    Colon, // :
    Equals, // =
    Hashtag, // #
    Underscore, // _
    Newline, // \n
    Tab, // \t
    Space, // ' '
    // symbols
    Gt, // >
    Lt, // <
    LeftBracket, // [
    RightBracket, // ]
    LeftParen, // (
    RightParen, // )
    Dollar, // $
    Period, // .
    Underscore, // _
    Colon, // :
    Comma, // ,
    SingleQuote, // '
    DoubleQuote, // "
    Equals, // =
    Hashtag, // #
    Add, // +
    Subtract, // -
    Multiply, // *
    Divide, // /
    Modulo, // %
    EqualsEquals, // ==
    // keys
    DollarKey, // $foobar
    PeriodKey, // .Foobar
    UnderscoreKey, // _Foobar
    // keywords
    If, // if
    Else, // else
    True, // true
    False, // false
    // logic keywords
    And, // and
    Or, // or
    Not, // not
    // text
    Text, // Foo bar blah. Another.
    // whitespace
    Newline, // \n
    Whitespace, // \t or ' ', tab or space
    Eof, //
};
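// For example, the input line `SpaceShip:` lexes to Text("SpaceShip") followed by Colon,
// and `poweredOn=false` lexes to Text, Equals, False (see the lex tests below).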

//
// AST types
//
const AstError = error{
    NotTopLevelDeclaration,
};

const EntityType = enum {
    item,
    location,
};

const AttributeValue = union(enum) {
    String: []const u8,
    Integer: i64,
    Float: f64,
    Bool: bool,
    EnumList: [][]const u8,
};
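// e.g. the "parse ast" test at the bottom of this file expects the attribute line
// `poweredOn=false` to be stored as AttributeValue{ .Bool = false }.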

const AstCallback = struct {
    statements: []Statement,
};

const Statement = union(enum) {
    Conditional: ConditionalStatement,
    Assignment: AssignmentStatement,
    TextOutput: TextStatement,
    Expression: Expression, // for standalone expressions
};

const ConditionalStatement = struct {
    condition: *Expression,
    then_block: []Statement,
    else_block: ?[]Statement, // optional else clause
};

const AssignmentStatement = struct {
    target: *Expression, // $self.status, $player.inventory, etc.
    value: *Expression,
};

const TextStatement = struct {
    text: []const u8, // "You screw the back on tight, and the light comes on!"
};

const Expression = union(enum) {
    PropertyAccess: PropertyAccess, // $self.status
    MethodCall: MethodCall, // $context.args.contains(Batteries)
    Comparison: Comparison, // $self.status = .Bright
    LogicalOp: LogicalOp, // and, or, not
    Identifier: []const u8, // Batteries, DuctTape
    EnumValue: []const u8, // .Bright, .Dead
    StringLiteral: []const u8,
    NumberLiteral: i64,
    BooleanLiteral: bool,
};

const PropertyAccess = struct {
    object: []const u8, // "self", "player", "context"
    property: []const u8, // "status", "inventory"
};

const MethodCall = struct {
    object: *Expression,
    method: []const u8, // "contains", "remove"
    args: []*Expression,
};

const Comparison = struct {
    left: *Expression,
    operator: ComparisonOp, // =, !=, <, >, etc.
    right: *Expression,
};

const ComparisonOp = enum {
    Equal, // ==
    NotEqual, // !=
    LessThan, // <
    GreaterThan, // >
    LessEqual, // <=
    GreaterEqual, // >=
};

const LogicalOp = struct {
    operator: LogicalOperator,
    left: *Expression,
    right: ?*Expression, // optional for 'not' which is unary
};

const LogicalOperator = enum {
    And,
    Or,
    Not,
};

const AstParse = struct {
    entities: std.ArrayList(AstEntity),
    parse_error: ?AstParseError,
};

const AstParseError = struct {
    file_name: ?[]const u8,
    line: u32,
    pos: u32,
    error_message: []const u8,
    stack_trace: []const u8,

    pub fn print(self: *AstParseError) void {
        std.debug.print("Parsing error:\n{s}:{d}:{d}: {s}\n\n{s}", .{ self.file_name, self.line, self.pos, self.error_message, self.stack_trace });
    }
};

const AstEntity = struct {
    type: EntityType,
    identifier: []const u8,
    attributes: std.StringHashMap(AttributeValue),
    callbacks: std.StringHashMap(AstCallback),
};

pub const CyoError = error{ BadSource, BadIter };

pub const CyoParser = struct {
@@ -60,25 +208,27 @@ pub const CyoParser = struct {
        var files_contents = std.StringHashMap([]const u8).init(allocator);
        try walkDirs(allocator, cyo_dir, 0, &files_contents);

        // 2. lex contents
        var files_tokens = try lexCyoFiles(allocator, files_contents);
        printFilesTokens(files_tokens);

        var iter = files_tokens.keyIterator();
        while (iter.next()) |key| {
            const tokens = files_tokens.get(key.*);
            if (tokens) |t| {
                t.deinit();
        defer {
            var iter = files_tokens.keyIterator();
            while (iter.next()) |key| {
                const tokens = files_tokens.get(key.*);
                if (tokens) |t| {
                    t.deinit();
                }
                allocator.free(key.*);
            }
            allocator.free(key.*);
            files_tokens.deinit();
        }
        files_tokens.deinit();
        // const cyo_dir_path = cyo_dir.realpath(pathname: []const u8, out_buffer: []u8)

        // 2. process files
        // 2a. create ast for each file
        // createAstFromFilesTokens(allocator, files_tokens);

        // 2b. syntactic parsing - validate

        // 2a. lexical - validate file syntax
        // 2b. syntactic parsing
        // 2c. semantic - create objects and scenes

        // 2d. evaluate - find missing or cyclical links

        return CyoContent{ .allocator = allocator, .files_contents = files_contents };
@@ -159,71 +309,7 @@ pub const CyoParser = struct {
                continue;
            }

            var tokens = std.ArrayList(Token).init(allocator);
            const c = content.?;
            var i: u32 = 0;
            var line: u32 = 1;
            var col: u32 = 1;
            while (i < c.len) {
                const lexeme = charToLexeme(c[i]);
                switch (lexeme) {
                    // whitespace
                    .Space, .Tab => {
                        const char_repeats = greedyCapture(c, i);
                        for (0..char_repeats) |_| {
                            try tokens.append(.{ .line = line, .pos = col, .lexeme = lexeme, .contents = c[i .. i + char_repeats] });
                            col += 1;
                        }
                        i += char_repeats;
                    },
                    .Newline => {
                        try tokens.append(.{ .line = line, .pos = col, .lexeme = lexeme, .contents = c[i .. i + 1] });
                        line += 1;
                        col = 1;
                        i += 1;
                    },
                    // symbols
                    .Gt,
                    .Lt,
                    .LeftParen,
                    .RightParen,
                    .LeftBracket,
                    .RightBracket,
                    .Dollar,
                    .Period,
                    .Equals,
                    .Colon,
                    .Underscore,
                    => {
                        try tokens.append(.{ .line = line, .pos = col, .lexeme = lexeme, .contents = c[i .. i + 1] });
                        // check length of content is one
                        std.debug.assert(tokens.items[tokens.items.len - 1].contents.len == 1);

                        col += 1;
                        i += 1;
                    },
                    // text
                    .Text => {
                        const text_length = captureText(c, i);
                        try tokens.append(.{ .line = line, .pos = col, .lexeme = lexeme, .contents = c[i .. i + text_length] });

                        col += text_length;
                        i += text_length;
                    },
                    .Hashtag => {
                        const to_end_of_line = captureLine(c, i);
                        // TODO for testing, remove as we don't need to save comments
try tokens.append(.{ .line = line, .pos = col, .lexeme = lexeme, .contents = c[i .. i + to_end_of_line] });
|
||||
|
||||
col += to_end_of_line;
|
||||
i += to_end_of_line;
|
||||
},
|
||||
.Eof => unreachable,
|
||||
}
|
||||
}
|
||||
// Add eof token
|
||||
try tokens.append(.{ .line = line, .pos = col, .lexeme = Lexeme.Eof, .contents = "" });
|
||||
|
||||
const tokens = try lexCyoContent(allocator, content.?);
|
||||
// Add tokens to hashmap
|
||||
const key_copy = try allocator.alloc(u8, key.len);
|
||||
std.mem.copyForwards(u8, key_copy, key.*);
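            // key_copy becomes the map key owned by files_tokens; the caller's defer block frees each key along with its token list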
@@ -232,30 +318,161 @@ pub const CyoParser = struct {
        return files_tokens;
    }

    fn lexCyoContent(allocator: std.mem.Allocator, content: []const u8) !std.ArrayList(Token) {
        var tokens = std.ArrayList(Token).init(allocator);
        var i: u32 = 0;
        var line: u32 = 1;
        var col: u32 = 1;
        while (i < content.len) {
            const lexeme = charToLexeme(content[i]);
            switch (lexeme) {
                // whitespace
                .Whitespace => {
                    const char_repeats = greedyCapture(content, i);
                    for (0..char_repeats) |_| {
                        try tokens.append(.{ .line = line, .pos = col, .lexeme = lexeme, .contents = content[i .. i + char_repeats] });
                        col += 1;
                    }
                    i += char_repeats;
                },
                .Newline => {
                    try tokens.append(.{ .line = line, .pos = col, .lexeme = lexeme, .contents = content[i .. i + 1] });
                    line += 1;
                    col = 1;
                    i += 1;
                },
                // symbols
                .Gt,
                .Lt,
                .LeftParen,
                .RightParen,
                .LeftBracket,
                .RightBracket,
                .Colon,
                .Comma,
                .SingleQuote,
                .DoubleQuote,
                .Add,
                .Subtract,
                .Multiply,
                .Divide,
                .Modulo,
                => {
                    try tokens.append(.{ .line = line, .pos = col, .lexeme = lexeme, .contents = content[i .. i + 1] });
                    // check length of content is one
                    std.debug.assert(tokens.items[tokens.items.len - 1].contents.len == 1);

                    col += 1;
                    i += 1;
                },
                .Equals => {
                    // check for double equals
                    if (charToLexeme(content[i + 1]) == .Equals) {
                        try tokens.append(.{ .line = line, .pos = col, .lexeme = Lexeme.EqualsEquals, .contents = content[i .. i + 2] });
                        // check length of content is two
                        std.debug.assert(tokens.items[tokens.items.len - 1].contents.len == 2);

                        col += 2;
                        i += 2;
                    } else {
                        try tokens.append(.{ .line = line, .pos = col, .lexeme = lexeme, .contents = content[i .. i + 1] });
                        // check length of content is one
                        std.debug.assert(tokens.items[tokens.items.len - 1].contents.len == 1);

                        col += 1;
                        i += 1;
                    }
                },
                // keys
                .Dollar,
                .Period,
                .Underscore,
                => {
                    const key_length = captureKey(content, i);
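                    // a lone symbol stays a plain Dollar/Period/Underscore token; a symbol followed by text
                    // becomes a DollarKey/PeriodKey/UnderscoreKey token (e.g. `$self`, `.Bright`, `_onUse`)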
                    if (key_length == 1) {
                        try tokens.append(.{ .line = line, .pos = col, .lexeme = lexeme, .contents = content[i .. i + 1] });
                        // check length of content is one
                        std.debug.assert(tokens.items[tokens.items.len - 1].contents.len == 1);

                        col += 1;
                        i += 1;
                    } else {
                        const lexeme_key = blk: switch (lexeme) {
                            .Period => break :blk Lexeme.PeriodKey,
                            .Underscore => break :blk Lexeme.UnderscoreKey,
                            .Dollar => break :blk Lexeme.DollarKey,
                            else => unreachable,
                        };
                        try tokens.append(.{ .line = line, .pos = col, .lexeme = lexeme_key, .contents = content[i .. i + key_length] });

                        col += key_length;
                        i += key_length;
                    }
                },
                // text and keywords
                .Text => {
                    var text_lex: Lexeme = .Text;
                    const text_length = captureText(content, i);

                    // check for keywords
                    const lowered_text = try helpers.toLower(allocator, content[i .. i + text_length]);
                    defer allocator.free(lowered_text);
                    if (std.mem.eql(u8, "true", lowered_text)) {
                        text_lex = .True;
                    } else if (std.mem.eql(u8, "false", lowered_text)) {
                        text_lex = .False;
                    } else if (std.mem.eql(u8, "if", lowered_text)) {
                        text_lex = .If;
                    } else if (std.mem.eql(u8, "else", lowered_text)) {
                        text_lex = .Else;
                    } else if (std.mem.eql(u8, "and", lowered_text)) {
                        text_lex = .And;
                    } else if (std.mem.eql(u8, "or", lowered_text)) {
                        text_lex = .Or;
                    } else if (std.mem.eql(u8, "not", lowered_text)) {
                        text_lex = .Not;
                    }
                    try tokens.append(.{ .line = line, .pos = col, .lexeme = text_lex, .contents = content[i .. i + text_length] });

                    col += text_length;
                    i += text_length;
                },
                .Hashtag => {
                    const to_end_of_line = captureLine(content, i);
                    // TODO for testing, remove as we don't need to save comments
                    try tokens.append(.{ .line = line, .pos = col, .lexeme = lexeme, .contents = content[i .. i + to_end_of_line] });

                    col += to_end_of_line;
                    i += to_end_of_line;
                },
                .Eof => unreachable,
                else => unreachable,
            }
        }
        // Add eof token
        try tokens.append(.{ .line = line, .pos = col, .lexeme = Lexeme.Eof, .contents = "" });
        return tokens;
    }

    fn printFilesTokens(files_tokens: std.StringHashMap(std.ArrayList(Token))) void {
        var iter = files_tokens.keyIterator();
        while (iter.next()) |key| {
            std.debug.print("File: {s}", .{key.*});
            const tokens = files_tokens.get(key.*);
            if (tokens) |ts| {
                for (ts.items) |token| {
                    std.debug.print("\tGot Token: {s}\tCol{d}:L{d}\t{s}\n", .{ @tagName(token.lexeme), token.pos, token.line, token.contents });
                }
                printLexTokens(ts);
            }
        }
    }

    fn printLexTokens(tokens: std.ArrayList(Token)) void {
        for (tokens.items) |token| {
            std.debug.print("\tGot Token: {s}\tCol{d}:L{d}\t{s}\n", .{ @tagName(token.lexeme), token.pos, token.line, token.contents });
        }
    }

    fn charToLexeme(char: u8) Lexeme {
        switch (char) {
            ' ' => {
                return .Space;
            },
            '\t' => {
                return .Tab;
            },
            '\n' => {
                return .Newline;
            },
            '<' => {
                return .Lt;
            },
@@ -277,6 +494,9 @@ pub const CyoParser = struct {
            '$' => {
                return .Dollar;
            },
            '_' => {
                return .Underscore;
            },
            '.' => {
                return .Period;
            },
@@ -286,12 +506,41 @@ pub const CyoParser = struct {
            ':' => {
                return .Colon;
            },
            '_' => {
                return .Underscore;
            ',' => {
                return .Comma;
            },
            '\'' => {
                return .SingleQuote;
            },
            '"' => {
                return .DoubleQuote;
            },
            '+' => {
                return .Add;
            },
            '-' => {
                return .Subtract;
            },
            '*' => {
                return .Multiply;
            },
            '/' => {
                return .Divide;
            },
            '%' => {
                return .Modulo;
            },
            // comment
            '#' => {
                return .Hashtag;
            },
            // whitespace
            ' ', '\t' => {
                return .Whitespace;
            },
            '\n' => {
                return .Newline;
            },
            else => return .Text,
        }
    }
@@ -306,6 +555,7 @@ pub const CyoParser = struct {
        return j - i;
    }

    // captures an entire text sequence, broken up by any other lexeme
    fn captureText(seq: []const u8, i: u32) u32 {
        var j = i;
        while (j < seq.len and charToLexeme(seq[j]) == .Text) {
@@ -314,6 +564,17 @@ pub const CyoParser = struct {
        return j - i;
    }

    // used to capture lexemes like `.Bright` or `_onUse`
    fn captureKey(seq: []const u8, i: u32) u32 {
        var j = i + 1;
        while (j < seq.len and charToLexeme(seq[j]) == .Text) {
            j += 1;
        }

        return j - i;
    }

    // comment capture, sucks up the rest of the line
    fn captureLine(seq: []const u8, i: u32) u32 {
        var j = i;
        while (j < seq.len) {
@@ -325,9 +586,196 @@ pub const CyoParser = struct {
        }
        return j - i;
    }

    fn createAstFromFilesTokens(allocator: std.mem.Allocator, files_tokens: std.StringHashMap(std.ArrayList(Token))) !std.StringHashMap(AstParse) {
        var entities = std.StringHashMap(AstParse).init(allocator);
        var iter = files_tokens.keyIterator();
        while (iter.next()) |key| {
            const tokens = files_tokens.get(key.*);
            if (tokens) |ts| {
                const ast = try createAstFromTokens(allocator, ts, key.*);
                if (ast.parse_error) |err| {
                    err.print();
                    allocator.free(err.error_message);
                    allocator.free(err.stack_trace);
                    return error.ParseFailure;
                }
                const key_copy = try allocator.alloc(u8, key.len);
                std.mem.copyForwards(u8, key_copy, key.*);
                try entities.put(key_copy, ast);
            }
        }
        return entities;
    }

    fn createAstFromTokens(allocator: std.mem.Allocator, tokens: std.ArrayList(Token), file_name: ?[]const u8) !AstParse {
        // ast values
        const entity_type: ?EntityType = EntityType.item;
        var identifier: []const u8 = "";
        var attributes = std.StringHashMap(AttributeValue).init(allocator);
        var callbacks = std.StringHashMap(AstCallback).init(allocator);
        var entities = std.ArrayList(AstEntity).init(allocator);

        errdefer {
            attributes.deinit();
            callbacks.deinit();
            for (entities.items) |*ent| {
                ent.attributes.deinit();
                ent.callbacks.deinit();
            }
        }

        // local state
        var depth: u8 = 0;
        var in_entity = false;

        var i: usize = 0;
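        // depth counts the Whitespace tokens seen on the current line (Newline/Eof reset it);
        // depth == 0 marks an unindented, top-level declaration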
        while (i < tokens.items.len) {
            var inc: usize = 1;
            const token = tokens.items[i];
            switch (token.lexeme) {
                .Text => {
                    if (depth == 0) {
                        if (peekNext(tokens, i) == .Colon) {
                            if (!in_entity) {
                                identifier = token.contents;
                                in_entity = true;
                                inc = 2;
                            } else {
                                return AstParse{ .entities = entities, .parse_error = try parseErrorFromTokenIndex(allocator, i, tokens, file_name, "Non root declaration") };
                            }
                        }
                    } else {
                        // check for attribute
                        if (peekNext(tokens, i) == .Equals) {
                            const attr_name = token.contents;
                            const new_attribute = try allocator.create(AttributeValue);
                            const inc_by = captureAttribute(allocator, new_attribute, tokens, i) catch {
                                return AstParse{ .entities = entities, .parse_error = try parseErrorFromTokenIndex(allocator, i, tokens, file_name, "Invalid attribute") };
                            };
                            inc += inc_by;
                            try attributes.put(attr_name, new_attribute.*);
                        }
                    }
                },
                .UnderscoreKey => {
                    // TODO for global callbacks
                    if (depth == 0) {
                        unreachable;
                    } else {
                        // callback
                        const callback_name = token.contents;
                        var statements: []Statement = undefined;
                        if (peekNext(tokens, i) == .Colon) {
                            // TODO
                            statements = captureStatements();
                        }
                        try callbacks.put(callback_name, AstCallback{ .statements = statements });
                    }
                },
                .DollarKey => {
                    unreachable;
                },
                .Whitespace => {
                    depth += 1;
                },
                .Newline, .Eof => {
                    depth = 0;
                },
                else => {
                    std.debug.print("Got token {s}", .{@tagName(token.lexeme)});
                    unreachable;
                },
            }
            i += inc;
        }

        const entity = AstEntity{ .identifier = identifier, .type = entity_type.?, .attributes = attributes, .callbacks = callbacks };
        try entities.append(entity);

        return AstParse{ .entities = entities, .parse_error = null };
    }

    // returns the number of tokens captured in attribute, should inc tokens by this value
    fn captureAttribute(allocator: std.mem.Allocator, attribute: *AttributeValue, tokens: std.ArrayList(Token), i: usize) !usize {
        const attr_open_lexeme = tokens.items[i + 2].lexeme;
        switch (attr_open_lexeme) {
            // string attribute
            .DoubleQuote, .SingleQuote => {
                var closed = false;
                var j = i + 3;
                var token_search = tokens.items[j].lexeme;
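                // scan forward until the matching quote closes the string; bail out if the line or file ends first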
                while (token_search != attr_open_lexeme and token_search != .Newline and token_search != .Eof) {
                    j += 1;
                    token_search = tokens.items[j].lexeme;
                    if (token_search == attr_open_lexeme) {
                        closed = true;
                    }
                }
                if (!closed) {
                    return error.UnclosedStringAttribute;
                }

                // get size of attr string
                var size: usize = 0;
                for (i + 3..j - 1) |k| {
                    size += tokens.items[k].contents.len;
                }
                std.debug.assert(size < 100);
                var attr_value = try allocator.alloc(u8, size);
                for (i + 3..j - 1) |k| {
                    attr_value = try std.fmt.bufPrint(attr_value, "{s}", .{tokens.items[k].contents});
                }
                attribute.String = attr_value;
                return j - i;
            },
            // enum attribute
            .LeftBracket => {},
            .True, .False => {},
            else => unreachable,
        }
        return 0;
    }

    //
    fn captureStatements() []Statement {
        var statements = [1]Statement{Statement{ .TextOutput = TextStatement{ .text = "foobar" } }};
        return statements[0..];
    }

    // error_message and stacktrace need to be freed by caller
    fn parseErrorFromTokenIndex(allocator: std.mem.Allocator, i: usize, tokens: std.ArrayList(Token), file_name: ?[]const u8, message: []const u8) !AstParseError {
        var stack_trace = try allocator.alloc(u8, 1024);
        errdefer allocator.free(stack_trace);

        const msg = try allocator.alloc(u8, message.len);
        std.mem.copyForwards(u8, msg, message);

        var begin_index = i;
        var end_index = i;
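        // grow the range outward so the stack trace can echo the tokens on the offending line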
        while (begin_index > 0 and tokens.items[begin_index].line == tokens.items[i].line) {
            begin_index -= 1;
        }

        while (end_index < tokens.items.len and tokens.items[end_index].line == tokens.items[i].line) {
            end_index += 1;
        }

        for (begin_index..end_index) |j| {
            _ = try std.fmt.bufPrint(stack_trace[0..], "{s}", .{tokens.items[j].contents});
        }
        return AstParseError{ .file_name = file_name, .line = tokens.items[i].line, .pos = tokens.items[i].pos, .error_message = msg, .stack_trace = stack_trace };
    }

    fn peekNext(t: std.ArrayList(Token), i: usize) ?Lexeme {
        if (i + 1 >= t.items.len) {
            return null;
        }
        return t.items[i + 1].lexeme;
    }
};

test "parse test" {
test "parse directory test" {
    // TODO, programmatically create a test directory instead of relying on the test directory to have the right contents
    const cyo_test_dir_path = try std.fs.cwd().realpathAlloc(std.testing.allocator, "./test/cyo_test_dir");
    defer std.testing.allocator.free(cyo_test_dir_path);
@@ -337,3 +785,98 @@ test "parse test" {
    // Verify reading in of correct files
    try std.testing.expectEqual(7, cyo_parser.cyo_content.files_contents.count());
}

test "lex ok" {
    const tokens = try CyoParser.lexCyoContent(std.testing.allocator,
        \\SpaceShip:
        \\  poweredOn=false
        \\  _onExamine:
        \\    $self.poweredOn=true
    );
    defer tokens.deinit();

    try std.testing.expectEqual(Lexeme.Text, tokens.items[0].lexeme);
    try std.testing.expectEqual(1, tokens.items[0].line);
    try std.testing.expectEqual(1, tokens.items[0].pos);
    try std.testing.expectEqualStrings("SpaceShip", tokens.items[0].contents);

    try std.testing.expectEqual(Lexeme.True, tokens.items[tokens.items.len - 2].lexeme);
    try std.testing.expectEqual(4, tokens.items[tokens.items.len - 2].line);
    try std.testing.expectEqual(21, tokens.items[tokens.items.len - 2].pos);
    try std.testing.expectEqualStrings("true", tokens.items[tokens.items.len - 2].contents);

    try std.testing.expectEqual(Lexeme.Eof, tokens.items[tokens.items.len - 1].lexeme);
    try std.testing.expectEqual(4, tokens.items[tokens.items.len - 1].line);
    try std.testing.expectEqual(25, tokens.items[tokens.items.len - 1].pos);
    try std.testing.expectEqualStrings("", tokens.items[tokens.items.len - 1].contents);
}

test "lex all lexemes" {
    const lexemes = [35]Lexeme{ .DollarKey, .Whitespace, .PeriodKey, .Lt, .Gt, .LeftBracket, .RightBracket, .LeftParen, .RightParen, .UnderscoreKey, .Dollar, .Period, .Underscore, .Colon, .If, .Comma, .Else, .SingleQuote, .True, .DoubleQuote, .False, .Add, .And, .Subtract, .Or, .Divide, .Not, .Multiply, .Newline, .Text, .Modulo, .EqualsEquals, .Equals, .Hashtag, .Eof };

    // check every lexeme is accounted for
    try std.testing.expectEqual(std.meta.fields(Lexeme).len, lexemes.len);
    comptime {
        @setEvalBranchQuota(10000);
        for (std.meta.fields(Lexeme)) |field| {
            var found = false;
            for (lexemes) |lex| {
                if (std.mem.eql(u8, field.name, @tagName(lex))) {
                    found = true;
                    break;
                }
            }
            if (!found) {
                @compileError("Missing lexeme field in test: " ++ field.name);
            }
        }
    }

    // check parsed count matches
    const tokens = try CyoParser.lexCyoContent(std.testing.allocator,
        \\$foo .foo<>[]()_foo$._:if,else'true"false+and-or/not*
        \\foobar%===#
    );
    defer tokens.deinit();
    try std.testing.expectEqual(lexemes.len, tokens.items.len);

    for (0.., lexemes) |i, lex| {
        try std.testing.expectEqual(lex, tokens.items[i].lexeme);
    }
}

test "parse ast" {
    const tokens = try CyoParser.lexCyoContent(std.testing.allocator,
        \\SpaceShip:
        \\  poweredOn=false
        \\  _onExamine:
        \\    $self.poweredOn=true
    );
    defer tokens.deinit();

    const ast = try CyoParser.createAstFromTokens(std.testing.allocator, tokens, null);

    var attributes = std.StringHashMap(AttributeValue).init(std.testing.allocator);
    try attributes.put("poweredOn", AttributeValue{ .Bool = false });

    var callbacks = std.StringHashMap(AstCallback).init(std.testing.allocator);
    var expression_target = try std.testing.allocator.create(Expression);
    var expression_value = try std.testing.allocator.create(Expression);
    defer {
        std.testing.allocator.destroy(expression_target);
        std.testing.allocator.destroy(expression_value);
    }

    expression_target.PropertyAccess = PropertyAccess{ .object = "self", .property = "poweredOn" };
    expression_value.BooleanLiteral = true;
    var statements = [1]Statement{
        Statement{
            .Assignment = AssignmentStatement{ .target = expression_target, .value = expression_value },
        },
    };
    try callbacks.put("onExamine", AstCallback{
        .statements = statements[0..],
    });

    try std.testing.expectEqual(AstEntity{ .identifier = "SpaceShip", .type = EntityType.location, .attributes = attributes, .callbacks = callbacks }, ast.entities.items[0]);
}