From 448b3baa76b6bd25e972e1134941cb649b6a91d0 Mon Sep 17 00:00:00 2001 From: Fabrice Date: Tue, 3 Mar 2026 12:39:25 +0100 Subject: actually lexing --- src/token.cc | 3 +-- src/tokenizer.cc | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/token.cc b/src/token.cc index ed1f65b..d3a787a 100644 --- a/src/token.cc +++ b/src/token.cc @@ -7,7 +7,6 @@ #include "source.cc" #define TOKEN_KINDS_NOLEX \ - X(Eof) \ X(Invalid_Char) \ X(Invalid_Literal) @@ -30,7 +29,7 @@ struct Token { String text; Span span; - Token() : kind(Token_Kind_Eof), text(), span() {} + Token() : kind(Token_Kind_Invalid_Char), text(), span() {} Token(Token_Kind kind, String text, Span span) : kind(kind), text(text), span(span) {} }; diff --git a/src/tokenizer.cc b/src/tokenizer.cc index f2efda4..26763eb 100644 --- a/src/tokenizer.cc +++ b/src/tokenizer.cc @@ -47,7 +47,7 @@ static Buffer* tokenizer_get_buffer(Tokenizer* tokenizer, usize* cursor) { Buffer* curr = tokenizer->buffer; if (likely(curr != nullptr)) { *cursor = curr->cursor; - if(*cursor < curr->content.length) return curr; + if (*cursor < curr->content.length) return curr; } if (!buffer_stack_pop(tokenizer->stack, &curr)) return nullptr; @@ -57,8 +57,7 @@ static Buffer* tokenizer_get_buffer(Tokenizer* tokenizer, usize* cursor) { return curr; } -static char tokenizer_advance(const Tokenizer* tokenizer, - usize* offset) { +static wchar tokenizer_advance(const Tokenizer* tokenizer, usize* offset) { const String text = tokenizer->buffer->content; unsigned char c = *text[*offset]; @@ -66,7 +65,7 @@ static char tokenizer_advance(const Tokenizer* tokenizer, if (nobytes > 1) panic("no support for multi-byte chars: %c:%d", c, nobytes); *offset += nobytes; - return c; + return (wchar)c; } bool tokenizer_next(Tokenizer* tokenizer, Token* out) { @@ -76,13 +75,16 @@ bool tokenizer_next(Tokenizer* tokenizer, Token* out) { usize cursor; Buffer* buffer = tokenizer_get_buffer(tokenizer, &cursor); if (buffer == nullptr) return false; + usize advance = cursor; + wchar c = tokenizer_advance(tokenizer, &advance); - tokenizer_advance(tokenizer, &advance); - Token token = {}; - tokenizer_make_token(tokenizer, &token, Token_Kind_Eof, cursor, advance); + switch (c) { + default: + tokenizer_make_token(tokenizer, out, Token_Kind_Invalid_Char, cursor, + advance); + } - *out = token; buffer->cursor = advance; return true; } -- cgit v1.2.3