summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Fabrice <fabrice@schaub-dev.xyz> 2026-03-03 12:39:25 +0100
committer: Fabrice <fabrice@schaub-dev.xyz> 2026-03-03 12:39:25 +0100
commit: 448b3baa76b6bd25e972e1134941cb649b6a91d0 (patch)
tree: 89d6c43cab9b09ec1cfb20abb19d04fd5b5c1fcb
parent: beeffb374b66bb36d16628b8596782eb98107ef7 (diff)
actually lexing [HEAD, master]
-rw-r--r-- src/token.cc 3
-rw-r--r-- src/tokenizer.cc 18
2 files changed, 11 insertions, 10 deletions
diff --git a/src/token.cc b/src/token.cc
index ed1f65b..d3a787a 100644
--- a/src/token.cc
+++ b/src/token.cc
@@ -7,7 +7,6 @@
#include "source.cc"
#define TOKEN_KINDS_NOLEX \
- X(Eof) \
X(Invalid_Char) \
X(Invalid_Literal)
@@ -30,7 +29,7 @@ struct Token {
String text;
Span span;
- Token() : kind(Token_Kind_Eof), text(), span() {}
+ Token() : kind(Token_Kind_Invalid_Char), text(), span() {}
Token(Token_Kind kind, String text, Span span)
: kind(kind), text(text), span(span) {}
};
diff --git a/src/tokenizer.cc b/src/tokenizer.cc
index f2efda4..26763eb 100644
--- a/src/tokenizer.cc
+++ b/src/tokenizer.cc
@@ -47,7 +47,7 @@ static Buffer* tokenizer_get_buffer(Tokenizer* tokenizer, usize* cursor) {
Buffer* curr = tokenizer->buffer;
if (likely(curr != nullptr)) {
*cursor = curr->cursor;
- if(*cursor < curr->content.length) return curr;
+ if (*cursor < curr->content.length) return curr;
}
if (!buffer_stack_pop(tokenizer->stack, &curr)) return nullptr;
@@ -57,8 +57,7 @@ static Buffer* tokenizer_get_buffer(Tokenizer* tokenizer, usize* cursor) {
return curr;
}
-static char tokenizer_advance(const Tokenizer* tokenizer,
- usize* offset) {
+static wchar tokenizer_advance(const Tokenizer* tokenizer, usize* offset) {
const String text = tokenizer->buffer->content;
unsigned char c = *text[*offset];
@@ -66,7 +65,7 @@ static char tokenizer_advance(const Tokenizer* tokenizer,
if (nobytes > 1) panic("no support for multi-byte chars: %c:%d", c, nobytes);
*offset += nobytes;
- return c;
+ return (wchar)c;
}
bool tokenizer_next(Tokenizer* tokenizer, Token* out) {
@@ -76,13 +75,16 @@ bool tokenizer_next(Tokenizer* tokenizer, Token* out) {
usize cursor;
Buffer* buffer = tokenizer_get_buffer(tokenizer, &cursor);
if (buffer == nullptr) return false;
+
usize advance = cursor;
+ wchar c = tokenizer_advance(tokenizer, &advance);
- tokenizer_advance(tokenizer, &advance);
- Token token = {};
- tokenizer_make_token(tokenizer, &token, Token_Kind_Eof, cursor, advance);
+ switch (c) {
+ default:
+ tokenizer_make_token(tokenizer, out, Token_Kind_Invalid_Char, cursor,
+ advance);
+ }
- *out = token;
buffer->cursor = advance;
return true;
}