summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/source.cc 1
-rw-r--r-- src/token.cc 1
-rw-r--r-- src/tokenizer.cc 16
-rw-r--r-- src/utf8.cc 2
-rw-r--r-- src/voidc.cc 6
5 files changed, 18 insertions, 8 deletions
diff --git a/src/source.cc b/src/source.cc
index c77c089..a393213 100644
--- a/src/source.cc
+++ b/src/source.cc
@@ -10,6 +10,7 @@ struct Span {
String file;
usize start, end;
+ Span() : file(), start(0), end(0) {}
Span(String file, usize start, usize end) : file(file), start(start), end(end) {}
};
diff --git a/src/token.cc b/src/token.cc
index e53abfe..7f38f71 100644
--- a/src/token.cc
+++ b/src/token.cc
@@ -28,6 +28,7 @@ struct Token {
String text;
Span span;
+ Token() : kind(Token_Kind_Eof), text(), span() {}
Token(Token_Kind kind, String text, Span span) : kind(kind), text(text), span(span) {}
};
diff --git a/src/tokenizer.cc b/src/tokenizer.cc
index 54634de..274f572 100644
--- a/src/tokenizer.cc
+++ b/src/tokenizer.cc
@@ -28,14 +28,12 @@ static inline const Buffer* tokenizer_get_buffer(Tokenizer* tokenizer) {
static inline char tokenizer_advance(const Tokenizer* tokenizer, usize* offset) {
const String text = tokenizer->buffer->content;
- const unsigned char* c = text[*offset];
- wchar wc = (wchar)*c;
-
- u8 nobytes = utf8_nobytes(wc);
- if(nobytes > 1) panic("no support for multi-byte chars: %d:%d", wc, nobytes);
+ unsigned char c = *text[*offset];
+ u8 nobytes = utf8_nobytes(c);
+ if(nobytes > 1) panic("no support for multi-byte chars: %c:%d", c, nobytes);
offset += nobytes;
- return (char)wc;
+ return c;
}
bool tokenizer_next(Tokenizer* tokenizer, Token* out) {
@@ -52,7 +50,11 @@ again:
goto again;
}
- return false;
+ tokenizer_advance(tokenizer, &offset);
+
+ const unsigned char* str = buffer->content[offset];
+ *out = Token(Token_Kind_Eof, String(str, 1), Span(buffer->file, 0, 0));
+ return true;
}
#endif
diff --git a/src/utf8.cc b/src/utf8.cc
index 5f3a57a..7360812 100644
--- a/src/utf8.cc
+++ b/src/utf8.cc
@@ -14,7 +14,7 @@ typedef i32 wchar;
#define UTF8_4SHIFT 3
#define UTF8_4BYTE 0x1E
-inline u8 utf8_nobytes(wchar c) {
+inline u8 utf8_nobytes(unsigned char c) {
if(c < UTF8_1BYTE) return 1;
if((c >> UTF8_2SHIFT) == UTF8_2BYTE) return 2;
if((c >> UTF8_3SHIFT) == UTF8_3BYTE) return 3;
diff --git a/src/voidc.cc b/src/voidc.cc
index e6c76e8..bba510f 100644
--- a/src/voidc.cc
+++ b/src/voidc.cc
@@ -1,3 +1,4 @@
+#include <cstdio>
#include <cstdlib>
#include "common.cc"
@@ -30,4 +31,9 @@ int main() {
Tokenizer tokenizer(&stack);
+ Token token = {};
+ while(tokenizer_next(&tokenizer, &token))
+ fprintf(stdout, "Token(kind: %d, text: %.*s)\n", token.kind, (int)token.text.length, token.text.ptr);
+
+
}