diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/source.cc | 1 | ||||
| -rw-r--r-- | src/token.cc | 1 | ||||
| -rw-r--r-- | src/tokenizer.cc | 16 | ||||
| -rw-r--r-- | src/utf8.cc | 2 | ||||
| -rw-r--r-- | src/voidc.cc | 6 |
5 files changed, 18 insertions, 8 deletions
diff --git a/src/source.cc b/src/source.cc
index c77c089..a393213 100644
--- a/src/source.cc
+++ b/src/source.cc
@@ -10,6 +10,7 @@ struct Span {
 String file;
 usize start, end;
+ Span() : file(), start(0), end(0) {}
 Span(String file, usize start, usize end) : file(file), start(start), end(end) {}
 };
diff --git a/src/token.cc b/src/token.cc
index e53abfe..7f38f71 100644
--- a/src/token.cc
+++ b/src/token.cc
@@ -28,6 +28,7 @@ struct Token {
 String text;
 Span span;
+ Token() : kind(Token_Kind_Eof), text(), span() {}
 Token(Token_Kind kind, String text, Span span) : kind(kind), text(text), span(span) {}
 };
diff --git a/src/tokenizer.cc b/src/tokenizer.cc
index 54634de..274f572 100644
--- a/src/tokenizer.cc
+++ b/src/tokenizer.cc
@@ -28,14 +28,12 @@ static inline const Buffer* tokenizer_get_buffer(Tokenizer* tokenizer) {
 static inline char tokenizer_advance(const Tokenizer* tokenizer, usize* offset) {
 const String text = tokenizer->buffer->content;
- const unsigned char* c = text[*offset];
- wchar wc = (wchar)*c;
-
- u8 nobytes = utf8_nobytes(wc);
- if(nobytes > 1) panic("no support for multi-byte chars: %d:%d", wc, nobytes);
+ unsigned char c = *text[*offset];
+ u8 nobytes = utf8_nobytes(c);
+ if(nobytes > 1) panic("no support for multi-byte chars: %c:%d", c, nobytes);
 offset += nobytes;
- return (char)wc;
+ return c;
 }
 bool tokenizer_next(Tokenizer* tokenizer, Token* out) {
@@ -52,7 +50,11 @@ again:
 goto again;
 }
- return false;
+ tokenizer_advance(tokenizer, &offset);
+
+ const unsigned char* str = buffer->content[offset];
+ *out = Token(Token_Kind_Eof, String(str, 1), Span(buffer->file, 0, 0));
+ return true;
 }
 #endif
diff --git a/src/utf8.cc b/src/utf8.cc
index 5f3a57a..7360812 100644
--- a/src/utf8.cc
+++ b/src/utf8.cc
@@ -14,7 +14,7 @@ typedef i32 wchar;
 #define UTF8_4SHIFT 3
 #define UTF8_4BYTE 0x1E
-inline u8 utf8_nobytes(wchar c) {
+inline u8 utf8_nobytes(unsigned char c) {
 if(c < UTF8_1BYTE) return 1;
 if((c >> UTF8_2SHIFT) == UTF8_2BYTE) return 2;
 if((c >> UTF8_3SHIFT) == UTF8_3BYTE) return 3;
diff --git a/src/voidc.cc b/src/voidc.cc
index e6c76e8..bba510f 100644
--- a/src/voidc.cc
+++ b/src/voidc.cc
@@ -1,3 +1,4 @@
+#include <cstdio>
 #include <cstdlib>
 #include "common.cc"
@@ -30,4 +31,9 @@ int main() {
 Tokenizer tokenizer(&stack);
+ Token token = {};
+ while(tokenizer_next(&tokenizer, &token))
+ fprintf(stdout, "Token(kind: %d, text: %.*s)\n", token.kind, (int)token.text.length, token.text.ptr);
+
+
 }
