#ifndef TOKENIZER_CC
#define TOKENIZER_CC
#include "common.cc"
#include "source.cc"
#include "utf8.cc"
#include "token.cc"
// Produces tokens from a sequence of buffers supplied by a Buffer_Stack.
// Holds at most one buffer "active" at a time; the next buffer is popped
// from the stack once the active one is drained.
struct Tokenizer {
    Buffer* buffer;       // buffer currently being scanned; nullptr when none is active
    Buffer_Stack* stack;  // source of further buffers (not owned)
    Tokenizer(Buffer_Stack* stack) {
        this->buffer = nullptr;
        this->stack = stack;
    }
};
// Returns the buffer currently being tokenized, popping the next one off the
// stack when none is active. Returns nullptr once the stack is exhausted;
// in that case tokenizer->buffer is left untouched (still nullptr).
static inline Buffer* tokenizer_get_buffer(Tokenizer* tokenizer) {
    assert_neq(tokenizer, nullptr);
    Buffer* active = tokenizer->buffer;
    if(active == nullptr) {
        if(!buffer_stack_pop(tokenizer->stack, &active)) {
            return nullptr;
        }
        tokenizer->buffer = active;
    }
    return active;
}
static inline char tokenizer_advance(const Tokenizer* tokenizer, usize* offset) {
const String text = tokenizer->buffer->content;
unsigned char c = *text[*offset];
u8 nobytes = utf8_nobytes(c);
if(nobytes > 1) panic("no support for multi-byte chars: %c:%d", c, nobytes);
offset += nobytes;
return c;
}
// Builds a String covering the half-open range [start, end) of the active
// buffer's content. The active buffer must be set before calling.
static inline String tokenizer_make_lexeme(const Tokenizer* tokenizer, usize start, usize end) {
    assert_neq(tokenizer, nullptr);
    const usize length = end - start;
    return String(tokenizer->buffer->content[start], length);
}
// Produces the next token into *out. Drained buffers are discarded and the
// next one is pulled from the stack transparently. Returns false once every
// buffer has been consumed.
//
// NOTE(review): every token is currently emitted as Token_Kind_Eof with a
// zeroed Span — presumably placeholder behavior while classification is
// unimplemented; confirm before relying on kind/span.
bool tokenizer_next(Tokenizer* tokenizer, Token* out) {
    assert_neq(tokenizer, nullptr);
    assert_neq(out, nullptr);
    for(;;) {
        Buffer* buffer = tokenizer_get_buffer(tokenizer);
        if(buffer == nullptr) return false;
        usize begin = buffer->cursor;
        if(begin == buffer->content.length) {
            // Active buffer drained; drop it and try the next one.
            tokenizer->buffer = nullptr;
            continue;
        }
        usize end = begin;
        tokenizer_advance(tokenizer, &end);
        String lexeme = tokenizer_make_lexeme(tokenizer, begin, end);
        *out = Token(Token_Kind_Eof, lexeme, Span(buffer->file, 0, 0));
        buffer->cursor = end;
        return true;
    }
}
#endif