#ifndef TOKENIZER_CC #define TOKENIZER_CC #include "common.cc" #include "source.cc" #include "utf8.cc" #include "token.cc" struct Tokenizer { const Buffer* buffer; Buffer_Stack* stack; Tokenizer(Buffer_Stack* stack) : buffer(nullptr), stack(stack) {} }; static inline const Buffer* tokenizer_get_buffer(Tokenizer* tokenizer) { assert_neq(tokenizer, nullptr); if(tokenizer->buffer != nullptr) return tokenizer->buffer; Buffer* buffer = nullptr; if(!buffer_stack_pop(tokenizer->stack, &buffer)) return nullptr; tokenizer->buffer = buffer; return buffer; } static inline char tokenizer_advance(const Tokenizer* tokenizer, usize* offset) { const String text = tokenizer->buffer->content; unsigned char c = *text[*offset]; u8 nobytes = utf8_nobytes(c); if(nobytes > 1) panic("no support for multi-byte chars: %c:%d", c, nobytes); offset += nobytes; return c; } bool tokenizer_next(Tokenizer* tokenizer, Token* out) { assert_neq(tokenizer, nullptr); assert_neq(out, nullptr); again: const Buffer* buffer = tokenizer_get_buffer(tokenizer); if(buffer == nullptr) return false; usize offset = buffer->cursor; if(offset == buffer->content.length) { tokenizer->buffer = nullptr; goto again; } tokenizer_advance(tokenizer, &offset); const unsigned char* str = buffer->content[offset]; *out = Token(Token_Kind_Eof, String(str, 1), Span(buffer->file, 0, 0)); return true; } #endif