#ifndef TOKENIZER_CC #define TOKENIZER_CC #include "common.cc" #include "source.cc" #include "utf8.cc" #include "token.cc" struct Tokenizer { const Buffer* buffer; Buffer_Stack* stack; Tokenizer(Buffer_Stack* stack) : buffer(nullptr), stack(stack) {} }; static inline const Buffer* tokenizer_get_buffer(Tokenizer* tokenizer) { assert_neq(tokenizer, nullptr); if(tokenizer->buffer != nullptr) return tokenizer->buffer; Buffer* buffer = nullptr; if(!buffer_stack_pop(tokenizer->stack, &buffer)) return nullptr; tokenizer->buffer = buffer; return buffer; } static inline char tokenizer_advance(const Tokenizer* tokenizer, usize* offset) { const String text = tokenizer->buffer->content; const unsigned char* c = text[*offset]; wchar wc = (wchar)*c; u8 nobytes = utf8_nobytes(wc); if(nobytes > 1) panic("no support for multi-byte chars: %d:%d", wc, nobytes); offset += nobytes; return (char)wc; } bool tokenizer_next(Tokenizer* tokenizer, Token* out) { assert_neq(tokenizer, nullptr); assert_neq(out, nullptr); again: const Buffer* buffer = tokenizer_get_buffer(tokenizer); if(buffer == nullptr) return false; usize offset = buffer->cursor; if(offset == buffer->content.length) { tokenizer->buffer = nullptr; goto again; } return false; } #endif