summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/common.cc 1
-rw-r--r-- src/source.cc 16
-rw-r--r-- src/token.cc 2
-rw-r--r-- src/tokenizer.cc 58
-rw-r--r-- src/utf8.cc 25
-rw-r--r-- src/voidc.cc 9
6 files changed, 95 insertions, 16 deletions
diff --git a/src/common.cc b/src/common.cc
index 64c462b..de2f6de 100644
--- a/src/common.cc
+++ b/src/common.cc
@@ -122,7 +122,6 @@ static inline void link_remove(Link* item) {
Link* next = item->next;
if (prev != nullptr) prev->next = next;
-
if (next != nullptr) next->prev = prev;
item->prev = item->next = nullptr;
diff --git a/src/source.cc b/src/source.cc
index ff1257b..c77c089 100644
--- a/src/source.cc
+++ b/src/source.cc
@@ -71,24 +71,24 @@ struct Buffer_Stack {
Link* stack;
};
-void buffer_stack_push(Buffer_Stack* manager, Buffer* b) {
- assert_neq(manager, nullptr);
+void buffer_stack_push(Buffer_Stack* stack, Buffer* b) {
+ assert_neq(stack, nullptr);
assert_neq(b, nullptr);
- if (likely(manager->stack != nullptr)) link_after(manager->stack, &b->link);
- manager->stack = &b->link;
+ if (likely(stack->stack != nullptr)) link_after(stack->stack, &b->link);
+ stack->stack = &b->link;
}
-bool buffer_stack_pop(Buffer_Stack* manager, Buffer** b) {
- assert_neq(manager, nullptr);
+bool buffer_stack_pop(Buffer_Stack* stack, Buffer** b) {
+ assert_neq(stack, nullptr);
assert_neq(b, nullptr);
- Link* link = manager->stack;
+ Link* link = stack->stack;
if (unlikely(link == nullptr)) return false;
Link* next = link->prev;
link_remove(link);
- manager->stack = next;
+ stack->stack = next;
Buffer* buffer = containerof(Buffer, link, link);
*b = buffer;
diff --git a/src/token.cc b/src/token.cc
index ea0a4e5..e53abfe 100644
--- a/src/token.cc
+++ b/src/token.cc
@@ -27,6 +27,8 @@ struct Token {
Token_Kind kind;
String text;
Span span;
+
+ Token(Token_Kind kind, String text, Span span) : kind(kind), text(text), span(span) {}
};
#endif
diff --git a/src/tokenizer.cc b/src/tokenizer.cc
new file mode 100644
index 0000000..54634de
--- /dev/null
+++ b/src/tokenizer.cc
@@ -0,0 +1,58 @@
+#ifndef TOKENIZER_CC
+#define TOKENIZER_CC
+
+#include "common.cc"
+#include "source.cc"
+#include "utf8.cc"
+#include "token.cc"
+
+// Tokenizer state: the buffer currently being read and the stack that supplies further buffers.
+struct Tokenizer {
+    const Buffer* buffer = nullptr;  // active buffer; stays null until one is popped from `stack`
+    Buffer_Stack* stack;             // where the next buffer is popped from
+
+    // Starts with no active buffer; `stack` is stored as given.
+    Tokenizer(Buffer_Stack* stack) : stack(stack) {}
+};
+
+// Returns the tokenizer's active buffer. When none is active, pops one from the
+// stack and makes it active; returns nullptr if the pop reports failure.
+static inline const Buffer* tokenizer_get_buffer(Tokenizer* tokenizer) {
+    assert_neq(tokenizer, nullptr);
+
+    if(tokenizer->buffer == nullptr) {
+        Buffer* popped = nullptr;
+        // On a failed pop the active buffer stays null, which is what gets returned below.
+        if(buffer_stack_pop(tokenizer->stack, &popped)) tokenizer->buffer = popped;
+    }
+
+    return tokenizer->buffer;
+}
+
+// Returns the single-byte character found at *offset in the active buffer's
+// content and moves *offset past it. Panics when the byte announces a
+// multi-byte UTF-8 sequence, which is not supported yet.
+//
+// tokenizer: must have an active buffer (tokenizer->buffer is dereferenced).
+// offset:    in/out byte position within the buffer content.
+static inline char tokenizer_advance(const Tokenizer* tokenizer, usize* offset) {
+    assert_neq(tokenizer, nullptr);
+    assert_neq(offset, nullptr);
+
+    const String text = tokenizer->buffer->content;
+
+    const unsigned char* c = text[*offset];
+    wchar wc = (wchar)*c;
+
+    u8 nobytes = utf8_nobytes(wc);
+    if(nobytes > 1) panic("no support for multi-byte chars: %d:%d", wc, nobytes);
+
+    // Advance the caller's offset; `offset += nobytes` only moved the local pointer.
+    *offset += nobytes;
+    return (char)wc;
+}
+
+// Token production is not implemented yet: this only discards buffers whose
+// cursor has reached the end of their content, and it always returns false
+// without writing anything to *out.
+bool tokenizer_next(Tokenizer* tokenizer, Token* out) {
+    assert_neq(tokenizer, nullptr);
+    assert_neq(out, nullptr);
+
+    for(;;) {
+        const Buffer* buffer = tokenizer_get_buffer(tokenizer);
+        if(buffer == nullptr) return false;
+
+        // A buffer with content left past its cursor ends the skipping.
+        if(buffer->cursor != buffer->content.length) break;
+
+        // Cursor is at the end: clear the active buffer so the next call pops another.
+        tokenizer->buffer = nullptr;
+    }
+
+    return false;
+}
+
+#endif
diff --git a/src/utf8.cc b/src/utf8.cc
new file mode 100644
index 0000000..5f3a57a
--- /dev/null
+++ b/src/utf8.cc
@@ -0,0 +1,25 @@
+#ifndef UTF8_CC
+#define UTF8_CC
+
+typedef i32 wchar;  // character value type taken by utf8_nobytes
+
+#define UTF8_1BYTE 0x80  // values below this are reported as one byte
+
+#define UTF8_2SHIFT 5  // a 2-byte lead has the bit pattern 110xxxxx:
+#define UTF8_2BYTE 0x6  // shifting right by 5 leaves 0b110
+
+#define UTF8_3SHIFT 4  // a 3-byte lead has the bit pattern 1110xxxx:
+#define UTF8_3BYTE 0xE  // shifting right by 4 leaves 0b1110
+
+#define UTF8_4SHIFT 3  // a 4-byte lead has the bit pattern 11110xxx:
+#define UTF8_4BYTE 0x1E  // shifting right by 3 leaves 0b11110
+
+// Number of bytes in the UTF-8 sequence introduced by the byte value `c`.
+// Values below 0x80 count as one byte; a value matching none of the lead
+// patterns triggers a panic.
+inline u8 utf8_nobytes(wchar c) {
+    if(c < UTF8_1BYTE) return 1;
+
+    // Shift the payload bits away and compare what remains with each lead marker.
+    const wchar marker2 = c >> UTF8_2SHIFT;
+    const wchar marker3 = c >> UTF8_3SHIFT;
+    const wchar marker4 = c >> UTF8_4SHIFT;
+
+    if(marker2 == UTF8_2BYTE) return 2;
+    if(marker3 == UTF8_3BYTE) return 3;
+    if(marker4 == UTF8_4BYTE) return 4;
+
+    panic("what even is: %d\n", c);
+}
+
+#endif
diff --git a/src/voidc.cc b/src/voidc.cc
index fbb9577..e6c76e8 100644
--- a/src/voidc.cc
+++ b/src/voidc.cc
@@ -1,9 +1,9 @@
#include <cstdlib>
-#include <cstring>
#include "common.cc"
#include "memory.cc"
#include "source.cc"
+#include "tokenizer.cc"
static const char* SOURCE = R"(
#include <stdlib.h>
@@ -27,12 +27,7 @@ int main() {
if (!ret) return EXIT_FAILURE;
buffer_stack_push(&stack, buffer);
- buffer_stack_push(&stack, buffer);
- int c = 0;
- while(buffer_stack_pop(&stack, &buffer)) {
- c += 1;
- }
+ Tokenizer tokenizer(&stack);
- assert(c == 2);
}