summaryrefslogtreecommitdiff
path: root/src/utf8.cc
diff options
context:
space:
mode:
authorFabrice <fabrice@schaub-dev.xyz>2026-03-03 07:57:50 +0100
committerFabrice <fabrice@schaub-dev.xyz>2026-03-03 07:57:50 +0100
commit010b3e2e1eb3870724bbde6de7a0929b20bf2f75 (patch)
treea4e51e00669fde113ba38c38dac2d92c8e9a0387 /src/utf8.cc
parentf007cab7e755cb2f0353670765f890caf446d267 (diff)
working on utf8 handling and lexing
Diffstat (limited to 'src/utf8.cc')
-rw-r--r--src/utf8.cc25
1 files changed, 25 insertions, 0 deletions
diff --git a/src/utf8.cc b/src/utf8.cc
new file mode 100644
index 0000000..5f3a57a
--- /dev/null
+++ b/src/utf8.cc
@@ -0,0 +1,25 @@
+#ifndef UTF8_CC
+#define UTF8_CC
+
+/* Integer type used for character values in this file. `i32` is declared
+ * elsewhere (presumably a signed 32-bit integer -- TODO confirm). */
+typedef i32 wchar;
+
+/* Values below 0x80 are classified as one-byte sequences by utf8_nobytes(). */
+#define UTF8_1BYTE 0x80
+
+/* The pairs below are used by utf8_nobytes() as (c >> UTF8_nSHIFT) == UTF8_nBYTE.
+ * Shift 5, pattern 0x6 (binary 110): matches 0xC0..0xDF, reported as 2 bytes. */
+#define UTF8_2SHIFT 5
+#define UTF8_2BYTE 0x6
+
+/* Shift 4, pattern 0xE (binary 1110): matches 0xE0..0xEF, reported as 3 bytes. */
+#define UTF8_3SHIFT 4
+#define UTF8_3BYTE 0xE
+
+/* Shift 3, pattern 0x1E (binary 11110): matches 0xF0..0xF7, reported as 4 bytes. */
+#define UTF8_4SHIFT 3
+#define UTF8_4BYTE 0x1E
+
+/*
+ * Returns the length in bytes (1-4) of the UTF-8 sequence introduced by the
+ * leading byte value `c`:
+ *
+ *   0x00..0x7F -> 1
+ *   0xC0..0xDF -> 2
+ *   0xE0..0xEF -> 3
+ *   0xF0..0xF7 -> 4
+ *
+ * Only the bit-prefix of `c` is examined; no further validation is done
+ * (for example 0xC0, 0xC1 and 0xF5..0xF7 are still classified by prefix).
+ *
+ * Any other value -- negative, 0x80..0xBF (a continuation byte), or 0xF8 and
+ * above -- is not a leading byte and is reported through panic(). Should
+ * panic() return, the result is 0.
+ */
+inline u8 utf8_nobytes(wchar c) {
+    /* wchar aliases i32, so a byte sign-extended from a signed char arrives
+     * negative; without the lower bound it would be mistaken for ASCII. */
+    if(c >= 0 && c < UTF8_1BYTE) return 1;
+    if((c >> UTF8_2SHIFT) == UTF8_2BYTE) return 2;
+    if((c >> UTF8_3SHIFT) == UTF8_3BYTE) return 3;
+    if((c >> UTF8_4SHIFT) == UTF8_4BYTE) return 4;
+    panic("what even is: %d\n", c);
+    /* Only reached if panic() returns; avoids flowing off the end of a
+     * non-void function. */
+    return 0;
+}
+
+#endif