From 010b3e2e1eb3870724bbde6de7a0929b20bf2f75 Mon Sep 17 00:00:00 2001 From: Fabrice Date: Tue, 3 Mar 2026 07:57:50 +0100 Subject: working on utf8 handling and lexing --- src/utf8.cc | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 src/utf8.cc (limited to 'src/utf8.cc') diff --git a/src/utf8.cc b/src/utf8.cc new file mode 100644 index 0000000..5f3a57a --- /dev/null +++ b/src/utf8.cc @@ -0,0 +1,25 @@ +#ifndef UTF8_CC +#define UTF8_CC + +typedef i32 wchar; + +#define UTF8_1BYTE 0x80 + +#define UTF8_2SHIFT 5 +#define UTF8_2BYTE 0x6 + +#define UTF8_3SHIFT 4 +#define UTF8_3BYTE 0xE + +#define UTF8_4SHIFT 3 +#define UTF8_4BYTE 0x1E + +inline u8 utf8_nobytes(wchar c) { + if(c < UTF8_1BYTE) return 1; + if((c >> UTF8_2SHIFT) == UTF8_2BYTE) return 2; + if((c >> UTF8_3SHIFT) == UTF8_3BYTE) return 3; + if((c >> UTF8_4SHIFT) == UTF8_4BYTE) return 4; + panic("what even is: %d\n", c); +} + +#endif -- cgit v1.2.3