#ifndef UTF8_CC #define UTF8_CC typedef i32 wchar; #define UTF8_1BYTE 0x80 #define UTF8_2SHIFT 5 #define UTF8_2BYTE 0x6 #define UTF8_3SHIFT 4 #define UTF8_3BYTE 0xE #define UTF8_4SHIFT 3 #define UTF8_4BYTE 0x1E inline u8 utf8_nobytes(unsigned char c) { if (c < UTF8_1BYTE) return 1; if ((c >> UTF8_2SHIFT) == UTF8_2BYTE) return 2; if ((c >> UTF8_3SHIFT) == UTF8_3BYTE) return 3; if ((c >> UTF8_4SHIFT) == UTF8_4BYTE) return 4; panic("what even is: %d\n", c); } inline bool utf8_is_identifier(wchar c) { return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z'); } inline bool utf8_is_number(wchar c) { return ('0' <= c && c <= '9'); } inline bool utf8_is_alnum(wchar c) { return utf8_is_identifier(c) || utf8_is_number(c); } #endif