summaryrefslogtreecommitdiff
path: root/src/utf8.cc
blob: ace1ec06c313d3c1845aa886d89e4479e0e921f7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#ifndef UTF8_CC
#define UTF8_CC

// Character type accepted by the utf8_is_* classifiers in this file; an alias
// for i32 (declared outside the visible part of this file).
// NOTE(review): presumably holds one decoded Unicode code point -- confirm
// against the callers.
typedef i32 wchar;

// Lead bytes strictly below this value are reported as 1-byte sequences by
// utf8_nobytes() (high bit clear: 0xxxxxxx).
#define UTF8_1BYTE 0x80

// utf8_nobytes() reports 2 bytes when (lead >> UTF8_2SHIFT) == UTF8_2BYTE,
// i.e. when the top three bits of the lead byte are 110.
#define UTF8_2SHIFT 5
#define UTF8_2BYTE 0x6

// utf8_nobytes() reports 3 bytes when (lead >> UTF8_3SHIFT) == UTF8_3BYTE,
// i.e. when the top four bits of the lead byte are 1110.
#define UTF8_3SHIFT 4
#define UTF8_3BYTE 0xE

// utf8_nobytes() reports 4 bytes when (lead >> UTF8_4SHIFT) == UTF8_4BYTE,
// i.e. when the top five bits of the lead byte are 11110.
#define UTF8_4SHIFT 3
#define UTF8_4BYTE 0x1E

// Returns the number of bytes (1-4) in the UTF-8 sequence introduced by the
// lead byte `c`, decided from the byte's high bits:
//
//   0xxxxxxx -> 1    110xxxxx -> 2    1110xxxx -> 3    11110xxx -> 4
//
// Any other byte (10xxxxxx or 11111xxx) cannot start a sequence: it is
// reported through panic(), and 0 is returned should panic() ever come back.
inline u8 utf8_nobytes(unsigned char c) {
  if (c < UTF8_1BYTE) return 1;
  if ((c >> UTF8_2SHIFT) == UTF8_2BYTE) return 2;
  if ((c >> UTF8_3SHIFT) == UTF8_3BYTE) return 3;
  if ((c >> UTF8_4SHIFT) == UTF8_4BYTE) return 4;

  panic("what even is: %d\n", c);
  // Explicit return: without it control would flow off the end of a non-void
  // function whenever panic() returns, which is undefined behaviour.
  return 0;
}

// True when `c` is an ASCII letter, i.e. lies in 'a'..'z' or 'A'..'Z'.
// Every other value (digits, '_', anything above 'z', negatives) yields false.
inline bool utf8_is_identifier(wchar c) {
  const bool lower = c >= 'a' && c <= 'z';
  const bool upper = c >= 'A' && c <= 'Z';
  return lower || upper;
}

// True when `c` is one of the ASCII decimal digits '0'..'9'.
inline bool utf8_is_number(wchar c) {
  if (c < '0') return false;
  return c <= '9';
}

// True when `c` is accepted by utf8_is_identifier() or by utf8_is_number().
inline bool utf8_is_alnum(wchar c) {
  if (utf8_is_identifier(c)) return true;
  return utf8_is_number(c);
}

#endif