src
utf8.h
Go to the documentation of this file.
1 #ifndef _RE2C_IR_REGEXP_ENCODING_UTF8_UTF8_
2 #define _RE2C_IR_REGEXP_ENCODING_UTF8_UTF8_
3 
4 #include "src/util/c99_stdint.h"
5 
6 namespace re2c {
7 
8 class utf8 // UTF-8 encoding helpers: a rune type, encoding constants and static conversion routines (only static members are declared)
9 {
10 public:
11  typedef uint32_t rune; // a Unicode rune, held in a 32-bit unsigned integer
12 
13  // maximum characters per rune, i.e. the longest UTF-8 bytestring produced for a single rune
14  // enum instead of static const member because of [-Wvla]: the static const members below have no in-class initializer, so they are not compile-time constants for includers (presumably this value is used as an array bound -- confirm at call sites)
15  enum { MAX_RUNE_LENGTH = 4u };
16 
17  // decoding error (declared only; the value is defined outside this header)
18  static const uint32_t ERROR; // NOTE(review): the name ERROR may clash with a macro of the same name from some platform headers (e.g. Windows) -- confirm include order is safe
19 
20  // maximal runes for each rune length (declared only; values are defined outside this header)
21  static const rune MAX_1BYTE_RUNE;
22  static const rune MAX_2BYTE_RUNE;
23  static const rune MAX_3BYTE_RUNE;
24  static const rune MAX_4BYTE_RUNE;
25  static const rune MAX_RUNE; // NOTE(review): presumably the overall upper bound on runes handled by this class -- confirm against the definition
26 
27  static const uint32_t PREFIX_1BYTE; // NOTE(review): PREFIX_nBYTE presumably hold the marker bits of the leading byte of an n-byte sequence -- confirm against the definitions
28  static const uint32_t INFIX; // NOTE(review): presumably the marker bits of a non-leading (continuation) byte -- confirm
29  static const uint32_t PREFIX_2BYTE;
30  static const uint32_t PREFIX_3BYTE;
31  static const uint32_t PREFIX_4BYTE;
32 
33  static const uint32_t SHIFT; // NOTE(review): presumably the number of payload bits carried by each continuation byte -- confirm
34  static const uint32_t MASK; // NOTE(review): presumably the bit mask selecting those payload bits -- confirm
35 
36  // UTF-8 bytestring for given Unicode rune: s is the non-const output buffer, r the rune to encode. NOTE(review): each uint32_t element presumably holds one byte, the return value is presumably the number of elements written, and s presumably needs room for MAX_RUNE_LENGTH elements -- confirm in the definition
37  static uint32_t rune_to_bytes(uint32_t * s, rune r);
38 
39  // length of UTF-8 bytestring for given Unicode rune
40  static uint32_t rune_length(rune r);
41 
42  // maximal Unicode rune with given length of UTF-8 bytestring (i is that length; behaviour for lengths outside 1..MAX_RUNE_LENGTH is not visible here)
43  static rune max_rune(uint32_t i);
44 };
45 
46 } // namespace re2c
47 
48 #endif // _RE2C_IR_REGEXP_ENCODING_UTF8_UTF8_
static const rune MAX_3BYTE_RUNE
Definition: utf8.h:23
static const uint32_t PREFIX_2BYTE
Definition: utf8.h:29
static const uint32_t SHIFT
Definition: utf8.h:33
uint32_t rune
Definition: utf8.h:11
static rune max_rune(uint32_t i)
Definition: utf8.cc:72
static const rune MAX_2BYTE_RUNE
Definition: utf8.h:22
static const uint32_t MASK
Definition: utf8.h:34
static const uint32_t PREFIX_1BYTE
Definition: utf8.h:27
static const rune MAX_4BYTE_RUNE
Definition: utf8.h:24
static const rune MAX_1BYTE_RUNE
Definition: utf8.h:21
static const uint32_t INFIX
Definition: utf8.h:28
re2c::utf8
Definition: utf8.h:8
static const uint32_t PREFIX_4BYTE
Definition: utf8.h:31
static const uint32_t ERROR
Definition: utf8.h:18
static uint32_t rune_to_bytes(uint32_t *s, rune r)
Definition: utf8.cc:22
static uint32_t rune_length(rune r)
Definition: utf8.cc:64
static const uint32_t PREFIX_3BYTE
Definition: utf8.h:30
Definition: bitmap.cc:10
static const rune MAX_RUNE
Definition: utf8.h:25