src
Public Types | Static Public Member Functions | Static Public Attributes | List of all members
re2c::utf8 Class Reference

#include <utf8.h>

Public Types

enum  { MAX_RUNE_LENGTH = 4u }
 
typedef uint32_t rune
 

Static Public Member Functions

static uint32_t rune_to_bytes (uint32_t *s, rune r)
 
static uint32_t rune_length (rune r)
 
static rune max_rune (uint32_t i)
 

Static Public Attributes

static const uint32_t ERROR = 0xFFFDu
 
static const rune MAX_1BYTE_RUNE = 0x7Fu
 
static const rune MAX_2BYTE_RUNE = 0x7FFu
 
static const rune MAX_3BYTE_RUNE = 0xFFFFu
 
static const rune MAX_4BYTE_RUNE = 0x10FFFFu
 
static const rune MAX_RUNE = utf8::MAX_4BYTE_RUNE
 
static const uint32_t PREFIX_1BYTE = 0u
 
static const uint32_t INFIX = 0x80u
 
static const uint32_t PREFIX_2BYTE = 0xC0u
 
static const uint32_t PREFIX_3BYTE = 0xE0u
 
static const uint32_t PREFIX_4BYTE = 0xF0u
 
static const uint32_t SHIFT = 6u
 
static const uint32_t MASK = 0x3Fu
 

Detailed Description

Definition at line 8 of file utf8.h.

Member Typedef Documentation

typedef uint32_t re2c::utf8::rune

Definition at line 11 of file utf8.h.

Member Enumeration Documentation

anonymous enum
Enumerator
MAX_RUNE_LENGTH 

Definition at line 15 of file utf8.h.

Member Function Documentation

utf8::rune re2c::utf8::max_rune ( uint32_t  i)
static

Definition at line 72 of file utf8.cc.

73 {
74  switch (i)
75  {
76  case 1: return MAX_1BYTE_RUNE;
77  case 2: return MAX_2BYTE_RUNE;
78  case 3: return MAX_3BYTE_RUNE;
79  case 4: return MAX_4BYTE_RUNE;
80  default: return ERROR;
81  }
82 }
static const rune MAX_3BYTE_RUNE
Definition: utf8.h:23
static const rune MAX_2BYTE_RUNE
Definition: utf8.h:22
static const rune MAX_4BYTE_RUNE
Definition: utf8.h:24
static const rune MAX_1BYTE_RUNE
Definition: utf8.h:21
static const uint32_t ERROR
Definition: utf8.h:18

Here is the caller graph for this function:

uint32_t re2c::utf8::rune_length ( rune  r)
static

Definition at line 64 of file utf8.cc.

65 {
66  if (r <= MAX_2BYTE_RUNE)
67  return r <= MAX_1BYTE_RUNE ? 1 : 2;
68  else
69  return r <= MAX_3BYTE_RUNE ? 3 : 4;
70 }
static const rune MAX_3BYTE_RUNE
Definition: utf8.h:23
static const rune MAX_2BYTE_RUNE
Definition: utf8.h:22
static const rune MAX_1BYTE_RUNE
Definition: utf8.h:21

Here is the caller graph for this function:

uint32_t re2c::utf8::rune_to_bytes ( uint32_t *  s, rune  r )
static

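Definition at line 22 of file utf8.cc. Note: the listing below uses the parameter names str and c where the signature above shows s and r.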

23 {
24  // one byte sequence: 0-0x7F => 0xxxxxxx
25  if (c <= MAX_1BYTE_RUNE)
26  {
27  str[0] = PREFIX_1BYTE | c;
28  return 1;
29  }
30 
31  // two byte sequence: 0x80-0x7FF => 110xxxxx 10xxxxxx
32  if (c <= MAX_2BYTE_RUNE)
33  {
34  str[0] = PREFIX_2BYTE | (c >> 1*SHIFT);
35  str[1] = INFIX | (c & MASK);
36  return 2;
37  }
38 
39  // If the Rune is out of range, convert it to the error rune.
40  // Do this test here because the error rune encodes to three bytes.
41  // Doing it earlier would duplicate work, since an out of range
42  // Rune wouldn't have fit in one or two bytes.
43  if (c > MAX_RUNE)
44  c = ERROR;
45 
46  // three byte sequence: 0x800 - 0xFFFF => 1110xxxx 10xxxxxx 10xxxxxx
47  if (c <= MAX_3BYTE_RUNE)
48  {
49  str[0] = PREFIX_3BYTE | (c >> 2*SHIFT);
50  str[1] = INFIX | ((c >> 1*SHIFT) & MASK);
51  str[2] = INFIX | (c & MASK);
52  return 3;
53  }
54 
55  // four byte sequence (21-bit value):
56  // 0x10000 - 0x1FFFFF => 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
57  str[0] = PREFIX_4BYTE | (c >> 3*SHIFT);
58  str[1] = INFIX | ((c >> 2*SHIFT) & MASK);
59  str[2] = INFIX | ((c >> 1*SHIFT) & MASK);
60  str[3] = INFIX | (c & MASK);
61  return 4;
62 }
static const rune MAX_3BYTE_RUNE
Definition: utf8.h:23
static const uint32_t PREFIX_2BYTE
Definition: utf8.h:29
static const uint32_t SHIFT
Definition: utf8.h:33
static const rune MAX_2BYTE_RUNE
Definition: utf8.h:22
static const uint32_t MASK
Definition: utf8.h:34
static const uint32_t PREFIX_1BYTE
Definition: utf8.h:27
static const rune MAX_1BYTE_RUNE
Definition: utf8.h:21
static const uint32_t INFIX
Definition: utf8.h:28
static const uint32_t PREFIX_4BYTE
Definition: utf8.h:31
static const uint32_t ERROR
Definition: utf8.h:18
static const uint32_t PREFIX_3BYTE
Definition: utf8.h:30
static const rune MAX_RUNE
Definition: utf8.h:25

Here is the caller graph for this function:

Member Data Documentation

const uint32_t re2c::utf8::ERROR = 0xFFFDu
static

Definition at line 18 of file utf8.h.

const uint32_t re2c::utf8::INFIX = 0x80u
static

Definition at line 28 of file utf8.h.

const uint32_t re2c::utf8::MASK = 0x3Fu
static

Definition at line 34 of file utf8.h.

const utf8::rune re2c::utf8::MAX_1BYTE_RUNE = 0x7Fu
static

Definition at line 21 of file utf8.h.

const utf8::rune re2c::utf8::MAX_2BYTE_RUNE = 0x7FFu
static

Definition at line 22 of file utf8.h.

const utf8::rune re2c::utf8::MAX_3BYTE_RUNE = 0xFFFFu
static

Definition at line 23 of file utf8.h.

const utf8::rune re2c::utf8::MAX_4BYTE_RUNE = 0x10FFFFu
static

Definition at line 24 of file utf8.h.

const utf8::rune re2c::utf8::MAX_RUNE = utf8::MAX_4BYTE_RUNE
static

Definition at line 25 of file utf8.h.

const uint32_t re2c::utf8::PREFIX_1BYTE = 0u
static

Definition at line 27 of file utf8.h.

const uint32_t re2c::utf8::PREFIX_2BYTE = 0xC0u
static

Definition at line 29 of file utf8.h.

const uint32_t re2c::utf8::PREFIX_3BYTE = 0xE0u
static

Definition at line 30 of file utf8.h.

const uint32_t re2c::utf8::PREFIX_4BYTE = 0xF0u
static

Definition at line 31 of file utf8.h.

const uint32_t re2c::utf8::SHIFT = 6u
static

Definition at line 33 of file utf8.h.


The documentation for this class was generated from the following files: