src
utf16_regexp.cc
Go to the documentation of this file.
1 #include "src/util/c99_stdint.h"
2 
8 #include "src/util/range.h"
9 
10 namespace re2c {
11 
13 {
14  if (r <= utf16::MAX_1WORD_RUNE)
15  return new MatchOp(Range::sym (r));
16  else
17  {
18  const uint32_t ld = utf16::lead_surr(r);
19  const uint32_t tr = utf16::trail_surr(r);
20  return new CatOp(new MatchOp(Range::sym (ld)), new MatchOp(Range::sym (tr)));
21  }
22 }
23 
24 /*
25  * Split Unicode character class {[l1, h1), ..., [lN, hN)} into
26  * ranges [l1, h1-1], ..., [lN, hN-1] and return alternation of
27  * them. We store partially built range in suffix tree, which
28  * allows to eliminate common suffixes while building.
29  */
30 RegExp * UTF16Range(const Range * r)
31 {
32  RangeSuffix * root = NULL;
33  for (; r != NULL; r = r->next ())
34  UTF16splitByRuneLength(root, r->lower (), r->upper () - 1);
35  return to_regexp (root);
36 }
37 
38 } // namespace re2c
static uint32_t trail_surr(rune r)
Definition: utf16.h:30
uint32_t lower() const
Definition: range.h:40
uint32_t rune
Definition: utf16.h:11
RegExp * to_regexp(RangeSuffix *p)
Definition: range_suffix.cc:12
void UTF16splitByRuneLength(RangeSuffix *&root, utf16::rune l, utf16::rune h)
Definition: utf16_range.cc:120
static Range * sym(uint32_t c)
Definition: range.h:27
static uint32_t lead_surr(rune r)
Definition: utf16.h:25
uint32_t upper() const
Definition: range.h:41
RegExp * UTF16Range(const Range *r)
Definition: utf16_regexp.cc:30
static const uint32_t MAX_1WORD_RUNE
Definition: utf16.h:13
Definition: bitmap.cc:10
RegExp * UTF16Symbol(utf16::rune r)
Definition: utf16_regexp.cc:12
Range * next() const
Definition: range.h:39