src
skeleton.cc
Go to the documentation of this file.
1 #include <stdlib.h>
2 #include <algorithm>
3 #include <utility>
4 
5 #include "src/codegen/go.h"
6 #include "src/conf/msg.h"
7 #include "src/ir/dfa/dfa.h"
8 #include "src/ir/regexp/regexp.h"
11 
12 namespace re2c
13 {
14 
16  : arcs ()
17  , arcsets ()
18  , loop (0)
19  , rule (rule_rank_t::none (), false)
20  , ctx (false)
21  , dist (DIST_ERROR)
22  , reachable ()
23  , suffix (NULL)
24 {}
25 
26 void Node::init(bool c, RuleOp *r, const std::vector<std::pair<Node*, uint32_t> > &a)
27 {
28  if (r)
29  {
30  rule.rank = r->rank;
31  rule.restorectx = r->ctx->fixedLength () != 0;
32  }
33 
34  ctx = c;
35 
36  uint32_t lb = 0;
37  std::vector<std::pair<Node*, uint32_t> >::const_iterator
38  i = a.begin(),
39  e = a.end();
40  for (; i != e; ++i)
41  {
42  Node *n = i->first;
43  const uint32_t ub = i->second - 1;
44 
45  // pick at most 0x100 unique edges from this range
46  // (for 1-byte code units this covers the whole range: [0 - 0xFF])
47  // - range bounds must be included
48  // - values should be evenly distributed
49  // - values should be deterministic
50  const uint32_t step = 1 + (ub - lb) / 0x100;
51  for (uint32_t c = lb; c < ub; c += step)
52  {
53  arcs[n].push_back (c);
54  }
55  arcs[n].push_back (ub);
56 
57  arcsets[n].push_back (std::make_pair (lb, ub));
58  lb = ub + 1;
59  }
60 }
61 
63 {
64  delete suffix;
65 }
66 
67 bool Node::end () const
68 {
69  return arcs.size () == 0;
70 }
71 
73  ( const dfa_t &dfa
74  , const charset_t &cs
75  , const rules_t &rs
76  , const std::string &dfa_name
77  , const std::string &dfa_cond
78  , uint32_t dfa_line
79  )
80  : name (dfa_name)
81  , cond (dfa_cond)
82  , line (dfa_line)
83  , nodes_count (dfa.states.size())
84  , nodes (new Node [nodes_count + 1]) // +1 for default state
85  , sizeof_key (4)
86  , rules (rs)
87 {
88  const size_t nc = cs.size() - 1;
89 
90  // initialize skeleton nodes
91  Node *nil = &nodes[nodes_count];
92  for (size_t i = 0; i < nodes_count; ++i)
93  {
94  dfa_state_t *s = dfa.states[i];
95  std::vector<std::pair<Node*, uint32_t> > arcs;
96  for (size_t c = 0; c < nc;)
97  {
98  const size_t j = s->arcs[c];
99  for (;++c < nc && s->arcs[c] == j;);
100  Node *to = j == dfa_t::NIL
101  ? nil
102  : &nodes[j];
103  arcs.push_back(std::make_pair(to, cs[c]));
104  }
105  // all arcs go to default node => this node is final, drop arcs
106  if (arcs.size() == 1 && arcs[0].first == nil)
107  {
108  arcs.clear();
109  }
110  nodes[i].init(s->ctx, s->rule, arcs);
111  }
112 
113  // calculate maximal path length, check overflow
114  nodes->calc_dist ();
115  const uint32_t maxlen = nodes->dist;
116  if (maxlen == Node::DIST_MAX)
117  {
118  error ("DFA path %sis too long", incond (cond).c_str ());
119  exit (1);
120  }
121 
122  // calculate maximal rule rank (disregarding default and none rules)
123  uint32_t maxrule = 0;
124  for (uint32_t i = 0; i < nodes_count; ++i)
125  {
126  const rule_rank_t r = nodes[i].rule.rank;
127  if (!r.is_none () && !r.is_def ())
128  {
129  maxrule = std::max (maxrule, r.uint32 ());
130  }
131  }
132  // two upper values reserved for default and none rules)
133  maxrule += 2;
134 
135  // initialize size of key
136  const uint32_t max = std::max (maxlen, maxrule);
137  if (max <= std::numeric_limits<uint8_t>::max())
138  {
139  sizeof_key = 1;
140  }
141  else if (max <= std::numeric_limits<uint16_t>::max())
142  {
143  sizeof_key = 2;
144  }
145 }
146 
148 {
149  delete [] nodes;
150 }
151 
152 uint32_t Skeleton::rule2key (rule_rank_t r) const
153 {
154  switch (sizeof_key)
155  {
156  default: // shouldn't happen
157  case 4: return rule2key<uint32_t> (r);
158  case 2: return rule2key<uint16_t> (r);
159  case 1: return rule2key<uint8_t> (r);
160  }
161 }
162 
163 } // namespace re2c
Node * nodes
Definition: skeleton.h:114
std::vector< uint32_t > charset_t
Definition: regexp.h:16
void error(const char *fmt,...)
Definition: msg.cc:10
rule_t rule
Definition: skeleton.h:78
std::vector< dfa_state_t * > states
Definition: dfa.h:40
Skeleton(const dfa_t &dfa, const charset_t &cs, const rules_t &rs, const std::string &dfa_name, const std::string &dfa_cond, uint32_t dfa_line)
Definition: skeleton.cc:73
static key_t rule2key(rule_rank_t r)
Definition: skeleton.h:160
std::map< rule_rank_t, rule_info_t > rules_t
Definition: rules.h:25
bool restorectx
Definition: path.h:15
arcsets_t arcsets
Definition: skeleton.h:71
bool is_def() const
Definition: rule_rank.cc:42
arcs_t arcs
Definition: skeleton.h:70
void init(bool b, RuleOp *r, const std::vector< std::pair< Node *, uint32_t > > &arcs)
Definition: skeleton.cc:26
virtual uint32_t fixedLength()
Definition: fixed_length.cc:12
RegExp * ctx
Definition: regexp_rule.h:22
rule_rank_t rank
Definition: regexp_rule.h:23
static const size_t NIL
Definition: dfa.h:38
bool end() const
Definition: skeleton.cc:67
uint32_t uint32() const
Definition: rule_rank.cc:63
bool is_none() const
Definition: rule_rank.cc:37
size_t sizeof_key
Definition: skeleton.h:115
bool ctx
Definition: dfa.h:21
size_t * arcs
Definition: dfa.h:19
static const uint32_t DIST_MAX
Definition: skeleton.h:85
Definition: bitmap.cc:10
rule_rank_t rank
Definition: path.h:14
path_t * suffix
Definition: skeleton.h:92
bool ctx
Definition: skeleton.h:81
RuleOp * rule
Definition: dfa.h:20
std::string incond(const std::string &cond)
Definition: msg.cc:242