src
compile.cc
Go to the documentation of this file.
1 #include <algorithm>
2 #include <ostream>
3 #include <set>
4 
5 #include "src/codegen/output.h"
6 #include "src/ir/compile.h"
7 #include "src/ir/adfa/adfa.h"
8 #include "src/ir/dfa/dfa.h"
9 #include "src/ir/nfa/nfa.h"
10 #include "src/ir/regexp/regexp.h"
12 #include "src/parse/spec.h"
13 
14 namespace re2c {
15 
16 static std::string make_name(const std::string &cond, uint32_t line)
17 {
18  std::ostringstream os;
19  os << "line" << line;
20  std::string name = os.str();
21  if (!cond.empty ())
22  {
23  name += "_";
24  name += cond;
25  }
26  return name;
27 }
28 
29 smart_ptr<DFA> compile (Spec & spec, Output & output, const std::string & cond, uint32_t cunits)
30 {
31  const uint32_t line = output.source.get_block_line();
32  const std::string name = make_name(cond, line);
33 
34  // The original set of code units (charset) might be very large.
35  // A common trick it is to split charset into disjoint character ranges
36  // and choose a representative of each range (we choose lower bound).
37  // The set of all representatives is the new (compacted) charset.
38  // Don't forget to include zero and upper bound, even if they
39  // do not explicitely apper in ranges.
40  std::set<uint32_t> bounds;
41  spec.re->split(bounds);
42  bounds.insert(0);
43  bounds.insert(cunits);
44  charset_t cs;
45  for (std::set<uint32_t>::const_iterator i = bounds.begin(); i != bounds.end(); ++i)
46  {
47  cs.push_back(*i);
48  }
49 
50  nfa_t nfa(spec.re);
51 
52  dfa_t dfa(nfa, cs, spec.rules);
53 
54  // skeleton must be constructed after DFA construction
55  // but prior to any other DFA transformations
56  Skeleton *skeleton = new Skeleton(dfa, cs, spec.rules, name, cond, line);
57 
58  minimization(dfa);
59 
60  // find YYFILL states and calculate argument to YYFILL
61  std::vector<size_t> fill;
62  fillpoints(dfa, fill);
63 
64  // ADFA stands for 'DFA with actions'
65  DFA *adfa = new DFA(dfa, fill, skeleton, cs, name, cond, line);
66 
67  /*
68  * note [reordering DFA states]
69  *
70  * re2c-generated code depends on the order of states in DFA: simply
71  * flipping two states may change the output significantly.
72  * The order of states is affected by many factors, e.g.:
73  * - flipping left and right subtrees of alternative when constructing
74  * AST (also applies to iteration and counted repetition)
75  * - changing the order in which graph nodes are visited (applies to
76  * any intermediate representation: bytecode, NFA, DFA, etc.)
77  *
78  * To make the resulting code independent of such changes, we hereby
79  * reorder DFA states. The ordering scheme is very simple:
80  *
81  * Starting with DFA root, walk DFA nodes in breadth-first order.
82  * Child nodes are ordered accoding to the (alphabetically) first symbol
83  * leading to each node. Each node must be visited exactly once.
84  * Default state (NULL) is always the last state.
85  */
86  adfa->reorder();
87 
88  // skeleton is constructed, do further DFA transformations
89  adfa->prepare();
90 
91  // finally gather overall DFA statistics
92  adfa->calc_stats();
93 
94  // accumulate global statistics from this particular DFA
95  output.max_fill = std::max (output.max_fill, adfa->max_fill);
96  if (adfa->need_accept)
97  {
98  output.source.set_used_yyaccept ();
99  }
100 
101  return make_smart_ptr(adfa);
102 }
103 
104 } // namespace re2c
size_t max_fill
Definition: output.h:140
size_t max_fill
Definition: adfa.h:69
std::vector< uint32_t > charset_t
Definition: regexp.h:16
smart_ptr< T > make_smart_ptr(T *p)
Definition: smart_ptr.h:63
void calc_stats()
Definition: prepare.cc:240
void set_used_yyaccept()
Definition: output.cc:249
OutputFile source
Definition: output.h:136
smart_ptr< DFA > compile(Spec &spec, Output &output, const std::string &cond, uint32_t cunits)
Definition: compile.cc:29
rules_t rules
Definition: spec.h:13
virtual void split(std::set< uint32_t > &)=0
void fillpoints(const dfa_t &dfa, std::vector< size_t > &fill)
Definition: fillpoints.cc:126
void reorder()
Definition: adfa.cc:92
static std::string make_name(const std::string &cond, uint32_t line)
Definition: compile.cc:16
void prepare()
Definition: prepare.cc:140
Definition: adfa.h:53
RegExp * re
Definition: spec.h:12
uint32_t get_block_line() const
Definition: output.cc:279
Definition: bitmap.cc:10
void minimization(dfa_t &dfa)
bool need_accept
Definition: adfa.h:72