src
skeleton.h
Go to the documentation of this file.
1 #ifndef _RE2C_IR_SKELETON_SKELETON_
2 #define _RE2C_IR_SKELETON_SKELETON_
3 
4 #include "src/util/c99_stdint.h"
5 #include <stddef.h>
6 #include <stdio.h>
7 #include <limits>
8 #include <map>
9 #include <set>
10 #include <string>
11 #include <vector>
12 #include <utility>
13 
14 #include "src/ir/regexp/regexp.h"
15 #include "src/ir/rule_rank.h"
16 #include "src/ir/skeleton/path.h"
17 #include "src/ir/skeleton/way.h"
18 #include "src/parse/rules.h"
20 #include "src/util/forbid_copy.h"
21 #include "src/util/u32lim.h"
22 
23 namespace re2c
24 {
25 
26 struct dfa_t;
27 struct OutputFile;
28 class RuleOp;
29 
30 struct Node
31 {
32  /*
33  * note [counting skeleton edges]
34  *
35  * To avoid any possible overflows all size calculations are wrapped in
36  * a special truncated unsigned 32-bit integer type that checks overflow
37  * on each binary operation or conversion from another type.
38  *
39  * Two things contribute to size calculation: path length and the number
40  * of outgoing arcs in each node. Some considerations on why these values
41  * will not overflow before they are converted to truncated type:
42  *
43  * - Maximal number of outgoing arcs in each node cannot exceed 32 bits:
44  * it is bounded by the number of code units in current encoding, and
45  * re2c doesn't support any encoding with more than 2^32 code units.
46  * Conversion is safe.
47  *
48  * - Maximal path length cannot exceed 32 bits: we estimate it right
49  * after skeleton construction and check for overflow. If path length
50  * does overflow, an error is reported and re2c aborts.
51  */
52 
53  // Type for calculating the size of path cover.
54  // Paths are dumped to file as soon as generated and don't eat
55  // heap space. The total size of path cover (measured in edges)
56  // is O(N^2) where N is the number of edges in skeleton.
58 
59  // Type for counting arcs in paths that cause undefined behaviour.
60  // These paths are stored on heap, so the limit should be low.
61  // Most real-world cases have only a few short paths.
62  // We don't need all paths anyway, just some examples.
63  typedef u32lim_t<1024> nakeds_t; // ~1Kb
64 
65  typedef std::map<Node *, path_t::arc_t> arcs_t;
66  typedef std::map<Node *, way_arc_t> arcsets_t;
68 
69  // outgoing arcs
70  arcs_t arcs;
71  arcsets_t arcsets;
72 
73  // how many times this node has been visited
74  // (controls looping in graph traversals)
75  uint8_t loop;
76 
77  // rule for corresponding DFA state (if any)
79 
80  // start of trailing context
81  bool ctx;
82 
83  // maximal distance to end node (assuming one iteration per loop)
84  static const uint32_t DIST_ERROR;
85  static const uint32_t DIST_MAX;
86  uint32_t dist;
87 
88  // rules reachable from this node (including absent rule)
89  std::set<rule_t> reachable;
90 
91  // path to end node (for constructing path cover)
93 
94  Node ();
95  void init(bool b, RuleOp *r, const std::vector<std::pair<Node*, uint32_t> > &arcs);
96  ~Node ();
97  bool end () const;
98  void calc_dist ();
99  void calc_reachable ();
100  template <typename cunit_t, typename key_t>
101  void cover (path_t & prefix, FILE * input, FILE * keys, covers_t &size);
102  void naked_ways (way_t & prefix, std::vector<way_t> & ways, nakeds_t &size);
103 
104  FORBID_COPY (Node);
105 };
106 
107 struct Skeleton
108 {
109  const std::string name;
110  const std::string cond;
111  const uint32_t line;
112 
113  const size_t nodes_count;
115  size_t sizeof_key;
117 
118  Skeleton
119  ( const dfa_t &dfa
120  , const charset_t &cs
121  , const rules_t & rs
122  , const std::string &dfa_name
123  , const std::string &dfa_cond
124  , uint32_t dfa_line
125  );
126  ~Skeleton ();
128  void warn_unreachable_rules ();
129  void warn_match_empty ();
130  void emit_data (const char * fname);
131  static void emit_prolog (OutputFile & o);
132  void emit_start
133  ( OutputFile & o
134  , size_t maxfill
135  , bool backup
136  , bool backupctx
137  , bool accept
138  ) const;
139  void emit_end
140  ( OutputFile & o
141  , bool backup
142  , bool backupctx
143  ) const;
144  static void emit_epilog (OutputFile & o, const std::set<std::string> & names);
145  void emit_action (OutputFile & o, uint32_t ind, rule_rank_t rank) const;
146 
147  template <typename key_t> static key_t rule2key (rule_rank_t r);
148  uint32_t rule2key (rule_rank_t r) const;
149 
150 private:
151  template <typename cunit_t, typename key_t>
152  void generate_paths_cunit_key (FILE * input, FILE * keys);
153  template <typename cunit_t>
154  void generate_paths_cunit (FILE * input, FILE * keys);
155  void generate_paths (FILE * input, FILE * keys);
156 
157  FORBID_COPY (Skeleton);
158 };
159 
160 template<typename key_t> key_t Skeleton::rule2key (rule_rank_t r)
161 {
162  if (r.is_none()) {
163  return std::numeric_limits<key_t>::max();
164  } else if (r.is_def()) {
165  key_t k = std::numeric_limits<key_t>::max();
166  return --k;
167  } else {
168  return static_cast<key_t>(r.uint32());
169  }
170 }
171 
172 } // namespace re2c
173 
174 #endif // _RE2C_IR_SKELETON_SKELETON_
Node * nodes
Definition: skeleton.h:114
void calc_reachable()
Definition: unreachable.cc:16
std::vector< uint32_t > charset_t
Definition: regexp.h:16
void calc_dist()
Definition: maxlen.cc:18
const uint32_t line
Definition: skeleton.h:111
rule_t rule
Definition: skeleton.h:78
Skeleton(const dfa_t &dfa, const charset_t &cs, const rules_t &rs, const std::string &dfa_name, const std::string &dfa_cond, uint32_t dfa_line)
Definition: skeleton.cc:73
static key_t rule2key(rule_rank_t r)
Definition: skeleton.h:160
std::map< rule_rank_t, rule_info_t > rules_t
Definition: rules.h:25
void emit_end(OutputFile &o, bool backup, bool backupctx) const
void cover(path_t &prefix, FILE *input, FILE *keys, covers_t &size)
uint32_t dist
Definition: skeleton.h:86
void emit_start(OutputFile &o, size_t maxfill, bool backup, bool backupctx, bool accept) const
u32lim_t< 1024 > nakeds_t
Definition: skeleton.h:63
std::map< Node *, path_t::arc_t > arcs_t
Definition: skeleton.h:65
FORBID_COPY(Node)
arcsets_t arcsets
Definition: skeleton.h:71
bool is_def() const
Definition: rule_rank.cc:42
void warn_match_empty()
Definition: match_empty.cc:14
arcs_t arcs
Definition: skeleton.h:70
void init(bool b, RuleOp *r, const std::vector< std::pair< Node *, uint32_t > > &arcs)
Definition: skeleton.cc:26
uint8_t loop
Definition: skeleton.h:75
std::map< Node *, way_arc_t > arcsets_t
Definition: skeleton.h:66
const std::string cond
Definition: skeleton.h:110
std::vector< const way_arc_t * > way_t
Definition: way.h:13
static void emit_epilog(OutputFile &o, const std::set< std::string > &names)
std::set< rule_t > reachable
Definition: skeleton.h:89
static const uint32_t DIST_ERROR
Definition: skeleton.h:84
local_increment_t< uint8_t > local_inc
Definition: skeleton.h:67
u32lim_t< 1024 *1024 *1024 > covers_t
Definition: skeleton.h:57
const size_t nodes_count
Definition: skeleton.h:113
bool end() const
Definition: skeleton.cc:67
uint32_t uint32() const
Definition: rule_rank.cc:63
void naked_ways(way_t &prefix, std::vector< way_t > &ways, nakeds_t &size)
Definition: control_flow.cc:19
rules_t rules
Definition: skeleton.h:116
bool is_none() const
Definition: rule_rank.cc:37
void emit_action(OutputFile &o, uint32_t ind, rule_rank_t rank) const
void emit_data(const char *fname)
size_t sizeof_key
Definition: skeleton.h:115
void warn_unreachable_rules()
Definition: unreachable.cc:37
static const uint32_t DIST_MAX
Definition: skeleton.h:85
void warn_undefined_control_flow()
Definition: control_flow.cc:43
static void emit_prolog(OutputFile &o)
Definition: bitmap.cc:10
const std::string name
Definition: skeleton.h:109
path_t * suffix
Definition: skeleton.h:92
bool ctx
Definition: skeleton.h:81