src
generate_data.cc
Go to the documentation of this file.
1 #include "src/util/c99_stdint.h"
2 #include <stddef.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <algorithm>
6 #include <map>
7 #include <string>
8 #include <utility>
9 #include <vector>
10 
11 #include "src/conf/msg.h"
12 #include "src/conf/opt.h"
13 #include "src/globals.h"
15 #include "src/ir/rule_rank.h"
16 #include "src/ir/skeleton/path.h"
18 #include "src/util/u32lim.h"
19 
20 namespace re2c
21 {
22 
23 template <typename cunit_t, typename key_t>
24  static Node::covers_t cover_one (FILE * input, FILE * keys, const path_t & path);
25 
26 /*
27  * note [generating skeleton path cover]
28  *
29  * With --skeleton switch we need to generate lots of data: strings that
30  * correspond to various paths in DFA and match given regular expression.
31  * We try to generate path cover (a set of paths that cover all skeleton
32  * arcs at least once). Generation must stop as soon as the size of path
33  * cover exceeds limit (in which case we'll only get a partial path cover).
34  *
35  * The algorithm walks graph nodes in deep-first order and assigns suffix
36  * to each node (a path from this node to end node). In order to calculate
37  * suffix for a given node the algorithm must know suffix for any child
38  * node (end nodes are assigned empty suffix at start). Suffix is only
39  * calculated once for each node and then reused as much times as the node
40  * is visited. This is what reduces search space.
41  *
42  * The algorithm calculates prefix (multipath to current node). If current
43  * node has already been assigned suffix, the algorithm immediately
44  * calculates path cover from prefix and suffix. Otherwise it recurses to
45  * child nodes (updating prefix on the go).
46  *
47  * The algorithm avoids eternal loops by maintaining loop counter for each
48  * node. Loop counter is incremented on recursive enter and decremented on
49  * recursive return. If loop counter is greater than 1, current branch is
50  * abandoned and recursion returns immediately.
51  *
52  * See also note [counting skeleton edges].
53  *
54  */
55 template <typename cunit_t, typename key_t>
56  void Node::cover (path_t & prefix, FILE * input, FILE * keys, covers_t &size)
57 {
58  if (end () && suffix == NULL)
59  {
60  suffix = new path_t (rule, ctx);
61  }
62  if (suffix != NULL)
63  {
64  prefix.append (suffix);
65  size = size + cover_one<cunit_t, key_t> (input, keys, prefix);
66  }
67  else if (loop < 2)
68  {
69  local_inc _ (loop);
70  for (arcs_t::iterator i = arcs.begin ();
71  i != arcs.end () && !size.overflow(); ++i)
72  {
73  path_t new_prefix = prefix;
74  new_prefix.extend (i->first->rule, i->first->ctx, &i->second);
75  i->first->cover<cunit_t, key_t> (new_prefix, input, keys, size);
76  if (i->first->suffix != NULL && suffix == NULL)
77  {
78  suffix = new path_t (rule, ctx);
79  suffix->extend (i->first->rule, i->first->ctx, &i->second);
80  suffix->append (i->first->suffix);
81  }
82  }
83  }
84 }
85 
86 template <typename cunit_t, typename key_t>
87  void Skeleton::generate_paths_cunit_key (FILE * input, FILE * keys)
88 {
89  path_t prefix (nodes->rule, nodes->ctx);
91 
92  nodes->cover<cunit_t, key_t> (prefix, input, keys, size);
93 
94  if (size.overflow ())
95  {
96  warning
97  ( NULL
98  , line
99  , false
100  , "DFA %sis too large: can only generate partial path cover"
101  , incond (cond).c_str ()
102  );
103  }
104 }
105 
106 template <typename cunit_t>
107  void Skeleton::generate_paths_cunit (FILE * input, FILE * keys)
108 {
109  switch (sizeof_key)
110  {
111  case 4: generate_paths_cunit_key<cunit_t, uint32_t> (input, keys); break;
112  case 2: generate_paths_cunit_key<cunit_t, uint16_t> (input, keys); break;
113  case 1: generate_paths_cunit_key<cunit_t, uint8_t> (input, keys); break;
114  }
115 }
116 
117 void Skeleton::generate_paths (FILE * input, FILE * keys)
118 {
119  switch (opts->encoding.szCodeUnit ())
120  {
121  case 4: generate_paths_cunit<uint32_t> (input, keys); break;
122  case 2: generate_paths_cunit<uint16_t> (input, keys); break;
123  case 1: generate_paths_cunit<uint8_t> (input, keys); break;
124  }
125 }
126 
127 void Skeleton::emit_data (const char * fname)
128 {
129  const std::string input_name = std::string (fname) + "." + name + ".input";
130  FILE * input = fopen (input_name.c_str (), "wb");
131  if (!input)
132  {
133  error ("cannot open file: %s", input_name.c_str ());
134  exit (1);
135  }
136  const std::string keys_name = std::string (fname) + "." + name + ".keys";
137  FILE * keys = fopen (keys_name.c_str (), "wb");
138  if (!keys)
139  {
140  error ("cannot open file: %s", keys_name.c_str ());
141  exit (1);
142  }
143 
144  generate_paths (input, keys);
145 
146  fclose (input);
147  fclose (keys);
148 }
149 
150 template <typename uintn_t> static uintn_t to_le(uintn_t n)
151 {
152  uintn_t m;
153  uint8_t *p = reinterpret_cast<uint8_t*>(&m);
154  for (size_t i = 0; i < sizeof(uintn_t); ++i)
155  {
156  p[i] = static_cast<uint8_t>(n >> (i * 8));
157  }
158  return m;
159 }
160 
161 template <typename key_t>
162  static void keygen (FILE * f, size_t count, size_t len, size_t len_match, rule_rank_t match)
163 {
164  const key_t m = Skeleton::rule2key<key_t> (match);
165 
166  const size_t keys_size = 3 * count;
167  key_t * keys = new key_t [keys_size];
168  for (uint32_t i = 0; i < keys_size;)
169  {
170  keys[i++] = to_le<key_t>(static_cast<key_t> (len));
171  keys[i++] = to_le<key_t>(static_cast<key_t> (len_match));
172  keys[i++] = to_le<key_t>(m);
173  }
174  fwrite (keys, sizeof (key_t), keys_size, f);
175  delete [] keys;
176 }
177 
178 template <typename cunit_t, typename key_t>
179  static Node::covers_t cover_one (FILE * input, FILE * keys, const path_t & path)
180 {
181  const size_t len = path.len ();
182 
183  size_t count = 0;
184  for (size_t i = 0; i < len; ++i)
185  {
186  count = std::max (count, path[i]->size ());
187  }
188 
190  if (!size.overflow ())
191  {
192  // input
193  const size_t buffer_size = size.uint32 ();
194  cunit_t * buffer = new cunit_t [buffer_size];
195  for (size_t i = 0; i < len; ++i)
196  {
197  const std::vector<uint32_t> & arc = *path[i];
198  const size_t width = arc.size ();
199  for (size_t j = 0; j < count; ++j)
200  {
201  const size_t k = j % width;
202  buffer[j * len + i] = to_le<cunit_t>(static_cast<cunit_t> (arc[k]));
203  }
204  }
205  fwrite (buffer, sizeof (cunit_t), buffer_size, input);
206  delete [] buffer;
207 
208  // keys
209  keygen<key_t> (keys, count, len, path.len_matching (), path.match ());
210  }
211 
212  return size;
213 }
214 
215 } // namespace re2c
Node * nodes
Definition: skeleton.h:114
void append(const path_t *p)
Definition: path.h:85
void error(const char *fmt,...)
Definition: msg.cc:10
rule_rank_t match() const
Definition: path.h:63
const uint32_t line
Definition: skeleton.h:111
rule_t rule
Definition: skeleton.h:78
static u32lim_t from32(uint32_t x)
Definition: u32lim.h:28
static uintn_t to_le(uintn_t n)
void cover(path_t &prefix, FILE *input, FILE *keys, covers_t &size)
static Node::covers_t cover_one(FILE *input, FILE *keys, const path_t &path)
static void keygen(FILE *f, size_t count, size_t len, size_t len_match, rule_rank_t match)
static u32lim_t from64(uint64_t x)
Definition: u32lim.h:29
size_t len() const
Definition: path.h:53
size_t len_matching() const
Definition: path.h:57
uint32_t szCodeUnit() const
Definition: enc.h:152
arcs_t arcs
Definition: skeleton.h:70
uint8_t loop
Definition: skeleton.h:75
const std::string cond
Definition: skeleton.h:110
uint32_t uint32() const
Definition: u32lim.h:36
Enc encoding
Definition: opt.h:118
void extend(rule_t r, bool c, const arc_t *a)
Definition: path.h:71
void warning(const char *type, uint32_t line, bool error, const char *fmt,...)
Definition: msg.cc:48
bool overflow() const
Definition: u32lim.h:41
Opt opts
Definition: opt.cc:7
u32lim_t< 1024 *1024 *1024 > covers_t
Definition: skeleton.h:57
bool end() const
Definition: skeleton.cc:67
void emit_data(const char *fname)
size_t sizeof_key
Definition: skeleton.h:115
Definition: bitmap.cc:10
const std::string name
Definition: skeleton.h:109
path_t * suffix
Definition: skeleton.h:92
bool ctx
Definition: skeleton.h:81
std::string incond(const std::string &cond)
Definition: msg.cc:242