src
minimization.cc
Go to the documentation of this file.
1 #include <string.h>
2 #include <utility>
3 #include <vector>
4 
5 #include "src/conf/opt.h"
6 #include "src/ir/dfa/dfa.h"
7 #include "src/globals.h"
8 
9 namespace re2c
10 {
11 
12 class RuleOp;
13 
14 /*
15  * note [DFA minimization: table filling algorithm]
16  *
17  * This algorithm is simple and slow; it's a reference implementation.
18  *
19  * The algorithm constructs (strictly lower triangular) boolean matrix
20  * indexed by DFA states. Each matrix cell (S1,S2) indicates if states
21  * S1 and S2 are distinguishable. Initialy states are distinguished
22  * according to their rule and context. One step of the algorithm
23  * updates the matrix as follows: each pair of states S1 and S2 is
24  * marked as distinguishable iff exist transitions from S1 and S2 on
25  * the same symbol that go to distinguishable states. The algorithm
26  * loops until the matrix stops changing.
27  */
28 static void minimization_table(
29  size_t *part,
30  const std::vector<dfa_state_t*> &states,
31  size_t nchars)
32 {
33  const size_t count = states.size();
34 
35  bool **tbl = new bool*[count];
36  tbl[0] = new bool[count * (count - 1) / 2];
37  for (size_t i = 0; i < count - 1; ++i)
38  {
39  tbl[i + 1] = tbl[i] + i;
40  }
41 
42  for (size_t i = 0; i < count; ++i)
43  {
44  dfa_state_t *s1 = states[i];
45  for (size_t j = 0; j < i; ++j)
46  {
47  dfa_state_t *s2 = states[j];
48  tbl[i][j] = s1->ctx != s2->ctx
49  || s1->rule != s2->rule;
50  }
51  }
52 
53  for (bool loop = true; loop;)
54  {
55  loop = false;
56  for (size_t i = 0; i < count; ++i)
57  {
58  for (size_t j = 0; j < i; ++j)
59  {
60  if (!tbl[i][j])
61  {
62  for (size_t k = 0; k < nchars; ++k)
63  {
64  size_t oi = states[i]->arcs[k];
65  size_t oj = states[j]->arcs[k];
66  if (oi < oj)
67  {
68  std::swap(oi, oj);
69  }
70  if (oi != oj &&
71  (oi == dfa_t::NIL ||
72  oj == dfa_t::NIL ||
73  tbl[oi][oj]))
74  {
75  tbl[i][j] = true;
76  loop = true;
77  break;
78  }
79  }
80  }
81  }
82  }
83  }
84 
85  for (size_t i = 0; i < count; ++i)
86  {
87  part[i] = i;
88  for (size_t j = 0; j < i; ++j)
89  {
90  if (!tbl[i][j])
91  {
92  part[i] = j;
93  break;
94  }
95  }
96  }
97 
98  delete[] tbl[0];
99  delete[] tbl;
100 }
101 
102 /*
103  * note [DFA minimization: Moore algorithm]
104  *
105  * The algorithm maintains partition of DFA states.
106  * Initial partition is coarse: states are distinguished according
107  * to their rule and context. Partition is gradually refined: each
108  * set of states is split into minimal number of subsets such that
109  * for all states in a subset transitions on the same symbol go to
110  * the same set of states.
111  * The algorithm loops until partition stops changing.
112  */
113 static void minimization_moore(
114  size_t *part,
115  const std::vector<dfa_state_t*> &states,
116  size_t nchars)
117 {
118  const size_t count = states.size();
119 
120  size_t *next = new size_t[count];
121 
122  std::map<std::pair<RuleOp*, bool>, size_t> init;
123  for (size_t i = 0; i < count; ++i)
124  {
125  dfa_state_t *s = states[i];
126  std::pair<RuleOp*, bool> key(s->rule, s->ctx);
127  if (init.insert(std::make_pair(key, i)).second)
128  {
129  part[i] = i;
130  next[i] = dfa_t::NIL;
131  }
132  else
133  {
134  const size_t j = init[key];
135  part[i] = j;
136  next[i] = next[j];
137  next[j] = i;
138  }
139  }
140 
141  size_t *out = new size_t[nchars * count];
142  size_t *diff = new size_t[count];
143  for (bool loop = true; loop;)
144  {
145  loop = false;
146  for (size_t i = 0; i < count; ++i)
147  {
148  if (i != part[i] || next[i] == dfa_t::NIL)
149  {
150  continue;
151  }
152 
153  for (size_t j = i; j != dfa_t::NIL; j = next[j])
154  {
155  size_t *o = &out[j * nchars];
156  size_t *a = states[j]->arcs;
157  for (size_t c = 0; c < nchars; ++c)
158  {
159  o[c] = a[c] == dfa_t::NIL
160  ? dfa_t::NIL
161  : part[a[c]];
162  }
163  }
164 
165  size_t diff_count = 0;
166  for (size_t j = i; j != dfa_t::NIL;)
167  {
168  const size_t j_next = next[j];
169  size_t n = 0;
170  for (; n < diff_count; ++n)
171  {
172  size_t k = diff[n];
173  if (memcmp(&out[j * nchars],
174  &out[k * nchars],
175  nchars * sizeof(size_t)) == 0)
176  {
177  part[j] = k;
178  next[j] = next[k];
179  next[k] = j;
180  break;
181  }
182  }
183  if (n == diff_count)
184  {
185  diff[diff_count++] = j;
186  part[j] = j;
187  next[j] = dfa_t::NIL;
188  }
189  j = j_next;
190  }
191  loop |= diff_count > 1;
192  }
193  }
194  delete[] out;
195  delete[] diff;
196  delete[] next;
197 }
198 
199 void minimization(dfa_t &dfa)
200 {
201  const size_t count = dfa.states.size();
202 
203  size_t *part = new size_t[count];
204 
205  switch (opts->dfa_minimization)
206  {
208  minimization_table(part, dfa.states, dfa.nchars);
209  break;
211  minimization_moore(part, dfa.states, dfa.nchars);
212  break;
213  }
214 
215  size_t *compact = new size_t[count];
216  for (size_t i = 0, j = 0; i < count; ++i)
217  {
218  if (i == part[i])
219  {
220  compact[i] = j++;
221  }
222  }
223 
224  size_t new_count = 0;
225  for (size_t i = 0; i < count; ++i)
226  {
227  dfa_state_t *s = dfa.states[i];
228  if (i == part[i])
229  {
230  size_t *arcs = s->arcs;
231  for (size_t c = 0; c < dfa.nchars; ++c)
232  {
233  if (arcs[c] != dfa_t::NIL)
234  {
235  arcs[c] = compact[part[arcs[c]]];
236  }
237  }
238  dfa.states[new_count++] = s;
239  }
240  else
241  {
242  delete s;
243  }
244  }
245  dfa.states.resize(new_count);
246 
247  delete[] compact;
248  delete[] part;
249 }
250 
251 } // namespace re2c
252 
std::vector< dfa_state_t * > states
Definition: dfa.h:40
static void minimization_moore(size_t *part, const std::vector< dfa_state_t * > &states, size_t nchars)
static void minimization_table(size_t *part, const std::vector< dfa_state_t * > &states, size_t nchars)
Definition: minimization.cc:28
const size_t nchars
Definition: dfa.h:41
Opt opts
Definition: opt.cc:7
static const size_t NIL
Definition: dfa.h:38
static int32_t diff(const re2c::Range *r1, const re2c::Range *r2, const re2c::Range *op1, const re2c::Range *op2, const char *op)
Definition: test.cc:43
dfa_minimization_t dfa_minimization
Definition: opt.h:118
bool ctx
Definition: dfa.h:21
size_t * arcs
Definition: dfa.h:19
Definition: bitmap.cc:10
void minimization(dfa_t &dfa)
RuleOp * rule
Definition: dfa.h:20