1931 lines
49 KiB
C
1931 lines
49 KiB
C
|
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\
|
||
|
* This is GNU Go, a Go program. Contact gnugo@gnu.org, or see *
|
||
|
* http://www.gnu.org/software/gnugo/ for more information. *
|
||
|
* *
|
||
|
* Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, *
|
||
|
* 2008 and 2009 by the Free Software Foundation. *
|
||
|
* *
|
||
|
* This program is free software; you can redistribute it and/or *
|
||
|
* modify it under the terms of the GNU General Public License as *
|
||
|
* published by the Free Software Foundation - version 3 or *
|
||
|
* (at your option) any later version. *
|
||
|
* *
|
||
|
* This program is distributed in the hope that it will be useful, *
|
||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
|
||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
|
||
|
* GNU General Public License in file COPYING for more details. *
|
||
|
* *
|
||
|
* You should have received a copy of the GNU General Public *
|
||
|
* License along with this program; if not, write to the Free *
|
||
|
* Software Foundation, Inc., 51 Franklin Street, Fifth Floor, *
|
||
|
* Boston, MA 02111, USA. *
|
||
|
\* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
|
||
|
|
||
|
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
|
||
|
* * * * * * * * fast pattern matching with DFA version 2.9 * * *
|
||
|
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
|
||
|
|
||
|
#include "liberty.h"
|
||
|
#include "patterns.h"
|
||
|
#include "dfa-mkpat.h"
|
||
|
#include "random.h"
|
||
|
|
||
|
#include <assert.h>
|
||
|
#include <stdlib.h>
|
||
|
|
||
|
#ifdef HAVE_CONFIG_H
|
||
|
#include <config.h>
|
||
|
#endif
|
||
|
|
||
|
#ifdef HAVE_UNISTD_H
|
||
|
#include <unistd.h>
|
||
|
#endif
|
||
|
|
||
|
|
||
|
/*********************
|
||
|
* Public data *
|
||
|
*********************/
|
||
|
|
||
|
/* If > 0 more detailed information is given */
|
||
|
int dfa_verbose = 0;
|
||
|
|
||
|
|
||
|
/*********************
|
||
|
* Private data *
|
||
|
*********************/
|
||
|
|
||
|
/* auxiliary dfa's for high level functions */
|
||
|
#define DFA_BINS 33 /* Number of temporary bins used to store intermediate DFAs */
|
||
|
static dfa_t aux_dfa[DFA_BINS]; /* used to store intermediate DFAs */
|
||
|
static dfa_t aux_temp; /* used to store temporary DFAs */
|
||
|
|
||
|
/* To be sure that everything was well initialized */
|
||
|
static int dfa_was_initialized = 0;
|
||
|
static int aux_count = 0;
|
||
|
|
||
|
|
||
|
/* convert ATT_* values to the corresponding expected values on the board */
|
||
|
static const char att2val[8] = {
|
||
|
'.', 'X', 'O', 'x', 'o', ',', 'a', '!'
|
||
|
};
|
||
|
|
||
|
#define EXPECTED_VAL(att_val) att2val[att_val]
|
||
|
|
||
|
|
||
|
/************************************************
|
||
|
* forward declaration of private functions *
|
||
|
************************************************/
|
||
|
static void clean_dfa(dfa_t *pdfa);
|
||
|
static void resize_dfa(dfa_t *pdfa, int max_states, int max_indexes);
|
||
|
static void create_dfa(dfa_t *pdfa, const char *str, int att_val);
|
||
|
static void do_sync_product(int l, int r);
|
||
|
static void sync_product(dfa_t *pout, dfa_t *pleft, dfa_t *pright);
|
||
|
|
||
|
static void dfa_prepare_rotation_data(void);
|
||
|
|
||
|
|
||
|
/********************************
|
||
|
* manipulating attributes list *
|
||
|
********************************/
|
||
|
|
||
|
/*
|
||
|
* Test if val is member of the attributes set att
|
||
|
*/
|
||
|
|
||
|
static int
|
||
|
member_att(dfa_t *pdfa, int att, int val)
|
||
|
{
|
||
|
while (att != 0) {
|
||
|
if (pdfa->indexes[att].val == val)
|
||
|
return 1;
|
||
|
|
||
|
att = pdfa->indexes[att].next;
|
||
|
}
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* return the union of two attribute sets att1 & att2
|
||
|
* repectively from dfa1 and dfa2 into
|
||
|
* att in dfa.
|
||
|
*/
|
||
|
|
||
|
static int
|
||
|
union_att(dfa_t *pdfa, dfa_t *pdfa1, int att1, dfa_t *pdfa2, int att2)
|
||
|
{
|
||
|
int att;
|
||
|
int att_aux;
|
||
|
|
||
|
/* copy att1 in att */
|
||
|
att = 0;
|
||
|
while (att1 != 0) {
|
||
|
pdfa->last_index++;
|
||
|
if (pdfa->last_index >= pdfa->max_indexes)
|
||
|
resize_dfa(pdfa, pdfa->max_states, pdfa->max_indexes + DFA_RESIZE_STEP);
|
||
|
att_aux = pdfa->last_index;
|
||
|
|
||
|
pdfa->indexes[att_aux].val = pdfa1->indexes[att1].val;
|
||
|
pdfa->indexes[att_aux].next = att;
|
||
|
att = att_aux;
|
||
|
att1 = pdfa1->indexes[att1].next;
|
||
|
}
|
||
|
|
||
|
/* add to att the new elements of att2 */
|
||
|
while (att2 != 0) {
|
||
|
if (!member_att(pdfa, att, pdfa2->indexes[att2].val)) {
|
||
|
pdfa->last_index++;
|
||
|
if (pdfa->last_index >= pdfa->max_indexes)
|
||
|
resize_dfa(pdfa, pdfa->max_states, pdfa->max_indexes + DFA_RESIZE_STEP);
|
||
|
att_aux = pdfa->last_index;
|
||
|
|
||
|
pdfa->indexes[att_aux].val = pdfa2->indexes[att2].val;
|
||
|
pdfa->indexes[att_aux].next = att;
|
||
|
att = att_aux;
|
||
|
}
|
||
|
att2 = pdfa2->indexes[att2].next;
|
||
|
}
|
||
|
|
||
|
return att;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Remove all attribute entry repetitions from a dfa.
|
||
|
*/
|
||
|
static void
|
||
|
compactify_att(dfa_t *pdfa)
|
||
|
{
|
||
|
int k;
|
||
|
int last = 0;
|
||
|
int save_last = pdfa->last_index;
|
||
|
int *map;
|
||
|
int *search_first;
|
||
|
int *search_next;
|
||
|
int size = (save_last + 1) * sizeof(int);
|
||
|
|
||
|
map = malloc(size);
|
||
|
map[0] = 0;
|
||
|
search_first = malloc(size);
|
||
|
memset(search_first, 0, size);
|
||
|
search_next = malloc(size);
|
||
|
memset(search_next, 0, size);
|
||
|
|
||
|
for (k = 1; k <= save_last; k++) {
|
||
|
int i = search_first[pdfa->indexes[k].val];
|
||
|
|
||
|
if (i) {
|
||
|
while (pdfa->indexes[i].next != pdfa->indexes[k].next) {
|
||
|
if (!search_next[i]) {
|
||
|
search_next[i] = ++last;
|
||
|
i = 0;
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
i = search_next[i];
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
search_first[pdfa->indexes[k].val] = ++last;
|
||
|
|
||
|
if (i)
|
||
|
map[k] = i;
|
||
|
else {
|
||
|
map[k] = last;
|
||
|
pdfa->indexes[last] = pdfa->indexes[k];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
free(search_first);
|
||
|
free(search_next);
|
||
|
|
||
|
if (last < save_last) {
|
||
|
pdfa->last_index = last;
|
||
|
for (k = 0; k <= pdfa->last_index; k++)
|
||
|
pdfa->indexes[k].next = map[pdfa->indexes[k].next];
|
||
|
|
||
|
for (k = 0; k <= pdfa->last_state; k++)
|
||
|
pdfa->states[k].att = map[pdfa->states[k].att];
|
||
|
|
||
|
if (0)
|
||
|
fprintf(stderr, "compactified: %d attributes left of %d\n",
|
||
|
last, save_last);
|
||
|
|
||
|
compactify_att(pdfa);
|
||
|
}
|
||
|
|
||
|
free(map);
|
||
|
}
|
||
|
|
||
|
|
||
|
/**********************
|
||
|
* manipulating dfa's *
|
||
|
**********************/
|
||
|
|
||
|
|
||
|
/*
|
||
|
* return the effective size of a dfa in kB.
|
||
|
*/
|
||
|
|
||
|
int
|
||
|
dfa_size(dfa_t *pdfa)
|
||
|
{
|
||
|
int states_size, indexes_size;
|
||
|
|
||
|
states_size = (pdfa->last_state + 1) * sizeof(state_rt_t);
|
||
|
indexes_size = (pdfa->last_index + 1) * sizeof(attrib_rt_t);
|
||
|
|
||
|
return (states_size + indexes_size + sizeof(dfa_rt_t)) / 1024;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*
|
||
|
* resize memory for a dfa
|
||
|
*/
|
||
|
|
||
|
static void
|
||
|
resize_dfa(dfa_t *pdfa, int max_states, int max_indexes)
|
||
|
{
|
||
|
state_t *pBuf;
|
||
|
attrib_t *pBuf2;
|
||
|
int i;
|
||
|
|
||
|
if (dfa_verbose > 1)
|
||
|
fprintf(stderr, "Resizing dfa %s\n", pdfa->name);
|
||
|
|
||
|
assert(pdfa->last_state <= pdfa->max_states);
|
||
|
assert(pdfa->last_index <= pdfa->max_indexes);
|
||
|
|
||
|
pBuf = realloc(pdfa->states, max_states * sizeof(*pBuf));
|
||
|
pBuf2 = realloc(pdfa->indexes, max_indexes * sizeof(*pBuf2));
|
||
|
if (pBuf == NULL || pBuf2 == NULL) {
|
||
|
fprintf(stderr, "No memory left for dfa: %s", pdfa->name);
|
||
|
exit(EXIT_FAILURE);
|
||
|
}
|
||
|
|
||
|
for (i = pdfa->max_states; i < max_states; i++)
|
||
|
memset(pBuf + i, 0, sizeof(state_t));
|
||
|
for (i = pdfa->max_indexes; i < max_indexes; i++)
|
||
|
memset(pBuf2 + i, 0, sizeof(attrib_t));
|
||
|
|
||
|
pdfa->states = pBuf;
|
||
|
pdfa->max_states = max_states;
|
||
|
pdfa->indexes = pBuf2;
|
||
|
pdfa->max_indexes = max_indexes;
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
/*
|
||
|
* dump a dfa (debugging purpose).
|
||
|
*/
|
||
|
|
||
|
static const char *line =
|
||
|
"----------------------------------------------------\n";
|
||
|
|
||
|
void
|
||
|
dump_dfa(FILE *f, dfa_t *pdfa)
|
||
|
{
|
||
|
int i;
|
||
|
int att, k;
|
||
|
|
||
|
fprintf(f, line);
|
||
|
fprintf(f, " name : %s\n", pdfa->name);
|
||
|
fprintf(f, " Nb states : %7d, max= %d\n", pdfa->last_state + 1,
|
||
|
pdfa->max_states);
|
||
|
fprintf(f, " Nb Indexes : %7d, max= %d\n", pdfa->last_index,
|
||
|
pdfa->max_indexes);
|
||
|
fprintf(f, " memory needed : %d Mb\n", dfa_size(pdfa) / 1024);
|
||
|
fprintf(f, line);
|
||
|
|
||
|
if (dfa_size(pdfa) > 10000) /* change this value if needed */
|
||
|
return;
|
||
|
fprintf(f, " state | . | O | X | # | att \n");
|
||
|
fprintf(f, line);
|
||
|
for (i = 1; i != pdfa->last_state + 1; i++) {
|
||
|
int *pnext = pdfa->states[i].next;
|
||
|
fprintf(f, " %6d |", i);
|
||
|
fprintf(f, " %6d | %6d | %6d |", pnext[0], pnext[1], pnext[2]);
|
||
|
fprintf(f, " %6d |", pnext[OUT_BOARD]);
|
||
|
att = pdfa->states[i].att;
|
||
|
k = 0;
|
||
|
fprintf(f, " %5d:", att);
|
||
|
while (att != 0 && k < 10) {
|
||
|
fprintf(f, " %4d", pdfa->indexes[att].val);
|
||
|
att = pdfa->indexes[att].next;
|
||
|
k++;
|
||
|
}
|
||
|
if (att != 0)
|
||
|
fprintf(f, " ...");
|
||
|
fprintf(f, "\n");
|
||
|
}
|
||
|
fprintf(f, line);
|
||
|
fflush(f);
|
||
|
}
|
||
|
|
||
|
|
||
|
/*
|
||
|
* Reset a dfa
|
||
|
*/
|
||
|
|
||
|
static void
|
||
|
clean_dfa(dfa_t *pdfa)
|
||
|
{
|
||
|
memset(pdfa->states, 0, pdfa->max_states * sizeof(state_t));
|
||
|
memset(pdfa->indexes, 0, pdfa->max_indexes * sizeof(attrib_t));
|
||
|
pdfa->last_state = 1; /* initial state */
|
||
|
pdfa->last_index = 0;
|
||
|
pdfa->indexes[0].val = -1;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*
|
||
|
* allocate memory for a new dfa
|
||
|
*/
|
||
|
|
||
|
void
|
||
|
new_dfa(dfa_t *pdfa, const char *name)
|
||
|
{
|
||
|
memset(pdfa, 0, sizeof(dfa_t));
|
||
|
resize_dfa(pdfa, DFA_INIT_SIZE, DFA_INIT_SIZE);
|
||
|
clean_dfa(pdfa);
|
||
|
if (name != NULL)
|
||
|
strcpy(pdfa->name, name);
|
||
|
else
|
||
|
strcpy(pdfa->name, "noname ");
|
||
|
|
||
|
if (dfa_verbose > 1)
|
||
|
fprintf(stderr, "dfa %s is born :)\n", pdfa->name);
|
||
|
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* free memory used by a dfa
|
||
|
*/
|
||
|
|
||
|
void
|
||
|
kill_dfa(dfa_t *pdfa)
|
||
|
{
|
||
|
free(pdfa->states);
|
||
|
free(pdfa->indexes);
|
||
|
if (dfa_verbose > 1)
|
||
|
fprintf(stderr, "dfa %s is dead :(\n", pdfa->name);
|
||
|
|
||
|
memset(pdfa, 0, sizeof(dfa_t));
|
||
|
}
|
||
|
|
||
|
|
||
|
/*
|
||
|
* Copy a dfa and resize the destination dfa if necessary.
|
||
|
*/
|
||
|
|
||
|
void
|
||
|
copy_dfa(dfa_t *p_to, dfa_t *p_from)
|
||
|
{
|
||
|
assert(p_to != p_from);
|
||
|
|
||
|
if (p_to->max_states < p_from->last_state)
|
||
|
resize_dfa(p_to, p_from->max_states, p_to->max_indexes);
|
||
|
|
||
|
if (p_to->max_indexes < p_from->last_index)
|
||
|
resize_dfa(p_to, p_to->max_states, p_from->max_indexes);
|
||
|
|
||
|
clean_dfa(p_to);
|
||
|
|
||
|
memcpy(p_to->states, p_from->states,
|
||
|
sizeof(state_t) * (p_from->last_state + 1));
|
||
|
memcpy(p_to->indexes, p_from->indexes,
|
||
|
sizeof(attrib_t) * (p_from->last_index + 1));
|
||
|
|
||
|
p_to->last_state = p_from->last_state;
|
||
|
p_to->last_index = p_from->last_index;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*
|
||
|
* print c dfa:
|
||
|
* print the dfa in c format.
|
||
|
*/
|
||
|
|
||
|
void
|
||
|
print_c_dfa(FILE *of, const char *name, dfa_t *pdfa)
|
||
|
{
|
||
|
int i;
|
||
|
|
||
|
if (sizeof(unsigned short) < 2) {
|
||
|
fprintf(of, "#error shorts too short");
|
||
|
fprintf(stderr, "Error: shorts are expected to be at least 2 bytes long.\n");
|
||
|
exit(EXIT_FAILURE);
|
||
|
}
|
||
|
|
||
|
assert(dfa_minmax_delta(pdfa, -1, 1) > -32768);
|
||
|
if (dfa_minmax_delta(pdfa, -1, 0) > 32768) {
|
||
|
fprintf(of, "#error too many states");
|
||
|
fprintf(stderr, "Error: The dfa states are too disperse. Can't fit delta into a short.\n");
|
||
|
exit(EXIT_FAILURE);
|
||
|
}
|
||
|
|
||
|
if (pdfa->last_index + 1 > 65535) {
|
||
|
fprintf(of, "#error too many states");
|
||
|
fprintf(stderr, "Error: Too many index entries. Can't fit delta into a short.\n");
|
||
|
exit(EXIT_FAILURE);
|
||
|
}
|
||
|
|
||
|
|
||
|
fprintf(of, "\n#include \"dfa-mkpat.h\"\n");
|
||
|
|
||
|
fprintf(of, "static const state_rt_t state_%s[%d] = {\n",
|
||
|
name, pdfa->last_state + 1);
|
||
|
for (i = 0; i != pdfa->last_state + 1; i++) {
|
||
|
int j;
|
||
|
fprintf(of, "{{");
|
||
|
for (j = 0; j < 4; j++) {
|
||
|
int n = pdfa->states[i].next[j];
|
||
|
assert((n == 0) || (abs(n - i) < 32768));
|
||
|
fprintf(of, "%d", n ? n - i : 0);
|
||
|
if (j != 3)
|
||
|
fprintf(of, ",");
|
||
|
}
|
||
|
fprintf(of, "}, %d},%s", pdfa->states[i].att, ((i+1)%3 ? "\t" : "\n"));
|
||
|
}
|
||
|
fprintf(of, "};\n\n");
|
||
|
|
||
|
|
||
|
fprintf(of, "static const attrib_rt_t idx_%s[%d] = {\n",
|
||
|
name, pdfa->last_index + 1);
|
||
|
for (i = 0; i != pdfa->last_index + 1; i++)
|
||
|
fprintf(of, "{%d,%d},%s", pdfa->indexes[i].val, pdfa->indexes[i].next,
|
||
|
((i+1)%4 ? "\t" : "\n"));
|
||
|
fprintf(of, "};\n\n");
|
||
|
|
||
|
fprintf(of, "static dfa_rt_t dfa_%s = {\n", name);
|
||
|
fprintf(of, " \"%s\",\n", name);
|
||
|
fprintf(of, "state_%s,\n", name);
|
||
|
fprintf(of, "idx_%s", name);
|
||
|
fprintf(of, "};\n");
|
||
|
}
|
||
|
|
||
|
|
||
|
/*
|
||
|
* Create a linear dfa from a string and an attributes value
|
||
|
* and resize the dfa if needed.
|
||
|
*
|
||
|
* For example:
|
||
|
* create_dfa(pdfa, "Oo?.", 2001)
|
||
|
* gives:
|
||
|
*
|
||
|
* 1 0,1 0,1,2 0
|
||
|
* (1,{}) -------> (2,{}) -------> (3,{}) -------> (4,{}) ------> (5,{2001})
|
||
|
*
|
||
|
* An empty string force a junk pattern : The scanner will always
|
||
|
* consider this pattern as active.
|
||
|
*
|
||
|
* The possible input symbols are :
|
||
|
*
|
||
|
* '.', ',', '*', '!' for EMPTY expected.
|
||
|
* 'X' for BLACK expected.
|
||
|
* 'O' for WHITE expected.
|
||
|
* 'x' for BLACK|EMPTY expected.
|
||
|
* 'o' for WHITE|EMPTY expected.
|
||
|
* '#', '+', '-', '|' for OUT_BOARD expected.
|
||
|
* '?' for EMPTY|BLACK|WHITE expected.
|
||
|
* '$' for EMPTY|BLACK|WHITE|OUT_BOARD expected.
|
||
|
*/
|
||
|
|
||
|
static void
|
||
|
create_dfa(dfa_t *pdfa, const char *str, int att_val)
|
||
|
{
|
||
|
int new_state;
|
||
|
|
||
|
if (dfa_verbose > 1)
|
||
|
fprintf(stderr, "linear dfa in %s with string: %s\n", pdfa->name, str);
|
||
|
|
||
|
assert(str != NULL);
|
||
|
assert(pdfa->max_states > 1);
|
||
|
assert(pdfa->max_indexes > 1);
|
||
|
|
||
|
clean_dfa(pdfa);
|
||
|
new_state = 1;
|
||
|
for (; *str != '\0' && strchr("$#+-|OoXx.?,!a*", *str); str++) {
|
||
|
memset(pdfa->states[new_state].next, 0, 4 * sizeof(int));
|
||
|
if (strchr("$?.ox,a!*", *str))
|
||
|
pdfa->states[new_state].next[0] = new_state + 1;
|
||
|
if (strchr("$?Oo", *str))
|
||
|
pdfa->states[new_state].next[1] = new_state + 1;
|
||
|
if (strchr("$?Xx", *str))
|
||
|
pdfa->states[new_state].next[2] = new_state + 1;
|
||
|
if (strchr("$#+-|", *str))
|
||
|
pdfa->states[new_state].next[OUT_BOARD] = new_state + 1;
|
||
|
new_state++;
|
||
|
if (new_state >= pdfa->max_states)
|
||
|
resize_dfa(pdfa, pdfa->max_states + DFA_RESIZE_STEP,
|
||
|
pdfa->max_indexes);
|
||
|
}
|
||
|
memset(pdfa->states[new_state].next, 0, 4 * sizeof(int));
|
||
|
|
||
|
pdfa->last_index++;
|
||
|
if (pdfa->last_index >= pdfa->max_indexes)
|
||
|
resize_dfa(pdfa, pdfa->max_states,
|
||
|
pdfa->max_indexes + DFA_RESIZE_STEP);
|
||
|
|
||
|
memset(&(pdfa->indexes[pdfa->last_index]), 0, sizeof(attrib_t));
|
||
|
pdfa->states[new_state].att = pdfa->last_index;
|
||
|
|
||
|
pdfa->indexes[pdfa->states[new_state].att].val = att_val;
|
||
|
pdfa->indexes[pdfa->states[new_state].att].next = 0;
|
||
|
pdfa->last_state = new_state;
|
||
|
}
|
||
|
|
||
|
|
||
|
/**************************
|
||
|
* Test array with a *
|
||
|
* hash table *
|
||
|
**************************/
|
||
|
/* used by sync_product *
|
||
|
* to store visited states*
|
||
|
**************************/
|
||
|
|
||
|
#define MAX_HASH_VALUE 4096
|
||
|
|
||
|
typedef struct entry {
|
||
|
int l, r; /* key */
|
||
|
int val; /* value */
|
||
|
struct entry *pnext; /* NULL if end of list */
|
||
|
} entry_t;
|
||
|
|
||
|
typedef struct test_array {
|
||
|
entry_t *hash[MAX_HASH_VALUE];
|
||
|
} test_array_t;
|
||
|
|
||
|
|
||
|
/* initialize empty lists */
|
||
|
static void
|
||
|
new_test_array(test_array_t *pta)
|
||
|
{
|
||
|
int h;
|
||
|
|
||
|
for (h = 0; h != MAX_HASH_VALUE ; h++)
|
||
|
pta->hash[h] = NULL;
|
||
|
}
|
||
|
|
||
|
/* Searh for (l, r) in the linked list plist */
|
||
|
static int
|
||
|
get_from_entry_list(entry_t *plist, int l, int r)
|
||
|
{
|
||
|
int val = 0;
|
||
|
|
||
|
while (plist != NULL) {
|
||
|
if (plist->l == l && plist->r == r)
|
||
|
val = plist->val;
|
||
|
plist = plist->pnext;
|
||
|
}
|
||
|
return val;
|
||
|
}
|
||
|
|
||
|
/* get the value associated with (l, r) or 0 if none */
|
||
|
static int
|
||
|
get_from_test_array(test_array_t *pta, int l, int r)
|
||
|
{
|
||
|
return get_from_entry_list(pta->hash[(l+r) % MAX_HASH_VALUE], l, r);
|
||
|
}
|
||
|
|
||
|
|
||
|
/* insert a new entry at the beginning of the linked list pplist */
|
||
|
static void
|
||
|
add_to_entry_list(entry_t **pplist, int l, int r, int val)
|
||
|
{
|
||
|
entry_t *new_entry;
|
||
|
|
||
|
/* make sure val > 0: val = 0 is used in get_from_entry_list */
|
||
|
assert(val > 0);
|
||
|
assert(!get_from_entry_list(*pplist, l, r));
|
||
|
|
||
|
new_entry = malloc(sizeof(*new_entry));
|
||
|
if (new_entry == NULL) {
|
||
|
fprintf(stderr, "No memory left for new entry\n");
|
||
|
exit(EXIT_FAILURE);
|
||
|
}
|
||
|
new_entry->pnext = *pplist;
|
||
|
new_entry->l = l;
|
||
|
new_entry->r = r;
|
||
|
new_entry->val = val;
|
||
|
*pplist = new_entry;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* add a value at (l, r) */
|
||
|
static void
|
||
|
add_to_test_array(test_array_t *pta, int l, int r, int val)
|
||
|
{
|
||
|
add_to_entry_list(&(pta->hash[(l+r) % MAX_HASH_VALUE]), l, r, val);
|
||
|
}
|
||
|
|
||
|
/* free the elements of the linked list plist */
|
||
|
static void
|
||
|
free_entry_list(entry_t *plist)
|
||
|
{
|
||
|
entry_t *pentry;
|
||
|
|
||
|
while (plist != NULL) {
|
||
|
pentry = plist;
|
||
|
plist = plist->pnext;
|
||
|
free(pentry);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* free allocated memory */
|
||
|
static void
|
||
|
free_test_array(test_array_t *pta)
|
||
|
{
|
||
|
int h;
|
||
|
|
||
|
for (h = 0; h != MAX_HASH_VALUE; h++) {
|
||
|
free_entry_list(pta->hash[h]);
|
||
|
pta->hash[h] = NULL;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
/*
|
||
|
* Synchronization product between two automata.
|
||
|
*
|
||
|
* L(A) is the set of patterns recognized by the automaton A.
|
||
|
*
|
||
|
* A syncronized product betwenn two acyclic deterministic automata
|
||
|
* A1 and A2 is an acyclic deterministic classifier A1xA2 that
|
||
|
* recognize and classify the languages
|
||
|
* L(A1), L(A2), L(A1 Union A2) and L(A1 Inter A2).
|
||
|
*
|
||
|
* This algorithm do the product and the reduction at the same time.
|
||
|
*
|
||
|
* See Hopcroft & Ullman "The design and analysis of computer algorithms"
|
||
|
* Ed. Addison-Wesley, Reading MA, 1974
|
||
|
* For the theorical aspects.
|
||
|
*/
|
||
|
|
||
|
/* globals used to improve readability */
|
||
|
static dfa_t *gpout, *gpleft, *gpright;
|
||
|
|
||
|
/* Hash table used to test if a state has already been
|
||
|
visited and then give its position in the new automaton. */
|
||
|
static test_array_t gtest;
|
||
|
|
||
|
static void
|
||
|
do_sync_product(int l, int r)
|
||
|
{
|
||
|
int c;
|
||
|
int nextl, nextr;
|
||
|
int state;
|
||
|
|
||
|
state = gpout->last_state;
|
||
|
|
||
|
/* unify the attributes of states l and r */
|
||
|
gpout->states[state].att = union_att(gpout, gpleft, gpleft->states[l].att,
|
||
|
gpright, gpright->states[r].att);
|
||
|
|
||
|
/* scan each possible out-transition */
|
||
|
for (c = 0; c != 4; c++) {
|
||
|
nextl = gpleft->states[l].next[c];
|
||
|
nextr = gpright->states[r].next[c];
|
||
|
assert(nextl < gpleft->last_state + 1);
|
||
|
assert(nextr < gpright->last_state + 1);
|
||
|
|
||
|
/* transition to (0,0) mean no transition at all */
|
||
|
if (nextl != 0 || nextr != 0) {
|
||
|
/* if the out-state doesn't already exist */
|
||
|
if (get_from_test_array(>est, nextl, nextr) == 0) {
|
||
|
/* create it! */
|
||
|
gpout->last_state++;
|
||
|
if (gpout->last_state >= gpout->max_states)
|
||
|
resize_dfa(gpout, gpout->max_states + DFA_RESIZE_STEP,
|
||
|
gpout->max_indexes);
|
||
|
|
||
|
add_to_test_array(>est, nextl, nextr, gpout->last_state);
|
||
|
|
||
|
/* link it */
|
||
|
gpout->states[state].next[c] = gpout->last_state;
|
||
|
|
||
|
/* create also its sub-automaton */
|
||
|
do_sync_product(nextl, nextr);
|
||
|
}
|
||
|
else {
|
||
|
/* link it */
|
||
|
gpout->states[state].next[c] =
|
||
|
get_from_test_array(>est, nextl, nextr);
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
/* no output by c from the actual state */
|
||
|
gpout->states[state].next[c] = 0;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static void
|
||
|
sync_product(dfa_t *pout, dfa_t *pleft, dfa_t *pright)
|
||
|
{
|
||
|
pout->last_index = 0;
|
||
|
|
||
|
if (dfa_verbose > 2) {
|
||
|
fprintf(stderr, "Product between %s and %s\n", pleft->name, pright->name);
|
||
|
fprintf(stderr, "result in %s\n", pout->name);
|
||
|
}
|
||
|
|
||
|
|
||
|
gpout = pout;
|
||
|
gpleft = pleft;
|
||
|
gpright = pright;
|
||
|
new_test_array(>est);
|
||
|
add_to_test_array(>est, 1, 1, 1);
|
||
|
pout->last_state = 1;
|
||
|
|
||
|
do_sync_product(1, 1);
|
||
|
|
||
|
free_test_array(>est);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Init/end functions
|
||
|
*/
|
||
|
|
||
|
void
|
||
|
dfa_init(void)
|
||
|
{
|
||
|
int j;
|
||
|
|
||
|
if (dfa_verbose > 1)
|
||
|
fprintf(stderr, "dfa: init\n");
|
||
|
dfa_was_initialized++;
|
||
|
|
||
|
build_spiral_order();
|
||
|
dfa_prepare_rotation_data();
|
||
|
|
||
|
for (j = 0; j < DFA_BINS; j++)
|
||
|
new_dfa(&(aux_dfa[j]), "binAux ");
|
||
|
new_dfa(&aux_temp, "tempAux ");
|
||
|
}
|
||
|
|
||
|
void
|
||
|
dfa_end(void)
|
||
|
{
|
||
|
int j;
|
||
|
|
||
|
if (dfa_verbose > 1)
|
||
|
fprintf(stderr, "dfa: end\n");
|
||
|
|
||
|
for (j = 0; j < DFA_BINS; j++)
|
||
|
kill_dfa(&(aux_dfa[j]));
|
||
|
kill_dfa(&aux_temp);
|
||
|
dfa_was_initialized--;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*
|
||
|
* Returns max or min jump distance from state to next[next_index] for
|
||
|
* all states. If next_index < 0, then max/min for all for states.
|
||
|
*/
|
||
|
|
||
|
int
|
||
|
dfa_minmax_delta(dfa_t *pdfa, int next_index, int isMin)
|
||
|
{
|
||
|
|
||
|
int ret, i, j;
|
||
|
assert(next_index <= 3);
|
||
|
|
||
|
if (isMin)
|
||
|
ret = 99999;
|
||
|
else
|
||
|
ret = -1;
|
||
|
|
||
|
for (i = 0; i <= pdfa->last_state; i++) {
|
||
|
for (j = 0; j < 4; j++) {
|
||
|
if (j == next_index || next_index < 0) {
|
||
|
int next = pdfa->states[i].next[j];
|
||
|
if (!next)
|
||
|
continue;
|
||
|
if (isMin) {
|
||
|
if (ret > next - i)
|
||
|
ret = next - i;
|
||
|
}
|
||
|
else {
|
||
|
if (ret < next - i)
|
||
|
ret = next - i;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return ret;
|
||
|
}
|
||
|
|
||
|
#define DFA_ALIGN 2
|
||
|
|
||
|
/*
|
||
|
* Re-orders DFA into a canonical form, which does a half-hearted
|
||
|
* attempt to reduce the size of jumps for all states entries.
|
||
|
*/
|
||
|
void
|
||
|
dfa_shuffle(dfa_t *pdfa)
|
||
|
{
|
||
|
struct state *old_states;
|
||
|
int *state_to;
|
||
|
int *state_from;
|
||
|
int *queue1;
|
||
|
int *queue2;
|
||
|
int *tempq;
|
||
|
int next_new_state;
|
||
|
int q1p;
|
||
|
int q2p;
|
||
|
int i, j;
|
||
|
|
||
|
state_to = calloc(pdfa->last_state+1, sizeof(*state_to));
|
||
|
state_from = calloc(pdfa->last_state+1, sizeof(*state_from));
|
||
|
|
||
|
queue1 = malloc((pdfa->last_state+1) * sizeof(*queue1));
|
||
|
queue2 = malloc((pdfa->last_state+1) * sizeof(*queue2));
|
||
|
q1p = 1;
|
||
|
q2p = 0;
|
||
|
queue1[0] = 1; /* i.e. start at state 1. */
|
||
|
state_from[0] = state_to[0] = 0;
|
||
|
state_from[1] = state_to[1] = 1;
|
||
|
next_new_state = 2;
|
||
|
|
||
|
while (q1p) {
|
||
|
for (i = 0; i < q1p; i++) {
|
||
|
for (j = 0; j < 4; j++) {
|
||
|
int n = pdfa->states[queue1[i]].next[j];
|
||
|
/* next_new_state = DFA_ALIGN * ((next_new_state-1) / DFA_ALIGN) + 1;*/
|
||
|
while (n && !state_to[n]) {
|
||
|
state_to[n] = next_new_state;
|
||
|
state_from[next_new_state] = n;
|
||
|
next_new_state++;
|
||
|
queue2[q2p++] = n;
|
||
|
n = pdfa->states[n].next[0];
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
tempq = queue1;
|
||
|
queue1 = queue2;
|
||
|
queue2 = tempq;
|
||
|
q1p = q2p;
|
||
|
q2p = 0;
|
||
|
}
|
||
|
|
||
|
old_states = malloc((pdfa->last_state+1) * sizeof(*old_states));
|
||
|
for (i = 1; i <= pdfa->last_state; i++) {
|
||
|
for (j = 0; j < 4; j++) {
|
||
|
old_states[i].next[j] = pdfa->states[i].next[j];
|
||
|
old_states[i].att = pdfa->states[i].att;
|
||
|
}
|
||
|
}
|
||
|
for (i = 1; i <= pdfa->last_state; i++) {
|
||
|
for (j = 0; j < 4; j++) {
|
||
|
assert(state_to[i] > 0);
|
||
|
pdfa->states[i].next[j] = state_to[old_states[state_from[i]].next[j]];
|
||
|
}
|
||
|
pdfa->states[i].att = old_states[state_from[i]].att;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Calculate the maximal number of patterns matched at one point for
|
||
|
* one transformation. Multiplying this number by 8 gives an upper
|
||
|
* bound for the total number of matched patterns for all
|
||
|
* transformation.
|
||
|
*/
|
||
|
int
|
||
|
dfa_calculate_max_matched_patterns(dfa_t *pdfa)
|
||
|
{
|
||
|
int total_max = 0;
|
||
|
int *state_max = calloc(pdfa->last_state + 1, sizeof(int));
|
||
|
char *queued = calloc(pdfa->last_state + 1, sizeof(char));
|
||
|
int *queue = malloc(pdfa->last_state * sizeof(int));
|
||
|
int queue_start = 0;
|
||
|
int queue_end = 1;
|
||
|
|
||
|
queue[0] = 1;
|
||
|
while (queue_start < queue_end) {
|
||
|
int state = queue[queue_start++];
|
||
|
int k;
|
||
|
|
||
|
/* Increment maximal number of matched patterns for each pattern
|
||
|
* matched at current `state'.
|
||
|
*/
|
||
|
for (k = pdfa->states[state].att; k; k = pdfa->indexes[k].next)
|
||
|
state_max[state]++;
|
||
|
|
||
|
if (total_max < state_max[state])
|
||
|
total_max = state_max[state];
|
||
|
|
||
|
for (k = 0; k < 4; k++) {
|
||
|
int next = pdfa->states[state].next[k];
|
||
|
|
||
|
if (next != 0) {
|
||
|
if (!queued[next]) {
|
||
|
queue[queue_end++] = next;
|
||
|
queued[next] = 1;
|
||
|
}
|
||
|
|
||
|
if (state_max[next] < state_max[state])
|
||
|
state_max[next] = state_max[state];
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
assert(queue_end == pdfa->last_state);
|
||
|
|
||
|
free(state_max);
|
||
|
free(queued);
|
||
|
free(queue);
|
||
|
|
||
|
return total_max;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*
|
||
|
* Merges cached dfas into the master DFA
|
||
|
*/
|
||
|
void
|
||
|
dfa_finalize(dfa_t *pdfa)
|
||
|
{
|
||
|
int j;
|
||
|
int next_bin = aux_count;
|
||
|
int last_bin = aux_count + DFA_BINS - 1;
|
||
|
while (next_bin + 1 != last_bin) {
|
||
|
for (j = aux_count + 1; j <= last_bin; j += 2) {
|
||
|
if (j+1 == next_bin)
|
||
|
copy_dfa(&aux_dfa[next_bin % DFA_BINS], &aux_dfa[j % DFA_BINS]);
|
||
|
else
|
||
|
sync_product(&aux_dfa[next_bin % DFA_BINS],
|
||
|
&aux_dfa[j % DFA_BINS],
|
||
|
&aux_dfa[(j+1) % DFA_BINS]);
|
||
|
next_bin++;
|
||
|
}
|
||
|
last_bin = next_bin - 1;
|
||
|
aux_count--;
|
||
|
next_bin = aux_count;
|
||
|
}
|
||
|
copy_dfa(pdfa, &aux_dfa[last_bin % DFA_BINS]);
|
||
|
|
||
|
compactify_att(pdfa);
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Add a new string with attribute att_val into the dfa.
|
||
|
* if the new size of the dfa respect some size conditions
|
||
|
* return increase in kB or -1 if the pattern was rejected.
|
||
|
* This function never rejects string of length <= 1.
|
||
|
*/
|
||
|
|
||
|
float
|
||
|
dfa_add_string(dfa_t *pdfa, const char *str, int pattern_index, int ll)
|
||
|
{
|
||
|
dfa_t *new_dfa = &(aux_dfa[aux_count % DFA_BINS]);
|
||
|
dfa_t *old_dfa = &(aux_dfa[(aux_count+1) % DFA_BINS]);
|
||
|
float ratio;
|
||
|
|
||
|
if (dfa_verbose > 1) {
|
||
|
fprintf(stderr, "Adding to dfa %s the string: %s\n", pdfa->name, str);
|
||
|
fprintf(stderr, " pat_ind: %d; rotation: %d at bin: %d\n",
|
||
|
pattern_index, ll, aux_count);
|
||
|
}
|
||
|
|
||
|
assert(dfa_was_initialized > 0);
|
||
|
assert(pdfa != NULL);
|
||
|
|
||
|
create_dfa(&aux_temp, str, pattern_index);
|
||
|
|
||
|
/* then we do the synchronization product with dfa */
|
||
|
sync_product(new_dfa, old_dfa, &aux_temp);
|
||
|
aux_count++;
|
||
|
|
||
|
ratio = 1;
|
||
|
if (dfa_size(old_dfa) > 0)
|
||
|
ratio = (float) (dfa_size(new_dfa) / dfa_size(old_dfa));
|
||
|
|
||
|
return ratio;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Used for quick string rotation. */
|
||
|
static int dfa_rotation_data[DFA_BASE * DFA_BASE];
|
||
|
|
||
|
static void
|
||
|
dfa_prepare_rotation_data(void)
|
||
|
{
|
||
|
int k;
|
||
|
|
||
|
for (k = 0; k < DFA_MAX_ORDER; k++)
|
||
|
dfa_rotation_data[DFA_POS(0, 0) + spiral[k][0]] = k;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Create a transformation of `string' and store it in
|
||
|
* `rotated_string'. The latter must be of at least DFA_MAX_ORDER
|
||
|
* characters in length. */
|
||
|
void
|
||
|
dfa_rotate_string(char *rotated_string, const char *string, int transformation)
|
||
|
{
|
||
|
if (transformation > 0) {
|
||
|
int k;
|
||
|
int length = strlen(string);
|
||
|
int new_length = 0;
|
||
|
|
||
|
memset(rotated_string, '$', DFA_MAX_ORDER);
|
||
|
|
||
|
for (k = 0; k < length; k++) {
|
||
|
if (string[k] != '$') {
|
||
|
int string_position = dfa_rotation_data[DFA_POS(0, 0)
|
||
|
+ spiral[k][transformation]];
|
||
|
rotated_string[string_position] = string[k];
|
||
|
if (string_position + 1 > new_length)
|
||
|
new_length = string_position + 1;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
rotated_string[new_length] = 0;
|
||
|
}
|
||
|
else
|
||
|
strcpy(rotated_string, string);
|
||
|
}
|
||
|
|
||
|
|
||
|
/*
|
||
|
* Build a pattern string from a pattern. `str' must refer a buffer
|
||
|
* of size greater than DFA_MAX_ORDER.
|
||
|
*/
|
||
|
void
|
||
|
pattern_2_string(struct pattern *pat, struct patval_b *elements,
|
||
|
char *str, int ci, int cj)
|
||
|
{
|
||
|
char work_space[DFA_MAX_BOARD * 4][DFA_MAX_BOARD * 4];
|
||
|
int m, n; /* anchor position */
|
||
|
int edges, borders, to_test;
|
||
|
int i, j, k;
|
||
|
char c;
|
||
|
|
||
|
m = DFA_MAX_BOARD * 2 + ci;
|
||
|
n = DFA_MAX_BOARD * 2 + cj; /* position of the anchor */
|
||
|
|
||
|
assert(dfa_was_initialized);
|
||
|
memset(str, 0, DFA_MAX_ORDER);
|
||
|
memset(work_space, '#', sizeof(work_space));
|
||
|
|
||
|
if (dfa_verbose > 0)
|
||
|
fprintf(stderr, "converting pattern into string.\n");
|
||
|
|
||
|
/* basic edge constraints */
|
||
|
for (i = DFA_MAX_BOARD; i != DFA_MAX_BOARD * 3; i++)
|
||
|
for (j = DFA_MAX_BOARD; j != DFA_MAX_BOARD * 3; j++)
|
||
|
work_space[i][j] = '$';
|
||
|
|
||
|
/* pattern mask */
|
||
|
for (i = pat->mini + m; i != pat->maxi + m + 1; i++)
|
||
|
for (j = pat->minj + n; j != pat->maxj + n + 1; j++)
|
||
|
work_space[i][j] = '?';
|
||
|
|
||
|
/* more advanced edge constraints */
|
||
|
|
||
|
/* South constraint */
|
||
|
if (pat->edge_constraints & SOUTH_EDGE) {
|
||
|
for (i = m + pat->maxi + 1; i != DFA_MAX_BOARD * 3; i++)
|
||
|
for (j = 0; j != DFA_MAX_BOARD * 3; j++)
|
||
|
work_space[i][j] = '-';
|
||
|
}
|
||
|
|
||
|
/* East constraint */
|
||
|
if (pat->edge_constraints & EAST_EDGE) {
|
||
|
for (i = 0; i != DFA_MAX_BOARD * 3; i++)
|
||
|
for (j = n + pat->maxj + 1; j != DFA_MAX_BOARD * 3; j++)
|
||
|
work_space[i][j] = '|';
|
||
|
}
|
||
|
|
||
|
/* North constraint */
|
||
|
if (pat->edge_constraints & NORTH_EDGE) {
|
||
|
for (i = 0; i != m + pat->mini; i++)
|
||
|
for (j = 0; j != DFA_MAX_BOARD * 4; j++)
|
||
|
work_space[i][j] = '-';
|
||
|
}
|
||
|
|
||
|
/* West constraint */
|
||
|
if (pat->edge_constraints & WEST_EDGE) {
|
||
|
/* take care not to erase the south edge constraint */
|
||
|
for (i = 0; i != m + pat->maxi + 1; i++)
|
||
|
for (j = 0; j != n + pat->minj; j++)
|
||
|
work_space[i][j] = '|';
|
||
|
|
||
|
/* complete the last corner only if necessary */
|
||
|
if (!(pat->edge_constraints & SOUTH_EDGE)) {
|
||
|
for (i = m + pat->maxi + 1; i != DFA_MAX_BOARD * 3; i++)
|
||
|
for (j = 0; j != n + pat->minj; j++)
|
||
|
work_space[i][j] = '|';
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* dump */
|
||
|
if (dfa_verbose > 4) {
|
||
|
for (i = DFA_MAX_BOARD - 1; i != DFA_MAX_BOARD * 3 + 1; i++) {
|
||
|
for (j = DFA_MAX_BOARD - 1; j != DFA_MAX_BOARD * 3 + 1; j++) {
|
||
|
if (i == m && j == n)
|
||
|
fprintf(stderr, "s"); /* mark the anchor */
|
||
|
else
|
||
|
fprintf(stderr, "%c", work_space[i][j]);
|
||
|
}
|
||
|
fprintf(stderr, "\n");
|
||
|
}
|
||
|
fprintf(stderr, "\n");
|
||
|
}
|
||
|
|
||
|
/* pattern representation on the work space */
|
||
|
for (k = 0; k != pat->patlen; k++) {
|
||
|
c = EXPECTED_VAL(elements[k].att);
|
||
|
assert(work_space[m + elements[k].x - ci][n + elements[k].y - cj] == '?');
|
||
|
work_space[m + elements[k].x - ci][n + elements[k].y - cj] = c;
|
||
|
}
|
||
|
|
||
|
/* dump */
|
||
|
if (dfa_verbose > 3) {
|
||
|
for (i = DFA_MAX_BOARD - 1; i != DFA_MAX_BOARD * 3 + 1; i++) {
|
||
|
for (j = DFA_MAX_BOARD - 1; j != DFA_MAX_BOARD * 3 + 1; j++) {
|
||
|
if (i == m && j == n)
|
||
|
fprintf(stderr, "s"); /* mark the anchor */
|
||
|
else
|
||
|
fprintf(stderr, "%c", work_space[i][j]);
|
||
|
}
|
||
|
fprintf(stderr, "\n");
|
||
|
}
|
||
|
fprintf(stderr, "\n");
|
||
|
}
|
||
|
|
||
|
/* Now we can build the smallest pattern string possible
|
||
|
* from the anchor */
|
||
|
|
||
|
to_test = pat->patlen; /* How many positions left to test ? */
|
||
|
edges = pat->edge_constraints; /* how many constraint tested ? */
|
||
|
borders = 0xF;
|
||
|
/* we must test at least one intersection by border for
|
||
|
* patterns like
|
||
|
*
|
||
|
* ???
|
||
|
* O.O
|
||
|
* ???
|
||
|
*
|
||
|
* To ensure edge position.
|
||
|
*/
|
||
|
|
||
|
for (k = 0;
|
||
|
(k != DFA_MAX_ORDER - 1) && ((borders > 0) || edges || to_test > 0);
|
||
|
k++) {
|
||
|
j = spiral[k][0] % DFA_BASE;
|
||
|
if (j >= DFA_MAX_BOARD)
|
||
|
j -= DFA_BASE;
|
||
|
if (j <= -DFA_MAX_BOARD)
|
||
|
j += DFA_BASE;
|
||
|
i = (spiral[k][0] - j) / DFA_BASE;
|
||
|
|
||
|
if (i == pat->maxi)
|
||
|
borders &= ~SOUTH_EDGE;
|
||
|
if (i == pat->mini)
|
||
|
borders &= ~NORTH_EDGE;
|
||
|
if (j == pat->maxj)
|
||
|
borders &= ~EAST_EDGE;
|
||
|
if (j == pat->minj)
|
||
|
borders &= ~WEST_EDGE;
|
||
|
|
||
|
assert(m + i < DFA_MAX_BOARD * 3 && m + i < DFA_MAX_BOARD * 3);
|
||
|
str[k] = work_space[m + i][n + j];
|
||
|
assert(strchr("XOxo.,a!?$#|-+", str[k]));
|
||
|
|
||
|
if (strchr("XOxo.,a!", str[k]))
|
||
|
to_test--;
|
||
|
if (strchr("#|-+", str[k])) {
|
||
|
if (i > pat->maxi)
|
||
|
edges &= ~SOUTH_EDGE;
|
||
|
if (i < pat->mini)
|
||
|
edges &= ~NORTH_EDGE;
|
||
|
if (j > pat->maxj)
|
||
|
edges &= ~EAST_EDGE;
|
||
|
if (j < pat->minj)
|
||
|
edges &= ~WEST_EDGE;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
assert(k < DFA_MAX_ORDER);
|
||
|
str[k] = '\0'; /* end of string */
|
||
|
|
||
|
if (0 && dfa_verbose > 0)
|
||
|
fprintf(stderr, "converted pattern %s into string: %s\n", pat->name, str);
|
||
|
}
|
||
|
|
||
|
|
||
|
/**************************************
|
||
|
* Experimental DFA builder *
|
||
|
**************************************/
|
||
|
|
||
|
/* This builder differs from the one above in that it builds the whole dfa
|
||
|
* at once. That is, it must have all the patterns to build and cannot add
|
||
|
* pattern by pattern. Currently, it is only used in DFA size optimization
|
||
|
* (it seems to be significantly faster).
|
||
|
*/
|
||
|
|
||
|
|
||
|
/* Allocate a new dfa_attrib structure from a dynamic array. */
|
||
|
static dfa_attrib *
|
||
|
dfa_attrib_new(dfa_attrib_array *array, int string_index)
|
||
|
{
|
||
|
dfa_attrib *attribute;
|
||
|
|
||
|
if (array->allocated == DFA_ATTRIB_BLOCK_SIZE) {
|
||
|
dfa_attrib_block *new_block = malloc(sizeof(*new_block));
|
||
|
assert(new_block);
|
||
|
|
||
|
new_block->previous = array->last_block;
|
||
|
array->last_block = new_block;
|
||
|
array->allocated = 0;
|
||
|
}
|
||
|
|
||
|
attribute = &(array->last_block->attrib[array->allocated++]);
|
||
|
attribute->next = NULL;
|
||
|
attribute->string_index = string_index;
|
||
|
|
||
|
return attribute;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Initialize dfa_attrib_array structure. */
|
||
|
static void
|
||
|
dfa_attrib_array_reset(dfa_attrib_array *array)
|
||
|
{
|
||
|
array->last_block = NULL;
|
||
|
array->allocated = DFA_ATTRIB_BLOCK_SIZE;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Clear a dynamic array by freeing all blocks befor `cutoff_point'. */
|
||
|
static void
|
||
|
dfa_attrib_array_partially_clear(dfa_attrib_block *cutoff_point)
|
||
|
{
|
||
|
if (cutoff_point) {
|
||
|
dfa_attrib_block *block = cutoff_point->previous;
|
||
|
|
||
|
while (block) {
|
||
|
dfa_attrib_block *previous = block->previous;
|
||
|
free(block);
|
||
|
block = previous;
|
||
|
}
|
||
|
|
||
|
cutoff_point->previous = NULL;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Clear a dynamic array completely. All blocks are freed. */
|
||
|
static void
|
||
|
dfa_attrib_array_clear(dfa_attrib_array *array)
|
||
|
{
|
||
|
if (array->last_block) {
|
||
|
dfa_attrib_array_partially_clear(array->last_block);
|
||
|
free(array->last_block);
|
||
|
array->last_block = NULL;
|
||
|
}
|
||
|
|
||
|
array->allocated = DFA_ATTRIB_BLOCK_SIZE;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Allocate a new dfa_node structure in a DFA graph. */
|
||
|
static dfa_node *
|
||
|
dfa_node_new(dfa_graph *graph)
|
||
|
{
|
||
|
dfa_node *node;
|
||
|
|
||
|
if (graph->allocated == DFA_NODE_BLOCK_SIZE) {
|
||
|
dfa_node_block *new_block = malloc(sizeof(*new_block));
|
||
|
assert(new_block);
|
||
|
|
||
|
new_block->previous = graph->last_block;
|
||
|
graph->last_block = new_block;
|
||
|
graph->allocated = 0;
|
||
|
}
|
||
|
|
||
|
graph->num_nodes++;
|
||
|
node = &(graph->last_block->node[graph->allocated++]);
|
||
|
memset(node, 0, sizeof(dfa_node));
|
||
|
|
||
|
return node;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* This is a hash table used to quickly find a DFA node using a linked list
|
||
|
* of its attributes as a key.
|
||
|
*/
|
||
|
static dfa_hash_entry *dfa_hash_table[DFA_HASH_TABLE_SIZE];
|
||
|
static dfa_hash_block *dfa_hash_last_block = NULL;
|
||
|
static int dfa_hash_allocated;
|
||
|
|
||
|
|
||
|
/* Allocate a dfa_entry structure dynamically. */
|
||
|
static dfa_hash_entry *
|
||
|
dfa_hash_entry_new(void)
|
||
|
{
|
||
|
if (dfa_hash_allocated == DFA_HASH_BLOCK_SIZE) {
|
||
|
dfa_hash_block *new_block = malloc(sizeof(*new_block));
|
||
|
assert(new_block);
|
||
|
|
||
|
new_block->previous = dfa_hash_last_block;
|
||
|
dfa_hash_last_block = new_block;
|
||
|
dfa_hash_allocated = 0;
|
||
|
}
|
||
|
|
||
|
return &(dfa_hash_last_block->entry[dfa_hash_allocated++]);
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Clear the hash table completely. Used after having finished a graph level. */
|
||
|
static void
|
||
|
dfa_hash_clear(void)
|
||
|
{
|
||
|
memset(dfa_hash_table, 0, DFA_HASH_TABLE_SIZE * sizeof(dfa_hash_entry *));
|
||
|
|
||
|
if (dfa_hash_last_block) {
|
||
|
dfa_hash_block *block = dfa_hash_last_block->previous;
|
||
|
|
||
|
while (block) {
|
||
|
dfa_hash_block *previous = block->previous;
|
||
|
free(block);
|
||
|
block = previous;
|
||
|
}
|
||
|
|
||
|
dfa_hash_last_block->previous = NULL;
|
||
|
dfa_hash_allocated = 0;
|
||
|
}
|
||
|
else
|
||
|
dfa_hash_allocated = DFA_HASH_BLOCK_SIZE;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Compute the hash value of a key (linked list of attributes). */
|
||
|
static int
|
||
|
dfa_hash_value(dfa_attrib *key)
|
||
|
{
|
||
|
int hash_value = DFA_HASH_VALUE_1 * key->string_index;
|
||
|
if (key->next) {
|
||
|
hash_value += DFA_HASH_VALUE_2 * key->next->string_index;
|
||
|
if (key->next->next)
|
||
|
hash_value += DFA_HASH_VALUE_3 * key->next->next->string_index;
|
||
|
}
|
||
|
|
||
|
return hash_value % DFA_HASH_TABLE_SIZE;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Search for a node with a given key in the hash table. */
|
||
|
static dfa_node *
|
||
|
dfa_hash_search(dfa_attrib *key)
|
||
|
{
|
||
|
int hash_value = dfa_hash_value(key);
|
||
|
dfa_hash_entry *entry;
|
||
|
|
||
|
for (entry = dfa_hash_table[hash_value]; entry; entry = entry->next) {
|
||
|
dfa_attrib *left = key;
|
||
|
dfa_attrib *right = entry->key;
|
||
|
|
||
|
while (left && right) {
|
||
|
if (left->string_index != right->string_index)
|
||
|
break;
|
||
|
|
||
|
left = left->next;
|
||
|
right = right->next;
|
||
|
}
|
||
|
|
||
|
if (!left && !right)
|
||
|
return entry->value;
|
||
|
}
|
||
|
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Add a node to the table. The list of strings which pass through it is used
|
||
|
* as a key.
|
||
|
*/
|
||
|
static void
|
||
|
dfa_hash_add_node(dfa_node *node)
|
||
|
{
|
||
|
int hash_value = dfa_hash_value(node->passing_strings);
|
||
|
dfa_hash_entry *entry;
|
||
|
|
||
|
entry = dfa_hash_entry_new();
|
||
|
entry->next = dfa_hash_table[hash_value];
|
||
|
dfa_hash_table[hash_value] = entry;
|
||
|
|
||
|
entry->key = node->passing_strings;
|
||
|
entry->value = node;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* DFA iterator. Used to walk the array of nodes in backward direction. */
|
||
|
static dfa_node_block *dfa_iterator_block;
|
||
|
static int dfa_iterator_node_num;
|
||
|
|
||
|
|
||
|
/* Reset the iterator. The last added node of the specified graph becomes
|
||
|
* the current node.
|
||
|
*/
|
||
|
static dfa_node*
|
||
|
dfa_iterator_reset(dfa_graph *graph)
|
||
|
{
|
||
|
assert(graph->last_block);
|
||
|
|
||
|
if (graph->allocated > 0) {
|
||
|
dfa_iterator_block = graph->last_block;
|
||
|
dfa_iterator_node_num = graph->allocated - 1;
|
||
|
}
|
||
|
else {
|
||
|
dfa_iterator_block = graph->last_block->previous;
|
||
|
assert(dfa_iterator_block);
|
||
|
dfa_iterator_node_num = DFA_NODE_BLOCK_SIZE - 1;
|
||
|
}
|
||
|
|
||
|
return &(dfa_iterator_block->node[dfa_iterator_node_num]);
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Shift the current node pointer one node backwards. */
|
||
|
static dfa_node*
|
||
|
dfa_iterate(void)
|
||
|
{
|
||
|
dfa_iterator_node_num--;
|
||
|
if (dfa_iterator_node_num < 0) {
|
||
|
dfa_iterator_block = dfa_iterator_block->previous;
|
||
|
assert(dfa_iterator_block);
|
||
|
dfa_iterator_node_num = DFA_NODE_BLOCK_SIZE - 1;
|
||
|
}
|
||
|
|
||
|
return &(dfa_iterator_block->node[dfa_iterator_node_num]);
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Initialize a dfa_graph structure. */
|
||
|
void
|
||
|
dfa_graph_reset(dfa_graph *graph)
|
||
|
{
|
||
|
graph->num_nodes = 0;
|
||
|
graph->root = NULL;
|
||
|
|
||
|
graph->last_block = NULL;
|
||
|
graph->allocated = DFA_NODE_BLOCK_SIZE;
|
||
|
dfa_attrib_array_reset(&(graph->attributes));
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Free all resources associated with the specified DFA graph. */
|
||
|
static void
|
||
|
dfa_graph_clear(dfa_graph *graph)
|
||
|
{
|
||
|
dfa_node_block *block = graph->last_block;
|
||
|
graph->num_nodes = 0;
|
||
|
graph->root = NULL;
|
||
|
|
||
|
while (block) {
|
||
|
dfa_node_block *previous = block->previous;
|
||
|
free(block);
|
||
|
block = previous;
|
||
|
}
|
||
|
|
||
|
graph->last_block = NULL;
|
||
|
graph->allocated = DFA_NODE_BLOCK_SIZE;
|
||
|
|
||
|
dfa_attrib_array_clear(&(graph->attributes));
|
||
|
}
|
||
|
|
||
|
|
||
|
/* dfa_graph_build_level() builds a level of a graph. Level `n' is a set of
|
||
|
* nodes which correspond to n's element of a string. When matching using a
|
||
|
* dfa, nodes of level `n' are only checked at (n + 1)'s iteration (root node
|
||
|
* is considered to be level -1, but is matched at iteration 0).
|
||
|
*/
|
||
|
static void
|
||
|
dfa_graph_build_level(dfa_graph *graph, char **strings, int level,
|
||
|
dfa_node *terminal_node,
|
||
|
dfa_attrib_array *passing_strings_array)
|
||
|
{
|
||
|
int save_num_nodes = graph->num_nodes;
|
||
|
dfa_attrib_block *cutoff_point;
|
||
|
dfa_node *node;
|
||
|
dfa_node *this_terminal_node = dfa_iterator_reset(graph);
|
||
|
|
||
|
cutoff_point = passing_strings_array->last_block;
|
||
|
dfa_hash_clear();
|
||
|
|
||
|
/* Walk through all nodes of the previous level (backwards, but that doesn't
|
||
|
* matter - it's just because iterator works that way).
|
||
|
*/
|
||
|
for (node = this_terminal_node; node != terminal_node; node = dfa_iterate()) {
|
||
|
int k;
|
||
|
int num_masks = 0;
|
||
|
char mask[4];
|
||
|
dfa_attrib *passing_string;
|
||
|
dfa_attrib **link = &(node->attributes);
|
||
|
dfa_attrib *new_passing_strings[4];
|
||
|
dfa_attrib **new_link[4];
|
||
|
|
||
|
/* Calculate all different masks for subnodes. For instance, if there are
|
||
|
* three strings passing through a node of level 1, say "X$...", "Xx..."
|
||
|
* and "XO...", there will be three masks: 8 (stands for '#'), 5 ('X' and
|
||
|
* '.') and 2 ('O'). String "X$..." will pass further through all three
|
||
|
* subnodes, "Xx..." - through subnode corresponding to mask 5 and string
|
||
|
* "XO..." - through subnode corresponding to mask 2.
|
||
|
*/
|
||
|
for (passing_string = node->passing_strings;
|
||
|
passing_string && num_masks < 4;
|
||
|
passing_string = passing_string->next) {
|
||
|
int index = passing_string->string_index;
|
||
|
char string_mask = strings[index][level];
|
||
|
|
||
|
if (string_mask) {
|
||
|
int limit = num_masks;
|
||
|
|
||
|
for (k = 0; k < limit; k++) {
|
||
|
char common_branches = string_mask & mask[k];
|
||
|
|
||
|
if (common_branches && common_branches != mask[k]) {
|
||
|
/* Split a mask, since the string passes through a "part" of it. */
|
||
|
mask[k] ^= common_branches;
|
||
|
mask[num_masks++] = common_branches;
|
||
|
}
|
||
|
|
||
|
string_mask ^= common_branches;
|
||
|
}
|
||
|
|
||
|
if (string_mask) {
|
||
|
/* If there is no mask corresponding to a (part) of the string's
|
||
|
* mask, add it now.
|
||
|
*/
|
||
|
mask[num_masks++] = string_mask;
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
/* If the string ends at this level, add its index to the list of
|
||
|
* matched strings of the current node. Not used at the moment,
|
||
|
* since this builder isn't used for actual DFA building.
|
||
|
*/
|
||
|
*link = dfa_attrib_new(&(graph->attributes), index);
|
||
|
link = &((*link)->next);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
for (k = 0; k < num_masks; k++)
|
||
|
new_link[k] = &(new_passing_strings[k]);
|
||
|
|
||
|
/* Now, for each mask, create a list of all strings which will follow it
|
||
|
* (pass through a node corresponding to it). It is possible to merge this
|
||
|
* loop with the previous one, but it is simplier to keep them separated.
|
||
|
*/
|
||
|
for (passing_string = node->passing_strings; passing_string;
|
||
|
passing_string = passing_string->next) {
|
||
|
int index = passing_string->string_index;
|
||
|
|
||
|
for (k = 0; k < num_masks; k++) {
|
||
|
if (strings[index][level] & mask[k]) {
|
||
|
*(new_link[k]) = dfa_attrib_new(passing_strings_array, index);
|
||
|
new_link[k] = &((*(new_link[k]))->next);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Finally, create new nodes for the masks when necessary. */
|
||
|
for (k = 0; k < num_masks; k++) {
|
||
|
int i;
|
||
|
|
||
|
/* Maybe we have already added such a node? */
|
||
|
dfa_node *new_node = dfa_hash_search(new_passing_strings[k]);
|
||
|
|
||
|
if (!new_node) {
|
||
|
/* If not, create it, save the list of passing strings and add the
|
||
|
* new node to hash table.
|
||
|
*/
|
||
|
new_node = dfa_node_new(graph);
|
||
|
new_node->passing_strings = new_passing_strings[k];
|
||
|
|
||
|
dfa_hash_add_node(new_node);
|
||
|
}
|
||
|
|
||
|
/* At this moment we convert the masks to actual transitions. These are
|
||
|
* also unused till we use this builder for actual DFA creation.
|
||
|
*/
|
||
|
for (i = 0; i < 4; i++) {
|
||
|
if (mask[k] & (1 << i))
|
||
|
node->branch[i] = new_node;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* Free the lists of passing strings for the previous level. Useful if we
|
||
|
* building an exceptionally huge DFA.
|
||
|
*/
|
||
|
dfa_attrib_array_partially_clear(cutoff_point);
|
||
|
|
||
|
if (graph->num_nodes != save_num_nodes) {
|
||
|
/* If we have added any nodes, this level is not the last one. */
|
||
|
dfa_graph_build_level(graph, strings, level + 1, this_terminal_node,
|
||
|
passing_strings_array);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Convert a pattern to a string of masks. */
|
||
|
static char *
|
||
|
dfa_prepare_string(const char *string)
|
||
|
{
|
||
|
int k;
|
||
|
int l = strlen(string);
|
||
|
char *dfa_string = malloc(l + 1);
|
||
|
assert(dfa_string);
|
||
|
|
||
|
for (k = 0; k < l; k++) {
|
||
|
switch (string[k]) {
|
||
|
case '$': dfa_string[k] = 15; break; /* 1111 */
|
||
|
case '-':
|
||
|
case '|':
|
||
|
case '+':
|
||
|
case '#': dfa_string[k] = 8; break; /* 1000 */
|
||
|
case '.':
|
||
|
case ',':
|
||
|
case '!':
|
||
|
case 'a': dfa_string[k] = 1; break; /* 0001 */
|
||
|
case '?': dfa_string[k] = 7; break; /* 0111 */
|
||
|
case 'O': dfa_string[k] = 2; break; /* 0010 */
|
||
|
case 'X': dfa_string[k] = 4; break; /* 0100 */
|
||
|
case 'o': dfa_string[k] = 3; break; /* 0011 */
|
||
|
case 'x': dfa_string[k] = 5; break; /* 0101 */
|
||
|
|
||
|
default: assert(0); /* Shouldn't happen. */
|
||
|
}
|
||
|
}
|
||
|
|
||
|
dfa_string[l] = 0;
|
||
|
return dfa_string;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Initialize a dfa_patterns structure. */
|
||
|
void
|
||
|
dfa_patterns_reset(dfa_patterns *patterns)
|
||
|
{
|
||
|
patterns->num_patterns = 0;
|
||
|
patterns->patterns = NULL;
|
||
|
patterns->last_pattern = NULL;
|
||
|
|
||
|
dfa_graph_reset(&(patterns->graph));
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Clear the graph and reset all fields of a dfa_patterns structure. */
|
||
|
void
|
||
|
dfa_patterns_clear(dfa_patterns *patterns)
|
||
|
{
|
||
|
dfa_pattern *pattern = patterns->patterns;
|
||
|
|
||
|
while (pattern) {
|
||
|
int k;
|
||
|
dfa_pattern *next = pattern->next;
|
||
|
|
||
|
for (k = 0; k < pattern->num_variations; k++)
|
||
|
free(pattern->variation[k]);
|
||
|
|
||
|
free(pattern);
|
||
|
pattern = next;
|
||
|
}
|
||
|
|
||
|
patterns->num_patterns = 0;
|
||
|
patterns->patterns = NULL;
|
||
|
patterns->last_pattern = NULL;
|
||
|
|
||
|
dfa_graph_clear(&(patterns->graph));
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Add a pattern to a list. If `index' is equal to the index of the last
|
||
|
* added pattern, add a variation to that pattern instead.
|
||
|
*/
|
||
|
void
|
||
|
dfa_patterns_add_pattern(dfa_patterns *patterns, const char *string, int index)
|
||
|
{
|
||
|
dfa_pattern *pattern = NULL;
|
||
|
|
||
|
if (index == patterns->num_patterns - 1) {
|
||
|
assert(patterns->last_pattern);
|
||
|
assert(patterns->last_pattern->num_variations < 8);
|
||
|
|
||
|
pattern = patterns->last_pattern;
|
||
|
}
|
||
|
else {
|
||
|
assert(patterns->num_patterns <= index);
|
||
|
|
||
|
while (patterns->num_patterns <= index) {
|
||
|
patterns->num_patterns++;
|
||
|
pattern = malloc(sizeof(*pattern));
|
||
|
pattern->num_variations = 0;
|
||
|
|
||
|
if (patterns->last_pattern)
|
||
|
patterns->last_pattern->next = pattern;
|
||
|
else
|
||
|
patterns->patterns = pattern;
|
||
|
patterns->last_pattern = pattern;
|
||
|
}
|
||
|
|
||
|
pattern->current_variation = 0;
|
||
|
pattern->next = NULL;
|
||
|
}
|
||
|
|
||
|
pattern->variation[pattern->num_variations++] = dfa_prepare_string(string);
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Set the variation of the last pattern. Can be used in actual DFA building
|
||
|
* or to set a hint (results of the previous optimization) for optimization.
|
||
|
*/
|
||
|
void
|
||
|
dfa_patterns_set_last_pattern_variation(dfa_patterns *patterns, int variation)
|
||
|
{
|
||
|
assert(patterns->last_pattern);
|
||
|
assert(patterns->last_pattern->num_variations > variation);
|
||
|
|
||
|
patterns->last_pattern->current_variation = variation;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Make the shortest variation of the last pattern its current variation. It
|
||
|
* is used as a starting point in DFA optimization process.
|
||
|
*/
|
||
|
void
|
||
|
dfa_patterns_select_shortest_variation(dfa_patterns *patterns)
|
||
|
{
|
||
|
int k;
|
||
|
int min_length;
|
||
|
dfa_pattern *pattern = patterns->last_pattern;
|
||
|
assert(pattern);
|
||
|
|
||
|
pattern->current_variation = 0;
|
||
|
min_length = strlen(pattern->variation[0]);
|
||
|
for (k = 1; k < pattern->num_variations; k++) {
|
||
|
int length = strlen(pattern->variation[k]);
|
||
|
|
||
|
if (length < min_length) {
|
||
|
pattern->current_variation = k;
|
||
|
min_length = length;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
/* Build a DFA graph for a list of patterns. */
|
||
|
void
|
||
|
dfa_patterns_build_graph(dfa_patterns *patterns)
|
||
|
{
|
||
|
int k = 0;
|
||
|
char **strings;
|
||
|
dfa_attrib_array passing_strings_array;
|
||
|
dfa_attrib **link;
|
||
|
dfa_node *error_state;
|
||
|
dfa_graph *graph = &(patterns->graph);
|
||
|
dfa_pattern *pattern;
|
||
|
|
||
|
strings = malloc(patterns->num_patterns * sizeof(*strings));
|
||
|
assert(strings);
|
||
|
|
||
|
dfa_graph_clear(graph);
|
||
|
dfa_attrib_array_reset(&passing_strings_array);
|
||
|
|
||
|
/* Error state node is used as a terminator for level -1 (root node). */
|
||
|
error_state = dfa_node_new(graph);
|
||
|
graph->root = dfa_node_new(graph);
|
||
|
|
||
|
/* Add all strings as passing through root node (level -1). */
|
||
|
link = &(graph->root->passing_strings);
|
||
|
for (pattern = patterns->patterns; pattern; pattern = pattern->next, k++) {
|
||
|
if (pattern->num_variations > 0) {
|
||
|
assert(pattern->current_variation < pattern->num_variations);
|
||
|
strings[k] = pattern->variation[pattern->current_variation];
|
||
|
|
||
|
*link = dfa_attrib_new(&passing_strings_array, k);
|
||
|
link = &((*link)->next);
|
||
|
}
|
||
|
else
|
||
|
strings[k] = NULL;
|
||
|
}
|
||
|
|
||
|
dfa_graph_build_level(graph, strings, 0, error_state, &passing_strings_array);
|
||
|
|
||
|
free(strings);
|
||
|
dfa_attrib_array_clear(&passing_strings_array);
|
||
|
}
|
||
|
|
||
|
|
||
|
/* dfa_patterns_optimize_variations() tries to reduce the size of DFA by
|
||
|
* altering pattern variations (in fact, transformations). The algorithm
|
||
|
* is to change several patterns' variations and if this happens to give
|
||
|
* size reduction, to keep the change, otherwise, revert.
|
||
|
*
|
||
|
* This function contains many heuristically chosen values for variation
|
||
|
* changing probability etc. They have been chosen by observing algorithm
|
||
|
* effectiveness and seem to be very good.
|
||
|
*
|
||
|
* Note that we subtract 1 from the number of nodes to be consistent with
|
||
|
* the standard builder, which doesn't count error state.
|
||
|
*/
|
||
|
int *
|
||
|
dfa_patterns_optimize_variations(dfa_patterns *patterns, int iterations)
|
||
|
{
|
||
|
int k = 0;
|
||
|
int failed_iterations = 0;
|
||
|
int min_nodes_so_far;
|
||
|
int num_nodes_original;
|
||
|
int *best_variations;
|
||
|
double lower_limit = 2.0 / patterns->num_patterns;
|
||
|
double upper_limit = 6.0 / patterns->num_patterns;
|
||
|
double change_probability = 4.0 / patterns->num_patterns;
|
||
|
dfa_pattern *pattern;
|
||
|
|
||
|
best_variations = malloc(patterns->num_patterns * sizeof(*best_variations));
|
||
|
assert(best_variations);
|
||
|
for (pattern = patterns->patterns; pattern; pattern = pattern->next, k++)
|
||
|
best_variations[k] = pattern->current_variation;
|
||
|
|
||
|
dfa_patterns_build_graph(patterns);
|
||
|
num_nodes_original = patterns->graph.num_nodes;
|
||
|
min_nodes_so_far = num_nodes_original;
|
||
|
|
||
|
fprintf(stderr, "Original number of DFA states: %d\n", min_nodes_so_far - 1);
|
||
|
fprintf(stderr, "Trying to optimize in %d iterations\n", iterations);
|
||
|
|
||
|
gg_srand(num_nodes_original + patterns->num_patterns);
|
||
|
|
||
|
while (iterations--) {
|
||
|
int changed_variations = 0;
|
||
|
int k = 0;
|
||
|
|
||
|
/* Randomly change some variations. */
|
||
|
for (pattern = patterns->patterns; pattern; pattern = pattern->next, k++) {
|
||
|
if (gg_drand() < change_probability && pattern->num_variations > 1) {
|
||
|
int new_variation = gg_rand() % (pattern->num_variations - 1);
|
||
|
if (new_variation >= pattern->current_variation)
|
||
|
new_variation++;
|
||
|
pattern->current_variation = new_variation;
|
||
|
changed_variations++;
|
||
|
}
|
||
|
else
|
||
|
pattern->current_variation = best_variations[k];
|
||
|
}
|
||
|
|
||
|
if (changed_variations == 0) {
|
||
|
iterations++;
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
fprintf(stderr, ".");
|
||
|
dfa_patterns_build_graph(patterns);
|
||
|
|
||
|
if (patterns->graph.num_nodes < min_nodes_so_far) {
|
||
|
/* If the new set of variations produces smaller dfa, save it. */
|
||
|
int k = 0;
|
||
|
for (pattern = patterns->patterns; pattern; pattern = pattern->next, k++)
|
||
|
best_variations[k] = pattern->current_variation;
|
||
|
|
||
|
fprintf(stderr, "\nOptimized: %d => %d states (%d iterations left)\n",
|
||
|
min_nodes_so_far - 1, patterns->graph.num_nodes - 1, iterations);
|
||
|
min_nodes_so_far = patterns->graph.num_nodes;
|
||
|
failed_iterations = 0;
|
||
|
}
|
||
|
else
|
||
|
failed_iterations++;
|
||
|
|
||
|
if (failed_iterations >= 30) {
|
||
|
/* If haven't succeded in 30 last iterations, try to alter variation
|
||
|
* change probability.
|
||
|
*/
|
||
|
double delta = gg_drand() / patterns->num_patterns;
|
||
|
if (change_probability > upper_limit
|
||
|
|| (change_probability >= lower_limit && gg_rand() % 2 == 0))
|
||
|
delta = -delta;
|
||
|
|
||
|
change_probability += delta;
|
||
|
failed_iterations = 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
fprintf(stderr, "\nTotal optimization result: %d => %d states\n",
|
||
|
num_nodes_original - 1, min_nodes_so_far - 1);
|
||
|
|
||
|
dfa_graph_clear(&(patterns->graph));
|
||
|
return best_variations;
|
||
|
}
|
||
|
|
||
|
|
||
|
/*
|
||
|
* Local Variables:
|
||
|
* tab-width: 8
|
||
|
* c-basic-offset: 2
|
||
|
* End:
|
||
|
*/
|