CWB
|
Regular expression to DFA converter -- originally written by markh@csd4.csd.uwm.edu. More...
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdarg.h>
#include <stdlib.h>
#include <assert.h>
#include "../cl/globals.h"
#include "../cl/macros.h"
#include "eval.h"
#include "options.h"
#include "regex2dfa.h"
Regular expression to DFA converter -- originally written by markh@csd4.csd.uwm.edu.
Derived from the syntax:
Rule = (ID "=" Ex ",")* Ex.
Ex = "0" | "1" | ID | "(" Ex ")" | "[" Ex "]" | Ex "+" | Ex "*" | Ex Ex | Ex "|" Ex.
with the usual precedence rules.
(Note: this is the token-sequence regex, not the string-level regex!)
#define EQU_EXTEND 0x200 |
Referenced by MakeExp().
#define HASH_MAX 0x200 |
Referenced by init().
#define MAX_CHAR 0x4000 |
Referenced by LEX().
#define MAX_ERRORS 25 |
The maximum number of errors that the regex2dfa module will allow before killing the program.
Referenced by REGEX2DFA_ERROR().
#define STACK_MAX 200 |
Referenced by PUSH().
#define X_EXTEND 4 |
Referenced by PushQ().
typedef unsigned char byte |
enum ExpTag |
enum Lexical |
enum StackTag |
void AddBuf | ( | Symbol | LHS, |
int | Q | ||
) |
References cl_realloc(), IMax, Is, item::LHS, symbol::Name, item::RHS, and item::Size.
Referenced by FormState().
void AddEquiv | ( | int | L, |
int | R | ||
) |
References cl_realloc(), state::Class, EMax, Es, ETab, Equiv::L, and Equiv::R.
Referenced by MergeStates().
int AddState | ( | int | States, |
int * | SList | ||
) |
References cl_realloc(), state::Class, state::SList, Ss, and state::States.
Referenced by FormState().
void* Allocate | ( | unsigned | Bytes | ) |
TODO delete: has been replaced throughout with cl_malloc.
References cl_malloc().
char* CopyS | ( | char * | S | ) |
TODO delete: has been replaced throughout with cl_strdup (was only used once anyway)
References cl_malloc().
void FormState | ( | int | Q | ) |
References AddBuf(), AddState(), AndX, exp::Arg, exp::Body, cl_malloc(), state::Empty, IMax, Is, exp::Leaf, item::LHS, state::LHS, MakeExp(), OneX, OptX, OrX, PlusX, PopQ(), PushQ(), state::RHS, state::Shifts, state::ShList, state::SList, SP, Ss, StarX, state::States, SymX, exp::Tag, equation::Value, Xs, XStack, and ZeroX.
Referenced by regex2dfa().
void free_dfa | ( | DFA * | dfa | ) |
Frees all the memory associated with this DFA.
References dfa::Final, dfa::Max_Input, dfa::Max_States, and dfa::TransTable.
Referenced by free_environment().
static int GET | ( | void | ) | [static] |
byte Hash | ( | char * | S | ) |
Creates a one-byte hash of the string S.
void init | ( | void | ) |
void init_dfa | ( | DFA * | dfa | ) |
Initialises the members of the given DFA object.
References dfa::Final, dfa::Max_Input, dfa::Max_States, and dfa::TransTable.
Referenced by next_environment().
Lexical LEX | ( | void | ) |
Symbol LookUp | ( | char * | S | ) |
Look up the symbol contained in string S in the global hash table.
References cl_malloc(), cl_strdup(), symbol::Hash, Hash, symbol::Name, symbol::Next, and symbol::Tail.
Referenced by Parse().
int MakeExp | ( | int | Q, |
ExpTag | Tag, | ||
... | |||
) |
References AndX, exp::Arg, exp::Body, cl_malloc(), cl_realloc(), exp::Class, DUP(), EQU_EXTEND, EquMax, Equs, exp::Hash, Hash, equation::Hash, symbol::Hash, exp::Leaf, OneX, OptX, OrX, PlusX, equation::Stack, StarX, SymX, Tag, exp::Tag, exp::Tail, equation::Value, and ZeroX.
Referenced by FormState(), and Parse().
void MergeStates | ( | void | ) |
References AddEquiv(), state::Class, EMax, state::Empty, Es, ETab, Equiv::L, state::LHS, Equiv::R, state::RHS, state::Shifts, state::ShList, SP, and Ss.
Referenced by regex2dfa().
int Parse | ( | void | ) |
The regex parser proper. Private function.
References Action, AND, AndX, EQU, EqualT, IdenT, LastW, LBrT, LEX(), LookUp(), LParT, MakeExp(), OneT, OneX, OPT, OptX, OR, OrX, PAR, PlusX, POP, PUSH(), REGEX2DFA_ERROR(), RULE, Stack, StarX, Store(), SymX, TOP, ZeroT, and ZeroX.
Referenced by regex2dfa().
void PopQ | ( | void | ) |
References equation::Stack, Xs, and XStack.
Referenced by FormState().
void PUSH | ( | StackTag | Tag, |
int | Q | ||
) |
References StackCard::Q, REGEX2DFA_ERROR(), STACK_MAX, Tag, and StackCard::Tag.
Referenced by Parse().
void PushQ | ( | int | Q | ) |
References cl_realloc(), equation::Stack, X_EXTEND, XMax, Xs, and XStack.
Referenced by FormState().
void* Reallocate | ( | void * | X, |
unsigned | Bytes | ||
) |
TODO delete: has been replaced throughout with cl_realloc.
References cl_realloc().
void regex2dfa | ( | char * | rxs, |
DFA * | automaton | ||
) |
Converts a regular expression to a DFA.
Public function.
rxs | The regular expression. |
automaton | Pointer to the DFA object to write to. |
References C, cl_malloc(), state::Class, dfa::E_State, eep, state::Empty, Environment, ERRORS, False, dfa::Final, FormState(), init(), state::LHS, dfa::Max_Input, dfa::Max_States, evalenv::MaxPatIndex, MergeStates(), symbol::Name, Parse(), state::RHS, searchstr, state::Shifts, state::ShList, show_dfa, SP, Ss, dfa::TransTable, True, and WriteStates().
Referenced by do_SearchPattern().
static void REGEX2DFA_ERROR | ( | char * | Format, |
... | |||
) | [static] |
void show_complete_dfa | ( | DFA | dfa | ) |
Prints the contents of a DFA to stdout.
References dfa::E_State, dfa::Final, dfa::Max_Input, dfa::Max_States, and dfa::TransTable.
Referenced by show_environment().
void Store | ( | Symbol | S, |
int | Q | ||
) |
References exp::Body, cl_malloc(), exp::Class, exp::Hash, symbol::Hash, exp::Leaf, SymX, exp::Tag, and exp::Tail.
Referenced by Parse().
static void UNGET | ( | int | Ch | ) | [static] |
void WriteStates | ( | void | ) |
Write states to stdout.
Private function.
References C, state::Class, state::Empty, state::LHS, symbol::Name, state::RHS, state::Shifts, state::ShList, SP, and Ss.
Referenced by regex2dfa().
char* Action[7] |
{ ".ABCH|&&&&&+*", "I=BCH|&&&&&+*", "DD)FH|&&&&&+*", "EEG]H|&&&&&+*", "vvvvv|&&&&&+*", "xxxxxx&&&&&+*" }
Referenced by Parse().
int currpos |
int EMax |
Referenced by AddEquiv(), and MergeStates().
int ERRORS |
The number of errors encountered while parsing a regex to a DFA.
Referenced by init(), regex2dfa(), and REGEX2DFA_ERROR().
int Es |
Referenced by AddEquiv(), and MergeStates().
Referenced by AddEquiv(), init(), and MergeStates().
int IMax |
Referenced by AddBuf(), and FormState().
int Is |
Referenced by AddBuf(), and FormState().
int LINE |
Referenced by init(), and REGEX2DFA_ERROR().
char* searchstr |
Global variable containing a search string that is to be converted to a DFA.
(Needs to be global; functions using the DFA write to it, and then the DFA parser reads from it. Declared as an external global in cqp.h so other parts of CQP can access it.)
Referenced by do_SearchPattern(), do_StandardQuery(), GET(), prepare_input(), prepare_Query(), and regex2dfa().
Referenced by FormState(), MergeStates(), regex2dfa(), and WriteStates().
int Ss |
Referenced by AddState(), FormState(), init(), MergeStates(), regex2dfa(), and WriteStates().
int Xs |
Referenced by FormState(), init(), PopQ(), and PushQ().
int* XStack = NULL |
Referenced by FormState(), init(), PopQ(), and PushQ().