CWB
|
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include <limits.h>
#include <math.h>
#include <sys/types.h>
#include "../cl/globals.h"
#include "../cl/macros.h"
#include "../cl/corpus.h"
#include "../cl/attributes.h"
#include "../cl/cdaccess.h"
#include "../cl/special-chars.h"
#include "cqp.h"
#include "ranges.h"
#include "options.h"
#include "tree.h"
#include "symtab.h"
#include "corpmanag.h"
#include "regex2dfa.h"
#include "eval.h"
#include "builtins.h"
#include "output.h"
#include "matchlist.h"
#define no_match -1
Referenced by check_alignment_constraints().
#define RED_THRESHOLD 0.01
Referenced by matchfirstpattern().
Boolean calculate_initial_matchlist(Constrainttree ctptr, Matchlist *matchlist, CorpusList *corpus)
Wrapper around calculate_initial_matchlist_1(), q.v.
References calculate_initial_matchlist_1(), Complement, _Matchlist::is_inverted, mark_offrange_cells(), Reduce, and Setop().
Referenced by cqp_run_tab_query(), eval_mu_tree(), and matchfirstpattern().
Boolean calculate_initial_matchlist_1(Constrainttree ctptr, Matchlist *matchlist, CorpusList *corpus)
Gets the initial list of matches for a query.
NB. This function is called recursively.
References c_tree::attr, b_and, b_implies, b_not, b_or, bnode, c_tree::canon, CID, cmp_eq, cmp_ex, cmp_get, cmp_gt, cmp_let, cmp_lt, cmp_neq, cnode, collect_matches, Complement, c_tree::constnode, cqpmessage(), c_tree::ctype, _Matchlist::end, Error, eval_bool(), eval_debug, EvaluationIsRunning, False, free_matchlist(), func, get_corpus_positions(), get_matched_corpus_positions(), get_positions, id_list, c_tree::idlist, init_matchlist(), Intersection, _Matchlist::is_inverted, c_tree::items, c_tree::label, c_tree::leaf, c_tree::left, left, mark_offrange_cells(), _Matchlist::matches_whole_corpus, _label_entry::name, c_tree::negated, c_tree::node, NORMAL, c_tree::nr_items, c_tree::op_id, c_tree::pa_ref, pa_ref, c_tree::pat_type, cl::range, Reduce, REGEXP, c_tree::right, right, sa_ref, Setop(), cl::size, _Matchlist::start, string_leaf, _Matchlist::tabsize, True, c_tree::type, Union, and c_tree::val.
Referenced by calculate_initial_matchlist().
int check_alignment_constraints(Matchlist *ml)
References CDA_OK, cderrno, cl_alg2cpos(), cl_cpos2alg(), cl_malloc(), cqp, cqpmessage(), delete_reftab(), evalenv::dfa, eep, _Matchlist::end, Environment, evalenv, EvaluationIsRunning, free_matchlist(), Info, init_matchlist(), install_signal_handler(), evalenv::labels, dfa::Max_States, evalenv::negated, new_reftab(), no_match, simulate(), _Matchlist::start, _Matchlist::tabsize, and which_app.
Referenced by simulate_dfa().
void cqp_run_mu_query(int keep_old_ranges, int cut_value)
References cl_malloc(), cqpmessage(), _Matchlist::end, Environment, Error, eval_mu_tree(), evalenv::evaltree, free_matchlist(), init_matchlist(), mark_offrange_cells(), evalenv::query_corpus, Reduce, set_corpus_matchlists(), Setop(), _Matchlist::start, and _Matchlist::tabsize.
Referenced by do_MUQuery().
void cqp_run_query(int cut, int keep_old_ranges)
This function wraps round simulate_dfa (the only other thing it does is enforce the hard_cut limit).
References eep, hard_cut, and simulate_dfa().
Referenced by do_StandardQuery().
void cqp_run_tab_query(int implode)
References calculate_initial_matchlist(), cl_malloc(), Environment, evalenv::evaltree, free_matchlist(), hard_boundary, init_matchlist(), mark_offrange_cells(), e_tree::next, e_tree::patindex, evalenv::patternlist, evalenv::query_corpus, Reduce, repeat_inf, set_corpus_matchlists(), Setop(), e_tree::tab_el, tabular, and e_tree::type.
Referenced by do_TABQuery().
Boolean eval_bool(Constrainttree ctptr, RefTab rt, int corppos)
References ATTAT_FLOAT, ATTAT_INT, ATTAT_NONE, ATTAT_PAREF, ATTAT_POS, ATTAT_STRING, ATTAT_VAR, c_tree::attr, b_and, b_implies, b_not, b_or, bnode, CDA_OK, cderrno, _DCR::charres, cl_regex_match(), cmp_eq, cmp_ex, cmp_get, cmp_gt, cmp_let, cmp_lt, cmp_neq, cnode, c_tree::constnode, cqpmessage(), c_tree::delete, Error, eval_bool(), eval_debug, EvaluationIsRunning, False, float_leaf, _DCR::floatres, func, get_id_at_position, get_label_referenced_position(), get_leaf_value(), get_string_of_id, get_struc_attribute, id_list, c_tree::idlist, int_leaf, intcompare(), _DCR::intres, c_tree::is_closing, c_tree::items, c_tree::label, c_tree::leaf, c_tree::left, _label_entry::name, c_tree::negated, c_tree::node, NORMAL, c_tree::nr_items, c_tree::op_id, pa_ref, _DCR::parefres, c_tree::pat_type, _label_entry::ref, REGEXP, c_tree::right, c_tree::rx, sa_ref, c_tree::sbound, sbound, set_reftab(), STREQ, string_leaf, c_tree::strucattr, True, _DCR::type, c_tree::type, c_tree::val, and _DCR::value.
Referenced by calculate_initial_matchlist_1(), eval_bool(), eval_constraint(), evaluate_subset(), evaluate_target(), and simulate().
References _avs::anchor, Anchor, _avs::attr, cl_cpos2struc(), cl_regex_match(), cl_struc2cpos(), cl_struc2str(), _avs::con, _avs::constraint, corpus, dup_reftab(), _Range::end, eval_bool(), False, _avs::field, get_reftab(), _avs::is_closing, KeywordField, cl::keywords, _avs::label, _avs::matchall, MatchAll, MatchEndField, MatchField, _avs::negated, Pattern, evalenv::query_corpus, cl::range, _label_entry::ref, _avs::right_boundary, evalenv::rp, _avs::rx, set_reftab(), _Range::start, strict_regions, _avs::tag, Tag, TargetField, cl::targets, True, and _avs::type.
Referenced by simulate().
References calculate_initial_matchlist(), e_tree::cooc, cooc_meet, cooc_union, CurEnv, EvaluationIsRunning, free_matchlist(), init_matchlist(), e_tree::leaf, leaf, e_tree::left, e_tree::lw, meet_mu(), meet_union, e_tree::op_id, e_tree::patindex, evalenv::patternlist, evalenv::query_corpus, e_tree::right, e_tree::rw, Setop(), e_tree::struc, e_tree::type, and Union.
Referenced by cqp_run_mu_query().
int free_environment(int thisenv)
Frees an evaluation environment.
The environment must be one currently occupied within the global array.
thisenv: The eval environment to free.
References Anchor, ctxtsp::attrib, cl_delete_regex(), cl_free, delete_symbol_table(), ctxtsp::direction, eep, Environment, False, free_booltree(), free_dfa(), free_evaltree(), evalenv::gconstraint, evalenv::has_target_indicator, evalenv::labels, leftright, MatchAll, evalenv::MaxPatIndex, NoField, Pattern, evalenv::patternlist, evalenv::query_corpus, evalenv::search_context, ctxtsp::size, Tag, dfa::TransTable, ctxtsp::type, cl::type, and word.
Referenced by free_environments().
void free_environments(void)
Frees all eval environments in the global array, and sets the eep pointer to -1.
References eep, and free_environment().
Referenced by in_UnnamedCorpusCommand(), and prepare_input().
Gets a list of corpus positions where the given p-attribute has the specified form.
Positions are placed into the "start" array of the matchlist.
attribute: The p-attribute to search.
wordform: The form to search for.
matchlist: Where to put the results.
References CDA_OK, cl_errno, cl_idlist2cpos, cl_str2id(), initial_matchlist_debug, _Matchlist::matches_whole_corpus, silent, _Matchlist::start, and _Matchlist::tabsize.
Referenced by calculate_initial_matchlist_1().
int get_label_referenced_position(LabelEntry label, RefTab rt, int corppos)
References eval_debug, get_reftab(), _label_entry::name, and _label_entry::ref.
Referenced by eval_bool(), and get_leaf_value().
Boolean get_leaf_value(Constrainttree ctptr, RefTab rt, int corppos, DynCallResult *dcr, int deliver_strings)
References c_tree::args, ATTAT_FLOAT, ATTAT_INT, ATTAT_NONE, ATTAT_PAREF, ATTAT_POS, ATTAT_STRING, c_tree::attr, call_dynamic_attribute, call_predefined_function(), CDA_EPOSORNG, CDA_OK, cderrno, _DCR::charres, CID, cl_malloc(), cqpmessage(), c_tree::ctype, c_tree::delete, c_tree::dynattr, Error, eval_debug, EvaluationIsRunning, False, float_leaf, _DCR::floatres, c_tree::func, func, get_id_at_position, get_label_referenced_position(), get_string_at_position, get_struc_attribute, int_leaf, _DCR::intres, c_tree::label, c_tree::leaf, _label_entry::name, _ActualParamList::next, c_tree::nr_args, c_tree::pa_ref, pa_ref, _ActualParamList::param, _DCR::parefres, c_tree::pat_type, c_tree::predef, _label_entry::ref, c_tree::sa_ref, sa_ref, set_reftab(), string_leaf, structure_value_at_position(), True, c_tree::type, _DCR::type, and _DCR::value.
Referenced by eval_bool().
int get_matched_corpus_positions(Attribute *attribute, char *regstr, int canonicalize, Matchlist *matchlist, int *restrictor_list, int restrictor_size)
Get corpus positions matching a regular expression on a given attribute.
get_matched_corpus_positions searches a given p-attribute of a corpus (which is to be loaded) for the regular expression 'regstr', and returns the table of matching start indices (start_table) and the table size (tabsize).
attribute: The attribute to search on. May be NULL, in which case DEFAULT_ATT_NAME is used.
regstr: String containing the regular expression.
canonicalize: Flags to be passed to the CL regex engine.
matchlist: Location where the list of matches will be placed.
restrictor_list: Passed to cl_idlist2cpos_oldstyle().
restrictor_size: Passed to cl_idlist2cpos_oldstyle().
References ATT_POS, cl_free, cl_idlist2cpos_oldstyle(), cl_malloc(), cl_max_cpos(), cl_max_id(), cl_new_attribute_oldstyle(), cl_regex2id(), cl::corpus, DEFAULT_ATT_NAME, eval_debug, initial_matchlist_debug, _Matchlist::is_inverted, _Matchlist::matches_whole_corpus, evalenv::query_corpus, cl::range, silent, cl::size, _Matchlist::start, STREQ, and _Matchlist::tabsize.
Referenced by calculate_initial_matchlist_1(), and matchfirstpattern().
static int intcompare(const void *i, const void *j) [static]
Comparison function used when calling qsort().
Referenced by eval_bool().
int mark_offrange_cells(Matchlist *matchlist, CorpusList *corpus)
References _Matchlist::end, _Range::end, cl::mother_size, cl::range, cl::size, _Matchlist::start, and _Range::start.
Referenced by calculate_initial_matchlist(), calculate_initial_matchlist_1(), cqp_run_mu_query(), and cqp_run_tab_query().
Boolean matchfirstpattern(AVS pattern, Matchlist *matchlist, CorpusList *corpus)
References _avs::anchor, Anchor, _avs::attr, calculate_initial_matchlist(), cl_free, cl_malloc(), cl_max_struc(), cl_regex_match(), cl_struc2cpos(), cl_struc2str(), clear_all_bits(), _avs::con, _avs::constraint, cqpmessage(), create_bitfield(), destroy_bitfield(), _Range::end, _Matchlist::end, Error, EvaluationIsRunning, False, _avs::field, get_bit(), get_matched_corpus_positions(), _avs::is_closing, KeywordField, cl::keywords, MatchAll, MatchEndField, _Matchlist::matches_whole_corpus, MatchField, _avs::negated, Pattern, query_optimize, cl::range, red_factor(), RED_THRESHOLD, Reduce, _avs::rx, set_all_bits(), set_bit(), Setop(), silent, cl::size, _Range::start, _Matchlist::start, _Matchlist::tabsize, _avs::tag, Tag, TargetField, cl::targets, True, and _avs::type.
Referenced by simulate_dfa().
References CDA_OK, cderrno, cl_free, cl_malloc(), cl_realloc(), _Matchlist::end, get_struc_attribute, _Matchlist::matches_whole_corpus, MIN, _Matchlist::start, and _Matchlist::tabsize.
Referenced by eval_mu_tree().
int next_environment(void)
Sets up a new environment in the global array.
The next slot upwards is used (and eep is incremented).
References ctxtsp::attrib, CurEnv, ctxtsp::direction, eep, Environment, evalenv::evaltree, evalenv::gconstraint, evalenv::has_target_indicator, init_dfa(), evalenv::labels, leftright, evalenv::match_label, evalenv::matchend_label, MAXENVIRONMENT, evalenv::MaxPatIndex, evalenv::negated, new_symbol_table(), evalenv::query_corpus, evalenv::search_context, ctxtsp::size, evalenv::target_label, ctxtsp::type, and word.
Referenced by ActivateCorpus(), after_CorpusSetExpr(), prepare_AlignmentConstraints(), and prepare_Query().
int nr_positions(CorpusList *cp)
Counts the number of token positions encompassed by all members of the ->range array of the CorpusList argument.
In other words, it tells you the size of this corpus.
References _Range::end, cl::range, cl::size, and _Range::start.
Referenced by compose_kwic_line(), red_factor(), and remember_this_position().
float red_factor(CorpusList *cp, int *nr_pos)
References access_corpus(), ATT_POS, cl::corpus, DEFAULT_ATT_NAME, find_attribute, cl::mother_size, nr_positions(), and cl::size.
Referenced by matchfirstpattern().
void set_corpus_matchlists(CorpusList *cp, Matchlist *matchlist, int nr_lists, int keep_old_ranges)
Set the appropriate values to the corpus id (given by its pointer to the symbol table).
References cl_free, cl_malloc(), _Matchlist::end, _Range::end, cl::keywords, cl::range, RangeSetop(), RReduce, cl::size, cl::sortidx, _Matchlist::start, _Range::start, _Matchlist::tabsize, _Matchlist::target_positions, and cl::targets.
Referenced by cqp_run_mu_query(), cqp_run_tab_query(), and simulate_dfa().
void show_environment(int thisenv)
Prints the contents of an EvalEnvironment object to STDOUT.
Which bits of information are printed depends on which of a group of debugging-variables are set to true.
The EvalEnvironment to print is specified as an index into the global array (Environment).
thisenv: Index into Environment indicating which EvalEnvironment should be displayed.
References eep, Environment, print_booltree(), print_evaltree(), show_compdfa, show_complete_dfa(), show_evaltree, show_gconstraints, show_patlist, and show_patternlist().
Referenced by debug_output().
void simulate(Matchlist *matchlist, int *cut, int start_state, int start_offset, int *state_vector, int *target_vector, RefTab *reftab_vector, RefTab *reftab_target_vector, int start_transition)
References evalenv::aligned, _avs::anchor, Anchor, _avs::attr, calculate_rightboundary(), CheckForInterrupts(), _avs::con, cqpmessage(), debug_simulation, evalenv::dfa, dup_reftab(), dfa::E_State, _Range::end, _Matchlist::end, Error, eval_bool(), eval_constraint(), EvaluationIsRunning, dfa::Final, free_matchlist(), evalenv::gconstraint, get_reftab(), get_struc_attribute, evalenv::has_target_indicator, _avs::is_closing, _avs::is_target, LAB_DEFINED, LAB_RDAT, LAB_USED, _avs::label, evalenv::labels, longest_match, _avs::lookahead, evalenv::match_label, _avs::matchall, MatchAll, evalenv::matchend_label, matching_strategy, dfa::Max_Input, dfa::Max_States, MIN, Pattern, evalenv::patternlist, print_label_values(), progress_bar, progress_bar_percentage(), query_corpus, evalenv::query_corpus, cl::range, _label_entry::ref, reset_reftab(), _avs::right_boundary, evalenv::rp, evalenv::search_context, set_reftab(), cl::size, _Range::start, _Matchlist::start, strict_regions, symbol_table_iterator(), symbol_table_new_iterator(), symtab_debug, _Matchlist::tabsize, _avs::tag, Tag, evalenv::target_label, _Matchlist::target_positions, dfa::TransTable, and _avs::type.
Referenced by check_alignment_constraints(), and simulate_dfa().
void simulate_dfa(int envidx, int cut, int keep_old_ranges)
Simulates the DFA.
References evalenv::aligned, check_alignment_constraints(), cl_malloc(), cqp, cqpmessage(), delete_reftab(), evalenv::dfa, dfa::E_State, eep, _Matchlist::end, Environment, Error, EvaluationIsRunning, dfa::Final, free_matchlist(), evalenv::has_target_indicator, Info, init_matchlist(), initial_matchlist_debug, install_signal_handler(), evalenv::labels, matchfirstpattern(), dfa::Max_Input, dfa::Max_States, new_reftab(), evalenv::patternlist, print_symbol_table(), progress_bar, progress_bar_clear_line(), progress_bar_message(), evalenv::query_corpus, Reduce, reset_reftab(), set_corpus_matchlists(), Setop(), show_matchlist(), show_matchlist_firstelements(), simulate(), cl::size, _Matchlist::start, _Matchlist::tabsize, _Matchlist::target_positions, dfa::TransTable, True, Union, Warning, and which_app.
Referenced by cqp_run_query().