CWB
|
#include <ctype.h>
#include <sys/types.h>
#include "globals.h"
#include "endian.h"
#include "macros.h"
#include "storage.h"
#include "fileutils.h"
#include "corpus.h"
#include "attributes.h"
#include "cdaccess.h"
#include "makecomps.h"
#define BUFSIZE 0x10000
Referenced by creat_freqs().
int creat_freqs(Component *freqs)
Creates the frequency table component.
References TComponent::attribute, BUFSIZE, CompCorpus, CompCorpusFreqs, CompLexiconIdx, component_full_name(), TMblob::data, TComponent::data, ensure_component(), find_component(), TComponent::id, MALLOCED, TComponent::path, read_file_into_blob(), TComponent::size, TMblob::size, and write_file_from_blob().
Referenced by create_component().
int creat_rev_corpus(Component *revcorp)
Creates a reversed corpus component.
This function should only be invoked by the makeall tool (via create_component()), which must make sure that the lexicon and (possibly) compressed token stream have been created by now, so CL access to the token stream works.
References TComponent::attribute, cl_cpos2id(), cl_debug, cl_free, cl_id2freq(), cl_malloc(), cl_max_cpos(), cl_max_id(), cl_memory_limit, CompCorpusFreqs, TComponent::corpus, TMblob::data, TComponent::data, ensure_component(), NwriteInt(), NwriteInts(), and TComponent::path.
Referenced by create_component().
int creat_rev_corpus_idx(Component *revcidx)
Creates the index for the reversed corpus.
References TMblob::allocation_method, TComponent::attribute, TMblob::changed, cl_malloc(), CompCorpusFreqs, TComponent::corpus, TMblob::data, TComponent::data, ensure_component(), TMblob::fname, TMblob::fsize, TMblob::item_size, MALLOCED, TMblob::nr_items, TMblob::offset, TComponent::path, TComponent::size, TMblob::size, SIZE_INT, write_file_from_blob(), and TMblob::writeable.
Referenced by create_component().
int creat_sort_lexicon(Component *lexsrt)
Creates a sorted index from the (already existing) lexicon index of the attribute.
References TComponent::attribute, comp_component_state(), CompLexicon, CompLexiconIdx, ComponentDefined, TMblob::data, TComponent::data, ensure_component(), TMblob::item_size, MALLOCED, TMblob::nr_items, TComponent::path, read_file_into_blob(), scompare(), TComponent::size, TMblob::size, and write_file_from_blob().
Referenced by create_component().
static int scompare(const void *idx1, const void *idx2) [static]
Compares two lexicon entries using cl_strcmp().
This function is for use with qsort().
References cl_strcmp(), and TMblob::data.
Referenced by creat_sort_lexicon().
char errmsg[CL_MAX_LINE_LENGTH]
MemBlob* SortLexicon [static]