CWB
|
#include <ctype.h>
#include "../cl/globals.h"
#include "../cl/cl.h"
#include "../cl/corpus.h"
#include "../cl/attributes.h"
#define MAX_ATTRS 1024 |
Maximum number of attributes that can be printed.
Referenced by decode_add_attribute().
#define MAX_PRINT_VALUES 1024 |
Maximum number of attributes whose "surrounding values" can be printed in matchlist mode.
Referenced by main().
typedef enum _output_modes OutputMode |
enum _output_modes |
int decode_add_attribute | ( | Attribute * | attr | ) |
Adds a specified Attribute to the global print_list array.
Aborts the program if that array is already full.
References _Attribute::any, corpus_id, decode_attribute_is_in_list(), decode_cleanup(), MAX_ATTRS, and print_list_index.
Referenced by main().
Determines whether or not a given Attribute is in an array of Attributes.
attr | The attribute to look for. |
att_list | Pointer to the first member of the array (i.e. array name). |
att_list_size | Upper bound of the array (the last member the function checks is att_list[att_list_size-1]). |
Referenced by decode_add_attribute(), and decode_verify_print_value_list().
void decode_cleanup | ( | int | error_code | ) |
Cleans up memory prior to an error-prompted exit.
error_code | Value to be returned by the program when it exits. |
References cl_delete_corpus().
Referenced by decode_add_attribute(), decode_print_token_sequence(), decode_usage(), and main().
void decode_print_surrounding_s_att_values | ( | int | position | ) |
Prints a starting tag for each s-attribute.
References _Attribute::any, cl_cpos2struc(), cl_struc2str(), ConclineMode, decode_string_escape(), EncodeMode, LispMode, mode, printValuesIndex, StandardMode, and XMLMode.
Referenced by main().
void decode_print_token_sequence | ( | int | start_position, |
int | end_position, | ||
Attribute * | context | ||
) |
Prints out the requested attributes for a sequence of tokens (or a single token if end_position == -1).
If the -c flag was used (and, thus, the context parameter is not NULL), then the sequence is extended to the entire s-attribute region (in matchlist mode).
References SAttRegion::annot, _Attribute::any, ATT_ALIGN, ATT_DYN, ATT_POS, ATT_STRUC, CDA_OK, cl_alg2cpos(), cl_cpos2alg(), cl_cpos2str(), cl_cpos2struc(), cl_cpos2struc2cpos(), cl_errno, cl_error(), cl_struc2cpos(), cl_struc2str(), cl_struc_values(), ConclineMode, decode_cleanup(), decode_sort_s_att_regions(), decode_string_escape(), EncodeMode, SAttRegion::end, LispMode, mode, N_sar, SAttRegion::name, print_list_index, printnum, sar_sort_index, StandardMode, SAttRegion::start, _Attribute::type, and XMLMode.
Referenced by main().
void decode_print_xml_declaration | ( | void | ) |
void decode_sort_s_att_regions | ( | void | ) |
Sorts s_att_regions[MAX_ATTRS] in ascending 'nested' order, using sar_sort_index[] (which is automatically initialised).
Since only regions which begin or end at the current token are considered, such an ordering is always possible; without knowing the current token, we sort by end position descending, then by start position ascending, which gives us the required 'nested' order.
The function uses bubble sort in order to retain the existing order of identical regions.
References SAttRegion::end, N_sar, sar_sort_index, and SAttRegion::start.
Referenced by decode_print_token_sequence().
char* decode_string_escape | ( | const char * | s | ) |
Escapes a string according to the currently active global mode.
In XMLMode, this function converts the string to an encoded XML string; all 'critical' characters are replaced by entity references, and C0 control characters are replaced with blanks. (This also happens in other modes - i.e. compact - if the global xml_compatible variable is true.)
In LispMode, it converts the string to a Lisp string with the required escapes (probably!)
In any other mode, it does nothing, and just returns the argument pointer.
It is safe to use this function without checking for a NULL argument, as NULLs will just be returned as NULLs.
Warning: returns pointer to static internal buffer of fixed size; in particular, don't use it twice in a single argument list!
s | String to encode. |
References CL_MAX_LINE_LENGTH, LispMode, mode, xml_compatible, and XMLMode.
Referenced by decode_print_surrounding_s_att_values(), and decode_print_token_sequence().
void decode_usage | ( | int | exit_code | ) |
Prints a usage message and exits the program.
exit_code | Value to be returned by the program when it exits. |
References decode_cleanup(), progname, and VERSION.
Referenced by main().
void decode_verify_print_value_list | ( | void | ) |
Checks the contents of the global printValues array, to make sure that no s-attribute in it is also declared in the main print_list array.
If an attribute is found to be declared in both, a warning is printed.
References corpus_id, decode_attribute_is_in_list(), print_list_index, and printValuesIndex.
Referenced by main().
int is_num | ( | char * | s | ) |
Check whether a string represents a number.
s | The string to check. |
Referenced by main().
int main | ( | int | argc, |
char ** | argv | ||
) |
Main function for cwb-decode.
argc | Number of command-line arguments. |
argv | Command-line arguments. |
References _Attribute::any, ATT_ALIGN, ATT_POS, ATT_STRUC, TCorpus::attributes, cl_max_cpos(), CL_MAX_LINE_LENGTH, cl_new_attribute, cl_new_corpus(), cl_standard_registry(), cl_struc_values(), ConclineMode, corpus_id, decode_add_attribute(), decode_cleanup(), decode_print_surrounding_s_att_values(), decode_print_token_sequence(), decode_print_xml_declaration(), decode_usage(), decode_verify_print_value_list(), EncodeMode, first_token, input_file, input_filename, is_num(), last, LispMode, MAX_PRINT_VALUES, maxlast, mode, printnum, printValuesIndex, progname, registry_directory, xml_compatible, and XMLMode.
char* corpus_id = NULL |
int first_token |
cpos of token to begin output at
Referenced by main().
int last |
cpos of token to end output at (inclusive; i.e. this one gets printed!)
Referenced by ascii_print_output(), cl_path_get_component(), do_cqi_cqp_dump_subcorpus(), do_cut(), encode_strtok(), html_print_output(), latex_print_output(), main(), print_tabulation(), and sgml_print_output().
OutputMode mode = StandardMode |
global variable for overall output mode
Referenced by decode_print_surrounding_s_att_values(), decode_print_token_sequence(), decode_string_escape(), and main().
int N_sar = 0 |
number of regions currently in list (may change for each token printed)
Attribute* print_list[MAX_ATTRS] |
array of attributes selected by user for printing
int print_list_index = 0 |
Number of attributes added to print_list so far; used as the exclusive upper bound (i.e. compared with less-than) when iterating over that array.
Referenced by decode_add_attribute(), decode_print_token_sequence(), and decode_verify_print_value_list().
int printnum = 0 |
whether or not token numbers are to be printed (-n option)
Referenced by decode_print_token_sequence(), and main().
Attribute* printValues[MAX_PRINT_VALUES] |
List of s-attributes whose values are to be printed.
int printValuesIndex = 0 |
Number of attributes added to printValues so far; used as the exclusive upper bound (i.e. compared with less-than) when iterating over that array.
Referenced by decode_print_surrounding_s_att_values(), decode_verify_print_value_list(), and main().
char* progname = NULL |
char* registry_directory = NULL |
SAttRegion s_att_regions[MAX_ATTRS] |
int sar_sort_index[MAX_ATTRS] |
index used for bubble-sorting list of regions
int xml_compatible = 0 |
xml-style, for (cwb-encode -x ...); EncodeMode only, selected by -Cx
Referenced by decode_string_escape(), and main().