diff --git a/.gitignore b/.gitignore
index 6ddc45c129..2a6ec1f412 100644
--- a/.gitignore
+++ b/.gitignore
@@ -57,3 +57,4 @@ data/th/old_words/**
 # a.wasm is created by "emcc" (in "emconfigure ./configure").
 # For now just ignore it.
 a.wasm
+Session.vim
diff --git a/bindings/python-examples/tests.py b/bindings/python-examples/tests.py
index 0dc9ece04e..cc9ab441da 100755
--- a/bindings/python-examples/tests.py
+++ b/bindings/python-examples/tests.py
@@ -1090,12 +1090,12 @@ def test_no_op_resolving(self):
 
     def test_resolving(self):
         """
-        Test expression resolving using the default headline:4 setting from
+        Test expression resolving using the default headline:99 setting from
         data/en/4.0.dialect.
        """
         dictnode = clg.dictionary_lookup_list(self.d._obj, sm('book.n'))
         exp_old = dictnode[0].exp
-        exp_new = clg.lg_exp_resolve(self.d._obj, exp_old, ParseOptions()._obj) # headline:4
+        exp_new = clg.lg_exp_resolve(self.d._obj, exp_old, ParseOptions()._obj) # headline:99
 
         # Find the 2 locations with a difference when comparing
         # exp_old to exp_new and validate them.
diff --git a/bindings/python/Makefile.am b/bindings/python/Makefile.am
index 11e2f5a42d..878c47b3b2 100644
--- a/bindings/python/Makefile.am
+++ b/bindings/python/Makefile.am
@@ -51,7 +51,7 @@ if HAVE_SWIG
 # Swig builds these ....
 $(built_c_sources) $(built_py_sources): $(SWIG_INCLUDES)
 $(built_c_sources) $(built_py_sources): $(SWIG_SOURCES)
-	$(AM_V_GEN) $(SWIG) -python -py3 -module clinkgrammar -I$(top_srcdir)/link-grammar -o $@ $<
+	$(AM_V_GEN) $(SWIG) -python -module clinkgrammar -I$(top_srcdir)/link-grammar -o $@ $<
 else
 $(built_c_sources) $(built_py_sources):
 	touch $(built_c_sources) $(built_py_sources)
diff --git a/debug/README.md b/debug/README.md
index 0792f58e75..1710e5f8a7 100644
--- a/debug/README.md
+++ b/debug/README.md
@@ -122,7 +122,7 @@ messages.)
 
 Or, in order to display the word array:
 
-`link-parser -v=7 -debug=tokenize.c,print_sentence_word_alternatives`
+`link-parser -v=8 -debug=build_sentence_expressions,print_sentence_word_alternatives`
 
 5) Debug post-processing:
 
diff --git a/link-grammar/api-structures.h b/link-grammar/api-structures.h
index c308401a61..3924a947bb 100644
--- a/link-grammar/api-structures.h
+++ b/link-grammar/api-structures.h
@@ -55,10 +55,10 @@
  * included tracon sharing) should always be done. And now the overhead
  * is negligible.
  *
- * Note: setting this to non-zero values disables some of the more
+ * Note: setting this to 254(MAX_SENTENCE) disables some of the more
  * subtle tracon encoding code, and thus can be used to create a
- * baseline parse, skipping that code. This can be setin with the
- * test_enabled("min-len-encoding") flag (see api.c)
+ * baseline parse, skipping that code. This can be done using
+ * -test="min-len-encoding:254" (see sentence.c).
  */
 #define SENTENCE_MIN_LENGTH_TRAILING_HASH 0
 
diff --git a/link-grammar/connectors.h b/link-grammar/connectors.h
index b46aca1731..b6c4bc5bf1 100644
--- a/link-grammar/connectors.h
+++ b/link-grammar/connectors.h
@@ -14,10 +14,10 @@
 #ifndef _LINK_GRAMMAR_CONNECTORS_H_
 #define _LINK_GRAMMAR_CONNECTORS_H_
 
-#include <ctype.h>   // for islower()
+#include <ctype.h>   // islower()
 #include
 #include
-#include <stdint.h>  // for uint8_t
+#include <stdint.h>  // uint8_t ...
 
 #include "api-types.h"
 #include "error.h"
@@ -100,6 +100,9 @@
 };
 typedef struct condesc_struct condesc_t;
 
+/* Length-limits for how far connectors can reach out. */
+#define UNLIMITED_LEN 255
+
 typedef struct length_limit_def
 {
 	const char *defword;
@@ -145,17 +148,17 @@ struct Connector_struct
 	Connector *next;
 	union
 	{
-		const gword_set *originating_gword; /* Used while and after parsing */
+		const gword_set *originating_gword; /* Used while and after parsing. */
 		struct
 		{
-			int32_t refcount;/* Memory-sharing reference count - for pruning. */
-			uint16_t exp_pos; /* The position in the originating expression,
-			                     currently used only for debugging dict macros. */
-			bool shallow; /* TRUE if this is a shallow connector.
-			 * A connectors is shallow if it is the first in
-			 * its list on its disjunct. (It is deep if it is
-			 * not the first in its list; it is deepest if it
-			 * is the last on its list.) */
+			int32_t refcount; /* Memory-sharing reference count - for pruning. */
+			uint16_t exp_pos; /* The position in the originating expression,
+			                   * currently used only for debugging dict macros. */
+			bool shallow;     /* TRUE if this is a shallow connector.
+			                   * A connector is shallow if it is the first in
+			                   * its list on its disjunct. (It is deep if it is
+			                   * not the first in its list; it is deepest if it
+			                   * is the last on its list.) */
 		};
 	};
 };
@@ -225,9 +228,6 @@ static inline Connector *connector_deepest(const Connector *c)
 	return (Connector *)c; /* Note: Constness removed. */
 }
 
-/* Length-limits for how far connectors can reach out. */
-#define UNLIMITED_LEN 255
-
 /**
  * Returns TRUE if s and t match according to the connector matching
  * rules. The connector strings must be properly formed, starting with
@@ -383,17 +383,20 @@ static inline size_t pair_hash(int lw, int rw,
 
 /**
  * Get the word number of the given tracon.
- * c is the leading tracon connector. The word number is extracted from
- * the nearest_word of the deepest connector.
+ * It is extracted from the nearest_word of the deepest connector.
+ * @param c The leading tracon connector.
+ * @param dir Direction - 0: left; 1: right.
+ * @return Sentence word number.
+ *
  * This function depends on setup_connectors() (which initializes
- * nearest_word). It should not be called after power_prune() (which
- * changes nearest_word).
+ * nearest_word). It should not be called during or after power_prune()
+ * (which changes nearest_word).
  *
  * Note: An alternative for getting the word number of a tracon is to keep
  * it in the tracon list table or in a separate array. Both ways add
  * noticeable overhead, maybe due to the added CPU cache footprint.
- * However, if the word number will be needed after power_prune() there
- * will be a need to keep it in an alternative way.
+ * However, if the need arises for the word number of a tracon during/after
+ * power_prune(), there will be a need to keep it in an alternative way.
  */
 static inline int get_tracon_word_number(Connector *c, int dir)
 {
diff --git a/link-grammar/dict-common/dict-common.h b/link-grammar/dict-common/dict-common.h
index 4f4bba989a..543a598467 100644
--- a/link-grammar/dict-common/dict-common.h
+++ b/link-grammar/dict-common/dict-common.h
@@ -14,6 +14,8 @@
 #ifndef _LG_DICT_COMMON_H_
 #define _LG_DICT_COMMON_H_
 
+#include <limits.h>       // INT_MAX
+
 #include "api-types.h"    // pp_knowledge
 #include "connectors.h"   // ConTable
 #include "dict-defines.h"
@@ -34,7 +36,7 @@
  * is used. */
 static const float UNINITIALIZED_MAX_DISJUNCT_COST = -10000.0f;
 static const float DEFAULT_MAX_DISJUNCT_COST = 2.7f;
-static const float UNINITIALIZED_MAX_DISJUNCTS = -1;
+static const int UNINITIALIZED_MAX_DISJUNCTS = INT_MAX;
 
 /* We need some of these as literal strings. */
 #define LG_DICTIONARY_VERSION_NUMBER "dictionary-version-number"
diff --git a/link-grammar/dict-common/regex-morph.c b/link-grammar/dict-common/regex-morph.c
index 5bd76e0b97..d3a3914201 100644
--- a/link-grammar/dict-common/regex-morph.c
+++ b/link-grammar/dict-common/regex-morph.c
@@ -347,7 +347,7 @@ static void reg_free(Regex_node *rn)
 
 /**
  * Check the specified capture group of the pattern (if any).
- * Return true if no capture group specified if it is valid,
+ * Return true if no capture group specified or if it is valid,
  * and -1 on error.
  *
 * Algo: Append the specified capture group specification to the pattern
diff --git a/link-grammar/dict-file/dictionary.c b/link-grammar/dict-file/dictionary.c
index 9feedee2ad..e2252d4005 100644
--- a/link-grammar/dict-file/dictionary.c
+++ b/link-grammar/dict-file/dictionary.c
@@ -132,7 +132,7 @@ static bool load_regexes(Dictionary dict, const char *regex_name)
 }
 
 /**
- * Read dictionary entries from a wide-character string "input".
+ * Read dictionary entries from a utf-8 string "input".
  * All other parts are read from files.
  */
 #define D_DICT 10
diff --git a/link-grammar/disjunct-utils.c b/link-grammar/disjunct-utils.c
index 98b629c052..b5e456ab16 100644
--- a/link-grammar/disjunct-utils.c
+++ b/link-grammar/disjunct-utils.c
@@ -265,7 +265,7 @@ static bool disjuncts_equal(Disjunct * d1, Disjunct * d2, bool ignore_string)
 
 	/* A shortcut to detect NULL and non-NULL jets on the same side.
 	 * Note that it is not possible to share memory between the
-	 * right/left jets due to filed value differences (sharing would
+	 * right/left jets due to field value differences (sharing would
 	 * invalidate this check). */
 	if (d1->left == d2->right) return false;
 
diff --git a/link-grammar/linkage/linkage.c b/link-grammar/linkage/linkage.c
index 62f85aa6d5..7b61760fbe 100644
--- a/link-grammar/linkage/linkage.c
+++ b/link-grammar/linkage/linkage.c
@@ -127,10 +127,9 @@ static Gword *wordgraph_null_join(Sentence sent, Gword **start, Gword **end)
  * Add a display wordgraph placeholder for a combined morpheme with links
  * that are not discardable.
  * This is needed only when hiding morphology. This is a kind of a hack.
- * It it is not deemed nice, the "hide morphology" mode should just not be
+ * If it is not deemed nice, the "hide morphology" mode should just not be
  * used for languages with morphemes which have links that cannot be
  * discarded on that mode (like Hebrew).
- * Possible FIXME: Currently it is also used by w/ in English.
  */
 static Gword *wordgraph_link_placeholder(Sentence sent, Gword *w)
 {
diff --git a/link-grammar/memory-pool.c b/link-grammar/memory-pool.c
index 5e78959958..4a4d9272e8 100644
--- a/link-grammar/memory-pool.c
+++ b/link-grammar/memory-pool.c
@@ -44,7 +44,7 @@ static size_t align_size(size_t element_size)
 * Create a memory pool descriptor.
 * 1. If required, set the allocation size to a power of 2 of the element size.
 * 2. Save the given parameters in the pool descriptor, to be used by
- *    pool_alloc();
+ *    pool_alloc_vec();
 * 3. Chain the pool descriptor to the given pool_list, so it can be
 *    automatically freed. [ Not implemented. ]
 */
diff --git a/link-grammar/parse/count.c b/link-grammar/parse/count.c
index 16ddb10cd1..4573dbfcc1 100644
--- a/link-grammar/parse/count.c
+++ b/link-grammar/parse/count.c
@@ -31,7 +31,7 @@
 
 /* This file contains the exhaustive search algorithm. */
 
-#define D_COUNT 5 /* General debug level for this file. */
+#define D_COUNT 5 /* General debug level for this file */
 
 typedef uint8_t null_count_m; /* Storage representation of null_count */
 typedef uint8_t WordIdx_m; /* Storage representation of word index */
@@ -39,8 +39,9 @@ typedef uint8_t WordIdx_m; /* Storage representation of word index */
 /* Allow to disable the use of the various caches (for debug). */
 const bool ENABLE_WORD_SKIP_VECTOR = true;
 const bool ENABLE_MATCH_LIST_CACHE = true;
-const bool ENABLE_TABLE_LRCNT = true; // Also controls the above two caches.
-const bool USE_TABLE_TRACON = true; // The table is always maintained.
+const bool ENABLE_TABLE_LRCNT = true;  // Also controls the above two caches.
+const bool USE_TABLE_TRACON = true;    // The table is always maintained.
+const bool USE_PSEUDOCOUNT = true;     // Controls only the non-cyclic solutions.
 
 typedef struct Table_tracon_s Table_tracon;
 struct Table_tracon_s
@@ -571,7 +572,7 @@ static Count_bin table_store(count_context_t *ctxt,
 
 	if (!USE_TABLE_TRACON)
 	{
-		// In case a table count already exist, check its consistency.
+		// In case a table count already exists, check its consistency.
 		Count_bin *e = table_lookup(ctxt, lw, rw, le, re, null_count, NULL);
 		if (e != NULL)
 		{
@@ -971,6 +972,7 @@ static Count_bin table_count(count_context_t * ctxt,
 	return *count;
 }
 
+#ifdef USE_PSEUDOCOUNT
 /**
  * Check to see if a parse is even possible, so that we don't later waste
  * CPU time performing an actual count, only to discover that it is zero.
@@ -1007,6 +1009,7 @@ static bool pseudocount(count_context_t * ctxt, Count_bin *count,
 
 	return false;
 }
+#endif // USE_PSEUDOCOUNT
 
 /**
  * Return the number of optional words strictly between w1 and w2.
@@ -1529,18 +1532,23 @@ static Count_bin do_count(const char dlabel[], count_context_t *ctxt,
 		 * lookup can be skipped in cases we cannot skip the actual
 		 * calculation and a table entry exists. */
 		Count_bin lcount[4] = { NO_COUNT, NO_COUNT, NO_COUNT, NO_COUNT };
+		Count_bin rcount[4] = { NO_COUNT, NO_COUNT, NO_COUNT, NO_COUNT };
+#ifdef USE_PSEUDOCOUNT
 		if (Lmatch && !leftpcount)
 		{
 			leftpcount =
 				pseudocount(ctxt, lcount, lw, w, le, d->left, lnull_cnt);
 		}
 
-		Count_bin rcount[4] = { NO_COUNT, NO_COUNT, NO_COUNT, NO_COUNT };
 		if (Rmatch && !rightpcount && (leftpcount || (le == NULL)))
 		{
 			rightpcount =
 				pseudocount(ctxt, rcount, w, rw, d->right, re, rnull_cnt);
 		}
+#else
+		leftpcount = Lmatch;
+		rightpcount = Rmatch;
+#endif // USE_PSEUDOCOUNT
 
 		/* Perform a table lookup for a possible cyclic solution. */
 		if (leftpcount)
diff --git a/link-grammar/prepare/build-disjuncts.c b/link-grammar/prepare/build-disjuncts.c
index 84d1c37b3b..45ac3db330 100644
--- a/link-grammar/prepare/build-disjuncts.c
+++ b/link-grammar/prepare/build-disjuncts.c
@@ -129,7 +129,7 @@ static void debug_last(Clause *c, Clause **c_last, const char *type)
  * Return the number of clauses that would be generated by expanding
  * the expression.
  */
-static unsigned long count_clauses(Exp *e)
+GNUC_UNUSED static unsigned long count_clauses(Exp *e)
 {
 	if (e->type == AND_type)
 	{
@@ -350,7 +350,7 @@ Disjunct *build_disjuncts_for_exp(Sentence sent, Exp* exp, const char *word,
 	pool_reuse(ct.Clause_pool);
 	pool_reuse(ct.Tconnector_pool);
 
-	/* We are done, in the concvetional case. */
+	/* We are done, in the conventional case. */
 	if (NULL == opts || 0 == opts->max_disjuncts) return dis;
 
 	/* If there are more than the allowed number of disjuncts,
@@ -358,9 +358,10 @@ Disjunct *build_disjuncts_for_exp(Sentence sent, Exp* exp, const char *word,
 	 * with uniform weighting; no attempt to look at the cost
 	 * is made. A fancier algo might selectively choose those
 	 * with lower cost.
+	 * We don't care for now that this doesn't work if discnt > INT_MAX.
 	 */
-	unsigned int maxdj = opts->max_disjuncts;
-	unsigned int discnt = count_disjuncts(dis);
+	int maxdj = opts->max_disjuncts;
+	int discnt = count_disjuncts(dis);
 	if (discnt < maxdj) return dis;
 
 	/* If we are here, we need to trim down the list */
@@ -369,7 +370,7 @@ Disjunct *build_disjuncts_for_exp(Sentence sent, Exp* exp, const char *word,
 	Disjunct *ktail = dis;
 	for (Disjunct *d = dis->next; d != NULL; d=d->next)
 	{
-		unsigned int pick = rand_r(&rst) % discnt;
+		int pick = rand_r(&rst) % discnt;
 		if (pick < maxdj)
 		{
 			ktail->next = d;
diff --git a/link-grammar/tokenize/tokenize.c b/link-grammar/tokenize/tokenize.c
index a3fdca9b67..613a0ab887 100644
--- a/link-grammar/tokenize/tokenize.c
+++ b/link-grammar/tokenize/tokenize.c
@@ -3124,12 +3124,7 @@ static Word *word_new(Sentence sent)
 	const size_t len = sent->length;
 
 	sent->word = realloc(sent->word, (len+1)*sizeof(*sent->word));
-	sent->word[len].d = NULL;
-	sent->word[len].x = NULL;
-	sent->word[len].unsplit_word = NULL;
-	sent->word[len].alternatives = NULL;
-	sent->word[len].gwords = NULL;
-	sent->word[len].optional = false;
+	memset(&sent->word[len], 0, sizeof(sent->word[0]));
 	sent->length++;
 
 	return &sent->word[len];
diff --git a/link-grammar/tokenize/word-structures.h b/link-grammar/tokenize/word-structures.h
index e6a8007e30..c4efcb75ac 100644
--- a/link-grammar/tokenize/word-structures.h
+++ b/link-grammar/tokenize/word-structures.h
@@ -43,7 +43,7 @@ struct Word_struct
 
 	X_node * x;             /* Sentence starts out with these, */
 	Disjunct * d;           /* eventually these get generated. */
-	uint32_t num_disjuncts; /* Length of above */
+	uint32_t num_disjuncts; /* Length of above. */
 
 	bool optional;          /* Linkage is optional. */
 
diff --git a/link-grammar/tracon-set.c b/link-grammar/tracon-set.c
index 0847c57225..58575d760a 100644
--- a/link-grammar/tracon-set.c
+++ b/link-grammar/tracon-set.c
@@ -15,11 +15,12 @@
 
 #include "const-prime.h"
 #include "connectors.h"
+#include "tracon-set.h"
+#include "utilities.h"
+
 #ifdef TRACON_SET_DEBUG
 #include "disjunct-utils.h" // print_connector_list_str
 #endif
-#include "tracon-set.h"
-#include "utilities.h"
 
 /**
  * This is an adaptation of the string_set module for detecting unique
diff --git a/link-parser/lg_readline.c b/link-parser/lg_readline.c
index c7308a092f..d2e872ba54 100644
--- a/link-parser/lg_readline.c
+++ b/link-parser/lg_readline.c
@@ -64,8 +64,8 @@ static char *complete_command(const wchar_t *input, size_t len, bool is_help)
 	const Switch **start = NULL;
 	const Switch **end;
 	const Switch **match;
-	const char *prev;
-	size_t addlen;
+	const char *prev = NULL;
+	size_t addlen = 0;
 	bool is_assignment = false; /* marking for the help facility */
 
 	if ((1 < len) && L'=' == input[len-1] && !is_help)
diff --git a/link-parser/link-generator.c b/link-parser/link-generator.c
index 77d331a6a7..350cbf409e 100644
--- a/link-parser/link-generator.c
+++ b/link-parser/link-generator.c
@@ -61,7 +61,7 @@ typedef struct
 /* Originally, this program used argp, but now it uses getopt in
  * order to make the porting to MS Windows easy. The original
  * definitions are still being used here because they are more readable
- * and the also allow easy a dynamic generation of an help message.
+ * and also allow an easy dynamic generation of a help message.
  * They are converted to getopt options. Only the minimal needed
  * conversion is done (e.g. flags are not supported).
  */
diff --git a/msvc/Python3.vcxproj b/msvc/Python3.vcxproj
index 1e1b1e8d69..f5d78ba211 100644
--- a/msvc/Python3.vcxproj
+++ b/msvc/Python3.vcxproj
@@ -24,22 +24,22 @@
 <nul set/p x="Invoking "& where.exe swig.exe
 echo on
 cd $(IntDir)
-swig.exe -c++ -python -py3 -outdir $(OutDir) -module clinkgrammar -I..\..\..\link-grammar -o lg_python_wrap.cpp "%(FullPath)"
+swig.exe -c++ -python -outdir $(OutDir) -module clinkgrammar -I..\..\..\link-grammar -o lg_python_wrap.cpp "%(FullPath)"
 %40echo off
 <nul set/p x="Invoking "& where.exe swig.exe
 echo on
 cd $(IntDir)
-swig.exe -c++ -python -py3 -outdir $(OutDir) -module clinkgrammar -I..\..\..\link-grammar -o lg_python_wrap.cpp "%(FullPath)"
+swig.exe -c++ -python -outdir $(OutDir) -module clinkgrammar -I..\..\..\link-grammar -o lg_python_wrap.cpp "%(FullPath)"
 %40echo off
 <nul set/p x="Invoking "& where.exe swig.exe
 echo on
 cd $(IntDir)
-swig.exe -c++ -python -py3 -outdir $(OutDir) -module clinkgrammar -I..\..\..\link-grammar -o lg_python_wrap.cpp "%(FullPath)"
+swig.exe -c++ -python -outdir $(OutDir) -module clinkgrammar -I..\..\..\link-grammar -o lg_python_wrap.cpp "%(FullPath)"
 %40echo off
 <nul set/p x="Invoking "& where.exe swig.exe
 echo on
 cd $(IntDir)
-swig.exe -c++ -python -py3 -outdir $(OutDir) -module clinkgrammar -I..\..\..\link-grammar -o lg_python_wrap.cpp "%(FullPath)"
+swig.exe -c++ -python -outdir $(OutDir) -module clinkgrammar -I..\..\..\link-grammar -o lg_python_wrap.cpp "%(FullPath)"
 %40echo off
 Generating Python3 wrapper ^& interface
 $(IntDir)\lg_python_wrap.cpp;$(OutDir)\clinkgrammar.py
@@ -266,4 +266,4 @@
 
 
 
-</Project>
\ No newline at end of file
+</Project>