Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Small fixes #1506

Merged
merged 11 commits into from
Apr 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -57,3 +57,4 @@ data/th/old_words/**
# a.wasm is created by "emcc" (in "emconfigure ./configure").
# For now just ignore it.
a.wasm
Session.vim
4 changes: 2 additions & 2 deletions bindings/python-examples/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -1090,12 +1090,12 @@ def test_no_op_resolving(self):

def test_resolving(self):
"""
Test expression resolving using the default headline:4 setting from
Test expression resolving using the default headline:99 setting from
data/en/4.0.dialect.
"""
dictnode = clg.dictionary_lookup_list(self.d._obj, sm('book.n'))
exp_old = dictnode[0].exp
exp_new = clg.lg_exp_resolve(self.d._obj, exp_old, ParseOptions()._obj) # headline:4
exp_new = clg.lg_exp_resolve(self.d._obj, exp_old, ParseOptions()._obj) # headline:99

# Find the 2 locations with a difference when comparing
# exp_old to exp_new and validate them.
Expand Down
2 changes: 1 addition & 1 deletion bindings/python/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ if HAVE_SWIG
# Swig builds these ....
$(built_c_sources) $(built_py_sources): $(SWIG_INCLUDES)
$(built_c_sources) $(built_py_sources): $(SWIG_SOURCES)
$(AM_V_GEN) $(SWIG) -python -py3 -module clinkgrammar -I$(top_srcdir)/link-grammar -o $@ $<
$(AM_V_GEN) $(SWIG) -python -module clinkgrammar -I$(top_srcdir)/link-grammar -o $@ $<
else
$(built_c_sources) $(built_py_sources):
touch $(built_c_sources) $(built_py_sources)
Expand Down
2 changes: 1 addition & 1 deletion debug/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ messages.)

Or, in order to display the word array:

`link-parser -v=7 -debug=tokenize.c,print_sentence_word_alternatives`
`link-parser -v=8 -debug=build_sentence_expressions,print_sentence_word_alternatives`

5) Debug post-processing:

Expand Down
6 changes: 3 additions & 3 deletions link-grammar/api-structures.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,10 @@
* included tracon sharing) should always be done. And now the overhead
* is negligible.
*
* Note: setting this to non-zero values disables some of the more
* Note: setting this to 254(MAX_SENTENCE) disables some of the more
* subtle tracon encoding code, and thus can be used to create a
* baseline parse, skipping that code. This can be setin with the
* test_enabled("min-len-encoding") flag (see api.c)
* baseline parse, skipping that code. This can be done using
* -test="min-len-encoding:254" (see sentence.c).
*/
#define SENTENCE_MIN_LENGTH_TRAILING_HASH 0

Expand Down
43 changes: 23 additions & 20 deletions link-grammar/connectors.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
#ifndef _LINK_GRAMMAR_CONNECTORS_H_
#define _LINK_GRAMMAR_CONNECTORS_H_

#include <ctype.h> // for islower()
#include <ctype.h> // islower()
#include <stdlib.h>
#include <stdbool.h>
#include <stdint.h> // for uint8_t
#include <stdint.h> // uint8_t ...

#include "api-types.h"
#include "error.h"
Expand Down Expand Up @@ -100,6 +100,9 @@ struct condesc_struct
};
typedef struct condesc_struct condesc_t;

/* Length-limits for how far connectors can reach out. */
#define UNLIMITED_LEN 255

typedef struct length_limit_def
{
const char *defword;
Expand Down Expand Up @@ -145,17 +148,17 @@ struct Connector_struct
Connector *next;
union
{
const gword_set *originating_gword; /* Used while and after parsing */
const gword_set *originating_gword; /* Used while and after parsing. */
struct
{
int32_t refcount;/* Memory-sharing reference count - for pruning. */
uint16_t exp_pos; /* The position in the originating expression,
currently used only for debugging dict macros. */
bool shallow; /* TRUE if this is a shallow connector.
* A connectors is shallow if it is the first in
* its list on its disjunct. (It is deep if it is
* not the first in its list; it is deepest if it
* is the last on its list.) */
int32_t refcount; /* Memory-sharing reference count - for pruning. */
uint16_t exp_pos; /* The position in the originating expression,
* currently used only for debugging dict macros. */
bool shallow; /* TRUE if this is a shallow connector.
* A connector is shallow if it is the first in
* its list on its disjunct. (It is deep if it is
* not the first in its list; it is deepest if it
* is the last on its list.) */
};
};
};
Expand Down Expand Up @@ -225,9 +228,6 @@ static inline Connector *connector_deepest(const Connector *c)
return (Connector *)c; /* Note: Constness removed. */
}

/* Length-limits for how far connectors can reach out. */
#define UNLIMITED_LEN 255

/**
* Returns TRUE if s and t match according to the connector matching
* rules. The connector strings must be properly formed, starting with
Expand Down Expand Up @@ -383,17 +383,20 @@ static inline size_t pair_hash(int lw, int rw,

/**
* Get the word number of the given tracon.
* c is the leading tracon connector. The word number is extracted from
* the nearest_word of the deepest connector.
* It is extracted from the nearest_word of the deepest connector.
* @param c The leading tracon connector.
* @param dir Direction - 0: left; 1: right.
* @return Sentence word number.
*
* This function depends on setup_connectors() (which initializes
* nearest_word). It should not be called after power_prune() (which
* changes nearest_word).
* nearest_word). It should not be called during or after power_prune()
* (which changes nearest_word).
*
* Note: An alternative for getting the word number of a tracon is to keep
* it in the tracon list table or in a separate array. Both ways add
* noticeable overhead, maybe due to the added CPU cache footprint.
* However, if the word number will be needed after power_prune() there
* will be a need to keep it in an alternative way.
* However, if the need arises for the word number of a tracon during/after
* power_prune(), there will be a need to keep it in an alternative way.
*/
static inline int get_tracon_word_number(Connector *c, int dir)
{
Expand Down
4 changes: 3 additions & 1 deletion link-grammar/dict-common/dict-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
#ifndef _LG_DICT_COMMON_H_
#define _LG_DICT_COMMON_H_

#include <limits.h> // INT_MAX

#include "api-types.h" // pp_knowledge
#include "connectors.h" // ConTable
#include "dict-defines.h"
Expand All @@ -34,7 +36,7 @@
* is used. */
static const float UNINITIALIZED_MAX_DISJUNCT_COST = -10000.0f;
static const float DEFAULT_MAX_DISJUNCT_COST = 2.7f;
static const float UNINITIALIZED_MAX_DISJUNCTS = -1;
static const int UNINITIALIZED_MAX_DISJUNCTS = INT_MAX;

/* We need some of these as literal strings. */
#define LG_DICTIONARY_VERSION_NUMBER "dictionary-version-number"
Expand Down
2 changes: 1 addition & 1 deletion link-grammar/dict-common/regex-morph.c
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ static void reg_free(Regex_node *rn)

/**
* Check the specified capture group of the pattern (if any).
* Return true if no capture group specified if it is valid,
* Return true if no capture group specified or if it is valid,
* and -1 on error.
*
* Algo: Append the specified capture group specification to the pattern
Expand Down
2 changes: 1 addition & 1 deletion link-grammar/dict-file/dictionary.c
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ static bool load_regexes(Dictionary dict, const char *regex_name)
}

/**
* Read dictionary entries from a wide-character string "input".
* Read dictionary entries from a utf-8 string "input".
* All other parts are read from files.
*/
#define D_DICT 10
Expand Down
2 changes: 1 addition & 1 deletion link-grammar/disjunct-utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ static bool disjuncts_equal(Disjunct * d1, Disjunct * d2, bool ignore_string)

/* A shortcut to detect NULL and non-NULL jets on the same side.
* Note that it is not possible to share memory between the
* right/left jets due to filed value differences (sharing would
* right/left jets due to field value differences (sharing would
* invalidate this check). */
if (d1->left == d2->right) return false;

Expand Down
3 changes: 1 addition & 2 deletions link-grammar/linkage/linkage.c
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,9 @@ static Gword *wordgraph_null_join(Sentence sent, Gword **start, Gword **end)
* Add a display wordgraph placeholder for a combined morpheme with links
* that are not discardable.
* This is needed only when hiding morphology. This is a kind of a hack.
* It it is not deemed nice, the "hide morphology" mode should just not be
* If it is not deemed nice, the "hide morphology" mode should just not be
* used for languages with morphemes which have links that cannot be
* discarded on that mode (like Hebrew).
* Possible FIXME: Currently it is also used by w/ in English.
*/
static Gword *wordgraph_link_placeholder(Sentence sent, Gword *w)
{
Expand Down
2 changes: 1 addition & 1 deletion link-grammar/memory-pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ static size_t align_size(size_t element_size)
* Create a memory pool descriptor.
* 1. If required, set the allocation size to a power of 2 of the element size.
* 2. Save the given parameters in the pool descriptor, to be used by
* pool_alloc();
* pool_alloc_vec();
* 3. Chain the pool descriptor to the given pool_list, so it can be
* automatically freed. [ Not implemented. ]
*/
Expand Down
18 changes: 13 additions & 5 deletions link-grammar/parse/count.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,17 @@

/* This file contains the exhaustive search algorithm. */

#define D_COUNT 5 /* General debug level for this file. */
#define D_COUNT 5 /* General debug level for this file */

typedef uint8_t null_count_m; /* Storage representation of null_count */
typedef uint8_t WordIdx_m; /* Storage representation of word index */

/* Allow disabling the various caches (for debugging). */
const bool ENABLE_WORD_SKIP_VECTOR = true;
const bool ENABLE_MATCH_LIST_CACHE = true;
const bool ENABLE_TABLE_LRCNT = true; // Also controls the above two caches.
const bool USE_TABLE_TRACON = true; // The table is always maintained.
const bool ENABLE_TABLE_LRCNT = true; // Also controls the above two caches.
const bool USE_TABLE_TRACON = true; // The table is always maintained.
/* Controls only the non-cyclic solutions.
 * Unlike the flags above, this one must be a preprocessor macro and not a
 * "const bool" variable, because it is tested with "#ifdef USE_PSEUDOCOUNT",
 * and #ifdef only sees macros (a variable of that name is invisible to it,
 * which would silently compile out the pseudocount code).
 * To disable pseudocount, comment this definition out; defining it
 * as 0 would still satisfy #ifdef. */
#define USE_PSEUDOCOUNT 1

typedef struct Table_tracon_s Table_tracon;
struct Table_tracon_s
Expand Down Expand Up @@ -571,7 +572,7 @@ static Count_bin table_store(count_context_t *ctxt,

if (!USE_TABLE_TRACON)
{
// In case a table count already exist, check its consistency.
// In case a table count already exists, check its consistency.
Count_bin *e = table_lookup(ctxt, lw, rw, le, re, null_count, NULL);
if (e != NULL)
{
Expand Down Expand Up @@ -971,6 +972,7 @@ static Count_bin table_count(count_context_t * ctxt,
return *count;
}

#ifdef USE_PSEUDOCOUNT
/**
* Check to see if a parse is even possible, so that we don't later waste
* CPU time performing an actual count, only to discover that it is zero.
Expand Down Expand Up @@ -1007,6 +1009,7 @@ static bool pseudocount(count_context_t * ctxt, Count_bin *count,

return false;
}
#endif // USE_PSEUDOCOUNT

/**
* Return the number of optional words strictly between w1 and w2.
Expand Down Expand Up @@ -1529,18 +1532,23 @@ static Count_bin do_count(const char dlabel[], count_context_t *ctxt,
* lookup can be skipped in cases we cannot skip the actual
* calculation and a table entry exists. */
Count_bin lcount[4] = { NO_COUNT, NO_COUNT, NO_COUNT, NO_COUNT };
Count_bin rcount[4] = { NO_COUNT, NO_COUNT, NO_COUNT, NO_COUNT };
#ifdef USE_PSEUDOCOUNT
if (Lmatch && !leftpcount)
{
leftpcount =
pseudocount(ctxt, lcount, lw, w, le, d->left, lnull_cnt);
}

Count_bin rcount[4] = { NO_COUNT, NO_COUNT, NO_COUNT, NO_COUNT };
if (Rmatch && !rightpcount && (leftpcount || (le == NULL)))
{
rightpcount =
pseudocount(ctxt, rcount, w, rw, d->right, re, rnull_cnt);
}
#else
leftpcount = Lmatch;
rightpcount = Rmatch;
#endif // USE_PSEUDOCOUNT

/* Perform a table lookup for a possible cyclic solution. */
if (leftpcount)
Expand Down
11 changes: 6 additions & 5 deletions link-grammar/prepare/build-disjuncts.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ static void debug_last(Clause *c, Clause **c_last, const char *type)
* Return the number of clauses that would be generated by expanding
* the expression.
*/
static unsigned long count_clauses(Exp *e)
GNUC_UNUSED static unsigned long count_clauses(Exp *e)
{
if (e->type == AND_type)
{
Expand Down Expand Up @@ -350,17 +350,18 @@ Disjunct *build_disjuncts_for_exp(Sentence sent, Exp* exp, const char *word,
pool_reuse(ct.Clause_pool);
pool_reuse(ct.Tconnector_pool);

/* We are done, in the concvetional case. */
/* We are done, in the conventional case. */
if (NULL == opts || 0 == opts->max_disjuncts) return dis;

/* If there are more than the allowed number of disjuncts,
* then randomly discard some of them. The discard is done
* with uniform weighting; no attempt to look at the cost
* is made. A fancier algo might selectively choose those
* with lower cost.
* We don't care for now that this doesn't work if discnt > INT_MAX.
*/
unsigned int maxdj = opts->max_disjuncts;
unsigned int discnt = count_disjuncts(dis);
int maxdj = opts->max_disjuncts;
int discnt = count_disjuncts(dis);
if (discnt < maxdj) return dis;

/* If we are here, we need to trim down the list */
Expand All @@ -369,7 +370,7 @@ Disjunct *build_disjuncts_for_exp(Sentence sent, Exp* exp, const char *word,
Disjunct *ktail = dis;
for (Disjunct *d = dis->next; d != NULL; d=d->next)
{
unsigned int pick = rand_r(&rst) % discnt;
int pick = rand_r(&rst) % discnt;
if (pick < maxdj)
{
ktail->next = d;
Expand Down
7 changes: 1 addition & 6 deletions link-grammar/tokenize/tokenize.c
Original file line number Diff line number Diff line change
Expand Up @@ -3124,12 +3124,7 @@ static Word *word_new(Sentence sent)
const size_t len = sent->length;

sent->word = realloc(sent->word, (len+1)*sizeof(*sent->word));
sent->word[len].d = NULL;
sent->word[len].x = NULL;
sent->word[len].unsplit_word = NULL;
sent->word[len].alternatives = NULL;
sent->word[len].gwords = NULL;
sent->word[len].optional = false;
memset(&sent->word[len], 0, sizeof(sent->word[0]));
sent->length++;

return &sent->word[len];
Expand Down
2 changes: 1 addition & 1 deletion link-grammar/tokenize/word-structures.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ struct Word_struct

X_node * x; /* Sentence starts out with these, */
Disjunct * d; /* eventually these get generated. */
uint32_t num_disjuncts; /* Length of above */
uint32_t num_disjuncts; /* Length of above. */

bool optional; /* Linkage is optional. */

Expand Down
5 changes: 3 additions & 2 deletions link-grammar/tracon-set.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,12 @@

#include "const-prime.h"
#include "connectors.h"
#include "tracon-set.h"
#include "utilities.h"

#ifdef TRACON_SET_DEBUG
#include "disjunct-utils.h" // print_connector_list_str
#endif
#include "tracon-set.h"
#include "utilities.h"

/**
* This is an adaptation of the string_set module for detecting unique
Expand Down
4 changes: 2 additions & 2 deletions link-parser/lg_readline.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@ static char *complete_command(const wchar_t *input, size_t len, bool is_help)
const Switch **start = NULL;
const Switch **end;
const Switch **match;
const char *prev;
size_t addlen;
const char *prev = NULL;
size_t addlen = 0;
bool is_assignment = false; /* marking for the help facility */

if ((1 < len) && L'=' == input[len-1] && !is_help)
Expand Down
2 changes: 1 addition & 1 deletion link-parser/link-generator.c
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ typedef struct
/* Originally, this program used argp, but now it uses getopt in
* order to make the porting to MS Windows easy. The original
* definitions are still being used here because they are more readable
* and the also allow easy a dynamic generation of an help message.
* and also allow an easy dynamic generation of a help message.
* They are converted to getopt options. Only the minimal needed
* conversion is done (e.g. flags are not supported).
*/
Expand Down
Loading