Skip to content

Commit

Permalink
mk_parse_set(): Optimize for zero table counts
Browse files Browse the repository at this point in the history
  • Loading branch information
ampli committed Mar 27, 2024
1 parent e347855 commit 8130a52
Showing 1 changed file with 79 additions and 22 deletions.
101 changes: 79 additions & 22 deletions link-grammar/parse/extract-links.c
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,49 @@ static Parse_set* dummy_set(int lw, int rw,
return &dummy->set;
}

static count_t table_count(count_context_t * ctxt,
int lw, int rw, Connector *le, Connector *re,
unsigned int null_count)
{
/* This check is not necessary for correctness, but it saves CPU time.
* If a cross link would result, immediately return 0. Note that there is
* no need to check here if the nearest_word fields are in the range
* [lw, rw] due to the way start_word/end_word are computed, and due to
* nearest_word checks in form_match_list(). */
if ((le != NULL) && (re != NULL) && (le->nearest_word > re->nearest_word))
return 0;

Count_bin *count = table_lookup(ctxt, lw, rw, le, re, null_count, NULL);

/* If there's no counter, then there's no way to parse. */
if (NULL == count) return 0;

/* hist_total() from histogram.h is used here, but note that anyway
* defining PERFORM_COUNT_HISTOGRAMMING=1 will break the code in this
* file due to incompatibilities caused when count_t and w_count_t got
* introduced. One way to fix it is to define Count_bin to use
* count_t instead of w_count_t. There is no point to fix it for now. */
return hist_total(count);
}

static bool fetch_counts(count_context_t *ctxt, count_t count[4],
int ew, int w, Connector *e, Connector *c,
unsigned int null_count)
{
count[0] = table_count(ctxt, ew, w, e->next, c->next, null_count);

if (e->multi)
count[1] = table_count(ctxt, ew, w, e, c->next, null_count);

if (c->multi)
count[2] = table_count(ctxt, ew, w, e->next, c, null_count);

if (e->multi && c->multi)
count[3] = table_count(ctxt, ew, w, e, c, null_count);

return (count[0] > 0) || (count[1] > 0) || (count[2] > 0) || (count[3] > 0);
}

/**
* returns NULL if there are no ways to parse, or returns a pointer
* to a set structure representing all the ways to parse.
Expand All @@ -390,7 +433,7 @@ static Parse_set* dummy_set(int lw, int rw,
*/
static
Parse_set * mk_parse_set(fast_matcher_t *mchxt,
count_context_t * ctxt,
count_context_t * ctxt, count_t count,
int lw, int rw,
Connector *le, Connector *re, unsigned int null_count,
extractor_t * pex)
Expand All @@ -399,11 +442,9 @@ Parse_set * mk_parse_set(fast_matcher_t *mchxt,

assert(null_count < 0x7fff, "Called with null_count < 0.");

Count_bin *count = table_lookup(ctxt, lw, rw, le, re, null_count, NULL);

/* If there's no counter, then there's no way to parse. */
if (NULL == count) return NULL;
if (hist_total(count) == 0) return NULL;
if (count < 0)
count = table_count(ctxt, lw, rw, le, re, null_count);
if (count == 0) return NULL;

Pset_bucket *xtp = x_table_pointer(lw, rw, le, re, null_count, pex);

Expand All @@ -415,7 +456,7 @@ Parse_set * mk_parse_set(fast_matcher_t *mchxt,
xtp = x_table_store(lw, rw, le, re, null_count, pex);

/* The count we previously computed; it's non-zero. */
xtp->set.count = hist_total(count);
xtp->set.count = count;

//#define NUM_PARSES 4
// xtp->set.cost_cutoff = hist_cost_cutoff(count, NUM_PARSES);
Expand Down Expand Up @@ -448,7 +489,7 @@ Parse_set * mk_parse_set(fast_matcher_t *mchxt,
{
if (dis->left == NULL)
{
pset = mk_parse_set(mchxt, ctxt,
pset = mk_parse_set(mchxt, ctxt, -1,
w, rw, dis->right, NULL,
null_count-1, pex);
if (pset == NULL) continue;
Expand All @@ -459,7 +500,7 @@ Parse_set * mk_parse_set(fast_matcher_t *mchxt,
RECOUNT({xtp->set.recount += pset->recount;})
}
}
pset = mk_parse_set(mchxt, ctxt,
pset = mk_parse_set(mchxt, ctxt, -1,
w, rw, NULL, NULL,
null_count-1, pex);
if (pset != NULL)
Expand Down Expand Up @@ -553,33 +594,49 @@ Parse_set * mk_parse_set(fast_matcher_t *mchxt,
/* Now, we determine if (based on table only) we can see that
the current range is not parsable. */

for (i=0; i<4; i++) { ls[i] = rs[i] = NULL; }
count_t lcount[4] = { 0 }, rcount[4] = { 0 };
if (Lmatch)
Lmatch = fetch_counts(ctxt, lcount, lw, w, le, d->left, lnull_count);
if (Rmatch && (Lmatch || (le == NULL)))
Rmatch = fetch_counts(ctxt, rcount, w, rw, d->right, re, rnull_count);

count_t l_bnr = 0, r_bnl = 0;
if (Lmatch)
l_bnr = table_count(ctxt, w, rw, d->right, re, rnull_count);
else
{
if (!Rmatch) continue; /* Left and right counts are 0. */
if (le == NULL)
r_bnl = table_count(ctxt, lw, w, le, d->left, lnull_count);
}

for (i=0; i<4; i++) { ls[i] = rs[i] = NULL; }
if (Lmatch && (Rmatch || (l_bnr > 0)))
{
ls[0] = mk_parse_set(mchxt, ctxt,
ls[0] = mk_parse_set(mchxt, ctxt, lcount[0],
lw, w, le->next, d->left->next,
lnull_count, pex);

if (le->multi)
ls[1] = mk_parse_set(mchxt, ctxt,
ls[1] = mk_parse_set(mchxt, ctxt, lcount[1],
lw, w, le, d->left->next,
lnull_count, pex);

if (d->left->multi)
ls[2] = mk_parse_set(mchxt, ctxt,
ls[2] = mk_parse_set(mchxt, ctxt, lcount[2],
lw, w, le->next, d->left,
lnull_count, pex);

if (le->multi && d->left->multi)
ls[3] = mk_parse_set(mchxt, ctxt,
ls[3] = mk_parse_set(mchxt, ctxt, lcount[3],
lw, w, le, d->left,
lnull_count, pex);

if (ls[0] != NULL || ls[1] != NULL || ls[2] != NULL || ls[3] != NULL)
{
ls_exists = true;
/* Evaluate using the left match, but not the right */
Parse_set* rset = mk_parse_set(mchxt, ctxt,
Parse_set* rset = mk_parse_set(mchxt, ctxt, l_bnr,
w, rw, d->right, re,
rnull_count, pex);
if (rset != NULL)
Expand All @@ -598,24 +655,24 @@ Parse_set * mk_parse_set(fast_matcher_t *mchxt,
}
}

if (Rmatch && (ls_exists || le == NULL))
if (Rmatch && (ls_exists || (r_bnl > 0)))
{
rs[0] = mk_parse_set(mchxt, ctxt,
rs[0] = mk_parse_set(mchxt, ctxt, rcount[0],
w, rw, d->right->next, re->next,
rnull_count, pex);

if (d->right->multi)
rs[1] = mk_parse_set(mchxt, ctxt,
rs[1] = mk_parse_set(mchxt, ctxt, rcount[1],
w, rw, d->right, re->next,
rnull_count, pex);

if (re->multi)
rs[2] = mk_parse_set(mchxt, ctxt,
rs[2] = mk_parse_set(mchxt, ctxt, rcount[2],
w, rw, d->right->next, re,
rnull_count, pex);

if (d->right->multi && re->multi)
rs[3] = mk_parse_set(mchxt, ctxt,
rs[3] = mk_parse_set(mchxt, ctxt, rcount[3],
w, rw, d->right, re,
rnull_count, pex);

Expand All @@ -624,7 +681,7 @@ Parse_set * mk_parse_set(fast_matcher_t *mchxt,
if (le == NULL)
{
/* Evaluate using the right match, but not the left */
Parse_set* lset = mk_parse_set(mchxt, ctxt,
Parse_set* lset = mk_parse_set(mchxt, ctxt, r_bnl,
lw, w, le, d->left,
lnull_count, pex);

Expand Down Expand Up @@ -726,7 +783,7 @@ bool build_parse_set(extractor_t* pex, Sentence sent,
pex->islands_ok = opts->islands_ok;

pex->parse_set =
mk_parse_set(mchxt, ctxt,
mk_parse_set(mchxt, ctxt, -1,
-1, sent->length, NULL, NULL, null_count+1, pex);

return set_overflowed(pex);
Expand Down

0 comments on commit 8130a52

Please sign in to comment.