From 5239ce545868b57a75a38ef3f04b42a01f4aa381 Mon Sep 17 00:00:00 2001 From: Krasimir Angelov Date: Thu, 5 Sep 2024 14:01:22 +0200 Subject: [PATCH] an experimental left-corner table maker --- src/runtime/c/Makefile.am | 2 +- src/runtime/c/pgf/data.h | 93 +++++++- src/runtime/c/pgf/parser.cxx | 427 +++++++++++++++++++++++++++++++++++ src/runtime/c/pgf/parser.h | 21 ++ src/runtime/c/pgf/reader.cxx | 4 +- 5 files changed, 540 insertions(+), 7 deletions(-) diff --git a/src/runtime/c/Makefile.am b/src/runtime/c/Makefile.am index 3fd9b2383..142329221 100644 --- a/src/runtime/c/Makefile.am +++ b/src/runtime/c/Makefile.am @@ -47,7 +47,7 @@ libpgf_la_SOURCES = \ pgf/md5.h libpgf_la_LDFLAGS = -no-undefined -version-info 4:0:0 -libpgf_la_CXXFLAGS = -fno-rtti -std=c++11 -DCOMPILING_PGF +libpgf_la_CXXFLAGS = -fno-rtti -std=c++14 -DCOMPILING_PGF bin_PROGRAMS = diff --git a/src/runtime/c/pgf/data.h b/src/runtime/c/pgf/data.h index 7945e9417..ac8b5f447 100644 --- a/src/runtime/c/pgf/data.h +++ b/src/runtime/c/pgf/data.h @@ -124,13 +124,15 @@ typedef struct { PgfProbspace funs_by_cat; } PgfAbstr; +typedef struct { + size_t factor; + size_t var; +} term; + struct PGF_INTERNAL_DECL PgfLParam { size_t i0; size_t n_terms; - struct { - size_t factor; - size_t var; - } terms[]; + term terms[]; static void release(ref param); }; @@ -263,6 +265,89 @@ struct PGF_INTERNAL_DECL PgfConcrPrintname { static void release(ref printname); }; +#define containerof(T,field,p) (T*) (((char*) p)-offsetof(T,field)) + +struct PGF_INTERNAL_DECL PgfLCEdge { + struct { + ref lincat; + struct { + size_t i0; + term& operator[](int i) { + PgfLCEdge *edge = containerof(PgfLCEdge,from.value,this); + return edge->terms[i]; + } + size_t size() { + PgfLCEdge *edge = containerof(PgfLCEdge,from.value,this); + return edge->from.lin_idx.n_offset; + } + } value; + struct { + size_t i0; + size_t n_offset; + term& operator[](int i) { + PgfLCEdge *edge = containerof(PgfLCEdge,from.lin_idx,this); + return edge->terms[n_offset+i]; + } + size_t size() { + PgfLCEdge *edge = containerof(PgfLCEdge,from.lin_idx,this); + return edge->to.value.n_offset-n_offset; + } + } lin_idx; + } from; + + struct { + ref lincat; + struct { + size_t i0; + size_t n_offset; + term& operator[](int i) { + PgfLCEdge *edge = containerof(PgfLCEdge,to.value,this); + return edge->terms[n_offset+i]; + } + size_t size() { + PgfLCEdge *edge = containerof(PgfLCEdge,to.value,this); + return edge->to.lin_idx.n_offset-n_offset; + } + } value; + struct { + size_t i0; + size_t n_offset; + term& operator[](int i) { + PgfLCEdge *edge = containerof(PgfLCEdge,to.lin_idx,this); + return edge->terms[n_offset+i]; + } + size_t size() { + PgfLCEdge *edge = containerof(PgfLCEdge,to.lin_idx,this); + return edge->n_terms-n_offset; + } + } lin_idx; + } to; + + struct { + size_t n_vars; + PgfVariableRange& operator[](int i) { + PgfLCEdge *edge = containerof(PgfLCEdge,vars,this); + return ((PgfVariableRange*)(((term*) (edge+1))+edge->n_terms))[i]; + } + size_t size() { + return n_vars; + } + } vars; + + size_t n_terms; + term terms[]; + + static ref alloc(size_t n_terms1, size_t n_terms2, size_t n_terms3, size_t n_terms4, size_t n_vars) { + auto edge = PgfDB::malloc((n_terms1+n_terms2+n_terms3+n_terms4)*sizeof(term)+n_vars*sizeof(PgfVariableRange)); + edge->from.lin_idx.n_offset = n_terms1; + edge->to.value.n_offset = n_terms1+n_terms2; + edge->to.lin_idx.n_offset = n_terms1+n_terms2+n_terms3; + edge->n_terms = n_terms1+n_terms2+n_terms3+n_terms4; + edge->vars.n_vars = n_vars; + return edge; + } +}; + struct PGF_INTERNAL_DECL PgfLRShift { size_t next_state; ref lincat; diff --git a/src/runtime/c/pgf/parser.cxx b/src/runtime/c/pgf/parser.cxx index e6a77cf0b..261d53b3d 100644 --- a/src/runtime/c/pgf/parser.cxx +++ b/src/runtime/c/pgf/parser.cxx @@ -1197,6 +1197,433 @@ vector PgfLRTableMaker::make() return lrtable; } +PgfLCTableMaker::PgfLCTableMaker(ref abstr, ref concr) +{ + this->abstr = abstr; + this->concr = concr; +} + +PgfLCTableMaker::~PgfLCTableMaker() +{ +} + +static bool edge_match(ref edge1, ref edge2) +{ + size_t sz1 = sizeof(PgfLCEdge) + sizeof(term)*edge1->n_terms + sizeof(PgfVariableRange)*edge1->vars.size(); + size_t sz2 = sizeof(PgfLCEdge) + sizeof(term)*edge2->n_terms + sizeof(PgfVariableRange)*edge2->vars.size(); + + if (sz1 != sz2) + return false; + return (memcmp(&*edge1,&*edge2,sz1) == 0); +} + +int comp (const void * elem1, const void * elem2) +{ + int f = *((int*)elem1); + int s = *((int*)elem2); + if (f > s) return 1; + if (f < s) return -1; + return 0; +} + +void PgfLCTableMaker::rename(ref edge) +{ + size_t next_var = 0; + std::map subst; + for (size_t i = 0; i < edge->n_terms; i++) { + auto it = subst.find(edge->terms[i].var); + if (it == subst.end()) { + subst[edge->terms[i].var] = next_var; + edge->terms[i].var = next_var++; + } else { + edge->terms[i].var = it->second; + } + } + + for (size_t i = 0; i < edge->vars.size(); i++) { + edge->vars[i].var = subst[edge->vars[i].var]; + } + qsort (&edge->vars[0], edge->vars.size(), sizeof(PgfVariableRange), comp); +} + +void PgfLCTableMaker::add_edge(ref edge) +{ + bool found = false; + for (ref xedge : forwards[edge->from.lincat]) { + if (edge_match(edge,xedge)) { + found = true; + break; + } + } + + if (!found) { + print_edge(edge); + forwards[edge->from.lincat].push_back(edge); + backwards[edge->to.lincat].push_back(edge); + update_closure(edge); + } +} + +void PgfLCTableMaker::update_closure(ref edge) +{ + auto &incoming = backwards[edge->from.lincat]; + size_t n_incoming = incoming.size(); + for (size_t i = 0; i < n_incoming; i++) { + ref xedge = compute_unifier(incoming[i],edge); + if (xedge != 0) { + rename(xedge); + add_edge(xedge); + } + } + + auto &outgoing = forwards[edge->to.lincat]; + size_t n_outgoing = outgoing.size(); + for (size_t i = 0; i < n_outgoing; i++) { + ref xedge = compute_unifier(edge,outgoing[i]); + if (xedge != 0) { + rename(xedge); + add_edge(xedge); + } + } +} + +typedef std::pair> Param; +typedef std::map Subst; + +template +bool unifier_helper1(Subst &subst1, V &vars1, T &to, + Subst &subst2, V &vars2, F &from) +{ + size_t i01t = to.i0; + size_t i02f = from.i0; + + size_t i = 0, j = 0; + while (i < to.size() && j < from.size()) { + size_t factor1 = to[i].factor; + size_t range1 = 0; + for (size_t k = 0; k < vars1.size(); k++) { + if (vars1[k].var == to[i].var) { + range1 = vars1[k].range; + break; + } + } + size_t value1 = factor1*range1; + + size_t factor2 = from[j].factor; + size_t range2 = 0; + for (size_t k = 0; k < vars2.size(); k++) { + if (vars2[k].var == from[j].var) { + range2 = vars2[k].range; + break; + } + } + size_t value2 = factor2*range2; + + if (value1 > value2) { + size_t x = i02f / factor1; + if (x >= range1) + return false; + auto &s = subst1[to[i].var]; + s.first = i02f / factor1; + s.second.clear(); + i02f %= factor1; + while (j < from.size() && factor2 % factor1 == 0) { + size_t factor = factor2 / factor1; + s.second.emplace_back(); + s.second.back().factor=factor; + s.second.back().var=subst2[from[j].var].second[0].var; + j++; + factor2 = from[j].factor; + } + i++; + } else { + size_t x = i01t / factor2; + if (x >= range2) + return false; + auto &s = subst2[from[j].var]; + s.first = i01t / factor2; + s.second.clear(); + i01t %= factor2; + while (i < to.size() && factor1 % factor2 == 0) { + size_t factor = factor1 / factor2; + s.second.emplace_back(); + s.second.back().factor=factor; + s.second.back().var=subst1[to[i].var].second[0].var; + i++; + factor1 = to[i].factor; + } + j++; + } + } + + while (i < to.size()) { + auto &s = subst1[to[i].var]; + size_t factor1 = to[i].factor; + s.first = i02f / factor1; + s.second.clear(); + i02f %= factor1; + i++; + } + + while (j < from.size()) { + auto &s = subst2[from[j].var]; + size_t factor2 = from[j].factor; + s.first = i01t / factor2; + s.second.clear(); + i01t %= factor2; + j++; + } + + return (i01t == i02f); +} + +template +void unifier_helper2(Subst &subst, std::map &vars, std::map &ranges, A &v, Param &p) +{ + for (size_t i = 0; i < v.size(); i++) { + auto &s = subst[v[i].var]; + size_t factor = v[i].factor; + p.first += factor * s.first; + for (term &t : s.second) { + p.second.emplace_back(); + p.second.back().factor = factor * t.factor; + p.second.back().var = t.var; + vars[t.var] = ranges[t.var]; + } + } +} + +ref PgfLCTableMaker::compute_unifier(ref edge1, ref edge2) +{ + std::map>> subst1, subst2; + std::map vars, ranges; + + size_t next_var = 0; + for (size_t i = 0; i < edge1->vars.size(); i++) { + ranges[next_var] = edge1->vars[i].range; + + auto &s = subst1[edge1->vars[i].var]; + s.second.emplace_back(); + s.second.back().factor = 1; + s.second.back().var = next_var++; + } + for (size_t i = 0; i < edge2->vars.size(); i++) { + ranges[next_var] = edge2->vars[i].range; + + auto &s = subst2[edge2->vars[i].var]; + s.second.emplace_back(); + s.second.back().factor = 1; + s.second.back().var = next_var++; + } + + if (!unifier_helper1(subst1, edge1->vars, edge1->to.value, + subst2, edge2->vars, edge2->from.value)) + return 0; + if (!unifier_helper1(subst1, edge1->vars, edge1->to.lin_idx, + subst2, edge2->vars, edge2->from.lin_idx)) + return 0; + + Param p1fv,p1fi,p2tv,p2ti; + p1fv.first = edge1->from.value.i0; + p1fi.first = edge1->from.lin_idx.i0; + p2tv.first = edge2->to.value.i0; + p2ti.first = edge2->to.lin_idx.i0; + + unifier_helper2(subst1, vars, ranges, edge1->from.value, p1fv); + unifier_helper2(subst1, vars, ranges, edge1->from.lin_idx, p1fi); + unifier_helper2(subst2, vars, ranges, edge2->to.value, p2tv); + unifier_helper2(subst2, vars, ranges, edge2->to.lin_idx, p2ti); + + ref edge = PgfLCEdge::alloc(p1fv.second.size(),p1fi.second.size(),p2tv.second.size(),p2ti.second.size(),vars.size()); + edge->from.lincat = edge1->from.lincat; + edge->from.value.i0 = p1fv.first; + for (size_t i = 0; i < p1fv.second.size(); i++) { + edge->from.value[i] = p1fv.second[i]; + } + edge->from.lin_idx.i0 = p1fi.first; + for (size_t i = 0; i < p1fi.second.size(); i++) { + edge->from.lin_idx[i] = p1fi.second[i]; + } + edge->to.lincat = edge2->to.lincat; + edge->to.value.i0 = p2tv.first; + for (size_t i = 0; i < p2tv.second.size(); i++) { + edge->to.value[i] = p2tv.second[i]; + } + edge->to.lin_idx.i0 = p2ti.first; + for (size_t i = 0; i < p2ti.second.size(); i++) { + edge->to.lin_idx[i] = p2ti.second[i]; + } + size_t i = 0; + for (auto it : vars) { + edge->vars[i].var = it.first; + edge->vars[i].range = it.second; + i++; + } +/* + if (strcmp(edge->to.lincat->name.text, "VP") == 0 && edge->to.value.i0 == 2 && edge->to.value.size() == 2) { + print_edge(edge1); + print_edge(edge2); + fprintf(stderr,"------------------\n"); + print_edge(edge); + fprintf(stderr,"\n"); + } +*/ + return edge; +} + +void PgfLCTableMaker::print_edge(ref edge) +{ + PgfPrinter printer(NULL, 0, NULL); + + if (edge->vars.size() > 0) { + printer.puts("{"); + for (size_t i = 0; i < edge->vars.size(); i++) { + if (i > 0) + printer.puts(","); + printer.lvar(edge->vars[i].var); + printer.nprintf(32,"<%zu",edge->vars[i].range); + } + printer.puts("} "); + } + + printer.efun(&edge->from.lincat->name); + printer.puts("("); + if (edge->from.value.i0 != 0 || edge->from.value.size() == 0) + printer.nprintf(32,"%ld",edge->from.value.i0); + for (size_t i = 0; i < edge->from.value.size(); i++) { + if (edge->from.value.i0 != 0 || i > 0) + printer.puts("+"); + if (edge->from.value[i].factor != 1) { + printer.nprintf(32,"%ld",edge->from.value[i].factor); + printer.puts("*"); + } + printer.lvar(edge->from.value[i].var); + } + printer.puts(","); + if (edge->from.lin_idx.i0 != 0 || edge->from.lin_idx.size() == 0) + printer.nprintf(32,"%ld",edge->from.lin_idx.i0); + for (size_t i = 0; i < edge->from.lin_idx.size(); i++) { + if (edge->from.lin_idx.i0 != 0 || i > 0) + printer.puts("+"); + if (edge->from.lin_idx[i].factor != 1) { + printer.nprintf(32,"%ld",edge->from.lin_idx[i].factor); + printer.puts("*"); + } + printer.lvar(edge->from.lin_idx[i].var); + } + printer.puts(") -> "); + + printer.efun(&edge->to.lincat->name); + printer.puts("("); + if (edge->to.value.i0 != 0 || edge->to.value.size() == 0) + printer.nprintf(32,"%ld",edge->to.value.i0); + for (size_t i = 0; i < edge->to.value.size(); i++) { + if (edge->to.value.i0 != 0 || i > 0) + printer.puts("+"); + if (edge->to.value[i].factor != 1) { + printer.nprintf(32,"%ld",edge->to.value[i].factor); + printer.puts("*"); + } + printer.lvar(edge->to.value[i].var); + } + printer.puts(","); + if (edge->to.lin_idx.i0 != 0 || edge->to.lin_idx.size() == 0) + printer.nprintf(32,"%ld",edge->to.lin_idx.i0); + for (size_t i = 0; i < edge->to.lin_idx.size(); i++) { + if (edge->to.lin_idx.i0 != 0 || i > 0) + printer.puts("+"); + if (edge->to.lin_idx[i].factor != 1) { + printer.nprintf(32,"%ld",edge->to.lin_idx[i].factor); + printer.puts("*"); + } + printer.lvar(edge->to.lin_idx[i].var); + } + printer.puts(")\n"); + + PgfText *text = printer.get_text(); + fputs(text->text, stderr); + free(text); +} + +vector PgfLCTableMaker::make() +{ + std::function)> f = + [this](ref lin) { + for (size_t seq_idx = 0; seq_idx < lin->seqs.size(); seq_idx++) { + size_t index = seq_idx / (lin->seqs.size() / lin->res.size()); + size_t n_args = (lin->args.size() / lin->res.size()); + ref res = lin->res[index]; + ref seq = lin->seqs[seq_idx]; + + if (seq->syms.size() > 0) { + PgfSymbol sym = seq->syms[0]; + switch (ref::get_tag(sym)) { + case PgfSymbolCat::tag: { + auto sym_cat = ref::untagged(sym); + size_t arg_idx = n_args * index + sym_cat->d; + ref arg = ref::from_ptr(&lin->args[arg_idx]); + + std::set vars; + for (size_t i = 0; i < res->param.n_terms; i++) { + vars.insert(res->param.terms[i].var); + } + for (size_t i = 0; i < arg->param->n_terms; i++) { + vars.insert(arg->param->terms[i].var); + } + for (size_t i = 0; i < sym_cat->r.n_terms; i++) { + vars.insert(sym_cat->r.terms[i].var); + } + + ref edge = + PgfLCEdge::alloc(res->param.n_terms,0,arg->param->n_terms,sym_cat->r.n_terms,vars.size()); + edge->from.lincat = lin->lincat; + edge->from.value.i0 = res->param.i0; + for (size_t i = 0; i < res->param.n_terms; i++) { + edge->from.value[i] = res->param.terms[i]; + } + edge->from.lin_idx.i0 = seq_idx % (lin->seqs.size() / lin->res.size()); + edge->to.lincat = + namespace_lookup(concr->lincats, &lin->absfun->type->hypos[sym_cat->d].type->name); + edge->to.value.i0 = arg->param->i0; + for (size_t i = 0; i < arg->param->n_terms; i++) { + edge->to.value[i] = arg->param->terms[i]; + } + edge->to.lin_idx.i0 = sym_cat->r.i0; + for (size_t i = 0; i < sym_cat->r.n_terms; i++) { + edge->to.lin_idx[i] = sym_cat->r.terms[i]; + } + size_t i = 0; + for (size_t var : vars) { + edge->vars[i].var = var; + for (size_t k = 0; k < res->vars.size(); k++) { + if (res->vars[k].var == var) { + edge->vars[i].range = res->vars[k].range; + break; + } + } + i++; + } + + rename(edge); + add_edge(edge); + } + break; + } + } + } + return true; + }; + namespace_iter(concr->lins, f); + +/* for (auto it : forwards) { + for (ref edge : it.second) { + print_edge(edge); + } + } +*/ + return 0; +} + struct PgfParser::Choice { int fid; std::vector prods; diff --git a/src/runtime/c/pgf/parser.h b/src/runtime/c/pgf/parser.h index a17d32b80..c0bbd4773 100644 --- a/src/runtime/c/pgf/parser.h +++ b/src/runtime/c/pgf/parser.h @@ -95,6 +95,27 @@ class PGF_INTERNAL_DECL PgfLRTableMaker ~PgfLRTableMaker(); }; +class PGF_INTERNAL_DECL PgfLCTableMaker +{ + ref abstr; + ref concr; + + + std::map,std::vector>> forwards; + std::map,std::vector>> backwards; + + ref compute_unifier(ref edge1, ref edge2); + void update_closure(ref edge); + void rename(ref edge); + void add_edge(ref edge); + void print_edge(ref edge); + +public: + PgfLCTableMaker(ref abstr, ref concr); + vector make(); + ~PgfLCTableMaker(); +}; + class PgfPrinter; class PGF_INTERNAL_DECL PgfParser : public PgfPhraseScanner, public PgfExprEnum diff --git a/src/runtime/c/pgf/reader.cxx b/src/runtime/c/pgf/reader.cxx index 3c1294254..e571cc016 100644 --- a/src/runtime/c/pgf/reader.cxx +++ b/src/runtime/c/pgf/reader.cxx @@ -752,8 +752,8 @@ ref PgfReader::read_concrete() auto printnames = read_namespace(&PgfReader::read_printname); concrete->printnames = printnames; -// PgfLRTableMaker maker(abstract, concrete); -// concrete->lrtable = maker.make(); + PgfLRTableMaker maker(abstract, concrete); + concrete->lrtable = maker.make(); return concrete; }