diff --git a/examples/lassie/Holmakefile b/examples/lassie/Holmakefile deleted file mode 100644 index d3c73bab5e..0000000000 --- a/examples/lassie/Holmakefile +++ /dev/null @@ -1,7 +0,0 @@ -ANT_INSTALLED = $(which ant) - -ifneq "$(ANT_INSTALLED)" "" -CLINE_OPTIONS = -j1 -r -INCLUDES = src examples - -endif diff --git a/examples/lassie/examples/Holmakefile b/examples/lassie/examples/Holmakefile deleted file mode 100644 index 28764c0e3e..0000000000 --- a/examples/lassie/examples/Holmakefile +++ /dev/null @@ -1,12 +0,0 @@ -INCLUDES = ../src/ -CLINE_OPTIONS=-j1 -TACTIC_WORLD = ../sempre/classes/interactive/edu/stanford/nlp/sempre/interactive/lassie/TacticWorld.class - -all: $(DEFAULT_TARGETS) -.PHONY: all - -caseStudy1EuclidTheory.sml: $(TACTIC_WORLD) -caseStudy2RealNumsTheory.sml: $(TACTIC_WORLD) -caseStudy3IntervalLibTheory.sml: $(TACTIC_WORLD) -caseStudy4NaprochePowersetTheory.sml: $(TACTIC_WORLD) -gaussTheory.sml: $(TACTIC_WORLD) diff --git a/examples/lassie/examples/arithTacticsLib.sml b/examples/lassie/examples/arithTacticsLib.sml deleted file mode 100644 index a558493bbc..0000000000 --- a/examples/lassie/examples/arithTacticsLib.sml +++ /dev/null @@ -1,44 +0,0 @@ -structure arithTacticsLib = -struct - open LassieLib; - - local open arithmeticTheory in end; - fun fs_all g = let val thms = map (fn (a,th) => th) (DB.theorems "-") in fs thms g end; - val _ = - let - fun jargon () = - let - val _ = LassieLib.addCustomTactic fs_all "fs_all"; - val _ = - map (uncurry def) [ - (`simplify`, `fs [ ]`), - (`simplify with [ADD_ASSOC]`, `fs [ ADD_ASSOC ]`), - (`use [ADD_ASSOC] to simplify`, `fs [ ADD_ASSOC ]`), - (`follows from [ADD_ASSOC]`, `metis_tac [ ADD_ASSOC ]`), - (`rewrite [ADD_ASSOC]` ,`rw [ADD_ASSOC]`), - (`[ADD_ASSOC] solves the goal`, - `all_tac THEN ( fs [ ADD_ASSOC ] THEN NO_TAC) ORELSE (rw [ ADD_ASSOC ] THEN NO_TAC) ORELSE metis_tac [ ADD_ASSOC ]`), - (‘trivial’, ‘[] solves the goal’), - (`perform an induction on 't'`, `Induct_on ' t '`), - (`Induction on 
't'`, `Induct_on ' t '`), - (`perform a case split`, `Cases`), - (`perform a case split for 't'`, `Cases_on ' t '`), - (`Complete Induction on 't'`, `completeInduct_on ' t '`), - (`suppose not`, `spose_not_then assume_tac`), - (`show 'T' using (gen_tac)` ,`' T ' by gen_tac`), - (‘show 'T' using [ CONJ_COMM ]’, ‘ ' T ' by ([ CONJ_COMM ] solves the goal)’), - (‘'T' follows trivially’, ‘show 'T' using (trivial)’), - (`we further know 'T'`, `' T ' by rw [ ]`), - (`we can derive 'T' from [ADD_ASSOC]`, `' T ' by rw [ ADD_ASSOC ]`), - (`thus ADD_ASSOC for 'n'`, `qspec_then ' n ' assume_tac ADD_ASSOC`), - (‘'T' suffices to show the goal’, ‘'T' suffices_by (fs[])’), - (`it suffices to show that the arguments are equal`, `AP_TERM_TAC`), - (`it suffices to show that the functions are equal`, `AP_THM_TAC`), - (‘cheat and cheat’, ‘cheat THEN cheat’) - ] - in () end; - in - LassieLib.registerJargon "Arithmetic" (jargon) - end - -end; diff --git a/examples/lassie/examples/caseStudy1EuclidScript.sml b/examples/lassie/examples/caseStudy1EuclidScript.sml deleted file mode 100644 index 82ef9cab4e..0000000000 --- a/examples/lassie/examples/caseStudy1EuclidScript.sml +++ /dev/null @@ -1,280 +0,0 @@ -(** Taken from the HOL4 distribution (original file: HOL4/examples/euclid.sml **) - -(*===========================================================================*) -(* Euclid's theorem: for every prime, there is another one that is larger. *) -(* This proof has been excerpted and adapted from John Harrison's proof of *) -(* a special case (n=4) of Fermat's Last Theorem. *) -(* *) -(*===========================================================================*) - -(*---------------------------------------------------------------------------*) -(* First, open required context: the theory of arithmetic. 
This theory is *) -(* automatically loaded when HOL starts, but the ML module arithmeticTheory *) -(* needs to be opened before the definitions and theorems of the theory are *) -(* available without supplying the "arithmeticTheory." prefix. *) -(*---------------------------------------------------------------------------*) - -open BasicProvers Defn HolKernel Parse Conv SatisfySimps Tactic monadsyntax - boolTheory bossLib arithmeticTheory; - -open LassieLib arithTacticsLib; - -val _ = new_theory "caseStudy1Euclid"; - -val _ = LassieLib.loadJargon "Arithmetic"; -(*---------------------------------------------------------------------------*) -(* Divisibility. *) -(*---------------------------------------------------------------------------*) - -set_fixity "divides" (Infix(NONASSOC, 450)); - -Definition divides_def: -(a divides b) = (? x. b = a * x) -End - - -(*---------------------------------------------------------------------------*) -(* Primality. *) -(*---------------------------------------------------------------------------*) - -Definition prime_def: - prime p = (p<>1 /\ !x . x divides p ==> (x=1) \/ (x=p)) -End - -(*---------------------------------------------------------------------------*) -(* A sequence of basic theorems about the "divides" relation. *) -(*---------------------------------------------------------------------------*) - -Theorem DIVIDES_0: - ! x . x divides 0 -Proof - LassieLib.nltac ` - [divides_def, MULT_CLAUSES] solves the goal.` - (* metis_tac [divides_def,MULT_CLAUSES] *) -QED - -Theorem DIVIDES_ZERO: - ! x . (0 divides x) = (x = 0) -Proof - LassieLib.nltac ` - [divides_def, MULT_CLAUSES] solves the goal.` - (* metis_tac [divides_def,MULT_CLAUSES] *) -QED - -Theorem DIVIDES_ONE: - ! x . (x divides 1) = (x = 1) -Proof - LassieLib.nltac - `[divides_def, MULT_CLAUSES, MULT_EQ_1] solves the goal.` - (* metis_tac [divides_def,MULT_CLAUSES,MULT_EQ_1] *) -QED - -Theorem DIVIDES_REFL: - ! x . 
x divides x -Proof - LassieLib.nltac ` - [divides_def, MULT_CLAUSES] solves the goal.` - (* metis_tac [divides_def,MULT_CLAUSES] *) -QED - -Theorem DIVIDES_TRANS: - ! a b c . a divides b /\ b divides c ==> a divides c -Proof - LassieLib.nltac ` - [divides_def, MULT_ASSOC] solves the goal.` - (* metis_tac [divides_def,MULT_ASSOC] *) -QED - -Theorem DIVIDES_ADD: - ! d a b . d divides a /\ d divides b ==> d divides (a + b) -Proof - LassieLib.nltac ` - [divides_def, LEFT_ADD_DISTRIB] solves the goal.` - (* metis_tac [divides_def, LEFT_ADD_DISTRIB] *) -QED - -Theorem DIVIDES_SUB: - !d a b . d divides a /\ d divides b ==> d divides (a - b) -Proof - LassieLib.nltac ` - [divides_def, LEFT_SUB_DISTRIB] solves the goal.` - (* metis_tac [divides_def,LEFT_SUB_DISTRIB] *) -QED - -Theorem DIVIDES_ADDL: - !d a b . d divides a /\ d divides (a + b) ==> d divides b -Proof - LassieLib.nltac ` - [ADD_SUB, ADD_SYM, DIVIDES_SUB] solves the goal.` - (* metis_tac [ADD_SUB,ADD_SYM,DIVIDES_SUB] *) -QED - -Theorem DIVIDES_LMUL: - !d a x . d divides a ==> d divides (x * a) -Proof - LassieLib.nltac ` - [divides_def, MULT_ASSOC, MULT_SYM] solves the goal.` - (* metis_tac [divides_def,MULT_ASSOC,MULT_SYM] *) -QED - -Theorem DIVIDES_RMUL: - !d a x . d divides a ==> d divides (a * x) -Proof - LassieLib.nltac ` - [MULT_SYM,DIVIDES_LMUL] solves the goal.` - (* metis_tac [MULT_SYM,DIVIDES_LMUL] *) -QED - -Theorem DIVIDES_LE: - !m n . m divides n ==> m <= n \/ (n = 0) -Proof - LassieLib.nltac ‘ - rewrite [divides_def]. - [] solves the goal.’ - (* rw [divides_def] >> rw[] *) -QED - -(*---------------------------------------------------------------------------*) -(* Various proofs of the same formula *) -(*---------------------------------------------------------------------------*) -val NOT_X_LE = save_thm ("NOT_X_LE", DECIDE ``! x. ~(x < x)``); - -Theorem DIVIDES_FACT: - !m n . 0 < m /\ m <= n ==> m divides (FACT n) -Proof - LassieLib.nltac ‘ - rewrite [LESS_EQ_EXISTS]. - perform an induction on 'p'. 
- [FACT, NOT_X_LE, num_CASES, DIVIDES_RMUL, DIVIDES_LMUL, DIVIDES_REFL, ADD_CLAUSES] - solves the goal.’ - (* - rw [LESS_EQ_EXISTS] - >> Induct_on `p` - >> metis_tac [FACT, DECIDE ``!x. ~(x < x)``, num_CASES, - DIVIDES_RMUL,DIVIDES_LMUL,DIVIDES_REFL,ADD_CLAUSES] *) -QED - -(*---------------------------------------------------------------------------*) -(* Zero and one are not prime, but two is. All primes are positive. *) -(*---------------------------------------------------------------------------*) - -Theorem NOT_PRIME_0: - ~prime 0 -Proof - LassieLib.nltac `rewrite [prime_def, DIVIDES_0].` - (* rw [prime_def,DIVIDES_0] *) -QED - -Theorem NOT_PRIME_1: - ~prime 1 -Proof - LassieLib.nltac `rewrite [prime_def].` - (* rw [prime_def] *) -QED - -val two_prime_eqs = curry save_thm "two_prime_eqs" (DECIDE ``~(2=1) /\ ~(2=0) /\ ((x <=2) = (x = 0) \/ (x = 1) \/ (x = 2))``); - -Theorem PRIME_2: - prime 2 -Proof - LassieLib.nltac ` - rewrite [prime_def]. - [DIVIDES_LE, DIVIDES_ZERO, two_prime_eqs] solves the goal.` - (* rw [prime_def] >> - metis_tac [DIVIDES_LE, DIVIDES_ZERO, - DECIDE``~(2=1) /\ ~(2=0) /\ (x<=2 = (x=0) \/ (x=1) \/ (x=2))``] *) -QED - -Theorem PRIME_POS: - !p . prime p ==> 0

> rw [NOT_PRIME_0] *) -QED - -(*---------------------------------------------------------------------------*) -(* Every number has a prime factor, except for 1. The proof proceeds by a *) -(* "complete" induction on n, and then considers cases on whether n is *) -(* prime or not. The first case (n is prime) is trivial. In the second case, *) -(* there must be an "x" that divides n, and x is not 1 or n. By DIVIDES_LE, *) -(* n=0 or x <= n. If n=0, then 2 is a prime that divides 0. On the other *) -(* hand, if x <= n, there are two cases: if x ?p . prime p /\ p divides n -Proof - LassieLib.nltac ‘ - Complete Induction on 'n'. - rewrite []. - perform a case split for 'prime n'. - Goal 1. [DIVIDES_REFL] solves the goal. End. - Goal 1. - show '? x. x divides n and x <> 1 and x <> n' using (follows from [prime_def]). - [LESS_OR_EQ, PRIME_2, DIVIDES_LE, DIVIDES_TRANS, DIVIDES_0] solves the goal. - End.’ - (* - completeInduct_on `n` - >> rw [] - >> Cases_on `prime n` >| - [metis_tac [DIVIDES_REFL], - `?x. x divides n /\ x<>1 /\ x<>n` by metis_tac[prime_def] >> - metis_tac [LESS_OR_EQ, PRIME_2, - DIVIDES_LE, DIVIDES_TRANS, DIVIDES_0]] *) -QED - -(*---------------------------------------------------------------------------*) -(* In the following proof, metis_tac automatically considers cases on *) -(* whether n is prime or not. *) -(*---------------------------------------------------------------------------*) - -Theorem PRIME_FACTOR: - !n . n<>1 ==> ?p . prime p /\ p divides n -Proof - LassieLib.nltac ‘ - Complete Induction on 'n'. - [DIVIDES_REFL,prime_def,LESS_OR_EQ, PRIME_2, - DIVIDES_LE, DIVIDES_TRANS, DIVIDES_0] solves the goal.’ - (* - completeInduct_on `n` >> - metis_tac [DIVIDES_REFL,prime_def,LESS_OR_EQ, PRIME_2, - DIVIDES_LE, DIVIDES_TRANS, DIVIDES_0] *) -QED - -(*---------------------------------------------------------------------------*) -(* Every number has a prime greater than it. *) -(* Proof. 
*) -(* Suppose not; then there's an n such that all p greater than n are not *) -(* prime. Consider FACT(n) + 1: it's not equal to 1, so there's a prime q *) -(* that divides it. q also divides FACT n because q is less-than-or-equal *) -(* to n. By DIVIDES_ADDL, this means that q=1. But then q is not prime, *) -(* which is a contradiction. *) -(*---------------------------------------------------------------------------*) - -val neq_zero = curry save_thm "neq_zero" (DECIDE ``~(x=0) = (0 < x)``); - -Theorem EUCLID: - !n . ?p . n < p /\ prime p -Proof - LassieLib.nltac‘ - suppose not. simplify. - we can derive 'FACT n + 1 <> 1' from [FACT_LESS, neq_zero]. - thus PRIME_FACTOR for 'FACT n + 1'. - we further know '?q. prime q and q divides (FACT n + 1)'. - show 'q <= n' using [NOT_LESS_EQUAL]. - show '0 < q' using [PRIME_POS] . - show 'q divides FACT n' using [DIVIDES_FACT]. - show 'q=1' using [DIVIDES_ADDL, DIVIDES_ONE]. - show 'prime 1' using (simplify). - [NOT_PRIME_1] solves the goal.’ - (* - spose_not_then strip_assume_tac - >> mp_tac (SPEC ``FACT n + 1`` PRIME_FACTOR) - >> rw [FACT_LESS, DECIDE ``~(x=0) = (0> metis_tac [DIVIDES_FACT, DIVIDES_ADDL, DIVIDES_ONE, - NOT_PRIME_1, NOT_LESS, PRIME_POS] *) -QED - -val _ = export_theory(); diff --git a/examples/lassie/examples/caseStudy2RealNumsScript.sml b/examples/lassie/examples/caseStudy2RealNumsScript.sml deleted file mode 100644 index 7cb303b8cc..0000000000 --- a/examples/lassie/examples/caseStudy2RealNumsScript.sml +++ /dev/null @@ -1,189 +0,0 @@ -open BasicProvers Defn HolKernel Parse Conv SatisfySimps Tactic monadsyntax - boolTheory bossLib; - -open realTheory arithmeticTheory realLib RealArith; - -open LassieLib realTacticsLib logicTacticsLib; - -val _ = new_theory "caseStudy2RealNums"; - -val _ = (LassieLib.loadJargon "Reals"; LassieLib.loadJargon "Logic"); - -Theorem binom1: - ! (a b:real). (a + b) pow 2 = a pow 2 + 2 * a * b + b pow 2 -Proof - LassieLib.nltac ` - introduce assumptions. 
- rewrite with [POW_2, REAL_LDISTRIB, REAL_RDISTRIB]. - rewrite with [<- REAL_ADD_ASSOC]. - simplify with [REAL_EQ_RADD]. - rewrite with [REAL_ADD_ASSOC]. - simplify with [REAL_EQ_LADD]. trivial.` - (* - rpt strip_tac - \\ once_rewrite_tac [POW_2] - \\ once_rewrite_tac [REAL_LDISTRIB] - \\ once_rewrite_tac [REAL_RDISTRIB] - \\ rewrite_tac [REAL_ADD_ASSOC] - \\ simp[REAL_EQ_RADD] - \\ rewrite_tac [GSYM REAL_ADD_ASSOC] - \\ simp[REAL_EQ_LADD] - \\ `b * a = a * b` by (fs[REAL_MUL_COMM]) - \\ simp[REAL_DOUBLE, REAL_MUL_ASSOC] *) -QED - -Theorem binom2: - ! (a b:real). (a - b) pow 2 = a pow 2 - 2 * a * b + b pow 2 -Proof - LassieLib.nltac ` - introduce assumptions. - rewrite with [POW_2, real_sub, REAL_LDISTRIB, REAL_RDISTRIB]. - rewrite with [<- REAL_ADD_ASSOC]. - simplify with [REAL_EQ_RADD]. - rewrite once [REAL_NEG_MUL2]. - rewrite with [REAL_ADD_ASSOC]. - simplify with [REAL_EQ_LADD]. trivial.` - (* - rpt strip_tac - \\ once_rewrite_tac [POW_2] - \\ once_rewrite_tac [real_sub] - \\ once_rewrite_tac [REAL_LDISTRIB] - \\ once_rewrite_tac [REAL_RDISTRIB] - \\ rewrite_tac [REAL_ADD_ASSOC] - \\ once_rewrite_tac [REAL_NEG_MUL2] - \\ simp[REAL_EQ_RADD] - \\ rewrite_tac [GSYM REAL_ADD_ASSOC] - \\ simp[REAL_EQ_LADD] - \\ `-b * a = a * -b` by (fs[REAL_MUL_COMM]) - \\ simp[REAL_DOUBLE, GSYM REAL_NEG_LMUL, GSYM REAL_NEG_RMUL, REAL_MUL_ASSOC] *) -QED - -Definition sum_of_cubes_def: - (sum_of_cubes 0 = 0:real) /\ - (sum_of_cubes (SUC n) = (&(SUC n)) pow 3 + sum_of_cubes n) -End - -Definition sum_def: - (sum 0 = 0:real) /\ - (sum (SUC n) = (&(SUC n) + sum n)) -End - -Theorem closed_form_sum: - ! n. (sum n = (((&n):real) * (1 + &n)) / 2) -Proof - LassieLib.nltac ‘ - induction on 'n'. simplify with [sum_def, REAL_DIV_LZERO, MULT]. - rewrite MULT_SYM for 'n'. - we show 'SUC n + 1 = SUC (SUC n)' using (simplify). - rewrite last assumption. - we show 'SUC (SUC n) * n = n + n + n * n' using (simplify with [MULT]). - rewrite last assumption. 
- we show 'n + n + n * n + 1 = SUC n + n * (n + 1)' using - (simplify with [ADD1, LEFT_ADD_DISTRIB, MULT_RIGHT_1]). - rewrite last assumption. - rewrite with [ADD_ASSOC]. - we show 'SUC n + SUC n = 2 * (SUC n)' using (simplify). - rewrite last assumption. - rewrite once [MULT_COMM]. - rewrite with [GSYM REAL_MUL, GSYM REAL_ADD, GSYM REAL_DIV_ADD]. - rewrite with [real_div]. - simplify with [GSYM REAL_MUL_ASSOC, REAL_MUL_RINV]. - simplify with [REAL_MUL_ASSOC].’ - (* Induct_on `n` - \\ fs[sum_def, REAL_DIV_LZERO] - \\ pop_assum (fn thm=> once_rewrite_tac [GSYM thm] \\ assume_tac thm) - \\ fs[MULT] - \\ qspec_then `n` (fn thm => once_rewrite_tac [thm]) MULT_SYM - \\ `SUC n + 1 = SUC (SUC n)` - by (pop_assum kall_tac \\ Induct_on `n` \\ fs[]) - \\ qpat_x_assum `SUC n + 1 = _` (fn thm => once_rewrite_tac [thm]) - \\ `SUC (SUC n) * n = n + n + n * n` - by (fs[MULT]) - \\ qpat_x_assum `SUC (SUC n) * _ = _` (fn thm => once_rewrite_tac [thm]) - \\ `n + n + n * n + 1 = SUC n + n * (n + 1)` - by (once_rewrite_tac [ADD1] \\ once_rewrite_tac [LEFT_ADD_DISTRIB] - \\ rewrite_tac [ADD_ASSOC, MULT_RIGHT_1] \\ fs[]) - \\ qpat_x_assum `n + n + _ + _ = _` (fn thm => once_rewrite_tac [thm]) - \\ rewrite_tac [ADD_ASSOC] - \\ `SUC n + SUC n = 2 * (SUC n)` - by (fs[]) - \\ qpat_x_assum `SUC n + _ = _` (fn thm => once_rewrite_tac [thm]) - \\ once_rewrite_tac [MULT_COMM] - \\ rewrite_tac [GSYM REAL_MUL, GSYM REAL_ADD] - \\ rewrite_tac [GSYM REAL_DIV_ADD] - \\ rewrite_tac [real_div] - \\ rewrite_tac [GSYM REAL_MUL_ASSOC] - \\ fs[REAL_MUL_RINV] - \\ fs[REAL_MUL_ASSOC] *) -QED - -Theorem pow_3: - n pow 3 = n * n * n -Proof - LassieLib.nltac ` - we show '3 = SUC 2' using (simplify). - rewrite last assumption. simplify with [pow, POW_2]. trivial.` - (* - `3 = SUC 2` by (fs[]) - \\ pop_assum rw_th - \\ fs[pow, POW_2] \\ REAL_ASM_ARITH_TAC *) -QED - -(** - The sum of cubed numbers up to n is the squared sum -**) -Theorem sum_of_cubes_is_squared_sum: - ! n. 
sum_of_cubes n = (sum n) pow 2 -Proof - LassieLib.nltac ‘ - induction on 'n'. - simplify conclusion with [sum_of_cubes_def, sum_def]. - rewrite with [POW_2, REAL_LDISTRIB, REAL_RDISTRIB, REAL_ADD_ASSOC]. - showing '&SUC n pow 3 = &SUC n * &SUC n + &SUC n * sum n + sum n * &SUC n' - closes the proof because (simplify conclusion with [REAL_EQ_LADD]). - we know '& SUC n * sum n + sum n * &SUC n = 2 * (sum n * & SUC n)'. - rewrite once [<- REAL_ADD_ASSOC]. - rewrite last assumption. - rewrite with [pow_3, closed_form_sum, real_div, REAL_MUL_ASSOC]. - we know '2 * &n * (1 + &n) * inv 2 = 2 * inv 2 * & n * (1 + &n)'. - rewrite last assumption. - simplify conclusion with [REAL_MUL_RINV]. - we show 'n + 1 = SUC n' using (simplify conclusion). - rewrite last assumption. simplify conclusion. - we show '2 = (SUC (SUC 0))' using (simplify conclusion). - rewrite last assumption. rewrite last assumption. - rewrite with [EXP]. - we show 'SUC n = n + 1' using (simplify conclusion). - rewrite last assumption. - rewrite with [GSYM REAL_OF_NUM_ADD, pow_3]. - rewrite with [REAL_OF_NUM_ADD, REAL_OF_NUM_MUL, MULT_RIGHT_1, - RIGHT_ADD_DISTRIB, LEFT_ADD_DISTRIB, MULT_LEFT_1]. 
- simplify.’ -QED - -(* -Induct_on ‘n’ \\ simp [sum_of_cubes_def, sum_def, ] - \\ fs[pow_3] - \\ once_rewrite_tac [REAL_ADD_ASSOC] - \\ pop_assum rw_th - \\ once_rewrite_tac [closed_form_sum] - \\ once_rewrite_tac [real_div] - \\ rewrite_tac [REAL_MUL_ASSOC] - \\ `2 * &n * (1 + &n) * inv 2 = 2 * inv 2 * & n * (1 + &n)` by (REAL_ASM_ARITH_TAC) - \\ pop_assum rw_th - \\ simp [REAL_MUL_RINV] - \\ `(SUC n) ** 3 = (SUC n)**2 + (SUC n)**2 * n` - by (`3= SUC (SUC (SUC 0))` by (fs[]) \\ pop_assum rw_th - \\ `2 = SUC(SUC 0)` by (fs[]) \\ pop_assum rw_th - \\ rewrite_tac[EXP] - \\ `SUC n = n + 1` by (fs[]) - \\ pop_assum rw_th - \\ rewrite_tac [MULT_RIGHT_1, RIGHT_ADD_DISTRIB, LEFT_ADD_DISTRIB, MULT_LEFT_1] - \\ fs[]) - \\ pop_assum rw_th - \\ `n + 1 = SUC n` by (fs[]) - \\ pop_assum rw_th \\ fs[] -QED - *) - -val _ = export_theory(); diff --git a/examples/lassie/examples/caseStudy3IntervalLibScript.sml b/examples/lassie/examples/caseStudy3IntervalLibScript.sml deleted file mode 100644 index fb3c28da1c..0000000000 --- a/examples/lassie/examples/caseStudy3IntervalLibScript.sml +++ /dev/null @@ -1,216 +0,0 @@ -open BasicProvers Defn HolKernel Parse Conv SatisfySimps Tactic monadsyntax - boolTheory bossLib; - -open realTheory arithmeticTheory realLib RealArith; - -open LassieLib realTacticsLib logicTacticsLib; - -val _ = new_theory "caseStudy3IntervalLib"; - -val _ = (LassieLib.loadJargon "Reals"; LassieLib.loadJargon "Logic"); - -Definition min4_def: -min4 a b c d = min a (min b (min c d)) -End - -Definition max4_def: - max4 a b c d = max a (max b (max c d)) -End - -val _ = temp_overload_on("abs",``real$abs``); -val _ = temp_overload_on("max",``real$max``); -val _ = temp_overload_on("min",``real$min``); -(** - Define validity of an interval, requiring that the lower bound is less than or equal to the upper bound. - Containement is defined such that if x is contained in the interval, it must lie between the lower and upper bound. 
-**) -Definition valid_def: - valid ((lo,hi):(real # real)) = (lo <= hi) -End - -Definition contained_def: - contained (a:real) (lo,hi) = (lo <= a /\ a <= hi) -End - -Definition absIntvUpd_def: -absIntvUpd (op:real->real->real) (iv1:real#real) (iv2:real#real) = -( - min4 (op (FST iv1) (FST iv2)) - (op (FST iv1) (SND iv2)) - (op (SND iv1) (FST iv2)) - (op (SND iv1) (SND iv2)), - max4 (op (FST iv1) (FST iv2)) - (op (FST iv1) (SND iv2)) - (op (SND iv1) (FST iv2)) - (op (SND iv1) (SND iv2)) -) -End - -Definition widenInterval_def: -widenInterval (iv:real#real) (v:real) = ((FST iv - v), (SND iv + v)) -End - -Definition negateInterval_def: -negateInterval (iv:real#real) = ((- SND iv), (- FST iv)) -End - -Definition invertInterval_def: - invertInterval (iv:real#real) = (1 /(SND iv), 1 /(FST iv)) -End - -Definition addInterval_def: - addInterval (iv1:real#real) (iv2:real#real) = absIntvUpd (+) iv1 iv2 -End - -Definition subtractInterval_def: - subtractInterval (iv1:real#real) (iv2:real#real) = addInterval iv1 (negateInterval iv2) -End - -Definition multInterval_def: - multInterval (iv1:real#real) (iv2:real#real) = absIntvUpd ( * ) iv1 iv2 -End - -Definition divideInterval_def: - divideInterval iv1 iv2 = multInterval iv1 (invertInterval iv2) -End - -Definition minAbsFun_def: - minAbsFun iv = min (abs (FST iv)) (abs (SND iv)) -End - -Theorem contained_implies_valid: - !(a:real) (iv:real#real). - contained a iv ==> valid iv -Proof - LassieLib.nltac ` - introduce variables. - case split for 'iv'. - trivial using [contained_def, valid_def, REAL_LE_TRANS].` -QED - -Theorem min4_correct: - ! a b c d. - let m = min4 a b c d in - m <= a /\ m <= b /\ m <= c /\ m <= d -Proof - LassieLib.nltac ` - introduce variables. simplify with [min4_def]. perform a case split. - try simplify with [REAL_MIN_LE1]. - use transitivity for 'min b (min c d)'. - simplify with [REAL_MIN_LE1, REAL_MIN_LE2]. - use transitivity for 'min c d'. 
- simplify with [REAL_MIN_LE1, REAL_MIN_LE2].` -QED - -Theorem max4_correct: - !a b c d. - let m = max4 a b c d in - a <= m /\ b <= m /\ c <= m /\ d <= m -Proof - LassieLib.nltac ` - introduce variables. simplify with [max4_def]. perform a case split. - try simplify with [REAL_LE_MAX1]. - use transitivity for 'max b (max c d)'. - simplify with [REAL_LE_MAX1, REAL_LE_MAX2]. - use transitivity for 'max c d'. - simplify with [REAL_LE_MAX1, REAL_LE_MAX2].` -QED - -Theorem interval_negation_valid: - ! iv a. - contained a iv ==> contained (- a) (negateInterval iv) -Proof - LassieLib.nltac ` - introduce variables. case split for 'iv'. - simplify with [contained_def, negateInterval_def, REAL_LE_TRANS].` -QED - -Theorem iv_neg_preserves_valid: - !iv. - valid iv ==> valid (negateInterval iv) -Proof - LassieLib.nltac ` - introduce variables. - case split for 'iv'. - simplify with [valid_def, negateInterval_def].` -QED - -(* -gt `! x y. 0 < x /\ 0 < y ==> (inv x <= inv y <=> y <= x)` - -proveInteractive(); - -introduce assumptions -we show 'inv x < inv y <=> y < x' using (use REAL_INV_LT_ANTIMONO THEN follows trivially) -*) -Theorem nonzerop_EQ1_I'[simp]: - 0 < r ==> (nonzerop r = 1) -Proof - rw[nonzerop_def] -QED - -val REAL_LE_IMP_LT = curry save_thm "REAL_LE_IMP_LT" (fst (EQ_IMP_RULE (Drule.SPEC_ALL REAL_LE_LT))); - -Theorem REAL_INV_LE_ANTIMONO[local]: - ! x y. - 0 < x /\ 0 < y ==> - (inv x <= inv y <=> y <= x) -Proof - LassieLib.nltac ` - introduce assumptions. - we show 'inv x < inv y <=> y < x' using (use REAL_INV_LT_ANTIMONO THEN follows trivially). - case split. - simplify with [REAL_LE_LT]. - introduce assumptions. - simplify with [REAL_INV_INJ]. trivial.` - (* More verbose version using subgoal selectors: - LassieLib.nltac ‘ - introduce assumptions. - we show 'inv x < inv y <=> y < x' - using (use REAL_INV_LT_ANTIMONO then follows trivially). - case split. introduce assumptions. - Case 'inv x ≤ inv y'. - resolve with REAL_LE_IMP_LT. - Case 'inv x = inv y'. 
follows from [REAL_INV_INJ]. End. - Case 'inv x < inv y'. trivial. End. - Case 'y ≤ x'. - resolve with REAL_LE_IMP_LT. - Case 'y = x'. follows from [REAL_INV_INJ]. End. - Case 'y < x'. trivial. End.’ *) -QED - -Theorem interval_inversion_valid: - ∀ iv a. - (SND iv < 0 ∨ 0 < FST iv) ∧ contained a iv ⇒ - contained (inv a) (invertInterval iv) -Proof - LassieLib.nltac ‘ - introduce variables. - case split for 'iv'. - simplify with [contained_def, invertInterval_def]. - introduce assumptions. - rewrite once [<- REAL_INV_1OVER]. - Next Goal. - rewrite once [ <- REAL_LE_NEG]. we know 'a < 0'. thus 'a <> 0'. - we know 'r < 0'. thus 'r <> 0'. - 'inv(-a) <= inv (-r) <=> (- r) <= -a' using - (use REAL_INV_LE_ANTIMONO THEN simplify). - resolve with REAL_NEG_INV. rewrite assumptions. follows trivially. - Next Goal. - rewrite once [<- REAL_LE_NEG]. - we know 'a < 0'. thus 'a <> 0'. we know 'q <> 0'. - resolve with REAL_NEG_INV. - 'inv (-q) <= inv (-a) <=> (-a) <= (-q)' using - (use REAL_INV_LE_ANTIMONO THEN simplify THEN trivial). - rewrite assumptions. follows trivially. - Next Goal. - rewrite with [<- REAL_INV_1OVER]. - 'inv r <= inv a <=> a <= r' using (use REAL_INV_LE_ANTIMONO THEN trivial). - follows trivially. - Next Goal. - rewrite with [<- REAL_INV_1OVER]. - 'inv a <= inv q <=> q <= a' using (use REAL_INV_LE_ANTIMONO THEN trivial). 
- follows trivially.’ -QED - -val _ = export_theory(); diff --git a/examples/lassie/examples/caseStudy4NaprochePowersetScript.sml b/examples/lassie/examples/caseStudy4NaprochePowersetScript.sml deleted file mode 100644 index 6ab7748cd7..0000000000 --- a/examples/lassie/examples/caseStudy4NaprochePowersetScript.sml +++ /dev/null @@ -1,59 +0,0 @@ -open BasicProvers Defn HolKernel Parse Conv SatisfySimps Tactic monadsyntax - boolTheory bossLib arithmeticTheory; - -open realTheory arithmeticTheory realLib RealArith; - -open LassieLib logicTacticsLib; - -val _ = new_theory "caseStudy4NaprochePowerset"; - -val _ = LassieLib.loadJargon "Logic"; - -Theorem cantor: - ∀ f:'a -> ('a -> bool). - ~ (∀ y:'a -> bool. ∃ x:'a. f x = y) -Proof - LassieLib.nltac ‘ - assume the contrary. simplify. - we know '∃ x. f x = λ x . ~ (x IN (f x))'. - simplify. - case split for 'x IN f x'. - simplify with [IN_DEF]. trivial.’ - (* CCONTR_TAC \\ fs[] - \\ first_x_assum (qspec_then `\ x. ~ (x IN (f x))` assume_tac) - \\ fs[] - \\ Cases_on `x IN f x` - >- ( - pop_assum mp_tac \\ fs[IN_DEF] \\ metis_tac[]) - >- (fs[IN_DEF] \\ metis_tac[]) *) -QED - -(* -Let M denote a set. -Let f denote a function. -Let the value of f at x stand for f[x]. -Let f is defined on M stand for Dom(f) = M. -Let the domain of f stand for Dom(f). *) - -(* Axiom. The value of f at any element of the domain of f is a set. *) -(** This axiom can be encoded by giving f the type :'a -> 'b set option **) - -(* Definition. -A subset of M is a set N such that every element of N is an element of M. *) - -(* Definition. -The powerset of M is the set of subsets of M. *) - -(* Definition. -f surjects onto M iff every element of M is equal to the value of f at some element of the domain of f. *) - -(* Proposition. -No function that is defined on M surjects onto the powerset of M. -Proof. -Assume the contrary. -Take a function f that is defined on M and surjects onto the powerset of M. 
-Define N = { x in M | x is not an element of f[x] }. -Then N is not equal to the value of f at any element of M. -Contradiction. qed. *) - -val _ = export_theory(); diff --git a/examples/lassie/examples/gaussScript.sml b/examples/lassie/examples/gaussScript.sml deleted file mode 100644 index 770a6caa9f..0000000000 --- a/examples/lassie/examples/gaussScript.sml +++ /dev/null @@ -1,35 +0,0 @@ -open BasicProvers Defn HolKernel Parse Tactic - arithmeticTheory boolLib boolSimps bossLib; -open LassieLib arithTacticsLib realTacticsLib logicTacticsLib; - -val _ = new_theory "gauss"; - -val _ = LassieLib.loadJargon "Arithmetic"; -val _ = LassieLib.loadJargon "Logic"; - -Definition sum_def: - sum (0:num) = 0 ∧ - sum n = n + sum (n-1) -End - -Theorem closed_form_sum: - ∀ n. - sum n = (n * (n + 1)) DIV 2 -Proof - nltac - ‘Induction on 'n'. - use [sum_def] to simplify. - use [sum_def, GSYM ADD_DIV_ADD_DIV] to simplify. - '2 * SUC n + n * (n + 1) = SUC n * (SUC n + 1)' - suffices to show the goal. - show 'SUC n * (SUC n + 1) = (SUC n + 1) + n * (SUC n + 1)' - using (simplify with [MULT_CLAUSES]). - simplify. - show 'n * (n + 1) = SUC n * n' - using (trivial using [MULT_CLAUSES, MULT_SYM]). - '2 * SUC n = SUC n + SUC n' follows trivially. - 'n * (SUC n + 1) = SUC n * n + n' follows trivially. - rewrite assumptions. 
simplify.’ -QED - -val _ = export_theory(); diff --git a/examples/lassie/examples/logicTacticsLib.sml b/examples/lassie/examples/logicTacticsLib.sml deleted file mode 100644 index 122502fa2a..0000000000 --- a/examples/lassie/examples/logicTacticsLib.sml +++ /dev/null @@ -1,63 +0,0 @@ -structure logicTacticsLib = -struct - - open LassieLib; - - local open realTheory in end; - val _ = - let - fun jargon () = - let - val _ = - map (uncurry def) [ - (* Case splitting *) - (‘split conjuncts’, ‘conj_tac THEN rpt conj_tac’), - (`case split`, `(split conjuncts) ORELSE (EQ_TAC ORELSE Cases)`), - (`case split for 's'`,`Cases_on 's'`), - (`perform a case split`,`case split`), - (`specialize for 'T'`,`first_x_assum qspec_then ' T ' assume_tac`), - (`assume the contrary`,`CCONTR_TAC`), - (* Automation a la textbook *) - (`trivial`,`metis_tac [ ]`), - (`trivial using [CONJ_COMM]`, `metis_tac [ CONJ_COMM ]`), - (`follows trivially`,`fs [ ]`), - (`follows from [ADD_COMM]`, `fs [ ADD_COMM ]`), - (* Simplification *) - (`simplify`, `fs [ ]`), - (`simplify with [CONJ_COMM]`, `fs [ CONJ_COMM ]`), - (`simplify conclusion`, `simp [ ]`), - (`simplify conclusion with [CONJ_COMM]`, `simp [ CONJ_COMM ]`), - (* lc aliases *) - (`try gen_tac`, `TRY gen_tac`), - (* `try solving with [CONJ_COMM]` [`TRY simp [CONJ_COMM]`]; *) - (* Textbook style tactics for existentials, modus ponens, ... 
*) - (`choose 'e'`, `qexists_tac ' e '`), - (`use transitivity for 'x'`, `irule REAL_LE_TRANS THEN qexists_tac ' x '`), - (`use REAL_LE_TRANS`, `irule REAL_LE_TRANS`), - (`resolve with REAL_NEG_INV`, `imp_res_tac REAL_NEG_INV`), - (`induction on 'n'`, `Induct_on ' n '`), - (* rewriting *) - (`rewrite once [REAL_INV_1OVER]`, `once_rewrite_tac [ REAL_INV_1OVER ]`), - (`rewrite once [<- REAL_INV_1OVER]`, `once_rewrite_tac [ GSYM REAL_INV_1OVER ]`), - (`rewrite with [REAL_INV_1OVER]`, `rewrite_tac [REAL_INV_1OVER]`), - (`rewrite with [<- REAL_INV_1OVER]`, `rewrite_tac [GSYM REAL_INV_1OVER]`), - (`rewrite assumptions`, `asm_rewrite_tac []`), - (`rewrite assumptions and [ADD_ASSOC] `, `asm_rewrite_tac [ADD_ASSOC]`), - (* subgoals *) - (`we show first 'T'`, `sg 'T'`), - (`we show next 'T'`, `we show first 'T'`), - (`we show 'T' using (gen_tac)`, `'T' by gen_tac`), - (`we know 'T'`, `'T' by (fs [ ])`), - (`thus 'T'`, `we know 'T'`), - (`'T' using (cheat)`, `'T' by (cheat)`), - (`showing 'T' closes the proof because (gen_tac)`, `'T' suffices_by (gen_tac)`), - (‘Case 'x'’, ‘Goal 'x'’), - (‘cheat then cheat’, ‘cheat THEN cheat’), - (‘Next Goal’, ‘Goal 1’) - ] - in () end; - in - LassieLib.registerJargon "Logic" (jargon) - end - -end; diff --git a/examples/lassie/examples/realTacticsLib.sml b/examples/lassie/examples/realTacticsLib.sml deleted file mode 100644 index 62d4cfacc7..0000000000 --- a/examples/lassie/examples/realTacticsLib.sml +++ /dev/null @@ -1,29 +0,0 @@ -structure realTacticsLib = -struct - - open LassieLib; - - local open realTheory RealArith in end; - val _ = - let - fun jargon () = - let val _ = LassieLib.addCustomTactic RealArith.REAL_ASM_ARITH_TAC "REAL_ASM_ARITH_TAC" - val _ = LassieLib.addCustomTactic DECIDE_TAC "DECIDE_TAC" - val rw_th = fn thm => once_rewrite_tac[thm]; - val _ = LassieLib.addCustomThmTactic rw_th "rw_th"; - val _ = - map (uncurry def) [ - (* intro tactics *) - (`introduce variables`, `rpt gen_tac`), - (`introduce assumptions`, `rpt 
strip_tac`), - (* Custom tactic *) - (`rewrite last assumption`, `pop_assum rw_th`), - (`rewrite ADD_ASSOC for 'n'`, `qspec_then 'n' rw_th ADD_ASSOC`), - (‘trivial’, ‘REAL_ASM_ARITH_TAC’), - (`we know 'T'`, `'T' by (REAL_ASM_ARITH_TAC ORELSE DECIDE_TAC)`) - ] - in () end - in - LassieLib.registerJargon "Reals" jargon - end; -end; diff --git a/examples/lassie/examples/tacticsCaseStudyLib.sml b/examples/lassie/examples/tacticsCaseStudyLib.sml deleted file mode 100644 index d8b3c70fe6..0000000000 --- a/examples/lassie/examples/tacticsCaseStudyLib.sml +++ /dev/null @@ -1,93 +0,0 @@ -structure tacticsCaseStudyLib = -struct - -(** Below we add some natural language tactics to Lassie first, then we showcase - how Lassie generalizes them using SEMPRE **) - -(* First, add some custom tactics, this is also how a hand crafted decision - procedure can be added *) -val _ = LassieLib.addCustomTactic "REAL_ASM_ARITH_TAC"; -val _ = LassieLib.addCustomTactic "impl_tac"; -val _ = LassieLib.addCustomTactic "cheat"; -val _ = LassieLib.addCustomTactic "EQ_TAC"; - -val _ = LassieLib.def `introduce variables` [`rpt gen_tac`]; -val _ = LassieLib.def `introduce variables and assumptions` [`rpt strip_tac`]; -val _ = LassieLib.def `case split for 's'` [`Cases_on 's'`]; -val _ = LassieLib.def `case split` [`(rpt conj_tac ORELSE EQ_TAC) ORELSE Cases`]; -val _ = LassieLib.def `trivial using [CONJ_COMM]` [`metis_tac [CONJ_COMM]`]; -val _ = LassieLib.def `simplify with [CONJ_COMM]` [`simp [CONJ_COMM]`]; -val _ = LassieLib.def `try solving with [CONJ_COMM]` [`TRY simp [CONJ_COMM]`]; -val _ = LassieLib.def `choose 'e'` [`qexists_tac 'e'`]; -val _ = LassieLib.def `use transitivity for 'x'` [`irule REAL_LE_TRANS THEN qexists_tac 'x'`]; -val _ = LassieLib.def `use REAL_LE_TRANS` [`irule REAL_LE_TRANS`]; -val _ = LassieLib.def `perform a case split` [`rpt conj_tac`]; -val _ = LassieLib.def `we show first 'T'` [`sg 'T'`]; -val _ = LassieLib.def `we show 'T' using (gen_tac)` [`'T' by (gen_tac)`]; -val _ 
= LassieLib.def `introduce assumptions` [`rpt strip_tac`]; -val _ = LassieLib.def `rewrite once [REAL_INV_1OVER]` [`once_rewrite_tac [REAL_INV_1OVER]`]; -val _ = LassieLib.def `rewrite once [<- REAL_INV_1OVER]` [`once_rewrite_tac [GSYM REAL_INV_1OVER]`]; -val _ = LassieLib.def `rewrite with [REAL_INV_1OVER]` [`rewrite_tac [REAL_INV_1OVER]`]; -val _ = LassieLib.def `rewrite with [<- REAL_INV_1OVER]` [`rewrite_tac [GSYM REAL_INV_1OVER]`]; -val _ = LassieLib.def `we show next 'T'` [`we show first 'T'`]; -val _ = LassieLib.def `'T' using (fs[])` [`'T' by ( fs[] )`]; -val _ = LassieLib.def `we know 'T'` [`'T' by (REAL_ASM_ARITH_TAC)`]; -val _ = LassieLib.def `thus 'T'` [`we know 'T'`]; -val _ = LassieLib.def `resolve with REAL_NEG_INV` [`imp_res_tac REAL_NEG_INV`]; -val _ = LassieLib.def `follows from [CONJ_COMM]` [`asm_rewrite_tac [CONJ_COMM] THEN fs[CONJ_COMM]`]; -val _ = LassieLib.def `gen_tac . gen_tac` [`gen_tac THEN gen_tac`]; -val _ = LassieLib.def `gen_tac .` [`gen_tac THEN all_tac`]; - -(* -local open LassieLib; -in -val _ = LassieLib.def "introduce variables" ["rpt gen_tac"]; -val _ = LassieLib.def "introduce assumptions" ["rpt strip_tac"]; -end; - -val _ = LassieLib.def "introduce variables and assumptions" ["introduce variables THEN introduce assumptions"]; -val _ = LassieLib.def "we show `T` using (gen_tac)" ["`T` by (gen_tac)"]; - -val _ = LassieLib.def "case split" ["rpt conj_tac ORELSE EQ_TAC"]; -val _ = LassieLib.def "case split for `s`" ["Cases_on `s`"]; - -val _ = LassieLib.nltac "case split for `t`."; -val _ = LassieLib.nltac "case split for `A and B`."; - -val _ = LassieLib.def "trivial using [CONJ_COMM]" ["metis_tac [CONJ_COMM]"]; - -val _ = LassieLib.nltac "trivial using [REAL_ADD_ASSOC]."; -val _ = LassieLib.nltac "trivial using [REAL_ADD_ASSOC, CONJ_COMM, REAL_LDISTRIB]."; - -(** The below tactics generalize for arbitrary list similarly *) -val _ = LassieLib.def "simplify with [MULT]" ["simp [MULT]"]; -val _ = LassieLib.def "solve with [MULT]" 
["simp [MULT]"]; -val _ = LassieLib.def "try solve with [MULT]" ["TRY solve with [MULT]"]; - -(* Note that the above generalizes "try" for any! tactic *) -val _ = LassieLib.nltac "try simplify with [MULT]."; - -val _ = LassieLib.def "choose `e`" ["qexists_tac `e`"]; -val _ = LassieLib.def "use REAL_LE_TRANS" ["irule REAL_LE_TRANS"]; -val _ = LassieLib.def "perform a case split" ["rpt conj_tac"]; -val _ = LassieLib.def "we show first `T`" ["sg `T`"]; -val _ = LassieLib.def "use transitivity for `x`" ["irule REAL_LE_TRANS THEN qexists_tac `x`"]; - -val _ = LassieLib.def "rewrite once [REAL_INV_1OVER]" ["once_rewrite_tac [REAL_INV_1OVER]"]; -val _ = LassieLib.def "rewrite once [<- REAL_INV_1OVER]" ["once_rewrite_tac [GSYM REAL_INV_1OVER]"]; - -val _ = LassieLib.nltac "rewrite once [<- MULT]."; -(* FIXME: LassieLib.nltac "solve with [<- MULT]." *) - -val _ = LassieLib.def "rewrite with [REAL_INV_1OVER]" ["rewrite_tac [REAL_INV_1OVER]"]; -val _ = LassieLib.def "rewrite with [<- REAL_INV_1OVER]" ["rewrite_tac [GSYM REAL_INV_1OVER]"]; - -val _ = LassieLib.def "we show next `T`" ["we show first `T`"]; -val _ = LassieLib.def "`T` using (fs[])" ["`T` by (fs[])"]; -val _ = LassieLib.def "we know `T`" ["`T` by (REAL_ASM_ARITH_TAC)"]; -val _ = LassieLib.def "thus `T`" ["we know `T`"]; -val _ = LassieLib.def "resolve with REAL_NEG_INV" ["imp_res_tac REAL_NEG_INV"]; -val _ = LassieLib.def "follows from [CONJ_COMM]" ["asm_rewrite_tac [CONJ_COMM] THEN fs[CONJ_COMM]"]; -*) - -end diff --git a/examples/lassie/examples/tutorialScript.sml b/examples/lassie/examples/tutorialScript.sml deleted file mode 100644 index 53bd06f3d4..0000000000 --- a/examples/lassie/examples/tutorialScript.sml +++ /dev/null @@ -1,37 +0,0 @@ -open BasicProvers Defn HolKernel Parse Conv SatisfySimps Tactic boolTheory - bossLib arithmeticTheory; -open LassieLib arithTacticsLib logicTacticsLib; - -val _ = new_theory "tutorial"; - -val _ = loadJargon "Arithmetic"; -val _ = loadJargon "Logic"; - -Definition sum_def: 
- sum (n:num) = if n = 0 then 0 else sum (n-1) + n -End - -Definition sumEq_def: - sumEq (0:num) = 0 /\ - sumEq n = n + sumEq (n-1) -End - -Theorem closed_form_sum: - ! n. sumEq n = n * (n + 1) DIV 2 -Proof - nltac `Induction on 'n'.` - \\ nltac `simplify with [sumEq_def].` - \\ nltac - `simplify with [sumEq_def, GSYM ADD_DIV_ADD_DIV].` - \\ nltac `'2 * SUC n + n * (n + 1) = SUC n * (SUC n + 1)' - suffices to show the goal.` - \\ nltac - `show 'SUC n * (SUC n + 1) = (SUC n + 1) + n * (SUC n + 1)' - using (simplify with [MULT_CLAUSES]).` - \\ nltac `simplify.` - \\ nltac `show 'n * (n + 1) = SUC n * n' using (trivial using [MULT_CLAUSES, - MULT_SYM]).` - \\ nltac `rewrite assumptions. simplify.` -QED - -val _ = export_theory(); diff --git a/examples/lassie/regression/Holmakefile b/examples/lassie/regression/Holmakefile deleted file mode 100644 index 5f2fec6c4f..0000000000 --- a/examples/lassie/regression/Holmakefile +++ /dev/null @@ -1,8 +0,0 @@ -INCLUDES = ../src/ -CLINE_OPTIONS=-j1 -TACTIC_WORLD = ../sempre/classes/interactive/edu/stanford/nlp/sempre/interactive/lassie/TacticWorld.class - -all: $(DEFAULT_TARGETS) -.PHONY: all - -caseStudy1EuclidTheory.sml: $(TACTIC_WORLD) diff --git a/examples/lassie/regression/arithTacticsLib.sml b/examples/lassie/regression/arithTacticsLib.sml deleted file mode 100644 index a558493bbc..0000000000 --- a/examples/lassie/regression/arithTacticsLib.sml +++ /dev/null @@ -1,44 +0,0 @@ -structure arithTacticsLib = -struct - open LassieLib; - - local open arithmeticTheory in end; - fun fs_all g = let val thms = map (fn (a,th) => th) (DB.theorems "-") in fs thms g end; - val _ = - let - fun jargon () = - let - val _ = LassieLib.addCustomTactic fs_all "fs_all"; - val _ = - map (uncurry def) [ - (`simplify`, `fs [ ]`), - (`simplify with [ADD_ASSOC]`, `fs [ ADD_ASSOC ]`), - (`use [ADD_ASSOC] to simplify`, `fs [ ADD_ASSOC ]`), - (`follows from [ADD_ASSOC]`, `metis_tac [ ADD_ASSOC ]`), - (`rewrite [ADD_ASSOC]` ,`rw [ADD_ASSOC]`), - 
(`[ADD_ASSOC] solves the goal`, - `all_tac THEN ( fs [ ADD_ASSOC ] THEN NO_TAC) ORELSE (rw [ ADD_ASSOC ] THEN NO_TAC) ORELSE metis_tac [ ADD_ASSOC ]`), - (‘trivial’, ‘[] solves the goal’), - (`perform an induction on 't'`, `Induct_on ' t '`), - (`Induction on 't'`, `Induct_on ' t '`), - (`perform a case split`, `Cases`), - (`perform a case split for 't'`, `Cases_on ' t '`), - (`Complete Induction on 't'`, `completeInduct_on ' t '`), - (`suppose not`, `spose_not_then assume_tac`), - (`show 'T' using (gen_tac)` ,`' T ' by gen_tac`), - (‘show 'T' using [ CONJ_COMM ]’, ‘ ' T ' by ([ CONJ_COMM ] solves the goal)’), - (‘'T' follows trivially’, ‘show 'T' using (trivial)’), - (`we further know 'T'`, `' T ' by rw [ ]`), - (`we can derive 'T' from [ADD_ASSOC]`, `' T ' by rw [ ADD_ASSOC ]`), - (`thus ADD_ASSOC for 'n'`, `qspec_then ' n ' assume_tac ADD_ASSOC`), - (‘'T' suffices to show the goal’, ‘'T' suffices_by (fs[])’), - (`it suffices to show that the arguments are equal`, `AP_TERM_TAC`), - (`it suffices to show that the functions are equal`, `AP_THM_TAC`), - (‘cheat and cheat’, ‘cheat THEN cheat’) - ] - in () end; - in - LassieLib.registerJargon "Arithmetic" (jargon) - end - -end; diff --git a/examples/lassie/regression/caseStudy1EuclidScript.sml b/examples/lassie/regression/caseStudy1EuclidScript.sml deleted file mode 100644 index 82ef9cab4e..0000000000 --- a/examples/lassie/regression/caseStudy1EuclidScript.sml +++ /dev/null @@ -1,280 +0,0 @@ -(** Taken from the HOL4 distribution (original file: HOL4/examples/euclid.sml **) - -(*===========================================================================*) -(* Euclid's theorem: for every prime, there is another one that is larger. *) -(* This proof has been excerpted and adapted from John Harrison's proof of *) -(* a special case (n=4) of Fermat's Last Theorem. 
*) -(* *) -(*===========================================================================*) - -(*---------------------------------------------------------------------------*) -(* First, open required context: the theory of arithmetic. This theory is *) -(* automatically loaded when HOL starts, but the ML module arithmeticTheory *) -(* needs to be opened before the definitions and theorems of the theory are *) -(* available without supplying the "arithmeticTheory." prefix. *) -(*---------------------------------------------------------------------------*) - -open BasicProvers Defn HolKernel Parse Conv SatisfySimps Tactic monadsyntax - boolTheory bossLib arithmeticTheory; - -open LassieLib arithTacticsLib; - -val _ = new_theory "caseStudy1Euclid"; - -val _ = LassieLib.loadJargon "Arithmetic"; -(*---------------------------------------------------------------------------*) -(* Divisibility. *) -(*---------------------------------------------------------------------------*) - -set_fixity "divides" (Infix(NONASSOC, 450)); - -Definition divides_def: -(a divides b) = (? x. b = a * x) -End - - -(*---------------------------------------------------------------------------*) -(* Primality. *) -(*---------------------------------------------------------------------------*) - -Definition prime_def: - prime p = (p<>1 /\ !x . x divides p ==> (x=1) \/ (x=p)) -End - -(*---------------------------------------------------------------------------*) -(* A sequence of basic theorems about the "divides" relation. *) -(*---------------------------------------------------------------------------*) - -Theorem DIVIDES_0: - ! x . x divides 0 -Proof - LassieLib.nltac ` - [divides_def, MULT_CLAUSES] solves the goal.` - (* metis_tac [divides_def,MULT_CLAUSES] *) -QED - -Theorem DIVIDES_ZERO: - ! x . (0 divides x) = (x = 0) -Proof - LassieLib.nltac ` - [divides_def, MULT_CLAUSES] solves the goal.` - (* metis_tac [divides_def,MULT_CLAUSES] *) -QED - -Theorem DIVIDES_ONE: - ! x . 
(x divides 1) = (x = 1) -Proof - LassieLib.nltac - `[divides_def, MULT_CLAUSES, MULT_EQ_1] solves the goal.` - (* metis_tac [divides_def,MULT_CLAUSES,MULT_EQ_1] *) -QED - -Theorem DIVIDES_REFL: - ! x . x divides x -Proof - LassieLib.nltac ` - [divides_def, MULT_CLAUSES] solves the goal.` - (* metis_tac [divides_def,MULT_CLAUSES] *) -QED - -Theorem DIVIDES_TRANS: - ! a b c . a divides b /\ b divides c ==> a divides c -Proof - LassieLib.nltac ` - [divides_def, MULT_ASSOC] solves the goal.` - (* metis_tac [divides_def,MULT_ASSOC] *) -QED - -Theorem DIVIDES_ADD: - ! d a b . d divides a /\ d divides b ==> d divides (a + b) -Proof - LassieLib.nltac ` - [divides_def, LEFT_ADD_DISTRIB] solves the goal.` - (* metis_tac [divides_def, LEFT_ADD_DISTRIB] *) -QED - -Theorem DIVIDES_SUB: - !d a b . d divides a /\ d divides b ==> d divides (a - b) -Proof - LassieLib.nltac ` - [divides_def, LEFT_SUB_DISTRIB] solves the goal.` - (* metis_tac [divides_def,LEFT_SUB_DISTRIB] *) -QED - -Theorem DIVIDES_ADDL: - !d a b . d divides a /\ d divides (a + b) ==> d divides b -Proof - LassieLib.nltac ` - [ADD_SUB, ADD_SYM, DIVIDES_SUB] solves the goal.` - (* metis_tac [ADD_SUB,ADD_SYM,DIVIDES_SUB] *) -QED - -Theorem DIVIDES_LMUL: - !d a x . d divides a ==> d divides (x * a) -Proof - LassieLib.nltac ` - [divides_def, MULT_ASSOC, MULT_SYM] solves the goal.` - (* metis_tac [divides_def,MULT_ASSOC,MULT_SYM] *) -QED - -Theorem DIVIDES_RMUL: - !d a x . d divides a ==> d divides (a * x) -Proof - LassieLib.nltac ` - [MULT_SYM,DIVIDES_LMUL] solves the goal.` - (* metis_tac [MULT_SYM,DIVIDES_LMUL] *) -QED - -Theorem DIVIDES_LE: - !m n . m divides n ==> m <= n \/ (n = 0) -Proof - LassieLib.nltac ‘ - rewrite [divides_def]. 
- [] solves the goal.’ - (* rw [divides_def] >> rw[] *) -QED - -(*---------------------------------------------------------------------------*) -(* Various proofs of the same formula *) -(*---------------------------------------------------------------------------*) -val NOT_X_LE = save_thm ("NOT_X_LE", DECIDE ``! x. ~(x < x)``); - -Theorem DIVIDES_FACT: - !m n . 0 < m /\ m <= n ==> m divides (FACT n) -Proof - LassieLib.nltac ‘ - rewrite [LESS_EQ_EXISTS]. - perform an induction on 'p'. - [FACT, NOT_X_LE, num_CASES, DIVIDES_RMUL, DIVIDES_LMUL, DIVIDES_REFL, ADD_CLAUSES] - solves the goal.’ - (* - rw [LESS_EQ_EXISTS] - >> Induct_on `p` - >> metis_tac [FACT, DECIDE ``!x. ~(x < x)``, num_CASES, - DIVIDES_RMUL,DIVIDES_LMUL,DIVIDES_REFL,ADD_CLAUSES] *) -QED - -(*---------------------------------------------------------------------------*) -(* Zero and one are not prime, but two is. All primes are positive. *) -(*---------------------------------------------------------------------------*) - -Theorem NOT_PRIME_0: - ~prime 0 -Proof - LassieLib.nltac `rewrite [prime_def, DIVIDES_0].` - (* rw [prime_def,DIVIDES_0] *) -QED - -Theorem NOT_PRIME_1: - ~prime 1 -Proof - LassieLib.nltac `rewrite [prime_def].` - (* rw [prime_def] *) -QED - -val two_prime_eqs = curry save_thm "two_prime_eqs" (DECIDE ``~(2=1) /\ ~(2=0) /\ ((x <=2) = (x = 0) \/ (x = 1) \/ (x = 2))``); - -Theorem PRIME_2: - prime 2 -Proof - LassieLib.nltac ` - rewrite [prime_def]. - [DIVIDES_LE, DIVIDES_ZERO, two_prime_eqs] solves the goal.` - (* rw [prime_def] >> - metis_tac [DIVIDES_LE, DIVIDES_ZERO, - DECIDE``~(2=1) /\ ~(2=0) /\ (x<=2 = (x=0) \/ (x=1) \/ (x=2))``] *) -QED - -Theorem PRIME_POS: - !p . prime p ==> 0

> rw [NOT_PRIME_0] *) -QED - -(*---------------------------------------------------------------------------*) -(* Every number has a prime factor, except for 1. The proof proceeds by a *) -(* "complete" induction on n, and then considers cases on whether n is *) -(* prime or not. The first case (n is prime) is trivial. In the second case, *) -(* there must be an "x" that divides n, and x is not 1 or n. By DIVIDES_LE, *) -(* n=0 or x <= n. If n=0, then 2 is a prime that divides 0. On the other *) -(* hand, if x <= n, there are two cases: if x ?p . prime p /\ p divides n -Proof - LassieLib.nltac ‘ - Complete Induction on 'n'. - rewrite []. - perform a case split for 'prime n'. - Goal 1. [DIVIDES_REFL] solves the goal. End. - Goal 1. - show '? x. x divides n and x <> 1 and x <> n' using (follows from [prime_def]). - [LESS_OR_EQ, PRIME_2, DIVIDES_LE, DIVIDES_TRANS, DIVIDES_0] solves the goal. - End.’ - (* - completeInduct_on `n` - >> rw [] - >> Cases_on `prime n` >| - [metis_tac [DIVIDES_REFL], - `?x. x divides n /\ x<>1 /\ x<>n` by metis_tac[prime_def] >> - metis_tac [LESS_OR_EQ, PRIME_2, - DIVIDES_LE, DIVIDES_TRANS, DIVIDES_0]] *) -QED - -(*---------------------------------------------------------------------------*) -(* In the following proof, metis_tac automatically considers cases on *) -(* whether n is prime or not. *) -(*---------------------------------------------------------------------------*) - -Theorem PRIME_FACTOR: - !n . n<>1 ==> ?p . prime p /\ p divides n -Proof - LassieLib.nltac ‘ - Complete Induction on 'n'. - [DIVIDES_REFL,prime_def,LESS_OR_EQ, PRIME_2, - DIVIDES_LE, DIVIDES_TRANS, DIVIDES_0] solves the goal.’ - (* - completeInduct_on `n` >> - metis_tac [DIVIDES_REFL,prime_def,LESS_OR_EQ, PRIME_2, - DIVIDES_LE, DIVIDES_TRANS, DIVIDES_0] *) -QED - -(*---------------------------------------------------------------------------*) -(* Every number has a prime greater than it. *) -(* Proof. 
*) -(* Suppose not; then there's an n such that all p greater than n are not *) -(* prime. Consider FACT(n) + 1: it's not equal to 1, so there's a prime q *) -(* that divides it. q also divides FACT n because q is less-than-or-equal *) -(* to n. By DIVIDES_ADDL, this means that q=1. But then q is not prime, *) -(* which is a contradiction. *) -(*---------------------------------------------------------------------------*) - -val neq_zero = curry save_thm "neq_zero" (DECIDE ``~(x=0) = (0 < x)``); - -Theorem EUCLID: - !n . ?p . n < p /\ prime p -Proof - LassieLib.nltac‘ - suppose not. simplify. - we can derive 'FACT n + 1 <> 1' from [FACT_LESS, neq_zero]. - thus PRIME_FACTOR for 'FACT n + 1'. - we further know '?q. prime q and q divides (FACT n + 1)'. - show 'q <= n' using [NOT_LESS_EQUAL]. - show '0 < q' using [PRIME_POS] . - show 'q divides FACT n' using [DIVIDES_FACT]. - show 'q=1' using [DIVIDES_ADDL, DIVIDES_ONE]. - show 'prime 1' using (simplify). - [NOT_PRIME_1] solves the goal.’ - (* - spose_not_then strip_assume_tac - >> mp_tac (SPEC ``FACT n + 1`` PRIME_FACTOR) - >> rw [FACT_LESS, DECIDE ``~(x=0) = (0> metis_tac [DIVIDES_FACT, DIVIDES_ADDL, DIVIDES_ONE, - NOT_PRIME_1, NOT_LESS, PRIME_POS] *) -QED - -val _ = export_theory(); diff --git a/examples/lassie/sempre/.gitignore b/examples/lassie/sempre/.gitignore deleted file mode 100644 index 2335baeab2..0000000000 --- a/examples/lassie/sempre/.gitignore +++ /dev/null @@ -1,48 +0,0 @@ -lib -fig -sfig -refdb -virtuoso-opensource - -classes -libsempre -sempre.jar -module-classes.txt - -state -out -test-output - -.project -.classpath -.settings -.idea -semparse.iml -*~ -*.swp -*.bak -*.pyc -*.cache -*.DS_Store -java.hprof.txt - -# Don't put papers here -/papers - -# Symlinks -/c -/e -/t -/x -scr -rdf - -# interactive outputs -int-output* -int-backup -interactive/.ipynb_checkpoints -interactive/lassie.lexicon -interactive/sempre-out-socket.sml -# Community server logs -community-server/data 
-community-server/data-backup diff --git a/examples/lassie/sempre/DOCUMENTATION.md b/examples/lassie/sempre/DOCUMENTATION.md deleted file mode 100644 index 31bbffe15a..0000000000 --- a/examples/lassie/sempre/DOCUMENTATION.md +++ /dev/null @@ -1,1236 +0,0 @@ -# SEMPRE 2.0 documentation - -This document describes SEMPRE 2.0 in detail. See -[the tutorial](TUTORIAL.md) for a more engaging introduction. This document -assumes a modest understanding of how SEMPRE works. - -If you find any bugs, please report them or fix them (file a GitHub issue or -submit a pull request)! - -## Code organization - -SEMPRE is designed to be modular so that one can plug in various types of -parsers, logical forms, executors, etc. - -### Modules - -The SEMPRE code is broken up into a set of modules (see `Makefile` to see the -list). The code of the core module is in: - - src/edu/stanford/nlp/sempre - -The code for each of the other non-essential modules is in: - - src/edu/stanford/nlp/sempre/ - -- core: contains all the basic code (this is kept to a minimum) -- cache: allows us to run a cache server (implements a simple key-value store for things like SPARQL results) -- corenlp: contains the glue code that allows us to use Stanford CoreNLP -- freebase: needed to handle logical forms which are database queries (SPARQL, Freebase) - -### Java classes - -Here are the basic Java classes in `core`, organized by function: - -Logical forms: - -- `Formula` (`ValueFormula`, `SuperlativeFormula`, ...): logical form -- `Derivation`: contains `Formula` and the way it was constructed -- `SemanticFn` (`JoinFn`, `NumberFn`, ...): takes multiple `Derivation`s and combines them into one -- `Rule`: associated with a `SemanticFn` which specifies how to combine `Derivation`s -- `Grammar`: a set of `Rule`s - -Execution: - -- `Value` (`NameValue`, `ListValue`, ...): represents a denotation -- `Executor` (`JavaExecutor`, `freebase.SparqlExecutor`): takes a `Formula` and returns a value -- `ValueEvaluator` 
(`ExactValueEvaluator`, `freebase.FreebaseValueEvaluator`): evaluates how correct a denotation is (with respect to the correct answer) - -Parsing: - -- `Example`: specifies an utterance and a `ContextValue` -- `LanguageAnalyzer` (`SimpleLanguageAnalyzer`, `corenlp.CoreNLPAnalyzer`): does basic linguistic analysis on an `Example` -- `FeatureExtractor`: takes an `Example` and `Derivation` and extracts features for scoring -- `Params`: parameters of the semantic parser -- `Parser` (`BeamParser`, `FloatingParser`, `ReinforcementParser`): takes `Example`s and produces `Derivation`s, scoring them using `FeatureExtractor` and `Params` - -Learning: - -- `Dataset`: specifies a set of `Example`s -- `Learner`: takes a `Dataset` and produces parameters `Params` - -# Logical forms - -There are two types of logical forms, depending on the application: - -- Lambda DCS logical forms for querying databases (declarative); use `freebase.SparqlExecutor` -- Simple Java logical forms for doing everything else (procedural); use `JavaExecutor` - -The interpretation/denotation of a logical form depends on the `Executor` that -is being used (specified via the `-Builder.executor` option). `Executor`s -(i.e., classes that extend `Executor`) define how logical forms are mapped to -denotations. `JavaExecutor` treats the logical forms as simple Java -code and just runs it. `SparqlExecutor` treats the logical forms as lambda DCS -expressions, which are used to query a database (e.g., via SPARQL, and e.g., -Freebase). - -Logical forms are defined recursively. In the base case, we have primitive -formulas, which are either variables or values. In the recursive case, we -build up larger formulas out of smaller formulas using various composition -constructs. It is useful to think of the logical form as a tree, where the -leaves are the primitive formulas and each non-leaf subtree as corresponding -to one composition operation. 
- -## Primitive logical forms - -### ValueFormula - -A `ValueFormula` is a formula that wraps a specific denotation `Value`. -The possible `Value`s, along with an example of each, are as follows: - -- `BooleanValue`: standard boolean value. For example: - - (boolean true) - (boolean false) - -- `NumberValue`: standard floating point number with optional units. For example: - - (number 2) - (number 2.5) - (number 2.5 fb:en.meter) - -- `StringValue`: standard UTF-8 string. For example: - - (string hello) - (string "hello") - (string "hello world") - (string hello\ world) - - Note that the first two and the last two are identical. - -- `DateValue`: year-month-day representation of a date. For example: - - (date 2014 12 21) # December 21, 2014 - (date 2014 -1 -1) # 2014 - (date -1 12 21) # December 21 - (date -1 12 -1) # December - - In general: - - (date <year> <month> <day>) - -- `NameValue`: represents a predicate symbol (either representing an entity or relation in a knowledge base); - the symbol has an optional description. For example: - - fb:en.barack_obama - (name fb:en.barack_obama) - (name fb:en.barack_obama "Barack Obama") - - all refer to the same symbol. Note: the first one is only valid as a string - representation of a `Formula` (which represents a `Value`) — to be used - in logical forms. The second two are valid representations of a `Value` - — usually returned from `Executor`s. - -- `ListValue`: represents a list of values. For example: - - (list) - (list (number 1) (number 2) (string hello)) - - In general: - - (list <value-1> ... <value-n>) - -- `TableValue`: represents a table (matrix) of values, where each column is labeled with a string. Here is an example table: - - (table (State Capital) ((name fb:en.california) (name fb:en.sacramento)) ((name fb:en.oregon) (name fb:en.salem))) - - In general: - - (table (<header-1> ... <header-n>) (<value-1> ... <value-n>) ...) - -There are some more arcane `Value`s (see `Values.java` for a list), but they -are not that important from the point of view of specifying a logical form. 
- -### VariableFormula - -Besides `ValueFormula`s, the other primitive formula is a `VariableFormula`, -which represents a variable in a lambda expression. An example `VariableFormula` is: - - (var x) - -which simply represents a variable with the name `x`. On their own, these -formulas are rather uninteresting. However, they are integral subcomponents of -`LambdaFormula`s which are described later. - -## Compositional logical forms - -Now we describe the compositional logical forms, those that allow us to -construct larger logical forms from smaller ones. - -For executing simple Java programs (`JavaExecutor`), the only relevant -composition is `CallFormula`. The rest are for building lambda DCS documents -for execution using the `freebase.SparqlExecutor`. - -### CallFormula - -A `CallFormula` has the following form: - - (call <function-name> <arg-1> ... <arg-n>) - -The `<function-name>` is a string specifying any Java function and the -`<arg-i>` entries are logical forms. The function is one of the following: - -- A static method: - - (call java.lang.Math.cos (number 0)) - -- An instance method (`arg-1` is used as `this`): - - (call .length (string hello)) # (number 5) - (call .indexOf (string "what is this?") (string is)) # (number 5) - -- A shortcut, which maps onto a static method in `JavaExecutor.BasicFunctions`: - - (call < (number 3) (number 4)) # (boolean true) - (call + (number 3) (number 4)) # (number 7) - (call + (string 3) (string 4) (string 5)) # (string 345) - - Note that operator overloading is supported, and resolution is based on the - argument types. Conditionals are supported but both true and false branches - are evaluated: - - (call if (call < (number 3) (number 4)) (string yes) (string no)) # (string yes) - - We can perform iteration via functional programming. 
These list functions - take a `ListValue` and a `LambdaFormula` as arguments: - - (call map (list (number 3) (number 4)) (lambda x (call * (var x) (var x)))) # (list (number 9) (number 16)) - (call select (list (number 3) (number 4)) (lambda x (call < (var x) (number 3.5)))) # (list (number 3)) - (call reduce (list (number 3) (number 4)) (lambda x (lambda y (call + (var x) (var y))))) # (number 7) - - To create a list of indices: - - (call range (number 0) (number 3)) # (list (number 0) (number 1) (number 2)) - - Note that the goal is not to support the ability to write arbitrarily complex - programs using this fairly verbose language. In practice, you would write - your own custom modules, and use these mechanisms to glue a small set of modules together. - -### JoinFormula - -Now we embark on our tour of lambda DCS logical forms. See the [lambda DCS -documentation](http://arxiv.org/pdf/1309.4408.pdf) for a more mathematical -description. - -A brief note about terminology in lambda DCS. Unaries and binaries are logical -forms which represent sets and (binary) relations: - -- Unary (e.g., `(fb:type.object.type fb:people.person)`): denotes a set of entities -- Binary (e.g., `fb:location.location.area`): denotes a set of entity-pairs (includes functions too). - We consider the first argument the *head* and the second argument the *modifier*. For example: - - fb:location.location.containedby denotes {(fb:en.san_francisco, fb:en.california), ...} - fb:people.person.place_of_birth denotes {(fb:en.barack_obama, fb:en.honolulu), ...} - !fb:people.person.place_of_birth denotes {(fb:en.honolulu, fb:en.barack_obama), ...} - - The notational convention is that `!` reverses the head and modifier. - - In general: - - <binary> denotes {(<head>, <modifier>), ...} - -A `JoinFormula` combines a binary with a unary: - - (<binary> <unary>) - -This is a database join on the second argument of the binary and the unary, and -a projection onto the first argument of the binary. 
- -Here are some examples: - - (fb:location.location.containedby fb:en.california) - (!fb:people.person.place_of_birth fb:en.barack_obama) - (fb:people.person.place_of_birth (fb:location.location.containedby fb:en.california)) - -Sometimes, the binary is special, and the resulting logical form denotes an -infinite set: - - (!= fb:en.california) # denotes entities not equal to California - (> (number 5)) # denotes numbers greater than 5 - (STRSTARTS (string A)) # denotes strings starting with "A" - -Note: One might be tempted to think of `JoinFormula` as function application. -This is sometimes valid, but one has to be very careful about which is the head -and which is the modifier. When the binary is a `LambdaFormula`, then function -application is generally the right mental model. - -### MergeFormula - -A `MergeFormula` combines two formulas and represents either their set -intersection (and) or set union (or): - - (and <formula-1> <formula-2>) - (or <formula-1> <formula-2>) - -For example, *scientists born in Seattle*: - - (and (fb:people.person.profession fb:en.scientist) (fb:people.person.place_of_birth fb:en.seattle)) - -Here are *people who are born in Honolulu or Seattle*: - - (or (fb:people.person.place_of_birth fb:en.honolulu) (fb:people.person.place_of_birth fb:en.seattle)) - -### NotFormula - -A `NotFormula` takes a formula denoting a set and denotes the complement of that set: - - (not <formula>) - -For example, *cities not in California*: - - (and (fb:type.object.type fb:location.citytown) (not (fb:location.location.containedby fb:en.california))) - -### AggregateFormula - -An `AggregateFormula` takes a formula representing a set and performs some operation. The general form is: - - (<mode> <formula>) - -where `<mode>` defines the type of aggregation to be performed while -`<formula>` denotes/defines the set that we are aggregating over. The possible -modes (with their corresponding semantics) are listed below: - -- `count`: Returns the cardinality of a set. -- `sum` : Returns the sum of the elements in the set. 
-- `avg` : Returns the average/mean of the elements in the set. -- `min` : Returns the minimum element of a set. -- `max` : Returns the maximum element of a set. - -Note that the latter four modes can only be applied to logical forms denoting sets of numbers. - -For example, *number of people born in Honolulu*: - - (count (fb:people.person.place_of_birth fb:en.honolulu)) # (number 570) - -Here is the *maximum height of any mountain*: - - (max (!fb:geography.mountain.elevation (fb:type.object.type fb:geography.mountain))) # (number 8848 fb:en.meter) - -### ArithmeticFormula - -An `ArithmeticFormula` combines two logical forms denoting numbers and also -denotes a number. - -The form of an `ArithmeticFormula` is as follows: - - (<operator> <formula-1> <formula-2>) - -The `<operator>` entry is either `+`, `-`, `*`, or `/`, and the two arguments -are formulas that denote numeric values (including dates, although the support -there is a bit sketchy). Here are some examples: - - (+ (number 5) (number 3)) # (number 8) - (* (number -1) (number 3)) # (number -3) - -Here's how to compute the difference in height between two people: - - (- (!fb:people.person.height_meters fb:en.michael_jordan) (!fb:people.person.height_meters fb:en.barack_obama)) # (number 0.130 fb:en.meter) - -Note that we could not define these arithmetic formulas via `JoinFormula` -because they are ternary relations rather than binary ones. - -### ReverseFormula - -A `ReverseFormula` takes a binary logical form denoting a set of head-modifier -pairs and denotes the corresponding set of modifier-head pairs. 
- -Recall: - - fb:people.person.place_of_birth denotes {(fb:en.barack_obama, fb:en.honolulu), ...} - -The following is equivalent to `!fb:people.person.place_of_birth`: - - (reverse fb:people.person.place_of_birth) denotes {(fb:en.honolulu, fb:en.barack_obama), ...} - -But reversal can also be applied to compositional binaries built using `LambdaFormula`: - - (reverse (lambda x (fb:people.person.places_lived (fb:people.place_lived.location (var x))))) - -You can check that the above is equivalent to: - - (lambda x (!fb:people.place_lived.location (!fb:people.person.places_lived (var x)))) - -In both logical forms, the location is in the head position and the person is in -the modifier position. - -### LambdaFormula - -Now the serious stuff begins. Up until now, all the compositional logical -forms were unaries (which denote sets). Lambda abstraction allows us to -construct logical forms that denote binary, ternary, and -other higher-order relations. - -The general form includes a variable and a body formula which uses the variable: - - (lambda <var> <body>) - -If the body is a unary, then the resulting logical form is a binary, where the -unary represents the head and the variable represents the modifier. Let us see some examples. - -The first one is equivalent to `fb:people.person.place_of_birth`: - - (lambda x (fb:people.person.place_of_birth (var x))) - -This logical form denotes a binary relation where the head is the person and -the modifier is the location: - - (lambda x (fb:people.person.places_lived (fb:people.place_lived.location (var x)))) - -This more complex (but very useful) example represents a binary where the head -is a person and the modifier is the number of children the person has: - - (reverse (lambda x (count (!fb:people.person.children (var x))))) denotes {(fb:en.barack_obama, 2), ...} - -#### Macro substitution - -Another more syntactic view of `LambdaFormula`s is that they are just logical -forms with holes. 
In this case, a `JoinFormula` where the binary is a -`LambdaFormula` performs macro substitution. Here is an example of a join -of a `LambdaFormula` binary and an unary: - - ((lambda x (fb:people.person.places_lived (fb:people.place_lived.location (var x)))) fb:en.seattle) - -In the macro substitution view, the variable `x` becomes bound to `fb:en.seattle`, and the resulting logical form is: - - (fb:people.person.places_lived (fb:people.place_lived.location fb:en.seattle)) - -which is equivalent. This process is called beta-reduction. But one must -exercise care, since the macro substitution view and the original higher-order -relation view are different. Consider: - - ((lambda x (and (!fb:people.person.place_of_birth (var x)) (!fb:people.deceased_person.place_of_death (var x)))) (fb:type.object.type fb:people.person)) - -In the higher-order relation view, the binary relates locations (head) to -people (modifier). The resulting logical form denotes the set of locations -which are both the place of birth and place of death of some person. However, -if we do macro substitution, then we get the set of locations which are either -the place of birth of someone or the place of death of someone (possibly -different). - -Macro substitution is triggered by explicit beta reduction (see `JoinFn` with -`betaReduce` below), and this is typically during the intermediate steps of -constructing logical forms. - -We can construct logical forms that have more than one argument: - - (lambda binary (lambda unary ((var binary) (var unary)))) - -We can apply this ternary logical form to two arguments, - - (((lambda binary (lambda unary ((var binary) (var unary)))) fb:people.person.place_of_birth) fb:en.seattle) - -which after beta reduction results in the very mundane - - (fb:people.person.place_of_birth fb:en.seattle) - -In lambda DCS, all logical forms are either unaries or binaries. 
Ternaries and -beyond are exclusively used for macro substitution to construct logical forms -in a controlled compositional way. - -### SuperlativeFormula - -A `SuperlativeFormula` formula takes a unary denoting a set and a binary -denoting a relation between elements of that set (head) and a number -(modifier), and denotes a set representing extreme elements based on the -relation. - -Here is the general form: - - ( ) - -The different pieces are as follows: - -- `` is either `argmin` or `argmax` -- `` is an integer indicating the offset in the sorted list of the entities -- `` specifies the number of elements to return -- `` is the base set that we're drawing from -- `` specifies the relation by which we sort the elements - -Here is *the city with the largest area*: - - (argmax 1 1 (fb:type.object.type fb:location.citytown) fb:location.location.area) - -The *second largest city by area*: - - (argmax 2 1 (fb:type.object.type fb:location.citytown) fb:location.location.area) - -The *five largest cities by area*: - - (argmax 1 5 (fb:type.object.type fb:location.citytown) fb:location.location.area) - -The *person who has the most number of children* (who, in Freebase, turns out -to be Chulalongkorn with a whopping 62): - - (argmax 1 1 (fb:type.object.type fb:people.person) (reverse (lambda x (count (!fb:people.person.children (var x)))))) - -### MarkFormula - -In a way, `MarkFormula` essentially allows us to do anaphora. If we think of -simple lambda DCS (joins and merges) as producing tree-structured logical -forms, `MarkFormula` allows us to have non-tree edges between the root of a -subtree and a node in that subtree. - -Recall `LambdaFormula` creates a binary from a unary. A `MarkFormula` creates -a unary from a unary, which simply binds the body unary to the variable (which -appears in the body). 
- -The general form: - - (mark ) - -Here is *those whose place of birth is the same as their place of death*: - - (mark x (fb:people.person.place_of_birth (!fb:people.deceased_person.place_of_death (var x)))) - -Note that `x` binds to the head of `fb:people.person.place_of_birth`, -representing the person in question. This allows us to use this head again -deeper in the logical form. Here's a pictorial representation: - - x -- [fb:people.person.place_of_birth] --> ? - ? -- [!fb:people.deceased_person.place_of_death] --> x - -where `?` denotes an unnamed intermediate variable. - -# Semantic functions - -From the tutorial, recall that a grammar is a set of rules of the following -form: - - (rule ( ... ) ) - -Basically, these rules specify how derivations belonging to the source categories are combined -into derivations that correspond to the target category. -Semantic functions are the workhorses of these rules: -they take in a set of source derivations and apply transformations (defined via Java code) on these derivations -in order to produce a derivation that is a member of the target category. - -Since semantic functions are built from arbitrary Java code (any class extending `SemanticFn`), there is a great deal of flexibility, -and knowing how to properly use semantic functions is perhaps the most important step in working with SEMPRE. -This section defines the semantic functions that come pre-packaged with SEMPRE. -In all likelihood, these semantic functions will provide all the functionality that you need. - -Some of these semantic functions will rely on other packages/classes/options being used/set in order to function properly. -For example, many semantic functions require that the Stanford CoreNLP package is loaded (via the `-languageAnalyzer corenlp.CoreNLPAnalyzer` option); -this package contains many general purpose NLP algorithms that extract linguistic information (e.g., parts of speech) from utterances. 
-Dependencies on packages such as CoreNLP will be noted when necessary. - -## Special categories: - -Some special categories that you should know in order to effectively write grammars and use semantic functions: - -- `$TOKEN`: selects a single atomic word from the utterance. -- `$PHRASE`: selects any subsequence of tokens/words from the utterance. -- `$LEMMA_TOKEN`: selects any atomic word from the utterance and lemmatizes the word. For example, "arriving" would become "arrive". -- `$LEMMA_PHRASE`: selects any subsequence of tokens/words and lemmatizes them. - -Both of the special categories that rely on lemmatization will only function -properly if the `corenlp.CoreNLPAnalyzer` is loaded. Otherwise, the -lemmatization will simply amount to making everything lowercase. - -There are two broad informal classes of semantic functions: primitive semantic -functions and compositional semantic functions. The primitive ones take -phrases in the natural language utterance, filter them, and produce some -derivation generally with a simple logical form. The compositional ones take -these simple derivations and combine them into larger derivations, -usually generating larger logical forms. - -## Primitive semantic functions - -Primitives are basic semantic functions that directly map spans (i.e., subsequences) of an utterance to logical forms. -Most grammars will rely on primitives as the "bottom" rules, i.e., the rules that have some span of the utterance as their right hand side (RHS) -and which other rules will build off of. - -### ConstantFn - -This is a basic primitive function which allows you to hard-code basic rules, such as - - (rule $ROOT (barack obama) (ConstantFn fb:en.barack_obama fb:people.person)) - -which would parse the following utterance into its corresponding entity (i.e., logical form): - - barack obama # (name fb:en.barack_obama) - -Note the form that `ConstantFn` takes, i.e.
`(ConstantFn formula type)`, and that the `type` argument is optional, meaning that - - (rule $ROOT (barack obama) (ConstantFn fb:en.barack_obama)) - -would also parse the above utterance. -However, note that if types are not explicitly added, the system relies on automatic type inference (see the section on Types below). - -Another example of `ConstantFn` would be the following: - - (rule $ROOT (born in) (ConstantFn fb:people.person.place_of_birth (-> fb:location.location fb:people.person)))) - -which would parse the phrase `born in` to a relation/binary logical form (as opposed to an entity) as follows: - - born in # (name fb:people.person.place_of_birth) - -Lastly, note that in both these cases the parsed logical form is prefixed with `name`. -This denotes that the logical form has a `NameValue` denotation, which simply means that it represents a logical predicate/entity -and not some other primitive (e.g., a `NumberValue` or `DateValue`). - -### SimpleLexiconFn (reliant on having a SimpleLexicon) - -This function allows you to map phrases/tokens to logical entities in a more scalable manner than hard-coding everything with `ConstantFn` directly in your grammar. 
-Suppose you have a `SimpleLexicon` loaded from a JSON file containing the entries: - - {'lexeme' : 'barack obama', 'formula' : 'fb:en.barack_obama', 'type' : 'fb:people.person'} - {'lexeme' : 'born in', 'formula' : 'fb:people.person.place_of_birth', 'type' : '(-> fb:location.location fb:people.person)'} - -then the rule - - (rule $ROOT ($PHRASE) (SimpleLexiconFn (type fb:people.person))) - -will parse the following: - - barack obama # (name fb:en.barack_obama) - -and the rule - - (rule $ROOT ($PHRASE) (SimpleLexiconFn (type (-> fb:location.location fb:people.person)))) - -will parse - - born in # (name fb:people.person.place_of_birth) - -Thus, the function of `SimpleLexiconFn` is similar to `ConstantFn`, but it facilitates more modularity -since the lexical items are contained within the `SimpleLexicon` (loaded from a JSON) instead of being hard-coded into the grammar. - -In the above examples, this type annotation is not particularly important, but consider the case where we have the following lexical entries: - - {"lexeme" : "Lincoln", "formula" : "fb:en.abraham_lincoln", "type" : "fb:people.person"} - {"lexeme" : "Lincoln", "formula" : "fb:en.lincoln_nevada", "type" : "fb:location.location"} - -Both of these lexical entries have the same trigger phrase (i.e., lexeme). -However, they correspond to very distinct entities (one is a former president, while the other is a city in Nevada). -By leveraging the type annotation, we can specify which entity we actually want to trigger. -For example, the rule - - (rule $ROOT ($PHRASE) (SimpleLexiconFn (type fb:location.location))) - -would ensure that we only trigger the entity that corresponds to the city. 
-Alternatively, if we wanted both entities to be triggered (e.g., if type-checking later on will handle the ambiguity), -then we could write: - - (rule $ROOT ($PHRASE) (SimpleLexiconFn (type fb:type.any))) - -### NumberFn (partially reliant on the CoreNLPAnalyzer) - -`NumberFn` is the basic primitive function for parsing numbers from an utterance. -For example, - - (rule $ROOT (NumberFn)) - -would parse the following strings into the following logical forms: - - - 2.3 # (number 2) - four # (number 4) - 3 million # (number 3000000) - -The last one works only if `-languageAnalyzer corenlp.CoreNLPAnalyzer` is set. - -### DateFn (reliant on the CoreNLPAnalyzer) - -`DateFn` is the basic primitive function for parsing date values. -For example, - - (rule $ROOT (DateFn)) - -would parse the following strings into the following logical forms: - - Thursday, December 4th # (date -1 12 4) - The 12th of Nov, 2012 # (date 2012 11 12) - August # (date -1 8 -1) - -Note that the logical representation of dates here (defined in the `DateValue` class) is distinct from Java's date (or JodaTime etc.) -and that only dates not times are represented. -The logical form representation of a `DateValue` is simply `(date year month day)`, with one-based indexing for the months. -The above examples also illustrate how missing aspects of dates are treated: -if any part of a date (day, month, or year) is left unspecified, then the `DateValue` logical representation inserts -1s in those positions. - -## Filtering semantic functions - -You might find that your grammar is generating far too many candidate -derivations, resulting in the correct derivations falling off the beam. After -all, the number of derivations does tend to grow exponentially with the -sentence length. - -The semantic functions described below help to ameliorate this issue by -allowing you to filter down the phrases that are considered. 
In other words, -they help control how many logical forms you generate by allowing you to -specify more precise situations in which rules should fire. - -This is particularly important when your lexicon (like our EMNLP 2013 system) -contains a lot of noisy entries. - -### FilterSpanLengthFn - -This semantic function allows you to filter out the length of spans that you trigger on. For example, - - (rule $Length2Span ($PHRASE) (FilterSpanLengthFn 2)) - -produces a new category `$Length2Span` which contains only phrases of length 2, -which is useful for limiting the number of phrases you compute on. -For instance, we could combine this with the simple lexicon function we used above and say - - (rule $ROOT ($Length2Span) (SimpleLexiconFn (type fb:type.any))) - -and this would parse - - barack obama # (name fb:en.barack_obama) - -but would make running the grammar faster than if we replaced `$Length2Span` with `$PHRASE`, since `$PHRASE` will try all possible lengths, -and we know that our lexicon only contains length 2 phrases. - -### FilterPosTagFn (relies on CoreNLPAnalyzer) - -`FilterPosTagFn` allows you to extract spans or tokens that are composed only of certain parts of speech (POS) tags. -For example, - - (rule $ProperNoun ($TOKEN) (FilterSpanPosTag token NNP NNPS)) - -produces a new category `$ProperNoun` which contains words that are proper nouns. -This would accept phrases like: - - honolulu - obama - -and assign them to the `$ProperNoun` category. - -Note the options that are passed: `token` specifies that we want to look at single tokens and `NNP NNPS` are the POS tags for proper nouns. -If you wanted to get multi-word proper nouns then you would use - - (rule $ProperNounPhrase ($PHRASE) (FilterSpanPosTag span NNP NNPS)) - -Note that we both changed the RHS category to `$PHRASE` and changed the `token` option to `span`. 
-This would accept things like - - honolulu hawaii - barack obama - -but note that the entire span must have the same POS tag (that is in one of the accepted categories). -Also, it will only accept the maximal span, meaning that `barack obama` will NOT be parsed three times as `barack`, `obama`, and `barack obama`. - -Note also that, unlike the previous examples for simpler semantic functions, the logical form is not written out next to the phrases. -This is because the above rule does not rewrite to `$ROOT` (i.e., it does not have `$ROOT` as a LHS), -and thus, this rule would accept the example phrases above, but it does not parse them directly to a logical form. -Other rules which take `$ProperNounPhrase` as a RHS are necessary to complete a grammar with this rule. -Most of the filtering functions discussed below will also have this flavor. - -### FilterNerSpanFn (relies on CoreNLPAnalyzer) - -This `SemanticFn` allows you to extract spans that are named entities (NEs). -For example, - - (rule $Person ($PHRASE) (FilterNerSpan PERSON)) - -produces a new category (`$Person`) which contains phrases that the CoreNLPAnalyzer labels as referring to people. -This would accept phrases like - - barack obama - michelle obama - -Note that unlike our earlier examples where we parsed these phrases corresponding to an entity/person, -the LHS of our rule in this case is a new category `$Person`. -The idea is that you would use this new category to restrict what you apply rules on further up in the derivation. -For example, you may want to change our earlier `SimpleLexiconFn` example rule to - - (rule $ROOT ($Person) (SimpleLexiconFn (type fb:people.person))) - -since you know that this rule should be restricted to spans that may contain a mention of a person.
- -### ConcatFn - -This function allows you to concatenate strings, giving you more fine-grained control over how categories are constructed over spans, -which is useful for doing things that are not possible with the other filter functions. -Concretely, say you wanted to look for spans of the utterance that may correspond to binaries, -you would probably want to look at verbs (e.g., "lived", "born"). -However, triggering only on verbs is too restrictive; for example, some binaries take their meaning from verb-preposition pairs. -In fact, the example of "born in" used previously is exactly such a phrase. -In order to handle this case, we could use `ConcatFn` with the following rules: - - (rule $Verb ($TOKEN) (FilterSpanPosTag token VB VBD VBN VBG VBP VBZ)) - (rule $Prep ($TOKEN) (FilterSpanPosTag token IN)) - (rule $VerbPrep ($Verb $Prep) (ConcatFn " ")) - -These rules would create a new category (`$VerbPrep`) that contains two word phrases consisting of a verb followed by a preposition. -One way we could leverage this category would be with a rule that uses `SimpleLexiconFn`, such as the following: - - (rule $Relation ($VerbPrep) (SimpleLexiconFn (type (-> fb:type.any fb:type.any)))) - -This rule basically says that when we look for binaries, we shouldn't look at any phrase, we should restrict to phrases consisting -of verbs followed by prepositions. -Of course, there are other grammatical constructions that could give rise to a binary, and you would need to add new rules/categories for these. -Nevertheless, using filtering to narrow down to the spans which trigger certain rules is a very powerful tool. - -## Compositional semantic functions - -Compositional semantic functions are used to create larger derivations from -smaller ones. - -It is important to distinguish these semantic functions, which also work -compositionally, from the definition of the recursive definition of logical -forms in the previous section. 
One semantic function could easily construct a -logical form that involved multiple logical compositions, for example, going -from *city* (`fb:location.citytown`) to *largest city* (`(argmax 1 1 -(fb:type.object.type fb:location.citytown) fb:location.location.area)`). - -### IdentityFn - -`IdentityFn` takes a logical form and returns the same logical form. - -This is the most basic composition function, useful for refactoring grammar categories. -For example, suppose you have the following rule that parses things into entities. - - (rule $Entity ...) - -You can then write: - - (rule $ROOT ($Entity) (IdentityFn)) - -### JoinFn - -`JoinFn` can be used in a number of different ways. - -#### Joining binaries and unaries - -The first way in which `JoinFn` combines a binary and a unary is as follows. -Recall that each binary (e.g., `fb:people.person.place_of_birth`) has a head -(also called arg0 or return) and modifier (also called arg1 and argument). - -Just for clarity, the concrete edge in the knowledge graph is: - - head -- [fb:people.person.place_of_birth] --> modifier - -and the type of `fb:people.person.place_of_birth` is - - (-> modifier_type head_type) - -The way to think about a binary is not a function that returns the places of -births of people, but rather as a relation connecting people (in the head -position) with locations (in the modifier position). - -When applying `JoinFn`, the unary can be joined with either `arg0` or `arg1`, -and either the binary can come before the unary in the sentence or vice-versa. -Thus, there are four ways in which a binary may be joined with a unary. 
-These four possibilities are defined via the following two options: - - - Ordering: either `binary,unary` or `unary,binary` - - Position: `unaryCanBeArg0` or `unaryCanBeArg1` or both - -To illustrate the two most common settings for these options, -suppose we have the following setup: - - (rule $Entity (barack obama) (ConstantFn fb:en.barack_obama)) - (rule $Entity (honolulu) (ConstantFn fb:en.honolulu)) - (rule $Binary (birthplace) (ConstantFn fb:people.person.place_of_birth)) - -then we can write - - (rule $ROOT ($Binary $Entity) (JoinFn binary,unary unaryCanBeArg1)) - -These rules will parse the following utterance as follows: - - birthplace honolulu # (fb:people.person.place_of_birth fb:en.honolulu) - -This corresponds to standard forward application of a binary, and indeed we can -equivalently write: - - (rule $ROOT ($Relation $Entity) (JoinFn forward)) - -Alternatively, we can alter the arguments and write - - (rule $ROOT ($Entity $Relation) (JoinFn unary,binary unaryCanBeArg1)) - -or equivalently - - (rule $ROOT ($Entity $Relation) (JoinFn backward)) - -and this will parse the following into the same logical form: - - honolulu birthplace # (fb:people.person.place_of_birth fb:en.honolulu) - -If we use - - (rule $ROOT ($Relation $Entity) (JoinFn binary,unary unaryCanBeArg0)) - -we can parse: - - birthplace barack obama # (!fb:people.person.place_of_birth fb:en.barack_obama) - -#### Macro substitution - -The second way to use `JoinFn` is as macro substitution. Simply add the -`betaReduce` flag. 
For example: - - (rule $LambdaRelation (birthplace) (ConstantFn (lambda x (!fb:people.person.place_of_birth (var x))))) - (rule $ROOT ($LambdaRelation $Entity) (JoinFn forward betaReduce)) - -This will parse: - - birthplace barack obama # (!fb:people.person.place_of_birth fb:en.barack_obama) - -#### Multi-argument macro substitution - -So far, `JoinFn` takes two arguments, but we can also specify one of the arguments: - - (rule $ROOT (birthplace $Entity) (JoinFn forward betaReduce (arg0 (lambda x (!fb:people.person.place_of_birth (var x)))))) - -This rule also parses *birthplace barack obama* in the expected way. An -equivalent and much clearer way to write this is the following, where we omit -`JoinFn` completely: - - (rule $ROOT (birthplace $Entity) (lambda x (!fb:people.person.place_of_birth (var x)))) - -Here is another example that takes multiple arguments and forms the set -containing two entities: - - (rule $ROOT ($Entity and $Entity) (lambda x (lambda y (or (var x) (var y))))) - -Going forward, you are encouraged to use this last form since it is most -transparent. In the future, it might even be backed by another `SemanticFn` -since `JoinFn` is getting a bit out of hand. - -### MergeFn - -`MergeFn` simply constructs a `MergeFormula`. For example, consider the -following rules: - - (rule $Set (female) (ConstantFn (fb:people.person.gender fb:en.female))) - (rule $Set (scientist) (ConstantFn (fb:people.person.profession fb:en.scientist))) - (rule $Set ($Set $Set) (MergeFn and)) - -We could then parse: - - female scientist # (and (fb:people.person.gender fb:en.female) (fb:people.person.profession fb:en.scientist)) - -We can also use the following rule - - (rule $Set ($Set or $Set) (MergeFn or)) - -to do set union: - - female or scientist # (or (fb:people.person.gender fb:en.female) (fb:people.person.profession fb:en.scientist)) - -### SelectFn - -`SelectFn` acts as a utility composition function that aids in refactoring grammars. 
-More specifically, it can be used to skip over certain categories or parts of an utterance (e.g., stop words) in a controlled manner. -For example, in a question-answering setup, you may have many utterances that -start with the word "what" (or "who" etc.), but suppose this word does not -really convey any semantic content (this is not quite true). -To handle this, we can write: - - (rule $Wh (what) (ConstantFn null)) - (rule $Wh (who) (ConstantFn null)) - (rule $ROOT ($Wh $Set) (SelectFn 1)) - -These rules allow us to simply ignore the presence of *what* or *who*. - -### Freebase-specific semantic functions - -There are two semantic functions, `freebase.LexiconFn` and `freebase.BridgeFn` -which are used in our first semantic parsing applications, but they probably -should be avoided unless you're specifically doing Freebase QA. Even in that -case, the main thing you should think about is: - - (rule $Entity ($PHRASE) (LexiconFn fbsearch)) - -which uses the Freebase Search API to look up entities. Be aware here that the -API will generously return many candidate entities for any string you give it, -so if you are getting too many results, you should use filtering to constrain -the number of spans you look at. - -### Context-specific semantic functions - -`FuzzyMatchFn` is used for matching words with a context-specific graph. -More documentation coming soon. - -#### ContextFn - -`ContextFn` generates logical forms from the context. -Concretely, an `Example` optionally has an associated `ContextValue`, which contains an ordered list of `n` `Exchange`s. -Each `Exchange` contains an utterance, a logical form (i.e., a `Formula`), and its denotation (i.e., a `Value`). - -A `ContextValue` thus represents information that is known prior to parsing the utterance in an `Example`. -For example, when running SEMPRE in interactive mode, the `ContextValue` -contains information about the last (`n`) utterance(s) that was/were parsed. -`ContextValue`s can also be provided in a dataset.
- -`ContextFn` can be used to resolve anaphora. -For example, suppose you are running SEMPRE in interactive mode: - - User: where has barack obama lived - System: (fb:people.person.places_lived fb:en.barack_obama) -The `ContextValue` of the interactive system now contains an `Exchange` corresponding to this utterance, logical form, and the computed denotation, -i.e. a list of cities including `fb:en.honolulu` and `fb:en.washington_dc`. -(Note that by default only one `Exchange`, specifically the most recent one, is stored in the system's `ContextValue`). - -Now, suppose that you then say: `where was he born`. -Parsing this utterance requires resolving the anaphoric reference `he`, and `ContextFn` facilitates this. -Specifically, we would make the following rule: - - (rule $ContextEntity (he) (ContextFn (depth 0) (type fb:people.person))) - -This rule will trigger on the word `he` and retrieve the unary `fb:en.barack_obama` from the logical -formula in the `ContextValue` (since it has type `fb:people.person`). -The `depth` argument specifies how deep/tall of a logical form subtree you want to retrieve. -In the above example, a depth of 0 was specified since we wanted to retrieve a -single bare entity (i.e., a leaf). - -Alternatively, you could have said: `and which of those cities is the biggest?`. -Here, we would want to essentially "pull out" the entire logical form from the context. -Or more specifically, we want to pull out the join of the `fb:people.person.places_lived` binary and the `fb:en.barack_obama` entity. -To do this, we could make the following rule: - - (rule $ContextSetFromJoin (those cities) (ContextFn (depth 1) (type fb:places_lived))) - -This rule would trigger on the phrase `those cities` and retrieve the logical form `(fb:people.person.places_lived fb:en.barack_obama)`. -Note that compared to the previous example, we changed both the type and the depth arguments.
-Here, a depth of 1 indicates that we want a subtree of depth 1 (i.e., a tree with one level before the leaves), -which corresponds, for example, to a logical form resulting from a single join. - -# Grammar - -So far, we have talked about grammars as being a set of rules. But SEMPRE -`Grammar`s have additional supporting functionality which makes them easier to -use. A grammar is specified in a `.grammar` file and is loaded by the -command-line flag `-Grammar.inPaths`. - -## Binarization - -First, the `Grammar` performs automatic binarization of the grammar, so if you have a rule: - - (rule $ROOT ($Subject $Verb $Object) ...) - -This rule gets converted to a binarized grammar where each right-hand-side has at most two elements: - - (rule $ROOT ($Subject $Intermediate) ...) - (rule $Intermediate ($Verb $Object) ...) - -The consequence is that the grammar you write will not be exactly the same as -the grammar that the parser receives. - -## Macros - -If you find yourself writing a lot of repeated elements: - - (rule (person) (ConstantFn (fb:type.object.type fb:people.person))) - (rule (place) (ConstantFn (fb:type.object.type fb:location.location))) - (rule (location) (ConstantFn (fb:type.object.type fb:location.location))) - -then you might want to use macros (which are all prefixed with `@`): - - (def @type fb:type.object.type) - (def @person (@type fb:people.person)) - (def @place (@type fb:location.location)) - -so that you can simply write the three rules as follows: - - (rule (person) (ConstantFn @person)) - (rule (place) (ConstantFn @place)) - (rule (location) (ConstantFn @place)) - -The general form for defining a macro: - - (def @name value) - -Note that macros cannot take arguments. - -## For loops - -If you are defining multiple rules, with small variations, then you can use for -loops to create many rules.
For example: - - (for @x (place location) - (rule (@x) (ConstantFn (fb:type.object.type fb:location.location))) - ) - -which is equivalent to: - - (rule (place) (ConstantFn (fb:type.object.type fb:location.location))) - (rule (location) (ConstantFn (fb:type.object.type fb:location.location))) - -The general form is: - - (for @ ( ... ) - - ... - - ) - -## Conditionals - -Grammar files do not have full conditionals (the point is not to develop a -full-blown programming language here). But if you want a grammar which is -parametrized by complexity, then you can use conditionals to include or exclude -statements: - - (when compositional - (rule $Set ($Set $Set) (MergeFn and)) - ) - - (when (not compositional) - (rule $Set ($Entity $Entity) (MergeFn and)) - ) - -To enable the first rule (and disable the second), you would pass -`-Grammar.tags compositional` on the command-line. - -In general: - - (when - - ... - - ) - -where is one of the following - - - (not ) - (and ) - -## Includes - -You can use the statement: - - (include ) - -to break up your grammar file into multiple files. It is not advisable to use -many small files; rather use the `when` construction. - -# Types - -SEMPRE uses a type system to associate each logical form with a type, which is -used to rule out obviously bad logical forms (e.g., `(+ fb:en.barack_obama 3)` -or `(fb:location.location.containedby fb:en.barack_obama)`). - -Types are in a sense redundant with the grammar categories (we could very well -use `$Int` to capture only logical forms that compute integers. But there are -a few differences: - -- Categories could be based on how the natural language, whereas types are a - pure function of the logical forms (e.g., `$Preposition` and `$Verb` might - both generate logical forms that map to the type `(-> fb:type.any - fb:type.any)`. - -- The parser can handle categories efficiently (e.g., for coarse-to-fine - parsing); it is not necessary to compute the logical form. 
Types only exist - with logical forms. - -Each type is an instance of `SemType`. - -## Atomic types (`AtomicSemType`) - -All the **atomic types** are related via a partial ordering (the subtype -relation), which is easiest to think of as a tree structure: - - | fb:type.any - | | fb:type.boolean - | | fb:type.number - | | | fb:type.int - | | | fb:type.float - | | fb:type.datetime - | | fb:type.text - | | fb:common.topic - | | | fb:people.person - | | | fb:location.location - | | | ... - -Atomic types can be combined to create higher-order types. - -## Pair types (`FuncSemType`) - -A **pair type** looks like this: - - (-> modifier_type head_type) - -and is generally used to represent a binary relation, such as -`fb:people.person.place_of_birth`: - - (-> fb:location.location fb:people.person) - -This type might seem a bit backwards, but it is important to remember that we -should think of binaries not as functions but as relations. - -In fact, the pair type looks like a function, but really behaves like a pair -type (this is more appropriate for database queries and makes it easier to -reason about). - -## Union types (`UnionSemType`) - -A **union type** represents a disjunction over many types: - - (union type_1 ... type_n) - -Note that `(union fb:type.int fb:type.float)` is a supertype of `fb:type.int`. - -## Special types - -- Top type (top): corresponds to the most general type (including all atomic - and function types). - -- Bottom type (bot): corresponds to type failure. - -## Other notes - -Some of these types are inherited from Freebase (e.g., `fb:type.float`), but -others are SEMPRE-specific and made in the spirit of the Freebase names. -Identifiers begin with `fb:` even if you are working with an application which -has little to do with Freebase, just for notational compatibility. - -Types are assigned to logical forms via type inference (class `TypeInference` -in SEMPRE), and are importantly not part of the logical form itself.
This -gives us the flexibility of experimenting with different type systems without -actually changing the meaning. Type inference in general is computationally -expensive, so the default implementation of `TypeInference` sometimes just -punts. - -The main operation one can perform with two types is to compute their *meet*, -which is the most general type which is a subtype of both types. For example: - - (-> fb:location.location fb:type.any) meet (-> top fb:people.person) - -is - - (-> fb:location.location fb:people.person) - -# Running SEMPRE - -The main entry point to SEMPRE is `run`. Run is based on the execrunner Ruby -library from fig, which provides a small domain-specific language for -generating command-line options. (See `fig/lib/execrunner.rb` for more -documentation). - - ./run @mode= ... - -The mode specifies the specific way you want to use SEMPRE. - -## Execution directories - -When you run something like `./run @mode=freebase ...`, an execution directory -will be created in - - state/execs/.exec, - -where is a unique identifier. This directory contains the stdout in the -`log` file, as well as the grammar and the parameters. - -## Web interface - -If you want to launch a demo, then you probably want to use the web interface, -which can be triggered by using the `-server` option. For example: - - ./run @mode=simple -interactive false -server true diff --git a/examples/lassie/sempre/LICENSE.txt b/examples/lassie/sempre/LICENSE.txt deleted file mode 100644 index 49f81325f3..0000000000 --- a/examples/lassie/sempre/LICENSE.txt +++ /dev/null @@ -1,12 +0,0 @@ -Copyright (c) 2013, Stanford University. - -Licensed under the Apache License, Version 2.0 (the "License"); you may not use -this file except in compliance with the License. 
You may obtain a copy of the -License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software distributed -under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -CONDITIONS OF ANY KIND, either express or implied. See the License for the -specific language governing permissions and limitations under the License. diff --git a/examples/lassie/sempre/README.md b/examples/lassie/sempre/README.md deleted file mode 100644 index 64dba683e4..0000000000 --- a/examples/lassie/sempre/README.md +++ /dev/null @@ -1,188 +0,0 @@ -# SEMPRE 2.4: Semantic Parsing with Execution - -## What is semantic parsing? - -A semantic parser maps natural language utterances into an intermediate logical -form, which is "executed" to produce a denotation that is useful for some task. - -A simple arithmetic task: - -- Utterance: *What is three plus four?* -- Logical form: `(+ 3 4)` -- Denotation: `7` - -A question answering task: - -- Utterance: *Where was Obama born?* -- Logical form: `(place_of_birth barack_obama)` -- Denotation: `Honolulu` - -A virtual travel agent task: - -- Utterance: *Show me flights to Montreal leaving tomorrow.* -- Logical form: `(and (type flight) (destination montreal) (departure_date 2014.12.09))` -- Denotation: `(list ...)` - -By parsing utterances into logical forms, we obtain a rich representation that -enables much deeper, context-aware understanding beyond the words. With the -rise of natural language interfaces, semantic parsers are becoming increasingly -more powerful and useful. - -## What is SEMPRE? - -SEMPRE is a toolkit that makes it easy to develop semantic parsers for new -tasks. The main paradigm is to learn a feature-rich discriminative semantic -parser from a set of utterance-denotation pairs. One can also quickly -prototype rule-based systems, learn from other forms of supervision, and -combine any of the above. 
- -If you use SEMPRE in your work, please cite: - - @inproceedings{berant2013freebase, - author = {J. Berant and A. Chou and R. Frostig and P. Liang}, - booktitle = {Empirical Methods in Natural Language Processing (EMNLP)}, - title = {Semantic Parsing on {F}reebase from Question-Answer Pairs}, - year = {2013}, - } - -SEMPRE has been used in the following papers: - -- J. Berant and A. Chou and R. Frostig and P. Liang. [Semantic parsing on - Freebase from question-answer - pairs](http://cs.stanford.edu/~pliang/papers/freebase-emnlp2013.pdf). EMNLP, - 2013. - This paper introduced SEMPRE 1.0, applied it to question answering on - Freebase, and created the WebQuestions dataset. The paper focuses on scaling - up semantic parsing via alignment and bridging, and does not talk about the - SEMPRE framework at all. To reproduce those results, check out SEMPRE 1.0. -- J. Berant and P. Liang. [Semantic Parsing via - Paraphrasing](http://cs.stanford.edu/~pliang/papers/paraphrasing-acl2014.pdf). - ACL, 2014. - This paper also used SEMPRE 1.0. The paraphrasing model is somewhat of an - offshoot, and does not use many of the core learning and parsing utilities in - SEMPRE. To reproduce those results, check out SEMPRE 1.0. - -Please refer to the [project page](https://nlp.stanford.edu/software/sempre/) for a more complete list. - -## Where do I go next? - -- If you're new to semantic parsing, you can learn more from the [background - reading section of the tutorial](TUTORIAL.md). -- Install SEMPRE using the instructions under **Installation** below. -- Walk through the [tutorial](TUTORIAL.md) - to get a hands-on introduction to semantic parsing through SEMPRE. -- Read the complete [documentation](DOCUMENTATION.md) - to learn about the different components in SEMPRE. - -# Installation - -## Requirements - -You must have the following already installed on your system. 
- -- Java 8 (not 7) -- Ant 1.8.2 -- Ruby 1.8.7 or 1.9 -- wget -- make (for compiling fig and Virtuoso) -- zip (for unzipping downloaded dependencies) - -Other dependencies will be downloaded as you need them. SEMPRE has been tested -on Ubuntu Linux 12.04 and MacOS X. Your mileage will vary depending on how -similar your system is. - -## Easy setup - -1. Clone the GitHub repository: - - git clone https://github.com/percyliang/sempre - -2. Download the minimal core dependencies (all dependencies will be placed in `lib`): - - ./pull-dependencies core - -3. Compile the source code (this produces `libsempre/sempre-core.jar`): - - ant core - -4. Run an interactive shell: - - ./run @mode=simple - - You should be able to type the following into the shell and get the answer `(number 7)`: - - (execute (call + (number 3) (number 4))) - -To go further, check out the [tutorial](TUTORIAL.md) and then the [full -documentation](DOCUMENTATION.md). - -## Virtuoso graph database - -If you will be using natural language to query databases (e.g., Freebase), then -you will also need to set up your own Virtuoso database (unless someone already -has done this for you): - -For Ubuntu, follow this: - - sudo apt-get install -y automake gawk gperf libtool bison flex libssl-dev - - # Clone the repository - ./pull-dependencies virtuoso - - # Make and install - cd virtuoso-opensource - ./autogen.sh - ./configure --prefix=$PWD/install - make - make install - cd .. - -On OS X, you can install Virtuoso using Homebrew by following the instructions -[here](http://carsten.io/virtuoso-os-on-mac-os/) - -To have SEMPRE interact with Virtuoso, the required modules need to be compiled as follows: - - ./pull-dependencies core corenlp freebase - ant freebase - -# Contribute - -To contribute code or resources to SEMPRE: - -- Create a fork of the repository. If you already have a fork, - it is a good idea to sync with the upstream repository first. -- Push your changes to a new branch in your fork. 
-- Start a pull request: go to your branch on the GitHub website, - then click "New pull request". Please specify the `develop` branch - of the upstream repository. - -# ChangeLog - -Changes from SEMPRE 1.0 to SEMPRE 2.0: - -- Updated tutorial and documentation. -- Refactored into a core part for building semantic parsers in general; - interacting with Freebase and Stanford CoreNLP are just different modules. -- Removed fbalignment (EMNLP 2013) and paraphrase (ACL 2014) components to - avoid confusion. If you want to reproduce those systems, use SEMPRE 1.0. - -Changes from SEMPRE 2.0 to SEMPRE 2.1: - -- Added the `tables` package for the paper *Compositional semantic parsing on semi-structured tables* (ACL 2015). -- Added an `overnight` package for the paper *Building a semantic parser overnight* (ACL 2015). - -Changes from SEMPRE 2.1 to SEMPRE 2.2: - -- Added code for the paper *Inferring Logical Forms From Denotations* (ACL 2016). - -Changes from SEMPRE 2.2 to SEMPRE 2.3: - -- Added the `interactive` package for the paper *Naturalizing a programming language through interaction* (ACL 2017). - -Changes from SEMPRE 2.3 to SEMPRE 2.3.1: - -- Modified the `tables` module to resemble SEMPRE 2.1, effectively making it work again. - -Changes from SEMPRE 2.3.1 to SEMPRE 2.4: - -- Added the `cprune` package for the paper *Macro Grammars and Holistic Triggering for Efficient Semantic Parsing* (EMNLP 2017). diff --git a/examples/lassie/sempre/TUTORIAL.md b/examples/lassie/sempre/TUTORIAL.md deleted file mode 100644 index 75151e07c5..0000000000 --- a/examples/lassie/sempre/TUTORIAL.md +++ /dev/null @@ -1,855 +0,0 @@ -# SEMPRE 2.0 tutorial - -In this tutorial, we will provide a brief tour of SEMPRE. This tutorial is -very much about the mechanics of the system, not about the linguistics or -semantic parsing from a research point of view (for those, see the recommended -readings at the end of this document). 
Once you have gone through the tutorial, -you can read the [full documentation](DOCUMENTATION.md). - -We will construct a semantic parser to understand a toy subset of natural -language. Concretely, the system we will build will have the following -behavior: - -- Input: *What is three plus four?* -- Output: 7 - -Recall that in semantic parsing, *natural language utterances* are mapped into -*logical forms* (think programs), which are executed to produce some -*denotation* (think return value). - -We have assumed you have already [installed](README.md#installation) -SEMPRE and can open up a shell: - - ./run @mode=simple - -This will put you in an interactive prompt where you can develop a system and -parse utterances into tiny Java programs. Note: you might find it convenient -to use `rlwrap` to get readline support. - -Just to provide a bit of transparency: The `run` script simply creates a -shell command and executes it. To see which command is run, do: - - ./run @mode=simple -n - -This should print out: - - java -cp libsempre/*:lib/* -ea edu.stanford.nlp.sempre.Main -Main.interactive - -You can pass in additional options: - - ./run @mode=simple -Parser.verbose 3 # Turn on more verbose debugging for the parser - ./run @mode=simple -help # Shows all options and default values - -## Section 1: Logical forms and denotations - -A **logical form** (class `Formula` in SEMPRE) is a hierarchical expression. 
-In the base case, we have primitive logical forms for representing concrete -values (booleans, numbers, strings, dates, names, and lists): - - (boolean true) - (number 3) - (string "hello world") - (date 2014 12 8) - fb:en.barack_obama - (list (number 1) (number 2)) - -Logical forms can be constructed recursively using `call`, which takes a -function name followed by arguments, which are themselves logical forms: - - (call + (number 3) (number 4)) - (call java.lang.Math.cos (number 0)) - (call .indexOf (string "what is this?") (string is)) - (call .substring (string "what is this?") (number 5) (number 7)) - (call if (call < (number 3) (number 4)) (string yes) (string no)) - -In general: - - (call ... ) - -Later, we will see other ways (besides `call`) of building more complex logical -forms from simpler logical forms. - -Note that each logical form is painfully explicit about types. You would -probably not want to program directly in this language (baby Java using -LISP-like notation), but that is not the point; we will later generate these -logical forms automatically from natural language. - -We can execute a logical form by typing the following into the interactive -prompt: - - (execute (call + (number 3) (number 4))) - -In general: - - (execute ) - -This should print out `(number 7)`, which we refer to as the *denotation* -(class `Value` in SEMPRE) of the logical form. Try to execute the other -logical forms and see what you get. - -**Exercise 1.1**: write a logical form that computes the first word -("compositionality") in the string "compositionality is key". - -### Lambda expressions - -So far, we have been representing logical forms that produce a single output -value (e.g., "compositionality"). But one of the key ideas of having programs -(logical forms) is the power of abstraction — that we can represent -*functions* that compute an output value for each input value. 
- -For example, the following logical form denotes a function that takes a number -and returns its square: - - (lambda x (call * (var x) (var x))) - -If you execute this logical form directly, you will get an error, because the -denotation of this logical form is a function, which is not handled by the -`JavaExecutor`. However, we can apply this function to an argument `(number -3)`: - - ((lambda x (call * (var x) (var x))) (number 3)) - -This logical form now denotes a number. Executing this logical form should -yield `(number 9)`. - -In general: - - ( ) - -**Exercise 1.2**: Adapt your logical form from Exercise 1.1 to compute the -first word of any string. Your answer should be `(lambda x ...)`. Create a -logical form that applies this on the argument `(string "compositionality is -key")`. - -Technical note: these lambda expressions are actually just doing macro -substitution, not actually representing higher-order functions; since there are -no side effects here, there is no difference. - -This concludes the section on logical forms and denotations. We have presented -one system of logical forms, which are executed using `JavaExecutor`. The -system supports other types of logical forms, for example, those which encode -SPARQL database queries for question answering (we will get to that later). -Note that there is no mention of natural language yet... - -## Section 2: Parsing utterances to logical forms - -Having established the nature of logical forms and their denotations, let us -turn to the problem of mapping a natural language utterance into a logical -form. Again, the key framework is *compositionality*, which roughly says that -the meaning of a full sentence is created by combining the meanings of its -parts. For us, meanings are represented by logical forms. 
- -We will start by defining a **grammar** (class `Grammar` in SEMPRE), which is a -set of **rules** (class `Rule` in SEMPRE), which specify how to combine logical -forms to build more complex ones in a manner that is guided by the natural -language. - -We will run through some examples to give you a feel for how things work, -and then go into the details. First, let us add a rule to the grammar by -typing the following into the interactive prompt: - - (rule $ROOT (three) (ConstantFn (number 3))) - -Now to parse an utterance, just type it in to the interactive prompt: - - three - -The parser should print out (among other information) a line that shows that -the utterance was parsed successfully into a **derivation** (class `Derivation` -in SEMPRE), which importantly carries the correct logical form `(number 3.0)`: - - (derivation (formula (number 3.0)) (value (number 3.0)) (type fb:type.number)) - -Now type in the utterance: - - four - -You should get no results because no rule matches `four`. To fix that, let -us create a more general rule: - - (rule $ROOT ($PHRASE) (NumberFn)) - -This rule says for any phrase (sequence of consecutive tokens), pass it to a special -function called `NumberFn`, which will transform the phrase string into a new -derivation representing a number. - -Now, you can parse the following: - - four - 20 - -Note: if you now type in `three`, you should get two derivations that yield the -same answer, one coming from each rule. Note that `twenty-five million` will -not parse because we are using `SimpleLanguageAnalyzer`. Later, we can use -Stanford CoreNLP to improve the basic linguistic capabilities. - -So far, we have only parsed utterances using one rule, but the true power of -these grammars comes from combining multiple rules. 
Copy and paste in the -following rules: - - (rule $Expr ($PHRASE) (NumberFn)) - (rule $Operator (plus) (ConstantFn (lambda y (lambda x (call + (var x) (var y)))))) - (rule $Operator (times) (ConstantFn (lambda y (lambda x (call * (var x) (var y)))))) - (rule $Partial ($Operator $Expr) (JoinFn forward)) - (rule $Expr ($Expr $Partial) (JoinFn backward)) - (rule $ROOT ((what optional) (is optional) $Expr (? optional)) (IdentityFn)) - -Now try typing in: - - What is three plus four? - -The output should be: - - (number 7) - -We can parse longer sentences. Type in: - - What is three plus four times two? - -There should be two derivations, yielding `(number 14)` and `(number 11)`, -corresponding to either combining *three plus four* first or *four times two* -first. Note that this is expected because we have not encoded any order of -operations anywhere. - -Hopefully that should give you a sense of what parsing looks like. Let us now -take a closer look. At the end of the day, a grammar declaratively specifies a -mapping from utterances to a set of candidate derivations. A **parser** (class -`Parser` in SEMPRE) is an actual algorithm that takes the grammar and generates -those derivations. Recall that a derivation looks like this: - - (derivation (formula (number 3.0)) (value (number 3.0)) (type fb:type.number)) - -Formally, each derivation produced by the parser has the following properties: - -1. Span i:j (e.g., 0:1): specifies the contiguous portion of the input - utterance (tokens i to j-1) that the Derivation is constructed from. -2. Category (e.g., `$ROOT`): categories place hard constraints on what - Derivations can be combined. -3. Type (e.g., `(fb:type.number)`): typically more fine-grained than the - category, and is generated dynamically. -4. Logical form (e.g., `(call + (number 3) (number 4))`): what we normally - think of as the output of semantic parsing. -5. Value (e.g., `(number 7)`): the result of executing the logical form. 
- -There are some special categories: - -1. `$TOKEN`: matches a single token of the utterance. Formally, the parser - builds a Derivation with category `$TOKEN` and logical form corresponding to - the token (e.g., `(string three)`) for each token in the utterance. -2. `$PHRASE`: matches any contiguous subsequence of tokens. The logical form created - is the concatenation of those tokens (e.g., `(string "twenty-five million")`). -3. `$LEMMA_TOKEN`: like `$TOKEN`, but the logical form produced is a lemmatized - version of the token (for example $TOKEN would yield *cows*, while - $LEMMA_TOKEN would yield *cow*). -4. `$LEMMA_PHRASE`: the lemmatized version of `$PHRASE`. -5. `$ROOT`: Derivations that have category `$ROOT` and span the entire utterance - are executed, scored, and sent back to the user. - -Now let us see how a grammar specifies the set of derivations. -A grammar is a set of rules, and each rule has the following form: - - (rule ( ... ) ) - -1. Target category (e.g., `$ROOT`): any derivation produced by this rule is - labeled with this category. `$ROOT` is the designated top-level category. - Derivations of type `$ROOT` that span the entire utterance are returned to the - user. -2. Source sequence (e.g., `three`): in general, this is a sequence of tokens and categories - (all categories start with `$` by convention). Tokens (e.g., `three`) are - matched verbatim, and categories (e.g., `$PHRASE`) match any derivation that - is labeled with that category and has a span at that position. -3. Semantic function (`SemanticFn`): a semantic function takes a sequence of - derivations corresponding to the categories in the children and produces a set - of new derivations which are to be labeled with the target category. - Semantic functions run arbitrary Java code, and allow the parser to integrate - custom logic in a flexible modular way. 
In the example above, `ConstantFn` - is an example of a semantic function which always returns one derivation - with the given logical form (e.g., `(number 3)`). `JoinFn` produces a - derivation whose logical form is the composition of the logical forms of the - two source derivations. - -Derivations are built recursively: for each category and span, we construct a -set of Derivations. We can apply a rule if there is some segmentation of the -span into sub-spans $s_1, \dots, s_k$ and a derivation $d_i$ on each span $s_i$ -with category |source_i|. In this case, we pass the list of derivations as -input into the semantic function. The output is a set of derivations (possibly -zero). - -The first rule is a familiar one that just parses strings such as *three* -into the category `$Expr`: - - (rule $Expr ($PHRASE) (NumberFn)) - -Specifically, one derivation with logical form `(string three)` is created -with category `$PHRASE` and span 0:1. This derivation is passed into -`NumberFn`, which returns one derivation with logical form `(number 3)` and -category `$Expr` and span 0:1. The same goes for *four* on span 2:3. 
- -The next two rules map the tokens *plus* and *times* to a static logical form -(returned by `ConstantFn`): - - (rule $Operator (plus) (ConstantFn (lambda y (lambda x (call + (var x) (var y)))))) - (rule $Operator (times) (ConstantFn (lambda y (lambda x (call * (var x) (var y)))))) - -The next two rules are the main composition rules: - - (rule $Partial ($Operator $Expr) (JoinFn forward)) - (rule $Expr ($Expr $Partial) (JoinFn backward)) - -The semantic function `(JoinFn forward)` takes a lambda term `$Operator` -and an argument `$Expr` and returns a new derivation by forward application: - - Source $Operator: (lambda y (lambda x (call + (var x) (var y)))) - Source $Expr: (number 4) - Target $Partial: (lambda x (call + (var x) (number 4))) - -The semantic function `(JoinFn backward)` takes an argument `$Expr` and a lambda -term `$Partial` and returns a new derivation by backward application: - - Source $Expr: (number 3) - Source $Partial: (lambda x (call + (var x) (number 4))) - Target $Expr: (call + (number 3) (number 4)) - - (rule $ROOT ((what optional) (is optional) $Expr (? optional)) (IdentityFn)) - -We allow some RHS elements to be optional, so that we could have typed in -`three plus four` or `three plus four?`. `IdentityFn` simply takes the logical -form corresponding to `$Expr` and passes it up. 
- -The complete derivation for *three plus four* is illustrated here: - - $ROOT : (call + (number 3) (number 4))) - | [IdentityFn] - $Expr : (call + (number 3) (number 4))) - | [JoinFn backward] - +-------------------------+-------------------------+ - | | - | $Partial : (lambda x (call + (var x) (number 4))) - | | [JoinFn forward] - | +-----------------------------+------------------------------+ - | | | - $Expr : (number 3) $Operator : (lambda y (lambda x (call + (var x) (var y)))) $Expr : (number 4) - | [NumberFn] | [ConstantFn] | [NumberFn] - $PHRASE: three | $PHRASE : four - | [built-in] | | [built-in] - three plus four - - -**Exercise 2.1**: write rules that can parse the following utterances into -into the category `$Expr`: - - length of hello world # 11 - length of one # 3 - -Your rules should look something like: - - (rule $Function (length of) ...) - (rule $Expr ($Function $PHRASE) ...) - -**Exercise 2.2**: turn your "first word" program into a rule so that you can -parse the following utterances into `$String`: - - first word in compositionality is key # compositionality - first word in a b c d e # a - -**Exercise 2.3**: combine all the rules that you have written to produce one grammar -that can parse the following: - - two times length of hello world # 22 - length of hello world times two # (what happens here?) - -To summarize, we have shown how to connect natural language utterances and -logical forms using grammars, which specify how one can compositionally form -the logical form incrementally starting from the words in the utterance. Note -that we are dealing with grammars in the computer science sense, not in the -linguistic sense, as we are not developing a linguistic theory of -grammaticality; we are merely trying to parse some useful subset of utterances -for some task. Given an utterance, the grammar defines an entire set of -derivations, which reflect both the intrinsic ambiguity of language as well as -the imperfection of the grammar. 
In the next section, we will show how to -learn a semantic parser that can resolve these ambiguities. - -### Saving to a file (optional) - -You can put a set of grammar rules in a file (e.g., -`data/tutorial-arithmetic.grammar`) and load it: - - ./run @mode=simple -Grammar.inPaths data/tutorial-arithmetic.grammar - -If you edit the grammar, you can reload the grammar without exiting the -prompt by typing: - - (reload) - -### Using CoreNLP (optional) - -Recall that we were able to parse *four*, but not *twenty-five million*, -because we used the `SimpleLanguageAnalyzer`. In this section, we will show -how to leverage Stanford CoreNLP, which provides us with more sophisticated -linguistic processing on which we can build more advanced semantic parsers. - -First, we need to download an additional dependency (this could take a while -to download because it loads all of the Stanford CoreNLP models for -part-of-speech tagging, named-entity recognition, syntactic dependency parsing, -etc.): - - ./pull-dependencies corenlp - -Compile it: - - ant corenlp - -Now we can load the SEMPRE interactive shell with `CoreNLPAnalyzer`: - - ./run @mode=simple -languageAnalyzer corenlp.CoreNLPAnalyzer -Grammar.inPaths data/tutorial-arithmetic.grammar - -The following utterances should work now (the initial utterance will take a few -seconds while CoreNLP models are being loaded): - - twenty-five million - twenty-five million plus forty-two - -## Section 3: Learning - -So far, we have used the grammar to generate a set of derivations given an -utterance. We could work really hard to make the grammar not overgenerate, but -this will in general be hard to do without tons of manual effort. So instead, we will -use machine learning to learn a model that can choose the best derivation (and -thus logical form) given this large set of candidates. 
So the philosophy is: - -- Grammar: small set of manual rules, defines the candidate derivations -- Learning: automatically learn to pick the correct derivation using features - -In a nutshell, the learning algorithm (class `Learner` in SEMPRE) uses -stochastic gradient descent to optimize the conditional log-likelihood of the -denotations given the utterances in a training set. Let us unpack this. - -### Components of learning - -First, for each derivation, we extract a set of **features** (formally a map -from strings to doubles — 0 or 1 for indicator features) using a feature -extractor (class `FeatureExtractor` in SEMPRE), which is an arbitrary function -on the derivation. Given a parameter vector theta (class `Params` in SEMPRE), -which is also a map from strings to doubles, the inner product gives us a -score: - - Score(x, d) = features(x, d) dot theta, - -where x is the utterance and d is a candidate derivation. - -Second, we define a **compatibility function** (class `ValueEvaluator` in SEMPRE) -between denotations, which returns a number between 0 and 1. This allows us -to learn with approximate values (e.g., "3.5 meters" versus "3.6 meters") and -award partial credit. - -Third, we have a dataset (class `Dataset` in SEMPRE) consisting of **examples** -(class `Example` in SEMPRE), which specifies utterance-denotation pairs. -Datasets can be loaded from files; here is what -`data/tutorial-arithmetic.examples` looks like: - - (example - (utterance "three and four") - (targetValue (number 7)) - ) - -Intuitively, the learning algorithm will tune the parameter vector theta so -that derivations with logical forms whose denotations have high compatibility -with the target denotation are assigned higher scores. For the mathematical -details, see the learning section of this -[paper](http://www.stanford.edu/~cgpotts/manuscripts/liang-potts-semantics.pdf). 
- -### No learning - -As a simple example, imagine that a priori, we do not know what the word *and* -means: it could be either plus or times. Let us add two rules to capture the -two possibilities (this is reflected in `data/tutorial-arithmetic.grammar`): - - (rule $Operator (and) (ConstantFn (lambda y (lambda x (call * (var x) (var y)))) (-> fb:type.number (-> fb:type.number fb:type.number)))) - (rule $Operator (and) (ConstantFn (lambda y (lambda x (call + (var x) (var y)))) (-> fb:type.number (-> fb:type.number fb:type.number)))) - -Start the interactive prompt: - - ./run @mode=simple -Grammar.inPaths data/tutorial-arithmetic.grammar - -and type in: - - three and four - -There should be two derivations each with probability 0.5 (the system arbitrarily chooses one): - - (derivation (formula (((lambda y (lambda x (call * (var x) (var y)))) (number 4.0)) (number 3.0))) (value (number 12.0)) (type fb:type.number)) [score=0, prob=0.500] - (derivation (formula (((lambda y (lambda x (call + (var x) (var y)))) (number 4.0)) (number 3.0))) (value (number 7.0)) (type fb:type.number)) [score=0, prob=0.500] - -### Batch learning - -To perform (batch) learning, we run SEMPRE: - - ./run @mode=simple -Grammar.inPaths data/tutorial-arithmetic.grammar -FeatureExtractor.featureDomains rule -Dataset.inPaths train:data/tutorial-arithmetic.examples -Learner.maxTrainIters 3 - -The `rule` feature domain tells the feature extractor to increment the feature -each time the grammar rule is applied in the derivation. `Dataset.inPaths` -specifies the examples file to train on, and `-Learner.maxTrainIters 3` -specifies that we will iterate over all the examples three times. 
- -Now type: - - three and four - -The correct derivation should now have much higher score and probability: - - (derivation (formula (((lambda y (lambda x (call + (var x) (var y)))) (number 4)) (number 3))) (value (number 7)) (type fb:type.any)) [score=18.664, prob=0.941] - (derivation (formula (((lambda y (lambda x (call * (var x) (var y)))) (number 4)) (number 3))) (value (number 12)) (type fb:type.any)) [score=15.898, prob=0.059] - -You will also see the features that are active for the predicted derivation. -For example, the following line represents the feature indicating that we -applied the rule mapping *and* to `+`: - - [ rule :: $Operator -> and (ConstantFn (lambda y (lambda x (call + (var x) (var y))))) ] 1.383 = 1 * 1.383 - -The feature value is 1, the feature weight is 1.383, and their product is the -additive contribution to the score of this derivation. You can look at the score of the other derivation: - - (select 1) - -The corresponding feature there is: - - [ rule :: $Operator -> and (ConstantFn (lambda y (lambda x (call * (var x) (var y))))) ] -1.383 = 1 * -1.383 - -This negative contribution to the score is why we favored the `+` derivation -over this `*` one. - -We can also inspect the parameters: - - (params) - -### Online learning - -Finally, you can also do (online) learning directly in the prompt: - - (accept 1) - -This will accept the `*` derivation as the correct one and update the -parameters on the fly. If you type: - - three and four - -again, you will see that the probability of the `+` derivation has decreased. -If you type `(accept 1)` a few more times, the `*` derivation will dominate -once more. - -## Section 4: Lambda DCS and SPARQL - -So far, we used `JavaExecutor` to map logical forms to denotations by executing -Java code. A major application of semantic parsing (and indeed the initial one -that gave birth to SEMPRE) is where the logical forms are database queries. In -this section, we will look at querying graph databases. 
- -A graph database (e.g., Freebase) stores information about entities -and their properties; concretely, it is just a set of triples $(s, p, o)$, -where $s$ and $o$ are entities and $p$ is a property. For example: - - fb:en.barack_obama fb:place_of_birth fb:en.honolulu - -is one triple. If we think of the entities as nodes in a directed graph, then -each triple is a directed edge between two nodes labeled with the property. - -See `freebase/data/tutorial.ttl` for an example of a tiny subset of the Freebase graph -pertaining to geography about California. - -First, pull the dependencies needed for Freebase: - - ./pull-dependencies freebase - -### Setting up your own Virtuoso graph database - -We use the graph database engine, Virtuoso, to store these triples and allow -querying. Follow these instructions if you want to create your own Virtuoso instance. - -First, make sure you have Virtuoso installed — see the Installation -section of the [readme](README.md). - -Then start the server: - - freebase/scripts/virtuoso start tutorial.vdb 3001 - -Add a small graph to the database: - - freebase/scripts/virtuoso add freebase/data/tutorial.ttl 3001 - -Now you can query the graph (this should print out three items): - - ./run @mode=query @sparqlserver=localhost:3001 -formula '(fb:location.location.containedby fb:en.california)' - -To stop the server: - - freebase/scripts/virtuoso stop 3001 - -### Setting up a copy of Freebase - -The best case is someone already installed Freebase for you and handed you a -host:port. Otherwise, to run your own copy of the entire Freebase graph (a -2013 snapshot), read on. 
- -Download it (this is really big and takes a LONG time): - - ./pull-dependencies fullfreebase-vdb - -Then you can start the server (make sure you have at least 60GB of memory): - - freebase/scripts/virtuoso start lib/fb_data/93.exec/vdb 3093 - -### Lambda DCS - -SPARQL is the standard language for querying graph databases, but it will be -convenient to use a language more tailored for semantic parsing. We will use -[lambda DCS](http://arxiv.org/pdf/1309.4408.pdf), which is based on a mix -of lambda calculus, description logic, and dependency-based compositional -semantics (DCS). - -We assume you have started the Virtuoso database: - - freebase/scripts/virtuoso start tutorial.vdb 3001 - -Then start up a prompt: - - ./run @mode=simple-freebase-nocache @sparqlserver=localhost:3001 - -The simplest logical formula in lambda DCS is a single entity such as `fb:en.california`. -To execute this query, simply type the following into the interactive prompt: - - (execute fb:en.california) - -This should return: - - (list (name fb:en.california California)) - -The result is a list containing the single entity. Here, `fb:en.california` is -the canonical Freebase ID (always beginning with the prefix `fb:`) and -`California` is the name (look at `freebase/data/tutorial.ttl` to see where this comes -from). - -Let us try a more complex query which will fetch all the cities (in the database): - - (execute (fb:type.object.type fb:location.citytown)) - -This should return the three cities, Seattle, San Francisco, and Los Angeles. -We can restrict to *cities in California*: - - (execute (and (fb:type.object.type fb:location.citytown) (fb:location.location.containedby fb:en.california))) - -This should return the two cities satisfying the restriction: San Francisco and Los Angeles. 
- -We can count the number of cities (should return 3): - - (execute (count (fb:type.object.type fb:location.citytown))) - -We can also get the city with the largest area: - - (execute (argmax 1 1 (fb:type.object.type fb:location.citytown) fb:location.location.area)) - -Now let us take a closer look at what is going on with these logical forms -under the hood. We are using a logical language called lambda DCS. - -The following are the types of logical forms: - -1. Primitive (e.g., `fb:en.seattle`): denotes a set containing that single entity. -1. Intersection `(and |u1| |u2|)`: denotes the intersection of the sets denoted - by unary logical forms `u1` and `u2`. -1. Join `(|b| |u|)`: denotes the set of $x$ which are connected to some $y$ via - a binary $b$ and $y$ is in the set denoted by unary $u$. -1. Count `(count |u|)`: denotes the set containing the cardinality of the set denoted by `u`. -1. Superlative `(argmax |rank| |count| |u| |b|)`: sort the elements of `u` by decreasing `b` - and return `count` elements starting at offset `rank` (1-based). -1. Mu abstraction `(mark (var |v|) |u|)`: same as the unary |u| denoting - entities |x|, with the exception that |x| must be equal to all occurrences - of the variable |v| in |u|. -1. Lambda abstraction `(lambda (var |v|) |u|)`: produces a binary (x,y) where - `x` is in the set denoted by `u` and `y` is the value taken on by variable - `v`. - -See `src/edu/stanford/nlp/sempre/freebase/test/SparqlExecutorTest.java` for -many more examples (which only work on the full Freebase). 
- -**Exercise 4.1**: write lambda DCS logical forms for the following utterances: - - `city with the largest area` - - `top 5 cities by area` - - `countries whose capitals have area at least 500 squared kilometers` - - `states bordering Oregon and Washington` - - `second tallest mountain in France` - - `country with the most number of rivers` - -You should familiarize yourself with the [Freebase -schema](http://www.freebase.com/schema) to see which predicates to use. -Execute these on the full Freebase to find out the answers! - -### Parsing - -So far, we have described the denotations of logical forms for querying a graph -database. Now we focus on parsing natural language utterances into these -logical forms. - -The core challenge is at the lexical level: mapping natural language phrases -(e.g., *born in*) to logical predicates (e.g., -`fb:people.person.place_of_birth`). It is useful to distinguish between two -types of lexical items: - -- Entities (e.g., `fb:en.barack_obama`): There are generally a huge number of - entities (Freebase has tens of millions). Often, string matching gets you - part of the way there (for example, *Obama* to `fb:en.barack_obama`), but - there is often quite a bit of ambiguity (Obama is also a city in Japan). - -- Non-entities (e.g., `fb:people.person.place_of_birth`), which include unary - and binary predicates: There are fewer of these, but string matching is - unlikely to get you very far. - -We could always add grammar rules like this: - - (rule $Entity (the golden state) (ConstantFn fb:en.california)) - (rule $Entity (california) (ConstantFn fb:en.california)) - -but grammars are supposed to be small and this approach does not scale, so we -are not going to do this. 
-One way is to create a **lexicon**, which is a mapping from words to predicates -(see `freebase/data/tutorial-freebase.lexicon`), with entries like this: - - {"lexeme": "california", "formula": "fb:en.california"} - {"lexeme": "the golden state", "formula": "fb:en.california"} - {"lexeme": "cities", "formula": "(fb:type.object.type fb:location.citytown)"} - {"lexeme": "towns", "formula": "(fb:type.object.type fb:location.citytown)"} - {"lexeme": "in", "formula": "fb:location.location.containedby"} - {"lexeme": "located in", "formula": "fb:location.location.containedby"} - -Then we can add the following rules (see -`freebase/data/tutorial-freebase.grammar`): - - (rule $Unary ($PHRASE) (SimpleLexiconFn (type fb:type.any))) - (rule $Binary ($PHRASE) (SimpleLexiconFn (type (-> fb:type.any fb:type.any)))) - (rule $Set ($Unary) (IdentityFn)) - (rule $Set ($Unary $Set) (MergeFn and)) - (rule $Set ($Binary $Set) (JoinFn forward)) - (rule $ROOT ($Set) (IdentityFn)) - -The `SimpleLexiconFn` looks up the phrase and returns all formulas that have the given type. To check the type, use: - - (type fb:en.california) # fb:common.topic - (type fb:location.location.containedby) # (-> fb:type.any fb:type.any) - -`MergeFn` takes the two (unary) logical forms |u| and |v| (in this case, coming from `$Unary` and `$Set`), -and forms the intersection logical form `(and |u| |v|)`. - -`JoinFn` takes two logical forms (one binary and one unary) and returns the -logical form `(|b| |v|)`. Note that before we were using `JoinFn` as function -application. In lambda DCS, `JoinFn` produces an actual logical form that -corresponds to joining |b| and |v|. The two bear striking similarities, which -is the basis for the overloading. 
- -Now start the interactive prompt: - - ./run @mode=simple-freebase-nocache @sparqlserver=localhost:3001 -Grammar.inPaths freebase/data/tutorial-freebase.grammar -SimpleLexicon.inPaths freebase/data/tutorial-freebase.lexicon - -We should be able to parse the following utterances: - - california - the golden state - cities in the golden state - towns located in california - -In general, how does one create grammars? One good strategy is to start with a -single rule mapping the entire utterance to the final logical form. Then -decompose the rule into parts. For example, you might start with: - - (rule $ROOT (cities in california) (ConstantFn (and (fb:type.object.type fb:location.citytown) (fb:location.location.containedby fb:en.california)))) - -Then you might factor it into two pieces, in order to generalize: - - (rule $ROOT (cities in $Entity) (lambda e (and (fb:type.object.type fb:location.citytown) (fb:location.location.containedby (var e))))) - (rule $Entity (california) (ConstantFn fb:en.california)) - -Note that in the first rule, we are writing `(lambda e ...)` directly. This -means: take the logical form for the source (`$Entity`) and substitute it in -for `e`. - -We can refactor the first rule: - - (rule $ROOT ($Unary in $Entity) (lambda u (lambda e (and (var u) (fb:location.location.containedby (var e)))))) - (rule $Unary (cities) (ConstantFn (fb:type.object.type fb:location.citytown))) - -and so on... - -**Exercise 4.2**: Write a grammar that can parse the utterances from Exercise -4.1 into a set of candidates containing the true logical form you annotated. -Of course you can trivially write one rule for each example, but try to -decompose the grammars as much as possible. This is what will permit -generalization. - -**Exercise 4.3**: Train a model so that the correct logical forms appear at the -top of the candidate list on the training examples. Remember to add features. - -## Debugging - -In the beginning, SEMPRE grammars can be difficult to debug. 
This is primarily -because everything is dynamic, which means that minor typos result in empty -results rather than errors. - -The first thing you should do is to check that you do not have typos. Then, try to -simplify your grammar as much as possible (comment things out) until you have -the smallest example that fails. Then you should turn on more debugging -output. - -Only derivations that reach `$ROOT` over the entire span of the sentence are -built. You can also turn on debugging to print out all intermediate -derivations so that you can see where something is failing: - - (set Parser.verbose 3) # or pass -Parser.verbose 3 on the command-line - -Often derivations fail because an intermediate combination does not type check. -The following options will print out all combinations which are tried. You might find -that you are combining two logical forms in the wrong way: - - (set JoinFn.verbose 3) - (set JoinFn.showTypeCheckFailures true) - (set MergeFn.verbose 3) - (set MergeFn.showTypeCheckFailures true) - -## Appendix: Background reading - -So far this tutorial has provided a very operational view of semantic parsing -based on SEMPRE. The following references provide a broader look at the area -of semantic parsing as well as the linguistic and statistical foundations. - -* **Natural language semantics**: The question of how to represent natural - language utterances using logical forms has been well-studied in linguistics - under formal (or compositional) semantics. Start with the - [CS224U course notes from Stanford](http://www.stanford.edu/class/cs224u/readings/cl-semantics-new.pdf) - to get a brief taste of the various phenomena in natural language. - The [Bos/Blackburn book](http://www.let.rug.nl/bos/comsem/book1.html) - (also see this [related article](http://www.coli.uni-saarland.de/publikationen/softcopies/Blackburn:1997:RIN.pdf)) - gives more details on how parsing to logical forms works (without any - learning); Prolog code is given too. 
- -* **Log-linear models**: Our semantic parser is based on log-linear models, - which are a very important tool in machine learning and statistical natural - language processing. Start with [a tutorial by Michael - Collins](http://www.cs.columbia.edu/~mcollins/loglinear.pdf), which is geared - towards applications in NLP. - -* **Semantic parsing**: finally, putting together the linguistic insights from formal - semantics and the computational and statistical tools from machine learning, - we get semantic parsing. There has been a lot of work on semantic parsing, which - we will not attempt to list fully here. Check out the [ACL 2013 tutorial by - Yoav Artzi and Luke - Zettlemoyer](http://yoavartzi.com/pub/afz-tutorial.acl.2013.pdf), which - focuses on how to build semantic parsers using Combinatory Categorial - Grammar (CCG). Our [EMNLP 2013 - paper](http://cs.stanford.edu/~pliang/papers/freebase-emnlp2013.pdf) is the - first paper based on SEMPRE. This [Annual Reviews - paper](http://www.stanford.edu/~cgpotts/manuscripts/liang-potts-semantics.pdf) - provides a tutorial of how to learn a simple model of compositional semantics - ([Python code](https://github.com/cgpotts/annualreview-complearning) is - available) along with a discussion of compositionality and generalization. 
diff --git a/examples/lassie/sempre/build.xml b/examples/lassie/sempre/build.xml deleted file mode 100644 index c6a7cc524f..0000000000 --- a/examples/lassie/sempre/build.xml +++ /dev/null @@ -1,130 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/examples/lassie/sempre/interactive/README.md b/examples/lassie/sempre/interactive/README.md deleted file mode 100644 index 4a343be237..0000000000 --- a/examples/lassie/sempre/interactive/README.md +++ /dev/null @@ -1,115 +0,0 @@ -# README - -This `interactive` package is the code for our paper -*Naturalizing a programming language through interaction* (ACL 2017). -A live demo is at [www.voxelurn.com](http://www.voxelurn.com). - -voxelurn is a language interface to a voxel world. -This server handles commands used to learn from definitions, and other interactive queries. -In this setting, the system begin with the dependency-based action language (`dal.grammar`), and gradually expand the language through interacting with it users. - -## Overview of the components - -### sempre.interactive - -The `edu.stanford.nlp.sempre.interactive` package live in this repo contains code for -* running interactive commands (such as query, accept, reject, definition) -* executor for the dependency-based action (DAL) language -* voxelurn specific code in `edu.stanford.nlp.sempre.interactive.voxelurn` for actually generating the voxel and manipulating them -Utilties and resources such as the grammar and run script are in this directory, and the code in in the regular `sempre/src` directory. - -### voxelurn client - -It queries the server, and renders the voxels to a browser. A live version is at [www.voxelurn.com](http://www.voxelurn.com), which queries our server. 
You can also find a client for localhost at [http://local.voxelurn.com](http://local.voxelurn.com), which is the same client, but with queries going to `http://localhost:8410` instead. -Code for the client is at `https://github.com/sidaw/shrdlurn`. See its [README.md](https://github.com/sidaw/shrdlurn/blob/master/README.md) if you want to work with and build the client yourself. - - -### voxelurn community server -Located at `interactive/community-server`, the community server -handles other functionalities such as logging client actions, leaderboard, submitting structures, authentication, etc., and generally any functions not related to parsing. This server is needed for running interactive experiments, but is not required just for trying out voxelurn. - - -## Running the SEMPRE server for Voxelurn - -0. Set up SEMPRE dependencies and compile - - ./pull-dependencies core - ant interactive - -1. Start the server - - ./interactive/run @mode=voxelurn -server -interactive - - things in the core language such as `add red left`, `repeat 3 [select left]` should work. - -2. Feed the server existing definitions, which should take less than 2 minutes. - - ./interactive/run @mode=simulator @server=local @sandbox=none @task=freebuilddef -maxQueries 2496 - - try `add dancer` now. - -### Interacting with the server - -After you run the above, there are 3 ways to interact and try your own commands. - -* The visual way is to use the client: [http://local.voxelurn.com](http://local.voxelurn.com). - Code for the client is at `https://github.com/sidaw/shrdlurn` (see its [README.md](https://github.com/sidaw/shrdlurn/blob/master/README.md)). - Try `[add dancer; front 5] 3 times`. - -* Hit `Ctrl-D` on the terminal running the server, and type `add red top`, or `add green monster` - -* On a browser, type `http://localhost:8410/sempre?q=(:q add green monster)` - - -## Experiments in ACL2017 - -1. Start the server - - ./interactive/run @mode=voxelurn -server -interactive - -2. 
Feed the server all the query logs - - ./interactive/run @mode=simulator @server=local @sandbox=none @task=freebuild -maxQueries 103874 - - This currently takes just under 30 minutes. Decrease `-maxQueries` for a quicker experiment. This generates `plotInfo.json` in `./state/execs/${lastExec}.exec/` where `lastExec` is `cat ./state/lastExec`. - -3. Taking `../state/execs/${lastExec}.exec/plotInfo.json` as input, we can analyze the data and produce some plots using the following ipython notebook - - ipython notebook interactive/analyze_data.ipynb - - which prints out basic statistics and generates the plots used in our paper. The plots are saved at `../state/execs/${lastExec}.exec/` - - -## Misc. - -There are some unit tests - - ./interactive/run @mode=test - -To specify a specific test class and verbosity - - ./interactive/run @mode=test @class=DALExecutorTest -verbose 5 - -Clean up or back up data - - ./interactive/run @mode=backup # save previous data logs - ./interactive/run @mode=trash # deletes previous data logs - -Data, in .gz format, can be found in the `queries` directory. - -* `./interactive/queries/freebuild.def.json.gz` -has 2495 definitions combining just over 10k utterances. -* `./interactive/queries/freebuild.json.gz` has 103873 queries made during the main experiment. - -## Voxelurn community server (optional and in development) - -This server helps with client side logging, leaderboard, authentication, etc. — basically anything that is not directly related to parsing. -This component is only required if you want to run the interactive experiment yourself. It is fairly coupled with the [voxelurn client](http://github.com/sidaw/shrdlurn), which sends the request to this server. 
- - cd interactive/community-server - python install-deps.py - python server.py --port 8403 - - # required keys for authentication - export SEMPRE_JWT_SECRET=ANY_RANDOM_SEQUENCE - export SLACK_OAUTH_SECRET=OAUTH_KEY_FROM_SLACK diff --git a/examples/lassie/sempre/interactive/lassie.db b/examples/lassie/sempre/interactive/lassie.db deleted file mode 100644 index f0e36b5ac1..0000000000 --- a/examples/lassie/sempre/interactive/lassie.db +++ /dev/null @@ -1,752 +0,0 @@ -############################################################################ -# Domain knowledge is written here in a mix of whitespace- and # -# comma-separated values. For each line, the first token is a HOL # -# component and the second is the attribute we want to specify. What # -# follows is a comma-separated list of values which characterize this # -# attribute of that component. In the list (and nowhere else) can tokens # -# contain whitespaces. This allows SML types and expressions of more than # -# one word to be represented naturally. # -# # -# The `type` attribute is the only one required for each component and # -# gives rise to literal lexemes to be parsed into their fitting type in # -# the grammar; e.g. `asm_rewrite_tac` is given type `thm list -> tactic` # -# which produces a lexeme # -# # -# lexeme: `asm_rewrite_tac` # -# formula: `asm_rewrite_tac` # -# type: `thmlist->tactic` # -# # -# Calling SimpleLexiconFn on (type thmlist->tactic) captures all # -# instances of asm_rewrite_tac (and other functions of the same type) into # -# a category of choice. 
Note that internally, those types have their # -# whitespaces removed and parentheses turned into square brackets so type # -# # -# term quotation list -> (thm -> tactic) -> thm -> tactic # -# # -# is actually fetched from SimpleLexiconFn with # -# # -# (type termquotationlist->[thm->tactic]->thm->tactic) # -# # -# into a category potentially called # -# # -# $termquotationlist->[thm->tactic]->thm->tactic # -############################################################################ - - -######################### -# HOL4 Tactics - Types: # -######################### - -# rewriting -asm_rewrite_tac type thm list -> tactic -rewrite_tac type thm list -> tactic -once_rewrite_tac type thm list -> tactic -once_asm_rewrite_tac type thm list -> tactic - -# simplification -simp type thm list -> tactic -fs type thm list -> tactic -rfs type thm list -> tactic -rw type thm list -> tactic - -# thm tactics -imp_res_tac type thm -> tactic -assume_tac type thm -> tactic -irule type thm -> tactic -drule type thm -> tactic -match_mp_tac type thm -> tactic -mp_tac type thm -> tactic - -# automation tactics -res_tac type tactic -metis_tac type thm list -> tactic -MESON_TAC type thm list -> tactic - -# pattern based tactics -qpat_x_assum type term quotation -> (thm -> tactic) -> tactic -qpat_assum type term quotation -> (thm -> tactic) -> tactic - -# tacticals -first_x_assum type (thm -> tactic) -> tactic -first_assum type (thm -> tactic) -> tactic -last_x_assum type (thm -> tactic) -> tactic -last_assum type (thm -> tactic) -> tactic -rpt type tactic -> tactic -TRY type tactic -> tactic - -# other -Cases type tactic -Induct type tactic -cheat type tactic -gen_tac type tactic -strip_tac type tactic -conj_tac type tactic -all_tac type tactic -NO_TAC type tactic -EQ_TAC type tactic -CCONTR_TAC type tactic -AP_TERM_TAC type tactic -AP_THM_TAC type tactic -Induct_on type term quotation -> tactic -Cases_on type term quotation -> tactic -completeInduct_on type term quotation -> tactic 
-qexists_tac type term quotation -> tactic -sg type term quotation -> tactic -subgoal type term quotation -> tactic -qspec_then type term quotation -> (thm -> tactic) -> thm -> tactic -qspecl_then type term quotation list -> (thm -> tactic) -> thm -> tactic -pop_assum type (thm -> tactic) -> tactic -spose_not_then type (thm -> tactic) -> tactic -rename type term quotation list -> tactic -ntac type int -> tactic -> tactic -by type term quotation * tactic -> tactic -suffices_by type term quotation * tactic -> tactic - -################### -# Tactic chaining # -################### -THEN type tactic -> tactic -> tactic -ORELSE type tactic -> tactic -> tactic - - -######################## -# HOL4 - Rules : Types # -######################## -GSYM type thm -> thm -EVAL_RULE type thm -> thm -Once type thm -> thm - -############################ -# Interactive mode : Types # -############################ -b type command - -################################# -# HOL4 Tactics - Name features: # -################################# -# rewriting -#rewrite_tac VP rewrite -#rewrite_tac OBJ goal -#rewrite_tac PREARG with -# -#asm_rewrite_tac VP rewrite -#asm_rewrite_tac OBJ goal -#asm_rewrite_tac CP with assumptions -#asm_rewrite_tac PREARG with -# -#once_rewrite_tac VP rewrite -#once_rewrite_tac OBJ goal -#once_rewrite_tac CP once -#once_rewrite_tac PREARG with -# -#once_asm_rewrite_tac VP rewrite -#once_asm_rewrite_tac OBJ goal -#once_asm_rewrite_tac CP once -#once_asm_rewrite_tac CP with assumptions -#once_asm_rewrite_tac PREARG with -# -## simplification -#simp VP simplify -#simp OBJ goal -#simp CP with assumptions -#simp PREARG with -# -#fs AV full, fully -#fs VP simplify -#fs OBJ goal, all of goal, assumptions -#fs CP with assumptions -#fs PREARG with -# -#rfs AV reverse, full, fully -#rfs VP simplify -#rfs OBJ goal, all of goal, assumptions -#rfs CP with assumptions, in reverse order -#rfs PREARG with -# -## thm tactics -#imp_res_tac VP enrich assumptions, resolve theorem 
-#imp_res_tac PREARG with -# -#res_tac VP enrich, resolve -#res_tac OBJ assumptions -#res_tac CP together, with eachother -# -#assume_tac VP assume, have, add -#assume_tac OBJ assumption -#assume_tac CP to goal, to assumptions -# -#irule VP reduce -#irule OBJ goal -#irule CP with matching -#irule PREARG with, with implication -# -#drule name resolve with -# -#match_mp_tac name modus ponens -#match_mp_tac VP reduce -#match_mp_tac OBJ goal -#match_mp_tac CP with matching, without normalization, without normalizing -#match_mp_tac PREARG with, with implication -# -## automation tactics -#res_tac VP enrich, resolve -#res_tac OBJ assumptions -#res_tac CP together, with eachother -# -#MESON_TAC name meson -#MESON_TAC AV auto, automatic -#MESON_TAC VP proof search, search -#MESON_TAC OBJ proof -#MESON_TAC PREARG with -# -#metis_tac name metis -#metis_tac AV auto, automatic, ordered, fancy -#metis_tac VP proof search, search -#metis_tac OBJ proof -#metis_tac PREARG with -# -## pattern based tactics -#qpat_assum VP find, use -#qpat_assum OBJ assumption, matching assumption -#qpat_assum PREARG with pattern -# -#qpat_x_assum VP spend, find, use -#qpat_x_assum OBJ assumption, matching assumption -#qpat_x_assum CP and remove it -#qpat_x_assum PREARG with pattern -# -## tacticals -#first_x_assum VP map, spend, use -#first_x_assum OBJ assumption, matching assumption -#first_x_assum CP over assumptions -# -#last_x_assum AV reverse -#last_x_assum VP map, spend, use -#last_x_assum OBJ assumption, matching assumption, last matching assumption -#last_x_assum CP over assumptions -# -#ntac VP apply -#ntac CP times -# -#rpt AV infinitely, indefinitely -#rpt VP repeat, keep applying -# -## other -#subgoal VP produce, add, show -#subgoal OBJ subgoal -#subgoal CP as subgoal -# -#by name by, shown by -#suffices_by name suffices to show -# -#Cases name case analysis, case, cases -#Cases VP split -#Cases OBJ goal -# -#Induct AV by -#Induct VP induct, induction, do induction -# -#Induct_on AV by 
-#Induct_on VP induct, induction, do induction -#Induct_on OBJ on variable -#Induct_on PREARG on -# -#strip_tac name intros -#strip_tac VP remove, strip -#strip_tac OBJ quantifier -# -#rename name alpha conversion -#rename VP rename, change -#rename OBJ variable names, free variables -#rename CP in goal -#rename PREARG into, to, with -# -#qexists_tac name exists, exists_tac -#qexists_tac VP reduce, instantiate, use witness, have witness -#qexists_tac OBJ existential -#qexists_tac PREARG with -# -#qspec_then name specialize for -#qspecl_then name specialize for all -# -######################### -## HOL4 - Rules : Types # -######################### -#GSYM name symmetry, gsym -#EVAL_RULE name eval_rule, evaluate -#Once name only once, once - -####################################### -# HOL4 - Real number theorems : Types # -####################################### -SUP_LEMMA3 type thm -SUP_LEMMA2 type thm -SUP_LEMMA1 type thm -SUP_EPSILON type thm -SUM_ZERO type thm -SUM_TWO type thm -SUM_SUBST type thm -SUM_SUB type thm -SUM_REINDEX type thm -SUM_POS_GEN type thm -SUM_POS type thm -SUM_PERMUTE_0 type thm -SUM_OFFSET type thm -SUM_NSUB type thm -SUM_NEG type thm -SUM_LE type thm -sum_ind type thm -SUM_GROUP type thm -SUM_EQ type thm -SUM_DIFF type thm -sum_compute type thm -SUM_CMUL type thm -SUM_CANCEL type thm -SUM_BOUND type thm -SUM_ADD type thm -SUM_ABS_LE type thm -SUM_ABS type thm -SUM_2 type thm -SUM_1 type thm -SUM_0 type thm -sum type thm -SETOK_LE_LT type thm -REAL_THIRDS_BETWEEN type thm -REAL_SUP_UBOUND_LE type thm -REAL_SUP_UBOUND type thm -REAL_SUP_SOMEPOS type thm -REAL_SUP_MAX type thm -REAL_SUP_LE type thm -REAL_SUP_EXISTS_UNIQUE type thm -REAL_SUP_EXISTS type thm -REAL_SUP_CONST type thm -REAL_SUP_ALLPOS type thm -REAL_SUP type thm -REAL_SUMSQ type thm -REAL_SUB_TRIANGLE type thm -REAL_SUB_SUB2 type thm -REAL_SUB_SUB type thm -REAL_SUB_RZERO type thm -REAL_SUB_RNEG type thm -REAL_SUB_REFL type thm -REAL_SUB_RDISTRIB type thm -REAL_SUB_RAT type thm 
-REAL_SUB_NEG2 type thm -REAL_SUB_LZERO type thm -REAL_SUB_LT type thm -REAL_SUB_LNEG type thm -REAL_SUB_LE type thm -REAL_SUB_LDISTRIB type thm -REAL_SUB_INV2 type thm -REAL_SUB_ADD2 type thm -REAL_SUB_ADD type thm -REAL_SUB_ABS type thm -REAL_SUB_0 type thm -REAL_SUB type thm -REAL_RNEG_UNIQ type thm -REAL_RINV_UNIQ type thm -REAL_RDISTRIB type thm -REAL_POW_POW type thm -REAL_POW_MONO_LT type thm -REAL_POW_LT2 type thm -REAL_POW_LT type thm -REAL_POW_INV type thm -REAL_POW_DIV type thm -REAL_POW_ADD type thm -REAL_POW2_ABS type thm -REAL_POS_POS type thm -REAL_POS_NZ type thm -REAL_POS_MONO type thm -REAL_POS_LE_ZERO type thm -REAL_POS_INFLATE type thm -REAL_POS_ID type thm -REAL_POS_EQ_ZERO type thm -REAL_POS type thm -REAL_POASQ type thm -REAL_OVER1 type thm -REAL_OF_NUM_SUC type thm -REAL_OF_NUM_POW type thm -REAL_OF_NUM_MUL type thm -REAL_OF_NUM_LE type thm -REAL_OF_NUM_EQ type thm -REAL_OF_NUM_ADD type thm -REAL_NZ_IMP_LT type thm -REAL_NOT_LT type thm -REAL_NOT_LE type thm -REAL_NEGNEG type thm -REAL_NEG_THIRD type thm -REAL_NEG_SUB type thm -REAL_NEG_RMUL type thm -REAL_NEG_NEG type thm -REAL_NEG_MUL2 type thm -REAL_NEG_MINUS1 type thm -REAL_NEG_LT0 type thm -REAL_NEG_LMUL type thm -REAL_NEG_LE0 type thm -REAL_NEG_INV type thm -REAL_NEG_HALF type thm -REAL_NEG_GT0 type thm -REAL_NEG_GE0 type thm -REAL_NEG_EQ0 type thm -REAL_NEG_EQ type thm -REAL_NEG_ADD type thm -REAL_NEG_0 type thm -REAL_MUL_SYM type thm -REAL_MUL_SUB2_CANCEL type thm -REAL_MUL_SUB1_CANCEL type thm -REAL_MUL_RZERO type thm -REAL_MUL_RNEG type thm -REAL_MUL_RINV type thm -REAL_MUL_RID type thm -REAL_MUL_LZERO type thm -REAL_MUL_LNEG type thm -REAL_MUL_LINV type thm -REAL_MUL_LID type thm -REAL_MUL_COMM type thm -REAL_MUL_ASSOC type thm -REAL_MUL type thm -REAL_MIN_SUB type thm -REAL_MIN_REFL type thm -REAL_MIN_MAX type thm -REAL_MIN_LT type thm -REAL_MIN_LE_LIN type thm -REAL_MIN_LE2 type thm -REAL_MIN_LE1 type thm -REAL_MIN_LE type thm -REAL_MIN_ALT type thm -REAL_MIN_ADD type thm 
-REAL_MIN_ACI type thm -REAL_MIDDLE2 type thm -REAL_MIDDLE1 type thm -REAL_MEAN type thm -REAL_MAX_SUB type thm -REAL_MAX_REFL type thm -REAL_MAX_MIN type thm -REAL_MAX_LT type thm -REAL_MAX_LE type thm -REAL_MAX_ALT type thm -REAL_MAX_ADD type thm -REAL_MAX_ACI type thm -REAL_LTE_TRANS type thm -REAL_LTE_TOTAL type thm -REAL_LTE_ANTSYM type thm -REAL_LTE_ANTISYM type thm -REAL_LTE_ADD2 type thm -REAL_LTE_ADD type thm -REAL_LT_TRANS type thm -REAL_LT_TOTAL type thm -REAL_LT_SUB_RADD type thm -REAL_LT_SUB_LADD type thm -REAL_LT_RMUL_IMP type thm -REAL_LT_RMUL_0 type thm -REAL_LT_RMUL type thm -REAL_LT_REFL type thm -REAL_LT_RDIV_EQ type thm -REAL_LT_RDIV_0 type thm -REAL_LT_RDIV type thm -REAL_LT_RADD type thm -REAL_LT_NZ type thm -REAL_LT_NEGTOTAL type thm -REAL_LT_NEG type thm -REAL_LT_MULTIPLE type thm -REAL_LT_MUL2 type thm -REAL_LT_MUL type thm -REAL_LT_MIN type thm -REAL_LT_MAX type thm -REAL_LT_LMUL_IMP type thm -REAL_LT_LMUL_0 type thm -REAL_LT_LMUL type thm -REAL_LT_LE type thm -REAL_LT_LDIV_EQ type thm -REAL_LT_LADD type thm -REAL_LT_INV_EQ type thm -REAL_LT_INV type thm -REAL_LT_IMP_NE type thm -REAL_LT_IMP_LE type thm -REAL_LT_IADD type thm -REAL_LT_HALF2 type thm -REAL_LT_HALF1 type thm -REAL_LT_GT type thm -REAL_LT_FRACTION_0 type thm -REAL_LT_FRACTION type thm -REAL_LT_DIV type thm -REAL_LT_ANTISYM type thm -REAL_LT_ADDR type thm -REAL_LT_ADDNEG2 type thm -REAL_LT_ADDNEG type thm -REAL_LT_ADDL type thm -REAL_LT_ADD_SUB type thm -REAL_LT_ADD2 type thm -REAL_LT_ADD1 type thm -REAL_LT_ADD type thm -REAL_LT_1 type thm -REAL_LT_01 type thm -REAL_LT1_POW2 type thm -real_lt type thm -REAL_LT type thm -REAL_LNEG_UNIQ type thm -REAL_LINV_UNIQ type thm -REAL_LIN_LE_MAX type thm -REAL_LET_TRANS type thm -REAL_LET_TOTAL type thm -REAL_LET_ANTISYM type thm -REAL_LET_ADD2 type thm -REAL_LET_ADD type thm -REAL_LE_TRANS type thm -REAL_LE_TOTAL type thm -REAL_LE_SUP type thm -REAL_LE_SUB_RADD type thm -REAL_LE_SUB_LADD type thm -REAL_LE_SUB_CANCEL2 type thm 
-REAL_LE_SQUARE type thm -REAL_LE_RNEG type thm -REAL_LE_RMUL_IMP type thm -REAL_LE_RMUL type thm -REAL_LE_REFL type thm -REAL_LE_RDIV_EQ type thm -REAL_LE_RDIV type thm -REAL_LE_RADD type thm -REAL_LE_POW2 type thm -REAL_LE_NEGTOTAL type thm -REAL_LE_NEGR type thm -REAL_LE_NEGL type thm -REAL_LE_NEG2 type thm -REAL_LE_NEG type thm -REAL_LE_MUL2 type thm -REAL_LE_MUL type thm -REAL_LE_MIN type thm -REAL_LE_MAX2 type thm -REAL_LE_MAX1 type thm -REAL_LE_MAX type thm -REAL_LE_LT type thm -REAL_LE_LNEG type thm -REAL_LE_LMUL_IMP type thm -REAL_LE_LMUL type thm -REAL_LE_LDIV_EQ type thm -REAL_LE_LDIV type thm -REAL_LE_LADD_IMP type thm -REAL_LE_LADD type thm -REAL_LE_INV_EQ type thm -REAL_LE_INV type thm -REAL_LE_EPSILON type thm -REAL_LE_DOUBLE type thm -REAL_LE_DIV type thm -REAL_LE_ANTISYM type thm -REAL_LE_ADDR type thm -REAL_LE_ADDL type thm -REAL_LE_ADD2 type thm -REAL_LE_ADD type thm -REAL_LE_01 type thm -REAL_LE1_POW2 type thm -REAL_LE type thm -REAL_LDISTRIB type thm -REAL_INVINV type thm -REAL_INV_POS type thm -REAL_INV_NZ type thm -REAL_INV_MUL type thm -REAL_INV_LT_ANTIMONO type thm -REAL_INV_LT1 type thm -REAL_INV_INV type thm -REAL_INV_INJ type thm -REAL_INV_EQ_0 type thm -REAL_INV_1OVER type thm -REAL_INV_0 type thm -REAL_INV1 type thm -REAL_INJ type thm -REAL_INF_MIN type thm -REAL_INF_LT type thm -REAL_INF_LE type thm -REAL_INF_CLOSE type thm -REAL_IMP_SUP_LE type thm -REAL_IMP_MIN_LE2 type thm -REAL_IMP_MAX_LE2 type thm -REAL_IMP_LE_SUP type thm -REAL_IMP_LE_INF type thm -REAL_IMP_INF_LE type thm -REAL_HALF_DOUBLE type thm -REAL_HALF_BETWEEN type thm -REAL_FACT_NZ type thm -REAL_EQ_SUB_RADD type thm -REAL_EQ_SUB_LADD type thm -REAL_EQ_RMUL_IMP type thm -REAL_EQ_RMUL type thm -REAL_EQ_RDIV_EQ type thm -REAL_EQ_RADD type thm -REAL_EQ_NEG type thm -REAL_EQ_MUL_LCANCEL type thm -REAL_EQ_LMUL_IMP type thm -REAL_EQ_LMUL2 type thm -REAL_EQ_LMUL type thm -REAL_EQ_LDIV_EQ type thm -REAL_EQ_LADD type thm -REAL_EQ_IMP_LE type thm -REAL_ENTIRE type thm -REAL_DOWN2 
type thm -REAL_DOWN type thm -REAL_DOUBLE type thm -REAL_DIV_RMUL_CANCEL type thm -REAL_DIV_RMUL type thm -REAL_DIV_REFL3 type thm -REAL_DIV_REFL2 type thm -REAL_DIV_REFL type thm -REAL_DIV_OUTER_CANCEL3 type thm -REAL_DIV_OUTER_CANCEL2 type thm -REAL_DIV_OUTER_CANCEL type thm -REAL_DIV_MUL2 type thm -REAL_DIV_LZERO type thm -REAL_DIV_LMUL_CANCEL type thm -REAL_DIV_LMUL type thm -REAL_DIV_INNER_CANCEL3 type thm -REAL_DIV_INNER_CANCEL2 type thm -REAL_DIV_INNER_CANCEL type thm -REAL_DIV_DENOM_CANCEL3 type thm -REAL_DIV_DENOM_CANCEL2 type thm -REAL_DIV_DENOM_CANCEL type thm -REAL_DIV_ADD type thm -REAL_DIFFSQ type thm -REAL_BIGNUM type thm -REAL_ARCH_LEAST type thm -REAL_ARCH type thm -REAL_ADD_SYM type thm -REAL_ADD_SUB_ALT type thm -REAL_ADD_SUB2 type thm -REAL_ADD_SUB type thm -REAL_ADD_RINV type thm -REAL_ADD_RID_UNIQ type thm -REAL_ADD_RID type thm -REAL_ADD_RDISTRIB type thm -REAL_ADD_RAT type thm -REAL_ADD_LINV type thm -REAL_ADD_LID_UNIQ type thm -REAL_ADD_LID type thm -REAL_ADD_LDISTRIB type thm -REAL_ADD_COMM type thm -REAL_ADD_ASSOC type thm -REAL_ADD2_SUB2 type thm -REAL_ADD type thm -REAL_ABS_TRIANGLE type thm -REAL_ABS_POS type thm -REAL_ABS_MUL type thm -REAL_ABS_0 type thm -REAL_10 type thm -REAL_1 type thm -REAL_0 type thm -REAL type thm -POW_ZERO_EQ type thm -POW_ZERO type thm -pow_rat type thm -POW_POS_LT type thm -POW_POS type thm -POW_PLUS1 type thm -POW_ONE type thm -POW_NZ type thm -POW_MUL type thm -POW_MINUS1 type thm -POW_M1 type thm -POW_LT type thm -POW_LE type thm -POW_INV type thm -POW_EQ type thm -POW_ADD type thm -POW_ABS type thm -POW_2_LT type thm -POW_2_LE1 type thm -POW_2 type thm -POW_1 type thm -POW_0 type thm -NUM_FLOOR_upper_bound type thm -NUM_FLOOR_LOWER_BOUND type thm -NUM_FLOOR_LET type thm -NUM_FLOOR_LE2 type thm -NUM_FLOOR_LE type thm -NUM_FLOOR_EQNS type thm -NUM_FLOOR_DIV_LOWERBOUND type thm -NUM_FLOOR_DIV type thm -NUM_FLOOR_BASE type thm -NUM_CEILING_NUM_FLOOR type thm -NUM_CEILING_LE type thm -neg_rat type thm 
-mult_ratr type thm -mult_ratl type thm -mult_rat type thm -mult_ints type thm -lt_ratr type thm -lt_ratl type thm -lt_rat type thm -lt_int type thm -le_ratr type thm -le_ratl type thm -le_rat type thm -LE_NUM_CEILING type thm -le_int type thm -INFINITE_REAL_UNIV type thm -eq_ratr type thm -eq_ratl type thm -eq_rat type thm -eq_ints type thm -div_ratr type thm -div_ratl type thm -div_rat type thm -add_ratr type thm -add_ratl type thm -add_rat type thm -add_ints type thm -ABS_ZERO type thm -ABS_TRIANGLE_SUB type thm -ABS_TRIANGLE_NEG type thm -ABS_TRIANGLE_LT type thm -ABS_TRIANGLE type thm -ABS_SUM type thm -ABS_SUB_ABS type thm -ABS_SUB type thm -ABS_STILLNZ type thm -ABS_SIGN2 type thm -ABS_SIGN type thm -ABS_REFL type thm -ABS_POW2 type thm -ABS_POS type thm -ABS_NZ type thm -ABS_NEG type thm -ABS_N type thm -ABS_MUL type thm -ABS_LT_MUL2 type thm -ABS_LE type thm -ABS_INV type thm -ABS_DIV type thm -ABS_CIRCLE type thm -ABS_CASES type thm -ABS_BOUNDS type thm -ABS_BOUND type thm -ABS_BETWEEN2 type thm -ABS_BETWEEN1 type thm -ABS_BETWEEN type thm -ABS_ABS type thm -ABS_1 type thm -ABS_0 type thm - -####################################### -# HOL4 - Real number theorems : Names # -####################################### -#REAL_DIV set real, division -#REAL_SUB set real, subtraction, additive inverse -#POW_2 set power, square, two -#REAL set real, definition, successor -#REAL_ADD set real, addition -#REAL_ADD_ASSOC set real, addition, associativity -#REAL_ADD_LDISTRIB set real, addition, left, distributivity -#REAL_ADD_RDISTRIB set real, addition, right, distributivity -#REAL_ADD_SYM set real, addition, symmetry -#REAL_DIV_LZERO set real, division, left, zero -#REAL_DOUBLE set real, double -#REAL_HALF_DOUBLE set real, half, double -#REAL_LDISTRIB set real, left, distributivity -#REAL_MUL set real, multiplication -#REAL_MUL_LID set real, multiplication, left, identity -#REAL_MUL_RID set real, multiplication, right, identity -#REAL_MUL_RINV set real, multiplication, 
right, inverse -#REAL_MUL_SYM set real, multiplication, symmetry -#REAL_NEG_ADD set real, negation, addition -#REAL_NEG_LMUL set real, negation, left, multiplication -#REAL_RDISTRIB set real, right, distributivity -#REAL_SUB_LDISTRIB set real, subtraction, left, distributivity -#REAL_SUB_RDISTRIB set real, subtraction, right, distributivity diff --git a/examples/lassie/sempre/interactive/lassie.grammar b/examples/lassie/sempre/interactive/lassie.grammar deleted file mode 100644 index 1a7465467b..0000000000 --- a/examples/lassie/sempre/interactive/lassie.grammar +++ /dev/null @@ -1,245 +0,0 @@ - ############################################################################ - # GRAMMAR FOR NATURAL PROOF EXPRESSIONS # - # # - # Refer to SEMPRE's documentation for general indications on rule # - # construction. # - # # - # Currently, domain knowledge comes from two sources. The lassie.lexicon # - # file contains component names (e.g. fs) and with their types # - # (e.g. thmlist->tactic, used for sound applications). Features of those # - # components (e.g. their natural name, their class/type) are read from # - # lassie.db into the TacticWorld. # - # # - # TacticWorld.java holds the main semantics of Lassie's operations, as # - # we piggy back on the DALExecutor for handling the semantic part of # - # this grammar. DALExecutor interprets semantic expression in a "world" # - # containing "items". We superifcially follow this convention where HOL # - # components can be considered the "items" of our "tactic-world". # - # # - # Generally, lowercased categories (e.g. $thm, $name) correspond to # - # types as found in the lexicon/database. Categories which are # - # capitalized are intermediates between lowercased categories and the # - # $tactic category. 
# - ############################################################################ - -########################################### -# Incorporated SML types: # -########################################### -# $tactic -# $thm -# ($thmlist) -# $thm->tactic -# $thmlist->tactic -# $tactic->tactic -# $thm->thm -# $termquotation->tactic -# $int->tactic->tactic -# $termquotation->[thm->tactic]->tactic -# $[thm->tactic]->tactic -# $termquotation*tactic->tactic -# $termquotationlist->tactic -# $termquotation->[thm->tactic]->thm->tactic -# $termquotationlist->[thm->tactic]->thm->tactic - - ################################################################ - # Define some abbreviations for calling into library functions # - ################################################################ -(def @int2string edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.int2string) -(def @app edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.app) -(def @intgoal edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.goalInt) -(def @termgoal edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.goalTerm) -(def @infix edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.infix) -(def @then edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.then) -(def @then1 edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.then1) -(def @cons edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.cons) -(def @list edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.list) -(def @quote edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.quote) -(def @parens edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.parens) -(def @op edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.op) -(def @fromFeature edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.fromFeature) -(def @intersect edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.intersect) -(def @set2string edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.set2string) -(def @choice 
edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.choice) -(def @tactic edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.tactic) -(def @mark edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.mark) -(def @command edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.command) -(def @thm edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.thm) - -(def @ChoiceFn edu.stanford.nlp.sempre.interactive.lassie.ChoiceFn) - -(def @appT1T2 (lambda t1 (lambda t2 (call @app (var t1) (var t2))))) -(def @infixT1T2T3 (lambda t1 (lambda t2 (lambda t3 (call @op (var t2) (var t1) (var t3)))))) - -################################### -# GRAMMAR SUPPORTING LITERAL HOL4 # -################################### - -(rule $ROOT ($tactic) (IdentityFn) (anchored 1)) -(rule $ROOT ($command) (IdentityFn) (anchored 1)) - -(rule $command - (back) - (ConstantFn (string "back")) (anchored 1)) - -## We can put parentheses around a tactic or a command (Missing: $ROOT $command ) -(for @cat ($tactic $thm->tactic) - (rule @cat (\( @cat \)) (lambda t (call @parens (var t))) (anchored 1))) - -############################################################################### -# literal HOL4 tactics, tacticals, ... # -# looked up in the Lassie database # -############################################################################### -# tactics -(rule $tactic - ($TOKEN) - (SimpleLexiconFn (type "tactic")) - (anchored 1)) - -# tactic modifiers -(rule $tactic->tactic - ($TOKEN) - (SimpleLexiconFn (type "tactic->tactic")) (anchored 1)) - -# thm tactics -(rule $thm->tactic - ($TOKEN) - (SimpleLexiconFn (type "thm->tactic") (anchored 1))) - -# thm list tactics: -(rule $thmlist->tactic - ($TOKEN) - (SimpleLexiconFn (type "thmlist->tactic") (anchored 1))) - -# term tactics -(rule $termquotation->tactic - ($TOKEN) - (SimpleLexiconFn (type "termquotation->tactic") (anchored 1))) - -# first_x_assum, last_x_assum, ... 
-(rule $[thm->tactic]->tactic - ($TOKEN) - (SimpleLexiconFn (type "[thm->tactic]->tactic") (anchored 1))) - -# qspec -(rule $termquotation->[thm->tactic]->thm->tactic - ($TOKEN) - (SimpleLexiconFn (type "termquotation->[thm->tactic]->thm->tactic") (anchored 1))) - -# qspecl -(rule $termquotationlist->[thm->tactic]->thm->tactic - ($TOKEN) - (SimpleLexiconFn (type "termquotationlist->[thm->tactic]->thm->tactic") (anchored 1))) - -# qpat_x_assum, qpat_assum, ... -(rule $termquotation->[thm->tactic]->tactic - ($TOKEN) - (SimpleLexiconFn (type "termquotation->[thm->tactic]->tactic")) (anchored 1)) - -# THEN, ORELSE -(rule $tactic->tactic->tactic - ($TOKEN) - (SimpleLexiconFn (type "tactic->tactic->tactic") (anchored 1))) - -#by, suffices_by -(rule $termquotation*tactic->tactic - ($TOKEN) - (SimpleLexiconFn (type "termquotation*tactic->tactic")) (anchored 1)) - -(rule $thm->thm - ($TOKEN) - (SimpleLexiconFn (type "thm->thm")) (anchored 1)) - -# Tactics can be produced by combining different constructs -(for @category - (($tactic->tactic $tactic) - ($thm->tactic $thm) - ($thmlist->tactic $thmlist) - ($termquotation->tactic $termquotation) - ($[thm->tactic]->tactic $thm->tactic)) - (rule $tactic @category @appT1T2 (anchored 1))) - -(rule $tactic - ($termquotation $termquotation*tactic->tactic $tactic) - @infixT1T2T3 (anchored 1)) - -## Partial applications for things like qpat_x_assum or first_x_assum -(rule $[thm->tactic]->tactic - ($termquotation->[thm->tactic]->tactic $termquotation) @appT1T2 (anchored 1)) -(rule $[thm->tactic]->thm->tactic - ($termquotation->[thm->tactic]->thm->tactic $termquotation) @appT1T2 (anchored 1)) -(rule $[thm->tactic]->thm->tactic - ($termquotationlist->[thm->tactic]->thm->tactic $termquotationlist) @appT1T2 (anchored 1)) -(rule $thm->tactic ($[thm->tactic]->thm->tactic $thm->tactic) @appT1T2 (anchored 1)) - -## Support for inline THEN -(rule $tactic ($tactic $tactic->tactic->tactic $tactic) @infixT1T2T3 (anchored 1)) - -### HOL4 Theorems 
-(rule $thm ($TOKEN) (lambda t (call @thm (var t))) (anchored 1)) -(rule $thm ($thm->thm $thm) @appT1T2 (anchored 1)) -(rule $Thms ($thm) (IdentityFn) (anchored 1)) -(rule $Thms ($thm , $Thms) (lambda t1 (lambda t2 (call @cons (var t1) (var t2)))) (anchored 1)) -(rule $thmlist ([ $Thms ]) (lambda thms (call @list (var thms))) (anchored 1)) -(rule $thmlist ([]) (ConstantFn (call @list (string " "))) (anchored 1)) -(rule $thmlist ([ ]) (ConstantFn (call @list (string " "))) (anchored 1)) - -##### HOL4 Terms -(rule $termquotation (` $term ') (lambda e (call @quote (var e))) (anchored 1)) -(rule $termquotation (` $term `) (lambda e (call @quote (var e))) (anchored 1)) -(rule $term ($PHRASE) (IdentityFn) (anchored 1)) -(rule $term ($term and $term) (lambda t1 (lambda t2 (call @op (string "∧") (var t1) (var t2)))) (anchored 1)) -(rule $term ($term or $term) (lambda t1 (lambda t2 (call @op (string "∨") (var t1) (var t2)))) (anchored 1)) - -(rule $Terms ($termquotation) (IdentityFn) (anchored 1)) -(rule $Terms ($termquotation , $Terms) (lambda t1 (lambda t2 (call @cons (var t1) (var t2)))) (anchored 1)) -(rule $termquotationlist ([ $Terms ]) (lambda termquotations (call @list (var termquotations))) (anchored 1)) -(rule $termquotationlist ([ ]) (ConstantFn (call @list (string " "))) (anchored 1)) -(rule $termquotationlist ([]) (ConstantFn (call @list (string " "))) (anchored 1)) - -(rule $tactic (Goal $int) (lambda n (call @intgoal (var n))) (anchored 1)) -(rule $tactic (Goal $termquotation) (lambda t (call @termgoal (var t))) (anchored 1)) -(rule $tactic (End) (ConstantFn (string "ALLGOALS")) (anchored 1)) - -### Other -(rule $Number ($TOKEN) (NumberFn) (anchored 1)) -(rule $int ($Number) (lambda n (call @int2string (var n))) (anchored 1)) - -## tactic combinators -#(rule $tactic->tactic->tactic -# ($TOKEN) -# (SimpleLexiconFn (type "tactic->tactic->tactic") (anchored 1))) -## ntac -#(rule $int->tactic->tactic -# ($TOKEN) -# (SimpleLexiconFn (type "int->tactic->tactic")) 
(anchored 1)) -#(rule $tactic->tactic ($int->tactic->tactic $int) @appT1T2 (anchored 1)) -#(rule $termquotation*tactic->tactic -# ($TOKEN) -# (SimpleLexiconFn (type "termquotation*tactic->tactic")) (anchored 1)) -#(rule $tactic -# ($termquotation $termquotation*tactic->tactic $tactic) -# @infixT1T2T3 (anchored 1)) -# -############################################################### -## literal SML objects, looked up from the database # -############################################################### -# -# -##### HOL4 Theorems -#(rule $thm ($thm->thm $thm) @appT1T2 (anchored 1)) -#(rule $thm ($TOKEN) (IdentityFn) (anchored 1)) -# -### Lists -#(rule $Thms ($thm) (IdentityFn) (anchored 1)) -#(rule $Thms ($thm , $Thms) (lambda t1 (lambda t2 (call @cons (var t1) (var t2)))) (anchored 1)) -#(rule $thmlist ([ $Thms ]) (lambda thms (call @list (var thms))) (anchored 1)) -#(rule $thmlist ([]) (ConstantFn (call @list (string " "))) (anchored 1)) -#(rule $thmlist ([ ]) (ConstantFn (call @list (string " "))) (anchored 1)) -# -# -###Commands to the interactive prove interface -#(rule $command -# (back) -# (ConstantFn (string "(b)")) (anchored 1)) -# -# diff --git a/examples/lassie/sempre/interactive/lassie.grammar_old b/examples/lassie/sempre/interactive/lassie.grammar_old deleted file mode 100644 index 454245e0af..0000000000 --- a/examples/lassie/sempre/interactive/lassie.grammar_old +++ /dev/null @@ -1,334 +0,0 @@ - ############################################################################ - # GRAMMAR FOR NATURAL PROOF EXPRESSIONS # - # # - # Refer to SEMPRE's documentation for general indications on rule # - # construction. # - # # - # Currently, domain knowledge comes from two sources. The lassie.lexicon # - # file contains component names (e.g. fs) and with their types # - # (e.g. thmlist->tactic, used for sound applications). Features of those # - # components (e.g. their natural name, their class/type) are read from # - # lassie.db into the TacticWorld. 
# - # # - # TacticWorld.java holds the main semantics of Lassie's operations, as # - # we piggy back on the DALExecutor for handling the semantic part of # - # this grammar. DALExecutor interprets semantic expression in a "world" # - # containing "items". We superifcially follow this convention where HOL # - # components can be considered the "items" of our "tactic-world". # - # # - # Generally, lowercased categories (e.g. $thm, $name) correspond to # - # types as found in the lexicon/database. Categories which are # - # capitalized are intermediates between lowercased categories and the # - # $tactic category. # - ############################################################################ - -########################################### -# Incorporated SML types: # -########################################### -# $tactic -# $thm -# ($thmlist) -# $thm->tactic -# $thmlist->tactic -# $tactic->tactic -# $thm->thm -# $termquotation->tactic -# $int->tactic->tactic -# $termquotation->[thm->tactic]->tactic -# $[thm->tactic]->tactic -# $termquotation*tactic->tactic -# $termquotationlist->tactic -# $termquotation->[thm->tactic]->thm->tactic -# $termquotationlist->[thm->tactic]->thm->tactic - - ################################################################ - # Define some abbreviations for calling into library functions # - ################################################################ -(def @int2string edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.int2string) -(def @app edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.app) -(def @infix edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.infix) -(def @then edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.then) -(def @then1 edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.then1) -(def @cons edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.cons) -(def @list edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.list) -(def @quote 
edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.quote) -(def @parens edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.parens) -(def @op edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.op) -(def @fromFeature edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.fromFeature) -(def @intersect edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.intersect) -(def @set2string edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.set2string) -(def @choice edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.choice) -(def @tactic edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.tactic) -(def @command edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.command) - -(def @ChoiceFn edu.stanford.nlp.sempre.interactive.lassie.ChoiceFn) - -(def @appT1T2 (lambda t1 (lambda t2 (call @app (var t1) (var t2))))) -(def @infixT1T2T3 (lambda t1 (lambda t2 (lambda t3 (call @op (var t2) (var t1) (var t3)))))) - -################################### -# GRAMMAR SUPPORTING LITERAL HOL4 # -################################### - -# The root is always a tactic or a command -(rule $ROOT ($tactic) (IdentityFn) (anchored 1)) -(rule $ROOT ($command) (lambda c (call @command (var c))) (anchored 1)) - -# Tactics can be produced by combining different constructs -(for @category - (($thm->tactic $thm) ($tactic->tactic $tactic) ($thmlist->tactic $thmlist) - ($termquotation->tactic $termquotation) - ($[thm->tactic]->tactic $thm->tactic)) - (rule $tactic @category @appT1T2 (anchored 1))) -(rule $tactic ($termquotation $termquotation*tactic->tactic $tactic) @infixT1T2T3 (anchored 1)) -# Support for inline THEN -(rule $tactic ($tactic $tactic->tactic->tactic $tactic) @infixT1T2T3 (anchored 1)) - -# Partial applications for things like qpat_x_assum or first_x_assum -(rule $[thm->tactic]->tactic - ($termquotation->[thm->tactic]->tactic $termquotation) @appT1T2 (anchored 1)) -(rule $[thm->tactic]->thm->tactic - ($termquotation->[thm->tactic]->thm->tactic $termquotation) @appT1T2 
(anchored 1)) -(rule $[thm->tactic]->thm->tactic - ($termquotationlist->[thm->tactic]->thm->tactic $termquotationlist) @appT1T2 (anchored 1)) -(rule $thm->tactic ($[thm->tactic]->thm->tactic $thm->tactic) @appT1T2 (anchored 1)) -(rule $tactic->tactic ($int->tactic->tactic $int) @appT1T2 (anchored 1)) - -# We can put parentheses around a tactic or a command -(for @cat ($ROOT $command $tactic) - (rule @cat (\( @cat \)) (lambda t (call @parens (var t))) (anchored 1))) - -############################################################## -# literal SML objects, looked up from the database # -############################################################## -# tactics -(rule $tactic - ($TOKEN) - (SimpleLexiconFn (type "tactic")) (anchored 1)) -# tactic modifiers -(rule $tactic->tactic - ($TOKEN) - (SimpleLexiconFn (type "tactic->tactic")) (anchored 1)) -# thm tactics -(rule $thm->tactic - ($TOKEN) - (SimpleLexiconFn (type "thm->tactic") (anchored 1))) -# thm list tactics: -(rule $thmlist->tactic - ($TOKEN) - (SimpleLexiconFn (type "thmlist->tactic") (anchored 1))) -# tactic combinators -(rule $tactic->tactic->tactic - ($TOKEN) - (SimpleLexiconFn (type "tactic->tactic->tactic") (anchored 1))) -# term tactics -(rule $termquotation->tactic - ($TOKEN) - (SimpleLexiconFn (type "termquotation->tactic") (anchored 1))) -# first_x_assum, last_x_assum, ... -(rule $[thm->tactic]->tactic - ($TOKEN) - (SimpleLexiconFn (type "[thm->tactic]->tactic") (anchored 1))) -# qspec -(rule $termquotation->[thm->tactic]->thm->tactic - ($TOKEN) - (SimpleLexiconFn (type "termquotation->[thm->tactic]->thm->tactic") (anchored 1))) -# qspecl -(rule $termquotationlist->[thm->tactic]->thm->tactic - ($TOKEN) - (SimpleLexiconFn (type "termquotationlist->[thm->tactic]->thm->tactic") (anchored 1))) -# qpat_x_assum, qpat_assum, ... 
-(rule $termquotation->[thm->tactic]->tactic - ($TOKEN) - (SimpleLexiconFn (type "termquotation->[thm->tactic]->tactic")) (anchored 1)) -# ntac -(rule $int->tactic->tactic - ($TOKEN) - (SimpleLexiconFn (type "int->tactic->tactic")) (anchored 1)) -(rule $termquotation*tactic->tactic - ($TOKEN) - (SimpleLexiconFn (type "termquotation*tactic->tactic")) (anchored 1)) -(rule $thm->thm - ($TOKEN) - (SimpleLexiconFn (type "thm->thm")) (anchored 1)) - -#### HOL4 Terms -(rule $termquotation (` $term ') (lambda e (call @quote (var e))) (anchored 1)) -(rule $termquotation (' $term ') (lambda e (call @quote (var e))) (anchored 1)) -(rule $term ($PHRASE) (IdentityFn) (anchored 1)) -(rule $term ($term and $term) (lambda t1 (lambda t2 (call @op (string "∧") (var t1) (var t2)))) (anchored 1)) -(rule $term ($term or $term) (lambda t1 (lambda t2 (call @op (string "∨") (var t1) (var t2)))) (anchored 1)) - -(rule $Terms ($termquotation) (IdentityFn) (anchored 1)) -(rule $Terms ($termquotation , $Terms) (lambda t1 (lambda t2 (call @cons (var t1) (var t2)))) (anchored 1)) -(rule $termquotationlist ([ $Terms ]) (lambda termquotations (call @list (var termquotations))) (anchored 1)) -(rule $termquotationlist ([ ]) (ConstantFn (call @list (string " "))) (anchored 1)) -(rule $termquotationlist ([]) (ConstantFn (call @list (string " "))) (anchored 1)) - -#### HOL4 Theorems -(rule $thm ($thm->thm $thm) @appT1T2 (anchored 1)) -(rule $thm ($TOKEN) (IdentityFn) (anchored 1)) - -## Lists -(rule $Thms ($thm) (IdentityFn) (anchored 1)) -(rule $Thms ($thm , $Thms) (lambda t1 (lambda t2 (call @cons (var t1) (var t2)))) (anchored 1)) -(rule $thmlist ([ $Thms ]) (lambda thms (call @list (var thms))) (anchored 1)) -(rule $thmlist ([]) (ConstantFn (call @list (string " "))) (anchored 1)) -(rule $thmlist ([ ]) (ConstantFn (call @list (string " "))) (anchored 1)) - - -##Commands to the interactive prove interface -(rule $command - (back) - (ConstantFn (string "(b)")) (anchored 1)) - -## Other -(rule 
$Number ($TOKEN) (NumberFn) (anchored 1)) -(rule $int ($Number) (lambda n (call @int2string (var n))) (anchored 1)) - -################################################################ -## GRAMMAR SUPPORTING ABSTRACT DESCRIPTIONS OF HOL4 COMPONENTS # -################################################################ - -## Sets and their intersections, by constructing imperative sentences - -## Lexemes -#(rule $type_lx ($PHRASE) (SimpleLexiconFn (type type)) (anchored 1)) -#(rule $name_lx ($PHRASE) (SimpleLexiconFn (type name)) (anchored 1)) -#(rule $AV_lx ($PHRASE) (SimpleLexiconFn (type AV)) (anchored 1)) -#(rule $VP_lx ($PHRASE) (SimpleLexiconFn (type VP)) (anchored 1)) -#(rule $OBJ_lx ($PHRASE) (SimpleLexiconFn (type OBJ)) (anchored 1)) -#(rule $CP_lx ($PHRASE) (SimpleLexiconFn (type CP)) (anchored 1)) -#(rule $PREARG_lx ($PHRASE) (SimpleLexiconFn (type PREARG)) (anchored 1)) -# -#(rule $set_lx ($PHRASE) (SimpleLexiconFn (type set)) (anchored 1)) -# -### Get sets -#(def @fromFeatureX (lambda x (call @fromFeature (var x)))) -#(rule $type ($type_lx) @fromFeatureX (anchored 1)) -#(rule $name ($name_lx) @fromFeatureX (anchored 1)) -#(rule $AV ($AV_lx) @fromFeatureX (anchored 1)) -#(rule $VP ($VP_lx) @fromFeatureX (anchored 1)) -#(rule $OBJ ($OBJ_lx) @fromFeatureX (anchored 1)) -#(rule $CP ($CP_lx) @fromFeatureX (anchored 1)) -#(rule $PREARG ($PREARG_lx) @fromFeatureX (anchored 1)) -# -#(rule $set ($set_lx) @fromFeatureX (anchored 1)) -# -### Syntactically correct intersections -#(def @intersectS1S2 (lambda s1 (lambda s2 (call @intersect (var s1) (var s2))))) -#(rule $VP ($AV $VP) @intersectS1S2 (anchored 1)) -#(rule $VP ($VP $OBJ) @intersectS1S2 (anchored 1)) -#(rule $VP ($VP $CP) @intersectS1S2 (anchored 1)) -#(rule $VP ($VP $AV) @intersectS1S2 (anchored 1)) -# -#(for @p (on with the) -# (rule $Prep (@p) (ConstantFn null) (anchored 1))) -#(rule $set ($set $set) @intersectS1S2 (anchored 1)) -# -#(for @a (use apply) -# (rule $Apply (@a) (ConstantFn null) (anchored 1))) 
-# -#(rule $VP' ($Apply $name) (SelectFn 1) (anchored 1)) -#(rule $VP' ($name) (IdentityFn) (anchored 1)) -#(rule $VP' ($VP' $type) @intersectS1S2 (anchored 1)) -#(rule $VP' ($type $VP') @intersectS1S2 (anchored 1)) -#(rule $VP' ($VP) (IdentityFn) (anchored 1)) -#(rule $VP' ($VP' $PREARG) @intersectS1S2 (anchored 1)) -#(rule $VP' ($VP' with) (SelectFn 0) (anchored 1)) -# -### Collapsing sets to single components -#(rule $tactic' ($VP') (lambda s (call @choice (call @intersect (var s) (call @fromFeature "type.tactic")))) (anchored 1)) -#(rule $tactic ($tactic') (interactive.lassie.ChoiceFn) (anchored 1)) -# -#(rule $thm->tactic' ($VP') (lambda s (call @choice (call @intersect (var s) (call @fromFeature "type.thm -> tactic")))) (anchored 1)) -#(rule $thm->tactic ($thm->tactic') (interactive.lassie.ChoiceFn) (anchored 1)) -# -#(rule $thmlist->tactic' ($VP') (lambda s (call @choice (call @intersect (var s) (call @fromFeature "type.thm list -> tactic")))) (anchored 1)) -#(rule $thmlist->tactic ($thmlist->tactic') (interactive.lassie.ChoiceFn) (anchored 1)) -# -#(rule $tactic->tactic' ($VP') (lambda s (call @choice (call @intersect (var s) (call @fromFeature "type.tactic -> tactic")))) (anchored 1)) -#(rule $tactic->tactic ($tactic->tactic') (interactive.lassie.ChoiceFn) (anchored 1)) -# -#(rule $thm->thm' ($VP') (lambda s (call @choice (call @intersect (var s) (call @fromFeature "type.thm -> thm")))) (anchored 1)) -#(rule $thm->thm ($thm->thm') (interactive.lassie.ChoiceFn) (anchored 1)) -# -#(rule $termquotation->tactic' ($VP') (lambda s (call @choice (call @intersect (var s) (call @fromFeature "type.term quotation -> tactic")))) (anchored 1)) -#(rule $termquotation->tactic ($termquotation->tactic') (interactive.lassie.ChoiceFn) (anchored 1)) -# -#(rule $int->tactic->tactic' ($VP') (lambda s (call @choice (call @intersect (var s) (call @fromFeature "type.int -> tactic -> tactic")))) (anchored 1)) -#(rule $int->tactic->tactic ($int->tactic->tactic') 
(interactive.lassie.ChoiceFn) (anchored 1)) - - -## Theorems -#(rule $thm' ($set) (lambda s (call @choice (call @intersect (var s) (call @fromFeature "type.thm")))) (anchored 1)) -#(rule $thm ($thm') (interactive.lassie.ChoiceFn) (anchored 1)) -# -## Casting sets as lists -#(rule $Thms (all $set theorems) -# (lambda s (call @set2string (call @intersect (var s) (call @fromFeature "type.thm")))) -# (anchored 1)) -#(rule $Thms ($set theorems) -# (lambda s (call @set2string (call @intersect (var s) (call @fromFeature "type.thm")))) -# (anchored 1)) -#(rule $Thms (all $set) -# (lambda s (call @set2string (call @intersect (var s) (call @fromFeature "type.thm")))) -# (anchored 1)) - -## Typed wildcards -#(rule $tactic (\( $PHRASE : tactic \)) (IdentityFn) (anchored 1)) -#(rule $thm (\( $PHRASE : thm \)) (IdentityFn) (anchored 1)) -#(rule $thm->tactic (\( $PHRASE : thm->tactic \)) (IdentityFn) (anchored 1)) -#(rule $thmlist->tactic (\( $PHRASE : thm list -> tactic \)) (IdentityFn) (anchored 1)) -#(rule $tactic->tactic (\( $PHRASE : tactic -> tactic \)) (IdentityFn) (anchored 1)) -#(rule $thm->thm (\( $PHRASE : thm -> thm \)) (IdentityFn) (anchored 1)) -#(rule $termquotation->tactic (\( $PHRASE : term quotation -> tactic \)) (IdentityFn) (anchored 1)) -#(rule $int->tactic->tactic (\( $PHRASE : int -> tactic -> tactic \)) (IdentityFn) (anchored 1)) -#(rule $termquotation->[thm->tactic]->tactic (\( $PHRASE : term quotation -> \( thm -> tactic \) -> tactic \)) (IdentityFn) (anchored 1)) -#(rule $[thm->tactic]->tactic (\( $PHRASE : \( thm -> tactic \) -> tactic \)) (IdentityFn) (anchored 1)) -#(rule $termquotation*tactic->tactic (\( $PHRASE : term quotation * tactic -> tactic \)) (IdentityFn) (anchored 1)) -#(rule $termquotationlist->tactic (\( $PHRASE : term quotation list -> tactic \)) (IdentityFn) (anchored 1)) -#(rule $termquotation->[thm->tactic]->thm->tactic (\( $PHRASE : term quotation -> \( thm -> tactic \) -> thm -> tactic \)) (IdentityFn) (anchored 1)) -#(rule 
$termquotationlist->[thm->tactic]->thm->tactic (\( $PHRASE : term quotation list -> \( thm -> tactic \) -> thm -> tactic \)) (IdentityFn) (anchored 1)) -# -############################## -## NATURAL LANGUAGE SYNONYMS # -############################## -# -## Theorem Lists -#(rule $Thms ($Thms and $Thms) (lambda t1 (lambda t2 (call @cons (var t1) (var t2)))) (anchored 1)) -#(rule $thmlist (nothing) (ConstantFn []) (anchored 1)) -#(rule $thmlist (empty list) (ConstantFn []) (anchored 1)) -# -## Tactic composition -#(rule $tactic ($tactic then $tactic) (lambda t1 (lambda t2 (call @then (var t1) (var t2)))) (anchored 1)) -#(rule $tactic ($tactic then $tactic on the first goal) (lambda t1 (lambda t2 (call @then1 (var t1) (var t2)))) (anchored 1)) -# -############## -## OPTIONALS # -############## -## App -#(for @cat ($thm->tactic $thmlist->tactic $tactic->tactic $thm->thm $termquotation->tactic $int->tactic->tactic) -# (rule @cat ($Apply @cat) (SelectFn 1) (anchored 1))) -# -## Args -#(for @cat ($tactic $thm $thmlist $termquotation) -# (rule @cat ($Prep @cat) (SelectFn 1) (anchored 1))) - -########### UNUSED ########### -#(rule $tactic->tactic ($int->tactic->tactic $int) @appT1T2 (anchored 1)) -#(rule $[thm->tactic]->tactic ($termquotation->[thm->tactic]->tactic $termquotation) @appT1T2 (anchored 1)) -#(rule $termquotation (` $PHRASE `) (lambda e (call @quote (var e))) (anchored 1)) -# TODO: Necessary? -#(rule $thm -# ($TOKEN) -# (SimpleLexiconFn (type "thm")) (anchored 1)) -#Disabled to make learning easier... 
See test on "repeat" in LassieTests -#(rule $thmlist ($Thms) (lambda thms (call @list (var thms))) (anchored 1)) -#(rule $termquotationlist ($Terms) (lambda termquotations (call @list (var termquotations))) (anchored 1)) -## Tactic composition -#(rule $tactic ($tactic THEN $tactic) (lambda t1 (lambda t2 (call @then (var t1) (var t2)))) (anchored 1)) -#(rule $tactic ($tactic THEN1 $tactic) (lambda t1 (lambda t2 (call @then1 (var t1) (var t2)))) (anchored 1)) -#(rule $tactic ($tactic \\ $tactic) (lambda t1 (lambda t2 (call @then (var t1) (var t2)))) (anchored 1)) -#(rule $tactic ($tactic >- $tactic) (lambda t1 (lambda t2 (call @then1 (var t1) (var t2)))) (anchored 1)) -# \ No newline at end of file diff --git a/examples/lassie/sempre/interactive/lassie.grammar_v2.0 b/examples/lassie/sempre/interactive/lassie.grammar_v2.0 deleted file mode 100644 index 92c5c5a114..0000000000 --- a/examples/lassie/sempre/interactive/lassie.grammar_v2.0 +++ /dev/null @@ -1,366 +0,0 @@ - ############################################################################ - # GRAMMAR FOR NATURAL PROOF EXPRESSIONS # - # # - # Refer to SEMPRE's documentation for general indications on rule # - # construction. # - # # - # Currently, domain knowledge comes from two sources. The lassie.lexicon # - # file contains component names (e.g. fs) and with their types # - # (e.g. thmlist->tactic, used for sound applications). Features of those # - # components (e.g. their natural name, their class/type) are read from # - # lassie.db into the TacticWorld. # - # # - # TacticWorld.java holds the main semantics of Lassie's operations, as # - # we piggy back on the DALExecutor for handling the semantic part of # - # this grammar. DALExecutor interprets semantic expression in a "world" # - # containing "items". We superifcially follow this convention where HOL # - # components can be considered the "items" of our "tactic-world". # - # # - # Generally, lowercased categories (e.g. 
$thm, $name) correspond to # - # types as found in the lexicon/database. Categories which are # - # capitalized are intermediates between lowercased categories and the # - # $tactic category. # - ############################################################################ - -########################################### -# Incorporated SML types: # -########################################### -# $tactic -# $thm -# ($thmlist) -# $thm->tactic -# $thmlist->tactic -# $tactic->tactic -# $thm->thm -# $termquotation->tactic -# $int->tactic->tactic -# $termquotation->[thm->tactic]->tactic -# $[thm->tactic]->tactic -# $termquotation*tactic->tactic -# $termquotationlist->tactic -# $termquotation->[thm->tactic]->thm->tactic -# $termquotationlist->[thm->tactic]->thm->tactic - - ################################################################ - # Define some abbreviations for calling into library functions # - ################################################################ -(def @int2string edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.int2string) -(def @app edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.app) -(def @infix edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.infix) -(def @then edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.then) -(def @then1 edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.then1) -(def @cons edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.cons) -(def @list edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.list) -(def @quote edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.quote) -(def @parens edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.parens) -(def @op edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.op) -(def @fromFeature edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.fromFeature) -(def @intersect edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.intersect) -(def @set2string edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.set2string) -(def @choice 
edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.choice) -(def @tactic edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.tactic) -(def @command edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.command) - -(def @ChoiceFn edu.stanford.nlp.sempre.interactive.lassie.ChoiceFn) - -(def @appT1T2 (lambda t1 (lambda t2 (call @app (var t1) (var t2))))) - -(def @infixT1T2T3 (lambda t1 (lambda t2 (lambda t3 (call @infix (var t1) (var t2) (var t3)))))) - -################################### -# GRAMMAR SUPPORTING LITERAL HOL4 # -################################### - -# The root is always a tactic or a command -(rule $ROOT ($tactic) (lambda t (call @tactic (var t))) (anchored 1)) -(rule $ROOT ($command) (lambda c (call @command (var c))) (anchored 1)) - -# We can put parentheses around a tactic -(for @cat ($tactic $command) - (rule @cat (\( @cat \)) (lambda t (call @parens (var t))) (anchored 1))) - -# Literal SML tactics: -(rule $tactic - ($TOKEN) - (SimpleLexiconFn (type "tactic")) (anchored 1)) - -# Literal SML tactic modifiers -(rule $tactic->tactic - ($TOKEN) - (SimpleLexiconFn (type "tactic->tactic")) (anchored 1)) - -# Literal SML thm tactics: -(rule $thm->tactic - ($TOKEN) - (SimpleLexiconFn (type "thm->tactic") (anchored 1))) - -# Literal SML thm list tactics: -(rule $thmlist->tactic - ($TOKEN) - (SimpleLexiconFn (type "thmlist->tactic") (anchored 1))) - -# Literal Tactic combinators -(rule $tactic->tactic->tactic - ($TOKEN) - (SimpleLexiconFn (type "tactic->tactic->tactic") (anchored 1))) - -# Literal Term tactics -(rule $termquotation->tactic - ($TOKEN) - (SimpleLexiconFn (type "termquotation->tactic") (anchored 1))) - -(rule $tactic ($thm->tactic $thm) @appT1T2 (anchored 1)) -(rule $tactic ($tactic->tactic $tactic) @appT1T2 (anchored 1)) -(rule $tactic ($thmlist->tactic $thmlist) @appT1T2 (anchored 1)) -(rule $tactic ($tactic $tactic->tactic->tactic $tactic) @infixT1T2T3 (anchored 1)) -(rule $tactic ($termquotation->tactic $termquotation) @appT1T2 
(anchored 1)) - -(rule $termquotation (` $PHRASE `) (lambda e (call @quote (var e))) (anchored 1)) - -#Thm fallback: -(rule $thm ($TOKEN) (IdentityFn) (anchored 1)) - -# Lists -(rule $thmlist ([ ]) (ConstantFn []) (anchored 1)) -(rule $thmlist ([ $Thms ]) (lambda thms (call @list (var thms))) (anchored 1)) - -(rule $Thms ($TOKEN) (IdentityFn) (anchored 1)) -(rule $Thms ($thm , $Thms) (lambda t1 (lambda t2 (call @cons (var t1) (var t2)))) (anchored 1)) - -#(rule $tactic ($thmlist->tactic $thmlist) @appT1T2 (anchored 1)) -#(rule $thm ($thm->thm $thm) @appT1T2 (anchored 1)) - -#(rule $tactic->tactic ($int->tactic->tactic $int) @appT1T2 (anchored 1)) -#(rule $[thm->tactic]->tactic ($termquotation->[thm->tactic]->tactic $termquotation) @appT1T2 (anchored 1)) -#(rule $tactic ($[thm->tactic]->tactic $thm->tactic) @appT1T2 (anchored 1)) -#(rule $tactic ($termquotationlist->tactic $termquotationlist) @appT1T2 (anchored 1)) -#(rule $[thm->tactic]->thm->tactic ($termquotation->[thm->tactic]->thm->tactic $termquotation) @appT1T2 (anchored 1)) -#(rule $[thm->tactic]->thm->tactic ($termquotationlist->[thm->tactic]->thm->tactic $termquotationlist) @appT1T2 (anchored 1)) -#(rule $thm->tactic ($[thm->tactic]->thm->tactic $thm->tactic) @appT1T2 (anchored 1)) - - - -## Literal Components Import -#(rule $tactic -# ($TOKEN) -# (SimpleLexiconFn (type "tactic")) (anchored 1)) -#(rule $thm -# ($TOKEN) -# (SimpleLexiconFn (type "thm")) (anchored 1)) -#(rule $thm->tactic -# ($TOKEN) -# (SimpleLexiconFn (type "thm->tactic")) (anchored 1)) -#(rule $thmlist->tactic -# ($TOKEN) -# (SimpleLexiconFn (type "thmlist->tactic")) (anchored 1)) -#(rule $tactic->tactic -# ($TOKEN) -# (SimpleLexiconFn (type "tactic->tactic")) (anchored 1)) -#(rule $thm->thm -# ($TOKEN) -# (SimpleLexiconFn (type "thm->thm")) (anchored 1)) -#(rule $termquotation->tactic -# ($TOKEN) -# (SimpleLexiconFn (type term "quotation->tactic")) (anchored 1)) -#(rule $int->tactic->tactic -# ($TOKEN) -# (SimpleLexiconFn (type 
"int->tactic->tactic")) (anchored 1)) -#(rule $termquotation->[thm->tactic]->tactic -# ($TOKEN) -# (SimpleLexiconFn (type "termquotation->[thm->tactic]->tactic")) (anchored 1)) -#(rule $[thm->tactic]->tactic -# ($TOKEN) -# (SimpleLexiconFn (type "[thm->tactic]->tactic")) (anchored 1)) -#(rule $termquotation*tactic->tactic -# ($TOKEN) -# (SimpleLexiconFn (type "termquotation*tactic->tactic")) (anchored 1)) -#(rule $termquotationlist->tactic -# ($TOKEN) -# (SimpleLexiconFn (type "termquotationlist->tactic")) (anchored 1)) -#(rule $termquotation->[thm->tactic]->thm->tactic -# ($TOKEN) -# (SimpleLexiconFn (type "termquotation->[thm->tactic]->thm->tactic")) (anchored 1)) -#(rule $termquotationlist->[thm->tactic]->thm->tactic -# ($TOKEN) -# (SimpleLexiconFn (type "termquotationlist->[thm->tactic]->thm->tactic")) (anchored 1)) -##Commands to the interactive prove interface -#(rule $command -# ($TOKEN) -# (SimpleLexiconFn (type "command") (anchored 1))) -# -#(def @appT1T2 (lambda t1 (lambda t2 (call @app (var t1) (var t2))))) -# -## Applications -#(rule $tactic ($thm->tactic $thm) @appT1T2 (anchored 1)) -#(rule $tactic ($thmlist->tactic $thmlist) @appT1T2 (anchored 1)) -#(rule $tactic ($tactic->tactic $tactic) @appT1T2 (anchored 1)) -#(rule $thm ($thm->thm $thm) @appT1T2 (anchored 1)) -#(rule $tactic ($termquotation->tactic $termquotation) @appT1T2 (anchored 1)) -#(rule $tactic->tactic ($int->tactic->tactic $int) @appT1T2 (anchored 1)) -#(rule $[thm->tactic]->tactic ($termquotation->[thm->tactic]->tactic $termquotation) @appT1T2 (anchored 1)) -#(rule $tactic ($[thm->tactic]->tactic $thm->tactic) @appT1T2 (anchored 1)) -#(rule $tactic ($termquotationlist->tactic $termquotationlist) @appT1T2 (anchored 1)) -#(rule $[thm->tactic]->thm->tactic ($termquotation->[thm->tactic]->thm->tactic $termquotation) @appT1T2 (anchored 1)) -#(rule $[thm->tactic]->thm->tactic ($termquotationlist->[thm->tactic]->thm->tactic $termquotationlist) @appT1T2 (anchored 1)) -#(rule $thm->tactic 
($[thm->tactic]->thm->tactic $thm->tactic) @appT1T2 (anchored 1)) -# -## infix -#(rule $tactic -# ($termquotation $termquotation*tactic->tactic $tactic) -# (lambda q (lambda by (lambda t (call @op (var by) (var q) (var t))))) (anchored 1)) -# -## Typed wildcards -#(rule $tactic (\( $PHRASE : tactic \)) (IdentityFn) (anchored 1)) -#(rule $thm (\( $PHRASE : thm \)) (IdentityFn) (anchored 1)) -#(rule $thm->tactic (\( $PHRASE : thm->tactic \)) (IdentityFn) (anchored 1)) -#(rule $thmlist->tactic (\( $PHRASE : thm list -> tactic \)) (IdentityFn) (anchored 1)) -#(rule $tactic->tactic (\( $PHRASE : tactic -> tactic \)) (IdentityFn) (anchored 1)) -#(rule $thm->thm (\( $PHRASE : thm -> thm \)) (IdentityFn) (anchored 1)) -#(rule $termquotation->tactic (\( $PHRASE : term quotation -> tactic \)) (IdentityFn) (anchored 1)) -#(rule $int->tactic->tactic (\( $PHRASE : int -> tactic -> tactic \)) (IdentityFn) (anchored 1)) -#(rule $termquotation->[thm->tactic]->tactic (\( $PHRASE : term quotation -> \( thm -> tactic \) -> tactic \)) (IdentityFn) (anchored 1)) -#(rule $[thm->tactic]->tactic (\( $PHRASE : \( thm -> tactic \) -> tactic \)) (IdentityFn) (anchored 1)) -#(rule $termquotation*tactic->tactic (\( $PHRASE : term quotation * tactic -> tactic \)) (IdentityFn) (anchored 1)) -#(rule $termquotationlist->tactic (\( $PHRASE : term quotation list -> tactic \)) (IdentityFn) (anchored 1)) -#(rule $termquotation->[thm->tactic]->thm->tactic (\( $PHRASE : term quotation -> \( thm -> tactic \) -> thm -> tactic \)) (IdentityFn) (anchored 1)) -#(rule $termquotationlist->[thm->tactic]->thm->tactic (\( $PHRASE : term quotation list -> \( thm -> tactic \) -> thm -> tactic \)) (IdentityFn) (anchored 1)) -# -## Tactic composition -#(rule $tactic ($tactic THEN $tactic) (lambda t1 (lambda t2 (call @then (var t1) (var t2)))) (anchored 1)) -#(rule $tactic ($tactic THEN1 $tactic) (lambda t1 (lambda t2 (call @then1 (var t1) (var t2)))) (anchored 1)) -#(rule $tactic ($tactic \\ $tactic) (lambda t1 
(lambda t2 (call @then (var t1) (var t2)))) (anchored 1)) -#(rule $tactic ($tactic >- $tactic) (lambda t1 (lambda t2 (call @then1 (var t1) (var t2)))) (anchored 1)) -# -## Lists -#(rule $Thms ($thm) (IdentityFn) (anchored 1)) -#(rule $Thms ($thm , $Thms) (lambda t1 (lambda t2 (call @cons (var t1) (var t2)))) (anchored 1)) -#(rule $thmlist ([ $Thms ]) (lambda thms (call @list (var thms))) (anchored 1)) -#(rule $thmlist ($Thms) (lambda thms (call @list (var thms))) (anchored 1)) -#(rule $thmlist ([ ]) (ConstantFn []) (anchored 1)) -# -#(rule $Terms ($termquotation) (IdentityFn) (anchored 1)) -#(rule $Terms ($termquotation , $Terms) (lambda t1 (lambda t2 (call @cons (var t1) (var t2)))) (anchored 1)) -#(rule $termquotationlist ([ $Terms ]) (lambda termquotations (call @list (var termquotations))) (anchored 1)) -#(rule $termquotationlist ($Terms) (lambda termquotations (call @list (var termquotations))) (anchored 1)) -#(rule $termquotationlist ([ ]) (ConstantFn []) (anchored 1)) -# -## Other -#(rule $termquotation (` $PHRASE `) (lambda e (call @quote (var e))) (anchored 1)) -#(rule $Number ($TOKEN) (NumberFn) (anchored 1)) -#(rule $int ($Number) (lambda n (call @int2string (var n))) (anchored 1)) -# -# -############################## -## NATURAL LANGUAGE SYNONYMS # -############################## -# -## Theorem Lists -#(rule $Thms ($Thms and $Thms) (lambda t1 (lambda t2 (call @cons (var t1) (var t2)))) (anchored 1)) -#(rule $thmlist (nothing) (ConstantFn []) (anchored 1)) -#(rule $thmlist (empty list) (ConstantFn []) (anchored 1)) -# -## Tactic composition -#(rule $tactic ($tactic then $tactic) (lambda t1 (lambda t2 (call @then (var t1) (var t2)))) (anchored 1)) -#(rule $tactic ($tactic then $tactic on the first goal) (lambda t1 (lambda t2 (call @then1 (var t1) (var t2)))) (anchored 1)) -# -# -################################################################ -## GRAMMAR SUPPORTING ABSTRACT DESCRIPTIONS OF HOL4 COMPONENTS # 
-################################################################ -# -## Sets and their intersections, by constructing imperative sentences -# -## Lexemes -#(rule $type_lx ($PHRASE) (SimpleLexiconFn (type type)) (anchored 1)) -#(rule $name_lx ($PHRASE) (SimpleLexiconFn (type name)) (anchored 1)) -#(rule $AV_lx ($PHRASE) (SimpleLexiconFn (type AV)) (anchored 1)) -#(rule $VP_lx ($PHRASE) (SimpleLexiconFn (type VP)) (anchored 1)) -#(rule $OBJ_lx ($PHRASE) (SimpleLexiconFn (type OBJ)) (anchored 1)) -#(rule $CP_lx ($PHRASE) (SimpleLexiconFn (type CP)) (anchored 1)) -#(rule $PREARG_lx ($PHRASE) (SimpleLexiconFn (type PREARG)) (anchored 1)) -# -#(rule $set_lx ($PHRASE) (SimpleLexiconFn (type set)) (anchored 1)) -# -## Get sets -#(def @fromFeatureX (lambda x (call @fromFeature (var x)))) -#(rule $type ($type_lx) @fromFeatureX (anchored 1)) -#(rule $name ($name_lx) @fromFeatureX (anchored 1)) -#(rule $AV ($AV_lx) @fromFeatureX (anchored 1)) -#(rule $VP ($VP_lx) @fromFeatureX (anchored 1)) -#(rule $OBJ ($OBJ_lx) @fromFeatureX (anchored 1)) -#(rule $CP ($CP_lx) @fromFeatureX (anchored 1)) -#(rule $PREARG ($PREARG_lx) @fromFeatureX (anchored 1)) -# -#(rule $set ($set_lx) @fromFeatureX (anchored 1)) -# -## Syntactically correct intersections -#(def @intersectS1S2 (lambda s1 (lambda s2 (call @intersect (var s1) (var s2))))) -#(rule $VP ($AV $VP) @intersectS1S2 (anchored 1)) -#(rule $VP ($VP $OBJ) @intersectS1S2 (anchored 1)) -#(rule $VP ($VP $CP) @intersectS1S2 (anchored 1)) -#(rule $VP ($VP $AV) @intersectS1S2 (anchored 1)) -# -#(for @p (on with the) -# (rule $Prep (@p) (ConstantFn null) (anchored 1))) -#(rule $set ($set $set) @intersectS1S2 (anchored 1)) -# -#(for @a (use apply) -# (rule $Apply (@a) (ConstantFn null) (anchored 1))) -# -#(rule $VP' ($Apply $name) (SelectFn 1) (anchored 1)) -#(rule $VP' ($name) (IdentityFn) (anchored 1)) -#(rule $VP' ($VP' $type) @intersectS1S2 (anchored 1)) -#(rule $VP' ($type $VP') @intersectS1S2 (anchored 1)) -#(rule $VP' ($VP) (IdentityFn) 
(anchored 1)) -#(rule $VP' ($VP' $PREARG) @intersectS1S2 (anchored 1)) -#(rule $VP' ($VP' with) (SelectFn 0) (anchored 1)) -# -## Collapsing sets to single components -#(rule $tactic' ($VP') (lambda s (call @choice (call @intersect (var s) (call @fromFeature "type.tactic")))) (anchored 1)) -#(rule $tactic ($tactic') (interactive.lassie.ChoiceFn) (anchored 1)) -# -#(rule $thm->tactic' ($VP') (lambda s (call @choice (call @intersect (var s) (call @fromFeature "type.thm -> tactic")))) (anchored 1)) -#(rule $thm->tactic ($thm->tactic') (interactive.lassie.ChoiceFn) (anchored 1)) -# -#(rule $thmlist->tactic' ($VP') (lambda s (call @choice (call @intersect (var s) (call @fromFeature "type.thm list -> tactic")))) (anchored 1)) -#(rule $thmlist->tactic ($thmlist->tactic') (interactive.lassie.ChoiceFn) (anchored 1)) -# -#(rule $tactic->tactic' ($VP') (lambda s (call @choice (call @intersect (var s) (call @fromFeature "type.tactic -> tactic")))) (anchored 1)) -#(rule $tactic->tactic ($tactic->tactic') (interactive.lassie.ChoiceFn) (anchored 1)) -# -#(rule $thm->thm' ($VP') (lambda s (call @choice (call @intersect (var s) (call @fromFeature "type.thm -> thm")))) (anchored 1)) -#(rule $thm->thm ($thm->thm') (interactive.lassie.ChoiceFn) (anchored 1)) -# -#(rule $termquotation->tactic' ($VP') (lambda s (call @choice (call @intersect (var s) (call @fromFeature "type.term quotation -> tactic")))) (anchored 1)) -#(rule $termquotation->tactic ($termquotation->tactic') (interactive.lassie.ChoiceFn) (anchored 1)) -# -#(rule $int->tactic->tactic' ($VP') (lambda s (call @choice (call @intersect (var s) (call @fromFeature "type.int -> tactic -> tactic")))) (anchored 1)) -#(rule $int->tactic->tactic ($int->tactic->tactic') (interactive.lassie.ChoiceFn) (anchored 1)) -# -# -## Theorems -#(rule $thm' ($set) (lambda s (call @choice (call @intersect (var s) (call @fromFeature "type.thm")))) (anchored 1)) -#(rule $thm ($thm') (interactive.lassie.ChoiceFn) (anchored 1)) -# -## Casting sets as 
lists -#(rule $Thms (all $set theorems) -# (lambda s (call @set2string (call @intersect (var s) (call @fromFeature "type.thm")))) -# (anchored 1)) -#(rule $Thms ($set theorems) -# (lambda s (call @set2string (call @intersect (var s) (call @fromFeature "type.thm")))) -# (anchored 1)) -#(rule $Thms (all $set) -# (lambda s (call @set2string (call @intersect (var s) (call @fromFeature "type.thm")))) -# (anchored 1)) -# -# -############## -## OPTIONALS # -############## -## App -#(for @cat ($thm->tactic $thmlist->tactic $tactic->tactic $thm->thm $termquotation->tactic $int->tactic->tactic) -# (rule @cat ($Apply @cat) (SelectFn 1) (anchored 1))) -# -## Args -#(for @cat ($tactic $thm $thmlist $termquotation) -# (rule @cat ($Prep @cat) (SelectFn 1) (anchored 1))) diff --git a/examples/lassie/sempre/interactive/lassie.grammar_v2.1 b/examples/lassie/sempre/interactive/lassie.grammar_v2.1 deleted file mode 100644 index b8f258b3b4..0000000000 --- a/examples/lassie/sempre/interactive/lassie.grammar_v2.1 +++ /dev/null @@ -1,340 +0,0 @@ - ############################################################################ - # GRAMMAR FOR NATURAL PROOF EXPRESSIONS # - # # - # Refer to SEMPRE's documentation for general indications on rule # - # construction. # - # # - # Currently, domain knowledge comes from two sources. The lassie.lexicon # - # file contains component names (e.g. fs) and with their types # - # (e.g. thmlist->tactic, used for sound applications). Features of those # - # components (e.g. their natural name, their class/type) are read from # - # lassie.db into the TacticWorld. # - # # - # TacticWorld.java holds the main semantics of Lassie's operations, as # - # we piggy back on the DALExecutor for handling the semantic part of # - # this grammar. DALExecutor interprets semantic expression in a "world" # - # containing "items". We superifcially follow this convention where HOL # - # components can be considered the "items" of our "tactic-world". 
# - # # - # Generally, lowercased categories (e.g. $thm, $name) correspond to # - # types as found in the lexicon/database. Categories which are # - # capitalized are intermediates between lowercased categories and the # - # $tactic category. # - ############################################################################ - -########################################### -# Incorporated SML types: # -########################################### -# $tactic -# $thm -# ($thmlist) -# $thm->tactic -# $thmlist->tactic -# $tactic->tactic -# $thm->thm -# $termquotation->tactic -# $int->tactic->tactic -# $termquotation->[thm->tactic]->tactic -# $[thm->tactic]->tactic -# $termquotation*tactic->tactic -# $termquotationlist->tactic -# $termquotation->[thm->tactic]->thm->tactic -# $termquotationlist->[thm->tactic]->thm->tactic - - ################################################################ - # Define some abbreviations for calling into library functions # - ################################################################ -(def @int2string edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.int2string) -(def @app edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.app) -(def @infix edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.infix) -(def @then edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.then) -(def @then1 edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.then1) -(def @cons edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.cons) -(def @list edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.list) -(def @quote edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.quote) -(def @parens edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.parens) -(def @op edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.op) -(def @fromFeature edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.fromFeature) -(def @intersect edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.intersect) -(def @set2string 
edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.set2string) -(def @choice edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.choice) -(def @tactic edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.tactic) -(def @command edu.stanford.nlp.sempre.interactive.lassie.TacticWorld.command) - -(def @ChoiceFn edu.stanford.nlp.sempre.interactive.lassie.ChoiceFn) - -(def @appT1T2 (lambda t1 (lambda t2 (call @app (var t1) (var t2))))) -(def @infixT1T2T3 (lambda t1 (lambda t2 (lambda t3 (call @op (var t2) (var t1) (var t3)))))) - -################################### -# GRAMMAR SUPPORTING LITERAL HOL4 # -################################### - -(rule $ROOT ($tactic) (IdentityFn) (anchored 1)) - -############################################################### -## literal SML objects, looked up from the database # -############################################################### -## tactics -(rule $tactic - ($TOKEN) - (call @tactic (SimpleLexiconFn (type "tactic"))) (anchored 1)) - -# The root is always a tactic or a command -#(rule $ROOT ($tactic) (IdentityFn) (anchored 1)) -#(rule $ROOT ($command) (lambda c (call @command (var c))) (anchored 1)) -# -## Tactics can be produced by combining different constructs -#(for @category -# (($thm->tactic $thm) ($tactic->tactic $tactic) ($thmlist->tactic $thmlist) -# ($termquotation->tactic $termquotation) -# ($[thm->tactic]->tactic $thm->tactic)) -# (rule $tactic @category @appT1T2 (anchored 1))) -#(rule $tactic ($termquotation $termquotation*tactic->tactic $tactic) @infixT1T2T3 (anchored 1)) -## Support for inline THEN -#(rule $tactic ($tactic $tactic->tactic->tactic $tactic) @infixT1T2T3 (anchored 1)) -# -## Partial applications for things like qpat_x_assum or first_x_assum -#(rule $[thm->tactic]->tactic -# ($termquotation->[thm->tactic]->tactic $termquotation) @appT1T2 (anchored 1)) -#(rule $[thm->tactic]->thm->tactic -# ($termquotation->[thm->tactic]->thm->tactic $termquotation) @appT1T2 (anchored 1)) -#(rule 
$[thm->tactic]->thm->tactic -# ($termquotationlist->[thm->tactic]->thm->tactic $termquotationlist) @appT1T2 (anchored 1)) -#(rule $thm->tactic ($[thm->tactic]->thm->tactic $thm->tactic) @appT1T2 (anchored 1)) -#(rule $tactic->tactic ($int->tactic->tactic $int) @appT1T2 (anchored 1)) -# -## We can put parentheses around a tactic or a command -#(for @cat ($ROOT $command $tactic) -# (rule @cat (\( @cat \)) (lambda t (call @parens (var t))) (anchored 1))) -# -############################################################### -## literal SML objects, looked up from the database # -############################################################### -## tactic modifiers -#(rule $tactic->tactic -# ($TOKEN) -# (SimpleLexiconFn (type "tactic->tactic")) (anchored 1)) -## thm tactics -#(rule $thm->tactic -# ($TOKEN) -# (SimpleLexiconFn (type "thm->tactic") (anchored 1))) -## thm list tactics: -#(rule $thmlist->tactic -# ($TOKEN) -# (SimpleLexiconFn (type "thmlist->tactic") (anchored 1))) -## tactic combinators -#(rule $tactic->tactic->tactic -# ($TOKEN) -# (SimpleLexiconFn (type "tactic->tactic->tactic") (anchored 1))) -## term tactics -#(rule $termquotation->tactic -# ($TOKEN) -# (SimpleLexiconFn (type "termquotation->tactic") (anchored 1))) -## first_x_assum, last_x_assum, ... -#(rule $[thm->tactic]->tactic -# ($TOKEN) -# (SimpleLexiconFn (type "[thm->tactic]->tactic") (anchored 1))) -## qspec -#(rule $termquotation->[thm->tactic]->thm->tactic -# ($TOKEN) -# (SimpleLexiconFn (type "termquotation->[thm->tactic]->thm->tactic") (anchored 1))) -## qspecl -#(rule $termquotationlist->[thm->tactic]->thm->tactic -# ($TOKEN) -# (SimpleLexiconFn (type "termquotationlist->[thm->tactic]->thm->tactic") (anchored 1))) -## qpat_x_assum, qpat_assum, ... 
-#(rule $termquotation->[thm->tactic]->tactic -# ($TOKEN) -# (SimpleLexiconFn (type "termquotation->[thm->tactic]->tactic")) (anchored 1)) -## ntac -#(rule $int->tactic->tactic -# ($TOKEN) -# (SimpleLexiconFn (type "int->tactic->tactic")) (anchored 1)) -#(rule $termquotation*tactic->tactic -# ($TOKEN) -# (SimpleLexiconFn (type "termquotation*tactic->tactic")) (anchored 1)) -#(rule $thm->thm -# ($TOKEN) -# (SimpleLexiconFn (type "thm->thm")) (anchored 1)) -# -##### HOL4 Terms -#(rule $termquotation (` $term ') (lambda e (call @quote (var e))) (anchored 1)) -#(rule $termquotation (' $term ') (lambda e (call @quote (var e))) (anchored 1)) -#(rule $term ($PHRASE) (IdentityFn) (anchored 1)) -#(rule $term ($term and $term) (lambda t1 (lambda t2 (call @op (string "∧") (var t1) (var t2)))) (anchored 1)) -#(rule $term ($term or $term) (lambda t1 (lambda t2 (call @op (string "∨") (var t1) (var t2)))) (anchored 1)) -# -#(rule $Terms ($termquotation) (IdentityFn) (anchored 1)) -#(rule $Terms ($termquotation , $Terms) (lambda t1 (lambda t2 (call @cons (var t1) (var t2)))) (anchored 1)) -#(rule $termquotationlist ([ $Terms ]) (lambda termquotations (call @list (var termquotations))) (anchored 1)) -#(rule $termquotationlist ([ ]) (ConstantFn (call @list (string " "))) (anchored 1)) -#(rule $termquotationlist ([]) (ConstantFn (call @list (string " "))) (anchored 1)) -# -##### HOL4 Theorems -#(rule $thm ($thm->thm $thm) @appT1T2 (anchored 1)) -#(rule $thm ($TOKEN) (IdentityFn) (anchored 1)) -# -### Lists -#(rule $Thms ($thm) (IdentityFn) (anchored 1)) -#(rule $Thms ($thm , $Thms) (lambda t1 (lambda t2 (call @cons (var t1) (var t2)))) (anchored 1)) -#(rule $thmlist ([ $Thms ]) (lambda thms (call @list (var thms))) (anchored 1)) -#(rule $thmlist ([]) (ConstantFn (call @list (string " "))) (anchored 1)) -#(rule $thmlist ([ ]) (ConstantFn (call @list (string " "))) (anchored 1)) -# -# -###Commands to the interactive prove interface -#(rule $command -# (back) -# (ConstantFn (string 
"(b)")) (anchored 1)) -# -### Other -#(rule $Number ($TOKEN) (NumberFn) (anchored 1)) -#(rule $int ($Number) (lambda n (call @int2string (var n))) (anchored 1)) -# -################################################################# -### GRAMMAR SUPPORTING ABSTRACT DESCRIPTIONS OF HOL4 COMPONENTS # -################################################################# -# -### Sets and their intersections, by constructing imperative sentences -# -### Lexemes -##(rule $type_lx ($PHRASE) (SimpleLexiconFn (type type)) (anchored 1)) -##(rule $name_lx ($PHRASE) (SimpleLexiconFn (type name)) (anchored 1)) -##(rule $AV_lx ($PHRASE) (SimpleLexiconFn (type AV)) (anchored 1)) -##(rule $VP_lx ($PHRASE) (SimpleLexiconFn (type VP)) (anchored 1)) -##(rule $OBJ_lx ($PHRASE) (SimpleLexiconFn (type OBJ)) (anchored 1)) -##(rule $CP_lx ($PHRASE) (SimpleLexiconFn (type CP)) (anchored 1)) -##(rule $PREARG_lx ($PHRASE) (SimpleLexiconFn (type PREARG)) (anchored 1)) -## -##(rule $set_lx ($PHRASE) (SimpleLexiconFn (type set)) (anchored 1)) -## -#### Get sets -##(def @fromFeatureX (lambda x (call @fromFeature (var x)))) -##(rule $type ($type_lx) @fromFeatureX (anchored 1)) -##(rule $name ($name_lx) @fromFeatureX (anchored 1)) -##(rule $AV ($AV_lx) @fromFeatureX (anchored 1)) -##(rule $VP ($VP_lx) @fromFeatureX (anchored 1)) -##(rule $OBJ ($OBJ_lx) @fromFeatureX (anchored 1)) -##(rule $CP ($CP_lx) @fromFeatureX (anchored 1)) -##(rule $PREARG ($PREARG_lx) @fromFeatureX (anchored 1)) -## -##(rule $set ($set_lx) @fromFeatureX (anchored 1)) -## -#### Syntactically correct intersections -##(def @intersectS1S2 (lambda s1 (lambda s2 (call @intersect (var s1) (var s2))))) -##(rule $VP ($AV $VP) @intersectS1S2 (anchored 1)) -##(rule $VP ($VP $OBJ) @intersectS1S2 (anchored 1)) -##(rule $VP ($VP $CP) @intersectS1S2 (anchored 1)) -##(rule $VP ($VP $AV) @intersectS1S2 (anchored 1)) -## -##(for @p (on with the) -## (rule $Prep (@p) (ConstantFn null) (anchored 1))) -##(rule $set ($set $set) @intersectS1S2 
(anchored 1)) -## -##(for @a (use apply) -## (rule $Apply (@a) (ConstantFn null) (anchored 1))) -## -##(rule $VP' ($Apply $name) (SelectFn 1) (anchored 1)) -##(rule $VP' ($name) (IdentityFn) (anchored 1)) -##(rule $VP' ($VP' $type) @intersectS1S2 (anchored 1)) -##(rule $VP' ($type $VP') @intersectS1S2 (anchored 1)) -##(rule $VP' ($VP) (IdentityFn) (anchored 1)) -##(rule $VP' ($VP' $PREARG) @intersectS1S2 (anchored 1)) -##(rule $VP' ($VP' with) (SelectFn 0) (anchored 1)) -## -#### Collapsing sets to single components -##(rule $tactic' ($VP') (lambda s (call @choice (call @intersect (var s) (call @fromFeature "type.tactic")))) (anchored 1)) -##(rule $tactic ($tactic') (interactive.lassie.ChoiceFn) (anchored 1)) -## -##(rule $thm->tactic' ($VP') (lambda s (call @choice (call @intersect (var s) (call @fromFeature "type.thm -> tactic")))) (anchored 1)) -##(rule $thm->tactic ($thm->tactic') (interactive.lassie.ChoiceFn) (anchored 1)) -## -##(rule $thmlist->tactic' ($VP') (lambda s (call @choice (call @intersect (var s) (call @fromFeature "type.thm list -> tactic")))) (anchored 1)) -##(rule $thmlist->tactic ($thmlist->tactic') (interactive.lassie.ChoiceFn) (anchored 1)) -## -##(rule $tactic->tactic' ($VP') (lambda s (call @choice (call @intersect (var s) (call @fromFeature "type.tactic -> tactic")))) (anchored 1)) -##(rule $tactic->tactic ($tactic->tactic') (interactive.lassie.ChoiceFn) (anchored 1)) -## -##(rule $thm->thm' ($VP') (lambda s (call @choice (call @intersect (var s) (call @fromFeature "type.thm -> thm")))) (anchored 1)) -##(rule $thm->thm ($thm->thm') (interactive.lassie.ChoiceFn) (anchored 1)) -## -##(rule $termquotation->tactic' ($VP') (lambda s (call @choice (call @intersect (var s) (call @fromFeature "type.term quotation -> tactic")))) (anchored 1)) -##(rule $termquotation->tactic ($termquotation->tactic') (interactive.lassie.ChoiceFn) (anchored 1)) -## -##(rule $int->tactic->tactic' ($VP') (lambda s (call @choice (call @intersect (var s) (call 
@fromFeature "type.int -> tactic -> tactic")))) (anchored 1)) -##(rule $int->tactic->tactic ($int->tactic->tactic') (interactive.lassie.ChoiceFn) (anchored 1)) -# -# -### Theorems -##(rule $thm' ($set) (lambda s (call @choice (call @intersect (var s) (call @fromFeature "type.thm")))) (anchored 1)) -##(rule $thm ($thm') (interactive.lassie.ChoiceFn) (anchored 1)) -## -### Casting sets as lists -##(rule $Thms (all $set theorems) -## (lambda s (call @set2string (call @intersect (var s) (call @fromFeature "type.thm")))) -## (anchored 1)) -##(rule $Thms ($set theorems) -## (lambda s (call @set2string (call @intersect (var s) (call @fromFeature "type.thm")))) -## (anchored 1)) -##(rule $Thms (all $set) -## (lambda s (call @set2string (call @intersect (var s) (call @fromFeature "type.thm")))) -## (anchored 1)) -# -### Typed wildcards -##(rule $tactic (\( $PHRASE : tactic \)) (IdentityFn) (anchored 1)) -##(rule $thm (\( $PHRASE : thm \)) (IdentityFn) (anchored 1)) -##(rule $thm->tactic (\( $PHRASE : thm->tactic \)) (IdentityFn) (anchored 1)) -##(rule $thmlist->tactic (\( $PHRASE : thm list -> tactic \)) (IdentityFn) (anchored 1)) -##(rule $tactic->tactic (\( $PHRASE : tactic -> tactic \)) (IdentityFn) (anchored 1)) -##(rule $thm->thm (\( $PHRASE : thm -> thm \)) (IdentityFn) (anchored 1)) -##(rule $termquotation->tactic (\( $PHRASE : term quotation -> tactic \)) (IdentityFn) (anchored 1)) -##(rule $int->tactic->tactic (\( $PHRASE : int -> tactic -> tactic \)) (IdentityFn) (anchored 1)) -##(rule $termquotation->[thm->tactic]->tactic (\( $PHRASE : term quotation -> \( thm -> tactic \) -> tactic \)) (IdentityFn) (anchored 1)) -##(rule $[thm->tactic]->tactic (\( $PHRASE : \( thm -> tactic \) -> tactic \)) (IdentityFn) (anchored 1)) -##(rule $termquotation*tactic->tactic (\( $PHRASE : term quotation * tactic -> tactic \)) (IdentityFn) (anchored 1)) -##(rule $termquotationlist->tactic (\( $PHRASE : term quotation list -> tactic \)) (IdentityFn) (anchored 1)) -##(rule 
$termquotation->[thm->tactic]->thm->tactic (\( $PHRASE : term quotation -> \( thm -> tactic \) -> thm -> tactic \)) (IdentityFn) (anchored 1)) -##(rule $termquotationlist->[thm->tactic]->thm->tactic (\( $PHRASE : term quotation list -> \( thm -> tactic \) -> thm -> tactic \)) (IdentityFn) (anchored 1)) -## -############################### -### NATURAL LANGUAGE SYNONYMS # -############################### -## -### Theorem Lists -##(rule $Thms ($Thms and $Thms) (lambda t1 (lambda t2 (call @cons (var t1) (var t2)))) (anchored 1)) -##(rule $thmlist (nothing) (ConstantFn []) (anchored 1)) -##(rule $thmlist (empty list) (ConstantFn []) (anchored 1)) -## -### Tactic composition -##(rule $tactic ($tactic then $tactic) (lambda t1 (lambda t2 (call @then (var t1) (var t2)))) (anchored 1)) -##(rule $tactic ($tactic then $tactic on the first goal) (lambda t1 (lambda t2 (call @then1 (var t1) (var t2)))) (anchored 1)) -## -############### -### OPTIONALS # -############### -### App -##(for @cat ($thm->tactic $thmlist->tactic $tactic->tactic $thm->thm $termquotation->tactic $int->tactic->tactic) -## (rule @cat ($Apply @cat) (SelectFn 1) (anchored 1))) -## -### Args -##(for @cat ($tactic $thm $thmlist $termquotation) -## (rule @cat ($Prep @cat) (SelectFn 1) (anchored 1))) -# -############ UNUSED ########### -##(rule $tactic->tactic ($int->tactic->tactic $int) @appT1T2 (anchored 1)) -##(rule $[thm->tactic]->tactic ($termquotation->[thm->tactic]->tactic $termquotation) @appT1T2 (anchored 1)) -##(rule $termquotation (` $PHRASE `) (lambda e (call @quote (var e))) (anchored 1)) -## TODO: Necessary? -##(rule $thm -## ($TOKEN) -## (SimpleLexiconFn (type "thm")) (anchored 1)) -##Disabled to make learning easier... 
See test on "repeat" in LassieTests -##(rule $thmlist ($Thms) (lambda thms (call @list (var thms))) (anchored 1)) -##(rule $termquotationlist ($Terms) (lambda termquotations (call @list (var termquotations))) (anchored 1)) -### Tactic composition -##(rule $tactic ($tactic THEN $tactic) (lambda t1 (lambda t2 (call @then (var t1) (var t2)))) (anchored 1)) -##(rule $tactic ($tactic THEN1 $tactic) (lambda t1 (lambda t2 (call @then1 (var t1) (var t2)))) (anchored 1)) -##(rule $tactic ($tactic \\ $tactic) (lambda t1 (lambda t2 (call @then (var t1) (var t2)))) (anchored 1)) -##(rule $tactic ($tactic >- $tactic) (lambda t1 (lambda t2 (call @then1 (var t1) (var t2)))) (anchored 1)) -## diff --git a/examples/lassie/sempre/interactive/lassie.synonyms b/examples/lassie/sempre/interactive/lassie.synonyms deleted file mode 100644 index 4cbe426ad0..0000000000 --- a/examples/lassie/sempre/interactive/lassie.synonyms +++ /dev/null @@ -1,2 +0,0 @@ -{"lexeme":"theorem","formula":"type.thm","type":"type"} -{"lexeme":"add","formula":"name.addition","type":"name"} \ No newline at end of file diff --git a/examples/lassie/sempre/interactive/run b/examples/lassie/sempre/interactive/run deleted file mode 100755 index 6be2ac07bc..0000000000 --- a/examples/lassie/sempre/interactive/run +++ /dev/null @@ -1,314 +0,0 @@ -#!/usr/bin/env ruby - -# This is the main entry point for running SHRDLURN. See -# fig/lib/execrunner.rb for more documentation for how commands are generated. -# There are a bunch of modes that this script can be invoked with, which -# loosely correspond to the modules. 
- -$: << 'fig/lib' -require 'execrunner' -$optPrefix = '-' -$path = 'interactive' -$output = 'interactive/output' -$modes = [] -def addMode(name, description, func) - $modes << [name, description, func] -end - -def codalab(dependencies=nil) - # Set @cl=1 to run job on CodaLab - dependencies ||= l(':fig', ':lib', ':module-classes.txt', ':libsempre') - l( - letDefault(:cl, 0), - sel(:cl, - l(), - l('cl', 'run', dependencies, '---', 'LC_ALL=C.UTF-8'), - nil), - nil) -end - -def header(modules='core', codalabDependencies=nil) - l( - codalab(codalabDependencies), - # Queuing system - letDefault(:q, 0), sel(:q, l(), l('fig/bin/q', '-shareWorkingPath', o('mem', '5g'), o('memGrace', 10), '-add', '---')), - # Create execution directory - letDefault(:pooldir, 1), - sel(:pooldir, l(), 'fig/bin/qcreate'), - # Run the Java command... - 'java', - '-ea', - '-Dmodules='+modules, - # Memory size; set to low to work with HOL regression server - letDefault(:memsize, 'low'), - sel(:memsize, { - 'tiny' => l('-Xms2G', '-Xmx4G'), - 'low' => l('-Xms5G', '-Xmx7G'), - 'default' => l('-Xms8G', '-Xmx10G'), - 'medium' => l('-Xms12G', '-Xmx14G'), - 'high' => l('-Xms20G', '-Xmx24G'), - 'higher' => l('-Xms40G', '-Xmx50G'), - 'impressive' => l('-Xms75G', '-Xmx90G'), - }), - # Classpath - '-cp', 'libsempre/*:lib/*', - # Profiling - letDefault(:prof, 0), sel(:prof, l(), '-Xrunhprof:cpu=samples,depth=100,file=_OUTPATH_/java.hprof.txt'), - # Debugging - letDefault(:debug, 0), sel(:debug, l(), l('-Xdebug', '-Xrunjdwp:server=y,transport=dt_socket,suspend=y,address=8898')), - nil) -end - -def figOpts; l(selo(:pooldir, 'execDir', 'exec', '_OUTPATH_'), o('overwriteExecDir'), o('addToView', 0), o('monitor', 'false')) end - -############################################################ -# Unit tests - -addMode('backup', 'small commands like run community server, backup, or simulator', lambda { |e| l( - lambda { |e| system 'echo "backing up with mv"'}, - lambda { |e| system 'mkdir -p ./int-backup/'}, - 
letDefault(:msg, 'backing up, no message'), - lambda { |e| l('echo ', :msg, '> ./int-backup/message')}, - lambda { |e| l('echo ', '`date +%Y-%m-%d.%H:%M:%S`', '>> ./int-backup/message')}, - lambda { |e| system 'mv int-output int-backup/`date +%Y-%m-%d.%H:%M:%S`'}, - lambda { |e| system 'mkdir -p ./int-output'}, -nil)}) - -addMode('backup-data', 'put community-server into trash with time stamp', lambda { |e| l( - lambda { |e| system 'echo "backing up data with mv"'}, - lambda { |e| system 'mkdir -p ./community-server/data-backup'}, - lambda { |e| system 'mv ./community-server/data ./community-server/data-backup/`date +%Y-%m-%d.%H:%M:%S`'}, -nil)}) - -addMode('trash', 'put int-output into trash with time stamp', lambda { |e| l( - lambda { |e| system 'echo "trashing int-output with time stamp"'}, - lambda { |e| system 'mv int-output int-output-trash-`date +%Y-%m-%d.%H:%M:%S`'}, - lambda { |e| system 'rm -rf int-output-trash-*'}, - lambda { |e| system 'mkdir -p ./int-output'}, -nil)}) - -addMode('test', 'Run unit tests for interactive stuff', lambda { |e| - l( - 'java', '-ea', '-Xmx12g', '-cp', 'libsempre/*:lib/*', - letDefault(:debug, 0), sel(:debug, l(), l('-Xdebug', '-Xrunjdwp:server=y,transport=dt_socket,suspend=y,address=8898')), - 'org.testng.TestNG', - lambda { |e| - if e[:class] - l('-testclass', 'edu.stanford.nlp.sempre.interactive.test.' 
+ e[:class]) - else - "./#{$path}/testng.xml" - end - }, - nil) -}) - -addMode('simulator', 'run the simulator', lambda { |e| l( - # rlwrap, - header('core,interactive'), - 'edu.stanford.nlp.sempre.interactive.Simulator', - figOpts, - letDefault(:server, 'local'), - sel(:server, { - 'local' => o('serverURL', 'http://localhost:8410'), - 'remote' => o('serverURL', 'http://jonsson.stanford.edu:8410') - }), - # set to 0 to enable logging - o('numThreads', 1), - letDefault(:sandbox, 'full'), - sel(:sandbox, { - 'all' => o('reqParams', 'grammar=0\&cite=0\&learn=0\&logging=0'), - 'nolog' => o('reqParams', 'grammar=0\&cite=0\&learn=1\&logging=0'), - 'nolearn' => o('reqParams', 'grammar=1\&cite=1\&learn=0\&logging=0'), - 'none' => o('reqParams', 'grammar=1\&cite=1\&learn=1\&logging=0'), - 'nocite' => o('reqParams', 'grammar=1\&cite=0\&learn=1\&logging=0'), - }), - letDefault(:task, 'sidaw'), - sel(:task, { - 'freebuild' => o('logFiles', "./#{$path}/queries/freebuild.json.gz"), - 'freebuilddef' => o('logFiles', "./#{$path}/queries/freebuild.def.json.gz"), - - 'qual1' => o('logFiles', "./#{$path}/queries/rawqueries/qualifier1-0118.json.gz"), - 'qual2' => o('logFiles', "./#{$path}/queries/rawqueries/qualifier2-0129.json.gz"), - 'qual3' => o('logFiles', "./#{$path}/queries/rawqueries/qualifier3-0201.json.gz"), # both 2 and 3 - 'free1' => o('logFiles', "./#{$path}/queries/rawqueries/freebuild1-0121.json.gz"), - 'free2' => o('logFiles', "./#{$path}/queries/rawqueries/freebuild2-0127.json.gz"), - }), -nil)}) - -############################################################ -# {2016-07-02} [sidaw]: interactive semantic parsing -addMode('voxelurn', 'interactive semantic parsing in a VoxelWorld', lambda { |e| l( - #rlwrap, - header('core,interactive'), - 'edu.stanford.nlp.sempre.Main', - #figOpts, - o('server'), - o('masterType', 'edu.stanford.nlp.sempre.interactive.InteractiveMaster'), - o('Executor', 'interactive.DALExecutor'), - o('LanguageAnalyzer', 'interactive.DALAnalyzer'), - 
o('DALExecutor.convertNumberValues', true), - o('DALExecutor.printStackTrace', true), - o('VoxelWorld.maxBlocks', 100000), - selo(0, 'DALExecutor.worldType', 'VoxelWorld', 'CalendarWorld', 'Otherworld'), - selo(0, 'Grammar.inPaths', "./#{$path}/voxelurn.grammar"), - - o('Params.initWeightsRandomly', false), - o('Grammar.binarizeRules', false), - o('Grammar.useApplyFn', 'interactive.ApplyFn'), - - o('LanguageAnalyzer.lowerCaseTokens', true), - - o('Parser.pruneErrorValues', true), - o('Parser', 'interactive.InteractiveBeamParser'), - o('Parser.callSetEvaluation', false), - o('Parser.coarsePrune', true), - - o('Parser.beamSize', 50), - o('InteractiveBeamParser.maxNewTreesPerSpan', 5001), - o('ParserState.customExpectedCounts', 'None'), - - selo(0, 'InteractiveBeamParser.floatStrategy', 'Never', 'NoParse', 'Always'), - o('InteractiveBeamParser.trackedCats', 'Number', 'Numbers', 'Color', 'Direction', 'Set', 'Sets', 'Action', 'Actions'), - - o('Derivation.derivComparator', 'AnchorPriorityScoreComparator'), - o('Params.l1Reg', 'nonlazy'), - o('Params.l1RegCoeff', 0.0001), - - o('Params.initStepSize', 0.1), - o('Params.adaptiveStepSize', true), - #o('Params.stepSizeReduction', 0.25), - - o('FeatureExtractor.featureComputers', 'interactive.DALFeatureComputer'), - o('FeatureExtractor.featureDomains', ':rule', ':span', ':stats', ':scope', ':social', ':window'), - # o('FeatureExtractor.featureDomains', ':rule'), - - o('InteractiveMaster.intOutputPath', './int-output/'), - o('InteractiveMaster.onlyInteractive', true), - o('InteractiveUtils.citationPath', './int-output/citation/'), - - o('InteractiveMaster.useAligner', false), - o('InteractiveMaster.maxSequence', 20), - o('InteractiveMaster.maxChars', 200), - - o('DefinitionAligner.strategies', 'ExactExclusion'), - - o('InteractiveServer.numThreads', 16), - o('InteractiveServer.maxCandidates', 50), - o('InteractiveServer.queryLogPath', './int-output/query.log'), - o('InteractiveServer.responseLogPath', 
'./int-output/response.log'), - o('InteractiveServer.port', 8410), - - o('GrammarInducer.useBestPacking', true), - o('GrammarInducer.useSimplePacking', true), - o('GrammarInducer.maxNonterminals', 3), - - o('Derivation.showTypes', false), - o('Derivation.showValues', false), - o('Derivation.showRules', false), - o('Derivation.anchoredBonus', 1.0), - - o('NumberFn.allowedRange', 0, 100), - o('SimpleLexicon.inPaths', "./#{$path}/csscolors.lexicon"), - - lambda { |e| system 'mkdir -p ./int-output/'; nil}, - lambda { |e| system 'mkdir -p ./int-output/log/'; nil}, - lambda { |e| system 'mkdir -p ./int-output/citation/'; nil}, -nil) }) - -############################################################ -# {2019-06-12} [nbos]: Natural Language Parsing for HOL4 -addMode('lassie', 'interactive semantic parsing for proving theorems in HOL4', lambda { |e| l( - #rlwrap, - header('core,interactive'), - 'edu.stanford.nlp.sempre.Main', - figOpts, - o('interactive'), - o('masterType', 'edu.stanford.nlp.sempre.interactive.InteractiveMaster'), - - o('Executor', 'JavaExecutor'), # Change executor for different semantics - o('JavaExecutor.convertNameValues', true), - - o('LanguageAnalyzer', 'interactive.DALAnalyzer'), - o('Grammar.inPaths', "./#{$path}/lassie.grammar"), - - o('InteractiveMaster.allowRegularCommands', true), - o('HOLOntology.dbPath', "./#{$path}/lassie.db"), - o('HOLOntology.lexPath', "./#{$path}/lassie.lexicon"), - # o('HOLOntology.seedGrammarPath', "./#{$path}/lassie.seed.grammar"), - # o('HOLOntology.genGrammarPath', "./#{$path}/lassie.generated.grammar"), - - o('Params.initWeightsRandomly', false), - o('Grammar.binarizeRules', false), - o('Grammar.useApplyFn', 'interactive.ApplyFn'), - - o('LanguageAnalyzer.lowerCaseTokens', false), - o('SimpleLexicon.lowerCaseTokens', false), - - o('Parser.pruneErrorValues', true), - o('Parser', 'interactive.InteractiveBeamParser'), - o('Parser.callSetEvaluation', false), - o('Parser.coarsePrune', true), - - o('Parser.beamSize', 100), 
- o('InteractiveBeamParser.maxNewTreesPerSpan', 5001), - o('ParserState.customExpectedCounts', 'None'), - - selo(0, 'InteractiveBeamParser.floatStrategy', 'Never', 'NoParse', 'Always'), - o('InteractiveBeamParser.trackedCats', 'Number', 'Numbers', 'Color', 'Direction', 'Set', 'Sets', 'Action', 'Actions'), - - o('Derivation.derivComparator', 'AnchorPriorityScoreComparator'), - o('Params.l1Reg', 'nonlazy'), - o('Params.l1RegCoeff', 0.0001), - - o('Params.initStepSize', 0.1), - o('Params.adaptiveStepSize', true), - #o('Params.stepSizeReduction', 0.25), - - # o('FeatureExtractor.featureComputers', 'interactive.DALFeatureComputer'), - # o('FeatureExtractor.featureDomains', ':rule', ':span', ':stats', ':scope', ':social', ':window'), - # o('FeatureExtractor.featureDomains', ':rule'), - - o('InteractiveMaster.intOutputPath', './int-output/'), - o('InteractiveMaster.onlyInteractive', true), - o('InteractiveUtils.citationPath', './int-output/citation/'), - - o('InteractiveMaster.useAligner', false), - o('InteractiveMaster.maxSequence', 20), - o('InteractiveMaster.maxChars', 200), - - o('DefinitionAligner.strategies', 'ExactExclusion'), - - o('GrammarInducer.useBestPacking', true), - o('GrammarInducer.useSimplePacking', false), - o('GrammarInducer.maxNonterminals', 5), # default 3 - o('GrammarInducer.minTerminals', 0), # default 1 - - o('Derivation.showTypes', false), - o('Derivation.showValues', false), - o('Derivation.showRules', false), - o('Derivation.anchoredBonus', 1.0), - - o('NumberFn.allowedRange', 0, 100), - o('SimpleLexicon.inPaths', "./#{$path}/lassie.lexicon", "./#{$path}/lassie.synonyms"), - - lambda { |e| system 'mkdir -p ./int-output/'; nil}, - lambda { |e| system 'mkdir -p ./int-output/log/'; nil}, - lambda { |e| system 'mkdir -p ./int-output/citation/'; nil}, -nil) }) -############################################################ - -if ARGV.size == 0 - puts "#{$0} @mode= [options]" - puts - puts 'This is the main entry point for all interactive related 
modes.' - puts "Modes:" - $modes.each { |name,description,func| - puts " #{name}: #{description}" - } -end - -modesMap = {} -$modes.each { |name,description,func| - modesMap[name] = func -} -run!(sel(:mode, modesMap)) diff --git a/examples/lassie/sempre/pull-dependencies b/examples/lassie/sempre/pull-dependencies deleted file mode 100755 index 0757939aec..0000000000 --- a/examples/lassie/sempre/pull-dependencies +++ /dev/null @@ -1,332 +0,0 @@ -#!/usr/bin/env ruby - -# SEMPRE depends on several library/data files into |lib|. Run this script to -# copy those dependencies to your local directory. This allows you to run -# SEMPRE from anywhere. This file consists of a set of modules (which loosely -# correspond to the code modules). -# -# The master copy of these dependencies are stored on the Stanford NLP machines. -# -# Usage: -# ./pull-dependencies ... -# -# For developers with ssh access to NLP machines, there are two more local commands: -# - Copy or link |sourcePath| into lib/|dir|. -# ./pull-dependencies -l ... -# - Deploy the dependencies to the NLP machines's public www. -# ./pull-dependencies -l -r ... - -# Specify the version of the dependencies -# (To developer: Update this before releasing a new version!) -$version = '2.0' - -$isLocal = ARGV.index('-l') -$isRelease = ARGV.index('-r') -if $isRelease and not $isLocal - puts "ERROR: To release, must use both -l and -r" - exit 1 -end -ARGV.delete_if { |x| x == '-l' or x == '-r' } - -def isZip(name) - # Directories are zipped - name.end_with?('.exec') or name !~ /\./ -end - -def pull(sourcePath, dir=nil, opts={}) - puts sourcePath - destDir = 'lib' + (dir ? '/' + dir : '') - system "mkdir -p #{destDir}" - - name = File.basename(sourcePath) - ext = isZip(name) ? 
'.zip' : '' - - if not $isLocal and not $isRelease - # Download url => localPath - if sourcePath.start_with?('http://') || sourcePath.start_with?('https://') - url = sourcePath - else - url = 'http://nlp.stanford.edu/software/sempre/dependencies-' + $version + sourcePath + ext - end - localPath = destDir + '/' + name + ext - system "mkdir -p #{File.dirname(localPath)}" or exit 1 - system "wget -c '#{url}' -O #{localPath}" or exit 1 - # Unzip localPath to destDir if it's a zip file - if isZip(name) - system "cd #{File.dirname(localPath)} && unzip #{File.basename(localPath)}" or exit 1 - system "rm #{localPath}" or exit 1 - end - else - rsyncOpts = '-rlptDzi' # Preserve everything except groups and permissions - if $isRelease - # Copy sourcePath to cluster - baseDeployPath = '/u/apache/htdocs/static/software/sempre/dependencies-' + $version - deployPath = baseDeployPath + sourcePath + ext - system "mkdir -p #{File.dirname(deployPath)}" or exit 1 - if File.exists?(sourcePath) - if isZip(name) - system "cd #{File.dirname(sourcePath)} && zip -r #{deployPath} #{File.basename(sourcePath)}" or exit 1 - else - if opts[:symlink] - system "ln -sf #{File.expand_path(sourcePath)} #{deployPath}" or exit 1 - else - system "rsync #{rsyncOpts} #{sourcePath} #{deployPath}" or exit 1 - end - end - else - system "rsync #{rsyncOpts} jamie.stanford.edu:#{sourcePath} #{deployPath}" or exit 1 - end - system "chmod -R og=u #{baseDeployPath}" #or exit 1 - else - # Download sourcePath from cluster to destDir - if File.exists?(sourcePath) - if opts[:symlink] - system "ln -sf #{File.expand_path(sourcePath)} #{destDir}" or exit 1 - else - system "rsync #{rsyncOpts} #{sourcePath} #{destDir}" or exit 1 - end - else - system "rsync #{rsyncOpts} jamie.stanford.edu:#{sourcePath} #{destDir}" or exit 1 - end - end - end -end - -# source: path to master git repository -def updateGit(source) - dir = File.basename(source.sub(/\.git$/, '')) - if File.exists?(dir) - system 'cd '+dir+' && git pull' or exit 
1 - else - system 'git clone ' + source or exit 1 - end -end - -def downloadExec(path) - ['options.map', 'params', 'grammar'].each { |file| - if File.exists?(path+'/'+file) - pull(path+'/'+file, 'models/'+File.basename(path)) - end - } -end - -$modules = [] -def addModule(name, description, func) - $modules << [name, description, func] -end - -############################################################ - -addModule('core', 'Core utilities (need to compile)', lambda { - # fig: options parsing, experiment management, utils - updateGit('https://github.com/percyliang/fig') - system 'make -C fig' or exit 1 - system 'mkdir -p lib && cd lib && ln -sf ../fig/fig.jar' or exit 1 - - # Google libraries - pull('/u/nlp/data/semparse/resources/guava-14.0.1.jar') - - # TestNG -- testing framework - pull('/u/nlp/data/semparse/resources/testng-6.8.5.jar') - pull('/u/nlp/data/semparse/resources/jcommander-1.30.jar') - - # Checkstyle: make sure code looks fine - pull('/u/nlp/data/semparse/resources/checkstyle') - - # JSON - pull('/u/nlp/data/semparse/resources/jackson-core-2.2.0.jar') - pull('/u/nlp/data/semparse/resources/jackson-annotations-2.2.0.jar') - pull('/u/nlp/data/semparse/resources/jackson-databind-2.2.0.jar') - - # jLine from maven central - pull('https://repo1.maven.org/maven2/jline/jline/2.14.2/jline-2.14.2.jar') -}) - -addModule('corenlp', 'Stanford CoreNLP 3.6.0', lambda { - pull('/u/nlp/data/semparse/resources/stanford-corenlp-full-2015-12-09.zip', '', {:symlink => true}) - if not File.exists?('lib/stanford-corenlp-full-2015-12-09') - system "cd lib && unzip stanford-corenlp-full-2015-12-09.zip" or exit 1 - end - pull('/u/nlp/data/semparse/resources/stanford-corenlp-caseless-2015-04-20-models.jar', - 'stanford-corenlp-full-2015-12-09', {:symlink => true}) - # Remove old file (for backward compatibility) - if Dir.glob('lib/stanford-corenlp*.jar').any? 
- system 'rm -v lib/stanford-corenlp*.jar' or exit 1 - end - {'stanford-corenlp-3.6.0.jar' => 'stanford-corenlp.jar', - 'stanford-corenlp-3.6.0-models.jar' => 'stanford-corenlp-models.jar', - 'stanford-corenlp-caseless-2015-04-20-models.jar' => 'stanford-corenlp-caseless-models.jar', - 'joda-time.jar' => 'joda-time.jar', - 'jollyday.jar' => 'jollyday.jar', - 'ejml-0.23.jar' => 'ejml.jar', - 'slf4j-api.jar' => 'slf4j-api.jar', - 'slf4j-simple.jar' => 'slf4j-simple.jar', - }.each { |key, value| - system "ln -sfv stanford-corenlp-full-2015-12-09/#{key} lib/#{value}" or exit 1 - } -}) - -addModule('corenlp-3.2.0', 'Stanford CoreNLP 3.2.0 (for backward reproducibility)', lambda { - pull('/u/nlp/data/semparse/resources/stanford-corenlp-full-2013-06-20.zip', '', {:symlink => true}) - if not File.exists?('lib/stanford-corenlp-full-2013-06-20') - system "cd lib && unzip stanford-corenlp-full-2013-06-20.zip" or exit 1 - end - pull('/u/nlp/data/semparse/resources/stanford-corenlp-caseless-2013-06-07-models.jar', - 'stanford-corenlp-full-2013-06-20', {:symlink => true}) - # Remove old file (for backward compatibility) - if Dir.glob('lib/stanford-corenlp*.jar').any? 
- system 'rm -v lib/stanford-corenlp*.jar' or exit 1 - end - {'stanford-corenlp-3.2.0.jar' => 'stanford-corenlp.jar', - 'stanford-corenlp-3.2.0-models.jar' => 'stanford-corenlp-models.jar', - 'stanford-corenlp-caseless-2013-06-07-models.jar' => 'stanford-corenlp-caseless-models.jar', - 'joda-time.jar' => 'joda-time.jar', - 'jollyday.jar' => 'jollyday.jar' - }.each { |key, value| - system "ln -sfv stanford-corenlp-full-2013-06-20/#{key} lib/#{value}" or exit 1 - } -}) - -addModule('freebase', 'Freebase: need to construct Freebase schemas', lambda { - # Freebase schema - pull('/u/nlp/data/semparse/scr/freebase/state/execs/93.exec/schema2.ttl', 'fb_data/93.exec') - - # Lucene libraries - pull('/u/nlp/data/semparse/resources/lucene-core-4.4.0.jar') - pull('/u/nlp/data/semparse/resources/lucene-analyzers-common-4.4.0.jar') - pull('/u/nlp/data/semparse/resources/lucene-queryparser-4.4.0.jar') - - # Freebase data (for lexicon) - pull('/u/nlp/data/semparse/scr/fb_data/7', 'fb_data') - - # WebQuestions dataset - pull('/u/nlp/data/semparse/webquestions/dataset_11/webquestions.examples.train.json', 'data/webquestions/dataset_11') - pull('/u/nlp/data/semparse/webquestions/dataset_11/webquestions.examples.test.json', 'data/webquestions/dataset_11') -}) - - - -addModule('virtuoso', 'Virtuoso: if want to run own SPARQL server locally', lambda { - updateGit('https://github.com/openlink/virtuoso-opensource') - # Run this command to compile: - #system "cd virtuoso-opensource && ./autogen.sh && ./configure --prefix=$PWD/install && make && make install" or exit 1 -}) - -addModule('fullfreebase-ttl', 'Freebase (ttl file)', lambda { - # This is just for your reference. It is not directly used by SEMPRE. - pull('/u/nlp/data/semparse/scr/freebase/state/execs/93.exec/0.ttl.bz2', 'fb_data/93.exec', {:symlink => true}) -}) - -addModule('fullfreebase-vdb', 'Freebase (Virtuoso database)', lambda { - # Virtuoso index of 0.ttl above. This is read (and written) by Virtuoso. 
- pull('/u/nlp/data/semparse/scr/freebase/state/execs/93.exec/vdb.tar.bz2', 'fb_data/93.exec', {:symlink => true}) - # You need to unzip this yourself and move these files to the right place. -}) - -addModule('fullfreebase-types', 'Freebase types', lambda { - # Map from mid (e.g., fb:m.02mjmr) to id (e.g., fb:en.barack_obama) (used to link external things like Freebase API search with our internal Freebase) - pull('/u/nlp/data/semparse/scr/freebase/freebase-rdf-2013-06-09-00-00.canonical-id-map.gz', 'fb_data', {:symlink => true}) - # Map from id to types (used to do type inference on internal entities) - pull('/u/nlp/data/semparse/scr/freebase/freebase-rdf-2013-06-09-00-00.canonicalized.en-types.gz', 'fb_data', {:symlink => true}) - # You need to unzip these yourself and move these files to the right place. -}) - -addModule('tables', 'Semantic parsing with execution on tables', lambda { - # CSV reader - pull('/u/nlp/data/semparse/resources/opencsv-3.0.jar') -}) - -addModule('tables-data', 'WikiTableQuestions dataset v1.0.2', lambda { - # Compact version of the dataset - pull('https://github.com/ppasupat/WikiTableQuestions/releases/download/v1.0.2/WikiTableQuestions-1.0.2-compact.zip', 'data') - # Remove old file (for backward compatibility) - if File.directory?('lib/data/WikiTableQuestions') - system 'rm -rv lib/data/WikiTableQuestions' or exit 1 - end - system "cd lib/data && unzip WikiTableQuestions-1.0.2-compact.zip" or exit 1 -}) - -addModule('tables-data-0.5', 'WikiTableQuestions dataset v0.5 (for backward reproducibility)', lambda { - # Compact version of the dataset - pull('https://github.com/ppasupat/WikiTableQuestions/releases/download/v0.5/WikiTableQuestions-0.5-compact.zip', 'data') - # Remove old file (for backward compatibility) - if File.directory?('lib/data/WikiTableQuestions') - system 'rm -rv lib/data/WikiTableQuestions' or exit 1 - end - system "cd lib/data && unzip WikiTableQuestions-0.5-compact.zip" or exit 1 -}) - -addModule('tables-cprune', 
'Neighbor information for applying macro grammar on tables', lambda { - pull('/u/nlp/data/semparse/cprune/nn_0.zip', 'data/nn_0', {:symlink => true}) - system "cd lib/data/nn_0 && unzip nn_0.zip" or exit 1 -}) - -addModule('overnight', 'Creating a parser for multiple domains', lambda { - # Geo evaluation - pull('/u/nlp/data/semparse/overnight/geo880.db', 'data/overnight/', {:symlink => true}) - pull('/u/nlp/data/semparse/overnight/geo880/geo880-train.examples', 'data/overnight/', {:symlink => true}) - pull('/u/nlp/data/semparse/overnight/geo880/geo880-test.examples', 'data/overnight/', {:symlink => true}) - - - # Cache for turking - pull('/u/nlp/data/semparse/overnight/cache/', 'data/overnight/', {:symlink => true}) - pull('/u/nlp/data/semparse/overnight/layouts/', 'data/overnight/', {:symlink => true}) - - # Pull testing code - pull('/u/nlp/data/semparse/overnight/test/', 'data/overnight/', {:symlink => true}) - - # Pull geo880 - pull('/u/nlp/data/semparse/overnight/geo880/geo880.paraphrases.train.superlatives.examples', 'data/overnight/', {:symlink => true}) - pull('/u/nlp/data/semparse/overnight/geo880/geo880.paraphrases.train.superlatives2.examples', 'data/overnight/', {:symlink => true}) - pull('/u/nlp/data/semparse/overnight/geo880/geo880.lexicon', 'data/overnight/', {:symlink => true}) - pull('/u/nlp/data/semparse/overnight/geo880/geo880.predicate.dict', 'data/overnight/', {:symlink => true}) - - # Pull dependencies for everything else - domains = ['geo880', 'regex', 'publications', 'socialnetwork', 'restaurants', 'blocks', 'calendar', 'housing', 'basketball', 'recipes', 'calendarplus'] - domains.each do |domain| - pull('/u/nlp/data/semparse/overnight/' + domain + '/' + domain + '.paraphrases.train.examples', 'data/overnight/', {:symlink => true}) - pull('/u/nlp/data/semparse/overnight/' + domain + '/' + domain + '.paraphrases.test.examples', 'data/overnight/', {:symlink => true}) - pull('/u/nlp/data/semparse/overnight/' + domain + '/' + domain + 
'.paraphrases.groups', 'data/overnight/', {:symlink => true}) - pull('/u/nlp/data/semparse/overnight/' + domain + '/' + domain + '.word_alignments.berkeley', 'data/overnight/', {:symlink => true}) - pull('/u/nlp/data/semparse/overnight/' + domain + '/' + domain + '.phrase_alignments', 'data/overnight/', {:symlink => true}) - pull('/u/nlp/data/semparse/overnight/' + domain + '/' + domain + '-ppdb.txt', 'data/overnight/', {:symlink => true}) - end - - # Pull the independent sets for calendar - pull('/u/nlp/data/semparse/overnight/calendar/eval/calendar.test.turk.examples', 'data/overnight/', {:symlink => true}) -}) - -addModule('esslli_2016', 'Data for ESSLLI 2016 semantic parsing class', lambda { - pull('/u/nlp/data/semparse/esslli_2016', 'data/esslli_2016/', {:symlink => true}) -}) - -addModule('geo880', 'Data, lexicon, grammars and KB for geo880', lambda { - pull('/u/nlp/data/semparse/geo880/geo880-test.examples', 'data/geo880', {:symlink => true}) - pull('/u/nlp/data/semparse/geo880/geo880-test.preprocessed.examples', 'data/geo880', {:symlink => true}) - pull('/u/nlp/data/semparse/geo880/geo880-train.preprocessed.examples', 'data/geo880', {:symlink => true}) - pull('/u/nlp/data/semparse/geo880/geo880.grammar', 'data/geo880', {:symlink => true}) - pull('/u/nlp/data/semparse/geo880/geo880.lexicon', 'data/geo880', {:symlink => true}) - pull('/u/nlp/data/semparse/geo880/geo880.kg', 'data/geo880', {:symlink => true}) - pull('/u/nlp/data/semparse/geo880/geo880.type_hierarchy', 'data/geo880', {:symlink => true}) -}) -############################################################ - -if ARGV.size == 0 - puts "#{$0} ... 
" - puts - puts "Modules:" - $modules.each { |name,description,func| - puts " #{name}: #{description}" - } - puts - puts "Internal use (Stanford NLP only):" - puts " #{$0} -l ...: Get the files from the local Stanford NLP server instead" - puts " #{$0} -l -r ...: Release to the public www directory on the server" -end - -$modules.each { |name,description,func| - if ARGV.index(name) - puts "===== Downloading #{name}: #{description}" - func.call - end -} diff --git a/examples/lassie/sempre/run b/examples/lassie/sempre/run deleted file mode 100755 index 7f5bc89141..0000000000 --- a/examples/lassie/sempre/run +++ /dev/null @@ -1,1115 +0,0 @@ -#!/usr/bin/env ruby - -# This is the main entry point for running all SEMPRE programs. See -# fig/lib/execrunner.rb for more documentation for how commands are generated. -# There are a bunch of modes that this script can be invoked with, which -# loosely correspond to the modules. - -$: << 'fig/lib' -require 'execrunner' -$optPrefix = '-' - -$modes = [] -def addMode(name, description, func) - $modes << [name, description, func] -end - -def codalab(dependencies=nil) - # Set @cl=1 to run job on CodaLab - dependencies ||= l(':fig', ':lib', ':module-classes.txt', ':libsempre') - l( - letDefault(:cl, 0), - sel(:cl, - l(), - l('cl', 'run', dependencies, '---', 'LC_ALL=C.UTF-8'), - nil), - nil) -end - -def header(modules='core', codalabDependencies=nil) - l( - codalab(codalabDependencies), - # Queuing system - letDefault(:q, 0), sel(:q, l(), l('fig/bin/q', '-shareWorkingPath', o('mem', '5g'), o('memGrace', 10), '-add', '---')), - # Create execution directory - letDefault(:pooldir, 1), - sel(:pooldir, l(), 'fig/bin/qcreate'), - # Run the Java command... 
- 'java', - '-ea', - '-Dmodules='+modules, - # Memory size - letDefault(:memsize, 'default'), - sel(:memsize, { - 'tiny' => l('-Xms2G', '-Xmx4G'), - 'low' => l('-Xms5G', '-Xmx7G'), - 'default' => l('-Xms8G', '-Xmx10G'), - 'medium' => l('-Xms12G', '-Xmx14G'), - 'high' => l('-Xms20G', '-Xmx24G'), - 'higher' => l('-Xms40G', '-Xmx50G'), - 'impressive' => l('-Xms75G', '-Xmx90G'), - }), - # Classpath - '-cp', 'libsempre/*:lib/*', - # Profiling - letDefault(:prof, 0), sel(:prof, l(), '-Xrunhprof:cpu=samples,depth=100,file=_OUTPATH_/java.hprof.txt'), - nil) -end - -def unbalancedTrainDevSplit - l(o('Dataset.trainFrac', 0.8), o('Dataset.devFrac', 0.2)) -end -def balancedTrainDevSplit - l(o('Dataset.trainFrac', 0.5), o('Dataset.devFrac', 0.5)) -end - -def figOpts; l(selo(:pooldir, 'execDir', 'exec', '_OUTPATH_'), o('overwriteExecDir'), o('addToView', 0)) end - -############################################################ -# Unit tests - -addMode('test', 'Run unit tests', lambda { |e| - l( - 'java', '-ea', '-Xmx12g', '-cp', 'libsempre/*:lib/*', - lambda { |e| - e.key?(:sparqlserver) ? "-Dsparqlserver=http://#{e[:sparqlserver]}/sparql" : l() - }, - 'org.testng.TestNG', - lambda { |e| - if e[:class] - l('-testclass', 'edu.stanford.nlp.sempre.' + e[:class]) - else - 'testng.xml' - end - }, - lambda { |e| - if e[:fast] - o('excludegroups', 'sparql,corenlp') - else - nil - end - }, - nil) -}) - -############################################################ -# Freebase - -def freebaseHeader; header('core,freebase') end - -def freebaseFeatureDomains - [ - 'basicStats', - 'alignmentScores', - 'entityFeatures', - 'context', - 'skipPos', - 'joinPos', - 'wordSim', - 'lexAlign', - 'tokenMatch', - 'rule', - 'opCount', - 'constant', - 'denotation', - 'whType', - 'span', - 'derivRank', - 'lemmaAndBinaries', - nil].compact -end - -def sparqlOpts - l( - required(:sparqlserver, 'host:port of the Sparql server'), # Example: jonsson:3093, etc. 
- o('SparqlExecutor.endpointUrl', lambda{|e| 'http://'+e[:sparqlserver]+'/sparql'}), - nil) -end - -def freebaseOpts - l( - figOpts, - sparqlOpts, - - # Features - o('FeatureExtractor.featureDomains', *freebaseFeatureDomains), - o('Builder.executor', 'freebase.SparqlExecutor'), - o('Builder.valueEvaluator', 'freebase.FreebaseValueEvaluator'), - o('LanguageAnalyzer.languageAnalyzer', 'corenlp.CoreNLPAnalyzer'), - - # Lexicon - o('LexiconFn.lexiconClassName', 'edu.stanford.nlp.sempre.fbalignment.lexicons.Lexicon'), - l( # binary - o('BinaryLexicon.binaryLexiconFilesPath', 'lib/fb_data/7/binaryInfoStringAndAlignment.txt'), - o('BinaryLexicon.keyToSortBy', 'Intersection_size_typed'), - nil), - o('UnaryLexicon.unaryLexiconFilePath','lib/fb_data/7/unaryInfoStringAndAlignment.txt'), # unary - o('EntityLexicon.entityPopularityPath','lib/fb_data/7/entityPopularity.txt'), # entity - #Jonathan - added this 3/5/2015 - o('TypeInference.typeLookup','freebase.FreebaseTypeLookup'), - o('FreebaseSearch.cachePath', '/u/nlp/data/semparse/scr/cache/fbsearch/1.cache'), - nil) -end - -def cachePaths(lexiconFnCachePath, sparqlExecutorCachePath) - l( - required(:cacheserver, 'none (don\'t cache to disk), local (write to local file), or : (hit the cacheserver)'), - lambda { |e| - cacheserver = e[:cacheserver] - cacheserver = 'jonsson:4000' if cacheserver == 'remote' # Default - case cacheserver - when 'none' then l() - when 'local' then l( # Use files directly - don't run more than one job that does this! 
- o('Lexicon.cachePath', 'LexiconFn.cache'), - o('SparqlExecutor.cachePath', 'SparqlExecutor.cache'), - o('FreebaseSearch.cachePath', 'FreebaseSearch.cache'), - nil) - else l( - o('Lexicon.cachePath', cacheserver+':/u/nlp/data/semparse/cache/'+lexiconFnCachePath), - o('SparqlExecutor.cachePath', cacheserver+':/u/nlp/data/semparse/cache/'+sparqlExecutorCachePath), - o('FreebaseSearch.cachePath', cacheserver+':/u/nlp/data/semparse/cache/fbsearch/1.cache'), - # Read-only - o('EntityLexicon.mid2idPath', cacheserver+':/u/nlp/data/semparse/scr/freebase/freebase-rdf-2013-06-09-00-00.canonical-id-map'), - o('FreebaseTypeLookup.entityTypesPath', cacheserver+':/u/nlp/data/semparse/scr/freebase/freebase-rdf-2013-06-09-00-00.canonicalized.en-types'), - nil) - end - }, - nil) -end - -# tag is either "free917" or "webquestions" -def emnlp2013AblationExperiments(tag) - l( - letDefault(:ablation, 0), - # Ablation experiments (EMNLP) - sel(:ablation, - l(), # (0) Just run things normally - selo(nil, 'Parser.beamSize', 200, 50, 10), # (1) Vary beam size - selo(nil, 'Dataset.trainFrac', 0.1, 0.2, 0.4, 0.6), # (2) Vary training set size - sel(nil, # (3) Structural: only do join or only do bridge - o('Grammar.tags', l(tag, 'join')), - o('Grammar.tags', l(tag, 'bridge')), - o('Grammar.tags', l(tag, 'inject')), - nil), - sel(nil, # (4) Features - o('FeatureExtractor.featureDomains', *(freebaseFeatureDomains+['lexAlign'])), # +lexAlign - o('FeatureExtractor.featureDomains', *(freebaseFeatureDomains+['lexAlign']-['alignmentScores'])), # +lexAlign -alignmentScores - o('FeatureExtractor.featureDomains', *(freebaseFeatureDomains-['denotation'])), # -denotation - o('FeatureExtractor.featureDomains', *(freebaseFeatureDomains-['skipPos', 'joinPos'])), # -syntax features (skipPos, joinPos) - nil), - #o('Builder.executor', 'FormulaMatchExecutor'), # (6) train on logical forms (doesn't really work well) - nil), - - letDefault(:split, 0), selo(:split, 'Dataset.splitRandom', 1, 2, 3), - nil) -end - 
-def free917 - l( # Data - letDefault(:data, 0), - sel(:data, - l(o('Dataset.inPaths', 'train,data/free917.train.examples.canonicalized.json'), unbalancedTrainDevSplit), # (0) train 0.8, dev 0.2 - l(o('Dataset.inPaths', 'train,data/free917.train.examples.canonicalized.json', 'test,data/free917.test.examples.canonicalized.json')), # (1) Don't run on test yet! - nil), - - # Grammar - o('Grammar.inPaths', 'freebase/data/emnlp2013.grammar'), - o('Parser.beamSize', 500), - - emnlp2013AblationExperiments('free917'), - - # lexicon index - letDefault(:lucene, 0), - sel(:lucene, - l( - o('EntityLexicon.exactMatchIndex','lib/lucene/4.4/free917/'), - cachePaths('10/LexiconFn.cache', '10/SparqlExecutor.cache'), - o('Grammar.tags', 'free917', 'bridge', 'join', 'inject', 'exact'), - nil), - l( # With entity disambiguation - currently too crappy - o('EntityLexicon.inexactMatchIndex','lib/lucene/4.4/inexact/'), - cachePaths('4/LexiconFn.cache', '4/SparqlExecutor.cache'), - o('Grammar.tags', 'free917', 'bridge', 'join', 'inject', 'inexact'), - nil), - nil), - # Use binary predicate features (overfits on free917) - o('BridgeFn.filterBadDomain',false), - # Learning - o('Learner.maxTrainIters', 6), - nil) -end - -def webquestions - l( - # Data - letDefault(:data, 0), - sel(:data, - l( # Webquestions (dev) [EMNLP final JSON] - o('Dataset.inPaths', - 'train,lib/data/webquestions/dataset_11/webquestions.examples.train.json'), - unbalancedTrainDevSplit, - nil), - l( # Webquestions (test) [EMNLP final JSON] - o('Dataset.inPaths', - 'train,lib/data/webquestions/dataset_11/webquestions.examples.train.json', - 'test,lib/data/webquestions/dataset_11/webquestions.examples.test.json'), - nil), - nil), - - # Grammar - letDefault(:grammar, 1), - sel(:grammar, l(), l(o('Grammar.inPaths', 'freebase/data/emnlp2013.grammar'))), - - o('Parser.beamSize', 200), # {07/03/13}: WebQuestions is too slow to run with default 500, so set to 200 for now... 
- - # Caching - letDefault(:entitysearch, 0), - sel(:entitysearch, # Used for EMNLP 2013 - l( - cachePaths('lucene/0.cache', 'sparql/3.cache'), - o('EntityLexicon.inexactMatchIndex','lib/lucene/4.4/inexact/'), - o('LexiconFn.maxEntityEntries',10), - o('Grammar.tags', 'webquestions', 'bridge', 'join', 'inject','inexact'), # specify also strategy - nil), - nil), - - # Learning - o('Learner.maxTrainIters', 3), - - # Use binary predicate features (overfits on free917) - o('BridgeFn.useBinaryPredicateFeatures', true), - o('BridgeFn.filterBadDomain',true), - letDefault(:split, 0), selo(:split, 'Dataset.splitRandom', 1,2,3), - nil) -end - - -addMode('freebase', 'Freebase (for EMNLP 2013, ACL 2014, TACL 2014)', lambda { |e| l( - letDefault(:train, 0), - letDefault(:interact, 0), - - # nlpsub: for running commands on PBS - letDefault(:nlpsub, 0), - sel(:nlpsub, - l(), - l('nlpsub', '-d/scr/yonatan/sandbox/blackhole', '-nyonatan', '-c3'), - l('nlpsub', '-d/scr/yonatan/sandbox/blackhole', '-nyonatan', '-qjag', '-c3'), - l('nlpsub', '-d/scr/yonatan/sandbox/blackhole', '-nyonatan', '-qjohn', '-c3'), - nil), - sel(:interact, l()), - freebaseHeader, - 'edu.stanford.nlp.sempre.Main', - freebaseOpts, - - # Dataset - sel(:domain, { - 'webquestions' => webquestions, - 'free917' => free917, - }), - - - sel(:interact, l(), l( - # After training, run interact, which loads up a set of parameters and - # puts you in a prompt. - o('Dataset.inPaths'), - o('Learner.maxTrainIters', 0), - required(:load, 'none or exec number (e.g., 15) to load'), - lambda { |e| - if e[:load] == 'none' then - l() - else - execPath = "lib/models/#{e[:load]}.exec" - l( - o('Builder.inParamsPath', execPath+'/params'), - o('Grammar.inPaths', execPath+'/grammar'), - o('Master.logPath', lambda{|e| 'state/' + e[:domain] + '.log'}), - o('Master.newExamplesPath', lambda{|e| 'state/' + e[:domain] + '.examples'}), - o('Master.onlineLearnExamples', true), - # Make sure features are set properly! 
- nil) - end - }, - o('Main.interactive'), - nil)) -) }) - -addMode('cacheserver', 'Start the general-purpose cache server that serves files with key-value maps', lambda { |e| - l( - 'java', '-Xmx36g', '-ea', '-cp', 'libsempre/*:lib/fig.jar', - 'edu.stanford.nlp.sempre.cache.StringCacheServer', - letDefault(:port, 4000), - lambda { |e| o('port', e[:port]) }, - - letDefault(:cachetype, 0), - sel(:cachetype, - l( - o('FileStringCache.appendMode'), - o('FileStringCache.capacity', 35 * 1024), - o('FileStringCache.flushFrequency', 2147483647), - nil), - l( - o('FileStringCache.appendMode',false), - o('FileStringCache.capacity', 1 * 1024), - o('FileStringCache.flushFrequency', 100000), - nil), - nil), - nil) -}) - -############################################################ -# Freebase RDF database (for building SPARQL database) - -# Scratch directory -def scrOptions - letDefault(:scr, '/u/nlp/data/semparse/rdf/scr/' + `hostname | cut -f 1 -d .`.chomp) -end - -addMode('filterfreebase', '(1) Filter RDF Freebase dump (do this once) [takes about 1 hour]', lambda { |e| l( - scrOptions, - l( - 'fig/bin/qcreate', o('statePath', lambda{|e| e[:scr] + '/state'}), - 'java', '-ea', '-Xmx20g', '-cp', 'libsempre/*:lib/*', - 'edu.stanford.nlp.sempre.freebase.FilterFreebase', - o('inPath', '/u/nlp/data/semparse/scr/freebase/freebase-rdf-2013-06-09-00-00.canonicalized'), - sel(:keep, { - 'all' => o('keepAllProperties'), - 'geo' => l( - o('keepTypesPaths', 'data/geo.types'), - o('keepPropertiesPath', 'data/geo.properties'), - o('keepGeneralPropertiesOnlyForSeenEntities', true), - nil), - }), - o('execDir', '_OUTPATH_'), o('overwriteExecDir'), - nil), -nil) }) - -addMode('sparqlserver', '(2) Start the SPARQL server [do this every time]', lambda { |e| l( - scrOptions, - required(:exec), - sel(nil, - l( - 'freebase/scripts/virtuoso', 'start', - lambda{|e| e[:scr]+'/state/execs/'+e[:exec].to_s+'.exec/vdb'}, # DB directory - lambda{|e| 3000+e[:exec]}, # port - nil), - # Give everyone 
permissions so that anyone can kill the server if needed. - l( - 'chmod', '-R', 'og=u', - lambda{|e| e[:scr]+'/state/execs/'+e[:exec].to_s+'.exec/vdb'}, # DB directory - nil), - # To stop the server: freebase/scripts/virtuoso stop 3093 - nil), -nil) }) - -# (3) Index the filtered RDF dump [takes 48 hours] -addMode('indexfreebase', '(3) Index the filtered RDF dump [takes 48 hours for Freebase]', lambda { |e| l( - letDefault(:stage, nil), - scrOptions, - required(:exec), - sel(:stage, - l( - 'scripts/virtuoso', 'add', - lambda{|e| e[:scr]+'/state/execs/'+e[:exec].to_s+'.exec/0.ttl'}, # ttl file - lambda{|e| 3000+e[:exec]}, # port - lambda{|e| e[:offset] || 0}, # offset - nil), - l( - 'scripts/extract-freebase-schema.rb', - lambda{|e| 'http://localhost:'+(3000+e[:exec]).to_s+'/sparql'}, # port - lambda{|e| e[:scr]+'/state/execs/'+e[:exec].to_s+'.exec/schema.ttl'}, - nil), - nil), -nil) }) - -addMode('convertfree917', 'Convert the Free917 dataset', lambda { |e| l( - 'java', '-ea', '-Xmx15g', - '-cp', 'libsempre/*:lib/*', - 'edu.stanford.nlp.sempre.freebase.Free917Converter', - o('inDir','/u/nlp/data/semparse/yates/final-dataset-acl-2013-all/'), - o('outDir','data/free917_convert/'), - o('entityInfoFile','/user/joberant/scr/fb_data/3/entityInfo.txt'), - o('cvtFile','lib/fb_data/2/Cvts.txt'), - o('midToIdFile','/u/nlp/data/semparse/scr/freebase/freebase-rdf-2013-06-09-00-00.canonical-id-map'), -nil) }) - -addMode('query', 'Query a single logical form or SPARQL', lambda { |e| l( - codalab, - 'java', '-ea', - '-cp', 'libsempre/*:lib/*', - 'edu.stanford.nlp.sempre.freebase.SparqlExecutor', - sparqlOpts, -nil) }) - -############################################################ - - -# Just start a simple interactive shell to try out SEMPRE commands -addMode('simple', 'Simple shell', lambda { |e| l( - codalab, 'java', '-cp', 'libsempre/*:lib/*', '-ea', 'edu.stanford.nlp.sempre.Main', - o('Main.interactive'), -nil) }) - -addMode('simple-sparql', 'Simple shell for querying 
SPARQL', lambda { |e| l( - codalab, 'java', '-Dmodules=core,freebase', '-cp', 'libsempre/*:lib/*', '-ea', 'edu.stanford.nlp.sempre.Main', - o('executor', 'freebase.SparqlExecutor'), - sparqlOpts, - o('Main.interactive'), -nil) }) - -addMode('simple-lambdadcs', 'Simple shell for querying with the LambdaDCSExecutor', lambda { |e| l( - codalab, 'java', '-Dmodules=core,tables,corenlp', '-cp', 'libsempre/*:lib/*', '-ea', 'edu.stanford.nlp.sempre.Main', - o('executor', 'tables.lambdadcs.LambdaDCSExecutor'), - o('FeatureExtractor.featureDomains', 'denotation lexAlign joinPos skipPos'.split), - o('LanguageAnalyzer.languageAnalyzer', 'corenlp.CoreNLPAnalyzer'), - o('Main.interactive'), -nil) }) - -addMode('simple-freebase', 'Simple shell for using Freebase', lambda { |e| l( - 'java', '-Dmodules=core,freebase', '-cp', 'libsempre/*:lib/*', '-ea', 'edu.stanford.nlp.sempre.Main', - o('executor', 'freebase.SparqlExecutor'), - letDefault(:sparqlserver, 'freebase.cloudapp.net:3093'), - letDefault(:cacheserver, 'freebase.cloudapp.net:4000'), - sparqlOpts, - # Set up Freebase search for entities - # Assume run following on the server (read-only and capacity are important!) 
- # ./run @mode=cacheserver -readOnly -capacity MAX -basePath lib/fb_data - o('FreebaseSearch.cachePath', 'FreebaseSearch.cache'), - o('EntityLexicon.mid2idPath', lambda { |e| e[:cacheserver] + ':freebase-rdf-2013-06-09-00-00.canonical-id-map.gz' }), - o('TypeInference.typeLookup', 'freebase.FreebaseTypeLookup'), - o('FreebaseTypeLookup.entityTypesPath', lambda { |e| e[:cacheserver] + ':freebase-rdf-2013-06-09-00-00.canonicalized.en-types.gz' }), - o('EntityLexicon.maxEntries', 2), - o('FeatureExtractor.featureDomains', 'rule'), - o('Parser.coarsePrune'), - o('JoinFn.typeInference'), - o('UnaryLexicon.unaryLexiconFilePath', '/dev/null'), - o('BinaryLexicon.binaryLexiconFilesPath', '/dev/null'), - #o('JoinFn.showTypeCheckFailures'), # Use this to debug - o('Grammar.inPaths', 'freebase/data/demo1.grammar'), # Override with your own custom grammar - o('SparqlExecutor.returnTable'), - #o('SparqlExecutor.includeSupportingInfo'), # Show full information - o('Main.interactive'), -nil) }) - -addMode('simple-freebase-nocache', 'Simple shell for using Freebase (without a cache server)', lambda { |e| l( - 'java', '-Dmodules=core,freebase', '-cp', 'libsempre/*:lib/*', '-ea', 'edu.stanford.nlp.sempre.Main', - o('executor', 'freebase.SparqlExecutor'), - letDefault(:sparqlserver, 'freebase.cloudapp.net:3093'), - sparqlOpts, - o('FeatureExtractor.featureDomains', 'rule'), - o('Parser.coarsePrune'), - o('JoinFn.typeInference'), - o('UnaryLexicon.unaryLexiconFilePath', '/dev/null'), - o('BinaryLexicon.binaryLexiconFilesPath', '/dev/null'), - #o('JoinFn.showTypeCheckFailures'), # Use this to debug - o('Grammar.inPaths', 'freebase/data/demo1.grammar'), # Override with your own custom grammar - #o('SparqlExecutor.includeSupportingInfo'), # Show full information - o('Main.interactive'), -nil) }) - - -############################################################ -# {2014-12-27} [Percy]: Overnight semantic parsing -def overnightFeatureDomains - [ - 'match', - 'ppdb', - 'skip-bigram', - 
'root', - 'alignment', - 'lexical', - 'root_lexical', - 'lf', - 'simpleworld', - nil].compact -end - -addMode('overnight', 'Overnight semantic parsing', l( - header('core,freebase,overnight'), - 'edu.stanford.nlp.sempre.Main', - figOpts, - o('JavaExecutor.convertNumberValues', false), - o('useAnchorsOnce', true), - o('trackLocalChoices'), - o('JoinFn.typeInference', true), - o('Builder.parser', 'FloatingParser'), - o('FloatingParser.executeAllDerivations', 'true'), - o('LanguageAnalyzer', 'corenlp.CoreNLPAnalyzer'), - o('Learner.maxTrainIters', 1), - #o('printAllPredictions'), - o('Derivation.showUtterance'), - letDefault(:debug, 0), - - selo(1, 'maxExamples', 'train:10', 'train:MAX'), - - # Exact matching is needed on most simple domains - # o('executor', 'FormulaMatchExecutor'), - # o('Builder.valueEvaluator', 'ExactValueEvaluator'), - - # Features - o('FeatureExtractor.featureDomains', 'denotation'), # denotation features from general feature extractor - o('FeatureExtractor.featureComputers', 'overnight.OvernightFeatureComputer'), # - o('OvernightFeatureComputer.featureDomains', *overnightFeatureDomains), - #o('initialization', 'paraphrase :: match,1', 'paraphrase :: size,-0.1', 'paraphrase :: ppdb,0.3', - # 'paraphrase :: skip-bigram,0.8', 'paraphrase :: skip-ppdb,0.2','denotation :: error,-1000'), - o('coarsePrune'), - sel(2, - l(), # no reg - l(o('Params.l1Reg','lazy'),o('Params.l1RegCoeff',0)), - l(o('Params.l1Reg','lazy'),o('Params.l1RegCoeff',0.001)), - nil), - # Set up the domain - required(:domain), - o('Grammar.inPaths', lambda { |e| 'overnight/' + e[:domain] + '.grammar' }), - o('SimpleWorld.domain', lambda { |e| e[:domain] }), - o('PPDBModel.ppdbModelPath', lambda { |e| 'lib/data/overnight/' + e[:domain] + '-ppdb.txt' }), - o('Dataset.trainFrac', 0.8), o('Dataset.devFrac', 0.2), - o('FloatingParser.maxDepth', 11), - o('Parser.beamSize', 20), - letDefault(:alignment, 1), - sel(:alignment, - o('wordAlignmentPath', lambda { |e| 'lib/data/overnight/' + 
e[:domain] + '.word_alignments.heuristic' }), - o('wordAlignmentPath', lambda { |e| 'lib/data/overnight/' + e[:domain] + '.word_alignments.berkeley' }), - nil), - o('phraseAlignmentPath', lambda { |e| 'lib/data/overnight/' + e[:domain] + '.phrase_alignments' }), - o('PPDBModel.ppdbModelPath', lambda { |e| 'lib/data/overnight/' + e[:domain] + '-ppdb.txt' }), - o('DerivationPruner.pruningComputers', ['overnight.OvernightDerivationPruningComputer']), - o('DerivationPruner.pruningStrategies', ['violateHardConstraints']), - o('Dataset.inPaths', - lambda { |e| 'train:lib/data/overnight/' + e[:domain] + '.paraphrases.train.examples' }, - lambda { |e| 'test:lib/data/overnight/' + e[:domain] + '.paraphrases.test.examples' }), - sel(:domain, { - 'geo880' => l( - letDefault(:data,0), - sel(:data, - l(o('Dataset.inPaths', 'train:lib/data/overnight/geo880.paraphrases.train.superlatives.examples')), - l(o('Dataset.inPaths', 'train:lib/data/overnight/geo880.paraphrases.train.superlatives.examples', 'test:lib/data/overnight/geo880-train.examples')), - l(o('Dataset.inPaths', 'train:lib/data/overnight/geo880.paraphrases.train.superlatives2.examples', 'test:lib/data/overnight/geo880-train.examples')), - l(o('Dataset.inPaths', 'train:lib/data/overnight/geo880.paraphrases.train.superlatives.examples', 'test:lib/data/overnight/geo880-test.examples')), - l(o('Dataset.inPaths', 'train:lib/data/overnight/geo880.paraphrases.train.superlatives2.examples', 'test:lib/data/overnight/geo880-test.examples')), - nil), - o('Parser.beamSize', 20), - o('initialization', 'paraphrase :: match,1', 'paraphrase :: size,-0.1', 'paraphrase :: ppdb,0.3', - 'lf :: edu.stanford.nlp.sempre.SimpleWorld.superlative& superlative,10', - 'root :: pos0=WRB&returnType=class edu.stanford.nlp.sempre.NumberValue,10'), - o('FloatingParser.maxDepth', 11), - o('Grammar.tags','generate','general', 'geo880'), - o('SimpleLexicon.inPaths', 'lib/data/overnight/geo880.lexicon'), - nil), - 'calendar' => l( - 
o('Grammar.tags','generate','general'), - nil), - 'calendarplus' => l( - o('Grammar.tags','generate','general','geo440'), - o('Grammar.inPaths','overnight/calendar.grammar'), - o('SimpleWorld.domain', 'calendar'), - nil), - 'blocks' => l( - o('Grammar.tags','generate','general'), - nil), - 'restaurants' => l( - o('Grammar.tags','generate','general'), - nil), - 'housing' => l( - o('Grammar.tags','generate','general'), - nil), - 'socialnetwork' => l( - o('Grammar.tags','generate','general'), - nil), - 'publications' => l( - o('Grammar.tags','generate','general'), - nil), - 'basketball' => l( - o('Grammar.tags','generate','general'), - nil), - 'recipes' => l( - o('Grammar.tags','generate','general'), - nil), - }), -nil)) - -############################################################ -# {5/27/15} [Ice] -addMode('tables', 'QA on HTML tables', lambda { |e| l( - # Add @cldir=1 to use CodaLab's directory paths - letDefault(:cldir, 0), - # Usual header - header('core,tables,corenlp,cprune'), - # Select class - letDefault(:class, 'main'), - sel(:class, { - 'main' => 'edu.stanford.nlp.sempre.Main', - 'check' => 'edu.stanford.nlp.sempre.tables.test.DPDParserChecker', - 'dump' => 'edu.stanford.nlp.sempre.tables.serialize.SerializedDumper', - 'load' => l('edu.stanford.nlp.sempre.tables.serialize.SerializedLoader', let(:parser, 'serialized')), - 'stats' => 'edu.stanford.nlp.sempre.tables.test.TableStatsComputer', - 'tag-data' => 'edu.stanford.nlp.sempre.tables.serialize.TaggedDatasetGenerator', - 'tag-table' => 'edu.stanford.nlp.sempre.tables.serialize.TaggedTableGenerator', - 'tag-fuzzy' => 'edu.stanford.nlp.sempre.tables.serialize.TaggedFuzzyGenerator', - 'alter' => l('edu.stanford.nlp.sempre.tables.alter.BatchTableAlterer', let(:parser, 'serialized')), - 'alter-ex' => l('edu.stanford.nlp.sempre.tables.alter.AlteredTablesExecutor', let(:parser, 'serialized')), - 'filter' => 'edu.stanford.nlp.sempre.tables.serialize.DumpFilterer', - 'column' => 
'edu.stanford.nlp.sempre.tables.test.TableColumnAnalyzer', - 'execute' => 'edu.stanford.nlp.sempre.tables.test.BatchTableExecutor', - }), - # Fig parameters - selo(:cldir, 'execDir', '_OUTPATH_', '.'), - o('overwriteExecDir'), o('addToView', 15), o('jarFiles', 'libsempre/*'), - sel(:cldir, l(), '>/dev/null'), - # Set environment for table execution - o('executor', 'tables.lambdadcs.LambdaDCSExecutor'), - o('targetValuePreprocessor', 'tables.TableValuePreprocessor'), - o('NumberFn.unitless'), o('NumberFn.alsoTestByConversion'), - o('TypeInference.typeLookup', 'tables.TableTypeLookup'), - o('JoinFn.specializedTypeCheck', false), o('JoinFn.typeInference', true), - o('Learner.outputPredValues'), - # Value Evaluator - letDefault(:eval, 'value'), - sel(:eval, { - 'value' => o('Builder.valueEvaluator', 'tables.TableValueEvaluator'), - 'denotation' => o('Builder.valueEvaluator', 'tables.TableValueEvaluator'), # alias of 'value' - 'formula' => l( - o('Builder.valueEvaluator', 'tables.TableFormulaEvaluator'), - o('fallBackToValueEvaluator', false), - nil), - }), - # Parser - letDefault(:parser, 'floatsize'), - o('beamSize', 50), - o('useSizeInsteadOfDepth'), - sel(:parser, { - 'floatsize' => l( - o('Builder.parser', 'FloatingParser'), - o('FloatingParser.maxDepth', 15), - nil), - 'baseline' => o('Builder.parser', 'tables.baseline.TableBaselineParser'), - 'serialized' => o('Builder.parser', 'tables.serialize.SerializedParser'), - # ACL 2016 - 'grow-dpd' => l( - o('Builder.parser', 'tables.dpd.DPDParser'), - o('FloatingParser.maxDepth', 8), - nil), - 'grow-float' => l( - o('Builder.parser', 'FloatingParser'), - o('FloatingParser.maxDepth', 8), - o('FloatingParser.betaReduce'), o('initialFloatingHasZeroDepth'), - nil), - 'grow-mix' => l( - o('Builder.parser', 'MixParser'), - o('MixParser.parsers', 'FloatingParser', 'tables.serialize.SerializedParser:train-0xc'), - o('FloatingParser.maxDepth', 8), - o('FloatingParser.betaReduce'), o('initialFloatingHasZeroDepth'), - nil), - # 
EMNLP 2017 - 'cprune' => l( - o('Builder.parser', 'cprune.CPruneFloatingParser'), - o('FloatingParser.maxDepth', 15), - o('maxNumNeighbors', 40), - o('maxPredictedPatterns', 1000), - nil), - }), - o('Parser.verbose', 0), - letDefault(:pruning, 1), - sel(:pruning, - l(), - l( - o('DerivationPruner.pruningStrategies', *tablesPruningStrategies), - o('DerivationPruner.pruningComputers', 'tables.TableDerivationPruningComputer'), - nil), - nil), - # Grammar - tablesGrammarPaths, - letDefault(:fuzzy, 'original'), - sel(:fuzzy, { - 'original' => o('FuzzyMatcher.fuzzyMatcher', 'tables.match.OriginalMatcher'), - 'editdist-exact' => l( - o('FuzzyMatcher.fuzzyMatcher', 'tables.match.EditDistanceFuzzyMatcher'), - o('fuzzyMatchMaxEditDistanceRatio', 0.0), - nil), - 'editdist-fuzzy' => l( - o('FuzzyMatcher.fuzzyMatcher', 'tables.match.EditDistanceFuzzyMatcher'), - o('fuzzyMatchSubstring'), o('fuzzyMatchMaxEditDistanceRatio', 0.15), - o('alsoMatchPart'), - nil), - }), - letDefault(:normalize, 1), - sel(:normalize, - l(), - l(o('genericDateValue'), o('numberCanStartAnywhere'), o('num2CanStartAnywhere')), - nil), - letDefault(:anchor, 1), - sel(:anchor, { - 1 => o('FloatingParser.useAnchorsOnce', true), - 2 => l(o('FloatingParser.useAnchorsOnce', false), o('FloatingParser.useMaxAnchors', 2)), - }), - # Dataset - letDefault(:data, 'none'), - tablesDataPaths, - # Verbosity - o('FeatureVector.ignoreZeroWeight'), - o('logFeaturesLimit', 10), - o('LambdaDCSException.noErrorMessage'), - letDefault(:verbose, 0), - sel(:verbose, - l( - o('maxPrintedPredictions', 1), o('maxPrintedTrue', 1), - nil), - l( - o('maxPrintedPredictions', 10), o('maxPrintedTrue', 10), - o('putCellNameInCanonicalUtterance'), o('showUtterance'), - nil), - l( - o('maxPrintedPredictions', 10), o('maxPrintedTrue', 10), - o('putCellNameInCanonicalUtterance'), o('showUtterance'), - o('summarizeRuleTime'), o('summarizeDenotations'), - nil), - l( - o('maxPrintedPredictions', 10), o('maxPrintedTrue', 10), - 
o('putCellNameInCanonicalUtterance'), o('showUtterance'), - o('summarizeRuleTime'), o('summarizeDenotations'), - o('showRules'), - o('Parser.verbose', 2), - o('JoinFn.verbose', 3), - o('JoinFn.showTypeCheckFailures'), - nil), - nil), - # Language Analyzer - letDefault(:lang, 'corenlp'), - sel(:lang, { - 'simple' => o('LanguageAnalyzer', 'SimpleAnalyzer'), - 'corenlp' => l(o('LanguageAnalyzer', 'corenlp.CoreNLPAnalyzer'), o('annotators', *'tokenize ssplit pos lemma ner'.split)), - 'fullcorenlp' => l(o('LanguageAnalyzer', 'corenlp.CoreNLPAnalyzer'), o('annotators', *'tokenize ssplit pos lemma ner parse'.split)), - }), - # Training - letDefault(:train, 0), - sel(:train, - l( - let(:l1, 0), - nil), - l( - o('combineFromFloatingParser'), - o('maxTrainIters', 3), - o('showValues', false), o('showFirstValue'), - o('customExpectedCounts', 'TOP'), - nil), - l( - # for dumping derivations (@class=dump) - # force unbalancedTrainDevSplit + combine from floating parser - o('combineFromFloatingParser'), o('DPDParser.cheat'), - nil), - nil), - # Regularization - letDefault(:l1, 1), - sel(:l1, - l(), - l(o('Params.l1Reg','lazy'), o('Params.l1RegCoeff', '3e-5')), # Default - l(o('Params.l1Reg','lazy'), selo(nil, 'Params.l1RegCoeff', 0, 0.00001, 0.0001, 0.001, 0.01)), - l(o('Params.l1Reg','lazy'), selo(nil, 'Params.l1RegCoeff', 0.00001, 0.00003, 0.0001, 0.0003)), - l(o('Params.l1Reg','lazy'), selo(nil, 'Params.l1RegCoeff', 0.00001, 0.00003, 0.0005)), - nil), - # Features - letDefault(:feat, 'none'), - sel(:feat, { - 'none' => l(), # No features (random) - 'some' => l( # Add your own features! 
(only set up the feature computers) - o('FeatureExtractor.featureComputers', 'tables.features.PhrasePredicateFeatureComputer tables.features.PhraseDenotationFeatureComputer'.split), - nil), - 'all' => l( # All ACL 2015 features - o('FeatureExtractor.featureDomains', 'custom-denotation phrase-predicate phrase-denotation headword-denotation missing-predicate'.split), - o('FeatureExtractor.featureComputers', 'tables.features.PhrasePredicateFeatureComputer tables.features.PhraseDenotationFeatureComputer'.split), - nil), - 'more' => l( # All ACL 2015 features + more experimental features - o('FeatureExtractor.featureDomains', 'custom-denotation phrase-predicate phrase-denotation headword-denotation missing-predicate anchored-entity'.split), - o('FeatureExtractor.featureComputers', 'tables.features.PhrasePredicateFeatureComputer tables.features.PhraseDenotationFeatureComputer tables.features.AnchorFeatureComputer'.split), - nil), - 'baseline' => l( # For the baseline classifier - o('FeatureExtractor.featureDomains', 'custom-denotation phrase-denotation headword-denotation table-baseline'.split), - o('FeatureExtractor.featureComputers', 'tables.baseline.TableBaselineFeatureComputer tables.features.PhraseDenotationFeatureComputer'.split), - nil), - 'ablate' => l( - o('FeatureExtractor.featureComputers', 'tables.features.PhrasePredicateFeatureComputer tables.features.PhraseDenotationFeatureComputer'.split), - selo(nil, - 'FeatureExtractor.featureDomains', - 'phrase-predicate phrase-denotation headword-denotation missing-predicate'.split, - 'custom-denotation phrase-denotation headword-denotation missing-predicate'.split, - 'custom-denotation phrase-predicate headword-denotation missing-predicate'.split, - 'custom-denotation phrase-predicate phrase-denotation missing-predicate'.split, - 'custom-denotation phrase-predicate phrase-denotation headword-denotation'.split, - nil), - nil), - }), - letDefault(:featOp, 'careful'), - sel(:featOp, { - 'none' => l(), - 'careful' => l( - 
o('maxNforLexicalizeAllPairs', 2), - o('computeFuzzyMatchPredicates'), - nil), - }), -nil) }) - -def tablesGrammarPaths - lambda { |e| - baseDir = ['tables/grammars/', 'grammars/'][e[:cldir]] - l( - letDefault(:grammar, 'combined-all'), - sel(:grammar, { - 'custom' => l(), - 'restrict' => o('Grammar.inPaths', "#{baseDir}restrict.grammar"), - 'simple' => o('Grammar.inPaths', "#{baseDir}simple.grammar"), - 'combined' => o('Grammar.inPaths', "#{baseDir}combined.grammar"), - 'combined-jnc' => l( # WQ baseline - o('Grammar.inPaths', "#{baseDir}combined.grammar"), - o('Grammar.tags', *'movement count'.split), - nil), - 'combined-cut' => l( # No intersection / union - o('Grammar.inPaths', "#{baseDir}combined.grammar"), - o('Grammar.tags', *'movement comparison count aggregate superlative arithmetic'.split), - nil), - 'combined-all' => l( # Default - o('Grammar.inPaths', "#{baseDir}combined.grammar"), - o('Grammar.tags', *'alternative movement comparison count aggregate superlative arithmetic merge'.split), - nil), - 'combined-more' => l( - o('Grammar.inPaths', "#{baseDir}combined.grammar"), - o('Grammar.tags', *'alternative movement comparison count aggregate superlative arithmetic merge v-superlative'.split), - nil), - 'combined-trigger' => l( # Use trigger words for operations - o('Grammar.inPaths', "#{baseDir}combined.grammar"), - o('Grammar.tags', *'t-alternative t-movement t-comparison t-count t-aggregate t-superlative t-arithmetic merge'.split), - nil), - # ACL 2016 - 'grow-custom' => l( - o('Grammar.inPaths', "#{baseDir}grow.grammar"), - o('Grammar.binarizeRules', false), - nil), - 'grow-default' => l( - o('Grammar.inPaths', "#{baseDir}grow.grammar"), - o('Grammar.binarizeRules', false), - o('Grammar.tags', *'scoped merge-and arithmetic comparison alternative neq yearrange part closedclass scoped-2args-merge-and'.split), - let(:anchor, 2), - nil), - 'grow-strict' => l( - o('Grammar.inPaths', "#{baseDir}grow.grammar"), - o('Grammar.binarizeRules', false), - 
o('Grammar.tags', *'scoped merge-and arithmetic comparison alternative neq yearrange part closedclass-generic scoped-2args-merge-and'.split), - let(:anchor, 2), - nil), - # EMNLP 2017 - 'extended' => l( - o('Grammar.inPaths', "#{baseDir}extended.grammar"), - o('Grammar.tags', *'alternative movement comparison count aggregate superlative arithmetic merge v-superlative'.split), - nil), - }), - nil) - } -end - -def tablesDataPaths - lambda { |e| - baseDir = ['lib/data/WikiTableQuestions/data/', 'WikiTableQuestions/data/'][e[:cldir]] - csvDir = ['lib/data/WikiTableQuestions/', 'WikiTableQuestions/'][e[:cldir]] - nnDir = ['lib/data/nn_0/', 'nn_0/'][e[:cldir]] - datasets = { - 'none' => l(), - 'train' => o('Dataset.inPaths', "train,#{baseDir}training.examples"), - # Pristine test test - 'test' => l( - o('Dataset.inPaths', - "train,#{baseDir}training.examples", - "test,#{baseDir}pristine-unseen-tables.examples"), - o('neighborFilePath', "#{nnDir}/exact_nearest_neighbors.all"), - nil), - # @data=annotated can be used with @class=check only - 'annotated' => o('Dataset.inPaths', "train,#{baseDir}annotated-all.examples"), - 'before300' => o('Dataset.inPaths', "train,#{baseDir}training-before300.examples"), - } - # Development sets: 80:20 random split of training data - ['1', '2', '3', '4', '5'].each do |x| - datasets['u-' + x] = l( - o('Dataset.inPaths', - "train,#{baseDir}random-split-#{x}-train.examples", - "dev,#{baseDir}random-split-#{x}-dev.examples", - nil), - o('neighborFilePath', "#{nnDir}/exact_nearest_neighbors.seed-#{x}.train"), - nil) - end - # That's it! 
- l( - o('TableKnowledgeGraph.baseCSVDir', csvDir), - # To use the normalized values from the tagged file, which were checked by hand, - # add @useTaggedFile=1 - letDefault(:useTaggedFile, 0), - selo(:useTaggedFile, 'TableValuePreprocessor.taggedFiles', '', "#{csvDir}/tagged/data/"), - sel(:data, datasets), - nil) - } -end - -def tablesPruningStrategies - [ - ### Critical strategies - "emptyDenotation", - "nonLambdaError", - ### Strategies that do not depend on the children's actual formulas - "atomic", - "tooManyValues", - "badSummarizerHead", - "mistypedMerge", - ### Strategies that depend on the children's formulas - "doubleNext", - "multipleSuperlatives", - "sameMerge", - "forwardBackward", - "unsortedMerge", - "typeRowMerge", - nil].compact -end - - -############################################################ -# {2015-01-18} Generate utterances [Percy] -addMode('genovernight', 'Generate utterances for overnight semantic parsing', lambda { |e| l( - header('core,overnight'), - 'edu.stanford.nlp.sempre.overnight.GenerationMain', - figOpts, - o('JoinFn.typeInference', true), - o('JoinFn.specializedTypeCheck', false), - - o('JavaExecutor.convertNumberValues', false), - o('JavaExecutor.printStackTrace', false), - - # These domains are all based on SimpleWorld - required(:domain), - o('Grammar.inPaths', lambda { |e| 'overnight/' + e[:domain] + '.grammar' }), - o('SimpleWorld.domain', lambda { |e| e[:domain] }), - o('initialization', 'denotation :: error,-1000', 'denotation :: empty,-100', 'paraphrase :: size,+0.01', 'denotation :: value_in_formula,-100'), - o('FeatureExtractor.featureComputers','overnight.OvernightFeatureComputer'), - o('OvernightFeatureComputer.featureDomains', ''), - o('OvernightFeatureComputer.itemAnalysis',false), - letDefault(:gen, 1), - sel(:gen, - l( # For debugging the grammar - o('FeatureExtractor.featureDomains', 'denotation'), - o('Dataset.inPaths', lambda { |e| 'train:overnight/' + e[:domain] + '-unittest.examples'}), - selo(:parse, 
'Grammar.tags', 'generate', 'parse'), - o('interactive'), - nil), - l( # For generating utterances - o('parser', 'FloatingParser'), - o('maxDepth', 30), o('beamSize', 10000), - o('derivationScoreNoise', 1), - o('Dataset.inPaths', 'train:overnight/null.examples'), - o('Derivation.showUtterance'), - o('FeatureExtractor.featureDomains', 'denotation'), - o('printAllPredictions'), - o('printPredictedUtterances'), - o('executeAllDerivations'), - o('Parser.pruneErrorValues', true), - o('Grammar.tags', 'generate'), - nil), - nil), - a('Grammar.tags', 'general'), -nil) }) - -# Generate for all the domains -addMode('genovernight-wrapper', 'Generate utterances for overnight semantic parsing', lambda { |e| l( - './run', '@mode=genovernight', '@gen=1', - sel(nil, { - 'calendar' => let(:domain, 'calendar'), - 'blocks' => let(:domain, 'blocks'), - 'housing' => let(:domain, 'housing'), - 'restaurants' => let(:domain, 'restaurants'), - 'publications' => let(:domain, 'publications'), - 'socialnetwork' => let(:domain, 'socialnetwork'), - 'basketball' => let(:domain, 'basketball'), - 'geo880' => let(:domain, 'geo880'), - 'recipes' => let(:domain, 'recipes'), - }), - lambda { |e| '@domain=' + e[:domain] }, - lambda { |e| system 'mkdir -p genovernight.out'; o('execDir', 'genovernight.out/' + e[:domain]) }, -nil) }) - -addMode('geo880', 'Semantic parsing on the geo880 dataset', lambda { |e| l( - # Usual header - header('core,tables,corenlp,geo880'), - 'edu.stanford.nlp.sempre.Main', - # Fig parameters - figOpts, - o('executor', 'tables.lambdadcs.LambdaDCSExecutor'), - o('JoinFn.specializedTypeCheck', false), o('JoinFn.typeInference', false), - # Parser - o('Builder.parser', 'BeamParser'), - o('Parser.coarsePrune'), - - # Evaluation - o('Builder.valueEvaluator', 'geo880.Geo880ValueEvaluator'), - - # Grammar - o('Grammar.inPaths','lib/data/geo880/geo880.grammar'), - - # Type hierarchy - o('Geo880TypeLookup.typeHierarchyPath', 'lib/data/geo880/geo880.type_hierarchy'), - 
o('TypeInference.typeLookup','geo880.Geo880TypeLookup'), - - # Yrkvpba - o('SimpleLexicon.inPaths', 'lib/data/geo880/geo880.lexicon'), - - # Learner - o('Learner.maxTrainIters', 3), - - # Dataset - letDefault(:data, 0), - sel(:data, - l(o('Dataset.inPaths', 'train,lib/data/geo880/geo880-train.preprocessed.examples'), unbalancedTrainDevSplit), # (0) train 0.8, dev 0.2 - l(o('Dataset.inPaths', 'train,lib/data/geo880/geo880-train.examples', 'test,lib/data/geo880/geo880-test.preprocessed/examples')), # (1) Don't run on test yet! - nil), - # Load the graph - o('Dataset.globalGraphPath', 'lib/data/geo880/geo880.kg'), - # Verbosity - letDefault(:verbose, 0), - sel(:verbose, - l(), - l( - o('showRules'), - o('Parser.verbose', 2), - o('JoinFn.verbose', 3), - o('JoinFn.showTypeCheckFailures'), - nil), - nil), - # Language Analyzer - l(o('LanguageAnalyzer', 'corenlp.CoreNLPAnalyzer'), o('annotators', *'tokenize ssplit pos lemma ner'.split)), - # Regularization - letDefault(:l1, 0), - sel(:l1, - l(), - l(o('Params.l1Reg','lazy'), o('Params.l1RegCoeff', '3e-5')), - l(o('Params.l1Reg','lazy'), selo(nil, 'Params.l1RegCoeff', 0, 0.00001, 0.0001, 0.001, 0.01)), - nil), - # Features - letDefault(:feat, 'freebase'), - sel(:feat, { - 'none' => l(), # No features (random) - 'freebase' => l( - o('FeatureExtractor.featureDomains', 'rule opCount constant whType span lemmaAndBinaries denotation lexAlign joinPos skipPos'.split), -# o('FeatureExtractor.featureDomains', 'rule opCount constant whType lemmaAndBinaries denotation lexAlign joinPos skipPos'.split), - nil), - }), -nil) }) - -############################################################ - -if ARGV.size == 0 - puts "#{$0} @mode= [options]" - puts - puts 'This is the main entry point for all SEMPRE-related runs.' 
- puts "Modes:" - $modes.each { |name,description,func| - puts " #{name}: #{description}" - } -end - -modesMap = {} -$modes.each { |name,description,func| - modesMap[name] = func -} -run!(sel(:mode, modesMap)) diff --git a/examples/lassie/sempre/scripts/agenda-stats b/examples/lassie/sempre/scripts/agenda-stats deleted file mode 100755 index 43bd9b52f4..0000000000 --- a/examples/lassie/sempre/scripts/agenda-stats +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env ruby - -ARGV.each { |e| - e = e.sub(/\.exec$/, '') - puts "===== #{e}" - system "./fig/bin/tab -s -H -i e/#{e}.exec/learner.events -a iter group .sort utterance / ^parseTime$ ^numOfFeaturizedDerivs$ ^firstCorrectItem$ ^totalDerivs$ ^partCorrect$ ^partOracle$" -} diff --git a/examples/lassie/sempre/scripts/checkstyle.sh b/examples/lassie/sempre/scripts/checkstyle.sh deleted file mode 100755 index bd68d0a712..0000000000 --- a/examples/lassie/sempre/scripts/checkstyle.sh +++ /dev/null @@ -1,11 +0,0 @@ -# Check style of the code - -if [ -z "$1" ]; then - files=`find src -name "*.java"` -else - files="$@" -fi - -d=`dirname $0` -prog="$d/../lib/checkstyle/checkstyle-6.6-all.jar" -java -cp $prog com.puppycrawl.tools.checkstyle.Main -c `dirname $0`/checkstyle.xml $files diff --git a/examples/lassie/sempre/scripts/checkstyle.xml b/examples/lassie/sempre/scripts/checkstyle.xml deleted file mode 100644 index 06eda0a3d9..0000000000 --- a/examples/lassie/sempre/scripts/checkstyle.xml +++ /dev/null @@ -1,204 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/examples/lassie/sempre/scripts/create-geo-simple-lexicon.py b/examples/lassie/sempre/scripts/create-geo-simple-lexicon.py deleted file mode 100755 index 
ce2f8171fc..0000000000 --- a/examples/lassie/sempre/scripts/create-geo-simple-lexicon.py +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/python - -import sys -import json - -class LexicalEntry: - def __init__(self, l, f, t): - self.lexeme=l.strip() - self.formula=f.strip() - self.type=t.strip() - -out = open(sys.argv[2],'w') - -with open(sys.argv[1]) as f: - for line in f: - tokens = line.split("\t") - if len(tokens) > 2: - continue - if(tokens[0] == "loc_city"): - index = tokens[1].rfind('.') - citystate = tokens[1][index+1:] - city = citystate[0:citystate.rfind('_')] - city = city.replace('_',' ').strip() - entry = LexicalEntry(city, tokens[1], "fb:en.city") - out.write(json.dumps(entry.__dict__)+'\n') - elif (tokens[0] == "loc_state"): - index = tokens[1].rfind('.') - state = tokens[1][index+1:].strip() - state = state.replace('_',' ').strip() - entry = LexicalEntry(state, tokens[1], "fb:en.state") - out.write(json.dumps(entry.__dict__)+'\n') - elif tokens[0] == "loc_river": - index = tokens[1].rfind('.') - river = tokens[1][index+1:].strip() - river = river.replace('_',' ').strip() - entry = LexicalEntry(river+" river", tokens[1], "fb:en.river") - out.write(json.dumps(entry.__dict__)+'\n') - elif (tokens[0] == "loc_place"): - index = tokens[1].rfind('.') - place = tokens[1][index+1:].strip() - place = place.replace('_',' ').strip() - entry = LexicalEntry(place, tokens[1], "fb:en.place") - out.write(json.dumps(entry.__dict__)+'\n') - elif (tokens[0] == "loc_lake"): - index = tokens[1].rfind('.') - lake = tokens[1][index+1:].strip() - lake = lake.replace('_',' ').strip() - if not 'lake' in lake: - lake = lake + " lake" - entry = LexicalEntry(lake, tokens[1], "fb:en.lake") - out.write(json.dumps(entry.__dict__)+'\n') - elif (tokens[0] == "loc_mountain"): - index = tokens[1].rfind('.') - mountain = tokens[1][index+1:].strip() - mountain = mountain.replace('_',' ').strip() - entry = LexicalEntry("mount " + mountain, tokens[1], "fb:en.mountain") - 
out.write(json.dumps(entry.__dict__)+'\n') - elif (tokens[0] == "loc_country"): - index = tokens[1].rfind('.') - country = tokens[1][index+1:].strip() - country = country.replace('_',' ').strip() - entry = LexicalEntry(country, tokens[1], "fb:en.country") - out.write(json.dumps(entry.__dict__)+'\n') - - -out.close() - - diff --git a/examples/lassie/sempre/scripts/evaluation.py b/examples/lassie/sempre/scripts/evaluation.py deleted file mode 100755 index b7f298af23..0000000000 --- a/examples/lassie/sempre/scripts/evaluation.py +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/python - -import sys -import json - -# Official evaluation script used to evaluate Freebase question answering -# systems. Used for EMNLP 2013, ACL 2014 papers, etc. - -if len(sys.argv) != 2: - sys.exit("Usage: %s " % sys.argv[0]) - -"""return a tuple with recall, precision, and f1 for one example""" -def computeF1(goldList,predictedList): - - """Assume all questions have at least one answer""" - if len(goldList)==0: - raise Exception("gold list may not be empty") - """If we return an empty list recall is zero and precision is one""" - if len(predictedList)==0: - return (0,1,0) - """It is guaranteed now that both lists are not empty""" - - precision = 0 - for entity in predictedList: - if entity in goldList: - precision+=1 - precision = float(precision) / len(predictedList) - - recall=0 - for entity in goldList: - if entity in predictedList: - recall+=1 - recall = float(recall) / len(goldList) - - f1 = 0 - if precision+recall>0: - f1 = 2*recall*precision / (precision + recall) - return (recall,precision,f1) - -averageRecall=0 -averagePrecision=0 -averageF1=0 -count=0 - -"""Go over all lines and compute recall, precision and F1""" -with open(sys.argv[1]) as f: - for line in f: - tokens = line.split("\t") - gold = json.loads(tokens[1]) - predicted = json.loads(tokens[2]) - recall, precision, f1 = computeF1(gold,predicted) - averageRecall += recall - averagePrecision += precision - averageF1 += f1 - 
count+=1 - -"""Print final results""" -averageRecall = float(averageRecall) / count -averagePrecision = float(averagePrecision) / count -averageF1 = float(averageF1) / count -print "Number of questions: " + str(count) -print "Average recall over questions: " + str(averageRecall) -print "Average precision over questions: " + str(averagePrecision) -print "Average f1 over questions: " + str(averageF1) -averageNewF1 = 2 * averageRecall * averagePrecision / (averagePrecision + averageRecall) -print "F1 of average recall and average precision: " + str(averageNewF1) - diff --git a/examples/lassie/sempre/scripts/extract-module-classes.rb b/examples/lassie/sempre/scripts/extract-module-classes.rb deleted file mode 100755 index 3f3b3daaf5..0000000000 --- a/examples/lassie/sempre/scripts/extract-module-classes.rb +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env ruby - -# Input: src -# Output: module-classes.txt - -# For each module (e.g., core, freebase), we compute the list of classes -# associated with that class. - -out_path = 'module-classes.txt' -out = open(out_path, 'w') -items = [] -core_packages = ['test'] # Packages in core which are not their own modules -modules = {} -Dir['src/**/*.java'].each { |path| - class_name = path.sub(/^src\//, '').gsub(/\//, '.').gsub(/\.java$/, '') - module_name = path.sub(/^.*sempre\//, '').split(/\//)[0] - module_name = 'core' if module_name =~ /\.java$/ || core_packages.index(module_name) - modules[module_name] = true - items << (module_name + " " + class_name) -} -items.sort! 
-out.puts items -out.close -puts "Wrote modules with #{items.size} files to #{out_path}: #{modules.keys.sort.join(' ')}" diff --git a/examples/lassie/sempre/scripts/filterGeneratedNegations.py b/examples/lassie/sempre/scripts/filterGeneratedNegations.py deleted file mode 100755 index a7da009ea8..0000000000 --- a/examples/lassie/sempre/scripts/filterGeneratedNegations.py +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/python - -import sys -import json - -# Official evaluation script used to evaluate Freebase question answering -# systems. Used for EMNLP 2013, ACL 2014 papers, etc. - -if len(sys.argv) != 3: - sys.exit("Usage: %s " % sys.argv[0]) - -generated = set() -with open(sys.argv[1]) as f: - for line in f: - generated.add(line) - -out = open(sys.argv[2],'w') - - -with open(sys.argv[1]) as f: - for line in f: - index = line.find(" not") - if index != -1: - newStr = line[0:index] + line[index+4:len(line)] - if newStr in generated: - out.write(line) - else: - out.write(line) -out.close() diff --git a/examples/lassie/sempre/scripts/find-first-pred-diff.sh b/examples/lassie/sempre/scripts/find-first-pred-diff.sh deleted file mode 100755 index cec807cbf9..0000000000 --- a/examples/lassie/sempre/scripts/find-first-pred-diff.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/sh -# Takes two log files and outputs the first line where they differ in terms -# of the example or the predictions - -egrep "Example: |Pred@| Item " ../state/execs/$1.exec/log > temp.1 -egrep "Example: |Pred@| Item " ../state/execs/$2.exec/log > temp.2 -cmp temp.1 temp.2 -#rm temp.1 -#rm temp.2 - diff --git a/examples/lassie/sempre/scripts/find-hard-coded-paths.rb b/examples/lassie/sempre/scripts/find-hard-coded-paths.rb deleted file mode 100755 index 32b5a89f3a..0000000000 --- a/examples/lassie/sempre/scripts/find-hard-coded-paths.rb +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env ruby - -# Heuristically find all hard-coded paths in the source code. -# There should be no absolute paths. 
-Dir["src/**/*.java"].each { |sourcePath| - IO.foreach(sourcePath) { |line| - next unless line =~ /"([\w_\/\.]+)"/ - file = $1 - next unless file =~ /^\/[uU]\w+\/\w+/ || file =~ /^lib\// - if file =~ /^\// - message = " [BAD: absolute path]" - elsif not File.exists?(file) - message = " [BAD: does not exist]" - else - message = "" - end - puts sourcePath + ": " + file + message - } -} diff --git a/examples/lassie/sempre/scripts/fix-checkstyle.rb b/examples/lassie/sempre/scripts/fix-checkstyle.rb deleted file mode 100755 index 36fa0f62f6..0000000000 --- a/examples/lassie/sempre/scripts/fix-checkstyle.rb +++ /dev/null @@ -1,113 +0,0 @@ -#!/usr/bin/env ruby - -# Hacky script for automatically fixing style errors. This script is far from -# perfect and you should manually inspect all changes before making changes. - -# Usage: -# scripts/fix-checkstyle.rb # Inspect changes -# scripts/fix-checkstyle.rb mutate # Apply -# scripts/fix-checkstyle.rb # Do to particular files - -mutate = ARGV.index('mutate'); ARGV = ARGV.select { |a| a != 'mutate' } - -files = ARGV.size > 0 ? ARGV : Dir['src/**/*.java'] - -files.each { |path| - in_multi_comment = false - line_num = 0 - lines = IO.readlines(path).map { |line| - line_num += 1 - line = line.chomp - new_line = line + '' - - # Remove trailing whitespace - new_line.gsub!(/\s+$/, '') - - # Add space after comment - new_line.gsub!(/ \/\/(\w)/, '// \1') - - # Put space after certain operators - new_line.gsub!(/ (if|for|while|catch)\(/, ' \1 (') - - # Put space - new_line.gsub!(/( for \(\w+ \w+): /, '\1 : ') - - # Put space after casts - new_line.gsub!(/\((\w+)\)([a-z])/, '(\1) \2') - - # Reverse modifier order - new_line.gsub!(/ final static /, ' static final ') - - in_single_quote = false - in_double_quote = false - in_comment = false - tokens = new_line.split(//) - (0...tokens.size).each { |i| - # Previous, current, next characters - p = i-1 >= 0 ? tokens[i-1] : '' - c = tokens[i] - n = i+1 < tokens.size ? 
tokens[i+1] : '' - - if c == '\'' && p != '\\' # Quote - in_single_quote = !in_single_quote - end - if c == '"' && p != '\\' # Quote - in_double_quote = !in_double_quote - end - if c == '/' && n == '/' # Comment - in_comment = true - end - if c == '/' && n == '*' # Begin multi-comment - in_multi_comment = true - end - - if !(in_single_quote || in_double_quote || in_comment || in_multi_comment || c == '') - # Replace if not in quote or comment - if c == ',' # One space after - c += ' ' if n != ' ' - elsif ['++', '--'].index(c+n) # Double character operators - c = c+n - n = '' - elsif ['==', '!=', '<=', '>=', '+=', '-=', '*=', '/=', '&=', '|=', '&&', '||'].index(c+n) # Double character operators - c = c+n - n = '' - c = ' ' + c if p != ' ' - c = c + ' ' if tokens[i+2] != ' ' - elsif '*/=%'.index(c) # Single character operators - # Don't do <, > because of generics - # Don't do , + because of unaries - c = ' ' + c if p != ' ' - c = c + ' ' if n != ' ' - end - - # Write back - tokens[i] = c - tokens[i+1] = n if i+1 < tokens.size - end - - if p == '*' && c == '/' # End multi-comment - in_multi_comment = false - end - } - new_line = tokens.join('') - #p new_line - - new_line.gsub!(/\. 
\* ;/, '.*;') - new_line.gsub!(/\s+$/, '') - - if line != new_line - puts "======= #{path} #{line_num}" - puts "OLD: [#{line}]" - puts "NEW: [#{new_line}]" - end - - new_line - } - - # Write it out - if mutate - out = open(path, 'w') - out.puts lines - out.close - end -} diff --git a/examples/lassie/sempre/scripts/generate-prediction-file.sh b/examples/lassie/sempre/scripts/generate-prediction-file.sh deleted file mode 100755 index b176c5ae8f..0000000000 --- a/examples/lassie/sempre/scripts/generate-prediction-file.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/sh -#Generate predictions file for evaluation script -#arguments: (1) execution numebr (2) iteration number (3) final output file - -fig/bin/tab e/$1.exec/learner.events iter group utterance targetValue predValue | grep -P "$2\ttest" | cut -f3,4,5 > pred_temp -java -cp "libsempre/*:lib/*" edu.stanford.nlp.sempre.freebase.utils.FileUtils pred_temp $3 -rm pred_temp diff --git a/examples/lassie/sempre/scripts/tunnel b/examples/lassie/sempre/scripts/tunnel deleted file mode 100755 index 12dcc698f4..0000000000 --- a/examples/lassie/sempre/scripts/tunnel +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash - -# Allows running things locally that need to connect to cache servers or sparql -# servers on NLP, which are behind a firewall. -# After starting this script, connect to localhost: to connect to :. 
- -# Cache server -host=jonsson -port=4000 -echo "Tunnel localhost:$port => $host:$port" -pid=$(ps ax | grep ssh.*:$port | grep -v grep | awk '{print $1}') -if [ -n "$pid" ]; then kill $pid; fi -ssh -N -n -L $port:$host:$port jacob.stanford.edu & - -# Sparql server for Freebase -host=jonsson -port=3093 -echo "Tunnel localhost:$port => $host:$port" -pid=$(ps ax | grep ssh.*:$port | grep -v grep | awk '{print $1}') -if [ -n "$pid" ]; then kill $pid; fi -ssh -N -n -L $port:$host:$port jacob.stanford.edu & - -# Sparql server for geo -host=jonsson -port=3094 -echo "Tunnel localhost:$port => $host:$port" -pid=$(ps ax | grep ssh.*:$port | grep -v grep | awk '{print $1}') -if [ -n "$pid" ]; then kill $pid; fi -ssh -N -n -L $port:$host:$port jacob.stanford.edu & - -# Sparql server for Paleo -host=jonsson -port=3021 -echo "Tunnel localhost:$port => $host:$port" -pid=$(ps ax | grep ssh.*:$port | grep -v grep | awk '{print $1}') -if [ -n "$pid" ]; then kill $pid; fi -ssh -N -n -L $port:$host:$port jacob.stanford.edu & - -# Cache server (immutable) -host=john5 -port=4001 -echo "Tunnel localhost:$port => $host:$port" -pid=$(ps ax | grep ssh.*:$port | grep -v grep | awk '{print $1}') -if [ -n "$pid" ]; then kill $pid; fi -ssh -N -n -L $port:$host:$port jacob.stanford.edu & - diff --git a/examples/lassie/sempre/scripts/verify-code-loop.rb b/examples/lassie/sempre/scripts/verify-code-loop.rb deleted file mode 100755 index 6b353e9d2b..0000000000 --- a/examples/lassie/sempre/scripts/verify-code-loop.rb +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/env ruby - -# Verifies that the codebase is sane (compiles, doesn't crash, gets reasonable -# accuracy) every once in a while. 
If something fails, an email is sent out - -$mode = ARGV[0] -if $mode != 'now' && $mode != 'loop' - puts "Usage: #{$0} (now|loop)" - puts " now: check immediately and exit (don't email)" - puts " loop: check only when stuff changes and loop (email if something breaks)" - exit 1 -end - -# Who should be notified if the code breaks. -$recipient = 'stanford-sempre@googlegroups.com' -$logPath = "verify-code-loop.log" - -# Send out the log file to all the recipients. -def emailLog(subject) - return unless $mode == 'loop' - - maxLines = 100 # Maximum number of lines to send via email. - numLines = IO.readlines($logPath).size - if numLines <= maxLines - command = "cat #{$logPath}" - else - # Take first few lines and last few lines to keep under maxLines. - command = "(head -#{maxLines/2} #{$logPath}; echo '... (#{numLines - maxLines} lines omitted) ...'; tail -#{maxLines/2} #{$logPath})" - end - command = "#{command} | mail -s '#{subject}' #{$recipient}" - puts "Emailing log file: #{command}" - system command or exit 1 -end - -def emailBroken - emailLog('sempre code is broken!') -end - -# Print to stdout and log file. -def log(line, newline=true) - line = "[#{`date`.chomp}] #{line}" - if newline - puts line - else - print line - end - out = open($logPath, 'a') - out.puts line - out.close -end - -# Run and command; if fail, send email. -def run(command, verbose) - log("======== Running: #{command}", false) if verbose - ok = system "#{command} >> #{$logPath} 2>&1" - puts " [#{ok ? 
'ok' : 'failed'}]" if verbose - emailBroken if not ok - ok -end - -def restart - exit 1 if $mode == 'now' - - system "cat #{$logPath}" - - # In case there are updates - log("Restarting #{$0}...") - exec($0 + ' loop') -end - -log("Started verify-code loop version 2") -log("Writing to #{$logPath}...") -firstTime = true -while true - break if $mode == 'now' && (not firstTime) - - # Whenever there's a change to the repository, run a test - system "rm -f #{$logPath}" - if not run('git pull', false) - log("git pull failed - this is bad, let's just quit.") - break - end - - if $mode == 'loop' && system("grep -q 'Already up-to-date' #{$logPath}") - # No changes, just wait - sleep 60 - next - end - firstTime = false - - # Check everything - log("Testing...") - run('git log -3', true) or restart # Print out last commit messages - run('./pull-dependencies', true) or restart - run('ant clean', true) or restart - run('ant', true) or restart - - run('scripts/find-hard-coded-paths.rb', true) or restart - - # Run tests - run("./run @mode=test", true) or restart - - emailLog('sempre code passes tests!') - - break if $mode == 'now' - restart -end diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/AbstractReinforcementParserState.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/AbstractReinforcementParserState.java deleted file mode 100644 index cf79b83481..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/AbstractReinforcementParserState.java +++ /dev/null @@ -1,201 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.google.common.collect.Sets; -import fig.basic.LogInfo; -import fig.basic.MapUtils; -import fig.basic.StopWatchSet; - -import java.util.*; - -/** - * Contains methods for putting derivations on the chart and combining them - * to add new derivations to the agenda - * @author joberant - */ -abstract class AbstractReinforcementParserState extends ChartParserState { - - protected final ReinforcementParser parser; - protected final 
CoarseParser coarseParser; - protected CoarseParser.CoarseParserState coarseParserState; - protected static final double EPSILON = 10e-20; // used to break ties between agenda items - - public AbstractReinforcementParserState(ReinforcementParser parser, Params params, Example ex, boolean computeExpectedCounts) { - super(parser, params, ex, computeExpectedCounts); - this.parser = parser; - coarseParser = parser.coarseParser; - } - - protected abstract void addToAgenda(DerivationStream derivationStream); - - protected boolean coarseAllows(String cat, int start, int end) { - return coarseParserState == null || coarseParserState.coarseAllows(cat, start, end); - } - - //don't add to a cell in the chart that is fill - protected boolean addToBoundedChart(Derivation deriv) { - - List derivations = chart[deriv.start][deriv.end].get(deriv.cat); - totalGeneratedDerivs++; - if (Parser.opts.visualizeChartFilling) { - chartFillingList.add(new CatSpan(deriv.start, deriv.end, deriv.cat)); - } - if (derivations == null) { - chart[deriv.start][deriv.end].put(deriv.cat, - derivations = new ArrayList<>()); - } - if (derivations.size() < getBeamSize()) { - derivations.add(deriv); - Collections.sort(derivations, Derivation.derivScoreComparator); // todo - perhaps can be removed - return true; - } else return false; - } - - // for [start, end) we try to create [start, end + i) or [start - i, end) and add unary rules - protected void combineWithChartDerivations(Derivation deriv) { - expandDerivRightwards(deriv); - expandDerivLeftwards(deriv); - applyCatUnaryRules(deriv); - } - - private void expandDerivRightwards(Derivation leftChild) { - if (parser.verbose(6)) - LogInfo.begin_track("Expanding rightward"); - Map> rhsCategoriesToRules = parser.leftToRightSiblingMap.get(leftChild.cat); - if (rhsCategoriesToRules != null) { - for (int i = 1; leftChild.end + i <= numTokens; ++i) { - Set intersection = Sets.intersection(rhsCategoriesToRules.keySet(), chart[leftChild.end][leftChild.end + 
i].keySet()); - - for (String rhsCategory : intersection) { - List compatibleRules = rhsCategoriesToRules.get(rhsCategory); - List rightChildren = chart[leftChild.end][leftChild.end + i].get(rhsCategory); - generateParentDerivations(leftChild, rightChildren, true, compatibleRules); - } - } - // handle terminals - if (leftChild.end < numTokens) - handleTerminalExpansion(leftChild, false, rhsCategoriesToRules); - } - if (parser.verbose(6)) - LogInfo.end_track(); - } - - private void expandDerivLeftwards(Derivation rightChild) { - if (parser.verbose(5)) - LogInfo.begin_track("Expanding leftward"); - Map> lhsCategorisToRules = parser.rightToLeftSiblingMap.get(rightChild.cat); - if (lhsCategorisToRules != null) { - for (int i = 1; rightChild.start - i >= 0; ++i) { - Set intersection = Sets.intersection(lhsCategorisToRules.keySet(), chart[rightChild.start - i][rightChild.start].keySet()); - - for (String lhsCategory : intersection) { - List compatibleRules = lhsCategorisToRules.get(lhsCategory); - List leftChildren = chart[rightChild.start - i][rightChild.start].get(lhsCategory); - generateParentDerivations(rightChild, leftChildren, false, compatibleRules); - } - } - // handle terminals - if (rightChild.start > 0) - handleTerminalExpansion(rightChild, true, lhsCategorisToRules); - } - if (parser.verbose(5)) - LogInfo.end_track(); - } - - private void generateParentDerivations(Derivation expandedDeriv, List otherDerivs, - boolean expandedLeftChild, List compatibleRules) { - - for (Derivation otherDeriv : otherDerivs) { - Derivation leftChild, rightChild; - if (expandedLeftChild) { - leftChild = expandedDeriv; - rightChild = otherDeriv; - } else { - leftChild = otherDeriv; - rightChild = expandedDeriv; - } - List children = new ArrayList<>(); - children.add(leftChild); - children.add(rightChild); - for (Rule rule : compatibleRules) { - if (coarseAllows(rule.lhs, leftChild.start, rightChild.end)) { - DerivationStream resDerivations = applyRule(leftChild.start, 
rightChild.end, rule, children); - - if (!resDerivations.hasNext()) - continue; - addToAgenda(resDerivations); - } - } - } - } - - // returns the score of derivation computed - private DerivationStream applyRule(int start, int end, Rule rule, List children) { - try { - if (Parser.opts.verbose >= 5) - LogInfo.logs("applyRule %s %s %s %s", start, end, rule, children); - StopWatchSet.begin(rule.getSemRepn()); // measuring time - StopWatchSet.begin(rule.toString()); - DerivationStream results = rule.sem.call(ex, - new SemanticFn.CallInfo(rule.lhs, start, end, rule, com.google.common.collect.ImmutableList.copyOf(children))); - StopWatchSet.end(); - StopWatchSet.end(); - return results; - } catch (Exception e) { - LogInfo.errors("Composition failed: rule = %s, children = %s", rule, children); - e.printStackTrace(); - throw new RuntimeException(e); - } - } - - private void applyCatUnaryRules(Derivation deriv) { - if (parser.verbose(4)) - LogInfo.begin_track("Category unary rules"); - for (Rule rule : parser.catUnaryRules) { - if (!coarseAllows(rule.lhs, deriv.start, deriv.end)) - continue; - if (deriv.cat.equals(rule.rhs.get(0))) { - DerivationStream resDerivations = applyRule(deriv.start, deriv.end, rule, Collections.singletonList(deriv)); - addToAgenda(resDerivations); - } - } - if (parser.verbose(4)) - LogInfo.end_track(); - } - - public List gatherRhsTerminalsDerivations() { - List derivs = new ArrayList<>(); - final List empty = Collections.emptyList(); - - for (int i = 0; i < numTokens; i++) { - for (int j = i + 1; j <= numTokens; j++) { - for (Rule rule : MapUtils.get(parser.terminalsToRulesList, phrases[i][j], Collections.emptyList())) { - if (!coarseAllows(rule.lhs, i, j)) - continue; - derivs.add(applyRule(i, j, rule, empty)); - } - } - } - return derivs; - } - - // rules where one word is a terminal and the other is a non-terminal - private void handleTerminalExpansion(Derivation child, boolean before, Map> categoriesToRules) { - - String phrase = before ? 
phrases[child.start - 1][child.start] : phrases[child.end][child.end + 1]; - int start = before ? child.start - 1 : child.start; - int end = before ? child.end : child.end + 1; - - if (categoriesToRules.containsKey(phrase)) { - List children = new ArrayList<>(); - children.add(child); - for (Rule rule : categoriesToRules.get(phrase)) { - if (coarseAllows(rule.lhs, start, end)) { - DerivationStream resDerivations = applyRule(start, end, rule, children); - if (!resDerivations.hasNext()) - continue; - addToAgenda(resDerivations); - } - } - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ActionFormula.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ActionFormula.java deleted file mode 100644 index 120493318a..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ActionFormula.java +++ /dev/null @@ -1,130 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.util.List; - -import com.google.common.base.Function; -import com.google.common.collect.Lists; - -import fig.basic.LispTree; - -/** - * An ActionFormula represent a compositional action used in the interactive - * package : is used as a prefix to denote an ActionFormula primitive (: - * actioname args) sequential (:s ActionFormula ActionFormula ...) 
repeat (:loop - * Number ActionFormula) conditional (:if Set ActionFormula) block scoping (:blk - * ActionFormula) - * - * @author sidaw - */ -public class ActionFormula extends Formula { - public enum Mode { - primitive(":"), // (: remove *) - sequential(":s"), // (:s (: add red top) (: remove this)) - repeat(":loop"), // (:loop (count (has color green)) (: add red top)) - conditional(":if"), // (:if (count (has color green)) (: add red top)) - whileloop(":while"), // (:while (count (has color green)) (: add red top)) - forset(":for"), // (:for (and this (color red)) (:s (: add red top) (: add - // yellow top) (: remove))) - foreach(":foreach"), // (:foreach * (add ((reverse color) this) top)) - - // primitives for declaring variables - // let(":let"), // (:let X *), - // set(":set"), // (:set X *) - - block(":blk"), // start a block of code (like {}) with a new scope - blockr(":blkr"), // also return a result after finishing the block - isolate(":isolate"), other(":?"); - - private final String value; - - Mode(String value) { - this.value = value; - } - - @Override - public String toString() { - return this.value; - } - }; - - public final Mode mode; - public final List args; - - public ActionFormula(Mode mode, List args) { - this.mode = mode; - this.args = args; - } - - public static Mode parseMode(String mode) { - if (mode == null) - return null; - for (Mode m : Mode.values()) { - // LogInfo.logs("mode string %s \t== %s \t!= %s", m.toString(), mode, - // m.name()); - if (m.toString().equals(mode)) - return m; - } - if (mode.startsWith(":")) - throw new RuntimeException("Unsupported ActionFormula mode"); - return null; - } - - @Override - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild(this.mode.toString()); - for (Formula arg : args) - tree.addChild(arg.toLispTree()); - return tree; - } - - @Override - public void forEach(Function func) { - if (!func.apply(this)) { - for (Formula arg : args) - arg.forEach(func); - } - 
} - - @Override - public Formula map(Function transform) { - Formula result = transform.apply(this); - if (result != null) - return result; - List newArgs = Lists.newArrayList(); - for (Formula arg : args) - newArgs.add(arg.map(transform)); - return new ActionFormula(this.mode, newArgs); - } - - @Override - public List mapToList(Function> transform, boolean alwaysRecurse) { - List res = transform.apply(this); - if (res.isEmpty() || alwaysRecurse) { - for (Formula arg : args) - res.addAll(arg.mapToList(transform, alwaysRecurse)); - } - return res; - } - - @SuppressWarnings({ "equalshashcode" }) - @Override - public boolean equals(Object thatObj) { - if (!(thatObj instanceof ActionFormula)) - return false; - ActionFormula that = (ActionFormula) thatObj; - if (!this.mode.equals(that.mode)) - return false; - if (!this.args.equals(that.args)) - return false; - return true; - } - - @Override - public int computeHashCode() { - int hash = 0x7ed55d16; - hash = hash * 0xd3a2646c + mode.hashCode(); - hash = hash * 0xd3a2646c + args.hashCode(); - return hash; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/AggregateFormula.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/AggregateFormula.java deleted file mode 100644 index c5ce22c7ac..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/AggregateFormula.java +++ /dev/null @@ -1,74 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.google.common.base.Function; -import fig.basic.LispTree; - -import java.util.List; - -/** - * 'Aggregate' takes a set and computes some number on that set. 
- * - * @author Percy Liang - */ -public class AggregateFormula extends Formula { - public enum Mode { count, sum, avg, min, max }; - public final Mode mode; - public final Formula child; - - public AggregateFormula(Mode mode, Formula child) { - this.mode = mode; - this.child = child; - } - - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild(mode.toString()); - tree.addChild(child.toLispTree()); - return tree; - } - - public static Mode parseMode(String mode) { - if ("count".equals(mode)) return Mode.count; - if ("sum".equals(mode)) return Mode.sum; - if ("avg".equals(mode)) return Mode.avg; - if ("min".equals(mode)) return Mode.min; - if ("max".equals(mode)) return Mode.max; - return null; - } - - @Override - public void forEach(Function func) { - if (!func.apply(this)) child.forEach(func); - } - - @Override - public Formula map(Function func) { - Formula result = func.apply(this); - return result == null ? new AggregateFormula(mode, child.map(func)) : result; - } - - @Override - public List mapToList(Function> func, boolean alwaysRecurse) { - List res = func.apply(this); - if (res.isEmpty() || alwaysRecurse) - res.addAll(child.mapToList(func, alwaysRecurse)); - return res; - } - - @SuppressWarnings({"equalshashcode"}) - @Override - public boolean equals(Object thatObj) { - if (!(thatObj instanceof AggregateFormula)) return false; - AggregateFormula that = (AggregateFormula) thatObj; - if (!this.mode.equals(that.mode)) return false; - if (!this.child.equals(that.child)) return false; - return true; - } - - public int computeHashCode() { - int hash = 0x7ed55d16; - hash = hash * 0xd3a2646c + mode.toString().hashCode(); - hash = hash * 0xd3a2646c + child.hashCode(); - return hash; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ArithmeticFormula.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ArithmeticFormula.java deleted file mode 100644 index 524e158614..0000000000 --- 
a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ArithmeticFormula.java +++ /dev/null @@ -1,92 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.google.common.base.Function; -import fig.basic.LispTree; - -import java.util.List; - -/** - * Performs arithmetic operations (+, -, *, /). - * Note that these are non-binary relations, which means we can't model them - * using a join. - * - * @author Percy Liang - */ -public class ArithmeticFormula extends Formula { - public enum Mode { add, sub, mul, div }; - public final Mode mode; - public final Formula child1; - public final Formula child2; - - public ArithmeticFormula(Mode mode, Formula child1, Formula child2) { - this.mode = mode; - this.child1 = child1; - this.child2 = child2; - } - - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild(modeToString(mode)); - tree.addChild(child1.toLispTree()); - tree.addChild(child2.toLispTree()); - return tree; - } - - @Override - public void forEach(Function func) { - if (!func.apply(this)) { child1.forEach(func); child2.forEach(func); } - } - - @Override - public Formula map(Function func) { - Formula result = func.apply(this); - return result == null ? 
new ArithmeticFormula(mode, child1.map(func), child2.map(func)) : result; - } - - @Override - public List mapToList(Function> func, boolean alwaysRecurse) { - List res = func.apply(this); - if (res.isEmpty() || alwaysRecurse) { - res.addAll(child1.mapToList(func, alwaysRecurse)); - res.addAll(child2.mapToList(func, alwaysRecurse)); - } - return res; - } - - public static Mode parseMode(String mode) { - if ("+".equals(mode)) return Mode.add; - if ("-".equals(mode)) return Mode.sub; - if ("*".equals(mode)) return Mode.mul; - if ("/".equals(mode)) return Mode.div; - return null; - } - - public static String modeToString(Mode mode) { - switch (mode) { - case add: return "+"; - case sub: return "-"; - case mul: return "*"; - case div: return "/"; - default: throw new RuntimeException("Invalid mode: " + mode); - } - } - - @SuppressWarnings({"equalshashcode"}) - @Override - public boolean equals(Object thatObj) { - if (!(thatObj instanceof ArithmeticFormula)) return false; - ArithmeticFormula that = (ArithmeticFormula) thatObj; - if (this.mode != that.mode) return false; - if (!this.child1.equals(that.child1)) return false; - if (!this.child2.equals(that.child2)) return false; - return true; - } - - public int computeHashCode() { - int hash = 0x7ed55d16; - hash = hash * 0xd3a2646c + mode.toString().hashCode(); // Note: don't call hashCode() on mode directly. - hash = hash * 0xd3a2646c + child1.hashCode(); - hash = hash * 0xd3a2646c + child2.hashCode(); - return hash; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/AtomicSemType.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/AtomicSemType.java deleted file mode 100644 index 962b0d66e4..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/AtomicSemType.java +++ /dev/null @@ -1,30 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.LispTree; - -// Represents an atomic type (strings, entities, numbers, dates, etc.). 
-public class AtomicSemType extends SemType { - public final String name; - public AtomicSemType(String name) { - if (name == null) throw new RuntimeException("Null name"); - this.name = name; - } - public boolean isValid() { return true; } - public SemType meet(SemType that) { - if (that instanceof TopSemType) return this; - if (that instanceof UnionSemType) return that.meet(this); - if (that instanceof AtomicSemType) { - String name1 = this.name; - String name2 = ((AtomicSemType) that).name; - if (name1.equals(name2)) return this; // Shortcut: the same - if (SemTypeHierarchy.singleton.getSupertypes(name1).contains(name2)) return this; - if (SemTypeHierarchy.singleton.getSupertypes(name2).contains(name1)) return that; - return SemType.bottomType; - } - return SemType.bottomType; - } - - public SemType apply(SemType that) { return SemType.bottomType; } - public SemType reverse() { return SemType.bottomType; } - public LispTree toLispTree() { return LispTree.proto.newLeaf(name); } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/BadFormulaException.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/BadFormulaException.java deleted file mode 100644 index cb4f6c8c4c..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/BadFormulaException.java +++ /dev/null @@ -1,21 +0,0 @@ -package edu.stanford.nlp.sempre; - -public class BadFormulaException extends RuntimeException { - public static final long serialVersionUID = 86586128316354597L; - - String message; - - public BadFormulaException(String message) { this.message = message; } - - // Combine multiple exceptions - public BadFormulaException(BadFormulaException... 
exceptions) { - StringBuilder builder = new StringBuilder(); - for (BadFormulaException exception : exceptions) - builder.append(" | ").append(exception.message); - //builder.append(exception).append("\n"); - this.message = builder.toString().substring(3); - } - - @Override - public String toString() { return message; } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/BeamParser.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/BeamParser.java deleted file mode 100644 index 7f9bb16231..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/BeamParser.java +++ /dev/null @@ -1,315 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.google.common.base.Joiner; -import com.google.common.collect.ImmutableList; - -import fig.basic.*; -import fig.exec.Execution; - -import java.util.*; - -/** - * A simple bottom-up chart-based parser that keeps the |beamSize| top - * derivations for each chart cell (cat, start, end). Also supports fast - * indexing of lexicalized rules using a trie. - * - * Note that this code does not rely on the Grammar being binarized, - * which makes it more complex. 
- * - * @author Percy Liang - */ -public class BeamParser extends Parser { - public static class Options { - @Option public int maxNewTreesPerSpan = Integer.MAX_VALUE; - } - public static Options opts = new Options(); - - Trie trie; // For non-cat-unary rules - - public BeamParser(Spec spec) { - super(spec); - - // Index the non-cat-unary rules - trie = new Trie(); - for (Rule rule : grammar.rules) - addRule(rule); - if (Parser.opts.visualizeChartFilling) - this.chartFillOut = IOUtils.openOutAppendEasy(Execution.getFile("chartfill")); - } - - public synchronized void addRule(Rule rule) { - if (!rule.isCatUnary()) - trie.add(rule); - } - - public ParserState newParserState(Params params, Example ex, boolean computeExpectedCounts) { - BeamParserState coarseState = null; - if (Parser.opts.coarsePrune) { - LogInfo.begin_track("Parser.coarsePrune"); - coarseState = new BeamParserState(this, params, ex, computeExpectedCounts, BeamParserState.Mode.bool, null); - coarseState.infer(); - coarseState.keepTopDownReachable(); - LogInfo.end_track(); - } - return new BeamParserState(this, params, ex, computeExpectedCounts, BeamParserState.Mode.full, coarseState); - } -} - -/** - * Stores BeamParser information about parsing a particular example. The actual - * parsing code lives here. 
- * - * @author Percy Liang - * @author Roy Frostig - */ -class BeamParserState extends ChartParserState { - public final Mode mode; - // Modes: - // 1) Bool: just check if cells (cat, start, end) are reachable (to prune chart) - // 2) Full: compute everything - public enum Mode { bool, full } - - private final BeamParser parser; - private final BeamParserState coarseState; // Used to prune - - public BeamParserState(BeamParser parser, Params params, Example ex, boolean computeExpectedCounts, - Mode mode, BeamParserState coarseState) { - super(parser, params, ex, computeExpectedCounts); - this.parser = parser; - this.mode = mode; - this.coarseState = coarseState; - } - - public void infer() { - if (numTokens == 0) - return; - - if (parser.verbose(2)) LogInfo.begin_track("ParserState.infer"); - - // Base case - for (Derivation deriv : gatherTokenAndPhraseDerivations()) { - featurizeAndScoreDerivation(deriv); - addToChart(deriv); - } - - // Recursive case - for (int len = 1; len <= numTokens; len++) - for (int i = 0; i + len <= numTokens; i++) - build(i, i + len); - - if (parser.verbose(2)) LogInfo.end_track(); - - // Visualize - if (parser.chartFillOut != null && Parser.opts.visualizeChartFilling && this.mode != Mode.bool) { - parser.chartFillOut.println(Json.writeValueAsStringHard(new ChartFillingData(ex.id, chartFillingList, - ex.utterance, ex.numTokens()))); - parser.chartFillOut.flush(); - } - - setPredDerivations(); - - if (mode == Mode.full) { - // Compute gradient with respect to the predicted derivations - ensureExecuted(); - if (computeExpectedCounts) { - expectedCounts = new HashMap<>(); - ParserState.computeExpectedCounts(predDerivations, expectedCounts); - } - } - } - - // Create all the derivations for the span [start, end). 
- protected void build(int start, int end) { - applyNonCatUnaryRules(start, end, start, parser.trie, new ArrayList(), new IntRef(0)); - - Set cellsPruned = new HashSet<>(); - applyCatUnaryRules(start, end, cellsPruned); - - for (Map.Entry> entry : chart[start][end].entrySet()) - pruneCell(cellsPruned, entry.getKey(), start, end, entry.getValue()); - } - - private static String cellString(String cat, int start, int end) { - return cat + ":" + start + ":" + end; - } - - // Return number of new derivations added - private int applyRule(int start, int end, Rule rule, List children) { - if (Parser.opts.verbose >= 5) LogInfo.logs("applyRule %s %s %s %s", start, end, rule, children); - try { - if (mode == Mode.full) { - StopWatchSet.begin(rule.getSemRepn()); - DerivationStream results = rule.sem.call(ex, - new SemanticFn.CallInfo(rule.lhs, start, end, rule, ImmutableList.copyOf(children))); - StopWatchSet.end(); - while (results.hasNext()) { - Derivation newDeriv = results.next(); - featurizeAndScoreDerivation(newDeriv); - addToChart(newDeriv); - } - return results.estimatedSize(); - } else if (mode == Mode.bool) { - Derivation deriv = new Derivation.Builder() - .cat(rule.lhs).start(start).end(end).rule(rule) - .children(ImmutableList.copyOf(children)) - .formula(Formula.nullFormula) - .createDerivation(); - addToChart(deriv); - return 1; - } else { - throw new RuntimeException("Invalid mode"); - } - } catch (Exception e) { - LogInfo.errors("Composition failed: rule = %s, children = %s", rule, children); - e.printStackTrace(); - throw new RuntimeException(e); - } - } - - // Don't prune the same cell more than once. - protected void pruneCell(Set cellsPruned, String cat, int start, int end, List derivations) { - String cell = cellString(cat, start, end); - if (cellsPruned.contains(cell)) return; - cellsPruned.add(cell); - pruneCell(cell, derivations); - } - - // Apply all unary rules with RHS category. 
- // Before applying each unary rule (rule.lhs -> rhsCat), we can prune the cell of rhsCat - // because we assume acyclicity, so rhsCat's cell will never grow. - private void applyCatUnaryRules(int start, int end, Set cellsPruned) { - for (Rule rule : parser.catUnaryRules) { - if (!coarseAllows(rule.lhs, start, end)) - continue; - String rhsCat = rule.rhs.get(0); - List derivations = chart[start][end].get(rhsCat); - if (Parser.opts.verbose >= 5) - LogInfo.logs("applyCatUnaryRules %s %s %s %s", start, end, rule, derivations); - if (derivations == null) continue; - - pruneCell(cellsPruned, rhsCat, start, end, derivations); // Prune before applying rules to eliminate cruft! - - for (Derivation deriv : derivations) - applyRule(start, end, rule, Collections.singletonList(deriv)); - } - } - - // Strategy: walk along the input on span (start:end) and traverse the trie - // to get the list of the rules that could apply by matching the RHS. - // start:end: span we're dealing with. - // i: current token position - // node: contains a link to the RHS that could apply. - // children: the derivations that't we're building up. - // numNew: Keep track of number of new derivations created - private void applyNonCatUnaryRules(int start, - int end, - int i, - Trie node, - ArrayList children, - IntRef numNew) { - if (node == null) return; - if (!coarseAllows(node, start, end)) return; - - if (Parser.opts.verbose >= 5) { - LogInfo.logs( - "applyNonCatUnaryRules(start=%d, end=%d, i=%d, children=[%s], %s rules)", - start, end, i, Joiner.on(", ").join(children), node.rules.size()); - } - - // Base case: our fencepost has walked to the end of the span, so - // apply the rule on all the children gathered during the walk. 
- if (i == end) { - for (Rule rule : node.rules) { - if (coarseAllows(rule.lhs, start, end)) { - numNew.value += applyRule(start, end, rule, children); - } - } - return; - } - - // Advance terminal token - applyNonCatUnaryRules( - start, end, i + 1, - node.next(ex.token(i)), - children, - numNew); - - // Advance non-terminal category - for (int j = i + 1; j <= end; j++) { - for (Map.Entry> entry : chart[i][j].entrySet()) { - Trie nextNode = node.next(entry.getKey()); - for (Derivation arg : entry.getValue()) { - children.add(arg); - applyNonCatUnaryRules(start, end, j, nextNode, children, numNew); - children.remove(children.size() - 1); - if (mode != Mode.full) break; // Only need one hypothesis - if (numNew.value >= BeamParser.opts.maxNewTreesPerSpan) return; - } - } - } - } - - // -- Coarse state pruning -- - - // Remove any (cat, start, end) which isn't reachable from the - // (Rule.rootCat, 0, numTokens) - public void keepTopDownReachable() { - if (numTokens == 0) return; - - Set reachable = new HashSet<>(); - collectReachable(reachable, Rule.rootCat, 0, numTokens); - - // Remove all derivations associated with (cat, start, end) that aren't reachable. - for (int start = 0; start < numTokens; start++) { - for (int end = start + 1; end <= numTokens; end++) { - List toRemoveCats = new LinkedList<>(); - for (String cat : chart[start][end].keySet()) { - String key = catStartEndKey(cat, start, end); - if (!reachable.contains(key)) { - toRemoveCats.add(cat); - } - } - Collections.sort(toRemoveCats); - for (String cat : toRemoveCats) { - if (parser.verbose(4)) { - LogInfo.logs("Pruning chart %s(%s,%s)", cat, start, end); - } - chart[start][end].remove(cat); - } - } - } - } - - private void collectReachable(Set reachable, String cat, int start, int end) { - String key = catStartEndKey(cat, start, end); - if (reachable.contains(key)) return; - - if (!chart[start][end].containsKey(cat)) { - // This should only happen for the root when there are no parses. 
- return; - } - - reachable.add(key); - for (Derivation deriv : chart[start][end].get(cat)) { - for (Derivation subderiv : deriv.children) { - collectReachable(reachable, subderiv.cat, subderiv.start, subderiv.end); - } - } - } - - private String catStartEndKey(String cat, int start, int end) { - return cat + ":" + start + ":" + end; - } - - // For pruning with the coarse state - protected boolean coarseAllows(Trie node, int start, int end) { - if (coarseState == null) return true; - return SetUtils.intersects( - node.cats, - coarseState.chart[start][end].keySet()); - } - protected boolean coarseAllows(String cat, int start, int end) { - if (coarseState == null) return true; - return coarseState.chart[start][end].containsKey(cat); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/BooleanValue.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/BooleanValue.java deleted file mode 100644 index 497683daf5..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/BooleanValue.java +++ /dev/null @@ -1,32 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.LispTree; - -/** - * Represents a boolean. 
- * @author Percy Liang - **/ -public class BooleanValue extends Value { - public final boolean value; - - public BooleanValue(boolean value) { this.value = value; } - public BooleanValue(LispTree tree) { this.value = Boolean.parseBoolean(tree.child(1).value); } - - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("boolean"); - tree.addChild(value + ""); - return tree; - } - - @Override public String sortString() { return "" + value; } - @Override public String pureString() { return "" + value; } - - @Override public int hashCode() { return Boolean.valueOf(value).hashCode(); } - @Override public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - BooleanValue that = (BooleanValue) o; - return this.value == that.value; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/BoundedPriorityQueue.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/BoundedPriorityQueue.java deleted file mode 100644 index e2775c9ba8..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/BoundedPriorityQueue.java +++ /dev/null @@ -1,77 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.util.*; - -/** - * Created by joberant on 3/27/14. 
- * A priority queue that holds no more than N elements - */ -public class BoundedPriorityQueue extends TreeSet { - private static final long serialVersionUID = 5724671156522771658L; - private int elementsLeft; - - public BoundedPriorityQueue(int maxSize, Comparator comparator) { - super(comparator); - this.elementsLeft = maxSize; - } - - /** - * @return true if element was added, false otherwise - * */ - @Override - public boolean add(E e) { - if (elementsLeft == 0 && size() == 0) { - // max size was initiated to zero => just return false - return false; - } else if (elementsLeft > 0) { - // queue isn't full => add element and decrement elementsLeft - boolean added = super.add(e); - if (added) { - elementsLeft--; - } - return added; - } else { - // there is already 1 or more elements => compare to the least - int compared = super.comparator().compare(e, this.last()); - if (compared == -1) { - // new element is larger than the least in queue => pull the least and add new one to queue - pollLast(); - super.add(e); - return true; - } else { - // new element is less than the least in queue => return false - return false; - } - } - } - - public List toList() { - List res = new ArrayList<>(); - for (E e : this) - res.add(e); - return res; - } - - public static void main(String[] args) { - - BoundedPriorityQueue queue = - new BoundedPriorityQueue<>(5, - new Comparator() { - @Override - public int compare(Integer o1, Integer o2) { - return o1.compareTo(o2); - } - }); - - queue.add(10); - queue.add(8); - queue.add(4); - queue.add(12); - queue.add(3); - queue.add(7); - queue.add(9); - for (Integer num : queue) { - System.out.println(num); - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Builder.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Builder.java deleted file mode 100644 index edfd89899e..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Builder.java +++ /dev/null @@ -1,94 +0,0 @@ -package 
edu.stanford.nlp.sempre; - -import com.google.common.base.Strings; - -import fig.basic.Option; -import fig.basic.Utils; - -/** - * Contains all the components (grammar, feature extractor, parser, parameters) - * needed for semantic parsing. - * - * @author Percy Liang - */ -public class Builder { - public static class Options { - @Option public String inParamsPath; - @Option public String executor = "JavaExecutor"; - @Option public String valueEvaluator = "ExactValueEvaluator"; - @Option public String parser = "BeamParser"; - } - - public static Options opts = new Options(); - - public Grammar grammar; - public Executor executor; - public ValueEvaluator valueEvaluator; - public FeatureExtractor extractor; - public Parser parser; - public Params params; - - public void build() { - grammar = null; - executor = null; - valueEvaluator = null; - extractor = null; - parser = null; - params = null; - buildUnspecified(); - } - - public void buildUnspecified() { - // Grammar - if (grammar == null) { - grammar = new Grammar(); - grammar.read(); - grammar.write(); - } - - // Executor - if (executor == null) - executor = (Executor) Utils.newInstanceHard(SempreUtils.resolveClassName(opts.executor)); - - // Value evaluator - if (valueEvaluator == null) - valueEvaluator = (ValueEvaluator) Utils.newInstanceHard(SempreUtils.resolveClassName(opts.valueEvaluator)); - - // Feature extractor - if (extractor == null) - extractor = new FeatureExtractor(executor); - - // Parser - if (parser == null) - parser = buildParser(new Parser.Spec(grammar, extractor, executor, valueEvaluator)); - - // Parameters - if (params == null) { - params = new Params(); - if (!Strings.isNullOrEmpty(opts.inParamsPath)) - params.read(opts.inParamsPath); - } - } - - public static Parser buildParser(Parser.Spec spec) { - switch (opts.parser) { - case "BeamParser": - return new BeamParser(spec); - case "ReinforcementParser": - return new ReinforcementParser(spec); - case "FloatingParser": - return new 
FloatingParser(spec); - default: - // Try instantiating by name - try { - Class parserClass = Class.forName(SempreUtils.resolveClassName(opts.parser)); - return (Parser) parserClass.getConstructor(spec.getClass()).newInstance(spec); - } catch (ClassNotFoundException e1) { - throw new RuntimeException("Illegal parser: " + opts.parser); - } catch (Exception e) { - e.printStackTrace(); - throw new RuntimeException("Error while instantiating parser: " + opts.parser + "\n" + e); - } - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/CallFormula.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/CallFormula.java deleted file mode 100644 index 98a9b14571..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/CallFormula.java +++ /dev/null @@ -1,85 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.google.common.base.Function; -import com.google.common.collect.Lists; -import fig.basic.LispTree; - -import java.util.List; - -/** - * A CallFormula represents a function call. - * See JavaExecutor for the semantics of this formula. - * (call func arg_1 ... 
arg_k) - * - * @author Percy Liang - */ -public class CallFormula extends Formula { - public final Formula func; - public final List args; - - public CallFormula(String func, List args) { - this(Formulas.newNameFormula(func), args); - } - - public CallFormula(Formula func, List args) { - this.func = func; - this.args = args; - } - - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("call"); - tree.addChild(func.toLispTree()); - for (Formula arg : args) - tree.addChild(arg.toLispTree()); - return tree; - } - - @Override - public void forEach(Function func) { - if (!func.apply(this)) { - this.func.forEach(func); - for (Formula arg: args) - arg.forEach(func); - } - } - - @Override - public Formula map(Function transform) { - Formula result = transform.apply(this); - if (result != null) return result; - Formula newFunc = func.map(transform); - List newArgs = Lists.newArrayList(); - for (Formula arg : args) - newArgs.add(arg.map(transform)); - return new CallFormula(newFunc, newArgs); - } - - @Override - public List mapToList(Function> transform, boolean alwaysRecurse) { - List res = transform.apply(this); - if (res.isEmpty() || alwaysRecurse) { - res.addAll(func.mapToList(transform, alwaysRecurse)); - for (Formula arg : args) - res.addAll(arg.mapToList(transform, alwaysRecurse)); - } - return res; - } - - @SuppressWarnings({"equalshashcode"}) - @Override - public boolean equals(Object thatObj) { - if (!(thatObj instanceof CallFormula)) return false; - CallFormula that = (CallFormula) thatObj; - if (!this.func.equals(that.func)) return false; - if (!this.args.equals(that.args)) return false; - return true; - } - - public int computeHashCode() { - int hash = 0x7ed55d16; - hash = hash * 0xd3a2646c + func.hashCode(); - hash = hash * 0xd3a2646c + args.hashCode(); - return hash; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/CallTypeInfo.java 
b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/CallTypeInfo.java deleted file mode 100644 index 841278cf0c..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/CallTypeInfo.java +++ /dev/null @@ -1,16 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.util.List; - -// Type information for each function in CallFormula. -public class CallTypeInfo { - public final String func; - public final List argTypes; - public final SemType retType; - - public CallTypeInfo(String func, List argTypes, SemType retType) { - this.func = func; - this.argTypes = argTypes; - this.retType = retType; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/CanonicalNames.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/CanonicalNames.java deleted file mode 100644 index 2521f67c17..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/CanonicalNames.java +++ /dev/null @@ -1,117 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.util.*; - -/** - * List of canonical names that we borrowed from Freebase. - * - * These names and helper methods are independent from the Freebase schema - * (even though the names begin with "fb:"). 
- * - * @author ppasupat - */ -public final class CanonicalNames { - private CanonicalNames() { } - - // Standard type names - public static final String PREFIX = "fb:"; - public static final String BOOLEAN = "fb:type.boolean"; - public static final String INT = "fb:type.int"; - public static final String FLOAT = "fb:type.float"; - public static final String DATE = "fb:type.datetime"; - public static final String TIME = "fb:type.time"; - public static final String TEXT = "fb:type.text"; - public static final String NUMBER = "fb:type.number"; - public static final String ENTITY = "fb:common.topic"; - public static final String ANY = "fb:type.any"; - - public static final List PRIMITIVES = Collections.unmodifiableList( - Arrays.asList(BOOLEAN, INT, FLOAT, DATE, TEXT, NUMBER)); - - // Standard relations - public static final String TYPE = "fb:type.object.type"; - public static final String NAME = "fb:type.object.name"; - - // Special Unary: star (*) - public static final String STAR = "*"; - - // Special Binaries: comparison - public static final Map COMPARATOR_REVERSE = new HashMap<>(); - static { - COMPARATOR_REVERSE.put("!=", "!="); // a != b implies b != a - COMPARATOR_REVERSE.put("<", ">="); - COMPARATOR_REVERSE.put(">", "<="); - COMPARATOR_REVERSE.put("<=", ">"); - COMPARATOR_REVERSE.put(">=", "<"); - } - public static final Set COMPARATORS = COMPARATOR_REVERSE.keySet(); - - // Special Binary: colon (:) - public static final String COLON = ":"; - - // SemType for special unaries and binaries - public static final Map SPECIAL_SEMTYPES = new HashMap<>(); - static { - SPECIAL_SEMTYPES.put("*", SemType.anyType); - SPECIAL_SEMTYPES.put("!=", SemType.anyAnyFunc); - SPECIAL_SEMTYPES.put("<", SemType.compareFunc); - SPECIAL_SEMTYPES.put(">", SemType.compareFunc); - SPECIAL_SEMTYPES.put("<=", SemType.compareFunc); - SPECIAL_SEMTYPES.put(">=", SemType.compareFunc); - SPECIAL_SEMTYPES.put(":", SemType.anyAnyFunc); - } - - // Unary: fb:domain.type [contains exactly one 
period] - // Special Unary: star (*) - public static boolean isUnary(String s) { - if (STAR.equals(s)) return true; - int i = s.indexOf('.'); - if (i == -1) return false; - i = s.indexOf('.', i + 1); - if (i == -1) return true; - return false; - } - public static boolean isUnary(Value value) { - return value instanceof NameValue && isUnary((((NameValue) value).id)); - } - - // Binary: fb:domain.type.property [contains two periods] - // Also catch reversed binary shorthand [!fb:people.person.parent] - // Special Binaries: comparison (<, >, etc.) and colon (:) - public static boolean isBinary(String s) { - if (COMPARATORS.contains(s) || COLON.equals(s)) return true; - int i = s.indexOf('.'); - if (i == -1) return false; - i = s.indexOf('.', i + 1); - if (i == -1) return false; - return true; - } - public static boolean isBinary(Value value) { - return value instanceof NameValue && isBinary((((NameValue) value).id)); - } - - // Return whether |property| is the name of a reverse property. - // Convention: ! is the prefix for reverses. - public static boolean isReverseProperty(String property) { - return property.startsWith("!") && !property.equals("!="); - } - public static boolean isReverseProperty(Value value) { - return value instanceof NameValue && isReverseProperty(((NameValue) value).id); - } - - // Return the reverse property as a String - public static String reverseProperty(String property) { - if (COMPARATORS.contains(property)) - return COMPARATOR_REVERSE.get(property); - if (isReverseProperty(property)) - return property.substring(1); - else return "!" 
+ property; - } - public static NameValue reverseProperty(Value value) { - if (!(value instanceof NameValue)) - throw new RuntimeException("Cannot call reverseProperty on " + value); - return new NameValue(reverseProperty(((NameValue) value).id)); - } - - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/CatSizeBound.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/CatSizeBound.java deleted file mode 100644 index 4c3e0e2333..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/CatSizeBound.java +++ /dev/null @@ -1,78 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.util.*; - -import fig.basic.*; - -/** - * Given the maximum formula size in a floating grammar, compute the maximum size - * that each floating grammar category can have. - * - * For example, if the grammar looks like this: - * $ROOT -> $A | $A $B - * $A -> $C $A - * $B -> $C - * $C -> $D $B | nothing - * ... - * and the maximum formula size (for $ROOT) is 10, then the maximum formula sizes for - * $A, $B, $C and $D are 9, 9, 8, and 7, respectively. 
- * - * The bound is = maxFormulaSize - (shortest distance from $ROOT to cat) - * - * @author ppasupat - */ -public class CatSizeBound { - public static class Options { - @Option(gloss = "verbosity") public int verbose = 0; - } - public static Options opts = new Options(); - - private final int maxFormulaSize; - private final Map bound = new HashMap<>(); - - public CatSizeBound(int maxFormulaSize, Grammar grammar) { - this(maxFormulaSize, grammar.getRules()); - } - - public CatSizeBound(int maxFormulaSize, List rules) { - this.maxFormulaSize = maxFormulaSize; - if (!FloatingParser.opts.useSizeInsteadOfDepth) { - LogInfo.warnings("Currently CatSizeBound is usable only when useSizeInsteadOfDepth = true."); - return; - } - // Construct graph - Map> graph = new HashMap<>(); - for (Rule rule : rules) { - if (!Rule.isCat(rule.lhs)) - throw new RuntimeException("Non-cat found in LHS of rule " + rule); - for (String rhsCat : rule.rhs) { - if (Rule.isCat(rhsCat)) - MapUtils.addToSet(graph, rule.lhs, rhsCat); - } - } - // Breadth first search - bound.put(Rule.rootCat, maxFormulaSize); - Queue queue = new ArrayDeque<>(); - queue.add(Rule.rootCat); - while (!queue.isEmpty()) { - String cat = queue.remove(); - if (!graph.containsKey(cat)) continue; - for (String rhsCat : graph.get(cat)) { - if (bound.containsKey(rhsCat)) continue; - bound.put(rhsCat, bound.get(cat) - 1); - queue.add(rhsCat); - } - } - if (opts.verbose >= 1) { - LogInfo.begin_track("CatSizeBound: distances"); - for (Map.Entry entry : bound.entrySet()) - LogInfo.logs("%25s : %2d", entry.getKey(), entry.getValue()); - LogInfo.end_track(); - } - } - - public int getBound(String cat) { - return bound.getOrDefault(cat, maxFormulaSize); - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ChartParserState.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ChartParserState.java deleted file mode 100644 index c4708ba28b..0000000000 --- 
a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ChartParserState.java +++ /dev/null @@ -1,128 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import fig.basic.LogInfo; -import fig.basic.MapUtils; - -import java.lang.reflect.Array; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - * Actually does the parsing. Main method is infer(), whose job is to fill in - * - * @author Roy Frostig - * @author Percy Liang - */ -public abstract class ChartParserState extends ParserState { - // cell (start, end, category) -> list of derivations (sorted by decreasing score) [beam] - protected final Map>[][] chart; - - // For visualizing how chart is filled - protected List chartFillingList = new ArrayList<>(); - - protected String[][] phrases; // the phrases in the example - - @SuppressWarnings({ "unchecked" }) - public ChartParserState(Parser parser, Params params, Example ex, boolean computeExpectedCounts) { - super(parser, params, ex, computeExpectedCounts); - - // Initialize the chart. 
- this.chart = (HashMap>[][]) - Array.newInstance(HashMap.class, numTokens, numTokens + 1); - this.phrases = new String[numTokens][numTokens + 1]; - - for (int start = 0; start < numTokens; start++) { - StringBuilder sb = new StringBuilder(); - for (int end = start + 1; end <= numTokens; end++) { - if (end - start > 1) - sb.append(' '); - sb.append(this.ex.languageInfo.tokens.get(end - 1)); - phrases[start][end] = sb.toString(); - chart[start][end] = new HashMap<>(); - } - } - } - - public void clearChart() { - for (int start = 0; start < numTokens; start++) { - for (int end = start + 1; end <= numTokens; end++) { - chart[start][end].clear(); - } - } - } - - // Call this method in infer() - protected void setPredDerivations() { - predDerivations.clear(); - predDerivations.addAll(MapUtils.get(chart[0][numTokens], Rule.rootCat, Derivation.emptyList)); - } - - private void visualizeChart() { - for (int len = 1; len <= numTokens; ++len) { - for (int i = 0; i + len <= numTokens; ++i) { - for (String cat : chart[i][i + len].keySet()) { - List derivations = chart[i][i + len].get(cat); - for (Derivation deriv : derivations) { - LogInfo.logs("ParserState.visualize: %s(%s:%s): %s", cat, i, i + len, deriv); - } - } - } - } - } - - protected void addToChart(Derivation deriv) { - if (parser.verbose(3)) LogInfo.logs("addToChart %s: %s", deriv.cat, deriv); - - if (Parser.opts.pruneErrorValues && deriv.value instanceof ErrorValue) return; - - List derivations = chart[deriv.start][deriv.end].get(deriv.cat); - if (chart[deriv.start][deriv.end].get(deriv.cat) == null) - chart[deriv.start][deriv.end].put(deriv.cat, derivations = new ArrayList<>()); - derivations.add(deriv); - totalGeneratedDerivs++; - - if (Parser.opts.visualizeChartFilling) { - chartFillingList.add(new CatSpan(deriv.start, deriv.end, deriv.cat)); - } - } - - public Map>[][] getChart() { - return chart; - } - - // TODO(joberant): move to visualization utility class - public static class CatSpan { - @JsonProperty - 
public final int start; - @JsonProperty public final int end; - @JsonProperty public final String cat; - - @JsonCreator - public CatSpan(@JsonProperty("start") int start, @JsonProperty("end") int end, - @JsonProperty("cat") String cat) { - this.start = start; - this.end = end; - this.cat = cat; - } - } - - public static class ChartFillingData { - @JsonProperty public final String id; - @JsonProperty public final String utterance; - @JsonProperty public final int numOfTokens; - @JsonProperty public final List catSpans; - - @JsonCreator - public ChartFillingData(@JsonProperty("id") String id, @JsonProperty("catspans") List catSpans, - @JsonProperty("utterance") String utterance, @JsonProperty("numOfTokens") int numOfTokens) { - this.id = id; - this.utterance = utterance; - this.numOfTokens = numOfTokens; - this.catSpans = catSpans; - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ChildDerivationsGroup.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ChildDerivationsGroup.java deleted file mode 100644 index c6ca9e0e92..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ChildDerivationsGroup.java +++ /dev/null @@ -1,36 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.util.List; - -/** - * A group containing one or two lists of potential child derivations. - * - * The motivation is to group potential child derivations based on type compatibility. - * For example, when building (and __ __), considering all pairs of derivations - * is time-wasting since a lot of pairs don't type-check. We instead group - * derivations by type, and only apply the rule to the pairs that type-check. - * - * This idea also extends to one-argument rules. For example, for (sum ___), - * we should only look at child derivations with number type. 
- * - * During parsing, for each DerivationGroup: - * - For a one-argument rule (derivations2 == null): - * Apply the rule on all derivations in derivations1 - * - For a two-argument rule (derivations2 != null): - * Apply the rule to all pairs (d1, d2) where d1 is in derivations1 and d2 is in derivations2 - * - * @author ppasupat - */ -public class ChildDerivationsGroup { - public final List derivations1, derivations2; - - public ChildDerivationsGroup(List derivations1) { - this.derivations1 = derivations1; - this.derivations2 = null; - } - - public ChildDerivationsGroup(List derivations1, List derivations2) { - this.derivations1 = derivations1; - this.derivations2 = derivations2; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/CoarseParser.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/CoarseParser.java deleted file mode 100644 index 908fcf79f1..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/CoarseParser.java +++ /dev/null @@ -1,284 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.google.common.base.Joiner; -import fig.basic.LogInfo; -import fig.basic.MapUtils; -import fig.basic.Pair; -import fig.basic.StopWatch; - -import java.lang.reflect.Array; -import java.util.*; - -/** - * Parser that only has information on what categories can parse what spans - * Does not hold backpointers for getting full parse, only reachability information - * Important: assumes that the grammar is binary - * Independent from the Parser code and therefore there is duplicate code (traverse(), keepTopDownReachable()) - * @author jonathanberant - */ -public class CoarseParser { - - public final Grammar grammar; - private Map, Set> rhsToLhsMap; - ArrayList catUnaryRules; // Unary rules with category on RHS - Map> terminalsToRulesList = new HashMap<>(); - - public CoarseParser(Grammar grammar) { - this.grammar = grammar; - catUnaryRules = new ArrayList<>(); - rhsToLhsMap = new HashMap<>(); - - Map> graph = new HashMap<>(); // Node 
from LHS to list of rules - for (Rule rule : grammar.rules) { - if (rule.rhs.size() > 2) - throw new RuntimeException("We assume that the grammar is binarized, rule: " + rule); - if (rule.isCatUnary()) - MapUtils.addToList(graph, rule.lhs, rule); - else if (rule.rhs.size() == 2) { // binary grammar - MapUtils.addToSet(rhsToLhsMap, Pair.newPair(rule.rhs.get(0), rule.rhs.get(1)), rule.lhs); - } else { - assert rule.isRhsTerminals(); - MapUtils.addToList(terminalsToRulesList, Joiner.on(' ').join(rule.rhs), rule); - } - } - // Topologically sort catUnaryRules so that B->C occurs before A->B - Map done = new HashMap<>(); - for (String node : graph.keySet()) - traverse(catUnaryRules, node, graph, done); - LogInfo.logs("Coarse parser: %d catUnaryRules (sorted), %d nonCatUnaryRules", catUnaryRules.size(), grammar.rules.size() - catUnaryRules.size()); - } - - /** Helper function for transitive closure of unary rules. */ - private void traverse(List catUnaryRules, - String node, - Map> graph, - Map done) { - Boolean d = done.get(node); - if (Boolean.TRUE.equals(d)) return; - if (Boolean.FALSE.equals(d)) - throw new RuntimeException("Found cycle of unaries involving " + node); - done.put(node, false); - for (Rule rule : MapUtils.getList(graph, node)) { - traverse(catUnaryRules, rule.rhs.get(0), graph, done); - catUnaryRules.add(rule); - } - done.put(node, true); - } - - public CoarseParserState getCoarsePrunedChart(Example ex) { - CoarseParserState res = new CoarseParserState(ex, this); - res.infer(); - return res; - } - - class CoarseParserState { - - private Map>[][] chart; - public final Example example; - public final CoarseParser parser; - private int numTokens; - private long time; - private String[][] phrases; - - - - @SuppressWarnings({ "unchecked" }) - public CoarseParserState(Example example, CoarseParser parser) { - this.example = example; - this.parser = parser; - numTokens = example.numTokens(); - // Initialize the chart. 
- this.chart = (HashMap>[][]) - Array.newInstance( - HashMap.class, - numTokens, numTokens + 1); - phrases = new String[numTokens][numTokens + 1]; - - for (int start = 0; start < numTokens; start++) { - StringBuilder sb = new StringBuilder(); - for (int end = start + 1; end <= numTokens; end++) { - if (end - start > 1) - sb.append(' '); - sb.append(example.languageInfo.tokens.get(end - 1)); - phrases[start][end] = sb.toString(); - chart[start][end] = new HashMap<>(); - } - } - } - - public long getCoarseParseTime() { return time; } - - public void infer() { - - StopWatch watch = new StopWatch(); - watch.start(); - // parse with rules with tokens or RHS - parseTokensAndPhrases(); - // complete bottom up parsing - for (int len = 1; len <= numTokens; len++) - for (int i = 0; i + len <= numTokens; i++) - build(i, i + len); - // prune away things that are not reachable from the top - keepTopDownReachable(); - watch.stop(); - time = watch.getCurrTimeLong(); - } - - public boolean coarseAllows(String cat, int start, int end) { - return chart[start][end].containsKey(cat); - } - - private void build(int start, int end) { - handleBinaryRules(start, end); - handleUnaryRules(start, end); - } - - private void parseTokensAndPhrases() { - for (int i = 0; i < numTokens; ++i) { - addToChart(Rule.tokenCat, i, i + 1); - addToChart(Rule.lemmaTokenCat, i, i + 1); - } - for (int i = 0; i < numTokens; i++) { - for (int j = i + 1; j <= numTokens; j++) { - addToChart(Rule.phraseCat, i, j); - addToChart(Rule.lemmaPhraseCat, i, j); - } - } - } - - private void addToChart(String cat, int start, int end) { - if (Parser.opts.verbose >= 5) - LogInfo.logs("Adding to chart %s(%s,%s)", cat, start, end); - MapUtils.putIfAbsent(chart[start][end], cat, new ArrayList()); - } - - private void addToChart(String parentCat, String childCat, int start, int end) { - if (Parser.opts.verbose >= 5) - LogInfo.logs("Adding to chart %s(%s,%s)-->%s(%s,%s)", parentCat, start, end, childCat, start, end); - 
MapUtils.addToList(chart[start][end], parentCat, new CategorySpan(childCat, start, end)); } - - private void addToChart(String parentCat, String leftCat, String rightCat, int start, int i, int end) { - if (Parser.opts.verbose >= 5) - LogInfo.logs("Adding to chart %s(%s,%s)-->%s(%s,%s) %s(%s,%s)", parentCat, start, end, leftCat, start, i, rightCat, i, end); - MapUtils.addToList(chart[start][end], parentCat, new CategorySpan(leftCat, start, i)); - MapUtils.addToList(chart[start][end], parentCat, new CategorySpan(rightCat, i, end)); - } - - private void handleBinaryRules(int start, int end) { - for (int i = start + 1; i < end; ++i) { - List left = new ArrayList<>(chart[start][i].keySet()); - List right = new ArrayList<>(chart[i][end].keySet()); - if (i - start == 1) left.add(phrases[start][i]); // handle single terminal - if (end - i == 1) right.add(phrases[i][end]); // handle single terminal - - for (String l : left) { - for (String r : right) { - Set parentCats = rhsToLhsMap.get(Pair.newPair(l, r)); - - if (parentCats != null) { - for (String parentCat : parentCats) { - addToChart(parentCat, l, r, start, i, end); - } - } - } - } - } - } - - private void handleUnaryRules(int start, int end) { - - // terminals on RHS - for (Rule rule : MapUtils.get(terminalsToRulesList, phrases[start][end], Collections.emptyList())) { - addToChart(rule.lhs, start, end); - } - // catUnaryRules - for (Rule rule : parser.catUnaryRules) { - String rhsCat = rule.rhs.get(0); - if (chart[start][end].containsKey(rhsCat)) { - addToChart(rule.lhs, rhsCat, start, end); - } - } - } - - public void keepTopDownReachable() { - if (numTokens == 0) return; - - Set reachable = new HashSet(); - collectReachable(reachable, new CategorySpan(Rule.rootCat, 0, numTokens)); - - // Remove all derivations associated with (cat, start, end) that aren't reachable. 
- for (int start = 0; start < numTokens; start++) { - for (int end = start + 1; end <= numTokens; end++) { - List toRemoveCats = new LinkedList(); - for (String cat : chart[start][end].keySet()) { - if (!reachable.contains(new CategorySpan(cat, start, end))) { - toRemoveCats.add(cat); - } - } - Collections.sort(toRemoveCats); - for (String cat : toRemoveCats) { - if (Parser.opts.verbose >= 5) - LogInfo.logs("Pruning chart %s(%s,%s)", cat, start, end); - chart[start][end].remove(cat); - } - } - } - } - - private void collectReachable(Set reachable, CategorySpan catSpan) { - if (reachable.contains(catSpan)) - return; - if (!chart[catSpan.start][catSpan.end].containsKey(catSpan.cat)) { - // This should only happen for the root when there are no parses. - return; - } - reachable.add(catSpan); - for (CategorySpan childCatSpan : chart[catSpan.start][catSpan.end].get(catSpan.cat)) { - collectReachable(reachable, childCatSpan); - } - } - } - - class CategorySpan { - public final String cat; - public final int start; - public final int end; - - public CategorySpan(String cat, int start, int end) { - this.cat = cat; - this.start = start; - this.end = end; - } - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + ((cat == null) ? 
0 : cat.hashCode()); - result = prime * result + end; - result = prime * result + start; - return result; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (obj == null) - return false; - if (getClass() != obj.getClass()) - return false; - CategorySpan other = (CategorySpan) obj; - if (cat == null) { - if (other.cat != null) - return false; - } else if (!cat.equals(other.cat)) - return false; - if (end != other.end) - return false; - if (start != other.start) - return false; - return true; - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Colorizer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Colorizer.java deleted file mode 100644 index 62291c2bd8..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Colorizer.java +++ /dev/null @@ -1,50 +0,0 @@ -package edu.stanford.nlp.sempre; - -/** - * Tools for colorizing output to console so easier to read - * - * @author Ziang Xie - */ - -public class Colorizer { - - public Colorizer() { } - - public String colorize(String s, String color) { - String cp = ""; - - // NOTE JDK 7+ feature - switch (color) { - case "black": - cp = "\u001B[30m"; - break; - case "red": - cp = "\u001B[31m"; - break; - case "green": - cp = "\u001B[32m"; - break; - case "yellow": - cp = "\u001B[33m"; - break; - case "blue": - cp = "\u001B[34m"; - break; - case "purple": - cp = "\u001B[35m"; - break; - case "cyan": - cp = "\u001B[36m"; - break; - case "white": - cp = "\u001B[37m"; - break; - default: - throw new RuntimeException("Invalid color: " + color); - } - - if (cp.equals("")) - return s; - return cp + s + "\u001B[0m"; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ConcatFn.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ConcatFn.java deleted file mode 100644 index 39202e5a7c..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ConcatFn.java +++ /dev/null @@ -1,39 +0,0 @@ -package 
edu.stanford.nlp.sempre; - -import fig.basic.LispTree; -/** - * Takes two strings and returns their concatenation. - * - * @author Percy Liang - */ -public class ConcatFn extends SemanticFn { - String delim; - - public ConcatFn() { } - - public ConcatFn(String delim) { - this.delim = delim; - } - - public void init(LispTree tree) { - super.init(tree); - delim = tree.child(1).value; - } - - public DerivationStream call(Example ex, final Callable c) { - return new SingleDerivationStream() { - @Override - public Derivation createDerivation() { - StringBuilder out = new StringBuilder(); - for (int i = 0; i < c.getChildren().size(); i++) { - if (i > 0) out.append(delim); - out.append(c.childStringValue(i)); - } - return new Derivation.Builder() - .withCallable(c) - .withStringFormulaFrom(out.toString()) - .createDerivation(); - } - }; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ConstantFn.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ConstantFn.java deleted file mode 100644 index c719d7a80d..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ConstantFn.java +++ /dev/null @@ -1,48 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.LispTree; - -/** - * Just returns a fixed logical formula. 
- * - * @author Percy Liang - */ -public class ConstantFn extends SemanticFn { - Formula formula; // Formula to return - SemType type; - - public ConstantFn() { } - - public ConstantFn(Formula formula) { - init(LispTree.proto.newList("ConstantFn", formula.toLispTree())); - } - - public void init(LispTree tree) { - super.init(tree); - this.formula = Formulas.fromLispTree(tree.child(1)); - if (2 < tree.children.size()) - this.type = SemType.fromLispTree(tree.child(2)); - else { - this.type = TypeInference.inferType(formula); - } - if (!this.type.isValid()) - throw new RuntimeException("ConstantFn: " + formula + " does not type check"); - } - - public DerivationStream call(final Example ex, final Callable c) { - return new SingleDerivationStream() { - @Override - public Derivation createDerivation() { - Derivation res = new Derivation.Builder() - .withCallable(c) - .formula(formula) - .type(type) - .createDerivation(); - // don't generate feature if it is not grounded to a string - if (FeatureExtractor.containsDomain("constant") && c.getStart() != -1) - res.addFeature("constant", ex.phraseString(c.getStart(), c.getEnd()) + " --- " + formula.toString()); - return res; - } - }; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ContextFn.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ContextFn.java deleted file mode 100644 index c8e5f11e7b..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ContextFn.java +++ /dev/null @@ -1,123 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.util.*; -import fig.basic.*; - -/** - * Produces predicates (like LexiconFn) but do it from the logical forms - * in the context (inspects the ContextValue of the example). - * - * Takes depth, restrictType, and forbiddenTypes arguments allowing you - * to specify the depth/size and type of (formula) subtrees that you want to - * extract from the context. - * - * ONLY USE WITH TYPES!! 
- * - * E.g., - * - * (rule $X (context) (ContextFn (depth 0) (type fb:type.any)) - * would extract any unary/entity. - * - * (rule $X (context) (ContextFn (depth 1) (type (-> fb:type.any - * fb:type.any)) (forbidden (-> fb:type.any fb:type.something)) would - * extract all binaries except those with arg1 of type fb:type.something - * - * @author William Hamilton - */ -// TODO(Will): Reintegrate useful functionality from old implementation. -public class ContextFn extends SemanticFn { - // the depth/size of subtrees to extract - private int depth; - // the type that you want to extract - private SemType restrictType = SemType.topType; - - // set of types to not extract (overrides restrictType). - // For example, if restrict type is very general (e.g., (-> type.any type.any)) - // and you don't want some specific subtype (e.g., (-> type.something type.any)) - // then you would say specify (forbidden (-> type.something type.any)) - // and all subtypes of (-> type.any type.any) would be permissible - // except the forbidden one(s). 
- private Set forbiddenTypes = new HashSet(); - - public void init(LispTree tree) { - super.init(tree); - for (int i = 1; i < tree.children.size(); i++) { - LispTree arg = tree.child(i); - if ("type".equals(arg.child(0).value)) { - restrictType = SemType.fromLispTree(arg.child(1)); - } else if ("depth".equals(arg.child(0).value)) { - depth = Integer.parseInt(arg.child(1).value); - } else if ("forbidden".equals(arg.child(0).value)) { - forbiddenTypes.add(SemType.fromLispTree(arg.child(1))); - } else { - throw new RuntimeException("Unknown argument: " + arg); - } - } - } - - public DerivationStream call(final Example ex, final Callable c) { - return new MultipleDerivationStream() { - int index = 0; - List formulas; - - public Derivation createDerivation() { - if (ex.context == null) return null; - - if (formulas == null) { - formulas = new ArrayList(); - for (int i = ex.context.exchanges.size() - 1; i >= 0; i--) { - ContextValue.Exchange e = ex.context.exchanges.get(i); - extractFormulas(e.formula.toLispTree()); - } - } - if (index >= formulas.size()) return null; - Formula formula = formulas.get(index++); - for (SemType forbiddenType : forbiddenTypes) { - if (TypeInference.inferType(formula).meet(forbiddenType).isValid()) - return null; - } - return new Derivation.Builder() - .withCallable(c) - .formula(formula) - .type(TypeInference.inferType(formula)) - .createDerivation(); - } - - private void addFormula(Formula formula) { - if (formulas.contains(formula)) - return; - formulas.add(formula); - } - - // Extract from the logical form. 
- private void extractFormulas(LispTree formula) { - if (correctDepth(formula, 0) && typeCheck(formula)) { - addFormula(Formulas.fromLispTree(formula)); - } - if (formula.isLeaf()) - return; - for (LispTree child : formula.children) - extractFormulas(child); - } - - private boolean correctDepth(LispTree formula, int currentLevel) { - if (formula.isLeaf()) { - return currentLevel == depth; - } else { - boolean isCorrect = true; - for (LispTree child : formula.children) - isCorrect = isCorrect && correctDepth(child, currentLevel + 1); - return isCorrect; - } - } - - private boolean typeCheck(LispTree treeFormula) { - Formula formula = Formulas.fromLispTree(treeFormula); - SemType type = TypeInference.inferType(formula); - type = restrictType.meet(type); - return type.isValid(); - } - - }; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ContextValue.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ContextValue.java deleted file mode 100644 index bf7238347d..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ContextValue.java +++ /dev/null @@ -1,147 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonValue; -import fig.basic.LispTree; -import java.util.*; - -/** - * Represents the discourse context (time, place, history of exchanges). - * This is part of an Example and used by ContextFn. - * - * @author Percy Liang - */ -public class ContextValue extends Value { - // A single exchange between the user and the system - // Note: we are not storing the entire derivation right now. 
- public static class Exchange { - public final String utterance; - public final Formula formula; - public final Value value; - public Exchange(String utterance, Formula formula, Value value) { - this.utterance = utterance; - this.formula = formula; - this.value = value; - } - public Exchange(LispTree tree) { - utterance = tree.child(1).value; - formula = Formulas.fromLispTree(tree.child(2)); - value = Values.fromLispTree(tree.child(3)); - } - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("exchange"); - tree.addChild(utterance); - tree.addChild(formula.toLispTree()); - tree.addChild(value.toLispTree()); - return tree; - } - @Override public String toString() { return toLispTree().toString(); } - } - - public final String user; - public final DateValue date; - public final List exchanges; // List of recent exchanges with the user - public final KnowledgeGraph graph; // Mini-knowledge graph that captures the context - - public ContextValue withDate(DateValue newDate) { - return new ContextValue(user, newDate, exchanges, graph); - } - - public ContextValue withNewExchange(List newExchanges) { - return new ContextValue(user, date, newExchanges, graph); - } - - public ContextValue withGraph(KnowledgeGraph newGraph) { - return new ContextValue(user, date, exchanges, newGraph); - } - - public ContextValue(String user, DateValue date, List exchanges, KnowledgeGraph graph) { - this.user = user; - this.date = date; - this.exchanges = exchanges; - this.graph = graph; - } - - public ContextValue(String user, DateValue date, List exchanges) { - this(user, date, exchanges, null); - } - - public ContextValue(KnowledgeGraph graph) { - this(null, null, new ArrayList(), graph); - } - - // Example: - // (context (user pliang) - // (date 2014 4 20) - // (exchange "when was chopin born" (!fb:people.person.date_of_birth fb:en.frederic_chopin) (date 1810 2 22)) - // (graph NaiveKnowledgeGraph ((string Obama) (string "born in") (string Hawaii)) 
...)) - public ContextValue(LispTree tree) { - String user = null; - DateValue date = null; - KnowledgeGraph graph = null; - exchanges = new ArrayList(); - for (int i = 1; i < tree.children.size(); i++) { - String key = tree.child(i).child(0).value; - if (key.equals("user")) { - user = tree.child(i).child(1).value; - } else if (key.equals("date")) { - date = new DateValue(tree.child(i)); - } else if (key.equals("graph")) { - graph = KnowledgeGraph.fromLispTree(tree.child(i)); - } else if (key.equals("exchange")) { - exchanges.add(new Exchange(tree.child(i))); - } else { - throw new RuntimeException("Invalid: " + tree.child(i)); - } - } - this.user = user; - this.date = date; - this.graph = graph; - } - - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("context"); - if (user != null) - tree.addChild(LispTree.proto.newList("user", user)); - if (date != null) - tree.addChild(date.toLispTree()); - // When logging examples, logging the entire graph takes too much screen space. - // I don't think that we ever deserialize a graph from a serialized context, - // so this should be fine. 
- if (graph != null) - tree.addChild(graph.toShortLispTree()); - for (Exchange e : exchanges) - tree.addChild(LispTree.proto.newList("exchange", e.toLispTree())); - return tree; - } - - @Override public int hashCode() { - int hash = 0x7ed55d16; - hash = hash * 0xd3a2646c + user.hashCode(); - hash = hash * 0xd3a2646c + date.hashCode(); - hash = hash * 0xd3a2646c + exchanges.hashCode(); - hash = hash * 0xd3a2646c + graph.hashCode(); - return hash; - } - - @Override public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - ContextValue that = (ContextValue) o; - if (!this.user.equals(that.user)) return false; - if (!this.date.equals(that.date)) return false; - if (!this.exchanges.equals(that.exchanges)) return false; - if (!this.graph.equals(that.graph)) return false; - return true; - } - - @JsonValue - public String toString() { return toLispTree().toString(); } - - @JsonCreator - public static ContextValue fromString(String str) { - return new ContextValue(LispTree.proto.parseFromString(str)); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Dataset.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Dataset.java deleted file mode 100644 index 674a3113f9..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Dataset.java +++ /dev/null @@ -1,283 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.fasterxml.jackson.core.type.TypeReference; -import com.google.common.collect.Lists; - -import fig.basic.*; -import fig.exec.Execution; -import fig.prob.SampleUtils; - -import java.io.*; -import java.util.*; - -/** - * A dataset contains a set of examples, which are keyed by group (e.g., train, - * dev, test). 
- * - * @author Percy Liang - */ -public class Dataset { - public static class Options { - @Option(gloss = "Paths to read input files (format: :)") - public ArrayList> inPaths = new ArrayList>(); - @Option(gloss = "Maximum number of examples to read") - public ArrayList> maxExamples = new ArrayList>(); - - // Training file gets split into: - // | trainFrac --> | | <-- devFrac | - @Option(gloss = "Fraction of trainExamples (from the beginning) to keep for training") - public double trainFrac = 1; - @Option(gloss = "Fraction of trainExamples (from the end) to keep for development") - public double devFrac = 0; - @Option(gloss = "Used to randomly divide training examples") - public Random splitRandom = new Random(1); - @Option(gloss = "whether to split dev from train") - public boolean splitDevFromTrain = true; - - @Option(gloss = "Only keep examples which have at most this number of tokens") - public int maxTokens = Integer.MAX_VALUE; - - @Option(gloss = "Path to a knowledge graph that will be uploaded as global context") - public String globalGraphPath; - } - - public static Options opts = new Options(); - - // Group id -> examples in that group - private LinkedHashMap> allExamples = new LinkedHashMap>(); - - // General statistics about the examples. - private final HashSet tokenTypes = new HashSet(); - private final StatFig numTokensFig = new StatFig(); // For each example, number of tokens - - public Set groups() { return allExamples.keySet(); } - public List examples(String group) { return allExamples.get(group); } - - /** For JSON. */ - static class GroupInfo { - @JsonProperty final String group; - @JsonProperty final List examples; - String path; // Optional, used if this was read from a path. - @JsonCreator - public GroupInfo(@JsonProperty("group") String group, - @JsonProperty("examples") List examples) { - this.group = group; - this.examples = examples; - } - } - - /** For JSON. 
*/ - @JsonProperty("groups") - public List getAllGroupInfos() { - List all = Lists.newArrayList(); - for (Map.Entry> entry : allExamples.entrySet()) - all.add(new GroupInfo(entry.getKey(), entry.getValue())); - return all; - } - - /** For JSON. */ - // Allows us to creates dataset from arbitrary JSON, not requiring a - // path from which to read. - @JsonCreator - public static Dataset fromGroupInfos(@JsonProperty("groups") List groups) { - Dataset d = new Dataset(); - d.readFromGroupInfos(groups); - return d; - } - - public void read() { - readFromPathPairs(opts.inPaths); - } - - public void readFromPathPairs(List> pathPairs) { - // Try to detect whether we need JSON. - for (Pair pathPair : pathPairs) { - if (pathPair.getSecond().endsWith(".json")) { - readJsonFromPathPairs(pathPairs); - return; - } - } - readLispTreeFromPathPairs(pathPairs); - updateGlobalContext(); - } - - private void updateGlobalContext() { - if (opts.globalGraphPath != null) { - KnowledgeGraph graph = NaiveKnowledgeGraph.fromFile(opts.globalGraphPath); - for (String group : allExamples.keySet()) { - for (Example ex : allExamples.get(group)) { - ex.setContext(new ContextValue(graph)); - } - } - } - } - - - private void readJsonFromPathPairs(List> pathPairs) { - List groups = Lists.newArrayListWithCapacity(pathPairs.size()); - for (Pair pathPair : pathPairs) { - String group = pathPair.getFirst(); - String path = pathPair.getSecond(); - List examples = Json.readValueHard( - IOUtils.openInHard(path), - new TypeReference>() { }); - GroupInfo gi = new GroupInfo(group, examples); - gi.path = path; - groups.add(gi); - } - readFromGroupInfos(groups); - } - - private void readFromGroupInfos(List groupInfos) { - LogInfo.begin_track_printAll("Dataset.read"); - - for (GroupInfo groupInfo : groupInfos) { - int maxExamples = getMaxExamplesForGroup(groupInfo.group); - List examples = allExamples.get(groupInfo.group); - if (examples == null) - allExamples.put(groupInfo.group, examples = new ArrayList()); - 
readHelper(groupInfo.examples, maxExamples, examples, groupInfo.path); - } - if (opts.splitDevFromTrain) splitDevFromTrain(); - collectStats(); - - LogInfo.end_track(); - } - - private void splitDevFromTrain() { - // Split original training examples randomly into train and dev. - List origTrainExamples = allExamples.get("train"); - if (origTrainExamples != null) { - int split1 = (int) (opts.trainFrac * origTrainExamples.size()); - int split2 = (int) ((1 - opts.devFrac) * origTrainExamples.size()); - int[] perm = SampleUtils.samplePermutation(opts.splitRandom, origTrainExamples.size()); - - List trainExamples = new ArrayList(); - allExamples.put("train", trainExamples); - List devExamples = allExamples.get("dev"); - if (devExamples == null) { - // Preserve order - LinkedHashMap> newAllExamples = new LinkedHashMap<>(); - for (Map.Entry> entry : allExamples.entrySet()) { - newAllExamples.put(entry.getKey(), entry.getValue()); - if (entry.getKey().equals("train")) - newAllExamples.put("dev", devExamples = new ArrayList<>()); - } - allExamples = newAllExamples; - } - for (int i = 0; i < split1; i++) - trainExamples.add(origTrainExamples.get(perm[i])); - for (int i = split2; i < origTrainExamples.size(); i++) - devExamples.add(origTrainExamples.get(perm[i])); - } - } - - private void readHelper(List incoming, - int maxExamples, - List examples, - String path) { - if (examples.size() >= maxExamples) - return; - - int i = 0; - for (Example ex : incoming) { - if (examples.size() >= maxExamples) break; - - if (ex.id == null) { - String id = (path != null ? 
path : "") + ":" + i; - ex = new Example.Builder().withExample(ex).setId(id).createExample(); - } - i++; - ex.preprocess(); - - // Skip example if too long - if (ex.numTokens() > opts.maxTokens) continue; - - LogInfo.logs("Example %s (%d): %s => %s", - ex.id, examples.size(), ex.getTokens(), ex.targetValue); - - examples.add(ex); - numTokensFig.add(ex.numTokens()); - for (String token : ex.getTokens()) tokenTypes.add(token); - } - } - - private void readLispTreeFromPathPairs(List> pathPairs) { - LogInfo.begin_track_printAll("Dataset.read"); - for (Pair pathPair : pathPairs) { - String group = pathPair.getFirst(); - String path = pathPair.getSecond(); - int maxExamples = getMaxExamplesForGroup(group); - List examples = allExamples.get(group); - if (examples == null) - allExamples.put(group, examples = new ArrayList()); - readLispTreeHelper(path, maxExamples, examples); - } - if (opts.splitDevFromTrain) splitDevFromTrain(); - LogInfo.end_track(); - } - - private void readLispTreeHelper(String path, int maxExamples, List examples) { - if (examples.size() >= maxExamples) return; - LogInfo.begin_track("Reading %s", path); - - Iterator trees = LispTree.proto.parseFromFile(path); - int n = 0; - while (examples.size() < maxExamples && trees.hasNext()) { - // Format: (example (id ...) (utterance ...) (targetFormula ...) 
(targetValue ...)) - LispTree tree = trees.next(); - if (tree.children.size() < 2 || !"example".equals(tree.child(0).value)) { - if ("metadata".equals(tree.child(0).value)) continue; - throw new RuntimeException("Invalid example: " + tree); - } - - Example ex = Example.fromLispTree(tree, path + ":" + n); // Specify a default id if it doesn't exist - n++; - ex.preprocess(); - - // Skip example if too long - if (ex.numTokens() > opts.maxTokens) continue; - - LogInfo.logs("Example %s (%d): %s => %s", ex.id, examples.size(), ex.getTokens(), ex.targetValue); - - examples.add(ex); - numTokensFig.add(ex.numTokens()); - for (String token : ex.getTokens()) tokenTypes.add(token); - } - LogInfo.end_track(); - } - - private void collectStats() { - LogInfo.begin_track_printAll("Dataset stats"); - Execution.putLogRec("numTokenTypes", tokenTypes.size()); - Execution.putLogRec("numTokensPerExample", numTokensFig); - for (Map.Entry> e : allExamples.entrySet()) - Execution.putLogRec("numExamples." + e.getKey(), e.getValue().size()); - LogInfo.end_track(); - } - - public static int getMaxExamplesForGroup(String group) { - int maxExamples = Integer.MAX_VALUE; - for (Pair maxPair : opts.maxExamples) - if (maxPair.getFirst().equals(group)) - maxExamples = maxPair.getSecond(); - return maxExamples; - } - - public static void appendExampleToFile(String path, Example ex) { - // JSON is an annoying format because we can't just append. - // So currently we have to read the entire file in and write it out. 
- List examples; - if (new File(path).exists()) { - examples = Json.readValueHard( - IOUtils.openInHard(path), - new TypeReference>() { }); - } else { - examples = new ArrayList(); - } - examples.add(ex); - Json.prettyWriteValueHard(new File(path), examples); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/DateFn.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/DateFn.java deleted file mode 100644 index 1113dade99..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/DateFn.java +++ /dev/null @@ -1,27 +0,0 @@ -package edu.stanford.nlp.sempre; - -/** - * Maps a string to a Date. - * - * @author Percy Liang - */ -public class DateFn extends SemanticFn { - public DerivationStream call(final Example ex, final Callable c) { - return new SingleDerivationStream() { - @Override - public Derivation createDerivation() { - String value = ex.languageInfo.getNormalizedNerSpan("DATE", c.getStart(), c.getEnd()); - if (value == null) - return null; - DateValue dateValue = DateValue.parseDateValue(value); - if (dateValue == null) - return null; - return new Derivation.Builder() - .withCallable(c) - .formula(new ValueFormula<>(dateValue)) - .type(SemType.dateType) - .createDerivation(); - } - }; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/DateRangeFn.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/DateRangeFn.java deleted file mode 100644 index eddc8ceb41..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/DateRangeFn.java +++ /dev/null @@ -1,64 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -/** - * Parse strings representing date ranges such as "20th century". - * The result has the form (and (< ...) (>= ...)). 
- * - * Currently only supports patterns like "1990's" and "1800s" - * - * @author ppasupat - */ -public class DateRangeFn extends SemanticFn { - - @Override - public DerivationStream call(Example ex, Callable c) { - return new LazyDateRangeFnDerivs(ex, c); - } - - public static class LazyDateRangeFnDerivs extends MultipleDerivationStream { - Example ex; - Callable c; - - int index = 0; - List formulas; - - public LazyDateRangeFnDerivs(Example ex, Callable c) { - this.ex = ex; - this.c = c; - } - - @Override - public Derivation createDerivation() { - if (formulas == null) - populateFormulas(); - - if (index >= formulas.size()) return null; - Formula formula = formulas.get(index++); - - return new Derivation.Builder().withCallable(c) - .formula(formula).type(SemType.numberType).createDerivation(); - } - - private static final Pattern YEAR_RANGE = Pattern.compile("^(\\d+0+)\\s*'?s$"); - - // TODO: Handle more cases - private void populateFormulas() { - formulas = new ArrayList<>(); - String query = c.childStringValue(0); - Matcher matcher = YEAR_RANGE.matcher(query); - if (!matcher.matches()) return; - int year = Integer.parseInt(matcher.group(1)), range = 10; - while (year % range == 0) { - // Put "<" before ">=" to keep the children of MergeFormula sorted - formulas.add(new MergeFormula(MergeFormula.Mode.and, - new JoinFormula(new ValueFormula(new NameValue("<")), new ValueFormula<>(new NumberValue(year + range))), - new JoinFormula(new ValueFormula(new NameValue(">=")), new ValueFormula<>(new NumberValue(year))))); - range *= 10; - } - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/DateValue.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/DateValue.java deleted file mode 100644 index 3aa71f92ce..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/DateValue.java +++ /dev/null @@ -1,108 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.LispTree; -import java.util.Calendar; - -public class DateValue 
extends Value { - public final int year; - public final int month; - public final int day; - - // Format: YYYY-MM-DD (from Freebase). - // Return null if it's not a valid date string. - public static DateValue parseDateValue(String dateStr) { - if (dateStr.equals("PRESENT_REF")) return null; - if (dateStr.startsWith("OFFSET")) return null; - - // We don't handle the following things: - // - "30 A.D" since its value is "+0030" - // - "Dec 20, 2009 10:04am" since its value is "2009-12-20T10:04" - int year = -1, month = -1, day = -1; - boolean isBC = dateStr.startsWith("-"); - if (isBC) dateStr = dateStr.substring(1); - - // Ignore time - int t = dateStr.indexOf('T'); - if (t != -1) dateStr = dateStr.substring(0, t); - - String[] dateParts; - - if (dateStr.indexOf('T') != -1) - dateStr = dateStr.substring(0, dateStr.indexOf('T')); - - dateParts = dateStr.split("-"); - if (dateParts.length > 3) - throw new RuntimeException("Date has more than 3 parts: " + dateStr); - - if (dateParts.length >= 1) year = parseIntRobust(dateParts[0]) * (isBC ? 
-1 : 1); - if (dateParts.length >= 2) month = parseIntRobust(dateParts[1]); - if (dateParts.length >= 3) day = parseIntRobust(dateParts[2]); - - return new DateValue(year, month, day); - } - - private static int parseIntRobust(String i) { - int val; - try { - val = Integer.parseInt(i); - } catch (NumberFormatException ex) { - val = -1; - } - return val; - } - - public static DateValue now() { - Calendar cal = Calendar.getInstance(); - int year = cal.get(Calendar.YEAR); - int month = cal.get(Calendar.MONTH); - int day = cal.get(Calendar.DAY_OF_MONTH); - return new DateValue(year, month, day); - } - - public DateValue(int year, int month, int day) { - this.year = year; - this.month = month; - this.day = day; - } - - public DateValue(LispTree tree) { - this.year = Integer.valueOf(tree.child(1).value); - this.month = Integer.valueOf(tree.child(2).value); - this.day = Integer.valueOf(tree.child(3).value); - } - - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("date"); - tree.addChild(String.valueOf(year)); - tree.addChild(String.valueOf(month)); - tree.addChild(String.valueOf(day)); - return tree; - } - - @Override public String sortString() { return "" + year + "/" + month + "/" + day; } - public String isoString() { - return "" + (year == -1 ? "xxxx" : String.format("%04d", year)) - + "-" + (month == -1 ? "xx" : String.format("%02d", month)) - + "-" + (day == -1 ? 
"xx" : String.format("%02d", day)); - } - @Override public String pureString() { return isoString(); } - - @Override public int hashCode() { - int hash = 0x7ed55d16; - hash = hash * 0xd3a2646c + year; - hash = hash * 0xd3a2646c + month; - hash = hash * 0xd3a2646c + day; - return hash; - } - - @Override public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - DateValue that = (DateValue) o; - if (this.year != that.year) return false; - if (this.month != that.month) return false; - if (this.day != that.day) return false; - return true; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/DefaultDerivationPruningComputer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/DefaultDerivationPruningComputer.java deleted file mode 100644 index d47b5196a3..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/DefaultDerivationPruningComputer.java +++ /dev/null @@ -1,161 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.util.*; - -import fig.basic.LispTree; -import fig.basic.Option; - -/** - * Common pruning strategies that can be used in many semantic parsing tasks. 
- * - * @author ppasupat - */ -public class DefaultDerivationPruningComputer extends DerivationPruningComputer { - public static class Options { - @Option(gloss = "(for badSummarizerHead) allow count on sets of size 1") - public boolean allowCountOne = false; - } - public static Options opts = new Options(); - - public DefaultDerivationPruningComputer(DerivationPruner pruner) { - super(pruner); - } - - public static final String atomic = "atomic"; - public static final String emptyDenotation = "emptyDenotation"; - public static final String nonLambdaError = "nonLambdaError"; - public static final String tooManyValues = "tooManyValues"; - public static final String doubleSummarizers = "doubleSummarizers"; - public static final String multipleSuperlatives = "multipleSuperlatives"; - public static final String sameMerge = "sameMerge"; - public static final String mistypedMerge = "mistypedMerge"; - public static final String unsortedMerge = "unsortedMerge"; - public static final String badSummarizerHead = "badSummarizerHead"; - - @Override - public Collection getAllStrategyNames() { - return Arrays.asList( - atomic, - emptyDenotation, nonLambdaError, tooManyValues, - doubleSummarizers, multipleSuperlatives, - sameMerge, mistypedMerge, unsortedMerge, badSummarizerHead); - } - - @Override - public String isPruned(Derivation deriv) { - // atomic: Prune atomic formula at root. 
- // e.g., Prevent "Who was taller, Lincoln or Obama" --> fb:en.lincoln generated from lexicon without any computation - if (containsStrategy(atomic)) { - if (deriv.isRoot(ex.numTokens()) && deriv.formula instanceof ValueFormula) - return atomic; - } - // emptyDenotation: Prune if the denotation is empty - if (containsStrategy(emptyDenotation)) { - deriv.ensureExecuted(parser.executor, ex.context); - if (deriv.value instanceof ListValue && ((ListValue) deriv.value).values.isEmpty()) - return emptyDenotation; - } - // nonLambdaError: Prune if the denotation is an error and the formula is not a partial formula - if (containsStrategy(nonLambdaError) && !isLambdaFormula(deriv.formula)) { - deriv.ensureExecuted(parser.executor, ex.context); - if (deriv.value instanceof ErrorValue) - return nonLambdaError; - } - // tooManyValues: Prune if the denotation has too many values (at $ROOT only) - if (containsStrategy(tooManyValues) && deriv.isRoot(ex.numTokens())) { - if (!(deriv.value instanceof ListValue) || - ((ListValue) deriv.value).values.size() > DerivationPruner.opts.maxNumValues) - return tooManyValues; - } - // doubleSummarizers: Prune when two summarizers (aggregate or superlative) are directly nested - // e.g., in (sum (avg ...)) and (min (argmax ...)), the outer operation is redundant - if (containsStrategy(doubleSummarizers)) { - Formula innerFormula = null; - if (deriv.formula instanceof SuperlativeFormula) - innerFormula = ((SuperlativeFormula) deriv.formula).head; - else if (deriv.formula instanceof AggregateFormula) - innerFormula = ((AggregateFormula) deriv.formula).child; - if (innerFormula != null && - (innerFormula instanceof SuperlativeFormula || innerFormula instanceof AggregateFormula)) - return doubleSummarizers; - } - // multipleSuperlatives: Prune when more than one superlatives are used - // (don't need to be adjacent) - if (containsStrategy(multipleSuperlatives)) { - List stack = new ArrayList<>(); - int count = 0; - 
stack.add(deriv.formula.toLispTree()); - while (!stack.isEmpty()) { - LispTree tree = stack.remove(stack.size() - 1); - if (tree.isLeaf()) { - if ("argmax".equals(tree.value) || "argmin".equals(tree.value)) { - count++; - if (count >= 2) - return multipleSuperlatives; - } - } else { - for (LispTree subtree : tree.children) - stack.add(subtree); - } - } - } - // sameMerge: Prune merge formulas with two identical children - if (containsStrategy(sameMerge) && deriv.formula instanceof MergeFormula) { - MergeFormula merge = (MergeFormula) deriv.formula; - if (merge.child1.equals(merge.child2)) - return sameMerge; - } - // mistypedMerge: Prune merge formulas with children of different types - if (containsStrategy(mistypedMerge) && deriv.formula instanceof MergeFormula) { - MergeFormula merge = (MergeFormula) deriv.formula; - SemType type1 = TypeInference.inferType(merge.child1, true); - SemType type2 = TypeInference.inferType(merge.child2, true); - if (!type1.meet(type2).isValid()) - return mistypedMerge; - } - // unsortedMerge: Prune merge formulas where the children's string forms are not lexicographically sorted. - // Will remove redundant (and Y X) when (and Y X) is already present. 
- if (containsStrategy(unsortedMerge) && deriv.formula instanceof MergeFormula) { - MergeFormula merge = (MergeFormula) deriv.formula; - String child1 = merge.child1.toString(), child2 = merge.child2.toString(); - if (child1.compareTo(child2) >= 0) - return unsortedMerge; - } - // badSummarizerHead: Prune if the head of a superlative or a non-count aggregate - // is empty or is a single object - if (containsStrategy(badSummarizerHead)) { - Formula innerFormula = null; - boolean isCount = false; - if (deriv.formula instanceof SuperlativeFormula) - innerFormula = ((SuperlativeFormula) deriv.formula).head; - else if (deriv.formula instanceof AggregateFormula) { - innerFormula = ((AggregateFormula) deriv.formula).child; - if (((AggregateFormula) deriv.formula).mode == AggregateFormula.Mode.count) - isCount = true; - } - if (innerFormula != null) { - try { - TypeInference.inferType(innerFormula); - Value innerValue = parser.executor.execute(innerFormula, ex.context).value; - if (innerValue instanceof ListValue) { - int size = ((ListValue) innerValue).values.size(); - if (size == 0 || (size == 1 && !(opts.allowCountOne && isCount))) - return badSummarizerHead; - } - } catch (Exception e) { - // TypeInference fails; probably because of free variables. No need to do anything. 
- } - } - } - return null; - } - - // Helper function: return true if the result is clearly a binary - private boolean isLambdaFormula(Formula formula) { - if (formula instanceof LambdaFormula) return true; - if (formula instanceof ValueFormula && - CanonicalNames.isBinary(((ValueFormula) formula).value)) return true; - return false; - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/DerivOpCountFeatureComputer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/DerivOpCountFeatureComputer.java deleted file mode 100644 index ed1b110774..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/DerivOpCountFeatureComputer.java +++ /dev/null @@ -1,58 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.util.*; - -import com.google.common.collect.Sets; - -import fig.basic.*; - -/** - * Extracts indicator features that count how many times semantic functions and - * LHSs have been used in the derivation For now we count how many times MergeFn - * and JoinFn, also how many time unary, binary and entity lexical entries have - * been used. 
The feature is a pair with the operation and the count - * - * @author jonathanberant - */ -public class DerivOpCountFeatureComputer implements FeatureComputer { - public static class Options { - @Option(gloss = "Count only basic categories and SemanticFns") - public boolean countBasicOnly = true; - } - public static Options opts = new Options(); - - public static final String entityCat = "$Entity"; - public static final String unaryCat = "$Unary"; - public static final String binaryCat = "$Binary"; - public static final String joinFn = "JoinFn"; - public static final String mergeFn = "MergeFn"; - public static final String bridgeFn = "BridgeFn"; - public static Set featureNames = Sets.newHashSet(entityCat, unaryCat, binaryCat, joinFn, mergeFn, bridgeFn); - - @Override - public void extractLocal(Example ex, Derivation deriv) { - if (!FeatureExtractor.containsDomain("opCount")) return; - if (!deriv.isRoot(ex.numTokens())) return; - - // extract the operation count - Map opCounter = new HashMap<>(); - extractOperationsRecurse(deriv, opCounter); - addFeatures(deriv, opCounter); - } - - private void extractOperationsRecurse(Derivation deriv, Map opCounter) { - // Basic case: no rule - if (deriv.children.isEmpty()) return; - // increment counts for current rule - MapUtils.incr(opCounter, deriv.rule.lhs); - MapUtils.incr(opCounter, deriv.rule.sem.getClass().getSimpleName()); - // recursive call - for (Derivation child : deriv.children) - extractOperationsRecurse(child, opCounter); - } - - private void addFeatures(Derivation deriv, Map opCounter) { - for (String feature : (opts.countBasicOnly ? 
featureNames : opCounter.keySet())) - deriv.addFeature("opCount", "count(" + feature + ")=" + MapUtils.get(opCounter, feature, 0)); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Derivation.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Derivation.java deleted file mode 100644 index 7845b84292..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Derivation.java +++ /dev/null @@ -1,579 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.util.*; - -import fig.basic.*; - -/** - * A Derivation corresponds to the production of a (partial) logical form - * |formula| from a span of the utterance [start, end). Contains the formula and - * what was used to produce it (like a search state). Each derivation is created - * by a grammar rule and has some features and a score. - * - * @author Percy Liang - */ -public class Derivation implements SemanticFn.Callable, HasScore { - public static class Options { - @Option(gloss = "When printing derivations, to show values (could be quite verbose)") - public boolean showValues = true; - @Option(gloss = "When printing derivations, to show the first value (ignored when showValues is set)") - public boolean showFirstValue = false; - @Option(gloss = "When printing derivations, to show types") - public boolean showTypes = true; - @Option(gloss = "When printing derivations, to show rules") - public boolean showRules = false; - @Option(gloss = "When printing derivations, to show canonical utterance") - public boolean showUtterance = false; - @Option(gloss = "When printing derivations, show the category") - public boolean showCat = false; - @Option(gloss = "When executing, show formulae (for debugging)") - public boolean showExecutions = false; - @Option(gloss = "Pick the comparator used to sort derivations") - public String derivComparator = "ScoredDerivationComparator"; - @Option(gloss = "bonus score for being all anchored") - public double anchoredBonus = 0.0; - } - - public static 
Options opts = new Options(); - - //// Basic fields: created by the constructor. - - // Span that the derivation is built over - public final String cat; - public final int start; - public final int end; - - // Floating cell information - // TODO(yushi): make fields final - public String canonicalUtterance; - public boolean allAnchored = true; - private int[] numAnchors; // Number of times each token was anchored - - /** - * Information for grammar induction. - * For each descendant derivation of the body, this class tracks where and what in the head it matches - * GrammarInfo.start, GrammarInfo.end refer to matching positions in the head, as opposed to the body - * @author sidaw - **/ - public class GrammarInfo { - public boolean anchored = false; - public boolean matched = false; - public int start = -1, end = -1; - public Formula formula; - public List matches = new ArrayList<>(); - } - public GrammarInfo grammarInfo = new GrammarInfo(); - - - // If this derivation is composed of other derivations - public final Rule rule; // Which rule was used to produce this derivation? Set to nullRule if not. - public final List children; // Corresponds to the RHS of the rule. - - //// SemanticFn fields: read/written by SemanticFn. - // Note: SemanticFn should only depend on Formula and the Freebase type - // information. This could be its own class, but expose more right now to - // be more flexible. - - public final Formula formula; // Logical form produced by this derivation - public final SemType type; // Type corresponding to that logical form - - //// Fields produced by feature extractor, evaluation, etc. - - private List localChoices; // Just for printing/debugging. - - // TODO(pliang): make fields private - - // Information for scoring - private final FeatureVector localFeatureVector; // Features - double score = Double.NaN; // Weighted combination of features - double prob = Double.NaN; // Probability (normalized exp of score). 
- - // Used during parsing (by FeatureExtractor, SemanticFn) to cache arbitrary - // computation across different sub-Derivations. - // Convention: - // - use the featureDomain, FeatureComputer or SemanticFn as the key. - // - the value is whatever the FeatureExtractor needs. - // This information should be set to null after parsing is done. - private Map tempState; - - // What the formula evaluates to (optionally set later; only non-null for the root Derivation) - public Value value; - public Evaluation executorStats; - - // Number in [0, 1] denoting how correct the value is. - public double compatibility = Double.NaN; - - // Miscellaneous statistics - int maxBeamPosition = -1; // Lowest position that this tree or any of its children is on the beam (after sorting) - int maxUnsortedBeamPosition = -1; // Lowest position that this tree or any of its children is on the beam (before sorting) - int preSortBeamPosition = -1; - int postSortBeamPosition = -1; - - // Cache the hash code - int hashCode = -1; - - // Each derivation that gets created gets a unique ID in increasing order so that - // we can break ties consistently for reproducible results. - long creationIndex; - public static long numCreated = 0; // Incremented for each derivation we create. - @SuppressWarnings("unchecked") - public static final Comparator derivScoreComparator = - (Comparator)Utils.newInstanceHard(SempreUtils.resolveClassName("Derivation$" + opts.derivComparator)); - - public static final List emptyList = Collections.emptyList(); - - // A Derivation is built from - - /** Builder for everyone. 
*/ - public static class Builder { - private String cat; - private int start; - private int end; - private Rule rule; - private List children; - private Formula formula; - private SemType type; - private FeatureVector localFeatureVector = new FeatureVector(); - private double score = Double.NaN; - private Value value; - private Evaluation executorStats; - private double compatibility = Double.NaN; - private double prob = Double.NaN; - private String canonicalUtterance = ""; - - public Builder cat(String cat) { this.cat = cat; return this; } - public Builder start(int start) { this.start = start; return this; } - public Builder end(int end) { this.end = end; return this; } - public Builder rule(Rule rule) { this.rule = rule; return this; } - public Builder children(List children) { this.children = children; return this; } - public Builder formula(Formula formula) { this.formula = formula; return this; } - public Builder type(SemType type) { this.type = type; return this; } - public Builder localFeatureVector(FeatureVector localFeatureVector) { this.localFeatureVector = localFeatureVector; return this; } - public Builder score(double score) { this.score = score; return this; } - public Builder value(Value value) { this.value = value; return this; } - public Builder executorStats(Evaluation executorStats) { this.executorStats = executorStats; return this; } - public Builder compatibility(double compatibility) { this.compatibility = compatibility; return this; } - public Builder prob(double prob) { this.prob = prob; return this; } - public Builder canonicalUtterance(String canonicalUtterance) { this.canonicalUtterance = canonicalUtterance; return this; } - - public Builder withStringFormulaFrom(String value) { - this.formula = new ValueFormula<>(new StringValue(value)); - this.type = SemType.stringType; - return this; - } - public Builder withFormulaFrom(Derivation deriv) { - this.formula = deriv.formula; - this.type = deriv.type; - return this; - } - - public Builder 
withCallable(SemanticFn.Callable c) { - this.cat = c.getCat(); - this.start = c.getStart(); - this.end = c.getEnd(); - this.rule = c.getRule(); - this.children = c.getChildren(); - return this; - } - - public Builder withAllFrom(Derivation deriv) { - this.cat = deriv.cat; - this.start = deriv.start; - this.end = deriv.end; - this.rule = deriv.rule; - this.children = deriv.children == null ? null : new ArrayList<>(deriv.children); - this.formula = deriv.formula; - this.type = deriv.type; - this.localFeatureVector = deriv.localFeatureVector; - this.score = deriv.score; - this.value = deriv.value; - this.executorStats = deriv.executorStats; - this.compatibility = deriv.compatibility; - this.prob = deriv.prob; - this.canonicalUtterance = deriv.canonicalUtterance; - return this; - } - - public Derivation createDerivation() { - return new Derivation( - cat, start, end, rule, children, formula, type, - localFeatureVector, score, value, executorStats, compatibility, prob, - canonicalUtterance); - } - } - - Derivation(String cat, int start, int end, Rule rule, List children, Formula formula, SemType type, - FeatureVector localFeatureVector, double score, Value value, Evaluation executorStats, double compatibility, double prob, - String canonicalUtterance) { - this.cat = cat; - this.start = start; - this.end = end; - this.rule = rule; - this.children = children; - this.formula = formula; - this.type = type; - this.localFeatureVector = localFeatureVector; - this.score = score; - this.value = value; - this.executorStats = executorStats; - this.compatibility = compatibility; - this.prob = prob; - this.canonicalUtterance = canonicalUtterance; - this.creationIndex = numCreated++; - } - - public Formula getFormula() { return formula; } - public double getScore() { return score; } - public double getProb() { return prob; } - public double getCompatibility() { return compatibility; } - public List getChildren() { return children; } - public Value getValue() { return value; } - - 
public boolean isFeaturizedAndScored() { return !Double.isNaN(score); } - public boolean isExecuted() { return value != null; } - public int getMaxBeamPosition() { return maxBeamPosition; } - public String getCat() { return cat; } - public int getStart() { return start; } - public int getEnd() { return end; } - public boolean containsIndex(int i) { return i < end && i >= start; } - public Rule getRule() { return rule; } - public Evaluation getExecutorStats() { return executorStats; } - public FeatureVector getLocalFeatureVector() { return localFeatureVector; } - - public Derivation child(int i) { return children.get(i); } - public String childStringValue(int i) { - return Formulas.getString(children.get(i).formula); - } - - // Return whether |deriv| is built over the root Derivation. - public boolean isRoot(int numTokens) { - return cat.equals(Rule.rootCat) && ((start == 0 && end == numTokens) || (start == -1)); - } - - // Return whether |deriv| has root category (for floating parser) - public boolean isRootCat() { - return cat.equals(Rule.rootCat); - } - - // Functions that operate on features. - public void addFeature(String domain, String name) { addFeature(domain, name, 1); } - public void addFeature(String domain, String name, double value) { this.localFeatureVector.add(domain, name, value); } - public void addHistogramFeature(String domain, String name, double value, - int initBinSize, int numBins, boolean exp) { - this.localFeatureVector.addHistogram(domain, name, value, initBinSize, numBins, exp); - } - public void addFeatureWithBias(String domain, String name, double value) { this.localFeatureVector.addWithBias(domain, name, value); } - public void addFeatures(FeatureVector fv) { this.localFeatureVector.add(fv); } - - public double localScore(Params params) { - return localFeatureVector.dotProduct(params) + (this.allAnchored()? opts.anchoredBonus : 0.0); - } - - // SHOULD NOT BE USED except during test time if the memory is desperately needed. 
- public void clearFeatures() { - localFeatureVector.clear(); - } - - /** - * Recursively compute the score for each node in derivation. Update |score| - * field as well as return its value. - */ - public double computeScore(Params params) { - score = localScore(params); - if (children != null) - for (Derivation child : children) - score += child.computeScore(params); - return score; - } - - /** - * Same as |computeScore()| but without recursion (assumes children are - * already scored). - */ - public double computeScoreLocal(Params params) { - score = localScore(params); - if (children != null) - for (Derivation child : children) - score += child.score; - return score; - } - - // If we haven't executed the formula associated with this derivation, then - // execute it! - public void ensureExecuted(Executor executor, ContextValue context) { - if (isExecuted()) return; - StopWatchSet.begin("Executor.execute"); - if (opts.showExecutions) - LogInfo.logs("%s - %s", canonicalUtterance, formula); - Executor.Response response = executor.execute(formula, context); - StopWatchSet.end(); - value = response.value; - executorStats = response.stats; - } - - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("derivation"); - if (formula != null) - tree.addChild(LispTree.proto.newList("formula", formula.toLispTree())); - if (value != null) { - if (opts.showValues) - tree.addChild(LispTree.proto.newList("value", value.toLispTree())); - else if (value instanceof ListValue) { - List values = ((ListValue) value).values; - if (opts.showFirstValue && values.size() > 0) { - tree.addChild(LispTree.proto.newList(values.size() + " values", values.get(0).toLispTree())); - } else { - tree.addChild(values.size() + " values"); - } - } - - } - if (type != null && opts.showTypes) - tree.addChild(LispTree.proto.newList("type", type.toLispTree())); - if (opts.showRules) { - if (rule != null) tree.addChild(getRuleLispTree()); - } - if (opts.showUtterance && 
canonicalUtterance != null) { - tree.addChild(LispTree.proto.newList("canonicalUtterance", canonicalUtterance)); - } - if (opts.showCat && cat != null) { - tree.addChild(LispTree.proto.newList("cat", cat)); - } - return tree; - } - - /** - * @return lisp tree showing the entire parse tree - */ - public LispTree toRecursiveLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("derivation"); - tree.addChild(LispTree.proto.newList("span", cat + "[" + start + ":" + end + "]")); - if (formula != null) - tree.addChild(LispTree.proto.newList("formula", formula.toLispTree())); - for (Derivation child : children) - tree.addChild(child.toRecursiveLispTree()); - return tree; - } - - public String toRecursiveString() { - return toRecursiveLispTree().toString(); - } - - // TODO(pliang): remove this in favor of localChoices - private LispTree getRuleLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("rules"); - getRuleLispTreeRecurs(tree); - return tree; - } - private void getRuleLispTreeRecurs(LispTree tree) { - if (children.size() > 0) { - tree.addChild(LispTree.proto.newList("rule", rule.toLispTree())); - for (Derivation child : children) { - child.getRuleLispTreeRecurs(tree); - } - } - } - - public String startEndString(List tokens) { - return start + ":" + end + (start == -1 ? 
"" : tokens.subList(start, end)); - } - public String toString() { return toLispTree().toString(); } - - public void incrementLocalFeatureVector(double factor, Map map) { - localFeatureVector.increment(factor, map, AllFeatureMatcher.matcher); - } - public void incrementAllFeatureVector(double factor, Map map) { - incrementAllFeatureVector(factor, map, AllFeatureMatcher.matcher); - } - public void incrementAllFeatureVector(double factor, Map map, FeatureMatcher updateFeatureMatcher) { - localFeatureVector.increment(factor, map, updateFeatureMatcher); - for (Derivation child : children) - child.incrementAllFeatureVector(factor, map, updateFeatureMatcher); - } - public void incrementAllFeatureVector(double factor, FeatureVector fv) { - localFeatureVector.add(factor, fv); - for (Derivation child : children) - child.incrementAllFeatureVector(factor, fv); - } - - // returns feature vector with renamed features by prefix - public FeatureVector addPrefixLocalFeatureVector(String prefix) { - return localFeatureVector.addPrefix(prefix); - } - - public Map getAllFeatureVector() { - Map m = new HashMap<>(); - incrementAllFeatureVector(1.0d, m, AllFeatureMatcher.matcher); - return m; - } - - // TODO(pliang): this is crazy inefficient - public double getAllFeatureVector(String featureName) { - Map m = new HashMap<>(); - incrementAllFeatureVector(1.0d, m, new ExactFeatureMatcher(featureName)); - return MapUtils.get(m, featureName, 0.0); - } - - public void addLocalChoice(String choice) { - if (localChoices == null) - localChoices = new ArrayList(); - localChoices.add(choice); - } - - public void incrementAllChoices(int factor, Map map) { - if (opts.showRules) - MapUtils.incr(map, "[" + start + ":" + end + "] " + rule.toString(), 1); - if (localChoices != null) { - for (String choice : localChoices) - MapUtils.incr(map, choice, factor); - } - for (Derivation child : children) - child.incrementAllChoices(factor, map); - } - - // Used to compare derivations by score. 
- public static class ScoredDerivationComparator implements Comparator { - @Override - public int compare(Derivation deriv1, Derivation deriv2) { - if (deriv1.score > deriv2.score) return -1; - if (deriv1.score < deriv2.score) return +1; - // Ensure reproducible randomness - if (deriv1.creationIndex < deriv2.creationIndex) return -1; - if (deriv1.creationIndex > deriv2.creationIndex) return +1; - return 0; - } - } - - // Used to compare derivations by compatibility. - public static class CompatibilityDerivationComparator implements Comparator { - @Override - public int compare(Derivation deriv1, Derivation deriv2) { - if (deriv1.compatibility > deriv2.compatibility) return -1; - if (deriv1.compatibility < deriv2.compatibility) return +1; - // Ensure reproducible randomness - if (deriv1.creationIndex < deriv2.creationIndex) return -1; - if (deriv1.creationIndex > deriv2.creationIndex) return +1; - return 0; - } - } - - //Used to compare derivations by score, prioritizing the fully anchored. 
- public static class AnchorPriorityScoreComparator implements Comparator { - @Override - public int compare(Derivation deriv1, Derivation deriv2) { - boolean deriv1Core = deriv1.allAnchored(); - boolean deriv2Core = deriv2.allAnchored(); - - if (deriv1Core && !deriv2Core) return -1; - if (deriv2Core && !deriv1Core) return +1; - - if (deriv1.score > deriv2.score) return -1; - if (deriv1.score < deriv2.score) return +1; - // Ensure reproducible randomness - if (deriv1.creationIndex < deriv2.creationIndex) return -1; - if (deriv1.creationIndex > deriv2.creationIndex) return +1; - return 0; - } - } - - // for debugging - public void printDerivationRecursively() { - LogInfo.logs("Deriv: %s(%s,%s) %s", cat, start, end, formula); - for (int i = 0; i < children.size(); i++) { - LogInfo.begin_track("child %s:", i); - children.get(i).printDerivationRecursively(); - LogInfo.end_track(); - } - } - - public static void sortByScore(List trees) { - Collections.sort(trees, derivScoreComparator); - } - - // Generate a probability distribution over derivations given their scores. - public static double[] getProbs(List derivations, double temperature) { - double[] probs = new double[derivations.size()]; - for (int i = 0; i < derivations.size(); i++) - probs[i] = derivations.get(i).getScore() / temperature; - if (probs.length > 0) - NumUtils.expNormalize(probs); - return probs; - } - - // Manipulation of temporary state used during parsing. - public Map getTempState() { - // Create the tempState if it doesn't exist. - if (tempState == null) - tempState = new HashMap(); - return tempState; - } - public void clearTempState() { - tempState = null; - if (children != null) - for (Derivation child : children) - child.clearTempState(); - } - - /** - * Return an int array numAnchors where numAnchors[i] is - * the number of times we anchored on token i. - * - * numAnchors[>= numAnchors.length] are 0 by default. 
- */ - public int[] getNumAnchors() { - if (numAnchors == null) { - if (rule.isAnchored()) { - numAnchors = new int[end]; - for (int i = start; i < end; i++) numAnchors[i] = 1; - } else { - numAnchors = new int[0]; - for (Derivation child : children) { - int[] childNumAnchors = child.getNumAnchors(); - if (numAnchors.length < childNumAnchors.length) { - int[] newNumAnchors = new int[childNumAnchors.length]; - for (int i = 0; i < numAnchors.length; i++) - newNumAnchors[i] = numAnchors[i]; - numAnchors = newNumAnchors; - } - for (int i = 0; i < childNumAnchors.length; i++) - numAnchors[i] += childNumAnchors[i]; - } - } - } - return numAnchors; - } - - /** - * Return a boolean array anchoredTokens where anchoredTokens[i] - * indicates whether we have anchored on token i. - * - * anchoredTokens[>= anchoredTokens.length] are False by default - */ - public boolean[] getAnchoredTokens() { - int[] numAnchors = getNumAnchors(); - boolean[] anchoredTokens = new boolean[numAnchors.length]; - for (int i = 0; i < numAnchors.length; i++) - anchoredTokens[i] = (numAnchors[i] > 0); - return anchoredTokens; - } - - public Derivation betaReduction() { - Formula reduced = Formulas.betaReduction(formula); - return new Builder().withAllFrom(this).formula(reduced).createDerivation(); - } - - public boolean allAnchored() { - if (rule.isInduced() || !this.allAnchored) { - this.allAnchored = false; - return false; - } else { - for (Derivation child : children) { - if (child.allAnchored() == false) return false; - } - return true; - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/DerivationPruner.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/DerivationPruner.java deleted file mode 100644 index 10cf516685..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/DerivationPruner.java +++ /dev/null @@ -1,111 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.util.*; - -import fig.basic.*; - -/** - * Prune derivations during parsing. 
- * - * To add custom pruning criteria, implement a DerivationPruningComputer class, - * and put the class name in the |pruningComputers| option. - * - * @author ppasupat - */ - -public class DerivationPruner { - public static class Options { - @Option(gloss = "Pruning strategies to use") - public List pruningStrategies = new ArrayList<>(); - @Option(gloss = "DerivationPruningComputer subclasses to look for pruning strategies") - public List pruningComputers = new ArrayList<>(); - @Option public int pruningVerbosity = 0; - @Option(gloss = "(for tooManyValues) maximum denotation size of the final formula") - public int maxNumValues = 10; - } - public static Options opts = new Options(); - - public final Parser parser; - public final Example ex; - private List pruningComputers = new ArrayList<>(); - // If not null, limit the pruning strategies to this list in addition to opts.pruningStrategies. - private List customAllowedPruningStrategies; - private final Set allStrategyNames; - - public DerivationPruner(ParserState parserState) { - this.parser = parserState.parser; - this.ex = parserState.ex; - this.pruningComputers.add(new DefaultDerivationPruningComputer(this)); - for (String pruningComputer : opts.pruningComputers) { - try { - Class pruningComputerClass = Class.forName(SempreUtils.resolveClassName(pruningComputer)); - pruningComputers.add((DerivationPruningComputer) pruningComputerClass.getConstructor(this.getClass()).newInstance(this)); - } catch (ClassNotFoundException e1) { - throw new RuntimeException("Illegal pruning computer: " + pruningComputer); - } catch (Exception e) { - e.printStackTrace(); - e.getCause().printStackTrace(); - throw new RuntimeException("Error while instantiating pruning computer: " + pruningComputer); - } - } - // Compile the list of all strategies - allStrategyNames = new HashSet<>(); - for (DerivationPruningComputer computer : pruningComputers) - allStrategyNames.addAll(computer.getAllStrategyNames()); - for (String strategy : 
opts.pruningStrategies) { - if (!allStrategyNames.contains(strategy)) - LogInfo.fails("Pruning strategy '%s' not found!", strategy); - } - } - - /** - * Set additional restrictions on the pruning strategies. - * - * If customAllowedPruningStrategies is not null, the pruning strategy must be in both - * opts.pruningStrategies and customAllowedPruningStrategies in order to be used. - * - * Useful when some pruning strategies can break the parsing mechanism. - */ - public void setCustomAllowedPruningStrategies(List customAllowedPruningStrategies) { - this.customAllowedPruningStrategies = customAllowedPruningStrategies; - } - - protected boolean containsStrategy(String name) { - return opts.pruningStrategies.contains(name) && - (customAllowedPruningStrategies == null || customAllowedPruningStrategies.contains(name)); - } - - public List getPruningComputers() { - return new ArrayList<>(pruningComputers); - } - - /** - * Return true if the derivation should be pruned. Otherwise, return false. - */ - public boolean isPruned(Derivation deriv) { - if (opts.pruningStrategies.isEmpty() && pruningComputers.isEmpty()) return false; - String matchedStrategy; - for (DerivationPruningComputer computer : pruningComputers) { - if ((matchedStrategy = computer.isPruned(deriv)) != null) { - if (opts.pruningVerbosity >= 2) - LogInfo.logs("PRUNED [%s] %s", matchedStrategy, deriv.formula); - return true; - } - } - return false; - } - - /** - * Run isPruned with a (temporary) custom set of allowed pruning strategies. - * If customAllowedPruningStrategies is null, all strategies are allowed. - * If customAllowedPruningStrategies is empty, no pruning happens. 
- */ - public boolean isPruned(Derivation deriv, List customAllowedPruningStategies) { - List old = this.customAllowedPruningStrategies; - this.customAllowedPruningStrategies = customAllowedPruningStategies; - boolean answer = isPruned(deriv); - this.customAllowedPruningStrategies = old; - return answer; - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/DerivationPruningComputer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/DerivationPruningComputer.java deleted file mode 100644 index 9ba39f434f..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/DerivationPruningComputer.java +++ /dev/null @@ -1,43 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.util.Collection; - -/** - * Used to prune formulas during parsing. - * - * Extend this class to add custom pruning criteria, - * then add the class name to the |pruningComputers| options of DerivationPruner. - * - * @author ppasupat - */ -public abstract class DerivationPruningComputer { - - protected final DerivationPruner pruner; - protected final Parser parser; - protected final Example ex; - - public DerivationPruningComputer(DerivationPruner pruner) { - this.pruner = pruner; - this.parser = pruner.parser; - this.ex = pruner.ex; - } - - /** - * Return a collection of all strategy names used in this class. - */ - abstract public Collection getAllStrategyNames(); - - // Shorthand - protected boolean containsStrategy(String name) { - return pruner.containsStrategy(name); - } - - /** - * Prune the derivation. - * - * To add pruning strategies, override this method. - * Return the strategy name to prune the formula, and null otherwise. 
- */ - public abstract String isPruned(Derivation deriv); - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/DerivationStream.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/DerivationStream.java deleted file mode 100644 index 34438600ea..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/DerivationStream.java +++ /dev/null @@ -1,13 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.util.Iterator; - -/** - * Represents a stream of Derivations which are constructed lazily for efficiency. - * Use either SingleDerivationStream or MultipleDerivationStream. - * Created by joberant on 3/14/14. - */ -public interface DerivationStream extends Iterator { - Derivation peek(); - int estimatedSize(); -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/DescriptionValue.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/DescriptionValue.java deleted file mode 100644 index d0517cc805..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/DescriptionValue.java +++ /dev/null @@ -1,31 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.LispTree; - -/** - * Represents the description part of a NameValue ("Barack Obama" rather than - * the id fb:en.barack_obama). 
- * - * @author Andrew Chou - */ -public class DescriptionValue extends Value { - public final String value; - - public DescriptionValue(LispTree tree) { this(tree.child(1).value); } - public DescriptionValue(String value) { this.value = value; } - - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("description"); - tree.addChild(value); - return tree; - } - - @Override public int hashCode() { return value.hashCode(); } - @Override public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - DescriptionValue that = (DescriptionValue) o; - return this.value.equals(that.value); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ErrorValue.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ErrorValue.java deleted file mode 100644 index 51da86f3d2..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ErrorValue.java +++ /dev/null @@ -1,71 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.LispTree; - -/** - * For timeouts, server exceptions, etc. - * - * @author Percy Liang - */ -public class ErrorValue extends Value { - // Logical form is invalid and can't be converted into SQL query for some reason. - // Example: (and x y), where x and y are two predicates - // Example: unbound variables - public static final ErrorValue badFormula(BadFormulaException e) { - return new ErrorValue("BADFORMULA: " + e); - } - - // Request is taking too long (caused by client-side timeouts). - public static final ErrorValue timeout = new ErrorValue("TIMEOUT"); - - // Server dropped the connection (sometimes because the request is taking too long). - public static final ErrorValue server408 = new ErrorValue("SERVER408"); - - // Internal server error (happens when Virtuoso thinks its going to take too long). - // Example: Virtuoso 42000 Error The estimated execution time 541 (sec) exceeds the limit of 400 (sec). 
- public static final ErrorValue server500 = new ErrorValue("SERVER500"); - - // Server returned back an empty response. - public static final ErrorValue empty = new ErrorValue("EMPTY"); - - // Server returned something back but it had a bad format (e.g., HTML instead of XML). - public static final ErrorValue badFormat = new ErrorValue("BADFORMAT"); - - // Execution of Java failed (generated by JavaExecutor). - public static final ErrorValue badJava(String message) { return new ErrorValue("BADJAVA: " + message); } - - public final String type; - - public ErrorValue(LispTree tree) { this.type = tree.child(1).value; } - public ErrorValue(String type) { this.type = type; } - - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("error"); - tree.addChild(type != null ? type : ""); - return tree; - } - - @Override - // TODO(pliang): return this (error type) to avoid clashes with NameValue - public String toString() { return type; } - public static ErrorValue fromString(String s) { - if (s.equals(timeout.type)) return timeout; - if (s.equals(server408.type)) return server408; - if (s.equals(server500.type)) return server500; - if (s.equals(empty.type)) return empty; - if (s.equals(badFormat.type)) return badFormat; - return null; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - ErrorValue that = (ErrorValue) o; - if (!type.equals(that.type)) return false; - return true; - } - - @Override public int hashCode() { return type.hashCode(); } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ExactValueEvaluator.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ExactValueEvaluator.java deleted file mode 100644 index 75f67e93fd..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ExactValueEvaluator.java +++ /dev/null @@ -1,8 +0,0 @@ -package edu.stanford.nlp.sempre; - -// This is the simplest evaluator, 
but exact match can sometimes be too harsh. -public class ExactValueEvaluator implements ValueEvaluator { - public double getCompatibility(Value target, Value pred) { - return target.equals(pred) ? 1 : 0; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Example.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Example.java deleted file mode 100644 index 2f93413d92..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Example.java +++ /dev/null @@ -1,394 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.Joiner; -import com.google.common.collect.Sets; -import fig.basic.Evaluation; -import fig.basic.LispTree; -import fig.basic.LogInfo; - -import java.util.*; - -/** - * An example corresponds roughly to an input-output pair, the basic unit which - * we make predictions on. The Example object stores both the input, - * preprocessing, and output of the parser. - * - * @author Percy Liang - * @author Roy Frostig - */ -@JsonIgnoreProperties(ignoreUnknown = true) -@JsonInclude(JsonInclude.Include.NON_NULL) -public class Example { - //// Information from the input file. - - // Unique identifier for this example. - @JsonProperty public final String id; - - // Input utterance - @JsonProperty public final String utterance; - - // Context - @JsonProperty public ContextValue context; - - // What we should try to predict. - @JsonProperty public Formula targetFormula; // Logical form (e.g., database query) - public List alternativeFormulas; // Alternative logical form (less canonical) - @JsonProperty public Value targetValue; // Denotation (e.g., answer) - - //// Information after preprocessing (e.g., tokenization, POS tagging, NER, syntactic parsing, etc.). 
- public LanguageInfo languageInfo = null; - - //// Output of the parser. - - // Predicted derivations (sorted by score). - public List predDerivations; - - // Temporary state while parsing an Example (see Derivation.java for analogous structure). - private Map tempState; - - // Statistics relating to processing the example. - public Evaluation evaluation; - - public static class Builder { - private String id; - private String utterance; - private ContextValue context; - private Formula targetFormula; - private Value targetValue; - private LanguageInfo languageInfo; - - public Builder setId(String id) { this.id = id; return this; } - public Builder setUtterance(String utterance) { this.utterance = utterance; return this; } - public Builder setContext(ContextValue context) { this.context = context; return this; } - public Builder setTargetFormula(Formula targetFormula) { this.targetFormula = targetFormula; return this; } - public Builder setTargetValue(Value targetValue) { this.targetValue = targetValue; return this; } - public Builder setLanguageInfo(LanguageInfo languageInfo) { this.languageInfo = languageInfo; return this; } - public Builder withExample(Example ex) { - setId(ex.id); - setUtterance(ex.utterance); - setContext(ex.context); - setTargetFormula(ex.targetFormula); - setTargetValue(ex.targetValue); - return this; - } - public Example createExample() { - return new Example(id, utterance, context, targetFormula, targetValue, languageInfo); - } - } - - @JsonCreator - public Example(@JsonProperty("id") String id, - @JsonProperty("utterance") String utterance, - @JsonProperty("context") ContextValue context, - @JsonProperty("targetFormula") Formula targetFormula, - @JsonProperty("targetValue") Value targetValue, - @JsonProperty("languageInfo") LanguageInfo languageInfo) { - this.id = id; - this.utterance = utterance; - this.context = context; - this.targetFormula = targetFormula; - this.targetValue = targetValue; - this.languageInfo = languageInfo; - } - - 
// Accessors - public String getId() { return id; } - public String getUtterance() { return utterance; } - public int numTokens() { return languageInfo.tokens.size(); } - public List getPredDerivations() { return predDerivations; } - - public void setContext(ContextValue context) { this.context = context; } - public void setTargetFormula(Formula targetFormula) { this.targetFormula = targetFormula; } - public void setAlternativeFormulas(List alternativeFormulas) { this.alternativeFormulas = alternativeFormulas; } - public void addAlternativeFormula(Formula alternativeFormula) { - if (this.alternativeFormulas == null) - this.alternativeFormulas = new ArrayList<>(); - this.alternativeFormulas.add(alternativeFormula); - } - public void setTargetValue(Value targetValue) { this.targetValue = targetValue; } - - public String spanString(int start, int end) { - return String.format("%d:%d[%s]", start, end, start != -1 ? phraseString(start, end) : "..."); - } - public String phraseString(int start, int end) { - return Joiner.on(' ').join(languageInfo.tokens.subList(start, end)); - } - - // Return a string representing the tokens between start and end. 
- public List getTokens() { return languageInfo.tokens; } - public List getLemmaTokens() { return languageInfo.lemmaTokens; } - public String token(int i) { return languageInfo.tokens.get(i); } - public String lemmaToken(int i) { return languageInfo.lemmaTokens.get(i); } - public String posTag(int i) { return languageInfo.posTags.get(i); } - public String phrase(int start, int end) { return languageInfo.phrase(start, end); } - public String lemmaPhrase(int start, int end) { return languageInfo.lemmaPhrase(start, end); } - - public String toJson() { return Json.writeValueAsStringHard(this); } - public static Example fromJson(String json) { return Json.readValueHard(json, Example.class); } - - public static Example fromLispTree(LispTree tree, String defaultId) { - Builder b = new Builder().setId(defaultId); - - for (int i = 1; i < tree.children.size(); i++) { - LispTree arg = tree.child(i); - String label = arg.child(0).value; - if ("id".equals(label)) { - b.setId(arg.child(1).value); - } else if ("utterance".equals(label)) { - b.setUtterance(arg.child(1).value); - } else if ("canonicalUtterance".equals(label)) { - b.setUtterance(arg.child(1).value); - } else if ("targetFormula".equals(label)) { - b.setTargetFormula(Formulas.fromLispTree(arg.child(1))); - } else if ("targetValue".equals(label) || "targetValues".equals(label)) { - if (arg.children.size() != 2) - throw new RuntimeException("Expect one target value"); - b.setTargetValue(Values.fromLispTree(arg.child(1))); - } else if ("context".equals(label)) { - b.setContext(new ContextValue(arg)); - } - } - b.setLanguageInfo(new LanguageInfo()); - - Example ex = b.createExample(); - - for (int i = 1; i < tree.children.size(); i++) { - LispTree arg = tree.child(i); - String label = arg.child(0).value; - if ("tokens".equals(label)) { - for (LispTree child : arg.child(1).children) - ex.languageInfo.tokens.add(child.value); - } else if ("lemmaTokens".equals(label)) { - for (LispTree child : arg.child(1).children) - 
ex.languageInfo.lemmaTokens.add(child.value); - } else if ("posTags".equals(label)) { - for (LispTree child : arg.child(1).children) - ex.languageInfo.posTags.add(child.value); - } else if ("nerTags".equals(label)) { - for (LispTree child : arg.child(1).children) - ex.languageInfo.nerTags.add(child.value); - } else if ("nerValues".equals(label)) { - for (LispTree child : arg.child(1).children) - ex.languageInfo.nerValues.add("null".equals(child.value) ? null : child.value); - } else if ("alternativeFormula".equals(label)) { - ex.addAlternativeFormula(Formulas.fromLispTree(arg.child(1))); - } else if ("evaluation".equals(label)) { - ex.evaluation = Evaluation.fromLispTree(arg.child(1)); - } else if ("predDerivations".equals(label)) { - // Featurized - ex.predDerivations = new ArrayList<>(); - for (int j = 1; j < arg.children.size(); j++) - ex.predDerivations.add(derivationFromLispTree(arg.child(j))); - } else if ("rawDerivations".equals(label) || "derivations".equals(label)) { - // Unfeaturized - ex.predDerivations = new ArrayList<>(); - for (int j = 1; j < arg.children.size(); j++) - ex.predDerivations.add(rawDerivationFromLispTree(arg.child(j))); - } else if (!Sets.newHashSet("id", "utterance", "targetFormula", "targetValue", "targetValues", "context", "original").contains(label)) { - throw new RuntimeException("Invalid example argument: " + arg); - } - } - - return ex; - } - - public void preprocess() { - this.languageInfo = LanguageAnalyzer.getSingleton().analyze(this.utterance); - this.targetValue = TargetValuePreprocessor.getSingleton().preprocess(this.targetValue, this); - } - - public void log() { - LogInfo.begin_track("Example: %s", utterance); - LogInfo.logs("Tokens: %s", getTokens()); - LogInfo.logs("Lemmatized tokens: %s", getLemmaTokens()); - LogInfo.logs("POS tags: %s", languageInfo.posTags); - LogInfo.logs("NER tags: %s", languageInfo.nerTags); - LogInfo.logs("NER values: %s", languageInfo.nerValues); - if (context != null) - LogInfo.logs("context: 
%s", context); - if (targetFormula != null) - LogInfo.logs("targetFormula: %s", targetFormula); - if (targetValue != null) - LogInfo.logs("targetValue: %s", targetValue); - LogInfo.logs("Dependency children: %s", languageInfo.dependencyChildren); - LogInfo.end_track(); - } - - public void logWithoutContext() { - LogInfo.begin_track("Example: %s", utterance); - LogInfo.logs("Tokens: %s", getTokens()); - LogInfo.logs("Lemmatized tokens: %s", getLemmaTokens()); - LogInfo.logs("POS tags: %s", languageInfo.posTags); - LogInfo.logs("NER tags: %s", languageInfo.nerTags); - LogInfo.logs("NER values: %s", languageInfo.nerValues); - if (targetFormula != null) - LogInfo.logs("targetFormula: %s", targetFormula); - if (targetValue != null) - LogInfo.logs("targetValue: %s", targetValue); - LogInfo.logs("Dependency children: %s", languageInfo.dependencyChildren); - LogInfo.end_track(); - } - - public List getCorrectDerivations() { - List res = new ArrayList<>(); - for (Derivation deriv : predDerivations) { - if (deriv.compatibility == Double.NaN) - throw new RuntimeException("Compatibility is not set"); - if (deriv.compatibility > 0) - res.add(deriv); - } - return res; - } - - public LispTree toLispTree(boolean outputPredDerivations) { - LispTree tree = LispTree.proto.newList(); - tree.addChild("example"); - - if (id != null) - tree.addChild(LispTree.proto.newList("id", id)); - if (utterance != null) - tree.addChild(LispTree.proto.newList("utterance", utterance)); - if (targetFormula != null) - tree.addChild(LispTree.proto.newList("targetFormula", targetFormula.toLispTree())); - if (targetValue != null) - tree.addChild(LispTree.proto.newList("targetValue", targetValue.toLispTree())); - - if (languageInfo != null) { - if (languageInfo.tokens != null) - tree.addChild(LispTree.proto.newList("tokens", LispTree.proto.newList(languageInfo.tokens))); - if (languageInfo.posTags != null) - tree.addChild(LispTree.proto.newList("posTags", Joiner.on(' ').join(languageInfo.posTags))); - if 
(languageInfo.nerTags != null) - tree.addChild(LispTree.proto.newList("nerTags", Joiner.on(' ').join(languageInfo.nerTags))); - } - - if (evaluation != null) - tree.addChild(LispTree.proto.newList("evaluation", evaluation.toLispTree())); - - if (predDerivations != null && outputPredDerivations) { - LispTree list = LispTree.proto.newList(); - list.addChild("predDerivations"); - for (Derivation deriv : predDerivations) - list.addChild(derivationToLispTree(deriv)); - tree.addChild(list); - } - - return tree; - } - - /** - * Parse a featurized derivation. - * - * Format: - * ({compatibility} {prob} {score} {value|null} {formula} {features}) - * where {features} = (({key} {value}) ({key} {value}) ...) - */ - public static Derivation derivationFromLispTree(LispTree item) { - Derivation.Builder b = new Derivation.Builder() - .cat(Rule.rootCat) - .start(-1) - .end(-1) - .rule(Rule.nullRule) - .children(new ArrayList()); - int i = 0; - - b.compatibility(Double.parseDouble(item.child(i++).value)); - b.prob(Double.parseDouble(item.child(i++).value)); - b.score(Double.parseDouble(item.child(i++).value)); - - LispTree valueTree = item.child(i++); - if (!valueTree.isLeaf() || !"null".equals(valueTree.value)) - b.value(Values.fromLispTree(valueTree)); - - b.formula(Formulas.fromLispTree(item.child(i++))); - - FeatureVector fv = new FeatureVector(); - LispTree features = item.child(i++); - for (int j = 0; j < features.children.size(); j++) - fv.addFromString(features.child(j).child(0).value, Double.parseDouble(features.child(j).child(1).value)); - - b.localFeatureVector(fv); - - return b.createDerivation(); - } - - public static LispTree derivationToLispTree(Derivation deriv) { - LispTree item = LispTree.proto.newList(); - - item.addChild(deriv.compatibility + ""); - item.addChild(deriv.prob + ""); - item.addChild(deriv.score + ""); - if (deriv.value != null) - item.addChild(deriv.value.toLispTree()); - else - item.addChild("null"); - item.addChild(deriv.formula.toLispTree()); - - 
HashMap features = new HashMap<>(); - deriv.incrementAllFeatureVector(1, features); - item.addChild(LispTree.proto.newList(features)); - - return item; - } - - /** - * Parse a LispTree with the format created by deriv.toLispTree(). - * Due to the complexity, rules and children are not parsed. - * - * Format: - * (derivation [(formula {formula})] [(value {value})] [(type {type})] - * [(canonicalUtterance {canonicalUtterance})]) - * @param item - * @return - */ - public static Derivation rawDerivationFromLispTree(LispTree item) { - Derivation.Builder b = new Derivation.Builder() - .cat(Rule.rootCat) - .start(-1).end(-1) - .rule(Rule.nullRule) - .children(new ArrayList()); - for (int i = 1; i < item.children.size(); i++) { - LispTree arg = item.child(i); - String label = arg.child(0).value; - if ("formula".equals(label)) { - b.formula(Formulas.fromLispTree(arg.child(1))); - } else if ("value".equals(label)) { - b.value(Values.fromLispTree(arg.child(1))); - } else if ("type".equals(label)) { - b.type(SemType.fromLispTree(arg.child(1))); - } else if ("canonicalUtterance".equals(label)) { - b.canonicalUtterance(arg.child(1).value); - } else { - throw new RuntimeException("Invalid example argument: " + arg); - } - } - return b.createDerivation(); - } - - public static LispTree rawDerivationToLispTree(Derivation deriv) { - return deriv.toLispTree(); - } - - public Map getTempState() { - // Create the tempState if it doesn't exist. 
- if (tempState == null) - tempState = new HashMap(); - return tempState; - } - public void clearTempState() { - tempState = null; - } - - /** - * Clean up things to save memory - */ - public void clean() { - predDerivations.clear(); - if (context.graph != null) - context.graph.clean(); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ExampleUtils.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ExampleUtils.java deleted file mode 100644 index 91a317bbe3..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ExampleUtils.java +++ /dev/null @@ -1,159 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.*; -import fig.exec.Execution; - -import java.io.*; -import java.util.*; - -/** - * Output examples in various forms. - * - * @author Percy Liang - */ -public final class ExampleUtils { - private ExampleUtils() { } - - // Output JSON file with just the basic input/output. - public static void writeJson(List examples) { - PrintWriter out = IOUtils.openOutHard(Execution.getFile("examples.json")); - for (Example ex : examples) - out.println(ex.toJson()); - out.close(); - } - - public static void writeJson(List examples, String outPath) throws IOException { - PrintWriter out = IOUtils.openOutHard(outPath); - out.println("["); - for (int i = 0; i < examples.size(); ++i) { - Example ex = examples.get(i); - out.print(ex.toJson()); - out.println(i < examples.size() - 1 ? "," : ""); - } - out.println("]"); - out.close(); - } - - private static String escapeSpace(String s) { - return s.replaceAll(" ", " "); - } - - // Output examples in Simple Dataset Format (Ranking). 
- public static void writeSDF(int iter, String group, - Evaluation evaluation, - List examples, - boolean outputPredDerivations) { - String basePath = "preds-iter" + iter + "-" + group + ".examples"; - String outPath = Execution.getFile(basePath); - if (outPath == null || examples.size() == 0) return; - LogInfo.begin_track("Writing examples to %s", basePath); - PrintWriter out = IOUtils.openOutHard(outPath); - - LispTree p = LispTree.proto; - out.println("# SDF version 1.1"); - out.println("# " + p.L(p.L("iter", iter), p.L("group", group), p.L("numExamples", examples.size()), p.L("evaluation", evaluation.toLispTree()))); - for (Example ex : examples) { - out.println(""); - out.println("example " + ex.id); - out.println("description " + p.L(p.L("utterance", ex.utterance), p.L("targetValue", ex.targetValue.toLispTree()), p.L("evaluation", ex.evaluation.toLispTree()))); - - if (outputPredDerivations) { - for (Derivation deriv : ex.predDerivations) { - StringBuilder buf = new StringBuilder(); - buf.append("item"); - LispTree description = p.newList(); - if (deriv.canonicalUtterance != null) - description.addChild(p.L("canonicalUtterance", deriv.canonicalUtterance)); - description.addChild(p.L("formula", deriv.formula.toLispTree())); - description.addChild(p.L("value", deriv.value.toLispTree())); - buf.append("\t" + description); - buf.append("\t" + deriv.compatibility); - Map features = deriv.getAllFeatureVector(); - buf.append("\t"); - boolean first = true; - for (Map.Entry e : features.entrySet()) { - if (!first) - buf.append(' '); - first = false; - buf.append(e.getKey() + ":" + e.getValue()); - } - out.println(buf.toString()); - } - } - } - out.close(); - LogInfo.end_track(); - } - - public static void writeParaphraseSDF(int iter, String group, Example ex, - boolean outputPredDerivations) { - String basePath = "preds-iter" + iter + "-" + group + ".examples"; - String outPath = Execution.getFile(basePath); - if (outPath == null) return; - PrintWriter out = 
IOUtils.openOutAppendHard(outPath); - - out.println("example " + ex.id); - - if (outputPredDerivations) { - int i = 0; - for (Derivation deriv : ex.predDerivations) { - if (deriv.canonicalUtterance != null) - out.println("Pred@" + i + ":\t" + ex.utterance + "\t" + deriv.canonicalUtterance + "\t" + deriv.compatibility + "\t" + deriv.formula + "\t" + deriv.prob); - i++; - } - } - out.close(); - } - - public static void writeEvaluationSDF(int iter, String group, - Evaluation evaluation, int numExamples) { - String basePath = "preds-iter" + iter + "-" + group + ".examples"; - String outPath = Execution.getFile(basePath); - if (outPath == null) return; - PrintWriter out = IOUtils.openOutAppendHard(outPath); - - LispTree p = LispTree.proto; - out.println(""); - out.println("# SDF version 1.1"); - out.println("# " + p.L(p.L("iter", iter), p.L("group", group), p.L("numExamples", numExamples), p.L("evaluation", evaluation.toLispTree()))); - out.close(); - } - - public static void writePredictionTSV(int iter, String group, Example ex) { - String basePath = "preds-iter" + iter + "-" + group + ".tsv"; - String outPath = Execution.getFile(basePath); - if (outPath == null) return; - PrintWriter out = IOUtils.openOutAppendHard(outPath); - - List fields = new ArrayList<>(); - fields.add(ex.id); - - if (!ex.predDerivations.isEmpty()) { - Derivation deriv = ex.predDerivations.get(0); - if (deriv.value instanceof ListValue) { - List values = ((ListValue) deriv.value).values; - for (Value v : values) { - fields.add(v.pureString().replaceAll("\\s+", " ").trim()); - } - } - } - - out.println(String.join("\t", fields)); - out.close(); - } - - //read lisptree and write json - public static void main(String[] args) { - Dataset dataset = new Dataset(); - Pair pair = Pair.newPair("train", args[0]); - Dataset.opts.splitDevFromTrain = false; - dataset.readFromPathPairs(Collections.singletonList(pair)); - List examples = dataset.examples("train"); - try { - writeJson(examples, args[1]); - } 
catch (IOException e) { - e.printStackTrace(); - throw new RuntimeException(e); - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Executor.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Executor.java deleted file mode 100644 index 5d8419a648..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Executor.java +++ /dev/null @@ -1,24 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.Evaluation; - -/** - * An Executor takes a logical form (Formula) and computes its denotation - * (Value). - * - * @author Percy Liang - */ -public abstract class Executor { - public static class Response { - public Response(Value value) { this(value, new Evaluation()); } - public Response(Value value, Evaluation stats) { - this.value = value; - this.stats = stats; - } - public final Value value; - public final Evaluation stats; - } - - // Execute the formula in the given context. - public abstract Response execute(Formula formula, ContextValue context); -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FeatureComputer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FeatureComputer.java deleted file mode 100644 index 4fdace48cb..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FeatureComputer.java +++ /dev/null @@ -1,25 +0,0 @@ -package edu.stanford.nlp.sempre; - -/** - * A feature computer. - * - * Look at a derivation and add features to the feature vector. - * A FeatureComputer should be stateless. - * - * Before computing features, a FeatureComputer should call - * - * if (!FeatureExtractor.containsDomain(...)) return; - * - * to check the feature domain first. - */ -public interface FeatureComputer { - - /** - * This function is called on every sub-Derivation. - * - * It should extract only the features which depend in some way on |deriv|, - * not just on its children. 
- */ - void extractLocal(Example ex, Derivation deriv); - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FeatureExtractor.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FeatureExtractor.java deleted file mode 100644 index 907e228cc7..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FeatureExtractor.java +++ /dev/null @@ -1,310 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.google.common.base.Joiner; -import com.google.common.collect.Sets; -import fig.basic.*; - -import java.util.*; - -/** - * A FeatureExtractor specifies a mapping from derivations to feature vectors. - * - * [Using features] - * - Specify the feature domains in the featureDomains option. - * - If the features are defined in a separate FeatureComputer class, also specify - * the class name in the featureComputers option. - * - * [Implementing new features] There are 3 ways to implement new features: - * 1) Define features in SemanticFn, which is called when the SemanticFn is called. - * 2) Create a FeatureComputer class, which is called on each sub-Derivation if the class name - * is specified in the featureComputers option. - * 3) Add a method to this class. The method is called on each sub-Derivation. - * - * @author Percy Liang - */ -public class FeatureExtractor { - // Global place to specify features. 
- public static class Options { - @Option(gloss = "Set of feature domains to include") - public Set featureDomains = new HashSet<>(); - @Option(gloss = "Set of feature computer classes to load") - public Set featureComputers = Sets.newHashSet("DerivOpCountFeatureComputer"); - @Option(gloss = "Disable denotation features") - public boolean disableDenotationFeatures = false; - @Option(gloss = "Use all possible features, regardless of what featureDomains says") - public boolean useAllFeatures = false; - @Option(gloss = "For bigram features in paraphrased utterances, maximum distance to consider") - public int maxBigramDistance = 3; - @Option(gloss = "Whether or not paraphrasing and bigram features should be lexicalized") - public boolean lexicalBigramParaphrase = true; - } - - private Executor executor; - private List featureComputers = new ArrayList<>(); - - public FeatureExtractor(Executor executor) { - this.executor = executor; - for (String featureComputer : opts.featureComputers) { - featureComputers.add((FeatureComputer) Utils.newInstanceHard(SempreUtils.resolveClassName(featureComputer))); - } - } - - public static Options opts = new Options(); - - public static boolean containsDomain(String domain) { - if (opts.disableDenotationFeatures && domain.equals("denotation")) return false; - return opts.useAllFeatures || opts.featureDomains.contains(domain); - } - - // This function is called on every sub-Derivation, so we should extract only - // features which depend in some way on |deriv|, not just on its children. 
- public void extractLocal(Example ex, Derivation deriv) { - StopWatchSet.begin("FeatureExtractor.extractLocal"); - extractRuleFeatures(ex, deriv); - extractSpanFeatures(ex, deriv); - extractDenotationFeatures(ex, deriv); - extractDependencyFeatures(ex, deriv); - extractWhTypeFeatures(ex, deriv); - conjoinLemmaAndBinary(ex, deriv); - extractBigramFeatures(ex, deriv); - for (FeatureComputer featureComputer : featureComputers) - featureComputer.extractLocal(ex, deriv); - StopWatchSet.end(); - } - - // Add an indicator for each applied rule. - void extractRuleFeatures(Example ex, Derivation deriv) { - if (!containsDomain("rule")) return; - if (deriv.rule != Rule.nullRule) { - deriv.addFeature("rule", "fire"); - deriv.addFeature("rule", deriv.rule.toString()); - } - } - - // Extract features on the linguistic information of the spanned (anchored) tokens. - // (Not applicable for floating rules) - void extractSpanFeatures(Example ex, Derivation deriv) { - if (!containsDomain("span") || deriv.start == -1) return; - deriv.addFeature("span", "cat=" + deriv.cat + ",#tokens=" + (deriv.end - deriv.start)); - deriv.addFeature("span", "cat=" + deriv.cat + ",POS=" + ex.posTag(deriv.start) + "..." + ex.posTag(deriv.end - 1)); - } - - // Extract features on the denotation of the logical form produced. 
- // (For example, number of items in the list) - void extractDenotationFeatures(Example ex, Derivation deriv) { - if (!containsDomain("denotation")) return; - if (!deriv.isRoot(ex.numTokens())) return; - - deriv.ensureExecuted(executor, ex.context); - - if (deriv.value instanceof ErrorValue) { - deriv.addFeature("denotation", "error"); - return; - } - - if (deriv.value instanceof StringValue) { - if (((StringValue) deriv.value).value.equals("[]") || ((StringValue) deriv.value).value.equals("[null]")) - deriv.addFeature("denotation", "empty"); - return; - } - - if (deriv.value instanceof ListValue) { - ListValue list = (ListValue) deriv.value; - - if (list.values.size() == 1 && list.values.get(0) instanceof NumberValue) { - int count = getNumber(list.values.get(0)); - deriv.addFeature("denotation", "count-size" + (count <= 1 ? "=" + count : ">1")); - } - else { - int size = list.values.size(); - deriv.addFeature("denotation", "size" + (size < 3 ? "=" + size : ">=" + 3)); - } - - } - } - - int getNumber(Value value) { - if (value instanceof NumberValue) return (int) ((NumberValue) value).value; - if (value instanceof ListValue) return getNumber(((ListValue) value).values.get(0)); - throw new RuntimeException("Can't extract number from " + value); - } - - // Add an indicator for each alignment between a syntactic dependency (produced by the - // Stanford dependency parser) and the application of a semantic function. - void extractDependencyFeatures(Example ex, Derivation deriv) { - if (!containsDomain("dependencyParse") && !containsDomain("fullDependencyParse")) return; - if (deriv.rule != Rule.nullRule) { - for (Derivation child : deriv.children) { - for (int i = child.start; i < child.end; i++) { - for (LanguageInfo.DependencyEdge dependency : ex.languageInfo.dependencyChildren.get(i)) { - if (!child.containsIndex(dependency.modifier)) { - String direction = dependency.modifier > i ? 
"forward" : "backward"; - String containment = deriv.containsIndex(dependency.modifier) ? "internal" : "external"; - if (containsDomain("fullDependencyParse")) - addAllDependencyFeatures(dependency, direction, containment, - deriv); - else - deriv.addFeature("dependencyParse", - "(" + dependency.label + " " + direction + " " + containment + ") --- " - + deriv.getRule().toString()); - } - } - } - } - } - } - - private void addAllDependencyFeatures(LanguageInfo.DependencyEdge dependency, - String direction, String containment, Derivation deriv) { - String[] types = {dependency.label, "*"}; - String[] directions = {" " + direction, ""}; - String[] containments = {" " + containment, ""}; - String[] rules = {deriv.getRule().toString(), ""}; - for (String typePresent : types) { - for (String directionPresent : directions) { - for (String containmentPresent : containments) { - for (String rulePresent : rules) { - deriv.addFeature("fullDependencyParse", - "(" + typePresent + directionPresent + containmentPresent + ") --- " + rulePresent); - } - } - } - } - } - - // Conjunction of wh-question word and type - // (For example, "who" should go with PERSON and not DATE) - void extractWhTypeFeatures(Example ex, Derivation deriv) { - if (!containsDomain("whType")) return; - if (!deriv.isRoot(ex.numTokens())) return; - - if (ex.posTag(0).startsWith("W")) { - deriv.addFeature("whType", - "token0=" + ex.token(0) + "," + - "type=" + coarseType(deriv.type.toString())); - } - } - - public static final String PERSON = "fb:people.person"; - public static final String LOC = "fb:location.location"; - public static final String ORG = "fb:organization.organization"; - - public static String coarseType(String type) { - Set superTypes = SemTypeHierarchy.singleton.getSupertypes(type); - if (superTypes != null) { - if (superTypes.contains(PERSON)) return PERSON; - if (superTypes.contains(LOC)) return LOC; - if (superTypes.contains(ORG)) return ORG; - if 
(superTypes.contains(CanonicalNames.NUMBER)) return CanonicalNames.NUMBER; - if (superTypes.contains(CanonicalNames.DATE)) return CanonicalNames.DATE; - } - return "OTHER"; - } - - - //used in Berant et al., 2013 and in the RL parser - //conjoins all binaries in the logical form with all non-entity lemmas - void conjoinLemmaAndBinary(Example ex, Derivation deriv) { - if (!containsDomain("lemmaAndBinaries")) return; - if (!deriv.isRoot(ex.numTokens())) return; - - List nonEntityLemmas = new LinkedList<>(); - extractNonEntityLemmas(ex, deriv, nonEntityLemmas); - List binaries = extractBinaries(deriv.formula); - if (!binaries.isEmpty()) { - String binariesStr = Joiner.on('_').join(binaries); - for (String nonEntityLemma : nonEntityLemmas) { - deriv.addFeature("lemmaAndBinaries", "nonEntitylemmas=" + nonEntityLemma + - ",binaries=" + binariesStr); - } - } - } - - // Extract the utterance that the derivation generates (not necessarily the - // one in the input utterance). - private void extractUtterance(Derivation deriv, List utterance) { - if (deriv.rule == Rule.nullRule) return; - int c = 0; // Index into children - for (String item : deriv.rule.rhs) { - if (Rule.isCat(item)) - extractUtterance(deriv.children.get(c++), utterance); - else - utterance.add(item); - } - } - - //Used in Berant et., EMNLP 2013, and in the agenda RL parser - //Extracts all content-word lemmas in the derivation tree not dominated by the category $Entity - private void extractNonEntityLemmas(Example ex, Derivation deriv, - List nonEntityLemmas) { - if (deriv.children.size() == 0) { // base case this means it is a word that should be appended - for (int i = deriv.start; i < deriv.end; i++) { - String pos = ex.languageInfo.posTags.get(i); - if ((pos.startsWith("N") || pos.startsWith("V") || pos.startsWith("W") || pos.startsWith("A") || pos.equals("IN")) - && !ex.languageInfo.lemmaTokens.get(i).equals("be")) - nonEntityLemmas.add(ex.languageInfo.lemmaTokens.get(i)); - } - } else { // recursion - 
for (Derivation child : deriv.children) { - if (child.rule.lhs == null || !child.rule.lhs.equals("$Entity")) { - extractNonEntityLemmas(ex, child, nonEntityLemmas); - } else if (child.rule.lhs.equals("$Entity")) { - nonEntityLemmas.add("E"); - } - } - } - } - - //Used in Berant et al., 2013 and in agenda-based RL parser - private List extractBinaries(Formula formula) { - List res = new LinkedList<>(); - Set atomicElements = Formulas.extractAtomicFreebaseElements(formula); - for (String atomicElement : atomicElements) { - if (atomicElement.split("\\.").length == 3 && !atomicElement.equals("fb:type.object.type")) - res.add(atomicElement); - } - return res; - } - - /** - * Add an indicator for each pair of bigrams that can be aligned from the original - * utterance and two (not necessarily contiguous) lemmas in the generated utterance - */ - private void extractBigramFeatures(Example ex, Derivation deriv) { - if (!containsDomain("bigram")) return; - if (!deriv.cat.equals(Rule.rootCat)) return; - LanguageInfo derivInfo = LanguageAnalyzer.getSingleton().analyze(deriv.canonicalUtterance); - List derivLemmas = derivInfo.lemmaTokens; - List exLemmas = ex.languageInfo.lemmaTokens; - Map bigramCounts = new HashMap(); - for (int i = 0; i < exLemmas.size() - 1; i++) { - for (int j = 0; j < derivLemmas.size() - 1; j++) { - if (derivLemmas.get(j).equals(exLemmas.get(i))) { - // Consider bigrams separated by up to maxBigramDistance in generated utterance - for (int k = 1; j + k < derivLemmas.size() && k <= opts.maxBigramDistance; k++) { - if (derivLemmas.get(j + k).equals(exLemmas.get(i + 1))) { - if (opts.lexicalBigramParaphrase) - deriv.addFeature("bigram", - exLemmas.get(i) + "," + exLemmas.get(i + 1) + " - " + k); - else MapUtils.incr(bigramCounts, k, 1); - } - } - } - } - } - if (!opts.lexicalBigramParaphrase) { - for (Integer dist : bigramCounts.keySet()) - deriv.addFeature("bigram", "distance " + dist + " - " + bigramCounts.get(dist)); - } - } - - // Joins arrayList of 
strings into string - String join(List l, String delimiter) { - StringBuilder sb = new StringBuilder(l.get(0)); - for (int i = 1; i < l.size(); i++) { - sb.append(delimiter); - sb.append(l.get(i)); - } - return sb.toString(); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FeatureMatcher.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FeatureMatcher.java deleted file mode 100644 index bcb96192bd..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FeatureMatcher.java +++ /dev/null @@ -1,28 +0,0 @@ -package edu.stanford.nlp.sempre; - -public interface FeatureMatcher { - boolean matches(String feature); -} - -final class AllFeatureMatcher implements FeatureMatcher { - private AllFeatureMatcher() { } - @Override - public boolean matches(String feature) { return true; } - public static final AllFeatureMatcher matcher = new AllFeatureMatcher(); -} - -final class ExactFeatureMatcher implements FeatureMatcher { - private String match; - public ExactFeatureMatcher(String match) { this.match = match; } - @Override - public boolean matches(String feature) { return feature.equals(match); } -} - -final class DenotationFeatureMatcher implements FeatureMatcher { - @Override - public boolean matches(String feature) { - return feature.startsWith("denotation-size") || - feature.startsWith("count-denotation-size"); - } - public static final DenotationFeatureMatcher matcher = new DenotationFeatureMatcher(); -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FeatureVector.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FeatureVector.java deleted file mode 100644 index 0b50f707af..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FeatureVector.java +++ /dev/null @@ -1,291 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonValue; - -import fig.basic.*; - -import java.util.*; - -/** - * A FeatureVector 
represents a mapping from feature (string) to value - * (double). - * - * We enforce the convention that each feature is (domain, name), - * so that the key space isn't a free-for-all. - * - * @author Percy Liang - * @author Jonathan Berant - */ -public class FeatureVector { - public static class Options { - @Option(gloss = "When logging, ignore features with zero weight") - public boolean ignoreZeroWeight = false; - @Option(gloss = "Log only this number of top and bottom features") - public int logFeaturesLimit = Integer.MAX_VALUE; - } - public static Options opts = new Options(); - - // These features map to the value 1 (most common case in NLP). - private ArrayList indicatorFeatures; - // General features - private ArrayList> generalFeatures; - // A dense array of features to save memory - private double[] denseFeatures; - private static final String DENSE_NAME = "Dns"; - - public FeatureVector() { } // constructor that does nothing - - public FeatureVector(int numOfDenseFeatures) { - denseFeatures = new double[numOfDenseFeatures]; - Arrays.fill(denseFeatures, 0d); - } - - private static String toFeature(String domain, String name) { return domain + " :: " + name; } - - public void add(String domain, String name) { - add(toFeature(domain, name)); - } - private void add(String feature) { - if (indicatorFeatures == null) indicatorFeatures = new ArrayList<>(); - indicatorFeatures.add(feature); - } - - public void add(String domain, String name, double value) { - add(toFeature(domain, name), value); - } - private void add(String feature, double value) { - if (generalFeatures == null) generalFeatures = new ArrayList<>(); - generalFeatures.add(Pair.newPair(feature, value)); - } - - public void addWithBias(String domain, String name, double value) { - add(domain, name, value); - add(domain, name + "-bias", 1); - } - - // Add histogram features, e.g., domain :: name>=4 - public void addHistogram(String domain, String name, double value) { addHistogram(domain, name, 
value, 2, 10, true); } - public void addHistogram(String domain, String name, double value, int initBinSize, int numBins, boolean exp) { - double upper = initBinSize; - String bin = null; - int sign = value > 0 ? +1 : -1; - value = Math.abs(value); - for (int i = 0; i < numBins; i++) { - double lastUpper = upper; - if (i > 0) { - if (exp) upper *= initBinSize; - else upper += initBinSize; - } - if (value < upper) { - bin = (sign > 0) ? lastUpper + ":" + upper : (-upper) + ":" + (-lastUpper); - break; - } - } - if (bin == null) - bin = (sign > 0) ? ">=" + upper : "<=" + (-upper); - - add(domain, name + bin); - } - - public void addFromString(String feature, double value) { - assert feature.contains(" :: ") : feature; - if (value == 1) add(feature); - else add(feature, value); - } - - public void addDenseFeature(int index, double value) { - denseFeatures[index] += value; - } - - public void add(FeatureVector that) { add(that, AllFeatureMatcher.matcher); } - public void add(double scale, FeatureVector that) { add(scale, that, AllFeatureMatcher.matcher); } - public void add(FeatureVector that, FeatureMatcher matcher) { add(1, that, matcher); } - public void add(double scale, FeatureVector that, FeatureMatcher matcher) { - if (that.indicatorFeatures != null) { - for (String f : that.indicatorFeatures) - if (matcher.matches(f)) { - if (scale == 1) - add(f); - else - add(f, scale); - } - } - if (that.generalFeatures != null) { - for (Pair pair : that.generalFeatures) - if (matcher.matches(pair.getFirst())) - add(pair.getFirst(), scale * pair.getSecond()); - } - // dense features are always added - if (that.denseFeatures != null) { - for (int i = 0; i < denseFeatures.length; ++i) - denseFeatures[i] += scale * that.denseFeatures[i]; - } - } - - // Return the dot product between this feature vector and the weight vector (parameters). 
- public double dotProduct(Params params) { - double sum = 0; - if (indicatorFeatures != null) { - for (String f : indicatorFeatures) - sum += params.getWeight(f); - } - if (generalFeatures != null) { - for (Pair pair : generalFeatures) - sum += params.getWeight(pair.getFirst()) * pair.getSecond(); - } - if (denseFeatures != null) { - for (int i = 0; i < denseFeatures.length; ++i) - sum += params.getWeight(DENSE_NAME + "_" + i) * denseFeatures[i]; - } - return sum; - } - - // Increment |map| by a factor times this feature vector. - // converts the dense features to a non-dense representation - public void increment(double factor, Map map) { - increment(factor, map, AllFeatureMatcher.matcher); - } - public void increment(double factor, Map map, FeatureMatcher matcher) { - if (indicatorFeatures != null) { - for (String feature : indicatorFeatures) - if (matcher.matches(feature)) - MapUtils.incr(map, feature, factor); - } - if (generalFeatures != null) { - for (Pair pair : generalFeatures) - if (matcher.matches(pair.getFirst())) - MapUtils.incr(map, pair.getFirst(), factor * pair.getSecond()); - } - if (denseFeatures != null) { - for (int i = 0; i < denseFeatures.length; ++i) - MapUtils.incr(map, DENSE_NAME + "_" + i, factor * denseFeatures[i]); - } - } - - // returns a feature vector where all features are prefixed - public FeatureVector addPrefix(String prefix) { - FeatureVector res = new FeatureVector(); - if (indicatorFeatures != null) { - for (String feature : indicatorFeatures) - res.add(prefix + feature); - } - if (generalFeatures != null) { - for (Pair pair : generalFeatures) { - res.add(prefix + pair.getFirst(), pair.getSecond()); - } - } - return res; - } - - @JsonValue - public Map toMap() { - HashMap map = new HashMap(); - increment(1, map); - if (denseFeatures != null) { - for (int i = 0; i < denseFeatures.length; ++i) { - map.put(DENSE_NAME + "_" + i, denseFeatures[i]); - } - } - return map; - } - - @JsonCreator - public static FeatureVector fromMap(Map 
m) { - // TODO (rf): - // Encoding is lossy. We guess that value of 1 means indicator, but we - // could be wrong. - // TODO(joberant) - takes care of dense features in a non efficient way - int maxDenseFeaturesIndex = -1; - for (Map.Entry entry : m.entrySet()) { - if (isDenseFeature(entry.getKey())) { - int index = denseFeatureIndex(entry.getKey()); - if (index > maxDenseFeaturesIndex) - maxDenseFeaturesIndex = index; - } - } - - FeatureVector fv = maxDenseFeaturesIndex == -1 ? new FeatureVector() : new FeatureVector(maxDenseFeaturesIndex + 1); - for (Map.Entry entry : m.entrySet()) { - if (isDenseFeature(entry.getKey())) { - fv.addDenseFeature(denseFeatureIndex(entry.getKey()), entry.getValue()); - } else { - if (entry.getValue() == 1.0d) - fv.add(entry.getKey()); - else - fv.add(entry.getKey(), entry.getValue()); - } - } - return fv; - } - - private static boolean isDenseFeature(String f) { - return f.startsWith(DENSE_NAME); - } - private static int denseFeatureIndex(String denseFeature) { - assert denseFeature.startsWith(DENSE_NAME); - return Integer.parseInt(denseFeature.split("_")[1]); - } - - public static void logChoices(String prefix, Map choices) { - LogInfo.begin_track("%s choices", prefix); - for (Map.Entry e : choices.entrySet()) { - int value = e.getValue(); - if (value == 0) continue; - LogInfo.logs("%s %s", value > 0 ? 
"+" + value : value, e.getKey()); - } - LogInfo.end_track(); - } - - public static void logFeatureWeights(String prefix, Map features, Params params) { - List> entries = new ArrayList>(); - double sumValue = 0; - for (Map.Entry entry : features.entrySet()) { - String feature = entry.getKey(); - if (entry.getValue() == 0) continue; - double value = entry.getValue() * params.getWeight(feature); - if (opts.ignoreZeroWeight && value == 0) continue; - sumValue += value; - entries.add(new java.util.AbstractMap.SimpleEntry(feature, value)); - } - Collections.sort(entries, new ValueComparator(false)); - LogInfo.begin_track_printAll("%s features [sum = %s] (format is feature value * weight)", prefix, Fmt.D(sumValue)); - if (entries.size() / 2 > opts.logFeaturesLimit) { - for (Map.Entry entry : entries.subList(0, opts.logFeaturesLimit)) { - String feature = entry.getKey(); - double value = entry.getValue(); - double weight = params.getWeight(feature); - LogInfo.logs("%-50s %6s = %s * %s", "[ " + feature + " ]", Fmt.D(value), Fmt.D(MapUtils.getDouble(features, feature, 0)), Fmt.D(weight)); - } - LogInfo.logs("... 
(%d more features) ...", entries.size() - 2 * opts.logFeaturesLimit); - for (Map.Entry entry : entries.subList(entries.size() - opts.logFeaturesLimit, entries.size())) { - String feature = entry.getKey(); - double value = entry.getValue(); - double weight = params.getWeight(feature); - LogInfo.logs("%-50s %6s = %s * %s", "[ " + feature + " ]", Fmt.D(value), Fmt.D(MapUtils.getDouble(features, feature, 0)), Fmt.D(weight)); - } - } else { - for (Map.Entry entry : entries) { - String feature = entry.getKey(); - double value = entry.getValue(); - double weight = params.getWeight(feature); - LogInfo.logs("%-50s %6s = %s * %s", "[ " + feature + " ]", Fmt.D(value), Fmt.D(MapUtils.getDouble(features, feature, 0)), Fmt.D(weight)); - } - } - LogInfo.end_track(); - } - - public static void logFeatures(Map features) { - for (String key : features.keySet()) { - LogInfo.logs("%s\t%s", key, features.get(key)); - } - } - - public void clear() { - if (indicatorFeatures != null) - indicatorFeatures.clear(); - if (generalFeatures != null) - generalFeatures.clear(); - denseFeatures = null; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FilterNerSpanFn.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FilterNerSpanFn.java deleted file mode 100644 index 4da87a68e5..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FilterNerSpanFn.java +++ /dev/null @@ -1,62 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.LispTree; - -import java.util.ArrayList; -import java.util.List; - -/** - * Given a phrase at a particular position, keep it if its NER tags all match - * and are from a select set. 
- * - * @author Andrew Chou - */ -public class FilterNerSpanFn extends SemanticFn { - // Accepted NER tags (PERSON, LOCATION, ORGANIZATION, etc) - List acceptableNerTags = new ArrayList<>(); - - public void init(LispTree tree) { - super.init(tree); - for (int j = 1; j < tree.children.size(); j++) - acceptableNerTags.add(tree.child(j).value); - } - - public DerivationStream call(final Example ex, final Callable c) { - return new SingleDerivationStream() { - @Override - public Derivation createDerivation() { - if (!isValid(ex, c)) - return null; - else { - return new Derivation.Builder() - .withCallable(c) - .withFormulaFrom(c.child(0)) - .createDerivation(); - } - } - }; - } - - private boolean isValid(Example ex, Callable c) { - String nerTag = ex.languageInfo.nerTags.get(c.getStart()); - - // Check that it's an acceptable tag - if (!acceptableNerTags.contains(nerTag)) - return false; - - // Check to make sure that all the tags are the same - for (int j = c.getStart() + 1; j < c.getEnd(); j++) - if (!nerTag.equals(ex.languageInfo.nerTags.get(j))) - return false; - - // Make sure that the whole NE is matched - if (c.getStart() > 0 && nerTag.equals(ex.languageInfo.nerTags.get(c.getStart() - 1))) - return false; - - if (c.getEnd() < ex.languageInfo.nerTags.size() && - nerTag.equals(ex.languageInfo.nerTags.get(c.getEnd()))) - return false; - assert (c.getChildren().size() == 1) : c.getChildren(); - return true; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FilterPosTagFn.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FilterPosTagFn.java deleted file mode 100644 index 60de146643..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FilterPosTagFn.java +++ /dev/null @@ -1,98 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.LispTree; - -import java.util.ArrayList; -import java.util.List; - -/** - * Given a token at a particular position, keep it if its POS tag is from a - * select set. 
- * - * @author Andrew Chou - */ -public class FilterPosTagFn extends SemanticFn { - // Accepted POS tags (e.g., NNP, NNS, etc.) - List posTags = new ArrayList<>(); - String mode; - boolean reverse; - - public void init(LispTree tree) { - super.init(tree); - mode = tree.child(1).value; - if (!mode.equals("span") && !mode.equals("token")) - throw new RuntimeException("Illegal description for whether to filter by token or span: " + tree.child(1).value); - - for (int j = 2; j < tree.children.size(); j++) { - // Optionally, we can use a reverse filter (only reject certain tags) - if (j == 2 && tree.child(2).value.equals("reverse")) { - reverse = true; - continue; - } - posTags.add(tree.child(j).value); - } - } - - public DerivationStream call(final Example ex, final Callable c) { - return new SingleDerivationStream() { - @Override - public Derivation createDerivation() { - if (mode.equals("span")) - return callSpan(ex, c); - else - return callToken(ex, c); - } - }; - } - - private Derivation callToken(Example ex, Callable c) { - // Only apply to single tokens - String posTag = ex.posTag(c.getStart()); - if (c.getEnd() - c.getStart() != 1 || - (!posTags.contains(posTag) ^ reverse)) - return null; - else { - return new Derivation.Builder() - .withCallable(c) - .withFormulaFrom(c.child(0)) - .createDerivation(); - } - } - - private Derivation callSpan(Example ex, Callable c) { - if (isValidSpan(ex, c)) { - return new Derivation.Builder() - .withCallable(c) - .withFormulaFrom(c.child(0)) - .createDerivation(); - } else { - return null; - } - } - - private boolean isValidSpan(Example ex, Callable c) { - if (reverse) { - for (int j = c.getStart(); j < c.getEnd(); j++) { - if (posTags.contains(ex.posTag(j))) - return false; - } - return true; - } - String posTag = ex.posTag(c.getStart()); - // Check that it's an acceptable tag - if (!posTags.contains(posTag)) - return false; - // Check to make sure that all the tags are the same - for (int j = c.getStart() + 1; j < 
c.getEnd(); j++) { - if (!posTag.equals(ex.posTag(j))) - return false; - } - // Make sure that the whole POS sequence is matched - if (c.getStart() > 0 && posTag.equals(ex.posTag(c.getStart() - 1))) - return false; - if (c.getEnd() < ex.numTokens() && posTag.equals(ex.posTag(c.getEnd()))) - return false; - assert (c.getChildren().size() == 1) : c.getChildren(); - return true; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FilterSpanLengthFn.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FilterSpanLengthFn.java deleted file mode 100644 index d88d94dd59..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FilterSpanLengthFn.java +++ /dev/null @@ -1,43 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.LispTree; - -public class FilterSpanLengthFn extends SemanticFn { - - private int minLength; - private int maxLength; - - private static final int NO_MAXIMUM = -1; - - public FilterSpanLengthFn() { } - public FilterSpanLengthFn(int minLength) { - init(LispTree.proto.newList("FilterSpanLengthFn", "" + minLength)); - } - - public void init(LispTree tree) { - super.init(tree); - minLength = Integer.parseInt(tree.child(1).value); - if (tree.children.size() > 2) { - maxLength = Integer.parseInt(tree.child(2).value); - } else { - maxLength = NO_MAXIMUM; - } - } - - @Override - public DerivationStream call(Example ex, final Callable c) { - return new SingleDerivationStream() { - @Override - public Derivation createDerivation() { - if (c.getEnd() - c.getStart() < minLength) - return null; - if (maxLength != NO_MAXIMUM && c.getEnd() - c.getStart() > maxLength) - return null; - return new Derivation.Builder() - .withCallable(c) - .withFormulaFrom(c.child(0)) - .createDerivation(); - } - }; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FilterTokenFn.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FilterTokenFn.java deleted file mode 100644 index 61b8847fee..0000000000 --- 
a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FilterTokenFn.java +++ /dev/null @@ -1,52 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.LispTree; - -import java.util.ArrayList; -import java.util.List; - -/** - * Given a token at a particular position, keep it is from a select set. - * - * @author ppasupat - */ -public class FilterTokenFn extends SemanticFn { - List acceptableTokens = new ArrayList<>(); - String mode; - - public void init(LispTree tree) { - super.init(tree); - mode = tree.child(1).value; - if (!mode.equals("token") && !mode.equals("lemma")) - throw new RuntimeException("Illegal description for FilterTokenFn: " + mode); - for (int j = 2; j < tree.children.size(); j++) { - acceptableTokens.add(tree.child(j).value); - } - } - - public DerivationStream call(final Example ex, final Callable c) { - return new SingleDerivationStream() { - @Override - public Derivation createDerivation() { - if (!isValid(ex, c)) - return null; - else { - return new Derivation.Builder() - .withCallable(c) - .withFormulaFrom(c.child(0)) - .createDerivation(); - } - } - }; - } - - private boolean isValid(Example ex, Callable c) { - if (c.getEnd() - c.getStart() != 1) return false; - String token; - if ("token".equals(mode)) - token = ex.token(c.getStart()); - else - token = ex.lemmaToken(c.getStart()); - return acceptableTokens.contains(token); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FloatingFeatureComputer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FloatingFeatureComputer.java deleted file mode 100644 index da0f16cf94..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FloatingFeatureComputer.java +++ /dev/null @@ -1,65 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.util.*; - -import fig.basic.IntPair; - -/** - * Extract features specific to the floating parser. 
- * - * @author ppasupat - */ -public class FloatingFeatureComputer implements FeatureComputer { - - @Override - public void extractLocal(Example ex, Derivation deriv) { - extractFloatingRuleFeatures(ex, deriv); - extractFloatingSkipFeatures(ex, deriv); - } - - // Conjunction of the rule and each lemma in the sentence - void extractFloatingRuleFeatures(Example ex, Derivation deriv) { - if (!FeatureExtractor.containsDomain("floatRule")) return; - for (String lemma : ex.getLemmaTokens()) - deriv.addFeature("floatRule", "lemma=" + lemma + ",rule=" + deriv.rule.toString()); - } - - // Look for words with no anchored rule applied - void extractFloatingSkipFeatures(Example ex, Derivation deriv) { - if (!FeatureExtractor.containsDomain("floatSkip")) return; - if (!deriv.isRoot(ex.numTokens())) return; - // Get all anchored tokens - boolean[] anchored = new boolean[ex.numTokens()]; - List stack = new ArrayList<>(); - stack.add(deriv); - while (!stack.isEmpty()) { - Derivation currentDeriv = stack.remove(stack.size() - 1); - if (deriv.start != -1) { - for (int i = deriv.start; i < deriv.end; i++) - anchored[i] = true; - } else { - for (Derivation child : currentDeriv.children) { - stack.add(child); - } - } - } - // Fire features based on tokens that are (not) anchored - // See if named entities are skipped - for (IntPair pair : ex.languageInfo.getNamedEntitySpans()) { - for (int i = pair.first; i < pair.second; i++) { - if (!anchored[i]) { - String nerTag = ex.languageInfo.nerTags.get(i); - deriv.addFeature("floatSkip", "skipped-ner=" + nerTag); - break; - } - } - } - // See which POS tags are skipped - for (int i = 0; i < anchored.length; i++) { - if (!anchored[i]) { - deriv.addFeature("floatSkip", "skipped-pos=" + ex.posTag(i)); - } - } - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FloatingParser.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FloatingParser.java deleted file mode 100644 index 6f7e5eb811..0000000000 --- 
a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FloatingParser.java +++ /dev/null @@ -1,642 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.*; -import fig.exec.Execution; - -import java.io.PrintWriter; -import java.util.*; - -import static fig.basic.LogInfo.logs; - -/** - * A FloatingParser builds Derivations according to a Grammar without having to - * generate the input utterance. In contrast, a conventional chart parser (e.g., - * BeamParser) constructs parses for each span of the utterance. This is very - * inefficient when we're performing a more extractive semantic parsing task - * where many of the words are unaccounted for. - * - * Assume the Grammar is binarized and only has rules of the following form: - * - * $Cat => token - * $Cat => $Cat - * $Cat => token token - * $Cat => token $Cat - * $Cat => $Cat token - * $Cat => $Cat $Cat - * - * Each rule can be either anchored or floating (or technically, both). - * For floating rules, tokens on the RHS are ignored. 
- * - * Chart cells are either: - * - anchored: (cat, start, end) [these are effectively at depth 0] - * - floating: (cat, depth or size) [depends on anchored cells as base cases] - * - * With rules: - * cat => cat1 cat2 [binary] - * cat => cat1 [unary] - * - * Anchored Combinations: - * (cat1, start, end) => (cat, start, end) - * (cat1, start, mid), (cat2, mid, end) => (cat, start, end) - * (cat, start, end) => (cat, 0) [anchored => floating] - * - * Floating Combinations: - * [nothing] => (cat, 1) [from $Cat => token] - * (cat1, depth) => (cat, depth + 1) - * (cat1, depth1), (cat2, depth2) => (cat, max(depth1, depth2) + 1) - * - * If --useSizeInsteadOfDepth is turned on, the floating combinations become: - * [nothing] => (cat, 1) [from $Cat => token] - * (cat1, size) => (cat, size + 1) - * (cat1, size1), (cat2, size2) => (cat, size1 + size2 + 1) - * - * @author Percy Liang - */ -public class FloatingParser extends Parser { - public static class Options { - // Floating rules - @Option(gloss = "Whether rules without the (anchored 1) or (floating 1) tag should be anchored or floating") - public boolean defaultIsFloating = true; - @Option(gloss = "Limit on formula depth (or formula size when --useSizeInsteadOfDepth is true)") - public int maxDepth = 10; - @Option(gloss = "Put a limit on formula size instead of formula depth") - public boolean useSizeInsteadOfDepth = false; - @Option(gloss = "Whether floating rules are allowed to be applied consecutively") - public boolean consecutiveRules = true; - @Option(gloss = "Whether floating rule (rule $A (a)) should have depth 0 or 1") - public boolean initialFloatingHasZeroDepth = false; - @Option(gloss = "Filter child derivations using the type information from SemanticFn") - public boolean filterChildDerivations = true; - // Anchored rules - @Option(gloss = "Whether anchored spans/tokens can only be used once in a derivation") - public boolean useAnchorsOnce = false; - @Option(gloss = "Each span can be anchored this number 
of times (unused if useAnchorsOnce is active)") - public int useMaxAnchors = -1; - // Other options - @Option(gloss = "Whether to always execute the derivation") - public boolean executeAllDerivations = false; - @Option(gloss = "Whether to output a file with all utterances predicted") - public boolean printPredictedUtterances = false; - @Option(gloss = "Custom beam size at training time (default = Parser.beamSize)") - public int trainBeamSize = -1; - @Option(gloss = "Whether to beta reduce the formula") - public boolean betaReduce = false; - @Option(gloss = "DEBUG: Print amount of time spent on each rule") - public boolean summarizeRuleTime = false; - @Option(gloss = "Stop the parser if it has used more than this amount of time (in seconds)") - public int maxFloatingParsingTime = Integer.MAX_VALUE; - } - - public static Options opts = new Options(); - - public boolean earlyStopOnConsistent = false; - public int earlyStopOnNumDerivs = -1; - - public FloatingParser(Spec spec) { - super(spec); - } - - /** - * Set early stopping criteria - * - * @param onConsistent - * Stop when a consistent derivation is found. 
(Only triggered when computeExpectedCounts = true) - * @param onNumDerivs - * Stop when the number of featurized derivations exceed this number (set to -1 to disable) - * @return - * this - */ - public FloatingParser setEarlyStopping(boolean onConsistent, int onNumDerivs) { - this.earlyStopOnConsistent = onConsistent; - this.earlyStopOnNumDerivs = onNumDerivs; - return this; - } - - /** - * computeCatUnaryRules, but do not topologically sort floating rules - */ - @Override - protected void computeCatUnaryRules() { - // Handle anchored catUnaryRules - catUnaryRules = new ArrayList<>(); - Map> graph = new HashMap<>(); // Node from LHS to list of rules - for (Rule rule : grammar.rules) - if (rule.isCatUnary() && rule.isAnchored()) - MapUtils.addToList(graph, rule.lhs, rule); - - // Topologically sort catUnaryRules so that B->C occurs before A->B - Map done = new HashMap<>(); - for (String node : graph.keySet()) - traverse(catUnaryRules, node, graph, done); - - // Add floating catUnaryRules - for (Rule rule : grammar.rules) - if (rule.isCatUnary() && rule.isFloating()) - catUnaryRules.add(rule); - } - - // Helper function for transitive closure of floating rules. 
- protected void traverseFloatingRules(List orderedFloatingRules, - String node, Map> graph, Map done) { - Boolean d = done.get(node); - if (Boolean.TRUE.equals(d)) return; - if (Boolean.FALSE.equals(d)) - throw new RuntimeException("Found cycle of floating rules involving " + node); - done.put(node, false); - for (Rule rule : MapUtils.getList(graph, node)) { - for (String rhsCat : rule.rhs) { - if (Grammar.isIntermediate(rhsCat)) - traverseFloatingRules(orderedFloatingRules, rhsCat, graph, done); - } - orderedFloatingRules.add(rule); - } - done.put(node, true); - } - - public ParserState newParserState(Params params, Example ex, boolean computeExpectedCounts) { - return new FloatingParserState(this, params, ex, computeExpectedCounts); - } -} - -/** - * Stores FloatingParser information about parsing a particular example. The actual - * parsing code lives here. - * - * Currently, many of the fields in ParserState are not used (chart). - * Those should be refactored out. - * - * @author Percy Liang - */ -class FloatingParserState extends ParserState { - - // cell => list of derivations - // Anchored cells: cat[start,end] - // Floating cells: cat:depth - private final Map> chart = new HashMap<>(); - - private final DerivationPruner pruner; - private final CatSizeBound catSizeBound; - private Map ruleTime; - private boolean timeout = false; - - public FloatingParserState(FloatingParser parser, Params params, Example ex, boolean computeExpectedCounts) { - super(parser, params, ex, computeExpectedCounts); - pruner = new DerivationPruner(this); - catSizeBound = new CatSizeBound(FloatingParser.opts.maxDepth, parser.grammar); - } - - @Override - protected int getBeamSize() { - if (computeExpectedCounts && FloatingParser.opts.trainBeamSize > 0) - return FloatingParser.opts.trainBeamSize; - return Parser.opts.beamSize; - } - - // Construct state names. 
- private Object floatingCell(String cat, int depth) { - return (cat + ":" + depth).intern(); - } - private Object anchoredCell(String cat, int start, int end) { - return (cat + "[" + start + "," + end + "]").intern(); - } - private Object cell(String cat, int start, int end, int depth) { - return (start != -1) ? anchoredCell(cat, start, end) : floatingCell(cat, depth); - } - - private void addToChart(Object cell, Derivation deriv) { - if (!deriv.isFeaturizedAndScored()) // A derivation could be belong in multiple cells. - featurizeAndScoreDerivation(deriv); - if (Parser.opts.pruneErrorValues && deriv.value instanceof ErrorValue) return; - if (Parser.opts.verbose >= 4) - LogInfo.logs("addToChart %s: %s", cell, deriv); - MapUtils.addToList(chart, cell, deriv); - } - - private boolean isRootRule(Rule rule) { - return Rule.rootCat.equals(rule.lhs); - } - - private boolean applyRule(Rule rule, int start, int end, int depth, - Derivation child1, Derivation child2, String canonicalUtterance) { - if (timeout && !isRootRule(rule)) return false; - applyRuleActual(rule, start, end, depth, child1, child2, canonicalUtterance); - return true; - } - - private void applyRuleActual(Rule rule, int start, int end, int depth, Derivation child1, Derivation child2, String canonicalUtterance) { - if (Parser.opts.verbose >= 5) logs("applyRule %s [%s:%s] depth=%s, %s %s", rule, start, end, depth, child1, child2); - List children; - if (child1 == null) // 0-ary - children = Collections.emptyList(); - else if (child2 == null) // 1-ary - children = Collections.singletonList(child1); - else { - // Optional: ensure that each anchor is only used once per derivation. 
- if (FloatingParser.opts.useAnchorsOnce) { - if (FloatingRuleUtils.derivationAnchorsOverlap(child1, child2)) - return; - } else if (FloatingParser.opts.useMaxAnchors >= 0) { - if (FloatingRuleUtils.maxNumAnchorOverlaps(child1, child2) - > FloatingParser.opts.useMaxAnchors) - return; - } - children = ListUtils.newList(child1, child2); - } - - // optionally: ensure that rule being applied is not the same as one of the children's - if (!FloatingParser.opts.consecutiveRules) { - for (Derivation child : children) { - if (child.rule.equals(rule)) return; - } - } - - DerivationStream results = rule.sem.call(ex, - new SemanticFn.CallInfo(rule.lhs, start, end, rule, children)); - while (results.hasNext()) { - Derivation newDeriv = results.next(); - if (FloatingParser.opts.betaReduce) newDeriv = newDeriv.betaReduction(); - newDeriv.canonicalUtterance = canonicalUtterance; - - // make sure we execute - if (FloatingParser.opts.executeAllDerivations && !(newDeriv.type instanceof FuncSemType)) - newDeriv.ensureExecuted(parser.executor, ex.context); - - if (pruner.isPruned(newDeriv)) continue; - // Avoid repetitive floating cells - addToChart(cell(rule.lhs, start, end, depth), newDeriv); - if (depth == -1) // In addition, anchored cells become floating at level 0 - addToChart(floatingCell(rule.lhs, 0), newDeriv); - } - } - - private boolean applyAnchoredRule(Rule rule, int start, int end, Derivation child1, Derivation child2, String canonicalUtterance) { - return applyRule(rule, start, end, -1, child1, child2, canonicalUtterance); - } - - private boolean applyFloatingRule(Rule rule, int depth, Derivation child1, Derivation child2, String canonicalUtterance) { - return applyRule(rule, -1, -1, depth, child1, child2, canonicalUtterance); - } - - /** - * Return a collection of Derivation. 
- */ - private List getDerivations(Object cell) { - List derivations = chart.get(cell); - // logs("getDerivations %s => %s", cell, derivations); - if (derivations == null) return Derivation.emptyList; - return derivations; - } - - /** - * Return a collection of DerivationGroup. - * - * The rule should be applied on all derivations (or all pairs of derivations) in each DerivationGroup. - */ - private Collection getFilteredDerivations(Rule rule, Object cell1, Object cell2) { - List derivations1 = getDerivations(cell1), - derivations2 = (cell2 == null) ? null : getDerivations(cell2); - if (!FloatingParser.opts.filterChildDerivations) - return Collections.singleton(new ChildDerivationsGroup(derivations1, derivations2)); - // Try to filter down the number of partial logical forms - if (rule.getSem().supportFilteringOnTypeData()) - return rule.getSem().getFilteredDerivations(derivations1, derivations2); - return Collections.singleton(new ChildDerivationsGroup(derivations1, derivations2)); - } - - private Collection getFilteredDerivations(Rule rule, Object cell) { - return getFilteredDerivations(rule, cell, null); - } - - // Build derivations over span |start|, |end|. - private void buildAnchored(int start, int end) { - // Apply unary tokens on spans (rule $A (a)) - for (Rule rule : parser.grammar.rules) { - if (!rule.isAnchored()) continue; - if (rule.rhs.size() != 1 || rule.isCatUnary()) continue; - boolean match = (end - start == 1) && ex.token(start).equals(rule.rhs.get(0)); - if (!match) continue; - StopWatch stopWatch = new StopWatch().start(); - applyAnchoredRule(rule, start, end, null, null, rule.rhs.get(0)); - ruleTime.put(rule, ruleTime.getOrDefault(rule, 0L) + stopWatch.stop().ms); - } - - // Apply binaries on spans (rule $A ($B $C)), ... 
- for (int mid = start + 1; mid < end; mid++) { - for (Rule rule : parser.grammar.rules) { - if (!rule.isAnchored()) continue; - if (rule.rhs.size() != 2) continue; - - StopWatch stopWatch = new StopWatch().start(); - String rhs1 = rule.rhs.get(0); - String rhs2 = rule.rhs.get(1); - boolean match1 = (mid - start == 1) && ex.token(start).equals(rhs1); - boolean match2 = (end - mid == 1) && ex.token(mid).equals(rhs2); - - if (!Rule.isCat(rhs1) && Rule.isCat(rhs2)) { // token $Cat - if (match1) { - List derivations = getDerivations(anchoredCell(rhs2, mid, end)); - for (Derivation deriv : derivations) - applyAnchoredRule(rule, start, end, deriv, null, rhs1 + " " + deriv.canonicalUtterance); - } - } else if (Rule.isCat(rhs1) && !Rule.isCat(rhs2)) { // $Cat token - if (match2) { - List derivations = getDerivations(anchoredCell(rhs1, start, mid)); - for (Derivation deriv : derivations) - applyAnchoredRule(rule, start, end, deriv, null, deriv.canonicalUtterance + " " + rhs2); - } - } else if (!Rule.isCat(rhs1) && !Rule.isCat(rhs2)) { // token token - if (match1 && match2) - applyAnchoredRule(rule, start, end, null, null, rhs1 + " " + rhs2); - } else { // $Cat $Cat - List derivations1 = getDerivations(anchoredCell(rhs1, start, mid)); - List derivations2 = getDerivations(anchoredCell(rhs2, mid, end)); - for (Derivation deriv1 : derivations1) - for (Derivation deriv2 : derivations2) - applyAnchoredRule(rule, start, end, deriv1, deriv2, deriv1.canonicalUtterance + " " + deriv2.canonicalUtterance); - } - ruleTime.put(rule, ruleTime.getOrDefault(rule, 0L) + stopWatch.stop().ms); - } - } - - // Apply unary categories on spans (rule $A ($B)) - // Important: do this in topologically sorted order and after all the binaries are done. 
- for (Rule rule : parser.catUnaryRules) { - if (!rule.isAnchored()) continue; - StopWatch stopWatch = new StopWatch().start(); - List derivations = getDerivations(anchoredCell(rule.rhs.get(0), start, end)); - for (Derivation deriv : derivations) - applyAnchoredRule(rule, start, end, deriv, null, deriv.canonicalUtterance); - ruleTime.put(rule, ruleTime.getOrDefault(rule, 0L) + stopWatch.stop().ms); - } - } - - // Build floating derivations of exactly depth |depth|. - private void buildFloating(int depth) { - // Build a floating predicate from thin air - // (rule $A (a)); note that "a" is ignored - if (depth == (FloatingParser.opts.initialFloatingHasZeroDepth ? 0 : 1)) { - for (Rule rule : parser.grammar.rules) { - if (timeout && !isRootRule(rule)) continue; - if (!rule.isFloating()) continue; - if (rule.rhs.size() != 1 || rule.isCatUnary()) continue; - StopWatch stopWatch = new StopWatch().start(); - applyFloatingRule(rule, depth, null, null, rule.rhs.get(0)); - ruleTime.put(rule, ruleTime.getOrDefault(rule, 0L) + stopWatch.stop().ms); - } - } - - // Apply binaries on spans (rule $A ($B $C)), ... - for (Rule rule : parser.grammar.rules) { - if (timeout && !isRootRule(rule)) continue; - if (!rule.isFloating()) continue; - if (rule.rhs.size() != 2) continue; - if (catSizeBound.getBound(rule.lhs) < depth) continue; - - StopWatch stopWatch = new StopWatch().start(); - String rhs1 = rule.rhs.get(0); - String rhs2 = rule.rhs.get(1); - - if (!Rule.isCat(rhs1) && !Rule.isCat(rhs2)) { // token token - if (depth == (FloatingParser.opts.initialFloatingHasZeroDepth ? 
0 : 1)) { - applyFloatingRule(rule, depth, null, null, rhs1 + " " + rhs2); - } - - } else if (!Rule.isCat(rhs1) && Rule.isCat(rhs2)) { // token $Cat - List derivations = getDerivations(floatingCell(rhs2, depth - 1)); - for (Derivation deriv : derivations) - applyFloatingRule(rule, depth, deriv, null, rhs1 + " " + deriv.canonicalUtterance); - - } else if (Rule.isCat(rhs1) && !Rule.isCat(rhs2)) { // $Cat token - List derivations = getDerivations(floatingCell(rhs1, depth - 1)); - for (Derivation deriv : derivations) - applyFloatingRule(rule, depth, deriv, null, deriv.canonicalUtterance + " " + rhs2); - - } else { // $Cat $Cat - if (FloatingParser.opts.useSizeInsteadOfDepth) { - derivLoop: - for (int depth1 = 0; depth1 < depth; depth1++) { // sizes must add up to depth-1 (actually size-1) - int depth2 = depth - 1 - depth1; - for (ChildDerivationsGroup group : getFilteredDerivations(rule, floatingCell(rhs1, depth1), floatingCell(rhs2, depth2))) - for (Derivation deriv1 : group.derivations1) - for (Derivation deriv2 : group.derivations2) - if (!applyFloatingRule(rule, depth, deriv1, deriv2, deriv1.canonicalUtterance + " " + deriv2.canonicalUtterance)) - break derivLoop; - } - } else { - { - derivLoop: - for (int subDepth = 0; subDepth < depth; subDepth++) { // depth-1 <=depth-1 - for (ChildDerivationsGroup group : getFilteredDerivations(rule, floatingCell(rhs1, depth - 1), floatingCell(rhs2, subDepth))) - for (Derivation deriv1 : group.derivations1) - for (Derivation deriv2 : group.derivations2) - if (!applyFloatingRule(rule, depth, deriv1, deriv2, deriv1.canonicalUtterance + " " + deriv2.canonicalUtterance)) - break derivLoop; - } - } - { - derivLoop: - for (int subDepth = 0; subDepth < depth - 1; subDepth++) { // derivations) { - List myDerivations = chart.get(cell); - if (myDerivations != null) - derivations.addAll(myDerivations); - } - - /** - * Build derivations in a thread to allow timeout. 
- */ - class DerivationBuilder implements Runnable { - @Override public void run() { - // Base case ($TOKEN, $PHRASE) - for (Derivation deriv : gatherTokenAndPhraseDerivations()) { - addToChart(anchoredCell(deriv.cat, deriv.start, deriv.end), deriv); - addToChart(floatingCell(deriv.cat, 0), deriv); - } - - Set categories = new HashSet<>(); - for (Rule rule : parser.grammar.rules) - categories.add(rule.lhs); - - if (Parser.opts.verbose >= 1) - LogInfo.begin_track_printAll("Anchored"); - // Build up anchored derivations (like the BeamParser) - int numTokens = ex.numTokens(); - for (int len = 1; len <= numTokens; len++) { - for (int i = 0; i + len <= numTokens; i++) { - buildAnchored(i, i + len); - for (String cat : categories) { - String cell = anchoredCell(cat, i, i + len).toString(); - pruneCell(cell, chart.get(cell)); - } - } - } - if (Parser.opts.verbose >= 1) - LogInfo.end_track(); - - // Build up floating derivations - for (int depth = (FloatingParser.opts.initialFloatingHasZeroDepth ? 0 : 1); depth <= FloatingParser.opts.maxDepth; depth++) { - if (Parser.opts.verbose >= 1) - LogInfo.begin_track_printAll("%s = %d", FloatingParser.opts.useSizeInsteadOfDepth ? "SIZE" : "DEPTH", depth); - buildFloating(depth); - for (String cat : categories) { - String cell = floatingCell(cat, depth).toString(); - pruneCell(cell, chart.get(cell)); - } - if (Parser.opts.verbose >= 1) - LogInfo.end_track(); - // Early stopping - if (computeExpectedCounts && ((FloatingParser) parser).earlyStopOnConsistent) { - // Consistent derivation found? 
- String cell = floatingCell(Rule.rootCat, depth).toString(); - List rootDerivs = chart.get(cell); - if (rootDerivs != null) { - for (Derivation rootDeriv : rootDerivs) { - rootDeriv.ensureExecuted(parser.executor, ex.context); - if (parser.valueEvaluator.getCompatibility(ex.targetValue, rootDeriv.value) == 1) { - LogInfo.logs("Early stopped: consistent derivation found at depth = %d", depth); - return; - } - } - } - } - if (((FloatingParser) parser).earlyStopOnNumDerivs > 0) { - // Too many derivations generated? - if (numOfFeaturizedDerivs > ((FloatingParser) parser).earlyStopOnNumDerivs) { - LogInfo.logs("Early stopped: number of derivations exceeded at depth = %d", depth); - return; - } - } - } - } - } - - public void buildDerivations() { - DerivationBuilder derivBuilder = new DerivationBuilder(); - if (FloatingParser.opts.maxFloatingParsingTime == Integer.MAX_VALUE) { - derivBuilder.run(); - } else { - Thread parsingThread = new Thread(derivBuilder); - parsingThread.start(); - try { - parsingThread.join(FloatingParser.opts.maxFloatingParsingTime * 1000); - if (parsingThread.isAlive()) { - // This will only interrupt first or second passes, not the final candidate collection. - LogInfo.warnings("Parsing time exceeded %d seconds. 
Will now interrupt ...", FloatingParser.opts.maxFloatingParsingTime); - timeout = true; - parsingThread.interrupt(); - parsingThread.join(); - } - } catch (InterruptedException e) { - e.printStackTrace(); - LogInfo.fails("FloatingParser error: %s", e); - } - } - evaluation.add("timeout", timeout); - } - - // ============================================================ - // Main entry point - // ============================================================ - - @Override public void infer() { - LogInfo.begin_track_printAll("FloatingParser.infer()"); - ruleTime = new HashMap<>(); - - buildDerivations(); - - if (FloatingParser.opts.summarizeRuleTime) summarizeRuleTime(); - - // Collect final predicted derivations - addToDerivations(anchoredCell(Rule.rootCat, 0, numTokens), predDerivations); - for (int depth = 0; depth <= FloatingParser.opts.maxDepth; depth++) - addToDerivations(floatingCell(Rule.rootCat, depth), predDerivations); - - // Compute gradient with respect to the predicted derivations - ensureExecuted(); - if (computeExpectedCounts) { - expectedCounts = new HashMap<>(); - ParserState.computeExpectedCounts(predDerivations, expectedCounts); - } - - // Example summary - if (Parser.opts.verbose >= 2) { - LogInfo.begin_track_printAll("Summary of Example %s", ex.getUtterance()); - for (Derivation deriv : predDerivations) - LogInfo.logs("Generated: canonicalUtterance=%s, value=%s", deriv.canonicalUtterance, deriv.value); - LogInfo.end_track(); - } - - if (FloatingParser.opts.printPredictedUtterances) { - PrintWriter writer = IOUtils.openOutAppendEasy(Execution.getFile("canonical_utterances")); - PrintWriter fWriter = IOUtils.openOutAppendEasy(Execution.getFile("utterances_formula.tsv")); - Derivation.sortByScore(predDerivations); - for (Derivation deriv: predDerivations) { - if (deriv.score > -10) { - writer.println(String.format("%s\t%s", deriv.canonicalUtterance, deriv.score)); - fWriter.println(String.format("%s\t%s", deriv.canonicalUtterance, 
deriv.formula.toString())); - } - } - writer.close(); - fWriter.close(); - } - - LogInfo.end_track(); - } - - @Override - protected void setEvaluation() { - super.setEvaluation(); - evaluation.add("numCells", chart.size()); - } - - @SuppressWarnings("unused") - private void visualizeAnchoredChart(Set categories) { - for (String cat : categories) { - for (int len = 1; len <= numTokens; ++len) { - for (int i = 0; i + len <= numTokens; ++i) { - List derivations = getDerivations(anchoredCell(cat, i, i + len)); - for (Derivation deriv : derivations) { - LogInfo.logs("ParserState.visualize: %s(%s:%s): %s", cat, i, i + len, deriv); - } - } - } - } - } - - private void summarizeRuleTime() { - List> entries = new ArrayList<>(ruleTime.entrySet()); - entries.sort(new ValueComparator<>(true)); - LogInfo.begin_track_printAll("Rule time"); - for (Map.Entry entry : entries) { - LogInfo.logs("%9d : %s", entry.getValue(), entry.getKey()); - } - LogInfo.end_track(); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FloatingRuleUtils.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FloatingRuleUtils.java deleted file mode 100644 index 06d754fa17..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FloatingRuleUtils.java +++ /dev/null @@ -1,50 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.util.*; - -/** - * Utilities for floating rules. - */ -public final class FloatingRuleUtils { - private FloatingRuleUtils() { } // Should not be called. - - /** - * Get the anchored sub-derivations (sub-trees) of a derivation. - * I.e., gets all sub-derivations that are associated with a span of the utterance. 
- */ - public static List getDerivationAnchors(Derivation deriv) { - List anchors = new ArrayList<>(); - if (deriv.rule.isAnchored()) { - // if the sub-derivation is anchored to a span just add it - anchors.add(deriv); - } else if (!(deriv.children == null || deriv.children.size() == 0)) { - // if the derivation is not anchored but has children, recurse into children - for (Derivation child : deriv.children) - anchors.addAll(getDerivationAnchors(child)); - } - return anchors; - } - - /** - * Helper function to ensure that anchored spans are only used once in a final derivation. - * for example if A spans (or has a child that spans) [0, 3] and B spans (or has a child - * that spans) [2, 4] then we have an overlap. - */ - public static boolean derivationAnchorsOverlap(Derivation a, Derivation b) { - boolean[] aAnchors = a.getAnchoredTokens(), bAnchors = b.getAnchoredTokens(); - for (int i = 0; i < aAnchors.length && i < bAnchors.length; i++) - if (aAnchors[i] && bAnchors[i]) return true; - return false; - } - - public static int maxNumAnchorOverlaps(Derivation a, Derivation b) { - int[] aAnchors = a.getNumAnchors(), bAnchors = b.getNumAnchors(); - int n = Math.max(aAnchors.length, bAnchors.length); - int maxAnchors = 0; - for (int i = 0; i < n; i++) - maxAnchors = Math.max(maxAnchors, - (i < aAnchors.length ? aAnchors[i] : 0) + - (i < bAnchors.length ? 
bAnchors[i] : 0)); - return maxAnchors; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Formula.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Formula.java deleted file mode 100644 index 76070b83b2..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Formula.java +++ /dev/null @@ -1,61 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonValue; -import com.google.common.base.Function; -import fig.basic.LispTree; - -import java.util.List; - -/** - * A Formula is a logical form, which is the result of semantic parsing. Current - * implementation is lambda calculus with primitives like description logic and - * DCS to lessen the use of variables. - *

- * Important note: define hashCode() for each Formula which only depends on the - * value, not on random bits (don't include object IDs or enums). - * - * @author Percy Liang - */ -public abstract class Formula { - // cache the hashcode - private int hashCode = -1; - // Serialize as LispTree. - public abstract LispTree toLispTree(); - - // Recursively perform some operation on each formula. - // Apply to formulas. If |func| returns false, then recurse on children. - public abstract void forEach(Function func); - - // Recursively perform some operation on each formula. - // Apply to formulas. If |func| returns null, then recurse on children. - public abstract Formula map(Function func); - - // Recursively perform some operation on each formula. - // Apply to formulas. If |func| returns an empty set or |alwaysRecurse|, then recurse on children. - public abstract List mapToList(Function> func, boolean alwaysRecurse); - - @JsonValue - public String toString() { return toLispTree().toString(); } - - @JsonCreator - public static Formula fromString(String str) { - return Formulas.fromLispTree(LispTree.proto.parseFromString(str)); - } - - @Override public abstract boolean equals(Object o); - @Override public int hashCode() { - if (hashCode == -1) - hashCode = computeHashCode(); - return hashCode; - } - - public abstract int computeHashCode(); - - public static Formula nullFormula = new PrimitiveFormula() { - public LispTree toLispTree() { return LispTree.proto.newLeaf("null"); } - @SuppressWarnings({"equalshashcode"}) - @Override public boolean equals(Object o) { return this == o; } - public int computeHashCode() { return 0; } - }; -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FormulaMatchExecutor.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FormulaMatchExecutor.java deleted file mode 100644 index c657b1577a..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FormulaMatchExecutor.java +++ /dev/null @@ -1,18 +0,0 @@ 
-package edu.stanford.nlp.sempre; - -/** - * Return a string representation of a formula as the value. This enables - * evaluation against exact match of logical forms. This is overly stringent - * right now. - */ -public class FormulaMatchExecutor extends Executor { - public Response execute(Formula formula, ContextValue context) { - formula = Formulas.betaReduction(formula); - Value value; - if (formula instanceof ValueFormula) - value = ((ValueFormula) formula).value; - else - value = new StringValue(formula.toLispTree().toString()); - return new Response(value); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Formulas.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Formulas.java deleted file mode 100644 index a41cfa7a6d..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Formulas.java +++ /dev/null @@ -1,409 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.google.common.base.Function; -import com.google.common.collect.Lists; -import fig.basic.LispTree; - -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -/** - * Utilities for working with Formulas. 
- * - * @author Percy Liang - */ -public abstract class Formulas { - public static Formula fromLispTree(LispTree tree) { - // Try to interpret as ValueFormula - if (tree.isLeaf()) // Leaves are name values - return new ValueFormula(new NameValue(tree.value, null)); - Value value = Values.fromLispTreeOrNull(tree); // General case - if (value != null) - return new ValueFormula(value); - - String func = tree.child(0).value; - if (func != null) { - if (func.equals("var")) - return new VariableFormula(tree.child(1).value); - if (func.equals("lambda")) - return new LambdaFormula(tree.child(1).value, fromLispTree(tree.child(2))); - if (func.equals("mark")) - return new MarkFormula(tree.child(1).value, fromLispTree(tree.child(2))); - if (func.equals("not")) - return new NotFormula(fromLispTree(tree.child(1))); - if (func.equals("reverse")) - return new ReverseFormula(fromLispTree(tree.child(1))); - if (func.equals("call")) { - Formula callFunc = fromLispTree(tree.child(1)); - List args = Lists.newArrayList(); - for (int i = 2; i < tree.children.size(); i++) - args.add(fromLispTree(tree.child(i))); - return new CallFormula(callFunc, args); - } - } - - { // Merge: (and (fb:type.object.type fb:people.person) (fb:people.person.children fb:en.barack_obama)) - MergeFormula.Mode mode = MergeFormula.parseMode(func); - if (mode != null) - return new MergeFormula(mode, fromLispTree(tree.child(1)), fromLispTree(tree.child(2))); - } - - { // Aggregate: (count (fb:type.object.type fb:people.person)) - AggregateFormula.Mode mode = AggregateFormula.parseMode(func); - if (mode != null) - return new AggregateFormula(mode, fromLispTree(tree.child(1))); - } - - { // Superlative: (argmax 1 1 (fb:type.object.type fb:people.person) (lambda x (!fb:people.person.height_meters (var x)))) - SuperlativeFormula.Mode mode = SuperlativeFormula.parseMode(func); - if (mode != null) { - Formula rank = parseIntToFormula(tree.child(1)); - Formula count = parseIntToFormula(tree.child(2)); - return new 
SuperlativeFormula( - mode, - rank, - count, - fromLispTree(tree.child(3)), - fromLispTree(tree.child(4))); - } - } - - { // Arithmetic: (- (!fb:people.person.height_meters (var x)) (!fb:people.person.height_meters (var y))) - ArithmeticFormula.Mode mode = ArithmeticFormula.parseMode(func); - if (mode != null) - return new ArithmeticFormula(mode, fromLispTree(tree.child(1)), fromLispTree(tree.child(2))); - } - - { // ActionFormula - ActionFormula.Mode mode = ActionFormula.parseMode(func); - if (mode != null) { - List args = Lists.newArrayList(); - for (int i = 1; i < tree.children.size(); i++) - args.add(fromLispTree(tree.child(i))); - return new ActionFormula(mode, args); - } - } - - // Default is join: (fb:type.object.type fb:people.person) - if (tree.children.size() != 2) - throw new RuntimeException("Invalid number of arguments for join (want 2): " + tree); - return new JoinFormula(fromLispTree(tree.child(0)), fromLispTree(tree.child(1))); - } - - // Special case to enable "argmax 1 1" rather than "argmax (number 1) (number 1)" - private static Formula parseIntToFormula(LispTree tree) { - try { - int i = Integer.parseInt(tree.value); - double d = i; - NumberValue value = new NumberValue(d); - return new ValueFormula(value); - } catch (NumberFormatException e) { - Formula formula = fromLispTree(tree); - if (!(formula instanceof PrimitiveFormula)) - throw new RuntimeException("Rank and count of argmax must be variables or numbers"); - return formula; - } - } - - // Replace occurrences of the variable reference |var| with |formula|. - public static Formula substituteVar(Formula formula, final String var, final Formula replaceFormula) { - return formula.map( - new Function() { - public Formula apply(Formula formula) { - if (formula instanceof VariableFormula) { // Replace variable - String name = ((VariableFormula) formula).name; - return var.equals(name) ? 
replaceFormula : formula; - } else if (formula instanceof LambdaFormula) { - if (((LambdaFormula) formula).var.equals(var)) // |var| is bound, so don't substitute inside - return formula; - } - return null; - } - }); - } - - // Replace top-level occurrences of |searchFormula| inside |formula| with |replaceFormula|. - public static Formula substituteFormula(Formula formula, final Formula searchFormula, final Formula replaceFormula) { - return formula.map( - new Function() { - public Formula apply(Formula formula) { - if (formula.equals(searchFormula)) return replaceFormula; - return null; - } - }); - } - - // Beta-reduction. - public static Formula lambdaApply(LambdaFormula func, Formula arg) { - return substituteVar(func.body, func.var, arg); - } - - // Apply all the nested LambdaFormula's. - public static Formula betaReduction(Formula formula) { - return formula.map( - new Function() { - public Formula apply(Formula formula) { - if (formula instanceof JoinFormula) { - Formula relation = betaReduction(((JoinFormula) formula).relation); - Formula child = ((JoinFormula) formula).child; - if (relation instanceof LambdaFormula) - return betaReduction(lambdaApply((LambdaFormula) relation, child)); - } - return null; - } - }); - } - - // Return whether |formula| contains a free instance of |var|. 
- public static boolean containsFreeVar(Formula formula, VariableFormula var) { - if (formula instanceof PrimitiveFormula) - return formula.equals(var); - if (formula instanceof MergeFormula) { - MergeFormula merge = (MergeFormula) formula; - return containsFreeVar(merge.child1, var) || containsFreeVar(merge.child2, var); - } - if (formula instanceof JoinFormula) { - JoinFormula join = (JoinFormula) formula; - return containsFreeVar(join.relation, var) || containsFreeVar(join.child, var); - } - if (formula instanceof LambdaFormula) { - LambdaFormula lambda = (LambdaFormula) formula; - if (lambda.var.equals(var.name)) return false; // Blocked by bound variable - return containsFreeVar(lambda.body, var); - } - if (formula instanceof MarkFormula) { - MarkFormula mark = (MarkFormula) formula; - // Note: marks are transparent, unlike lambdas - return containsFreeVar(mark.body, var); - } - if (formula instanceof ReverseFormula) { - return containsFreeVar(((ReverseFormula) formula).child, var); - } - if (formula instanceof AggregateFormula) { - return containsFreeVar(((AggregateFormula) formula).child, var); - } - if (formula instanceof ArithmeticFormula) { - return containsFreeVar(((ArithmeticFormula) formula).child1, var) || containsFreeVar(((ArithmeticFormula) formula).child2, var); - } - if (formula instanceof SuperlativeFormula) { - SuperlativeFormula superlative = (SuperlativeFormula) formula; - return containsFreeVar(superlative.rank, var) || containsFreeVar(superlative.count, var) || containsFreeVar(superlative.head, var) || containsFreeVar(superlative.relation, var); - } - if (formula instanceof NotFormula) { - NotFormula notForm = (NotFormula) formula; - return containsFreeVar(notForm.child, var); - } - throw new RuntimeException("Unhandled: " + formula); - } - - // TODO(joberant): use Formula.map, and use CanonicalNames.isReverseProperty, etc. 
- public static Set extractAtomicFreebaseElements(Formula formula) { - Set res = new HashSet<>(); - LispTree formulaTree = formula.toLispTree(); - extractAtomicFreebaseElements(formulaTree, res); - return res; - } - private static void extractAtomicFreebaseElements(LispTree formulaTree, - Set res) { - if (formulaTree.isLeaf()) { // base - if (formulaTree.value.startsWith("fb:")) - res.add(formulaTree.value); - else if (formulaTree.value.startsWith("!fb:")) - res.add(formulaTree.value.substring(1)); - } else { // recursion - for (LispTree child : formulaTree.children) { - extractAtomicFreebaseElements(child, res); - } - } - } - - // TODO(jonathan): move to feature extractor (this function doesn't seem fundamental) - public static boolean isCountFormula(Formula formula) { - if (formula instanceof AggregateFormula) - return ((AggregateFormula) formula).mode == AggregateFormula.Mode.count; - if (formula instanceof JoinFormula) { - Formula relation = ((JoinFormula) formula).relation; - if (relation instanceof LambdaFormula) { - Formula l = ((LambdaFormula) relation).body; - if (l instanceof AggregateFormula) - return ((AggregateFormula) l).mode == AggregateFormula.Mode.count; - } - } - return false; - } - - public static String getString(Formula formula) { - if (formula instanceof ValueFormula) { - Value value = ((ValueFormula) formula).value; - if (value instanceof StringValue) - return ((StringValue) value).value; - if (value instanceof NameValue) - return ((NameValue) value).id; - if (value instanceof NumberValue) - return ((NumberValue) value).value + ""; - } else if (formula instanceof VariableFormula) { - return ((VariableFormula) formula).name; - } - return null; - } - - public static String getNameId(Formula formula) { - if (formula instanceof ValueFormula) { - Value value = ((ValueFormula) formula).value; - if (value instanceof NameValue) - return ((NameValue) value).id; - } - return null; - } - - public static double getDouble(Formula formula) { - if (formula 
instanceof ValueFormula) { - Value value = ((ValueFormula) formula).value; - if (value instanceof NumberValue) - return ((NumberValue) value).value; - } - return Double.NaN; - } - - public static int getInt(Formula formula) { - return (int) getDouble(formula); - } - - /** - * If the formula represents a binary (e.g., fb:a.b.c or <=), - * return the ID of the binary as a string. - * If the formula represents a reversed binary (e.g., !fb:a.b.c or (reverse fb:a.b.c)), - * return "!" + ID of the binary. - * Otherwise, return null. - */ - public static String getBinaryId(Formula formula) { - if (formula instanceof ReverseFormula) { - String childId = getBinaryId(((ReverseFormula) formula).child); - if (childId == null) return null; - return CanonicalNames.reverseProperty(childId); - } else if (formula instanceof ValueFormula) { - Value v = ((ValueFormula) formula).value; - if (v instanceof NameValue) { - return ((NameValue) v).id; - } - } - return null; - } - - public static ValueFormula newNameFormula(String id) { - return new ValueFormula(new NameValue(id)); - } - - /* - * Extract all subformulas in a string format (to also have primitive values) - * TODO(joberant): replace this with Formulas.map - */ - public static Set extractSubparts(Formula f) { - Set res = new HashSet(); - extractSubpartsRecursive(f, res); - return res; - } - - private static void extractSubpartsRecursive(Formula f, Set res) { - // base - res.add(f.toString()); - // recurse - if (f instanceof AggregateFormula) { - AggregateFormula aggFormula = (AggregateFormula) f; - extractSubpartsRecursive(aggFormula.child, res); - } else if (f instanceof CallFormula) { - CallFormula callFormula = (CallFormula) f; - extractSubpartsRecursive(callFormula.func, res); - for (Formula argFormula : callFormula.args) - extractSubpartsRecursive(argFormula, res); - } else if (f instanceof JoinFormula) { - JoinFormula joinFormula = (JoinFormula) f; - extractSubpartsRecursive(joinFormula.relation, res); - 
extractSubpartsRecursive(joinFormula.child, res); - } else if (f instanceof LambdaFormula) { - LambdaFormula lambdaFormula = (LambdaFormula) f; - extractSubpartsRecursive(lambdaFormula.body, res); - } else if (f instanceof MarkFormula) { - MarkFormula markFormula = (MarkFormula) f; - extractSubpartsRecursive(markFormula.body, res); - } else if (f instanceof MergeFormula) { - MergeFormula mergeFormula = (MergeFormula) f; - extractSubpartsRecursive(mergeFormula.child1, res); - extractSubpartsRecursive(mergeFormula.child2, res); - } else if (f instanceof NotFormula) { - NotFormula notFormula = (NotFormula) f; - extractSubpartsRecursive(notFormula.child, res); - } else if (f instanceof ReverseFormula) { - ReverseFormula revFormula = (ReverseFormula) f; - extractSubpartsRecursive(revFormula.child, res); - } else if (f instanceof SuperlativeFormula) { - SuperlativeFormula superlativeFormula = (SuperlativeFormula) f; - extractSubpartsRecursive(superlativeFormula.rank, res); - extractSubpartsRecursive(superlativeFormula.count, res); - extractSubpartsRecursive(superlativeFormula.head, res); - extractSubpartsRecursive(superlativeFormula.relation, res); - } - } - - // Takes in a |rawFormula| which represents a function x => y and returns a - // function y => x. - public static Formula reverseFormula(Formula rawFormula) { - if (rawFormula instanceof ValueFormula) { - @SuppressWarnings({ "unchecked" }) - ValueFormula vf = (ValueFormula) rawFormula; - return reverseNameFormula(vf); - } else if (rawFormula instanceof LambdaFormula) { - // Convert (lambda x (relation1 (relation2 (var x)))) <=> (lambda x (!relation2 (!relation1 (var x)))) - // Note: currently only handles chains. Make this more generic. 
- LambdaFormula formula = (LambdaFormula) rawFormula; - if (isChain(formula.body)) - return new LambdaFormula(formula.var, reverseChain(formula.body, new VariableFormula(formula.var))); - else - return new ReverseFormula(formula); - } else { - return new ReverseFormula(rawFormula); - // throw new RuntimeException("Not handled: " + rawFormula); - } - } - - // Helper function for reverseFormula(). - // Check to see if formula has the form (a (b (c (var x)))) - private static boolean isChain(Formula source) { - if (source instanceof JoinFormula) { - JoinFormula join = (JoinFormula) source; - return isChain(join.child); - } - return source instanceof VariableFormula; - } - // Reverse the chain - private static Formula reverseChain(Formula source, Formula result) { - if (source instanceof JoinFormula) { - JoinFormula join = (JoinFormula) source; - return reverseChain(join.child, new JoinFormula(reverseFormula(join.relation), result)); - } else if (source instanceof VariableFormula) { - return result; - } else { - throw new RuntimeException("Not handled: " + source); - } - } - - // !fb:people.person.place_of_birth <=> fb:people.person.place_of_birth - private static ValueFormula reverseNameFormula(ValueFormula formula) { - String id = formula.value.id; - return new ValueFormula<>(new NameValue(CanonicalNames.reverseProperty(id))); - } - - // Try to simplify reverse subformulas within the specified formula - public static Formula simplifyReverses(Formula formula) { - return formula.map(new Function() { - public Formula apply(Formula formula) { - if (formula instanceof ReverseFormula) - return reverseFormula(((ReverseFormula) formula).child); - return null; - } - }); - } - - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FuncSemType.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FuncSemType.java deleted file mode 100644 index cebfa40f44..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FuncSemType.java +++ /dev/null @@ 
-1,47 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.LispTree; - -/** - * FuncSemType really is used to represent a pair type (t1, t2) (despite its name). - * The lisp tree representation is (-> retType argType). - */ -public class FuncSemType extends SemType { - public final SemType argType; - public final SemType retType; - public FuncSemType(SemType argType, SemType retType) { - if (argType == null) throw new RuntimeException("Null argType"); - if (retType == null) throw new RuntimeException("Null retType"); - this.argType = argType; - this.retType = retType; - } - public FuncSemType(String argType, String retType) { - this(new AtomicSemType(argType), new AtomicSemType(retType)); - } - public boolean isValid() { return true; } - - public SemType meet(SemType that) { - if (that instanceof TopSemType) return this; - if (!(that instanceof FuncSemType)) return SemType.bottomType; - // Perform the meet elementwise (remember, treat this as a pair type). - FuncSemType thatFunc = (FuncSemType) that; - SemType newArgType = argType.meet(thatFunc.argType); - if (!newArgType.isValid()) return SemType.bottomType; - SemType newRetType = retType.meet(thatFunc.retType); - if (!newRetType.isValid()) return SemType.bottomType; - return new FuncSemType(newArgType, newRetType); - } - - public SemType apply(SemType that) { - if (argType.meet(that).isValid()) return retType; - return SemType.bottomType; - } - public FuncSemType reverse() { return new FuncSemType(retType, argType); } - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("->"); - tree.addChild(argType.toLispTree()); - tree.addChild(retType.toLispTree()); - return tree; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FuzzyMatchFn.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FuzzyMatchFn.java deleted file mode 100644 index 9f427c5f44..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FuzzyMatchFn.java +++ /dev/null 
@@ -1,125 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.util.*; - -import fig.basic.*; - -/** - * Similar to LexiconFn, but list all approximate matches from a FuzzyMatchable instance. - * - * @author ppasupat - */ -public class FuzzyMatchFn extends SemanticFn { - public static class Options { - @Option public int verbose = 0; - } - public static Options opts = new Options(); - - public enum FuzzyMatchFnMode { UNARY, BINARY, ENTITY, - ORDER_BEFORE, ORDER_AFTER, ORDER_NEXT, ORDER_PREV, ORDER_ADJACENT }; - private FuzzyMatchFnMode mode; - - // Generate all possible denotations regardless of the phrase - private boolean matchAny = false; - - public void init(LispTree tree) { - super.init(tree); - for (int i = 1; i < tree.children.size(); i++) { - String value = tree.child(i).value; - if ("unary".equals(value)) this.mode = FuzzyMatchFnMode.UNARY; - else if ("binary".equals(value)) this.mode = FuzzyMatchFnMode.BINARY; - else if ("entity".equals(value)) this.mode = FuzzyMatchFnMode.ENTITY; - else if ("any".equals(value)) this.matchAny = true; - else if ("before".equals(value)) this.mode = FuzzyMatchFnMode.ORDER_BEFORE; - else if ("after".equals(value)) this.mode = FuzzyMatchFnMode.ORDER_AFTER; - else if ("next".equals(value)) this.mode = FuzzyMatchFnMode.ORDER_NEXT; - else if ("prev".equals(value)) this.mode = FuzzyMatchFnMode.ORDER_PREV; - else if ("adjacent".equals(value)) this.mode = FuzzyMatchFnMode.ORDER_ADJACENT; - else throw new RuntimeException("Invalid argument: " + value); - } - } - - public FuzzyMatchFnMode getMode() { return mode; } - public boolean getMatchAny() { return matchAny; } - - @Override - public DerivationStream call(Example ex, Callable c) { - return new LazyFuzzyMatchFnDerivs(ex, c, mode, matchAny); - } - - // ============================================================ - // Derivation Stream - // ============================================================ - - public static class LazyFuzzyMatchFnDerivs extends MultipleDerivationStream 
{ - final Example ex; - final FuzzyMatchable matchable; - final Callable c; - final String query; - final List sentence; - final FuzzyMatchFnMode mode; - final boolean matchAny; - - int index = 0; - List formulas; - - public LazyFuzzyMatchFnDerivs(Example ex, Callable c, FuzzyMatchFnMode mode, boolean matchAny) { - this.ex = ex; - if (ex.context != null && ex.context.graph != null && ex.context.graph instanceof FuzzyMatchable) - this.matchable = (FuzzyMatchable) ex.context.graph; - else - this.matchable = null; - this.c = c; - this.query = (matchAny || c.getChildren().isEmpty()) ? null : c.childStringValue(0); - if (c.getRule().rhs.size() == 1 && Rule.phraseCat.equals(c.getRule().rhs.get(0))) { - sentence = ex.getTokens(); - } else if (c.getRule().rhs.size() == 1 && Rule.lemmaPhraseCat.equals(c.getRule().rhs.get(0))) { - sentence = ex.getLemmaTokens(); - } else { - sentence = null; - } - this.mode = mode; - this.matchAny = matchAny; - if (opts.verbose >= 2) - LogInfo.logs("FuzzyMatchFn[%s]%s.call: %s", - this.mode, (this.matchAny ? 
"[matchAny]" : ""), this.query); - } - - @Override - public Derivation createDerivation() { - if (matchable == null) return null; - if (query == null && !matchAny) return null; - - // Compute the formulas if not computed yet - if (formulas == null) { - if (matchAny) - formulas = new ArrayList<>(matchable.getAllFormulas(mode)); - else if (sentence != null) - formulas = new ArrayList<>(matchable.getFuzzyMatchedFormulas(sentence, c.getStart(), c.getEnd(), mode)); - else - formulas = new ArrayList<>(matchable.getFuzzyMatchedFormulas(query, mode)); - } - - // Use the next formula to create a derivation - if (index >= formulas.size()) return null; - Formula formula = formulas.get(index++); - SemType type = TypeInference.inferType(formula); - - FeatureVector features = new FeatureVector(); - if (FeatureExtractor.containsDomain("fuzzyMatch")) { - features.add("fuzzyMatch", "mode=" + mode); - if (matchAny) - features.add("fuzzyMatch", "mode=" + mode + "_any"); - } - - return new Derivation.Builder() - .withCallable(c) - .formula(formula) - .type(type) - .localFeatureVector(features) - .createDerivation(); - } - - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FuzzyMatchable.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FuzzyMatchable.java deleted file mode 100644 index 2a4e4aaaef..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/FuzzyMatchable.java +++ /dev/null @@ -1,39 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.util.*; - -import edu.stanford.nlp.sempre.FuzzyMatchFn.FuzzyMatchFnMode; - -/** - * Interface for knowledge sources that, given a phrase, can retrieve all its - * predicates that (fuzzily) match the phrase. - * - * @author ppasupat - */ -public interface FuzzyMatchable { - - /** - * Return all entities / unaries / binaries that approximately match the - * string formed by joining sentence[startIndex], ..., sentence[endIndex-1] - * with spaces. 
- * - * This allows the algorithm to consider the context of the term being matched. - * - * One possible implementation, which ignores the context, is calling - * getFuzzyMatchedFormulas(term, mode) where - * term = String.join(" ", sentence.subList(startIndex, endIndex)) - */ - public abstract Collection getFuzzyMatchedFormulas( - List sentence, int startIndex, int endIndex, FuzzyMatchFnMode mode); - - /** - * Return all entities / unaries / binaries that approximately match the term - */ - public abstract Collection getFuzzyMatchedFormulas(String term, FuzzyMatchFnMode mode); - - /** - * Return all possible entities / unaries / binaries - */ - public abstract Collection getAllFormulas(FuzzyMatchFnMode mode); - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Grammar.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Grammar.java deleted file mode 100644 index 72cf0c8bfe..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Grammar.java +++ /dev/null @@ -1,563 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.google.common.collect.Iterables; -import com.google.common.collect.Lists; -import com.google.common.collect.Sets; - -import fig.basic.*; -import fig.exec.Execution; - -import java.io.File; -import java.io.PrintWriter; -import java.util.*; - -/** - * The grammar is a set of rules of the form: - * (rule lhsCat (rhs ... rhs) semanticFn (key value) ... (key value)) - * - * Some programming-language-esque features are supported to make life easier. - * All grammar-file variables should start with '@' (both loop variables and global definitions). - * - * Includes (reads in commands from the given file): - * (include base.grammar) - * Control flow: - * (when (and tag1 (not tag2) (or tag3 tag4)) ...) 
- * Macro definitions: - * (def @type fb:type.object.type) - * Iteration: - * (for @x (a an the) (rule $DT (@x) (IdentityFn))) - * - * @author Percy Liang - */ -public class Grammar { - public static class Options { - @Option public List inPaths = new ArrayList<>(); - @Option(gloss = "Variables which are used to interpret the grammar file") - public List tags = new ArrayList<>(); - @Option public boolean binarizeRules = true; - @Option(gloss = "Specifiy which ApplyFn to use: defaults to JoinFn when null") - public String useApplyFn = null; - } - - public static Options opts = new Options(); - - // All the rules in the grammar. Each parser can read these and transform - // them however the parser wishes. - // This contains binarized rules - protected ArrayList rules = new ArrayList<>(); - public List getRules() { return rules; } - - Map macros = new HashMap<>(); // Map from macro name to its replacement value - Set validTags = new TreeSet<>(); // All tags in the grammar file - - // Verbatim copy of all the lines read, so we can preserve the grammar file. - List statements = new ArrayList<>(); - - public void read() { - LogInfo.begin_track("Grammar.read"); - read(opts.inPaths); - LogInfo.logs("%s rules", rules.size()); - LogInfo.end_track(); - } - - public void read(String path) { read(Collections.singletonList(path)); } - public void read(List paths) { - for (String path : paths) - readOnePath(path, Sets.newHashSet(opts.tags)); - verifyValid(); - } - - private void verifyValid() { - // Make sure that all the categories which are used are actually defined. 
- Set defined = new HashSet<>(); - defined.add(Rule.tokenCat); - defined.add(Rule.phraseCat); - defined.add(Rule.lemmaTokenCat); - defined.add(Rule.lemmaPhraseCat); - for (Rule rule : rules) - defined.add(rule.lhs); - - // Make sure every non-terminal is defined - for (Rule rule : rules) { - for (String item : rule.rhs) { - if (Rule.isCat(item) && !defined.contains(item)) { - LogInfo.warnings("Category not defined in the grammar: %s; used in rule: %s", item, rule); - } - } - } - - // Check if all tags are defined in a grammar file - LogInfo.logs("Valid tags: %s", validTags); - LogInfo.logs("Used tags: %s", new TreeSet<>(opts.tags)); - for (String tag : opts.tags) { - if (!validTags.contains(tag)) { - LogInfo.warnings("Tag %s not defined in grammar", tag); - } - } - } - - /** - * @param contextPath Path relative to which grammar includes and - * such are expanded. - */ - public void addStatement(String stmt, String contextPath, Set tags) { - statements.add(stmt); - interpret(contextPath, LispTree.proto.parseFromString(stmt), Sets.newHashSet(Iterables.concat(tags, opts.tags))); - } - - public void addStatement(String stmt, String contextPath) { - Set s = Collections.emptySet(); - addStatement(stmt, null, s); - } - - public void addStatement(String stmt) { - addStatement(stmt, null); - } - - private static boolean isValidVar(String var) { - return var.startsWith("@"); - } - - private static void checkIsValidVar(String var) { - if (!isValidVar("@")) - throw new RuntimeException("Invalid variable: '" + var + "' doesn't start with '@'"); - } - - // Replace all leaves of LispTree with value in macros if exists - private static LispTree applyMacros(Map macros, LispTree tree) { - if (tree.isLeaf()) { - LispTree replacement = macros.get(tree.value); - if (replacement != null) return replacement; - if (isValidVar(tree.value)) - throw new RuntimeException("Undefined macro: " + tree.value); - else - return tree; - } - LispTree newTree = LispTree.proto.newList(); - for (LispTree 
child : tree.children) - newTree.addChild(applyMacros(macros, child)); - return newTree; - } - - // Apply the macro substitutions to |tree|. - public LispTree applyMacros(LispTree tree) { - return applyMacros(this.macros, tree); - } - - private void readOnePath(String path, Set tags) { - // Save raw lines - if (statements.size() > 0) statements.add(""); - statements.add("####### " + path); - for (String line : IOUtils.readLinesHard(path)) - statements.add(line); - - Iterator trees = LispTree.proto.parseFromFile(path); - while (trees.hasNext()) { - LispTree tree = trees.next(); - interpret(path, tree, tags); - collectValidTags(tree); - } - } - - public void write() { - String path = Execution.getFile("grammar"); - if (path == null) return; - PrintWriter out = IOUtils.openOutHard(path); - for (String statement : statements) - out.println(statement); - out.close(); - - out = IOUtils.openOutHard(Execution.getFile("processed-grammar")); - for (Rule rule : rules) - out.println(rule.toLispTree().toString()); - out.close(); - } - - private void interpret(String path, LispTree tree, Set tags) { - if (tree.isLeaf()) - throw new RuntimeException("Expected list, got " + tree); - - try { - String command = tree.child(0).value; - if ("rule".equals(command)) { - interpretRule(tree); - } else if ("include".equals(command)) { - if (path == null) { - throw new RuntimeException( - "Grammar include statement given without context path"); - } - for (int i = 1; i < tree.children.size(); i++) - readOnePath(new File(path).getParent() + "/" + tree.child(i).value, tags); - } else if ("when".equals(command)) { - if (interpretBoolean(tree.child(1), tags)) { - for (int i = 2; i < tree.children.size(); i++) - interpret(path, tree.child(i), tags); - } - } else if ("def".equals(command)) { - interpretMacroDef(tree); - } else if ("for".equals(command)) { - interpretFor(path, tree, tags); - } else { - throw new RuntimeException("Invalid command: " + command); - } - } catch (Exception e) { - 
e.printStackTrace(); - throw new RuntimeException("Error on " + tree + ": " + e); - } - } - - private boolean interpretBoolean(LispTree tree, Set tags) { - if (tree.isLeaf()) - return tags.contains(tree.value); - if ("not".equals(tree.child(0).value)) { - if (tree.children.size() != 2) - throw new RuntimeException("Too many arguments for not: " + tree); - return !interpretBoolean(tree.child(1), tags); - } - if ("and".equals(tree.child(0).value)) { - for (int i = 1; i < tree.children.size(); i++) - if (!interpretBoolean(tree.child(i), tags)) return false; - return true; - } - if ("or".equals(tree.child(0).value)) { - for (int i = 1; i < tree.children.size(); i++) - if (interpretBoolean(tree.child(i), tags)) return true; - return false; - } - throw new RuntimeException("Expected a single tag, but got: " + tree); - } - - - public void interpretMacroDef(LispTree tree) { - if (tree.children.size() != 3 || !tree.child(1).isLeaf()) { - throw new RuntimeException("Invalid usage (def |name| |value|): " + tree); - } - String var = tree.child(1).value; - checkIsValidVar(var); - macros.put(var, applyMacros(tree.child(2))); - } - - public void interpretFor(String path, LispTree tree, Set tags) { - if (tree.children.size() <= 3 || !tree.child(1).isLeaf()) { - throw new RuntimeException("Invalid usage (for |var| (|value| ... |value|) |statement| ...) 
" + tree); - } - String var = tree.child(1).value; - checkIsValidVar(var); - List values = applyMacros(tree.child(2)).children; - LispTree old = macros.get(var); - for (LispTree value : values) { - macros.put(var, value); - for (int j = 3; j < tree.children.size(); j++) - interpret(path, tree.child(j), tags); - } - if (old == null) macros.remove(var); - else macros.put(var, old); - } - - private static String checkCatName(String cat) { - if (isIntermediate(cat)) - LogInfo.warnings("Category '%s' starts with '$Intermediate'; please avoid this unless you know what you are doing."); - return cat; - } - - protected void interpretRule(LispTree tree) { - if (tree.children.size() < 4) - throw new RuntimeException("Invalid rule: " + tree); - - // (rule lhs rhs semantics (key value) ... (key value)) - tree = applyMacros(tree); - - // Parse LHS - if (!tree.child(1).isLeaf()) - throw new RuntimeException("Invalid LHS: " + tree.child(1)); - String lhs = checkCatName(tree.child(1).value); - - // Parse RHS - List rhs = Lists.newArrayList(); - List isOptionals = new ArrayList<>(); - LispTree rhsTree = tree.child(2); - if (rhsTree.isLeaf()) - throw new RuntimeException("RHS needs to be list, but got: " + rhsTree); - for (int i = 0; i < rhsTree.children.size(); i++) { - LispTree child = rhsTree.child(i); - boolean isOptional = false; - if (child.isLeaf()) { // $PHRASE - rhs.add(checkCatName(child.value)); - } else { - // e.g., ($PHRASE optional) - // First item is the token/category; the rest of the items - // specify parameters (currently, only parameter is optional). 
- rhs.add(checkCatName(child.child(0).value)); - for (int j = 1; j < child.children.size(); j++) - if (child.child(j).value.equals("optional")) - isOptional = true; - } - isOptionals.add(isOptional); - } - - // Parse semantic function - SemanticFn sem = parseSemanticFn(tree.child(3)); - - Rule rule = new Rule(lhs, rhs, sem); - - // Parse extra info - for (int i = 4; i < tree.children.size(); i++) { - LispTree item = tree.child(i); - if (!item.isLeaf() && item.children.size() != 2) - throw new RuntimeException("Invalid key-value pair: " + item); - try { - rule.addInfo(item.child(0).value, Double.parseDouble(item.child(1).value)); - } catch (NumberFormatException e) { - throw new RuntimeException("Invalid key-value pair: " + item); - } - } - - addRule(rule, isOptionals); - } - - public synchronized void addRule(Rule rule) { - List isOptionals = new ArrayList<>(); - for (int i = 0; i < rule.rhs.size(); i++) isOptionals.add(false); - addRule(rule, isOptionals); - } - - // Add a rule to the grammar. 
- public synchronized void addRule(Rule rule, List isOptionals) { - rules.addAll(binarizeRule(rule, isOptionals)); - } - - private void collectValidTags(LispTree tree) { - if (tree.isLeaf()) - throw new RuntimeException("Expected list, got " + tree); - try { - String command = tree.child(0).value; - if ("when".equals(command)) { - collectValidTagsBoolean(tree.child(1)); - for (int i = 2; i < tree.children.size(); i++) - collectValidTags(tree.child(i)); - } else if ("for".equals(command)) { - for (int i = 3; i < tree.children.size(); i++) - collectValidTags(tree.child(i)); - } - } catch (Exception e) { - e.printStackTrace(); - throw new RuntimeException("Error on " + tree + ": " + e); - } - } - - private void collectValidTagsBoolean(LispTree tree) { - if (tree.isLeaf()) { - validTags.add(tree.value); - return; - } - String command = tree.child(0).value; - if ("not".equals(command)) - collectValidTagsBoolean(tree.child(1)); - else if ("and".equals(command) || "or".equals(command)) - for (int i = 1; i < tree.children.size(); i++) - collectValidTagsBoolean(tree.child(i)); - } - - // Generate intermediate categories for binarization. - public static final String INTERMEDIATE_PREFIX = "$Intermediate"; - protected int freshCatIndex = 0; - private String generateFreshCat() { - freshCatIndex++; - return INTERMEDIATE_PREFIX + freshCatIndex; - } - public static boolean isIntermediate(String cat) { - return cat.startsWith(INTERMEDIATE_PREFIX); - } - public int getFreshCatIndex() { - return freshCatIndex; - } - - // Create multiple versions of this rule if there are optional RHS. - // Restriction: must be able to split the RHS into two halves, each of - // which contains at most one non-optional category. - // Recall that only the non-optional categories on the RHS are arguments into the SemanticFn. - // Example: stop? $A stop $Stop? $B stop $Stop? 
- private List binarizeRule(Rule rule, List isOptionals) { - List newRules = new ArrayList<>(); - - // Don't binarize: do same as before - if (!opts.binarizeRules) { - if (isOptionals.contains(true)) - throw new RuntimeException("Can't have optionals if don't binarize: " + rule + " " + isOptionals); - newRules.add(rule); - return newRules; - } - - // Special case: JoinFn with an arg0Fn but with multiple non-optional categories. - // In this case, we need to use arg0Fn on just the first such category, and - // then use function application on the rest. - // Old: (rule $A (a ($Z optional) $B $C $D) (JoinFn (arg0 (lambda b (lambda c (lambda d ...)))))) - // New rules to binarize ($B is the first non-optional category): - // 1. (rule $I1 (a ($Z optional) $B) (JoinFn (arg0 (lambda b (lambda c (lambda d ...)))))) - // 2. (rule $A ($I1 $C $D) (JoinFn forward betaReduce)) - if (rule.sem instanceof JoinFn && ((JoinFn) rule.sem).getArg0Fn() != null) { - // Find the first non-optional category - int i = 0; - while (i < rule.rhs.size() && !(Rule.isCat(rule.rhs.get(i)) && !isOptionals.get(i))) - i++; - // Find the next non-optional category - int j = i + 1; - while (j < rule.rhs.size() && !(Rule.isCat(rule.rhs.get(j)) && !isOptionals.get(j))) - j++; - // If one exists, then we have to invoke special binarization - if (j < rule.rhs.size()) { - // Create an intermediate category - String intCat = generateFreshCat(); - - // Add rule 1 - List rhs1 = new ArrayList<>(rule.rhs.subList(0, i + 1)); - newRules.addAll(binarizeRule(new Rule(intCat, rhs1, rule.sem).setInfo(rule), isOptionals.subList(0, i + 1))); - - // Add rule 2 - List rhs2 = new ArrayList<>(); - rhs2.add(intCat); - rhs2.addAll(rule.rhs.subList(i + 1, rule.rhs.size())); - SemanticFn forwardBetaReduce = new JoinFn(); - forwardBetaReduce.init(LispTree.proto.parseFromString("(JoinFn forward betaReduce)")); - newRules.addAll(binarizeRule(new Rule(rule.lhs, rhs2, forwardBetaReduce).setInfo(rule), isOptionals.subList(i, 
isOptionals.size()))); - - return newRules; - } - } - - if (!isOptionals.contains(false)) - throw new RuntimeException("Can't have all RHS items be optional: " + rule + " " + isOptionals); - - // Zero-aries (for floating rules) and Unaries: don't need to binarize - if (rule.rhs.size() <= 1) { - newRules.add(rule); - return newRules; - } - - // Stores the current RHS that we're building up. - List newRhs = new ArrayList<>(); - List newIsOptional = new ArrayList<>(); - List isRequiredCat = new ArrayList<>(); // These are the arguments to the semantic function - - // Left-binarize. - assert rule.rhs.size() >= 2; - boolean appliedRuleSem = false; - for (int i = 0; i < rule.rhs.size(); i++) { - newRhs.add(rule.rhs.get(i)); - newIsOptional.add(isOptionals.get(i)); - isRequiredCat.add(!isOptionals.get(i) && Rule.isCat(rule.rhs.get(i))); - - // Aim is to create rules with two RHS required categories - // (binarized rules + tokens which don't cost anything). - // Sometimes semantic functions take more than one argument. - // Note: we assume they just left-binarize. - if (newRhs.size() < 2) // This should only happen in the beginning - continue; - - if (isRequiredCat.get(0) && isRequiredCat.get(1)) - appliedRuleSem = true; - - boolean atEnd = (i == rule.rhs.size() - 1); - // TODO(pliang): currently, too many intermediate categories are created. Remove - // to make flatter grammars (will generate fewer derivations). - String lhs = atEnd && appliedRuleSem ? 
rule.lhs : generateFreshCat(); - - // Create rule with newRhs possibly excluding the optionals (there should be at most 2) - assert (newRhs.size() == 2); - boolean allCanBeOptional = false; - for (int b0 = 0; b0 < 2; b0++) { // Whether to include newRhs.get(0) - if (b0 == 0 && !newIsOptional.get(0)) continue; - for (int b1 = 0; b1 < 2; b1++) { // Whether to include newRhs.get(1) - if (b1 == 0 && !newIsOptional.get(1)) continue; - - List rhs = Lists.newArrayList(); - SemanticFn sem; - - if (b0 == 1) rhs.add(newRhs.get(0)); - if (b1 == 1) rhs.add(newRhs.get(1)); - - if (isRequiredCat.get(0) && isRequiredCat.get(1)) - sem = rule.sem; - else if (b0 == 1 && Rule.isCat(rhs.get(0)) && isRequiredCat.get(1)) - sem = new SelectFn(1); - else if (isRequiredCat.get(0) || isRequiredCat.get(1)) - sem = new SelectFn(0); - else - sem = new ConstantFn(Formula.nullFormula); - - // We can't allow empty RHS, but if we need it, just mark it as all - // can be optional. - if (rhs.size() > 0) - newRules.add(new Rule(lhs, rhs, sem).setInfo(rule)); - else - allCanBeOptional = true; - } - } - boolean req = isRequiredCat.get(0) || isRequiredCat.get(1); - - // Replace with new category. 
- newRhs.clear(); newRhs.add(lhs); - newIsOptional.clear(); newIsOptional.add(allCanBeOptional); - isRequiredCat.clear(); isRequiredCat.add(req); - } - - assert newRhs.size() == 1; - assert !newIsOptional.get(0); - - // Final unary rule if needed - if (!appliedRuleSem) - newRules.add(new Rule(rule.lhs, Lists.newArrayList(newRhs), rule.sem).setInfo(rule)); - - // LogInfo.begin_track("binarize %s", rule); - // for (Rule r : newRules) LogInfo.logs("%s", r); - // LogInfo.end_track(); - - return newRules; - } - - // Examples of |tree| - // (ConstantFn null) - // (NumberFn) - private SemanticFn parseSemanticFn(LispTree tree) { - // Syntactic sugar: foo => (ConstantFn foo) - if (tree.isLeaf()) { - LispTree newTree = LispTree.proto.newList(); - newTree.addChild("ConstantFn"); - newTree.addChild(tree.value); - tree = newTree; - } - - String name = tree.child(0).value; - - // Syntactic sugar: (lambda x (f (var x))) => (useApplyFn (lambda x (f (var x)))) - // defaults to (lambda x (var x)) => (JoinFn betaReduce forward (arg0 (lambda x (var x)))) - if (name.equals("lambda")) { - LispTree newTree = LispTree.proto.newList(); - - if (Grammar.opts.useApplyFn == null) { - newTree.addChild("JoinFn"); - newTree.addChild("betaReduce"); - newTree.addChild("forward"); - newTree.addChild(LispTree.proto.newList("arg0", tree)); - } else { - newTree.addChild(Grammar.opts.useApplyFn); - newTree.addChild(tree); - } - tree = newTree; - name = tree.child(0).value; - } - - // For backward compatibility: SemanticFn which have moved. - if (name.equals("LexiconFn") || name.equals("BridgeFn")) - name = "freebase." 
+ name; - - SemanticFn fn; - fn = (SemanticFn) Utils.newInstanceHard(SempreUtils.resolveClassName(name)); - if (fn == null) - throw new RuntimeException("Invalid SemanticFn name: " + name); - - fn.init(tree); - return fn; - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/HasScore.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/HasScore.java deleted file mode 100644 index 8fd1b49bae..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/HasScore.java +++ /dev/null @@ -1,8 +0,0 @@ -package edu.stanford.nlp.sempre; - -/** - * Things that have a score that is a dot product of weights and features - */ -public interface HasScore { - double getScore(); -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/IdentityFn.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/IdentityFn.java deleted file mode 100644 index 487efc724b..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/IdentityFn.java +++ /dev/null @@ -1,20 +0,0 @@ -package edu.stanford.nlp.sempre; - -/** - * Identity function. 
- * - * @author Percy Liang - */ -public class IdentityFn extends SemanticFn { - public DerivationStream call(Example ex, final Callable c) { - return new SingleDerivationStream() { - @Override - public Derivation createDerivation() { - return new Derivation.Builder() - .withCallable(c) - .withFormulaFrom(c.child(0)) - .createDerivation(); - } - }; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/JavaExecutor.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/JavaExecutor.java deleted file mode 100644 index 2afd178b91..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/JavaExecutor.java +++ /dev/null @@ -1,391 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.google.common.base.Strings; -import com.google.common.collect.Lists; -import com.google.common.collect.Maps; - -import fig.basic.MapUtils; -import fig.basic.Option; - -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; -import java.lang.reflect.Modifier; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; - -/** - * JavaExecutor takes a Formula which is composed recursively of CallFormulas, - * does reflection, and returns a Value. - * - * @author Percy Liang - */ -public class JavaExecutor extends Executor { - public static class Options { - @Option(gloss = "Whether to convert NumberValue to int/double") public boolean convertNumberValues = true; - @Option(gloss = "Print stack trace on exception") public boolean printStackTrace = false; - // the actual function will be called with the current ContextValue as its last argument if marked by contextPrefix - @Option(gloss = "Formula in the grammar whose name startsWith contextPrefix is context sensitive") - public String contextPrefix = "context:"; - @Option(gloss = "Reduce verbosity by automatically appending, for example, edu.stanford.nlp.sempre to java calls") - public String classPathPrefix = ""; // e.g. 
"edu.stanford.nlp.sempre"; - @Option(gloss = "Whether to convert name values to string literal") - public boolean convertNameValues = false; - } - public static Options opts = new Options(); - - private static JavaExecutor defaultExecutor = new JavaExecutor(); - - // To simplify logical forms, define some shortcuts. - private Map shortcuts = Maps.newHashMap(); - - public JavaExecutor() { - String className = BasicFunctions.class.getName(); - - shortcuts.put("+", className + ".plus"); - shortcuts.put("-", className + ".minus"); - shortcuts.put("*", className + ".times"); - shortcuts.put("/", className + ".divide"); - shortcuts.put("%", className + ".mod"); - shortcuts.put("!", className + ".not"); - - shortcuts.put("<", className + ".lessThan"); - shortcuts.put("<=", className + ".lessThanEq"); - shortcuts.put("==", className + ".equals"); - shortcuts.put(">", className + ".greaterThan"); - shortcuts.put(">=", className + ".greaterThanEq"); - - shortcuts.put("if", className + ".ifThenElse"); - shortcuts.put("map", className + ".map"); - shortcuts.put("reduce", className + ".reduce"); - shortcuts.put("select", className + ".select"); - shortcuts.put("range", className + ".range"); - } - - public static class BasicFunctions { - public static double plus(double x, double y) { return x + y; } - public static int plus(int x, int y) { return x + y; } - public static int minus(int x, int y) { return x - y; } - public static double minus(double x, double y) { return x - y; } - public static int times(int x, int y) { return x * y; } - public static double times(double x, double y) { return x * y; } - public static int divide(int x, int y) { return x / y; } - public static double divide(double x, double y) { return x / y; } - public static int mod(int x, int y) { return x % y; } - public static boolean not(boolean x) { return !x; } - - public static boolean lessThan(double x, double y) { return x < y; } - public static boolean lessThanEq(double x, double y) { return x <= y; } 
- public static boolean equals(double x, double y) { return x == y; } - public static boolean greaterThan(double x, double y) { return x > y; } - public static boolean greaterThanEq(double x, double y) { return x >= y; } - - public static Object ifThenElse(boolean b, Object x, Object y) { return b ? x : y; } - - // For very simple string concatenation - public static String plus(String a, String b) { return a + b; } - public static String plus(String a, String b, String c) { - return a + b + c; - } - public static String plus(String a, String b, String c, String d) { - return a + b + c + d; - } - public static String plus(String a, String b, String c, String d, String e) { - return a + b + c + d + e; - } - public static String plus(String a, String b, String c, String d, String e, String f) { - return a + b + c + d + e + f; - } - public static String plus(String a, String b, String c, String d, String e, String f, String g) { - return a + b + c + d + e + f + g; - } - public static String plus(String a, String b, String c, String d, String e, String f, String g, String h) { - return a + b + c + d + e + f + g + h; - } - public static String plus(String a, String b, String c, String d, String e, String f, String g, String h, String i) { - return a + b + c + d + e + f + g + h + i; - } - public static String plus(String a, String b, String c, String d, String e, String f, String g, String h, String i, String j) { - return a + b + c + d + e + f + g + h + i + j; - } - public static String plus(String a, String b, String c, String d, String e, String f, String g, String h, String i, String j, String k) { - return a + b + c + d + e + f + g + h + i + j + k; - } - public static String plus(String a, String b, String c, String d, String e, String f, String g, String h, String i, String j, String k, String l) { - return a + b + c + d + e + f + g + h + i + j + k + l; - } - public static String plus(String a, String b, String c, String d, String e, String f, String g, String h, 
String i, String j, String k, String l, String m) { - return a + b + c + d + e + f + g + h + i + j + k + l + m; - } - public static String plus(String a, String b, String c, String d, String e, String f, String g, String h, String i, String j, String k, String l, String m, String n) { - return a + b + c + d + e + f + g + h + i + j + k + l + m + n; - } - public static String plus(String a, String b, String c, String d, String e, String f, String g, String h, String i, String j, String k, String l, String m, String n, String o) { - return a + b + c + d + e + f + g + h + i + j + k + l + m + n + o; - } - private static String toString(Object x) { - if (x instanceof String) - return (String) x; - else if (x instanceof Value) - return (x instanceof NameValue) ? ((NameValue) x).id : ((StringValue) x).value; - else - return null; - } - - // Apply func to each element of |list| and return the resulting list. - public static List map(List list, LambdaFormula func) { - List newList = new ArrayList(); - for (Object elem : list) { - Object newElem = apply(func, elem); - newList.add(newElem); - } - return newList; - } - - // list = [3, 5, 2], func = (lambda x (lambda y (call + (var x) (var y)))) - // Returns (3 + 5) + 2 = 10 - public static Object reduce(List list, LambdaFormula func) { - if (list.size() == 0) return null; - Object x = list.get(0); - for (int i = 1; i < list.size(); i++) - x = apply(func, x, list.get(i)); - return x; - } - - // Return elements x of |list| such that func(x) is true. - public static List select(List list, LambdaFormula func) { - List newList = new ArrayList(); - for (Object elem : list) { - Object test = apply(func, elem); - if ((Boolean) test) - newList.add(elem); - } - return newList; - } - - private static Object apply(LambdaFormula func, Object x) { - // Apply the function func to x. In order to do that, need to convert x into a value. 
- Formula formula = Formulas.lambdaApply(func, new ValueFormula(toValue(x))); - return defaultExecutor.processFormula(formula, null); - } - private static Object apply(LambdaFormula func, Object x, Object y) { - // Apply the function func to x and y. In order to do that, need to convert x into a value. - Formula formula = Formulas.lambdaApply(func, new ValueFormula(toValue(x))); - formula = Formulas.lambdaApply((LambdaFormula) formula, new ValueFormula(toValue(y))); - return defaultExecutor.processFormula(formula, null); - } - - public static List range(int start, int end) { - List result = new ArrayList(); - for (int i = start; i < end; i++) - result.add(i); - return result; - } - } - - public Response execute(Formula formula, ContextValue context) { - // We can do beta reduction here since macro substitution preserves the - // denotation (unlike for lambda DCS). - formula = Formulas.betaReduction(formula); - try { - return new Response(toValue(processFormula(formula, context))); - } catch (Exception e) { - // Comment this out if we expect lots of innocuous type checking failures - if (opts.printStackTrace) e.printStackTrace(); - return new Response(ErrorValue.badJava(e.toString())); - } - } - - private Object processFormula(Formula formula, ContextValue context) { - if (formula instanceof ValueFormula) // Unpack value and convert to object (e.g., for ints) - return toObject(((ValueFormula) formula).value); - - if (formula instanceof CallFormula) { // Invoke the function. 
- // Recurse - CallFormula call = (CallFormula) formula; - Object func = processFormula(call.func, context); - List args = Lists.newArrayList(); - for (Formula arg : call.args) { - args.add(processFormula(arg, context)); - } - - if (!(func instanceof NameValue)) - //throw new RuntimeException("Invalid func: " + call.func + " => " + func); - func = new NameValue((String) func); - - String id = ((NameValue) func).id; - if (id.indexOf(opts.contextPrefix) != -1) { - args.add(context); - id = id.replace(opts.contextPrefix, ""); - } - id = MapUtils.get(shortcuts, id, id); - - // classPathPrefix, like edu.stanford.nlp.sempre.interactive - if (!Strings.isNullOrEmpty(opts.classPathPrefix) && !id.startsWith(".") && !id.startsWith(opts.classPathPrefix)) { - id = opts.classPathPrefix + "." + id; - } - - if (id.startsWith(".")) // Instance method - return invoke(id.substring(1), args.get(0), args.subList(1, args.size()).toArray(new Object[0])); - - else // Static method - return invoke(id, null, args.toArray(new Object[0])); - } - - // Just pass it through... - return formula; - } - - // Convert the Object back to a Value - private static Value toValue(Object obj) { - if (obj instanceof Value) return (Value) obj; - if (obj instanceof Boolean) return new BooleanValue((Boolean) obj); - if (obj instanceof Integer) return new NumberValue(((Integer) obj).intValue()); - if (obj instanceof Double) return new NumberValue(((Double) obj).doubleValue()); - if (obj instanceof String) return new StringValue((String) obj); - if (obj instanceof List) { - List list = Lists.newArrayList(); - for (Object elem : (List) obj) - list.add(toValue(elem)); - return new ListValue(list); - } - throw new RuntimeException("Unhandled object: " + obj + " with class " + obj.getClass()); - } - - // Convert a Value (which are specified in the formulas) to an Object (which - // many Java functions take). 
- private static Object toObject(Value value) { - if (value instanceof NumberValue && opts.convertNumberValues) { - // Unfortunately, NumberValues don't make a distinction between ints and - // doubles, so this is a hack. - double x = ((NumberValue) value).value; - if (x == (int) x) - return new Integer((int) x); - return new Double(x); - } else if (value instanceof NameValue && opts.convertNameValues) { - String id = ((NameValue) value).id; - return id; - } else if (value instanceof BooleanValue) { - return ((BooleanValue) value).value; - } else if (value instanceof StringValue) { - return ((StringValue) value).value; - } else if (value instanceof ListValue) { - List list = Lists.newArrayList(); - for (Value elem : ((ListValue) value).values) - list.add(toObject(elem)); - return list; - } else { - return value; // Preserve the Value (which can be an object) - } - } - - // Example: id = "Math.cos" - private Object invoke(String id, Object thisObj, Object[] args) { - Method[] methods; - Class cls; - String methodName; - boolean isStatic = thisObj == null; - - if (isStatic) { // Static methods - int i = id.lastIndexOf('.'); - if (i == -1) { - throw new RuntimeException("Expected ., but got: " + id); - } - String className = id.substring(0, i); - methodName = id.substring(i + 1); - - try { - cls = Class.forName(className); - } catch (ClassNotFoundException e) { - throw new RuntimeException(e); - } - methods = cls.getMethods(); - } else { // Instance methods - cls = thisObj.getClass(); - methodName = id; - methods = cls.getMethods(); - } - - // Find a suitable method - List nameMatches = Lists.newArrayList(); - Method bestMethod = null; - int bestCost = INVALID_TYPE_COST; - for (Method m : methods) { - if (!m.getName().equals(methodName)) continue; - m.setAccessible(true); - nameMatches.add(m); - if (isStatic != Modifier.isStatic(m.getModifiers())) continue; - int cost = typeCastCost(m.getParameterTypes(), args); - if (cost < bestCost) { - bestCost = cost; - bestMethod 
= m; - } - } - - if (bestMethod != null) { - try { - return bestMethod.invoke(thisObj, args); - } catch (InvocationTargetException e) { - throw new RuntimeException(e.getCause()); - } catch (IllegalAccessException e) { - throw new RuntimeException(e); - } - } - List types = Lists.newArrayList(); - for (Object arg : args) - types.add(arg.getClass().toString()); - throw new RuntimeException("Method " + methodName + " not found in class " + cls + " with arguments " + Arrays.asList(args) + " having types " + types + "; candidates: " + nameMatches); - } - - private int typeCastCost(Class[] types, Object[] args) { - if (types.length != args.length) return INVALID_TYPE_COST; - int cost = 0; - for (int i = 0; i < types.length; i++) { - cost += typeCastCost(types[i], args[i]); - if (cost >= INVALID_TYPE_COST) { - // LogInfo.dbgs("NOT COMPATIBLE: want %s, got %s with type %s", types[i], args[i], args[i].getClass()); - break; - } - } - return cost; - } - - // Return whether the object |arg| is compatible with |type|. - // 0: perfect match - // 1: don't match, but don't lose anything - // 2: don't match, and can lose something - // INVALID_TYPE_COST: impossible - private int typeCastCost(Class type, Object arg) { - if (arg == null) return !type.isPrimitive() ? 0 : INVALID_TYPE_COST; - if (type.isInstance(arg)) return 0; - if (type == Boolean.TYPE) return arg instanceof Boolean ? 
0 : INVALID_TYPE_COST; - else if (type == Integer.TYPE) { - if (arg instanceof Integer) return 0; - if (arg instanceof Long) return 1; - return INVALID_TYPE_COST; - } - if (type == Long.TYPE) { - if (arg instanceof Integer) return 1; - if (arg instanceof Long) return 0; - return INVALID_TYPE_COST; - } - if (type == Float.TYPE) { - if (arg instanceof Integer) return 1; - if (arg instanceof Long) return 1; - if (arg instanceof Float) return 0; - if (arg instanceof Double) return 2; - return INVALID_TYPE_COST; - } - if (type == Double.TYPE) { - if (arg instanceof Integer) return 1; - if (arg instanceof Long) return 1; - if (arg instanceof Float) return 1; - if (arg instanceof Double) return 0; - return INVALID_TYPE_COST; - } - return INVALID_TYPE_COST; - } - - private static final int INVALID_TYPE_COST = 1000; -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/JoinFn.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/JoinFn.java deleted file mode 100644 index da53140660..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/JoinFn.java +++ /dev/null @@ -1,273 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.LispTree; -import fig.basic.LogInfo; -import fig.basic.Option; - - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - -/** - * Input: two children (a binary and a unary in some order). - * JoinFn really serve two roles: - * 1. binary is just a relation (e.g., !fb:people.person.place_of_birth), in - * which case we do a join-project. - * 2. binary is a lambda calculus expression (e.g., (lambda x (count (var - * x)))), in which case we do (macro) function application. 
- * - * @author Percy Liang - */ -public class JoinFn extends SemanticFn { - public static class Options { - @Option(gloss = "Verbose") public int verbose = 0; - @Option public boolean showTypeCheckFailures = false; - @Option public boolean typeInference = true; - // TODO(joberant): this flag is for backward compatibility. If we don't - // need it for the new results, get rid of it. - @Option public boolean specializedTypeCheck = false; - } - - public static Options opts = new Options(); - - // The arguments to JoinFn can be either: - // - binary unary - // - unary binary - private boolean unaryFirst = false; - - // A binary has two arguments, arg0 and arg1. - // 1. arg0 = fb:en.barack_obama, binary = fb:people.person.place_of_birth, arg1 = fb:en.honolulu - // From a relation point viewpoint, arg0 is the subject and arg1 is the object. - // 2. arg0 = fb:en.barack_obama, binary = (lambda x (fb:people.person.place_of_birth (var x))), arg1 = fb:en.honolulu - // From a function application viewpoint, arg1 is the argument and arg0 is the return type. - // The unary can be placed into arg0 of the binary or arg1 of the binary. - // When we write a join (binary unary), unary goes into arg1, so if we want the unary to go into arg0, we need to reverse the binary. - private boolean unaryCanBeArg0 = false; - private boolean unaryCanBeArg1 = false; - - // If we want to do a betaReduction rather than creating a JoinFormula. - private boolean betaReduce = false; - - /** - * There are four different ways binaries and unaries can be combined: - * - where was unary[Obama] binary[born]? (unary,binary unaryCanBeArg0) - * - unary[Spanish] binary[speaking] countries (unary,binary unaryCanBeArg1) - * - binary[parents] of unary[Obama] (binary,unary unaryCanBeArg0) - * - has binary[parents] unary[Obama] (binary,unary unaryCanBeArg1) - */ - - // Optionally specify the first of the two arguments to the JoinFn, - // in which case, this function should only be called on one argument. 
- // Note: this is confusing - arg0 here refers to the arguments to JoinFn, not - // the arg0 and arg1 of the binary. - private ConstantFn arg0Fn = null; - public ConstantFn getArg0Fn() { return arg0Fn; } - - public void init(LispTree tree) { - super.init(tree); - for (int j = 1; j < tree.children.size(); j++) { - String arg = tree.child(j).value; - if (tree.child(j).isLeaf()) { - switch (arg) { - case "binary,unary": - unaryFirst = false; - break; - case "unary,binary": - unaryFirst = true; - break; - case "unaryCanBeArg0": - unaryCanBeArg0 = true; - break; - case "unaryCanBeArg1": - unaryCanBeArg1 = true; - break; - case "forward": - unaryFirst = false; - unaryCanBeArg1 = true; - break; - case "backward": - unaryFirst = true; - unaryCanBeArg1 = true; - break; - case "betaReduce": - betaReduce = true; - break; - default: - throw new RuntimeException("Invalid argument: " + arg); - } - } else { - if ("arg0".equals(tree.child(j).child(0).value)) { - arg0Fn = new ConstantFn(); - arg0Fn.init(tree.child(j)); - } else { - throw new RuntimeException("Invalid argument: " + tree.child(j)); - } - } - } - - if (!unaryCanBeArg0 && !unaryCanBeArg1) - throw new RuntimeException("At least one of unaryCanBeArg0 and unaryCanBeArg1 must be set"); - } - - public DerivationStream call(Example ex, Callable c) { - return new LazyJoinFnDerivs(ex, c); - } - - public class LazyJoinFnDerivs extends MultipleDerivationStream { - private int currIndex = 0; - private List derivations = new ArrayList<>(); - private Example ex; - private Callable callable; - Derivation unaryDeriv, binaryDeriv; - - public LazyJoinFnDerivs(Example ex, Callable c) { - this.ex = ex; - this.callable = c; - Derivation child0, child1; - // TODO(pliang): we can actually push most of this logic into createDerivation() - // don't need to get the exact size - - if (arg0Fn != null) { - if (c.getChildren().size() != 1) - throw new RuntimeException("Expected one argument (already have " + arg0Fn + "), but got args: " + 
c.getChildren()); - // This is just a virtual child which is not a derivation. - DerivationStream ld = arg0Fn.call(ex, CallInfo.NULL_INFO); - child0 = ld.next(); - child1 = c.child(0); - } else { - if (c.getChildren().size() != 2) - throw new RuntimeException("Expected two arguments, but got: " + c.getChildren()); - child0 = c.child(0); - child1 = c.child(1); - } - - if (unaryFirst) { - unaryDeriv = child0; - binaryDeriv = child1; - } else { - binaryDeriv = child0; - unaryDeriv = child1; - } - } - - @Override - public int estimatedSize() { - return 2; // This is an upper bound - } - - @Override - public Derivation createDerivation() { - if (currIndex == 0) - doJoins(binaryDeriv, unaryDeriv); - if (currIndex == derivations.size()) - return null; - return derivations.get(currIndex++); - } - - SemType specializedTypeCheck(SemType binaryType, SemType unaryType) { - // Ugly special case for Free917/WebQuestions: when |that| is a - // UnionSemType corresponding to an entity (e.g., - // fb:en.the_washington_post) and we are joining with a relation (e.g., - // fb:cvg.game_version.publisher), then we end up calling this function - // with: - // - that: (union fb:business.employer ...) - // - argType: fb:cvg.cvg_publisher - // The meet here is fb:cvg.cvg_publisher, but we actually want to return bottom (to be more stringent). 
- SemType argType = binaryType.getArgType(); - if (unaryType instanceof TopSemType) // Happens when we don't know the type of the unary - return SemType.bottomType; - if (unaryType instanceof AtomicSemType) // Make things uniform - unaryType = new UnionSemType(unaryType); - if (unaryType instanceof UnionSemType && argType instanceof AtomicSemType) { - for (SemType t : ((UnionSemType) unaryType).baseTypes) - if (t instanceof AtomicSemType && - SemTypeHierarchy.singleton.getSupertypes(((AtomicSemType) t).name).contains(((AtomicSemType) argType).name)) - return binaryType.getRetType(); - return SemType.bottomType; - } - return binaryType.apply(unaryType); // Default - } - - // Return null if unable to join. - private Derivation doJoin(Derivation binaryDeriv, Formula binaryFormula, SemType binaryType, - Derivation unaryDeriv, Formula unaryFormula, SemType unaryType, - String featureDesc) { - // Do a coarse type check. - SemType type = opts.specializedTypeCheck ? specializedTypeCheck(binaryType, unaryType) : binaryType.apply(unaryType); - if (!type.isValid()) { - if (opts.showTypeCheckFailures) - LogInfo.warnings("JoinFn: type check failed: [%s : %s] JOIN [%s : %s]", - binaryFormula, binaryType, - unaryFormula, unaryType); - return null; - } - - Formula f; - if (betaReduce) { - if (!(binaryFormula instanceof LambdaFormula)) - throw new RuntimeException("Expected LambdaFormula as the binary, but got: " + binaryFormula + ", unary is " + unaryFormula); - f = Formulas.lambdaApply((LambdaFormula) binaryFormula, unaryFormula); - } else { - f = new JoinFormula(binaryFormula, unaryFormula); - } - - // Do full type inference. 
- if (opts.typeInference) { - SemType fullType = TypeInference.inferType(f); - if (opts.verbose >= 2) - LogInfo.logs("JoinFn.typeInference: %s => %s [coarse type = %s]", f, fullType, type); - if (!fullType.isValid()) return null; // Rule out logical form - type = fullType; // Use the more specific type - } - - if (opts.verbose >= 3) { - LogInfo.logs( - "JoinFn: binary: %s [%s], unary: %s [%s], result: %s [%s]", - binaryFormula, binaryType, unaryFormula, unaryType, f, type); - } - - // Add features - FeatureVector features = new FeatureVector(); - if (FeatureExtractor.containsDomain("joinPos") && featureDesc != null) - features.add("joinPos", featureDesc); - - // FbFormulasInfo.touchBinaryFormula(binaryFormula); - Derivation newDeriv = new Derivation.Builder() - .withCallable(callable) - .formula(f) - .type(type) - .localFeatureVector(features) - .createDerivation(); - - if (SemanticFn.opts.trackLocalChoices) { - newDeriv.addLocalChoice( - "JoinFn " + - (binaryDeriv.start == -1 ? "-" : binaryDeriv.startEndString(ex.getTokens())) + " " + binaryDeriv.formula + " AND " + - (unaryDeriv.start == -1 ? 
"-" : unaryDeriv.startEndString(ex.getTokens())) + " " + unaryDeriv.formula); - } - - return newDeriv; - } - - private void doJoins(Derivation binaryDeriv, Derivation unaryDeriv) { - String binaryPos = ex.languageInfo.getCanonicalPos(binaryDeriv.start); - String unaryPos = ex.languageInfo.getCanonicalPos(unaryDeriv.start); - if (unaryCanBeArg0) { - Derivation deriv = doJoin( - binaryDeriv, Formulas.reverseFormula(binaryDeriv.formula), binaryDeriv.type.reverse(), - unaryDeriv, unaryDeriv.formula, unaryDeriv.type, - "binary=" + binaryPos + ",unary=" + unaryPos + "_reverse"); - if (deriv != null) derivations.add(deriv); - } - if (unaryCanBeArg1) { - Derivation deriv = doJoin( - binaryDeriv, binaryDeriv.formula, binaryDeriv.type, - unaryDeriv, unaryDeriv.formula, unaryDeriv.type, - "binary=" + binaryPos + ",unary=" + unaryPos); - if (deriv != null) derivations.add(deriv); - } - Collections.sort(derivations, Derivation.derivScoreComparator); - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/JoinFormula.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/JoinFormula.java deleted file mode 100644 index a219a58b96..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/JoinFormula.java +++ /dev/null @@ -1,76 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.google.common.base.Function; -import fig.basic.LispTree; - -import java.util.List; - -/** - * A join formula represents a database join and has the following form: - * (relation child) - * If |relation| is a ValueFormula, then the formula is equivalent - * to the following in lambda calculus: - * (lambda x (exists y (and (relation y x) (child y)))) - * If |relation| is a LambdaFormula, then (relation child) is just applying the - * lambda expression |relation| to the argument |child|. 
- * - * @author Percy Liang - */ -public class JoinFormula extends Formula { - public final Formula relation; - public final Formula child; - - public JoinFormula(String relation, Formula child) { - this(Formulas.newNameFormula(relation), child); - } - - public JoinFormula(Formula relation, Formula child) { - this.relation = relation; - this.child = child; - } - - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild(relation.toLispTree()); - tree.addChild(child.toLispTree()); - return tree; - } - - @Override - public void forEach(Function func) { - if (!func.apply(this)) { relation.forEach(func); child.forEach(func); } - } - - @Override - public Formula map(Function func) { - Formula result = func.apply(this); - return result == null ? new JoinFormula(relation.map(func), child.map(func)) : result; - } - - @Override - public List mapToList(Function> func, boolean alwaysRecurse) { - List res = func.apply(this); - if (res.isEmpty() || alwaysRecurse) { - res.addAll(relation.mapToList(func, alwaysRecurse)); - res.addAll(child.mapToList(func, alwaysRecurse)); - } - return res; - } - - @SuppressWarnings({"equalshashcode"}) - @Override - public boolean equals(Object thatObj) { - if (!(thatObj instanceof JoinFormula)) return false; - JoinFormula that = (JoinFormula) thatObj; - if (!this.relation.equals(that.relation)) return false; - if (!this.child.equals(that.child)) return false; - return true; - } - - public int computeHashCode() { - int hash = 0x7ed55d16; - hash = hash * 0xd3a2646c + relation.hashCode(); - hash = hash * 0xd3a2646c + child.hashCode(); - return hash; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Json.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Json.java deleted file mode 100644 index e61dd32c32..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Json.java +++ /dev/null @@ -1,215 +0,0 @@ -package edu.stanford.nlp.sempre; - -import 
com.fasterxml.jackson.annotation.JsonAutoDetect; -import com.fasterxml.jackson.annotation.PropertyAccessor; -import com.fasterxml.jackson.core.JsonGenerationException; -import com.fasterxml.jackson.core.JsonGenerator; -import com.fasterxml.jackson.core.JsonParseException; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.JsonMappingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.ObjectReader; -import com.fasterxml.jackson.databind.ObjectWriter; - -import java.io.*; -import java.util.Map; - -/** - * Simple wrappers and sane defaults for Jackson. - * - * @author Roy Frostig - */ -public final class Json { - private Json() { } - - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - static { - OBJECT_MAPPER.setVisibility(PropertyAccessor.ALL, JsonAutoDetect.Visibility.NONE); - } - - public static ObjectMapper getMapper() { - return OBJECT_MAPPER; - } - - private static ObjectWriter getWriter(Class view) { - if (view != null) - return getMapper().writerWithView(view); - else - return getMapper().writer(); - } - - private static ObjectReader getReader(Class view) { - if (view != null) - return getMapper().readerWithView(view); - else - return getMapper().reader(); - } - - // TODO (rf): - // - readValueHard from InputStream, Reader, JsonParser, and File - // (all forwards) - - public static T readValueHard(String json, Class klass) { - return readValueHard(json, klass, Object.class); - } - public static T readValueHard(String json, Class klass, Class view) { - try { - return getReader(view).withType(klass).readValue(json); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - public static T readValueHard(String json, TypeReference typeRef) { - return readValueHard(json, typeRef, Object.class); - } - public static T readValueHard(String json, TypeReference typeRef, Class view) 
{ - try { - return getReader(view).withType(typeRef).readValue(json); - } catch (JsonMappingException e) { - throw new RuntimeException(e); - } catch (JsonParseException e) { - throw new RuntimeException(e); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - public static T readValueHard(Reader r, Class klass) { - return readValueHard(r, klass, Object.class); - } - public static T readValueHard(Reader r, Class klass, Class view) { - try { - return getReader(view).withType(klass).readValue(r); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - public static T readValueHard(Reader r, TypeReference typeRef) { - return readValueHard(r, typeRef, Object.class); - } - public static T readValueHard(Reader r, TypeReference typeRef, Class view) { - try { - return getReader(view).withType(typeRef).readValue(r); - } catch (JsonMappingException e) { - throw new RuntimeException(e); - } catch (JsonParseException e) { - throw new RuntimeException(e); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - public static Map readMapHard(String json) { - return readMapHard(json, Object.class); - } - public static Map readMapHard(String json, Class view) { - try { - return getReader(view).withType(Map.class).readValue(json); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - public static String prettyWriteValueAsStringHard(Object o) { - try { - return getMapper().writerWithDefaultPrettyPrinter().writeValueAsString(o); - } catch (JsonProcessingException e) { - throw new RuntimeException(e); - } - } - public static String writeValueAsStringHard(Object o) { - return writeValueAsStringHard(o, Object.class); - } - public static String writeValueAsStringHard(Object o, Class view) { - try { - return getWriter(view).writeValueAsString(o); - } catch (JsonProcessingException e) { - throw new RuntimeException(e); - } - } - - public static byte[] writeValueAsBytesHard(Object o) { - return writeValueAsBytesHard(o, 
Object.class); - } - public static byte[] writeValueAsBytesHard(Object o, Class view) { - try { - return getWriter(view).writeValueAsBytes(o); - } catch (JsonProcessingException e) { - throw new RuntimeException(e); - } - } - - public static void prettyWriteValueHard(File f, Object o) { - try { - getMapper().writerWithDefaultPrettyPrinter().writeValue(f, o); - } catch (JsonMappingException e) { - e.printStackTrace(); - } catch (JsonGenerationException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); - } - } - public static void writeValueHard(File f, Object o) { - writeValueHard(f, o, Object.class); - } - public static void writeValueHard(File f, Object o, Class view) { - try { - getWriter(view).writeValue(f, o); - } catch (JsonMappingException e) { - e.printStackTrace(); - } catch (JsonGenerationException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); - } - } - - public static void writeValueHard(OutputStream out, Object o) { - writeValueHard(out, o, Object.class); - } - public static void writeValueHard(OutputStream out, Object o, Class view) { - try { - getWriter(view).writeValue(out, o); - } catch (JsonMappingException e) { - throw new RuntimeException(e); - } catch (JsonGenerationException e) { - throw new RuntimeException(e); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - public static void writeValueHard(JsonGenerator jg, Object o) { - writeValueHard(jg, o, Object.class); - } - public static void writeValueHard(JsonGenerator jg, Object o, Class view) { - try { - getWriter(view).writeValue(jg, o); - } catch (JsonMappingException e) { - throw new RuntimeException(e); - } catch (JsonGenerationException e) { - throw new RuntimeException(e); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - public static void writeValueHard(Writer w, Object o) { - writeValueHard(w, o, Object.class); - } - public static void writeValueHard(Writer w, Object o, Class view) 
{ - try { - getWriter(view).writeValue(w, o); - } catch (JsonMappingException e) { - throw new RuntimeException(e); - } catch (JsonGenerationException e) { - throw new RuntimeException(e); - } catch (IOException e) { - throw new RuntimeException(e); - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/KnowledgeGraph.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/KnowledgeGraph.java deleted file mode 100644 index 81e8b1d25c..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/KnowledgeGraph.java +++ /dev/null @@ -1,81 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.lang.reflect.InvocationTargetException; -import java.util.*; - -import fig.basic.*; - - -/** - * Represents a small knowledge graph (much smaller than Freebase). - * - * A KnowledgeGraph can be created from either - * - a list of triples, or - * - other data format (e.g., web tables in CSV format) - * - * @author ppasupat - */ -public abstract class KnowledgeGraph { - - public static KnowledgeGraph fromLispTree(LispTree tree) { - if ("graph".equals(tree.child(0).value)) { - if (tree.children.size() > 1 && tree.child(1).isLeaf()) { - // Use a specific subclass of KnowledgeGraph - try { - String className = tree.child(1).value; - Class classObject = Class.forName(SempreUtils.resolveClassName(className)); - return (KnowledgeGraph) classObject.getDeclaredMethod("fromLispTree", LispTree.class).invoke(null, tree); - } catch (InvocationTargetException e) { - e.getCause().printStackTrace(); - LogInfo.fail(e.getCause()); - throw new RuntimeException(e); - } catch (IllegalAccessException | IllegalArgumentException | - NoSuchMethodException | SecurityException | ClassNotFoundException e) { - e.printStackTrace(); - throw new RuntimeException(e); - } - } else { - // (graph (a1 r1 b1) (a2 r2 b2) ...) 
-- explicit triples - return NaiveKnowledgeGraph.fromLispTree(tree); - } - } else { - throw new RuntimeException("Cannot convert " + tree + " to KnowledgeGraph."); - } - } - - // ============================================================ - // Helper methods - // ============================================================ - - /** Reverse the pairs */ - public static List> getReversedPairs(Collection> pairs) { - List> reversed = new ArrayList<>(); - for (Pair pair : pairs) - reversed.add(new Pair<>(pair.getSecond(), pair.getFirst())); - return reversed; - } - - // ============================================================ - // Abstract methods - // ============================================================ - - public abstract LispTree toLispTree(); - public abstract LispTree toShortLispTree(); - @Override public String toString() { return toLispTree().toString(); } - - /** Return all y such that x in firsts and (x,r,y) in graph */ - public abstract List joinFirst(Value r, Collection firsts); - - /** Return all x such that y in seconds and (x,r,y) in graph */ - public abstract List joinSecond(Value r, Collection seconds); - - /** Return all (x,y) such that x in firsts and (x,r,y) in graph */ - public abstract List> filterFirst(Value r, Collection firsts); - - /** Return all (x,y) such that y in seconds and (x,r,y) in graph */ - public abstract List> filterSecond(Value r, Collection seconds); - - /** Clean up cached data. Default to doing nothing. 
*/ - public void clean() { } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/LambdaFormula.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/LambdaFormula.java deleted file mode 100644 index bdc15046d8..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/LambdaFormula.java +++ /dev/null @@ -1,62 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.google.common.base.Function; -import fig.basic.LispTree; - -import java.util.List; - -/** - * Lambda abstraction (lambda |var| |body|) - * Percy Liang - */ -public class LambdaFormula extends Formula { - public final String var; - public final Formula body; - - public LambdaFormula(String var, Formula body) { - this.var = var; - this.body = body; - } - - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("lambda"); - tree.addChild(var); - tree.addChild(body.toLispTree()); - return tree; - } - - @Override - public void forEach(Function func) { - if (!func.apply(this)) body.forEach(func); - } - - @Override - public Formula map(Function func) { - Formula result = func.apply(this); - return result == null ? 
new LambdaFormula(var, body.map(func)) : result; - } - - @Override - public List mapToList(Function> func, boolean alwaysRecurse) { - List res = func.apply(this); - if (res.isEmpty() || alwaysRecurse) - res.addAll(body.mapToList(func, alwaysRecurse)); - return res; - } - - @SuppressWarnings({"equalshashcode"}) - @Override - public boolean equals(Object thatObj) { - if (!(thatObj instanceof LambdaFormula)) return false; - LambdaFormula that = (LambdaFormula) thatObj; - return this.var.equals(that.var) && this.body.equals(that.body); - } - - public int computeHashCode() { - int hash = 0x7ed55d16; - hash = hash * 0xd3a2646c + var.hashCode(); - hash = hash * 0xd3a2646c + body.hashCode(); - return hash; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/LanguageAnalyzer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/LanguageAnalyzer.java deleted file mode 100644 index e2170a4555..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/LanguageAnalyzer.java +++ /dev/null @@ -1,31 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.*; - -/** - * LanguageAnalyzer takes an utterance and applies various NLP pre-processing steps to - * to output a LanguageInfo object - * - * @author Alex Ratner - */ -public abstract class LanguageAnalyzer { - public static class Options { - @Option public String languageAnalyzer = "SimpleAnalyzer"; - - @Option(gloss = "Whether to convert tokens in the utterance to lowercase") - public boolean lowerCaseTokens = true; - } - public static Options opts = new Options(); - - // We keep a singleton LanguageAnalyzer because for any given run we - // generally will be working with one. 
- private static LanguageAnalyzer singleton; - public static LanguageAnalyzer getSingleton() { - if (singleton == null) - singleton = (LanguageAnalyzer) Utils.newInstanceHard(SempreUtils.resolveClassName(opts.languageAnalyzer)); - return singleton; - } - public static void setSingleton(LanguageAnalyzer analyzer) { singleton = analyzer; } - - public abstract LanguageInfo analyze(String utterance); -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/LanguageInfo.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/LanguageInfo.java deleted file mode 100644 index 1bece1d49b..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/LanguageInfo.java +++ /dev/null @@ -1,449 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.Joiner; -import fig.basic.IntPair; -import fig.basic.LispTree; -import fig.basic.MemUsage; - -import java.util.*; - -/** - * Represents an linguistic analysis of a sentence (provided by some LanguageAnalyzer). - * - * @author akchou - */ -@JsonIgnoreProperties(ignoreUnknown = true) -@JsonInclude(JsonInclude.Include.NON_NULL) -public class LanguageInfo implements MemUsage.Instrumented { - - // Tokenization of input. - @JsonProperty - public final List tokens; - @JsonProperty - public final List lemmaTokens; // Lemmatized version - - // Syntactic information from JavaNLP. - @JsonProperty - public final List posTags; // POS tags - @JsonProperty - public final List nerTags; // NER tags - @JsonProperty - public final List nerValues; // NER values (contains times, dates, etc.) 
- - private Map lemmaSpans; - private Set lowercasedSpans; - - - - public static class DependencyEdge { - @JsonProperty - public final String label; // Dependency label - @JsonProperty - public final int modifier; // Position of modifier - - @JsonCreator - public DependencyEdge(@JsonProperty("label") String label, @JsonProperty("modifier") int modifier) { - this.label = label; - this.modifier = modifier; - } - - @Override - public String toString() { - return label + "->" + modifier; - } - } - - @JsonProperty - // Dependencies of each token, represented as a (relation, parentIndex) pair - public final List> dependencyChildren; - - public LanguageInfo() { - this(new ArrayList(), - new ArrayList(), - new ArrayList(), - new ArrayList(), - new ArrayList(), - new ArrayList>()); - } - - @JsonCreator - public LanguageInfo(@JsonProperty("tokens") List tokens, - @JsonProperty("lemmaTokens") List lemmaTokens, - @JsonProperty("posTags") List posTags, - @JsonProperty("nerTags") List nerTags, - @JsonProperty("nerValues") List nerValues, - @JsonProperty("dependencyChildren") List> dependencyChildren) { - this.tokens = tokens; - this.lemmaTokens = lemmaTokens; - this.posTags = posTags; - this.nerTags = nerTags; - this.nerValues = nerValues; - this.dependencyChildren = dependencyChildren; - } - - // Return a string representing the tokens between start and end. 
- public String phrase(int start, int end) { - return sliceSequence(tokens, start, end); - } - public String lemmaPhrase(int start, int end) { - return sliceSequence(lemmaTokens, start, end); - } - public String posSeq(int start, int end) { - return sliceSequence(posTags, start, end); - } - - public String canonicalPosSeq(int start, int end) { - if (start >= end) throw new RuntimeException("Bad indices, start=" + start + ", end=" + end); - if (end - start == 1) return LanguageUtils.getCanonicalPos(posTags.get(start)); - StringBuilder out = new StringBuilder(); - for (int i = start; i < end; i++) { - if (out.length() > 0) out.append(' '); - out.append(LanguageUtils.getCanonicalPos(posTags.get(i))); - } - return out.toString(); - } - public String nerSeq(int start, int end) { - return sliceSequence(nerTags, start, end); - } - - private static String sliceSequence(List items, - int start, - int end) { - if (start >= end) throw new RuntimeException("Bad indices, start=" + start + ", end=" + end); - if (end - start == 1) return items.get(start); - StringBuilder out = new StringBuilder(); - for (int i = start; i < end; i++) { - if (out.length() > 0) out.append(' '); - out.append(items.get(i)); - } - return out.toString(); - } - - // If all the tokens in [start, end) have the same nerValues, but not - // start - 1 and end + 1 (in other words, [start, end) is maximal), then return - // the normalizedTag. Example: queryNerTag = "DATE". 
- public String getNormalizedNerSpan(String queryTag, int start, int end) { - String value = nerValues.get(start); - if (value == null) return null; - if (!queryTag.equals(nerTags.get(start))) return null; - if (start - 1 >= 0 && value.equals(nerValues.get(start - 1))) return null; - if (end < nerValues.size() && value.equals(nerValues.get(end))) return null; - for (int i = start + 1; i < end; i++) - if (!value.equals(nerValues.get(i))) return null; - value = omitComparative(value); - return value; - } - - private String omitComparative(String value) { - if (value.startsWith("<=") || value.startsWith(">=")) - return value.substring(2); - if (value.startsWith("<") || value.startsWith(">")) - return value.substring(1); - return value; - } - - public String getCanonicalPos(int index) { - if (index == -1) return "OUT"; - return LanguageUtils.getCanonicalPos(posTags.get(index)); - } - - public boolean equalTokens(LanguageInfo other) { - if (tokens.size() != other.tokens.size()) - return false; - for (int i = 0; i < tokens.size(); ++i) { - if (!tokens.get(i).equals(other.tokens.get(i))) - return false; - } - return true; - } - - public boolean equalLemmas(LanguageInfo other) { - if (lemmaTokens.size() != other.lemmaTokens.size()) - return false; - for (int i = 0; i < tokens.size(); ++i) { - if (!lemmaTokens.get(i).equals(other.lemmaTokens.get(i))) - return false; - } - return true; - } - - public int numTokens() { - return tokens.size(); - } - - public LanguageInfo remove(int startIndex, int endIndex) { - - if (startIndex > endIndex || startIndex < 0 || endIndex > numTokens()) - throw new RuntimeException("Illegal start or end index, start: " + startIndex + ", end: " + endIndex + ", info size: " + numTokens()); - - LanguageInfo res = new LanguageInfo(); - for (int i = 0; i < numTokens(); ++i) { - if (i < startIndex || i >= endIndex) { - res.tokens.add(this.tokens.get(i)); - res.lemmaTokens.add(this.lemmaTokens.get(i)); - res.nerTags.add(this.nerTags.get(i)); - 
res.nerValues.add(this.nerValues.get(i)); - res.posTags.add(this.posTags.get(i)); - } - } - return res; - } - - public void addSpan(LanguageInfo other, int start, int end) { - for (int i = start; i < end; ++i) { - this.tokens.add(other.tokens.get(i)); - this.lemmaTokens.add(other.lemmaTokens.get(i)); - this.posTags.add(other.posTags.get(i)); - this.nerTags.add(other.nerTags.get(i)); - this.nerValues.add(other.nerValues.get(i)); - } - } - - public List getSpanProperties(int start, int end) { - List res = new ArrayList(); - res.add("lemmas=" + lemmaPhrase(start, end)); - res.add("pos=" + posSeq(start, end)); - res.add("ner=" + nerSeq(start, end)); - return res; - } - - public void addWordInfo(WordInfo wordInfo) { - this.tokens.add(wordInfo.token); - this.lemmaTokens.add(wordInfo.lemma); - this.posTags.add(wordInfo.pos); - this.nerTags.add(wordInfo.nerTag); - this.nerValues.add(wordInfo.nerValue); - } - - public void addWordInfos(List wordInfos) { - for (WordInfo wInfo : wordInfos) - addWordInfo(wInfo); - } - - public WordInfo getWordInfo(int i) { - return new WordInfo(tokens.get(i), lemmaTokens.get(i), posTags.get(i), nerTags.get(i), nerValues.get(i)); - } - - /** - * returns spans of named entities - * @return - */ - public Set getNamedEntitySpans() { - Set res = new LinkedHashSet(); - int start = -1; - String prevTag = "O"; - - for (int i = 0; i < nerTags.size(); ++i) { - String currTag = nerTags.get(i); - if (currTag.equals("O")) { - if (!prevTag.equals("O")) { - res.add(new IntPair(start, i)); - start = -1; - } - } else { // currNe is not "O" - if (!currTag.equals(prevTag)) { - if (!prevTag.equals("O")) { - res.add(new IntPair(start, i)); - } - start = i; - } - } - prevTag = currTag; - } - if (start != -1) - res.add(new IntPair(start, nerTags.size())); - return res; - } - - /** - * returns spans of named entities - * @return - */ - public Set getProperNounSpans() { - Set res = new LinkedHashSet(); - int start = -1; - String prevTag = "O"; - - for (int i = 0; i < 
posTags.size(); ++i) { - String currTag = posTags.get(i); - if (LanguageUtils.isProperNoun(currTag)) { - if (!LanguageUtils.isProperNoun(prevTag)) - start = i; - } else { // curr tag is not proper noun - if (LanguageUtils.isProperNoun(prevTag)) { - res.add(new IntPair(start, i)); - start = -1; - } - } - prevTag = currTag; - } - if (start != -1) - res.add(new IntPair(start, posTags.size())); - return res; - } - - public Set getNamedEntitiesAndProperNouns() { - Set res = getNamedEntitySpans(); - res.addAll(getProperNounSpans()); - return res; - } - - public Map getLemmaSpans() { - if (lemmaSpans == null) { - lemmaSpans = new HashMap<>(); - for (int i = 0; i < numTokens() - 1; ++i) { - for (int j = i + 1; j < numTokens(); ++j) - lemmaSpans.put(lemmaPhrase(i, j), new IntPair(i, j)); - } - } - return lemmaSpans; - } - - public Set getLowerCasedSpans() { - if (lowercasedSpans == null) { - lowercasedSpans = new HashSet<>(); - for (int i = 0; i < numTokens() - 1; ++i) { - for (int j = i + 1; j < numTokens(); ++j) - lowercasedSpans.add(phrase(i, j).toLowerCase()); - } - } - return lowercasedSpans; - } - - public boolean matchLemmas(List wordInfos) { - for (int i = 0; i < numTokens(); ++i) { - if (matchLemmasFromIndex(wordInfos, i)) - return true; - } - return false; - } - - private boolean matchLemmasFromIndex(List wordInfos, int start) { - if (start + wordInfos.size() > numTokens()) - return false; - for (int j = 0; j < wordInfos.size(); ++j) { - if (!wordInfos.get(j).lemma.equals(lemmaTokens.get(start + j))) - return false; - } - return true; - } - - /** - * Static methods with langauge utilities - * @author jonathanberant - * - */ - public static class LanguageUtils { - - public static boolean sameProperNounClass(String noun1, String noun2) { - if ((noun1.equals("NNP") || noun1.equals("NNPS")) && - (noun2.equals("NNP") || noun2.equals("NNPS"))) - return true; - return false; - } - - public static boolean isProperNoun(String pos) { - return pos.startsWith("NNP"); - } - - 
public static boolean isSuperlative(String pos) { return pos.equals("RBS") || pos.equals("JJS"); } - public static boolean isComparative(String pos) { return pos.equals("RBR") || pos.equals("JJR"); } - - - public static boolean isEntity(LanguageInfo info, int i) { - return isProperNoun(info.posTags.get(i)) || !(info.nerTags.get(i).equals("O")); - } - - public static boolean isNN(String pos) { - return pos.startsWith("NN") && !pos.startsWith("NNP"); - } - - public static boolean isContentWord(String pos) { - return (pos.startsWith("N") || pos.startsWith("V") || pos.startsWith("J")); - } - - public static String getLemmaPhrase(List wordInfos) { - String[] res = new String[wordInfos.size()]; - for (int i = 0; i < wordInfos.size(); ++i) { - res[i] = wordInfos.get(i).lemma; - } - return Joiner.on(' ').join(res); - } - - public static String getCanonicalPos(String pos) { - if (pos.startsWith("N")) return "N"; - if (pos.startsWith("V")) return "V"; - if (pos.startsWith("W")) return "W"; - return pos; - } - - // Uses a few rules to stem tokens - public static String stem(String a) { - int i = a.indexOf(' '); - if (i != -1) - return stem(a.substring(0, i)) + ' ' + stem(a.substring(i + 1)); - //Maybe we should just use the Stanford stemmer - String res = a; - //hard coded words - if (a.equals("having") || a.equals("has")) res = "have"; - else if (a.equals("using")) res = "use"; - else if (a.equals("including")) res = "include"; - else if (a.equals("beginning")) res = "begin"; - else if (a.equals("utilizing")) res = "utilize"; - else if (a.equals("featuring")) res = "feature"; - else if (a.equals("preceding")) res = "precede"; - //rules - else if (a.endsWith("ing")) res = a.substring(0, a.length() - 3); - else if (a.endsWith("s") && !a.equals("'s")) res = a.substring(0, a.length() - 1); - //don't return an empty string - if (res.length() > 0) return res; - return a; - } - - } - - @Override - public long getBytes() { - return MemUsage.objectSize(MemUsage.pointerSize * 2) + 
MemUsage.getBytes(tokens) + MemUsage.getBytes(lemmaTokens) - + MemUsage.getBytes(posTags) + MemUsage.getBytes(nerTags) + MemUsage.getBytes(nerValues) - + MemUsage.getBytes(lemmaSpans); - } - - public boolean isNumberAndDate(int index) { - return posTags.get(index).equals("CD") && nerTags.get(index).equals("DATE"); - } - - public static boolean isContentWord(String pos) { - return pos.equals("NN") || pos.equals("NNS") || - (pos.startsWith("V") && !pos.equals("VBD-AUX")) || - pos.startsWith("J"); - } - - public static class WordInfo { - public final String token; - public final String lemma; - public final String pos; - public final String nerTag; - public final String nerValue; - public WordInfo(String token, String lemma, String pos, String nerTag, String nerValue) { - this.token = token; this.lemma = lemma; this.pos = pos; this.nerTag = nerTag; this.nerValue = nerValue; - } - - public String toString() { - return toLispTree().toString(); - } - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("wordinfo"); - tree.addChild(token); - tree.addChild(lemma); - tree.addChild(pos); - tree.addChild(nerTag); - return tree; - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Learner.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Learner.java deleted file mode 100644 index 5dd0f3f7a6..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Learner.java +++ /dev/null @@ -1,366 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.google.common.base.Joiner; -import com.google.common.collect.Maps; -import fig.basic.*; -import fig.exec.Execution; - -import java.io.PrintWriter; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - * The main learning loop. Goes over a dataset multiple times, calling the - * parser and updating parameters. 
- * - * @author Percy Liang - */ -public class Learner { - public static class Options { - @Option(gloss = "Number of iterations to train") - public int maxTrainIters = 0; - - @Option(gloss = "When using mini-batch updates for SGD, this is the batch size") - public int batchSize = 1; // Default is SGD - - @Option(gloss = "Write predDerivations to examples file (huge)") - public boolean outputPredDerivations = false; - @Option(gloss = "Write predicted values to a TSV file") - public boolean outputPredValues = false; - - @Option(gloss = "Dump all features and compatibility scores") - public boolean dumpFeaturesAndCompatibility = false; - - @Option(gloss = "Whether to add feedback") - public boolean addFeedback = false; - @Option(gloss = "Whether to sort on feedback") - public boolean sortOnFeedback = true; - - @Option(gloss = "Verbosity") public int verbose = 0; - - @Option(gloss = "Initialize with these parameters") - public List> initialization; - - @Option(gloss = "Whether to update weights") - public boolean updateWeights = true; - @Option(gloss = "Whether to check gradient") - public boolean checkGradient = false; - - @Option(gloss = "Whether to skip the 'train' group in the last iteration and non-'train' groups in other iterations") - public boolean skipUnnecessaryGroups = false; - - @Option(gloss = "Number of threads to parallelize") - public int numParallelThreads = 1; - } - public static Options opts = new Options(); - - private Parser parser; - private final Params params; - private final Dataset dataset; - private final PrintWriter eventsOut; // For printing a machine-readable log - private final List semFuncsToUpdate; - - public Learner(Parser parser, Params params, Dataset dataset) { - this.parser = parser; - this.params = params; - this.dataset = dataset; - this.eventsOut = IOUtils.openOutAppendEasy(Execution.getFile("learner.events")); - if (opts.initialization != null && this.params.isEmpty()) - this.params.init(opts.initialization); - - // Collect 
all semantic functions to update. - semFuncsToUpdate = new ArrayList<>(); - for (Rule rule : parser.grammar.getRules()) { - SemanticFn currSemFn = rule.getSem(); - boolean toAdd = true; - for (SemanticFn semFuncToUpdate : semFuncsToUpdate) { - if (semFuncToUpdate.getClass().equals(currSemFn.getClass())) { - toAdd = false; - break; - } - } - if (toAdd) - semFuncsToUpdate.add(currSemFn); - } - } - - public void learn() { - learn(opts.maxTrainIters, Maps.newHashMap()); - } - - /** - * @param evaluations Evaluations per iteration per group. - */ - public void learn(int numIters, Map> evaluations) { - LogInfo.begin_track("Learner.learn()"); - // if when we start we have parameters already - need to sort the semantic functions. - if (!params.isEmpty()) - sortOnFeedback(); - // For each iteration, go through the groups and parse (updating if train). - for (int iter = 0; iter <= numIters; iter++) { - - LogInfo.begin_track("Iteration %s/%s", iter, numIters); - Execution.putOutput("iter", iter); - - // Averaged over all iterations - // Group -> evaluation for that group. 
- Map meanEvaluations = Maps.newHashMap(); - - // Clear - for (String group : dataset.groups()) - meanEvaluations.put(group, new Evaluation()); - - // Test and train - for (String group : dataset.groups()) { - boolean lastIter = (iter == numIters); - boolean updateWeights = opts.updateWeights && group.equals("train") && !lastIter; // Don't train on last iteration - if (opts.skipUnnecessaryGroups) { - if ((group.equals("train") && lastIter) || (!group.equals("train") && !lastIter)) - continue; - } - // Allow the parser to change behavior based on current group and iteration - parser.onBeginDataGroup(iter, numIters, group); - Evaluation eval = processExamples(iter, group, dataset.examples(group), updateWeights); - MapUtils.addToList(evaluations, group, eval); - meanEvaluations.get(group).add(eval); - StopWatchSet.logStats(); - writeParams(iter); - } - LogInfo.end_track(); - } - LogInfo.end_track(); - } - - private void writeParams(int iter) { - String path = Execution.getFile("params." + iter); - if (path != null) { - params.write(path); - Utils.systemHard("ln -sf params." + iter + " " + Execution.getFile("params")); - } - } - - public void onlineLearnExample(Example ex) { - LogInfo.begin_track("onlineLearnExample: %s derivations", ex.predDerivations.size()); - HashMap counts = new HashMap<>(); - for (Derivation deriv : ex.predDerivations) - deriv.compatibility = parser.valueEvaluator.getCompatibility(ex.targetValue, deriv.value); - ParserState.computeExpectedCounts(ex.predDerivations, counts); - params.update(counts); - LogInfo.end_track(); - } - - public void onlineLearnExampleByFormula(Example ex, List formulas) { - HashMap counts = new HashMap<>(); - for (Derivation deriv : ex.predDerivations) - deriv.compatibility = formulas.contains(deriv.formula)? 
1 : 0; - ParserState.computeExpectedCounts(ex.predDerivations, counts); - params.update(counts); - } - - private Evaluation processExamples(int iter, String group, - List examples, boolean computeExpectedCounts) { - Evaluation evaluation = new Evaluation(); - - if (examples.size() == 0) - return evaluation; - - final String prefix = "iter=" + iter + "." + group; - - Execution.putOutput("group", group); - LogInfo.begin_track_printAll( - "Processing %s: %s examples", prefix, examples.size()); - LogInfo.begin_track("Examples"); - - if (opts.numParallelThreads > 1) { - // Parallelize! - Parallelizer paral = new Parallelizer<>(opts.numParallelThreads); - LearnerParallelProcessor processor = new LearnerParallelProcessor( - parser, params, prefix, computeExpectedCounts, evaluation); - LogInfo.begin_threads(); - paral.process(examples, processor); - LogInfo.end_threads(); - - } else { - // Original code (single-threaded) - - Map counts = new HashMap<>(); - int batchSize = 0; - for (int e = 0; e < examples.size(); e++) { - - Example ex = examples.get(e); - - LogInfo.begin_track_printAll( - "%s: example %s/%s: %s", prefix, e, examples.size(), ex.id); - ex.log(); - Execution.putOutput("example", e); - - ParserState state = parseExample(params, ex, computeExpectedCounts); - if (computeExpectedCounts) { - if (opts.checkGradient) { - LogInfo.begin_track("Checking gradient"); - checkGradient(ex, state); - LogInfo.end_track(); - } - - SempreUtils.addToDoubleMap(counts, state.expectedCounts); - - batchSize++; - if (batchSize >= opts.batchSize) { - // Gathered enough examples, update parameters - updateWeights(counts); - batchSize = 0; - } - } - - LogInfo.logs("Current: %s", ex.evaluation.summary()); - evaluation.add(ex.evaluation); - LogInfo.logs("Cumulative(%s): %s", prefix, evaluation.summary()); - - printLearnerEventsIter(ex, iter, group); - LogInfo.end_track(); - if (opts.addFeedback && computeExpectedCounts) - addFeedback(ex); - - // Write out examples and predictions - if 
(opts.outputPredDerivations) { - ExampleUtils.writeParaphraseSDF(iter, group, ex, true); - } - if (opts.outputPredValues) { - ExampleUtils.writePredictionTSV(iter, group, ex); - } - - // To save memory - ex.predDerivations.clear(); - } - - if (computeExpectedCounts && batchSize > 0) - updateWeights(counts); - - } - - params.finalizeWeights(); - if (opts.sortOnFeedback && computeExpectedCounts) - sortOnFeedback(); - - LogInfo.end_track(); - logEvaluationStats(evaluation, prefix); - evaluation.putOutput(prefix.replace('.', '-')); - printLearnerEventsSummary(evaluation, iter, group); - ExampleUtils.writeEvaluationSDF(iter, group, evaluation, examples.size()); - LogInfo.end_track(); - return evaluation; - } - - private void checkGradient(Example ex, ParserState state) { - double eps = 1e-2; - for (String feature : state.expectedCounts.keySet()) { - LogInfo.begin_track("feature=%s", feature); - double computedGradient = state.expectedCounts.get(feature); - Params perturbedParams = this.params.copyParams(); - perturbedParams.getWeights().put(feature, perturbedParams.getWeight(feature) + eps); - ParserState perturbedState = parseExample(perturbedParams, ex, true); - double checkedGradient = (perturbedState.objectiveValue - state.objectiveValue) / eps; - LogInfo.logs("Learner.checkGradient(): weight=%s, pertWeight=%s, obj=%s, pertObj=%s, feature=%s, computed=%s, checked=%s, diff=%s", - params.getWeight(feature), perturbedParams.getWeight(feature), - state.objectiveValue, perturbedState.objectiveValue, - feature, - computedGradient, checkedGradient, Math.abs(checkedGradient - computedGradient)); - LogInfo.end_track(); - } - } - - private void sortOnFeedback() { - for (SemanticFn semFn : semFuncsToUpdate) { - semFn.sortOnFeedback(parser.getSearchParams(params)); - } - } - - private void addFeedback(Example ex) { - for (SemanticFn semFn : semFuncsToUpdate) { - semFn.addFeedback(ex); - } - } - - private ParserState parseExample(Params params, Example ex, boolean 
computeExpectedCounts) { - StopWatchSet.begin("Parser.parse"); - ParserState res = this.parser.parse(params, ex, computeExpectedCounts); - StopWatchSet.end(); - return res; - } - - private void updateWeights(Map counts) { - StopWatchSet.begin("Learner.updateWeights"); - LogInfo.begin_track("Updating learner weights"); - double sum = 0; - for (double v : counts.values()) sum += v * v; - if (opts.verbose >= 2) - SempreUtils.logMap(counts, "gradient"); - LogInfo.logs("L2 norm: %s", Math.sqrt(sum)); - params.update(counts); - if (opts.verbose >= 2) - params.log(); - counts.clear(); - LogInfo.end_track(); - StopWatchSet.end(); - } - - // Print summary over all examples - private void logEvaluationStats(Evaluation evaluation, String prefix) { - LogInfo.logs("Stats for %s: %s", prefix, evaluation.summary()); - // evaluation.add(LexiconFn.lexEval); - evaluation.logStats(prefix); - evaluation.putOutput(prefix); - evaluation.putOutput(prefix.replaceAll("iter=", "").replace('.', '_')); - } - - private void printLearnerEventsIter(Example ex, int iter, String group) { - if (eventsOut == null) - return; - List fields = new ArrayList<>(); - fields.add("iter=" + iter); - fields.add("group=" + group); - fields.add("utterance=" + ex.utterance); - fields.add("targetValue=" + ex.targetValue); - if (ex.predDerivations.size() > 0) { - Derivation deriv = ex.predDerivations.get(0); - fields.add("predValue=" + deriv.value); - fields.add("predFormula=" + deriv.formula); - } - fields.add(ex.evaluation.summary("\t")); - eventsOut.println(Joiner.on('\t').join(fields)); - eventsOut.flush(); - - // Print out features and the compatibility across all the derivations - if (opts.dumpFeaturesAndCompatibility) { - for (Derivation deriv : ex.predDerivations) { - fields = new ArrayList<>(); - fields.add("iter=" + iter); - fields.add("group=" + group); - fields.add("utterance=" + ex.utterance); - Map features = new HashMap<>(); - deriv.incrementAllFeatureVector(1, features); - for (String f : 
features.keySet()) { - double v = features.get(f); - fields.add(f + "=" + v); - } - fields.add("comp=" + deriv.compatibility); - eventsOut.println(Joiner.on('\t').join(fields)); - } - } - } - - private void printLearnerEventsSummary(Evaluation evaluation, - int iter, - String group) { - if (eventsOut == null) - return; - List fields = new ArrayList<>(); - fields.add("iter=" + iter); - fields.add("group=" + group); - fields.add(evaluation.summary("\t")); - eventsOut.println(Joiner.on('\t').join(fields)); - eventsOut.flush(); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/LearnerParallelProcessor.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/LearnerParallelProcessor.java deleted file mode 100644 index 0f7f147e91..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/LearnerParallelProcessor.java +++ /dev/null @@ -1,77 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.util.*; - -import fig.basic.Evaluation; -import fig.basic.LogInfo; -import fig.basic.Parallelizer; -import fig.basic.StopWatchSet; -import fig.exec.Execution; - -/** - * Parallel version of the Learner. - * - * Most of the codes are copied from the paraphrase package. 
- * - * @author ppasupat - */ -public class LearnerParallelProcessor implements Parallelizer.Processor { - - private final Parser parser; - private final String prefix; - private final boolean computeExpectedCounts; - private Params params; // this is common to threads and should be synchronized - private Evaluation evaluation; // this is common to threads and should be synchronized - - public LearnerParallelProcessor(Parser parser, Params params, String prefix, boolean computeExpectedCounts, Evaluation evaluation) { - this.prefix = prefix; - this.parser = parser; - this.computeExpectedCounts = computeExpectedCounts; - this.params = params; - this.evaluation = evaluation; - } - - @Override - public void process(Example ex, int i, int n) { - LogInfo.begin_track_printAll( - "%s: example %s/%s: %s", prefix, i, n, ex.id); - ex.log(); - Execution.putOutput("example", i); - - StopWatchSet.begin("Parser.parse"); - ParserState state = parser.parse(params, ex, computeExpectedCounts); - StopWatchSet.end(); - - if (computeExpectedCounts) { - Map counts = new HashMap<>(); - SempreUtils.addToDoubleMap(counts, state.expectedCounts); - - // Gathered enough examples, update parameters - StopWatchSet.begin("Learner.updateWeights"); - LogInfo.begin_track("Updating learner weights"); - if (Learner.opts.verbose >= 2) - SempreUtils.logMap(counts, "gradient"); - double sum = 0; - for (double v : counts.values()) sum += v * v; - LogInfo.logs("L2 norm: %s", Math.sqrt(sum)); - synchronized (params) { - params.update(counts); - } - counts.clear(); - LogInfo.end_track(); - StopWatchSet.end(); - } - - LogInfo.logs("Current: %s", ex.evaluation.summary()); - synchronized (evaluation) { - evaluation.add(ex.evaluation); - LogInfo.logs("Cumulative(%s): %s", prefix, evaluation.summary()); - } - - LogInfo.end_track(); - - // To save memory - ex.clean(); - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ListValue.java 
b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ListValue.java deleted file mode 100644 index e806255ad2..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ListValue.java +++ /dev/null @@ -1,59 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.util.*; - -import fig.basic.LispTree; -import fig.basic.LogInfo; - -public class ListValue extends Value { - public final List values; - - public ListValue(LispTree tree) { - values = new ArrayList(); - for (int i = 1; i < tree.children.size(); i++) - values.add(Values.fromLispTree(tree.child(i))); - } - - public ListValue(List values) { this.values = values; } - - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("list"); - for (Value value : values) - tree.addChild(value == null ? LispTree.proto.newLeaf(null) : value.toLispTree()); - return tree; - } - - public void log() { - for (Value value : values) - LogInfo.logs("%s", value); - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - ListValue that = (ListValue) o; - return values.equals(that.values); - } - - @Override public int hashCode() { return values.hashCode(); } - - // Sorted on string representation - public ListValue getSorted() { - List sorted = new ArrayList<>(values); - Collections.sort(sorted, - (Value v1, Value v2) -> ( - v1 == null ? "null" : v1.sortString()).compareTo(v2 == null ? "null" : v2.sortString())); - return new ListValue(sorted); - } - - // Unique - public ListValue getUnique() { - List sorted = new ArrayList<>(new HashSet<>(values)); - Collections.sort(sorted, - (Value v1, Value v2) -> ( - v1 == null ? "null" : v1.sortString()).compareTo(v2 == null ? 
"null" : v2.sortString())); - return new ListValue(sorted); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Main.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Main.java deleted file mode 100644 index c48fb0fe6d..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Main.java +++ /dev/null @@ -1,52 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.lang.reflect.Constructor; - - -import fig.basic.Option; -import fig.exec.Execution; - -/** - * Entry point for the semantic parser. - * - * @author Percy Liang - */ -public class Main implements Runnable { - @Option public boolean interactive = false; - @Option public boolean server = false; - @Option public String masterType = "edu.stanford.nlp.sempre.Master"; - - public void run() { - Builder builder = new Builder(); - builder.build(); - - Dataset dataset = new Dataset(); - dataset.read(); - - Learner learner = new Learner(builder.parser, builder.params, dataset); - learner.learn(); - - if (server || interactive) { - Master master = createMaster(masterType, builder); - if (server) - master.runServer(); - if (interactive) - master.runInteractivePrompt(); - } - } - - public Master createMaster(String masterType, Builder builder) { - try { - Class masterClass = Class.forName(masterType); - Constructor constructor = masterClass.getConstructor(Builder.class); - return (Master)constructor.newInstance(builder); - } catch (Throwable t) { - t.printStackTrace(); - } - return null; - } - - public static void main(String[] args) { - Execution.run(args, "Main", new Main(), Master.getOptionsParser()); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/MarkFormula.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/MarkFormula.java deleted file mode 100644 index 4cf609348e..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/MarkFormula.java +++ /dev/null @@ -1,69 +0,0 @@ -package edu.stanford.nlp.sempre; - -import 
com.google.common.base.Function; -import fig.basic.LispTree; - -import java.util.List; - -/** - * Usage: - * (mark |var| |body|). - * For example: - * (mark x (and person (likes (var x)))) - * is the set of people that like themselves. - * Like lambda abstraction, marking introduces a variable, - * but has the same type as the |body|. - * - * Percy Liang - */ -public class MarkFormula extends Formula { - public final String var; - public final Formula body; - - public MarkFormula(String var, Formula body) { - this.var = var; - this.body = body; - } - - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("mark"); - tree.addChild(var); - tree.addChild(body.toLispTree()); - return tree; - } - - @Override - public void forEach(Function func) { - if (!func.apply(this)) body.forEach(func); - } - - @Override - public Formula map(Function func) { - Formula result = func.apply(this); - return result == null ? new MarkFormula(var, body.map(func)) : result; - } - - @Override - public List mapToList(Function> func, boolean alwaysRecurse) { - List res = func.apply(this); - if (res.isEmpty() || alwaysRecurse) - res.addAll(body.mapToList(func, alwaysRecurse)); - return res; - } - - @SuppressWarnings({"equalshashcode"}) - @Override - public boolean equals(Object thatObj) { - if (!(thatObj instanceof MarkFormula)) return false; - MarkFormula that = (MarkFormula) thatObj; - return this.var.equals(that.var) && this.body.equals(that.body); - } - - public int computeHashCode() { - int hash = 0x7ed55d16; - hash = hash * 0xd3a2646c + var.hashCode(); - hash = hash * 0xd3a2646c + body.hashCode(); - return hash; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Master.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Master.java deleted file mode 100644 index e51f0cf585..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Master.java +++ /dev/null @@ -1,471 +0,0 @@ -package edu.stanford.nlp.sempre; - -import 
com.google.common.base.Joiner; -import com.google.common.base.Strings; -import com.google.common.collect.Lists; -import fig.basic.*; -import jline.console.ConsoleReader; - -import java.io.IOException; -import java.io.PrintWriter; -import java.io.StringWriter; -import java.lang.reflect.Field; -import java.util.*; - -import java.io.*; - -/** - * A Master manages multiple sessions. Currently, they all share the same model, - * but they need not in the future. - */ -public class Master { - public static class Options { - @Option(gloss = "Execute these commands before starting") - public List scriptPaths = Lists.newArrayList(); - @Option(gloss = "Execute these commands before starting (after scriptPaths)") - public List commands = Lists.newArrayList(); - @Option(gloss = "Write a log of this session to this path") - public String logPath; - - @Option(gloss = "Print help on startup") - public boolean printHelp = true; - - @Option(gloss = "Number of exchanges to keep in the context") - public int contextMaxExchanges = 0; - - @Option(gloss = "Online update weights on new examples.") - public boolean onlineLearnExamples = true; - @Option(gloss = "Write out new examples to this directory") - public String newExamplesPath; - @Option(gloss = "Write out new parameters to this directory") - public String newParamsPath; - - @Option(gloss = "Write out new grammar rules") - public String newGrammarPath; - } - public static Options opts = new Options(); - - public class Response { - // Example that was parsed, if any. 
- public Example ex; - - // Which derivation we're selecting to show - public int candidateIndex = -1; - - // Detailed information - public Map stats = new LinkedHashMap<>(); - public List lines = new ArrayList<>(); - - public String getFormulaAnswer() { - if (ex.getPredDerivations().size() == 0) - return "(no answer)"; - else if (candidateIndex == -1) - return "(not selected)"; - else { - Derivation deriv = getDerivation(); - return deriv.getFormula() + " => " + deriv.getValue(); - } - } - public String getAnswer() { - if (ex.getPredDerivations().size() == 0) - return "(no answer)"; - else if (candidateIndex == -1) - return "(not selected)"; - else { - Derivation deriv = getDerivation(); - deriv.ensureExecuted(builder.executor, ex.context); - return deriv.getValue().toString(); - } - } - public List getLines() { return lines; } - public Example getExample() { return ex; } - public int getCandidateIndex() { return candidateIndex; } - - public Derivation getDerivation() { - return ex.getPredDerivations().get(candidateIndex); - } - } - - protected Builder builder; - protected Learner learner; - protected HashMap sessions = new LinkedHashMap<>(); - - public Master(Builder builder) { - this.builder = builder; - this.learner = new Learner(builder.parser, builder.params, new Dataset()); - } - - public Params getParams() { return builder.params; } - - // Return the unique session identified by session id |id|. - // Create a new session if one doesn't exist. 
- public Session getSession(String id) { - Session session = sessions.get(id); - if (session == null) { - session = new Session(id); - for (String path : opts.scriptPaths) - processScript(session, path); - for (String command : opts.commands) - processQuery(session, command); - if (id != null) - sessions.put(id, session); - } - return session; - } - - protected void printHelp() { - LogInfo.log("Enter an utterance to parse or one of the following commands:"); - LogInfo.log(" (help): show this help message"); - LogInfo.log(" (status): prints out status of the system"); - LogInfo.log(" (get |option|): get a command-line option (e.g., (get Parser.verbose))"); - LogInfo.log(" (set |option| |value|): set a command-line option (e.g., (set Parser.verbose 5))"); - LogInfo.log(" (reload): reload the grammar/parameters"); - LogInfo.log(" (grammar): prints out the grammar"); - LogInfo.log(" (params [|file|]): dumps all the model parameters"); - LogInfo.log(" (select |candidate index|): show information about the |index|-th candidate of the last utterance."); - LogInfo.log(" (accept |candidate index|): record the |index|-th candidate as the correct answer for the last utterance."); - LogInfo.log(" (answer |answer|): record |answer| as the correct answer for the last utterance (e.g., (answer (list (number 3))))."); - LogInfo.log(" (rule |lhs| (|rhs_1| ... 
|rhs_k|) |sem|): adds a rule to the grammar (e.g., (rule $Number ($TOKEN) (NumberFn)))"); - LogInfo.log(" (type |logical form|): perform type inference (e.g., (type (number 3)))"); - LogInfo.log(" (execute |logical form|): execute the logical form (e.g., (execute (call + (number 3) (number 4))))"); - LogInfo.log(" (def |key| |value|): define a macro to replace |key| with |value| in all commands (e.g., (def type fb:type.object type)))"); - LogInfo.log(" (context [(user |user|) (date |date|) (exchange |exchange|) (graph |graph|)]): prints out or set the context"); - LogInfo.log("Press Ctrl-D to exit."); - } - - public void runServer() { - Server server = new Server(this); - server.run();; - } - - public void runInteractivePrompt() { - Session session = getSession("stdin"); - - if (opts.printHelp) - printHelp(); - try { - ConsoleReader reader = new ConsoleReader(); - reader.setPrompt("#SEMPRE# "); - String line; - while ((line = reader.readLine()) != null) { - int indent = LogInfo.getIndLevel(); - try { - processQuery(session, line); - } catch (Throwable t) { - while (LogInfo.getIndLevel() > indent) - LogInfo.end_track(); - t.printStackTrace(); - } - } - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - // Read LispTrees from |scriptPath| and process each of them. - public void processScript(Session session, String scriptPath) { - Iterator it = LispTree.proto.parseFromFile(scriptPath); - while (it.hasNext()) { - LispTree tree = it.next(); - processQuery(session, tree.toString()); - } - } - - // Process user's input |line| - // Currently, synchronize a very crude level. - // In the future, refine this. - // Currently need the synchronization because of writing to stdout. - public synchronized Response processQuery(Session session, String line) { - line = line.trim(); - Response response = new Response(); - - // Capture log output and put it into response. - // Hack: modifying a static variable to capture the logging. 
- // Make sure we're synchronized! - StringWriter stringOut = new StringWriter(); - LogInfo.setFileOut(new PrintWriter(stringOut)); - - if (line.startsWith("(")) - handleCommand(session, line, response); - else - handleUtterance(session, line, response); - - // Clean up - for (String outLine : stringOut.toString().split("\n")) - response.lines.add(outLine); - LogInfo.setFileOut(null); - - // Log interaction to disk - if (!Strings.isNullOrEmpty(opts.logPath)) { - PrintWriter out = IOUtils.openOutAppendHard(opts.logPath); - out.println( - Joiner.on("\t").join( - Lists.newArrayList( - "date=" + new Date().toString(), - "sessionId=" + session.id, - "remote=" + session.remoteHost, - "format=" + session.format, - "query=" + line, - "response=" + summaryString(response)))); - out.close(); - } - - return response; - } - - String summaryString(Response response) { - if (response.getExample() != null) - return response.getFormulaAnswer(); - if (response.getLines().size() > 0) - return response.getLines().get(0); - return null; - } - - private void handleUtterance(Session session, String query, Response response) { - session.updateContext(); - - // Create example - Example.Builder b = new Example.Builder(); - b.setId("session:" + session.id); - b.setUtterance(query); - b.setContext(session.context); - Example ex = b.createExample(); - - ex.preprocess(); - - // Parse! 
- builder.parser.parse(builder.params, ex, false); - - response.ex = ex; - ex.logWithoutContext(); - if (ex.predDerivations.size() > 0) { - response.candidateIndex = 0; - printDerivation(response.getDerivation()); - } - session.updateContext(ex, opts.contextMaxExchanges); - } - - public void printDerivation(Derivation deriv) { - // Print features - HashMap featureVector = new HashMap<>(); - deriv.incrementAllFeatureVector(1, featureVector); - FeatureVector.logFeatureWeights("Pred", featureVector, builder.params); - - // Print choices - Map choices = new LinkedHashMap<>(); - deriv.incrementAllChoices(1, choices); - FeatureVector.logChoices("Pred", choices); - - // Print denotation - LogInfo.begin_track("Top formula"); - LogInfo.logs("%s", deriv.formula); - LogInfo.end_track(); - if (deriv.value != null) { - LogInfo.begin_track("Top value"); - deriv.value.log(); - LogInfo.end_track(); - } - } - - - private void handleCommand(Session session, String line, Response response) { - LispTree tree = LispTree.proto.parseFromString(line); - tree = builder.grammar.applyMacros(tree); - - String command = tree.child(0).value; - - if (command == null || command.equals("help")) { - printHelp(); - } else if (command.equals("status")) { - LogInfo.begin_track("%d sessions", sessions.size()); - for (Session otherSession : sessions.values()) - LogInfo.log(otherSession + (session == otherSession ? 
" *" : "")); - LogInfo.end_track(); - StopWatchSet.logStats(); - } else if (command.equals("reload")) { - builder.build(); - } else if (command.equals("grammar")) { - for (Rule rule : builder.grammar.rules) - LogInfo.logs("%s", rule.toLispTree()); - } else if (command.equals("params")) { - if (tree.children.size() == 1) { - builder.params.write(LogInfo.stdout); - if (LogInfo.getFileOut() != null) - builder.params.write(LogInfo.getFileOut()); - } else { - builder.params.write(tree.child(1).value); - } - } else if (command.equals("get")) { - if (tree.children.size() != 2) { - LogInfo.log("Invalid usage: (get |option|)"); - return; - } - String option = tree.child(1).value; - LogInfo.logs("%s", getOptionsParser().getValue(option)); - } else if (command.equals("set")) { - if (tree.children.size() != 3) { - LogInfo.log("Invalid usage: (set |option| |value|)"); - return; - } - String option = tree.child(1).value; - String value = tree.child(2).value; - if (!getOptionsParser().parse(new String[] {"-" + option, value})) - LogInfo.log("Unknown option: " + option); - } else if (command.equals("select") || command.equals("accept") || - command.equals("s") || command.equals("a")) { - // Select an answer - if (tree.children.size() != 2) { - LogInfo.logs("Invalid usage: (%s |candidate index|)", command); - return; - } - - Example ex = session.getLastExample(); - if (ex == null) { - LogInfo.log("No examples - please enter a query first."); - return; - } - int index = Integer.parseInt(tree.child(1).value); - if (index < 0 || index >= ex.predDerivations.size()) { - LogInfo.log("Candidate index out of range: " + index); - return; - } - - response.ex = ex; - response.candidateIndex = index; - session.updateContextWithNewAnswer(ex, response.getDerivation()); - printDerivation(response.getDerivation()); - - // Add a training example. While the user selects a particular derivation, there are three ways to interpret this signal: - // 1. This is the correct derivation (Derivation). 
- // 2. This is the correct logical form (Formula). - // 3. This is the correct denotation (Value). - // Currently: - // - Parameters based on the denotation. - // - Grammar rules are induced based on the denotation. - // We always save the logical form and the denotation (but not the entire - // derivation) in the example. - if (command.equals("accept") || command.equals("a")) { - ex.setTargetFormula(response.getDerivation().getFormula()); - ex.setTargetValue(response.getDerivation().getValue()); - ex.setContext(session.getContextExcludingLast()); - addNewExample(ex); - } - } else if (command.equals("answer")) { - if (tree.children.size() != 2) { - LogInfo.log("Missing answer."); - } - - // Set the target value. - Example ex = session.getLastExample(); - if (ex == null) { - LogInfo.log("Please enter a query first."); - return; - } - ex.setTargetValue(Values.fromLispTree(tree.child(1))); - addNewExample(ex); - } else if (command.equals("rule")) { - int n = builder.grammar.rules.size(); - builder.grammar.addStatement(tree.toString()); - for (int i = n; i < builder.grammar.rules.size(); i++) - LogInfo.logs("Added %s", builder.grammar.rules.get(i)); - // Need to update the parser given that the grammar has changed. - builder.parser = null; - builder.buildUnspecified(); - } else if (command.equals("type")) { - LogInfo.logs("%s", TypeInference.inferType(Formulas.fromLispTree(tree.child(1)))); - } else if (command.equals("execute")) { - Example ex = session.getLastExample(); - ContextValue context = (ex != null ? 
ex.context : session.context); - Executor.Response execResponse = builder.executor.execute(Formulas.fromLispTree(tree.child(1)), context); - LogInfo.logs("%s", execResponse.value); - } else if (command.equals("def")) { - builder.grammar.interpretMacroDef(tree); - } else if (command.equals("context")) { - if (tree.children.size() == 1) { - LogInfo.logs("%s", session.context); - } else { - session.context = new ContextValue(tree); - } - } else if (command.equals("loadgraph")) { - if (tree.children.size() != 2 || !tree.child(1).isLeaf()) - throw new RuntimeException("Invalid argument: argument should be a file path"); - KnowledgeGraph graph = NaiveKnowledgeGraph.fromFile(tree.child(1).value); - session.context = new ContextValue(session.context.user, session.context.date, - session.context.exchanges, graph); - } - else { - throw new RuntimeException("Invalid command: " + tree); - } - } - - void addNewExample(Example origEx) { - // Create the new example, but only add relevant information. - Example ex = new Example.Builder() - .setId(origEx.id) - .setUtterance(origEx.utterance) - .setContext(origEx.context) - .setTargetFormula(origEx.targetFormula) - .setTargetValue(origEx.targetValue) - .createExample(); - - if (!Strings.isNullOrEmpty(opts.newExamplesPath)) { - LogInfo.begin_track("Adding new example"); - Dataset.appendExampleToFile(opts.newExamplesPath, ex); - LogInfo.end_track(); - } - - if (opts.onlineLearnExamples) { - LogInfo.begin_track("Updating parameters"); - learner.onlineLearnExample(origEx); - if (!Strings.isNullOrEmpty(opts.newParamsPath)) - builder.params.write(opts.newParamsPath); - LogInfo.end_track(); - } - } - - public static OptionsParser getOptionsParser() { - OptionsParser parser = new OptionsParser(); - // Dynamically figure out which options we need to load - // To specify this: - // java -Dmodules=core,freebase - List modules = Arrays.asList(System.getProperty("modules", "core").split(",")); - - // All options are assumed to be of the form 
opts. - // Read the module-classes.txt file, which specifies which classes are - // associated with each module. - List args = new ArrayList(); - for (String line : IOUtils.readLinesHard("module-classes.txt")) { - - // Example: core edu.stanford.nlp.sempre.Grammar - String[] tokens = line.split(" "); - if (tokens.length != 2) throw new RuntimeException("Invalid: " + line); - String module = tokens[0]; - String className = tokens[1]; - if (!modules.contains(tokens[0])) continue; - - // Group (e.g., Grammar) - String[] classNameTokens = className.split("\\."); - String group = classNameTokens[classNameTokens.length - 1]; - - // Object (e.g., Grammar.opts) - Object opts = null; - try { - for (Field field : Class.forName(className).getDeclaredFields()) { - if (!"opts".equals(field.getName())) continue; - opts = field.get(null); - } - } catch (Throwable t) { - System.out.println("Problem processing: " + line); - throw new RuntimeException(t); - } - - if (opts != null) { - args.add(group); - args.add(opts); - } - } - - parser.registerAll(args.toArray(new Object[0])); - return parser; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/MergeFn.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/MergeFn.java deleted file mode 100644 index 51d0550706..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/MergeFn.java +++ /dev/null @@ -1,82 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.LispTree; -import fig.basic.Option; -import fig.basic.LogInfo; - -/** - * Takes two unaries and merges (takes the intersection) of them. 
- * - * @author Percy Liang - */ -public class MergeFn extends SemanticFn { - public static class Options { - @Option(gloss = "whether to do a hard type-check") - public boolean hardTypeCheck = true; - - @Option public boolean showTypeCheckFailures = false; - @Option(gloss = "Verbose") public int verbose = 0; - } - - public static Options opts = new Options(); - - MergeFormula.Mode mode; // How to merge - Formula formula; // Optional: merge with this if exists - - public void init(LispTree tree) { - super.init(tree); - mode = MergeFormula.parseMode(tree.child(1).value); - if (tree.children.size() == 3) { - formula = Formulas.fromLispTree(tree.child(2)); - } - } - - public DerivationStream call(final Example ex, final Callable c) { - return new SingleDerivationStream() { - @Override - public Derivation createDerivation() { - Formula result; - if (c.getChildren().size() == 1) - result = c.child(0).formula; - else if (c.getChildren().size() == 2) - result = new MergeFormula(mode, c.child(0).formula, c.child(1).formula); - else - throw new RuntimeException("Bad args: " + c.getChildren()); - - // Compute resulting type - Derivation child0 = c.child(0); - Derivation child1 = c.child(1); - SemType type = child0.type.meet(child1.type); - FeatureVector features = new FeatureVector(); - if (opts.verbose >= 5) - LogInfo.logs("MergeFn: %s | %s | %s", child0, child1, type); - - if (!type.isValid()) { - if (opts.hardTypeCheck) { - if (opts.showTypeCheckFailures) - LogInfo.warnings("MergeFn: type check failed: [%s : %s] AND [%s : %s]", child0.formula, child0.type, child1.formula, child1.type); - return null; - } - } - - if (formula != null) - result = new MergeFormula(mode, formula, result); - - Derivation deriv = new Derivation.Builder() - .withCallable(c) - .formula(result) - .type(type) - .localFeatureVector(features) - .createDerivation(); - - if (SemanticFn.opts.trackLocalChoices) { - deriv.addLocalChoice( - "MergeFn " + - child0.startEndString(ex.getTokens()) + " " + 
child0.formula + " AND " + - child1.startEndString(ex.getTokens()) + " " + child1.formula); - } - return deriv; - } - }; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/MergeFormula.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/MergeFormula.java deleted file mode 100644 index 6c3f1a9d04..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/MergeFormula.java +++ /dev/null @@ -1,78 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.google.common.base.Function; -import fig.basic.LispTree; - -import java.util.List; - -/** - * Takes two unary formulas and performs either the intersection or union. - * - * @author Percy Liang - */ -public class MergeFormula extends Formula { - public enum Mode { and, or }; - public final Mode mode; - public final Formula child1; - public final Formula child2; - - public MergeFormula(Mode mode, Formula child1, Formula child2) { - this.mode = mode; - this.child1 = child1; - this.child2 = child2; - } - - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild(mode.toString()); - tree.addChild(child1.toLispTree()); - tree.addChild(child2.toLispTree()); - return tree; - } - - @Override - public void forEach(Function func) { - if (!func.apply(this)) { child1.forEach(func); child2.forEach(func); } - } - - @Override - public Formula map(Function func) { - Formula result = func.apply(this); - return result == null ? 
new MergeFormula(mode, child1.map(func), child2.map(func)) : result; - } - - @Override - public List mapToList(Function> func, boolean alwaysRecurse) { - List res = func.apply(this); - if (res.isEmpty() || alwaysRecurse) { - res.addAll(child1.mapToList(func, alwaysRecurse)); - res.addAll(child2.mapToList(func, alwaysRecurse)); - } - return res; - } - - public static Mode parseMode(String mode) { - if ("and".equals(mode)) return Mode.and; - if ("or".equals(mode)) return Mode.or; - return null; - } - - @SuppressWarnings({"equalshashcode"}) - @Override - public boolean equals(Object thatObj) { - if (!(thatObj instanceof MergeFormula)) return false; - MergeFormula that = (MergeFormula) thatObj; - if (this.mode != that.mode) return false; - if (!this.child1.equals(that.child1)) return false; - if (!this.child2.equals(that.child2)) return false; - return true; - } - - public int computeHashCode() { - int hash = 0x7ed55d16; - hash = hash * 0xd3a2646c + mode.toString().hashCode(); // Note: don't call hashCode() on mode directly. - hash = hash * 0xd3a2646c + child1.hashCode(); - hash = hash * 0xd3a2646c + child2.hashCode(); - return hash; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/MixParser.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/MixParser.java deleted file mode 100644 index 61b96e9011..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/MixParser.java +++ /dev/null @@ -1,155 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.util.*; -import java.util.regex.Pattern; - -import fig.basic.LogInfo; -import fig.basic.Option; -import fig.basic.Pair; - -/** - * A parser that mixes the derivation lists from other parsers. 
- * - * @author ppasupat - */ -public class MixParser extends Parser { - public static class Options { - @Option(gloss = "verbosity") - public int verbose = 1; - - /** - * Syntax: [className]:[options] - * - className also includes the package name (BeamParser, FloatingParser, tables.dpd.DPDParser, ...) - * - options is a comma-separated list of [group] or [group]-[iter1]-[iter2]-... - * [iter1], [iter2], ... are iteration indices (0-based), "xc" (computing expected counts), - * "nxc" (not computing expected counts), index + "xc", or index + "nxc" - * - * Example: Using - * -MixParser.parsers FloatingParser tables.serialize.SerializedParser:train-0-2-3,dev - * will cause derivations from SerializedParser to be mixed in during all dev iterations - * and training iterations 0, 2, and 3. - */ - @Option(gloss = "list of parsers to use along with options") - public List parsers = new ArrayList<>(); - } - public static Options opts = new Options(); - - final List> parsers; - int iter, numIters; - String group; - - public MixParser(Spec spec) { - super(spec); - parsers = new ArrayList<>(); - for (String parserAndOptions : opts.parsers) { - if (opts.verbose >= 1) - LogInfo.logs("Adding parser %s", parserAndOptions); - String[] tokens = parserAndOptions.split(":"); - if (tokens.length > 2) - throw new RuntimeException("Invalid parser options: " + parserAndOptions); - String parserName = tokens[0]; - Parser parser; - try { - Class parserClass = Class.forName(SempreUtils.resolveClassName(parserName)); - parser = ((Parser) parserClass.getConstructor(spec.getClass()).newInstance(spec)); - } catch (ClassNotFoundException e1) { - throw new RuntimeException("Illegal parser: " + parserName); - } catch (Exception e) { - e.printStackTrace(); - throw new RuntimeException("Error while instantiating parser: " + parserName + "\n" + e); - } - if (tokens.length > 1) - parsers.add(new Pair<>(parser, new MixParserOption(this, tokens[1]))); - else - parsers.add(new Pair<>(parser, new 
MixParserOption(this))); - } - } - - // Don't do it. - @Override protected void computeCatUnaryRules() { - catUnaryRules = Collections.emptyList(); - }; - - @Override - public ParserState newParserState(Params params, Example ex, boolean computeExpectedCounts) { - return new MixParserState(this, params, ex, computeExpectedCounts); - } - - @Override - public void onBeginDataGroup(int iter, int numIters, String group) { - this.iter = iter; - this.numIters = numIters; - this.group = group; - } -} - -class MixParserOption { - private final MixParser mixParser; - private boolean allowedAll = false; - private List> allowedGroupsAndIter = new ArrayList<>(); - - public MixParserOption(MixParser mixParser) { - this.mixParser = mixParser; - // Allow in all groups - allowedAll = true; - } - - public MixParserOption(MixParser mixParser, String optionString) { - this.mixParser = mixParser; - String[] tokens = optionString.split(","); - for (String option : tokens) { - String[] subtokens = option.split("-"); - if (subtokens.length == 1) - allowedGroupsAndIter.add(new Pair<>(subtokens[0], "all")); - else - for (int i = 1; i < subtokens.length; i++) { - if (!Pattern.matches("^([0-9]*(n?xc)?)$", subtokens[i])) - throw new RuntimeException("Invalid iteration options: " + subtokens[i]); - allowedGroupsAndIter.add(new Pair<>(subtokens[0], subtokens[i])); - } - } - } - - public boolean isAllowed(boolean computeExpectedCounts) { - if (allowedAll) return true; - String xcFlag = computeExpectedCounts ? 
"xc" : "nxc"; - return allowedGroupsAndIter.contains(new Pair<>(mixParser.group, "all")) - || allowedGroupsAndIter.contains(new Pair<>(mixParser.group, "" + mixParser.iter)) - || allowedGroupsAndIter.contains(new Pair<>(mixParser.group, xcFlag)) - || allowedGroupsAndIter.contains(new Pair<>(mixParser.group, "" + mixParser.iter + xcFlag)); - } -} - -class MixParserState extends ParserState { - - public MixParserState(Parser parser, Params params, Example ex, boolean computeExpectedCounts) { - super(parser, params, ex, computeExpectedCounts); - } - - @Override - public void infer() { - for (Pair pair : ((MixParser) parser).parsers) { - if (!pair.getSecond().isAllowed(computeExpectedCounts)) { - if (MixParser.opts.verbose >= 1) - LogInfo.logs("Skipping %s", pair.getFirst().getClass().getSimpleName()); - continue; - } - if (MixParser.opts.verbose >= 1) - LogInfo.begin_track("Using %s", pair.getFirst().getClass().getSimpleName()); - ParserState parserState = pair.getFirst().newParserState(params, ex, false); - parserState.infer(); - predDerivations.addAll(parserState.predDerivations); - if (MixParser.opts.verbose >= 1) { - LogInfo.logs("Number of derivations: %d", parserState.predDerivations.size()); - LogInfo.end_track(); - } - - } - ensureExecuted(); - if (computeExpectedCounts) { - expectedCounts = new HashMap<>(); - ParserState.computeExpectedCounts(predDerivations, expectedCounts); - } - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/MultipleDerivationStream.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/MultipleDerivationStream.java deleted file mode 100644 index a8d155b7fa..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/MultipleDerivationStream.java +++ /dev/null @@ -1,48 +0,0 @@ -package edu.stanford.nlp.sempre; - -// Encapsulates the production of possibly many Derivations. -// The subclass has to maintain the cursor to keep track of which is coming next. 
-public abstract class MultipleDerivationStream implements DerivationStream { - private Derivation nextDeriv; // Next one to return. - int numGenerated = 0; - - // Override this class: should create a new Derivation. - // Return null if there are no more. - public abstract Derivation createDerivation(); - - @Override public boolean hasNext() { - if (nextDeriv != null) return true; // Still one in the queue - nextDeriv = createDerivation(); // Ask for another - return nextDeriv != null; - } - - @Override - public Derivation next() { - if (nextDeriv == null) throw new RuntimeException("No more derivations!"); - Derivation deriv = nextDeriv; - if (FeatureExtractor.containsDomain("derivRank")) { - numGenerated++; - if (numGenerated <= 3) - deriv.addFeature("derivRank", deriv.rule.sem.toString() + " " + numGenerated); - else if (numGenerated <= 5) - deriv.addFeature("derivRank", deriv.rule.sem.toString() + " 4:5"); - else if (numGenerated <= 10) - deriv.addFeature("derivRank", deriv.rule.sem.toString() + " 6:10"); - else - deriv.addFeature("derivRank", deriv.rule.sem.toString() + " 11:"); - } - nextDeriv = createDerivation(); - return deriv; - } - - @Override - public Derivation peek() { - if (nextDeriv == null) throw new RuntimeException("No more derivations!"); - return nextDeriv; - } - - @Override public void remove() { throw new RuntimeException("Cannot remove from DerivationStream"); } - - // Default: but can overload this if desired - @Override public int estimatedSize() { return 2; } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/NaiveKnowledgeGraph.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/NaiveKnowledgeGraph.java deleted file mode 100644 index 351ac8fa19..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/NaiveKnowledgeGraph.java +++ /dev/null @@ -1,199 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.util.*; -import fig.basic.*; - -/** - * Represent a knowledge graph explicitly as triples (e1, r, 
e2). - * - * The graph is immutable. Once the graph is initialized, we precompute several mappings - * (e.g., list of all outgoing edges from each entity e). - * - * @author ppasupat - */ -public class NaiveKnowledgeGraph extends KnowledgeGraph { - - // Represent a triple (entity, relation, entity) - public static class KnowledgeGraphTriple { - public final Value e1, r, e2; - - public KnowledgeGraphTriple(Value e1, Value r, Value e2) { - this.e1 = e1; - this.r = r; - this.e2 = e2; - } - - public KnowledgeGraphTriple(String e1, String r, String e2) { - this.e1 = new StringValue(e1); - this.r = new StringValue(r); - this.e2 = new StringValue(e2); - } - - public KnowledgeGraphTriple(LispTree tree) { - if (tree.children.size() != 3) - throw new RuntimeException("Invalid triple size (" + tree.children.size() + " != 3): " - + tree); - this.e1 = valueFromLispTree(tree.child(0)); - this.r = valueFromLispTree(tree.child(1)); - this.e2 = valueFromLispTree(tree.child(2)); - } - - protected static Value valueFromLispTree(LispTree tree) { - if (tree.isLeaf()) return new NameValue(tree.value, null); - return Values.fromLispTree(tree); - } - - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild(e1.toLispTree()); - tree.addChild(r.toLispTree()); - tree.addChild(e2.toLispTree()); - return tree; - } - - @Override - public String toString() { - return "<" + e1 + ", " + r + ", " + e2 + ">"; - } - } - - // Simplest graph representation: triples of values - public final List triples; - - // ============================================================ - // Constructor / Precomputation - // ============================================================ - - public Map> relationToTriples; - public Map> firstToTriples; - public Map> secondToTriples; - - public NaiveKnowledgeGraph(Collection triples) { - this.triples = new ArrayList<>(triples); - precomputeMappings(); - } - - public void precomputeMappings() { - relationToTriples = new HashMap<>(); - 
firstToTriples = new HashMap<>(); - secondToTriples = new HashMap<>(); - for (KnowledgeGraphTriple triple : triples) { - MapUtils.addToList(relationToTriples, triple.r, triple); - MapUtils.addToList(firstToTriples, triple.e1, triple); - MapUtils.addToList(secondToTriples, triple.e2, triple); - } - } - - // ============================================================ - // Queries - // ============================================================ - - @Override - public List joinFirst(Value r, Collection firsts) { - if (CanonicalNames.isReverseProperty(r)) - return joinSecond(CanonicalNames.reverseProperty(r), firsts); - List seconds = new ArrayList<>(); - List relationTriples = relationToTriples.get(r); - if (relationTriples != null) { - for (KnowledgeGraphTriple triple : relationTriples) { - if (firsts.contains(triple.e1)) - seconds.add(triple.e2); - } - } - return seconds; - } - - @Override - public List joinSecond(Value r, Collection seconds) { - if (CanonicalNames.isReverseProperty(r)) - return joinFirst(CanonicalNames.reverseProperty(r), seconds); - List firsts = new ArrayList<>(); - List relationTriples = relationToTriples.get(r); - if (relationTriples != null) { - for (KnowledgeGraphTriple triple : relationTriples) { - if (seconds.contains(triple.e2)) - firsts.add(triple.e1); - } - } - return firsts; - } - - @Override - public List> filterFirst(Value r, Collection firsts) { - if (CanonicalNames.isReverseProperty(r)) - return getReversedPairs(filterSecond(CanonicalNames.reverseProperty(r), firsts)); - List> pairs = new ArrayList<>(); - List relationTriples = relationToTriples.get(r); - if (relationTriples != null) { - for (KnowledgeGraphTriple triple : relationTriples) { - if (firsts.contains(triple.e1)) - pairs.add(new Pair<>(triple.e1, triple.e2)); - } - } - return pairs; - } - - @Override - public List> filterSecond(Value r, Collection seconds) { - if (CanonicalNames.isReverseProperty(r)) - return 
getReversedPairs(filterFirst(CanonicalNames.reverseProperty(r), seconds)); - List> pairs = new ArrayList<>(); - List relationTriples = relationToTriples.get(r); - if (relationTriples != null) { - for (KnowledgeGraphTriple triple : relationTriples) { - if (seconds.contains(triple.e2)) - pairs.add(new Pair<>(triple.e1, triple.e2)); - } - } - return pairs; - } - - // ============================================================ - // LispTree conversion - // ============================================================ - - /** - * Convert LispTree to KnowledgeGraph - * - * The |tree| should look like - * - * (graph NaiveKnowledgeGraph - * ((string Obama) (string "born in") (string Hawaii)) - * ((string Einstein) (string "born in") (string Ulm)) - * ...) - */ - public static KnowledgeGraph fromLispTree(LispTree tree) { - List triples = new ArrayList<>(); - for (int i = 2; i < tree.children.size(); i++) { - triples.add(new KnowledgeGraphTriple(tree.child(i))); - } - return new NaiveKnowledgeGraph(triples); - } - - public static KnowledgeGraph fromFile(String path) { - return fromLispTree(LispTree.proto.parseFromFile(path).next()); - } - - @Override - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("graph"); - tree.addChild("NaiveKnowledgeGraph"); - for (KnowledgeGraphTriple triple : triples) { - tree.addChild(triple.toLispTree()); - } - return tree; - } - - @Override - public LispTree toShortLispTree() { - if (triples.size() > 1000) { - LispTree tree = LispTree.proto.newList(); - tree.addChild("graph"); - tree.addChild("NaiveKnowledgeGraph"); - tree.addChild(("TooManyTriples")); - return tree; - } - return toLispTree(); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/NameValue.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/NameValue.java deleted file mode 100644 index 38d99f9ab3..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/NameValue.java +++ /dev/null @@ -1,55 +0,0 @@ 
-package edu.stanford.nlp.sempre; - -import fig.basic.LispTree; -import fig.basic.LogInfo; - -/** - * Represents a logical predicate. - * @author Percy Liang - */ -public class NameValue extends Value { - public final String id; // Identifier (e.g., "fb:en.barack_obama") - public final String description; // Readable description (e.g., "Barack Obama") - - public NameValue(LispTree tree) { - this.id = tree.child(1).value; - if (tree.children.size() > 2) - this.description = tree.child(2).value; - else - this.description = null; - assert (this.id != null) : tree; - } - - public NameValue(String id) { - this(id, null); - } - - public NameValue(String id, String description) { - if (id == null) { - LogInfo.errors("Got null id, description is %s", description); - id = "fb:en.null"; - } - this.id = id; - this.description = description; - } - - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("name"); - tree.addChild(id); - if (description != null) tree.addChild(description); - return tree; - } - - @Override public String sortString() { return id; } - @Override public String pureString() { return description == null ? 
id : description; } - - @Override public int hashCode() { return id.hashCode(); } - @Override public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - NameValue that = (NameValue) o; - // Note: only check id, not description - return this.id.equals(that.id); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/NotFormula.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/NotFormula.java deleted file mode 100644 index 485e964c06..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/NotFormula.java +++ /dev/null @@ -1,58 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.google.common.base.Function; -import fig.basic.LispTree; - -import java.util.List; - -/** - * (not expression) returns the truth value which is opposite of expression. - * - * @author Percy Liang - */ -public class NotFormula extends Formula { - public final Formula child; - - public NotFormula(Formula child) { this.child = child; } - - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("not"); - tree.addChild(child.toLispTree()); - return tree; - } - - @Override - public void forEach(Function func) { - if (!func.apply(this)) child.forEach(func); - } - - @Override - public Formula map(Function func) { - Formula result = func.apply(this); - return result == null ? 
new NotFormula(child.map(func)) : result; - } - - @Override - public List mapToList(Function> func, boolean alwaysRecurse) { - List res = func.apply(this); - if (res.isEmpty() || alwaysRecurse) - res.addAll(child.mapToList(func, alwaysRecurse)); - return res; - } - - @SuppressWarnings({"equalshashcode"}) - @Override - public boolean equals(Object thatObj) { - if (!(thatObj instanceof NotFormula)) return false; - NotFormula that = (NotFormula) thatObj; - if (!this.child.equals(that.child)) return false; - return true; - } - - public int computeHashCode() { - int hash = 0x7ed55d16; - hash = hash * 0xd3a2646c + child.hashCode(); - return hash; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/NullExecutor.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/NullExecutor.java deleted file mode 100644 index 63ac6aa2a8..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/NullExecutor.java +++ /dev/null @@ -1,12 +0,0 @@ -package edu.stanford.nlp.sempre; - -/** - * Assign null semantics to each formula. - * - * @author Percy Liang - */ -public class NullExecutor extends Executor { - public Response execute(Formula formula, ContextValue context) { - return new Response(null); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/NullTypeLookup.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/NullTypeLookup.java deleted file mode 100644 index 41d151c499..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/NullTypeLookup.java +++ /dev/null @@ -1,17 +0,0 @@ -package edu.stanford.nlp.sempre; - -/** - * Default implementation of TypeLookup: just return null (I don't know what - * the type is). 
- */ -public class NullTypeLookup implements TypeLookup { - @Override - public SemType getEntityType(String entity) { - return null; - } - - @Override - public SemType getPropertyType(String property) { - return null; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/NumberFn.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/NumberFn.java deleted file mode 100644 index 1396435ae4..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/NumberFn.java +++ /dev/null @@ -1,168 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.util.ArrayList; -import java.util.List; - -import fig.basic.*; - -/** - * Maps a string to a number (double). - * - * @author Percy Liang - */ -public class NumberFn extends SemanticFn { - public static class Options { - @Option(gloss = "Omit units") public boolean unitless = false; - @Option(gloss = "Also test numbers by try converting to float (instead of using NER tags)") - public boolean alsoTestByConversion = false; - @Option(gloss = "Also test numbers by applying NER on just the phrase") - public boolean alsoTestByIsolatedNER = false; - @Option(gloss = "range of allowed numbers. e.g. 
null: no limits, Lists.newArrayList(0,100): 0-100 inclusive") - public List allowedRange = null; - } - public static Options opts = new Options(); - - private List requests; // List of types of fields to get (e.g., NUMBER) - - private boolean request(String req) { - return requests == null || requests.contains(req); - } - - public void init(LispTree tree) { - super.init(tree); - if (tree.children.size() > 1) { - requests = new ArrayList(); - for (int i = 1; i < tree.children.size(); i++) - requests.add(tree.child(1).value); - } - } - - // TODO(pliang): handle measurements too (e.g., 3cm) - public DerivationStream call(final Example ex, final Callable c) { - return new SingleDerivationStream() { - public Derivation createDerivation() { - // Test using NER span - Derivation deriv = check(ex.languageInfo, c.getStart(), c.getEnd()); - if (deriv != null) return deriv; - - // Test by converting string to number directly (don't look at NER) - if (opts.alsoTestByConversion && request("NUMBER") & c.getEnd() - c.getStart() == 1) { - String value = ex.languageInfo.tokens.get(c.getStart()); - if (value != null) { - try { - NumberValue numberValue = new NumberValue(Double.parseDouble(value)); - SemType type = numberValue.value == (int) numberValue.value ? 
SemType.intType : SemType.floatType; - return new Derivation.Builder() - .withCallable(c) - .formula(new ValueFormula<>(numberValue)) - .type(type) - .createDerivation(); - } catch (NumberFormatException e) { - // Don't issue warnings; most spans are not numbers - } - } - } - - // Test by applying NER on just the phrase - if (opts.alsoTestByIsolatedNER) { - String phrase = ex.phraseString(c.getStart(), c.getEnd()); - LanguageInfo languageInfo = LanguageAnalyzer.getSingleton().analyze(phrase); - deriv = check(languageInfo, 0, languageInfo.numTokens()); - if (deriv != null) - return deriv; - } - - return null; - } - - public Derivation check(LanguageInfo languageInfo, int start, int end) { - // Numbers: If it is an integer, set its type to integer. Otherwise, use float. - if (request("NUMBER")) { - String value = languageInfo.getNormalizedNerSpan("NUMBER", start, end); - if (value != null) { - try { - NumberValue numberValue = new NumberValue(Double.parseDouble(value)); - if (opts.allowedRange != null) { - if (numberValue.value < opts.allowedRange.get(0) || numberValue.value > opts.allowedRange.get(1)) { - LogInfo.warnings("NumberFn: %f is outside of the allowed range %s", numberValue.value, opts.allowedRange); - return null; - } - } - - SemType type = numberValue.value == (int) numberValue.value ? SemType.intType : SemType.floatType; - return new Derivation.Builder() - .withCallable(c) - .formula(new ValueFormula<>(numberValue)) - .type(type) - .createDerivation(); - } catch (NumberFormatException e) { - LogInfo.warnings("NumberFn: Cannot convert NerSpan \"%s\" to a number", value); - } - } - } - - // Ordinals - if (request("ORDINAL")) { - String value = languageInfo.getNormalizedNerSpan("ORDINAL", start, end); - if (value != null) { - try { - NumberValue numberValue = (opts.unitless ? 
- new NumberValue(Double.parseDouble(value)) : - new NumberValue(Double.parseDouble(value), "fb:en.ordinal_number")); - SemType type = SemType.intType; - return new Derivation.Builder() - .withCallable(c) - .formula(new ValueFormula<>(numberValue)) - .type(type) - .createDerivation(); - } catch (NumberFormatException e) { - LogInfo.warnings("NumberFn: Cannot convert NerSpan \"%s\" to a number", value); - } - } - } - - // Percents - if (request("PERCENT")) { - String value = languageInfo.getNormalizedNerSpan("PERCENT", start, end); - if (value != null) { - try { - NumberValue numberValue = (opts.unitless ? - new NumberValue(Double.parseDouble(value.substring(1))) : - new NumberValue(0.01 * Double.parseDouble(value.substring(1)))); - SemType type = SemType.floatType; - return new Derivation.Builder() - .withCallable(c) - .formula(new ValueFormula<>(numberValue)) - .type(type) - .createDerivation(); - } catch (NumberFormatException e) { - LogInfo.warnings("NumberFn: Cannot convert NerSpan \"%s\" to a number", value); - } - } - } - - // Money - if (request("MONEY")) { - String value = languageInfo.getNormalizedNerSpan("MONEY", start, end); - if (value != null) { - try { - NumberValue numberValue = (opts.unitless ? 
- new NumberValue(Double.parseDouble(value.substring(1))) : - new NumberValue(Double.parseDouble(value.substring(1)), "fb:en.dollar")); - SemType type = SemType.floatType; - return new Derivation.Builder() - .withCallable(c) - .formula(new ValueFormula<>(numberValue)) - .type(type) - .createDerivation(); - } catch (NumberFormatException e) { - LogInfo.warnings("NumberFn: Cannot convert NerSpan \"%s\" to a number", value); - } - } - } - - return null; - } - }; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/NumberValue.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/NumberValue.java deleted file mode 100644 index cd48725d20..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/NumberValue.java +++ /dev/null @@ -1,54 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.LispTree; -import fig.basic.Fmt; - -/** - * Represents a numerical value (optionally comes with a unit). - * In the future, might want to split this into an Integer version? - * - * @author Percy Liang - */ -public class NumberValue extends Value { - public static final String unitless = "fb:en.unitless"; - public static final String yearUnit = "fb:en.year"; - - public final double value; - public final String unit; // What measurement (e.g., "fb:en.meter" or unitless) - - public NumberValue(double value) { - this(value, unitless); - } - - public NumberValue(double value, String unit) { - this.value = value; - this.unit = unit; - } - - public NumberValue(LispTree tree) { - this.value = Double.parseDouble(tree.child(1).value); - this.unit = 2 < tree.children.size() ? 
tree.child(2).value : unitless; - } - - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("number"); - tree.addChild(Fmt.D(value)); - if (!unit.equals(unitless)) - tree.addChild(unit); - return tree; - } - - @Override public String sortString() { return "" + value; } - @Override public String pureString() { return "" + value; } - - @Override public int hashCode() { return Double.valueOf(value).hashCode(); } - @Override public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - NumberValue that = (NumberValue) o; - if (this.value != that.value) return false; // Warning: doing exact equality checking - if (!this.unit.equals(that.unit)) return false; - return true; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/PairListValue.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/PairListValue.java deleted file mode 100644 index 1dca0e0efb..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/PairListValue.java +++ /dev/null @@ -1,68 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - -import fig.basic.LispTree; -import fig.basic.LogInfo; -import fig.basic.Pair; - -/** - * Represent a binary using a list of pairs. 
- * - * @author ppasupat - */ -public class PairListValue extends Value { - public final List> pairs; - - public PairListValue(LispTree tree) { - pairs = new ArrayList<>(); - for (int i = 1; i < tree.children.size(); i++) - pairs.add(new Pair<>( - Values.fromLispTree(tree.child(i).child(0)), - Values.fromLispTree(tree.child(i).child(1)))); - } - - public PairListValue(List> pairs) { this.pairs = pairs; } - - protected static final LispTree NULL_LEAF = LispTree.proto.newLeaf(null); - - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("pairs"); - for (Pair pair : pairs) { - Value first = pair.getFirst(), second = pair.getSecond(); - tree.addChild(LispTree.proto.newList( - first == null ? NULL_LEAF : first.toLispTree(), second == null ? NULL_LEAF : second.toLispTree())); - } - return tree; - } - - public void log() { - for (Pair pair : pairs) - LogInfo.logs("%s | %s", pair.getFirst(), pair.getSecond()); - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - PairListValue that = (PairListValue) o; - return pairs.equals(that.pairs); - } - - @Override public int hashCode() { return pairs.hashCode(); } - - // Sorted on string representation - public PairListValue getSorted() { - List> sorted = new ArrayList<>(pairs); - Collections.sort(sorted, - (Pair p1, Pair p2) -> getQuickStringOfPair(p1).compareTo(getQuickStringOfPair(p2))); - return new PairListValue(sorted); - } - private static String getQuickStringOfPair(Pair pair) { - Value v1 = pair.getFirst(), v2 = pair.getSecond(); - return (v1 == null ? "null" : v1.sortString()) + " " + (v2 == null ? 
"null" : v2.sortString()); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Params.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Params.java deleted file mode 100644 index 46441cf984..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Params.java +++ /dev/null @@ -1,287 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.google.common.base.Splitter; -import com.google.common.collect.Lists; - -import fig.basic.*; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.PrintWriter; -import java.util.*; - -/** - * Params contains the parameters of the model. Currently consists of a map from - * features to weights. - * - * @author Percy Liang - */ -public class Params { - public static class Options { - @Option(gloss = "By default, all features have this weight") - public double defaultWeight = 0; - @Option(gloss = "Randomly initialize the weights") - public boolean initWeightsRandomly = false; - @Option(gloss = "Randomly initialize the weights") - public Random initRandom = new Random(1); - - @Option(gloss = "Initial step size") public double initStepSize = 1; - @Option(gloss = "How fast to reduce the step size") - public double stepSizeReduction = 0; - @Option(gloss = "Use the AdaGrad algorithm (different step size for each coordinate)") - public boolean adaptiveStepSize = true; - @Option(gloss = "Use dual averaging") public boolean dualAveraging = false; - @Option(gloss = "Whether to do lazy l1 reg updates") public String l1Reg = "none"; - @Option(gloss = "L1 reg coefficient") public double l1RegCoeff = 0d; - @Option(gloss = "Lazy L1 full update frequency") public int lazyL1FullUpdateFreq = 5000; - } - public static Options opts = new Options(); - public enum L1Reg { - LAZY, - NONLAZY, - NONE; - } - private L1Reg parseReg(String l1Reg) { - if ("lazy".equals(l1Reg)) return L1Reg.LAZY; - if ("nonlazy".equals(l1Reg)) return L1Reg.NONLAZY; - if ("none".equals(l1Reg)) return L1Reg.NONE; - 
throw new RuntimeException("not legal l1reg"); - } - private L1Reg l1Reg = parseReg(opts.l1Reg); - - // Discriminative weights - private Map weights = new HashMap<>(); - - // For AdaGrad - Map sumSquaredGradients = new HashMap<>(); - - // For dual averaging - Map sumGradients = new HashMap<>(); - - // Number of stochastic updates we've made so far (for determining step size). - int numUpdates; - - // for lazy l1-reg update - Map l1UpdateTimeMap = new HashMap<>(); - - // Initialize the weights - public void init(List> initialization) { - if (!weights.isEmpty()) - throw new RuntimeException("Initialization is not legal when there are non-zero weights"); - for (Pair pair: initialization) - weights.put(pair.getFirst(), pair.getSecond()); - } - - // Read parameters from |path|. - public void read(String path) { - LogInfo.begin_track("Reading parameters from %s", path); - try { - BufferedReader in = IOUtils.openIn(path); - String line; - while ((line = in.readLine()) != null) { - String[] pair = Lists.newArrayList(Splitter.on('\t').split(line)).toArray(new String[2]); - weights.put(pair[0], Double.parseDouble(pair[1])); - } - in.close(); - } catch (IOException e) { - throw new RuntimeException(e); - } - LogInfo.logs("Read %s weights", weights.size()); - LogInfo.end_track(); - } - - // Read parameters from |path|. - public void read(String path, String prefix) { - LogInfo.begin_track("Reading parameters from %s", path); - try { - BufferedReader in = IOUtils.openIn(path); - String line; - while ((line = in.readLine()) != null) { - String[] pair = Lists.newArrayList(Splitter.on('\t').split(line)).toArray(new String[2]); - weights.put(pair[0], Double.parseDouble(pair[1])); - weights.put(prefix + pair[0], Double.parseDouble(pair[1])); - } - in.close(); - } catch (IOException e) { - throw new RuntimeException(e); - } - LogInfo.logs("Read %s weights", weights.size()); - LogInfo.end_track(); - } - - // Update weights by adding |gradient| (modified appropriately with step size). 
- public synchronized void update(Map gradient) { - for (Map.Entry entry : gradient.entrySet()) { - String f = entry.getKey(); - double g = entry.getValue(); - if (g * g == 0) continue; // In order to not divide by zero - - if (l1Reg == L1Reg.LAZY) lazyL1Update(f); - double stepSize = computeStepSize(f, g); - - if (opts.dualAveraging) { - if (!opts.adaptiveStepSize && opts.stepSizeReduction != 0) - throw new RuntimeException("Dual averaging not supported when " + - "step-size changes across iterations for " + - "features for which the gradient is zero"); - MapUtils.incr(sumGradients, f, g); - MapUtils.set(weights, f, stepSize * sumGradients.get(f)); - } else { - if (stepSize * g == Double.POSITIVE_INFINITY || stepSize * g == Double.NEGATIVE_INFINITY) { - LogInfo.logs("WEIRD FEATURE UPDATE: feature=%s, currentWeight=%s, stepSize=%s, gradient=%s", f, getWeight(f), stepSize, g); - throw new RuntimeException("Gradient absolute value is too large or too small"); - } - MapUtils.incr(weights, f, stepSize * g); - if (l1Reg == L1Reg.LAZY) l1UpdateTimeMap.put(f, numUpdates); - } - } - // non lazy implementation goes over all weights - if (l1Reg == L1Reg.NONLAZY) { - Set features = new HashSet(weights.keySet()); - for (String f : features) { - double stepSize = computeStepSize(f, 0d); // no update for gradient here - double update = opts.l1RegCoeff * -Math.signum(MapUtils.getDouble(weights, f, opts.defaultWeight)); - clipUpdate(f, stepSize * update); - } - } - numUpdates++; - if (l1Reg == L1Reg.LAZY && opts.lazyL1FullUpdateFreq > 0 && numUpdates % opts.lazyL1FullUpdateFreq == 0) { - LogInfo.begin_track("Fully apply L1 regularization."); - finalizeWeights(); - System.gc(); - LogInfo.end_track(); - } - } - - private double computeStepSize(String feature, double gradient) { - if (opts.adaptiveStepSize) { - MapUtils.incr(sumSquaredGradients, feature, gradient * gradient); - // ugly - adding one to the denominator when using l1 reg. 
- if (l1Reg != L1Reg.NONE) - return opts.initStepSize / (Math.sqrt(sumSquaredGradients.get(feature) + 1)); - else - return opts.initStepSize / Math.sqrt(sumSquaredGradients.get(feature)); - } else { - return opts.initStepSize / Math.pow(numUpdates, opts.stepSizeReduction); - } - } - - /* - * If the update changes the sign, remove the feature - */ - private void clipUpdate(String f, double update) { - double currWeight = MapUtils.getDouble(weights, f, 0); - if (currWeight == 0) - return; - - if (currWeight * (currWeight + update) < 0.0) { - weights.remove(f); - } else { - MapUtils.incr(weights, f, update); - } - } - - private void lazyL1Update(String f) { - if (MapUtils.getDouble(weights, f, 0.0) == 0) return; - // For pre-initialized weights, which have no updates yet - if (sumSquaredGradients.get(f) == null || l1UpdateTimeMap.get(f) == null) { - l1UpdateTimeMap.put(f, numUpdates); - sumSquaredGradients.put(f, 0.0); - return; - } - int numOfIter = numUpdates - MapUtils.get(l1UpdateTimeMap, f, 0); - if (numOfIter == 0) return; - if (numOfIter < 0) throw new RuntimeException("l1UpdateTimeMap is out of sync."); - - double stepSize = (numOfIter * opts.initStepSize) / (Math.sqrt(sumSquaredGradients.get(f) + 1)); - double update = -opts.l1RegCoeff * Math.signum(MapUtils.getDouble(weights, f, 0.0)); - clipUpdate(f, stepSize * update); - if (weights.containsKey(f)) - l1UpdateTimeMap.put(f, numUpdates); - else - l1UpdateTimeMap.remove(f); - } - - public synchronized double getWeight(String f) { - if (l1Reg == L1Reg.LAZY) - lazyL1Update(f); - if (opts.initWeightsRandomly) - return MapUtils.getDouble(weights, f, 2 * opts.initRandom.nextDouble() - 1); - else { - return MapUtils.getDouble(weights, f, opts.defaultWeight); - } - } - - public synchronized Map getWeights() { finalizeWeights(); return weights; } - - public void write(PrintWriter out) { write(null, out); } - - public void write(String prefix, PrintWriter out) { - List> entries = 
Lists.newArrayList(weights.entrySet()); - Collections.sort(entries, new ValueComparator(true)); - for (Map.Entry entry : entries) { - double value = entry.getValue(); - out.println((prefix == null ? "" : prefix + "\t") + entry.getKey() + "\t" + value); - } - } - - public void write(String path) { - LogInfo.begin_track("Params.write(%s)", path); - PrintWriter out = IOUtils.openOutHard(path); - write(out); - out.close(); - LogInfo.end_track(); - } - - public void log() { - LogInfo.begin_track("Params"); - List> entries = Lists.newArrayList(weights.entrySet()); - Collections.sort(entries, new ValueComparator(true)); - for (Map.Entry entry : entries) { - double value = entry.getValue(); - LogInfo.logs("%s\t%s", entry.getKey(), value); - } - LogInfo.end_track(); - } - - public synchronized void finalizeWeights() { - if (l1Reg == L1Reg.LAZY) { - Set features = new HashSet<>(weights.keySet()); - for (String f : features) - lazyL1Update(f); - } - } - - public Params copyParams() { - Params result = new Params(); - for (String feature : this.getWeights().keySet()) { - result.weights.put(feature, this.getWeight(feature)); - } - return result; - } - - // copy params starting with prefix and drop the prefix - public Params copyParamsByPrefix(String prefix) { - Params result = new Params(); - for (String feature : this.getWeights().keySet()) { - if (feature.startsWith(prefix)) { - String newFeature = feature.substring(prefix.length()); - result.weights.put(newFeature, this.getWeight(feature)); - } - } - return result; - } - - public boolean isEmpty() { - return weights.size() == 0; - } - - public Params getRandomWeightParams() { - Random rand = new Random(); - Params result = new Params(); - for (String feature : this.getWeights().keySet()) { - result.weights.put(feature, 2 * rand.nextDouble() - 1); // between -1 and 1 - } - return result; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Parser.java 
b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Parser.java deleted file mode 100644 index 00fa76bfd5..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Parser.java +++ /dev/null @@ -1,361 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.*; - -import java.io.PrintWriter; -import java.util.*; - -//////////////////////////////////////////////////////////// - -/** - * A Parser takes an example, parses the sequence of tokens, and stores the - * derivations back in the example. It also computes a gradient with respect - * to some objective function. In this light, Parser it is more than a parser. - * - * @author Percy Liang - */ -public abstract class Parser { - public static class Options { - @Option(gloss = "For debugging, whether to print out all the predicted derivations") - public boolean printAllPredictions; - - @Option(gloss = "Maximal number of predictions to print") - public int maxPrintedPredictions = Integer.MAX_VALUE; - @Option(gloss = "Maximal number of correct predictions to print") - public int maxPrintedTrue = Integer.MAX_VALUE; - - @Option(gloss = "Use a coarse pass to prune the chart before full parsing") - public boolean coarsePrune = false; - - @Option(gloss = "How much output to print") - public int verbose = 0; - - @Option(gloss = "Execute only top formula to be cheap (hack at test time for fast demo)") - public boolean executeTopFormulaOnly = false; - - @Option(gloss = "Whether to output chart filling visualization (huge file!)") - public boolean visualizeChartFilling = false; - - @Option(gloss = "Keep this number of derivations per cell (exact use depends on the parser)") - public int beamSize = 200; - - @Option(gloss = "Whether to update based on partial reward (for learning)") - public boolean partialReward = true; - - @Option(gloss = "Whether to unroll derivation streams (applies to lazy parsers)") - public boolean unrollStream = false; - - @Option(gloss = "Inject random noise into the score (to mix things 
up a bit)") - public double derivationScoreNoise = 0; - - @Option(gloss = "Source of random noise") - public Random derivationScoreRandom = new Random(1); - - @Option(gloss = "Prune away error denotations") - public boolean pruneErrorValues = false; - - @Option(gloss = "Dump all features (for debugging)") - public boolean dumpAllFeatures = false; - - @Option(gloss = "Call SetEvaluation during parsing") - public boolean callSetEvaluation = true; - } - - public static final Options opts = new Options(); - - public boolean verbose(int level) { return opts.verbose >= level; } - - // Used to instantiate a parser. - public static class Spec { - public final Grammar grammar; - public final FeatureExtractor extractor; - public final Executor executor; - public final ValueEvaluator valueEvaluator; - - public Spec(Grammar grammar, FeatureExtractor extractor, Executor executor, ValueEvaluator valueEvaluator) { - this.grammar = grammar; - this.extractor = extractor; - this.executor = executor; - this.valueEvaluator = valueEvaluator; - } - } - - // Inputs to the parser - public final Grammar grammar; - public final FeatureExtractor extractor; - public final Executor executor; - public final ValueEvaluator valueEvaluator; - - // Precomputations to make looking up grammar rules faster. 
- protected List catUnaryRules; // Unary rules with category on RHS ($A => $B) - public List getCatUnaryRules() { return catUnaryRules; } - - // TODO(joberant): move this to a separate class in charge of visualizing charts - public PrintWriter chartFillOut = null; // For printing a machine-readable json file - - public Parser(Spec spec) { - this.grammar = spec.grammar; - this.extractor = spec.extractor; - this.executor = spec.executor; - this.valueEvaluator = spec.valueEvaluator; - - computeCatUnaryRules(); - LogInfo.logs("%s: %d catUnaryRules (sorted), %d nonCatUnaryRules (in trie)", - this.getClass().getSimpleName(), catUnaryRules.size(), grammar.rules.size() - catUnaryRules.size()); - } - - // If grammar changes, then we might need to update aspects of the parser. - public synchronized void addRule(Rule rule) { - if (rule.isCatUnary()) - catUnaryRules.add(rule); - } - - protected void computeCatUnaryRules() { - // Handle catUnaryRules - catUnaryRules = new ArrayList<>(); - Map> graph = new HashMap<>(); // Node from LHS to list of rules - for (Rule rule : grammar.rules) - if (rule.isCatUnary()) - MapUtils.addToList(graph, rule.lhs, rule); - - // Topologically sort catUnaryRules so that B->C occurs before A->B - Map done = new HashMap<>(); - for (String node : graph.keySet()) - traverse(catUnaryRules, node, graph, done); - } - - // Helper function for transitive closure of unary rules. - protected void traverse(List catUnaryRules, String node, - Map> graph, Map done) { - Boolean d = done.get(node); - if (Boolean.TRUE.equals(d)) return; - if (Boolean.FALSE.equals(d)) - throw new RuntimeException("Found cycle of unaries involving " + node); - done.put(node, false); - for (Rule rule : MapUtils.getList(graph, node)) { - traverse(catUnaryRules, rule.rhs.get(0), graph, done); - catUnaryRules.add(rule); - } - done.put(node, true); - } - - /** - * Override this method to change the parser's behavior based on current - * group name and iteration number. 
This method will be called at the - * beginning of each data group. - */ - public void onBeginDataGroup(int iter, int numIters, String group) { - // DEFAULT: Do nothing. - } - - // Main thing for parsers to implement. - public abstract ParserState newParserState(Params params, Example ex, boolean computeExpectedCounts); - public Params getSearchParams(Params params) { return params; } - - /** - * Parse the given example |ex| using the given parameters |params| - * and populate the fields of |ex| (e.g., predDerivations). Note: - * |ex| is modified in place. - */ - public ParserState parse(Params params, Example ex, boolean computeExpectedCounts) { - // Execute target formula (if applicable). - if (ex.targetFormula != null && ex.targetValue == null) - ex.targetValue = executor.execute(ex.targetFormula, ex.context).value; - - // Parse - StopWatch watch = new StopWatch(); - watch.start(); - LogInfo.begin_track_printAll("Parser.parse: parse"); - ParserState state = newParserState(params, ex, computeExpectedCounts); - state.infer(); - LogInfo.end_track(); - watch.stop(); - state.parseTime = watch.getCurrTimeLong(); - state.setEvaluation(); - - ex.predDerivations = state.predDerivations; - Derivation.sortByScore(ex.predDerivations); - - // Evaluate - if (opts.callSetEvaluation) { - ex.evaluation = new Evaluation(); - addToEvaluation(state, ex.evaluation); - } - // Clean up temporary state used during parsing - ex.clearTempState(); - for (Derivation deriv : ex.predDerivations) - deriv.clearTempState(); - return state; - } - - /** - * Compute the evaluation based on the results of parsing and add it to |evaluation| - */ - public void addToEvaluation(ParserState state, Evaluation evaluation) { - Example ex = state.ex; - List predDerivations = state.predDerivations; - - boolean printAllPredictions = opts.printAllPredictions; - int numCandidates = predDerivations.size(); - LogInfo.begin_track_printAll("Parser.setEvaluation: %d candidates", numCandidates); - - // Each 
derivation has a compatibility score (in [0, 1]) as well as a model probability. - // Terminology: - // True (correct): compatibility = 1 - // Partial: 0 < compatibility < 1 - // Wrong: compatibility = 0 - - // Did we get the answer correct? - int correctIndex = -1; // Index of first correct derivation - int correctIndexAfterParse = -1; - double maxCompatibility = 0.0; - double[] compatibilities = null; - int numCorrect = 0, numPartialCorrect = 0, numIncorrect = 0; - - if (ex.targetValue != null) { - compatibilities = new double[numCandidates]; - for (int i = 0; i < numCandidates; i++) { - Derivation deriv = predDerivations.get(i); - compatibilities[i] = deriv.compatibility; - // Must be fully compatible to count as correct. - if (compatibilities[i] == 1 && correctIndex == -1) - correctIndex = i; - // record maximum compatibility for partial oracle - maxCompatibility = Math.max(compatibilities[i], maxCompatibility); - // Count - if (compatibilities[i] == 1) { - numCorrect++; - } else if (compatibilities[i] == 0) { - numIncorrect++; - } else { - numPartialCorrect++; - } - } - // What if we only had parsed bottom up? 
- for (int i = 0; i < numCandidates; i++) { - if (compatibilities[i] == 1) { - correctIndexAfterParse = i; - break; - } - } - } - - // Compute probabilities - double[] probs = Derivation.getProbs(predDerivations, 1); - for (int i = 0; i < numCandidates; i++) { - Derivation deriv = predDerivations.get(i); - deriv.prob = probs[i]; - } - - // Number of derivations which have the same top score - int numTop = 0; - double topMass = 0; - if (ex.targetValue != null) { - while (numTop < numCandidates && - Math.abs(predDerivations.get(numTop).score - predDerivations.get(0).score) < 1e-10) { - topMass += probs[numTop]; - numTop++; - } - } - double correct = 0, partCorrect = 0; - if (ex.targetValue != null) { - for (int i = 0; i < numTop; i++) { - if (compatibilities[i] == 1) correct += probs[i] / topMass; - if (compatibilities[i] > 0) - partCorrect += (compatibilities[i] * probs[i]) / topMass; - } - } - - // Print features (note this is only with respect to the first correct, is NOT the gradient). - // Things are not printed if there is only partial compatability. 
- if (correctIndex != -1 && correct != 1) { - Derivation trueDeriv = predDerivations.get(correctIndex); - Derivation predDeriv = predDerivations.get(0); - HashMap featureDiff = new HashMap<>(); - trueDeriv.incrementAllFeatureVector(+1, featureDiff); - predDeriv.incrementAllFeatureVector(-1, featureDiff); - String heading = String.format("TopTrue (%d) - Pred (%d) = Diff", correctIndex, 0); - FeatureVector.logFeatureWeights(heading, featureDiff, state.params); - - HashMap choiceDiff = new LinkedHashMap<>(); - trueDeriv.incrementAllChoices(+1, choiceDiff); - predDeriv.incrementAllChoices(-1, choiceDiff); - FeatureVector.logChoices(heading, choiceDiff); - } - - // Fully correct - int numPrintedSoFar = 0; - for (int i = 0; i < predDerivations.size(); i++) { - Derivation deriv = predDerivations.get(i); - if (compatibilities != null && compatibilities[i] == 1) { - boolean print = printAllPredictions || (numPrintedSoFar < opts.maxPrintedTrue); - if (print) { - LogInfo.logs( - "True@%04d: %s [score=%s, prob=%s%s]", i, deriv.toString(), - Fmt.D(deriv.score), Fmt.D(probs[i]), compatibilities != null ? ", comp=" + Fmt.D(compatibilities[i]) : ""); - numPrintedSoFar++; - if (opts.dumpAllFeatures) FeatureVector.logFeatureWeights("Features", deriv.getAllFeatureVector(), state.params); - } - } - } - // Partially correct - numPrintedSoFar = 0; - for (int i = 0; i < predDerivations.size(); i++) { - Derivation deriv = predDerivations.get(i); - if (compatibilities != null && compatibilities[i] > 0 && compatibilities[i] < 1) { - boolean print = printAllPredictions || (numPrintedSoFar < opts.maxPrintedTrue); - if (print) { - LogInfo.logs( - "Part@%04d: %s [score=%s, prob=%s%s]", i, deriv.toString(), - Fmt.D(deriv.score), Fmt.D(probs[i]), compatibilities != null ? ", comp=" + Fmt.D(compatibilities[i]) : ""); - numPrintedSoFar++; - if (opts.dumpAllFeatures) FeatureVector.logFeatureWeights("Features", deriv.getAllFeatureVector(), state.params); - } - } - } - // Anything that's predicted. 
- for (int i = 0; i < predDerivations.size(); i++) { - Derivation deriv = predDerivations.get(i); - - // Either print all predictions or this prediction is worse by some amount. - boolean print = printAllPredictions || ((probs[i] >= probs[0] / 2 || i < 10) && i < opts.maxPrintedPredictions); - if (print) { - LogInfo.logs( - "Pred@%04d: %s [score=%s, prob=%s%s]", i, deriv.toString(), - Fmt.D(deriv.score), Fmt.D(probs[i]), compatibilities != null ? ", comp=" + Fmt.D(compatibilities[i]) : ""); - // LogInfo.logs("Derivation tree: %s", deriv.toRecursiveString()); - if (opts.dumpAllFeatures) FeatureVector.logFeatureWeights("Features", deriv.getAllFeatureVector(), state.params); - } - } - - evaluation.add("correct", correct); - evaluation.add("oracle", correctIndex != -1); - evaluation.add("partCorrect", partCorrect); - evaluation.add("partOracle", maxCompatibility); - if (correctIndexAfterParse != -1) - evaluation.add("correctIndexAfterParse", correctIndexAfterParse); - - if (correctIndex != -1) { - evaluation.add("correctMaxBeamPosition", predDerivations.get(correctIndex).maxBeamPosition); - evaluation.add("correctMaxUnsortedBeamPosition", predDerivations.get(correctIndex).maxUnsortedBeamPosition); - } - evaluation.add("parsed", numCandidates > 0); - evaluation.add("numCandidates", numCandidates); // From this parse - if (numCandidates > 0) - evaluation.add("parsedNumCandidates", numCandidates); - evaluation.add("numCorrect", numCorrect); - evaluation.add("numPartialCorrect", numPartialCorrect); - evaluation.add("numIncorrect", numIncorrect); - - // Add parsing stats - evaluation.add(state.evaluation); - - // Add executor stats - for (Derivation deriv : predDerivations) { - if (deriv.executorStats != null) - evaluation.add(deriv.executorStats); - } - - LogInfo.end_track(); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ParserAgenda.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ParserAgenda.java deleted file mode 100644 index 
8e31960048..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ParserAgenda.java +++ /dev/null @@ -1,120 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.LogInfo; -import fig.basic.PriorityQueue; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; - -/** - * Data structure for agenda in reinforcement parser - * Created by joberant on 10/20/14. - */ -public interface ParserAgenda extends Iterable { - void sort(); - boolean add(PrioritizedDerivationStream item, double priority); - int size(); - void clear(); - PrioritizedDerivationStream pop(); - PrioritizedDerivationStream get(int i); - void remove(PrioritizedDerivationStream pds, int index); -} - -class ListParserAgenda implements ParserAgenda { - - private List agenda = new ArrayList<>(); - - @Override - public void sort() { - Collections.sort(agenda); - } - - @Override - public boolean add(PrioritizedDerivationStream item, double priority) { - return agenda.add(item); - } - - @Override - public int size() { - return agenda.size(); - } - - @Override - public void clear() { - agenda.clear(); - } - - @Override - public PrioritizedDerivationStream pop() { - // todo - replace sort with finding max (check if makes it faster) - sort(); - PrioritizedDerivationStream pds = agenda.get(0); - remove(pds, 0); - return pds; - } - - @Override - public PrioritizedDerivationStream get(int i) { - return agenda.get(i); - } - - @Override - public void remove(PrioritizedDerivationStream pds, int index) { - PrioritizedDerivationStream last = agenda.remove(agenda.size() - 1); - if (last != pds) - agenda.set(index, last); - } - - @Override - public Iterator iterator() { - return agenda.iterator(); - } -} - -class QueueParserAgenda implements ParserAgenda { - - private PriorityQueue agenda = new PriorityQueue<>(); - - @Override - public void sort() { } - - @Override - public boolean add(PrioritizedDerivationStream item, double priority) { - return 
agenda.add(item, priority); - } - - @Override - public int size() { - return agenda.size(); - } - - @Override - public void clear() { - // hopefully this is never called since we sample just one - LogInfo.warning("QueueParserAgenda: clear is only called when we have more than one sample"); - while (agenda.hasNext()) - agenda.next(); - } - - @Override - public PrioritizedDerivationStream pop() { - return agenda.next(); - } - - @Override - public PrioritizedDerivationStream get(int i) { - throw new RuntimeException("Not supported"); - } - - @Override - public void remove(PrioritizedDerivationStream pds, int index) { - throw new RuntimeException("Not supported"); - } - - @Override - public Iterator iterator() { - throw new RuntimeException("Not supported"); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ParserState.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ParserState.java deleted file mode 100644 index 9194bcb298..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ParserState.java +++ /dev/null @@ -1,364 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.util.*; -import fig.basic.*; - -/** - * Actually does the parsing. 
Main method is infer(), whose job is to fill in - * - * @author Roy Frostig - * @author Percy Liang - */ -public abstract class ParserState { - public static class Options { - @Option(gloss = "Use a custom distribution for computing expected counts") - public CustomExpectedCount customExpectedCounts = CustomExpectedCount.NONE; - @Option(gloss = "For customExpectedCounts = TOP, only update if good < bad + margin") - public double contrastiveMargin = 1e6; // default = always update - @Option(gloss = "Whether to prune based on probability difference") - public boolean pruneByProbDiff = false; - @Option(gloss = "Difference in probability for pruning by prob diff") - public double probDiffPruningThresh = 100; - @Option(gloss = "Throw features away after scoring to save memory") - public boolean throwFeaturesAway = false; - } - public static Options opts = new Options(); - - public enum CustomExpectedCount { NONE, UNIFORM, TOP, TOPALT, RANDOM, } - - //// Input: specification of how to parse - - public final Parser parser; - public final Params params; - public final Example ex; - public final boolean computeExpectedCounts; // Whether we're learning - - //// Output - - public final List predDerivations = new ArrayList(); - public final Evaluation evaluation = new Evaluation(); - - // If computeExpectedCounts is true (for learning), then fill this out. - public Map expectedCounts; - public double objectiveValue; - - // Statistics generated while parsing - public final int numTokens; - public long parseTime; // Number of milliseconds to parse this example - public int maxCellSize; // Maximum number of derivations in any chart cell prior to pruning. - public String maxCellDescription; // Description of that cell (for debugging) - public boolean fallOffBeam; // Did any hypotheses fall off the beam? 
- public int totalGeneratedDerivs; // Total number of derivations produced - public int numOfFeaturizedDerivs = 0; // Number of derivations featured - - public ParserState(Parser parser, Params params, Example ex, boolean computeExpectedCounts) { - this.parser = parser; - this.params = params; - this.ex = ex; - this.computeExpectedCounts = computeExpectedCounts; - this.numTokens = ex.numTokens(); - } - - protected int getBeamSize() { return Parser.opts.beamSize; } - - // Main entry point. Should set all the output variables. - public abstract void infer(); - - protected void featurizeAndScoreDerivation(Derivation deriv) { - if (deriv.isFeaturizedAndScored()) { - LogInfo.warnings("Derivation already featurized: %s", deriv); - return; - } - - // Compute features - parser.extractor.extractLocal(ex, deriv); - - // Compute score - deriv.computeScoreLocal(params); - - if (opts.throwFeaturesAway) - deriv.clearFeatures(); - - if (parser.verbose(5)) { - LogInfo.logs("featurizeAndScoreDerivation(score=%s) %s %s: %s [rule: %s]", - Fmt.D(deriv.score), deriv.cat, ex.spanString(deriv.start, deriv.end), deriv, deriv.rule); - } - numOfFeaturizedDerivs++; - } - - /** - * Prune down the number of derivations in |derivations| to the beam size. - * Sort the beam by score. - * Update beam statistics. - */ - protected void pruneCell(String cellDescription, List derivations) { - if (derivations == null) return; - - // Update stats about cell size. - if (derivations.size() > maxCellSize) { - maxCellSize = derivations.size(); - maxCellDescription = cellDescription; - if (maxCellSize > 5000) - LogInfo.logs("ParserState.pruneCell %s: maxCellSize = %s entries (not pruned yet)", - maxCellDescription, maxCellSize); - } - - // The extra code blocks in here that set |deriv.maxXBeamPosition| - // are there to track, over the course of parsing, the lowest - // position at which any of a derivation's constituents ever - // placed on any of the relevant beams. 
- - // Max beam position (before sorting) - int i = 0; - for (Derivation deriv : derivations) { - deriv.maxUnsortedBeamPosition = i; - if (deriv.children != null) { - for (Derivation child : deriv.children) - deriv.maxUnsortedBeamPosition = Math.max(deriv.maxUnsortedBeamPosition, child.maxUnsortedBeamPosition); - } - if (deriv.preSortBeamPosition == -1) { - // Need to be careful to only do this once since |pruneCell()| - // might be called several times for the same beam and the - // second time around we have already sorted once. - deriv.preSortBeamPosition = i; - } - i++; - } - - // Inject noise into the noise (to simulate sampling); ideally would add Gumbel noise - if (Parser.opts.derivationScoreNoise > 0) { - for (Derivation deriv : derivations) - deriv.score += Parser.opts.derivationScoreRandom.nextDouble() * Parser.opts.derivationScoreNoise; - } - - Derivation.sortByScore(derivations); - - // Print out information - if (Parser.opts.verbose >= 3) { - LogInfo.begin_track("ParserState.pruneCell(%s): %d derivations", cellDescription, derivations.size()); - for (Derivation deriv : derivations) { - LogInfo.logs("%s(%s,%s): %s %s, [score=%s] allAnchored: %s", deriv.cat, deriv.start, deriv.end, deriv.formula, - deriv.canonicalUtterance, deriv.score, deriv.allAnchored()); - } - LogInfo.end_track(); - } - - // Max beam position (after sorting) - i = 0; - for (Derivation deriv : derivations) { - deriv.maxBeamPosition = i; - if (deriv.children != null) { - for (Derivation child : deriv.children) - deriv.maxBeamPosition = Math.max(deriv.maxBeamPosition, child.maxBeamPosition); - } - deriv.postSortBeamPosition = i; - i++; - } - - //prune all d_i s.t p(d_1) > CONST \cdot p(d_i) - if(ChartParserState.opts.pruneByProbDiff) { - double highestScore = derivations.get(0).score; - while (highestScore - derivations.get(derivations.size()-1).score > Math.log(opts.probDiffPruningThresh)) { - derivations.remove(derivations.size() - 1); - fallOffBeam = true; - } - } - //prune by beam 
size - else { - // Keep only the top hypotheses - int beamSize = getBeamSize(); - if (derivations.size() > beamSize && Parser.opts.verbose >= 1) { - LogInfo.logs("ParserState.pruneCell %s: Pruning %d -> %d derivations", cellDescription, derivations.size(), beamSize); - } - while (derivations.size() > beamSize) { - derivations.remove(derivations.size() - 1); - fallOffBeam = true; - } - } - } - - // -- Base case -- - public List gatherTokenAndPhraseDerivations() { - List derivs = new ArrayList<>(); - - // All tokens (length 1) - for (int i = 0; i < numTokens; i++) { - derivs.add( - new Derivation.Builder() - .cat(Rule.tokenCat).start(i).end(i + 1) - .rule(Rule.nullRule) - .children(Derivation.emptyList) - .withStringFormulaFrom(ex.token(i)) - .canonicalUtterance(ex.token(i)) - .createDerivation()); - - // Lemmatized version - derivs.add( - new Derivation.Builder() - .cat(Rule.lemmaTokenCat).start(i).end(i + 1) - .rule(Rule.nullRule) - .children(Derivation.emptyList) - .withStringFormulaFrom(ex.lemmaToken(i)) - .canonicalUtterance(ex.token(i)) - .createDerivation()); - } - - // All phrases (any length) - for (int i = 0; i < numTokens; i++) { - for (int j = i + 1; j <= numTokens; j++) { - derivs.add( - new Derivation.Builder() - .cat(Rule.phraseCat).start(i).end(j) - .rule(Rule.nullRule) - .children(Derivation.emptyList) - .withStringFormulaFrom(ex.phrase(i, j)) - .canonicalUtterance(ex.phrase(i, j)) - .createDerivation()); - - // Lemmatized version - derivs.add( - new Derivation.Builder() - .cat(Rule.lemmaPhraseCat).start(i).end(j) - .rule(Rule.nullRule) - .children(Derivation.emptyList) - .withStringFormulaFrom(ex.lemmaPhrase(i, j)) - .canonicalUtterance(ex.phrase(i, j)) - .createDerivation()); - } - } - return derivs; - } - - // Ensure that all the logical forms are executed and compatibilities are computed. - public void ensureExecuted() { - LogInfo.begin_track("Parser.ensureExecuted"); - // Execute predicted derivations to get value. 
- for (Derivation deriv : predDerivations) { - deriv.ensureExecuted(parser.executor, ex.context); - if (ex.targetValue != null) - deriv.compatibility = parser.valueEvaluator.getCompatibility(ex.targetValue, deriv.value); - if (!computeExpectedCounts && Parser.opts.executeTopFormulaOnly) break; - } - LogInfo.end_track(); - } - - // Add statistics to |evaluation|. - // Override if we have more statistics. - protected void setEvaluation() { - evaluation.add("numTokens", numTokens); - evaluation.add("parseTime", parseTime); - evaluation.add("maxCellSize", maxCellDescription, maxCellSize); - evaluation.add("fallOffBeam", fallOffBeam); - evaluation.add("totalDerivs", totalGeneratedDerivs); - evaluation.add("numOfFeaturizedDerivs", numOfFeaturizedDerivs); - } - - public static double compatibilityToReward(double compatibility) { - if (Parser.opts.partialReward) - return compatibility; - return compatibility == 1 ? 1 : 0; // All or nothing - } - - /** - * Fill |counts| with the gradient with respect to the derivations - * according to a standard exponential family model over a finite set of derivations. - * Assume that everything has been executed, and compatibility has been computed. 
- */ - public static void computeExpectedCounts(List derivations, Map counts) { - double[] trueScores; - double[] predScores; - - int n = derivations.size(); - if (n == 0) return; - - trueScores = new double[n]; - predScores = new double[n]; - // For update schemas that choose one good and one bad candidate to update - int[] goodAndBad = null; - if (opts.customExpectedCounts == CustomExpectedCount.TOP || opts.customExpectedCounts == CustomExpectedCount.TOPALT) { - goodAndBad = getTopDerivations(derivations); - if (goodAndBad == null) return; - } else if (opts.customExpectedCounts == CustomExpectedCount.RANDOM) { - goodAndBad = getRandomDerivations(derivations); - if (goodAndBad == null) return; - } - - for (int i = 0; i < n; i++) { - Derivation deriv = derivations.get(i); - double logReward = Math.log(compatibilityToReward(deriv.compatibility)); - - switch (opts.customExpectedCounts) { - case NONE: - trueScores[i] = deriv.score + logReward; - predScores[i] = deriv.score; - break; - case UNIFORM: - trueScores[i] = logReward; - predScores[i] = 0; - break; - case TOP: case RANDOM: - trueScores[i] = (i == goodAndBad[0]) ? 0 : Double.NEGATIVE_INFINITY; - predScores[i] = (i == goodAndBad[1]) ? 0 : Double.NEGATIVE_INFINITY; - break; - case TOPALT: - trueScores[i] = (i == goodAndBad[0]) ? 0 : Double.NEGATIVE_INFINITY; - predScores[i] = (i == goodAndBad[0] || i == goodAndBad[1]) ? deriv.score : Double.NEGATIVE_INFINITY; - break; - default: - throw new RuntimeException("Unknown customExpectedCounts: " + opts.customExpectedCounts); - } - } - - // Usually this happens when there are no derivations. 
- if (!NumUtils.expNormalize(trueScores)) return; - if (!NumUtils.expNormalize(predScores)) return; - - // Update parameters - for (int i = 0; i < n; i++) { - Derivation deriv = derivations.get(i); - double incr = trueScores[i] - predScores[i]; - if (incr == 0) continue; - deriv.incrementAllFeatureVector(incr, counts); - } - } - - private static int[] getTopDerivations(List derivations) { - int chosenGood = -1, chosenBad = -1; - double chosenGoodScore = Double.NEGATIVE_INFINITY, chosenBadScore = Double.NEGATIVE_INFINITY; - for (int i = 0; i < derivations.size(); i++) { - Derivation deriv = derivations.get(i); - if (deriv.compatibility == 1) { // good - if (deriv.score > chosenGoodScore) { - chosenGood = i; chosenGoodScore = deriv.score; - } - } else { // bad - if (deriv.score > chosenBadScore) { - chosenBad = i; chosenBadScore = deriv.score; - } - } - } - if (chosenGood == -1 || chosenBad == -1 || chosenGoodScore >= chosenBadScore + opts.contrastiveMargin) - return null; - return new int[] {chosenGood, chosenBad}; - } - - private static int[] getRandomDerivations(List derivations) { - int chosenGood = -1, chosenBad = -1, numGoodSoFar = 0, numBadSoFar = 0; - // Get a uniform random sample from the stream - for (int i = 0; i < derivations.size(); i++) { - Derivation deriv = derivations.get(i); - if (deriv.compatibility == 1) { - numGoodSoFar++; - if (Math.random() <= 1.0 / numGoodSoFar) { - chosenGood = i; - } - } else { // bad - numBadSoFar++; - if (Math.random() <= 1.0 / numBadSoFar) { - chosenBad = i; - } - } - } - return (chosenGood == -1 || chosenBad == -1) ? 
null : new int[] {chosenGood, chosenBad}; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/PrimitiveFormula.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/PrimitiveFormula.java deleted file mode 100644 index 90a0a86080..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/PrimitiveFormula.java +++ /dev/null @@ -1,30 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.google.common.base.Function; - -import java.util.List; - -/** - * A PrimitiveFormula represents an atomic value which is cannot be decomposed - * into further symbols. Either a ValueFormula or a VariableFormula. - * - * @author Percy Liang - */ -public abstract class PrimitiveFormula extends Formula { - - @Override - public void forEach(Function func) { - func.apply(this); - } - - @Override - public Formula map(Function func) { - Formula result = func.apply(this); - return result == null ? this : result; - } - - @Override - public List mapToList(Function> func, boolean alwaysRecurse) { - return func.apply(this); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ReinforcementParser.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ReinforcementParser.java deleted file mode 100644 index 107d82cc88..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ReinforcementParser.java +++ /dev/null @@ -1,973 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.google.common.base.Joiner; -import fig.basic.*; -import fig.exec.Execution; -import fig.prob.SampleUtils; - -import java.util.*; - -/** - * @author joberant - * Parser and learning using re-inforcement learning, that is maximizing future rewards. 
- * During learning we sample from the agenda that contains all possible next actions - * Then we update the gradient so that actions that were taken and led to good reward - * will get better weights than other actions - * - * At test time we simply find the highest probability parse so that is done using regular parsing - * - * We implement this using a chart and an agenda that holds the possible future actions - * - * NOTE: during search we search with the agenda and then do re-ranking - * features are separated by a "search" prefix. - * For this when we compute a derivation score on the agenda we prefix the derivation - * features with "search", and in addition, when updating the expected counts we add the - * "search" prefix. This is brittle... but at the end we score with re-ranking features - * and then the score of the derivation is \theta \times \phi as usual - */ -public class ReinforcementParser extends Parser { - public static class Options { - @Option(gloss = "Whether to do coarse pruning") - public boolean efficientCoarsePrune = true; - @Option(gloss = "Whether to do importance sampling") - public double multiplicativeBonus = 1000d; - @Option (gloss = "Number of samples") - public int numOfSamplesPerExample = 1; - @Option (gloss = "Whether to update gradient only for correct moves") - public boolean updateGradientForCorrectMovesOnly = true; - @Option (gloss = "Low probability for which we don't unroll the stream") - public double lowProb = 0.01; - @Option (gloss = "Whether to simulate the log liklihood objective") - public boolean simulateNonRlObjective = false; - @Option (gloss = "Whether to always unroll (even at test time)") - public boolean alwaysUnroll = false; - } - public static Options opts = new Options(); - - // we assume here a binarized grammar - Map>> leftToRightSiblingMap = new HashMap<>(); - Map>> rightToLeftSiblingMap = new HashMap<>(); - Map> terminalsToRulesList = new HashMap<>(); - final CoarseParser coarseParser; - public static 
final String SEARCH_PREFIX = "search_"; - public final String searchPrefix; - - public ReinforcementParser(Spec spec) { - super(spec); - coarseParser = new CoarseParser(grammar); - - // generate maps from left to right and vice versa - for (Rule rule : grammar.rules) { - if (rule.rhs.size() > 2) - throw new RuntimeException("We assume that the grammar is binarized, rule: " + rule); - if (rule.rhs.size() == 2) { - String left = rule.rhs.get(0); - String right = rule.rhs.get(1); - addToSiblingMap(left, right, rule, leftToRightSiblingMap); - addToSiblingMap(right, left, rule, rightToLeftSiblingMap); - } - if (rule.isRhsTerminals()) - MapUtils.addToList(terminalsToRulesList, Joiner.on(' ').join(rule.rhs), rule); - } - if (Parser.opts.visualizeChartFilling) - this.chartFillOut = IOUtils.openOutAppendEasy(Execution.getFile("chartfill")); - searchPrefix = opts.simulateNonRlObjective ? "" : SEARCH_PREFIX; - LogInfo.logs("ReinforcementParser(): search prefix is %s", searchPrefix); - } - - private void addToSiblingMap(String keySibling, String valueSibling, Rule rule, - Map>> siblingToSiblingMap) { - Map> valueSiblingMap = siblingToSiblingMap.get(keySibling); - if (valueSiblingMap == null) - siblingToSiblingMap.put(keySibling, valueSiblingMap = new HashMap<>()); - MapUtils.addToList(valueSiblingMap, valueSibling, rule); - } - - @Override - public ParserState newParserState(Params params, Example ex, boolean computeExpectedCounts) { - if (computeExpectedCounts) { // if we learn - use sampling, otherwise, use max - // if we simulate non RL we just take the max and not do sampling - if (opts.simulateNonRlObjective) { - return new ReinforcementParserState.StateBuilder() - .parser(this) - .params(params) - .example(ex) - .samplingStrategy("max") - .computeExpectedCounts(true) - .createState(); - } else { - return (new ReinforcementParserState.StateBuilder() - .parser(this) - .params(params) - .example(ex)) - .samplingStrategy("proposal") - .computeExpectedCounts(true) - 
.createState(); - } - } - return (new ReinforcementParserState.StateBuilder() - .parser(this) - .params(params) - .example(ex)) - .samplingStrategy("max") - .computeExpectedCounts(false) - .createState(); - } - - @Override - public Params getSearchParams(Params params) { - return params.copyParamsByPrefix(searchPrefix); - } -} - -// Parsing by sampling items from the agenda -// Defined by: (a) sampling strategy (from agenda, from true derivation, take the max) -// (b) whether an update needs to be done -final class ReinforcementParserState extends AbstractReinforcementParserState { - - public enum NecessaryDeriv { NECESSARY_DERIV, UNNECESSARY_DERIV, UNKNOWN } - private static final double LOG_SMALL_PROB = Math.log(ReinforcementParser.opts.lowProb); - - private final ParserAgenda agenda; - - private int completeDerivationsPushed = 0; - private int firstCorrectItem = -1; - private String samplingStrategy; - private Sampler sampler; - List correctDerivations = new ArrayList<>(); - private Map stateSequenceExpectedCounts = new HashMap<>(); - Random randGen = new Random(1); - // backpointers for remembering what derivations on the stream were popped before others - private Map, Integer>> backpointerList; - private int numItemsSampled = 0; - - public static class StateBuilder { - private ReinforcementParser parser; - private Params params; - private Example example; - private ParserState coarseState; - private String samplingStrategy = null; - private boolean computeExpectedCounts; - - public StateBuilder parser(ReinforcementParser parser) { this.parser = parser; return this; } - public StateBuilder params(Params params) { this.params = params; return this; } - public StateBuilder example(Example example) { this.example = example; return this; } - public StateBuilder samplingStrategy(String samplingStrategy) { this.samplingStrategy = samplingStrategy; return this; } - public StateBuilder computeExpectedCounts(boolean computeExpectedCounts) { this.computeExpectedCounts = 
computeExpectedCounts; return this; } - public ReinforcementParserState createState() { - return new ReinforcementParserState(this.parser, this.params, this.example, this.computeExpectedCounts, - this.samplingStrategy); - } - } - - // note that the sampler has a pointer to the fields of the state where they were created - private ReinforcementParserState(ReinforcementParser parser, Params params, Example ex, boolean computeExpectedCounts, - String samplingStrategy) { - super(parser, params, ex, computeExpectedCounts); - this.samplingStrategy = samplingStrategy; - backpointerList = new HashMap<>(); - agenda = samplingStrategy.equals("max") ? new QueueParserAgenda() : new ListParserAgenda(); - } - - private void clearState() { - agenda.clear(); - clearChart(); - completeDerivationsPushed = 0; - firstCorrectItem = -1; - correctDerivations.clear(); - stateSequenceExpectedCounts.clear(); - backpointerList.clear(); - numItemsSampled = 0; - } - - - - protected void addToAgenda(DerivationStream derivationStream) { - addToAgenda(derivationStream, 0d); - } - - private void addToAgenda(DerivationStream derivationStream, double probSum) { - - if (!derivationStream.hasNext()) return; - - //if it's less than one we can just add it even if we unroll everything (optimization) - if(!ReinforcementParser.opts.alwaysUnroll || derivationStream.estimatedSize() <= 1) { - Derivation deriv = derivationStream.peek(); - featurizeAndScoreDerivation(deriv); - addToAgendaWithScore(derivationStream, deriv.score, probSum); - if (completeDerivationsPushed % 100 == 0) // sort the agenda - agenda.sort(); - } - else { - while(derivationStream.hasNext()) { - Derivation deriv = derivationStream.next(); - featurizeAndScoreDerivation(deriv); - DerivationStream newStream = SingleDerivationStream.constant(deriv); - addToAgendaWithScore(newStream, deriv.score, probSum); - if (completeDerivationsPushed % 100 == 0) // sort the agenda - agenda.sort(); - } - } - } - - // we need to override the method because 
parameters are prefixed with "search_" - // this means that the score will not be the dot product and features and weights - protected void featurizeAndScoreDerivation(Derivation deriv) { - if (deriv.isFeaturizedAndScored()) return; - - // Compute features - parser.extractor.extractLocal(ex, deriv); - - // Compute score by adding |SEARCH_PREFIX| prefix and adding children scores - FeatureVector searchFV = deriv.addPrefixLocalFeatureVector(parser.searchPrefix); - deriv.score = searchFV.dotProduct(params); - if (deriv.children != null) - for (Derivation child : deriv.children) - deriv.score += child.score; - - if (parser.verbose(3)) - LogInfo.logs( - "featurizeAndScore(score=%s) %s %s: %s [rule: %s]", - Fmt.D(deriv.score), deriv.cat, ex.spanString(deriv.start, deriv.end), deriv, deriv.rule); - numOfFeaturizedDerivs++; - } - - private void addToAgendaWithScore(DerivationStream derivationStream, double derivScore, double probSum) { - if (derivScore == Double.NEGATIVE_INFINITY) return; // no need to add bad derivations - shouldn't happen - - Derivation deriv = derivationStream.peek(); // Score a DerivationStream based on the first item in the stream. 
- double priority = derivScore - (completeDerivationsPushed++ * EPSILON); - agenda.add(new PrioritizedDerivationStream(derivationStream, priority, probSum), priority); // when adding to agenda probsum is 0 - - if (parser.verbose(3)) { - LogInfo.logs("ReinforcementParser: adding to agenda: size=%s, priority=%s, deriv=%s(%s,%s), formula=%s,|pushed|=%s", - agenda.size(), priority, deriv.cat, deriv.start, deriv.end, deriv.formula, completeDerivationsPushed); - } - } - - public boolean continueParsing() { - if (agenda.size() == 0) { - LogInfo.log("Agenda is empty"); - return false; - } - - return chart[0][numTokens].get(Rule.rootCat) == null || - chart[0][numTokens].get(Rule.rootCat).size() < getBeamSize(); - } - - public void infer() { - if (numTokens == 0) - return; - - ReinforcementParserState oracleState = null; - expectedCounts = new HashMap<>(); - if (computeExpectedCounts && !ReinforcementParser.opts.simulateNonRlObjective) { // when updating params we first find a correct derivation to set the oracle sampler - // TODO(jonathan): move to ReinforcementParser, not ParserState - LogInfo.begin_track("Finding oracle derivation"); - oracleState = new StateBuilder() - .parser(this.parser) - .params(this.params) - .example(this.ex) - .samplingStrategy("agenda") - .computeExpectedCounts(false).createState(); // update params is false preventing an infinite loop - oracleState.infer(); - LogInfo.end_track(); - if (oracleState.correctDerivations.isEmpty()) { - LogInfo.logs("No oracle derivation found"); - return; - } - } - createSampler(oracleState); // we can only create the sampler after we have the oracle derivation - - LogInfo.begin_track("Coarse parsing"); - coarseParserState = null; - if (ReinforcementParser.opts.efficientCoarsePrune) - coarseParserState = coarseParser.getCoarsePrunedChart(ex); - LogInfo.end_track(); - - // draw a sample to compute gradient and expected reward - LogInfo.begin_track("ReinforcementParserState.inferBySampling"); - sampleHistoryAndInfer(); 
- LogInfo.end_track(); - - // Compute gradient - setPredDerivations(); - if (parser.verbose(3)) - LogInfo.logs("Expected reward = %s", objectiveValue); - visualizeChart(); - } - - private void sampleHistoryAndInfer() { - - // add to chart the token and phrase parts - for (Derivation deriv : gatherTokenAndPhraseDerivations()) - addToAgenda(SingleDerivationStream.constant(deriv)); - // add to agenda unaries where RHS is just terminals - for (DerivationStream derivStream : gatherRhsTerminalsDerivations()) - addToAgenda(derivStream); - - ensureExecuted(); - - while (continueParsing()) { - - unrollHighProbStreams(); - Pair pdsAndProbability = sampler.sample(); - - DerivationStream sampledDerivations = pdsAndProbability.getFirst().derivStream; - Derivation sampledDerivation = sampledDerivations.next(); - updateBackpointers(sampledDerivations, sampledDerivation); // to be able to get all correct actions - numItemsSampled++; - - assert sampledDerivation.isFeaturizedAndScored() : "top derivation is not featurized and scored: " + sampledDerivation; - assert Math.abs(sampledDerivation.score - pdsAndProbability.getFirst().priority) < 1e-4 : - sampledDerivation.score + " != " + pdsAndProbability.getFirst().priority; - - if (parser.verbose(2)) { - LogInfo.begin_track("Item %d (|agenda|=%d), priority %s: |item|=%s -> %s %s %s [%s], prob=%s", - numItemsSampled, agenda.size() + 1, Fmt.D(pdsAndProbability.getFirst().priority), sampledDerivations.estimatedSize(), - sampledDerivation.cat, ex.spanString(sampledDerivation.start, sampledDerivation.end), sampledDerivation, - sampledDerivation.rule, pdsAndProbability.getSecond()); - } - - // handle root derivations - get compatibility and record number of compatible derivations - handleRootDerivation(ex, numItemsSampled, sampledDerivation); - - if (computeExpectedCounts) { - Map counts = new HashMap<>(); - // add the feature vector and subtract for the time it was in the agenda unless has negative probability - //pretty hacky - if 
(pdsAndProbability.getSecond() > -0.0001) - sampledDerivation.incrementLocalFeatureVector(1 - pdsAndProbability.getFirst().probSum, counts); - else - sampledDerivation.incrementLocalFeatureVector(-pdsAndProbability.getFirst().probSum, counts); - if (parser.verbose(3)) - SempreUtils.logMap(counts, "agenda item gradient"); - ReinforcementUtils.addToDoubleMap(stateSequenceExpectedCounts, counts, parser.searchPrefix); // upate the gradient incrementally - } - // only after update of params we can change the chart and the agenda - if (addToBoundedChart(sampledDerivation)) { - if (parser.verbose(5)) - LogInfo.logs("ReinforcementParserState.infer: adding to chart %s(%s,%s) formula=%s", - sampledDerivation.cat, sampledDerivation.start, sampledDerivation.end, sampledDerivation.formula); - combineWithChartDerivations(sampledDerivation); - } - addToAgenda(sampledDerivations); - if (parser.verbose(2)) - LogInfo.end_track(); - } - - finalizeSearchExpectedCounts(); // gradient for remaining agenda items - rerankRootDerivations(); // last action - if (computeExpectedCounts) { - computeGradient(); - } - } - - private void unrollHighProbStreams() { - - if (samplingStrategy.equals("max")) return; - - sampler.unroll(); // if multiplicative, then unroll oracle stuff (ignore \beta currently!) 
- - if (parser.verbose(3)) - LogInfo.begin_track("Unrolling high probability streams"); - - double lb=Double.NEGATIVE_INFINITY; - int numOfHiddenStreams = 0; - for (PrioritizedDerivationStream pds : agenda) { - lb = NumUtils.logAdd(lb, pds.getScore()); - if(pds.derivStream.estimatedSize() > 1) - numOfHiddenStreams++; - } - - if (parser.verbose(3)) - LogInfo.logs("unrollHighProbStreams(): |agenda|=%s, lb=%s, |hiddenstreams|=%s", agenda.size(), lb, numOfHiddenStreams); - - List> derivsToAdd = new ArrayList<>(); - List indicesToRemove = new ArrayList<>(); - for (int i = 0; i < agenda.size(); ++i) { - PrioritizedDerivationStream pds = agenda.get(i); - boolean modified = false; - while (pds.derivStream.hasNext() && pds.derivStream.estimatedSize() > 1 && - illegalStream(pds.derivStream, lb, pds.derivStream.estimatedSize(), numOfHiddenStreams)) { - modified = true; - Derivation nextDeriv = pds.derivStream.next(); - updateBackpointers(pds.derivStream, nextDeriv); - - DerivationStream derivStream = SingleDerivationStream.constant(nextDeriv); - if (parser.verbose(3) && derivStream.hasNext()) { - Derivation deriv = derivStream.peek(); - LogInfo.logs("unrollIllegalStreams(): add deriv=%s(%s,%s) [%s] score=%s, |stream|=%s", - deriv.cat, deriv.start, deriv.end, deriv.formula, deriv.score, pds.derivStream.estimatedSize()); - } - derivsToAdd.add(Pair.newPair(derivStream, pds.probSum)); - //update lb - if (pds.derivStream.hasNext()) { - featurizeAndScoreDerivation(pds.derivStream.peek()); - lb = NumUtils.logAdd(lb, pds.getScore()); - } - //update num of hidden streams - if(pds.derivStream.estimatedSize() <= 1) - numOfHiddenStreams--; - } - if (modified) { - indicesToRemove.add(i); - derivsToAdd.add(Pair.newPair(pds.derivStream, pds.probSum)); - } - } - - // remove - need to make sure indices don't change due to removal so go from end to start - for (int i = indicesToRemove.size() - 1; i >= 0; --i) - agenda.remove(agenda.get(indicesToRemove.get(i)), indicesToRemove.get(i)); - - // 
add - for (Pair pair : derivsToAdd) - addToAgenda(pair.getFirst(), pair.getSecond()); - - if (parser.verbose(3)) - LogInfo.logs("unrollHighProbStreams(): |agenda|=%s", agenda.size()); - if (parser.verbose(3)) - LogInfo.end_track(); - } - - private boolean illegalStream(DerivationStream derivStream, double logSum, int estimatedSize, int numOfHiddenStreams) { - Derivation deriv = derivStream.peek(); - double firstItemLogProb = deriv.score - logSum; //log(exp(s(g_1))/L) - double upperBound = Math.log(estimatedSize) + Math.log(numOfHiddenStreams); //log(M(g)|G'|) - - if (parser.verbose(3)) - LogInfo.logs("IllegalStream(): score=%s, logsum=%s, |stream|=%s, |hiddenstreams|=%s, deriv=%s(%s,%s) %s, sum=%s", - deriv.score, logSum, estimatedSize, numOfHiddenStreams, deriv.cat, deriv.start, deriv.end, deriv.formula, firstItemLogProb+upperBound); - - return (firstItemLogProb+upperBound) > LOG_SMALL_PROB; - } - - private boolean isHighProbStream(DerivationStream derivStream, double maxScore, int estimatedSize) { - Derivation deriv = derivStream.peek(); - double gapFromMax = deriv.score - maxScore; - double threshold = LOG_SMALL_PROB - Math.log(estimatedSize); - if (parser.verbose(3)) - LogInfo.logs("isHighProbStream(): gapFromMax=%s, threshold=%s, deriv=%s(%s,%s) %s |stream|=%s", gapFromMax, threshold, - deriv.cat, deriv.start, deriv.end, deriv.formula, derivStream.estimatedSize()); - - return gapFromMax > threshold; - } - - // recompute score using dot product of features and re-ranking feature weights - private void rerankRootDerivations() { - setPredDerivations(); - for (Derivation rootDeriv : predDerivations) { - double oldScore = rootDeriv.score; - rootDeriv.computeScore(params); - if (parser.verbose(3)) - LogInfo.logs("ReinforcementParser.rerankRootDerivations: deriv=%s, old=%s, new=%s", rootDeriv, oldScore, rootDeriv.score); - } - Derivation.sortByScore(predDerivations); - } - - private void updateBackpointers(DerivationStream stream, Derivation sampledDeriv) { - Pair, 
Integer> pair = backpointerList.get(sampledDeriv.creationIndex); - if (!stream.hasNext()) return; - - if (pair == null) { - ArrayList list = new ArrayList<>(); - list.add(sampledDeriv); - backpointerList.put(sampledDeriv.creationIndex, pair = Pair.newPair(list, 0)); - } - List list = pair.getFirst(); - Derivation nextDeriv = stream.peek(); - list.add(nextDeriv); - backpointerList.put(nextDeriv.creationIndex, Pair.newPair(pair.getFirst(), list.size() - 1)); - } - - private double computeExpectedReward(List predDerivations, double[] probs) { - double rewardExpectation = 0d; - for (int i = 0; i < predDerivations.size(); ++i) { - rewardExpectation += probs[i] * compatibilityToReward(predDerivations.get(i).compatibility); - } - return rewardExpectation; - } - - // q - proposal distribution - // pi = model distribution - private void computeGradient() { - if (predDerivations.isEmpty()) return; - - double[] qDist = sampler.getDerivDistribution(predDerivations); //uniform over correct things when \beta is high - double[] piDist = ReinforcementUtils.expNormalize(predDerivations); - // compute E_q(R(d)) - LogInfo.begin_track("Computing gradient"); - double rewardExpectation = computeExpectedReward(predDerivations, qDist); - - // compute E_q(\phi(d) R(d)) and E_pi(\phi(d)) - Map featureExpectation = new HashMap<>(), rewardInfusedFeatureExpectation = new HashMap<>(); - for (int i = 0; i < predDerivations.size(); ++i) { - Derivation deriv = predDerivations.get(i); - deriv.incrementAllFeatureVector(piDist[i], featureExpectation); - deriv.incrementAllFeatureVector(qDist[i] * compatibilityToReward(deriv.compatibility), rewardInfusedFeatureExpectation); - } - // final gradient computation - Map sampleCounts = new HashMap<>(); - if (ReinforcementParser.opts.simulateNonRlObjective) { - ParserState.computeExpectedCounts(predDerivations, sampleCounts); - } else { - sampleCounts = ReinforcementUtils.multiplyDoubleMap(stateSequenceExpectedCounts, rewardExpectation); - 
SempreUtils.addToDoubleMap(sampleCounts, rewardInfusedFeatureExpectation); - ReinforcementUtils.subtractFromDoubleMap(sampleCounts, ReinforcementUtils.multiplyDoubleMap(featureExpectation, rewardExpectation)); - } - SempreUtils.addToDoubleMap(expectedCounts, sampleCounts); - - double sum = 0d; - for (String key : sampleCounts.keySet()) { - double value = sampleCounts.get(key); - if (parser.verbose(3)) - LogInfo.logs("feature=%s, value=%s", key, value); - sum += value * value; - } - LogInfo.logs("L2 norm: %s", Math.sqrt(sum)); - LogInfo.end_track(); - } - - private void createSampler(ReinforcementParserState oracleState) { - if ("proposal".equals(samplingStrategy)) { - if (oracleState == null) - throw new RuntimeException("missing oracle state"); - this.sampler = new MultiplicativeProposalSampler(oracleState); - } else if ("max".equals(samplingStrategy)) { - this.sampler = new MaxSampler(); - } else if ("agenda".equals(samplingStrategy) || samplingStrategy == null) // default - this.sampler = new AgendaSampler(); - } - - // info for visualizing chart - private void visualizeChart() { - if (parser.chartFillOut != null && Parser.opts.visualizeChartFilling) { - parser.chartFillOut.println(Json.writeValueAsStringHard(new ChartFillingData(ex.id, chartFillingList, - ex.utterance, ex.numTokens()))); - parser.chartFillOut.flush(); - } - } - - // go over all agenda items that we did not subtract counts for and finalize - private void finalizeSearchExpectedCounts() { - if (ReinforcementParser.opts.simulateNonRlObjective) return; - if (!computeExpectedCounts) return; - Map counts = new HashMap<>(); - for (PrioritizedDerivationStream pds : agenda) { - pds.derivStream.peek().incrementLocalFeatureVector(pds.probSum, counts); - } - if (parser.verbose(3)) { - SempreUtils.logMap(counts, "subtracted"); - } - ReinforcementUtils.subtractFromDoubleMap(stateSequenceExpectedCounts, counts, parser.searchPrefix); - if (parser.verbose(3)) { - SempreUtils.logMap(stateSequenceExpectedCounts, 
"final search gradient"); - } - } - - private void handleRootDerivation(Example ex, int numItemsSampled, Derivation sampledDerivation) { - if (!sampledDerivation.isRoot(ex.numTokens())) return; - - sampledDerivation.ensureExecuted(parser.executor, ex.context); - if (ex.targetValue != null) - sampledDerivation.compatibility = parser.valueEvaluator.getCompatibility(ex.targetValue, sampledDerivation.value); - if (Parser.opts.partialReward ? (sampledDerivation.compatibility > 0) : (sampledDerivation.compatibility == 1)) { - if (parser.verbose(2)) - LogInfo.logs("Top-level %s: reward = %s", numItemsSampled, sampledDerivation.compatibility); - // put in position 0 the derivation with best compatibility - correctDerivations.add(sampledDerivation); - if (correctDerivations.get(0).compatibility < sampledDerivation.compatibility) { - Collections.swap(correctDerivations, 0, correctDerivations.size() - 1); - } - - if (firstCorrectItem == -1) - firstCorrectItem = numItemsSampled; - } - } - - public void setEvaluation() { - LogInfo.begin_track_printAll("ReinforcementParserParserState.setEvaluation"); - super.setEvaluation(); - - if (coarseParserState != null) - evaluation.add("coarseParseTime", coarseParserState.getCoarseParseTime()); - if (firstCorrectItem != -1) - evaluation.add("firstCorrectItem", firstCorrectItem); - LogInfo.end_track(); - } - - // Defines how to sample the next state - abstract class Sampler { - // returns a derivation stream with the sample probability - public abstract Pair sample(); - public abstract double[] getDerivDistribution(List rootDerivs); - public abstract void unroll(); - - // go over agenda and update the probability sum for gradient computation before sampling - public void updateProbSum(double[] modelProbs) { - for (int i = 0; i < agenda.size(); ++i) { - PrioritizedDerivationStream pds = agenda.get(i); - pds.addProb(modelProbs[i]); - if (parser.verbose(3)) - LogInfo.logs("updateProbSum(): deriv=%s, probSum=%s", pds.derivStream.peek(), 
pds.probSum); - } - } - } - - // sample from agenda based on agenda scores - class AgendaSampler extends Sampler { - - @Override - - public Pair sample() { - - double[] modelProbs = ReinforcementUtils.expNormalize(agenda); - if (computeExpectedCounts)// compute probability sum before sampling for gradient computation (easier before sampling) - updateProbSum(modelProbs); - - int sampledIndex = SampleUtils.sampleMultinomial(randGen, modelProbs); - PrioritizedDerivationStream pds = agenda.get(sampledIndex); - double prob = modelProbs[sampledIndex]; - agenda.remove(pds, sampledIndex); - return Pair.newPair(pds, prob); - } - - @Override - public double[] getDerivDistribution(List rootDerivs) { - return ReinforcementUtils.expNormalize(rootDerivs); - } - - @Override - public void unroll() { } - } - - class MaxSampler extends Sampler { - - @Override - public Pair sample() { - PrioritizedDerivationStream pds = agenda.pop(); - return Pair.newPair(pds, 1d); - } - - @Override - public double[] getDerivDistribution(List rootDerivs) { - double[] res = new double[rootDerivs.size()]; - Arrays.fill(res, 0d); - res[0] = 1d; - return res; - } - - @Override - public void unroll() { } - } - - // sample categories and spans that are in a correct derivation - class OracleInfo { - - private List necessaryDerivInfos; // info for derivations necessary to generate oracle derivations - private List oracleDerivInfos; // info for oracle derivations - private Map, Integer>> backPointers; - private NecessaryDeriv[] necessaryDerivsCache; // memorizing whether a deriv is necessary or not - long firstCorrectDerivNumber = -1; // offset for necessaryDerivs - - // we sample from the end to the start - public OracleInfo(ReinforcementParserState oracleState) { - if (oracleState == null) throw new RuntimeException("oracle state is null"); - this.necessaryDerivInfos = new ArrayList<>(); - this.oracleDerivInfos = new ArrayList<>(); - if (!oracleState.correctDerivations.isEmpty()) { - 
Collections.sort(oracleState.correctDerivations, new CorrectDerivationComparator()); - this.backPointers = oracleState.backpointerList; - Derivation oracleDeriv = oracleState.correctDerivations.get(0); - this.firstCorrectDerivNumber = oracleDeriv.creationIndex; - - LogInfo.logs("OracleSampler: deriv=%s, comp=%s", oracleDeriv, oracleDeriv.compatibility); - populateCorrectDerivations(oracleDeriv); - - if (parser.verbose(2)) { - LogInfo.begin_track("OracleSampler: necessary infos:"); - for (DerivInfo necessaryInfo : necessaryDerivInfos) LogInfo.log(necessaryInfo); - LogInfo.end_track(); - LogInfo.begin_track("OracleSampler: oracle infos:"); - for (DerivInfo oracleInfo : oracleDerivInfos) LogInfo.log(oracleInfo); - LogInfo.end_track(); - } - } - } - - private void populateCorrectDerivations(Derivation oracleDeriv) { - // add derivation info and also all upstream derivations - if (parser.verbose(4)) - LogInfo.logs("populateCorrectDerivations(): oracle deriv: %s", oracleDeriv); - - Pair, Integer> listAndIndex = this.backPointers.get(oracleDeriv.creationIndex); - if (listAndIndex != null) { - for (int i = listAndIndex.getSecond() - 1; i >= 0; i--) { - Derivation deriv = listAndIndex.getFirst().get(i); - if (parser.verbose(4)) - LogInfo.logs("populateCorrectDerivations(): necessary deriv: %s", deriv); - DerivInfo derivInfo = new DerivInfo(deriv.cat, deriv.start, deriv.end, deriv.formula, deriv.rule); - if (!necessaryDerivInfos.contains(derivInfo)) - necessaryDerivInfos.add(derivInfo); - } - } - DerivInfo derivInfo = new DerivInfo(oracleDeriv.cat, oracleDeriv.start, oracleDeriv.end, oracleDeriv.formula, oracleDeriv.rule); - if (!oracleDerivInfos.contains(derivInfo)) { - necessaryDerivInfos.add(derivInfo); - oracleDerivInfos.add(derivInfo); - } - - // recurse - for (Derivation child : oracleDeriv.children) { - populateCorrectDerivations(child); - } - } - - // checking if derivation is necessary using the cache - protected boolean isNecessaryDeriv(Derivation deriv) { - - if 
(necessaryDerivInfos.isEmpty()) return false; - if (necessaryDerivsCache == null) { - necessaryDerivsCache = new NecessaryDeriv[200000]; - Arrays.fill(necessaryDerivsCache, NecessaryDeriv.UNKNOWN); - } - int index = (int) (deriv.creationIndex - firstCorrectDerivNumber); - if (index < 0) throw new RuntimeException("Negative index - correct index larger than deriv number"); - if (index >= 200000) { - LogInfo.warnings("isNecessaryDeriv(): index larger than 200000: %s", index); - return necessaryDerivInfos.contains(new DerivInfo(deriv.cat, deriv.start, deriv.end, deriv.formula, deriv.rule)); - } - if (necessaryDerivsCache[index] == NecessaryDeriv.UNNECESSARY_DERIV) - return false; - if (necessaryDerivsCache[index] == NecessaryDeriv.NECESSARY_DERIV) - return true; - // unknown - boolean res = necessaryDerivInfos.contains(new DerivInfo(deriv.cat, deriv.start, deriv.end, deriv.formula, deriv.rule)); - necessaryDerivsCache[index] = res ? NecessaryDeriv.NECESSARY_DERIV : NecessaryDeriv.UNNECESSARY_DERIV; - return res; - } - } - - class MultiplicativeProposalSampler extends Sampler { - - private double bonus; - private OracleInfo oracleInfo; - - public MultiplicativeProposalSampler(ReinforcementParserState oracleState) { - oracleInfo = new OracleInfo(oracleState); - bonus = ReinforcementParser.opts.multiplicativeBonus; - LogInfo.logs("Bonus=%s", bonus); - } - - // We assume that oracle stuff has been unrolled - @Override - public Pair sample() { - - double[] modelProbs = ReinforcementUtils.expNormalize(agenda); - double[] samplerProbs = getUnnormalizedAgendaDistribution(); - if (!NumUtils.expNormalize(samplerProbs)) throw new RuntimeException("Normalization failed" + Arrays.toString(samplerProbs)); - - int sampledIndex = ReinforcementUtils.sampleIndex(randGen, samplerProbs); - PrioritizedDerivationStream pds = agenda.get(sampledIndex); - double prob = samplerProbs[sampledIndex]; - - if (parser.verbose(3)) { - Derivation deriv = pds.derivStream.peek(); - if 
(oracleInfo.oracleDerivInfos.contains(new DerivInfo(deriv.cat, deriv.start, deriv.end, deriv.formula, deriv.rule))) - LogInfo.logs("MultiplicativeProposalSampler.sample(): Sampled from correct!, prob=%s", prob); - else - LogInfo.logs("MultiplicativeProposalSampler.sample(): Sampled from incorrect!, prob=%s", prob); - } - boolean returnProb = true; - - // whether to update only for correct moves or not hack - if (computeExpectedCounts && ReinforcementParser.opts.updateGradientForCorrectMovesOnly) { - Derivation deriv = pds.derivStream.peek(); - DerivInfo derivInfo = new DerivInfo(deriv.cat, deriv.start, deriv.end, deriv.formula, deriv.rule); - if (oracleInfo.oracleDerivInfos.contains(derivInfo)) - updateProbSum(modelProbs); - else returnProb = false; - - if (parser.verbose(3)) { - LogInfo.logs("Updating gradient=%s", returnProb); - } - } else { - if (computeExpectedCounts) // compute probability sum before sampling for gradient computation (easier before sampling) - updateProbSum(modelProbs); - } - - agenda.remove(pds, sampledIndex); - return Pair.newPair(pds, returnProb ? 
prob : -1d); - } - - //todo : This unrolls all necessary derivations, and ignore \beta - //assumes probability mass on non-necessary things is very very small - //might need to be fixed if we anneal \beta to 0 - @Override - public void unroll() { - - if (parser.verbose(3)) - LogInfo.begin_track("MultiplicativeBonusSampler.unroll()"); - - List> derivsToAdd = new ArrayList<>(); - List indicesToRemove = new ArrayList<>(); - - for (int i = 0; i < agenda.size(); ++i) { - PrioritizedDerivationStream pds = agenda.get(i); - boolean modified = false; - while (pds.derivStream.hasNext() && pds.derivStream.estimatedSize() > 1 && oracleInfo.isNecessaryDeriv(pds.derivStream.peek())) { - modified = true; - Derivation nextDeriv = pds.derivStream.next(); - DerivationStream newDerivStream = SingleDerivationStream.constant(nextDeriv); - if (parser.verbose(3) && newDerivStream.hasNext()) { - Derivation deriv = newDerivStream.peek(); - LogInfo.logs("MultiplicativeSampler.unroll(): add necessary deriv=%s(%s,%s) [%s] score=%s, |stream|=%s, creationIndex=%s", - deriv.cat, deriv.start, deriv.end, deriv.formula, deriv.score, pds.derivStream.estimatedSize(), deriv.creationIndex); - } - derivsToAdd.add(Pair.newPair(newDerivStream, pds.probSum)); - if (pds.derivStream.hasNext()) - featurizeAndScoreDerivation(pds.derivStream.peek()); - } - if (modified) { - indicesToRemove.add(i); - derivsToAdd.add(Pair.newPair(pds.derivStream, pds.probSum)); - } - } - // remove - need to make sure indices don't change due to removal so go from end to start - for (int i = indicesToRemove.size() - 1; i >= 0; --i) - agenda.remove(agenda.get(indicesToRemove.get(i)), indicesToRemove.get(i)); - // add - for (Pair pair: derivsToAdd) - addToAgenda(pair.getFirst(), pair.getSecond()); - if (parser.verbose(3)) - LogInfo.end_track(); - } - - private double[] getUnnormalizedAgendaDistribution() { - double[] probs = new double[agenda.size()]; - for (int i = 0; i < agenda.size(); ++i) { - Derivation d = 
agenda.get(i).derivStream.peek(); - probs[i] = d.score; - // we assume all necessary things have been unrolled already so no need to handle that - if (oracleInfo.oracleDerivInfos.contains(new DerivInfo(d.cat, d.start, d.end, d.formula, d.rule))) { - probs[i] += bonus; - } - } - return probs; - } - - @Override - public double[] getDerivDistribution(List rootDerivs) { - double[] res = new double[rootDerivs.size()]; - for (int i = 0; i < rootDerivs.size(); ++i) { - Derivation rootDeriv = rootDerivs.get(i); - res[i] = rootDeriv.score + bonus * rootDeriv.compatibility; - } - NumUtils.expNormalize(res); - return res; - } - } - - //A heuristic for choosing the oracle derivation. It'd be good to get rid of this or simplify - public static class CorrectDerivationComparator implements Comparator { - @Override - public int compare(Derivation deriv1, Derivation deriv2) { - if (deriv1.compatibility > deriv2.compatibility) return -1; - if (deriv1.compatibility < deriv2.compatibility) return +1; - - boolean deriv1Join = containsJoin(deriv1); - boolean deriv2Join = containsJoin(deriv2); - if (deriv1Join && !deriv2Join) return -1; - if (!deriv1Join && deriv2Join) return +1; - // by score - if (deriv1.score > deriv2.score) return -1; - if (deriv1.score < deriv2.score) return +1; - // Ensure reproducible randomness - if (deriv1.creationIndex < deriv2.creationIndex) return -1; - if (deriv1.creationIndex > deriv2.creationIndex) return +1; - return 0; - } - - private boolean containsJoin(Derivation d) { - SemanticFn semanticFn = d.rule.getSem(); - if (semanticFn != null) { - if (semanticFn instanceof JoinFn) - return true; - } - for (Derivation child : d.children) { - if (containsJoin(child)) - return true; - } - return false; - } - } -} - -//holds the stream, the priority, and a probability sum to make grdient computation efficient -class PrioritizedDerivationStream implements Comparable, HasScore { - public final DerivationStream derivStream; - public final double priority; - public 
double probSum; - - PrioritizedDerivationStream(DerivationStream derivStream, double priority, double probSum) { - this.derivStream = derivStream; - this.priority = priority; - this.probSum = probSum; - } - - @Override - public int compareTo(PrioritizedDerivationStream o) { - if (this.priority > o.priority) return -1; - if (this.priority < o.priority) return +1; - return 0; - } - - public double getScore() { return derivStream.peek().score; } - public void addProb(double prob) { probSum += prob; } -} - -class DerivInfo { - public final String cat; - public final int start; - public final int end; - public final Formula formula; - public final Rule rule; - - DerivInfo(String cat, int start, int end, Formula formula, Rule rule) { - this.cat = cat; - this.start = start; - this.end = end; - this.formula = formula; - this.rule = rule; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - - DerivInfo derivInfo = (DerivInfo) o; - - if (end != derivInfo.end) return false; - if (start != derivInfo.start) return false; - if (cat != null ? !cat.equals(derivInfo.cat) : derivInfo.cat != null) return false; - if (formula != null ? !formula.equals(derivInfo.formula) : derivInfo.formula != null) return false; - return !(rule != null ? !rule.equals(derivInfo.rule) : derivInfo.rule != null); - - } - - @Override - public int hashCode() { - int result = cat != null ? cat.hashCode() : 0; - result = 31 * result + start; - result = 31 * result + end; - result = 31 * result + (formula != null ? formula.hashCode() : 0); - result = 31 * result + (rule != null ? 
rule.hashCode() : 0); - return result; - } - - public String toString() { - return cat + "(" + start + "," + end + ") " + formula.toString(); - } -} - diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ReinforcementUtils.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ReinforcementUtils.java deleted file mode 100644 index 8cc1285af6..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ReinforcementUtils.java +++ /dev/null @@ -1,151 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.MapUtils; -import fig.basic.NumUtils; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Random; - -/** - * Utils for ReinforcementParser - */ -public final class ReinforcementUtils { - private static double logMaxValue = Math.log(Double.MAX_VALUE); - private ReinforcementUtils() { } - - // add to double map after adding prefix to all keys - public static void addToDoubleMap(Map mutatedMap, Map addedMap, String prefix) { - for (String key : addedMap.keySet()) - MapUtils.incr(mutatedMap, prefix + key, addedMap.get(key)); - } - - public static void subtractFromDoubleMap(Map mutatedMap, Map subtractedMap) { - for (String key : subtractedMap.keySet()) - MapUtils.incr(mutatedMap, key, -1 * subtractedMap.get(key)); - } - // subtract from double map after adding prefix to all keys - public static void subtractFromDoubleMap(Map mutatedMap, Map subtractedMap, String prefix) { - for (String key : subtractedMap.keySet()) - MapUtils.incr(mutatedMap, prefix + key, -1 * subtractedMap.get(key)); - } - - public static Map multiplyDoubleMap(Map map, double factor) { - Map res = new HashMap<>(); - for (Map.Entry entry: map.entrySet()) - res.put(entry.getKey(), entry.getValue() * factor); - return res; - } - - public static int sampleIndex(Random rand, List scorables, double denominator) { - double randD = rand.nextDouble(); - double sum = 0; - - for (int i = 0; i < scorables.size(); ++i) { - HasScore pds = 
scorables.get(i); - double prob = computeProb(pds, denominator); - sum += prob; - if (randD < sum) { - return i; - } - } - throw new RuntimeException(sum + " < " + randD); - } - - public static int sampleIndex(Random rand, double[] scores, double denominator) { - double randD = rand.nextDouble(); - double sum = 0; - - for (int i = 0; i < scores.length; ++i) { - double pds = scores[i]; - double prob = computeProb(pds, denominator); - sum += prob; - if (randD < sum) { - return i; - } - } - throw new RuntimeException(sum + " < " + randD); - } - - public static int sampleIndex(Random rand, double[] probs) { - double randD = rand.nextDouble(); - double sum = 0; - - for (int i = 0; i < probs.length; ++i) { - sum += probs[i]; - if (randD < sum) return i; - } - throw new RuntimeException(sum + " < " + randD); - } - - public static double computeProb(HasScore deriv, double denominator) { - double prob = Math.exp(deriv.getScore() - denominator); - if (prob < -0.0001 || prob > 1.0001) - throw new RuntimeException("Probability is out of range, prob=" + prob + - ",score=" + deriv.getScore() + ", denom=" + denominator); - return prob; - } - - public static double computeProb(double score, double denominator) { - double prob = Math.exp(score - denominator); - if (prob < -0.0001 || prob > 1.0001) - throw new RuntimeException("Probability is out of range, prob=" + prob + - ",score=" + score + ", denom=" + denominator); - return prob; - } - - public static double computeLogExpSum(List scorables) { - double sum = Double.NEGATIVE_INFINITY; - for (HasScore scorable : scorables) { - sum = NumUtils.logAdd(sum, scorable.getScore()); - } - return sum; - } - - public static double[] expNormalize(List scorables) { - // Input: log probabilities (unnormalized too) - // Output: normalized probabilities - // probs actually contains log probabilities; so we can add an arbitrary constant to make - // the largest log prob 0 to prevent overflow problems - double[] res = new double[scorables.size()]; 
- double max = Double.NEGATIVE_INFINITY; - for (int i = 0; i < scorables.size(); i++) - max = Math.max(max, scorables.get(i).getScore()); - if (Double.isInfinite(max)) - throw new RuntimeException("Scoreables is probably empty"); - for (int i = 0; i < scorables.size(); i++) - res[i] = Math.exp(scorables.get(i).getScore() - max); - NumUtils.normalize(res); - return res; - } - - public static double[] expNormalize(ParserAgenda scorables) { - // Input: log probabilities (unnormalized too) - // Output: normalized probabilities - // probs actually contains log probabilities; so we can add an arbitrary constant to make - // the largest log prob 0 to prevent overflow problems - double[] res = new double[scorables.size()]; - double max = Double.NEGATIVE_INFINITY; - - - for (HasScore scorable : scorables) - max = Math.max(max, scorable.getScore()); - - if (Double.isInfinite(max)) - throw new RuntimeException("Scoreables is probably empty"); - - int i = 0; - for (HasScore scorable : scorables) - res[i++] = Math.exp(scorable.getScore() - max); - NumUtils.normalize(res); - return res; - } - - // Return log(exp(a)-exp(b)) - public static double logSub(double a, double b) { - if(a <= b) throw new RuntimeException("First argument must be strictly greater than second argument"); - if(Double.isInfinite(b) || a-b > logMaxValue || b-a < 30) return a; - return a + Math.log(1d - Math.exp(b-a)); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ReverseFormula.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ReverseFormula.java deleted file mode 100644 index 4b7f8b7840..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ReverseFormula.java +++ /dev/null @@ -1,62 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.google.common.base.Function; -import fig.basic.LispTree; - -import java.util.List; - -/** - * If |expr| denotes a set of pairs S, - * then (reverse |expr|) denotes the set of pairs {(y, x) : (x, y) \in S}. 
- * Example: - * (reverse fb:people.person.date_of_birth) - * (reverse (lambda x (fb:location.statistical_region.population (fb:measurement_unit.dated_integer.number (var x))))) - * - * @author Percy Liang - */ -public class ReverseFormula extends Formula { - public final Formula child; - - public ReverseFormula(Formula child) { this.child = child; } - - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("reverse"); - tree.addChild(child.toLispTree()); - return tree; - } - - @Override - public void forEach(Function func) { - if (!func.apply(this)) child.forEach(func); - } - - @Override - public Formula map(Function func) { - Formula result = func.apply(this); - return result == null ? new ReverseFormula(child.map(func)) : result; - } - - @Override - public List mapToList(Function> func, boolean alwaysRecurse) { - List res = func.apply(this); - if (res.isEmpty() || alwaysRecurse) - res.addAll(child.mapToList(func, alwaysRecurse)); - return res; - } - - @SuppressWarnings({"equalshashcode"}) - @Override - public boolean equals(Object thatObj) { - if (!(thatObj instanceof ReverseFormula)) return false; - ReverseFormula that = (ReverseFormula) thatObj; - if (!this.child.equals(that.child)) return false; - return true; - } - - public int computeHashCode() { - int hash = 0x7ed55d16; - hash = hash * 0xd3a2646c + child.hashCode(); - return hash; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Rule.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Rule.java deleted file mode 100644 index 31f30b72c3..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Rule.java +++ /dev/null @@ -1,173 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.google.common.base.Joiner; -import com.google.common.collect.Lists; -import fig.basic.LispTree; -import fig.basic.Pair; - -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; - -/** - * A rule specifies how to take a right hand 
of terminals and non-terminals. - * - * @author Percy Liang - */ -public class Rule { - public static Rule nullRule = new Rule(null, null, null); - - // Categories begin with $. - public static boolean isCat(String item) { return item.charAt(0) == '$'; } - - public static final String rootCat = "$ROOT"; - public static final String tokenCat = "$TOKEN"; - public static final String phraseCat = "$PHRASE"; // Sequence of tokens - public static final String lemmaTokenCat = "$LEMMA_TOKEN"; // Lemmatized version - public static final String lemmaPhraseCat = "$LEMMA_PHRASE"; // Lemmatized version - public static final List specialCats = Lists.newArrayList(rootCat, tokenCat, phraseCat, lemmaTokenCat, lemmaPhraseCat); - public final String lhs; // Left-hand side: category. - public final List rhs; // Right-hand side: sequence of categories (have $ prefix) and tokens. - public final SemanticFn sem; // Takes derivations corresponding to RHS categories and produces a set of derivations corresponding to LHS. - public List> info; // Extra info - public RuleSource source = null; // for tracking where the rule comes from when they are induced - - // Cache the semanticRepn - public String getSemRepn() { - if (semRepn == null) semRepn = sem.getClass().getSimpleName(); - return semRepn; - } - private String semRepn = null; - - public Rule(String lhs, - List rhs, - SemanticFn sem) { - this.lhs = lhs; - this.rhs = rhs; - this.sem = sem; - } - - @Override - public String toString() { - if (stringRepn == null) { - String semStr = sem == null? "NullSemanticFn" : sem.toString(); - //int maxLength = 100; - //if (semStr.length() > maxLength) - // semStr = String.format("%s...(%d total)", semStr.substring(0,maxLength), semStr.length()); - stringRepn = lhs + " -> " + (rhs == null ? 
"" : Joiner.on(' ').join(rhs)) + " " + semStr; - } - return stringRepn; - } - private String stringRepn; // Cache toString() - - // Get/set info - public void addInfo(String key, double value) { - if (info == null) info = Lists.newArrayList(); - info.add(Pair.newPair(key, value)); - } - public Rule setInfo(Rule rule) { this.info = rule.info; return this; } - - // Accessors - public SemanticFn getSem() { return sem; } - public String getLhs() { return lhs; } - - // Return whether rule has form A -> B (both LHS and RHS contain one category). - public boolean isCatUnary() { return rhs.size() == 1 && isCat(rhs.get(0)); } - - // Return if all RHS tokens are terminals - public boolean isRhsTerminals() { - for (int i = 0; i < rhs.size(); ++i) { - if (isCat(rhs.get(i))) - return false; - } - return true; - } - - // Return the number of categories on the RHS - public int numRhsCats() { - int ret = 0; - for (int i = 0; i < rhs.size(); ++i) { - if (isCat(rhs.get(i))) - ret++; - } - return ret; - } - - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("rule"); - tree.addChild(lhs); - tree.addChild(LispTree.proto.newList(rhs)); - tree.addChild(sem.toLispTree()); - if (info != null) { - for (Pair p : info) - tree.addChild(LispTree.proto.newList(p.getFirst(), "" + p.getSecond())); - } - if (source != null) - tree.addChild(source.toJson()); - return tree; - } - - /* Extract tag info */ - public double getInfoTag(String infoTag) { - if (info != null) { - for (Pair p : info) { - if (p.getFirst().equals(infoTag)) return p.getSecond(); - } - } - return -1.0; - } - - public boolean isFloating() { - double f = getInfoTag("floating"); - double a = getInfoTag("anchored"); - if (f == 1.0) - return true; - else if (f == 0.0) - return false; - else - return a == 1.0 ? 
false : FloatingParser.opts.defaultIsFloating; - } - - public boolean isAnchored() { - double f = getInfoTag("floating"); - double a = getInfoTag("anchored"); - if (a == 1.0) - return true; - else if (a == 0.0) - return false; - else - return f == 1.0 ? false : !FloatingParser.opts.defaultIsFloating; - } - - public boolean isInduced() { - double a = getInfoTag("induced"); - if (a == 1.0) return true; - return false; - } - - @Override - public boolean equals(Object o) { - if (!(o instanceof Rule)) return false; - return ((Rule)o).toString().equals(this.toString()); - } - @Override - public int hashCode() { - return this.toString().hashCode(); - } - - public String toJson() { - Map jsonMap = new LinkedHashMap<>(); - jsonMap.put("lhs", lhs); - jsonMap.put("rhs", rhs); - if (source != null) { - jsonMap.put("source", source); - } - if (info != null) { - for (Pair p : info) - jsonMap.put(p.getFirst(), p.getSecond()); - } - jsonMap.put("sem", sem.toString()); - return Json.writeValueAsStringHard(jsonMap); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/RuleSource.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/RuleSource.java deleted file mode 100644 index c02e8bbfa9..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/RuleSource.java +++ /dev/null @@ -1,48 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.time.LocalDateTime; -import java.util.List; - -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; -import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.annotation.JsonProperty; - -/** - * Tracking where the rule comes from in the grammar induction process. 
- * - * @author sidaw - */ - -@JsonIgnoreProperties(ignoreUnknown = true) -@JsonInclude(JsonInclude.Include.NON_NULL) -public class RuleSource { - @JsonProperty - public String uid; - @JsonProperty - public LocalDateTime time; - @JsonProperty - public String head; - @JsonProperty - public List body; - - @JsonProperty - public int cite = 0; - @JsonProperty - public int self = 0; - @JsonProperty - public boolean align = false; - @JsonProperty - public String alignInfo = ""; - - public RuleSource(String uid, String head, List body) { - this.uid = uid; - this.head = head; - this.body = body; - this.time = LocalDateTime.now(); - } - - public String toJson() { - return Json.writeValueAsStringHard(this); - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/SelectFn.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/SelectFn.java deleted file mode 100644 index 6f14b99e60..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/SelectFn.java +++ /dev/null @@ -1,66 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.LispTree; -import fig.basic.LogInfo; -import fig.basic.Option; - -import java.util.List; - -/** - * Given a particular position i, return the ith element on the RHS of the - * derivation's rule. - * - * @author Andrew Chou - */ -public class SelectFn extends SemanticFn { - public static class Options { - @Option(gloss = "Verbose") public int verbose = 0; - } - - public static Options opts = new Options(); - - // Which child derivation to select and return. 
- int position = -1; - - public SelectFn() { } - - public SelectFn(int position) { - init(LispTree.proto.newList("SelectFn", position + "")); - } - - public void init(LispTree tree) { - super.init(tree); - this.position = Integer.valueOf(tree.child(1).value); - } - - public DerivationStream call(final Example ex, final Callable c) { - return new SingleDerivationStream() { - @Override - public Derivation createDerivation() { - FeatureVector features = new FeatureVector(); - // TODO(pliang): move into FeatureExtractor - if (FeatureExtractor.containsDomain("skipPos")) { - for (int i = 0; i < c.getChildren().size(); ++i) { - if (i != position) { - Derivation child = c.child(i); - for (int index = child.start; index < child.end; ++index) { - List posTags = ex.languageInfo.posTags; - features.add("skipPos", posTags.get(index)); - if (opts.verbose > 0) { - LogInfo.logs( - "SelectFn: adding pos-skipping feature, pos: %s, word: %s", - posTags.get(index), ex.languageInfo.tokens.get(index)); - } - } - } - } - } - return new Derivation.Builder() - .withCallable(c) - .withFormulaFrom(c.child(position)) - .localFeatureVector(features) - .createDerivation(); - } - }; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/SemType.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/SemType.java deleted file mode 100644 index e7d4891086..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/SemType.java +++ /dev/null @@ -1,114 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonValue; - -import fig.basic.LispTree; - -import java.util.*; - -/** - * A simple type system for Formulas. 
SemType represents a union over base - * types, where each base type is either - * - entity type, or - * - entity type -> base type Example of a 0-ary (for booleans) (type bool) - * - * Example of a unary (for Obama) - * (union fb:government.politician fb:government.us_president ...) - * - * Example of a binary (for born in) [remember, arg1 is the argument, arg0 is - * the return type] - * (-> fb:location.location fb:people.person) - * - * Note: type equality is not implemented, since it's better to use meet() to - * exploit the finer lattice structure of the type system. - * - * @author Percy Liang - */ -public abstract class SemType { - // Return whether the type is valid (not bottom). - public abstract boolean isValid(); - - // Return the meet of |this| and |that|. - public abstract SemType meet(SemType that); - - // Return the reversed type for functions: (s -> t) to (t -> s) - public abstract SemType reverse(); - - // Treat |this| as a function type and apply it to the argument |that|. - // This is just an internal function. - public abstract SemType apply(SemType that); - - // These are really the primitives. 
- public SemType getArgType() { return reverse().apply(SemType.topType); } - public SemType getRetType() { return apply(SemType.topType); } - - public abstract LispTree toLispTree(); - - @JsonValue - @Override - public String toString() { return toLispTree().toString(); } - - @JsonCreator - public static SemType fromString(String s) { - return fromLispTree(LispTree.proto.parseFromString(s)); - } - - public static SemType fromLispTree(LispTree tree) { - if (tree.isLeaf()) { - if (tree.value.equals("top")) return topType; - return new AtomicSemType(tree.value); - } - if ("union".equals(tree.child(0).value)) { - List result = new ArrayList<>(); - for (int i = 1; i < tree.children.size(); i++) - result.add(fromLispTree(tree.child(i))); - return new UnionSemType(result); - } - if ("->".equals(tree.child(0).value)) { - SemType result = fromLispTree(tree.child(tree.children.size() - 1)); - for (int i = tree.children.size() - 2; i >= 1; i--) - result = new FuncSemType(fromLispTree(tree.child(i)), result); - return result; - } - throw new RuntimeException("Invalid type: " + tree); - } - - // Create a new instance of SemType from type names (Strings) - - public static SemType newAtomicSemType(String type) { - return new AtomicSemType(type); - } - - public static SemType newFuncSemType(String argType, String retType) { - return new FuncSemType(argType, retType); - } - - public static SemType newUnionSemType(Collection types) { - List t = new ArrayList<>(); - for (String x : types) - t.add(new AtomicSemType(x)); - return new UnionSemType(t).simplify(); - } - - public static SemType newUnionSemType(String... 
types) { - return newUnionSemType(Arrays.asList(types)); - } - - // Common types - public static final SemType topType = new TopSemType(); - public static final SemType bottomType = new UnionSemType(); - public static final SemType stringType = new AtomicSemType(CanonicalNames.TEXT); - public static final SemType intType = new AtomicSemType(CanonicalNames.INT); - public static final SemType floatType = new AtomicSemType(CanonicalNames.FLOAT); - public static final SemType dateType = new AtomicSemType(CanonicalNames.DATE); - public static final SemType timeType = new AtomicSemType(CanonicalNames.TIME); - public static final SemType numberType = new AtomicSemType(CanonicalNames.NUMBER); - public static final SemType numberOrDateType = new UnionSemType(numberType, dateType); - public static final SemType entityType = new AtomicSemType(CanonicalNames.ENTITY); - public static final SemType anyType = new AtomicSemType(CanonicalNames.ANY); - - public static final FuncSemType topTopFunc = new FuncSemType(topType, topType); - public static final FuncSemType anyAnyFunc = new FuncSemType(anyType, anyType); - public static final FuncSemType compareFunc = new FuncSemType(numberOrDateType, numberOrDateType); -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/SemTypeHierarchy.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/SemTypeHierarchy.java deleted file mode 100644 index 3b0d956d1a..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/SemTypeHierarchy.java +++ /dev/null @@ -1,82 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.util.*; -import fig.basic.*; - -/** - * Maintain a hierarchy (DAG) over strings. 
- * subtype < type < supertype - * - * @author Percy Liang - */ -public class SemTypeHierarchy { - public static class Options { - @Option(gloss = "Throw an error if the type is not registered in the type hierarchy.") - public boolean failOnUnknownTypes = false; - } - public static Options opts = new Options(); - - public static final SemTypeHierarchy singleton = new SemTypeHierarchy(); - - // type => list of all supertypes (assume we don't have that many supertypes) - private Map> supertypesMap = new HashMap<>(); // type => supertypes of type - private Map> subtypesMap = new HashMap<>(); // type => subtype of type - // Note: don't always need this, so can maybe remove later - - public SemTypeHierarchy() { - // Add basic types. - addSupertype(CanonicalNames.BOOLEAN, CanonicalNames.BOOLEAN); - addSupertype(CanonicalNames.BOOLEAN, CanonicalNames.ANY); - addSupertype(CanonicalNames.INT, CanonicalNames.INT); - addSupertype(CanonicalNames.INT, CanonicalNames.NUMBER); - addSupertype(CanonicalNames.INT, CanonicalNames.ANY); - addSupertype(CanonicalNames.FLOAT, CanonicalNames.FLOAT); - addSupertype(CanonicalNames.FLOAT, CanonicalNames.NUMBER); - addSupertype(CanonicalNames.FLOAT, CanonicalNames.ANY); - addSupertype(CanonicalNames.DATE, CanonicalNames.DATE); - addSupertype(CanonicalNames.DATE, CanonicalNames.ANY); - addSupertype(CanonicalNames.TEXT, CanonicalNames.TEXT); - addSupertype(CanonicalNames.TEXT, CanonicalNames.ANY); - addSupertype(CanonicalNames.NUMBER, CanonicalNames.NUMBER); - addSupertype(CanonicalNames.NUMBER, CanonicalNames.ANY); - addSupertype(CanonicalNames.ENTITY, CanonicalNames.ENTITY); - addSupertype(CanonicalNames.ENTITY, CanonicalNames.ANY); - addSupertype(CanonicalNames.ANY, CanonicalNames.ANY); - } - - // Add standard supertypes of entity - public void addEntitySupertypes(String type) { - // LogInfo.logs("addEntitySupertypes %s", type); - addSupertype(type, type); - addSupertype(type, CanonicalNames.ENTITY); - addSupertype(type, CanonicalNames.ANY); 
- } - - // Add: subtype < supertype - public void addSupertype(String subtype, String supertype) { - MapUtils.addToSet(supertypesMap, subtype, supertype); - MapUtils.addToSet(subtypesMap, supertype, subtype); - } - - public Set getSupertypes(String type) { - Set set = supertypesMap.get(type); - if (set == null) { - if (opts.failOnUnknownTypes) - LogInfo.fails("SemTypeHierarchy.getSupertypes: don't know about type %s", type); - addEntitySupertypes(type); - set = supertypesMap.get(type); - } - return set; - } - - public Set getSubtypes(String type) { - Set set = subtypesMap.get(type); - if (set == null) { - if (opts.failOnUnknownTypes) - LogInfo.fails("SemTypeHierarchy.getSubtypes: don't know about type %s", type); - addEntitySupertypes(type); - set = supertypesMap.get(type); - } - return set; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/SemanticFn.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/SemanticFn.java deleted file mode 100644 index 3c546d25fe..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/SemanticFn.java +++ /dev/null @@ -1,102 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.LispTree; -import fig.basic.Option; -import fig.basic.Utils; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; - -/** - * A semantic function takes a sequence of child derivations and produces a set - * of parent derivations. This is a pretty general concept, which can be used to: - * - Generating candidates (lexicon) - * - Do simple combination - * - Filtering of derivations - * - * To override implement this function, you just need to fill out the call() function. 
- * - * @author Percy Liang - */ -public abstract class SemanticFn { - public static class Options { - @Option(gloss = "Whether or not to add to Derivation.localChoices during " + - "function application (for debugging only).") - public boolean trackLocalChoices = false; - } - - public static final Options opts = new Options(); - - // Used to define this SemanticFn. - private LispTree tree; - - // Initialize the semantic function with any arguments (optional). - // Override this function and call super.init(tree); - public void init(LispTree tree) { - this.tree = tree; - } - - public interface Callable { - String getCat(); - int getStart(); - int getEnd(); - Rule getRule(); - List getChildren(); - Derivation child(int i); - String childStringValue(int i); - } - - public static class CallInfo implements Callable { - final String cat; - final int start; - final int end; - final Rule rule; - final List children; - public CallInfo(String cat, int start, int end, Rule rule, List children) { - this.cat = cat; - this.start = start; - this.end = end; - this.rule = rule; - this.children = children; - } - public String getCat() { return cat; } - public int getStart() { return start; } - public int getEnd() { return end; } - public Rule getRule() { return rule; } - public List getChildren() { return children; } - public Derivation child(int i) { return children.get(i); } - public String childStringValue(int i) { - return Formulas.getString(children.get(i).formula); - } - - public static final CallInfo NULL_INFO = - new CallInfo("", -1, -1, Rule.nullRule, new ArrayList()); - } - - // Main entry point: return a stream of Derivations (possibly none). - // The computation of the Derivations should be done lazily. 
- public abstract DerivationStream call(Example ex, Callable c); - - public LispTree toLispTree() { return tree; } - @Override public String toString() { return tree.toString(); } - - // default does nothing - public void addFeedback(Example ex) { return; } - - // default does nothing - public void sortOnFeedback(Params params) { return; } - - /* - * Filter on type data to save time. - * Return a collection of DerivationGroup. The rule will be applied on each DerivationGroup. - * - * See an example in tables.grow.ApplyFn - */ - public boolean supportFilteringOnTypeData() { return false; } - public Collection getFilteredDerivations( - List derivations1, List derivations2) { - throw new UnsupportedOperationException(); - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/SempreUtils.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/SempreUtils.java deleted file mode 100644 index 5337e640cf..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/SempreUtils.java +++ /dev/null @@ -1,34 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.LogInfo; -import fig.basic.MapUtils; - -import java.util.Map; - -/** - * Created by joberant on 10/18/14. - */ -public final class SempreUtils { - private SempreUtils() { } - - // "java.util.ArrayList" => "java.util.ArrayList" - // "TypeLookup" => "edu.stanford.nlp.sempre.TypeLookup" - public static String resolveClassName(String name) { - if (name.startsWith("edu.") || name.startsWith("org.") || - name.startsWith("com.") || name.startsWith("net.")) - return name; - return "edu.stanford.nlp.sempre." 
+ name; - } - - public static void logMap(Map map, String desc) { - LogInfo.begin_track("Logging %s map", desc); - for (K key : map.keySet()) - LogInfo.log(key + "\t" + map.get(key)); - LogInfo.end_track(); - } - - public static void addToDoubleMap(Map mutatedMap, Map addedMap) { - for (String key : addedMap.keySet()) - MapUtils.incr(mutatedMap, key, addedMap.get(key)); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Server.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Server.java deleted file mode 100644 index 6e2e08a732..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Server.java +++ /dev/null @@ -1,678 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.google.common.collect.Lists; -import fig.basic.*; -import fig.html.HtmlElement; -import fig.html.HtmlUtils; -import java.net.InetSocketAddress; -import java.net.URI; -import java.net.URLDecoder; -import java.net.HttpCookie; -import com.sun.net.httpserver.HttpServer; -import com.sun.net.httpserver.HttpHandler; -import com.sun.net.httpserver.HttpExchange; -import com.sun.net.httpserver.Headers; - -import java.io.*; -import java.util.*; -import java.util.concurrent.Executors; -import java.util.concurrent.ExecutorService; -import java.math.BigInteger; -import java.security.SecureRandom; - -import static fig.basic.LogInfo.logs; - -final class SecureIdentifiers { - private SecureIdentifiers() { } - - private static SecureRandom random = new SecureRandom(); - public static String getId() { - return new BigInteger(130, random).toString(32); - } -} - -/** - * This class implements a simple HTTP server which provides a web interface - * into SEMPRE just like Master.runInteractivePrompt() exposes a command-line - * tool. Most of the work is dispatched to Master.processLine(). - * Cookies are used to store the session ID. 
- * - * @author Percy Liang - */ -public class Server { - public static class Options { - @Option public int port = 8400; - @Option public int numThreads = 4; - @Option public String title = "SEMPRE Demo"; - @Option public String headerPath; - @Option public String basePath = "demo-www"; - @Option public int verbose = 1; - @Option public int htmlVerbose = 1; - } - public static Options opts = new Options(); - - Master master; - public static final HtmlUtils H = new HtmlUtils(); - - class Handler implements HttpHandler { - public void handle(HttpExchange exchange) { - try { - new ExchangeState(exchange); - } catch (Exception e) { - e.printStackTrace(); - } - } - } - - class ExchangeState { - // Input - HttpExchange exchange; - Map reqParams = new HashMap<>(); - String remoteHost; - - // For header - HttpCookie cookie; - boolean isNewSession; - String format; - boolean jsonFormat() { return format.equals("json"); } - - // For writing main content - - public ExchangeState(HttpExchange exchange) throws IOException { - this.exchange = exchange; - - URI uri = exchange.getRequestURI(); - this.remoteHost = exchange.getRemoteAddress().getHostName(); - - // Don't use uri.getQuery: it can't distinguish between '+' and '-' - String[] tokens = uri.toString().split("\\?"); - if (tokens.length == 2) { - for (String s : tokens[1].split("&")) { - String[] kv = s.split("=", 2); - try { - String key = URLDecoder.decode(kv[0], "UTF-8"); - String value = URLDecoder.decode(kv[1], "UTF-8"); - logs("%s => %s", key, value); - reqParams.put(key, value); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e); - } - } - } - this.format = MapUtils.get(reqParams, "format", "html"); - - String cookieStr = exchange.getRequestHeaders().getFirst("Cookie"); - if (cookieStr != null) { // Cookie already exists - cookie = HttpCookie.parse(cookieStr).get(0); - isNewSession = false; - } else { - if (!jsonFormat()) { - cookie = new HttpCookie("sessionId", SecureIdentifiers.getId()); 
- } else { - cookie = null; - } - isNewSession = true; // Create a new cookie - } - - String sessionId = null; - if (cookie != null) sessionId = cookie.getValue(); - if (opts.verbose >= 2) - LogInfo.logs("GET %s from %s (%ssessionId=%s)", uri, remoteHost, isNewSession ? "new " : "", sessionId); - - String uriPath = uri.getPath(); - if (uriPath.equals("/")) uriPath += "index.html"; - if (uriPath.equals("/sempre")) { - handleQuery(sessionId); - } else { - getFile(opts.basePath + uriPath); - } - - exchange.close(); - } - - String getMimeType(String path) { - String[] tokens = path.split("\\."); - String ext = tokens[tokens.length - 1]; - if (ext.equals("html")) return "text/html"; - if (ext.equals("css")) return "text/css"; - if (ext.equals("jpeg")) return "image/jpeg"; - if (ext.equals("gif")) return "image/gif"; - return "text/plain"; - } - - void setHeaders(String mimeType) throws IOException { - Headers headers = exchange.getResponseHeaders(); - headers.set("Content-Type", mimeType); - headers.set("Access-Control-Allow-Origin", "*"); - if (isNewSession && cookie != null) - headers.set("Set-Cookie", cookie.toString()); - exchange.sendResponseHeaders(200, 0); - } - - private HtmlElement makeInputBox(String line, String action) { - return H.div().child( - H.form().action(action) - .child(H.text(line == null ? 
"" : line).cls("question").autofocus().size(50).name("q")) - .child(H.button("Go").cls("ask")) - .end()); - } - - private HtmlElement makeTooltip(HtmlElement main, HtmlElement aux) { - return H.a().cls("info").child(main).child(H.span().cls("tooltip").child(aux)); - } - private HtmlElement makeTooltip(HtmlElement main, HtmlElement aux, String link) { - return H.a().href(link).cls("info").child(main).child(H.span().cls("tooltip").child(aux)); - } - - public final String freebaseWebsite = "http://www.freebase.com/"; - public String id2website(String id) { - assert id.startsWith("fb:") : id; - return freebaseWebsite + id.substring(3).replaceAll("\\.", "/"); - } - - HtmlElement valueToElem(Value value) { - if (value == null) return H.span(); - if (value instanceof NameValue) { - NameValue nameValue = (NameValue) value; - return H.a().href(id2website(nameValue.id)).child(nameValue.description == null ? nameValue.id : nameValue.description); - } else if (value instanceof NumberValue) { - NumberValue numberValue = (NumberValue) value; - return H.span().child(Fmt.D(numberValue.value) + (numberValue.unit.equals(NumberValue.unitless) ? "" : " " + numberValue.unit)); - } else if (value instanceof UriValue) { - UriValue uriValue = (UriValue) value; - return H.a().href(uriValue.value).child(uriValue.value); - } else if (value instanceof DateValue) { - DateValue dateValue = (DateValue) value; - return H.span().child(dateValue.year + (dateValue.month == -1 ? "" : "-" + dateValue.month + (dateValue.day == -1 ? 
"" : "-" + dateValue.day))); - } else if (value instanceof StringValue) { - return H.span().child(((StringValue) value).value); - } else if (value instanceof TableValue) { - HtmlElement table = H.table().cls("valueTable"); - HtmlElement header = H.tr(); - boolean first = true; - for (String item : ((TableValue) value).header) { - if (!first) header.child(H.td("   ")); - first = false; - header.child(H.td(H.b(item))); - } - table.child(header); - for (List rowValues : ((TableValue) value).rows) { - HtmlElement row = H.tr(); - first = true; - for (Value x : rowValues) { - // TODO(pliang): add horizontal spacing only using CSS - if (!first) row.child(H.td("   ")); - first = false; - row.child(H.td(valueToElem(x))); - } - table.child(row); - } - return table; - } else { - // Default rendering - return H.span().child(value.toString()); - } - } - - private HtmlElement makeAnswerBox(Master.Response response, String uri) { - HtmlElement answer; - if (response.getExample().getPredDerivations().size() == 0) { - answer = H.span().child("(none)"); - } else { - answer = valueToElem(response.getDerivation().getValue()); - } - - return H.table().child( - H.tr() - .child( - H.td( - makeTooltip( - H.span().cls("correctButton").child("[Correct]"), - H.div().cls("bubble").child("If this answer is correct, click to add as a new training example!"), - uri + "&accept=" + response.getCandidateIndex()))) - .child(H.td(H.span().cls("answer").child(answer))) - .end()); - } - - private HtmlElement makeGroup(List items) { - HtmlElement table = H.table().cls("groupResponse"); - for (HtmlElement item : items) - table.child(H.tr().child(H.td(item))); - return table; - } - - HtmlElement makeDetails(Master.Response response, String uri) { - Example ex = response.getExample(); - List items = new ArrayList(); - if (opts.htmlVerbose >= 1) - items.add(makeLexical(ex)); - if (ex.getPredDerivations().size() > 0) { - if (opts.htmlVerbose >= 1) { - items.add(makeDerivation(ex, response.getDerivation(), 
true)); - items.add(makeFeatures(response.getDerivation(), false)); - } - items.add(makeCandidates(ex, uri)); - } - - return H.div().cls("details").child(makeGroup(items)); - } - - HtmlElement makeDerivation(Example ex, Derivation deriv, boolean moreInfo) { - HtmlElement table = H.table(); - - // Show the derivation - table.child(H.tr().child(H.td(makeDerivationHelper(ex, deriv, "", moreInfo)))); - - String header = "Derivation"; - return H.div() - .child(H.span().cls("listHeader").child(header)) - .child(table); - } - - HtmlElement makeDerivationHelper(Example ex, Derivation deriv, String indent, boolean moreInfo) { - // TODO(pliang): make this prettier - HtmlElement cat; - if (moreInfo) { - HtmlElement tooltip = H.div(); - tooltip.child(H.span(deriv.rule.toString()).cls("nowrap")); - tooltip.child(makeFeatures(deriv, true)); - cat = makeTooltip(H.span(deriv.cat), tooltip); - } else { - cat = H.span(deriv.cat); - } - String description = cat + "[ " + H.span().child(ex.phraseString(deriv.start, deriv.end)).cls("word") + "]" + " → " + deriv.formula; - HtmlElement node = H.div().child(indent + description); - for (Derivation child : deriv.children) - node.child(makeDerivationHelper(ex, child, indent + "    ", moreInfo)); - return node; - } - - HtmlElement makeFeatures(Derivation deriv, boolean local) { - HtmlElement table = H.table(); - - Params params = master.getParams(); - Map features = new HashMap(); - if (local) - deriv.incrementLocalFeatureVector(1, features); - else - deriv.incrementAllFeatureVector(1, features); - - List> entries = Lists.newArrayList(); - double sumValue = 0; - for (Map.Entry entry : features.entrySet()) { - String feature = entry.getKey(); - if (entry.getValue() == 0) continue; - double value = entry.getValue() * params.getWeight(feature); - sumValue += value; - entries.add(new java.util.AbstractMap.SimpleEntry(feature, value)); - } - Collections.sort(entries, new ValueComparator(false)); - table.child( - H.tr() - 
.child(H.td(H.b("Feature"))) - .child(H.td(H.b("Value"))) - .child(H.td(H.b("Weight")))); - - for (Map.Entry entry : entries) { - String feature = entry.getKey(); - double value = entry.getValue(); - double weight = params.getWeight(feature); - table.child( - H.tr() - .child(H.td(feature)) - .child(H.td(Fmt.D(MapUtils.getDouble(features, feature, 0)))) - .child(H.td(Fmt.D(weight)))); - } - - String header; - if (local) { - double localScore = deriv.localScore(params); - double score = deriv.getScore(); - if (deriv.children == null) - header = String.format("Local features (score = %s)", Fmt.D(score)); - else - header = String.format("Local features (score = %s + %s = %s)", Fmt.D(score - localScore), Fmt.D(localScore), Fmt.D(score)); - } else { - header = String.format("All features (score=%s, prob=%s)", Fmt.D(deriv.getScore()), Fmt.D(deriv.getProb())); - } - return H.div() - .child(H.span().cls("listHeader").child(header)) - .child(table); - } - - HtmlElement linkSelect(int index, String uri, String str) { - return H.a().href(uri + "&select=" + index).child(str); - } - - private HtmlElement makeCandidates(Example ex, String uri) { - HtmlElement table = H.table().cls("candidateTable"); - HtmlElement header = H.tr() - .child(H.td(H.b("Rank"))) - .child(H.td(H.b("Score"))) - .child(H.td(H.b("Answer"))); - if (opts.htmlVerbose >= 1) - header.child(H.td(H.b("Formula"))); - table.child(header); - for (int i = 0; i < ex.getPredDerivations().size(); i++) { - Derivation deriv = ex.getPredDerivations().get(i); - - HtmlElement correct = makeTooltip( - H.span().cls("correctButton").child("[Correct]"), - H.div().cls("bubble").child("If this answer is correct, click to add as a new training example!"), - uri + "&accept=" + i); - String value = shorten(deriv.getValue() == null ? 
"" : deriv.getValue().toString(), 200); - HtmlElement formula = makeTooltip( - H.span(deriv.getFormula().toString()), - H.div().cls("nowrap").child(makeDerivation(ex, deriv, false)), - uri + "&select=" + i); - HtmlElement row = H.tr() - .child(H.td(linkSelect(i, uri, i + " " + correct)).cls("nowrap")) - .child(H.td(Fmt.D(deriv.getScore()))) - .child(H.td(value)).style("width:250px"); - if (opts.htmlVerbose >= 1) - row.child(H.td(formula)); - table.child(row); - } - return H.div() - .child(H.span().cls("listHeader").child("Candidates")) - .child(table); - } - - private String shorten(String s, int n) { - if (s.length() <= n) return s; - return s.substring(0, n / 2) + "..." + s.substring(s.length() - n / 2); - } - - private void markLexical(Derivation deriv, CandidatePredicates[] predicates) { - // TODO(pliang): generalize this to the case where the formula is a - // NameFormula but the child is a StringFormula? - if (deriv.getRule() != null && - deriv.getRule().getSem() != null && - deriv.getRule().getSem().getClass().getSimpleName().equals("LexiconFn")) - predicates[deriv.getStart()].add(deriv.getFormula(), deriv.getEnd() - deriv.getStart(), deriv.getScore()); - for (Derivation child : deriv.getChildren()) - markLexical(child, predicates); - } - - class CandidatePredicates { - // Parallel arrays - List predicates = new ArrayList(); - List spanLengths = new ArrayList(); - List scores = new ArrayList(); - - void add(Formula formula, int spanLength, double score) { - predicates.add(formula); - spanLengths.add(spanLength); - scores.add(score); - } - int size() { return predicates.size(); } - - String formatPredicate(int i) { - return predicates.get(i).toString() + (spanLengths.get(i) == 1 ? 
"" : " [" + spanLengths.get(i) + "]"); - } - } - - // Move to fig - double[] toDoubleArray(List l) { - double[] a = new double[l.size()]; - for (int i = 0; i < l.size(); i++) a[i] = l.get(i); - return a; - } - - - HtmlElement makeLexical(Example ex) { - HtmlElement predicatesElem = H.tr(); - HtmlElement tokensElem = H.tr(); - - // Mark all the predicates used in any derivation on the beam. - // Note: this is not all possible. - CandidatePredicates[] predicates = new CandidatePredicates[ex.getTokens().size()]; - for (int i = 0; i < ex.getTokens().size(); i++) - predicates[i] = new CandidatePredicates(); - for (Derivation deriv : ex.getPredDerivations()) - markLexical(deriv, predicates); - - // Build up |predicatesElem| and |tokensElem| - for (int i = 0; i < ex.getTokens().size(); i++) { - tokensElem.child( - H.td( - makeTooltip( - H.span().cls("word").child(ex.getTokens().get(i)), - H.span().cls("tag").child("POS: " + ex.languageInfo.posTags.get(i)), - ""))); - - if (predicates[i].size() == 0) { - predicatesElem.child(H.td("")); - } else { - // Show possible predicates for a word - HtmlElement pe = H.table().cls("predInfo"); - int[] perm = ListUtils.sortedIndices(toDoubleArray(predicates[i].scores), true); - Set formulaSet = new HashSet(); - for (int j : perm) { - String formula = predicates[i].formatPredicate(j); - if (formulaSet.contains(formula)) continue; // Dedup - formulaSet.add(formula); - double score = predicates[i].scores.get(j); - pe.child(H.tr().child(H.td(formula)).child(H.td(Fmt.D(score)))); - } - predicatesElem.child(H.td(makeTooltip(H.span().child(predicates[i].formatPredicate(perm[0])), pe, ""))); - } - } - - return H.div().cls("lexicalResponse") - .child(H.span().cls("listHeader").child("Lexical Triggers")) - .child(H.table().child(predicatesElem).child(tokensElem)); - } - - String makeJson(Master.Response response) { - Map json = new HashMap(); - List items = new ArrayList(); - json.put("candidates", items); - for (Derivation deriv : 
response.getExample().getPredDerivations()) { - Map item = new HashMap(); - Value value = deriv.getValue(); - if (value instanceof UriValue) { - item.put("url", ((UriValue) value).value); - } else if (value instanceof TableValue) { - TableValue tableValue = (TableValue) value; - item.put("header", tableValue.header); - List> rowsObj = new ArrayList>(); - item.put("rows", rowsObj); - for (List row : tableValue.rows) { - List rowObj = new ArrayList(); - for (Value v : row) - rowObj.add(v.toString()); - rowsObj.add(rowObj); - } - } else { - item.put("value", value.toString()); - } - item.put("score", deriv.score); - item.put("prob", deriv.prob); - items.add(item); - } - - return Json.writeValueAsStringHard(json); - } - - // Catch exception if any. - Master.Response processQuery(Session session, String query) { - try { - return master.processQuery(session, query); - } catch (Throwable t) { - t.printStackTrace(); - return null; - } - } - - // If query is not already the last query, make it the last query. - boolean ensureQueryIsLast(Session session, String query) { - if (query != null && !query.equals(session.getLastQuery())) { - Master.Response response = processQuery(session, query); - if (response == null) return false; - } - return true; - } - - void handleQuery(String sessionId) throws IOException { - String query = reqParams.get("q"); - - // If JSON, don't store cookies. 
- Session session = master.getSession(sessionId); - session.remoteHost = remoteHost; - session.format = format; - - if (query == null) query = session.getLastQuery(); - if (query == null) query = ""; - logs("Server.handleQuery %s: %s", session.id, query); - - // Print header - if (jsonFormat()) - setHeaders("application/json"); - else - setHeaders("text/html"); - PrintWriter out = new PrintWriter(new OutputStreamWriter(exchange.getResponseBody())); - if (!jsonFormat()) { - out.println(""); - out.println(H.html().open()); - out.println( - H.head() - .child(H.title(opts.title)) - .child(H.link().rel("stylesheet").type("text/css").href("main.css")) - .child(H.script().src("main.js")) - .end()); - - out.println(H.body().open()); - - if (opts.headerPath != null) { - for (String line : IOUtils.readLinesHard(opts.headerPath)) - out.println(line); - } - } - - String uri = exchange.getRequestURI().toString(); - - // Encode the URL parameters into the freeform text. - // A bit backwards, but keeps uniformity. 
- String select = reqParams.get("select"); - if (select != null) { - if (ensureQueryIsLast(session, query)) - query = LispTree.proto.newList("select", select).toString(); - else - query = null; - } - String accept = reqParams.get("accept"); - if (accept != null) { - if (ensureQueryIsLast(session, query)) - query = LispTree.proto.newList("accept", accept).toString(); - else - query = null; - } - - // Handle the request - Master.Response masterResponse = null; - if (query != null) - masterResponse = processQuery(session, query); - - // Print history of exchanges - if (session.context.exchanges.size() > 0 && !jsonFormat()) { - HtmlElement context = H.table().cls("context"); - for (ContextValue.Exchange e : session.context.exchanges) { - HtmlElement row = H.tr().child(H.td(H.span().cls("word").child(e.utterance))); - row.child(H.td(H.span("    "))).child(H.td(e.value.toString())); - if (opts.htmlVerbose >= 1) - row.child(H.td(H.span("    "))).child(H.td(e.formula.toString())); - context.child(row); - } - out.println(context.toString()); - } - - // Print input box for new utterance - if (!jsonFormat()) { - String defaultQuery = query != null ? 
query : session.getLastQuery(); - out.println(makeInputBox(defaultQuery, uri).toString()); - } - - if (masterResponse != null) { - // Render answer - Example ex = masterResponse.getExample(); - if (ex != null) { - if (!jsonFormat()) { - out.println(makeAnswerBox(masterResponse, uri).toString()); - out.println(makeDetails(masterResponse, uri).toString()); - } else { - out.println(makeJson(masterResponse)); - } - } - - if (!jsonFormat() && opts.htmlVerbose >= 1) { - // Write response to user - out.println(H.elem("pre").open()); - for (String outLine : masterResponse.getLines()) - out.println(outLine); - out.println(H.elem("pre").close()); - } - } else { - if (query != null && !jsonFormat()) - out.println(H.span("Internal error!").cls("error")); - } - - if (!jsonFormat()) { - out.println(H.body().close()); - out.println(H.html().close()); - } - - out.close(); - } - - void getResults() throws IOException { - setHeaders("application/json"); - Map map = new HashMap<>(); - map.put("a", "3"); - map.put("b", "4"); - - PrintWriter writer = new PrintWriter(new OutputStreamWriter(exchange.getResponseBody())); - writer.println(Json.writeValueAsStringHard(map)); - writer.close(); - } - - void getFile(String path) throws IOException { - if (!new File(path).exists()) { - LogInfo.logs("File doesn't exist: %s", path); - exchange.sendResponseHeaders(404, 0); // File not found - return; - } - - setHeaders(getMimeType(path)); - if (opts.verbose >= 2) - LogInfo.logs("Sending %s", path); - OutputStream out = new BufferedOutputStream(exchange.getResponseBody()); - InputStream in = new FileInputStream(path); - IOUtils.copy(in, out); - } - } - - public Server(Master master) { - this.master = master; - } - - void run() { - try { - String hostname = fig.basic.SysInfoUtils.getHostName(); - HttpServer server = HttpServer.create(new InetSocketAddress(opts.port), 10); - ExecutorService pool = Executors.newFixedThreadPool(opts.numThreads); - server.createContext("/", new Handler()); - 
server.setExecutor(pool); - server.start(); - LogInfo.logs("Server started at http://%s:%s/sempre", hostname, opts.port); - LogInfo.log("Press Ctrl-D to terminate."); - while (LogInfo.stdin.readLine() != null) { } - LogInfo.log("Shutting down server..."); - server.stop(0); - LogInfo.log("Shutting down executor pool..."); - pool.shutdown(); - } catch (IOException e) { - throw new RuntimeException(e); - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Session.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Session.java deleted file mode 100644 index 04ec86ae70..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Session.java +++ /dev/null @@ -1,103 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -import com.google.common.base.Strings; - -import fig.basic.Option; - -/** - * A Session contains the information specific to a user. - * It maintains the context for discourse as well as the last example, so that - * we can inspect the different predicted derivations and generate new training - * examples / update parameters interactively. 
- * - * @author Percy Liang - */ -public class Session { - public final String id; // Session id - public static class Options { - // path for default parameters, if using a different set for each session - @Option public String inParamsPath; - } - public String remoteHost; // Where we connected from - public String format; // html or json - public ContextValue context; // Current context used to create new examples - Example lastEx; // Last example that we processed - - // if every user have their own model - Params params; - Learner learner; - public Map reqParams; - - public static Options opts = new Options(); - - // per session parameters - public Session(String id) { - this.id = id; - context = new ContextValue(id, DateValue.now(), new ArrayList()); - } - - public Example getLastExample() { return lastEx; } - public String getLastQuery() { return lastEx == null ? null : lastEx.utterance; } - - public void updateContext() { - context = context.withDate(DateValue.now()); - } - - public void updateContext(Example ex, int maxExchanges) { - lastEx = ex; - List derivations = lastEx.getPredDerivations(); - if (derivations.size() > 0) { - Derivation deriv = derivations.get(0); - List newExchanges = new ArrayList(); - newExchanges.addAll(context.exchanges); - newExchanges.add(new ContextValue.Exchange(ex.utterance, deriv.formula, deriv.value)); - while (newExchanges.size() > maxExchanges) - newExchanges.remove(0); - context = context.withNewExchange(newExchanges); - } - } - - public void updateContextWithNewAnswer(Example ex, Derivation deriv) { - List newExchanges = new ArrayList(); - for (int i = 0; i < context.exchanges.size() - 1; i++) - newExchanges.add(context.exchanges.get(i)); - newExchanges.add(new ContextValue.Exchange(ex.utterance, deriv.formula, deriv.value)); - context = context.withNewExchange(newExchanges); - } - - public ContextValue getContextExcludingLast() { - List newExchanges = new ArrayList(); - for (int i = 0; i < context.exchanges.size() - 1; 
i++) - newExchanges.add(context.exchanges.get(i)); - return context.withNewExchange(newExchanges); - } - - public void useIndependentLearner(Builder builder) { - this.params = new Params(); - if (!Strings.isNullOrEmpty(opts.inParamsPath)) - this.params.read(opts.inParamsPath); - this.learner = new Learner(builder.parser, this.params, new Dataset()); - } - - @Override - public String toString() { - return String.format("%s: %s; last: %s", id, context, lastEx); - } - - // Decides if we write out any logs - public boolean isLogging() { return defaultTrue("logging");} - public boolean isWritingCitation() { return defaultTrue("cite");} - public boolean isWritingGrammar() { return defaultTrue("grammar");} - public boolean isLearning() { return defaultTrue("learn");} - public boolean isStatsing() { return defaultTrue("stats");} - - private boolean defaultTrue(String key) { - if (this.reqParams == null) return true; - if (!this.reqParams.containsKey(key)) return true; - return !this.reqParams.get(key).equals("0"); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/SimpleAnalyzer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/SimpleAnalyzer.java deleted file mode 100644 index ed995ff297..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/SimpleAnalyzer.java +++ /dev/null @@ -1,103 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.util.Arrays; - -/** - * SimpleAnalyzer takes an utterance and applies simple methods to pre-process - * - * @author akchou - */ -public class SimpleAnalyzer extends LanguageAnalyzer { - - // Stanford tokenizer doesn't break hyphens. - // Replace hypens with spaces for utterances like - // "Spanish-speaking countries" but not for "2012-03-28". 
- public static String breakHyphens(String utterance) { - StringBuilder buf = new StringBuilder(utterance); - for (int i = 0; i < buf.length(); i++) { - if (buf.charAt(i) == '-' && (i + 1 < buf.length() && Character.isLetter(buf.charAt(i + 1)))) - buf.setCharAt(i, ' '); - } - return buf.toString(); - } - - private static final String[] numbers = {"zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten"}; - - public LanguageInfo analyze(String utterance) { - LanguageInfo languageInfo = new LanguageInfo(); - - // Clear these so that analyze can hypothetically be called - // multiple times. - languageInfo.tokens.clear(); - languageInfo.posTags.clear(); - languageInfo.nerTags.clear(); - languageInfo.nerValues.clear(); - languageInfo.lemmaTokens.clear(); - - // Break hyphens - utterance = breakHyphens(utterance); - - // Default analysis - create tokens crudely - StringBuilder buf = new StringBuilder(); - for (int i = 0; i < utterance.length(); i++) { - char c = utterance.charAt(i); - // Put whitespace around certain characters. - // TODO(pliang): handle contractions such as "can't" properly. - boolean boundaryBefore = !(i - 1 >= 0) || utterance.charAt(i - 1) == ' '; - boolean boundaryAfter = !(i + 1 < utterance.length()) || utterance.charAt(i + 1) == ' '; - boolean separate; - if (c == '.') // Break off period if already space around it (to preserve numbers like 3.5) - separate = boundaryBefore || boundaryAfter; - else - separate = (",?'\"[]".indexOf(c) != -1); - - if (separate) buf.append(' '); - // Convert quotes - if (c == '"') - buf.append(boundaryBefore ? "``" : "''"); - else if (c == '\'') - buf.append(boundaryBefore ? "`" : "'"); - else - buf.append(c); - if (separate) buf.append(' '); - } - utterance = buf.toString().trim(); - if (!utterance.equals("")) { - String[] tokens = utterance.split("\\s+"); - for (String token : tokens) { - languageInfo.tokens.add(LanguageAnalyzer.opts.lowerCaseTokens ? 
token.toLowerCase() : token); - String lemma = token; - if (token.endsWith("s") && token.length() > 1) - lemma = token.substring(0, token.length() - 1); - languageInfo.lemmaTokens.add(LanguageAnalyzer.opts.lowerCaseTokens ? lemma.toLowerCase() : lemma); - - // Is it a written out number? - int x = Arrays.asList(numbers).indexOf(token); - if (x != -1) { - languageInfo.posTags.add("CD"); - languageInfo.nerTags.add("NUMBER"); - languageInfo.nerValues.add(x + ""); - continue; - } - - try { - Double.parseDouble(token); - languageInfo.posTags.add("CD"); - languageInfo.nerTags.add("NUMBER"); - languageInfo.nerValues.add(token); - } catch (NumberFormatException e) { - // Guess that capitalized nouns are proper - if (Character.isUpperCase(token.charAt(0))) - languageInfo.posTags.add("NNP"); - else if (token.equals("'") || token.equals("\"") || token.equals("''") || token.equals("``")) - languageInfo.posTags.add("''"); - else - languageInfo.posTags.add("UNK"); - languageInfo.nerTags.add("UNK"); - languageInfo.nerValues.add("UNK"); - } - } - } - return languageInfo; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/SimpleLexicon.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/SimpleLexicon.java deleted file mode 100644 index 2b8cd6fc69..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/SimpleLexicon.java +++ /dev/null @@ -1,130 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.*; - -import java.io.BufferedReader; -import java.io.IOException; -import java.util.*; - -import edu.stanford.nlp.sempre.interactive.lassie.HOLOntology; - -/** - * A Lexicon maps phrases (e.g., born) to lexical entries, which contain a - * formula (e.g., fb:people.person.place_of_birth) and a type. - * This class is meant to be simpler/faster than the normal - * UnaryLexicon/BinaryLexicon. 
- * - * Note: this class exists because it was annoying to add types into the normal - * UnaryLexicon and I didn't want to break backward compatibility with the rest - * of the code. Also, the normal lexicon is really slow with all those hash - * maps. - * - * The lexicon here only knows about Formulas and fuzzy matching. - * In the lexicon, we have rawPhrase. - * In the user query, we have phrase. - * - * @author Percy Liang - */ -public final class SimpleLexicon { - public static class Entry { - // rawPhrase was the original phrase in the Lexicon - public Entry(String rawPhrase, Formula formula, SemType type, StringDoubleVec features) { - this.rawPhrase = rawPhrase; - this.formula = formula; - this.type = type; - this.features = features; - } - public final String rawPhrase; - public final Formula formula; - public final SemType type; - public final StringDoubleVec features; - - @Override public String toString() { - return "[" + rawPhrase + " => " + formula + " : " + type + "]"; - } - } - - public static class Options { - @Option(gloss = "Path to load lexicon files from") public List inPaths; - @Option(gloss = "Types to allow suffix (last word) matche (for people names") public List matchSuffixTypes; - @Option(gloss = "Normalize lexicon phrases to lowercase") public boolean lowerCaseTokens = true; - } - public static Options opts = new Options(); - - private static SimpleLexicon lexicon; - public static SimpleLexicon getSingleton() { - if (lexicon == null) lexicon = new SimpleLexicon(); - return lexicon; - } - - private SimpleLexicon() { - if (opts.inPaths == null) return; - HOLOntology.getTheOntology(); // make sure ontology (thus lexicon) was created - for (String path : opts.inPaths) read(path); - } - - // Mapping from phrase - Map> entries = new HashMap>(); - - public void read(String path) { - LogInfo.begin_track("SimpleLexicon.read(%s)", path); - try { - BufferedReader in = IOUtils.openIn(path); - String line; - int numLines = 0; - int oldNumEntries = 
entries.size(); - while ((line = in.readLine()) != null) { - Map map = Json.readMapHard(line); - numLines++; - - String rawPhrase = (String) map.get("lexeme"); - Formula formula = Formula.fromString((String) map.get("formula")); - - // Type - String typeStr = (String) map.get("type"); - SemType type = typeStr != null ? SemType.fromString(typeStr) : TypeInference.inferType(formula); - - // Features - StringDoubleVec features = null; - Map featureMap = (Map) map.get("features"); - if (featureMap != null) { - features = new StringDoubleVec(); - for (Map.Entry e : featureMap.entrySet()) - features.add(e.getKey(), e.getValue()); - features.trimToSize(); - } - - // Add verbatim feature - Entry entry = new Entry(rawPhrase, formula, type, features); - String phrase = rawPhrase; - if (opts.lowerCaseTokens) { - phrase = entry.rawPhrase.toLowerCase(); - } - MapUtils.addToList(entries, phrase, entry); - - // For last names - String[] parts = phrase.split(" "); - if (opts.matchSuffixTypes != null && opts.matchSuffixTypes.contains(typeStr) && parts.length > 1) { - StringDoubleVec newFeatures = new StringDoubleVec(); - if (features != null) { // Copy over features - for (StringDoubleVec.Entry e : features) - newFeatures.add(e.getFirst(), e.getSecond()); - } - newFeatures.add("isSuffix", 1); - newFeatures.trimToSize(); - Entry newEntry = new Entry(rawPhrase, formula, type, newFeatures); - MapUtils.addToList(entries, parts[parts.length - 1], newEntry); - } - // In the future, add other mechanisms for lemmatization. 
- } - LogInfo.logs("Read %s lines, generated %d entries (now %d total)", numLines, entries.size() - oldNumEntries, entries.size()); - } catch (IOException e) { - throw new RuntimeException(e); - } - LogInfo.end_track(); - } - - public List lookup(String phrase) { - return MapUtils.get(entries, phrase, Collections.EMPTY_LIST); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/SimpleLexiconFn.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/SimpleLexiconFn.java deleted file mode 100644 index 05223bc269..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/SimpleLexiconFn.java +++ /dev/null @@ -1,107 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.*; -import java.util.*; - -/** - * Uses the SimpleLexicon. - * - * Example: - * (rule $ROOT ($PHRASE) (SimpleLexiconFn (type fb:type.any))) - * - * @author Percy Liang - */ -public class SimpleLexiconFn extends SemanticFn { - public static class Options { - @Option(gloss = "Number of entities to return from entity lexicon") - public int maxEntityEntries = 100; - - @Option(gloss = "Verbosity level") public int verbose = 0; - } - - public static Options opts = new Options(); - - private static SimpleLexicon lexicon; - - // Only return entries whose type matches this - private SemType restrictType = SemType.topType; - - public SimpleLexiconFn() { - lexicon = SimpleLexicon.getSingleton(); - } - - public void init(LispTree tree) { - super.init(tree); - for (int i = 1; i < tree.children.size(); i++) { - // (type fb:people.person): allow us to restrict the type - LispTree arg = tree.child(i); - if ("type".equals(arg.child(0).value)) { - restrictType = SemType.fromLispTree(arg.child(1)); - } - } - } - - public DerivationStream call(Example ex, Callable c) { - String phrase = c.childStringValue(0); - List entries = lexicon.lookup(phrase); - - // Filter by type - List newEntries = new ArrayList(); - for (SimpleLexicon.Entry e : entries) { - if (opts.verbose >= 3) - 
LogInfo.logs("SimpleLexiconFn: %s => %s [type = %s meet-> %s]", phrase, e.formula, e.type, restrictType.meet(e.type)); - if (!restrictType.meet(e.type).isValid()) continue; - newEntries.add(e); - } - entries = newEntries; - - return new MyDerivationStream(ex, c, entries, phrase); - } - - public class MyDerivationStream extends MultipleDerivationStream { - private Example ex; - private Callable callable; - private List entries; - private String phrase; - private int currIndex = 0; - - public MyDerivationStream(Example ex, Callable c, List entries, String phrase) { - this.ex = ex; - this.callable = c; - this.entries = entries; - this.phrase = phrase; - } - - @Override public int estimatedSize() { return entries.size(); } - - @Override - public Derivation createDerivation() { - if (currIndex == entries.size()) return null; - - SimpleLexicon.Entry entry = entries.get(currIndex++); - FeatureVector features = new FeatureVector(); - Derivation deriv = new Derivation.Builder() - .withCallable(callable) - .formula(entry.formula) - .type(entry.type) - .localFeatureVector(features) - .createDerivation(); - - if (FeatureExtractor.containsDomain("basicStats")) { - if (entry.features != null) { - for (StringDoubleVec.Entry e : entry.features) - features.add("basicStats", e.getFirst(), e.getSecond()); - } - } - - // Doesn't generalize, but add it for now, otherwise not separable - if (FeatureExtractor.containsDomain("lexAlign")) - deriv.addFeature("lexAlign", phrase + " --- " + entry.formula); - - if (SemanticFn.opts.trackLocalChoices) - deriv.addLocalChoice("SimpleLexiconFn " + deriv.startEndString(ex.getTokens()) + " " + entry); - - return deriv; - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/SingleDerivationStream.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/SingleDerivationStream.java deleted file mode 100644 index ba57229c61..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/SingleDerivationStream.java +++ /dev/null 
@@ -1,44 +0,0 @@ -package edu.stanford.nlp.sempre; - -/** - * Encapsulates the production of at most one Derivation. - */ -public abstract class SingleDerivationStream implements DerivationStream { - private Derivation nextDeriv; // Next one to return. - private boolean consumed; - - // Override this class: should create a new Derivation. - // Return null if there is none. - public abstract Derivation createDerivation(); - - @Override public boolean hasNext() { - if (nextDeriv != null) return true; // Still one in the queue - if (consumed) return false; // No more - nextDeriv = createDerivation(); // Ask for one - consumed = true; - return nextDeriv != null; - } - - @Override public Derivation peek() { - if (!hasNext()) throw new RuntimeException("No more derivations!"); - //if (nextDeriv == null) throw new RuntimeException("No more derivations!"); - return nextDeriv; - } - - @Override public Derivation next() { - if (!hasNext()) throw new RuntimeException("No more derivations!"); - Derivation deriv = nextDeriv; - nextDeriv = null; - return deriv; - } - - @Override public void remove() { throw new RuntimeException("Cannot remove from DerivationStream"); } - - @Override public int estimatedSize() { return 1; } - - public static SingleDerivationStream constant(final Derivation deriv) { - return new SingleDerivationStream() { - public Derivation createDerivation() { return deriv; } - }; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/StringValue.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/StringValue.java deleted file mode 100644 index 91ebda6042..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/StringValue.java +++ /dev/null @@ -1,32 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.LispTree; - -/** - * Represents a string value. 
- * @author Percy Liang - **/ -public class StringValue extends Value { - public final String value; - - public StringValue(String value) { this.value = value; } - public StringValue(LispTree tree) { this.value = tree.child(1).value; } - - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("string"); - tree.addChild(value); - return tree; - } - - @Override public String sortString() { return "\"" + value + "\""; } - @Override public String pureString() { return value; } - - @Override public int hashCode() { return value.hashCode(); } - @Override public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - StringValue that = (StringValue) o; - return this.value.equals(that.value); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/SuperlativeFormula.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/SuperlativeFormula.java deleted file mode 100644 index e8deca1b82..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/SuperlativeFormula.java +++ /dev/null @@ -1,92 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.google.common.base.Function; -import fig.basic.LispTree; - -import java.util.List; - -/** - * Computes the extreme elements of a set |head| according to the degree given - * by |relation|. - * - * @author Percy Liang - */ -public class SuperlativeFormula extends Formula { - public enum Mode { argmin, argmax }; - - public final Mode mode; - public final Formula rank; // rank-th item - public final Formula count; // Number of items to fetch - public final Formula head; - public final Formula relation; // Apply relation(head, degree) and sort by degree. 
- - public SuperlativeFormula(Mode mode, Formula rank, Formula count, Formula head, Formula relation) { - this.mode = mode; - this.rank = rank; - this.count = count; - this.head = head; - this.relation = relation; - } - - public static Mode parseMode(String mode) { - if ("argmin".equals(mode)) return Mode.argmin; - if ("argmax".equals(mode)) return Mode.argmax; - return null; - } - - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild(mode + ""); - tree.addChild(rank.toLispTree()); - tree.addChild(count.toLispTree()); - tree.addChild(head.toLispTree()); - tree.addChild(relation.toLispTree()); - return tree; - } - - @Override - public void forEach(Function func) { - if (!func.apply(this)) { rank.forEach(func); count.forEach(func); head.forEach(func); relation.forEach(func); } - } - - @Override - public Formula map(Function func) { - Formula result = func.apply(this); - return result == null ? new SuperlativeFormula(mode, rank.map(func), count.map(func), head.map(func), relation.map(func)) : result; - } - - @Override - public List mapToList(Function> func, boolean alwaysRecurse) { - List res = func.apply(this); - if (res.isEmpty() || alwaysRecurse) { - res.addAll(rank.mapToList(func, alwaysRecurse)); - res.addAll(count.mapToList(func, alwaysRecurse)); - res.addAll(head.mapToList(func, alwaysRecurse)); - res.addAll(relation.mapToList(func, alwaysRecurse)); - } - return res; - } - - @SuppressWarnings({"equalshashcode"}) - @Override - public boolean equals(Object thatObj) { - if (!(thatObj instanceof SuperlativeFormula)) return false; - SuperlativeFormula that = (SuperlativeFormula) thatObj; - if (this.mode != that.mode) return false; - if (!this.rank.equals(that.rank)) return false; - if (!this.count.equals(that.count)) return false; - if (!this.head.equals(that.head)) return false; - if (!this.relation.equals(that.relation)) return false; - return true; - } - - public int computeHashCode() { - int hash = 0x7ed55d16; - hash = hash 
* 0xd3a2646c + mode.toString().hashCode(); - hash = hash * 0xd3a2646c + rank.hashCode(); - hash = hash * 0xd3a2646c + count.hashCode(); - hash = hash * 0xd3a2646c + head.hashCode(); - hash = hash * 0xd3a2646c + relation.hashCode(); - return hash; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/TableValue.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/TableValue.java deleted file mode 100644 index d962639272..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/TableValue.java +++ /dev/null @@ -1,81 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.LispTree; -import fig.basic.LogInfo; -import fig.basic.StrUtils; - -import java.util.ArrayList; -import java.util.List; - -/** - * Represents a table (has a header and a list of rows). - * - * (table (State Capital) ((name fb:en.california) (name fb:en.sacramento)) ((name fb:en.oregon) (name fb:en.salem))) - * - * Future: contain information about which columns are important (the head of a - * phrase)? 
- * - * @author Percy Liang - */ -public class TableValue extends Value { - public final List header; - public final List> rows; - - public int numRows() { return rows.size(); } - public int numCols() { return header.size(); } - - public TableValue(LispTree tree) { - header = new ArrayList(); - rows = new ArrayList>(); - // Read header - LispTree headerTree = tree.child(1); - for (LispTree item : headerTree.children) - header.add(item.value); - // Read rows - for (int i = 2; i < tree.children.size(); i++) { - List row = new ArrayList(); - for (LispTree item : tree.child(i).children) - row.add(Values.fromLispTree(item)); - rows.add(row); - } - } - - public TableValue(List header, List> rows) { - this.header = header; - this.rows = rows; - } - - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("table"); - LispTree headerTree = LispTree.proto.newList(); - for (String item : header) - headerTree.addChild(item); - tree.addChild(headerTree); - for (List row : rows) { - LispTree rowTree = LispTree.proto.newList(); - for (Value value : row) - rowTree.addChild(value == null ? 
LispTree.proto.newLeaf(null) : value.toLispTree()); - tree.addChild(rowTree); - } - return tree; - } - - public void log() { - LogInfo.begin_track("%s", StrUtils.join(header, "\t")); - for (List row : rows) - LogInfo.logs("%s", StrUtils.join(row, "\t")); - LogInfo.end_track(); - } - - // Note: don't compare the headers right now - @Override public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - TableValue that = (TableValue) o; - if (!rows.equals(that.rows)) return false; - return true; - } - - @Override public int hashCode() { return rows.hashCode(); } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/TargetValuePreprocessor.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/TargetValuePreprocessor.java deleted file mode 100644 index 40d3938287..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/TargetValuePreprocessor.java +++ /dev/null @@ -1,36 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.*; - -/** - * Preprocess the targetValue of an example. 
- * - * @author ppasupat - */ -public abstract class TargetValuePreprocessor { - public static class Options { - @Option public String targetValuePreprocessor = null; - } - public static Options opts = new Options(); - - private static TargetValuePreprocessor singleton; - - public static TargetValuePreprocessor getSingleton() { - if (singleton == null) { - if (opts.targetValuePreprocessor == null || opts.targetValuePreprocessor.isEmpty()) - singleton = new IdentityTargetValuePreprocessor(); - else - singleton = (TargetValuePreprocessor) Utils.newInstanceHard( - SempreUtils.resolveClassName(opts.targetValuePreprocessor)); - } - return singleton; - } - public static void setSingleton(TargetValuePreprocessor processor) { singleton = processor; } - - public abstract Value preprocess(Value value, Example ex); - -} - -class IdentityTargetValuePreprocessor extends TargetValuePreprocessor { - public Value preprocess(Value value, Example ex) { return value; } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/TimeValue.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/TimeValue.java deleted file mode 100644 index d4f278ac7a..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/TimeValue.java +++ /dev/null @@ -1,51 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.LispTree; - -/** - * Created by joberant on 1/23/15. 
- * Value for representing time - */ -public class TimeValue extends Value { - - public final int hour; - public final int minute; - - public TimeValue(int hour, int minute) { - if (hour > 23 || hour < 0) throw new RuntimeException("Illegal hour: " + hour); - if (minute > 59 || minute < 0) throw new RuntimeException("Illegal minute: " + minute); - this.hour = hour; - this.minute = minute; - } - - public TimeValue(LispTree tree) { - this.hour = Integer.valueOf(tree.child(1).value); - this.minute = Integer.valueOf(tree.child(2).value); - } - - @Override - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("time"); - tree.addChild(String.valueOf(hour)); - tree.addChild(String.valueOf(minute)); - return tree; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - TimeValue timeValue = (TimeValue) o; - if (hour != timeValue.hour) return false; - if (minute != timeValue.minute) return false; - return true; - } - - @Override - public int hashCode() { - int result = hour; - result = 31 * result + minute; - return result; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/TopSemType.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/TopSemType.java deleted file mode 100644 index e92c61c1e3..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/TopSemType.java +++ /dev/null @@ -1,12 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.LispTree; - -// Represents any possible value. 
-public class TopSemType extends SemType { - public boolean isValid() { return true; } - public SemType meet(SemType that) { return that; } - public SemType apply(SemType that) { return this; } - public SemType reverse() { return this; } - public LispTree toLispTree() { return LispTree.proto.newLeaf("top"); } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Trie.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Trie.java deleted file mode 100644 index 181db88388..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Trie.java +++ /dev/null @@ -1,39 +0,0 @@ -package edu.stanford.nlp.sempre; - -import java.util.ArrayList; -import java.util.HashSet; -import java.util.LinkedHashMap; -import java.util.LinkedHashSet; -import java.util.HashMap; -import java.util.Map; -import java.util.Set; - -/** - * Used to access rules efficiently by walking down their RHS. - * @author Percy Liang - */ -public class Trie { - public ArrayList rules = new ArrayList<>(); - Map children = new LinkedHashMap<>(); - // Set of LHS categories of all rules in this subtree - public Set cats = new LinkedHashSet<>(); - - public Trie next(String item) { return children.get(item); } - - public void add(Rule rule) { add(rule, 0); } - private void add(Rule rule, int i) { - cats.add(rule.lhs); - - if (i == rule.rhs.size()) { - if (!rules.contains(rule)) // filter exact match - rules.add(rule); - return; - } - - String item = rule.rhs.get(i); - Trie child = children.get(item); - if (child == null) - children.put(item, child = new Trie()); - child.add(rule, i + 1); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/TypeInference.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/TypeInference.java deleted file mode 100644 index fa82ecfad2..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/TypeInference.java +++ /dev/null @@ -1,280 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.*; - -import java.util.*; - 
-/** - * Performs type inference: given a Formula, return a SemType. - * - * Use a TypeLookup class to look up types of entities and properties. - * - The default NullTypeLookup returns the most generic types. - * - If the Freebase schema is loaded, FreebaseTypeLookup will give - * the type information from the schema. - * - Furthermore, if EntityLexicon is loaded, EntityLexicon will give - * even more refined types. - * - * Note that we just return an upper bound on the type. - * Doesn't have to be perfect, since this is just used to prune out bad combinations. - * - * @author Percy Liang - */ -public final class TypeInference { - private TypeInference() { } - - public static class Options { - @Option(gloss = "Verbosity level") public int verbose = 1; - @Option(gloss = "Class for looking up types") public String typeLookup = "NullTypeLookup"; - } - public static Options opts = new Options(); - - private static TypeLookup typeLookup; - public static TypeLookup getTypeLookup() { - if (typeLookup == null) - typeLookup = (TypeLookup) Utils.newInstanceHard(SempreUtils.resolveClassName(opts.typeLookup)); - return typeLookup; - } - public static void setTypeLookup(TypeLookup typeLookup) { // Kind of hacky, only used in tests - TypeInference.typeLookup = typeLookup; - } - - // For computing type of (call ...) expressions. 
- private static Map callTypeInfos; - public static void addCallTypeInfo(CallTypeInfo info) { - if (callTypeInfos.containsKey(info.func)) - throw new RuntimeException("Already contains " + info.func); - callTypeInfos.put(info.func, info); - } - private static void initCallTypeInfo() { - if (callTypeInfos != null) return; - callTypeInfos = new HashMap(); - addCallTypeInfo(new CallTypeInfo("Math.cos", ListUtils.newList(SemType.floatType), SemType.floatType)); - addCallTypeInfo(new CallTypeInfo(".concat", ListUtils.newList(SemType.stringType, SemType.stringType), SemType.stringType)); - addCallTypeInfo(new CallTypeInfo(".length", ListUtils.newList(SemType.stringType), SemType.intType)); - addCallTypeInfo(new CallTypeInfo(".toString", ListUtils.newList(SemType.anyType), SemType.stringType)); - // This is just a placeholder now...need to have a more systematic way of - // putting types in (see JavaExecutor). - } - - private static final ValueFormula typeFormula = new ValueFormula(new NameValue(CanonicalNames.TYPE)); - - private static final Set comparisonFormulas = new HashSet<>(Arrays.asList( - new ValueFormula(new NameValue("<")), - new ValueFormula(new NameValue(">")), - new ValueFormula(new NameValue("<=")), - new ValueFormula(new NameValue(">=")))); - - @SuppressWarnings("serial") - private static class TypeException extends Exception { } - - private static class Env { - private final TypeLookup typeLookup; - private final boolean allowFreeVariable; // Don't throw an error if there is an unbound variable. 
- private final ImmutableAssocList> list; - private Env(ImmutableAssocList> list, TypeLookup typeLookup, boolean allowFreeVariable) { - this.list = list; - this.typeLookup = typeLookup; - this.allowFreeVariable = allowFreeVariable; - } - public Env(TypeLookup typeLookup, boolean allowFreeVariable) { - this(ImmutableAssocList.emptyList, typeLookup, allowFreeVariable); - } - - public Env addVar(String var) { - return new Env(list.prepend(var, new Ref(SemType.topType)), typeLookup, allowFreeVariable); - } - public SemType updateType(String var, SemType type) { - Ref ref = list.get(var); - if (ref == null) { - if (!allowFreeVariable) - throw new RuntimeException("Free variable not defined: " + var); - else { - // This does not save the new type to the list - ref = new Ref(SemType.topType); - } - } - SemType newType = ref.value.meet(type); - if (!newType.isValid() && opts.verbose >= 2) - LogInfo.warnings("Invalid type from [%s MEET %s]", ref.value, type); - ref.value = newType; - return newType; - } - - @Override - public String toString() { - // Used for debugging, so no need to be efficient. 
- String answer = typeLookup.getClass().getSimpleName() + " {"; - ImmutableAssocList> now = list; - while (!now.isEmpty()) { - answer += now.key + ": " + now.value; - now = now.next; - } - return answer + "}"; - } - } - - // Use the default typeLookup - public static SemType inferType(Formula formula) { - return inferType(formula, getTypeLookup(), false); - } - public static SemType inferType(Formula formula, boolean allowFreeVariable) { - return inferType(formula, getTypeLookup(), allowFreeVariable); - } - public static SemType inferType(Formula formula, TypeLookup typeLookup) { - return inferType(formula, typeLookup, false); - } - - public static SemType inferType(Formula formula, TypeLookup typeLookup, boolean allowFreeVariable) { - SemType type; - try { - type = inferType(formula, new Env(typeLookup, allowFreeVariable), SemType.topType); - } catch (TypeException e) { - type = SemType.bottomType; - } - if (opts.verbose >= 2) LogInfo.logs("TypeInference: %s => %s", formula, type); - return type; - } - - private static SemType check(SemType type) throws TypeException { - if (!type.isValid()) throw new TypeException(); - return type; - } - - // Return the type of |formula| (|type| is an upper bound on the type). - // |env| specifies the mapping form variables to their types. This should be updated. 
- private static SemType inferType(Formula formula, Env env, SemType type) throws TypeException { - if (opts.verbose >= 5) - LogInfo.logs("TypeInference.inferType(%s, %s, %s)", formula, env, type); - if (formula instanceof VariableFormula) { - return check(env.updateType(((VariableFormula) formula).name, type)); - - } else if (formula instanceof ValueFormula) { - Value value = ((ValueFormula) formula).value; - if (value instanceof NumberValue) return check(type.meet(SemType.numberType)); - else if (value instanceof StringValue) return check(type.meet(SemType.stringType)); - else if (value instanceof DateValue) return check(type.meet(SemType.dateType)); - else if (value instanceof TimeValue) return check(type.meet(SemType.timeType)); - else if (value instanceof NameValue) { - String id = ((NameValue) value).id; - - if (CanonicalNames.isUnary(id)) { // Unary - SemType unaryType = env.typeLookup.getEntityType(id); - if (unaryType == null) - unaryType = SemType.entityType; - type = check(type.meet(unaryType)); - } else { // Binary - // Careful of the reversal. 
- SemType propertyType = null; - if (CanonicalNames.SPECIAL_SEMTYPES.containsKey(id)) { - propertyType = CanonicalNames.SPECIAL_SEMTYPES.get(id); - } else if (!CanonicalNames.isReverseProperty(id)) { - propertyType = env.typeLookup.getPropertyType(id); - } else { - propertyType = env.typeLookup.getPropertyType(CanonicalNames.reverseProperty(id)); - if (propertyType != null) propertyType = propertyType.reverse(); - } - if (propertyType == null) - propertyType = SemType.anyAnyFunc; // Don't know - type = check(type.meet(propertyType)); - } - return type; - } else { - throw new RuntimeException("Unhandled value: " + value); - } - - } else if (formula instanceof JoinFormula) { - JoinFormula join = (JoinFormula) formula; - - // Special case: (fb:type.object.type fb:people.person) => fb:people.person - if (typeFormula.equals(join.relation) && join.child instanceof ValueFormula) - return check(type.meet(SemType.newAtomicSemType(Formulas.getString(join.child)))); - - // Special case: (<= (number 5)) => same type as (number 5) - if (comparisonFormulas.contains(join.relation)) - return check(type.meet(inferType(join.child, env, SemType.numberOrDateType))); - - SemType relationType = inferType(join.relation, env, new FuncSemType(SemType.topType, type)); // Relation - SemType childType = inferType(join.child, env, relationType.getArgType()); // Child - relationType = inferType(join.relation, env, new FuncSemType(childType, type)); // Relation again - return check(relationType.getRetType()); - - } else if (formula instanceof MergeFormula) { - MergeFormula merge = (MergeFormula) formula; - type = check(type.meet(SemType.anyType)); // Must be not higher-order - type = inferType(merge.child1, env, type); - type = inferType(merge.child2, env, type); - return type; - - } else if (formula instanceof MarkFormula) { - MarkFormula mark = (MarkFormula) formula; - env = env.addVar(mark.var); - type = check(type.meet(SemType.anyType)); // Must be not higher-order - type = 
inferType(mark.body, env, type); - type = check(env.updateType(mark.var, type)); - return type; - - } else if (formula instanceof LambdaFormula) { - LambdaFormula lambda = (LambdaFormula) formula; - env = env.addVar(lambda.var); - SemType bodyType = inferType(lambda.body, env, type.getRetType()); - SemType varType = check(env.updateType(lambda.var, type.getArgType())); - return new FuncSemType(varType, bodyType); - - } else if (formula instanceof NotFormula) { - NotFormula not = (NotFormula) formula; - type = check(type.meet(SemType.anyType)); // Must be not higher-order - return inferType(not.child, env, type); - - } else if (formula instanceof AggregateFormula) { - AggregateFormula aggregate = (AggregateFormula) formula; - SemType childType = inferType(aggregate.child, env, SemType.anyType); - if (aggregate.mode == AggregateFormula.Mode.count) - return check(SemType.numberType.meet(type)); - else - return check(SemType.numberOrDateType.meet(type).meet(childType)); - - } else if (formula instanceof ArithmeticFormula) { - ArithmeticFormula arith = (ArithmeticFormula) formula; - // TODO(pliang): allow date + duration - type = inferType(arith.child1, env, type); - type = inferType(arith.child2, env, type); - return check(type.meet(SemType.numberOrDateType)); - - } else if (formula instanceof ReverseFormula) { - ReverseFormula reverse = (ReverseFormula) formula; - SemType reverseType = inferType(reverse.child, env, type.reverse()); - return check(reverseType.reverse()); - - } else if (formula instanceof SuperlativeFormula) { - SuperlativeFormula superlative = (SuperlativeFormula) formula; - inferType(superlative.rank, env, SemType.numberType); - inferType(superlative.count, env, SemType.numberType); - type = check(type.meet(SemType.anyType)); // Must be not higher-order - type = inferType(superlative.head, env, type); // Head - SemType relationType = inferType(superlative.relation, env, new FuncSemType(SemType.numberOrDateType, type)); // Relation - type = 
inferType(superlative.head, env, relationType.getRetType()); // Head again - return type; - - } else if (formula instanceof CallFormula) { - initCallTypeInfo(); - CallFormula call = (CallFormula) formula; - if (!(call.func instanceof ValueFormula)) return SemType.bottomType; - Value value = ((ValueFormula) call.func).value; - if (!(value instanceof NameValue)) return SemType.bottomType; - String func = ((NameValue) value).id; - - CallTypeInfo info = callTypeInfos.get(func); - if (info == null) return SemType.anyType; // Don't know - - if (info.argTypes.size() != call.args.size()) return SemType.bottomType; - for (int i = 0; i < info.argTypes.size(); i++) - inferType(call.args.get(i), env, info.argTypes.get(i)); - return check(type.meet(info.retType)); - } else if (formula instanceof ActionFormula) { - initCallTypeInfo(); - return SemType.anyType; - } else { - throw new RuntimeException("Can't infer type of formula: " + formula); - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/TypeLookup.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/TypeLookup.java deleted file mode 100644 index d31fd4b170..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/TypeLookup.java +++ /dev/null @@ -1,17 +0,0 @@ -package edu.stanford.nlp.sempre; - -/** - * A TypeLookup object handles the domain-specific part of type inference. - * TypeInference handles the domain general part. - * - * Given an entity or a property, return the appropriate SemType. - */ -public interface TypeLookup { - // e.g., fb:en.barack_obama => (union fb:people.person ...) 
- // Return null if unknown - SemType getEntityType(String entity); - - // e.g., fb:people.person.place_of_birth => (-> fb:location.location fb:people.person) - // Return null if unknown - SemType getPropertyType(String property); -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/UnionSemType.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/UnionSemType.java deleted file mode 100644 index cc1dcd264c..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/UnionSemType.java +++ /dev/null @@ -1,64 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.LispTree; -import java.util.*; - -// Represents the union of a set of base types. -public class UnionSemType extends SemType { - public final List baseTypes; - public boolean isValid() { return baseTypes.size() > 0; } - - // Constructors - public UnionSemType() { - this.baseTypes = new ArrayList(); - } - public UnionSemType(SemType... baseTypes) { - this.baseTypes = new ArrayList(); - for (SemType baseType : baseTypes) - if (baseType.isValid()) - this.baseTypes.add(baseType); - } - public UnionSemType(Collection baseTypes) { - this.baseTypes = new ArrayList(); - for (SemType baseType : baseTypes) - if (baseType.isValid()) - this.baseTypes.add(baseType); - } - - public SemType meet(SemType that) { - if (that instanceof TopSemType) return this; - List result = new ArrayList<>(); - for (SemType baseType : baseTypes) - result.add(baseType.meet(that)); - return new UnionSemType(result).simplify(); - } - - public SemType apply(SemType that) { - List result = new ArrayList<>(); - for (SemType baseType : baseTypes) - result.add(baseType.apply(that)); - return new UnionSemType(result).simplify(); - } - - public SemType reverse() { - List result = new ArrayList<>(); - for (SemType baseType : baseTypes) - result.add(baseType.reverse()); - return new UnionSemType(result).simplify(); - } - - public LispTree toLispTree() { - LispTree result = LispTree.proto.newList(); - 
result.addChild("union"); - for (SemType baseType : baseTypes) - result.addChild(baseType.toLispTree()); - return result; - } - - public SemType simplify() { - if (baseTypes.size() == 0) return SemType.bottomType; - if (baseTypes.size() == 1) return baseTypes.get(0); - if (baseTypes.contains(SemType.topType)) return SemType.topType; - return this; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/UriValue.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/UriValue.java deleted file mode 100644 index e039898a2e..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/UriValue.java +++ /dev/null @@ -1,33 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.LispTree; - -public class UriValue extends Value { - public final String value; - - public UriValue(LispTree tree) { - this.value = tree.child(1).value; - } - - public UriValue(String value) { - this.value = value; - } - - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("url"); - tree.addChild(value != null ? 
value : ""); - return tree; - } - - @Override public String sortString() { return "" + value; } - @Override public String pureString() { return "" + value; } - - @Override public int hashCode() { return value.hashCode(); } - @Override public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - UriValue that = (UriValue) o; - return this.value.equals(that.value); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Value.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Value.java deleted file mode 100644 index 486d5ece0b..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Value.java +++ /dev/null @@ -1,44 +0,0 @@ -package edu.stanford.nlp.sempre; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonValue; -import fig.basic.LispTree; -import fig.basic.LogInfo; - -import java.util.Comparator; - -/** - * Values represent denotations (or partial denotations). - * - * @author Percy Liang - */ -public abstract class Value { - public abstract LispTree toLispTree(); - - // Print using LogInfo. - public void log() { LogInfo.logs("%s", toString()); } - - @JsonValue - public String toString() { return toLispTree().toString(); } - - // (optional) String used for sorting Values. The default is to call toString() - public String sortString() { return toString(); } - - // (optional) String without the LispTree structure. 
The default is to call toString() - public String pureString() { return toString(); } - - @JsonCreator - public static Value fromString(String str) { - return Values.fromLispTree(LispTree.proto.parseFromString(str)); - } - - @Override public abstract boolean equals(Object o); - @Override public abstract int hashCode(); - - public static class ValueComparator implements Comparator { - @Override - public int compare(Value o1, Value o2) { - return o1.toString().compareTo(o2.toString()); - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ValueEvaluator.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ValueEvaluator.java deleted file mode 100644 index 7bd790a31e..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ValueEvaluator.java +++ /dev/null @@ -1,10 +0,0 @@ -package edu.stanford.nlp.sempre; - -/** - * Given a target denotation Value and a predicted denotation Value, - * return a compatibility. - */ -public interface ValueEvaluator { - // Return a number [0, 1] that denotes how well we're doing. - double getCompatibility(Value target, Value pred); -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ValueFormula.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ValueFormula.java deleted file mode 100644 index eb9c0179f1..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/ValueFormula.java +++ /dev/null @@ -1,33 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.LispTree; - -/** - * A ValueFormula represents an atomic value which is cannot be decomposed - * into further symbols. Simply a wrapper around Value. 
- * - * @author Percy Liang - */ -public class ValueFormula extends PrimitiveFormula { - public final T value; - - public ValueFormula(T value) { this.value = value; } - public LispTree toLispTree() { - if (value instanceof NameValue) return LispTree.proto.newLeaf(((NameValue) value).id); - return value.toLispTree(); - } - - @SuppressWarnings({"equalshashcode"}) - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - ValueFormula that = (ValueFormula) o; - if (!value.equals(that.value)) return false; - return true; - } - - public int computeHashCode() { - return value.hashCode(); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Values.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Values.java deleted file mode 100644 index 714bc3b816..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/Values.java +++ /dev/null @@ -1,43 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.LispTree; - -/** - * Utilities for Value. - * - * @author Percy Liang - */ -public final class Values { - private Values() { } - - // Try to parse the LispTree into a value. - // If it fails, just return null. 
- public static Value fromLispTreeOrNull(LispTree tree) { - if (tree.isLeaf()) - return null; - String type = tree.child(0).value; - if ("name".equals(type)) return new NameValue(tree); - if ("boolean".equals(type)) return new BooleanValue(tree); - if ("number".equals(type)) return new NumberValue(tree); - if ("string".equals(type)) return new StringValue(tree); - if ("list".equals(type)) return new ListValue(tree); - if ("table".equals(type)) return new TableValue(tree); - if ("description".equals(type)) return new DescriptionValue(tree); - if ("url".equals(type)) return new UriValue(tree); - if ("context".equals(type)) return new ContextValue(tree); - if ("date".equals(type)) return new DateValue(tree); - if ("error".equals(type)) return new ErrorValue(tree); - if ("time".equals(type)) return new TimeValue(tree); - return null; - } - - // Try to parse. If it fails, throw an exception. - public static Value fromLispTree(LispTree tree) { - Value value = fromLispTreeOrNull(tree); - if (value == null) - throw new RuntimeException("Invalid value: " + tree); - return value; - } - - public static Value fromString(String s) { return fromLispTree(LispTree.proto.parseFromString(s)); } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/VariableFormula.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/VariableFormula.java deleted file mode 100644 index bd7fcca99c..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/VariableFormula.java +++ /dev/null @@ -1,24 +0,0 @@ -package edu.stanford.nlp.sempre; - -import fig.basic.LispTree; - -/** - * Corresponds to a variable reference. - * - * @author Percy Liang - */ -public class VariableFormula extends PrimitiveFormula { - public final String name; // Name of variable. 
- public VariableFormula(String name) { this.name = name; } - public LispTree toLispTree() { return LispTree.proto.newList("var", name); } - - @SuppressWarnings({"equalshashcode"}) - @Override - public boolean equals(Object thatObj) { - if (!(thatObj instanceof VariableFormula)) return false; - VariableFormula that = (VariableFormula) thatObj; - return this.name.equals(that.name); - } - - public int computeHashCode() { return name.hashCode(); } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cache/FileStringCache.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cache/FileStringCache.java deleted file mode 100644 index f2e7ac28c8..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cache/FileStringCache.java +++ /dev/null @@ -1,174 +0,0 @@ -package edu.stanford.nlp.sempre.cache; - -import fig.basic.*; - -import java.io.BufferedReader; -import java.io.File; -import java.io.IOException; -import java.io.PrintWriter; -import java.nio.file.FileSystems; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.StandardCopyOption; -import java.util.LinkedHashMap; -import java.util.Map; - -/** - * Cache backed by a file. 
- * - * @author Percy Liang - */ -public class FileStringCache implements StringCache, LruCallback { - public static class Options { - @Option(gloss = "Cache capacity (in MB)") - public int capacity = 35 * 1024; - - @Option(gloss = "Auto-flush cache to disk every N accesses") - public int flushFrequency = Integer.MAX_VALUE; - - @Option(gloss = "Append mode instead of dump mode") - public boolean appendMode = true; - - public int verbose = 0; - } - public static final Options opts = new Options(); - - private String path; - private PrintWriter out; - - private final LinkedHashMap cache; - private final StatFig keyStats = new StatFig(); - private final StatFig valStats = new StatFig(); - private int numTouches = 0; - private int numEvictions = 0; - private boolean readOnly; - - public FileStringCache() { - int cap = opts.capacity; - cap = (cap < 0) ? cap : (cap * 1024 * 1024); - if (cap < 0) { - cache = new LinkedHashMap(); - } else { - cache = new LruMap(cap, this); - } - } - - public String getPath() { return path; } - - public void init(String path) { init(path, false); } - public void init(String path, boolean readOnly) { - if (this.path != null) throw new RuntimeException("Already initialized with " + this.path); - this.path = path; - this.readOnly = readOnly; - - // Read existing. 
- if (new File(path).exists()) { - try { - BufferedReader in = IOUtils.openInHard(path); - String line; - while ((line = in.readLine()) != null) { - String[] tokens = line.split("\t", 2); - if (tokens.length != 2) - throw new RuntimeException("Invalid line in cache file: " + line); - cache.put(tokens[0], tokens[1]); - } - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - LogInfo.logs("Using cache %s (%d entries)", path, cache.size()); - - if (!readOnly && opts.appendMode) - out = IOUtils.openOutAppendHard(path); - flush(); - } - - private void flush() { - if (readOnly) - return; - - if (out != null) // Append mode - return; - - if (path == null) // No file-backing - return; - - if (opts.verbose >= 2) { - LogInfo.begin_track("FileStringCache FLUSH (dump mode)"); - LogInfo.logs("Size: %d", size()); - if (cache instanceof LruMap) - LogInfo.logs("Memory: %d", ((LruMap) cache).getBytes()); - LogInfo.logs("Touches: %d", numTouches); - LogInfo.logs("Evictions: %d", numEvictions); - LogInfo.logs("Evicted keys: %s", keyStats); - LogInfo.logs("Evicted values: %s", valStats); - LogInfo.end_track(); - } - - PrintWriter dumpOut = IOUtils.openOutHard(this.path + ".tmp"); - for (Map.Entry entry : cache.entrySet()) { - dumpOut.println(entry.getKey() + "\t" + entry.getValue()); - } - dumpOut.flush(); - dumpOut.close(); - try { - Path src = FileSystems.getDefault().getPath(this.path + ".tmp"); - Path dst = FileSystems.getDefault().getPath(this.path); - Files.move(src, dst, - StandardCopyOption.REPLACE_EXISTING, - StandardCopyOption.ATOMIC_MOVE); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - public String get(String key) { return cache.get(key); } - - public void put(String key, String value) { - assert key.indexOf('\t') == -1 : key; - assert key.indexOf('\n') == -1 : key; - assert value.indexOf('\n') == -1 : value; - if (opts.verbose >= 5) { - logTrack("FileStringCache PUT (before)", key, value); - } - cache.put(key, value); - if (out 
!= null) { // Append mode - out.println(key + "\t" + value); - out.flush(); - } - if (numTouches++ % opts.flushFrequency == 0) - flush(); - } - - public int size() { return cache.size(); } - - @Override - public void onEvict(Map.Entry entry) { - if (opts.verbose >= 5) { - logTrack("FileStringCache EVICT (after)", entry.getKey(), entry.getValue()); - } - numEvictions++; - keyStats.add(entry.getKey(), entry.getKey().length()); - valStats.add(entry.getValue(), entry.getValue().length()); - } - - private void logTrack(String header, String key, String value) { - LogInfo.begin_track(header); - LogInfo.logs("Key size: %d (%d bytes)", key.length(), MemUsage.getBytes(key)); - LogInfo.logs("Val size: %d (%d bytes)", value.length(), MemUsage.getBytes(value)); - if (cache instanceof LruMap) { - LogInfo.logs("Cache size: %d entries (%d bytes of %d)", - cache.size(), - ((LruMap) cache).getBytes(), - ((LruMap) cache).getCapacity()); - } - LogInfo.end_track(); - } - - // For tests - @Deprecated - public int getNumTouches() { - return numTouches; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cache/LruCallback.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cache/LruCallback.java deleted file mode 100644 index ce92ca5050..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cache/LruCallback.java +++ /dev/null @@ -1,10 +0,0 @@ -package edu.stanford.nlp.sempre.cache; - -import java.util.Map; - -/** - * @author Roy Frostig - */ -public interface LruCallback { - void onEvict(Map.Entry entry); -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cache/LruMap.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cache/LruMap.java deleted file mode 100644 index 6421f0c2ca..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cache/LruMap.java +++ /dev/null @@ -1,86 +0,0 @@ -package edu.stanford.nlp.sempre.cache; - -import fig.basic.MemUsage; - -import java.util.LinkedHashMap; -import java.util.Map; - -/** 
- * TODO(rf): Fig.MemUsage is buggy wrt computing LinkedHashMap byte - * usage, so we do our own bookkeeping here, but we are doing so very - * incompletely (i.e. only on put() and remove()). - * - * @author Roy Frostig - */ -public class LruMap extends LinkedHashMap { - private final int cap; - private final LruCallback callback; - private int bytes = 0; - - public LruMap(int capacity) { - this(capacity, null); - } - - public LruMap(int capacity, LruCallback evictCallback) { - // "As a general rule, the default load factor (.75) offers a good - // tradeoff between time and space costs." - // -- Java 8 API, - // http://docs.oracle.com/javase/8/docs/api/java/util/HashMap.html - super(capacity, 0.75f, true); // Flag true for access-order. - this.cap = capacity; - this.callback = evictCallback; - } - - public int getCapacity() { - return cap; - } - - public int getBytes() { - return bytes; - } - - @Override - public V put(K key, V value) { - boolean replacing = containsKey(key); - V old = super.get(key); - bytes += MemUsage.getBytes(value); - if (replacing) { - bytes -= MemUsage.getBytes(old); - } else { - bytes += MemUsage.getBytes(key); - } - return super.put(key, value); - } - - @Override - public V remove(Object key) { - boolean decr = containsKey(key); - V old = super.remove(key); - if (decr) { - bytes -= MemUsage.getBytes(key); - bytes -= MemUsage.getBytes(old); - } - return old; - } - - /** - * Ignore the argument, iterate in access order and remove until - * memory constraints are satisfied. Always return false, since - * we did our own removal. 
- */ - @Override - protected boolean removeEldestEntry(Map.Entry eldest) { - while (getBytes() > cap && !isEmpty()) { - Map.Entry toRemove = null; - for (Map.Entry entry : entrySet()) { - toRemove = entry; - break; - } - remove(toRemove.getKey()); - if (callback != null) - callback.onEvict(toRemove); - } - return false; - } -} - diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cache/RemoteStringCache.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cache/RemoteStringCache.java deleted file mode 100644 index 43278ecd9b..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cache/RemoteStringCache.java +++ /dev/null @@ -1,82 +0,0 @@ -package edu.stanford.nlp.sempre.cache; - -import java.io.*; -import java.net.*; - -import fig.basic.LogInfo; - -/** - * Cache backed by a remote service (see StringCacheServer). - * - * @author Percy Liang - */ -public class RemoteStringCache implements StringCache { - public static final int NUM_TRIES = 5; - - private Socket socket; - private PrintWriter out; - private BufferedReader in; - - // Cache things locally. 
- private FileStringCache local = new FileStringCache(); - - public RemoteStringCache(String path, String host, int port) { - try { - LogInfo.begin_track("RemoteStringCache: connecting to %s:%s to access %s", host, port, path); - this.socket = new Socket(host, port); - this.out = new PrintWriter(socket.getOutputStream(), true); - this.in = new BufferedReader(new InputStreamReader(socket.getInputStream())); - String response = makeRequest("open", path, null); - LogInfo.logs("Using cache path=%s, host=%s, port=%s", path, host, port); - if (!response.equals("OK")) { - throw new RuntimeException(response); - } - LogInfo.end_track(); - } catch (UnknownHostException e) { - LogInfo.end_track(); - throw new RuntimeException(e); - } catch (IOException e) { - LogInfo.end_track(); - throw new RuntimeException(e); - } - } - - public String makeRequest(String method, String key, String value) { - try { - if (value == null) - out.println(method + "\t" + key); - else - out.println(method + "\t" + key + "\t" + value); - out.flush(); - for (int i = 0; i < NUM_TRIES; i++) { - try { - String result = in.readLine(); - if (result.equals(StringCacheServer.nullString)) result = null; - return result; - } catch (NullPointerException e) { - LogInfo.logs("RemoteStringCache.makeRequest(%s, %s, %s) failed", method, key, value); - } - } - throw new NullPointerException(); - } catch (SocketTimeoutException e) { - throw new RuntimeException(e); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - public String get(String key) { - // First check the local cache. 
- String value = local.get(key); - if (value == null) - value = makeRequest("get", key, null); - return value; - } - - public void put(String key, String value) { - local.put(key, value); - makeRequest("put", key, value); - } - - public int size() { return local.size(); } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cache/StringCache.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cache/StringCache.java deleted file mode 100644 index 4ce1f9e313..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cache/StringCache.java +++ /dev/null @@ -1,12 +0,0 @@ -package edu.stanford.nlp.sempre.cache; - -/** - * Stores a Map, which is synchronized with disk. - * - * @author Percy Liang - */ -public interface StringCache { - String get(String key); - void put(String key, String value); -} - diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cache/StringCacheServer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cache/StringCacheServer.java deleted file mode 100644 index 1cb75bd6f2..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cache/StringCacheServer.java +++ /dev/null @@ -1,158 +0,0 @@ -package edu.stanford.nlp.sempre.cache; - -import fig.basic.LogInfo; -import fig.basic.Option; -import fig.exec.Execution; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.File; -import java.io.PrintWriter; -import java.net.ServerSocket; -import java.net.Socket; -import java.util.Date; -import java.util.HashMap; - -/** - * Runs a server backed by a file which accepts requests of the following form: - * get\t|key| put\t|key|\t|value| - *

- * You can test it out by doing: telnet - *

- * To use it in code, see RemoteStringCache. - * - * @author Percy Liang - */ -public class StringCacheServer implements Runnable { - @Option(gloss = "Open port here", required = true) public int port; - @Option(gloss = "How much output to print") public int verbose = 0; - @Option(gloss = "Read only") public boolean readOnly = false; - @Option(gloss = "Only allow files in this directory") public String basePath; - - // Shared state - private HashMap caches = new HashMap(); - private boolean terminated = false; - - // Represents the null value to be returned back to the user. - public static String nullString = "__NULL__"; - - class ClientHandler implements Runnable { - Socket client; - FileStringCache cache; - - public ClientHandler(Socket client) { - this.client = client; - } - - public void run() { - try { - PrintWriter out = new PrintWriter(client.getOutputStream()); - BufferedReader in = new BufferedReader(new InputStreamReader(client.getInputStream())); - String line; - int numGets = 0, numPuts = 0, numErrors = 0; - while (!terminated && (line = in.readLine()) != null) { - // LogInfo.logs("Input: %s", line); - String[] tokens = line.split("\t"); - String response = null; - if (tokens[0].equals("open") && tokens.length == 2) { - if (basePath != null && tokens[1].contains("/")) { - response = "ERROR: only simple file names allowed"; - } else { - String path = tokens[1]; - if (basePath != null) - path = new File(basePath, path).toString(); - // Create the cache if necessary - synchronized (caches) { - cache = caches.get(path); - if (cache == null) { - cache = new FileStringCache(); - caches.put(path, cache); - } - } - response = "OK"; - synchronized (cache) { - if (cache.getPath() == null) { - LogInfo.begin_track("Loading %s", path); - try { - cache.init(path, readOnly); - } catch (Throwable t) { - response = "ERROR: " + t; - } - LogInfo.logs("Response: %s", response); - LogInfo.end_track(); - } - } - } - } else if (tokens[0].equals("get") && tokens.length == 2) 
{ - if (cache == null) { - response = "ERROR: no file opened yet"; - } else { - synchronized (cache) { response = cache.get(tokens[1]); } - if (response == null) response = nullString; - numGets++; - } - } else if (tokens[0].equals("put") && tokens.length == 3) { - if (readOnly) { - response = "ERROR: read-only"; - } else if (cache == null) { - response = "ERROR: no file opened yet"; - } else { - synchronized (cache) { cache.put(tokens[1], tokens[2]); } - response = "OK"; - numPuts++; - } - } else if (tokens[0].equals("stats")) { - response = "Caches:"; - synchronized (caches) { - for (String path : caches.keySet()) - response += "\n " + path + " (" + caches.get(path).size() + " entries)"; - } - } else if (tokens[0].equals("terminate")) { - if (readOnly) { - response = "ERROR: read-only"; - } else { - response = "OK; telnet to the port again to terminate"; - terminated = true; - } - } else if (tokens[0].equals("help")) { - response = "Commands (tab-separated):\n open |path|\n get |key|\n put |key| |value|\n terminate\n stats\n help"; - } else { - response = "ERROR: " + line; - numErrors++; - } - // LogInfo.logs("Response: %s", response); - out.println(response); - out.flush(); - } - in.close(); - LogInfo.logs("[%s] Closed connection %s: %d gets, %d puts, %d errors", new Date(), client, numGets, numPuts, numErrors); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - } - - public void run() { - LogInfo.logs("[%s] Starting server on port %d", new Date(), port); - - try { - ServerSocket server = new ServerSocket(port); - while (!terminated) { - Socket client = server.accept(); - LogInfo.logs("[%s] Opened connection from %s", new Date(), client); - Thread t = new Thread(new ClientHandler(client)); - t.start(); - } - LogInfo.log("Done"); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - public static void main(String[] args) throws Exception { - Execution.run(args, - new StringCacheServer(), - "FileStringCache", 
FileStringCache.opts); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cache/StringCacheUtils.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cache/StringCacheUtils.java deleted file mode 100644 index 51c7700099..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cache/StringCacheUtils.java +++ /dev/null @@ -1,25 +0,0 @@ -package edu.stanford.nlp.sempre.cache; - - -public final class StringCacheUtils { - private StringCacheUtils() { } - // description could be - // Local path: ... - // Remote path: jacko:4000:/u/nlp/... - public static StringCache create(String description) { - // Remote - if (description != null && description.indexOf(':') != -1) { - String[] tokens = description.split(":", 3); - if (tokens.length != 3) - throw new RuntimeException("Invalid format (not server:port:path): " + description); - RemoteStringCache cache = new RemoteStringCache(tokens[2], tokens[0], Integer.parseInt(tokens[1])); - return cache; - } - - // Local - FileStringCache cache = new FileStringCache(); - if (description != null) - cache.init(description); - return cache; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cache/test/StringCacheTest.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cache/test/StringCacheTest.java deleted file mode 100644 index a001339ff0..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cache/test/StringCacheTest.java +++ /dev/null @@ -1,112 +0,0 @@ -package edu.stanford.nlp.sempre.cache.test; - -import edu.stanford.nlp.sempre.cache.FileStringCache; -import fig.basic.IOUtils; -import fig.basic.MemUsage; - -import org.testng.annotations.Test; - -import java.io.BufferedReader; -import java.io.IOException; -import java.nio.file.FileSystems; -import java.nio.file.Files; -import java.nio.file.Path; - -import static org.testng.AssertJUnit.assertEquals; - -/** - * @author Roy Frostig - */ -public class StringCacheTest { - private int numLines(BufferedReader in) 
throws IOException { - int n = 0; - while (in.readLine() != null) n++; - return n; - } - - /** - * Check that we flush at the given frequency - * @throws IOException - */ - @SuppressWarnings("deprecation") - @Test(groups = "fs") - public void testFlush() throws IOException { - FileStringCache.opts.appendMode = false; - FileStringCache.opts.capacity = 1; - FileStringCache.opts.flushFrequency = 10; - FileStringCache.opts.verbose = 5; - - final String fs = "StringCacheTest-cache.tmp"; - final Path fsPath = FileSystems.getDefault().getPath(fs); - - Files.deleteIfExists(fsPath); - FileStringCache cache = new FileStringCache(); - cache.init(fs); - - for (int i = 0; i <= 100; i++) { - String key = "key:" + i; - String val = "val:" + i; - cache.put(key, val); - assertEquals(cache.getNumTouches(), i + 1); - if (i > 0 && i % 10 == 0) { - int lines = numLines(IOUtils.openInHard(fs)); - System.out.println("!!! " + lines + " = " + i); - assertEquals(lines, i + 1); - } - } - Files.deleteIfExists(fsPath); - } - - /** - * Check that we really do evict at the capacity. 
- * @throws IOException - */ - @Test(groups = "fs") - public void testEvict() throws IOException { - FileStringCache.opts.appendMode = false; - FileStringCache.opts.capacity = 10; - FileStringCache.opts.flushFrequency = 10; - FileStringCache.opts.verbose = 5; - - final String fs = "StringCacheTest-cache.tmp"; - final Path fsPath = FileSystems.getDefault().getPath(fs); - - Files.deleteIfExists(fsPath); - FileStringCache cache = new FileStringCache(); - cache.init(fs); - - // Make ~20 MB of string data - String junk20MB = "junk"; - while (MemUsage.getBytes(junk20MB) <= 20 * 1024 * 1024) - junk20MB += junk20MB; - - // Make ~1 MB of string data - String junk1MB = "junk"; - while (MemUsage.getBytes(junk1MB) <= 1024 * 1024) - junk1MB += junk1MB; - - // Add something small, to be evicted - cache.put(junk1MB + "1", "test1"); - assertEquals(1, cache.size()); - assert cache.get(junk1MB + "1").equals("test1"); - - // Add something small, shouldn't evict anything - cache.put(junk1MB + "2", "test2"); - assertEquals(2, cache.size()); - assert cache.get(junk1MB + "1").equals("test1"); - assert cache.get(junk1MB + "2").equals("test2"); - - // Add something big, to saturate cache and kick out the small entries. - // Also kicks itself out. - cache.put(junk20MB + "1", "big1"); - assertEquals(0, cache.size()); - assertEquals(cache.get(junk20MB + "1"), null); - - // Do that again. 
- cache.put(junk20MB + "2", "big2"); - assertEquals(0, cache.size()); - assertEquals(cache.get(junk20MB + "2"), null); - - Files.deleteIfExists(fsPath); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/corenlp/CoreNLPAnalyzer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/corenlp/CoreNLPAnalyzer.java deleted file mode 100644 index 1a9889504a..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/corenlp/CoreNLPAnalyzer.java +++ /dev/null @@ -1,163 +0,0 @@ -package edu.stanford.nlp.sempre.corenlp; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.LanguageInfo.DependencyEdge; -import edu.stanford.nlp.ling.CoreAnnotations; -import edu.stanford.nlp.ling.IndexedWord; -import edu.stanford.nlp.ling.CoreAnnotations.*; -import edu.stanford.nlp.ling.CoreLabel; -import edu.stanford.nlp.pipeline.Annotation; -import edu.stanford.nlp.pipeline.StanfordCoreNLP; -import edu.stanford.nlp.semgraph.SemanticGraph; -import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations; -import edu.stanford.nlp.semgraph.SemanticGraphEdge; -import edu.stanford.nlp.util.CoreMap; - -import com.google.common.collect.Lists; -import com.google.common.base.Joiner; - -import fig.basic.*; - -import java.io.*; -import java.util.*; - -/** - * CoreNLPAnalyzer uses Stanford CoreNLP pipeline to analyze an input string utterance - * and return a LanguageInfo object - * - * @author akchou - */ -public class CoreNLPAnalyzer extends LanguageAnalyzer { - public static class Options { - @Option(gloss = "What CoreNLP annotators to run") - public List annotators = Lists.newArrayList("tokenize", "ssplit", "pos", "lemma", "ner", "parse"); - - @Option(gloss = "Whether to use case-sensitive models") - public boolean caseSensitive = false; - } - - public static Options opts = new Options(); - - // TODO(pliang): don't muck with the POS tag; instead have a separate flag - // for isContent which looks at posTag != "MD" && lemma != "be" && lemma != - // "have" 
- // Need to update TextToTextMatcher - private static final String[] AUX_VERB_ARR = new String[] {"is", "are", "was", - "were", "am", "be", "been", "will", "shall", "have", "has", "had", - "would", "could", "should", "do", "does", "did", "can", "may", "might", - "must", "seem"}; - private static final Set AUX_VERBS = new HashSet(Arrays.asList(AUX_VERB_ARR)); - private static final String AUX_VERB_TAG = "VBD-AUX"; - - public static StanfordCoreNLP pipeline = null; - - public static void initModels() { - if (pipeline != null) return; - Properties props = new Properties(); - props.put("annotators", Joiner.on(',').join(opts.annotators)); - if (opts.caseSensitive) { - props.put("pos.model", "edu/stanford/nlp/models/pos-tagger/english-bidirectional/english-bidirectional-distsim.tagger"); - props.put("ner.model", "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz,edu/stanford/nlp/models/ner/english.conll.4class.distsim.crf.ser.gz"); - } else { - props.put("pos.model", "edu/stanford/nlp/models/pos-tagger/english-caseless-left3words-distsim.tagger"); - props.put("ner.model", "edu/stanford/nlp/models/ner/english.all.3class.caseless.distsim.crf.ser.gz,edu/stanford/nlp/models/ner/english.conll.4class.caseless.distsim.crf.ser.gz"); - } - pipeline = new StanfordCoreNLP(props); - } - - // Stanford tokenizer doesn't break hyphens. - // Replace hypens with spaces for utterances like - // "Spanish-speaking countries" but not for "2012-03-28". - public static String breakHyphens(String utterance) { - StringBuilder buf = new StringBuilder(utterance); - for (int i = 0; i < buf.length(); i++) { - if (buf.charAt(i) == '-' && (i + 1 < buf.length() && Character.isLetter(buf.charAt(i + 1)))) - buf.setCharAt(i, ' '); - } - return buf.toString(); - } - - public LanguageInfo analyze(String utterance) { - LanguageInfo languageInfo = new LanguageInfo(); - - // Clear these so that analyze can hypothetically be called - // multiple times. 
- languageInfo.tokens.clear(); - languageInfo.posTags.clear(); - languageInfo.nerTags.clear(); - languageInfo.nerValues.clear(); - languageInfo.lemmaTokens.clear(); - languageInfo.dependencyChildren.clear(); - - // Break hyphens - utterance = breakHyphens(utterance); - - // Run Stanford CoreNLP - initModels(); - Annotation annotation = pipeline.process(utterance); - - for (CoreLabel token : annotation.get(CoreAnnotations.TokensAnnotation.class)) { - String word = token.get(TextAnnotation.class); - String wordLower = word.toLowerCase(); - if (LanguageAnalyzer.opts.lowerCaseTokens) { - languageInfo.tokens.add(wordLower); - } else { - languageInfo.tokens.add(word); - } - languageInfo.posTags.add( - AUX_VERBS.contains(wordLower) ? - AUX_VERB_TAG : - token.get(PartOfSpeechAnnotation.class)); - languageInfo.nerTags.add(token.get(NamedEntityTagAnnotation.class)); - languageInfo.lemmaTokens.add(token.get(LemmaAnnotation.class)); - languageInfo.nerValues.add(token.get(NormalizedNamedEntityTagAnnotation.class)); - } - - // Fills in a stanford dependency graph for constructing a feature - for (CoreMap sentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) { - SemanticGraph ccDeps = sentence.get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class); - if (ccDeps == null) continue; - int sentenceBegin = sentence.get(CoreAnnotations.TokenBeginAnnotation.class); - - // Iterate over all tokens and their dependencies - for (int sourceTokenIndex = sentenceBegin; - sourceTokenIndex < sentence.get(CoreAnnotations.TokenEndAnnotation.class); - sourceTokenIndex++) { - final ArrayList outgoing = new ArrayList(); - languageInfo.dependencyChildren.add(outgoing); - IndexedWord node = ccDeps.getNodeByIndexSafe(sourceTokenIndex - sentenceBegin + 1); // + 1 for ROOT - if (node != null) { - for (SemanticGraphEdge edge : ccDeps.outgoingEdgeList(node)) { - final String relation = edge.getRelation().toString(); - final int targetTokenIndex = sentenceBegin + 
edge.getTarget().index() - 1; - outgoing.add(new DependencyEdge(relation, targetTokenIndex)); - } - } - } - } - return languageInfo; - } - - // Test on example sentence. - public static void main(String[] args) { - CoreNLPAnalyzer analyzer = new CoreNLPAnalyzer(); - while (true) { - try { - BufferedReader reader = new BufferedReader(new InputStreamReader(System.in)); - System.out.println("Enter some text:"); - String text = reader.readLine(); - LanguageInfo langInfo = analyzer.analyze(text); - LogInfo.begin_track("Analyzing \"%s\"", text); - LogInfo.logs("tokens: %s", langInfo.tokens); - LogInfo.logs("lemmaTokens: %s", langInfo.lemmaTokens); - LogInfo.logs("posTags: %s", langInfo.posTags); - LogInfo.logs("nerTags: %s", langInfo.nerTags); - LogInfo.logs("nerValues: %s", langInfo.nerValues); - LogInfo.logs("dependencyChildren: %s", langInfo.dependencyChildren); - LogInfo.end_track(); - } catch (IOException e) { - e.printStackTrace(); - } - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/corenlp/test/CoreNLPSemanticFnTest.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/corenlp/test/CoreNLPSemanticFnTest.java deleted file mode 100644 index 9e2e21f476..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/corenlp/test/CoreNLPSemanticFnTest.java +++ /dev/null @@ -1,92 +0,0 @@ -package edu.stanford.nlp.sempre.corenlp.test; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.corenlp.CoreNLPAnalyzer; -import edu.stanford.nlp.sempre.test.TestUtils; -import fig.basic.LispTree; -import org.testng.annotations.Test; - -import java.util.Collections; -import java.util.List; - -import static org.testng.AssertJUnit.assertEquals; - -/** - * Test SemanticFns that depend on CoreNLP (e.g., NumberFn on "one thousand") - * @author Percy Liang - */ -public class CoreNLPSemanticFnTest { - private static Formula F(String s) { return Formula.fromString(s); } - - void check(Formula target, DerivationStream derivations) { - if 
(!derivations.hasNext()) throw new RuntimeException("Expected 1 derivation, got " + derivations); - assertEquals(target, derivations.next().formula); - } - - void check(Formula target, String utterance, SemanticFn fn, List children) { - Example ex = TestUtils.makeSimpleExample(utterance); - check(target, fn.call(ex, new SemanticFn.CallInfo(null, 0, ex.numTokens(), Rule.nullRule, children))); - } - - void check(Formula target, String utterance, SemanticFn fn) { - List empty = Collections.emptyList(); - check(target, utterance, fn, empty); - } - - void checkNumDerivations(DerivationStream derivations, int num) { - assertEquals(num, derivations.estimatedSize()); - } - - Derivation D(Formula f) { - return (new Derivation.Builder()) - .formula(f) - .prob(1.0) - .createDerivation(); - } - - LispTree T(String str) { - return LispTree.proto.parseFromString(str); - } - - // TODO(chaganty): Test bridge fn - requires freebase (?) - // TODO(chaganty): Test context fn - - @Test public void dateFn() { - LanguageAnalyzer.setSingleton(new CoreNLPAnalyzer()); - check(F("(date 2013 8 7)"), "August 7, 2013", new DateFn()); - check(F("(date 1982 -1 -1)"), "1982", new DateFn()); - check(F("(date -1 6 4)"), "june 4", new DateFn()); - } - - @Test public void filterNerTagFn() { - LanguageAnalyzer.setSingleton(new CoreNLPAnalyzer()); - FilterNerSpanFn filter = new FilterNerSpanFn(); - filter.init(T("(FilterNerSpanFn token PERSON)")); - Derivation child = new Derivation.Builder().createDerivation(); - Example ex = TestUtils.makeSimpleExample("where is Obama"); - assertEquals(filter.call(ex, - new SemanticFn.CallInfo(null, 0, 1, Rule.nullRule, Collections.singletonList(child))).hasNext(), - false); - assertEquals(filter.call(ex, - new SemanticFn.CallInfo(null, 1, 2, Rule.nullRule, Collections.singletonList(child))).hasNext(), - false); - assertEquals(filter.call(ex, - new SemanticFn.CallInfo(null, 2, 3, Rule.nullRule, Collections.singletonList(child))).hasNext(), - true); - } - - // 
TODO(chaganty): Test fuzzy match fn - // TODO(chaganty): Test identity fn - // TODO(chaganty): Test join fn - // TODO(chaganty): Test lexicon fn - // TODO(chaganty): Test merge fn - - @Test public void numberFn() { - LanguageAnalyzer.setSingleton(new CoreNLPAnalyzer()); - check(F("(number 35000)"), "thirty-five thousand", new NumberFn()); - } - - // TODO(chaganty): Test select fn - // TODO(chaganty): Test simple lexicon fn - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cprune/CPruneDerivInfo.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cprune/CPruneDerivInfo.java deleted file mode 100644 index 3c487bbe78..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cprune/CPruneDerivInfo.java +++ /dev/null @@ -1,13 +0,0 @@ -package edu.stanford.nlp.sempre.cprune; - -import java.util.Map; -import java.util.List; - -public class CPruneDerivInfo { - - public Map treeSymbols; - public Map ruleSymbols; - public List customRuleStrings; - public boolean containsCrossReference; - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cprune/CPruneFloatingParser.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cprune/CPruneFloatingParser.java deleted file mode 100644 index 2b6ef29a0c..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cprune/CPruneFloatingParser.java +++ /dev/null @@ -1,119 +0,0 @@ -package edu.stanford.nlp.sempre.cprune; - -import java.util.List; - -import edu.stanford.nlp.sempre.*; -import fig.basic.LogInfo; - -/** - * A parser that first tries to exploit the macro grammar and only fall back to full search when needed. 
- */ -public class CPruneFloatingParser extends FloatingParser { - - FloatingParser exploreParser; - - public CPruneFloatingParser(Spec spec) { - super(spec); - exploreParser = new FloatingParser(spec).setEarlyStopping(true, CollaborativePruner.opts.maxDerivations); - } - - @Override - public void onBeginDataGroup(int iter, int numIters, String group) { - if (CollaborativePruner.uidToCachedNeighbors == null) { - CollaborativePruner.customGrammar.init(grammar); - CollaborativePruner.loadNeighbors(); - } - CollaborativePruner.stats.reset(iter + "." + group); - } - - @Override - public ParserState newParserState(Params params, Example ex, boolean computeExpectedCounts) { - return new CPruneFloatingParserState(this, params, ex, computeExpectedCounts); - } - -} - -class CPruneFloatingParserState extends ParserState { - - public CPruneFloatingParserState(Parser parser, Params params, Example ex, boolean computeExpectedCounts) { - super(parser, params, ex, computeExpectedCounts); - } - - @Override - public void infer() { - LogInfo.begin_track("CPruneFloatingParser.infer()"); - boolean exploitSucceeds = exploit(); - if (computeExpectedCounts) { - LogInfo.begin_track("Summary of Collaborative Pruning"); - LogInfo.logs("Exploit succeeds: " + exploitSucceeds); - LogInfo.logs("Exploit success rate: " + CollaborativePruner.stats.successfulExploit + "/" + CollaborativePruner.stats.totalExploit); - LogInfo.end_track(); - } - // Explore only on the first training iteration - if (CollaborativePruner.stats.iter.equals("0.train") && computeExpectedCounts && !exploitSucceeds - && (CollaborativePruner.stats.totalExplore <= CollaborativePruner.opts.maxExplorationIters)) { - explore(); - LogInfo.logs("Consistent pattern: " + CollaborativePruner.getConsistentPattern(ex)); - LogInfo.logs("Explore success rate: " + CollaborativePruner.stats.successfulExplore + "/" + CollaborativePruner.stats.totalExplore); - } - LogInfo.end_track(); - } - - public void explore() { - 
LogInfo.begin_track("Explore"); - CollaborativePruner.initialize(ex, CollaborativePruner.Mode.EXPLORE); - ParserState exploreParserState = ((CPruneFloatingParser) parser).exploreParser.newParserState(params, ex, computeExpectedCounts); - exploreParserState.infer(); - predDerivations.clear(); - predDerivations.addAll(exploreParserState.predDerivations); - expectedCounts = exploreParserState.expectedCounts; - if (computeExpectedCounts) { - for (Derivation deriv : predDerivations) - CollaborativePruner.updateConsistentPattern(parser.valueEvaluator, ex, deriv); - } - CollaborativePruner.stats.totalExplore += 1; - if (CollaborativePruner.foundConsistentDerivation) - CollaborativePruner.stats.successfulExplore += 1; - LogInfo.end_track(); - } - - public boolean exploit() { - LogInfo.begin_track("Exploit"); - CollaborativePruner.initialize(ex, CollaborativePruner.Mode.EXPLOIT); - Grammar miniGrammar = new MiniGrammar(CollaborativePruner.predictedRules); - Parser exploitParser = new FloatingParser(new Parser.Spec(miniGrammar, parser.extractor, parser.executor, parser.valueEvaluator)); - ParserState exploitParserState = exploitParser.newParserState(params, ex, computeExpectedCounts); - exploitParserState.infer(); - predDerivations.clear(); - predDerivations.addAll(exploitParserState.predDerivations); - expectedCounts = exploitParserState.expectedCounts; - if (computeExpectedCounts) { - for (Derivation deriv : predDerivations) - CollaborativePruner.updateConsistentPattern(parser.valueEvaluator, ex, deriv); - } - boolean succeeds = CollaborativePruner.foundConsistentDerivation; - CollaborativePruner.stats.totalExploit += 1; - if (succeeds) - CollaborativePruner.stats.successfulExploit += 1; - LogInfo.end_track(); - return succeeds; - } -} - -// ============================================================ -// Helper classes -// ============================================================ - -class MiniGrammar extends Grammar { - - public MiniGrammar(List rules) { - 
this.rules.addAll(rules); - if (CollaborativePruner.opts.verbose >= 2) { - LogInfo.begin_track("MiniGrammar Rules"); - for (Rule rule : rules) - LogInfo.logs("%s %s", rule, rule.isAnchored() ? "[A]" : "[F]"); - LogInfo.end_track(); - } - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cprune/CPruneStats.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cprune/CPruneStats.java deleted file mode 100644 index 6892bc3484..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cprune/CPruneStats.java +++ /dev/null @@ -1,20 +0,0 @@ -package edu.stanford.nlp.sempre.cprune; - -/** - * Stores various statistic. - */ -public class CPruneStats { - public String iter; - public int totalExplore = 0; - public int successfulExplore = 0; - public int totalExploit = 0; - public int successfulExploit = 0; - - public void reset(String iter) { - this.iter = iter; - this.totalExplore = 0; - this.successfulExplore = 0; - this.totalExploit = 0; - this.successfulExploit = 0; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cprune/CollaborativePruner.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cprune/CollaborativePruner.java deleted file mode 100644 index c65a1c9210..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cprune/CollaborativePruner.java +++ /dev/null @@ -1,187 +0,0 @@ -package edu.stanford.nlp.sempre.cprune; - -import java.io.*; -import java.util.*; - -import fig.basic.*; -import edu.stanford.nlp.sempre.*; - -/** - * Static class for collaborative pruning. 
- */ -public class CollaborativePruner { - public static class Options { - @Option(gloss = "Logging verbosity") - public int verbose = 0; - @Option(gloss = "K = Maximum number of nearest-neighbor examples to consider (-1 to use all examples so far)") - public int maxNumNeighbors = -1; - @Option(gloss = "Load cached neighbors from this file") - public String neighborFilePath = null; - @Option(gloss = "Maximum number of matching patterns (default = use all patterns)") - public int maxPredictedPatterns = Integer.MAX_VALUE; - @Option(gloss = "Maximum number of derivations per example") - public int maxDerivations = 5000; - @Option(gloss = "Maximum number of times to fall back to exploration") - public int maxExplorationIters = Integer.MAX_VALUE; - } - - public static Options opts = new Options(); - - public enum Mode { EXPLORE, EXPLOIT, NONE } - - public static Mode mode = Mode.NONE; - public static CPruneStats stats = new CPruneStats(); - public static CustomGrammar customGrammar = new CustomGrammar(); - - // Static class; do not instantiate - private CollaborativePruner() { throw new RuntimeException("Cannot instantiate CollaborativePruner"); } - - // Global variables - // Nearest neighbors - static Map> uidToCachedNeighbors; - // uid => pattern - static Map consistentPattern = new HashMap<>(); - // patternString => customRuleString - static Map> customRules = new HashMap<>(); - // set of patternStrings - static Set allConsistentPatterns = new HashSet<>(); - - // Example-level variables - public static boolean foundConsistentDerivation = false; - public static Map predictedPatterns; - public static List predictedRules; - - /** - * Read the cached neighbors file. - * Line Format: ex_id [tab] neighbor_id1,neighbor_id2,... 
- */ - public static void loadNeighbors() { - if (opts.neighborFilePath == null) { - LogInfo.logs("neighborFilePath is null."); - return; - } - LogInfo.begin_track("Loading cached neighbors from %s", opts.neighborFilePath); - uidToCachedNeighbors = new HashMap<>(); - try { - BufferedReader reader = IOUtils.openIn(opts.neighborFilePath); - String line; - while ((line = reader.readLine()) != null) { - String[] tokens = line.split("\t"); - String uid = tokens[0]; - String[] nids = tokens[1].split(","); - uidToCachedNeighbors.put(uid, Arrays.asList(nids)); - } - reader.close(); - } catch (IOException e) { - throw new RuntimeException(e); - } - LogInfo.end_track(); - } - - public static void initialize(Example ex, Mode mode) { - CollaborativePruner.mode = mode; - predictedRules = null; - predictedPatterns = null; - foundConsistentDerivation = false; - if (mode == Mode.EXPLOIT) { - preprocessExample(ex); - } - } - - static void preprocessExample(Example ex) { - Map patternFreqMap = new HashMap<>(); - List cachedNeighbors = uidToCachedNeighbors.get(ex.id); - int total = 0; - - // Gather the neighbors - if (opts.maxNumNeighbors > 0) { - for (String nid : cachedNeighbors) { - // Only get examples that have been previously processed + found a consistent formula - if (!consistentPattern.containsKey(nid)) - continue; - - String neighborPattern = consistentPattern.get(nid).pattern; - if (!patternFreqMap.containsKey(neighborPattern)) - patternFreqMap.put(neighborPattern, new FormulaPattern(neighborPattern, 0)); - patternFreqMap.get(neighborPattern).frequency++; - total++; - if (total >= opts.maxNumNeighbors) - break; - } - } else { - for (String patternString : allConsistentPatterns) { - patternFreqMap.put(patternString, new FormulaPattern(patternString, 1)); - } - } - - // Sort by frequency (more frequent = smaller; see FormulaPattern.compareTo) - List> patternFreqEntries = new ArrayList<>(patternFreqMap.entrySet()); - patternFreqEntries.sort(new ValueComparator<>(false)); - - 
// Gather the patterns - LogInfo.begin_track("Predicted patterns"); - int rank = 0; - Set predictedRulesStrings = new HashSet<>(); - predictedPatterns = new HashMap<>(); - for (Map.Entry entry : patternFreqEntries) { - FormulaPattern newPattern = entry.getValue(); - predictedPatterns.put(newPattern.pattern, newPattern); - predictedRulesStrings.addAll(customRules.get(newPattern.pattern)); - LogInfo.logs((rank + 1) + ". " + newPattern.pattern + " (" + newPattern.frequency + ")"); - rank++; - if (rank >= opts.maxPredictedPatterns) - break; - } - // Gather the rules - predictedRules = customGrammar.getRules(predictedRulesStrings); - LogInfo.end_track(); - } - - public static String getPatternString(Derivation deriv) { - if (deriv.cat.equals("$TOKEN") || deriv.cat.equals("$PHRASE") - || deriv.cat.equals("$LEMMA_TOKEN") || deriv.cat.equals("$LEMMA_PHRASE")) { - return deriv.cat; - } else { - return FormulaPattern.convertToIndexedPattern(deriv); - } - } - - public static void addRules(String patternString, Derivation deriv, Example ex) { - if (!customRules.containsKey(patternString)) { - customRules.put(patternString, new HashSet()); - } - Set parsedCustomRules = customGrammar.addCustomRule(deriv, ex); - customRules.get(patternString).addAll(parsedCustomRules); - } - - /** - * Get called when a (consistent) formula is found. - * Update the consistent patterns. 
- */ - public static void updateConsistentPattern(ValueEvaluator evaluator, Example ex, Derivation deriv) { - String uid = ex.id; - if (ex.targetValue != null) - deriv.compatibility = evaluator.getCompatibility(ex.targetValue, deriv.value); - - if (deriv.isRootCat() && deriv.compatibility == 1) { - foundConsistentDerivation = true; - LogInfo.logs("Found consistent deriv: %s", deriv); - - String patternString = getPatternString(deriv); - FormulaPattern newConsistentPattern = new FormulaPattern(patternString, 0); - newConsistentPattern.score = deriv.getScore(); - - FormulaPattern oldConsistentPattern = consistentPattern.get(uid); - if (oldConsistentPattern == null || newConsistentPattern.score > oldConsistentPattern.score) { - addRules(patternString, deriv, ex); - consistentPattern.put(uid, newConsistentPattern); - allConsistentPatterns.add(patternString); - } - } - } - - public static FormulaPattern getConsistentPattern(Example ex) { - return consistentPattern.get(ex.id); - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cprune/CustomGrammar.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cprune/CustomGrammar.java deleted file mode 100644 index 1dbca835c9..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cprune/CustomGrammar.java +++ /dev/null @@ -1,268 +0,0 @@ -package edu.stanford.nlp.sempre.cprune; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import fig.basic.*; - -public class CustomGrammar extends Grammar { - public static class Options { - @Option(gloss = "Whether to decompose the templates into multiple rules") - public boolean enableTemplateDecomposition = true; - } - - public static Options opts = new Options(); - - public static final Set baseCategories = new HashSet(Arrays.asList( - Rule.tokenCat, Rule.phraseCat, Rule.lemmaTokenCat, Rule.lemmaPhraseCat, - "$Unary", "$Binary", "$Entity", "$Property")); - - ArrayList baseRules = new ArrayList<>(); - // symbolicFormulas => symbolicFormula ID 
- Map symbolicFormulas = new HashMap<>(); - // indexedSymbolicFormula => customRuleString - Map> customRules = new HashMap<>(); - // customRuleString => Binarized rules - Map> customBinarizedRules = new HashMap<>(); - - public void init(Grammar initGrammar) { - baseRules = new ArrayList<>(); - for (Rule rule : initGrammar.getRules()) { - if (baseCategories.contains(rule.lhs)) { - baseRules.add(rule); - } - } - this.freshCatIndex = initGrammar.getFreshCatIndex(); - } - - public List getRules(Collection customRuleStrings) { - Set ruleSet = new LinkedHashSet<>(); - ruleSet.addAll(baseRules); - for (String ruleString : customRuleStrings) { - ruleSet.addAll(customBinarizedRules.get(ruleString)); - } - return new ArrayList(ruleSet); - } - - public Set addCustomRule(Derivation deriv, Example ex) { - String indexedSymbolicFormula = getIndexedSymbolicFormula(deriv); - if (customRules.containsKey(indexedSymbolicFormula)) { - return customRules.get(indexedSymbolicFormula); - } - - CPruneDerivInfo derivInfo = aggregateSymbols(deriv); - Set crossReferences = new HashSet<>(); - for (Symbol symbol : derivInfo.treeSymbols.values()) { - if (symbol.frequency > 1) { - crossReferences.add(symbol.formula); - } - } - computeCustomRules(deriv, crossReferences); - customRules.put(indexedSymbolicFormula, new HashSet(derivInfo.customRuleStrings)); - - LogInfo.begin_track("Add custom rules for formula: " + indexedSymbolicFormula); - for (String customRuleString : derivInfo.customRuleStrings) { - if (customBinarizedRules.containsKey(customRuleString)) { - LogInfo.log("Custom rule exists: " + customRuleString); - continue; - } - - rules = new ArrayList<>(); - LispTree tree = LispTree.proto.parseFromString(customRuleString); - interpretRule(tree); - customBinarizedRules.put(customRuleString, new HashSet(rules)); - - // Debug - LogInfo.begin_track("Add custom rule: " + customRuleString); - for (Rule rule : rules) { - LogInfo.log(rule.toString()); - } - LogInfo.end_track(); - } - 
LogInfo.end_track(); - - // Debug - System.out.println("consistent_lf\t" + ex.id + "\t" + deriv.formula.toString()); - - return customRules.get(indexedSymbolicFormula); - } - - public static String getIndexedSymbolicFormula(Derivation deriv) { - return getIndexedSymbolicFormula(deriv, deriv.formula.toString()); - } - - /** - * Replace symbols (e.g., fb:row.row.name) with placeholders (e.g., Binary#1). - */ - public static String getIndexedSymbolicFormula(Derivation deriv, String formula) { - CPruneDerivInfo derivInfo = aggregateSymbols(deriv); - int index = 1; - List symbolList = new ArrayList<>(derivInfo.treeSymbols.values()); - for (Symbol symbol : symbolList) - symbol.computeIndex(formula); - Collections.sort(symbolList); - for (Symbol symbol : symbolList) { - if (formula.equals(symbol.formula)) - formula = symbol.category + "#" + index; - formula = safeReplace(formula, symbol.formula, symbol.category + "#" + index); - index += 1; - } - return formula; - } - - // ============================================================ - // Private methods - // ============================================================ - - private static String safeReplace(String formula, String target, String replacement) { - // (argmin 1 1 ...) and (argmax 1 1 ...) 
are troublesome - String before = formula, targetBefore = target; - formula = formula.replace("(argmin (number 1) (number 1)", "(ARGMIN"); - formula = formula.replace("(argmax (number 1) (number 1)", "(ARGMAX"); - target = target.replace("(argmin (number 1) (number 1)", "(ARGMIN"); - target = target.replace("(argmax (number 1) (number 1)", "(ARGMAX"); - formula = formula.replace(target + ")", replacement + ")"); - formula = formula.replace(target + " ", replacement + " "); - formula = formula.replace("(ARGMIN", "(argmin (number 1) (number 1)"); - formula = formula.replace("(ARGMAX", "(argmax (number 1) (number 1)"); - if (CollaborativePruner.opts.verbose >= 2) - LogInfo.logs("REPLACE: [%s | %s] %s | %s", targetBefore, replacement, before, formula); - return formula; - } - - /** - * Cache the symbols in deriv.tempState[cprune].treeSymbols - */ - private static CPruneDerivInfo aggregateSymbols(Derivation deriv) { - Map tempState = deriv.getTempState(); - if (tempState.containsKey("cprune")) { - return (CPruneDerivInfo) tempState.get("cprune"); - } - CPruneDerivInfo derivInfo = new CPruneDerivInfo(); - tempState.put("cprune", derivInfo); - - Map treeSymbols = new LinkedHashMap<>(); - derivInfo.treeSymbols = treeSymbols; - if (baseCategories.contains(deriv.cat)) { - String formula = deriv.formula.toString(); - treeSymbols.put(formula, new Symbol(deriv.cat, formula, 1)); - } else { - for (Derivation child : deriv.children) { - CPruneDerivInfo childInfo = aggregateSymbols(child); - for (Symbol symbol : childInfo.treeSymbols.values()) { - if (derivInfo.treeSymbols.containsKey(symbol.formula)) { - treeSymbols.get(symbol.formula).frequency += symbol.frequency; - } else { - treeSymbols.put(symbol.formula, symbol); - } - } - } - } - return derivInfo; - } - - private CPruneDerivInfo computeCustomRules(Derivation deriv, Set crossReferences) { - CPruneDerivInfo derivInfo = (CPruneDerivInfo) deriv.getTempState().get("cprune"); - Map ruleSymbols = new LinkedHashMap<>(); - 
derivInfo.ruleSymbols = ruleSymbols; - derivInfo.customRuleStrings = new ArrayList<>(); - String formula = deriv.formula.toString(); - - if (baseCategories.contains(deriv.cat)) { - // Leaf node induces no custom rule - derivInfo.containsCrossReference = crossReferences.contains(formula); - // Propagate the symbol of this derivation to the parent - ruleSymbols.putAll(derivInfo.treeSymbols); - } else { - derivInfo.containsCrossReference = false; - for (Derivation child : deriv.children) { - CPruneDerivInfo childInfo = computeCustomRules(child, crossReferences); - derivInfo.containsCrossReference = derivInfo.containsCrossReference || childInfo.containsCrossReference; - } - - for (Derivation child : deriv.children) { - CPruneDerivInfo childInfo = (CPruneDerivInfo) child.getTempState().get("cprune"); - ruleSymbols.putAll(childInfo.ruleSymbols); - derivInfo.customRuleStrings.addAll(childInfo.customRuleStrings); - } - - if (opts.enableTemplateDecomposition == false || derivInfo.containsCrossReference) { - // If this node contains a cross reference - if (deriv.isRootCat()) { - // If this is the root node, then generate a custom rule - derivInfo.customRuleStrings.add(getCustomRuleString(deriv, derivInfo)); - } - } else { - if (!deriv.cat.startsWith("$Intermediate")) { - // Generate a custom rule for this node - derivInfo.customRuleStrings.add(getCustomRuleString(deriv, derivInfo)); - - // Propagate this derivation as a category to the parent - ruleSymbols.clear(); - ruleSymbols.put(formula, new Symbol(hash(deriv), deriv.formula.toString(), 1)); - } - } - } - return derivInfo; - } - - private String getCustomRuleString(Derivation deriv, CPruneDerivInfo derivInfo) { - String formula = deriv.formula.toString(); - List rhsSymbols = new ArrayList<>(derivInfo.ruleSymbols.values()); - for (Symbol symbol : rhsSymbols) - symbol.computeIndex(formula); - Collections.sort(rhsSymbols); - - String lhs = null; - if (derivInfo.containsCrossReference) - lhs = deriv.cat; - else - lhs = 
deriv.isRootCat() ? "$ROOT" : hash(deriv); - - LinkedList rhsList = new LinkedList<>(); - int index = 1; - for (Symbol symbol : rhsSymbols) { - if (formula.equals(symbol.formula)) { - formula = "(IdentityFn)"; - } else { - formula = safeReplace(formula, symbol.formula, "(var s" + index + ")"); - formula = "(lambda s" + index + " " + formula + ")"; - } - rhsList.addFirst(symbol.category); - index += 1; - } - String rhs = null; - if (rhsList.size() > 0) { - rhs = "(" + String.join(" ", rhsList) + ")"; - } else { - rhs = "(nothing)"; - formula = "(ConstantFn " + formula + ")"; - } - return "(rule " + lhs + " " + rhs + " " + formula + ")"; - } - - private String hash(Derivation deriv) { - if (baseCategories.contains(deriv.cat)) - return deriv.cat; - - String formula = getSymbolicFormula(deriv); - if (!symbolicFormulas.containsKey(formula)) { - symbolicFormulas.put(formula, symbolicFormulas.size() + 1); - String hashString = "$Formula" + symbolicFormulas.get(formula); - LogInfo.log("Add symbolic formula: " + hashString + " = " + formula + " (" + deriv.cat + ")"); - } - return "$Formula" + symbolicFormulas.get(formula); - } - - private static String getSymbolicFormula(Derivation deriv) { - CPruneDerivInfo derivInfo = aggregateSymbols(deriv); - String formula = deriv.formula.toString(); - for (Symbol symbol : derivInfo.treeSymbols.values()) { - if (formula.equals(symbol.formula)) - formula = symbol.category; - formula = safeReplace(formula, symbol.formula, symbol.category); - } - return formula; - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cprune/FormulaPattern.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cprune/FormulaPattern.java deleted file mode 100644 index 0c83d046c3..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cprune/FormulaPattern.java +++ /dev/null @@ -1,106 +0,0 @@ -package edu.stanford.nlp.sempre.cprune; - -import java.util.regex.Pattern; - -import edu.stanford.nlp.sempre.Derivation; -import 
fig.basic.LogInfo; - -public class FormulaPattern implements Comparable { - public String pattern; - public Integer frequency; - public Double score; - - public FormulaPattern(String pattern, Integer frequency) { - this.pattern = pattern; - this.frequency = frequency; - } - - public Double complexity() { - // Roughly the number of predicates - return (double) (pattern.length() - pattern.replace("(@R", "***").replace("(", "").length()); - } - - @Override - public String toString() { - return "(" + pattern + ", " + frequency + ")"; - } - - @Override - public int compareTo(FormulaPattern that) { - if (this.frequency > that.frequency) { - return -1; - } else if (this.frequency < that.frequency) { - return 1; - } else { - return this.complexity().compareTo(that.complexity()); - } - } - - // ============================================================ - // Utilities - // ============================================================ - - private static Pattern reverseRelation = Pattern.compile("!(fb:[._a-z0-9]+)"); - private static Pattern varName = Pattern.compile("\\((lambda|var) [a-z0-9]+"); - private static Pattern compare = Pattern.compile("(<=|>=|>|<)"); - private static Pattern whitespace = Pattern.compile("\\s+"); - - public static String convertToIndexedPattern(Derivation deriv) { - String formula = deriv.formula.toString(); - - // These can interfere with (number 1) - formula = formula.replace("argmax (number 1) (number 1)", "argmax"); - formula = formula.replace("argmin (number 1) (number 1)", "argmin"); - - formula = removePropertyPredicates(formula); - formula = CustomGrammar.getIndexedSymbolicFormula(deriv, formula); - - formula = formula.replace("fb:type.object.type fb:type.row", "@type @row"); - formula = reverseRelation.matcher(formula).replaceAll("(reverse $1)"); - formula = formula.replace("fb:row.row.index", "(reverse (lambda x ((reverse @index) (var x))))"); - formula = formula.replace("fb:row.row.next", "@next"); - formula = 
varName.matcher(formula).replaceAll("($1 x"); - formula = formula.replace("reverse", "@R"); - formula = compare.matcher(formula).replaceAll("@compare"); - formula = whitespace.matcher(formula).replaceAll(" "); - - if (CollaborativePruner.opts.verbose >= 2) - LogInfo.logs("PATTERN: %s -> %s", deriv.formula, formula); - return formula; - } - - private static Pattern cellProperty = Pattern.compile("!?fb:cell\\.cell\\.[_a-z0-9]+|\\(reverse fb:cell\\.cell\\.[_a-z0-9]+\\)"); - - /** - * Remove cell property relations (fb:cell.cell.*) - */ - public static String removePropertyPredicates(String formula) { - formula = cellProperty.matcher(formula).replaceAll("@PPT"); - while (formula.contains("@PPT")) { - int begin = formula.indexOf("(@PPT"); - if (begin == -1) { - formula = formula.replace("@PPT", ""); - break; - } - // Find the matching parenthesis - int count = 1; - for (int i = begin + 1; i < formula.length(); i++) { - if (formula.charAt(i) == '(') { - count++; - } else if (formula.charAt(i) == ')') { - count--; - if (count == 0) { - int end = i; - formula = formula.substring(0, begin) + formula.substring(begin + 6, end) + formula.substring(end + 1, formula.length()); - break; - } - } - if (i == formula.length() - 1) { - LogInfo.fails("Unbalanced parentheses: %s", formula); - } - } - } - return formula; - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cprune/Symbol.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cprune/Symbol.java deleted file mode 100644 index 75d8537af5..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/cprune/Symbol.java +++ /dev/null @@ -1,31 +0,0 @@ -package edu.stanford.nlp.sempre.cprune; - -/** - * Represents the leaf node of the parse tree. - * - * Any sub-derivation whose category is in CustomGrammar.baseCategories becomes a Symbol. 
- */ -public class Symbol implements Comparable { - String category; - String formula; - Integer frequency; - Integer index; - - public Symbol(String category, String formula, int frequency) { - this.category = category; - this.formula = formula; - this.frequency = frequency; - } - - public void computeIndex(String referenceString) { - index = referenceString.indexOf(formula); - if (index < 0) { - index = Integer.MAX_VALUE; - } - } - - @Override - public int compareTo(Symbol that) { - return index.compareTo(that.index); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/BinaryLexicon.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/BinaryLexicon.java deleted file mode 100644 index c31d6c5534..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/BinaryLexicon.java +++ /dev/null @@ -1,282 +0,0 @@ -package edu.stanford.nlp.sempre.freebase; - -import com.google.common.base.Strings; -import edu.stanford.nlp.io.IOUtils; -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.freebase.FbFormulasInfo.BinaryFormulaInfo; -import edu.stanford.nlp.sempre.freebase.lexicons.EntrySource; -import edu.stanford.nlp.sempre.freebase.lexicons.LexicalEntry.BinaryLexicalEntry; -import edu.stanford.nlp.sempre.freebase.lexicons.LexicalEntry.LexiconValue; -import edu.stanford.nlp.sempre.freebase.lexicons.normalizers.EntryNormalizer; -import edu.stanford.nlp.sempre.freebase.lexicons.normalizers.PrepDropNormalizer; -import fig.basic.*; -import fig.exec.Execution; - -import java.io.IOException; -import java.io.PrintWriter; -import java.util.*; - -/** - * Lexicon for binary predicates, "born" --> fb:people.person.place_of_birth - * @author jonathanberant - */ -public final class BinaryLexicon { - - public static class Options { - @Option(gloss = "Number of results return by the lexicon") - public int maxEntries = 1000; - @Option(gloss = "Path to binary lexicon files") - public String binaryLexiconFilesPath = 
"lib/fb_data/7/binaryInfoStringAndAlignment.txt"; - @Option(gloss = "Verbosity") public int verbose = 0; - @Option(gloss = "Alignment score to sort by") - public String keyToSortBy = INTERSECTION; - } - - private static BinaryLexicon binaryLexicon; - public static BinaryLexicon getInstance() { - if (binaryLexicon == null) - try { - binaryLexicon = new BinaryLexicon(); - } catch (IOException e) { - throw new RuntimeException(e); - } - return binaryLexicon; - } - - public static Options opts = new Options(); - - private EntryNormalizer lexiconLoadingNormalizer; - private FbFormulasInfo fbFormulasInfo; - - public static final String INTERSECTION = "Intersection_size_typed"; - - Map> lexemeToEntryList = new HashMap<>(); - - private BinaryLexicon() throws IOException { - if (Strings.isNullOrEmpty(opts.binaryLexiconFilesPath)) - throw new RuntimeException("Missing unary lexicon file"); - fbFormulasInfo = FbFormulasInfo.getSingleton(); - // if we omit prepositions then the lexicon normalizer does that, otherwise, it is a normalizer that does nothing - lexiconLoadingNormalizer = new PrepDropNormalizer(); // the alignment lexicon already contains stemmed stuff so just need to drop prepositions - read(opts.binaryLexiconFilesPath); - } - - private void read(String lexiconFile) throws IOException { - - LogInfo.begin_track_printAll("Loading binary lexicon file " + lexiconFile); - for (String line : IOUtils.readLines(lexiconFile)) { - LexiconValue lv = Json.readValueHard(line, LexiconValue.class); - String lexemeKey = lv.lexeme; - String normalizedLexemeKey = lexiconLoadingNormalizer.normalize(lexemeKey); - // add lexeme and normalized lexeme - addEntryToMap(lexemeKey, lv); - if (!lexemeKey.equals(normalizedLexemeKey)) { - addEntryToMap(normalizedLexemeKey, lv); - } - } - sortLexiconEntries(); - LogInfo.log("Number of entries: " + lexemeToEntryList.size()); - LogInfo.end_track(); - } - - public void addEntryToMap(String lexemeKey, LexiconValue lv) { - List bEntries = 
buildEntry(lv, lexemeKey); - for (BinaryLexicalEntry bEntry : bEntries) - MapUtils.addToList(lexemeToEntryList, lexemeKey, bEntry); - } - - private void sortLexiconEntries() { - for (List entries: lexemeToEntryList.values()) { - Collections.sort(entries, new BinaryLexEntryByCounterComparator()); - } - } - - public List buildEntry(LexiconValue lexValue, String lexemeKey) { - - EntrySource source = EntrySource.parseSourceDesc(lexValue.source); - BinaryFormulaInfo info = fbFormulasInfo.getBinaryInfo(lexValue.formula); - - if (!validBinaryFormula(lexValue.formula)) - return Collections.emptyList(); - - if (info == null) { - if (opts.verbose >= 3) - LogInfo.log("BinaryLexicon: skipping entry since there is no info for formula: " + lexValue.formula.toString()); - return Collections.emptyList(); - } - // get alignment features - Map alignmentScores = new TreeMap<>(lexValue.features); - - if (fbFormulasInfo.isCvtFormula(info) && source == EntrySource.STRING_MATCH) { - - List entries = new ArrayList<>(); - for (BinaryFormulaInfo cvtInfo : fbFormulasInfo.getCvtExpansions(info)) { - entries.add( - new BinaryLexicalEntry( - lexemeKey, lexemeKey, new HashSet<>(cvtInfo.descriptions), cvtInfo.formula, source, - cvtInfo.popularity, cvtInfo.expectedType1, cvtInfo.expectedType2, cvtInfo.unitId, cvtInfo.unitDesc, alignmentScores, lexValue.lexeme)); - } - return entries; - } else { - BinaryLexicalEntry entry = new BinaryLexicalEntry( - lexemeKey, lexemeKey, new HashSet<>(info.descriptions), lexValue.formula, source, - info.popularity, info.expectedType1, info.expectedType2, info.unitId, info.unitDesc, alignmentScores, lexValue.lexeme); - return Collections.singletonList(entry); - } - } - - public List lookupEntries(String textDesc) throws IOException { - List entries = lexemeToEntryList.get(textDesc.toLowerCase()); - if (entries != null) { - List res = new ArrayList<>(); - for (int i = 0; i < Math.min(entries.size(), opts.maxEntries); ++i) { - res.add(entries.get(i)); - } - return 
res; - } - return Collections.emptyList(); - } - - /** If the property has a reverse, keep it if it reversed*/ - public boolean validBinaryFormula(Formula formula) { - if (fbFormulasInfo.hasOpposite(formula)) { - boolean valid = fbFormulasInfo.isReversed(formula); - if (opts.verbose >= 3) { - if (!valid) - LogInfo.logs("BinaryLexicon: invalid formula: %s", formula); - else - LogInfo.logs("BinaryLexicon: valid formula: %s", formula); - } - return valid; - } - return true; - } - - public void updateLexicon(Pair lexemeFormulaPair, int support) { - StopWatchSet.begin("BinaryLexicon.updateLexicon"); - if (opts.verbose > 0) - LogInfo.logs("Pair=%s, score=%s", lexemeFormulaPair, support); - boolean exists = false; - String lexeme = lexemeFormulaPair.getFirst(); - Formula formula = lexemeFormulaPair.getSecond(); - - List bEntries = MapUtils.get(lexemeToEntryList, lexeme, Collections.emptyList()); - for (BinaryLexicalEntry bEntry : bEntries) { - if (bEntry.formula.equals(formula)) { - bEntry.alignmentScores.put("Feedback", (double) support); - if (opts.verbose > 0) - LogInfo.logs("Entry exists: %s", bEntry); - exists = true; - break; - } - } - if (!exists) { - BinaryFormulaInfo bInfo = fbFormulasInfo.getBinaryInfo(formula); - if (bInfo == null) { - LogInfo.warnings("BinaryLexicon.updateLexicon: no binary info for %s", formula); - return; - } - BinaryLexicalEntry newEntry = - new BinaryLexicalEntry( - lexeme, lexeme, new HashSet<>(bInfo.descriptions), bInfo.formula, EntrySource.FEEDBACK, - bInfo.popularity, bInfo.expectedType1, bInfo.expectedType2, bInfo.unitId, bInfo.unitDesc, new HashMap<>(), lexeme); - MapUtils.addToList(lexemeToEntryList, lexeme, newEntry); - newEntry.alignmentScores.put("Feedback", (double) support); - LogInfo.logs("Adding new binary entry=%s", newEntry); - - } - StopWatchSet.end(); - } - - public void sortLexiconByFeedback(Params params) { - StopWatchSet.begin("BinaryLexicon.sortLexiconByFeedback"); - LogInfo.log("Number of entries: " + 
lexemeToEntryList.size()); - BinaryLexEntrybyFeaturesComparator comparator = - new BinaryLexEntrybyFeaturesComparator(params); - for (String lexeme : lexemeToEntryList.keySet()) { - Collections.sort(lexemeToEntryList.get(lexeme), comparator); - if (opts.verbose > 1) { - LogInfo.logs("Sorted list for lexeme=%s", lexeme); - for (BinaryLexicalEntry bEntry : lexemeToEntryList.get(lexeme)) { - FeatureVector fv = new FeatureVector(); - LexiconFn.getBinaryEntryFeatures(bEntry, fv); - LogInfo.logs("Entry=%s, dotprod=%s", bEntry, fv.dotProduct(comparator.params)); - } - } - } - try { - // Output the lexicon to the execution directory. - String path = Execution.getFile("lexicon"); - if (path != null) { - PrintWriter writer = fig.basic.IOUtils.openOut(path); - for (String lexeme : lexemeToEntryList.keySet()) { - writer.println(lexeme + "\t" + lexemeToEntryList.get(lexeme)); - } - writer.flush(); - } - } catch (IOException e) { - throw new RuntimeException(e); - } - - - StopWatchSet.end(); - } - - public class BinaryLexEntrybyFeaturesComparator implements Comparator { - - public final Params params; - public BinaryLexEntrybyFeaturesComparator(Params params) { - this.params = params; - } - @Override - public int compare(BinaryLexicalEntry entry1, BinaryLexicalEntry entry2) { - - FeatureVector features1 = new FeatureVector(); - FeatureVector features2 = new FeatureVector(); - LexiconFn.getBinaryEntryFeatures(entry1, features1); - LexiconFn.getBinaryEntryFeatures(entry2, features2); - double score1 = features1.dotProduct(params); - double score2 = features2.dotProduct(params); - if (score1 > score2) return -1; - if (score1 < score2) return +1; - // back off to usual thing - double entry1Score = MapUtils.getDouble(entry1.alignmentScores, opts.keyToSortBy, 0.0); - double entry2Score = MapUtils.getDouble(entry2.alignmentScores, opts.keyToSortBy, 0.0); - - if (entry1Score > entry2Score) - return -1; - if (entry1Score < entry2Score) - return +1; - if (entry1.popularity > 
entry2.popularity) - return -1; - if (entry1.popularity < entry2.popularity) - return +1; - return 0; - } - } - - public class BinaryLexEntryByCounterComparator implements Comparator { - - @Override - public int compare(BinaryLexicalEntry entry1, BinaryLexicalEntry entry2) { - double entry1Score = MapUtils.getDouble(entry1.alignmentScores, opts.keyToSortBy, 0.0); - double entry2Score = MapUtils.getDouble(entry2.alignmentScores, opts.keyToSortBy, 0.0); - - if (entry1Score > entry2Score) - return -1; - if (entry1Score < entry2Score) - return +1; - if (entry1.popularity > entry2.popularity) - return -1; - if (entry1.popularity < entry2.popularity) - return +1; - // to do - this is to break ties - make more efficient - int stringComparison = entry1.formula.toString().compareTo(entry2.formula.toString()); - if (stringComparison < 0) - return -1; - if (stringComparison > 0) - return +1; - return 0; - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/BridgeFn.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/BridgeFn.java deleted file mode 100644 index e05a48d1be..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/BridgeFn.java +++ /dev/null @@ -1,548 +0,0 @@ -package edu.stanford.nlp.sempre.freebase; - -import edu.stanford.nlp.sempre.freebase.FbFormulasInfo.BinaryFormulaInfo; -import edu.stanford.nlp.sempre.MergeFormula.Mode; -import fig.basic.LispTree; -import fig.basic.LogInfo; -import fig.basic.Option; -import edu.stanford.nlp.sempre.LanguageInfo.DependencyEdge; -import edu.stanford.nlp.sempre.*; - -import java.io.IOException; -import java.util.*; - -/** - * Bridge between two derivations by type-raising one of them. 
- * @author jonathanberant - */ -public class BridgeFn extends SemanticFn { - - private static final Formula intFormula = Formulas.fromLispTree(LispTree.proto.parseFromString("(fb:type.object.type fb:type.int)")); - private static final Formula floatFormula = Formulas.fromLispTree(LispTree.proto.parseFromString("(fb:type.object.type fb:type.float)")); - - public static class Options { - @Option (gloss = "Verbose") public int verbose = 0; - @Option (gloss = "Whether to have binary predicate features (ovrefits on small data)") - public boolean useBinaryPredicateFeatures = true; - @Option (gloss = "Whether to filter bad domains such as user and common") - public boolean filterBadDomain = true; - } - - public static Options opts = new Options(); - - private FbFormulasInfo fbFormulaInfo = null; - private String description; - private boolean headFirst; - private TextToTextMatcher textToTextMatcher; - - public void init(LispTree tree) { - super.init(tree); - if (tree.children.size() != 3) - throw new RuntimeException("Number of children is: " + tree.children.size()); - if (!tree.child(2).value.equals("headFirst") && !tree.child(2).value.equals("headLast")) - throw new RuntimeException("Bad argument for head position: " + tree.child(2).value); - if (!tree.child(1).value.equals("unary") && !tree.child(1).value.equals("inject") && !tree.child(1).value.equals("entity")) - throw new RuntimeException("Bad description: " + tree.child(1).value); - - this.description = tree.child(1).value; - headFirst = tree.child(2).value.equals("headFirst"); - } - - public BridgeFn() { - fbFormulaInfo = FbFormulasInfo.getSingleton(); - textToTextMatcher = TextToTextMatcher.getSingleton(); - } - - @Override - public DerivationStream call(Example ex, Callable c) { - try { - switch (description) { - case "unary": - return bridgeUnary(ex, c); - case "inject": - return injectIntoCvt(ex, c); - case "entity": - return bridgeEntity(ex, c); - default: - throw new RuntimeException("Invalid (expected 
unary, inject, or entity): " + description); - } - } catch (Exception e) { - e.printStackTrace(); - throw new RuntimeException(e); - } - } - - @Override - public void sortOnFeedback(Params params) { - LogInfo.begin_track("Learner.BridgeFeedback"); - FbFormulasInfo fbFormulasInfo = FbFormulasInfo.getSingleton(); - Comparator feedbackComparator = fbFormulasInfo.new FormulaByFeaturesComparator(params); - fbFormulasInfo.sortType2ToBinaryMaps(feedbackComparator); - LogInfo.end_track(); - } - - - private boolean isCvt(Derivation headDeriv) { - if (!(headDeriv.formula instanceof JoinFormula)) - return false; - JoinFormula join = (JoinFormula) headDeriv.formula; - return join.relation instanceof LambdaFormula || - join.child instanceof JoinFormula || join.child instanceof MergeFormula; - } - - // Return all the entity supertypes of |type|. - // TODO(joberant): make this more efficient. - private Set getSupertypes(SemType type, Set supertypes) { - if (type instanceof AtomicSemType) - supertypes.addAll(SemTypeHierarchy.singleton.getSupertypes(((AtomicSemType) type).name)); - else if (type instanceof UnionSemType) - for (SemType baseType : ((UnionSemType) type).baseTypes) - getSupertypes(baseType, supertypes); - else { - // TODO(joberant): FIXME HACK for when passing binary into lambda formula and - // getSuperTypes doesn't work - getSupertypes(SemType.fromString("topic"), supertypes); - // throw new RuntimeException("Unexpected type (must be unary): " + type); - } - return supertypes; - } - - private DerivationStream bridgeUnary(Example ex, Callable c) throws IOException { - - assert ex != null; - // Example (headFirst = false): modifier[Hanks] head[movies] - Derivation headDeriv = headFirst ? c.child(0) : c.child(1); - Derivation modifierDeriv = !headFirst ? 
c.child(0) : c.child(1); - - Set headTypes = getSupertypes(headDeriv.type, new HashSet<>()); - Set modifierTypes = getSupertypes(modifierDeriv.type, new HashSet<>()); - ArrayList bridgingInfoList = new ArrayList<>(); - - for (String modifierType : modifierTypes) { // For each head type... - List binaries = fbFormulaInfo.getBinariesForType2(modifierType); - for (Formula binary : binaries) { // For each possible binary... - if (opts.filterBadDomain && badDomain(binary)) - continue; - BinaryFormulaInfo binaryInfo = fbFormulaInfo.getBinaryInfo(binary); - - if (opts.verbose >= 3) - LogInfo.logs("%s => %s", modifierType, binary); - - if (headTypes.contains(binaryInfo.expectedType1)) { - BridgingInfo bridgingInfo = new BridgingInfo(ex, c, binaryInfo, headFirst, headDeriv, modifierDeriv); - bridgingInfoList.add(bridgingInfo); - } - } - } - Collections.sort(bridgingInfoList); - return new LazyBridgeFnDerivs(bridgingInfoList); - } - - // bridge without a unary - simply by looking at binaries leading to the entity and string matching binary description to example tokens/lemmas/stems - private DerivationStream bridgeEntity(Example ex, Callable c) throws IOException { - - assert ex != null; - Derivation modifierDeriv = c.child(0); - Set modifierTypes = getSupertypes(modifierDeriv.type, new HashSet<>()); - ArrayList bridgingInfoList = new ArrayList<>(); - - if (opts.verbose >= 1) - LogInfo.logs("bridgeEntity: %s | %s", modifierDeriv, modifierTypes); - - for (String modifierType : modifierTypes) { // For each head type... - List binaries = fbFormulaInfo.getBinariesForType2(modifierType); - for (Formula binary : binaries) { // For each possible binary... 
- if (opts.filterBadDomain && badDomain(binary)) - continue; - BinaryFormulaInfo binaryInfo = fbFormulaInfo.getBinaryInfo(binary); - - if (opts.verbose >= 3) - LogInfo.logs("%s => %s", modifierType, binary); - - BridgingInfo bridgingInfo = new BridgingInfo(ex, c, binaryInfo, headFirst, null, modifierDeriv); - bridgingInfoList.add(bridgingInfo); - } - } - Collections.sort(bridgingInfoList); - return new LazyBridgeFnDerivs(bridgingInfoList); - } - - private boolean badDomain(String binary) { - return binary.contains("fb:user.") || binary.contains("fb:base.") || binary.contains("fb:dataworld.") || - binary.contains("fb:type.") || binary.contains("fb:common.") || binary.contains("fb:freebase."); - } - - private boolean badDomain(Formula formula) { - if (formula instanceof VariableFormula) return false; - if (formula instanceof ValueFormula) { - return badDomain(formula.toString()); - } - if (formula instanceof JoinFormula) { - JoinFormula jFormula = (JoinFormula) formula; - return badDomain(jFormula.relation) || badDomain(jFormula.child); - } - if (formula instanceof LambdaFormula) { - LambdaFormula lambdaFormula = (LambdaFormula) formula; - return badDomain(lambdaFormula.body); - } - if (formula instanceof ReverseFormula) { - ReverseFormula reverseFormula = (ReverseFormula) formula; - return badDomain(reverseFormula.child); - } - if (formula instanceof NotFormula) { - NotFormula notFormula = (NotFormula) formula; - return badDomain(notFormula.child); - } - throw new RuntimeException("Binary has formula type that is not supported"); - } - - // generate from example array of content word tokens/lemmas/stems that are not dominated by child derivations - private List> generateExampleInfo(Example ex, Callable c) { - - List tokens = new ArrayList<>(); - List posTags = new ArrayList<>(); - List lemmas = new ArrayList<>(); - List> res = new ArrayList<>(); - res.add(tokens); - res.add(posTags); - res.add(lemmas); - - Derivation modifierDeriv = headFirst ? 
c.child(1) : c.child(0); - - for (int i = 0; i < ex.languageInfo.tokens.size(); ++i) { - if (i >= modifierDeriv.start && i < modifierDeriv.end) { // do not consider the modifier words { - continue; - } - tokens.add(ex.languageInfo.tokens.get(i)); - posTags.add(ex.languageInfo.posTags.get(i)); - lemmas.add(ex.languageInfo.lemmaTokens.get(i)); - } - return res; - } - - private DerivationStream injectIntoCvt(Example ex, Callable c) { - assert ex != null; - - if (opts.verbose >= 2) - LogInfo.logs("child1=%s, child2=%s", ex.phrase(c.child(0).start, c.child(0).end), ex.phrase(c.child(1).start, c.child(1).end)); - - // Example: modifier[Braveheart] head[Mel Gibson plays in] - Derivation headDeriv = headFirst ? c.child(0) : c.child(1); - if (!isCvt(headDeriv)) // only works on cvts - return new LazyBridgeFnDerivs(new ArrayList<>()); - Derivation modifierDeriv = !headFirst ? c.child(0) : c.child(1); - JoinFormula headFormula = (JoinFormula) Formulas.betaReduction(headDeriv.formula); - // find the type of the cvt node - Set headTypes = Collections.singleton(fbFormulaInfo.getBinaryInfo(headFormula.relation).expectedType2); - Set modifierTypes = getSupertypes(modifierDeriv.type, new HashSet<>()); - ArrayList bridgingInfoList = new ArrayList<>(); - - for (String modifierType : modifierTypes) { - List binaries = fbFormulaInfo.getAtomicBinariesForType2(modifierType); // here we use atomic binaries since we inject into a CVT - for (Formula binary : binaries) { // For each possible binary... 
- if (opts.filterBadDomain && badDomain(binary)) - continue; - BinaryFormulaInfo info = fbFormulaInfo.getBinaryInfo(binary); - - if (headTypes.contains(info.expectedType1)) { - BridgingInfo bridgingInfo = new BridgingInfo(ex, c, info, headFirst, headDeriv, modifierDeriv); - bridgingInfoList.add(bridgingInfo); - } - } - } - Collections.sort(bridgingInfoList); - return new LazyBridgeFnDerivs(bridgingInfoList); - } - - // Checks whether "var" is used as a binary in "formula" - private boolean varIsBinary(Formula formula, String var) { - boolean isBinary = false; - LispTree tree = formula.toLispTree(); - VariableFormula vf = new VariableFormula(var); - for (LispTree child : tree.children) { - if (child.isLeaf()) - continue; - if (child.children.size() == 2 && vf.equals(Formulas.fromLispTree(child.child(0)))) { - isBinary = true; - break; - } - if (varIsBinary(Formulas.fromLispTree(child), var)) { - isBinary = true; - break; - } - } - return isBinary; - } - - private Formula buildBridge(Formula headFormula, Formula modifierFormula, Formula binary) { - // Handle cases like "what state has the most cities" where "has the" is mapped - // to "contains" predicate via bridging but "most" triggers a nested lambda w/ - // argmax on a count relation - // (Corresponds to $MetaMetaOperator in grammar) - if (modifierFormula instanceof LambdaFormula) { - LambdaFormula lf = (LambdaFormula) modifierFormula; - if (varIsBinary(lf, lf.var)) { - Formula newBinary = Formulas.lambdaApply(lf, binary); - if (newBinary instanceof LambdaFormula) { - return Formulas.lambdaApply((LambdaFormula) newBinary, headFormula); - } - } - } - - Formula join = new JoinFormula(binary, modifierFormula); - Formula merge = new MergeFormula(Mode.and, headFormula, join); - // Don't merge on ints and floats - return (headFormula.equals(intFormula) || headFormula.equals(floatFormula)) ? 
join : merge; - } - - private Formula buildBridgeFromCvt(JoinFormula headFormula, Formula modifierFormula, Formula binary) { - Formula join = new JoinFormula(binary, modifierFormula); - Formula merge = new MergeFormula(Mode.and, headFormula.child, join); - return new JoinFormula(headFormula.relation, merge); - } - - public static FeatureVector getBinaryBridgeFeatures(BinaryFormulaInfo bInfo) { - FeatureVector features = new FeatureVector(); - if (opts.useBinaryPredicateFeatures) - features.add("BridgeFn", "binary=" + bInfo.formula); - features.add("BridgeFn", "domain=" + bInfo.extractDomain(bInfo.formula)); - features.addWithBias("BridgeFn", "popularity", Math.log(bInfo.popularity + 1)); - return features; - } - - /** - * Lazy iterator for bridging - we always have the next derivation ready since it is possible that - * items in the list do not produce a derivation - */ - public class LazyBridgeFnDerivs extends MultipleDerivationStream { - - private ArrayList bridgingInfoList; - private int currIndex = 0; - - public LazyBridgeFnDerivs(ArrayList bridgingInfoList) { - this.bridgingInfoList = bridgingInfoList; - } - - @Override - public int estimatedSize() { - return bridgingInfoList.size() - currIndex; - } - - @Override - public Derivation createDerivation() { - if (currIndex == bridgingInfoList.size()) - return null; - Derivation res; - switch (description) { - case "unary": - res = nextUnary(); - break; - case "inject": - res = nextInject(); - break; - case "entity": - res = nextEntity(); - break; - default: - throw new RuntimeException("Bad description " + description); - } - if (opts.verbose >= 2) - LogInfo.logs("mode=%s,deriv=%s", description, res); - return res; - } - // not every BridgingInfo produces a derivation so we iterate until we find one - private Derivation nextEntity() { - - if (opts.verbose >= 3) - LogInfo.begin_track("Compute next entity"); - BridgingInfo currBridgingInfo = bridgingInfoList.get(currIndex++); - if (opts.verbose >= 2) - 
LogInfo.logs("BridgeFn.nextEntity: binary=%s, popularity=%s", currBridgingInfo.bInfo.formula, - currBridgingInfo.bInfo.popularity); - List> exampleInfo = generateExampleInfo(currBridgingInfo.ex, currBridgingInfo.c); // this is not done in text to text matcher so done here - Formula join = new JoinFormula(currBridgingInfo.bInfo.formula, currBridgingInfo.modifierDeriv.formula); - - Derivation res = new Derivation.Builder() - .withCallable(currBridgingInfo.c) - .formula(join) - .type(SemType.newAtomicSemType(currBridgingInfo.bInfo.expectedType1)) - .createDerivation(); - - if (SemanticFn.opts.trackLocalChoices) { - res.addLocalChoice( - String.format( - "BridgeFn: entity %s --> %s %s", - currBridgingInfo.bInfo.formula, - currBridgingInfo.modifierDeriv.startEndString(currBridgingInfo.ex.getTokens()), currBridgingInfo.modifierDeriv.formula)); - } - - if (opts.verbose >= 2) - LogInfo.logs("BridgeStringFn: %s", join); - - // features - res.addFeature("BridgeFn", "entity"); - res.addFeatures(getBinaryBridgeFeatures(currBridgingInfo.bInfo)); - - // Adds dependencies for every bridging relation/entity - if (FeatureExtractor.containsDomain("dependencyBridge")) { - addBridgeDependency(res, currBridgingInfo, "entity"); - } - - FeatureVector textMatchFeatures = textToTextMatcher.extractFeatures( - exampleInfo.get(0), exampleInfo.get(1), exampleInfo.get(2), - new HashSet<>(currBridgingInfo.bInfo.descriptions)); - res.addFeatures(textMatchFeatures); - - - if (opts.verbose >= 3) - LogInfo.end_track(); - return res; - } - - /* Adds dependencies for every bridging relation/entity. 
*/ - private void addBridgeDependency(Derivation res, BridgingInfo currBridgingInfo, String type) { - List> deps = currBridgingInfo.ex.languageInfo.dependencyChildren; - Derivation entityDeriv = currBridgingInfo.modifierDeriv; - for (int currWord = 0; currWord < deps.size(); currWord++) { - if (entityDeriv.containsIndex(currWord)) { - continue; - } - for (DependencyEdge e : deps.get(currWord)) { - if (entityDeriv.containsIndex(e.modifier)) { - res.addFeature("dependencyBridge", "type=" + type + - "," + e.label + " -- " + - "relation=" + currBridgingInfo.bInfo.formula); - } - } - } - } - - private Derivation nextInject() { - BridgingInfo currBridgingInfo = bridgingInfoList.get(currIndex++); - if (opts.verbose >= 2) - LogInfo.logs("BridgingFn.nextInject: binary=%s, popularity=%s", currBridgingInfo.bInfo.formula, - currBridgingInfo.bInfo.popularity); - JoinFormula headFormula = (JoinFormula) Formulas.betaReduction(currBridgingInfo.headDeriv.formula); - - Formula bridgedFormula = buildBridgeFromCvt(headFormula, currBridgingInfo.modifierDeriv.formula, - currBridgingInfo.bInfo.formula); - Derivation res = new Derivation.Builder() - .withCallable(currBridgingInfo.c) - .formula(bridgedFormula) - .type(currBridgingInfo.headDeriv.type) - .createDerivation(); - if (SemanticFn.opts.trackLocalChoices) { - res.addLocalChoice( - String.format( - "BridgeFn: %s %s --> %s %s --> %s %s", - currBridgingInfo.headDeriv.startEndString(currBridgingInfo.ex.getTokens()), - currBridgingInfo.headDeriv.formula, - currBridgingInfo.ex.getTokens().subList(currBridgingInfo.c.child(0).end, currBridgingInfo.c.child(1).start), - currBridgingInfo.bInfo.formula, - currBridgingInfo.modifierDeriv.startEndString(currBridgingInfo.ex.getTokens()), currBridgingInfo.modifierDeriv.formula)); - } - - if (opts.verbose >= 3) - LogInfo.logs("BridgeFn: injecting %s to %s --> %s ", currBridgingInfo.modifierDeriv.formula, headFormula, bridgedFormula); - - String headModifierOrder = headFirst ? 
"head-modifier" : "modifier-head"; - res.addFeature("BridgeFn", - "inject_order=" + headModifierOrder + "," + "pos=" + - currBridgingInfo.ex.languageInfo.getCanonicalPos(currBridgingInfo.headDeriv.start) + "-" + - currBridgingInfo.ex.languageInfo.getCanonicalPos(currBridgingInfo.modifierDeriv.start) - ); - - - res.addFeature("BridgeFn", "binary=" + currBridgingInfo.bInfo.formula); - res.addFeature("BridgeFn", "domain=" + currBridgingInfo.bInfo.extractDomain(currBridgingInfo.bInfo.formula)); - res.addFeatureWithBias("BridgeFn", "popularity", Math.log(currBridgingInfo.bInfo.popularity + 1)); - /* Adds dependencies for every bridging relation/entity */ - if (FeatureExtractor.containsDomain("dependencyBridge")) { - addBridgeDependency(res, currBridgingInfo, "inject"); - } - return res; - } - - private Derivation nextUnary() { - BridgingInfo currBridgingInfo = bridgingInfoList.get(currIndex++); - - if (opts.verbose >= 2) - LogInfo.logs("BridgingFn.nextUnary: binary=%s, popularity=%s", currBridgingInfo.bInfo.formula, - currBridgingInfo.bInfo.popularity); - - Formula bridgedFormula = buildBridge(currBridgingInfo.headDeriv.formula, - currBridgingInfo.modifierDeriv.formula, currBridgingInfo.bInfo.formula); - - Derivation res = new Derivation.Builder() - .withCallable(currBridgingInfo.c) - .formula(bridgedFormula) - .type(currBridgingInfo.headDeriv.type) - .createDerivation(); - - if (SemanticFn.opts.trackLocalChoices) { - res.addLocalChoice( - String.format( - "BridgeFn: %s %s --> %s %s --> %s %s", - currBridgingInfo.headDeriv.startEndString(currBridgingInfo.ex.getTokens()), - currBridgingInfo.headDeriv.formula, - currBridgingInfo.ex.getTokens().subList(currBridgingInfo.c.child(0).end, currBridgingInfo.c.child(1).start), - currBridgingInfo.bInfo.formula, - currBridgingInfo.modifierDeriv.startEndString(currBridgingInfo.ex.getTokens()), currBridgingInfo.modifierDeriv.formula)); - } - - // Add features - res.addFeature("BridgeFn", "unary"); - 
res.addFeatures(getBinaryBridgeFeatures(currBridgingInfo.bInfo)); - - // head modifier POS tags - String headModifierOrder = headFirst ? "head-modifier" : "modifier-head"; - res.addFeature("BridgeFn", - "order=" + headModifierOrder + "," + - "pos=" + - currBridgingInfo.ex.languageInfo.getCanonicalPos(currBridgingInfo.headDeriv.start) + "-" + - currBridgingInfo.ex.languageInfo.getCanonicalPos(currBridgingInfo.modifierDeriv.start) - ); - - List> exampleInfo = generateExampleInfo(currBridgingInfo.ex, currBridgingInfo.c); // this is not done in text to text matcher so done here - - FeatureVector vector = textToTextMatcher.extractFeatures( - exampleInfo.get(0), exampleInfo.get(1), exampleInfo.get(2), - new HashSet<>(currBridgingInfo.bInfo.descriptions)); - - res.addFeatures(vector); - /* Adds dependencies for every bridging relation/entity */ - if (FeatureExtractor.containsDomain("dependencyBridge")) { - addBridgeDependency(res, currBridgingInfo, "unary"); - } - return res; - } - - @Override - public void remove() { - throw new RuntimeException("Does not support remove"); - } - } - - public class BridgingInfo implements Comparable { - public final Example ex; - public final Callable c; - public final BinaryFormulaInfo bInfo; - public final boolean headFirst; - public final Derivation headDeriv; - public final Derivation modifierDeriv; - - public BridgingInfo(Example ex, Callable c, BinaryFormulaInfo bInfo, boolean headFirst, Derivation headDeriv, Derivation modifierDeriv) { - this.ex = ex; - this.c = c; - this.bInfo = bInfo; - this.headFirst = headFirst; - this.headDeriv = headDeriv; - this.modifierDeriv = modifierDeriv; - } - - @Override - public int compareTo(BridgingInfo o) { - return fbFormulaInfo.compare(this.bInfo.formula, o.bInfo.formula); - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/BuildCanonicalIdMap.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/BuildCanonicalIdMap.java deleted file mode 100644 index 
5c14f93336..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/BuildCanonicalIdMap.java +++ /dev/null @@ -1,146 +0,0 @@ -package edu.stanford.nlp.sempre.freebase; - -import fig.basic.IOUtils; -import fig.basic.LogInfo; -import fig.basic.Option; -import fig.exec.Execution; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.PrintWriter; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - -/** - * Motivation: the Freebase dump uses a mix of mids and ids. The mapping - * computed by this program will be used to standardize everything to a nice - * looking id or a mid. - *

- * Input: raw RDF Freebase dump (ttl format) downloaded from the Freebase - * website. - *

- * Output: tab-separated file containing mapping from mid/id to canonical id. - * fb:m.02mjmr fb:en.barack_obama fb:m.07t65 fb:en.united_nations - * fb:en.united_staff fb:en.united_nations ... To save space, only output mid/id - * which are mids or acceptable ids. - *

- * Acceptable ids include: - Something that starts with fb:en. - Something that - * occurs in the arg1 position (for types and properties, excluding fb:m.* and - * fb:g.*). If there are multiple ids, just take the first one in the file. If - * there are no acceptable ids, then just take the mid. - * - * @author Percy Liang - */ -public class BuildCanonicalIdMap implements Runnable { - @Option public int maxInputLines = Integer.MAX_VALUE; - @Option(required = true) public String rawPath; - @Option(required = true) public String canonicalIdMapPath; - - Set allowableIds = new HashSet(); - - PrintWriter out; - int numMids = 0; - - void computeAllowableIds() { - // Compute allowable ids - LogInfo.begin_track("Compute allowable ids"); - try { - BufferedReader in = IOUtils.openIn(rawPath); - String line; - int numInputLines = 0; - while (numInputLines < maxInputLines && (line = in.readLine()) != null) { - numInputLines++; - if (numInputLines % 10000000 == 0) - LogInfo.logs("Read %s lines, %d allowable ids", numInputLines, allowableIds.size()); - String[] tokens = Utils.parseTriple(line); - if (tokens == null) continue; - String arg1 = tokens[0]; - if (!arg1.startsWith("fb:g.") && !arg1.startsWith("fb:m.")) - allowableIds.add(arg1); - } - LogInfo.logs("%d allowable ids", allowableIds.size()); - in.close(); - } catch (IOException e) { - throw new RuntimeException(e); - } - LogInfo.end_track(); - } - - void flush(String mid, List ids) { - if (ids.size() == 0) return; - numMids++; - - // Find the best id for this entity and put it first to use as the canonical id. 
- String bestId = mid; - for (String id : ids) { - if (id.startsWith("fb:en.") || allowableIds.contains(id)) { - bestId = id; - break; - } - } - - if (!bestId.equals(mid)) - out.println(mid + "\t" + bestId); - for (String id : ids) { - if (id.equals(bestId)) continue; - if (!(allowableIds.contains(id) || id.startsWith("fb:en."))) - continue; // To save space, only map ids which look reasonable - out.println(id + "\t" + bestId); - } - } - - public void run() { - computeAllowableIds(); - - // Map to ids - out = IOUtils.openOutHard(canonicalIdMapPath); - try { - BufferedReader in = IOUtils.openIn(rawPath); - String line; - int numInputLines = 0; - - // Current block of triples corresponds to a single mid. - String mid = null; - List ids = new ArrayList(); - - while (numInputLines < maxInputLines && (line = in.readLine()) != null) { - numInputLines++; - if (numInputLines % 10000000 == 0) - LogInfo.logs("Read %s lines, %d entities", numInputLines, numMids); - String[] tokens = Utils.parseTriple(line); - if (tokens == null) continue; - String arg1 = tokens[0]; - String property = tokens[1]; - String arg2 = tokens[2]; - - if (!arg1.startsWith("fb:m.")) continue; - if (!property.equals("fb:type.object.key")) continue; - - // Flush last block - if (!arg1.equals(mid)) { - flush(mid, ids); - - // Reset - mid = arg1; - ids.clear(); - } - - // Record information - arg2 = Utils.stringToRdf(arg2); - ids.add(arg2); - } - in.close(); - flush(mid, ids); - } catch (IOException e) { - throw new RuntimeException(e); - } - - out.close(); - } - - public static void main(String[] args) { - Execution.run(args, new BuildCanonicalIdMap()); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/BuildTypesMap.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/BuildTypesMap.java deleted file mode 100644 index 539551cefd..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/BuildTypesMap.java +++ /dev/null @@ -1,64 +0,0 @@ -package 
edu.stanford.nlp.sempre.freebase; - -import fig.basic.IOUtils; -import fig.basic.MapUtils; -import fig.basic.StrUtils; -import fig.basic.LogInfo; -import fig.basic.Option; -import fig.exec.Execution; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.PrintWriter; -import java.util.*; - -/** - * Input: canonical Freebase dump - * fb:en.arnold_schwarzenegger fb:type.object.type fb:people.person. - * ... - * Output: map from ids to comma-separated list of types - * fb:en.arnold_schwarzenegger fb:people.person, fb:government.politician - * - * @author Percy Liang - */ -public class BuildTypesMap implements Runnable { - @Option public int maxInputLines = Integer.MAX_VALUE; - @Option(required = true, gloss = "Input") public String inPath; - @Option(required = true, gloss = "Output") public String outPath; - @Option(gloss = "keep only fb:en.*") public boolean keepOnlyEnIds = true; - - Map> types = new LinkedHashMap>(); - - public void run() { - LogInfo.begin_track("Reading %s", inPath); - try { - BufferedReader in = IOUtils.openIn(inPath); - String line; - int numLines = 0; - while ((line = in.readLine()) != null) { - String[] triple = Utils.parseTriple(line); - if (++numLines % 100000000 == 0) LogInfo.logs("%d lines", numLines); - if (triple == null) continue; - if (!triple[1].equals("fb:type.object.type")) continue; - if (keepOnlyEnIds && !triple[0].startsWith("fb:en.")) continue; - MapUtils.addToList(types, triple[0], triple[2]); - } - in.close(); - } catch (IOException e) { - throw new RuntimeException(e); - } - LogInfo.end_track(); - - LogInfo.begin_track("Writing to %s", outPath); - PrintWriter out = IOUtils.openOutHard(outPath); - for (Map.Entry> e : types.entrySet()) { - out.println(e.getKey() + "\t" + StrUtils.join(e.getValue(), ",")); - } - out.close(); - LogInfo.end_track(); - } - - public static void main(String[] args) { - Execution.run(args, new BuildTypesMap()); - } -} diff --git 
a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/CanonicalizeExamples.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/CanonicalizeExamples.java deleted file mode 100644 index ca55ca9e83..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/CanonicalizeExamples.java +++ /dev/null @@ -1,83 +0,0 @@ -package edu.stanford.nlp.sempre.freebase; - -import com.google.common.base.Function; -import edu.stanford.nlp.sempre.*; -import fig.basic.*; -import fig.exec.Execution; - -import java.io.PrintWriter; -import java.util.Iterator; -import java.util.List; -import java.util.Map; - -/** - * Replaces all names (e.g., fb:m.02mjmr) with canonical identifiers (e.g., fb:en.barack_obama) - * For creating the dataset. - * - * @author Percy Liang - */ -public class CanonicalizeExamples implements Runnable { - @Option(required = true, gloss = "File mapping ids to canonical ids") - public String canonicalIdMapPath; - @Option(required = true, gloss = "Input path to examples to canonicalize (output to same directory) with .canonicalized extension") - public List examplePaths; - - Map canonicalIdMap; - - private String convert(String name) { - boolean reverse = false; - if (name.startsWith("!")) { - name = name.substring(1); - reverse = true; - } - name = MapUtils.get(canonicalIdMap, name, name); - if (reverse) name = "!" 
+ name; - return name; - } - - public Formula convert(Formula formula) { - return formula.map( - new Function() { - public Formula apply(Formula formula) { - String name = Formulas.getNameId(formula); - if (name != null) { - name = convert(name); - return Formulas.newNameFormula(name); - } - return null; - } - }); - } - - public void run() { - canonicalIdMap = edu.stanford.nlp.sempre.freebase.Utils.readCanonicalIdMap(canonicalIdMapPath); - - for (String inPath : examplePaths) { - String outPath = inPath + ".canonicalized"; - LogInfo.logs("Converting %s => %s", inPath, outPath); - Iterator it = LispTree.proto.parseFromFile(inPath); - PrintWriter out = IOUtils.openOutHard(outPath); - while (it.hasNext()) { - LispTree tree = it.next(); - if (!"example".equals(tree.child(0).value)) - throw new RuntimeException("Bad: " + tree); - for (int i = 1; i < tree.children.size(); i++) { - LispTree subtree = tree.child(i); - if ("targetFormula".equals(subtree.child(0).value)) { - for (int j = 1; j < subtree.children.size(); j++) { - Formula formula = Formulas.fromLispTree(subtree.child(j)); - formula = convert(formula); - subtree.children.set(j, formula.toLispTree()); // Use converted formula - } - } - } - out.println(tree.toStringWrap(100)); - } - out.close(); - } - } - - public static void main(String[] args) { - Execution.run(args, new CanonicalizeExamples()); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/CanonicalizeIds.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/CanonicalizeIds.java deleted file mode 100644 index 7af1be163a..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/CanonicalizeIds.java +++ /dev/null @@ -1,80 +0,0 @@ -package edu.stanford.nlp.sempre.freebase; - -import fig.basic.IOUtils; -import fig.basic.LogInfo; -import fig.basic.MapUtils; -import fig.basic.Option; -import fig.exec.Execution; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.PrintWriter; 
-import java.util.Map; - -/** - * Input: raw RDF Freebase dump (ttl format) downloaded from the Freebase - * website. Input: canonical id map file output by BuildCanonicalIdMap. - *

- * Output: Freebase dump with all mids and ids standardized. - * - * @author Percy Liang - */ -public class CanonicalizeIds implements Runnable { - @Option public int maxInputLines = Integer.MAX_VALUE; - @Option(required = true) public String canonicalIdMapPath; - @Option(required = true, gloss = "Raw Freebase triples") - public String rawPath; - @Option(required = true, gloss = "Canonicalized Freebase triples") - public String canonicalizedPath; - - public void run() { - Map canonicalIdMap = Utils.readCanonicalIdMap(canonicalIdMapPath, maxInputLines); - - // Do conversion - LogInfo.begin_track("Convert"); - PrintWriter out = IOUtils.openOutHard(canonicalizedPath); - out.println(Utils.ttlPrefix); - try { - BufferedReader in = IOUtils.openIn(rawPath); - String line; - int numInputLines = 0; - while (numInputLines < maxInputLines && (line = in.readLine()) != null) { - numInputLines++; - if (numInputLines % 10000000 == 0) - LogInfo.logs("Read %s lines", numInputLines); - String[] tokens = Utils.parseTriple(line); - if (tokens == null) continue; - String arg1 = tokens[0]; - String property = tokens[1]; - String arg2 = tokens[2]; - - // Do some simple filtering - if (!property.startsWith("fb:")) continue; - if (property.contains("..")) - continue; // Freebase dumps started containing paths through CVTs, which we don't need - if (property.equals("fb:type.type.instance")) - continue; // Already have type.object.type, don't need reverse map explicitly - if (arg2.startsWith("\"") && !(arg2.endsWith("@en") || arg2.contains("^^xsd:"))) - continue; // Strings must be in English or xsd values (boolean, int, float, datetime) - - arg2 = Utils.quoteValues(arg2); // Fix numerical values - - // Convert everything to use canonical ids. 
- arg1 = MapUtils.get(canonicalIdMap, arg1, arg1); - property = MapUtils.get(canonicalIdMap, property, property); - arg2 = MapUtils.get(canonicalIdMap, arg2, arg2); - - Utils.writeTriple(out, arg1, property, arg2); - } - in.close(); - } catch (IOException e) { - throw new RuntimeException(e); - } - out.close(); - LogInfo.end_track(); - } - - public static void main(String[] args) { - Execution.run(args, new CanonicalizeIds()); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/EntityLexicon.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/EntityLexicon.java deleted file mode 100644 index c916fb1167..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/EntityLexicon.java +++ /dev/null @@ -1,184 +0,0 @@ -package edu.stanford.nlp.sempre.freebase; - -import edu.stanford.nlp.io.IOUtils; -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.cache.StringCache; -import edu.stanford.nlp.sempre.cache.StringCacheUtils; -import edu.stanford.nlp.sempre.freebase.index.FbEntitySearcher; -import edu.stanford.nlp.sempre.freebase.index.FbIndexField; -import edu.stanford.nlp.sempre.freebase.lexicons.EntrySource; -import edu.stanford.nlp.sempre.freebase.lexicons.LexicalEntry; -import edu.stanford.nlp.sempre.freebase.lexicons.LexicalEntry.EntityLexicalEntry; -import edu.stanford.nlp.sempre.freebase.lexicons.TokenLevelMatchFeatures; -import edu.stanford.nlp.sempre.freebase.utils.FileUtils; -import edu.stanford.nlp.stats.Counter; -import edu.stanford.nlp.util.ArrayUtils; -import edu.stanford.nlp.util.StringUtils; -import fig.basic.MapUtils; -import fig.basic.Option; -import org.apache.lucene.document.Document; -import org.apache.lucene.queryparser.classic.ParseException; - -import java.io.IOException; -import java.util.*; - -public final class EntityLexicon { - public enum SearchStrategy { exact, inexact, fbsearch } - - public static class Options { - @Option(gloss = "Verbosity") - public int verbose = 0; 
- @Option(gloss = "Number of results return by the lexicon") - public int maxEntries = 1000; - @Option(gloss = "Number of documents queried from Lucene") - public int numOfDocs = 10000; - @Option(gloss = "Path to the exact match lucene index directory") - public String exactMatchIndex; - @Option(gloss = "Path to the inexact match lucene index directory") - public String inexactMatchIndex = "lib/lucene/4.4/inexact"; - @Option(gloss = "Cache path to the mid-to-id path") - public String mid2idPath; - @Option(gloss = "Path to entity popularity file") - public String entityPopularityPath; - } - - public static Options opts = new Options(); - - private static EntityLexicon entityLexicon; - public static EntityLexicon getInstance() { - if (entityLexicon == null) entityLexicon = new EntityLexicon(); - return entityLexicon; - } - - FbEntitySearcher exactSearcher; // Lucene - FbEntitySearcher inexactSearcher; // Lucene - - FreebaseSearch freebaseSearch; // Google's API - StringCache mid2idCache; // Google's API spits back mids, which we need to convert to ids - Map entityPopularityMap; - - private EntityLexicon() { - loadEntityPopularity(); - } - - public List lookupEntries(String query, SearchStrategy strategy) throws ParseException, IOException { - if (strategy == null) - throw new RuntimeException("No entity search strategy specified"); - switch (strategy) { - case exact: - if (exactSearcher == null) exactSearcher = new FbEntitySearcher(opts.exactMatchIndex, opts.numOfDocs, "exact"); - return lookupEntries(exactSearcher, query); - case inexact: - if (inexactSearcher == null) inexactSearcher = new FbEntitySearcher(opts.inexactMatchIndex, opts.numOfDocs, "inexact"); - return lookupEntries(inexactSearcher, query); - case fbsearch: - if (freebaseSearch == null) freebaseSearch = new FreebaseSearch(); - if (mid2idCache == null) mid2idCache = StringCacheUtils.create(opts.mid2idPath); - return lookupFreebaseSearchEntities(query); - default: - throw new RuntimeException("Unknown 
entity search strategy: " + strategy); - } - } - - private void loadEntityPopularity() { - entityPopularityMap = new HashMap<>(); - if (opts.entityPopularityPath == null) return; - for (String line : IOUtils.readLines(opts.entityPopularityPath)) { - String[] tokens = line.split("\t"); - entityPopularityMap.put(tokens[0], Double.parseDouble(tokens[1])); - } - } - - public List lookupFreebaseSearchEntities(String query) { - FreebaseSearch.ServerResponse response = freebaseSearch.lookup(query); - List entities = new ArrayList<>(); - if (response.error != null) { - throw new RuntimeException(response.error.toString()); - } - // num of words in query - int numOfQueryWords = query.split("\\s+").length; - for (FreebaseSearch.Entry e : response.entries) { - if (entities.size() >= opts.maxEntries) break; - // Note: e.id might not be the same one we're using (e.g., fb:en.john_f_kennedy_airport versus fb:en.john_f_kennedy_international_airport), - // so get the one from our canonical mid2idCache - String id = mid2idCache.get(e.mid); - if (id == null) continue; // Skip if no ID (probably not worth referencing) - // skip if it is a long phrase that is not an exact match - if (numOfQueryWords >= 4 && !query.toLowerCase().equals(e.name.toLowerCase())) { - continue; - } - - int distance = editDistance(query.toLowerCase(), e.name.toLowerCase()); // Is this actually useful? 
- Counter entityFeatures = TokenLevelMatchFeatures.extractFeatures(query, e.name); - double popularity = MapUtils.get(entityPopularityMap, id, 0d); - entityFeatures.incrementCount("text_popularity", Math.log(popularity + 1)); - entities.add(new EntityLexicalEntry(query, query, Collections.singleton(e.name), - new ValueFormula<>(new NameValue(id, e.name)), EntrySource.FBSEARCH, e.score, distance, - new FreebaseTypeLookup().getEntityTypes(id), entityFeatures)); - } - return entities; - } - - public List lookupEntries(FbEntitySearcher searcher, String textDesc) throws ParseException, IOException { - - List res = new ArrayList<>(); - textDesc = textDesc.replaceAll("\\?", "\\\\?").toLowerCase(); - List docs = searcher.searchDocs(textDesc); - for (Document doc : docs) { - - Formula formula = Formula.fromString(doc.get(FbIndexField.ID.fieldName())); - String[] fbDescriptions = new String[]{doc.get(FbIndexField.TEXT.fieldName())}; - String typesDesc = doc.get(FbIndexField.TYPES.fieldName()); - - Set types = new HashSet<>(); - if (typesDesc != null) { - String[] tokens = typesDesc.split(","); - Collections.addAll(types, tokens); - } - - double popularity = Double.parseDouble(doc.get(FbIndexField.POPULARITY.fieldName())); - int distance = editDistance(textDesc.toLowerCase(), fbDescriptions[0].toLowerCase()); - Counter tokenEditDistanceFeatures = TokenLevelMatchFeatures.extractFeatures(textDesc, fbDescriptions[0]); - - if ((popularity > 0 || distance == 0) && TokenLevelMatchFeatures.diffSetSize(textDesc, fbDescriptions[0]) < 4) { - res.add(new EntityLexicalEntry(textDesc, textDesc, ArrayUtils.asSet(fbDescriptions), formula, EntrySource.LUCENE, popularity, distance, types, tokenEditDistanceFeatures)); - } - } - Collections.sort(res, new LexicalEntryComparator()); - return res.subList(0, Math.min(res.size(), opts.maxEntries)); - } - - private int editDistance(String query, String name) { - - String[] queryTokens = FileUtils.omitPunct(query).split("\\s+"); - String[] nameTokens 
= FileUtils.omitPunct(name).split("\\s+"); - - StringBuilder querySb = new StringBuilder(); - for (String queryToken : queryTokens) - querySb.append(queryToken).append(" "); - - StringBuilder nameSb = new StringBuilder(); - for (String nameToken : nameTokens) - nameSb.append(nameToken).append(" "); - - return StringUtils.editDistance(querySb.toString().trim(), nameSb.toString().trim()); - } - - public static class LexicalEntryComparator implements Comparator { - @Override - public int compare(LexicalEntry arg0, LexicalEntry arg1) { - - if (arg0.popularity > arg1.popularity) - return -1; - if (arg0.popularity < arg1.popularity) - return 1; - if (arg0.distance < arg1.distance) - return -1; - if (arg0.distance > arg1.distance) - return 1; - return 0; - } - } -} - - diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/ExecuteExamples.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/ExecuteExamples.java deleted file mode 100644 index 2c1692bf80..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/ExecuteExamples.java +++ /dev/null @@ -1,84 +0,0 @@ -package edu.stanford.nlp.sempre.freebase; - -import edu.stanford.nlp.sempre.*; - -import com.google.common.base.Function; -import fig.basic.LispTree; -import fig.basic.LogInfo; -import fig.basic.Option; -import fig.exec.Execution; - -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; - -/** - * Executes all examples and reports any badness. - * Was used to prepare the Free917 dataset. - * - * @author Percy Liang - */ -public class ExecuteExamples implements Runnable { - @Option(required = true, gloss = "Input path to examples to canonicalize") - public List examplesPaths; - - SparqlExecutor executor; - Map cache = new HashMap(); - - private boolean queryReturnsResults(Formula formula) { - // If counting, look inside to make sure the actual set is non-empty. 
- if (formula instanceof AggregateFormula) - formula = ((AggregateFormula) formula).child; - - Executor.Response response = cache.get(formula); - if (response == null) - cache.put(formula, response = executor.execute(formula, null)); - if (!(response.value instanceof ListValue) || - ((ListValue) response.value).values.size() == 0) { - LogInfo.logs("BAD QUERY: %s => %s", formula, response.value); - return false; - } else { - LogInfo.logs("GOOD QUERY: %s => %s", formula, response.value); - return true; - } - } - - // Test each individual nested NameFormula. - public Formula test(Formula formula) { - return formula.map( - new Function() { - public Formula apply(Formula formula) { - String name = Formulas.getNameId(formula); - if (name != null) { - if (name.startsWith("!")) name = name.substring(1); - queryReturnsResults(Formulas.newNameFormula(name)); - } - return null; - } - }); - } - - public void run() { - executor = new SparqlExecutor(); - for (String path : examplesPaths) { - Iterator it = LispTree.proto.parseFromFile(path); - while (it.hasNext()) { - LispTree tree = it.next(); - if (!"example".equals(tree.child(0).value)) - throw new RuntimeException("Bad: " + tree); - for (int i = 1; i < tree.children.size(); i++) { - if ("targetFormula".equals(tree.child(i).child(0).value)) { - Formula formula = Formulas.fromLispTree(tree.child(i).child(1)); - formula = test(formula); - queryReturnsResults(formula); - } - } - } - } - } - - public static void main(String[] args) { - Execution.run(args, new ExecuteExamples(), "SparqlExecutor", SparqlExecutor.opts); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/FbFormulasInfo.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/FbFormulasInfo.java deleted file mode 100644 index ab81febd93..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/FbFormulasInfo.java +++ /dev/null @@ -1,499 +0,0 @@ -package edu.stanford.nlp.sempre.freebase; - -import 
com.google.common.base.Joiner; -import edu.stanford.nlp.stats.ClassicCounter; -import edu.stanford.nlp.stats.Counter; -import edu.stanford.nlp.sempre.*; -import fig.basic.LispTree; -import fig.basic.LogInfo; -import fig.basic.MapUtils; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.*; - -/** - * Class for keeping info and manipulating FB formulas. For example, given a - * Freebase formula computes the reverse of the formula (flipping arguments) and - * the equivalent formula (using reverse property from Freebase) Reversing works - * now only for chains - * - * @author jonathanberant - */ -public final class FbFormulasInfo { - // Everyone should use the singleton. - private static FbFormulasInfo fbFormulaInfo; - - public static FbFormulasInfo getSingleton() { - if (fbFormulaInfo == null) { - fbFormulaInfo = new FbFormulasInfo(); - } - return fbFormulaInfo; - } - - private FreebaseInfo freebaseInfo = null; - public Map binaryFormulaInfoMap = new HashMap<>(); - public Map unaryFormulaInfoMap = new HashMap<>(); - private Map> typeToNumericalPredicates = new HashMap<>(); - private Map> atomicExtype2ToBinaryMap = new HashMap<>(); // contains map to all atomic properties - private Map> extype2ToNonCvtBinaryMap = new HashMap<>(); // contains map to all binaries for which extype 1 is not a CVT - private Map> cvtExpansionsMap = new HashMap<>(); - private Map> cvtTypeToBinaries = new HashMap<>(); - - private Comparator formulaComparator; - - private FbFormulasInfo() { - try { - freebaseInfo = FreebaseInfo.getSingleton(); - loadFormulaInfo(); - computeNumericalPredicatesMap(); - } catch (IOException | NumberFormatException e) { - throw new RuntimeException(e); - } - } - - /** - * Map that given a type provides all Freebase predicates that have that type - * as expected type 2 and a number for expected type 1 - */ - private void computeNumericalPredicatesMap() { - for (BinaryFormulaInfo info : binaryFormulaInfoMap.values()) { - if 
(info.expectedType1.equals("fb:type.int") || info.expectedType1.equals("fb:type.float")) { - MapUtils.addToSet(typeToNumericalPredicates, info.expectedType2, info); - } - } - } - - public Set getNumericalPredicates(String expectedType) { - return MapUtils.get(typeToNumericalPredicates, expectedType, new HashSet()); - } - - private void computeReverseFormulaInfo() { - List entriesToAdd = new LinkedList<>(); - for (Formula formula : binaryFormulaInfoMap.keySet()) { - BinaryFormulaInfo info = binaryFormulaInfoMap.get(formula); - Formula reverseFormula = Formulas.reverseFormula(formula); - - if (!binaryFormulaInfoMap.containsKey(reverseFormula)) { - entriesToAdd.add( - new BinaryFormulaInfo( - reverseFormula, - info.expectedType2, info.expectedType1, info.unitId, info.unitDesc, info.descriptions, info.popularity)); - } - } - LogInfo.log("Adding reverse formulas: " + entriesToAdd.size()); - for (BinaryFormulaInfo e : entriesToAdd) { - binaryFormulaInfoMap.put(e.formula, e); - } - } - - public BinaryFormulaInfo getBinaryInfo(Formula formula) { - return binaryFormulaInfoMap.get(formula); - } - - public UnaryFormulaInfo getUnaryInfo(Formula formula) { - return unaryFormulaInfoMap.get(formula); - } - - private void loadFormulaInfo() throws IOException { - - LogInfo.begin_track("Loading formula info..."); - LogInfo.log("Adding schema properties"); - binaryFormulaInfoMap = freebaseInfo.createBinaryFormulaInfoMap(); - unaryFormulaInfoMap = freebaseInfo.createUnaryFormulaInfoMap(); - LogInfo.log("Current number of binary formulas: " + binaryFormulaInfoMap.size()); - LogInfo.log("Current number of unary formulas: " + unaryFormulaInfoMap.size()); - - LogInfo.log("Compuing reverse for schema formulas"); - computeReverseFormulaInfo(); - LogInfo.log("Current number of binary formulas: " + binaryFormulaInfoMap.size()); - for (BinaryFormulaInfo info : binaryFormulaInfoMap.values()) { - - MapUtils.addToList(atomicExtype2ToBinaryMap, info.expectedType2, info.formula); - if 
(!isCvt(info.expectedType1)) { - addMappingFromType2ToFormula(info.expectedType2, info.formula); - } - } - - LogInfo.log("Generate formulas through CVTs"); - generateCvtFormulas(); // generate formulas for CVTs - LogInfo.log("Current number of binary formulas: " + binaryFormulaInfoMap.size()); - // we first sort by popularity - Comparator comparator = getPopularityComparator(); - sortType2ToBinaryMaps(comparator); - LogInfo.end_track(); - } - - public void sortType2ToBinaryMaps(Comparator comparator) { - this.formulaComparator = comparator; - for (List binaries: atomicExtype2ToBinaryMap.values()) - Collections.sort(binaries, comparator); - - for (List binaries: extype2ToNonCvtBinaryMap.values()) - Collections.sort(binaries, comparator); - - } - - public int compare(Formula f1, Formula f2) { - return formulaComparator.compare(f1, f2); - } - - /** - * Adding mapping from type 2 to formula - makes sure to insert just one of the 2 equivalent formulas if they exist - */ - private void addMappingFromType2ToFormula(String type2, Formula formula) { - MapUtils.addToList(extype2ToNonCvtBinaryMap, type2, formula); - } - - private void generateCvtFormulas() throws FileNotFoundException { - - List toAdd = new ArrayList<>(); - for (BinaryFormulaInfo innerInfo : binaryFormulaInfoMap.values()) { - - if (isCvt(innerInfo.expectedType1)) { // if expected type 1 is a CVT - MapUtils.addToSet(cvtTypeToBinaries, innerInfo.expectedType1, innerInfo.formula); - - List outers = atomicExtype2ToBinaryMap.get(innerInfo.expectedType1); // find those whose expected type 2 is that CVT - for (Formula outer : outers) { - BinaryFormulaInfo outerInfo = binaryFormulaInfoMap.get(outer); - if (!isLegalCvt(innerInfo.formula, outer)) - continue; - - // build new formula - LambdaFormula cvtFormula = new LambdaFormula("x", new JoinFormula(outer, new JoinFormula(innerInfo.formula, new VariableFormula("x")))); - - BinaryFormulaInfo newFormulaInfo = binaryFormulaInfoMap.get(cvtFormula); - if (newFormulaInfo == 
null) { - String exType1 = outerInfo.expectedType1; - if (exType1 == null) - throw new RuntimeException("Missing expected type 1 for formula: " + outer); - - List newDescriptions = new LinkedList<>(); - newDescriptions.add(outerInfo.descriptions.get(0)); - newDescriptions.add(innerInfo.descriptions.get(0)); - - newFormulaInfo = new BinaryFormulaInfo(cvtFormula, exType1, innerInfo.expectedType2, newDescriptions, Math.min(outerInfo.popularity, innerInfo.popularity)); - toAdd.add(newFormulaInfo); - } - MapUtils.addToSet(cvtExpansionsMap, innerInfo.formula, newFormulaInfo); - MapUtils.addToSet(cvtExpansionsMap, outerInfo.formula, newFormulaInfo); - } - } - } - for (BinaryFormulaInfo info : toAdd) { - addMappingFromType2ToFormula(info.expectedType2, info.formula); - binaryFormulaInfoMap.put(info.formula, info); - } - } - - private boolean isLegalCvt(Formula inner, Formula outer) { - if (FreebaseInfo.isReverseProperty(inner.toString()) && !FreebaseInfo.isReverseProperty(outer.toString())) - return false; - if (!FreebaseInfo.isReverseProperty(inner.toString()) && FreebaseInfo.isReverseProperty(outer.toString())) - return false; - return true; - } - - /** supports chains only */ - public boolean hasOpposite(String formula) { return hasOpposite(LispTree.proto.parseFromString(formula)); } - public boolean hasOpposite(Formula formula) { return hasOpposite(formula.toLispTree()); } - private boolean hasOpposite(LispTree tree) { - if (tree.isLeaf()) { - String fbProperty = FreebaseInfo.isReverseProperty(tree.value) ? tree.value.substring(1) : tree.value; - return freebaseInfo.propertyHasOpposite(fbProperty); - } else { - // Un-reverse everything. - String binary1 = tree.child(2).child(0).value; - binary1 = FreebaseInfo.isReverseProperty(binary1) ? binary1.substring(1) : binary1; - String binary2 = tree.child(2).child(1).child(0).value; - binary2 = FreebaseInfo.isReverseProperty(binary2) ? 
binary2.substring(1) : binary2; - return freebaseInfo.propertyHasOpposite(binary1) && freebaseInfo.propertyHasOpposite(binary2); - } - } - - /** supports chains only */ - public boolean isReversed(Formula formula) { - LispTree tree = formula.toLispTree(); - if (tree.isLeaf()) - return FreebaseInfo.isReverseProperty(tree.value); - else - return FreebaseInfo.isReverseProperty(tree.child(2).child(0).value); - } - - /** assumes we checked there is an opposite formula */ - public Formula equivalentFormula(String formula) { - LispTree tree = LispTree.proto.parseFromString(formula); - return equivalentFormula(tree); - } - - public Formula equivalentFormula(Formula formula) { - LispTree tree = formula.toLispTree(); - return equivalentFormula(tree); - } - - // two formulas can be equivalent because there are two names for every edge using the reverse label - //fb:people.person.place_of_birth --> !fb:location.location.people_born_here - //!fb:people.person.place_of_birth --> fb:location.location.people_born_here - public Formula equivalentFormula(LispTree tree) { - - if (tree.isLeaf()) { - boolean rev = FreebaseInfo.isReverseProperty(tree.value); - String fbProperty = rev ? tree.value.substring(1) : tree.value; - String oppositeProperty = freebaseInfo.getOppositeFbProperty(fbProperty); - return rev ? Formulas.newNameFormula(oppositeProperty) : Formulas.newNameFormula("!" + oppositeProperty); - } else { - String binary1 = tree.child(2).child(0).value; - binary1 = FreebaseInfo.isReverseProperty(binary1) ? binary1.substring(1) : binary1; - String binary2 = tree.child(2).child(1).child(0).value; - binary2 = FreebaseInfo.isReverseProperty(binary2) ? 
binary2.substring(1) : binary2; - String oppositeBinary1 = freebaseInfo.getOppositeFbProperty(binary1); - String oppositeBinary2 = freebaseInfo.getOppositeFbProperty(binary2); - boolean rev = FreebaseInfo.isReverseProperty(tree.child(2).child(0).value); - return buildLambdaFormula(oppositeBinary1, oppositeBinary2, !rev); - } - } - - //input: |binary1|=fb:people.place_lived.location, - // |binary2|=fb:people.person.places_lived, |reverse|=true - //output: (lambda x (!fb:people.place_lived.location (!fb:people.person.places_lived (var x)))) - public static Formula buildLambdaFormula(String binary1, String binary2, boolean reverse) { - - Formula binary1Formula = reverse ? Formulas.newNameFormula("!" + binary1) : Formulas.newNameFormula(binary1); - Formula binary2Formula = reverse ? Formulas.newNameFormula("!" + binary2) : Formulas.newNameFormula(binary2); - Formula join1 = new JoinFormula(binary2Formula, new VariableFormula("x")); - Formula join2 = new JoinFormula(binary1Formula, join1); - return new LambdaFormula("x", join2); - } - - - - //for binary formulas that are paths in the graph, if formula1 is a path s-->t - //then formula2 is the opposite if it is the path t-->s - // fb:people.person.place_of_birth is the opposite of !fb:people.person.place_of_birth - // fb:people.person.place_of_birth is the opposite of fb: - private boolean isOpposite(Formula formula1, Formula formula2) { - - if (isReversed(formula1) && !isReversed(formula2)) { - String formula1Desc = formula1.toString().substring(1); - return formula1Desc.equals(formula2.toString()); - } - if (isReversed(formula2) && !isReversed(formula1)) { - String formula2Desc = formula2.toString().substring(1); - return formula2Desc.equals(formula1.toString()); - } - if (hasOpposite(formula1)) { - Formula equivalentFormula = equivalentFormula(formula1); - if (isReversed(equivalentFormula)) { - String equivalentFormaulDesc = equivalentFormula.toString().substring(1); - return 
equivalentFormaulDesc.equals(formula2.toString()); - } else { - String formula2Desc = formula2.toString().substring(1); - return formula2Desc.equals(equivalentFormula.toString()); - } - } - return false; - } - - public List getBinariesForType2(String type) { - return MapUtils.get(extype2ToNonCvtBinaryMap, type, new ArrayList()); - } - - public List getAtomicBinariesForType2(String type) { - return MapUtils.get(atomicExtype2ToBinaryMap, type, new ArrayList()); - } - - public boolean isCvtFormula(BinaryFormulaInfo info) { - return isCvt(info.expectedType1) || isCvt(info.expectedType2); - } - - public Set getCvtExpansions(BinaryFormulaInfo info) { - return MapUtils.getSet(cvtExpansionsMap, info.formula); - } - - public Set expandCvts(String cvt) { - return MapUtils.getSet(cvtTypeToBinaries, cvt); - } - - //For a binary lambda formula that goes through CVTs, find all binaries that can - //be injected to this lambda binary formula. - //example: - //input: (lambda (fb:people.person.places_lived (fb:people.place_lived.location (var x)))) - //output: fb:people.place_lived/start_date, fb:people.place_lived.end_date - public List getInjectableBinaries(Formula formula) { - List res = new ArrayList<>(); - if (!(formula instanceof LambdaFormula)) return res; - LambdaFormula lambdaFormula = (LambdaFormula) formula; - Formula first = ((JoinFormula) lambdaFormula.body).relation; - Formula second = ((JoinFormula) ((JoinFormula) lambdaFormula.body).child).relation; - Set binaryFormulas = expandCvts(getBinaryInfo(first).expectedType2); - - for (Formula binaryFormula : binaryFormulas) { - if (!second.equals(binaryFormula) && !isOpposite(first, binaryFormula)) { - res.add(binaryFormula); - } - } - return res; - } - - public boolean isCvt(String type) { - return freebaseInfo.isCvt(type); - } - - public Comparator getPopularityComparator() { - Counter counter = new ClassicCounter<>(); - for (Formula binaryFormula : binaryFormulaInfoMap.keySet()) - counter.incrementCount(binaryFormula, 
binaryFormulaInfoMap.get(binaryFormula).popularity); - - return new FormulaByCounterComparator(counter); - } - - public class FormulaByCounterComparator implements Comparator { - - private Counter fCounter; - - public FormulaByCounterComparator(Counter fCounter) { - this.fCounter = fCounter; - } - public int compare(Formula f1, Formula f2) { - double count1 = fCounter.getCount(f1); - double count2 = fCounter.getCount(f2); - if (count1 > count2) return -1; - if (count1 < count2) return +1; - double pop1 = binaryFormulaInfoMap.get(f1).popularity; - double pop2 = binaryFormulaInfoMap.get(f2).popularity; - if (pop1 > pop2) return -1; - if (pop1 < pop2) return +1; - return 0; - } - public double getCount(Formula f) { return fCounter.getCount(f); } - } - - public class FormulaByFeaturesComparator implements Comparator { - - private Params params; - - public FormulaByFeaturesComparator(Params params) { - this.params = params; - } - public int compare(Formula f1, Formula f2) { - - FeatureVector features1 = BridgeFn.getBinaryBridgeFeatures(fbFormulaInfo.getBinaryInfo(f1)); - FeatureVector features2 = BridgeFn.getBinaryBridgeFeatures(fbFormulaInfo.getBinaryInfo(f2)); - - double score1 = features1.dotProduct(params); - double score2 = features2.dotProduct(params); - if (score1 > score2) return -1; - if (score1 < score2) return +1; - double pop1 = binaryFormulaInfoMap.get(f1).popularity; - double pop2 = binaryFormulaInfoMap.get(f2).popularity; - if (pop1 > pop2) return -1; - if (pop1 < pop2) return +1; - return 0; - } - } - - //Information from freebase about binary formulas - public static class BinaryFormulaInfo { - public Formula formula; //fb:people.person.place_of_birth - public String expectedType1; //fb:people.person - public String expectedType2; //fb:location.location - public String unitId = ""; //fb:en.meter - public String unitDesc = ""; //Meter - public List descriptions = new LinkedList<>(); // "place of birth" - public double popularity; //Number of instances of 
binary in KB: 16184.0 - - public BinaryFormulaInfo(Formula formula, String exType1, String exType2, List descs, double popularity) { - this.formula = formula; - this.expectedType1 = exType1; - this.expectedType2 = exType2; - this.descriptions = descs; - this.popularity = popularity; - this.unitId = ""; - this.unitDesc = ""; - } - public BinaryFormulaInfo(Formula formula, String exType1, String exType2, String unitId, String unitDesc, List descs, double popularity) { - this.formula = formula; - this.expectedType1 = exType1; - this.expectedType2 = exType2; - this.descriptions = descs; - this.popularity = popularity; - this.unitId = ""; - this.unitDesc = ""; - } - public String toString() { - return formula.toString() + "\t" + popularity + "\t" + expectedType1 + "\t" + expectedType2 + "\t" + unitId + "\t" - + unitDesc + "\t" + Joiner.on("###").join(descriptions); - } - public String toReverseString() { - return Formulas.reverseFormula(formula).toString() + "\t" + popularity + "\t" + expectedType2 + "\t" + expectedType1 + "\t" + unitId + "\t" - + unitDesc + "\t" + Joiner.on("###").join(descriptions); - } - - public static List tokenizeFbDescription(String fbDesc) { - List res = new ArrayList<>(); - String[] tokens = fbDesc.split("\\s+"); - for (String token : tokens) { - token = token.replace("(", ""); - token = token.replace(")", ""); - token = token.replace("\"", ""); - res.add(token); - } - return res; - } - - public boolean isComplete() { - if (formula == null || expectedType1 == null || expectedType2 == null || - expectedType1.equals("") || expectedType2.equals("") || descriptions == null || - descriptions.size() == 0 || - popularity == 0.0) - return false; - return true; - } - - public SemType getSemType() { - return SemType.newFuncSemType(expectedType2, expectedType1); - } - - public String extractDomain(Formula binary) { - LispTree tree = binary.toLispTree(); - String property = tree.isLeaf() ? 
tree.value : tree.child(2).child(0).value; - if (property.startsWith("!")) - property = property.substring(1); - return property.substring(0, property.indexOf('.')); - } - } - - public static class UnaryFormulaInfo { - - public Formula formula; - public double popularity; - public List descriptions; - public Set types; - - public UnaryFormulaInfo(Formula formula, double popularity, - List descriptions, Set types) { - - this.formula = formula; - this.popularity = popularity; - this.descriptions = descriptions; - this.types = types; - } - - public boolean isComplete() { - if (formula == null || descriptions == null || descriptions.size() == 0 || - popularity == 0.0) - return false; - return true; - } - - public String toString() { - return formula + "\t" + popularity + "\t" + Joiner.on("###").join(descriptions); - } - - public String getRepresentativeDescrption() { - if (descriptions.get(0).contains("/") && descriptions.size() > 1) - return descriptions.get(1); - return descriptions.get(0); - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/FilterFreebase.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/FilterFreebase.java deleted file mode 100644 index fa71c0e845..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/FilterFreebase.java +++ /dev/null @@ -1,218 +0,0 @@ -package edu.stanford.nlp.sempre.freebase; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.Lists; -import edu.stanford.nlp.sempre.Formula; -import edu.stanford.nlp.sempre.Formulas; -import fig.basic.*; -import fig.exec.Execution; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.PrintWriter; -import java.util.*; - -/** - * Input: canonicalized Freebase ttl file. Input: example files. Output: subset - * of the ttl file that only involves the referenced properties. 
- * - * @author Percy Liang - */ -public class FilterFreebase implements Runnable { - @Option(required = true, gloss = "Canonicalized Freebase dump") - public String inPath; - @Option public int maxInputLines = Integer.MAX_VALUE; - @Option(gloss = "Examples files (keep properties that show up in these files)") - public List examplesPaths = new ArrayList(); - - @Option(gloss = "Keep only type entries involving these") - public List keepTypesPaths = new ArrayList(); - @Option(gloss = "Keep these properties") - public List keepPropertiesPaths = new ArrayList(); - @Option(gloss = "Ignore these properties") - public List notKeepPropertiesPaths = new ArrayList(); - - @Option(gloss = "Schema properties to keep") - public HashSet schemaProperties = new HashSet( - ImmutableList.of( - "fb:type.property.schema", - "fb:type.property.unit", - "fb:type.property.expected_type", - "fb:type.property.reverse_property", - "fb:freebase.type_hints.mediator", - "fb:freebase.type_hints.included_types" - )); - - @Option(gloss = "General properties that we should keep") - public HashSet generalProperties = new HashSet( - ImmutableList.of( - "fb:type.object.type", - "fb:type.object.name", - "fb:measurement_unit.dated_integer.number", - "fb:measurement_unit.dated_integer.year" - )); - - // Set this if we want to make a small Freebase. - @Option(gloss = "If true, keep general properties only for entities seen with the other keepProperties (uses much more memory, but results in smaller output)") - public boolean keepGeneralPropertiesOnlyForSeenEntities = false; - - @Option public boolean keepAllProperties = false; - - // Keep only type assertions involving these types. - // If empty, don't filter. - Set keepTypes = new LinkedHashSet(); - - // These are the properties for which we should keep all entity pairs. Derived from many sources. - // Should never be empty. 
- Set keepProperties = new LinkedHashSet(); - - // Entities that we saw (only needed if we need to use them to filter general properties later). - Set seenEntities = new HashSet(); - - // Fill out |keepProperties| - private void readKeep() { - LogInfo.begin_track("readKeep"); - - // Always keep schema - keepProperties.addAll(schemaProperties); - - // General properties to keep - if (!keepGeneralPropertiesOnlyForSeenEntities) - keepProperties.addAll(generalProperties); - - // Keep properties mentioned in examples - for (String path : examplesPaths) { - LogInfo.logs("Reading %s", path); - Iterator it = LispTree.proto.parseFromFile(path); - while (it.hasNext()) { - LispTree tree = it.next(); - if (!"example".equals(tree.child(0).value)) - throw new RuntimeException("Bad: " + tree); - for (int i = 1; i < tree.children.size(); i++) { - if ("targetFormula".equals(tree.child(i).child(0).value)) { - Formula formula = Formulas.fromLispTree(tree.child(i).child(1)); - keepProperties.addAll(Formulas.extractAtomicFreebaseElements(formula)); - } - } - } - } - - // Keep types - for (String path : keepTypesPaths) - for (String type : IOUtils.readLinesHard(path)) - keepTypes.add(type); - - // Keep and not keep properties - for (String path : keepPropertiesPaths) - for (String property : IOUtils.readLinesHard(path)) - keepProperties.add(property); - for (String path : notKeepPropertiesPaths) - for (String property : IOUtils.readLinesHard(path)) - keepProperties.remove(property); - - PrintWriter out = IOUtils.openOutHard(Execution.getFile("keepProperties")); - for (String property : keepProperties) - out.println(property); - out.close(); - LogInfo.logs("Keeping %s properties", keepProperties.size()); - LogInfo.end_track(); - } - - private void filterTuples() { - LogInfo.begin_track("filterTuples"); - TDoubleMap propertyCounts = new TDoubleMap(); - - PrintWriter out = IOUtils.openOutHard(Execution.getFile("0.ttl")); - out.println(Utils.ttlPrefix); - - try { - BufferedReader in = 
IOUtils.openIn(inPath); - String line; - int numInputLines = 0; - int numOutputLines = 0; - while (numInputLines < maxInputLines && (line = in.readLine()) != null) { - numInputLines++; - if (numInputLines % 10000000 == 0) - LogInfo.logs("filterTuples: Read %s lines, written %d lines", numInputLines, numOutputLines); - String[] tokens = Utils.parseTriple(line); - if (tokens == null) continue; - String arg1 = tokens[0]; - String property = tokens[1]; - String arg2 = tokens[2]; - if (!keepAllProperties && !keepProperties.contains(property)) continue; - - if (keepGeneralPropertiesOnlyForSeenEntities) { - seenEntities.add(arg1); - seenEntities.add(arg2); - } - - // Additional filtering of characters that Virtuoso can't index (we would need to be escape these). - if (Utils.isUrl(arg2)) continue; - if (Utils.identifierContainsStrangeCharacters(arg1) || Utils.identifierContainsStrangeCharacters(arg2)) - continue; - - Utils.writeTriple(out, arg1, property, arg2); - - propertyCounts.incr(property, 1); - numOutputLines++; - } - } catch (IOException e) { - throw new RuntimeException(e); - } - - // Make a second pass to only output general properties. 
- if (keepGeneralPropertiesOnlyForSeenEntities) { - LogInfo.begin_track("Second pass to output general properties for the %d seen entities", seenEntities.size()); - try { - BufferedReader in = IOUtils.openIn(inPath); - String line; - int numInputLines = 0; - int numOutputLines = 0; - while (numInputLines < maxInputLines && (line = in.readLine()) != null) { - numInputLines++; - if (numInputLines % 10000000 == 0) - LogInfo.logs("filterTuples: Read %s lines, written %d lines", numInputLines, numOutputLines); - String[] tokens = Utils.parseTriple(line); - if (tokens == null) continue; - String arg1 = tokens[0]; - String property = tokens[1]; - String arg2 = tokens[2]; - if (!generalProperties.contains(property)) continue; - if (!seenEntities.contains(arg1)) continue; - - // Only keep types that matter - if (keepTypes.size() != 0 && property.equals("fb:type.object.type") && !keepTypes.contains(arg2)) continue; - - Utils.writeTriple(out, arg1, property, arg2); - - numOutputLines++; - } - } catch (IOException e) { - throw new RuntimeException(e); - } - LogInfo.end_track(); - } - - out.close(); - - // Output property statistics - PrintWriter propertyCountsOut = IOUtils.openOutHard(Execution.getFile("propertyCounts")); - List.Entry> entries = Lists.newArrayList(propertyCounts.entrySet()); - Collections.sort(entries, propertyCounts.entryValueComparator()); - for (TDoubleMap.Entry e : entries) { - propertyCountsOut.println(e.getKey() + "\t" + e.getValue()); - } - propertyCountsOut.close(); - - LogInfo.end_track(); - } - - public void run() { - readKeep(); - filterTuples(); - } - - public static void main(String[] args) { - Execution.run(args, new FilterFreebase()); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/Free917Converter.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/Free917Converter.java deleted file mode 100644 index f1944d2a8a..0000000000 --- 
a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/Free917Converter.java +++ /dev/null @@ -1,727 +0,0 @@ -package edu.stanford.nlp.sempre.freebase; - -import edu.stanford.nlp.io.IOUtils; -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.freebase.FbFormulasInfo.BinaryFormulaInfo; -import edu.stanford.nlp.sempre.MergeFormula.Mode; -import edu.stanford.nlp.sempre.freebase.utils.FileUtils; -import edu.stanford.nlp.sempre.freebase.utils.FormatConverter; -import edu.stanford.nlp.stats.ClassicCounter; -import edu.stanford.nlp.stats.Counter; -import edu.stanford.nlp.util.StringUtils; -import fig.basic.LispTree; -import fig.basic.LogInfo; -import fig.basic.MapUtils; -import fig.basic.Option; -import fig.exec.Execution; -import fig.prob.SampleUtils; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.PrintWriter; -import java.util.*; - -/** - * One-time hack that converts the Cai & Yates dataset to our format. - * @author Jonathan Berant - */ -public class Free917Converter implements Runnable { - - private Counter argnumCounter = new ClassicCounter(); - private FbFormulasInfo formulaInfo = FbFormulasInfo.getSingleton(); - private Set cvts; - @Option(gloss = "Input path to examples to canonicalize") - public String inDir; - @Option(gloss = "Input path to examples to canonicalize") - public String outDir; - @Option(gloss = "Input path to examples to canonicalize") - public String entityInfoFile; - @Option(gloss = "Input path to examples to canonicalize") - public String cvtFile; - @Option(gloss = "Input path to examples to canonicalize") - public String midToIdFile; - - @Override - public void run() { - try { - String inQuestionsFile = inDir + "question-and-logical-form-917/dataset-all-917.txt"; - String inNpFile = inDir + "fixed-np-manually.txt"; - String outQuestionFile = outDir + "dataset-all-917_corrected.txt"; - String outNpFile = outDir + "fixed-np-manually_corrected.txt"; - String outputPrefix = outDir + "free917"; 
- String free917EntityInfoFile = outDir + "entityInfo.txt"; - String free917MissingEntitiesFile = outDir + "missingEntities.txt"; - cvts = FileUtils.loadSet(cvtFile); - - correctErrors(inQuestionsFile, inNpFile, outQuestionFile, outNpFile); - convertExampleFile(outQuestionFile, outputPrefix); - genreateEntityInfoFile(outNpFile, entityInfoFile, free917EntityInfoFile, free917MissingEntitiesFile); - } catch (Exception e) { - e.printStackTrace(); - throw new RuntimeException(e); - } - } - - public static void main(String[] args) throws IOException { - Execution.run(args, new Free917Converter()); - } - - private void correctErrors(String inQuestionsFile, String inNpFile, - String outQuestionFile, String outNpFile) throws IOException { - - // manual corrections of question file - int i = 0; - PrintWriter questionWriter = IOUtils.getPrintWriter(outQuestionFile); - for (String line : IOUtils.readLines(inQuestionsFile)) { - if (line.equals("(lambda $0 /type/int (exists $1 (/award/ranking@rank@year@note@list@item:t $0 /type/datetime/2000:/type/datetime $1 /en/fortune_500:/award/ranked_list /en/monsanto:/award/ranked_item)))")) - questionWriter.println("(lambda $0 /type/int (exists $1 (/award/ranking@rank@year@note@list@item:t $0 /type/datetime/2000:/type/datetime $1 $2 /en/fortune_500:/award/ranked_list /en/monsanto:/award/ranked_item)))"); - else if (line.equals("(lambda $0 /common/topic (exists $1 (exists $2 (exists $3 (exists $4 (/medicine/disease/survival_rates&/medicine/survival_rate@gender@race@years@rate@disease_stage:t /en/prostate_cancer:/medicine/disease $1 $2 $0 $3 $4))))))")) - questionWriter.println("(lambda $0 /common/topic (exists $1 (exists $2 (exists $3 (exists $4 (/medicine/disease/survival_rates&/medicine/survival_rate@gender@race@years@rate@disease_stage:t /en/prostate_cancer:/medicine/disease $-1 $1 $2 $3 $0 $4))))))"); - else if (line.equals("(lambda $0 /common/topic (exists $1 (exists $2 (exists $3 
(/soccer/football_league_participation@team@league@from@to:t $1 $0 $2 $3)))))")) - questionWriter.println("(lambda $0 /common/topic (exists $1 (exists $2 (exists $3 (/soccer/football_league_participation@team@league@from@to:t /en/real_madrid:/soccer/football_team $1 $0 $2 $3)))))"); - else if (line.equals("(lambda $0 /common/topic (/tv/tv_program@languages:t /base/ranker/rankerurlname/firefly$002f143400:/tv/tv_program $0))")) - questionWriter.println("(lambda $0 /common/topic (/tv/tv_program@languages:t /m/014v3t:/tv/tv_program $0))"); - else if (line.equals("(lambda $0 /common/topic (exists $1 (exists $2 (/film/film/estimated_budget&/measurement_unit/dated_money_value@valid_date@amount@currency:t /en/edward_scissorhands:/film/film $1 $0 $2))))")) - questionWriter.println("(lambda $0 /common/topic (exists $1 (exists $2 (/film/film/estimated_budget&/measurement_unit/dated_money_value@valid_date@amount@currency:t /en/edward_scissorhands:/film/film $-1 $1 $0 $2))))"); - else if (line.equals("(lambda $0 /common/topic (exists $1 (exists $2 (/film/film/estimated_budget&/measurement_unit/dated_money_value@valid_date@amount@currency:t /en/transformers:/film/film $1 $0 $2))))")) - questionWriter.println("(lambda $0 /common/topic (exists $1 (exists $2 (/film/film/estimated_budget&/measurement_unit/dated_money_value@valid_date@amount@currency:t /en/transformers:/film/film $-1 $1 $0 $2))))"); - else if (line.equals("(lambda $0 /location/location (exists $1 (exists $2 (exists $3 (exists $4 (exists $5 (/library/public_library/address&/location/mailing_address@street_address@street_address_2@citytown@postal_code@state_province_region@country:t /m/02ncllz:/library/public_library $1 $2 $0 $3 $4 $5)))))))")) - questionWriter.println("(lambda $0 /location/location (exists $1 (exists $2 (exists $3 (exists $4 (exists $5 (/library/public_library/address&/location/mailing_address@street_address@street_address_2@citytown@postal_code@state_province_region@country:t 
/m/02ncllz:/library/public_library $-1 $1 $2 $0 $3 $4 $5)))))))"); - else if (line.matches("who won ali.*frazier ii")) - questionWriter.println("who won muhammad ali vs. joe frazier ii"); - else if (line.equals("(lambda $0 /people/person (exists $1 (exists $2 (exists $3 (/base/boxing/match_boxer_relationship@match@boxer@winner_won@points:t $1 $0 $2 $3)))))")) { - if (i++ == 0) - questionWriter.println("(lambda $0 /people/person (exists $1 (exists $2 (exists $3 (/boxing/match_boxer_relationship@match@boxer@winner_won@points:t /en/ali-frazier_ii:/boxing/boxing_match $1 $0 /type/boolean/true $2 $3)))))"); - else - questionWriter.println("(lambda $0 /people/person (exists $1 (exists $2 (exists $3 (/boxing/match_boxer_relationship@match@boxer@winner_won@points:t /m/0kvlz:/boxing/boxing_match $1 $0 $2 $3)))))"); - } else if (line.equals("(lambda $0 /people/person (exists $1 (exists $2 (exists $3 (exists $4 (exists $5 (exists $6 (/base/boxing/boxing_title_tenure@champion@weight@from@to@defenses@title@notes:t $0 $1 $2 $3 $4 $5 $6))))))))")) - questionWriter.println("(lambda $0 /people/person (exists $1 (exists $2 (exists $3 (exists $4 (exists $5 (exists $6 (/boxing/boxing_title_tenure@champion@weight@from@to@defenses@title@notes:t $0 $1 $2 $3 $4 /m/0chgh2j:/boxing/boxing_title $5 $6))))))))"); - else if (line.equals("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/business_operation/revenue&/measurement_unit/dated_money_value@valid_date@amount@currency:t /en/motorola:/business/business_operation $0 $1 $2))))")) - questionWriter.println("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/business_operation/revenue&/measurement_unit/dated_money_value@valid_date@amount@currency:t /en/motorola:/business/business_operation $-1 $0 $1 $2))))"); - else if (line.equals("(lambda $1 /common/topic (exists $2 (exists $3 (/business/business_operation/net_profit&/measurement_unit/dated_money_value@valid_date@amount@currency:t 
/en/procter_gamble:/business/business_operation $1 $2 $3))))")) - questionWriter.println("(lambda $0 /common/topic (exists $1 (exists $2 (/business/business_operation/net_profit&/measurement_unit/dated_money_value@valid_date@amount@currency:t /en/procter_gamble:/business/business_operation $-1 $1 $0 $2))))"); - else if (line.equals("(lambda $0 /common/topic (exists $1 (/business/business_operation/revenue&/measurement_unit/dated_money_value@valid_date@amount@currency:t /en/viacom:/business/business_operation /un/2009:/type/datetime $0 $1)))")) - questionWriter.println("(lambda $0 /common/topic (exists $1 (/business/business_operation/revenue&/measurement_unit/dated_money_value@valid_date@amount@currency:t /en/viacom:/business/business_operation /type/datetime/2009:/type/datetime $1 $0)))"); - else if (line.equals("(lambda $0 /common/topic (exists $1 (exists $2 (/business/business_operation/operating_income&/measurement_unit/dated_money_value@valid_date@amount@currency:t /en/j_c_penney:/business/business_operation $1 $0 $2))))")) - questionWriter.println("(lambda $0 /common/topic (exists $1 (exists $2 (/business/business_operation/operating_income&/measurement_unit/dated_money_value@valid_date@amount@currency:t /en/j_c_penney:/business/business_operation $-1 $1 $0 $2))))"); - else if (line.equals("(lambda $0 /type/int (exists $1 (exists $2 (/location/statistical_region/population&/measurement_unit/dated_integer@number@year@source:t /en/belgium:/location/statistical_region $0 $1 $2))))")) - questionWriter.println("(lambda $0 /type/int (exists $1 (exists $2 (/location/statistical_region/population&/measurement_unit/dated_integer@number@year@source:t /en/belgium:/location/statistical_region $-1 $0 $1 $2))))"); - else if (line.equals("(lambda $0 /type/int (exists $1 (exists $2 (/location/statistical_region/population&/measurement_unit/dated_integer@number@year@source:t /en/iowa:/location/statistical_region $0 $1 $2))))")) - questionWriter.println("(lambda $0 /type/int 
(exists $1 (exists $2 (/location/statistical_region/population&/measurement_unit/dated_integer@number@year@source:t /en/iowa:/location/statistical_region $-1 $0 $1 $2))))"); - else if (line.equals("(lambda $0 /common/topic (exists $1 (exists $2 (exists $3 (/location/statistical_region/major_exports&/location/imports_exports_by_industry@amount@currency@date@industry:t /en/madagascar:/location/statistical_region $1 $2 $3 $0)))))")) - questionWriter.println("(lambda $0 /common/topic (exists $1 (exists $2 (exists $3 (/location/statistical_region/major_exports&/location/imports_exports_by_industry@amount@currency@date@industry:t /en/madagascar:/location/statistical_region $-1 $1 $2 $3 $0)))))"); - else if (line.equals("(lambda $0 /type/int (exists $1 (exists $2 (/location/statistical_region/population&/measurement_unit/dated_integer@number@year@source:t /en/africa:/location/statistical_region $0 $1 $2))))")) - questionWriter.println("(lambda $0 /type/int (exists $1 (exists $2 (/location/statistical_region/population&/measurement_unit/dated_integer@number@year@source:t /en/africa:/location/statistical_region $-1 $0 $1 $2))))"); - else if (line.equals("(lambda $0 /type/int (exists $1 (exists $2 (/location/statistical_region/population&/measurement_unit/dated_integer@number@year@source:t /en/asia:/location/statistical_region $0 $1 $2))))")) - questionWriter.println("(lambda $0 /type/int (exists $1 (exists $2 (/location/statistical_region/population&/measurement_unit/dated_integer@number@year@source:t /en/asia:/location/statistical_region $-1 $0 $1 $2))))"); - else if (line.equals("(lambda $0 /type/int (exists $1 (exists $2 (/location/statistical_region/population&/measurement_unit/dated_integer@number@year@source:t /en/earth:/location/statistical_region $0 $1 $2))))")) - questionWriter.println("(lambda $0 /type/int (exists $1 (exists $2 (/location/statistical_region/population&/measurement_unit/dated_integer@number@year@source:t /en/earth:/location/statistical_region $-1 
$0 $1 $2))))"); - else if (line.equals("(lambda $0 /type/int (exists $1 (exists $2 (/location/statistical_region/population&/measurement_unit/dated_integer@number@year@source:t /en/europe:/location/statistical_region $0 $1 $2))))")) - questionWriter.println("(lambda $0 /type/int (exists $1 (exists $2 (/location/statistical_region/population&/measurement_unit/dated_integer@number@year@source:t /en/europe:/location/statistical_region $-1 $0 $1 $2))))"); - else if (line.equals("(lambda $0 /type/int (exists $1 (exists $2 (/location/statistical_region/population&/measurement_unit/dated_integer@number@year@source:t /en/earth:/location/statistical_region $0 $1 $2))))")) - questionWriter.println("(lambda $0 /type/int (exists $1 (exists $2 (/location/statistical_region/population&/measurement_unit/dated_integer@number@year@source:t /en/earth:/location/statistical_region $-1 $0 $1 $2))))"); - else if (line.equals("(count $0 (exists $1 (exists $2 (exists $3 (exists $4 (exists $5 (/tv/regular_tv_appearance@actor@character@series@from@to@special_performance_type@seasons:t /en/ron_glass:/tv/tv_actor $1 $0 $2 $3 $4 $5)))))))")) - questionWriter.println("(count $0 (exists $1 (exists $2 (exists $3 (exists $4 (exists $5 (/tv/regular_tv_appearance@actor@character@series@from@to@special_performance_type@seasons:t /en/ron_glass:/tv/tv_actor $1 $2 $0 $3 $4 $5)))))))"); - else if (line.equals("(lambda $0 /common/topic (/fashion/garment@specialization_of:t /fashion/garment:/fashion/garment $0))")) - questionWriter.println("(lambda $0 /common/topic (/fashion/garment@specialization_of:t /en/knickerbockers:/fashion/garment $0))"); - else if (line.equals("(lambda $0 /common/topic (exists $1 (exists $2 (exists $3 (exists $4 (/award/award_honor@notes_description@year@award@award_winner@honored_for@ceremony:t $1 $2 /en/golden_globe_award_for_best_motion_picture_-_drama:/award/award_category $3 $0 $4))))))")) - questionWriter.println("(lambda $0 /common/topic (exists $1 (exists $2 (exists $3 
(exists $4 (/award/award_honor@notes_description@year@award@award_winner@honored_for@ceremony:t $1 $2 /en/golden_globe_award_for_best_motion_picture_-_drama:/award/award_category $3 $4 $0))))))"); - else if (line.equals("(lambda $0 /common/topic (exists $1 (exists $2 (/celebrities/sexual_orientation_phase@celebrity@start@end@sexual_orientation:t /en/britney_spears:/celebrities/celebrity $1 $2 $0))))")) - questionWriter.println("(lambda $0 /common/topic (exists $1 (exists $2 (/celebrities/sexual_orientation_phase@celebrity@start@end@sexual_orientation:t /en/britney_spears:/celebrities/celebrity $1 $2 $3 $0))))"); - else if (line.equals("(lambda $0 /type/datetime (exists $1 (exists $2 (/projects/project_participation@project@participant@role@from_date@to_date:t /m/0gk9x46:/projects/project /en/francesco_sabatini:/projects/project_participant $1 $0 $2))))")) - questionWriter.println("(lambda $0 /type/datetime (exists $1 (exists $2 (/projects/project_participation@project@participant@role@from_date@to_date:t /m/0gk9x46:/projects/project $1 /en/francesco_sabatini:/projects/project_participant $2 $3 $0))))"); - else if (line.equals("(lambda $0 /people/person (exists $1 (exists $2 (exists $3 (exists $4 (exists $5 (exists $6 (/government/government_position_held@office_holder@office_position_or_title@governmental_body@district_represented@appointed_by@from@to@jurisdiction_of_office@legislative_sessions:t $0 /en/united_states_senator:/government/government_office_or_title $1 $2 $3 $4 $5 /en/colorado:/government/governmental_jurisdiction $6))))))))")) - questionWriter.println("(lambda $0 /people/person (exists $1 (exists $2 (exists $3 (exists $4 (exists $5 (exists $6 (/government/government_position_held@office_holder@office_position_or_title@governmental_body@district_represented@appointed_by@from@to@jurisdiction_of_office@legislative_sessions:t $0 /en/united_states_senator:/government/government_office_or_title $1 $2 /en/colorado:/government/governmental_jurisdiction $3 $4 
$5 $6 $7 $8))))))))"); - else if (line.equals("(lambda $0 /type/datetime (exists $1 (/ice_hockey/hockey_previous_roster_position@team@player@from@to:t /en/montreal_canadiens:/ice_hockey/hockey_team /en/christopher_higgins:/ice_hockey/hockey_player $0 $1)))")) - questionWriter.println("(lambda $0 /type/datetime (exists $1 (/ice_hockey/hockey_previous_roster_position@team@player@from@to:t /en/montreal_canadiens:/ice_hockey/hockey_team /en/christopher_higgins:/ice_hockey/hockey_player $1 $2 $0)))"); - else if (line.equals("(lambda $0 /location/location (/organization/organization@headquarters:t /en/apple_inc:/organization/organization $0))")) - questionWriter.println("(lambda $0 /location/location (exists $1 (exists $2 (exists $3 (exists $4 (/organization/organization/headquarters&/location/mailing_address@citytown@race@years@rate@disease_stage:t /en/apple_inc:/organization/organization $-1 $0))))))"); - else if (line.equals("(lambda $0 /type/datetime (exists $1 (/celebrities/substance_abuse_problem@substance@start@end@celebrity:t /en/cocaine:/celebrities/abused_substance $0 $1 /en/robin_williams:/celebrities/celebrity)))")) - questionWriter.println("(lambda $0 /type/datetime (exists $1 (/celebrities/substance_abuse_problem@substance@start@end@celebrity:t /en/cocaine:/celebrities/abused_substance $1 $2 $0 /en/robin_williams:/celebrities/celebrity)))"); - else if (line.equals("(lambda $0 /common/topic (exists $1 (exists $2 (/travel/transportation@travel_destination@mode_of_transportation@transport_operator@transport_terminus:t /en/paris:/travel/travel_destination $1 $0 $2))))")) - questionWriter.println("(lambda $0 /common/topic (exists $1 (exists $2 (/travel/transportation@travel_destination@mode_of_transportation@transport_operator@transport_terminus:t /en/paris:/travel/travel_destination $1 $2 $0))))"); - else if (line.equals("(lambda $0 /type/datetime (exists $1 (/martial_arts/martial_arts_certification@person@qualification@certifying_body@date@art:t 
/en/cathy_landers:/martial_arts/martial_artist /en/fifth_degree:/martial_arts/martial_arts_qualification $1 $0 /en/seishindo_kenpo:/martial_arts/martial_art)))")) - questionWriter.println("(lambda $0 /type/datetime (exists $1 (/martial_arts/martial_arts_certification@person@qualification@certifying_body@date@art:t /en/cathy_landers:/martial_arts/martial_artist /en/fifth_degree:/martial_arts/martial_arts_qualification $1 $2 $0 /en/seishindo_kenpo:/martial_arts/martial_art)))"); - else if (line.equals("(lambda $0 /type/int (exists $1 (exists $2 (exists $3 (/american_football/football_historical_roster_position@player@team@from@to@number@position_s:t /en/david_akers:/american_football/football_player /en/philadelphia_eagles:/american_football/football_team $1 $2 $0 $3)))))")) - questionWriter.println("(lambda $0 /type/int (exists $1 (exists $2 (exists $3 (/american_football/football_historical_roster_position@player@team@from@to@number@position_s:t /en/david_akers:/american_football/football_player /en/philadelphia_eagles:/american_football/football_team $1 $2 $3 $0 $4)))))"); - else if (line.equals("(lambda $0 /common/topic (exists $1 (exists $2 (exists $3 (exists $4 (/award/award_honor@notes_description@year@award@award_winner@honored_for@ceremony:t $1 $2 /m/04d215m:/award/award_category $3 $0 $4))))))")) - questionWriter.println("(lambda $0 /common/topic (exists $1 (exists $2 (exists $3 (exists $4 (/award/award_honor@notes_description@year@award@award_winner@honored_for@ceremony:t $1 $2 /m/04d215m:/award/award_category $3 $4 $0))))))"); - else if (line.equals("(count $0 (/architecture/architect@structures_designed:t /en/frank_lloyd_wright:/architecture/architect $0))")) - questionWriter.println("(lambda $0 /common/topic (exists $1 (/architecture/architect@structures_designed:t /en/frank_lloyd_wright:/architecture/architect $0)))"); - else if (line.equals("(lambda $0 /common/topic (/conferences/conference_subject@series_of_conferences_about_this:t 
/en/mathematics:/conferences/conference_subject $0))")) - questionWriter.println("(count $0 (/conferences/conference_subject@series_of_conferences_about_this:t /en/mathematics:/conferences/conference_subject $0))"); - else if (line.equals("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /en/ritz_cracker:/business/brand $1 $0 $2))))")) - questionWriter.println("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /en/ritz_cracker:/business/brand $1 $2 $0))))"); - else if (line.equals("(lambda $0 /type/int (exists $1 (exists $2 (exists $3 (/american_football/player_game_statistics@player@season@team@games@starts@as_of_week:t /en/donovan_mcnabb:/american_football/football_player /en/2008_nfl_season:/sports/sports_league_season $1 $0 $2 $3)))))")) - questionWriter.println("(lambda $0 /type/int (exists $1 (exists $2 (exists $3 (/american_football/player_game_statistics@player@season@team@games@starts@as_of_week:t /en/donovan_mcnabb:/american_football/football_player /en/2008_nfl_season:/sports/sports_league_season $1 $2 $0 $3)))))"); - else if (line.equals("(count $0 (exists $1 (exists $2 (/royalty/chivalric_order_position_tenure@order@chivalric_office@from@until@officer:t /en/order_of_the_most_holy_annunciation:/royalty/order_of_chivalry /en/grand_master:/royalty/chivalric_office $1 $2 $0))))")) - questionWriter.println("(count $0 (exists $1 (exists $2 (/royalty/chivalric_order_position_tenure@order@chivalric_office@from@until@officer:t /en/order_of_the_most_holy_annunciation:/royalty/order_of_chivalry /en/grand_master:/royalty/chivalric_office $1 $2 $3 $0))))"); - else if (line.equals("(count $0 (exists $1 (exists $2 (exists $3 (/film/performance@actor@film@special_performance_type@character@character_note:t $1 /en/charlies_angels:/film/film $2 $0 $3)))))")) - questionWriter.println("(count $0 (exists $1 (exists $2 (exists $3 
(/film/performance@actor@film@special_performance_type@character@character_note:t $1 /en/charlies_angels:/film/film $2 $3 $0 $4)))))"); - else if (line.equals("(lambda $0 /location/location (/library/public_library@address:t /en/mitchell_public_library:/library/public_library $0))")) - questionWriter.println("(lambda $0 /location/location (/library/public_library/address&/location/mailing_address@citytown@street_address:t /m/0j9by57:/library/public_library $-1 $1 $0))"); - else if (line.equals("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /en/chips_ahoy:/business/brand $1 $0 $2))))")) - questionWriter.println("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /en/chips_ahoy:/business/brand $1 $2 $0 $3))))"); - else if (line.equals("(count $0 (exists $1 (exists $2 (/media_common/dedication@dedicated_by@dedicated_to@work_dedicated@notes:t /en/joseph_haydn:/media_common/dedicator $1 $0 $2))))")) - questionWriter.println("(count $0 (exists $1 (exists $2 (/media_common/dedication@dedicated_by@dedicated_to@work_dedicated@notes:t /en/wolfgang_amadeus_mozart:/media_common/dedicator $1 /en/joseph_haydn:/media_common/dedicator $2 $0 $3))))"); - else if (line.equals("(lambda $0 /common/topic (exists $1 (exists $2 (/government/political_party_tenure@politician@from@to@party:t /en/grover_cleveland:/government/politician $1 $2 $0))))")) - questionWriter.println("(lambda $0 /common/topic (exists $1 (exists $2 (/government/political_party_tenure@politician@from@to@party:t /en/grover_cleveland:/government/politician $1 $2 $3 $0))))"); - else if (line.equals("(lambda $0 /location/location (/organization/organization@headquarters:t /en/h_r_block:/organization/organization $0))")) - questionWriter.println("(lambda $0 /location/location (/organization/organization/headquarters&/location/mailing_address@citytown@street_address:t 
/en/h_r_block:/organization/organization $-1 $0))"); - else if (line.equals("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /en/country_time:/business/brand $1 $0 $2))))")) - questionWriter.println("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /en/country_time:/business/brand $1 $2 $0 $3))))"); - else if (line.equals("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /en/girl_scouts_of_the_usa:/business/brand $1 $0 $2))))")) - questionWriter.println("(lambda $0 /type/datetime (/organization/organization@date_founded:t /en/girl_scouts_of_the_usa:/organization/organization $0))"); - else if (line.equals("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /en/tostitos:/business/brand $1 $0 $2))))")) - questionWriter.println("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /en/tostitos:/business/brand $1 $2 $0 $3))))"); - else if (line.equals("(lambda $0 /common/topic (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /en/girl_scouts_of_the_usa:/business/brand $0 $1 $2))))")) - questionWriter.println("(lambda $0 /common/topic (/organization/organization@founders:t /en/girl_scouts_of_the_usa:/business/brand $0))"); - else if (line.equals("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /m/02r3cjp:/business/brand $1 $0 $2))))")) - questionWriter.println("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /m/02r3cjp:/business/brand $1 $2 $0 $3))))"); - else if (line.equals("(lambda $0 /type/datetime (exists $1 (exists $2 
(/business/company_brand_relationship@brand@company@from_date@to_date:t /en/stove_top_stuffing:/business/brand $1 $0 $2))))")) - questionWriter.println("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /en/stove_top_stuffing:/business/brand $1 $2 $0 $3))))"); - else if (line.equals("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /en/oreo:/business/brand $1 $0 $2))))")) - questionWriter.println("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /en/oreo:/business/brand $1 $2 $0 $3))))"); - else if (line.equals("(count $0 (exists $1 (exists $2 (exists $3 (/cvg/musical_game_song_relationship@download@game@platforms@song@release_date:t $1 /en/guitar_hero_aerosmith:/cvg/musical_game $2 $0 $3)))))")) - questionWriter.println("(count $0 (exists $1 (exists $2 (exists $3 (/cvg/musical_game_song_relationship@download@game@platforms@song@release_date:t $1 /en/guitar_hero_aerosmith:/cvg/musical_game $2 $3 $0 $4)))))"); - else if (line.equals("(lambda $0 /common/topic (exists $1 (/olympics/olympic_athlete_affiliation@athlete@country@olympics@sport:t /m/04dnjr9:/olympics/olympic_athlete $1 /en/1992_summer_olympics:/olympics/olympic_games $0)))")) - questionWriter.println("(lambda $0 /common/topic (exists $1 (/olympics/olympic_athlete_affiliation@athlete@country@olympics@sport:t /m/04dnjr9:/olympics/olympic_athlete $1 $2 /en/1992_summer_olympics:/olympics/olympic_games $3 $0)))"); - else if (line.equals("(count $0 (exists $1 (exists $2 (exists $3 (exists $4 (exists $5 (/tv/regular_tv_appearance@actor@character@series@from@to@special_performance_type@seasons:t /en/jerry_seinfeld:/tv/tv_actor $1 $0 $2 $3 $4 $5)))))))")) - questionWriter.println("(count $0 (exists $1 (exists $2 (exists $3 (exists $4 (exists $5 
(/tv/regular_tv_appearance@actor@character@series@from@to@special_performance_type@seasons:t /en/jerry_seinfeld:/tv/tv_actor $1 $2 $0 $3 $4 $5 $6)))))))"); - else if (line.equals("(lambda $0 /common/topic (exists $1 (exists $2 (/basketball/basketball_roster_position@number@player@position@team:t $1 /en/richard_hamilton:/basketball/basketball_player $2 $0))))")) - questionWriter.println("(lambda $0 /common/topic (exists $1 (exists $2 (/basketball/basketball_roster_position@number@player@position@team:t $1 /en/richard_hamilton:/basketball/basketball_player $2 $3 $0))))"); - else if (line.equals("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /en/crystal_light:/business/brand $1 $0 $2))))")) - questionWriter.println("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /en/crystal_light:/business/brand $1 $2 $0 $3))))"); - else if (line.equals("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /en/kool-aid:/business/brand $1 $0 $2))))")) - questionWriter.println("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /en/kool-aid:/business/brand $1 $2 $0 $3))))"); - else if (line.equals("(lambda $0 /common/topic (exists $1 (exists $2 (exists $3 (/award/award_honor@notes_description@year@award@award_winner@honored_for@ceremony:t $1 /type/datetime/1981:/type/datetime $2 /en/danny_devito:/award/award_winner $0 $3)))))")) - questionWriter.println("(lambda $0 /common/topic (exists $1 (exists $2 (exists $3 (/award/award_honor@notes_description@year@award@award_winner@honored_for@ceremony:t $1 /type/datetime/1981:/type/datetime $2 $3 /en/danny_devito:/award/award_winner $4 $0 $5)))))"); - else if (line.equals("(lambda $0 /people/person (exists $1 (exists $2 (exists $3 
(/american_football/football_historical_roster_position@player@team@from@to@number@position_s:t $0 /en/green_bay_packers:/american_football/football_team $1 $2 $3 /en/quarterback:/american_football/football_position)))))")) - questionWriter.println("(lambda $0 /people/person (exists $1 (exists $2 (exists $3 (/american_football/football_historical_roster_position@player@team@from@to@number@position_s:t $0 /en/green_bay_packers:/american_football/football_team $1 $2 $3 $4 /en/quarterback:/american_football/football_position)))))"); - else if (line.equals("(count $0 (exists $1 (exists $2 (exists $3 (exists $4 (/event/speech_or_presentation@event@speech_topic@speaker_s@type_or_format_of_presentation@presented_work@date:t $1 /en/world_war_ii:/event/speech_topic $2 $3 $0 $4))))))")) - questionWriter.println("(count $0 (exists $1 (exists $2 (exists $3 (exists $4 (/event/speech_or_presentation@event@speech_topic@speaker_s@type_or_format_of_presentation@presented_work@date:t $1 /en/world_war_ii:/event/speech_topic $2 $3 $4 $0 $5))))))"); - else if (line.equals("(count $0 (exists $1 (exists $2 (/celebrities/substance_abuse_problem@substance@start@end@celebrity:t /en/cocaine:/celebrities/abused_substance $1 $2 $0))))")) - questionWriter.println("(count $0 (exists $1 (exists $2 (/celebrities/substance_abuse_problem@substance@start@end@celebrity:t /en/cocaine:/celebrities/abused_substance $1 $2 $3 $0))))"); - else if (line.equals("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /en/doritos:/business/brand $1 $0 $2))))")) - questionWriter.println("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /en/doritos:/business/brand $1 $2 $0 $3))))"); - else if (line.equals("(count $0 (exists $1 (exists $2 (exists $3 (exists $4 (/event/speech_or_presentation@event@speech_topic@speaker_s@type_or_format_of_presentation@presented_work@date:t $1 $2 
/en/winston_churchill:/event/public_speaker $3 $0 $4))))))")) - questionWriter.println("(count $0 (exists $1 (exists $2 (exists $3 (exists $4 (/event/speech_or_presentation@event@speech_topic@speaker_s@type_or_format_of_presentation@presented_work@date:t $1 $2 /en/winston_churchill:/event/public_speaker $3 $4 $0 $5))))))"); - else if (line.equals("(lambda $0 /common/topic (/transportation/bridge@bridge_type:t /en/suspension_bridge:/transportation/bridge $0))")) - questionWriter.println("(lambda $0 /common/topic (/transportation/bridge@bridge_type:t /en/manhattan_bridge:/transportation/bridge $0))"); - else if (line.equals("(lambda $0 /common/topic (exists $1 (exists $2 (/martial_arts/martial_arts_certification@person@qualification@certifying_body@date@art:t /en/christopher_adams:/martial_arts/martial_artist /en/black_belt:/martial_arts/martial_arts_qualification $1 $2 $0))))")) - questionWriter.println("(lambda $0 /common/topic (exists $1 (exists $2 (/martial_arts/martial_arts_certification@person@qualification@certifying_body@date@art:t /en/christopher_adams:/martial_arts/martial_artist $1 /en/black_belt:/martial_arts/martial_arts_qualification $2 $3 $4 $0))))"); - else if (line.equals("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /en/capri_sun:/business/brand $1 $0 $2))))")) - questionWriter.println("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /en/capri_sun:/business/brand $1 $2 $0 $3))))"); - else if (line.equals("(lambda $0 /people/person (exists $1 (exists $2 (exists $3 (exists $4 (/organization/leadership@person@title@as_of_date@organization@role@from@to:t $0 /en/chief_executive_officer:/type/text $1 /en/save-a-lot:/organization/organization $2 $3 $4))))))")) - questionWriter.println("(lambda $0 /people/person (exists $1 (exists $2 (exists $3 (exists $4 
(/organization/leadership@person@title@as_of_date@organization@role@from@to:t $0 $1 $2 /en/save-a-lot:/organization/organization $3 /en/chief_executive_officer:/type/text $4 $5))))))"); - else if (line.equals("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /en/barbie:/business/brand $1 $0 $2))))")) - questionWriter.println("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /en/barbie:/business/brand $1 $2 $0 $3))))"); - else if (line.equals("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /en/capn_crunch:/business/brand $1 $0 $2))))")) - questionWriter.println("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /en/capn_crunch:/business/brand $1 $2 $0 $3))))"); - else if (line.equals("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /en/gatorade:/business/brand $1 $0 $2))))")) - questionWriter.println("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /en/gatorade:/business/brand $1 $2 $0 $3))))"); - else if (line.equals("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /en/mountain_dew:/business/brand $1 $0 $2))))")) - questionWriter.println("(lambda $0 /type/datetime (exists $1 (exists $2 (/business/company_brand_relationship@brand@company@from_date@to_date:t /en/mountain_dew:/business/brand $1 $2 $0 $3))))"); - // entity issues - else if (line.equals("(lambda $0 /common/topic (/music/track@producer:t /m/0l16j8:/music/track $0))")) - questionWriter.println("(lambda $0 /common/topic (/music/recording@producer:t /m/0l16j8:/music/recording $0))"); - else if (line.equals("(lambda 
$0 /people/person (/book/written_work@author:t /m/03crp32:/book/written_work $0))")) - questionWriter.println("(lambda $0 /people/person (/book/written_work@author:t /m/067y_k7:/book/written_work $0))"); - else if (line.equals("(lambda $0 /common/topic (/book/literary_genre@books_in_this_genre:t $0 /en/the_hound_of_the_baskervilles:/book/book))")) - questionWriter.println("(lambda $0 /common/topic (/media_common/literary_genre@books_in_this_genre:t $0 /en/the_hound_of_the_baskervilles:/book/book))"); - else if (line.equals("(count $0 (/book/literary_genre@books_in_this_genre:t /en/computer_programming:/book/literary_genre $0))")) - questionWriter.println("(count $0 (/media_common/literary_genre@books_in_this_genre:t /en/computer_programming:/book/literary_genre $0))"); - else if (line.equals("(lambda $0 /type/int (/freebase/type_profile@instance_count:t /film/film_actor:/freebase/type_profile $0))")) - questionWriter.println("(lambda $0 /type/int (/freebase/type_profile@instance_count:t /film/actor:/freebase/type_profile $0))"); - else if (line.equals("(lambda $0 /people/person (exists $1 (exists $2 (/olympics/olympic_medal_honor@country@event@medal@medalist@olympics:t $1 /view/en/tennis_at_the_1896_summer_olympics_mens_singles:/olympics/olympic_event_competition /en/gold_medal:/olympics/olympic_medal $0 $2))))")) - questionWriter.println("(lambda $0 /people/person (exists $1 (exists $2 (/olympics/olympic_medal_honor@country@event@medal@medalist@olympics:t $1 /en/tennis_at_the_1896_summer_olympics_mens_singles:/olympics/olympic_event_competition /en/gold_medal:/olympics/olympic_medal $0 $2))))"); - else if (line.equals("(lambda $0 /type/datetime (/amusement_parks/ride@opened:t /m/03mfjrv:/amusement_parks/ride $0))")) - questionWriter.println("(lambda $0 /type/datetime (/amusement_parks/ride@opened:t /m/0flmt0:/amusement_parks/ride $0))"); - else if (line.equals("(lambda $0 /common/topic (/base/dinosaur/dinosaur@diet:t 
/en/ceratopsia/-/base/dinosaur:/base/dinosaur/dinosaur $0))")) - questionWriter.println("(lambda $0 /common/topic (/base/dinosaur/dinosaur@diet:t /en/ceratopsia:/base/dinosaur/dinosaur $0))"); - else if (line.equals("(lambda $0 /location/location (/base/dinosaur/dinosaur_location@dinosaur_s:t $0 /en/barosaurus/-/base/dinosaur:/base/dinosaur/dinosaur))")) - questionWriter.println("(lambda $0 /location/location (/base/dinosaur/dinosaur_location@dinosaur_s:t $0 /en/barosaurus:/base/dinosaur/dinosaur))"); - else if (line.equals("(lambda $0 /common/topic (/chemistry/chemical_element@symbol:t /authority/us/gov/hhs/fda/srs-unii/fxs1by2pgl:/chemistry/chemical_element $0))")) - questionWriter.println("(lambda $0 /common/topic (/chemistry/chemical_element@symbol:t /m/025sw5g:/chemistry/chemical_element $0))"); - else if (line.equals("(count $0 (/amusement_parks/park@annual_visits:t /en/magic_kingdom:/amusement_parks/park $0))")) - questionWriter.println("(lambda $0 /type/int (/amusement_parks/park@annual_visits:t /en/magic_kingdom:/amusement_parks/park $0))"); - else if (line.equals("(lambda $0 /common/topic (/organization/organization@slogan:t /business/cik/0001011006:/organization/organization $0))")) - questionWriter.println("(lambda $0 /common/topic (/organization/organization@slogan:t /m/019rl6:/organization/organization $0))"); - else if (line.equals("(lambda $0 /location/location (/location/location@containedby:t /base/usnris/item/86000083:/location/location $0))")) - questionWriter.println("(lambda $0 /location/location (/location/location@containedby:t /m/019zhn:/location/location $0))"); - else if (line.equals("(lambda $0 /common/topic (/opera/opera@language:t /base/imslp/65847:/opera/opera $0))")) - questionWriter.println("(lambda $0 /common/topic (/opera/opera@language:t /m/09hvx:/opera/opera $0))"); - else if (line.equals("(lambda $0 /people/person (/film/film@costume_design_by:t /source/allocine/fr/film/132663:/film/film $0))")) - 
questionWriter.println("(lambda $0 /people/person (/film/film@costume_design_by:t /m/04jpg2p:/film/film $0))"); - else if (line.equals("(lambda $0 /people/person (/architecture/architectural_style@architects:t /en/bauhaus:/architecture/architectural_style $0))")) - questionWriter.println("(lambda $0 /people/person (/architecture/architectural_style@architects:t /en/international_style:/architecture/architectural_style $0))"); - else if (line.equals("(lambda $0 /common/topic (/astronomy/star@temperature_k:t /en/polaris:/astronomy/star $0))")) - questionWriter.println("(lambda $0 /common/topic (/astronomy/star@temperature_k:t /m/0kjyrc7:/astronomy/star $0))"); - else if (line.equals("(lambda $0 /common/topic (/book/literary_series@fictional_universe:t /en/the_lord_of_the_rings:/book/literary_series $0))")) - questionWriter.println("(lambda $0 /common/topic (/fictional_universe/work_of_fiction@setting:t /en/the_lord_of_the_rings:/book/literary_series $0))"); - else if (line.equals("(lambda $0 /common/topic (/chemistry/chemical_element@melting_point:t /quotationsbook/subject/gold:/chemistry/chemical_element $0))")) - questionWriter.println("(lambda $0 /common/topic (/chemistry/chemical_element@melting_point:t /m/025rs2z:/chemistry/chemical_element $0))"); - else if (line.equals("(lambda $0 /type/datetime (exists $1 (exists $2 (exists $3 (/award/award_honor@notes_description@year@award@award_winner@honored_for@ceremony:t $1 $0 /en/guardian_first_book_award:/award/award_category $2 /en/everything_is_illuminated:/award/award_winning_work $3)))))")) - questionWriter.println("(lambda $0 /type/datetime (exists $1 (exists $2 (exists $3 (/award/award_honor@notes_description@year@award@award_winner@honored_for@ceremony:t $1 $0 /en/guardian_first_book_award:/award/award_category $2 $3 /en/everything_is_illuminated:/award/award_winning_work $4)))))"); - else if (line.equals("(lambda $0 /type/datetime (exists $1 (exists $2 (exists $3 
(/award/award_honor@notes_description@year@award@award_winner@honored_for@ceremony:t $1 $0 /en/hugo_award_for_best_novel:/award/award_category $2 /en/harry_potter_and_the_goblet_of_fire:/award/award_winning_work $3)))))")) - questionWriter.println("(lambda $0 /type/datetime (exists $1 (exists $2 (exists $3 (/award/award_honor@notes_description@year@award@award_winner@honored_for@ceremony:t $1 $0 /en/hugo_award_for_best_novel:/award/award_category $2 $3 /en/harry_potter_and_the_goblet_of_fire:/award/award_winning_work $3)))))"); - else if (line.equals("(lambda $0 /common/topic (exists $1 (/american_football/football_roster_position@team@player@position@number:t /en/baltimore_ravens:/american_football/football_team /en/ray_lewis:/american_football/football_player $0 $1)))")) - questionWriter.println("(lambda $0 /common/topic (exists $1 (/american_football/football_historical_roster_position@team@player@position_s@number:t /en/baltimore_ravens:/american_football/football_team /en/ray_lewis:/american_football/football_player $0 $1)))"); - else if (line.equals("(lambda $0 /common/topic (exists $1 (exists $2 (exists $3 (/award/award_nomination@award@year@award_nominee@nominated_for@notes_description:t /en/peoples_choice_award_for_favorite_comedy_movie:/award/award_category $1 $2 $0 $3)))))")) - questionWriter.println("(lambda $0 /common/topic (exists $1 (exists $2 (exists $3 (/award/award_nomination@award@year@award_nominee@nominated_for@notes_description:t /en/peoples_choice_award_for_favorite_comedy_movie:/award/award_category $1 $2 $3 $0 $4)))))"); - else if (line.equals("(lambda $0 /type/int (exists $1 (/award/ranking@rank@year@note@list@item:t $0 /type/datetime/2010:/type/datetime $1 /en/fortune_500:/award/ranked_list /en/target_corporation:/award/ranked_item)))")) - questionWriter.println("(lambda $0 /type/int (exists $1 (/award/ranking@rank@year@note@list@item:t $0 /type/datetime/2010:/type/datetime $1 $2 /en/fortune_500:/award/ranked_list 
/en/target_corporation:/award/ranked_item)))"); - else if (line.equals("(lambda $0 /type/int (exists $1 (/baseball/baseball_roster_position@position@team@player@number:t $1 /en/boston_red_sox:/baseball/baseball_team /en/kevin_youkilis:/baseball/baseball_player $0)))")) - questionWriter.println("(lambda $0 /type/int (exists $1 (/sports/sports_team_roster@position@team@player@number:t $1 /en/boston_red_sox:/baseball/baseball_team /en/kevin_youkilis:/baseball/baseball_player $0)))"); - else if (line.equals("(lambda $0 /common/topic (exists $1 (exists $2 (/basketball/basketball_roster_position@number@player@position@team:t $1 /en/keyon_dooling:/basketball/basketball_player $0 $2))))")) - questionWriter.println("(lambda $0 /common/topic (/basketball/basketball_player@position_s:t /en/keyon_dooling:/basketball/basketball_player $0))"); - else if (line.equals("(lambda $0 /common/topic (exists $1 (exists $2 (/business/sponsorship@sponsored_by@from@to@sponsored_recipient:t $0 $1 $2 /en/gatorade:/business/sponsored_recipient))))")) - questionWriter.println("(lambda $0 /common/topic (exists $1 (exists $2 (/business/sponsorship@sponsored_by@from@to@sponsored_recipient:t /en/gatorade:/business/sponsored_recipient $1 $2 $3 $0))))"); - else if (line.equals("(lambda $0 /common/topic (/freebase/domain_profile@category:t /film:/freebase/domain_profile $0))")) - questionWriter.println("(lambda $0 /common/topic (/freebase/domain_profile@category:t /m/010s:/freebase/domain_profile $0))"); - else if (line.equals("(count $0 (exists $1 (exists $2 (/soccer/football_league_participation@team@league@from@to:t $0 /en/uefa:/soccer/football_league $1 $2))))")) - questionWriter.println("(count $0 (exists $1 (exists $2 (/sports/sports_league_participation@team@league@from@to:t $0 /en/uefa:/soccer/football_league $1 $2))))"); - else if (line.equals("(lambda $0 /common/topic (exists $1 (exists $2 (exists $3 (/tv/tv_regular_personal_appearance@to@from@appearance_type@person@program@seasons:t $1 $2 
/en/newscaster:/tv/non_character_role $0 /en/abc_news:/tv/tv_program $3)))))")) - questionWriter.println("(lambda $0 /common/topic (exists $1 (exists $2 (exists $3 (/business/employment_tenure@person@company@title:t $0 /en/abc_news:/tv/tv_program $1 /en/news_presenter:/tv/non_character_role $2)))))"); - else if (line.equals("(lambda $0 /people/person (exists $1 (exists $2 (exists $3 (exists $4 (/organization/leadership@person@title@as_of_date@organization@role@from@to:t $0 /en/chief_executive_officer:/type/text $1 /en/apple_inc:/organization/organization $2 $3 $4))))))")) - questionWriter.println("(lambda $0 /people/person (exists $1 (exists $2 (exists $3 (exists $4 (/organization/leadership@person@title@as_of_date@organization@role@from@to:t $0 $1 $2 /en/apple_inc:/organization/organization $3 /en/chief_executive_officer:/type/text $4 $5))))))"); - else if (line.contains("/en/the_nutty_professor")) { - String replaceLine = line.replace("/en/the_nutty_professor", "/en/the_nutty_professor_1996"); - questionWriter.println(replaceLine); - } else if (line.contains("/base/boxing")) { - String replaceLine = line.replace("/base/boxing", "/boxing"); - questionWriter.println(replaceLine); - } else questionWriter.println(line); - } - questionWriter.close(); - - PrintWriter npWriter = IOUtils.getPrintWriter(outNpFile); - for (String line : IOUtils.readLines(inNpFile)) { - if (line.equals("firefly :- NP : /base/ranker/rankerurlname/firefly$002F143400:/tv/tv_program")) - npWriter.println("firefly :- NP : /m/014v3t:/tv/tv_program"); - else if (line.equals("manhattan bridge :- NP : /en/suspension_bridge:/transportation/bridge")) - npWriter.println("manhattan bridge :- NP : /en/manhattan_bridge:/transportation/bridge"); - else if (line.equals("the beastie boys :- NP : /m/0116j8:/music/track")) - npWriter.println("the beastie boys :- NP : /m/0116j8:/music/recording"); - else if (line.equals("sabotage :- NP : /m/0l16j8:/music/track")) - npWriter.println("sabotage :- NP : 
/m/0l16j8:/music/recording"); - else if (line.equals("travels with my cello :- NP : /m/03crp32:/book/written_work")) - npWriter.println("travels with my cello :- NP : /m/067y_k7:/book/written_work"); - else if (line.equals("film actor :- NP : /film/film_actor:/freebase/type_profile")) - npWriter.println("film actor :- NP : /film/actor:/freebase/type_profile"); - else if (line.equals("invertigo :- NP : /m/03mfjrv:/amusement_parks/ride")) - npWriter.println("invertigo :- NP : /m/0flmt0:/amusement_parks/ride"); - else if (line.equals("ceratopsia :- NP : /en/ceratopsia/-/base/dinosaur:/base/dinosaur/dinosaur")) - npWriter.println("ceratopsia :- NP : /en/ceratopsia:/base/dinosaur/dinosaur"); - else if (line.equals("barosaurus :- NP : /en/barosaurus/-/base/dinosaur:/base/dinosaur/dinosaur")) - npWriter.println("barosaurus :- NP : /en/barosaurus:/base/dinosaur/dinosaur"); - else if (line.equals("mercury :- NP : /authority/us/gov/hhs/fda/srs-unii/FXS1BY2PGL:/chemistry/chemical_element")) - npWriter.println("mercury :- NP : /m/025sw5g:/chemistry/chemical_element"); - else if (line.equals("knickerbockers :- NP : /fashion/garment:/fashion/garment")) - npWriter.println("knickerbockers :- NP : /en/knickerbockers:/fashion/garment"); - else if (line.equals("yahoo! :- NP : /business/cik/0001011006:/organization/organization")) - npWriter.println("yahoo! 
:- NP : /m/019rl6:/organization/organization"); - else if (line.equals("uss alabama :- NP : /base/usnris/item/86000083:/location/location")) - npWriter.println("uss alabama :- NP : /m/019zhn:/location/location"); - else if (line.equals("lohengrin :- NP : /base/imslp/65847:/opera/opera")) - npWriter.println("lohengrin :- NP : /m/09hvx:/opera/opera"); - else if (line.equals("alice in wonderland :- NP : /source/allocine/fr/film/132663:/film/film")) - npWriter.println("alice in wonderland :- NP : /m/04jpg2p:/film/film"); - else if (line.equals("bauhaus :- NP : /en/bauhaus:/architecture/architectural_style")) - npWriter.println("bauhaus :- NP : /en/international_style:/architecture/architectural_style"); - else if (line.equals("polaris :- NP : /en/polaris:/astronomy/star")) - npWriter.println("polaris :- NP : /m/0kjyrc7:/astronomy/star"); - else if (line.equals("mitchell public library :- NP : /en/mitchell_public_library:/library/public_library")) - npWriter.println("mitchell public library :- NP : /m/0j9by57:/library/public_library"); - else if (line.equals("gold :- NP : /quotationsbook/subject/gold:/chemistry/chemical_element")) - npWriter.println("gold :- NP : /m/025rs2z:/chemistry/chemical_element"); - else if (line.equals("nutty professor :- NP : /en/the_nutty_professor:/film/film")) - npWriter.println("nutty professor :- NP : /en/the_nutty_professor_1996:/film/film"); - else if (line.equals("film domain :- NP : /film:/freebase/domain_profile")) - npWriter.println("film domain :- NP : /m/010s:/freebase/domain_profile"); - else if (line.equals("newscaster :- NP : /en/newscaster:/tv/non_character_role")) - npWriter.println("newscaster :- NP : /en/news_presenter:/tv/non_character_role"); - else if (line.matches("ali.*frazier ii :- NP : /en/ali-frazier_ii:/base/boxing/boxing_match")) { - } else - npWriter.println(line); - } - npWriter.println("the battle of the champions :- NP : /m/0kvlz:/boxing/boxing_match"); - npWriter.println("wba world champion :- NP : 
/m/0chgh2j:/boxing/boxing_title"); - npWriter.println("muhammad ali vs. joe frazier ii :- NP : /en/ali-frazier_ii:/boxing/boxing_match"); - npWriter.close(); - } - - // TODO - handle all entities that do not start with fb:m. or fb:en. - private void convertExampleFile(String inFile, String outPrefix) throws IOException { - - PrintWriter formulaWriter = IOUtils.getPrintWriter(outPrefix + ".formulas"); - BufferedReader reader = IOUtils.getBufferedFileReader(inFile); - List examples = new ArrayList(); - String line = reader.readLine(); - while (line != null) { - Example example = new Example.Builder() - .setUtterance(line) - .setTargetFormula(processFree917LogicalForm(reader.readLine())) - .createExample(); - line = reader.readLine(); - line = reader.readLine(); - examples.add(example.toJson()); - formulaWriter.println(example.targetFormula); - } - LogInfo.log("Arg count distribution: " + argnumCounter); - reader.close(); - formulaWriter.close(); - - - int split = (int) (0.7 * examples.size()); - int[] perm = SampleUtils.samplePermutation(new Random(1), examples.size()); - List train = new ArrayList(); - List test = new ArrayList(); - for (int i = 0; i < split; i++) - train.add(examples.get(perm[i])); - for (int i = split; i < examples.size(); i++) - test.add(examples.get(perm[i])); - printToFile(outPrefix + ".train.examples", train); - printToFile(outPrefix + ".test.examples", test); - printToFile(outPrefix + ".examples", examples); - } - - private void printToFile(String fileName, List examples) throws IOException { - PrintWriter exampleWriter = IOUtils.getPrintWriter(fileName); - for (String example : examples) { - exampleWriter.println(example); - } - exampleWriter.close(); - } - - private Formula processFree917LogicalForm(String free917LogicalForm) { - - LispTree tree = LispTree.proto.parseFromString(free917LogicalForm); - if (tree.child(0).value.equals("lambda")) { - // error check - if (!tree.child(1).value.equals("$0") || tree.children.size() != 4) - throw new 
RuntimeException("Illegal lambda expression: " + free917LogicalForm); - return handleLambda(tree); - } else if (tree.child(0).value.equals("count")) { - if (!tree.child(1).value.equals("$0") || tree.children.size() != 3) - throw new RuntimeException("Illegal lambda expression: " + free917LogicalForm); - return handleCount(tree); - } else if (tree.child(0).value.startsWith("/")) { - return handleAsk(tree); - } else - throw new RuntimeException("Unknown free917 logical form: " + free917LogicalForm); - } - - private Formula handleLambda(LispTree tree) { - return handleBody(tree.child(3)); - } - - private Formula handleCount(LispTree tree) { - Formula formula = handleBody(tree.child(2)); - if (formula == null) - return null; - return new AggregateFormula(AggregateFormula.Mode.count, formula); - } - - private Formula handleBody(LispTree tree) { - Map argToPredMap = new HashMap(); - handleBodyRecurse(tree, argToPredMap); - return generateFormula(argToPredMap); - } - - private Formula generateFormula(Map argToPredMap) { - - if (argToPredMap.size() == 1) { - String arg = argToPredMap.keySet().iterator().next(); - String pred = argToPredMap.get(arg); - - BinaryFormulaInfo info = formulaInfo.getBinaryInfo(Formulas.fromLispTree(LispTree.proto.parseFromString(pred))); - if (info != null) { - String type = formulaInfo.getBinaryInfo(Formulas.fromLispTree(LispTree.proto.parseFromString(pred))).expectedType1; - if (cvts.contains(type)) { - return fixCvtFormulas(pred, arg); - } - } - - return new JoinFormula(pred, getArgFormula(arg)); - } else { - if (argToPredMap.get("target") == null) - throw new RuntimeException("target is null: " + argToPredMap); - - Formula argsFormula = conjunctArgs(argToPredMap); - Formula targetFormula = Formulas.reverseFormula( - new ValueFormula(new NameValue(argToPredMap.get("target")))); - Formula res = new JoinFormula(targetFormula, argsFormula); - return res; - } - } - - private Formula fixCvtFormulas(String pred, String arg) { - - Formula join = new 
JoinFormula(pred, getArgFormula(arg)); - if (pred.equals("!fb:automotive.trim_level.msrp") || pred.equals("!fb:event.disaster.damage") || - pred.equals("!fb:comic_books.comic_book_issue.cover_price")) { - return new JoinFormula("!fb:measurement_unit.money_value.amount", join); - } else if (pred.equals("!fb:celebrities.celebrity.net_worth") || pred.equals("!fb:projects.project.actual_cost") - || pred.equals("!fb:digicams.digital_camera.street_price") || pred.equals("!fb:amusement_parks.ride.cost")) { - return new JoinFormula("!fb:measurement_unit.dated_money_value.amount", join); - } else if (pred.equals("!fb:computer.software.compatible_oses")) { - return new JoinFormula("!fb:computer.software_compatibility.operating_system", join); - } else if (pred.equals("!fb:finance.stock_exchange.companies_traded")) { - return new JoinFormula("!fb:business.stock_ticker_symbol.ticker_symbol", join); - } else if (pred.equals("!fb:medicine.hospital.beds") || pred.equals("!fb:metropolitan_transit.transit_system.daily_riders") - || pred.equals("!fb:library.public_library_system.collection_size") || pred.equals("!fb:library.public_library_system.annual_visits") - || pred.equals("!fb:protected_sites.protected_site.annual_visitors") || pred.equals("!fb:amusement_parks.park.annual_visits") - || pred.equals("!fb:education.educational_institution.total_enrollment") - || pred.equals("!fb:religion.religion.number_of_adherents")) { - return new JoinFormula("!fb:measurement_unit.dated_integer.number", join); - } else if (pred.equals("!fb:business.employer.employees")) { - return new JoinFormula("!fb:business.employment_tenure.person", join); - } else if (pred.equals("!fb:tv.tv_series_episode.producers")) { - return new JoinFormula("!fb:tv.tv_producer_episode_credit.producer", join); - } else if (pred.equals("!fb:book.periodical.frequency_or_issues_per_year")) { - return new JoinFormula("!fb:book.periodical_frequency.issues_per_year", join); - } else if 
(pred.equals("!fb:book.periodical.first_issue_date")) { - return new JoinFormula("!fb:book.periodical_publication_date.date", join); - } else if (pred.equals("!fb:games.game.number_of_players") || pred.equals("!fb:aviation.aircraft_model.passengers")) { - return new JoinFormula("!fb:measurement_unit.integer_range.high_value", join); - } else if (pred.equals("!fb:military.armed_force.personnel")) { - return new JoinFormula("!fb:military.military_service.military_person", join); - } else if (pred.equals("!fb:business.consumer_company.products")) { - return new JoinFormula("!fb:business.company_product_relationship.consumer_product", join); - } else if (pred.equals("!fb:location.location.geolocation")) { - return new JoinFormula("!fb:location.geocode.longitude", join); - } - return new JoinFormula(pred, getArgFormula(arg)); - - } - - private Formula conjunctArgs(Map argToPredMap) { - - List pivots = new ArrayList(); - for (String arg : argToPredMap.keySet()) { - if (!arg.equals("target")) - pivots.add(constructJoin(arg, argToPredMap.get(arg))); - } - Formula res = pivots.get(0); - if (pivots.size() == 1) return res; - for (int i = 1; i < pivots.size(); ++i) - res = new MergeFormula(Mode.and, res, pivots.get(i)); - return res; - } - - private Formula getArgFormula(String arg) { - if (arg.startsWith("fb:")) - return new ValueFormula(new NameValue(arg)); - if (arg.startsWith("DATE::")) { - String[] tokens = arg.split("::"); - return new ValueFormula(DateValue.parseDateValue(tokens[1])); - } - // TODO make sure ints and booleans work - if (arg.startsWith("INT::")) { - String[] tokens = arg.split("::"); - return new ValueFormula(new NumberValue(Double.parseDouble(tokens[1]), NumberValue.unitless)); - } - if (arg.startsWith("BOOL::")) { - String[] tokens = arg.split("::"); - return new ValueFormula(new NameValue(tokens[1])); - } - if (arg.startsWith("TEXT::")) { - String[] tokens = arg.split("::"); - return new ValueFormula(new StringValue(tokens[1])); - } - throw new 
RuntimeException("Unknown arg: " + arg); - } - - private JoinFormula constructJoin(String arg, String pred) { - VariableFormula var = new VariableFormula("x"); - JoinFormula join = new JoinFormula(pred, var); - LambdaFormula lambda = new LambdaFormula("x", join); - return new JoinFormula(lambda, getArgFormula(arg)); - } - - private void handleBodyRecurse(LispTree tree, Map argToPredMap) { - if (tree.child(0).value.equals("exists")) { - if (tree.children.size() != 3) - throw new RuntimeException("bad exists clause: " + tree); - handleBodyRecurse(tree.child(2), argToPredMap); - } else { - if (!tree.child(0).value.startsWith("/")) - throw new RuntimeException("bad exists clause: " + tree); - - // parse the relation - - String predicate = tree.child(0).value; - String[] predTokens = predicate.substring(0, predicate.lastIndexOf(':')).split("@"); - if (predTokens.length <= 1) - throw new RuntimeException("Bad body: " + tree); - - String fbType = constructFbType(predTokens[0]); - List fbRelations = constructFbRelations(predTokens, fbType); - - // parse the arguments - if (predTokens.length == 2) { - if (tree.child(1).value.equals("$0")) - argToPredMap.put(parseEntity(tree.child(2).value), fbRelations.get(0)); - else if (tree.child(2).value.equals("$0")) - argToPredMap.put(parseEntity(tree.child(1).value), "!" 
+ fbRelations.get(0)); - else throw new RuntimeException("bad non-cvt tree: " + tree); - } else { - List fbArguments = constructFbArguments(tree); - for (int i = 0; i < fbArguments.size(); ++i) { - String fbRelation = fbRelations.get(i); - if (fbArguments.get(i).equals("$0")) - argToPredMap.put("target", fbRelation); - else if (!fbArguments.get(i).startsWith("$")) { - argToPredMap.put(fbArguments.get(i), fbRelation); - } - } - } - argnumCounter.incrementCount(argToPredMap.size()); - } - } - - private List constructFbArguments(LispTree tree) { - boolean lastName = false; - List res = new ArrayList(); - for (int j = 1; j < tree.children.size(); ++j) { - - if (tree.child(j).value.equals("$0")) { - res.add("$0"); - lastName = false; - } else if (tree.child(j).value.startsWith("/")) { - String entity = parseEntity(tree.child(j).value); - res.add(entity); - lastName = true; - } else { - if (!lastName) - res.add(tree.child(j).value); - lastName = false; - } - } - return res; - } - - private List constructFbRelations(String[] predTokens, String fbType) { - - List res = new ArrayList(); - if (predTokens[0].contains("&")) { - String relation = "!" + FormatConverter.fromSlashToDot(predTokens[0].substring(0, predTokens[0].indexOf('&')), true); - res.add(relation); - } - for (int j = 1; j < predTokens.length; ++j) - res.add(fbType + "." 
+ predTokens[j]); - return res; - } - - private String constructFbType(String predHead) { - if (predHead.contains("&")) - return FormatConverter.fromSlashToDot(predHead.substring(predHead.indexOf('&') + 1), true); - return FormatConverter.fromSlashToDot(predHead, true); - - } - - private String parseEntity(String value) { - - if (value.startsWith("/type/datetime/")) { - String[] tokens = value.split(":"); - String date = tokens[0].substring(tokens[0].lastIndexOf('/') + 1); - return "DATE::" + date; - } else if (value.startsWith("/type/boolean/")) { - String[] tokens = value.split(":"); - String b = tokens[0].substring(tokens[0].lastIndexOf('/') + 1); - return "BOOL::" + b; - } else if (value.startsWith("/type/int/")) { - String[] tokens = value.split(":"); - String i = tokens[0].substring(tokens[0].lastIndexOf('/') + 1); - return "INT::" + i; - } else if (value.startsWith("/type/text/")) { - String[] tokens = value.split(":"); - String i = tokens[0].substring(tokens[0].lastIndexOf('/') + 1); - return "TEXT::" + i; - } - return FormatConverter.fromSlashToDot(value.substring(0, value.indexOf(':')), true); - } - - /** There is one example and this method is tailored for that */ - private Formula handleAsk(LispTree tree) { - String pred = tree.child(0).value; - ValueFormula arg1 = Formulas.newNameFormula(parseEntity(tree.child(1).value)); - ValueFormula arg2 = Formulas.newNameFormula(parseEntity(tree.child(2).value)); - pred = pred.replace('@', '/'); - pred = pred.substring(0, pred.lastIndexOf(':')); - pred = FormatConverter.fromSlashToDot(pred, true); - ValueFormula predFormula = Formulas.newNameFormula(pred); - JoinFormula pf = new JoinFormula(predFormula, arg2); - MergeFormula mf = new MergeFormula(Mode.and, arg1, pf); - return mf; - } - - public void genreateEntityInfoFile(String free917EntityFile, String entityInfoFile, String outFile, String missingEntitiesFile) throws IOException { - - Map midToIdMap = FileUtils.loadStringToStringMap(midToIdFile); - - Map> 
idToNameMap = new HashMap>(); - for (String line : IOUtils.readLines(free917EntityFile)) { - - String[] tokens = line.split(":-"); - String name = tokens[0].trim().replace('-', ' '); - name = name.replace("#", "# "); - name = name.replace("!", " !"); - name = name.replace("'", " '"); - name = name.replace(",", " ,"); - name = name.replace(":", " :"); - String entry = tokens[1].trim(); - String[] entryTokens = entry.split(":"); - String id = entryTokens[1].trim(); - id = FormatConverter.fromSlashToDot(id, true); - - if (midToIdMap.containsKey(id)) - id = midToIdMap.get(id); - - MapUtils.add(idToNameMap, id, name); - } - LogInfo.log("Number of entries: " + idToNameMap.size()); - - PrintWriter writer = IOUtils.getPrintWriter(outFile); - int i = 0; - for (String line : IOUtils.readLines(entityInfoFile)) { - String[] tokens = line.split("\t"); - String id = tokens[1]; - - if (idToNameMap.containsKey(id)) { - - if (idToNameMap.get(id).size() > 1) - System.out.println("Multiple names: " + idToNameMap.get(id)); - for (String name : idToNameMap.get(id)) { - if (name.equals("beer")) { - tokens[3] = "beer"; - writer.println(StringUtils.join(tokens, "\t")); - tokens[3] = "beers"; - writer.println(StringUtils.join(tokens, "\t")); - } else if (name.equals("film actor")) { - tokens[3] = "film actor"; - writer.println(StringUtils.join(tokens, "\t")); - tokens[3] = "film actors"; - writer.println(StringUtils.join(tokens, "\t")); - } else { - tokens[3] = name; - writer.println(StringUtils.join(tokens, "\t")); - } - } - idToNameMap.remove(id); - } - if (i % 1000000 == 0) - System.out.println("Lines: " + i++); - i++; - } - writer.close(); - PrintWriter missingWriter = IOUtils.getPrintWriter(missingEntitiesFile); - for (String id : idToNameMap.keySet()) { - if (id.startsWith("fb:type") || id.startsWith("fb:un.")) - continue; - missingWriter.println(id + "\t" + idToNameMap.get(id)); - } - missingWriter.close(); - } -} diff --git 
a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/FreebaseInfo.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/FreebaseInfo.java deleted file mode 100644 index d5ba121cb4..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/FreebaseInfo.java +++ /dev/null @@ -1,293 +0,0 @@ -package edu.stanford.nlp.sempre.freebase; - -import com.google.common.collect.BiMap; -import com.google.common.collect.HashBiMap; - -import edu.stanford.nlp.sempre.freebase.FbFormulasInfo.BinaryFormulaInfo; -import edu.stanford.nlp.sempre.freebase.FbFormulasInfo.UnaryFormulaInfo; -import edu.stanford.nlp.sempre.*; -import fig.basic.*; - -import java.io.BufferedReader; -import java.io.IOException; -import java.util.*; - -/** - * Class for keeping info from Freebase schema - * @author jonathanberant - */ -public final class FreebaseInfo { - private static FreebaseInfo singleton; - public static FreebaseInfo getSingleton() { - if (singleton == null) singleton = new FreebaseInfo(); - return singleton; - } - - public static class Options { - @Option(gloss = "ttl file with schema information") - public String schemaPath = "lib/fb_data/93.exec/schema2.ttl"; - } - public static Options opts = new Options(); - - // any - // - number (boolean, int, float, date) - // - text - // - entity (people, loc, org, ...) 
- // - cvt - - // Concrete primitive types - public static final String BOOLEAN = CanonicalNames.BOOLEAN; - public static final String INT = CanonicalNames.INT; - public static final String FLOAT = CanonicalNames.FLOAT; - public static final String DATE = CanonicalNames.DATE; - public static final String TEXT = CanonicalNames.TEXT; - public static final String NUMBER = CanonicalNames.NUMBER; - public static final String ENTITY = CanonicalNames.ENTITY; - public static final String ANY = CanonicalNames.ANY; - - // Common entity types - public static final String PERSON = "fb:people.person"; - - // Non-standard abstract types - public static final String CVT = "fb:type.cvt"; - - // Common relations - public static final String TYPE = CanonicalNames.TYPE; - public static final String NAME = CanonicalNames.NAME; - public static final String PROF = "fb:people.person.profession"; - public static final String ALIAS = "fb:common.topic.alias"; - - // mapping from master property to its opposite (e.g., fb:people.person.place_of_birth => fb:location.location.people_born_here) - private BiMap masterToOppositeMap = HashBiMap.create(); - - private Set cvts = new HashSet<>(); - private Map type1Map = new HashMap<>(); // property => type of arg1 - private Map type2Map = new HashMap<>(); // property => type of arg2 - private Map unit2Map = new HashMap<>(); // property => unit of arg2 (if exists) - private Map> bDescriptionsMap = new HashMap<>(); // property => descriptions - private Map bPopularityMap = new HashMap<>(); // property => popularity - // unary maps - private Map professionPopularityMap = new HashMap<>(); // property => popularity - private Map typePopularityMap = new HashMap<>(); // property => popularity - private Map> professionDescriptionsMap = new HashMap<>(); // property => descriptions - private Map> typeDescriptionsMap = new HashMap<>(); // property => descriptions - - private Map nameMap = new HashMap(); // id => name of id - - public String getArg1Type(String 
property) { return type1Map.get(property); } - public String getArg2Type(String property) { return type2Map.get(property); } - - private FreebaseInfo() { - try { - readSchema(); - } catch (NumberFormatException e) { - throw new RuntimeException(e); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - /** - * Go over schema twice - once to populate all fields except descriptions, the second time we populate descriptions after we now what - * are the properties we are interested in - * @throws NumberFormatException - * @throws IOException - */ - public void readSchema() throws IOException { - LogInfo.begin_track("Loading Freebase schema: %s", opts.schemaPath); - BufferedReader in = IOUtils.openInHard(opts.schemaPath); - - // Include mediator types - SemTypeHierarchy.singleton.addSupertype(CVT, CVT); - SemTypeHierarchy.singleton.addSupertype(CVT, ANY); - - String line; - while ((line = in.readLine()) != null) { - String[] tokens = edu.stanford.nlp.sempre.freebase.Utils.parseTriple(line); - if (tokens == null) continue; - String arg1 = tokens[0]; - String property = tokens[1]; - String arg2 = tokens[2]; - - if (property.equals("fb:type.property.reverse_property")) { // reverse_property => opposite_property - // Duplicates logically really shouldn't happen but the Freebase RDF - // reverse properties are not 1:1. We should monitor this and make - // sure we don't lose any alignments. 
- if (masterToOppositeMap.containsKey(arg1)) { - // LogInfo.errors("arg1 exists multiple times: %s", line); - continue; - } - if (masterToOppositeMap.inverse().containsKey(arg2)) { - // LogInfo.errors("arg2 exists multiple times: %s", line); - continue; - } - masterToOppositeMap.put(arg1, arg2); - } else if (property.equals("fb:freebase.type_hints.included_types")) { // included_types => supertypes - SemTypeHierarchy.singleton.addSupertype(arg1, arg2); - SemTypeHierarchy.singleton.addEntitySupertypes(arg1); - SemTypeHierarchy.singleton.addEntitySupertypes(arg2); - } else if (property.equals("fb:freebase.type_hints.mediator")) { // mediator => cvt - if (arg2.equals("\"true\"^^xsd:boolean")) cvts.add(arg1); - else if (arg2.equals("\"false\"^^xsd:boolean")) cvts.remove(arg1); - else throw new RuntimeException("Invalid xsd:boolean: " + arg2); - } else if (property.equals("fb:type.property.schema")) { // schema => type1 - if (type1Map.containsKey(arg1)) - LogInfo.errors("%s already has type1 %s, assigning %s", arg1, type1Map.get(arg1), arg2); - type1Map.put(arg1, arg2); - } else if (property.equals("fb:type.property.expected_type")) { // expected_type => type2 - if (type2Map.containsKey(arg1)) - LogInfo.errors("%s already has type2 %s, assigning %s", arg1, type2Map.get(arg1), arg2); - type2Map.put(arg1, arg2); - } else if (property.equals("fb:type.property.unit")) { - unit2Map.put(arg1, arg2); - } else if (property.equals("fb:user.custom.type.property.num_instances")) { - bPopularityMap.put(arg1, edu.stanford.nlp.sempre.freebase.Utils.parseInt(arg2)); - } else if (property.equals("fb:user.custom.people.person.profession.num_instances")) { - professionPopularityMap.put(arg1, edu.stanford.nlp.sempre.freebase.Utils.parseInt(arg2)); - } else if (property.equals("fb:user.custom.type.object.type.num_instances")) { - typePopularityMap.put(arg1, edu.stanford.nlp.sempre.freebase.Utils.parseInt(arg2)); - } - } - in.close(); - - // Second iteration - populate descriptions assumes 
all properties have the fb:type.property.num_instances field - in = IOUtils.openInHard(opts.schemaPath); - while ((line = in.readLine()) != null) { - String[] tokens = edu.stanford.nlp.sempre.freebase.Utils.parseTriple(line); - if (tokens == null) continue; - String arg1 = tokens[0]; - String property = tokens[1]; - String arg2 = tokens[2]; - - if (property.equals(NAME) || property.equals(ALIAS)) { - if (bPopularityMap.containsKey(arg1)) { - MapUtils.addToList(bDescriptionsMap, arg1, edu.stanford.nlp.sempre.freebase.Utils.parseStr(arg2).toLowerCase()); - } else if (professionPopularityMap.containsKey(arg1)) { - MapUtils.addToList(professionDescriptionsMap, arg1, edu.stanford.nlp.sempre.freebase.Utils.parseStr(arg2).toLowerCase()); - } else if (typePopularityMap.containsKey(arg1)) { - MapUtils.addToList(typeDescriptionsMap, arg1, edu.stanford.nlp.sempre.freebase.Utils.parseStr(arg2).toLowerCase()); - } - } - - if (property.equals(NAME)) - nameMap.put(arg1, edu.stanford.nlp.sempre.freebase.Utils.parseStr(arg2)); - } - LogInfo.logs("%d CVTs, (%d,%d) property types, %d property units", cvts.size(), type1Map.size(), type2Map.size(), unit2Map.size()); - LogInfo.end_track(); - } - - public Map createBinaryFormulaInfoMap() { - - Map res = new HashMap<>(); - for (String property : bPopularityMap.keySet()) { - Formula f = Formulas.fromLispTree(LispTree.proto.parseFromString(property)); - BinaryFormulaInfo info = new BinaryFormulaInfo(f, type1Map.get(property), type2Map.get(property), unit2Map.get(property), "", bDescriptionsMap.get(property), bPopularityMap.get(property)); - if (!info.isComplete()) { - continue; - } - res.put(f, info); - } - return res; - } - - public Map createUnaryFormulaInfoMap() { - - Map res = new HashMap(); - // professions - for (String profession : professionPopularityMap.keySet()) { - Formula f = new JoinFormula(PROF, new ValueFormula(new NameValue(profession))); - UnaryFormulaInfo info = new UnaryFormulaInfo(f, 
professionPopularityMap.get(profession), - MapUtils.get(professionDescriptionsMap, profession, new LinkedList()), - Collections.singleton(PERSON)); - if (!info.isComplete()) { - continue; - } - res.put(f, info); - } - // types - for (String type : typePopularityMap.keySet()) { - Formula f = new JoinFormula(TYPE, new ValueFormula(new NameValue(type))); - UnaryFormulaInfo info = new UnaryFormulaInfo(f, typePopularityMap.get(type), - MapUtils.get(typeDescriptionsMap, type, new LinkedList()), - Collections.singleton(type)); - if (!info.isComplete()) { - continue; - } - res.put(f, info); - } - return res; - } - - // fb:people.person.place_of_birth => true - public boolean propertyHasOpposite(String property) { - return masterToOppositeMap.containsKey(property) || masterToOppositeMap.inverse().containsKey(property); - } - // fb:people.person.place_of_birth => fb:location.location.people_born_here - public String getOppositeFbProperty(String property) { - if (masterToOppositeMap.containsKey(property)) - return masterToOppositeMap.get(property); - if (masterToOppositeMap.inverse().containsKey(property)) - return masterToOppositeMap.inverse().get(property); - throw new RuntimeException("Property does not have an opposite: " + property); - } - - public String getUnit1(String property) { return typeToUnit(type1Map.get(property), property); } - public String getUnit2(String property) { return typeToUnit(type2Map.get(property), property); } - - // Get the measurement unit associated with this type. - // If something is not a number, then return something crude (e.g. fb:type.cvt). - // Return null if we don't know anything. 
- public String typeToUnit(String type, String property) { - if (type == null) { - // LogInfo.errors("No type information for property: %s", property); - return null; - } - if (type.equals(INT) || type.equals(FLOAT)) { - String unit = unit2Map.get(property); - if (unit == null) { - // LogInfo.errors("No unit information for property: %s", property); - return NumberValue.unitless; - } - return unit; - } - if (type.equals(BOOLEAN) || type.equals(TEXT) || type.equals(DATE)) // Use the type as the unit - return type; - if (isCvt(type)) return CVT; // CVT - return ENTITY; // Entity - } - - public boolean isCvt(String type) { - return cvts.contains(type); - } - - public String getPropertyName(String property) { - List names = bDescriptionsMap.get(property); - if (names == null) return null; - return names.get(0); - } - - public String getName(String id) { return nameMap.get(id); } - - public static boolean isReverseProperty(String property) { - return CanonicalNames.isReverseProperty(property); - } - public static String reverseProperty(String property) { - return CanonicalNames.reverseProperty(property); - } - - // fb:en.barack_obama => http://rdf.freebase.com/ns/en/barack_obama - public static final String freebaseNamespace = "http://rdf.freebase.com/ns/"; - - public static String id2uri(String id) { - assert id.startsWith("fb:") : id; - return freebaseNamespace + id.substring(3).replaceAll("\\.", "/"); - } - public static String uri2id(String uri) { - if (!uri.startsWith(freebaseNamespace)) { - LogInfo.logs("Warning: invalid Freebase uri: %s", uri); - // Don't do any conversion; this is not necessarily the best thing to do. 
- return uri; - } - return "fb:" + uri.substring(freebaseNamespace.length()).replaceAll("/", "."); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/FreebaseSearch.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/FreebaseSearch.java deleted file mode 100644 index 869d055235..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/FreebaseSearch.java +++ /dev/null @@ -1,161 +0,0 @@ -package edu.stanford.nlp.sempre.freebase; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.cache.StringCache; -import edu.stanford.nlp.sempre.cache.StringCacheUtils; -import fig.basic.*; -import java.io.*; -import java.net.SocketTimeoutException; -import java.net.URL; -import java.net.URLConnection; -import java.net.URLEncoder; -import java.util.*; - -/** - * Takes a string (e.g., "obama") and asks the Freebase Search API. - * Caches if necessary. - * Outputs a set of entities. - * - * @author Percy Liang - */ -public class FreebaseSearch { - public static class Entry { - public Entry(String mid, String id, String name, double score) { - this.mid = mid; - this.id = id; - this.name = name; - this.score = score; - } - - public final String mid; - public final String id; - public final String name; - public final double score; - - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild(mid); - tree.addChild(id == null ? 
"" : id); - tree.addChild(name); - tree.addChild(score + ""); - return tree; - } - - public String toString() { return toLispTree().toString(); } - } - - public static class Options { - @Option(gloss = "Milliseconds to wait until opening connection times out") - public int connectTimeoutMs = 1 * 60 * 1000; - - @Option(gloss = "Milliseconds to wait until reading connection times out") - public int readTimeoutMs = 1 * 60 * 1000; - - @Option(gloss = "API key (needed to get more access)") - public String apiKey; - - @Option(gloss = "Save results of Freebase API search") - public String cachePath; - } - public static Options opts = new Options(); - - private final StringCache cache; - - public class ServerResponse { - public ServerResponse() { this.entries = new ArrayList<>(); this.error = null; } - public ServerResponse(ErrorValue error) { this.entries = null; this.error = error; } - public final List entries; - public final ErrorValue error; - boolean cached; - long timeMs; - } - - public FreebaseSearch() { - if (opts.cachePath != null) - this.cache = StringCacheUtils.create(opts.cachePath); - else - this.cache = null; - } - - @SuppressWarnings("unchecked") - public ServerResponse lookup(String query) { - StopWatch watch = new StopWatch(); - watch.start(); - String output = null; - ServerResponse response = new ServerResponse(); - - // First, try the cache. - if (cache != null) { - output = cache.get(query); - if (output != null) response.cached = true; - } - - // If got nothing, then need to hit the server. 
- if (output == null) { - try { - // Setup the connection - String url = String.format("https://www.googleapis.com/freebase/v1/search?query=%s", URLEncoder.encode(query, "UTF-8")); - if (opts.apiKey != null) url += "&key=" + opts.apiKey; - URLConnection conn = new URL(url).openConnection(); - conn.setConnectTimeout(opts.connectTimeoutMs); - conn.setReadTimeout(opts.readTimeoutMs); - InputStream in = conn.getInputStream(); - - // Read the response - StringBuilder buf = new StringBuilder(); - BufferedReader reader = new BufferedReader(new InputStreamReader(in)); - String line; - while ((line = reader.readLine()) != null) - buf.append(line); - reader.close(); - - // Put the result in the cache - output = buf.toString(); - if (cache != null) - cache.put(query, output); - - } catch (SocketTimeoutException e) { - return new ServerResponse(ErrorValue.timeout); - } catch (IOException e) { - LogInfo.errors("Server exception: %s", e); - if (e.toString().contains("HTTP response code: 408")) - return new ServerResponse(ErrorValue.server408); - if (e.toString().contains("HTTP response code: 500")) - return new ServerResponse(ErrorValue.server500); - throw new RuntimeException(e); // Haven't seen this happen yet... 
- } - } - - // Parse the result - Map results = Json.readMapHard(output); - for (Object resultObj : (List) results.get("result")) { - Map result = (Map) resultObj; - String mid = (String) result.get("mid"); - String id = (String) result.get("id"); - mid = toRDF(mid); - id = toRDF(id); - String name = (String) result.get("name"); - double score = (double) result.get("score"); - response.entries.add(new Entry(mid, id, name, score)); - } - - watch.stop(); - response.timeMs = watch.getCurrTimeLong(); - LogInfo.logs("FreebaseSearch %s => %s results (cached=%s)", query, response.entries.size(), response.cached); - return response; - } - - // /en/barack_obama => fb:en.barack_obama - private String toRDF(String s) { - if (s == null) return s; - return "fb:" + s.substring(1).replaceAll("/", "."); - } - - public static void main(String[] args) { - opts.cachePath = "FreebaseSearch.cache"; - String query = StrUtils.join(args, " "); - query = "obama"; - FreebaseSearch search = new FreebaseSearch(); - LogInfo.logs("%s", search.lookup(query).entries); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/FreebaseTypeLookup.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/FreebaseTypeLookup.java deleted file mode 100644 index 79e60330a9..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/FreebaseTypeLookup.java +++ /dev/null @@ -1,64 +0,0 @@ -package edu.stanford.nlp.sempre.freebase; - -import java.util.*; -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.cache.*; -import fig.basic.*; - -/** - * Provides types of Freebase entities and properties. - * For entities, look them up (requires access to the cache file). - * For properties, just look them up in the FreebaseInfo schema. 
- */ -public class FreebaseTypeLookup implements TypeLookup { - public static class Options { - @Option(gloss = "Cache path to the types path") - public String entityTypesPath; - } - public static Options opts = new Options(); - - // Given those ids, we retrieve the set of types - private static StringCache entityTypesCache; - - public Set getEntityTypes(String entity) { - if (opts.entityTypesPath == null) - return Collections.singleton(FreebaseInfo.ENTITY); - - // Read types from cache - if (entityTypesCache == null) entityTypesCache = StringCacheUtils.create(opts.entityTypesPath); - Set types = new HashSet<>(); - String typesStr = entityTypesCache.get(entity); - if (typesStr != null) { - Collections.addAll(types, typesStr.split(",")); - } else { - types.add(FreebaseInfo.ENTITY); - } - return types; - } - - @Override - public SemType getEntityType(String entity) { - Set types = getEntityTypes(entity); - // Remove supertypes - // TODO(pliang): this is inefficient! - Set resultTypes = new HashSet<>(types); - for (String entityType : types) { - for (String supertype : SemTypeHierarchy.singleton.getSupertypes(entityType)) { - if (!supertype.equals(entityType)) - resultTypes.remove(supertype); - } - } - return SemType.newUnionSemType(resultTypes); - } - - @Override - public SemType getPropertyType(String property) { - // property = fb:location.location.area - // arg1Type = fb:location.location --> becomes retType (head of formula) - // arg2Type = fb:type.float --> becomes argType - FreebaseInfo info = FreebaseInfo.getSingleton(); - String arg1Type = info.getArg1Type(property), arg2Type = info.getArg2Type(property); - if (arg1Type == null || arg2Type == null) return null; - return SemType.newFuncSemType(arg2Type, arg1Type); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/FreebaseValueEvaluator.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/FreebaseValueEvaluator.java deleted file mode 100644 index 
6d04b49415..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/FreebaseValueEvaluator.java +++ /dev/null @@ -1,92 +0,0 @@ -package edu.stanford.nlp.sempre.freebase; - -import fig.basic.*; -import java.util.List; -import edu.stanford.nlp.sempre.*; - -/** - * Used to evaluate Freebase question answering. - * Denotation is a list of entities. - * Nothing in here is specific to Freebase, but this is not really meant to be - * a general-purpose class. - * - * @author Percy Liang - */ -public class FreebaseValueEvaluator implements ValueEvaluator { - public static class Options { - @Option(gloss = "When evaluating lists, compute F1 rather than exact match") public boolean useF1 = true; - } - public static final Options opts = new Options(); - - public double getCompatibility(Value target, Value pred) { - double f1 = getF1(target, pred); - return opts.useF1 ? f1 : (f1 == 1 ? 1 : 0); - } - - // Compute F1 score between two lists (partial match). - // this is target, that is predicted. - private double getF1(Value target, Value pred) { - List targetList = ((ListValue) target).values; - if (!(pred instanceof ListValue)) return 0; - List predList = ((ListValue) pred).values; - - if (targetList.size() == 0 && predList.size() == 0) - return 1; - if (targetList.size() == 0 || predList.size() == 0) - return 0; - - double precision = 0; - for (Value v2 : predList) { // For every predicted value... - double score = 0; - for (Value v1 : targetList) - score = Math.max(score, getItemCompatibility(v1, v2)); - precision += score; - } - precision /= predList.size(); - assert precision >= 0 && precision <= 1 : precision; - - double recall = 0; - for (Value v1 : targetList) { // For every true value... 
- double score = 0; - for (Value v2 : predList) - score = Math.max(score, getItemCompatibility(v1, v2)); - recall += score; - } - recall /= targetList.size(); - assert recall >= 0 && recall <= 1 : recall; - - if (precision + recall == 0) return 0; - - double f1 = 2 * precision * recall / (precision + recall); - assert f1 >= 0 && f1 <= 1 : f1; - - return f1; - } - - // Compare one element of the list. - public double getItemCompatibility(Value target, Value pred) { - if (target instanceof DescriptionValue) { - // Just has to match the description - if (pred instanceof NameValue) - return ((DescriptionValue) target).value.equals(((NameValue) pred).description) ? 1 : 0; - return 0; - } - - if (pred instanceof ErrorValue) return 0; // Never award points for error - if (pred == null) { - LogInfo.warning("Predicted value is null!"); - return 0; - } - if (target.getClass() != pred.getClass()) return 0; - - if (target instanceof DateValue) { - DateValue targetDate = (DateValue) target; - DateValue predDate = (DateValue) pred; - // Only comparing the year right now! This is crude. - boolean match = (targetDate.year == predDate.year); - return match ? 1 : 0; - } - - return target.equals(pred) ? 1 : 0; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/LambdaCalculusConverter.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/LambdaCalculusConverter.java deleted file mode 100644 index a891a29cc9..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/LambdaCalculusConverter.java +++ /dev/null @@ -1,552 +0,0 @@ -package edu.stanford.nlp.sempre.freebase; - -import edu.stanford.nlp.sempre.*; -import fig.basic.*; -import fig.exec.Execution; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.PrintWriter; -import java.util.*; - -/** - * Converts Luke Zettlemoyer's lambda calculus data format into our example files. 
- * @author Percy Liang - * @author Ziang Xie - */ - -public class LambdaCalculusConverter implements Runnable { - public static class Options { - @Option(gloss = "Input path (lambda calculus)") - public String inPath = "overnight/geo/geosents280-typed.ccg.test.new"; - @Option(gloss = "Specification of translations") - public String specPath = "overnight/geo/geo.spec"; - @Option(gloss = "Specification of variable names") - public String varPath = "overnight/geo/geo.vars"; - @Option(gloss = "Specification of primitive types") - public String primPath = "overnight/geo/geo.primitives"; - @Option(gloss = "Specification of formula replacements") - public String replacePath = "overnight/geo/geo.replace"; - @Option(gloss = "Specification of manual conversions") - public String manualConversionsPath = "overnight/geo/geo.manual_conversions"; - @Option(gloss = "Output path (examples)") - public String outPath = "overnight/geo/geo.out.json"; - @Option(gloss = "Specific example to parse and run") - public int runInd = -1; - @Option(gloss = "Output path for lexicon grammar") - public String lexiconPath = "overnight/geo/geo.out.grammar"; - @Option(gloss = "Verbose output (for debugging)") - public boolean verbose = false; // TODO Currently unused - } - public static Options opts = new Options(); - - // Mapping between predicates - Map predicatesMap = new HashMap(); - // Mapping from variables in the input (e.g. "$1") to our variables (e.g. "x") - Map varMap = new HashMap(); - // Mapping from types (e.g. 
"i" in "population:i") to semparse primitive types - Map primitiveMap = new HashMap(); - // Hardcoded replacements for mis-specified formulas in the input - Map replaceMap = new HashMap(); - // Hardcoded conversions for where the converter fails - Map manualConversionsMap = new HashMap(); - - // Colorize output - Colorizer color = new Colorizer(); - - // Examples to be converted - List examples = new ArrayList(); - // Indices of examples that were executed without error - List validExampleIds = new ArrayList(); - List failedExampleIds = new ArrayList(); - // Use this to later sort the valid examples - List validExampleLengths = new ArrayList(); - - public void run() { - readPrereqs(); - - convertExamples(); - executeExamples(); - - if (opts.runInd < 0) { - writeExamples(); - printSummary(); - } - } - - public void readPrereqs() { - LogInfo.begin_track("Reading prereq"); - readSpec(); - readPrimitives(); - readVars(); - readStringMap(opts.replacePath, replaceMap); - readStringMap(opts.manualConversionsPath, manualConversionsMap); - LogInfo.end_track(); - } - - void readSpec() { - for (String line : IOUtils.readLinesHard(opts.specPath)) { - if (line.startsWith("#")) continue; - if (line.equals("")) continue; - String[] tokens = line.split(" ", 2); - predicatesMap.put(tokens[0], Formula.fromString(tokens[1])); - } - } - - void readPrimitives() { - for (String line : IOUtils.readLinesHard(opts.primPath)) { - if (line.startsWith("#")) continue; - if (line.equals("")) continue; - String[] tokens = line.split(" ", 2); - primitiveMap.put(tokens[0], tokens[1]); - } - } - - void readVars() { - for (String line : IOUtils.readLinesHard(opts.varPath)) { - if (line.startsWith("#")) continue; - if (line.equals("")) continue; - String[] tokens = line.split(" ", 2); - varMap.put(tokens[0], Formula.fromString(tokens[1])); - } - } - - void readStringMap(String path, Map map) { - boolean readOriginal = false; - String original = ""; - for (String line : IOUtils.readLinesHard(path)) { 
- if (line.startsWith("#")) continue; - if (line.equals("")) continue; - if (!readOriginal) { - readOriginal = true; - original = line; - } else { - readOriginal = false; - map.put(original, line); - } - } - } - - Formula toPredicate(String func) { - boolean reverse = false; - while (func.startsWith("!")) { - LogInfo.log(func); - reverse = !reverse; - func = func.substring(1); - } - if (!predicatesMap.containsKey(func)) - func = removeGeoType(func); - if (func.startsWith("$")) - return Formula.fromString(String.format("(var %s)", toVariable(func).toString())); - if (!predicatesMap.containsKey(func)) { - throw new RuntimeException("Unknown predicate: " + func); - } - Formula form = predicatesMap.get(func); - if (reverse) { - return new ReverseFormula(form); - } else { - return form; - } - } - - Formula toLambdaVar(String var) { - return toVariable(var); - } - - boolean isVar(String var) { - return var != null && var.startsWith("$"); - } - - Formula toVariable(String var) { - var = removeGeoType(var); - if (!varMap.containsKey(var)) { - throw new RuntimeException("Unknown variable: " + var); - } - return varMap.get(var); - } - - // FIXME Not general to converters - String removeGeoType(String pred) { - return pred.split(":", 2)[0]; - } - - String getGeoType(String pred) { - String[] parts = pred.split(":", 2); - if (parts.length > 0) - return parts[1]; - return ""; - } - - Formula toJoin(String func, Formula arg) { - return new JoinFormula(toPredicate(func), arg); - } - - Formula toLambda(String var, Formula body) { - return new LambdaFormula(toLambdaVar(var).toString(), body); - } - - Formula toMark(String var, Formula body) { - return new MarkFormula(toPredicate(var).toString(), body); - } - - Formula toAndFormula(List clauses, boolean[] hit, String headVar, List existsVars) { - // (and (river:t $1) (loc:t $1 $0)) - Formula formula = null; - for (int i = 0; i < clauses.size(); i++) { - if (hit[i]) { - LogInfo.log("hit " + i); - continue; - } - // if 
(!hasHeadVar(clauses.get(i), headVar)) continue; - hit[i] = true; - LispTree tree = clauses.get(i); - Formula newFormula = toFormula(tree, headVar, existsVars); - if (formula == null) formula = newFormula; - else { - formula = new MergeFormula(MergeFormula.Mode.and, formula, newFormula); - } - } - return formula; - } - - Formula toOrFormula(List clauses, boolean[] hit, String headVar, List existsVars) { - // (or (town:t $1) (city:t $1)) - LogInfo.log("# or clauses: " + clauses.size()); - Formula formula = null; - for (int i = 0; i < clauses.size(); i++) { - if (hit[i]) { - LogInfo.log("hit " + i); - continue; - } - // if (!hasHeadVar(clauses.get(i), headVar)) continue; - hit[i] = true; - LispTree tree = clauses.get(i); - Formula newFormula = toFormula(tree, headVar, existsVars); - if (formula == null) formula = newFormula; - else { - formula = new MergeFormula(MergeFormula.Mode.or, formula, newFormula); - } - } - return formula; - } - - boolean validVar(String var, String headVar, List existsVars) { - if (var == null) - return false; - if (!var.startsWith("$")) - return true; - - boolean valid = false; - if (var.equals(headVar)) - valid = true; - if (existsVars != null && existsVars.contains(var)) - valid = true; - return valid; - } - - Formula toPrimitive(String s) { - String[] parts = s.split(":", 2); - if (parts.length < 2) - return null; - String type = parts[1]; - String primitive = primitiveMap.get(type); - if (primitive == null) - return null; - // if (primitive.equals("string")) - // parts[0] = capitalizeWords(parts[0]); - // return Formula.fromString(String.format("(%s %s)", primitive, parts[0])); - if (primitive.equals("string")) - return Formula.fromString(String.format("(!fb:type.object.name fb:en.%s)", parts[0])); - - return Formula.fromString(String.format("(%s %s)", primitive, parts[0])); - } - - Formula handleExists(LispTree tree, String headVar, List existsVars) { - // (exists $1 (and (river:t $1) (loc:t $1 $0))) - String eVar = 
tree.child(1).value; - existsVars.add(eVar); - // FIXME Currently assuming format where exists contains and statement with - // n clauses where first n - 1 clauses describe exists var and last clause - // relates exists var to the head var - List andTreeList = tree.child(2).children; - // FIXME Don't assume and tree - LispTree newAndTree = LispTree.proto.newList(); - - int predTreeInd = -1; - int numPredTrees = 0; - for (int k = 0; k < andTreeList.size(); k++) { - String childStr = andTreeList.get(k).toString(); - if (childStr.contains(headVar)) { - predTreeInd = k; - numPredTrees++; - } - } - - String func = tree.child(2).child(0).value; - if (numPredTrees == 0 || func.equals("exists")) { - return toLambda(eVar, toFormula(tree.child(2), eVar, existsVars)); - } - - newAndTree.children = new ArrayList(andTreeList.subList(0, andTreeList.size())); - LispTree predTree = newAndTree.children.remove(predTreeInd); - if (newAndTree.children.size() == 2) - newAndTree = newAndTree.child(1); - - LispTree newPredTree = LispTree.proto.parseFromString(predTree.toString().replace(eVar, newAndTree.toString())); - - Formula form = toFormula(newPredTree, eVar, existsVars); - existsVars.remove(eVar); - return form; - } - - // hit: ignore these clauses in (and ...) 
constructions - public Formula toFormula(LispTree tree, String headVar, List existsVars) { - - if (tree.isLeaf()) { - Formula s = toPrimitive(tree.toString()); - if (s != null) - return s; - return toPredicate(tree.toString()); - } - - String func = tree.child(0).value; - - if (func.equals("lambda")) { - // (lambda $0 e (and (river:t $0) (loc:t $0 arkansas:s))) - return toFormula(tree.child(3), tree.child(1).value, existsVars); - } else if (func.equals("count")) { - // (count $0 (and (river:t $0) (loc:t $0 washington:s))) - String countVar = tree.child(1).value; - if (headVar == null) - headVar = countVar; - if (!countVar.equals(headVar) && !existsVars.contains(countVar)) { - - return toLambda(countVar, new AggregateFormula(AggregateFormula.Mode.count, toFormula(tree.child(2), countVar, existsVars))); - } else - return new AggregateFormula(AggregateFormula.Mode.count, toFormula(tree.child(2), countVar, existsVars)); - } else if (func.equals("sum")) { - // (sum $0 (and (state:t $0) (next_to:t $0 texas:s)) (population:i $0)) - String numFunc = tree.child(3).child(0).value; - return new AggregateFormula(AggregateFormula.Mode.sum, - toJoin(numFunc, toFormula(tree.child(2), tree.child(1).value, existsVars))); - } else if (func.equals("argmax") || func.equals("argmin")) { - // (argmax $0 (state:t $0) (density:i $0)) - // (argmin $1 (river:t $1) (len:i $1)) - SuperlativeFormula.Mode mode = SuperlativeFormula.Mode.argmax; - if (func.equals("argmin")) - mode = SuperlativeFormula.Mode.argmin; - String superVar = tree.child(1).value; - Formula headFormula = toFormula(tree.child(2), superVar, existsVars); - Formula degreeFormula = toFormula(tree.child(3), superVar, existsVars); - NumberValue one = new NumberValue(1.0); - Formula rankFormula = new ValueFormula(one); - Formula countFormula = new ValueFormula(one); - // FIXME Hack to handle "most"/"least" expressions - if (tree.child(3).child(0).value.equals("count")) - degreeFormula = new ReverseFormula(degreeFormula); - // 
FIXME Currently assumes 1 1 - return new SuperlativeFormula(mode, rankFormula, countFormula, headFormula, degreeFormula); - } else if (func.equals("exists")) { - if (headVar == null) - headVar = tree.child(1).value; - return handleExists(tree, headVar, existsVars); - } else if (func.equals("not")) { - return new NotFormula(toFormula(tree.child(1), headVar, existsVars)); - } else if (func.equals("and")) { - return toAndFormula(tree.children.subList(1, tree.children.size()), new boolean[tree.children.size() - 1], headVar, existsVars); - } else if (func.equals("or")) { - return toOrFormula(tree.children.subList(1, tree.children.size()), new boolean[tree.children.size() - 1], headVar, existsVars); - } else { // Predicate - // Find head var - if (tree.children.size() == 2) { // Unary - if (isVar(tree.child(1).value)) { - // FIXME HACK - if (!getGeoType(func).equals("t")) - return toPredicate("!" + func); - return toPredicate(func); - } else - return toJoin(func, toFormula(tree.child(1), headVar, existsVars)); - } else if (tree.children.size() == 3) { // Binary - // FIXME Move elsewhere, there's both a binary and unary "capital:t" - if (func.equals("capital:t")) - func = "is_capital:t"; - - Formula form1 = toFormula(tree.child(1), headVar, existsVars); - Formula form2 = toFormula(tree.child(2), headVar, existsVars); - String first = tree.child(1).value; - String second = tree.child(2).value; - - // FIXME So ugly - boolean secondIsHead = isVar(second) && !existsVars.contains(second); - boolean firstIsHead = isVar(first) && (!existsVars.contains(first) || !secondIsHead); - if (firstIsHead) - return toJoin(func, form2); - else // secondIsHead - return toJoin("!" 
+ func, form1); - // else - // return toJoin(func, form1); - } else { - throw new RuntimeException("Bad arity: " + tree); - } - } - } - - void countTokens(LispTree tree, Map counts) { - if (tree.isLeaf()) - MapUtils.incr(counts, tree.value, 1); - else { - for (LispTree child : tree.children) - countTokens(child, counts); - } - } - - String preprocessPredicates(String lispLine, String utterance) { - // Replacements - boolean replaced = false; - if (replaceMap.containsKey(lispLine)) { - lispLine = replaceMap.get(lispLine); - replaced = true; - } - - // FIXME Assumes that only one "major" in the line - // FIXME Specific to geoquery - if (utterance.contains("major river")) - lispLine = lispLine.replace("major", "major_river"); - else if (utterance.contains("major lake")) - lispLine = lispLine.replace("major", "major_lake"); - else - lispLine = lispLine.replace("major", "major_city"); - - if (utterance.contains("river")) - if (!replaced) - lispLine = lispLine.replace("loc:t", "river_loc:t"); - - lispLine = lispLine.replace("new_york:s", "new_york_state:s"); - - return lispLine; - } - - void convertExamples() { - String line; - int runInd = opts.runInd; - - try { - Example.Builder ex = null; - BufferedReader in = IOUtils.openIn(opts.inPath); - boolean gotUtterance = false; - int newId = 0; - String utterance = ""; - - while ((line = in.readLine()) != null) { - if (line.equals("") || line.startsWith("//")) continue; - if (ex == null) { - newId++; - ex = new Example.Builder(); - ex.setId("" + newId); - gotUtterance = false; - } - - - if (!gotUtterance) { - ex.setUtterance(line); - utterance = line; - gotUtterance = true; - } else { - if (runInd > 0 && newId != runInd) { - ex = null; - continue; - } - - LispTree tree = LispTree.proto.parseFromString(preprocessPredicates(line, utterance)); - LogInfo.logs(color.colorize("IN [%d]: %s", "blue"), newId, utterance); - LogInfo.logs(color.colorize("IN [%d]: %s", "purple"), newId, tree); - ArrayList existsVars = new ArrayList(); - - 
if (manualConversionsMap.containsKey(line)) { - LogInfo.logs("MANUAL CONVERSION=%s", line); - ex.setTargetFormula(Formula.fromString(manualConversionsMap.get(line))); - } else { - LogInfo.log("AUTOMATIC CONVERSION"); - ex.setTargetFormula(toFormula(tree, null, existsVars)); - } - LogInfo.logs(color.colorize("OUT [%d]: %s", "yellow"), newId, ex.createExample().targetFormula.toLispTree()); - examples.add(ex.createExample()); - ex = null; - } - } - in.close(); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - void executeExamples() { - int runInd = opts.runInd; - // FIXME Should be passed in - SparqlExecutor.opts.endpointUrl = "http://localhost:3094/sparql"; - SparqlExecutor.opts.cachePath = "SparqlExecutor.cache"; - SparqlExecutor.opts.lambdaAllowDiagonals = false; //jonathan - SparqlExecutor executor = new SparqlExecutor(); - - int exInd = 1; - if (runInd > 0) - exInd = runInd; - for (Example ex : examples) { - // Useful for just testing specific portion of examples - if (runInd > 0 && exInd != runInd) { - break; - } - - LogInfo.logs(color.colorize("[%d] %s", "blue"), exInd, ex.utterance); - LogInfo.logs(color.colorize("[%d] %s", "yellow"), exInd, ex.targetFormula.toString()); - try { - Executor.Response response = executor.execute(ex.targetFormula, null); - LogInfo.logs("\t\t [%d] %s", exInd, response.value.toString()); - validExampleIds.add(exInd); - validExampleLengths.add(ex.utterance.length()); - } catch (RuntimeException e) { - LogInfo.error(e); - failedExampleIds.add(exInd); - } - exInd++; - } - } - - void writeExamples() { - // Sort validExampleIds by the length of the utterance of the example -// Collections.sort(validExampleIds, new Comparator() { -// public int compare(Integer left, Integer right) { -// return Integer.compare(validExampleLengths.get(validExampleIds.indexOf(left)), -// validExampleLengths.get(validExampleIds.indexOf(right))); -// } -// }); - - PrintWriter out = IOUtils.openOutHard(opts.outPath); - out.println("["); 
// Print out as a list - String indent = " "; - for (int k = 0; k < validExampleIds.size(); k++) { - int j = validExampleIds.get(k) - 1; - Example ex = examples.get(j); - if (k < validExampleIds.size() - 1) - out.println(indent + ex.toJson() + ","); - else - out.println(indent + ex.toJson()); - } - out.println("]"); // Print out as a list - out.close(); - } - - void printSummary() { - LogInfo.logs("%d input examples", examples.size()); - LogInfo.logs("%d successful executions", validExampleIds.size()); - LogInfo.logs("Failed executions (%d):", failedExampleIds.size()); - for (int k : failedExampleIds) { - Example ex = examples.get(k - 1); - LogInfo.log(ex.toJson()); - } - } - - public static void main(String[] args) throws InterruptedException { - Execution.run(args, new LambdaCalculusConverter(), "lcc", LambdaCalculusConverter.opts); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/Lexicon.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/Lexicon.java deleted file mode 100644 index 8967274d77..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/Lexicon.java +++ /dev/null @@ -1,94 +0,0 @@ -package edu.stanford.nlp.sempre.freebase; - -import edu.stanford.nlp.sempre.cache.StringCache; -import edu.stanford.nlp.sempre.cache.StringCacheUtils; -import edu.stanford.nlp.sempre.freebase.lexicons.LexicalEntry; -import edu.stanford.nlp.sempre.freebase.lexicons.LexicalEntry.LexicalEntrySerializer; -import fig.basic.LispTree; -import fig.basic.LogInfo; -import fig.basic.Option; -import org.apache.lucene.queryparser.classic.ParseException; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -public final class Lexicon { - public static class Options { - @Option(gloss = "The path for the cache") - public String cachePath; - } - public static Options opts = new Options(); - - private static Lexicon lexicon; - public static Lexicon getSingleton() { - try { - if (lexicon == 
null) - lexicon = new Lexicon(); - return lexicon; - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - public StringCache cache; - - private EntityLexicon entityLexicon; - private UnaryLexicon unaryLexicon; - private BinaryLexicon binaryLexicon; - - public EntityLexicon getEntityLexicon() { return entityLexicon; } - - private Lexicon() throws IOException { - LogInfo.begin_track("Lexicon()"); - // TODO(joberant): why is BinaryLexicon special? -- wait why is it special? - entityLexicon = EntityLexicon.getInstance(); - unaryLexicon = UnaryLexicon.getInstance(); - binaryLexicon = BinaryLexicon.getInstance(); - LogInfo.end_track(); - - if (opts.cachePath != null) - cache = StringCacheUtils.create(opts.cachePath); - } - - public List lookupUnaryPredicates(String query) throws IOException { - return unaryLexicon.lookupEntries(query); - } - - public List lookupBinaryPredicates(String query) throws IOException { - return binaryLexicon.lookupEntries(query); - } - - public List lookupEntities(String query, EntityLexicon.SearchStrategy strategy) throws IOException, ParseException { - List entries = getCache("entity", query); - if (entries == null) - putCache("entity", query, entries = entityLexicon.lookupEntries(query, strategy)); - return entries; - } - - private List getCache(String mode, String query) { - if (cache == null) return null; - String key = mode + ":" + query; - String response; - synchronized (cache) { - response = cache.get(key); - } - if (response == null) return null; - LispTree tree = LispTree.proto.parseFromString(response); - List entries = new ArrayList<>(); - for (int i = 0; i < tree.children.size(); i++) - entries.add(LexicalEntrySerializer.entryFromLispTree(tree.child(i))); - return entries; - } - - private void putCache(String mode, String query, List entries) { - if (cache == null) return; - String key = mode + ":" + query; - LispTree result = LispTree.proto.newList(); - for (LexicalEntry entry : entries) - 
result.addChild(LexicalEntrySerializer.entryToLispTree(entry)); - synchronized (cache) { - cache.put(key, result.toString()); - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/LexiconFn.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/LexiconFn.java deleted file mode 100644 index e70237673b..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/LexiconFn.java +++ /dev/null @@ -1,412 +0,0 @@ -package edu.stanford.nlp.sempre.freebase; - -import com.google.common.base.Joiner; -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.freebase.lexicons.LexicalEntry; -import edu.stanford.nlp.sempre.freebase.lexicons.LexicalEntry.BinaryLexicalEntry; -import fig.basic.*; -import org.apache.lucene.queryparser.classic.ParseException; - -import java.io.IOException; -import java.util.*; - -/** - * Looks up a string into the lexicon, interfacing with the fbalignment code. - * @author Percy Liang - */ -public class LexiconFn extends SemanticFn { - public static class Options { - // Note: we filter here and not in entity lexicon so that don't need different cache for different numbers. 
- @Option(gloss = "Number of entities to return from entity lexicon") - public int maxEntityEntries = 100; - @Option public int maxUnaryEntries = Integer.MAX_VALUE; - @Option public int maxBinaryEntries = Integer.MAX_VALUE; - - @Option(gloss = "Verbose") public int verbose = 0; - @Option(gloss = "Class name for lexicon") public String lexiconClassName; - @Option public boolean useHistogramFeatures = true; - } - - public static Options opts = new Options(); - private static Lexicon lexicon; - public static Evaluation lexEval = new Evaluation(); - - private String mode; // unary, binary, or entity - private EntityLexicon.SearchStrategy entitySearchStrategy; // For entities, how to search - private TextToTextMatcher textToTextMatcher = new TextToTextMatcher(); - private FbFormulasInfo fbFormulaInfo; - - public static final Map, Set> correctEntryToExampleIds = - new HashMap<>(); - - public LexiconFn() throws IOException { - lexicon = Lexicon.getSingleton(); - fbFormulaInfo = FbFormulasInfo.getSingleton(); - } - - public void init(LispTree tree) { - super.init(tree); - for (int i = 1; i < tree.children.size(); i++) { - String value = tree.child(i).value; - - // mode - if (value.equals("unary")) this.mode = "unary"; - else if (value.equals("binary")) this.mode = "binary"; - else if (value.equals("entity")) this.mode = "entity"; - // entity search strategy - // TODO(joberant): aren't we saying these should be set by a flag so the cache doesn't get messed up? 
- else if (value.equals("inexact")) this.entitySearchStrategy = EntityLexicon.SearchStrategy.inexact; - else if (value.equals("exact")) this.entitySearchStrategy = EntityLexicon.SearchStrategy.exact; - else if (value.equals("fbsearch")) this.entitySearchStrategy = EntityLexicon.SearchStrategy.fbsearch; - else throw new RuntimeException("Invalid argument: " + value); - } - } - - public static void getEntityEntryFeatures(LexicalEntry.EntityLexicalEntry eEntry, FeatureVector features, - Callable c, Example ex) { - - if (FeatureExtractor.containsDomain("basicStats")) { - if (opts.useHistogramFeatures) - features.addHistogram("basicStats", "entity.popularity ", eEntry.getPopularity()); - else - features.addWithBias("basicStats", "entity.popularity", Math.log(eEntry.getPopularity() + 1)); - } - if (FeatureExtractor.containsDomain("entityFeatures")) { - for (String feature : eEntry.entityFeatures.keySet()) { - double value = eEntry.entityFeatures.getCount(feature); - features.addWithBias("entityFeatures", "entity." + feature, value); - } - features.add("entityFeatures", "entity.pos=" + ex.languageInfo.posSeq(c.getStart(), c.getEnd())); - features.add("entityFeatures", "entity.mention_length=" + (c.getEnd() - c.getStart())); - } - if (FeatureExtractor.containsDomain("lexAlign")) - features.add("lexAlign", eEntry.textDescription + " --- " + eEntry.formula); - } - - public static void getUnaryEntryFeatures(LexicalEntry.UnaryLexicalEntry uEntry, FeatureVector features) { - if (FeatureExtractor.containsDomain("basicStats")) { - if (opts.useHistogramFeatures) - features.addHistogram("basicStats", "unary.popularity ", uEntry.getPopularity() + 1); - else - features.addWithBias("basicStats", "unary.popularity", Math.log(uEntry.getPopularity() + 1)); - } - // Alignment scores features - if (FeatureExtractor.containsDomain("alignmentScores")) { - for (String feature : uEntry.alignmentScores.keySet()) { - features.addWithBias("alignmentScores", "unary." 
+ feature, Math.log(MapUtils.getDouble(uEntry.alignmentScores, feature, 0.0) + 1)); - } - } - - if (FeatureExtractor.containsDomain("basicStats")) { - if (uEntry.getDistance() < 0.0001) - features.add("basicStats", "unary.equal"); - - // adding the source of the lexical entry as a feature - features.add("basicStats", "unary.source=" + uEntry.source); - } - if (FeatureExtractor.containsDomain("lexAlign")) - features.add("lexAlign", uEntry.textDescription + " --- " + uEntry.formula); - } - - public static void getBinaryEntryFeatures(BinaryLexicalEntry bEntry, FeatureVector features) { - if (FeatureExtractor.containsDomain("basicStats")) { - if (opts.useHistogramFeatures) - features.addHistogram("basicStats", "binary.popularity ", bEntry.getPopularity() + 1); - else - features.addWithBias("basicStats", "binary.popularity", Math.log(bEntry.getPopularity() + 1)); - // adding the source of the lexical entry as a feature - features.add("basicStats", "binary." + bEntry.source); - } - // Alignment scores features - if (FeatureExtractor.containsDomain("alignmentScores")) { - for (String feature : bEntry.alignmentScores.keySet()) { - features.addWithBias("alignmentScores", "binary." + feature, Math.log(MapUtils.getDouble(bEntry.alignmentScores, feature, 0.0) + 1)); - } - } - if (FeatureExtractor.containsDomain("lexAlign")) - features.add("lexAlign", bEntry.textDescription + " --- " + bEntry.formula); - } - - // Convert LexicalEntry into a form consumable by the semantic parser. 
- private Derivation convert(Example ex, - Callable c, - String mode, - String word, - LexicalEntry entry) { - FeatureVector features = new FeatureVector(); - SemType type; - - switch (mode) { - case "entity": - // Entities - LexicalEntry.EntityLexicalEntry eEntry = (LexicalEntry.EntityLexicalEntry) entry; - getEntityEntryFeatures(eEntry, features, c, ex); - type = ((LexicalEntry.EntityLexicalEntry) entry).type; - break; - case "unary": - // Unaries - LexicalEntry.UnaryLexicalEntry uEntry = (LexicalEntry.UnaryLexicalEntry) entry; - getUnaryEntryFeatures(uEntry, features); - type = ((LexicalEntry.UnaryLexicalEntry) entry).type; - break; - case "binary": - // Binaries - BinaryLexicalEntry bEntry = (BinaryLexicalEntry) entry; - getBinaryEntryFeatures(bEntry, features); - // features that depend on entry but also on the example - features.add( - textToTextMatcher.extractFeatures( - ex.languageInfo.tokens.subList(c.getStart(), c.getEnd()), - ex.languageInfo.posTags.subList(c.getStart(), c.getEnd()), - ex.languageInfo.lemmaTokens.subList(c.getStart(), c.getEnd()), - bEntry.fbDescriptions)); - - // Note that expectedType2 is the argument type, expectedType1 is the return type. 
- type = SemType.newFuncSemType(bEntry.getExpectedType2(), bEntry.getExpectedType1()); - break; - default: - throw new RuntimeException("Invalid mode: " + mode); - } - - Derivation newDeriv = new Derivation.Builder() - .withCallable(c) - .formula(entry.formula) - .type(type) - .localFeatureVector(features) - .createDerivation(); - - if (SemanticFn.opts.trackLocalChoices) - newDeriv.addLocalChoice("LexiconFn " + newDeriv.startEndString(ex.getTokens()) + " " + entry); - - if (opts.verbose >= 3) { - LogInfo.logs( - "LexiconFn: %s [%s => %s ~ %s | %s]: popularity = %s, distance = %s, type = %s, source=%s", - mode, word, entry.normalizedTextDesc, entry.fbDescriptions, newDeriv.formula, - entry.getPopularity(), entry.getDistance(), newDeriv.type, entry.source); - } - return newDeriv; - } - - public DerivationStream call(Example ex, Callable c) { - - if (opts.verbose >= 5) LogInfo.begin_track("LexicalFn.call: %s", c.childStringValue(0)); - - String query = c.childStringValue(0); - DerivationStream res; - - try { - switch (mode) { - // Entities - case "entity": { - // if (opts.verbose >= 2) - // LogInfo.log("LexiconFn: querying for entity: " + query); - - List entries = lexicon.lookupEntities(query, entitySearchStrategy); - lexEval.add("entity", !entries.isEmpty()); - entries = entries.subList(0, Math.min(opts.maxEntityEntries, entries.size())); - res = new LazyLexiconFnDerivs(ex, c, entries, query); - break; - } - // Unaries - case "unary": { - List entries = lexicon.lookupUnaryPredicates(query); - lexEval.add("unary", !entries.isEmpty()); - entries = entries.subList(0, Math.min(opts.maxUnaryEntries, entries.size())); - res = new LazyLexiconFnDerivs(ex, c, entries, query); - break; - } - // Binaries - case "binary": { - List entries = lexicon.lookupBinaryPredicates(query); - lexEval.add("binary", !entries.isEmpty()); - List filteredEntries = new ArrayList<>(); - // filter cvt entries (TODO(joberant): remove this hack) - for (LexicalEntry entry : entries) { - if 
(!fbFormulaInfo.isCvt(((BinaryLexicalEntry) entry).expectedType1) - && !fbFormulaInfo.isCvt(((BinaryLexicalEntry) entry).expectedType2)) - filteredEntries.add(entry); - } - filteredEntries = filteredEntries.subList(0, Math.min(opts.maxBinaryEntries, filteredEntries.size())); - res = new LazyLexiconFnDerivs(ex, c, filteredEntries, query); - break; - } - default: - throw new RuntimeException("Illegal mode: " + mode); - } - } catch (IOException | ParseException e) { - throw new RuntimeException(e); - } - - if (opts.verbose >= 5) LogInfo.end_track(); - return res; - } - - // if there was bridging then have a rule from tokens to binary - @Override - public void addFeedback(Example ex) { - LogInfo.begin_track("LexiconFn.addFeedback"); - Set> correctLexemeFormulaMatches = collectLexemeFormulaPairs(ex); - - for (Pair pair: correctLexemeFormulaMatches) { - LogInfo.logs("LexiconFn.addFeedback: %s => %s", pair.getFirst(), pair.getSecond()); - // TODO(joberant): hack to get id - MapUtils.addToSet(correctEntryToExampleIds, pair, Integer.parseInt(ex.id.substring(ex.id.lastIndexOf(':') + 1))); - BinaryLexicon.getInstance().updateLexicon(pair, correctEntryToExampleIds.get(pair).size()); - } - LogInfo.end_track(); - } - - private static Set> collectLexemeFormulaPairs(Example ex) { - Set> res = new HashSet<>(); - Set> temp = new HashSet<>(); - for (Derivation correctDerivation : ex.getCorrectDerivations()) { - // get all join formulas - List relations = - correctDerivation.formula.mapToList(formula -> { - List res1 = new ArrayList<>(); - if (formula instanceof JoinFormula) - res1.add(((JoinFormula) formula).relation); - return res1; - }, false); - Set validIndices = new HashSet<>(); - findValidIndices(correctDerivation, validIndices); - // match formulas - for (Formula relation : relations) { - for (int i = 0; i < ex.numTokens(); ++i) { - if (LanguageInfo.isContentWord(ex.posTag(i)) && - validIndices.contains(i)) { - temp.add(Pair.newPair(ex.languageInfo.lemmaTokens.get(i), 
relation)); - } - } - } - } - // reverse invalid relations - for (Pair pair: temp) { - if (!BinaryLexicon.getInstance().validBinaryFormula(pair.getSecond())) { - pair.setSecond(FbFormulasInfo.getSingleton().equivalentFormula(pair.getSecond())); - } - res.add(pair); - } - return res; - } - - @Override - public void sortOnFeedback(Params params) { - LogInfo.begin_track("Learner.sortLexiconOnFeedback"); - BinaryLexicon.getInstance().sortLexiconByFeedback(params); - UnaryLexicon.getInstance().sortLexiconByFeedback(params); - LogInfo.end_track(); - } - - // todo - this method is grammar specific and that is bad - private static void findValidIndices(Derivation deriv, Set indices) { - if (deriv.cat.equals("$Entity")) - return; - if (deriv.children.size() == 0) { - for (int i = deriv.start; i < deriv.end; ++i) - indices.add(i); - return; - } - for (Derivation child : deriv.children) - findValidIndices(child, indices); - } - - /** For now this ignores stemming!!! */ - private static boolean doesContextMatch(Example ex, Derivation deriv, BinaryLexicalEntry bEntry) { - - if (bEntry.isFullLexemeEqualToNormalizedText()) - return true; - // get the left and right context surrounding the core (normalized text) - String[] leftContext = bEntry.getLeftContext(); - String[] rightContext = bEntry.getRightContext(); - // match right context - for (int i = 0; i < rightContext.length; ++i) { - // in this case all context words were matched and some were dropped but there was no mismatch - if (deriv.end + i >= ex.numTokens() || ex.token(deriv.end + i).equals("?")) - break; - - if (!rightContext[0].equals(ex.lemmaToken(deriv.end + i))) { - if (opts.verbose >= 4) { - LogInfo.logs( - "RIGHT CONTEXT MISMATCH: full lexeme=%s, normalized text=%s left context=%s, right context=%s example=%s, formula=%s", - bEntry.fullLexeme, - bEntry.normalizedTextDesc, - Joiner.on(' ').join(leftContext), - Joiner.on(' ').join(rightContext), - Joiner.on(' ').join(ex.languageInfo.tokens), - bEntry.formula); - } 
- return false; - } - } - - // match right context - for (int i = 0; i < leftContext.length; ++i) { - if (deriv.start - i - 1 < 0) // in this case all context words were matched and some were dropped but there was no mismatch - break; - if (!leftContext[leftContext.length - i - 1].equals(ex.lemmaToken(deriv.start - i - 1))) { - if (opts.verbose >= 2) { - LogInfo.logs( - "LEFT CONTEXT MISMATCH: full lexeme=%s, normalized text=%s left context=%s, right context=%s example=%s, formula=%s", - bEntry.fullLexeme, - bEntry.normalizedTextDesc, - Joiner.on(' ').join(leftContext), - Joiner.on(' ').join(rightContext), - Joiner.on(' ').join(ex.languageInfo.tokens), - bEntry.formula); - } - return false; - } - } - return true; - } - - public class LazyLexiconFnDerivs extends MultipleDerivationStream { - - private Example ex; - private Callable callable; - private List entries; // we get one derivation from each entry - private String query; - private int currIndex = 0; - - public LazyLexiconFnDerivs(Example ex, Callable c, List entries, String query) { - this.ex = ex; - this.callable = c; - this.entries = entries; - this.query = query; - } - - @Override - public int estimatedSize() { - return entries.size() - currIndex; - } - - @Override - public Derivation createDerivation() { - if (currIndex == entries.size()) - return null; - LexicalEntry currEntry = entries.get(currIndex++); - Derivation res; - switch (mode) { - case "entity": - res = convert(ex, callable, "entity", query, currEntry); - break; - case "unary": - res = convert(ex, callable, "unary", query, currEntry); - break; - case "binary": - res = convert(ex, callable, "binary", query, currEntry); - // add context matching feature - if (FeatureExtractor.containsDomain("context")) { - if (!doesContextMatch(ex, res, (BinaryLexicalEntry) currEntry)) - res.addFeature("context", "binary.contextMismatch"); - } - break; - default: - throw new RuntimeException("Illegal mode: " + mode); - } - return res; - } - } -} - diff --git 
a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/SparqlExecutor.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/SparqlExecutor.java deleted file mode 100644 index f6d0aa7c06..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/SparqlExecutor.java +++ /dev/null @@ -1,955 +0,0 @@ -package edu.stanford.nlp.sempre.freebase; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.cache.StringCache; -import edu.stanford.nlp.sempre.cache.StringCacheUtils; -import fig.basic.*; -import org.w3c.dom.Document; -import org.w3c.dom.Element; -import org.w3c.dom.Node; -import org.w3c.dom.NodeList; -import org.xml.sax.InputSource; -import org.xml.sax.SAXException; -import com.google.common.collect.Lists; - -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.transform.OutputKeys; -import javax.xml.transform.Transformer; -import javax.xml.transform.TransformerFactory; -import javax.xml.transform.dom.DOMSource; -import javax.xml.transform.stream.StreamResult; -import java.io.*; -import java.net.SocketTimeoutException; -import java.net.URL; -import java.net.URLConnection; -import java.net.URLEncoder; -import java.util.*; - -/** - * Convert a Formula into a SPARQL query and execute it against some RDF endpoint. 
- * Formal specification of SPARQL: - * http://www.w3.org/TR/rdf-sparql-query/ - * - * @author Percy Liang - */ -public class SparqlExecutor extends Executor { - public static class Options { - @Option(gloss = "Maximum number of results to return") - public int maxResults = 10; - - @Option(gloss = "Milliseconds to wait until opening connection times out") - public int connectTimeoutMs = 1 * 60 * 1000; - - @Option(gloss = "Milliseconds to wait until reading connection times out") - public int readTimeoutMs = 1 * 60 * 1000; - - @Option(gloss = "Save all SPARQL queries in a file so we don't have to hit the SPARQL endpoint too often") - public String cachePath; - - @Option(gloss = "URL where the SPARQL server lives") - public String endpointUrl; - - @Option(gloss = "Whether to return a table of results rather than a list of entities (needed to support 'capital of each state')") - public boolean returnTable = false; - - // FIXME TODO(pliang): remove this since this is a really bad hack. - @Option(gloss = "If false, then enforce that denotation of (lambda x (border x)) does not contain (x,x)") - public boolean lambdaAllowDiagonals = true; - - @Option(gloss = "Whether to include entity names (mostly for readability)") - public boolean includeEntityNames = true; - - @Option(gloss = "Whether to return supporting information (e.g., 'length' for the 'longest river')") - public boolean includeSupportingInfo = false; - - @Option public int verbose = 1; - } - - public static Options opts = new Options(); - - private final FreebaseInfo fbInfo; - private final StringCache query2xmlCache; - - // Statistics on Sparql requests - private static class SparqlStats { - private StatFig timeFig = new StatFig(); - // Number of each type of error. 
- private LinkedHashMap errors = new LinkedHashMap(); - } - - private SparqlStats queryStats = new SparqlStats(); - - public SparqlExecutor() { - this.fbInfo = FreebaseInfo.getSingleton(); - this.query2xmlCache = StringCacheUtils.create(opts.cachePath); - } - - public class ServerResponse { - public ServerResponse(String xml) { this.xml = xml; } - public ServerResponse(ErrorValue error) { this.error = error; } - String xml; - ErrorValue error; - long timeMs; - boolean cached; // Whether things were cached - boolean beginTrack; // Whether we started printing things out - } - - // Make a request to the given SPARQL endpoint. - // Return the XML. - public ServerResponse makeRequest(String queryStr, String endpointUrl) { - if (endpointUrl == null) - throw new RuntimeException("No SPARQL endpoint url specified"); - - try { - String url = String.format("%s?query=%s&format=xml", endpointUrl, URLEncoder.encode(queryStr, "UTF-8")); - URLConnection conn = new URL(url).openConnection(); - conn.setConnectTimeout(opts.connectTimeoutMs); - conn.setReadTimeout(opts.readTimeoutMs); - InputStream in = conn.getInputStream(); - - // Read the response - StringBuilder buf = new StringBuilder(); - BufferedReader reader = new BufferedReader(new InputStreamReader(in)); - String line; - while ((line = reader.readLine()) != null) - buf.append(line); - - // Check for blatant errors. - String result = buf.toString(); - if (result.length() == 0) - return new ServerResponse(ErrorValue.empty); - if (result.startsWith("")) - return new ServerResponse(ErrorValue.badFormat); - - return new ServerResponse(buf.toString()); - } catch (SocketTimeoutException e) { - return new ServerResponse(ErrorValue.timeout); - } catch (IOException e) { - LogInfo.errors("Server exception: %s", e); - // Sometimes the SPARQL server throws a 408 to signify a server timeout. 
- if (e.toString().contains("HTTP response code: 408")) - return new ServerResponse(ErrorValue.server408); - if (e.toString().contains("HTTP response code: 500")) - return new ServerResponse(ErrorValue.server500); - throw new RuntimeException(e); // Haven't seen this happen yet... - } - } - - // For debugging only - // Document extends Node - public static void printDocument(Node node, OutputStream out) { - try { - TransformerFactory tf = TransformerFactory.newInstance(); - Transformer transformer = tf.newTransformer(); - transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no"); - transformer.setOutputProperty(OutputKeys.METHOD, "xml"); - transformer.setOutputProperty(OutputKeys.INDENT, "yes"); - transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8"); - transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4"); - - transformer.transform( - new DOMSource(node), - new StreamResult(new OutputStreamWriter(out, "UTF-8"))); - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - // Return - // - XML - // - Whether to print out details (coincides with whether this query was cached). - public ServerResponse runQueryToGetXml(String queryStr, Formula formula) { - if (opts.verbose >= 3) LogInfo.logs("SparqlExecutor.execute: %s", queryStr); - ServerResponse response = null; - - // Note: only cache for concrete queries. - boolean useCache = query2xmlCache != null; - - // Try to look the query up in the cache. - if (useCache) { - // Contents either encodes an error or not. - String contents = query2xmlCache.get(queryStr); - if (contents != null) { - ErrorValue error = ErrorValue.fromString(contents); - if (error != null) - response = new ServerResponse(error); - else - response = new ServerResponse(contents); - response.cached = true; - } - } - - // If not cached, then make the actual request. 
- // if (response == null || response.xml == null || response.xml.contains("TIMEOUT")) { - if (response == null) { - // Note: begin_track without end_track - if (opts.verbose >= 1) { - LogInfo.begin_track("SparqlExecutor.execute: %s", formula); - if (opts.verbose >= 2) LogInfo.logs("%s", queryStr); - } - - // Make actual request - StopWatch watch = new StopWatch(); - watch.start(); - response = makeRequest(queryStr, opts.endpointUrl); - watch.stop(); - response.timeMs = watch.getCurrTimeLong(); - response.beginTrack = true; - - if (useCache) - query2xmlCache.put(queryStr, response.error != null ? response.error.toString() : response.xml); - } - return response; - } - - public static NodeList extractResultsFromXml(ServerResponse response) { - return extractResultsFromXml(response.xml); - } - private static NodeList extractResultsFromXml(String xml) { - DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance(); - NodeList results = null; - try { - DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder(); - Document doc = docBuilder.parse(new InputSource(new StringReader(xml))); - results = doc.getElementsByTagName("result"); - } catch (IOException e) { - throw new RuntimeException(e); - } catch (SAXException e) { - LogInfo.errors("XML: %s", xml); - // throw new RuntimeException(e); - return null; - } catch (ParserConfigurationException e) { - throw new RuntimeException(e); - } - return results; - } - - // Main entry point. - public Response execute(Formula formula, ContextValue context) { - // Note: don't do beta reduction here to preserve the semantics of lambda DCS. - // Beta reduction should be triggered deliberately in the SemanticFn. 
- return execute(formula, 0, opts.maxResults); - } - public synchronized Response execute(Formula formula, int offset, int maxResults) { - if (opts.verbose >= 3) - LogInfo.logs("SparqlExecutor.execute: %s", formula); - String prefix = "exec-"; - - Evaluation stats = new Evaluation(); - // Convert to SPARQL - Converter converter; - try { - converter = new Converter(formula, offset, maxResults); - } catch (BadFormulaException e) { - stats.add(prefix + "error", true); - return new Response(ErrorValue.badFormula(e), stats); - } - - ServerResponse serverResponse = runQueryToGetXml(converter.queryStr, formula); - stats.add(prefix + "cached", serverResponse.cached); - if (!serverResponse.cached) - stats.add(prefix + "time", serverResponse.timeMs); - - //// Record statistics - - // Update/print sparql stats - if (!serverResponse.cached) { - queryStats.timeFig.add(serverResponse.timeMs); - if (serverResponse.error != null) { - MapUtils.incr(queryStats.errors, serverResponse.error.type, 1); - if (serverResponse.beginTrack && opts.verbose >= 1) - LogInfo.logs("Error: %s", serverResponse.error); - } - if (serverResponse.beginTrack && opts.verbose >= 2) { - LogInfo.logs("time: %s", queryStats.timeFig); - LogInfo.logs("errors: %s", queryStats.errors); - } - } - - // If error, then return out - if (serverResponse.error != null) { - if (serverResponse.beginTrack && opts.verbose >= 1) LogInfo.end_track(); - if (!serverResponse.cached) - stats.add(prefix + "error", true); - return new Response(serverResponse.error, stats); - } - - if (!serverResponse.cached) - stats.add(prefix + "error", false); - - // Extract the results from XML now. 
- NodeList results = extractResultsFromXml(serverResponse.xml); - if (results == null) return new Response(ErrorValue.badFormat, stats); - Value value = new ValuesExtractor(serverResponse.beginTrack, formula, converter).extract(results); - - if (serverResponse.beginTrack && opts.verbose >= 1) LogInfo.end_track(); - - return new Response(value, stats); - } - - //////////////////////////////////////////////////////////// - // Convert a Formula into a SparqlExpr. - class Converter { - private int numVars = 0; // Used to create new Sparql variables - - // Unit for each SPARQL variable. - Map unitsMap = new HashMap(); // ?y => fb:en.meter - - // For each variable, a description - Map descriptionsMap = new HashMap(); // ?y => "Height (meters)" - - String queryStr; - SparqlSelect query; // Resulting SPARQL expression - - // The state used to a SELECT statement (DRT box in which all variables are existentially closed). - class Box { - // These are the variables that are first selected. - List initialVars = new ArrayList(); - - // Mapping from lambda DCS variables to SPARQL variables (which are unique across the entire formula, not just this box). - Map env = new LinkedHashMap(); - - // Some SPARQL variables are bound to quantities based on the SELECT statement (e.g., COUNT(?x)). - Map asValuesMap = new HashMap(); // e.g., ?y => COUNT(?x) or ?y => (?x1 + ?x2) - } - - public Converter(Formula rootFormula, int offset, int maxResults) throws BadFormulaException { - Ref head = new Ref(); - Box box = new Box(); - - rootFormula = stripOuterLambdas(box, rootFormula); - SparqlBlock block = convert(rootFormula, head, null, box); - query = closeExistentialScope(block, head, box); - - // Select all the variables that appear in the block if we want to return a table. - // (Provide the evidence.) 
- if (opts.includeSupportingInfo) { - for (SparqlExpr expr : block.children) { - if (!(expr instanceof SparqlStatement)) continue; - SparqlStatement stmt = (SparqlStatement) expr; - if (stmt.arg1 instanceof VariableFormula) addSelectVar(box, query, block, (VariableFormula) stmt.arg1, false); - if (stmt.arg2 instanceof VariableFormula) addSelectVar(box, query, block, (VariableFormula) stmt.arg2, false); - } - } - - if (query.offset == 0) // If not set - query.offset = offset; - if (query.limit == -1) // If not set - query.limit = maxResults; - queryStr = "PREFIX fb: <" + FreebaseInfo.freebaseNamespace + "> " + query; - } - - // Strip off lambdas and add the variables to the environment - // For example, in (lambda x (lambda y BODY)), we would create an - // environment {x:NEW_VAR, y:NEW_VAR}, and interpret BODY as a unary. - private Formula stripOuterLambdas(Box box, Formula formula) { - while (formula instanceof LambdaFormula) { - LambdaFormula lambda = ((LambdaFormula) formula); - VariableFormula var = newVar(); - box.env.put(new VariableFormula(lambda.var), var); - box.initialVars.add(var); - formula = lambda.body; - } - return formula; - } - - // Create a SELECT expression (a DRT box). - private SparqlSelect closeExistentialScope(SparqlBlock block, Ref head, Box box) { - // Optimization: if block only contains one select statement, then can optimize and just return that. 
- if (block.children.size() == 1 && block.children.get(0) instanceof SparqlSelect && box.initialVars.size() == 0) - return (SparqlSelect) block.children.get(0); - - SparqlSelect select = new SparqlSelect(); - - // Add initial variables - for (VariableFormula var : box.initialVars) - addSelectVarWithName(box, select, block, var); - - // Add head variable (ensure that the head is a variable rather than a primitive value) - VariableFormula headVar = ensureIsVar(box, block, head); - addSelectVarWithName(box, select, block, headVar); - - // Add the other supporting variables in the environment (for communicating with nested blocks, e.g. for superlatives). - for (PrimitiveFormula formula : box.env.values()) { - if (!(formula instanceof VariableFormula)) continue; - VariableFormula supportingVar = (VariableFormula) formula; - addSelectVarWithName(box, select, block, supportingVar); - } - - select.where = block; - return select; - } - - // Add the variable |var|, but also potentially - private void addSelectVarWithName(Box box, SparqlSelect select, SparqlBlock block, VariableFormula var) { - addSelectVar(box, select, block, var, false); - - // Get the name of the head - String unit = unitsMap.get(var); - // LogInfo.logs("unit[%s] = %s", var, unit); - if (opts.includeEntityNames && FreebaseInfo.ENTITY.equals(unit)) { - VariableFormula nameVar = new VariableFormula(var.name + "name"); - addSelectVar(box, select, block, nameVar, true); - addOptionalStatement(block, var, FreebaseInfo.NAME, nameVar); - } - } - - private void addSelectVar(Box box, SparqlSelect select, SparqlBlock block, VariableFormula var, boolean isAuxiliary) { - if (opts.verbose >= 5) LogInfo.logs("addSelectVar: %s : %s | %s", var, box.asValuesMap.get(var), box.asValuesMap); - - // Check if alrady exists; if so, don't add it again - for (SparqlSelect.Var oldVar : select.selectVars) - if (oldVar.var.equals(var)) return; - - select.selectVars.add(new SparqlSelect.Var(var, box.asValuesMap.get(var), 
unitsMap.get(var), isAuxiliary, descriptionsMap.get(var))); - } - - // Mutable |head| to make sure it contains a VariableFormula. - private VariableFormula ensureIsVar(Box box, SparqlBlock block, Ref head) { - VariableFormula headVar; - if (head.value instanceof VariableFormula) { - headVar = (VariableFormula) head.value; - } else { - headVar = newVar(); - if (head.value != null) { - // LogInfo.logs("ensureIsVar: %s : %s", headVar, head.value); - Value value = ((ValueFormula) head.value).value; - if (value instanceof NumberValue) { // encode as (3 as ?x1) [FILTER doesn't work for isolated numbers] - box.asValuesMap.put(headVar, Formulas.getString(head.value)); - unitsMap.put(headVar, valueToUnit(((ValueFormula) head.value).value)); - } else { // encode as (FILTER (?x1 = fb:en.barack_obama)) - addStatement(block, headVar, "=", head.value); - addEntityStatement(block, headVar); - } - } - head.value = headVar; - } - return headVar; - } - - // Add statement as well as updating the units information. - private void addOptionalStatement(SparqlBlock block, PrimitiveFormula arg1, String property, PrimitiveFormula arg2) { addStatement(block, arg1, property, arg2, true); } - private void addStatement(SparqlBlock block, PrimitiveFormula arg1, String property, PrimitiveFormula arg2) { addStatement(block, arg1, property, arg2, false); } - private void addStatement(SparqlBlock block, PrimitiveFormula arg1, String property, PrimitiveFormula arg2, boolean optional) { - block.addStatement(arg1, property, arg2, optional); - - if (arg1 instanceof VariableFormula) { - VariableFormula var = (VariableFormula) arg1; - - // If the statement is ?x = , then extract unit from value. 
- if (property.equals("=") && arg2 instanceof ValueFormula) { - updateUnit(var, valueToUnit(((ValueFormula) arg2).value)); - } else if (property.equals(FreebaseInfo.TYPE)) { - String type = Formulas.getString(arg2); - updateUnit(var, fbInfo.typeToUnit(type, null)); - if (descriptionsMap.get(var) == null) - descriptionsMap.put(var, fbInfo.getName(type)); - } else if (!SparqlStatement.isOperator(property)) { - updateUnit(var, fbInfo.getUnit1(property)); - } - - if (descriptionsMap.get(var) == null) { - descriptionsMap.put(var, fbInfo.getName(fbInfo.getArg1Type(property))); - if (opts.verbose >= 3) LogInfo.logs("description arg1=%s => %s => %s", var, fbInfo.getArg1Type(property), descriptionsMap.get(var)); - } - } - - if (arg2 instanceof VariableFormula) { - // Get unit from Freebase property. - VariableFormula var = (VariableFormula) arg2; - updateUnit(var, fbInfo.getUnit2(property)); - descriptionsMap.put(var, fbInfo.getName(property)); - if (opts.verbose >= 3) LogInfo.logs("description arg2=%s => %s => %s", var, property, descriptionsMap.get(var)); - } - } - - void addEntityStatement(SparqlBlock block, VariableFormula var) { - // This is dangerous because in the DB, not all entities are necessarily labeled with fb:common.topic - //addStatement(block, var, FreebaseInfo.TYPE, new ValueFormula(new NameValue(FreebaseInfo.ENTITY))); - // Only needed when includeEntityNames = true. - addStatement(block, var, FreebaseInfo.TYPE, newVar()); - } - - // Update the unit of |var| if necessary. - void updateUnit(VariableFormula var, String unit) { - if (opts.verbose >= 5) LogInfo.logs("updateUnit: %s : %s", var, unit); - if (unit == null) return; - String oldUnit = unitsMap.get(var); - if (oldUnit == null) { unitsMap.put(var, unit); return; } - - // This replacement isn't quite kosher from a subtyping relation point of - // view (we're dealing with units, not types). 
- if (oldUnit.equals(NumberValue.unitless)) { unitsMap.put(var, unit); return; } // fb:en.meter replaces fb:en.unitless - if (oldUnit.equals(FreebaseInfo.ENTITY)) { unitsMap.put(var, unit); return; } // fb:en.cvt replaces fb:common.topic - if (oldUnit.equals(FreebaseInfo.CVT)) return; // Keep CVT - - if (!unit.equals(oldUnit)) - LogInfo.errors("Unit mis-match for %s: old is '%s', new is '%s'", var, oldUnit, unit); - } - - void updateAsValues(Box box, VariableFormula var, String asValue) { - if (opts.verbose >= 5) LogInfo.logs("updateAsValues: %s : %s", var, asValue); - box.asValuesMap.put(var, asValue); - } - - private String valueToUnit(Value value) { - // Note: units are fine grained on numbers but coarse on entities. - if (value instanceof NameValue) return FreebaseInfo.ENTITY; // Assume this is not a CVT - if (value instanceof BooleanValue) return FreebaseInfo.BOOLEAN; - if (value instanceof NumberValue) return ((NumberValue) value).unit; - if (value instanceof StringValue) return FreebaseInfo.TEXT; - if (value instanceof DateValue) return FreebaseInfo.DATE; - return null; - } - - // Main conversion function. 
- // head, modifier: SPARQL variables (e.g., ?x13) - // box: - // - env: mapping from lambda-DCS variables (e.g., ?city) to SPARQL variables (?x13) - // - asValuesMap: additional constraints - private SparqlBlock convert(Formula rawFormula, Ref head, Ref modifier, Box box) { - if (opts.verbose >= 5) LogInfo.begin_track("convert %s: head = %s, modifier = %s, env = %s", rawFormula, head, modifier, box.env); - - // Check binary/unary compatibility - boolean isNameFormula = (rawFormula instanceof ValueFormula) && (((ValueFormula) rawFormula).value instanceof NameValue); // Either binary or unary - boolean needsBinary = (modifier != null); - boolean providesBinary = rawFormula instanceof LambdaFormula || rawFormula instanceof ReverseFormula; - if (!isNameFormula && needsBinary != providesBinary) { - throw new RuntimeException("Binary/unary mis-match: " + - rawFormula + " is " + (providesBinary ? "binary" : "unary") + - ", but need " + (needsBinary ? "binary" : "unary")); - } - - SparqlBlock block = new SparqlBlock(); - - if (rawFormula instanceof ValueFormula) { // e.g., fb:en.barack_obama or (number 3) - @SuppressWarnings({ "unchecked" }) - ValueFormula formula = (ValueFormula) rawFormula; - if (modifier != null) { // Binary predicate - if (head.value == null) head.value = newVar(); - if (modifier.value == null) modifier.value = newVar(); - // Deal with primitive reverses (!fb:people.person.date_of_birth) - String property = ((NameValue) formula.value).id; - PrimitiveFormula arg1, arg2; - if (FreebaseInfo.isReverseProperty(property)) { - arg1 = modifier.value; - property = property.substring(1); - arg2 = head.value; - } else { - arg1 = head.value; - arg2 = modifier.value; - } - - // Annoying logic to deal with dates. 
- // If we have - // ?x fb:people.person.date_of_birth "2003"^xsd:datetime, - // then create two statements: - // ?x fb:people.person.date_of_birth ?v - // ?v = "2003"^xsd:datetime [this needs to be transformed] - if (!SparqlStatement.isOperator(property)) { - if (arg2 instanceof ValueFormula) { - Value value = ((ValueFormula) arg2).value; - if (value instanceof DateValue) { - VariableFormula v = newVar(); - addStatement(block, v, "=", arg2); - arg2 = v; - } - } - } - addStatement(block, arg1, property, arg2); - } else { // Unary predicate - unify(block, head, formula); - } - } else if (rawFormula instanceof VariableFormula) { - VariableFormula var = (VariableFormula) rawFormula; - PrimitiveFormula value = box.env.get(var); - if (value == null) - throw new RuntimeException("Unbound variable: " + var + ", env = " + box.env); - unify(block, head, value); - } else if (rawFormula instanceof NotFormula) { - NotFormula formula = (NotFormula) rawFormula; - block.add(new SparqlNot(convert(formula.child, head, null, box))); - } else if (rawFormula instanceof MergeFormula) { - MergeFormula formula = (MergeFormula) rawFormula; - switch (formula.mode) { - case and: - block.add(convert(formula.child1, head, null, box)); - block.add(convert(formula.child2, head, null, box)); - break; - case or: - SparqlUnion union = new SparqlUnion(); - ensureIsVar(box, block, head); - union.add(convert(formula.child1, head, null, box)); - union.add(convert(formula.child2, head, null, box)); - block.add(union); - break; - default: - throw new RuntimeException("Unhandled mode: " + formula.mode); - } - } else if (rawFormula instanceof JoinFormula) { - // Join - JoinFormula formula = (JoinFormula) rawFormula; - Ref intermediate = new Ref(); - block.add(convert(formula.child, intermediate, null, box)); - block.add(convert(formula.relation, head, intermediate, box)); - } else if (rawFormula instanceof ReverseFormula) { - // Reverse - ReverseFormula formula = (ReverseFormula) rawFormula; - 
block.add(convert(formula.child, modifier, head, box)); // Switch modifier and head - } else if (rawFormula instanceof LambdaFormula) { - // Lambda (new environment, same scope) - LambdaFormula formula = (LambdaFormula) rawFormula; - if (modifier.value == null) modifier.value = newVar(); - Box newBox = createNewBox(formula.body, box); // Create new environment - newBox.env.put(new VariableFormula(formula.var), modifier.value); // Map variable to modifier - block.add(convert(formula.body, head, null, newBox)); - // Place pragmatic constraint that head != modifier (for symmetric relations like spouse) - if (!opts.lambdaAllowDiagonals) - block.addStatement(head.value, "!=", modifier.value, false); - returnAsValuesMap(box, newBox); - } else if (rawFormula instanceof MarkFormula) { - // Mark (new environment, same scope) - MarkFormula formula = (MarkFormula) rawFormula; - if (head.value == null) head.value = newVar(); - Box newBox = createNewBox(formula.body, box); // Create new environment - newBox.env.put(new VariableFormula(formula.var), head.value); // Map variable to head (ONLY difference with lambda) - block.add(convert(formula.body, head, null, newBox)); - returnAsValuesMap(box, newBox); - } else if (rawFormula instanceof SuperlativeFormula) { - // Superlative (new environment, close scope) - SuperlativeFormula formula = (SuperlativeFormula) rawFormula; - - int rank = Formulas.getInt(formula.rank); - int count = Formulas.getInt(formula.count); - - boolean useOrderBy = rank != 1 || count != 1; - boolean isMax = formula.mode == SuperlativeFormula.Mode.argmax; - if (useOrderBy) { - // Method 1: use ORDER BY - // + can deal with offset and limit - // - but can't be nested - // - doesn't handle ties at the top - // Recurse - Box newBox = createNewBox(formula.head, box); // Create new environment - SparqlBlock newBlock = convert(formula.head, head, null, newBox); - Ref degree = new Ref(); - newBlock.add(convert(formula.relation, head, degree, newBox)); - - // Apply the 
aggregation operation - VariableFormula degreeVar = ensureIsVar(box, block, degree); - - // Force |degreeVar| to be selected as a variable. - box.env.put(new VariableFormula("degree"), degreeVar); - newBox.env.put(new VariableFormula("degree"), degreeVar); - - SparqlSelect select = closeExistentialScope(newBlock, head, newBox); - select.sortVars.add(isMax ? new VariableFormula(applyVar("DESC", degreeVar)) : degreeVar); - select.offset = rank - 1; - select.limit = count; - block.add(select); - } else { - // Method 2: use MAX - // - can't deal with offset and limit - // + can be nested - // + handles ties at the top - // (argmax 1 1 h r) ==> (h (r (mark degree (max ((reverse r) e))))) - AggregateFormula.Mode mode = isMax ? AggregateFormula.Mode.max : AggregateFormula.Mode.min; - Formula best = new MarkFormula("degree", new AggregateFormula(mode, new JoinFormula(new ReverseFormula(formula.relation), formula.head))); - Formula transformed = new MergeFormula(MergeFormula.Mode.and, formula.head, new JoinFormula(formula.relation, best)); - if (opts.verbose >= 5) LogInfo.logs("TRANSFORMED: %s", transformed); - block.add(convert(transformed, head, null, box)); - } - } else if (rawFormula instanceof AggregateFormula) { - // Aggregate (new environment, close scope) - AggregateFormula formula = (AggregateFormula) rawFormula; - ensureIsVar(box, block, head); - - // Recurse - Box newBox = createNewBox(formula.child, box); // Create new environment - Ref newHead = new Ref(newVar()); // Stores the aggregated value - SparqlBlock newBlock = convert(formula.child, newHead, null, newBox); - - VariableFormula var = (VariableFormula) head.value; // e.g., ?x - - // Variable representing the aggregation - VariableFormula newVar = (VariableFormula) newHead.value; // e.g., ?y = COUNT(?x) - String headUnit = formula.mode == AggregateFormula.Mode.count ? 
NumberValue.unitless : unitsMap.get(newHead.value); - updateUnit(var, headUnit); - descriptionsMap.put(var, capitalize(formula.mode.toString())); - - // If do aggregation on dates, need to convert wrap with xsd:datetime - boolean specialDateHandling = - (formula.mode == AggregateFormula.Mode.min || formula.mode == AggregateFormula.Mode.max) && - FreebaseInfo.DATE.equals(headUnit); - - if (specialDateHandling) { - VariableFormula dateVar = new VariableFormula(var.name + "date"); - updateAsValues(newBox, dateVar, applyVar(formula.mode.toString(), SparqlUtils.dateTimeStr(newVar))); // ?dateVar AS max(xsd:datetime(?newVar)) - block.add(closeExistentialScope(newBlock, new Ref(dateVar), newBox)); - addStatement(block, new VariableFormula(SparqlUtils.dateTimeStr(var)), "=", dateVar); // Add xsd:datetime(?var) = ?dateVar on the outside. - // PROBLEM: (max ...) doesn't work by itself because the variable returned (var) is only involved in the previous construction, - // but this is fine in conjunction with other things like argmax. 
- } else { - updateAsValues(newBox, var, applyVar(formula.mode.toString(), newVar)); // ?var AS COUNT(?newVar) - block.add(closeExistentialScope(newBlock, head, newBox)); - } - } else if (rawFormula instanceof ArithmeticFormula) { // (+ (number 3) (number 5)) - ArithmeticFormula formula = (ArithmeticFormula) rawFormula; - Ref newHead1 = new Ref(); - Ref newHead2 = new Ref(); - block.add(convert(formula.child1, newHead1, null, box)); - block.add(convert(formula.child2, newHead2, null, box)); - if (head.value == null) head.value = newVar(); - VariableFormula var = (VariableFormula) head.value; - PrimitiveFormula var1 = newHead1.value; - PrimitiveFormula var2 = newHead2.value; - updateAsValues(box, var, applyOpVar(ArithmeticFormula.modeToString(formula.mode), var1, var2)); - String unit = unitsMap.get(var1); // Just take the unit from variable 1 - if (unit == null) unit = NumberValue.unitless; - String newUnit; - switch (formula.mode) { - case add: - newUnit = unit; - break; - case sub: - newUnit = FreebaseInfo.DATE.equals(unit) ? NumberValue.yearUnit : unit; - break; - default: - // Don't even try to get the unit right - newUnit = NumberValue.unitless; - break; - } - updateUnit(var, newUnit); - descriptionsMap.put(var, "Number"); // This is weak - } else { - throw new RuntimeException("Unhandled formula: " + rawFormula); - } - - if (opts.verbose >= 5) LogInfo.logs("return: head = %s, modifier = %s, env = %s", head, modifier, box.env); - if (opts.verbose >= 5) LogInfo.end_track(); - - return block; - } - - private String capitalize(String s) { return Character.toUpperCase(s.charAt(0)) + s.substring(1); } - - // Copy |box|'s |env|, but only keep the variables which are used in |formula| (these are the free variables). - // This is an important optimization for converting to SPARQL. 
- private Box createNewBox(Formula formula, Box box) { - Box newBox = new Box(); - for (VariableFormula key : box.env.keySet()) - if (Formulas.containsFreeVar(formula, key)) - newBox.env.put(key, box.env.get(key)); - return newBox; - } - - // Copy asValuesMap constraints from newBox to box. - // This is for when we create a new environment (newBox), but maintain the same scope, - // so we don't rely on closeExistentialScope to include the asValuesMap constraints. - private void returnAsValuesMap(Box box, Box newBox) { - for (Map.Entry e : newBox.asValuesMap.entrySet()) { - if (box.asValuesMap.containsKey(e.getKey())) - throw new RuntimeException("Copying asValuesMap involves overwriting: " + box + " <- " + newBox); - box.asValuesMap.put(e.getKey(), e.getValue()); - } - } - - private void unify(SparqlBlock block, Ref head, PrimitiveFormula value) { - if (head.value == null) { - // |head| is not set, just use |value|. - head.value = value; - } else { - // |head| is already set, so add a constraint that it equals |value|. - // This happens when the logical form is just a single entity (e.g., fb:en.barack_obama). - addStatement(block, head.value, "=", value); - if (head.value instanceof VariableFormula && value instanceof ValueFormula && ((ValueFormula) value).value instanceof NameValue) - addEntityStatement(block, (VariableFormula) head.value); - } - } - - // Helper functions - private String applyVar(String func, VariableFormula var) { return applyVar(func, var.name); } - private String applyVar(String func, String var) { - if (func.equals("count")) var = "DISTINCT " + var; - return func + "(" + var + ")"; - } - - private String applyOpVar(String func, PrimitiveFormula var1, PrimitiveFormula var2) { - // Special function for taking the difference between dates. 
- LogInfo.logs("%s %s", var1, var2); - if (func.equals("-") && (var1 instanceof VariableFormula) && FreebaseInfo.DATE.equals(unitsMap.get(var1))) - return "bif:datediff(\"year\"," + SparqlUtils.dateTimeStr(var2) + "," + SparqlUtils.dateTimeStr(var1) + ")"; - else if (func.equals("+") && (var1 instanceof VariableFormula) && FreebaseInfo.DATE.equals(unitsMap.get(var1))) - return "bif:dateadd(\"year\"," + Formulas.getString(var2) + "," + SparqlUtils.dateTimeStr(var1) + ")"; // date + number - else - return '(' + Formulas.getString(var1) + ' ' + func + ' ' + Formulas.getString(var2) + ')'; - } - - private VariableFormula newVar() { - numVars++; - return new VariableFormula("?x" + numVars); - } - } - - //////////////////////////////////////////////////////////// - // Take results of executing an SparqlExpr and produce a List of values. - class ValuesExtractor { - final boolean beginTrack; - final Formula formula; - final List selectVars; - final List units; - final List header; - - public ValuesExtractor(boolean beginTrack, Formula formula, Converter converter) { - this.beginTrack = beginTrack; - this.formula = formula; - - this.selectVars = Lists.newArrayList(); - this.units = Lists.newArrayList(); - this.header = Lists.newArrayList(); - for (SparqlSelect.Var var : converter.query.selectVars) { - if (var.isAuxiliary) continue; - this.selectVars.add(var.var.name); - this.units.add(var.unit); - this.header.add(var.description); - } - } - - // |results| is (result (binding (uri ...)) ...) or (result (binding (literal ...)) ...) - Value extract(NodeList results) { - // For each result (row in a table)... 
- if (beginTrack && opts.verbose >= 2) { - LogInfo.begin_track("%d results", results.getLength()); - if (opts.returnTable) LogInfo.logs("Header: %s", header); - } - - List firstValues = new ArrayList(); // If not returning a table - List> rows = new ArrayList>(); // If returning table - - for (int i = 0; i < results.getLength(); i++) { - List row = nodeToValue(results.item(i)); - if (opts.returnTable) - rows.add(row); - else - firstValues.add(row.get(0)); - if (beginTrack && opts.verbose >= 2) LogInfo.logs("Row %d: %s", i, row); - } - if (beginTrack && opts.verbose >= 2) LogInfo.end_track(); - - if (opts.returnTable) - return new TableValue(header, rows); - else - return new ListValue(firstValues); - } - - private List nodeToValue(Node result) { - NodeList bindings = ((Element) result).getElementsByTagName("binding"); - - // For each variable in selectVars, we're going to keep track of an |id| - // (only for entities) and |description| (name or the literal value). - List ids = Lists.newArrayList(); - List descriptions = Lists.newArrayList(); - for (int j = 0; j < selectVars.size(); j++) { - ids.add(null); - descriptions.add(null); - } - - // For each binding j (contributes some information to one column)... - for (int j = 0; j < bindings.getLength(); j++) { - Element binding = (Element) bindings.item(j); - - String var = "?" 
+ binding.getAttribute("name"); - int col; - if (var.endsWith("name")) - col = selectVars.indexOf(var.substring(0, var.length() - 4)); - else - col = selectVars.indexOf(var); - - String uri = getTagValue("uri", binding); - if (uri != null) ids.set(col, FreebaseInfo.uri2id(uri)); - - String literal = getTagValue("literal", binding); - if (literal != null) descriptions.set(col, literal); - } - - // Go through the selected variables and build the actual value - List row = Lists.newArrayList(); - for (int j = 0; j < selectVars.size(); j++) { - String unit = units.get(j); - String id = ids.get(j); - String description = descriptions.get(j); - - // Convert the string representation back to a value based on the unit. - Value value = null; - if (unit == null) { - value = new NameValue(id, description); - } else if (unit.equals(FreebaseInfo.DATE)) { - value = description == null ? null : DateValue.parseDateValue(description); - } else if (unit.equals(FreebaseInfo.TEXT)) { - value = new StringValue(description); - } else if (unit.equals(FreebaseInfo.BOOLEAN)) { - value = new BooleanValue(Boolean.parseBoolean(description)); - } else if (unit.equals(FreebaseInfo.ENTITY)) { - value = new NameValue(id, description); - } else if (unit.equals(FreebaseInfo.CVT)) { - LogInfo.warnings("%s returns CVT, probably not intended", formula); - value = new NameValue(id, description); - } else { - value = new NumberValue("NAN".equals(description) || description == null ? Double.NaN : Double.parseDouble(description), unit); - } - row.add(value); - } - return row; - } - } - - // Helper for parsing DOM. - // Return the inner text of of a child element of |elem| with tag |tag|. 
- public static String getTagValue(String tag, Element elem) { - NodeList nodes = elem.getElementsByTagName(tag); - if (nodes.getLength() == 0) return null; - if (nodes.getLength() > 1) - throw new RuntimeException("Multiple instances of " + tag); - nodes = nodes.item(0).getChildNodes(); - if (nodes.getLength() == 0) return null; - Node value = nodes.item(0); - return value.getNodeValue(); - } - - //////////////////////////////////////////////////////////// - - public static class MainOptions { - @Option(gloss = "Sparql expression to execute") public String sparql; - @Option(gloss = "Formula to execute") public String formula; - @Option(gloss = "File containing formulas to execute") - public String formulasPath; - } - - public static void main(String[] args) throws IOException { - OptionsParser parser = new OptionsParser(); - MainOptions mainOpts = new MainOptions(); - parser.registerAll(new Object[]{"SparqlExecutor", SparqlExecutor.opts, "FreebaseInfo", FreebaseInfo.opts, "main", mainOpts}); - parser.parse(args); - - LogInfo.begin_track("main()"); - SparqlExecutor executor = new SparqlExecutor(); - - if (mainOpts.formula != null) { - LogInfo.logs("%s", executor.execute(Formulas.fromLispTree(LispTree.proto.parseFromString(mainOpts.formula)), null).value); - } - - if (mainOpts.formulasPath != null) { - Iterator trees = LispTree.proto.parseFromFile(mainOpts.formulasPath); - while (trees.hasNext()) { - LogInfo.logs("%s", executor.execute(Formulas.fromLispTree(trees.next()), null).value); - } - } - - if (mainOpts.sparql != null) - LogInfo.logs("%s", executor.makeRequest(mainOpts.sparql, opts.endpointUrl).xml); - - LogInfo.end_track(); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/SparqlExpr.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/SparqlExpr.java deleted file mode 100644 index 0aa99c45a7..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/SparqlExpr.java +++ /dev/null @@ -1,253 +0,0 
@@ -package edu.stanford.nlp.sempre.freebase; - -import java.util.*; -import com.google.common.collect.Lists; -import fig.basic.*; -import edu.stanford.nlp.sempre.*; - -/** - * Utilities for constructing SPARQL expressions. - * Note that we represent a subset of the SPARQL. - * @author Percy Liang - */ -public interface SparqlExpr { -} - -// Example: { ... } -class SparqlBlock implements SparqlExpr { - public final List children = Lists.newArrayList(); - - public SparqlBlock add(SparqlExpr expr) { - if (expr instanceof SparqlBlock) - this.children.addAll(((SparqlBlock) expr).children); - else - this.children.add(expr); - return this; - } - - private String getId(PrimitiveFormula formula) { - if (!(formula instanceof ValueFormula)) return null; - Value value = ((ValueFormula) formula).value; - if (!(value instanceof NameValue)) return null; - return ((NameValue) value).id; - } - - private boolean isPrimitiveType(String id) { - if (FreebaseInfo.BOOLEAN.equals(id)) return true; - if (FreebaseInfo.INT.equals(id)) return true; - if (FreebaseInfo.FLOAT.equals(id)) return true; - if (FreebaseInfo.DATE.equals(id)) return true; - if (FreebaseInfo.TEXT.equals(id)) return true; - return false; - } - - void addStatement(PrimitiveFormula arg1, String property, PrimitiveFormula arg2, boolean optional) { - if (!property.startsWith("fb:") && !SparqlStatement.isOperator(property) && !SparqlStatement.isSpecialFunction(property) && !SparqlStatement.isIndependent(property)) - throw new RuntimeException("Invalid SPARQL property: " + property); - // Ignore statements like: - // ?x fb:type.object.type fb:type.datetime - // because we should have already captured the semantics using other formulas that involve ?x. 
- if (property.equals(FreebaseInfo.TYPE)) { - String id = getId(arg2); - if (isPrimitiveType(id) || FreebaseInfo.ANY.equals(id)) return; - } - if (SparqlStatement.isIndependent(property)) return; // Nothing connecting arg1 and arg2 - add(new SparqlStatement(arg1, property, arg2, optional)); - } - - @Override public String toString() { - List strings = Lists.newArrayList(); - for (SparqlExpr expr : children) { - if (expr instanceof SparqlSelect) - strings.add("{ " + expr + " }"); - else - strings.add(expr.toString()); - } - return "{ " + StrUtils.join(strings, " . ") + " }"; - } -} - -// Example: SELECT ?x ?y WHERE { ... } ORDER BY ?x ?y LIMIT 10 OFFSET 3 -class SparqlSelect implements SparqlExpr { - static class Var { - public final VariableFormula var; - public final String asValue; // for COUNT(?x3) as ?x2 - public final String unit; // Specifies the types of the variable (used to parse back the results) - public final boolean isAuxiliary; // Whether this is supporting information (e.g., names) - public final String description; // Human-friendly for display - - public Var(VariableFormula var, String asValue, String unit, boolean isAuxiliary, String description) { - this.var = var; - this.asValue = asValue; - this.unit = unit; - this.isAuxiliary = isAuxiliary; - this.description = description; - } - - @Override public String toString() { - if (asValue == null) return var.name; - return "(" + asValue + " AS " + var.name + ")"; - } - } - - public final List selectVars = Lists.newArrayList(); - - public SparqlBlock where; - public final List sortVars = Lists.newArrayList(); - public int offset = 0; // Start at this point when returning results - public int limit = -1; // Number of results to return - - @Override public String toString() { - StringBuilder out = new StringBuilder(); - out.append("SELECT DISTINCT"); - for (Var var : selectVars) - out.append(" " + var.toString()); - out.append(" WHERE " + where); - if (sortVars.size() > 0) { - out.append(" ORDER BY"); - 
for (PrimitiveFormula sortVar : sortVars) - out.append(" " + SparqlUtils.plainStr(sortVar)); - } - if (limit != -1) out.append(" LIMIT " + limit); - if (offset != 0) out.append(" OFFSET " + offset); - return out.toString(); - } -} - -// Example: { ... } UNION { ... } UNION { ... } -class SparqlUnion implements SparqlExpr { - public final List children = Lists.newArrayList(); - public SparqlUnion add(SparqlBlock block) { this.children.add(block); return this; } - @Override public String toString() { - return "{ " + StrUtils.join(children, " UNION ") + " }"; - } -} - -// Example: FILTER NOT EXISTS { ... } -class SparqlNot implements SparqlExpr { - public final SparqlBlock block; - public SparqlNot(SparqlBlock block) { this.block = block; } - @Override public String toString() { - return "FILTER NOT EXISTS " + block; - } -} - -class SparqlStatement implements SparqlExpr { - public final PrimitiveFormula arg1; - public final String relation; - public final PrimitiveFormula arg2; - public boolean optional; - public String options; - - public SparqlStatement(PrimitiveFormula arg1, String relation, PrimitiveFormula arg2, boolean optional) { - this.arg1 = arg1; - this.relation = relation; - this.arg2 = arg2; - this.optional = optional; - this.options = null; - } - - public SparqlStatement(PrimitiveFormula arg1, String relation, PrimitiveFormula arg2, boolean optional, String options) { - this(arg1, relation, arg2, optional); - this.options = options; - } - - public static boolean isIndependent(String relation) { return relation.equals(":"); } - - public static boolean isOperator(String relation) { - return relation.equals("=") || relation.equals("!=") || - relation.equals("<") || relation.equals(">") || - relation.equals("<=") || relation.equals(">="); - } - - public static boolean isSpecialFunction(String relation) { - return relation.equals("STRSTARTS") || relation.equals("STRENDS"); - } - - public String simpleString() { - // Workaround for annoying dates: - // 
http://answers.semanticweb.com/questions/947/dbpedia-sparql-endpoint-xsddate-comparison-weirdness - if (arg2 instanceof ValueFormula && ((ValueFormula) arg2).value instanceof DateValue) { - if (isOperator(relation)) { - if (relation.equals("=")) { - // (= (date 2000 -1 -1)) really means (>= (2000 -1 -1)) and (< (2001 -1 -1)) - DateValue startDate = (DateValue) ((ValueFormula) arg2).value; - DateValue endDate = advance(startDate); - return SparqlUtils.dateTimeStr(arg1) + " >= " + SparqlUtils.dateTimeStr(new ValueFormula(startDate)) + ") . FILTER (" + - SparqlUtils.dateTimeStr(arg1) + " < " + SparqlUtils.dateTimeStr(new ValueFormula(endDate)); - } - if (relation.equals("<=")) { - // (<= (date 2000 -1 -1)) really means (< (date 2001 -1 -1)) - DateValue startDate = (DateValue) ((ValueFormula) arg2).value; - DateValue endDate = advance(startDate); - return SparqlUtils.dateTimeStr(arg1) + " < " + SparqlUtils.dateTimeStr(new ValueFormula(endDate)); - } - if (relation.equals(">")) { - // (> (date 2000 -1 -1)) really means >= (date 2001 -1 -1) - DateValue startDate = (DateValue) ((ValueFormula) arg2).value; - DateValue endDate = advance(startDate); - return SparqlUtils.dateTimeStr(arg1) + " >= " + SparqlUtils.dateTimeStr(new ValueFormula(endDate)); - } - if (relation.equals("<") || relation.equals(">=")) - return SparqlUtils.dateTimeStr(arg1) + " " + relation + " " + SparqlUtils.dateTimeStr(arg2); - // Note: != is not treated specially - } - } - - return SparqlUtils.plainStr(arg1) + " " + relation + " " + SparqlUtils.plainStr(arg2); - } - - private DateValue advance(DateValue date) { - // TODO(pliang): deal with carrying over - if (date.day != -1) return new DateValue(date.year, date.month, date.day + 1); - if (date.month != -1) return new DateValue(date.year, date.month + 1, -1); - return new DateValue(date.year + 1, -1, -1); - } - - public String toString() { - String result; - if (isSpecialFunction(relation)) { // Special functions - result = "FILTER (" + relation + "(" 
+ SparqlUtils.plainStr(arg1) + "," + SparqlUtils.plainStr(arg2) + "))"; - } else if (isOperator(relation)) { - result = "FILTER (" + simpleString() + ")"; - } else if (optional) { - result = "OPTIONAL { " + simpleString() + " }"; - } else { - result = simpleString(); - } - - if (this.options != null) - result += " " + this.options; - - return result; - } -} - -final class SparqlUtils { - private SparqlUtils() { } - - public static String dateTimeStr(PrimitiveFormula formula) { - return "xsd:datetime(" + plainStr(formula) + ")"; - } - - public static String plainStr(PrimitiveFormula formula) { - if (formula instanceof VariableFormula) return ((VariableFormula) formula).name; - - Value value = ((ValueFormula) formula).value; - - if (value instanceof StringValue) { - String s = ((StringValue) value).value; - return "\"" + s.replaceAll("\"", "\\\\\"") + "\"" + (s.equals("en") ? "" : "@en"); - } - if (value instanceof NameValue) return ((NameValue) value).id; - if (value instanceof NumberValue) return ((NumberValue) value).value + ""; - if (value instanceof DateValue) { - DateValue date = (DateValue) value; - if (date.month == -1) return "\"" + date.year + "\"" + "^^xsd:datetime"; - if (date.day == -1) return "\"" + date.year + "-" + date.month + "\"" + "^^xsd:datetime"; - return "\"" + date.year + "-" + date.month + "-" + date.day + "\"" + "^^xsd:datetime"; - } - throw new RuntimeException("Unhandled primitive: " + value); - } -} - diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/Stemmer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/Stemmer.java deleted file mode 100644 index aad6ea2b31..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/Stemmer.java +++ /dev/null @@ -1,15 +0,0 @@ -package edu.stanford.nlp.sempre.freebase; - -import org.tartarus.snowball.ext.PorterStemmer; - -/** - * Wrapper for the default stemmer used by this project. 
- */ -public class Stemmer { - public String stem(String input) { - PorterStemmer state = new PorterStemmer(); - state.setCurrent(input); - state.stem(); - return state.getCurrent(); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/TextToTextMatcher.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/TextToTextMatcher.java deleted file mode 100644 index 1cad53c7c7..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/TextToTextMatcher.java +++ /dev/null @@ -1,137 +0,0 @@ -package edu.stanford.nlp.sempre.freebase; - -import edu.stanford.nlp.sempre.freebase.lexicons.TokenLevelMatchFeatures; -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.stats.ClassicCounter; -import edu.stanford.nlp.stats.Counter; -import edu.stanford.nlp.stats.Counters; -import fig.basic.LogInfo; -import fig.basic.Option; - -import java.util.ArrayList; -import java.util.List; -import java.util.Set; - -/** - * Computes various text-text-similarities - * - * @author jonathanberant - */ -public class TextToTextMatcher { - - public static class Options { - @Option(gloss = "Verbose") public int verbose = 0; - } - - public static Options opts = new Options(); - - private static TextToTextMatcher textToTextMatcher; - public static TextToTextMatcher getSingleton() { - if (textToTextMatcher == null) textToTextMatcher = new TextToTextMatcher(); - return textToTextMatcher; - } - private Stemmer stemmer; - - public TextToTextMatcher() { - stemmer = new Stemmer(); - } - - public FeatureVector extractFeatures(List exampleTokens, List examplePosTags, List exampleLemmas, Set fbDescs) { - FeatureVector res = new FeatureVector(); - extractTokenMatchFeatures(exampleTokens, exampleLemmas, fbDescs, res); - extractWordSimilarityFeatures(exampleTokens, examplePosTags, fbDescs, res); - return res; - } - - // JONATHAN - changing to not have edit distance since it is inefficient - private void extractWordSimilarityFeatures(List exampleTokens, 
List examplePosTags, Set fbDescs, FeatureVector vector) { - if (!FeatureExtractor.containsDomain("wordSim")) return; - - boolean match = false; - // collect fb tokens - List candidateFbTokens = new ArrayList<>(); - for (String fbDesc : fbDescs) { - List fbDescTokens = FbFormulasInfo.BinaryFormulaInfo.tokenizeFbDescription(fbDesc); - for (String fbDescToken : fbDescTokens) { - if (fbDescToken.length() <= 2) - continue; - candidateFbTokens.add(fbDesc); - } - } - // check if there is a token that is an fb token candidate - for (int i = 0; i < exampleTokens.size(); ++i) { - String pos = examplePosTags.get(i); - String textToken = exampleTokens.get(i); - if (pos.startsWith("NN") || (pos.startsWith("VB") && !pos.equals("VBD-AUX")) || pos.equals("JJ")) { - if (candidateFbTokens.contains(textToken)) { - match = true; - break; - } - } - } - if (match) - vector.add("wordSim", "FbTokenExTokenMatch"); - } - - public boolean existsTokenMatch(List exampleTokens, List exampleLemmas, Set fbDescs) { - // generate stems - List exampleStems = new ArrayList(); - for (String token : exampleTokens) - exampleStems.add(stemmer.stem(token)); - - Counter tokenFeatures = new ClassicCounter(); - Counter stemFeatures = new ClassicCounter(); - for (String fbDescription : fbDescs) { - List fbDescTokens = FbFormulasInfo.BinaryFormulaInfo.tokenizeFbDescription(fbDescription); - List fbDescStems = new ArrayList<>(); - for (String fbDescToken : fbDescTokens) - fbDescStems.add(stemmer.stem(fbDescToken)); - - Counters.maxInPlace(tokenFeatures, TokenLevelMatchFeatures.extractTokenMatchFeatures(exampleTokens, fbDescTokens, true)); - Counters.maxInPlace(tokenFeatures, TokenLevelMatchFeatures.extractTokenMatchFeatures(exampleLemmas, fbDescTokens, true)); - Counters.maxInPlace(stemFeatures, TokenLevelMatchFeatures.extractTokenMatchFeatures(exampleStems, fbDescStems, false)); - if (tokenFeatures.size() > 0 || stemFeatures.size() > 0) - return true; - } - return false; - } - - private void 
extractTokenMatchFeatures(List exampleTokens, List exampleLemmas, Set fbDescs, FeatureVector vector) { - if (!FeatureExtractor.containsDomain("tokenMatch")) return; - - // generate stems - List exampleStems = new ArrayList<>(); - for (String token : exampleTokens) - exampleStems.add(stemmer.stem(token)); - - Counter tokenFeatures = new ClassicCounter<>(); - Counter stemFeatures = new ClassicCounter<>(); - for (String fbDescription : fbDescs) { - List fbDescTokens = FbFormulasInfo.BinaryFormulaInfo.tokenizeFbDescription(fbDescription); - List fbDescStems = new ArrayList<>(); - for (String fbDescToken : fbDescTokens) - fbDescStems.add(stemmer.stem(fbDescToken)); - - Counters.maxInPlace(tokenFeatures, TokenLevelMatchFeatures.extractTokenMatchFeatures(exampleTokens, fbDescTokens, true)); - Counters.maxInPlace(tokenFeatures, TokenLevelMatchFeatures.extractTokenMatchFeatures(exampleLemmas, fbDescTokens, true)); - Counters.maxInPlace(stemFeatures, TokenLevelMatchFeatures.extractTokenMatchFeatures(exampleStems, fbDescStems, false)); - } - if (opts.verbose >= 3) { - LogInfo.logs("Binary formula desc: %s, token match: %s, stem match: %s", fbDescs, tokenFeatures, stemFeatures); - } - addFeaturesToVector(tokenFeatures, "binary_token", vector); - addFeaturesToVector(stemFeatures, "binary_stem", vector); - } - - private void addFeaturesToVector(Counter features, String prefix, FeatureVector vector) { - if (features.getCount("equal") > 0) - vector.add("tokenMatch", prefix + ".equal", features.getCount("equal")); - else if (features.getCount("prefix") > 0 || features.getCount("suffix") > 0) { - if (features.getCount("prefix") > 0) - vector.add("tokenMatch", prefix + ".prefix", features.getCount("prefix")); - if (features.getCount("suffix") > 0) - vector.add("tokenMatch", prefix + ".suffix", features.getCount("suffix")); - } else if (features.getCount("overlap") > 0) - vector.add("tokenMatch", prefix + ".overlap", features.getCount("overlap")); - } -} diff --git 
a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/UnaryLexicon.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/UnaryLexicon.java deleted file mode 100644 index 373816488b..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/UnaryLexicon.java +++ /dev/null @@ -1,193 +0,0 @@ -package edu.stanford.nlp.sempre.freebase; - -import com.google.common.base.Strings; -import edu.stanford.nlp.io.IOUtils; -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.freebase.FbFormulasInfo.UnaryFormulaInfo; -import edu.stanford.nlp.sempre.freebase.lexicons.EntrySource; -import edu.stanford.nlp.sempre.freebase.lexicons.LexicalEntry.LexiconValue; -import edu.stanford.nlp.sempre.freebase.lexicons.LexicalEntry.UnaryLexicalEntry; -import fig.basic.LogInfo; -import fig.basic.MapUtils; -import fig.basic.Option; -import fig.basic.StopWatchSet; - -import java.io.IOException; -import java.io.PrintWriter; -import java.util.*; - -/** - * lexicon for unaries: "city"-->fb:location.citytown - * Loads the lexicon from a file and has some features - */ -public final class UnaryLexicon { - - public static class Options { - @Option(gloss = "Number of results return by the lexicon") - public int maxEntries = 1000; - @Option(gloss = "Path to unary lexicon file") - public String unaryLexiconFilePath = "lib/fb_data/7/unaryInfoStringAndAlignment.txt"; - @Option(gloss = "Threshold for filtering unaries") - public int unaryFilterThreshold = 5; - @Option(gloss = "Verbosity") public int verbose = 0; - } - - public static Options opts = new Options(); - - private static UnaryLexicon unaryLexicon; - public static UnaryLexicon getInstance() { - if (unaryLexicon == null) unaryLexicon = new UnaryLexicon(); - return unaryLexicon; - } - - private Map> lexemeToEntryList = new HashMap<>(); - - public static final String INTERSECTION = "intersection"; - public static final String NL_SIZE = "nl_size"; - public static final String FB_SIZE = "fb_size"; - - 
private UnaryLexicon() { - if (Strings.isNullOrEmpty(opts.unaryLexiconFilePath)) - throw new RuntimeException("Missing unary lexicon file"); - read(); - sortLexiconEntries(); - } - - private void sortLexiconEntries() { - Comparator comparator = new UnaryLexicalEntryComparator(); - for (List uEntries: lexemeToEntryList.values()) { - Collections.sort(uEntries, comparator); - } - } - - private void read() { - LogInfo.begin_track("Loading unary lexicon file " + opts.unaryLexiconFilePath); - for (String line : IOUtils.readLines(opts.unaryLexiconFilePath)) { - LexiconValue lv = Json.readValueHard(line, LexiconValue.class); - addEntry(lv.lexeme, lv.source, lv.formula, lv.features); - } - LogInfo.log("Number of lexemes: " + lexemeToEntryList.size()); - LogInfo.end_track(); - } - - private void addEntry(String nl, String source, Formula formula, Map featureMap) { - - FbFormulasInfo ffi = FbFormulasInfo.getSingleton(); - if (ffi.getUnaryInfo(formula) != null) { - UnaryFormulaInfo uInfo = ffi.getUnaryInfo(formula); - UnaryLexicalEntry uEntry = new UnaryLexicalEntry(nl, nl, new TreeSet<>(uInfo.descriptions), formula, EntrySource.parseSourceDesc(source), - uInfo.popularity, new TreeMap<>(featureMap), uInfo.types); - MapUtils.addToList(lexemeToEntryList, nl, uEntry); - } else { - if (opts.verbose >= 3) LogInfo.warnings("Missing info for unary: %s ", formula); - } - } - - public void save(String outFile) throws IOException { - - PrintWriter writer = IOUtils.getPrintWriter(outFile); - for (String nl : lexemeToEntryList.keySet()) { - for (UnaryLexicalEntry uEntry : lexemeToEntryList.get(nl)) { - LexiconValue lv = new LexiconValue(nl, uEntry.formula, uEntry.source.toString(), uEntry.alignmentScores); - writer.println(Json.writeValueAsStringHard(lv)); - } - } - writer.close(); - } - - public List lookupEntries(String textDesc) throws IOException { - - List entries = lexemeToEntryList.get(textDesc.toLowerCase()); - if (entries != null) { - List res = new ArrayList<>(); - for (int i = 
0; i < Math.min(entries.size(), opts.maxEntries); ++i) { - if (valid(entries.get(i))) - res.add(entries.get(i)); - } - return res; - } - return Collections.emptyList(); - } - - /** Checks if an entry is valid (e.g. we filter if intersection is too small) */ - private boolean valid(UnaryLexicalEntry lexicalEntry) { - return (lexicalEntry.source != EntrySource.ALIGNMENT || - MapUtils.getDouble(lexicalEntry.alignmentScores, INTERSECTION, 0.0) >= opts.unaryFilterThreshold); - } - - public void sortLexiconByFeedback(Params params) { - StopWatchSet.begin("UnaryLexicon.sortLexiconByFeedback"); - LogInfo.log("Number of entries: " + lexemeToEntryList.size()); - UnaryLexEntrybyFeaturesComparator comparator = new UnaryLexEntrybyFeaturesComparator(params); - for (String lexeme : lexemeToEntryList.keySet()) { - Collections.sort(lexemeToEntryList.get(lexeme), comparator); - if (LexiconFn.opts.verbose > 0) { - LogInfo.logs("Sorted list for lexeme=%s", lexeme); - for (UnaryLexicalEntry uEntry : lexemeToEntryList.get(lexeme)) { - FeatureVector fv = new FeatureVector(); - LexiconFn.getUnaryEntryFeatures(uEntry, fv); - LogInfo.logs("Entry=%s, dotprod=%s", uEntry, fv.dotProduct(comparator.params)); - } - } - } - StopWatchSet.end(); - } - - public class UnaryLexEntrybyFeaturesComparator implements Comparator { - public final Params params; - public UnaryLexEntrybyFeaturesComparator(Params params) { - this.params = params; - } - @Override - public int compare(UnaryLexicalEntry entry1, UnaryLexicalEntry entry2) { - - FeatureVector features1 = new FeatureVector(); - FeatureVector features2 = new FeatureVector(); - LexiconFn.getUnaryEntryFeatures(entry1, features1); - LexiconFn.getUnaryEntryFeatures(entry2, features2); - double score1 = features1.dotProduct(params); - double score2 = features2.dotProduct(params); - if (score1 > score2) return -1; - if (score1 < score2) return +1; - // back off to usual thing - double entry1Intersection = MapUtils.getDouble(entry1.alignmentScores, 
INTERSECTION, 0.0); - double entry2Intersection = MapUtils.getDouble(entry2.alignmentScores, INTERSECTION, 0.0); - if (entry1Intersection > entry2Intersection) - return -1; - if (entry1Intersection < entry2Intersection) - return 1; - if (entry1.popularity > entry2.popularity) - return -1; - if (entry1.popularity < entry2.popularity) - return 1; - return 0; - } - } - - public static class UnaryLexicalEntryComparator implements Comparator { - - public static final String INTERSECTION = "intersection"; - - @Override - public int compare(UnaryLexicalEntry entry1, UnaryLexicalEntry entry2) { - - double entry1Intersection = MapUtils.getDouble(entry1.alignmentScores, INTERSECTION, 0.0); - double entry2Intersection = MapUtils.getDouble(entry2.alignmentScores, INTERSECTION, 0.0); - if (entry1Intersection > entry2Intersection) - return -1; - if (entry1Intersection < entry2Intersection) - return 1; - if (entry1.popularity > entry2.popularity) - return -1; - if (entry1.popularity < entry2.popularity) - return 1; - // todo - this is to break ties - make more efficient - int stringComparison = entry1.formula.toString().compareTo(entry2.formula.toString()); - if (stringComparison < 0) - return -1; - if (stringComparison > 0) - return +1; - return 0; - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/Utils.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/Utils.java deleted file mode 100644 index 6434918d11..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/Utils.java +++ /dev/null @@ -1,122 +0,0 @@ -package edu.stanford.nlp.sempre.freebase; - -import fig.basic.IOUtils; -import fig.basic.LogInfo; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.PrintWriter; -import java.util.HashMap; -import java.util.Map; - -public final class Utils { - private Utils() { } - - public static final String ttlPrefix = "@prefix fb: ."; - - // Somewhat of a crude approximation. 
- public static boolean isUrl(String s) { return s.startsWith("<"); } - - // Virtuoso can't deal with this; these are probably useless anyway. - public static boolean identifierContainsStrangeCharacters(String s) { - return !s.startsWith("\"") && s.contains("$"); - } - - // Convert a string from the ns: namespace to the fb: namespace. - // "ns:en.barack_obama" => "fb:en.barack_obama" - public static String nsToFb(String s) { - if (s.startsWith("ns:")) return "fb:" + s.substring(3); - return s; - } - - // "\"/en/distributive_writing\"" => "fb:en.distributive_writing" - public static String stringToRdf(String arg2) { - if (!arg2.startsWith("\"/") || !arg2.endsWith("\"")) - throw new RuntimeException("Bad: " + arg2); - return "fb:" + arg2.substring(2, arg2.length() - 1).replaceAll("/", "."); - } - - public static String[] parseTriple(String line) { - if (!line.endsWith(".")) return null; - String[] tokens = line.substring(0, line.length() - 1).split("\t"); - if (tokens.length != 3) return null; - tokens[0] = Utils.nsToFb(tokens[0]); - tokens[1] = Utils.nsToFb(tokens[1]); - tokens[2] = Utils.nsToFb(tokens[2]); - return tokens; - } - - public static int parseInt(String arg2) { - if (!arg2.endsWith("^^xsd:int")) - throw new RuntimeException("Arg2 is not a valid integer: " + arg2); - int closingQuoteIndex = arg2.lastIndexOf('"'); - return Integer.parseInt(arg2.substring(1, closingQuoteIndex)); - } - - public static String parseStr(String arg2) { - if (!arg2.endsWith("@en")) - throw new RuntimeException("Arg2 is not a valid String: " + arg2); - int closingQuoteIndex = arg2.lastIndexOf('"'); - return arg2.substring(1, closingQuoteIndex); - } - - public static void writeTriple(PrintWriter out, String arg1, String property, String arg2) { - out.println(arg1 + "\t" + property + "\t" + arg2 + "."); - } - - // For some reason, the Freebase topic dumps don't have properly formatted numbers. - // We need to replace - // fb:m.012_53 fb:people.person.height_meters 1.57. 
- // with - // fb:m.012_53 fb:people.person.height_meters "1.57"^^xsd:double. - // This function operates on the second argument (value). - public static String quoteValues(String value) { - if (value.equals("true")) return "\"true\"^^xsd:boolean"; - if (value.equals("false")) return "\"false\"^^xsd:boolean"; - - // Short circuit: not numeric - if (value.startsWith("\"") || (value.length() > 0 && Character.isLetter(value.charAt(0)))) - return value; - - // Try to convert to integer - try { - Integer.parseInt(value); - return "\"" + value + "\"^^xsd:int"; - } catch (NumberFormatException e) { - } - // Try to convert to double - try { - Double.parseDouble(value); - return "\"" + value + "\"^^xsd:double"; - } catch (NumberFormatException e) { - } - return value; - } - - public static Map readCanonicalIdMap(String canonicalIdMapPath) { - return readCanonicalIdMap(canonicalIdMapPath, Integer.MAX_VALUE); - } - public static Map readCanonicalIdMap(String canonicalIdMapPath, int maxInputLines) { - Map canonicalIdMap = new HashMap(); - LogInfo.begin_track("Read %s", canonicalIdMapPath); - try { - BufferedReader in = IOUtils.openIn(canonicalIdMapPath); - String line; - int numInputLines = 0; - while (numInputLines < maxInputLines && (line = in.readLine()) != null) { - numInputLines++; - if (numInputLines % 10000000 == 0) - LogInfo.logs("Read %s lines", numInputLines); - String[] tokens = line.split("\t"); - if (tokens.length != 2) - throw new RuntimeException("Bad format: " + line); - canonicalIdMap.put(tokens[0], tokens[1]); - } - in.close(); - } catch (IOException e) { - throw new RuntimeException(e); - } - LogInfo.end_track(); - return canonicalIdMap; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/index/FbEntityIndexer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/index/FbEntityIndexer.java deleted file mode 100644 index b32ff9e596..0000000000 --- 
a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/index/FbEntityIndexer.java +++ /dev/null @@ -1,86 +0,0 @@ -package edu.stanford.nlp.sempre.freebase.index; - -import edu.stanford.nlp.io.IOUtils; -import fig.basic.LogInfo; -import org.apache.lucene.analysis.core.KeywordAnalyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.document.*; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.IndexWriterConfig.OpenMode; -import org.apache.lucene.store.SimpleFSDirectory; -import org.apache.lucene.util.Version; - -import java.io.BufferedReader; -import java.io.File; -import java.io.IOException; - -public class FbEntityIndexer { - - private final IndexWriter indexer; - private String nameFile; - - public FbEntityIndexer(String namefile, String outputDir, String indexingStrategy) throws IOException { - - if (!indexingStrategy.equals("exact") && !indexingStrategy.equals("inexact")) - throw new RuntimeException("Bad indexing strategy: " + indexingStrategy); - - IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_44 , indexingStrategy.equals("exact") ? 
new KeywordAnalyzer() : new StandardAnalyzer(Version.LUCENE_44)); - config.setOpenMode(OpenMode.CREATE); - config.setRAMBufferSizeMB(256.0); - indexer = new IndexWriter(new SimpleFSDirectory(new File(outputDir)), config); - - this.nameFile = namefile; - } - - /** - * Index the datadump file - * - * @throws IOException - */ - public void index() throws IOException { - - LogInfo.begin_track("Indexing"); - BufferedReader reader = IOUtils.getBufferedFileReader(nameFile); - String line; - int indexed = 0; - while ((line = reader.readLine()) != null) { - - String[] tokens = line.split("\t"); - - String mid = tokens[0]; - String id = tokens[1]; - if (id.startsWith("fb:user.") || id.startsWith("fb:base.")) - continue; - String popularity = tokens[2]; - String text = tokens[3].toLowerCase(); - - // add to index - Document doc = new Document(); - doc.add(new StringField(FbIndexField.MID.fieldName(), mid, Field.Store.YES)); - doc.add(new StringField(FbIndexField.ID.fieldName(), id, Field.Store.YES)); - doc.add(new StoredField(FbIndexField.POPULARITY.fieldName(), popularity)); - doc.add(new TextField(FbIndexField.TEXT.fieldName(), text, Field.Store.YES)); - if (tokens.length > 4) { - doc.add(new StoredField(FbIndexField.TYPES.fieldName(), tokens[4])); - } - indexer.addDocument(doc); - indexed++; - - if (indexed % 1000000 == 0) { - LogInfo.log("Number of lines: " + indexed); - } - } - reader.close(); - LogInfo.log("Indexed lines: " + indexed); - - indexer.close(); - LogInfo.log("Done"); - LogInfo.end_track("Indexing"); - } - - public static void main(String[] args) throws IOException { - FbEntityIndexer fbni = new FbEntityIndexer(args[0], args[1], args[2]); - fbni.index(); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/index/FbEntitySearcher.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/index/FbEntitySearcher.java deleted file mode 100644 index 2d2591ae9f..0000000000 --- 
a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/index/FbEntitySearcher.java +++ /dev/null @@ -1,107 +0,0 @@ -package edu.stanford.nlp.sempre.freebase.index; - -import fig.basic.LogInfo; -import fig.basic.StopWatch; -import org.apache.lucene.analysis.core.KeywordAnalyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.queryparser.classic.ParseException; -import org.apache.lucene.queryparser.classic.QueryParser; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.store.SimpleFSDirectory; -import org.apache.lucene.util.Version; - -import java.io.BufferedReader; -import java.io.File; -import java.io.IOException; -import java.io.InputStreamReader; -import java.util.LinkedList; -import java.util.List; -import java.util.regex.Pattern; - -public class FbEntitySearcher { - - private final QueryParser queryParser; - private final IndexSearcher indexSearcher; - private int numOfDocs = 50; - private String searchStrategy; - - public FbEntitySearcher(String indexDir, int numOfDocs, String searchingStrategy) throws IOException { - - LogInfo.begin_track("Constructing Searcher"); - if (!searchingStrategy.equals("exact") && !searchingStrategy.equals("inexact")) - throw new RuntimeException("Bad searching strategy: " + searchingStrategy); - this.searchStrategy = searchingStrategy; - - queryParser = new QueryParser( - Version.LUCENE_44, - FbIndexField.TEXT.fieldName(), - searchingStrategy.equals("exact") ? 
new KeywordAnalyzer() : new StandardAnalyzer(Version.LUCENE_44)); - LogInfo.log("Opening index dir: " + indexDir); - IndexReader indexReader = DirectoryReader.open(SimpleFSDirectory.open(new File(indexDir))); - indexSearcher = new IndexSearcher(indexReader); - LogInfo.log("Opened index with " + indexReader.numDocs() + " documents."); - - this.numOfDocs = numOfDocs; - LogInfo.end_track(); - } - - public synchronized List searchDocs(String question) throws IOException, ParseException { - - List res = new LinkedList(); - if (searchStrategy.equals("exact")) - question = "\"" + question + "\""; - - ScoreDoc[] hits = getHits(question); - - for (int i = 0; i < hits.length; ++i) { - int docId = hits[i].doc; - Document doc = indexSearcher.doc(docId); - res.add(doc); - } - return res; - } - - private ScoreDoc[] getHits(String question) throws IOException, ParseException { - Query luceneQuery = queryParser.parse(question); - ScoreDoc[] hits = indexSearcher.search(luceneQuery, numOfDocs).scoreDocs; - return hits; - } - - public static void main(String[] args) throws IOException, ParseException { - - Pattern quit = - Pattern.compile("quit|exit|q|bye", Pattern.CASE_INSENSITIVE); - FbEntitySearcher searcher = new FbEntitySearcher(args[0], 10000, args[1]); - BufferedReader is = new BufferedReader(new InputStreamReader(System.in)); - StopWatch watch = new StopWatch(); - while (true) { - System.out.print("Search> "); - String question = is.readLine().trim(); - if (quit.matcher(question).matches()) { - System.out.println("Quitting."); - break; - } - if (question.equals("")) - continue; - - watch.reset(); - watch.start(); - List docs = searcher.searchDocs(question); - watch.stop(); - for (Document doc : docs) { - LogInfo.log( - "Mid: " + doc.get(FbIndexField.MID.fieldName()) + "\t" + - "id: " + doc.get(FbIndexField.ID.fieldName()) + "\t" + - "types: " + doc.get(FbIndexField.TYPES.fieldName()) + "\t" + - "Name: " + doc.get(FbIndexField.TEXT.fieldName()) + "\t" + - "Popularity: " + 
doc.get(FbIndexField.POPULARITY.fieldName())); - } - LogInfo.logs("Number of docs: %s, Time: %s", docs.size(), watch); - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/index/FbIndexField.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/index/FbIndexField.java deleted file mode 100644 index 31d96a9b23..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/index/FbIndexField.java +++ /dev/null @@ -1,20 +0,0 @@ -package edu.stanford.nlp.sempre.freebase.index; - -public enum FbIndexField { - - TEXT("text"), - MID("mid"), - ID("id"), - TYPES("types"), - POPULARITY("popularity"); - - private final String fieldName; - - FbIndexField(String fieldName) { - this.fieldName = fieldName; - } - - public String fieldName() { - return fieldName; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/lexicons/EntrySource.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/lexicons/EntrySource.java deleted file mode 100644 index ff2ea9c2f2..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/lexicons/EntrySource.java +++ /dev/null @@ -1,47 +0,0 @@ -package edu.stanford.nlp.sempre.freebase.lexicons; - - -public enum EntrySource { - - ALIGNMENT("ALIGNMENT"), - STRING_MATCH("STRING_MATCH"), - HARD_CODED("HARD"), - LUCENE("LUCENE"), - GRAPHPROP("GRAPHPROP"), - FBSEARCH("FBSEARCH"), - FEEDBACK("FEEDBACK"); - - EntrySource(String source) { - this.source = source; - } - - private final String source; - - public String toString() { - return source; - } - public static EntrySource parseSourceDesc(String desc) { - - if (desc.equals("HARD")) - return HARD_CODED; - if (desc.startsWith("fb:m.")) - return STRING_MATCH; - if (desc.equals("STRING_MATCH")) - return STRING_MATCH; - if (desc.equals("NO_MID")) - return ALIGNMENT; - if (desc.equals("ALIGNMENT")) - return ALIGNMENT; - if (desc.equals("LUCENE")) - return LUCENE; - if (desc.equals("GRAPHPROP")) - 
return GRAPHPROP; - if (desc.equals("FBSEARCH")) - return FBSEARCH; - if (desc.equals("FEEDBACK")) - return FEEDBACK; - throw new RuntimeException("Description is not legal: " + desc); - - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/lexicons/ExtremeValueWrapper.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/lexicons/ExtremeValueWrapper.java deleted file mode 100644 index 6c76853dc0..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/lexicons/ExtremeValueWrapper.java +++ /dev/null @@ -1,38 +0,0 @@ -package edu.stanford.nlp.sempre.freebase.lexicons; - -public abstract class ExtremeValueWrapper { - public double distance; - public abstract boolean add(double other); -} - -class MinValueWrapper extends ExtremeValueWrapper { - - public MinValueWrapper(double max) { - distance = max; - } - @Override - public boolean add(double other) { - if (other < distance) { - distance = other; - return true; - } - return false; - } -} - -class MaxValueWrapper extends ExtremeValueWrapper { - - public MaxValueWrapper(double min) { - distance = min; - } - @Override - public boolean add(double other) { - if (other > distance) { - distance = other; - return true; - } - return false; - } -} - - diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/lexicons/LexicalEntry.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/lexicons/LexicalEntry.java deleted file mode 100644 index fc4dcb8d17..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/lexicons/LexicalEntry.java +++ /dev/null @@ -1,408 +0,0 @@ -package edu.stanford.nlp.sempre.freebase.lexicons; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; - -import edu.stanford.nlp.sempre.Formula; -import edu.stanford.nlp.sempre.SemType; -import edu.stanford.nlp.sempre.SemTypeHierarchy; -import edu.stanford.nlp.stats.ClassicCounter; -import 
edu.stanford.nlp.stats.Counter; -import edu.stanford.nlp.util.StringUtils; -import fig.basic.LispTree; - -import java.util.HashSet; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; - -public class LexicalEntry { - - public final String textDescription; // the query as submitted to the lexicon - public final String normalizedTextDesc; // the query after normalization - public final Set fbDescriptions; // descriptions matching the formula - public final Formula formula; - public EntrySource source; - public final double popularity; - public final double distance; - - - public LexicalEntry(String textDescription, String normalizedTextDesc, Set fbDescriptions, Formula formula, EntrySource source, double popularity, double distance) { - this.textDescription = textDescription; - this.normalizedTextDesc = normalizedTextDesc; - this.fbDescriptions = fbDescriptions; - this.formula = formula; - this.source = source; - this.popularity = popularity; - this.distance = distance; - } - - public Formula getFormula() { - return formula; - } - - public double getPopularity() { - return popularity; - } - - public double getDistance() { - return distance; - } - - private String stringRepn; - public String toString() { - if (stringRepn == null) { - stringRepn = textDescription + " (" + normalizedTextDesc + ")" + - ", FB: " + fbDescriptions + - ", formula: " + formula + - ", source: " + source + - ", popularity: " + popularity + - ", distance: " + distance; - } - return stringRepn; - } - - public static int computeEditDistance(String query, Set descriptions) { - - int distance = Integer.MAX_VALUE; - for (String description : descriptions) { - int currDistance = StringUtils.editDistance(query, description.toLowerCase()); - if (currDistance < distance) { - distance = currDistance; - } - } - return Math.min(15, distance); - } - - // Input: set of types coming from the lexicon {fb:common.topic, fb:people.person, ...} - // Output: remove any element which is in the 
transitive closure. {fb:people.person, ...} - // TODO(pliang): replace HashSet with something lighter weight. - public static SemType setToType(Set types) { - Set resultTypes = new HashSet<>(types); - for (String entityType : types) { - for (String supertype : SemTypeHierarchy.singleton.getSupertypes(entityType)) { - if (!supertype.equals(entityType)) - resultTypes.remove(supertype); - } - } - return SemType.newUnionSemType(resultTypes); - } - - public static class BinaryLexicalEntry extends LexicalEntry { - - public String expectedType1; - public String expectedType2; - public String unitId = ""; - public String unitDescription = ""; - public Map alignmentScores; - public String fullLexeme; // the lexeme as it is in the alignment without some normalization applied before uploading the lexicon - - public BinaryLexicalEntry(String textDescription, String normalizedTextDesc, Set fbDescriptions, Formula formula, EntrySource source, double popularity, - String expectedType1, String expectedType2, String unitId, String unitDesc, Map alignmentScores, String fullLexeme) { - super(textDescription, normalizedTextDesc, fbDescriptions, formula, source, popularity, computeEditDistance(textDescription, fbDescriptions)); - this.expectedType1 = expectedType1; - this.expectedType2 = expectedType2; - this.unitId = unitId; - this.unitDescription = unitDesc; - this.alignmentScores = alignmentScores; - this.fullLexeme = fullLexeme; - assert (fullLexeme.contains(normalizedTextDesc)); - } - - public boolean identicalFormulaInfo(Object other) { - if (!(other instanceof BinaryLexicalEntry)) - return false; - BinaryLexicalEntry otherBinary = (BinaryLexicalEntry) other; - - if (!formula.equals(otherBinary.formula)) - return false; - if (Math.abs(popularity - otherBinary.popularity) > 0.000001) - return false; - if (!expectedType1.equals(otherBinary.expectedType1)) - return false; - if (!expectedType2.equals(otherBinary.expectedType2)) - return false; - if 
(!unitId.equals(otherBinary.unitId)) - return false; - if (!unitDescription.equals(otherBinary.unitDescription)) - return false; - return true; - } - - public String getExpectedType1() { - return expectedType1; - } - - public String getExpectedType2() { - return expectedType2; - } - - public String getUnitId() { - return unitId; - } - - public String getUnitDescription() { - return unitDescription; - } - - private String stringRepn; - public String toString() { - if (stringRepn == null) { - StringBuilder sb = new StringBuilder(); - sb.append(super.toString()); - sb.append(", " + expectedType1 + " x " + expectedType2); - if (unitId != null) { - sb.append(", " + unitId + ":" + unitDescription); - } - if (alignmentScores.size() > 0) - sb.append(", " + alignmentScores); - stringRepn = sb.toString(); - } - return stringRepn; - } - - public boolean isFullLexemeEqualToNormalizedText() { - return fullLexeme.equals(normalizedTextDesc); - } - - public String[] getLeftContext() { - if (fullLexeme.startsWith(normalizedTextDesc)) - return new String[]{}; - String leftContext = fullLexeme.substring(0, fullLexeme.indexOf(normalizedTextDesc)).trim(); - return leftContext.split("\\s+"); - } - - public String[] getRightContext() { - if (fullLexeme.endsWith(normalizedTextDesc)) - return new String[]{}; - String rightContext = fullLexeme.substring(fullLexeme.indexOf(normalizedTextDesc) + normalizedTextDesc.length()).trim(); - return rightContext.split("\\s+"); - } - } - - public static class EntityLexicalEntry extends LexicalEntry { - - public Set types = new HashSet<>(); - public SemType type; - public Counter entityFeatures; - - public EntityLexicalEntry(String textDescription, String normalizedTextDesc, Set fbDescriptions, - Formula formula, EntrySource source, double popularity, double distance, Set types, - Counter entityFeatures) { - super(textDescription, normalizedTextDesc, fbDescriptions, formula, source, popularity, distance); - this.types = types; - this.type = 
setToType(types); - this.entityFeatures = entityFeatures; - } - - public String toString() { - return super.toString() + ", " + types; - } - } - - public static class UnaryLexicalEntry extends LexicalEntry { - - public Set types = new HashSet(); - public SemType type; - public Map alignmentScores; - - public UnaryLexicalEntry(String textDescription, String normalizedTextDesc, Set fbDescriptions, Formula formula, EntrySource source, double popularity, - Map alignmentScores, Set types) { - super(textDescription, normalizedTextDesc, fbDescriptions, formula, source, popularity, computeEditDistance(textDescription, fbDescriptions)); - this.types = types; - this.type = setToType(types); - this.alignmentScores = alignmentScores; - } - - String stringRepn; - public String toString() { - if (stringRepn == null) - stringRepn = super.toString() + ", " + types; - return stringRepn; - } - } - - /** - * Holds the essential parts of a value in a lexicon - * @author jonathanberant - * - */ - public static class LexiconValue { - - @JsonProperty public String lexeme; - @JsonProperty public Formula formula; - @JsonProperty public String source; - @JsonProperty public Map features; - - @JsonCreator - public LexiconValue(@JsonProperty("normLexeme") String lexeme, - @JsonProperty("formula") Formula formula, - @JsonProperty("source") String source, - @JsonProperty("features") Map features) { - this.lexeme = lexeme; - this.formula = formula; - this.source = source; - this.features = features; - } - } - - public static class LexicalEntrySerializer { - // Utilities that should move into fig later. 
- static Counter counterFromLispTree(LispTree tree) { - Counter counter = new ClassicCounter(); - for (int i = 0; i < tree.children.size(); i++) - counter.incrementCount(tree.child(i).child(0).value, Double.parseDouble(tree.child(i).child(1).value)); - return counter; - } - static LispTree counterToLispTree(Counter counter) { - LispTree tree = LispTree.proto.newList(); - for (String feature : counter.keySet()) - tree.addChild(LispTree.proto.newList(feature, "" + counter.getCount(feature))); - return tree; - } - - static Map featureMapFromLispTree(LispTree tree) { - Map featureMap = new TreeMap(); - for (int i = 0; i < tree.children.size(); i++) - featureMap.put(tree.child(i).child(0).value, Double.parseDouble(tree.child(i).child(1).value)); - return featureMap; - } - - static LispTree featureMapToLispTree(Map featureMap) { - LispTree tree = LispTree.proto.newList(); - for (String feature : featureMap.keySet()) - tree.addChild(LispTree.proto.newList(feature, "" + featureMap.get(feature))); - return tree; - } - - - static Set setFromLispTree(LispTree tree) { - Set set = new HashSet(); - for (int i = 0; i < tree.children.size(); i++) - set.add(tree.child(i).value); - return set; - } - static LispTree setToLispTree(Set set) { - LispTree tree = LispTree.proto.newList(); - for (String x : set) - tree.addChild(x); - return tree; - } - - static String[] stringArrayFromLispTree(LispTree tree) { - String[] result = new String[tree.children.size()]; - for (int i = 0; i < tree.children.size(); i++) - result[i] = tree.child(i).value; - return result; - } - static LispTree stringArrayToLispTree(String[] array) { - LispTree tree = LispTree.proto.newList(); - for (String x : array) - tree.addChild(x); - return tree; - } - - public static LexicalEntry entryFromLispTree(LispTree tree) { - int i = 1; - if (tree.child(0).value.equals("entity")) { - - String textDescription = tree.child(i++).value; - String normalizedTextDesc = tree.child(i++).value; - Set fbDescriptions = 
setFromLispTree(tree.child(i++)); - Formula formula = Formula.fromString(tree.child(i++).value); - EntrySource source = EntrySource.parseSourceDesc(tree.child(i++).value); - double popularity = Double.parseDouble(tree.child(i++).value); - double distance = Double.parseDouble(tree.child(i++).value); - Set types = setFromLispTree(tree.child(i++)); - Counter tokenEditDistanceFeatures = counterFromLispTree(tree.child(i++)); - - return new LexicalEntry.EntityLexicalEntry( - textDescription, normalizedTextDesc, fbDescriptions, formula, - source, popularity, distance, types, tokenEditDistanceFeatures); - } else if (tree.child(0).value.equals("unary")) { - String textDescription = tree.child(i++).value; - String normalizedTextDesc = tree.child(i++).value; - Set fbDescriptions = setFromLispTree(tree.child(i++)); - Formula formula = Formula.fromString(tree.child(i++).value); - EntrySource source = EntrySource.parseSourceDesc(tree.child(i++).value); - double popularity = Double.parseDouble(tree.child(i++).value); - Double.parseDouble(tree.child(i++).value); - Map alignmentScores = featureMapFromLispTree(tree.child(i++)); - Set types = setFromLispTree(tree.child(i++)); - return new LexicalEntry.UnaryLexicalEntry( - textDescription, normalizedTextDesc, fbDescriptions, formula, source, - popularity, alignmentScores, types); - } else if (tree.child(0).value.equals("binary")) { - String textDescription = tree.child(i++).value; - String normalizedTextDesc = tree.child(i++).value; - Set fbDescriptions = setFromLispTree(tree.child(i++)); - Formula formula = Formula.fromString(tree.child(i++).value); - EntrySource source = EntrySource.parseSourceDesc(tree.child(i++).value); - double popularity = Double.parseDouble(tree.child(i++).value); - Double.parseDouble(tree.child(i++).value); // this is computed in the constructor so need not save it - String expectedType1 = tree.child(i++).value; - String expectedType2 = tree.child(i++).value; - String unitId = tree.child(i++).value; - String 
unitDescription = tree.child(i++).value; - Map alignmentScores = featureMapFromLispTree(tree.child(i++)); - String fullLexeme = tree.child(i++).value; - return new LexicalEntry.BinaryLexicalEntry( - textDescription, normalizedTextDesc, fbDescriptions, formula, source, - popularity, expectedType1, expectedType2, unitId, unitDescription, alignmentScores, fullLexeme); - } else { - throw new RuntimeException("Invalid: " + tree); - } - } - - public static String emptyIfNull(String s) { return s == null ? "" : s; } - - public static LispTree entryToLispTree(LexicalEntry rawEntry) { - LispTree result = LispTree.proto.newList(); - if (rawEntry instanceof LexicalEntry.EntityLexicalEntry) { - LexicalEntry.EntityLexicalEntry entry = (LexicalEntry.EntityLexicalEntry) rawEntry; - result.addChild("entity"); - - result.addChild(entry.textDescription); - result.addChild(entry.normalizedTextDesc); - result.addChild(setToLispTree(entry.fbDescriptions)); - result.addChild(entry.formula.toString()); - result.addChild(entry.source.toString()); - result.addChild("" + entry.popularity); - result.addChild("" + entry.distance); - result.addChild(setToLispTree(entry.types)); - result.addChild(counterToLispTree(entry.entityFeatures)); - } else if (rawEntry instanceof LexicalEntry.UnaryLexicalEntry) { - LexicalEntry.UnaryLexicalEntry entry = (LexicalEntry.UnaryLexicalEntry) rawEntry; - result.addChild("unary"); - - result.addChild(entry.textDescription); - result.addChild(entry.normalizedTextDesc); - result.addChild(setToLispTree(entry.fbDescriptions)); - result.addChild(entry.formula.toString()); - result.addChild(entry.source.toString()); - result.addChild("" + entry.popularity); - result.addChild("" + entry.distance); - result.addChild(featureMapToLispTree(entry.alignmentScores)); - result.addChild(setToLispTree(entry.types)); - } else if (rawEntry instanceof LexicalEntry.BinaryLexicalEntry) { - LexicalEntry.BinaryLexicalEntry entry = (LexicalEntry.BinaryLexicalEntry) rawEntry; - 
result.addChild("binary"); - - result.addChild(entry.textDescription); - result.addChild(entry.normalizedTextDesc); - result.addChild(setToLispTree(entry.fbDescriptions)); - result.addChild(entry.formula.toString()); - result.addChild(entry.source.toString()); - result.addChild("" + entry.popularity); - result.addChild("" + entry.distance); - result.addChild(entry.expectedType1); - result.addChild(entry.expectedType2); - result.addChild(emptyIfNull(entry.unitId)); - result.addChild(emptyIfNull(entry.unitDescription)); - result.addChild(featureMapToLispTree(entry.alignmentScores)); - result.addChild(entry.fullLexeme); - } - return result; - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/lexicons/TokenLevelMatchFeatures.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/lexicons/TokenLevelMatchFeatures.java deleted file mode 100644 index b40b7359b7..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/lexicons/TokenLevelMatchFeatures.java +++ /dev/null @@ -1,238 +0,0 @@ -package edu.stanford.nlp.sempre.freebase.lexicons; - -import edu.stanford.nlp.sempre.freebase.utils.FileUtils; -import edu.stanford.nlp.stats.ClassicCounter; -import edu.stanford.nlp.stats.Counter; -import fig.basic.LogInfo; -import fig.basic.Option; - -import java.util.Arrays; -import java.util.List; - -public final class TokenLevelMatchFeatures { - private TokenLevelMatchFeatures() { } - - public static class Options { - @Option(gloss = "Verbose") public int verbose = 0; - } - - public static Options opts = new Options(); - - public static boolean usePrefix = false; - public static boolean useSuffix = false; - - public static boolean useQueryIsPrefix = true; - public static boolean useAnswerIsPrefix = true; - public static boolean useQueryIsSuffix = true; - public static boolean useAnswerIsSuffix = true; - public static boolean useQueryEqualAnswer = true; - - - public static boolean useDiffSet = false; - public static 
boolean useEqualSet = false; - public static boolean useDiffFirstName = false; - - - public static Counter extractFeatures(String query, String answer) { - - Counter res = new ClassicCounter(); - query = FileUtils.omitPunct(query).toLowerCase(); - answer = FileUtils.omitPunct(answer).toLowerCase(); - - String[] queryTokens = query.split("\\s+"); - String[] answerTokens = answer.split("\\s+"); - if (usePrefix) { - boolean prefix = isPrefix(queryTokens, answerTokens); - res.incrementCount("prefix", prefix ? 1 : 0); - } - if (useSuffix) { - boolean suffix = isSuffix(queryTokens, answerTokens); - res.incrementCount("suffix", suffix ? 1 : 0); - } - if (useDiffSet) { - boolean diffSet = isDiffSet(queryTokens, answerTokens); - res.incrementCount("diffset", diffSet ? 1 : 0); - } - - if (useEqualSet) { - boolean equalSet = isEqualSet(queryTokens, answerTokens); - res.incrementCount("equalset", equalSet ? 1 : 0); - } - if (useDiffFirstName) { - boolean diffFirstName = isDiffFirstName(queryTokens, answerTokens); - res.incrementCount("diff_firstname", diffFirstName ? 1 : 0); - } - if (useQueryIsPrefix) { - boolean queryIsPrefix = isFirstPrefixOfSecond(queryTokens, answerTokens); - res.incrementCount("queryIsPrefix", queryIsPrefix ? 1 : 0); - } - if (useAnswerIsPrefix) { - boolean answerIsPrefix = isFirstPrefixOfSecond(answerTokens, queryTokens); - res.incrementCount("answerIsPrefix", answerIsPrefix ? 1 : 0); - } - if (useQueryIsSuffix) { - boolean queryIsSuffix = isFirstSuffixOfSecond(queryTokens, answerTokens); - res.incrementCount("queryIsSuffix", queryIsSuffix ? 1 : 0); - } - if (useAnswerIsSuffix) { - boolean answerIsSuffix = isFirstSuffixOfSecond(answerTokens, queryTokens); - res.incrementCount("answerIsSuffix", answerIsSuffix ? 1 : 0); - } - if (useQueryEqualAnswer) { - res.incrementCount("equal", isEqual(queryTokens, answerTokens) ? 
1 : 0); - } - - return res; - } - - private static boolean isPrefix(String[] queryTokens, String[] answerTokens) { - int min = Math.min(queryTokens.length, answerTokens.length); - for (int i = 0; i < min; ++i) { - if (!queryTokens[i].equals(answerTokens[i])) - return false; - } - return true; - } - - private static boolean isEqual(String[] queryTokens, String[] answerTokens) { - - if (queryTokens.length != answerTokens.length) - return false; - for (int i = 0; i < queryTokens.length; ++i) { - if (!queryTokens[i].equals(answerTokens[i])) - return false; - } - return true; - } - - private static boolean isFirstPrefixOfSecond(String[] tokens1, String[] tokens2) { - if (tokens1.length >= tokens2.length) - return false; - - for (int i = 0; i < tokens1.length; ++i) { - if (!tokens1[i].equals(tokens2[i])) - return false; - } - return true; - } - - - private static boolean isSuffix(String[] queryTokens, String[] answerTokens) { - int min = Math.min(queryTokens.length, answerTokens.length); - for (int i = 0; i < min; ++i) { - if (!queryTokens[queryTokens.length - 1 - i].equals(answerTokens[answerTokens.length - 1 - i])) - return false; - } - return true; - } - - private static boolean isFirstSuffixOfSecond(String[] tokens1, String[] tokens2) { - - if (tokens1.length >= tokens2.length) - return false; - - for (int i = 0; i < tokens1.length; ++i) { - if (!tokens1[tokens1.length - 1 - i].equals(tokens2[tokens2.length - 1 - i])) - return false; - } - return true; - } - - private static boolean isDiffFirstName(String[] queryTokens, String[] answerTokens) { - - return (queryTokens.length == 2 && answerTokens.length == 2 && queryTokens[1].equals(answerTokens[1]) - && !queryTokens[1].equals(answerTokens[0])); - } - - private static boolean isDiffSet(String[] queryTokens, String[] answerTokens) { - - List queryList = Arrays.asList(queryTokens); - List answerList = Arrays.asList(answerTokens); - return queryList.containsAll(answerList) || answerList.containsAll(queryList); - } - - 
public static int diffSetSize(String query, String answer) { - - query = FileUtils.omitPunct(query); - answer = FileUtils.omitPunct(answer); - - String[] queryTokens = query.toLowerCase().split("\\s+"); - String[] answerTokens = answer.toLowerCase().split("\\s+"); - - List queryList = Arrays.asList(queryTokens); - List answerList = Arrays.asList(answerTokens); - boolean queryContains = queryList.containsAll(answerList); - boolean answerContains = answerList.containsAll(queryList); - if (!queryContains && !answerContains) - return Integer.MAX_VALUE; - if (queryContains) { - return queryTokens.length - answerTokens.length; - } - return answerTokens.length - queryTokens.length; - } - - private static boolean isEqualSet(String[] queryTokens, String[] answerTokens) { - - List queryList = Arrays.asList(queryTokens); - List answerList = Arrays.asList(answerTokens); - return queryList.containsAll(answerList) && answerList.containsAll(queryList); - } - - public static Counter extractTokenMatchFeatures(List source, List target, boolean strict) { - - if (opts.verbose >= 1) { - LogInfo.log("SOURCE: " + source); - LogInfo.log("TARGET: " + target); - } - - Counter res = new ClassicCounter(); - for (int i = 0; i < source.size(); ++i) { - for (int j = 0; j < target.size(); ++j) { - - if (target.get(j).length() <= 2) // do not match very short words - continue; - - int matchLength = findLongestMatch(source, target, i, j, strict); - double cover = (double) matchLength / target.size(); - if (opts.verbose >= 1) { - if (cover > 0) { - LogInfo.logs("Source index %s, target index %s, cover %s", i, j, cover); - } - } - if (cover > 0) { - if (cover > 0.9999) { - res.setCount("equal", 1); - } else if (j == 0 && res.getCount("prefix") < cover) { - res.setCount("prefix", cover); - } else if (j + matchLength == target.size() && res.getCount("suffix") < cover) { - res.incrementCount("suffix", cover); - } else if (j > 0 && j + matchLength < target.size() && res.getCount("overlap") < cover) { - 
res.incrementCount("overlap", cover); - } - } - } - } - return res; - } - - private static int findLongestMatch(List source, List target, - int i, int j, boolean strict) { - - int match = 0; - - for (int offset = 0; i + offset < source.size() && j + offset < target.size(); ++offset) { - if (strict) { - if (source.get(i + offset).equals(target.get(j + offset))) - match++; - else - break; - } else { - if (source.get(i + offset).startsWith(target.get(j + offset)) || target.get(j + offset).startsWith(source.get(i + offset))) - match++; - else - break; - } - } - return match; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/lexicons/normalizers/BinaryNormalizer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/lexicons/normalizers/BinaryNormalizer.java deleted file mode 100644 index ccea095f98..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/lexicons/normalizers/BinaryNormalizer.java +++ /dev/null @@ -1,105 +0,0 @@ -package edu.stanford.nlp.sempre.freebase.lexicons.normalizers; - -import edu.stanford.nlp.util.ArrayUtils; - -import java.io.Serializable; -import java.util.Set; - -/** - * Normalizes a string by omitting adverbs, determiners, quasi modals, modals - * and "be" - * - * @author jonathanberant - */ -public class BinaryNormalizer implements EntryNormalizer, Serializable { - - - private static final long serialVersionUID = -4704293835712088190L; - - public static Set adverbs = ArrayUtils.asSet(new String[]{"also", "very", "currently", "originally", "really"}); - public static Set determiners = ArrayUtils.asSet(new String[]{"the", "a", "an"}); - public static Set quasiModals = ArrayUtils.asSet(new String[]{"used to "}); - public static Set modals = ArrayUtils.asSet(new String[]{"will "}); - public static Set be = ArrayUtils.asSet(new String[]{"'m", "am", "'re", "are", "'s", "is", "was", "were", "be", "being"}); - - public String normalize(String binary) { - - String res = 
binary.toLowerCase(); - res = omitAdverbs(res); - res = omitDeterminers(res); - res = stripQuasiModals(res); - res = stripModals(res); - res = stripBe(res); - - if (res.length() == 0) - return binary; - return res; - } - - - private static String stripBe(String res) { - - String[] tokens = res.split("\\s+"); - int i; - for (i = 0; i < tokens.length; ++i) { - if (!be.contains(tokens[i])) - break; - } - - StringBuilder sb = new StringBuilder(); - for (; i < tokens.length; ++i) { - sb.append(tokens[i] + " "); - } - - return sb.toString().trim(); - } - - - private static String stripModals(String res) { - - for (String modal : modals) { - if (res.startsWith(modal)) { - res = res.substring(modal.length()); - return res; - } - } - return res; - } - - - private static String stripQuasiModals(String res) { - - for (String quasiModal : quasiModals) { - if (res.startsWith(quasiModal)) { - res = res.substring(quasiModal.length()); - return res; - } - } - return res; - } - - - private static String omitAdverbs(String res) { - - String[] tokens = res.split("\\s+"); - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < tokens.length; ++i) { - if (!adverbs.contains(tokens[i])) - sb.append(tokens[i] + " "); - } - return sb.toString().trim(); - } - - private static String omitDeterminers(String res) { - - String[] tokens = res.split("\\s+"); - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < tokens.length; ++i) { - if (!determiners.contains(tokens[i])) - sb.append(tokens[i] + " "); - } - return sb.toString().trim(); - } - - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/lexicons/normalizers/EntryNormalizer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/lexicons/normalizers/EntryNormalizer.java deleted file mode 100644 index dbaf090186..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/lexicons/normalizers/EntryNormalizer.java +++ /dev/null @@ -1,5 +0,0 @@ -package 
edu.stanford.nlp.sempre.freebase.lexicons.normalizers; - -public interface EntryNormalizer { - String normalize(String str); -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/lexicons/normalizers/IdentityNormalizer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/lexicons/normalizers/IdentityNormalizer.java deleted file mode 100644 index a7181e2df7..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/lexicons/normalizers/IdentityNormalizer.java +++ /dev/null @@ -1,10 +0,0 @@ -package edu.stanford.nlp.sempre.freebase.lexicons.normalizers; - -public class IdentityNormalizer implements EntryNormalizer { - - @Override - public String normalize(String str) { - return str; - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/lexicons/normalizers/PrepDropNormalizer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/lexicons/normalizers/PrepDropNormalizer.java deleted file mode 100644 index 4a59a54d75..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/lexicons/normalizers/PrepDropNormalizer.java +++ /dev/null @@ -1,36 +0,0 @@ -package edu.stanford.nlp.sempre.freebase.lexicons.normalizers; - -import edu.stanford.nlp.util.ArrayUtils; - -import java.util.Set; - -/** - * Deletes the preposition at the end - * - * @author jonathanberant - */ -public class PrepDropNormalizer implements EntryNormalizer { - - public static Set prepositions = ArrayUtils.asSet(new String[]{"in", "on", "of", "for", "about", "at", "from", "to", "with"}); - @Override - public String normalize(String str) { - String res = stripPrep(str); - return stripPrep(res); - } - - public static String stripPrep(String str) { - - String[] tokens = str.split("\\s+"); - if (tokens.length == 1) - return str; - if (!prepositions.contains(tokens[tokens.length - 1])) - return str; - else { - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < tokens.length - 1; ++i) { - 
sb.append(tokens[i] + " "); - } - return sb.toString().trim(); - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/test/FbFormulasTest.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/test/FbFormulasTest.java deleted file mode 100644 index 58c10b405c..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/test/FbFormulasTest.java +++ /dev/null @@ -1,46 +0,0 @@ -package edu.stanford.nlp.sempre.freebase.test; - -import edu.stanford.nlp.sempre.freebase.FbFormulasInfo; -import edu.stanford.nlp.sempre.freebase.FbFormulasInfo.BinaryFormulaInfo; -import edu.stanford.nlp.sempre.freebase.FbFormulasInfo.UnaryFormulaInfo; -import edu.stanford.nlp.sempre.Formula; -import org.testng.annotations.Test; - -import static org.testng.AssertJUnit.assertEquals; - -public class FbFormulasTest { - - @Test - public void formulaInfo() { - FbFormulasInfo infoRepos = FbFormulasInfo.getSingleton(); - - // 1 - BinaryFormulaInfo bInfo = infoRepos.getBinaryInfo(Formula.fromString("(lambda x (!fb:education.education.specialization (!fb:education.field_of_study.students_majoring (var x))))")); - assertEquals(521.0, bInfo.popularity, 0.0001); - assertEquals("fb:education.field_of_study", bInfo.expectedType1); - assertEquals("fb:education.field_of_study", bInfo.expectedType2); - boolean contains = bInfo.descriptions.contains("specialization") && bInfo.descriptions.contains("students majoring in this field"); - assertEquals(true, contains); - // 2 - bInfo = infoRepos.getBinaryInfo(Formula.fromString("!fb:broadcast.content.broadcast")); - assertEquals(4838.0, bInfo.popularity, 0.0001); - assertEquals("fb:broadcast.broadcast", bInfo.expectedType1); - assertEquals("fb:broadcast.content", bInfo.expectedType2); - contains = bInfo.descriptions.contains("broadcasts"); - assertEquals(true, contains); - // 3 - UnaryFormulaInfo uInfo = infoRepos.getUnaryInfo(Formula.fromString("(fb:type.object.type fb:location.country)")); - 
assertEquals(574.0, uInfo.popularity, 0.0001); - contains = uInfo.descriptions.contains("country") && uInfo.descriptions.contains("empire"); - assertEquals(true, contains); - assertEquals("fb:location.country", uInfo.types.iterator().next()); - // 4 - uInfo = infoRepos.getUnaryInfo(Formula.fromString("(fb:people.person.profession fb:en.wrestler)")); - assertEquals(1449.0, uInfo.popularity, 0.0001); - contains = uInfo.descriptions.contains("wrestler") - && uInfo.descriptions.contains("professional wrestler") - && uInfo.descriptions.contains("pro wrestler"); - assertEquals(true, contains); - assertEquals("fb:people.person", uInfo.types.iterator().next()); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/test/FreebaseInfoTest.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/test/FreebaseInfoTest.java deleted file mode 100644 index 3351fb8a31..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/test/FreebaseInfoTest.java +++ /dev/null @@ -1,24 +0,0 @@ -package edu.stanford.nlp.sempre.freebase.test; - -import edu.stanford.nlp.sempre.freebase.FreebaseInfo; -import org.testng.annotations.Test; - -import static org.testng.AssertJUnit.assertEquals; - -/** - * Test FreebaseInfo. 
- * @author Percy Liang - */ -public class FreebaseInfoTest { - @Test public void units() { - FreebaseInfo info = FreebaseInfo.getSingleton(); - assertEquals(FreebaseInfo.ENTITY, info.getUnit1("fb:people.person.place_of_birth")); - assertEquals(FreebaseInfo.ENTITY, info.getUnit2("fb:people.person.place_of_birth")); - - assertEquals(FreebaseInfo.ENTITY, info.getUnit1("fb:people.person.date_of_birth")); - assertEquals(FreebaseInfo.DATE, info.getUnit2("fb:people.person.date_of_birth")); - - assertEquals(FreebaseInfo.ENTITY, info.getUnit1("fb:people.person.height_meters")); - assertEquals("fb:en.meter", info.getUnit2("fb:people.person.height_meters")); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/test/FreebaseSemTypeTest.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/test/FreebaseSemTypeTest.java deleted file mode 100644 index 8f53b8ff39..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/test/FreebaseSemTypeTest.java +++ /dev/null @@ -1,67 +0,0 @@ -package edu.stanford.nlp.sempre.freebase.test; - -import org.testng.annotations.Test; - -import edu.stanford.nlp.sempre.SemType; -import edu.stanford.nlp.sempre.SemTypeHierarchy; -import edu.stanford.nlp.sempre.freebase.*; -import fig.basic.LispTree; - -/** - * Test type system on Freebase schema. 
- * @author Percy Liang - */ -public class FreebaseSemTypeTest { - // For testing - private static SemType T(String str) { - return SemType.fromLispTree(LispTree.proto.parseFromString(str)); - } - - private static void verifyEquals(SemType predType, SemType wantedType) { - if (!predType.toString().equals(wantedType.toString())) - throw new RuntimeException(String.format("Wanted %s, but got %s", wantedType, predType)); - } - - private static void verifyMeet(String t1, String t2) { verifyMeet(t1, t2, t2); } - private static void verifyMeet(String t1, String t2, String t) { - verifyEquals(T(t1).meet(T(t2)), T(t)); - verifyEquals(T(t2).meet(T(t1)), T(t)); - } - - @Test public void simpleSemType() { - FreebaseInfo.getSingleton(); // Load Freebase type hierarchy - SemTypeHierarchy.opts.failOnUnknownTypes = false; - verifyMeet("city", "city"); - verifyMeet("city", "country", "(union)"); - verifyMeet("city", "(union city country)", "city"); - verifyMeet("(union city country river)", "(union city country)"); - - verifyEquals(T("(-> city fb:type.int)").apply(T("(union city country)")), T("fb:type.int")); - verifyEquals(T("(-> city fb:type.int fb:type.float)").apply(T("(union city country)")).apply(T("fb:type.int")), T("fb:type.float")); - verifyEquals(T("fb:type.datetime").apply(T("fb:common.topic")), T("(union)")); - verifyEquals(T("(-> fb:location.citytown fb:type.datetime)").apply(T("fb:location.location")), T("fb:type.datetime")); - verifyEquals(T("(-> fb:location.location fb:type.datetime)").apply(T("fb:location.citytown")), T("fb:type.datetime")); - - verifyMeet("(-> fb:location.location fb:type.number)", "(-> fb:location.location fb:type.float)"); - verifyMeet("fb:common.topic", "fb:location.location"); - verifyMeet("fb:type.any", "fb:type.boolean"); - verifyMeet("fb:type.any", "fb:type.number"); - verifyMeet("fb:type.any", "fb:type.datetime"); - verifyMeet("fb:type.any", "fb:type.cvt"); - verifyMeet("fb:type.any", "fb:type.text"); - verifyMeet("fb:type.any", 
"fb:location.location"); - verifyMeet("fb:type.any", "fb:common.topic"); - - verifyMeet("fb:common.topic", "fb:common.topic"); - verifyMeet("top", "(-> t t)"); - verifyMeet("top", "fb:type.datetime"); - verifyMeet("top", "(union a b)"); - - verifyMeet("(-> (-> a b) top)", "(-> top (-> a b))", "(-> (-> a b) (-> a b))"); - verifyMeet("(-> (union city country) person)", "(-> city (union person dog))", "(-> city person)"); - } - - public static void main(String[] args) { - new FreebaseSemTypeTest().simpleSemType(); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/test/FreebaseTypeInferenceTest.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/test/FreebaseTypeInferenceTest.java deleted file mode 100644 index b2dc1b4a6a..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/test/FreebaseTypeInferenceTest.java +++ /dev/null @@ -1,92 +0,0 @@ -package edu.stanford.nlp.sempre.freebase.test; - -import static org.testng.AssertJUnit.assertEquals; - -import org.testng.annotations.Test; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.freebase.*; - -/** - * Test type inference on Freebase schema. - * @author Percy Liang - */ -public class FreebaseTypeInferenceTest { - // For testing - private static Formula F(String str) { return Formula.fromString(str); } - private static SemType T(String str) { return SemType.fromString(str); } - private static SemType FT(String str) { return TypeInference.inferType(F(str)); } - - void check(String fstr, String tstr) { - System.out.println("check " + fstr + " " + tstr); - assertEquals(T(tstr).toString(), FT(fstr).toString()); - } - - @Test public void simpleSemType() { - // TODO(pliang): replace with NullTypeLookup()? 
- FreebaseTypeLookup.opts.entityTypesPath = null; // Disable entity lookup - TypeInference.setTypeLookup(new FreebaseTypeLookup()); - - check("(fb:location.location.area (>= (number 200)))", "fb:location.location"); - - check("(number 3)", "fb:type.number"); - check("(string foo)", "fb:type.text"); - check("(date 1981 1 1)", "fb:type.datetime"); - check("fb:en.barack_obama", "fb:common.topic"); // Don't have getEntityTypes - check("fb:people.person.place_of_birth", "(-> fb:location.location fb:people.person)"); - - // Join - check("(fb:type.object.type fb:location.location)", "fb:location.location"); - check("(fb:people.person.place_of_birth (fb:type.object.type fb:location.location))", "fb:people.person"); - check("(!fb:people.person.place_of_birth (fb:type.object.type fb:location.location))", "(union)"); - - // Merge - check("(and (fb:type.object.type fb:common.topic) (fb:people.person.place_of_birth fb:en.seattle))", "fb:people.person"); - check("(and (fb:type.object.type fb:location.location) (fb:people.person.place_of_birth fb:en.seattle))", "(union)"); - - // Mark - check("(mark x (fb:people.person.parents (var x)))", "fb:people.person"); - check("(mark x (fb:people.person.place_of_birth (var x)))", "(union)"); - - // Lambda - check("(lambda x (fb:people.person.place_of_birth (var x)))", "(-> fb:location.location fb:people.person)"); - check("(lambda x (!fb:people.person.place_of_birth (var x)))", "(-> fb:people.person fb:location.location)"); - check("(lambda x (fb:people.person.place_of_birth (var x)))", "(-> fb:location.location fb:people.person)"); - check("(lambda x (!fb:people.person.place_of_birth (var x)))", "(-> fb:people.person fb:location.location)"); - check("(lambda x (!fb:people.person.profession (fb:people.person.place_of_birth (var x))))", "(-> fb:location.location fb:people.profession)"); - check("(lambda b ((var b) (fb:type.object.type fb:people.person)))", "(-> (-> fb:people.person top) top)"); - // Note: and the other way doesn't work, 
since we don't propagate everything. - check("(lambda b (and (fb:type.object.type fb:location.location) ((var b) (fb:type.object.type fb:people.person))))", "(-> (-> fb:people.person fb:location.location) fb:location.location)"); - check("(lambda x (lambda y ((var x) (var y))))", "(-> (-> top top) (-> top top))"); - check("(lambda x (lambda x (fb:people.person.place_of_birth (var x))))", "(-> top (-> fb:location.location fb:people.person))"); // No variable capture - - // Aggregation - check("(lambda x (not (var x)))", "(-> fb:type.any fb:type.any)"); - check("(lambda x (count (var x)))", "(-> fb:type.any fb:type.number)"); - check("(lambda x (count (fb:people.person.place_of_birth (var x))))", "(-> fb:location.location fb:type.number)"); - - // Arithmetic - check("(+ (number 3) (number 4))", "fb:type.number"); - check("(+ (date 1981 1 1) (string 4))", "(union)"); - check("(- (date 1982 1 1) (date 1981 1 1))", "fb:type.datetime"); // Future: should be a different duration type - - // Reverse - check("(reverse fb:people.person.place_of_birth)", "(-> fb:people.person fb:location.location)"); - - // Superlative - check("(argmax 1 1 (fb:type.object.type fb:people.person) fb:people.person.date_of_birth)", "fb:people.person"); - check("(argmax 1 1 (fb:type.object.type fb:common.topic) fb:people.person.date_of_birth)", "fb:people.person"); - check("(argmax 1 1 (fb:type.object.type fb:common.topic) (reverse (lambda x (number 3))))", "fb:common.topic"); - check("(lambda x (lambda y (argmax 1 1 (var x) (var y))))", "(-> fb:type.any (-> (-> (union fb:type.number fb:type.datetime) fb:type.any) fb:type.any))"); - - // Call - check("(call Math.cos (number 0))", "fb:type.float"); - check("(call Math.cos (string abc))", "(union)"); - check("(lambda x (lambda y (call .concat (var x) (var y))))", "(-> fb:type.text (-> fb:type.text fb:type.text))"); - check("(lambda x (call .length (var x)))", "(-> fb:type.text fb:type.int)"); - } - - public static void main(String[] args) { - new 
FreebaseTypeInferenceTest().simpleSemType(); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/test/LexiconTest.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/test/LexiconTest.java deleted file mode 100644 index 6177946510..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/test/LexiconTest.java +++ /dev/null @@ -1,85 +0,0 @@ -package edu.stanford.nlp.sempre.freebase.test; - -import edu.stanford.nlp.sempre.freebase.BinaryLexicon; -import edu.stanford.nlp.sempre.freebase.lexicons.EntrySource; -import edu.stanford.nlp.sempre.freebase.lexicons.LexicalEntry.BinaryLexicalEntry; -import edu.stanford.nlp.sempre.freebase.lexicons.LexicalEntry.UnaryLexicalEntry; -import edu.stanford.nlp.sempre.freebase.UnaryLexicon; -import fig.basic.LogInfo; -import org.testng.annotations.Test; - -import java.io.IOException; -import java.util.List; - -import static org.testng.AssertJUnit.assertEquals; - -public class LexiconTest { - @Test - public void unary() throws IOException { - UnaryLexicon.opts.unaryLexiconFilePath = "unittest-files/unaryInfoStringAndAlignment.txt"; - UnaryLexicon unary = UnaryLexicon.getInstance(); - boolean existsAlignment = false, existsStringMatch = false; - double popularity = 0.0; - double intersection = 0.0; - - List entries = unary.lookupEntries("continent"); - LogInfo.logs("Num of unary entries for 'continent': %s", entries.size()); - for (UnaryLexicalEntry entry : entries) { - if (entry.formula.toString().equals("(fb:type.object.type fb:location.continent)")) { - if (entry.source == EntrySource.ALIGNMENT) { - existsAlignment = true; - intersection = entry.alignmentScores.get(UnaryLexicon.INTERSECTION); - } else if (entry.source == EntrySource.STRING_MATCH) { - existsStringMatch = true; - popularity = entry.popularity; - } - } - } - assertEquals(true, existsAlignment); - assertEquals(true, existsStringMatch); - assertEquals(7.0, popularity, 0.0001); - assertEquals(5.0, intersection, 
0.0001); - - existsAlignment = false; - existsStringMatch = false; - popularity = 0.0; - intersection = 0.0; - entries = unary.lookupEntries("lawyer"); - LogInfo.logs("Num of unary entries for 'lawyer': %s", entries.size()); - for (UnaryLexicalEntry entry : entries) { - if (entry.formula.toString().equals("(fb:people.person.profession fb:en.attorney)")) { - if (entry.source == EntrySource.ALIGNMENT) { - existsAlignment = true; - intersection = entry.alignmentScores.get(UnaryLexicon.INTERSECTION); - } else if (entry.source == EntrySource.STRING_MATCH) { - existsStringMatch = true; - popularity = entry.popularity; - } - } - } - assertEquals(true, existsAlignment); - assertEquals(true, existsStringMatch); - assertEquals(12282.0, popularity, 0.0001); // Based on 93.exec (full Freebase) - assertEquals(26.0, intersection, 0.0001); - } - - @Test - public void binary() throws IOException { - BinaryLexicon.opts.binaryLexiconFilesPath = "unittest-files/binaryInfoStringAndAlignment.txt"; - BinaryLexicon.opts.keyToSortBy = BinaryLexicon.INTERSECTION; - - BinaryLexicon lexicon = BinaryLexicon.getInstance(); - List entries = lexicon.lookupEntries("bear in"); - LogInfo.logs("Num of binary entries for 'bear in': %s", entries.size()); - BinaryLexicalEntry top = entries.get(0); - assertEquals("people born here", top.fbDescriptions.iterator().next()); - assertEquals("!fb:location.location.people_born_here", top.formula.toString()); - assertEquals("ALIGNMENT", top.source.toString()); - assertEquals(759773.0, top.popularity, 0.00001); // Based on 93.exec (full Freebase) - assertEquals("fb:people.person", top.expectedType1); - assertEquals("fb:location.location", top.expectedType2); - assertEquals(16184.0, top.alignmentScores.get("FB_typed_size"), 0.0001); - assertEquals(13856.0, top.alignmentScores.get("Intersection_size_typed"), 0.0001); - assertEquals(15765.0, top.alignmentScores.get("NL_typed_size"), 0.0001); - } -} diff --git 
a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/test/PrepDropNormalizerTest.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/test/PrepDropNormalizerTest.java deleted file mode 100644 index 56585a0bb7..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/test/PrepDropNormalizerTest.java +++ /dev/null @@ -1,23 +0,0 @@ -package edu.stanford.nlp.sempre.freebase.test; - -import edu.stanford.nlp.sempre.freebase.lexicons.normalizers.PrepDropNormalizer; -import org.testng.annotations.Test; - -import static org.testng.AssertJUnit.assertEquals; - -/** - * Simple test for normalization (exercise) - * @author jonathan - */ - -public class PrepDropNormalizerTest { - - @Test - public void normalization() { - PrepDropNormalizer normalizer = new PrepDropNormalizer(); - assertEquals("interested", normalizer.normalize("interested in")); - assertEquals("interested", normalizer.normalize("interested at")); - assertEquals("blow up", normalizer.normalize("blow up in")); - assertEquals("blow up the", normalizer.normalize("blow up the to")); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/test/SparqlExecutorTest.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/test/SparqlExecutorTest.java deleted file mode 100644 index e7eb0a424c..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/test/SparqlExecutorTest.java +++ /dev/null @@ -1,173 +0,0 @@ -package edu.stanford.nlp.sempre.freebase.test; - -import java.util.List; -import edu.stanford.nlp.sempre.Executor; -import edu.stanford.nlp.sempre.Formulas; -import edu.stanford.nlp.sempre.Value; -import edu.stanford.nlp.sempre.ListValue; -import edu.stanford.nlp.sempre.freebase.SparqlExecutor; -import fig.basic.LispTree; -import fig.basic.LogInfo; -import org.testng.annotations.Test; - -/** - * Test execution of Formulas on a SPARQL server. 
- * @author Percy Liang - */ -public class SparqlExecutorTest { - interface ValuesChecker { - void checkValues(List values); - } - - public static ValuesChecker size(final int expectedNumResults) { - return new ValuesChecker() { - public void checkValues(List values) { - if (values.size() != expectedNumResults) - throw new RuntimeException("Expected " + expectedNumResults + " results, but got " + values.size() + ": " + values); - } - }; - } - - public static ValuesChecker sizeAtLeast(final int expectedNumResults) { - return new ValuesChecker() { - public void checkValues(List values) { - if (values.size() < expectedNumResults) - throw new RuntimeException("Expected at least " + expectedNumResults + " results, but got " + values.size() + ": " + values); - } - }; - } - - public static ValuesChecker matches(String expected) { - final Value expectedValue = Value.fromString(expected); - return new ValuesChecker() { - public void checkValues(List values) { - if (values.size() != 1 || !values.get(0).equals(expectedValue)) - throw new RuntimeException("Expected " + expectedValue + ", but got " + values); - } - }; - } - - public static ValuesChecker regexMatches(final String expectedPattern) { - return new ValuesChecker() { - public void checkValues(List values) { - if (values.size() != 1 || !values.get(0).toString().matches(expectedPattern)) - throw new RuntimeException("Expected " + expectedPattern + ", but got " + values); - } - }; - } - - protected static void runFormula(SparqlExecutor executor, String formula) { - runFormula(executor, formula, sizeAtLeast(0)); - } - - protected static void runFormula(SparqlExecutor executor, String formula, ValuesChecker checker) { - Executor.Response response = executor.execute(Formulas.fromLispTree(LispTree.proto.parseFromString(formula)), null); - LogInfo.logs("RESULT: %s", response.value); - checker.checkValues(((ListValue) response.value).values); - } - - SparqlExecutor executor = new SparqlExecutor(); - - public 
SparqlExecutorTest() { - SparqlExecutor.opts.endpointUrl = System.getProperty("sparqlserver"); - // Hard-coding not ideal. - if (SparqlExecutor.opts.endpointUrl == null) - SparqlExecutor.opts.endpointUrl = "http://freebase.cloudapp.net:3093/sparql"; - SparqlExecutor.opts.verbose = 3; - } - - @Test(groups = "sparql") public void sparqlTrivial() { - runFormula(executor, "fb:en.barack_obama", size(1)); - runFormula(executor, "(number 3)", size(1)); - } - - @Test(groups = "sparql") public void sparqlJoin() { - runFormula(executor, "(!fb:people.person.place_of_birth fb:en.barack_obama)", matches("(name fb:en.honolulu)")); // place of birth of Obama - runFormula(executor, "(!fb:people.person.date_of_birth fb:en.barack_obama)", matches("(date 1961 8 4)")); // date of birth of Obama - runFormula(executor, "(!fb:common.topic.alias fb:en.barack_obama)", sizeAtLeast(2)); // Names of Obama - runFormula(executor, "(!fb:people.person.children fb:en.barack_obama)", size(2)); // children of Obama - runFormula(executor, "(!fb:people.marriage.spouse (!fb:people.person.spouse_s fb:en.barack_obama))", size(2)); // spouse of Obama (will include Barack and Michelle) - } - - @Test(groups = "sparql") public void sparqlIntersect() { - runFormula(executor, "(and (fb:type.object.type fb:type.datetime) (!fb:people.person.date_of_birth fb:en.barack_obama))", size(1)); // date of birth of Obama - runFormula(executor, "(and (fb:type.object.type fb:people.person) (!fb:people.person.parents fb:en.barack_obama))", size(2)); // parents of Obama - } - - @Test(groups = "sparql") public void sparqlLambda() { - runFormula(executor, "((lambda x (!fb:people.person.parents (var x))) fb:en.barack_obama)", size(2)); // parents of Obama - runFormula(executor, "((lambda x (!fb:people.marriage.spouse (!fb:people.person.spouse_s (var x)))) fb:en.barack_obama)", size(2)); // spouse of Barack Obama (includes himself) - } - - @Test(groups = "sparql") public void sparqlReverse() { - runFormula(executor, "((reverse 
fb:people.person.parents) fb:en.barack_obama)", size(2)); // parents of Obama - runFormula(executor, "((reverse (reverse fb:people.person.parents)) fb:en.barack_obama)", size(2)); // children of Obama - runFormula(executor, "((reverse (lambda x (fb:people.person.parents (fb:people.person.parents (var x))))) fb:en.barack_obama)", size(4)); // grandparents of Obama - } - - @Test(groups = "sparql") public void sparqlUnion() { - runFormula(executor, "(or (!fb:people.person.children fb:en.barack_obama) (!fb:people.person.parents fb:en.barack_obama))", size(4)); // children or parents of Obama - runFormula(executor, "(or fb:en.barack_obama fb:en.michelle_obama)", size(2)); // Barack and Michelle Obama - - // TODO(pliang): doesn't work - // runFormula(executor, "(or (number 3) (number 5))", size(2)); // 3 or 5 - } - - @Test(groups = "sparql") public void sparqlNot() { - runFormula(executor, "(and (!fb:people.person.parents fb:en.barack_obama) (not (fb:people.person.gender fb:en.male)))", size(1)); // parents of Obama who are not male - } - - @Test(groups = "sparql") public void sparqlRelations() { - runFormula(executor, "(and (fb:type.object.type fb:location.citytown) (fb:type.object.name (string \"Palo Alto\")))", size(8)); // cities called "Palo Alto" - runFormula(executor, "(and (fb:type.object.type fb:location.us_state) (fb:type.object.name (STRSTARTS (string A))))", size(4)); // cities whose names begin with "A" - runFormula(executor, "(and (!fb:people.person.parents fb:en.barack_obama) (!= fb:en.ann_dunham))", size(1)); // parents of Obama who are not Ann Dunham - runFormula(executor, "(fb:geography.mountain.elevation (>= 8500))", size(4)); // mountains at least 8500 meters tall - runFormula(executor, "(fb:people.person.height_meters (> 1.8))", sizeAtLeast(10)); // people over 1.8 meters tall - - // Dates are tricky - runFormula(executor, "(and (!fb:people.person.children fb:en.barack_obama) (fb:people.person.date_of_birth (= (date 2001 -1 -1))))", size(1)); // 
children of Obama born in 2001 - runFormula(executor, "(and (!fb:people.person.children fb:en.barack_obama) (fb:people.person.date_of_birth (<= (date 2001 -1 -1))))", size(2)); // children of Obama born no later than 2001 - runFormula(executor, "(and (!fb:people.person.children fb:en.barack_obama) (fb:people.person.date_of_birth (>= (date 2001 -1 -1))))", size(1)); // children of Obama born no earlier than 2001 - } - - @Test(groups = "sparql") public void sparqlMark() { - runFormula(executor, "(mark x (and (fb:type.object.type fb:people.person) (fb:people.person.place_of_birth (!fb:people.deceased_person.place_of_death (var x)))))", sizeAtLeast(10)); // people who were born in the place that they died - runFormula(executor, "((lambda x (mark y (!fb:people.marriage.spouse (!fb:people.person.spouse_s (and (var x) (!= (var y))))))) fb:en.barack_obama)", size(1)); // spouse of Barack Obama - } - - @Test(groups = "sparql") public void sparqlAggregate() { - runFormula(executor, "(count (fb:type.object.type fb:location.us_state))", matches("(number 50)")); // number of US states - runFormula(executor, "(min (!fb:location.location.area (fb:type.object.type fb:location.us_state)))", matches("(number 3140 fb:en.square_kilometer)")); // minimum area of all US states - runFormula(executor, "(max (!fb:location.location.area (fb:type.object.type fb:location.us_state)))", matches("(number 1717850 fb:en.square_kilometer)")); // maximum area of all US states - runFormula(executor, "(sum (!fb:location.location.area (fb:type.object.type fb:location.us_state)))", regexMatches("\\(number .* fb:en.square_kilometer\\)")); // total area of all US states - runFormula(executor, "((reverse (lambda x (count (!fb:people.person.children (var x))))) (>= 50))", size(2)); // people with at least 50 children - - String border = "(lambda x (mark y (fb:location.location.adjoin_s (fb:location.adjoining_relationship.adjoins (and (var x) (!= (var y)))))))"; - runFormula(executor, "(and 
(fb:type.object.type fb:location.us_state) ((reverse (lambda x (count (and (fb:type.object.type fb:location.us_state) (" + border + " (var x)))))) (> 6)))", size(4)); // states bordering more than 6 states - - // TODO(pliang): known bug (bordering less than 2 states doesn't include Alaska and Hawaii - need to make things optional) - // (execute (and (@type @state) ((reverse (lambda x (count (and (@type @state) (@border (var x)))))) (number 0)))) # doesn't work - } - - @Test(groups = "sparql") public void sparqlSuperlative() { - runFormula(executor, "(argmax 1 1 (fb:type.object.type fb:location.us_state) fb:location.location.area)", matches("(name fb:en.alaska)")); // largest state - runFormula(executor, "(argmax 1 3 (fb:type.object.type fb:location.us_state) fb:location.location.area)", size(3)); // 3 largest states - runFormula(executor, "(argmax 3 1 (fb:type.object.type fb:location.us_state) fb:location.location.area)", matches("(name fb:en.california)")); // 3rd largest state - - runFormula(executor, "(!fb:measurement_unit.dated_integer.number (argmax 1 1 (!fb:location.statistical_region.population fb:en.california) fb:measurement_unit.dated_integer.year))", size(1)); // (most recent) population of california - runFormula(executor, "(!fb:measurement_unit.dated_integer.number (argmax 1 2 (!fb:location.statistical_region.population fb:en.california) fb:measurement_unit.dated_integer.year))", size(2)); // (most recent) two populations of california - - // TODO(pliang): this query seems to time out, figure out why. 
- // runFormula(executor, "(argmax 1 1 (fb:type.object.type fb:location.us_state) (lambda x (!fb:measurement_unit.dated_integer.number (argmax 1 1 (!fb:location.statistical_region.population (var x)) fb:measurement_unit.dated_integer.year))))", matches("(name fb:en.california)")); - - runFormula(executor, "(argmax 1 1 (fb:type.object.type fb:location.us_state) (reverse (lambda x (count ((reverse (lambda y (fb:location.location.adjoin_s (fb:location.adjoining_relationship.adjoins (and (fb:type.object.type fb:location.us_state) (var y)))))) (var x))))))", size(2)); // states that borders the most states - } - - // Arithmetic - @Test(groups = "sparql") public void sparqlArithmetic() { - runFormula(executor, "(+ (!fb:people.person.height_meters fb:en.barack_obama) (!fb:people.person.height_meters fb:en.michelle_obama))", regexMatches("\\(number 3.650 fb:en.meter\\)")); - runFormula(executor, "(- (!fb:people.person.height_meters fb:en.barack_obama) (!fb:people.person.height_meters fb:en.michelle_obama))", regexMatches("\\(number 0.050 fb:en.meter\\)")); - runFormula(executor, "(/ (!fb:people.person.height_meters fb:en.barack_obama) (!fb:people.person.height_meters fb:en.michelle_obama))", regexMatches("\\(number 1.028\\)")); - runFormula(executor, "(+ (number 3) (number 8))", regexMatches("\\(number 11\\)")); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/test/StemmerTest.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/test/StemmerTest.java deleted file mode 100644 index 12f90365fe..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/test/StemmerTest.java +++ /dev/null @@ -1,15 +0,0 @@ -package edu.stanford.nlp.sempre.freebase.test; - -import edu.stanford.nlp.sempre.freebase.Stemmer; -import org.testng.annotations.Test; - -import static org.testng.AssertJUnit.assertEquals; - -public class StemmerTest { - @Test public void simpleStem() { - Stemmer stemmer = new Stemmer(); - assertEquals("box", 
stemmer.stem("boxes")); - assertEquals("creat", stemmer.stem("created")); - assertEquals("citi", stemmer.stem("cities")); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/test/TokenMatchTest.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/test/TokenMatchTest.java deleted file mode 100644 index fb77caf428..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/test/TokenMatchTest.java +++ /dev/null @@ -1,21 +0,0 @@ -package edu.stanford.nlp.sempre.freebase.test; - -import edu.stanford.nlp.sempre.freebase.lexicons.TokenLevelMatchFeatures; -import edu.stanford.nlp.stats.Counter; -import org.testng.annotations.Test; - -import java.util.Arrays; - -import static org.testng.AssertJUnit.assertEquals; - -public class TokenMatchTest { - - @Test - public void tokenMatch() { - String[] text = new String[]{"what", "tv", "program", "have", "hugh", "laurie", "create"}; - String[] pattern = new String[]{"program", "create"}; - Counter match = TokenLevelMatchFeatures.extractTokenMatchFeatures(Arrays.asList(text), Arrays.asList(pattern), true); - assertEquals(0.5, match.getCount("prefix"), 0.00001); - assertEquals(0.5, match.getCount("suffix"), 0.00001); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/CollectionUtils.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/CollectionUtils.java deleted file mode 100644 index f9788a08f0..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/CollectionUtils.java +++ /dev/null @@ -1,25 +0,0 @@ -package edu.stanford.nlp.sempre.freebase.utils; - -import java.util.HashMap; -import java.util.Map; - -public final class CollectionUtils { - private CollectionUtils() { } - - public static Map arraysToMap(K[] keys, V[] values) { - if (keys.length != values.length) - throw new RuntimeException("Lenght of keys: " + keys.length + ", length of values: " + values.length); - Map res = new 
HashMap<>(); - for (int i = 0; i < keys.length; ++i) { - res.put(keys[i], values[i]); - } - return res; - } - - public static Map doubleContainerToDoubleMap(Map map) { - Map res = new HashMap<>(); - for (K key : map.keySet()) - res.put(key, map.get(key).value()); - return res; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/DoubleContainer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/DoubleContainer.java deleted file mode 100644 index 0a0b231750..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/DoubleContainer.java +++ /dev/null @@ -1,34 +0,0 @@ -package edu.stanford.nlp.sempre.freebase.utils; - -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import fig.basic.Fmt; - -public class DoubleContainer implements Comparable { - - @JsonProperty private double value; - - @JsonCreator - public DoubleContainer(@JsonProperty("count") double count) { this.value = count; } - - public void inc() { value++; } - - public void dec() { value--; } - - public void inc(double n) { value += n; } - - public void dec(double n) { value -= n; } - - public void set(double n) { value = n; } - - public double value() { return value; } - - public String toString() { return "" + Fmt.D(value); } - - @Override - public int compareTo(DoubleContainer o) { - if (value < o.value) return -1; - if (value > o.value) return 1; - return 0; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/FileUtils.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/FileUtils.java deleted file mode 100644 index b4ae651be7..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/FileUtils.java +++ /dev/null @@ -1,261 +0,0 @@ -package edu.stanford.nlp.sempre.freebase.utils; - -import com.google.common.base.Joiner; -import com.google.common.collect.BiMap; -import 
com.google.common.collect.HashBiMap; -import edu.stanford.nlp.io.IOUtils; -import edu.stanford.nlp.objectbank.ObjectBank; -import edu.stanford.nlp.sempre.DateValue; -import edu.stanford.nlp.sempre.DescriptionValue; -import edu.stanford.nlp.sempre.Json; -import edu.stanford.nlp.sempre.NameValue; -import edu.stanford.nlp.stats.ClassicCounter; -import edu.stanford.nlp.stats.Counter; -import edu.stanford.nlp.util.StringUtils; -import fig.basic.LispTree; -import fig.basic.LogInfo; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.PrintWriter; -import java.util.*; - -/** - * Utilities for files - * @author jonathanberant - */ -public final class FileUtils { - private FileUtils() { } - - /** - * Upload a set of string where each line is an element - * - * @throws IOException - */ - public static Set loadSet(String file) throws IOException { - - Set res = new TreeSet(); - BufferedReader reader = IOUtils.getBufferedFileReader(file); - String line; - while ((line = reader.readLine()) != null) { - res.add(line); - } - reader.close(); - return res; - } - - public static Map loadStringToStringMap(String file) throws IOException { - - Map res = new HashMap(); - BufferedReader reader = IOUtils.getBufferedFileReader(file); - String line; - int i = 0; - while ((line = reader.readLine()) != null) { - String[] tokens = line.split("\t"); - res.put(tokens[0], tokens[1]); - i++; - if (i % 1000000 == 0) - LogInfo.logs("Uploaing line %s: %s", i, line); - } - reader.close(); - return res; - } - - public static Map loadIntToDoubleMap(String file) throws IOException { - - Map res = new HashMap(); - BufferedReader reader = IOUtils.getBufferedFileReader(file); - String line; - while ((line = reader.readLine()) != null) { - String[] tokens = line.split("\t"); - res.put(Integer.parseInt(tokens[0]), Double.parseDouble(tokens[1])); - } - reader.close(); - return res; - } - - public static Map loadStringToStringMap(String file, int keyColumn, int valueColumn) throws 
IOException { - - Map res = new HashMap(); - BufferedReader reader = IOUtils.getBufferedFileReader(file); - String line; - int i = 0; - while ((line = reader.readLine()) != null) { - String[] tokens = line.split("\t"); - res.put(tokens[keyColumn], tokens[valueColumn]); - i++; - if (i % 1000000 == 0) - LogInfo.log("Number of lines uploaded: " + i); - } - reader.close(); - return res; - } - - public static BiMap loadStringToStringBiMap(String file, int from, int to) throws IOException { - - BiMap res = HashBiMap.create(); - BufferedReader reader = IOUtils.getBufferedFileReader(file); - String line; - while ((line = reader.readLine()) != null) { - String[] tokens = line.split("\t"); - if (res.containsKey(tokens[from])) - throw new RuntimeException("Map already contains key: " + tokens[from]); - if (res.inverse().containsKey(tokens[to])) - throw new RuntimeException("Map already contains value: " + tokens[to]); - res.put(tokens[from], tokens[to]); - } - reader.close(); - return res; - } - - public static Set loadSetFromTabDelimitedFile(String file, int column) throws IOException { - - Set res = new HashSet(); - BufferedReader reader = IOUtils.getBufferedFileReader(file); - String line; - int i = 0; - while ((line = reader.readLine()) != null) { - String[] tokens = line.split("\t"); - res.add(tokens[column]); - i++; - if (i % 1000000 == 0) { - LogInfo.log("Number of lines: " + i); - } - } - - reader.readLine(); - return res; - - } - - public static BiMap loadString2IntegerBiMap(String file, String delimiter) throws IOException { - - BiMap res = HashBiMap.create(); - BufferedReader reader = IOUtils.getBufferedFileReader(file); - String line; - while ((line = reader.readLine()) != null) { - - String[] tokens = line.split(delimiter); - res.put(tokens[0], Integer.parseInt(tokens[1])); - - } - reader.close(); - return res; - } - - public static BiMap loadIntegerToIntegerBiMap(String file) throws IOException { - - BiMap res = HashBiMap.create(); - BufferedReader reader = 
IOUtils.getBufferedFileReader(file); - String line; - while ((line = reader.readLine()) != null) { - - String[] tokens = line.split("\t"); - res.put(Integer.parseInt(tokens[0]), Integer.parseInt(tokens[1])); - - } - reader.close(); - return res; - } - - public static Map loadString2IntegerMap(String file) throws IOException { - - Map res = new HashMap(); - BufferedReader reader = IOUtils.getBufferedFileReader(file); - String line; - while ((line = reader.readLine()) != null) { - - String[] tokens = line.split("\t"); - res.put(tokens[0], Integer.parseInt(tokens[1])); - - } - reader.close(); - return res; - } - - public static BiMap loadString2IntegerBiMap(String file) throws IOException { - return loadString2IntegerBiMap(file, "\t"); - } - - public static Counter loadStringCounter(String filename) { - - Counter res = new ClassicCounter(); - for (String line : ObjectBank.getLineIterator(filename)) { - - String[] tokens = line.split("\t"); - res.incrementCount(tokens[0], Double.parseDouble(tokens[1])); - - } - return res; - } - - public static void ridDuplicates(String inFile, String outFile) throws IOException { - - Set inSet = loadSet(inFile); - PrintWriter writer = IOUtils.getPrintWriter(outFile); - for (String str : inSet) { - writer.println(str); - } - writer.close(); - } - - public static String omitPunct(String str) { - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < str.length(); ++i) { - if (!StringUtils.isPunct((new Character(str.charAt(i))).toString())) { - sb.append(str.charAt(i)); - } - } - return sb.toString(); - } - - //input: tab separated file with |utterance| |gold| |predicted| - //output: prediction file for codalab - public static void generatePredictionFile(String inFile, String outFile) throws IOException { - - PrintWriter writer = IOUtils.getPrintWriter(outFile); - for (String line : IOUtils.readLines(inFile)) { - String[] tokens = line.split("\\t"); - if (tokens.length == 0) - continue; - if (tokens.length < 2) - throw new 
RuntimeException("Illegal line: " + line); - String utterance = tokens[0]; - // get gold - List goldDescriptions = new ArrayList<>(); - LispTree goldTree = LispTree.proto.parseFromString(tokens[1]); - for (int i = 1; i < goldTree.children.size(); ++i) { - DescriptionValue dValue = (DescriptionValue) DescriptionValue.fromString(goldTree.child(i).toString()); - goldDescriptions.add(dValue.value); - } - // get predicted - List predictedDescriptions = new ArrayList<>(); - if (tokens.length > 2) { - LispTree predictedTree = LispTree.proto.parseFromString(tokens[2]); - for (int i = 1; i < predictedTree.children.size(); ++i) { - if (predictedTree.child(i).child(0).value.equals("name")) { - NameValue nValue = (NameValue) NameValue.fromString(predictedTree.child(i).toString()); - predictedDescriptions.add(nValue.description); - } else if (predictedTree.child(i).child(0).value.equals("date")) { - DateValue dateValue = (DateValue) DateValue.fromString(predictedTree.child(i).toString()); - predictedDescriptions.add(dateValue.toString()); - } else - throw new RuntimeException("Can not support this value: " + line); - } - } - writer.println(Joiner.on('\t').join(utterance, - Json.writeValueAsStringHard(goldDescriptions), - Json.writeValueAsStringHard(predictedDescriptions))); - } - writer.close(); - } - - public static void main(String[] args) { - try { - generatePredictionFile(args[0], args[1]); - } catch (IOException e) { - e.printStackTrace(); - throw new RuntimeException(e); - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/FormatConverter.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/FormatConverter.java deleted file mode 100644 index 908bdff903..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/FormatConverter.java +++ /dev/null @@ -1,82 +0,0 @@ -package edu.stanford.nlp.sempre.freebase.utils; - -public final class FormatConverter { - private FormatConverter() { } - - public 
static final String FULL_FB_PREFIX = "http://rdf.freebase.com/ns/"; - public static final String SHORT_FB_PREFIX = "fb:"; - - public static String fromDotToSlash(String dotString) { - if (dotString.startsWith("http")) { - return dotString.substring(dotString.lastIndexOf('/')).replace('.', '/'); - } else { - if (dotString.startsWith("/")) - return dotString; - return "/" + dotString.substring(dotString.indexOf(':') + 1).replace('.', '/'); - } - } - - public static String toShortPrefix(String str) { - return str.replace(FULL_FB_PREFIX, SHORT_FB_PREFIX); - } - - public static String fromNoPrefixMidToDot(String mid) { - - if (mid.startsWith("fb:m") || mid.startsWith("/m/")) - throw new RuntimeException("This mid has a prefix: " + mid); - return SHORT_FB_PREFIX + "m." + mid; - - } - - /** converts from slash notation to dot notation */ - public static String fromSlashToDot(String slashString, boolean strict) { - - if (!(slashString.charAt(0) == '/')) { - if (strict) { - throw new IllegalArgumentException("Not a legal slash string: " + slashString); - } else - return slashString; - } - - return SHORT_FB_PREFIX + slashString.substring(1).replace('/', '.'); - } - - /** - * convert from [/award/award_winning_work/awards_won, - * /award/award_honor/award_winner] to (lambda x - * (fb:award.award_winning_work.awards_won (fb:award.award_honor.award_winner - * (var x)))) - */ - public static String fromCvtBinaryToLispTree(String str) { - - boolean reversed = false; - if (str.startsWith("!")) { - reversed = true; - str = str.substring(1); - } - // strip brackets - str = str.substring(1, str.length() - 1); - String[] tokens = str.split(","); - if (tokens.length == 1) { - return reversed ? "!" 
+ fromSlashToDot(tokens[0].trim(), false) : fromSlashToDot(tokens[0].trim(), false); - } else { - String property1 = fromSlashToDot(tokens[0].trim(), false); - String property2 = fromSlashToDot(tokens[1].trim(), false); - return propertiesToCompositeLispTree(property1, property2, reversed); - } - } - - public static String propertiesToCompositeLispTree(String property1, - String property2, boolean reversed) { - - StringBuilder sb = new StringBuilder(); - if (reversed) { - sb.append("(lambda x (!" + property2 + " (!" + property1 + " (var x))))"); - } else { - sb.append("(lambda x (" + property1 + " (" + property2 + " (var x))))"); - } - return sb.toString(); - } - - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/FreebaseUtils.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/FreebaseUtils.java deleted file mode 100644 index 1563538ae0..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/FreebaseUtils.java +++ /dev/null @@ -1,180 +0,0 @@ -package edu.stanford.nlp.sempre.freebase.utils; - -import com.google.common.collect.BiMap; -import com.google.common.collect.HashBiMap; -import edu.stanford.nlp.io.IOUtils; -import fig.basic.LogInfo; - -import java.io.BufferedReader; -import java.io.IOException; -import java.util.HashMap; -import java.util.Map; -import java.util.regex.Pattern; - - -/** - * Utils for handling freebase data dump files - * - * @author jonathanberant - */ -public final class FreebaseUtils { - private FreebaseUtils() { } - - public static final Pattern DELIMITER_PATTERN = Pattern.compile("\t"); - public static final String NAME_PROPERTY = "/type/object/name"; - public static final String ALIAS_PROPERTY = "/common/topic/alias"; - public static final String TYPE_PROPERTY = "/type/object/type"; - public static final String PROFESSION_PROPERTY = "/people/person/profession"; - public static final String MID_PREFIX = "/m/"; - public static final String COMMON_DOMAIN_PREFIX 
= "/common/"; - public static final String USER_DOMAIN_PREFIX = "/user/"; - public static final String BASE_DOMAIN_PREFIX = "/base/"; - public static final String FREEBASE_DOMAIN_PREFIX = "/freebase/"; - public static final String DATA_DOMAIN_PREFIX = "/dataworld/"; - public static final String TYPE_DOMAIN_PREFIX = "/type/"; - // indices in data dump file - public static final int MID_INDEX = 0; - public static final int PROPERTY_INDEX = 1; - public static final int LANGUAGE_INDEX = 2; - public static final int VALUE_INDEX = 2; - public static final int DATE_INDEX = 3; - public static final int NAME_INDEX = 3; - - /** Checks if a string is a valid MID */ - public static boolean isMid(String str) { - return str.startsWith("/m/"); - } - - public static boolean isValidPropertyLine(String line) { - - String[] tokens = DELIMITER_PATTERN.split(line); - - return tokens.length == 3 && - isMid(tokens[MID_INDEX]) && - isMid(tokens[VALUE_INDEX]) && - !isMid(tokens[PROPERTY_INDEX]) && - isValidTypePrefix(tokens[PROPERTY_INDEX]); - } - - public static boolean isValidPropertyLineWithDate(String line) { - - String[] tokens = DELIMITER_PATTERN.split(line); - - boolean regularProperty = tokens.length == 3 && - isMid(tokens[MID_INDEX]) && - !isMid(tokens[PROPERTY_INDEX]) && - isValidTypePrefix(tokens[PROPERTY_INDEX]); - - boolean dateProperty = - tokens.length == 4 && - isMid(tokens[MID_INDEX]) && - isValidTypePrefix(tokens[PROPERTY_INDEX]) && - tokens[2].equals("") && - isDate(tokens[3]); - - return regularProperty || dateProperty; - } - - public static boolean isDate(String dateCandidate) { - - boolean res = true; - if (dateCandidate.startsWith("-")) { - dateCandidate = dateCandidate.substring(1); - } - int i = 0; - for (; i < Math.min(4, dateCandidate.length()); ++i) { - if (!Character.isDigit(dateCandidate.charAt(i))) { - res = false; - break; - } - } - if (i != 4) - res = false; - if (dateCandidate.length() > 4 && dateCandidate.charAt(4) != '-') - res = false; - return res; - } 
- - public static String extractDate(String dateCandidate) { - - boolean neg = false; - if (dateCandidate.startsWith("-")) { - neg = true; - dateCandidate = dateCandidate.substring(1); - } - return neg ? "-" + dateCandidate.substring(0, 4) : dateCandidate.substring(0, 4); - } - - public static boolean isValidTypePrefix(String type) { - if (type.equals("/type/datetime")) - return true; - return !(type.startsWith(BASE_DOMAIN_PREFIX) || type.startsWith(MID_PREFIX) || type.startsWith(COMMON_DOMAIN_PREFIX) || - type.startsWith(USER_DOMAIN_PREFIX) || - type.startsWith(DATA_DOMAIN_PREFIX) || - type.startsWith(FREEBASE_DOMAIN_PREFIX) || - type.startsWith(TYPE_DOMAIN_PREFIX) || - type.startsWith("/guid/")); - } - - public static String getNoPrefixMid(String line) { - return DELIMITER_PATTERN.split(line)[MID_INDEX].substring(MID_PREFIX.length()); - } - - public static String getProperty(String line) { - return DELIMITER_PATTERN.split(line)[PROPERTY_INDEX]; - } - - public static String getValue(String line) { - return DELIMITER_PATTERN.split(line)[VALUE_INDEX]; - } - - public static String getDateValue(String line) { - return DELIMITER_PATTERN.split(line)[DATE_INDEX]; - } - - public static String getName(String line) { - return DELIMITER_PATTERN.split(line)[NAME_INDEX]; - } - - public static boolean isArg1Equal2Mid(String mid, String tupleTokens) { - return DELIMITER_PATTERN.split(tupleTokens)[MID_INDEX].equals(mid); - } - - public static Map loadMid2NameMap(String filename) throws IOException { - - LogInfo.log("Loading mid to name file..."); - - Map res = new HashMap(); - BufferedReader reader = IOUtils.getBufferedFileReader(filename); - String line; - while ((line = reader.readLine()) != null) { - String[] tokens = line.split("\t"); - res.put(tokens[0], tokens[1]); - } - LogInfo.log("Loaded " + res.keySet().size() + " MIDs"); - return res; - } - - public static BiMap loadProperties(String propertyFileName) throws IOException { - - BiMap res = HashBiMap.create(); - 
BufferedReader reader = IOUtils.getBufferedFileReader(propertyFileName); - - String line; - short id = 1; - while ((line = reader.readLine()) != null) { - res.put(id++, line); - } - return res; - } - - public static boolean isUnary(String property) { - property = FormatConverter.fromDotToSlash(property); - return (property.equals(TYPE_PROPERTY) || property.equals(PROFESSION_PROPERTY)); - } - - public static boolean isNameProperty(String property) { - property = FormatConverter.fromDotToSlash(property); - return (property.equals(ALIAS_PROPERTY) || property.equals(NAME_PROPERTY)); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/LinkedExtractionFileUtils.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/LinkedExtractionFileUtils.java deleted file mode 100644 index fcca55764d..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/LinkedExtractionFileUtils.java +++ /dev/null @@ -1,110 +0,0 @@ -package edu.stanford.nlp.sempre.freebase.utils; - -import edu.stanford.nlp.io.IOUtils; -import fig.basic.LogInfo; - -import java.io.BufferedReader; -import java.io.IOException; -import java.util.*; -import java.util.regex.Pattern; - -public final class LinkedExtractionFileUtils { - - private String extractionFile; - private static final int ARG1_INDEX = 0; - private static final int PREDICATE_INDEX = 1; - private static final int ARG2_INDEX = 2; - private static final int MID_INDEX = 3; - public static final Pattern DELIMITER_PATTERN = Pattern.compile("\t"); - public static final String TIME_ARG = "TIME:"; - - - public LinkedExtractionFileUtils(String extractionFileName) { - this.extractionFile = extractionFileName; - } - - public Map> getIdToExtractionsMap() throws IOException { - - LogInfo.log("Uploading id-to-extraction-set map"); - Map> res = new HashMap>(); - - BufferedReader reader = IOUtils.getBufferedFileReader(extractionFile); - String line; - while ((line = reader.readLine()) != 
null) { - - String[] tokens = DELIMITER_PATTERN.split(line); - - Set extractionSet = res.get(tokens[MID_INDEX]); - if (extractionSet == null) { - extractionSet = new HashSet(); - res.put(tokens[MID_INDEX], extractionSet); - } - extractionSet.add(tokens[ARG1_INDEX] + DELIMITER_PATTERN + tokens[PREDICATE_INDEX] + DELIMITER_PATTERN + DELIMITER_PATTERN + tokens[ARG2_INDEX]); - } - reader.close(); - LogInfo.log("Done uploading id-to-extraction-set map"); - return res; - } - - public Set getLinkedIdSet() throws IOException { - - LogInfo.log("Uploading linked MIDs set"); - Set res = new HashSet(); - - BufferedReader reader = IOUtils.getBufferedFileReader(extractionFile); - String line; - while ((line = reader.readLine()) != null) { - - String[] tokens = DELIMITER_PATTERN.split(line); - res.add(tokens[MID_INDEX]); - } - reader.close(); - LogInfo.log("Done uploading linked IDs set"); - return res; - } - - public Map>> getIdToArg2ToPredicateListMap() throws IOException { - - LogInfo.begin_track("Uploading id-to-arg-predicate-list-map"); - // BinaryNormalizer normalizer = new BinaryNormalizer(); - - Map>> res = new HashMap>>(); - - for (String line : IOUtils.readLines(extractionFile)) { - - String[] tokens = DELIMITER_PATTERN.split(line); - - String id = tokens[MID_INDEX]; - String arg2 = tokens[ARG2_INDEX]; - String predicate = tokens[PREDICATE_INDEX]; - // String predicate = normalizer.normalize(tokens[PREDICATE_INDEX]); - - Map> arg2ToPredicateList = res.get(id); - if (arg2ToPredicateList == null) { - arg2ToPredicateList = new HashMap>(); - arg2ToPredicateList.put(arg2, new LinkedList()); - res.put(id, arg2ToPredicateList); - } - - List predicateList = arg2ToPredicateList.get(arg2); - if (predicateList == null) { - predicateList = new LinkedList(); - arg2ToPredicateList.put(arg2, predicateList); - } - predicateList.add(predicate); - } - LogInfo.end_track(); - return res; - } - - public static boolean isTimeArg(String str) { - return str.startsWith(TIME_ARG); - } - - public 
static String extractTime(String str) { - if (!isTimeArg(str)) { - throw new RuntimeException("Not a time arg: " + str); - } - return str.substring(TIME_ARG.length()); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/MathUtils.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/MathUtils.java deleted file mode 100644 index e5761faa65..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/MathUtils.java +++ /dev/null @@ -1,202 +0,0 @@ -package edu.stanford.nlp.sempre.freebase.utils; - -import edu.stanford.nlp.stats.ClassicCounter; -import edu.stanford.nlp.stats.Counter; -import edu.stanford.nlp.stats.Counters; - -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - -public final class MathUtils { - private MathUtils() { } - - public static double jaccard(double intersection, double size1, double size2, double smoothing) { - return intersection / (size1 + size2 + smoothing - intersection); - } - - public static double generalizedJensenShannonDivergence(Counter c1, Counter c2) { - - double sum = 0.0; - Set nonZeroEntries = new HashSet(); - for (E entry : nonZeroEntries) { - double u = c1.getCount(entry); - double v = c2.getCount(entry); - sum += coordinateJsDivergence(u, v); - } - return sum / 2.0; - } - - private static double coordinateJsDivergence(double u, double v) { - return u * Math.log(2 * u / (u + v)) + v * Math.log(2 * v / (u + v)); - } - - public static double coordinateJsDiverDeriv(double x, double y) { - if (x == 0.0) - return 0.0; - if (y == 0.0) - return Math.log(2) / 2.0; - - double xPlusY = x + y; - double res = -1 * Math.log(xPlusY) - (y / xPlusY) + x * (1 / x - 1 / xPlusY) + Math.log(x) + Math.log(2); - return res / 2.0; - } - - public static Counter prefixCounterKeys(Counter counter, String prefix) { - Counter res = new ClassicCounter(); - for (String key : counter.keySet()) { - res.setCount(prefix + "_" + key, 
counter.getCount(key)); - } - return res; - } - - public static double vectorCosine(List array1, List array2) { - - if (array1.size() != array2.size()) - throw new RuntimeException("Cannot compute cosine of arrays of differnt sizes: " + array1.size() + " " + array2.size()); - double dotProd = 0.0; - double lsq1 = 0.0; - double lsq2 = 0.0; - - for (int i = 0; i < array1.size(); ++i) { - dotProd += array1.get(i) * array2.get(i); - lsq1 += array1.get(i) * array1.get(i); - lsq2 += array2.get(i) * array2.get(i); - } - return dotProd / (Math.sqrt(lsq1) * Math.sqrt(lsq2)); - } - - public static double euclidDistance(List array1, List array2) { - - if (array1.size() != array2.size()) - throw new RuntimeException("Cannot compute cosine of arrays of differnt sizes: " + array1.size() + " " + array2.size()); - - double sqDistance = 0.0; - for (int i = 0; i < array1.size(); ++i) { - sqDistance += Math.pow(array1.get(i) - array2.get(i), 2); - } - return Math.sqrt(sqDistance); - } - - public static double sumDoubleMap(Map map) { - double sum = 0.0; - for (DoubleContainer d : map.values()) - sum += d.value(); - return sum; - } - - public static void normalizeDoubleMap(Map map) { - double sum = 0.0; - for (DoubleContainer d : map.values()) - sum += d.value(); - for (T key : map.keySet()) { - double normalizedValue = map.get(key).value() / sum; - map.get(key).set(normalizedValue); - } - } - - /** - * Computes jaccard between sets of objects - * @param x - * @param y - * @return - */ - public static double jaccard(Set x, Set y) { - - Set intersection = new HashSet(x); - intersection.retainAll(y); - Set union = new HashSet(x); - union.addAll(y); - - double res = union.size() == 0 ? 
1.0 : (double) intersection.size() / union.size(); - return res; - } - - /** - * Computes jaccard between sets of objects - * @param x - * @param y - * @return - */ - public static double jaccard(List x, List y) { - - Set intersection = new HashSet(x); - intersection.retainAll(y); - Set union = new HashSet(x); - union.addAll(y); - - double res = union.size() == 0 ? 1.0 : (double) intersection.size() / union.size(); - return res; - } - - /** - * how many of the tokens in x are covered by y - * @param x - * @param y - * @return - */ - public static double coverage(List x, List y) { - Set yTokens = new HashSet(y); - int covered = 0; - for (T xItem : x) { - if (yTokens.contains(xItem)) - covered++; - } - return (double) covered / x.size(); - } - - /** - * Geometric average of unigram bigram and trigram precision - * @param test - * @param ref - * @return - */ - - public static double bleu(List test, List ref) { - - Set refUnigrams = new HashSet(); - Set refBigrams = new HashSet(); - Set refTrigrams = new HashSet(); - for (int i = 0; i < ref.size(); ++i) { - refUnigrams.add(ref.get(i)); - if (i < ref.size() - 1) - refBigrams.add(ref.get(i) + " " + ref.get(i + 1)); - if (i < ref.size() - 2) - refTrigrams.add(ref.get(i) + " " + ref.get(i + 1) + " " + ref.get(i + 2)); - } - int unigramCov = 0; - int bigramCov = 0; - int trigramCov = 0; - for (int i = 0; i < test.size(); ++i) { - if (refUnigrams.contains(test.get(i))) - unigramCov++; - if (i < test.size() - 1) { - String bigram = test.get(i) + " " + test.get(i + 1); - if (refBigrams.contains(bigram)) - bigramCov++; - } - if (i < test.size() - 2) { - String trigram = test.get(i) + " " + test.get(i + 1) + " " + test.get(i + 2); - if (refTrigrams.contains(trigram)) - trigramCov++; - } - } - double unigramPrec = (double) unigramCov / test.size(); - double bigramPrec = (double) bigramCov / (test.size() - 1); - double trigramPrec = (double) trigramCov / (test.size() - 2); - double exponent = (double) 1 / 3; - return 
Math.pow(unigramPrec * bigramPrec * trigramPrec, exponent); - } - - public static double tokensCosine(List x, List y) { - - Counter xCounter = new ClassicCounter(); - for (String str : x) - xCounter.incrementCount(str); - Counter yCounter = new ClassicCounter(); - for (String str : y) - yCounter.incrementCount(str); - return Counters.cosine(xCounter, yCounter); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/SemparseLogTools.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/SemparseLogTools.java deleted file mode 100644 index 01fd1be741..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/SemparseLogTools.java +++ /dev/null @@ -1,261 +0,0 @@ -package edu.stanford.nlp.sempre.freebase.utils; - -import edu.stanford.nlp.io.IOUtils; -import fig.basic.LispTree; -import fig.basic.LogInfo; -import fig.basic.MapUtils; - -import java.io.IOException; -import java.io.PrintWriter; -import java.util.*; - -public final class SemparseLogTools { - private SemparseLogTools() { } - - public static void main(String[] args) throws IOException { - - LogInfo.begin_track_printAll("analyze"); - if (args[0].equals("compare")) { - compareLogs(args[1], args[2], args[3]); - } else if (args[0].equals("diff")) { - printDiff(args[1], args[2]); - } else if (args[0].equals("fb_descriptions")) { - getFbDescInTrueDerivations(args[1], args[2]); - } - if (args[0].equals("result_list")) { - printResultList(args[1], args[2]); - } - LogInfo.end_track(); - } - - private static void printResultList(String log, String field) { - - boolean start = false; - int numOfIterations = -1; - String prob = null; - String correct = null; - String oracle = null; - - - for (String line : IOUtils.readLines(log)) { - if (line.contains("Iteration")) { - int slashIndex = line.indexOf('/'); - int openCurlyIndex = line.indexOf('{'); - numOfIterations = Integer.parseInt(line.substring(slashIndex + 1, openCurlyIndex - 1)); - } - if 
(line.contains("Processing iter=" + numOfIterations + ".dev")) { - start = true; - } - if (start) { - if (line.contains("Pred@0000")) { - prob = line.substring(line.indexOf("prob=") + 5, line.indexOf(", comp=")); - } - if (line.contains("Example:")) - prob = "0"; - if (line.contains("Current: correct=")) { - int correctIndex = line.indexOf("correct="); - int oracleIndex = line.indexOf("oracle="); - int partCorrectIndex = line.indexOf("partCorrect="); - int parsedIndex = line.indexOf("parsed="); - int numTokensIndex = line.indexOf("numTokens="); - - - if (field.equals("oracle")) - System.out.println(line.substring(oracleIndex + 7, partCorrectIndex - 1)); - if (field.equals("correct")) - System.out.println(line.substring(correctIndex + 8, oracleIndex - 1) + "\t" + prob); - if (field.equals("parsed")) - System.out.println(line.substring(parsedIndex + 7, numTokensIndex - 1)); - } - } - } - } - - private static void getFbDescInTrueDerivations(String log, String out) throws IOException { - - boolean start = false; - int numOfIterations = -1; - Map> exampleToDescriptions = new HashMap>(); - - - String currExample = null; - - for (String line : IOUtils.readLines(log)) { - if (line.contains("Iteration")) { - int slashIndex = line.indexOf('/'); - int openCurlyIndex = line.indexOf('{'); - numOfIterations = Integer.parseInt(line.substring(slashIndex + 1, openCurlyIndex - 1)); - } - if (line.contains("Processing iter=" + numOfIterations + ".")) { - start = true; - } - if (start) { - if (line.contains("Example:")) { - int end = line.indexOf("{") - 1; - currExample = line.substring(line.indexOf("Example:") + 9, end); - exampleToDescriptions.put(currExample, new HashSet()); - } - if (line.contains("True@")) { - - String formula = line.substring(line.indexOf("(formula"), line.indexOf("(value") - 1); - LispTree t = LispTree.proto.parseFromString(formula); - Set descriptions = new HashSet(); - extractDescriptionsFromTree(t, descriptions); - for (String description : descriptions) - 
MapUtils.addToSet(exampleToDescriptions, currExample, description); - } - } - } - PrintWriter writer = IOUtils.getPrintWriter(out); - for (String example : exampleToDescriptions.keySet()) { - if (exampleToDescriptions.get(example).size() > 0) - writer.println(example + "\t" + exampleToDescriptions.get(example)); - } - writer.close(); - } - - private static void extractDescriptionsFromTree(LispTree t, Set descriptions) { - - if (t.value != null) { - if (t.value.indexOf('.') != t.value.lastIndexOf('.')) { - descriptions.add(t.value.substring(t.value.lastIndexOf('.') + 1)); - } - } - if (!t.isLeaf()) { - for (LispTree child : t.children) { - extractDescriptionsFromTree(child, descriptions); - } - } - } - - private static void printDiff(String log, String field) throws IOException { - - boolean start = false; - int numOfIterations = -1; - - String example = null; - String targetFormula = null; - String targetValue = null; - String trueDeriv = null; - String predDeriv = null; - - boolean print = false; - - for (String line : IOUtils.readLines(log)) { - if (line.contains("Iteration")) { - int slashIndex = line.indexOf('/'); - int openCurlyIndex = line.indexOf('{'); - numOfIterations = Integer.parseInt(line.substring(slashIndex + 1, openCurlyIndex - 1)); - } - if (line.contains("Processing iter=" + numOfIterations + ".dev")) { - start = true; - } - if (start) { - if (line.contains("Example:")) { - if (print && example != null) { - LogInfo.log(example); - LogInfo.log(targetFormula); - LogInfo.log(targetValue); - LogInfo.log(trueDeriv); - LogInfo.log(predDeriv); - } - example = line; - targetFormula = null; targetValue = null; trueDeriv = null; - predDeriv = null; - } - if (line.contains("targetFormula:")) { - targetFormula = line; - } - if (line.contains("targetValue:")) { - targetValue = line; - } - if (line.contains("True@") && trueDeriv == null) { - trueDeriv = line; - } - if (line.contains("Pred@") && predDeriv == null) { - predDeriv = line; - } - if 
(line.contains("Current:")) { - if (field.equals("correct")) { - if (line.contains("correct=0") && line.contains("oracle=1")) { - print = true; - } else - print = false; - } - if (field.equals("oracle")) { - if (line.contains("oracle=0") && line.contains("parsed=1")) - print = true; - else - print = false; - } - if (field.equals("parsed")) { - if (line.contains("parsed=0")) - print = true; - else - print = false; - } - } - } - } - } - - public static void compareLogs(String log1, String log2, String field) { - List correctnessList1 = computeCorrectnessList(log1, field); - List correctnessList2 = computeCorrectnessList(log2, field); - - if (correctnessList1.size() != correctnessList2.size()) - throw new RuntimeException("lists are not same size"); - LogInfo.logs("Size of correctness: %s", correctnessList1.size()); - - for (int i = 0; i < correctnessList1.size(); ++i) { - if (!correctnessList1.get(i).equals(correctnessList2.get(i))) { - LogInfo.log("example: " + i + " log1: " + correctnessList1.get(i) + " log2: " + correctnessList2.get(i)); - } - } - } - - private static List computeCorrectnessList(String log1, String field) { - - List res = new LinkedList<>(); - boolean start = false; - int numOfIterations = -1; - for (String line : IOUtils.readLines(log1)) { - if (line.contains("Iteration")) { - int slashIndex = line.indexOf('/'); - int openCurlyIndex = line.indexOf('{'); - numOfIterations = Integer.parseInt(line.substring(slashIndex + 1, openCurlyIndex - 1)); - LogInfo.logs("Number of iterations=%s", numOfIterations); - } - if (line.contains("Processing iter=" + numOfIterations + ".dev")) { - start = true; - } - if (start) { - if (line.contains("Current: parsed=")) { - String[] tokens = line.split("\\s+"); - for (String token : tokens) { - String[] tokenParts = token.split("="); - if (field.equals(tokenParts[0])) - res.add(Double.parseDouble(tokenParts[1])); - } -// LogInfo.logs() -// int correctIndex = line.indexOf(" correct="); -// int oracleIndex = 
line.indexOf(" oracle="); -// int partCorrectIndex = line.indexOf(" partCorrect="); -// int partOracleIndex= line.indexOf(" partOracle="); -// int afterPartOracleIndex = line.indexOf("\\s+",partOracleIndex); -// LogInfo.logs("part oracle index=%s, after=%s",partCorrectIndex,afterPartOracleIndex); -// if (field.equals("oracle")) -// res.add(Double.parseDouble(line.substring(oracleIndex + 8, partCorrectIndex))); -// if (field.equals("correct")) -// res.add(Double.parseDouble(line.substring(correctIndex + 9, oracleIndex))); -// if (field.equals("partCorrect")) -// res.add(Double.parseDouble(line.substring(partCorrectIndex + 13, partOracleIndex))); -// if (field.equals("partOracle")) -// res.add(Double.parseDouble(line.substring(partOracleIndex + 12, afterPartOracleIndex))); - } - } - } - return res; - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/ShortContainer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/ShortContainer.java deleted file mode 100644 index c7674c94e1..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/ShortContainer.java +++ /dev/null @@ -1,28 +0,0 @@ -package edu.stanford.nlp.sempre.freebase.utils; - -import java.io.Serializable; - -public class ShortContainer implements Serializable { - - /** - * - */ - private static final long serialVersionUID = -911790554283478225L; - - private short count; - - public ShortContainer(short count) { this.count = count; } - - public void inc() { count++; } - - public void dec() { count--; } - - public void inc(short n) { count += n; } - - public void dec(short n) { count -= n; } - - public short value() { return count; } - - public String toString() { return new Short(count).toString(); } -} - diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/WnExpander.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/WnExpander.java deleted file mode 100644 index 
c4de6e6936..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/WnExpander.java +++ /dev/null @@ -1,147 +0,0 @@ -package edu.stanford.nlp.sempre.freebase.utils; - -import fig.basic.LogInfo; -import fig.basic.Option; - -import edu.stanford.nlp.sempre.freebase.utils.WordNet.*; - -import java.io.File; -import java.io.IOException; -import java.util.*; - - -public class WnExpander { - - public static class Options { - @Option(gloss = "Verbose") public int verbose = 0; - @Option(gloss = "Path to Wordnet file") - public String wnFile = "lib/wordnet-3.0-prolog"; - @Option(gloss = "Relations to expand with wordnet") - public Set wnRelations = new HashSet<>(); - } - - public static Options opts = new Options(); - - private WordNet wn; - private Set edgeTypes = new HashSet<>(); - - /** - * Initializing wordnet and the relations to expand with - * - * @throws IOException - */ - public WnExpander() throws IOException { - wn = WordNet.loadPrologWordNet(new File(opts.wnFile)); - for (String wnRelation : opts.wnRelations) { - switch (wnRelation) { - case "derives": - edgeTypes.add(EdgeType.DERIVES); - break; - case "derived_from": - edgeTypes.add(EdgeType.DERIVED_FROM); - break; - case "hyponym": - edgeTypes.add(EdgeType.HYPONYM); - break; - default: - throw new RuntimeException("Invalid relation: " + wnRelation); - } - } - } - - public Set expandPhrase(String phrase) { - - // find synsetse for phrase - Set phraseSynsets = phraseToSynsets(phrase); - // expand synsets - for (EdgeType edgeType : edgeTypes) - phraseSynsets.addAll(expandSynsets(phraseSynsets, edgeType)); - // find phrases for synsets - Set expansions = synsetsToPhrases(phraseSynsets); - if (opts.verbose > 0) { - for (String expansion : expansions) - LogInfo.logs("WordNetExpansionLexicon: expanding %s to %s", phrase, expansion); - } - return expansions; - } - - public Set getSynonyms(String phrase) { - Set phraseSynsets = phraseToSynsets(phrase); - Set expansions = 
synsetsToPhrases(phraseSynsets); - expansions.remove(phrase); - return expansions; - } - - public Set getDerivations(String phrase) { - Set phraseSynsets = phraseToSynsets(phrase); - Set derivations = new HashSet<>(); - derivations.addAll(expandSynsets(phraseSynsets, EdgeType.DERIVED_FROM)); - derivations.addAll(expandSynsets(phraseSynsets, EdgeType.DERIVES)); - Set expansions = synsetsToPhrases(derivations); - expansions.remove(phrase); - return expansions; - } - - public Set getHypernyms(String phrase) { - Set phraseSynsets = phraseToSynsets(phrase); - Set hypernyms = new HashSet<>(); - hypernyms.addAll(expandSynsets(phraseSynsets, EdgeType.HYPONYM)); - Set expansions = synsetsToPhrases(hypernyms); - expansions.remove(phrase); - return expansions; - } - - private Set synsetsToPhrases(Set phraseSynsets) { - - Set res = new HashSet<>(); - for (WordNetID phraseSynset : phraseSynsets) { - res.addAll(synsetToPhrases(phraseSynset)); - } - return res; - } - - private Collection synsetToPhrases(WordNetID phraseSynset) { - Set res = new HashSet<>(); - List wordTags = phraseSynset.get(EdgeType.SYNSET_HAS_WORDTAG); - for (WordNetID wordTag : wordTags) { - List words = wordTag.get(EdgeType.WORDTAG_TO_WORD); - for (WordNetID word : words) { - res.add(((WordID) word).word); - } - } - return res; - } - - /** Given a phrase find all synsets containing this phrase */ - private Set phraseToSynsets(String phrase) { - - List wordTags = new LinkedList<>(); - WordID word = wn.getWordID(phrase); - if (word != null) - wordTags.addAll(word.get(EdgeType.WORD_TO_WORDTAG)); - Set synsets = new HashSet<>(); - for (WordNetID wordTag : wordTags) { - synsets.addAll(wordTag.get(EdgeType.WORDTAG_IN_SYNSET)); - } - return synsets; - } - - private List expandSynset(WordNetID synset, EdgeType edgeType) { - return synset.get(edgeType); - } - - private Set expandSynsets(Collection synsets, EdgeType edgeType) { - Set res = new HashSet<>(); - for (WordNetID synset : synsets) - 
res.addAll(expandSynset(synset, edgeType)); - return res; - } - - public static void main(String[] args) throws IOException { - - WnExpander wnLexicon = new WnExpander(); - wnLexicon.expandPhrase("assassinate"); - System.out.println(); - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/WordNet.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/WordNet.java deleted file mode 100644 index f5d1daf488..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/freebase/utils/WordNet.java +++ /dev/null @@ -1,982 +0,0 @@ -package edu.stanford.nlp.sempre.freebase.utils; - -/* - * Distributed as part of WordWalk, a Java package for lexical - * semantic relatedness using random graph walks. - * - * Copyright (C) 2008 Daniel Ramage - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110 USA - */ - -import java.io.File; -import java.lang.ref.WeakReference; -import java.util.*; - -import edu.stanford.nlp.io.RuntimeIOException; -import edu.stanford.nlp.objectbank.ObjectBank; - - -/** - *

Fast, lightweight, constant time library for accessing WordNet. Use - * one of the public load methods to create an instance by reading the WordNet - * data files from disk. WordNet is represented with a set of WordID, - * WordTagID, and SynsetID objects, each of which is unique within the WordNet - * instance (so you can use == for equality checks). There are many types - * of edges between these nodes (mostly SynsetID to SynsetID), where - * each edge type is defined in the EdgeType enum.

- * - *

This class would be Serializable except that the default Java - * serialization mechanism can't handle the depth of recursion between - * the inner WordTagID objects -- hence an instance can be created only - * by loading the requisite file from disk.

- * - * @author dramage - * @author Chris Manning made minimal changes to make it JavaNLP land not Ramage land - */ -public final class WordNet { - - /** Global counter for assigning unique index to each loaded id. */ - private final ArrayList all = new ArrayList(500000); - - /** Global immutable view of all */ - private final List immutableAll = Collections.unmodifiableList(all); - - /** Set of loaded edges types */ - private final Set loadedEdges = new HashSet(); - - /** Indexes mapping canonical strings to their typed versions */ - private final Map synsets = new HashMap(); - private final Map words = new HashMap(); - private final Map wordtags = new HashMap(); - - /** File resource backing this WordNet instance */ - private final File path; - - /** - * Private constructor - use one of the static constructor methods. - * - * @param path Path on disk to the underlying wordnet resource. - */ - private WordNet(File path) { - this.path = path; - } - - /** - * All types of edges in wordnet. Some are transposes of each other, - * as specified by parallel objects in edgeTransposePairs, - * edgeTransposeTarget. - */ - public enum EdgeType { - // structural relationships between words, wordtags, and synsets. 
etc - WORD_TO_WORDTAG, WORDTAG_TO_WORD, // word to all wordtag in play - WORDTAG_IN_SYNSET, SYNSET_HAS_WORDTAG, // wordtag to all synsets - SYNSET_WORDTAGS_OVERLAP, // two synsets share wordtag - - // invertible synset relations - HYPONYM, HYPERNYM, // from hyp; nouns and verbs - INSTANCE_OF, HAS_INSTANCE, // from ins - ENTAILS, ENTAILED_BY, // from ent; verbs only - SIM_HEAD, SIM_SATELLITE, // from sim; adjectives only - MM_HOLONYM, MM_MERONYM, // from mm; member meronyms - MS_HOLONYM, MS_MERONYM, // from ms; substance meronym - MP_HOLONYM, MP_MERONYM, // from mp; part meronym - CAUSED_BY, CAUSES, // from cs; for verbs - DERIVES, DERIVED_FROM, // from der; for noun to adj - - // self reflexive synset relations - ATTRIBUTE, // from at - - // invertible word relations - PARTICIPLE_OF, HAS_PARTICIPLE, // from ppl - PERTAINS_TO, PERTANYM_OF, // from per - SEE_ALSO_TO, SEE_ALSO_FROM, // from sa - - // self reflexive word relations - ANTONYM, // from ant - SIMILAR_VERBS, // from vgp - - // weird relations (words or synsets) from cls relation - TERM_HAS_TOPIC, TOPIC_FROM_TERM, - TERM_HAS_USAGE, USAGE_FROM_TERM, - TERM_IN_REGION, REGION_HAS_TERM, - } - - /** Set of relations that are transposes of eachother. 
*/ - private static final EdgeType[][] transpose = { - {EdgeType.HYPONYM, EdgeType.HYPERNYM}, - {EdgeType.INSTANCE_OF, EdgeType.HAS_INSTANCE}, - {EdgeType.ENTAILS, EdgeType.ENTAILED_BY}, - {EdgeType.SIM_HEAD, EdgeType.SIM_SATELLITE}, - {EdgeType.MM_HOLONYM, EdgeType.MM_MERONYM}, - {EdgeType.MS_HOLONYM, EdgeType.MS_MERONYM}, - {EdgeType.MP_HOLONYM, EdgeType.MP_MERONYM}, - {EdgeType.CAUSED_BY, EdgeType.CAUSES}, - {EdgeType.DERIVES, EdgeType.DERIVED_FROM}, - {EdgeType.PARTICIPLE_OF, EdgeType.HAS_PARTICIPLE}, - {EdgeType.PERTAINS_TO, EdgeType.PERTANYM_OF}, - {EdgeType.SEE_ALSO_TO, EdgeType.SEE_ALSO_FROM}, - {EdgeType.TERM_HAS_TOPIC, EdgeType.TOPIC_FROM_TERM}, - {EdgeType.TERM_HAS_USAGE, EdgeType.USAGE_FROM_TERM}, - {EdgeType.TERM_IN_REGION, EdgeType.REGION_HAS_TERM} - }; - - /** Set of relations that are self-reflexive, i.e. if a->b then b-> a */ - private static final EdgeType[] reflexive = { - EdgeType.SYNSET_WORDTAGS_OVERLAP, - EdgeType.ATTRIBUTE, - EdgeType.SIMILAR_VERBS, - EdgeType.ANTONYM, - }; - - /** - * WordNet-defined part of speech tags. Folds adjective satellite - * in with adjective. - */ - public enum PartOfSpeech { - NOUN('n'), VERB('v'), ADJECTIVE('a'), ADVERB('r'); - - /** WordNet character for encoding the part of speech. */ - public final char ssType; - - private PartOfSpeech(char ssType) { - this.ssType = ssType; - } - - public static PartOfSpeech fromWordNetSSType(char ssType) { - for (PartOfSpeech pos : PartOfSpeech.values()) { - if (pos.ssType == ssType) { - return pos; - } - } - if (ssType == 's') { - return ADJECTIVE; - } - throw new IllegalArgumentException("Unexpected ss_type: " + ssType); - } - } - - /** Base of all wordnet ids. Comparison is based on index number. 
*/ - public abstract class WordNetID implements Comparable { - /** Globally unique index of this WordNetID */ - private int index; - - /** Outgoing links from this node */ - private Map> links - = new EnumMap>(EdgeType.class); - - /** Private no-arg constructor prevents new subclasses */ - private WordNetID() { - index = all.size(); - all.add(this); - } - - /** - * Returns the set of nodes linked from this note by the given - * edge type. - */ - public List get(EdgeType type) { - List set = links.get(type); - if (set == null) { set = Collections.emptyList(); } - return Collections.unmodifiableList(set); - } - - /** Returns the index of this WordNetID in the enclosing WordNet instance */ - public int index() { - return index; - } - - /** Adds a link with the given edge type to the object */ - protected void add(EdgeType type, WordNetID target) { - ArrayList list = links.get(type); - if (list == null) { - list = new ArrayList(); - links.put(type, list); - } - if (!list.contains(target)) { - list.add(target); - } - } - - /** Compacts and sorts each edge list. */ - protected void compact() { - for (ArrayList edge : links.values()) { - Collections.sort(edge); - edge.trimToSize(); - } - } - - /** Ordered by index in enclosing WordNet */ - public int compareTo(WordNetID other) { - return this.index < other.index ? -1 : - (this.index > other.index ? 1 : 0); - } - - /** Returns the enclosing WordNet instance */ - public WordNet getEnclosingWordNet() { - return WordNet.this; - } - - /** Ordered by index in enclosing WordNet */ - @Override - public boolean equals(Object other) { - if (other == this) return true; - if (!(other instanceof WordNetID)) return false; - WordNetID w = (WordNetID) other; - return this.index == w.index; - } - - @Override - public int hashCode() { - return index; - } - - } - - /** - * Represents a word, e.g. "dog". 
- * - * Instances of this class are unique for a given {@link WordNet}, - * so direct object equality and hashcode are the default behavior. - * - * @author dramage - */ - public final class WordID extends WordNetID { - - /** WordTags for this Word */ - private final EnumMap mWordTags - = new EnumMap(PartOfSpeech.class); - - /** The string of our word (with spaces converted to underscore) */ - public final String word; - - /** Immutable view of all WordTagIDs this WordID is part of. */ - public final Map wordTags - = Collections.unmodifiableMap(mWordTags); - - private WordID(String word) { - this.word = word; - } - - /** - * Gets the WordTagID for this word with the given part of speech - * or null if the part of speech does not apply to this word. - */ - public WordTagID getWordTag(PartOfSpeech tag) { - return mWordTags.get(tag); - } - - /** Adds a reference to the given WordTagID */ - private void addWordTag(WordTagID wordTagId) { - assert !mWordTags.containsKey(wordTagId.tag) - || mWordTags.get(wordTagId.tag) == wordTagId - : "Unexpected duplicate WordTagID"; - - this.mWordTags.put(wordTagId.tag, wordTagId); - } - - @Override - public String toString() { - return word; - } - } - - /** - * Represents a word with part of speech tag, e.g. "dog#n". - * - * Instances of this class are unique for a given {@link WordNet}, - * so direct object equality and hashcode are the default behavior. - * - * @author dramage - */ - public final class WordTagID extends WordNetID { - - /** Mutable view of word senses for this WordTag. */ - private final ArrayList mSynsets - = new ArrayList(1); - - /** The WordID we are an instance of */ - public final WordID word; - - /** Our part of speech tag */ - public PartOfSpeech tag; - - /** Immutable list of all synsets this WordTag takes part in. 
*/ - public final List synsets - = Collections.unmodifiableList(mSynsets); - - public List getSynsets() { - return this.synsets; - } - private WordTagID(WordID word, PartOfSpeech tag) { - this.word = word; - this.tag = tag; - } - - /** - * Gets the n'th synset associated with this word tag. This index - * is 1-based to be consistent with wordnet's numbering scheme; i.e. - * getSynset(0) will throw an IllegalArgumentException. - * */ - public SynsetID getSynset(int num) { - num--; // convert to 0-indexed - if (num < 0) { - throw new IllegalArgumentException("SynsetIDs are 1-based"); - } - return mSynsets.get(num); - } - - /** Adds the given Synset into this WordTag with the given number */ - private void addSynset(SynsetID synsetId, int num) { - num--; // convert to 0-indexed - assert num >= 0 && (mSynsets.size() <= num || - mSynsets.get(num) == null || mSynsets.get(num) == synsetId) - : "Unexpected repeat of word sense " + synsetId + " on " + this - + ": already " + mSynsets.get(num); - - while (mSynsets.size() <= num) { - mSynsets.add(null); - } - mSynsets.set(num, synsetId); - } - - @Override - public String toString() { - return word + "#" + tag.ssType; - } - } - - /** - * Represents a WordNet synset id as an int, e.g. 301380127. - * - * Instances of this class are unique for a given {@link WordNet}, - * so direct object equality and hashcode are the default behavior. - * - * Each SynsetID has potentially many WordTagSenseID's. - * - * @author dramage - */ - public final class SynsetID extends WordNetID { - - /** Modifiable WordTagIDs associated with this SynsetID */ - private final ArrayList mWordTags - = new ArrayList(1); - - /** The synset id number */ - public final int synset; - - /** Number of times this sense was tagged in a corpus or 0 for not seen. 
*/ - public final int count; - - /** The number of this synset in the first WordTagID (for toString) */ - private int numberInFirstWordTagID = 0; - - /** All WordTags that this Synset is a part of */ - public final List wordtags - = Collections.unmodifiableList(mWordTags); - - private SynsetID(int synset, int count) { - this.synset = synset; - this.count = count; - } - - /** Returns the n'th word sense */ - public WordTagID getWordTag(int num) { - num--; // convert to 0-indexed - if (num < 0) { - throw new IllegalArgumentException("WordTagIDs are 1-based"); - } - - return mWordTags.get(num); - } - - /** - * Adds the given WordTagSense into this synset - */ - private void addWordSense(WordTagID wordTagId, int num) { - num--; // convert to 0-indexed - assert num >= 0 && (mWordTags.size() <= num || - mWordTags.get(num) == null || mWordTags.get(num) == wordTagId) - : "Unexpected repeat of word " + wordTagId + " on " + this - + ": already " + mWordTags.get(num); - - while (mWordTags.size() <= num) { - mWordTags.add(null); - } - mWordTags.set(num, wordTagId); - } - - @Override - public String toString() { - return mWordTags.get(0) + "#" + numberInFirstWordTagID; - } - } - - /** Returns a collection of all the WordNetIDs loaded. */ - public Collection getAllWordNetIDs() { - return immutableAll; - } - - /** Returns a collection of all the words loaded. */ - public Collection getAllWords() { - return words.keySet(); - } - /** - * Returns the given WordNetID by it's index. The return value could - * be cast to one of the three possible subclasses: WordID, WordTagID, - * or SynsetID. - */ - public WordNetID getWordNetID(int index) { - return all.get(index); - } - - /** - * Returns the WordNetID referred to by the given string. Delegates - * to one of getSynsetID, getWordTagID, or getWordID depending on the - * format of the string or null if no such ID can be found. 
- */ - public WordNetID getWordNetID(String string) { - int firstHash = string.indexOf('#'); - if (firstHash < 0) { - WordNetID rv = getWordID(string); - return rv != null ? rv : getSynsetID(string); - } else { - int secondHash = string.lastIndexOf('#'); - if (firstHash == secondHash) { - return getWordTagID(string); - } else { - return getSynsetID(string); - } - } - } - - /** - * Returns the SynsetID instance as described by the given id string - * or null if there is no such id. The string can either be a synset id - * such as "100001740" or a word sense such as "entity#n#1". - */ - public SynsetID getSynsetID(String string) { - // check if specified as "100001740" - SynsetID id = synsets.get(string); - if (id == null) { - // check if specified as "entity#n#1" - int split = string.lastIndexOf('#'); - if (split >= 1) { - int num; - try { - num = Integer.parseInt(string.substring(split + 1)); - } catch (NumberFormatException e) { - return null; - } - WordTagID wordTagId = wordtags.get(string.substring(0, split)); - if (wordTagId != null) { - id = wordTagId.getSynset(num); - } - } - } - return id; - } - - /** - * Returns the WordID instance from a string such as "dog" - * or null if there is no such id. - */ - public WordID getWordID(String string) { - return words.get(string); - } - - /** - * Returns the WordTagID instance from a string such as "dog#n" - * or null if there is no such id. - */ - public WordTagID getWordTagID(String string) { - return wordtags.get(string); - } - - /** - * The total number of loaded nodes in the WordNet instance. - */ - public int size() { - return all.size(); - } - - /** - * Returns the path on disk of the underlying WordNet database. - */ - @Override - public String toString() { - return path.toString(); - } - - /** - * Stitches together the structural edge types, WORD_TOWORDTAG, - * SYNSET_HAS_WORDTAG, etc. 
- */ - private void createStructuralEdges() { - // create WORD_TO_WORDTAG and WORDTAG_TO_WORD - for (WordID word : words.values()) { - for (WordTagID wordtag : word.wordTags.values()) { - word.add(EdgeType.WORD_TO_WORDTAG, wordtag); - wordtag.add(EdgeType.WORDTAG_TO_WORD, word); - } - } - - // create WORTAG_IN_SYNSET and SYNSET_HAS_WORDTAG - for (WordTagID wordtag : wordtags.values()) { - for (SynsetID synset : wordtag.synsets) { - wordtag.add(EdgeType.WORDTAG_IN_SYNSET, synset); - synset.add(EdgeType.SYNSET_HAS_WORDTAG, wordtag); - } - } - - // create SYNSET_WORDTAGS_OVERLAP - for (WordTagID wordtag : wordtags.values()) { - for (SynsetID synsetA : wordtag.mSynsets) { - if (synsetA == null) { continue; } - for (SynsetID synsetB : wordtag.mSynsets) { - if (synsetB == null || synsetA == synsetB) { continue; } - synsetA.add(EdgeType.SYNSET_WORDTAGS_OVERLAP, synsetB); - } - } - } - - // record that we have created these edges - loadedEdges.addAll(Arrays.asList(new EdgeType[]{ - EdgeType.WORD_TO_WORDTAG, EdgeType.WORDTAG_TO_WORD, - EdgeType.WORDTAG_IN_SYNSET, EdgeType.SYNSET_HAS_WORDTAG, - EdgeType.SYNSET_WORDTAGS_OVERLAP, })); - } - - /** Compacts and finalizes ordering of all data structures */ - private void compact() { - // sort the WordNetID's by type (then by order added) - Collections.sort(all, new Comparator() { - public int compare(WordNetID o1, WordNetID o2) { - if (o1.getClass() == o2.getClass()) { - return o1.compareTo(o2); - } else if (o1.getClass() == SynsetID.class) { - return -1; - } else if (o2.getClass() == SynsetID.class) { - return 1; - } else if (o1.getClass() == WordTagID.class) { - return -1; - } else if (o2.getClass() == WordTagID.class) { - return 1; - } else { - throw new RuntimeException("Unexpected WordNetID type"); - } - } - }); - - // re-number the wordnet id's - for (int i = 0; i < all.size(); i++) { - all.get(i).index = i; - } - - for (WordNetID id : all) { - id.compact(); - } - for (SynsetID id : synsets.values()) { - 
id.mWordTags.trimToSize(); - } - for (WordTagID id : wordtags.values()) { - id.mSynsets.trimToSize(); - } - all.trimToSize(); - } - - /** Checks the representation invariants */ - private void checkrep() { - for (EdgeType edgetype : EdgeType.values()) { - assertIt(loadedEdges.contains(edgetype), "Failed to load " + edgetype); - } - - for (SynsetID synset : synsets.values()) { - assertIt(all.get(synset.index()) == synset, "Misplaced synset " + synset); - assertIt(synset.getWordTag(1).getSynset(synset.numberInFirstWordTagID) == synset, - "Wrong number in first WordTagID " + synset); - assertIt(!synset.wordtags.contains(null), synset + " contains null WordTagID"); - - for (WordTagID wordtag : synset.wordtags) { - assertIt(wordtag.synsets.contains(synset), "Miswired Synset " + synset); - } - } - - for (WordTagID wordtag : wordtags.values()) { - assertIt(all.get(wordtag.index()) == wordtag, "Misplaced wordtag " + wordtag); - assertIt(!wordtag.synsets.contains(null), wordtag + " contains null SynsetID"); - - for (SynsetID synset : wordtag.synsets) { - assertIt(synset.wordtags.contains(wordtag), "Miswired WordTag " + wordtag); - } - - assertIt(wordtag.word.wordTags.containsValue(wordtag), "Miswired WordTag " + wordtag); - } - - for (WordID word : words.values()) { - assertIt(all.get(word.index()) == word, "Misplaced word " + word); - for (WordTagID wordtag : word.wordTags.values()) { - assertIt(wordtag.word == word, "Miswired WordTag " + word); - } - } - - for (WordNetID id1 : all) { - for (EdgeType[] pair : transpose) { - for (WordNetID id2 : id1.get(pair[0])) { - assertIt(id2.get(pair[1]).contains(id1), - "Missing transpose " + Arrays.asList(transpose) - + " " + id1 + " " + id2); - } - } - } - - for (WordNetID id1 : all) { - for (EdgeType type : reflexive) { - for (WordNetID id2 : id1.get(type)) { - assertIt(id2.get(type).contains(id1), - "Missing reflextive " + type + " " + id1 + " " + id2); - } - } - } - } - - /** Assertion method used by checkRep */ - private static 
void assertIt(boolean condition, String message) { - if (!condition) { - throw new AssertionError(message); - } - } - - - // - // WordNet loader - // - - /** Weak collection of WordNet instances based on file name */ - private static final Collection> instances - = new LinkedList>(); - - /** - * Loads an instance of WordNet from the given WordNet database -- - * currently supports only Prolog DB format. - */ - public static WordNet load(File path) { - System.err.println("WordNet.load: " + path); - - // see if already loaded - for (Iterator> it = instances.iterator(); it.hasNext();) { - - WordNet wordnet = it.next().get(); - - if (wordnet == null) { - it.remove(); - } else if (wordnet.path.equals(path)) { - return wordnet; - } - } - - // not already loaded, load now - try { - WordNet wordnet = loadPrologWordNet(path); - instances.add(new WeakReference(wordnet)); - return wordnet; - } catch (RuntimeIOException e) { - throw new IllegalArgumentException( - "Provided path not a valid WordNet PrologDB directory", e); - } - } - - /** - * Returns an instance of WordNet based on the contents of the WordNet - * databases as stored in the given path prolog WordNet 3.0 format. - */ - public static WordNet loadPrologWordNet(File path) { - WordNet wordnet = new WordNet(path); - - /** Global stashing point for unassigned word senses depending on wn POS tag */ - Map deferredPositionSynsets = new HashMap(); - - // - // read the synsets file - // - { - for (String line : ObjectBank.getLineIterator(new File(path, "wn_s.pl"))) { - if (line.length() == 0) continue; - - // fields from the line - String[] fields = line.substring(2, line.length() - 2).split(","); - final int wordTagNumberInSynset = Integer.parseInt(fields[1]); - final int synsetNumberInWordTag = Integer.parseInt( - fields.length > 4 ? fields[4] : "0"); - final int senseCount = Integer.parseInt( - fields.length > 5 ? 
fields[5] : "0"); - final String word = new String(fields[2].substring( - 1, fields[2].length() - 1).replaceAll("\\s+", "_")) - .toLowerCase(); - final PartOfSpeech tag = PartOfSpeech.fromWordNetSSType( - fields[3].charAt(0)); - final String wordTag = word + "#" + tag.ssType; - final String synset = fields[0]; - - // add WordID - WordID wordId = wordnet.words.get(word); - if (wordId == null) { - wordId = wordnet.new WordID(word); - wordnet.words.put(word, wordId); - } - - // add WordTagID - WordTagID wordTagId = wordnet.wordtags.get(wordTag); - if (wordTagId == null) { - wordTagId = wordnet.new WordTagID(wordId, tag); - wordnet.wordtags.put(wordTag, wordTagId); - } - - // add SynsetID - SynsetID synsetId = wordnet.synsets.get(synset); - if (synsetId == null) { - synsetId = wordnet.new SynsetID(Integer.parseInt(synset), - senseCount); - wordnet.synsets.put(synset, synsetId); - } - - // link WordID to WordTagID - wordId.addWordTag(wordTagId); - - // link WordTagID to SynsetID - if (synsetNumberInWordTag == 0) { - if (deferredPositionSynsets.containsKey(wordTagId)) { - throw new RuntimeException("Error: don't know what " + - "to do when more than one synset doesn't come" + - " with a valid sense number"); - } else { - deferredPositionSynsets.put(wordTagId, synsetId); - } - } else { - wordTagId.addSynset(synsetId, synsetNumberInWordTag); - } - synsetId.addWordSense(wordTagId, wordTagNumberInSynset); - } - - // add in deferredPositionSynsets - for (Map.Entry entry : deferredPositionSynsets.entrySet()) { - boolean placed = false; - for (int i = 0; i < entry.getKey().mSynsets.size(); i++) { - if (entry.getKey().mSynsets.get(i) == null) { - entry.getKey().mSynsets.set(i, entry.getValue()); - placed = true; - } - } - if (!placed) { - throw new AssertionError("Unable to place deferred synset"); - } - } - - // tell each SynsetID its position in its first WordTagID - for (SynsetID synset : wordnet.synsets.values()) { - int position = 
synset.wordtags.get(0).mSynsets.indexOf(synset) + 1; - assert position >= 1 : "Unexpected: couldn't find the synset"; - synset.numberInFirstWordTagID = position; - } - - // add all structural edges - wordnet.createStructuralEdges(); - } - - - // - // read all synset relations defined over SynsetID pairs - // - - // invert-ready synset relations - wordnet.loadSynsetRelation(path, "hyp", EdgeType.HYPONYM); - wordnet.loadSynsetRelation(path, "ins", EdgeType.INSTANCE_OF); - wordnet.loadSynsetRelation(path, "ent", EdgeType.ENTAILS); - wordnet.loadSynsetRelation(path, "sim", EdgeType.SIM_HEAD); - wordnet.loadSynsetRelation(path, "mm", EdgeType.MM_HOLONYM); - wordnet.loadSynsetRelation(path, "ms", EdgeType.MS_HOLONYM); - wordnet.loadSynsetRelation(path, "mp", EdgeType.MP_HOLONYM); - wordnet.loadSynsetRelation(path, "cs", EdgeType.CAUSED_BY); - - // self-reflexive synset relations - wordnet.loadSynsetRelation(path, "at", EdgeType.ATTRIBUTE); - - // invert-ready word relations - wordnet.loadWordRelation(path, "ppl", EdgeType.PARTICIPLE_OF); - wordnet.loadWordRelation(path, "per", EdgeType.PERTAINS_TO); - wordnet.loadWordRelation(path, "sa", EdgeType.SEE_ALSO_TO); - - // self-reflexive word relations - wordnet.loadWordRelation(path, "der", EdgeType.DERIVES); - wordnet.loadWordRelation(path, "vgp", EdgeType.SIMILAR_VERBS); - wordnet.loadWordRelation(path, "ant", EdgeType.ANTONYM); - - // weird class relations - wordnet.loadCLSRelations(path); - - // do transposes - for (EdgeType[] pair : transpose) { - wordnet.addTranspose(pair[0], pair[1]); - } - - // compact data structure and check rep invariants - - wordnet.compact(); - wordnet.checkrep(); - - return wordnet; - } - - /** - * Loads the given relation from the prolog file, storing the result in - * the given EdgeType. 
- */ - private void loadSynsetRelation(File path, String relation, EdgeType type) { - if (loadedEdges.contains(type)) { - throw new IllegalArgumentException("Unexpected error: trying to load " + type + " twice"); - } - loadedEdges.add(type); - - for (String line : ObjectBank.getLineIterator(new File(path, "wn_" + relation + ".pl"))) { - if (line.length() == 0) continue; - String[] fields = line.substring(relation.length() + 1, line.length() - 2).split(","); - - SynsetID id1 = getSynsetID(fields[0]); - SynsetID id2 = getSynsetID(fields[1]); - - id1.add(type, id2); - } - } - - /** - * Loads the given relation from the prolog file, storing the result in - * the given EdgeType. - */ - private void loadWordRelation(File path, String relation, EdgeType type) { - if (loadedEdges.contains(type)) { - throw new IllegalArgumentException("Unexpected error: trying to load " + type + " twice"); - } - loadedEdges.add(type); - - for (String line : ObjectBank.getLineIterator(new File(path, "wn_" + relation + ".pl"))) { - if (line.length() == 0) continue; - String[] fields = line.substring(relation.length() + 1, line.length() - 2).split(","); - - final SynsetID sid1 = getSynsetID(fields[0]); - final SynsetID sid2 = getSynsetID(fields[2]); - - if (sid1 == sid2) { - System.err.println("WordNet.loadWordRelation(" + relation + "): skipping self-loop on " + sid1); - } else { - sid1.add(type, sid2); - } - } - } - - /** - * Loads the given relation from the prolog file, storing the result in - * the given EdgeType. 
- */ - private void loadCLSRelations(File path) { - final String relation = "cls"; - - if (loadedEdges.contains(EdgeType.TERM_HAS_TOPIC) || - loadedEdges.contains(EdgeType.TERM_HAS_USAGE) || - loadedEdges.contains(EdgeType.TERM_IN_REGION)) { - - throw new IllegalArgumentException("Unexpected error while loading " + relation); - } - - loadedEdges.add(EdgeType.TERM_HAS_TOPIC); - loadedEdges.add(EdgeType.TERM_HAS_USAGE); - loadedEdges.add(EdgeType.TERM_IN_REGION); - - for (String line : ObjectBank.getLineIterator(new File(path, "wn_" + relation + ".pl"))) { - if (line.length() == 0) continue; - String[] fields = line.substring(relation.length() + 1, line.length() - 2).split(","); - - if (fields.length != 5 || fields[4].length() != 1) { - throw new IllegalArgumentException("Badly formed file for " + relation); - } - - SynsetID sid1 = getSynsetID(fields[0]); - SynsetID sid2 = getSynsetID(fields[2]); - - int num1 = Integer.parseInt(fields[1]); - int num2 = Integer.parseInt(fields[3]); - assert !((num1 == 0) ^ (num2 == 0)); - - WordNetID id1 = num1 == 0 ? sid1 : sid1.getWordTag(num1); - WordNetID id2 = num2 == 0 ? 
sid2 : sid2.getWordTag(num2); - - switch (fields[4].charAt(0)) { - case 't': - id1.add(EdgeType.TERM_HAS_TOPIC, id2); - break; - - case 'u': - id1.add(EdgeType.TERM_HAS_USAGE, id2); - break; - - case 'r': - id1.add(EdgeType.TERM_IN_REGION, id2); - break; - - default: - throw new IllegalArgumentException("Unexpected relation type " + fields[4]); - } - } - } - - /** Adds a new edge type t2 as the transpose of t1 */ - private void addTranspose(EdgeType t1, EdgeType t2) { - if (!loadedEdges.contains(t1)) { - throw new IllegalArgumentException("Cannot transpose: doesn't contain " + t1); - } else if (loadedEdges.contains(t2)) { - throw new IllegalArgumentException("Cannot transpose: already contains " + t2); - } - loadedEdges.add(t2); - - /** Add reverse links for all types */ - for (WordNetID ab : all) { - for (WordNetID ba : ab.get(t1)) { - ba.add(t2, ab); - } - } - } - - - // - // Sample main method for testing - // - public static void main(String[] args) { - WordNet wordnet = load(new File(args[0])); - - SynsetID id1 = wordnet.getSynsetID("run#v#1"); - - for (EdgeType edgetype : EdgeType.values()) { - for (WordNetID id2 : id1.get(edgetype)) { - System.out.println(edgetype + " " + id2); - } - } - - System.out.println(wordnet.getAllWordNetIDs().size()); - - for (SynsetID synset : wordnet.synsets.values()) { - System.out.printf("%06d %s\n", synset.synset, synset.toString()); - } - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/geo880/Geo880TypeLookup.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/geo880/Geo880TypeLookup.java deleted file mode 100644 index 920f48cdf0..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/geo880/Geo880TypeLookup.java +++ /dev/null @@ -1,110 +0,0 @@ -package edu.stanford.nlp.sempre.geo880; - -import edu.stanford.nlp.sempre.SemType; -import edu.stanford.nlp.sempre.SemTypeHierarchy; -import edu.stanford.nlp.sempre.TypeLookup; -import fig.basic.IOUtils; -import fig.basic.Option; -import 
fig.basic.LogInfo; - -import java.io.IOException; -import java.util.HashSet; -import java.util.Set; - -/** - * Type lookup for the geo880 domain, Mostly for distinguishing locations and numbers. - * We also use a type hierarchy provided by a file to match |location.us_state| and |location.location| etc. - * Created by joberant on 05/12/2016. - */ -public class Geo880TypeLookup implements TypeLookup{ - public static class Options { - @Option(gloss = "Verbosity") public int verbose = 0; - @Option(gloss = "A path to a file that specified the type hierarchy.") - public String typeHierarchyPath; - - } - public static Options opts = new Options(); - public static final String LOCATION = "fb:location.location"; - public static final String CITY = "fb:location.citytown"; - public static final String STATE = "fb:location.us_state"; - public static final String RIVER = "fb:location.river"; - public static final String LAKE = "fb:location.lake"; - public static final String MOUNTAIN = "fb:location.mountain"; - public static final String COUNTRY = "fb:location.country"; - - public Geo880TypeLookup() { - SemTypeHierarchy semTypeHierarchy = SemTypeHierarchy.singleton; - if (opts.typeHierarchyPath != null) { - try { - for (String line : IOUtils.readLines(opts.typeHierarchyPath)) { - String[] tokens = line.split("\\s+"); - - // Check the file only contains relations about supertypes. - assert tokens[1].endsWith("included_types"); - semTypeHierarchy.addSupertype(tokens[0], tokens[0]); - semTypeHierarchy.addSupertype(tokens[2], tokens[2]); - semTypeHierarchy.addSupertype(tokens[0], tokens[2]); - } - } catch (IOException e) { - e.printStackTrace(); - throw new RuntimeException("Could not read lines from: " + opts.typeHierarchyPath); - } - } - } - - @Override - public SemType getEntityType(String entity) { - // Entites are of the form fb:state.florida. 
- int colonIndex = entity.indexOf(':'); - int dotIndex = entity.indexOf('.'); - String type = entity.substring(colonIndex+1, dotIndex); - - if (type.equals("place")) { - type = LOCATION; - } - else if (type.equals("city")) { - type = CITY; - } - else if (type.equals("state")) { - type = STATE; - } - else if (type.equals("river")) { - type = RIVER; - } - else if (type.equals("lake")) { - type = LAKE; - } - else if (type.equals("mountain")) { - type = MOUNTAIN; - } - else if (type.equals("country")) { - type = COUNTRY; - } - else { - throw new RuntimeException("Illegal entity: " + entity); - } - SemType result = SemType.newUnionSemType(type); - if (opts.verbose >= 1) { - LogInfo.logs("Entity=%s, Type=%s", entity, result); - } - return result; - } - - @Override - public SemType getPropertyType(String property) { - // Properties are of the form fb:location.location.population. - String arg1 = property.substring(0, property.lastIndexOf('.')); - String suffix = property.substring(property.lastIndexOf('.') + 1); - String arg2 = LOCATION; - if (suffix.equals("density") || suffix.equals("elevation") || - suffix.equals("population") || suffix.equals("size") || - suffix.equals("area") || suffix.equals("length")) { - arg2 = "fb:type.number"; - } - SemType result = SemType.newFuncSemType(arg2, arg1); - if (opts.verbose >= 1) { - LogInfo.logs("Property=%s, Type=%s", property, result); - } - return result; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/geo880/Geo880ValueEvaluator.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/geo880/Geo880ValueEvaluator.java deleted file mode 100644 index 3dc3feb14c..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/geo880/Geo880ValueEvaluator.java +++ /dev/null @@ -1,85 +0,0 @@ -package edu.stanford.nlp.sempre.geo880; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.tables.StringNormalizationUtils; -import fig.basic.LogInfo; - -import java.util.List; - -/** - * This is only 
used because the data does not mention when a city is in the usa, but - * the kg returns usa, and we want to use exact match, so we add this logic here. - * Created by joberant on 03/12/2016. - */ -public class Geo880ValueEvaluator implements ValueEvaluator { - - public double getCompatibility(Value target, Value pred) { - List targetList = ((ListValue) target).values; - if (!(pred instanceof ListValue)) return 0; - List predList = ((ListValue) pred).values; - - // In geo880, if we return that something is contained in a state, there is no need to return fb:country.usa - Value toDelete = null; - if (predList.size() > 1 && predList.get(0) instanceof NameValue) { - for (Value v: predList) { - String id = ((NameValue) v).id; - if (id.equals("fb:country.usa")) { - toDelete = v; - break; - } - } - } - if (toDelete != null) { - predList.remove(toDelete); - } - - if (targetList.size() != predList.size()) return 0; - - for (Value targetValue : targetList) { - boolean found = false; - for (Value predValue : predList) { - if (getItemCompatibility(targetValue, predValue)) { - found = true; - break; - } - } - if (!found) return 0; - } - return 1; - } - - // ============================================================ - // Item Compatibility - // ============================================================ - - // Compare one element of the list. 
- protected boolean getItemCompatibility(Value target, Value pred) { - if (pred instanceof ErrorValue) return false; // Never award points for error - if (pred == null) { - LogInfo.warning("Predicted value is null!"); - return false; - } - - if (target instanceof DescriptionValue) { - String targetText = ((DescriptionValue) target).value; - if (pred instanceof NameValue) { - // Just has to match the description - String predText = ((NameValue) pred).description; - if (predText == null) predText = ""; - return targetText.equals(predText); - } - } else if (target instanceof NumberValue) { - NumberValue targetNumber = (NumberValue) target; - if (pred instanceof NumberValue) { - return compareNumberValues(targetNumber, (NumberValue) pred); - } - } - - return target.equals(pred); - } - - protected boolean compareNumberValues(NumberValue target, NumberValue pred) { - return Math.abs(target.value - pred.value) < 1e-6; - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/ApplyFn.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/ApplyFn.java deleted file mode 100644 index 14241f38bd..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/ApplyFn.java +++ /dev/null @@ -1,66 +0,0 @@ -package edu.stanford.nlp.sempre.interactive; - -import java.util.List; - -import edu.stanford.nlp.sempre.Derivation; -import edu.stanford.nlp.sempre.DerivationStream; -import edu.stanford.nlp.sempre.Example; -import edu.stanford.nlp.sempre.Formula; -import edu.stanford.nlp.sempre.Formulas; -import edu.stanford.nlp.sempre.LambdaFormula; -import edu.stanford.nlp.sempre.SemanticFn; -import edu.stanford.nlp.sempre.SingleDerivationStream; -import fig.basic.LispTree; -import fig.basic.Option; - -/** - * Take any number of arguments and apply them to the lambda expression given in - * this SemanticFn TODO: type inference, some function applications - * - * @author sidaw - */ -public class ApplyFn extends SemanticFn { - public 
static class Options { - @Option(gloss = "verbosity") - public int verbose = 0; - } - - public static Options opts = new Options(); - - Formula formula; - - @Override - public void init(LispTree tree) { - super.init(tree); - formula = Formulas.fromLispTree(tree.child(1)); - } - - public Formula getFormula() { - return formula; - } - - public ApplyFn() { - } - - public ApplyFn(Formula f) { - formula = f; - } - - @Override - public DerivationStream call(final Example ex, final Callable c) { - return new SingleDerivationStream() { - @Override - public Derivation createDerivation() { - List args = c.getChildren(); - Formula f = Formulas.fromLispTree(formula.toLispTree()); - for (Derivation arg : args) { - if (!(f instanceof LambdaFormula)) - throw new RuntimeException("Expected LambdaFormula, but got " + f + "; initial: " + formula); - f = Formulas.lambdaApply((LambdaFormula) f, arg.getFormula()); - } - Derivation res = new Derivation.Builder().withCallable(c).formula(f).createDerivation(); - return res; - } - }; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/BadInteractionException.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/BadInteractionException.java deleted file mode 100644 index cbfd3dfdbf..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/BadInteractionException.java +++ /dev/null @@ -1,48 +0,0 @@ -package edu.stanford.nlp.sempre.interactive; - -public class BadInteractionException extends RuntimeException { - - public static BadInteractionException nonSenseDefinition(String head) { - String message = String.format("Definitions should make sense and useable by yourself and others" - + "-- using more than 10 words, " + "or more than 15 characters in a word is not allowed." 
- + "If your definition is not non-sense," + "please paste this message in our bugs channel (head: %s)", head); - return new BadInteractionException(message); - } - - public static BadInteractionException headIsCore(String head) { - String message = String.format( - "Redefining the core language is not allowed, " + "please reword your command and try again (head: %s)", head); - return new BadInteractionException(message); - } - - public static BadInteractionException headIsEmpty(String head) { - String message = String.format("Cannot define with an empty head (head: %s)", head); - return new BadInteractionException(message); - } - - public BadInteractionException() { - // TODO Auto-generated constructor stub - } - - public BadInteractionException(String message) { - super(message); - // TODO Auto-generated constructor stub - } - - public BadInteractionException(Throwable cause) { - super(cause); - // TODO Auto-generated constructor stub - } - - public BadInteractionException(String message, Throwable cause) { - super(message, cause); - // TODO Auto-generated constructor stub - } - - public BadInteractionException(String message, Throwable cause, boolean enableSuppression, - boolean writableStackTrace) { - super(message, cause, enableSuppression, writableStackTrace); - // TODO Auto-generated constructor stub - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/BlockFn.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/BlockFn.java deleted file mode 100644 index 7e736ed2a7..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/BlockFn.java +++ /dev/null @@ -1,106 +0,0 @@ -package edu.stanford.nlp.sempre.interactive; - -import java.util.List; - -import org.testng.collections.Lists; - -import edu.stanford.nlp.sempre.ActionFormula; -import edu.stanford.nlp.sempre.Derivation; -import edu.stanford.nlp.sempre.DerivationStream; -import edu.stanford.nlp.sempre.Example; -import 
edu.stanford.nlp.sempre.FeatureExtractor; -import edu.stanford.nlp.sempre.FeatureVector; -import edu.stanford.nlp.sempre.SemanticFn; -import edu.stanford.nlp.sempre.SingleDerivationStream; -import fig.basic.LispTree; -import fig.basic.Option; - -/** - * Generates formula scoped in various modes sequential: just perform in - * sequence, no scoping block: basic scoping block blockr: returns selected - * isolate: scopes allItems instead of selected - * - * @author sidaw - */ -public class BlockFn extends SemanticFn { - public static class Options { - @Option(gloss = "verbosity") - public int verbose = 0; - } - - public static Options opts = new Options(); - - List scopingModes = Lists.newArrayList(ActionFormula.Mode.block, ActionFormula.Mode.blockr, - ActionFormula.Mode.isolate); - ActionFormula.Mode mode = ActionFormula.Mode.block; - boolean optional = true; - - @Override - public void init(LispTree tree) { - super.init(tree); - if (tree.child(1).value.equals("sequential")) - mode = ActionFormula.Mode.sequential; - else if (tree.child(1).value.equals("block")) - mode = ActionFormula.Mode.block; - else if (tree.child(1).value.equals("blockr")) - mode = ActionFormula.Mode.blockr; - else if (tree.child(1).value.equals("isolate")) - mode = ActionFormula.Mode.isolate; - else - mode = ActionFormula.Mode.sequential; - } - - public BlockFn(ActionFormula.Mode mode) { - this.mode = mode; - } - - public BlockFn() { - this.mode = ActionFormula.Mode.sequential; - } - - @Override - public DerivationStream call(final Example ex, final Callable c) { - return new SingleDerivationStream() { - @Override - public Derivation createDerivation() { - List args = c.getChildren(); - if (args.size() == 1) { - Derivation onlyChild = args.get(0); - // LogInfo.logs("1 BlockFn %s : %s Example.size=%d, callInfo(%d,%d)", - // onlyChild, mode, ex.getTokens().size(), onlyChild.getStart(), - // onlyChild.getEnd()); - - if (onlyChild == null) - return null; - if (onlyChild.getStart() != 0 || 
onlyChild.getEnd() != ex.getTokens().size()) - return null; - // do not do anything to the core language - if (onlyChild.allAnchored()) - return null; - // if (!ILUtils.stripBlock(onlyChild).rule.isInduced()) return null; - - // if already blocked explicitly, do not do anything - if (scopingModes.contains(((ActionFormula) onlyChild.formula).mode)) - return null; - - // do not repeat any blocks - if (((ActionFormula) onlyChild.formula).mode == BlockFn.this.mode) - return null; - - FeatureVector features = new FeatureVector(); - if (FeatureExtractor.containsDomain(":scope")) { - features.add(":scope", - BlockFn.this.mode.toString() + "::" + !InteractiveUtils.stripBlock(onlyChild).rule.isInduced()); - features.add(":scope", BlockFn.this.mode.toString() + "::" + ex.id); - } - - Derivation deriv = new Derivation.Builder() - .formula(new ActionFormula(mode, Lists.newArrayList(onlyChild.formula))).withCallable(c) - .localFeatureVector(features).createDerivation(); - return deriv; - } else - return null; - } - }; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/CitationTracker.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/CitationTracker.java deleted file mode 100644 index 25f2bd030a..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/CitationTracker.java +++ /dev/null @@ -1,166 +0,0 @@ -package edu.stanford.nlp.sempre.interactive; - -import java.io.File; -import java.io.PrintWriter; -import java.io.UnsupportedEncodingException; -import java.net.URLDecoder; -import java.net.URLEncoder; -import java.nio.file.Paths; -import java.time.LocalDateTime; -import java.util.Base64; -import java.util.LinkedHashMap; -import java.util.Map; - -import com.google.common.base.Strings; - -import edu.stanford.nlp.sempre.Derivation; -import edu.stanford.nlp.sempre.Example; -import edu.stanford.nlp.sempre.Json; -import edu.stanford.nlp.sempre.Rule; -import fig.basic.IOUtils; -import fig.basic.LogInfo; - -/** 
- * Tracks rule usage via a citation system. A rule is cited when a user makes - * use of that rule in a derivation. - * - * @author sidaw - */ -public class CitationTracker { - public static final String IDPrefix = "id:"; - public static final String HeadPrefix = "head:"; - public static final String BodyPrefix = "body:"; - String uid = "undefined"; - Example ex; - - public CitationTracker(String uid, Example ex) { - this.uid = uid; - this.ex = ex; - } - - public synchronized void citeRule(Rule rule) { - writeLog(rule); - writeSummary(rule); - } - - // The summary is ONE SINGLE line of json, has cite, self, and head - private synchronized void writeSummary(Rule rule) { - String author = getAuthor(rule); - String summaryPath = Paths.get(InteractiveUtils.opts.citationPath, encode(author), encode(getHead(rule)) + ".json") - .toString(); - File file = new File(summaryPath); - file.getParentFile().mkdirs(); - - Map summary; - try { - String line = IOUtils.readLineEasy(summaryPath); - if (line == null) - summary = defaultMap(rule); - else - summary = Json.readMapHard(line); - - boolean selfcite = author.equals(uid); - if (!selfcite) { - summary.put("cite", (Integer) summary.get("cite") + 1); - rule.source.cite++; - } else { - summary.put("self", (Integer) summary.get("self") + 1); - rule.source.self++; - } - - } catch (Exception e) { - summary = defaultMap(rule); - e.printStackTrace(); - } - String jsonStr = Json.writeValueAsStringHard(summary); - PrintWriter out = IOUtils.openOutHard(file); - out.println(jsonStr); - out.close(); - } - - private Map defaultMap(Rule rule) { - Map summary = new LinkedHashMap<>(); - summary.put("cite", 0); - summary.put("self", 0); - summary.put("private", true); - summary.put("head", getHead(rule)); - summary.put("body", getBody(rule)); - return summary; - } - - private synchronized void writeLog(Rule rule) { - String head = getHead(rule); - String author = getAuthor(rule); - String logPath = Paths.get(InteractiveUtils.opts.citationPath, 
encode(author), encode(head) + ".json.log") - .toString(); - File file = new File(logPath); - file.getParentFile().mkdirs(); - - Map jsonMap = new LinkedHashMap<>(); - jsonMap.put("user", this.uid); - // jsonMap.put("body", decode(getBody(rule))); - jsonMap.put("time", LocalDateTime.now().toString()); - jsonMap.put("tokens", ex.getTokens()); - // jsonMap.put("head", decode(headCode)); - jsonMap.put("author", author); - - String jsonStr = Json.writeValueAsStringHard(jsonMap); - PrintWriter out = IOUtils.openOutAppendHard(file); - out.println(jsonStr); - out.close(); - } - - public void citeAll(Derivation deriv) { - if (deriv.rule != null && deriv.rule.isInduced()) { - LogInfo.logs("CitationTracker: user %s is citing rule: %s", this.uid, deriv.rule.toString()); - citeRule(deriv.rule); - } - - if (deriv.children == null) - return; - for (Derivation d : deriv.children) { - citeAll(d); - } - } - - static String getAuthor(Rule rule) { - try { - String author = rule.source.uid; - if (Strings.isNullOrEmpty(author)) - return "__noname__"; - else - return author; - } catch (Exception e) { - e.printStackTrace(); - return "__noname__"; - } - } - - static String getHead(Rule rule) { - return rule.source.head; - } - - static String getBody(Rule rule) { - return String.join(". 
", rule.source.body); - } - - public static String encode(String utt) { - try { - return URLEncoder.encode(utt, "UTF-8"); - } catch (UnsupportedEncodingException e) { - e.printStackTrace(); - } - return Base64.getUrlEncoder().encodeToString(utt.getBytes()); - // return Base64.getUrlEncoder().encodeToString(utt.getBytes()); - } - - public static String decode(String code) { - try { - return URLDecoder.decode(code, "UTF-8"); - } catch (UnsupportedEncodingException e) { - e.printStackTrace(); - } - return Base64.getUrlDecoder().decode(code).toString(); - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/DALAnalyzer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/DALAnalyzer.java deleted file mode 100644 index cb5587aa5e..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/DALAnalyzer.java +++ /dev/null @@ -1,136 +0,0 @@ -package edu.stanford.nlp.sempre.interactive; - -import java.util.Arrays; - -import edu.stanford.nlp.sempre.LanguageAnalyzer; -import edu.stanford.nlp.sempre.LanguageInfo; - -/** - * LanguageAnalyzer for DAL, basically a lexer - * - * @author sidaw - */ -public class DALAnalyzer extends LanguageAnalyzer { - // Stanford tokenizer doesn't break hyphens. - // Replace hypens with spaces for utterances like - // "Spanish-speaking countries" but not for "2012-03-28". 
- public static String breakHyphens(String utterance) { - StringBuilder buf = new StringBuilder(utterance); - for (int i = 0; i < buf.length(); i++) { - if (buf.charAt(i) == '-' && (i + 1 < buf.length() && Character.isLetter(buf.charAt(i + 1)))) - buf.setCharAt(i, ' '); - } - return buf.toString(); - } - - private static final String[] numbers = { "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", - "nine", "ten" }; - - @Override - public LanguageInfo analyze(String utterance) { - LanguageInfo languageInfo = new LanguageInfo(); - - // Clear these so that analyze can hypothetically be called - // multiple times. - languageInfo.tokens.clear(); - languageInfo.posTags.clear(); - languageInfo.nerTags.clear(); - languageInfo.nerValues.clear(); - languageInfo.lemmaTokens.clear(); - - // do not break hyphens - //utterance = breakHyphens(utterance); - - // Default analysis - create tokens crudely - StringBuilder buf = new StringBuilder(); - for (int i = 0; i < utterance.length(); i++) { - char c = utterance.charAt(i); - // Put whitespace around certain characters. - boolean boundaryBefore = !(i - 1 >= 0) || utterance.charAt(i - 1) == ' '; - boolean boundaryAfter = !(i + 1 < utterance.length()) || utterance.charAt(i + 1) == ' '; - boolean separate = false; - // if (c == '.') // Break off period if already space around it (to preserve - // // numbers like 3.5) - // separate = boundaryBefore || boundaryAfter; - // else if (c == '=') // separate all >, =, < except >=, <= - // separate = !(i - 1 >= 0) || ((utterance.charAt(i - 1) != '>' && utterance.charAt(i - 1) != '<')); - // else if (c == '>' || c == '<') - // separate = !(i + 1 < utterance.length()) - // || ((utterance.charAt(i + 1) != '=' && utterance.charAt(i + 1) != '=')); - // else - - // WIP - // // Hard separators - // if (",".indexOf(c) != -1) { // (include `;`?) - // tokenList.add(buf.toString()); - // ? 
- // } - // // - // int cat = 1; - // if ("()[]{}".indexOf(c) != -1) - // cat *= 2; - // if ("!%&$#+-/:<=>?@\\~`^|*".indexOf(c) != -1) - // cat *= 3; - // if ("_'".indexOf(c) != -1 || Character.isLetterOrDigit(c)) - // cat *= 5; - separate = (",?'\"[];{}+()`".indexOf(c) != -1); - - if (separate) - buf.append(' '); - // Convert quotes - if (c == '"') - buf.append(boundaryBefore ? "``" : "''"); - else if (c == '\'') - buf.append(boundaryBefore ? "`" : "'"); - // else if (c == '>' || c == '<') { - // buf.append(' '); - // buf.append(c); - // } else if (c == '=') { - // buf.append(c); - // buf.append(' '); - // } else - else buf.append(c); - if (separate) - buf.append(' '); - } - utterance = buf.toString().trim(); - if (!utterance.equals("")) { - String[] tokens = utterance.split("\\s+"); - for (String token : tokens) { - String lemma = token; - if (token.endsWith("s") && token.length() > 1) - lemma = token.substring(0, token.length() - 1); - - languageInfo.tokens.add(LanguageAnalyzer.opts.lowerCaseTokens ? token.toLowerCase() : token); - languageInfo.lemmaTokens.add(LanguageAnalyzer.opts.lowerCaseTokens ? lemma.toLowerCase() : lemma); - - // Is it a written out number? 
- int x = Arrays.asList(numbers).indexOf(token); - if (x != -1) { - languageInfo.posTags.add("CD"); - languageInfo.nerTags.add("NUMBER"); - languageInfo.nerValues.add(x + ""); - continue; - } - - try { - Double.parseDouble(token); - languageInfo.posTags.add("CD"); - languageInfo.nerTags.add("NUMBER"); - languageInfo.nerValues.add(token); - } catch (NumberFormatException e) { - // Guess that capitalized nouns are proper - if (Character.isUpperCase(token.charAt(0))) - languageInfo.posTags.add("NNP"); - else if (token.equals("'") || token.equals("\"") || token.equals("''") || token.equals("``")) - languageInfo.posTags.add("''"); - else - languageInfo.posTags.add("UNK"); - languageInfo.nerTags.add("UNK"); - languageInfo.nerValues.add("UNK"); - } - } - } - return languageInfo; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/Definition.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/Definition.java deleted file mode 100644 index 087164dade..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/Definition.java +++ /dev/null @@ -1,11 +0,0 @@ -package edu.stanford.nlp.sempre.interactive; - -import java.util.List; - -import edu.stanford.nlp.sempre.Derivation; -import edu.stanford.nlp.sempre.Example; - -public class Definition { - public Definition(Example head, List chartList, List body) { - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/DefinitionAligner.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/DefinitionAligner.java deleted file mode 100644 index 8c8a71f8e3..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/DefinitionAligner.java +++ /dev/null @@ -1,234 +0,0 @@ -package edu.stanford.nlp.sempre.interactive; - -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.stream.Collectors; - -import 
com.google.common.collect.Lists; -import com.google.common.collect.Sets; - -import edu.stanford.nlp.sempre.Derivation; -import edu.stanford.nlp.sempre.Rule; -import edu.stanford.nlp.sempre.interactive.GrammarInducer.ParseStatus; -import fig.basic.LogInfo; -import fig.basic.Option; - -/** - * Takes the definition and the head, then induce rules through alignment - * - * @author sidaw - */ - -public class DefinitionAligner { - public static class Options { - @Option(gloss = "categories that can serve as rules") - public Set alignedCats = new HashSet(); - @Option(gloss = "phrase size") - public int phraseSize = 2; - @Option(gloss = "max length difference") - public int maxLengthDifference = 3; - @Option(gloss = "max set exclusion length") - public int maxSetExclusionLength = 2; - @Option(gloss = "max exact exclusion length") - public int maxExactExclusionLength = 5; - @Option(gloss = "window size") - public int windowSize = 1; - - @Option(gloss = "strategies") - public Set strategies = Sets.newHashSet(Strategies.SetExclusion, Strategies.ExactExclusion); - @Option(gloss = "maximum matches") - public int maxMatches = 3; - @Option(gloss = "verbose") - public int verbose = 0; - - } - - public enum Strategies { - SetExclusion, ExactExclusion, cmdSet - }; - - public static Options opts = new Options(); - - public class Match { - @Override - public String toString() { - return "Match [deriv=" + deriv + ", start=" + start + ", end=" + end + "]"; - } - - public Match(Derivation def, int start, int end) { - deriv = def; - this.start = start; - this.end = end; - deriv.grammarInfo.start = start; - deriv.grammarInfo.end = end; - } - - Derivation deriv; - int start; - int end; - } - - List headTokens; - List defTokens; - - public static List getRules(List head, List def, Derivation deriv, List chartList) { - if (opts.verbose > 0) - LogInfo.logs("DefinitionAligner.chartList: %s", chartList); - - DefinitionAligner aligner = new DefinitionAligner(head, def, deriv, chartList); - - List 
allAlignedRules = Lists.newArrayList(); - if (opts.verbose > 0) - LogInfo.logs("DefinitionAligner.allMatches.size(): %d", aligner.allMatches.size()); - - for (int i = 0; i < aligner.allMatches.size() && i <= opts.maxMatches; i++) { - Match match = aligner.allMatches.get(i); - - List filteredList = chartList.stream() - .filter(d -> d.start >= match.deriv.start && d.end <= match.deriv.end).collect(Collectors.toList()); - - // filter out core - List currentParses = chartList.stream().filter(d -> { - if (opts.verbose > 1) - LogInfo.logs("DefinitionAligner.chartList.d: %s", d); - return (d.start == match.start && d.end == match.end); - }).collect(Collectors.toList()); - - if (opts.verbose > 1) - LogInfo.logs("DefinitionAligner.Match: %s", match); - if (opts.verbose > 1) - LogInfo.logs("DefinitionAligner.currentParses: %s", currentParses); - - if (GrammarInducer.getParseStatus(currentParses) != ParseStatus.Core) { - if (opts.verbose > 1) - LogInfo.logs("DefinitionAligner.NotCore: %s", currentParses); - GrammarInducer grammarInducer = new GrammarInducer(head, match.deriv, filteredList); - allAlignedRules.addAll(grammarInducer.getRules()); - } - } - return allAlignedRules; - } - - public List allMatches = new ArrayList<>(); - private Map> chartMap; - - public DefinitionAligner(List headTokens, List defTokens, Derivation def, - List chartList) { - this.headTokens = headTokens; - this.defTokens = defTokens; - this.chartMap = GrammarInducer.makeChartMap(chartList); - if (opts.verbose > 0) - LogInfo.logs("DefinitionAligner: head '%s' as body: '%s'", headTokens, defTokens); - if (Math.abs(headTokens.size() - defTokens.size()) >= 4) - return; - recursiveMatch(def); - } - - void recursiveMatch(Derivation def) { - // LogInfo.logs("Considering (%d,%d): %s", def.start, def.end, def); - for (int start = 0; start < headTokens.size(); start++) { - for (int end = headTokens.size(); end > start; end--) { - // LogInfo.logs("Testing (%d,%d)", start, end); - if (end == headTokens.size() && 
start == 0) - continue; - if (isMatch(def, start, end)) { - if (opts.verbose > 0) - LogInfo.logs("Matched head(%d,%d)=%s with deriv(%d,%d)=%s: %s", start, end, headTokens.subList(start, end), - def.start, def.end, defTokens.subList(def.start, def.end), def); - allMatches.add(new Match(def, start, end)); - return; - } - } - } - - for (Derivation d : def.children) { - recursiveMatch(d); - } - } - - boolean isMatch(Derivation def, int start, int end) { - if (def.start == -1 || def.end == -1) - return false; - if (chartMap.containsKey(GrammarInducer.catFormulaKey(def))) - return false; - if (opts.verbose > 0) - LogInfo.logs("checkingLengths (%d, %d) - (%d, %d)", start, end, def.start, def.end); - if (Math.abs((end - start) - (def.end - def.start)) >= opts.maxLengthDifference) - return false; - if (opts.strategies.contains(Strategies.ExactExclusion) && exactExclusion(def, start, end)) - return true; - if (opts.strategies.contains(Strategies.SetExclusion) && setExclusion(def, start, end)) - return true; - if (opts.strategies.contains(Strategies.cmdSet) && cmdSet(def, start, end)) - return true; - - return false; - } - - private boolean setExclusion(Derivation def, int start, int end) { - // the span under consideration does not match anythign - if (end - start > opts.maxSetExclusionLength) - return false; - if (!headTokens.subList(start, end).stream().noneMatch(t -> defTokens.contains(t))) - return false; - if (!defTokens.subList(def.start, def.end).stream().noneMatch(t -> headTokens.contains(t))) - return false; - - // everything before and afterwards are accounted for - if (!headTokens.subList(0, start).stream().allMatch(t -> defTokens.contains(t))) - return false; - if (!headTokens.subList(end, headTokens.size()).stream().allMatch(t -> defTokens.contains(t))) - return false; - return true; - } - - private List window(int lower, int upper, List list) { - List ret = new ArrayList<>(); - for (int i = lower; i < upper; i++) { - if (i < 0 || i >= list.size()) - 
ret.add("(*)"); - else - ret.add(list.get(i)); - } - return ret; - } - - private boolean exactExclusion(Derivation def, int start, int end) { - if (opts.verbose > 0) - LogInfo.log("In exactExclusion"); - if (end - start > opts.maxExactExclusionLength) - return false; - - boolean prefixEq = window(start - opts.windowSize, start, headTokens) - .equals(window(def.start - opts.windowSize, def.start, defTokens)); - boolean sufixEq = window(end, end + opts.windowSize, headTokens) - .equals(window(def.end, def.end + opts.windowSize, defTokens)); - if (opts.verbose > 0) - LogInfo.logs("%b : %b", prefixEq, sufixEq); - if (opts.verbose > 0) - LogInfo.logs("(%d,%d)-head(%d,%d): %b %b %s %s", def.start, def.end, start, end, prefixEq, sufixEq, - window(end, end + opts.windowSize, headTokens), window(def.end, def.end + opts.windowSize, defTokens)); - if (!prefixEq || !sufixEq) - return false; - if (headTokens.subList(start, end).equals(defTokens.subList(def.start, def.end))) - return false; - - return true; - } - - // exact match plus big - private boolean cmdSet(Derivation def, int start, int end) { - if (opts.verbose > 0) - LogInfo.log("In exactPlusBig"); - // match only beginning and end - boolean cmdSet = (end == headTokens.size()) && (start > 0) && def.end == defTokens.size() && def.start > 0; - if (cmdSet && headTokens.subList(0, start).equals(defTokens.subList(0, start))) - return true; - - return false; - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/GrammarInducer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/GrammarInducer.java deleted file mode 100644 index 1e2051dec7..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/GrammarInducer.java +++ /dev/null @@ -1,465 +0,0 @@ -package edu.stanford.nlp.sempre.interactive; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; 
-import java.util.Set; -import java.util.stream.Collectors; - -import com.beust.jcommander.internal.Lists; -import com.google.common.base.Function; - -import edu.stanford.nlp.sempre.ActionFormula; -import edu.stanford.nlp.sempre.ConstantFn; -import edu.stanford.nlp.sempre.Derivation; -import edu.stanford.nlp.sempre.Example; -import edu.stanford.nlp.sempre.Formula; -import edu.stanford.nlp.sempre.Formulas; -import edu.stanford.nlp.sempre.IdentityFn; -import edu.stanford.nlp.sempre.LambdaFormula; -import edu.stanford.nlp.sempre.Rule; -import edu.stanford.nlp.sempre.SemanticFn; -import edu.stanford.nlp.sempre.VariableFormula; -import fig.basic.LispTree; -import fig.basic.LogInfo; -import fig.basic.Option; - -/** - * Takes two examples, and induce Rules - * - * @author sidaw - */ - -public class GrammarInducer { - public static class Options { - @Option(gloss = "categories that can serve as rules") - public Set filteredCats = new HashSet(); - @Option(gloss = "verbose") - public int verbose = 0; - @Option(gloss = "cats that never overlaps, and always save to replace") - public List simpleCats = Lists.newArrayList("$Color", "$Number", "$Direction"); - @Option(gloss = "use best packing") - public boolean useBestPacking = true; - @Option(gloss = "use simple packing") - public boolean useSimplePacking = true; - @Option(gloss = "maximum nonterminals in a rule") - public long maxNonterminals = 4; - @Option(gloss = "minimum terminals in a rule") - public int minTerminals = 1; - } - - public static Options opts = new Options(); - - private List inducedRules = null; - - List headTokens; - String id; - - public List matches; - Derivation def; - - // induce rule is possible, - // otherwise set the correct status - public GrammarInducer(List headTokens, Derivation def, List chartList) { - // grammarInfo start and end is used to indicate partial, when using aligner - boolean allHead = false; - if (def.grammarInfo.start == -1) { - def.grammarInfo.start = 0; - def.grammarInfo.end = 
headTokens.size(); - allHead = true; - } - - // dont want weird cat unary rules with strange semantics - if (headTokens == null || headTokens.isEmpty()) { - throw new RuntimeException("The head is empty, refusing to define."); - } - chartList.removeIf(d -> d.start == def.grammarInfo.start && d.end == def.grammarInfo.end); - this.def = def; - - this.headTokens = headTokens; - int numTokens = headTokens.size(); - - this.matches = new ArrayList<>(); - addMatches(def, makeChartMap(chartList)); - Collections.reverse(this.matches); - - inducedRules = new ArrayList<>(); - if (allHead && opts.useSimplePacking) { - List filteredMatches = this.matches.stream().filter(d -> { - return opts.simpleCats.contains(d.cat) && d.allAnchored() && d.end - d.start == 1; - }).collect(Collectors.toList()); - - List packing = new ArrayList<>(); - for (int i = 0; i <= headTokens.size(); i++) { - for (Derivation d : filteredMatches) { - if (d.start == i) { - packing.add(d); - break; - } - } - } - - HashMap formulaToCat = new HashMap<>(); - packing.forEach(d -> formulaToCat.put(catFormulaKey(d), varName(d))); - buildFormula(def, formulaToCat); - List simpleInduced = induceRules(packing, def); - for (Rule rule : simpleInduced) { - rule.addInfo("simple_packing", 1.0); - filterRule(rule); - } - - if (opts.verbose > 1) { - LogInfo.logs("Simple Packing", chartList.size()); - LogInfo.logs("chartList.size = %d", chartList.size()); - LogInfo.log("Potential packings: "); - this.matches.forEach(d -> LogInfo.logs("%f: %s\t %s", d.getScore(), d.formula, d.allAnchored())); - LogInfo.logs("packing: %s", packing); - LogInfo.logs("formulaToCat: %s", formulaToCat); - } - } - if (opts.useBestPacking) { - List bestPacking = bestPackingDP(this.matches, numTokens); - - HashMap formulaToCat = new HashMap<>(); - bestPacking.forEach(d -> formulaToCat.put(catFormulaKey(d), varName(d))); - buildFormula(def, formulaToCat); - for (Rule rule : induceRules(bestPacking, def)) - filterRule(rule); - - if (opts.verbose > 1) { 
- LogInfo.logs("chartList.size = %d", chartList.size()); - LogInfo.log("Potential packings: "); - this.matches.forEach(d -> LogInfo.logs("%f: %s\t", d.getScore(), d.formula)); - LogInfo.logs("BestPacking: %s", bestPacking); - LogInfo.logs("formulaToCat: %s", formulaToCat); - } - } - - } - - Set RHSs = new HashSet<>(); - - private void filterRule(Rule rule) { - if (rule.isCatUnary()) { - LogInfo.logs("GrammarInducer.filterRule: not allowing CatUnary rules %s", rule.toString()); - return; - } - - if (RHSs.contains(rule.rhs.toString())) { - LogInfo.logs("GrammarInducer.filterRule: already have %s", rule.toString()); - return; - } - - int numNT = 0; - int numT = 0; - for (String t : rule.rhs) { - if (Rule.isCat(t)) numNT++; - else numT++; - } - - if (numNT > GrammarInducer.opts.maxNonterminals ) { - LogInfo.logs("GrammarInducer.filterRule: too many nonterminals (max %d) %s", GrammarInducer.opts.maxNonterminals, rule.rhs.toString()); - return; - } - - if (numT < GrammarInducer.opts.minTerminals ) { - LogInfo.logs("GrammarInducer.filterRule: too few terminals (min %d) %s", GrammarInducer.opts.minTerminals, rule.rhs.toString()); - return; - } - - inducedRules.add(rule); - RHSs.add(rule.rhs.toString()); - // LogInfo.logs("Added rule %s", rule.toString()); - } - - static Map> makeChartMap(List chartList) { - Map> chartMap = new HashMap<>(); - for (Derivation d : chartList) { - List derivs = chartMap.get(catFormulaKey(d)); - derivs = derivs != null ? 
derivs : new ArrayList<>(); - derivs.add(d); - chartMap.put(catFormulaKey(d), derivs); - } - return chartMap; - } - - // this is used to test for matches, same cat, same formula - // maybe cat needs to be more flexible - static String catFormulaKey(Derivation d) { - // return d.formula.toString(); - return getNormalCat(d) + "::" + d.formula.toString(); - } - - private String varName(Derivation anchored) { - int s = def.grammarInfo.start; - return getNormalCat(anchored) + (anchored.start - s) + "_" + (anchored.end - s); - } - - static private String getNormalCat(Derivation def) { - // return def.cat; - String cat = def.getCat(); - if (cat.endsWith("s")) - return cat.substring(0, cat.length() - 1); - else - return cat; - } - - // label the derivation tree with what it matches in chartList - private void addMatches(Derivation deriv, Map> chartMap) { - String key = catFormulaKey(deriv); - if (chartMap.containsKey(key)) { - deriv.grammarInfo.matches.addAll(chartMap.get(key)); - deriv.grammarInfo.matched = true; - matches.addAll(chartMap.get(key)); - } - for (Derivation d : deriv.children) { - addMatches(d, chartMap); - } - } - - class Packing { - List packing; - double score; - - public Packing(double score, List packing) { - this.score = score; - this.packing = packing; - } - - @Override - public String toString() { - return this.score + ": " + this.packing.toString(); - } - } - - // the maximum starting index of every match that ends on or before end - private int blockingIndex(List matches, int end) { - return matches.stream().filter(d -> d.end <= end).map(d -> d.start).max((s1, s2) -> s1.compareTo(s2)) - .orElse(Integer.MAX_VALUE / 2); - } - - // start inclusive, end exclusive - private List bestPackingDP(List matches, int length) { - List bestEndsAtI = new ArrayList<>(length + 1); - List maximalAtI = new ArrayList<>(length + 1); - bestEndsAtI.add(new Packing(Double.NEGATIVE_INFINITY, new ArrayList())); - maximalAtI.add(new Packing(0.0, new ArrayList())); - - 
@SuppressWarnings("unchecked") - List[] endsAtI = new ArrayList[length + 1]; - - for (Derivation d : matches) { - List derivs = endsAtI[d.end]; - derivs = derivs != null ? derivs : new ArrayList<>(); - derivs.add(d); - endsAtI[d.end] = derivs; - } - - for (int i = 1; i <= length; i++) { - // the new maximal either uses a derivation that ends at i, plus a - // previous maximal - Packing bestOverall = new Packing(Double.NEGATIVE_INFINITY, new ArrayList<>()); - Derivation bestDerivI = null; - if (endsAtI[i] != null) { - for (Derivation d : endsAtI[i]) { - double score = d.getScore() + maximalAtI.get(d.start).score; - if (score >= bestOverall.score) { - bestOverall.score = score; - bestDerivI = d; - } - } - List bestpacking = new ArrayList<>(maximalAtI.get(bestDerivI.start).packing); - bestpacking.add(bestDerivI); - bestOverall.packing = bestpacking; - } - bestEndsAtI.add(i, bestOverall); - - // or it's a previous bestEndsAtI[j] for i-minLength+1 <= j < i - for (int j = blockingIndex(matches, i) + 1; j < i; j++) { - // LogInfo.dbgs("BlockingIndex: %d, j=%d, i=%d", blockingIndex(matches, - // i), j, i); - if (bestEndsAtI.get(j).score >= bestOverall.score) - bestOverall = bestEndsAtI.get(j); - } - if (opts.verbose > 1) - LogInfo.logs("maximalAtI[%d] = %f: %s, BlockingIndex: %d", i, bestOverall.score, bestOverall.packing, - blockingIndex(matches, i)); - if (bestOverall.score > Double.NEGATIVE_INFINITY) - maximalAtI.add(i, bestOverall); - else { - maximalAtI.add(i, new Packing(0, new ArrayList<>())); - } - } - return maximalAtI.get(length).packing; - } - - public List getRules() { - return inducedRules; - } - - private List induceRules(List packings, Derivation defDeriv) { - List RHS = getRHS(defDeriv, packings); - SemanticFn sem = getSemantics(defDeriv, packings); - String cat = getNormalCat(defDeriv); - Rule inducedRule = new Rule(cat, RHS, sem); - inducedRule.addInfo("induced", 1.0); - inducedRule.addInfo("anchored", 1.0); - List inducedRules = new ArrayList<>(); - if 
(!inducedRule.isCatUnary()) { - inducedRules.add(inducedRule); - } - return inducedRules; - } - - // populate grammarInfo.formula, replacing everything that can be replaced - private void buildFormula(Derivation deriv, Map replaceMap) { - // LogInfo.logs("BUILDING %s at (%d,%d) %s", deriv, deriv.start, deriv.end, - // catFormulaKey(deriv)); - if (replaceMap.containsKey(catFormulaKey(deriv))) { - // LogInfo.logs("Found match %s, %s, %s", catFormulaKey(deriv), - // replaceMap, deriv); - deriv.grammarInfo.formula = new VariableFormula(replaceMap.get(catFormulaKey(deriv))); - // LogInfo.logs("WITH VARIABLE: %s", deriv.grammarInfo.formula); - return; - } - if (deriv.children.size() == 0) { - deriv.grammarInfo.formula = deriv.formula; - } - - for (Derivation c : deriv.children) { - buildFormula(c, replaceMap); - // deriv.grammarInfo.start = Math.min(deriv.grammarInfo.start, - // c.grammarInfo.start); - // deriv.grammarInfo.end = Math.max(deriv.grammarInfo.end, - // c.grammarInfo.end); - } - Rule rule = deriv.rule; - List args = deriv.children; - - // cant use the standard DerivationStream because formula is final - if (rule == null || rule.sem == null) { - deriv.grammarInfo.formula = deriv.formula; - } else if (rule.sem instanceof ApplyFn) { - Formula f = Formulas.fromLispTree(((ApplyFn) rule.sem).formula.toLispTree()); - for (Derivation arg : args) { - if (!(f instanceof LambdaFormula)) - throw new RuntimeException("Expected LambdaFormula, but got " + f); - Formula after = renameBoundVars(f, new HashSet<>()); - // LogInfo.logs("renameBoundVar %s === %s", after, f); - f = Formulas.lambdaApply((LambdaFormula) after, arg.grammarInfo.formula); - } - deriv.grammarInfo.formula = f; - } else if (rule.sem instanceof IdentityFn) { - deriv.grammarInfo.formula = args.get(0).grammarInfo.formula; - } else if (rule.sem instanceof edu.stanford.nlp.sempre.interactive.lassie.ChoiceFn) { - deriv.grammarInfo.formula = args.get(0).grammarInfo.formula; - } else if (rule.sem instanceof 
BlockFn) { - deriv.grammarInfo.formula = new ActionFormula(((BlockFn) rule.sem).mode, - args.stream().map(d -> d.grammarInfo.formula).collect(Collectors.toList())); - } else { - deriv.grammarInfo.formula = deriv.formula; - } - // LogInfo.logs("BUILT %s for %s", deriv.grammarInfo.formula, - // deriv.formula); - // LogInfo.log("built " + deriv.grammarInfo.formula); - } - - private String newName(String s) { - return s.endsWith("_") ? s : s + "_"; - } - - private Formula renameBoundVars(Formula formula, Set boundvars) { - if (formula instanceof LambdaFormula) { - LambdaFormula f = (LambdaFormula) formula; - boundvars.add(f.var); - return new LambdaFormula(newName(f.var), renameBoundVars(f.body, boundvars)); - } else { - Formula after = formula.map(new Function() { - @Override - public Formula apply(Formula formula) { - if (formula instanceof VariableFormula) { // Replace variable - String name = ((VariableFormula) formula).name; - if (boundvars.contains(name)) - return new VariableFormula(newName(name)); - else - return formula; - } - return null; - } - }); - return after; - } - } - - private SemanticFn getSemantics(final Derivation def, List packings) { - Formula baseFormula = def.grammarInfo.formula; - if (opts.verbose > 0) - LogInfo.logs("getSemantics %s", baseFormula); - if (packings.size() == 0) { - SemanticFn constantFn = new ConstantFn(); - LispTree newTree = LispTree.proto.newList(); - newTree.addChild("ConstantFn"); - newTree.addChild(baseFormula.toLispTree()); - constantFn.init(newTree); - return constantFn; - } - - for (int i = packings.size() - 1; i >= 0; i--) { - baseFormula = new LambdaFormula(varName(packings.get(i)), Formulas.fromLispTree(baseFormula.toLispTree())); - } - SemanticFn applyFn = new ApplyFn(); - LispTree newTree = LispTree.proto.newList(); - newTree.addChild("interactive.ApplyFn"); - newTree.addChild(baseFormula.toLispTree()); - applyFn.init(newTree); - return applyFn; - } - - private List getRHS(Derivation def, List packings) { - List 
rhs = new ArrayList<>(headTokens); - for (Derivation deriv : packings) { - // LogInfo.logs("got (%d,%d):%s:%s", deriv.start, deriv.end, - // deriv.formula, deriv.cat); - rhs.set(deriv.start, getNormalCat(deriv)); - for (int i = deriv.start + 1; i < deriv.end; i++) { - rhs.set(i, null); - } - } - return rhs.subList(def.grammarInfo.start, def.grammarInfo.end).stream().filter(s -> s != null) - .collect(Collectors.toList()); - } - - public static enum ParseStatus { - Nothing, // nothing at all parses in the utterance - /// Float, // something parse, no longer used. - Induced, // redefining known utterance - Core; - - public static ParseStatus fromString(String status) { - for (ParseStatus c : ParseStatus.values()) - if (c.name().equalsIgnoreCase(status)) - return c; - return null; - } // define known utterance in core, should reject - } - - public static ParseStatus getParseStatus(Example ex) { - return getParseStatus(ex.predDerivations); - } - - public static ParseStatus getParseStatus(List derivs) { - if (derivs.size() > 0) { - for (Derivation deriv : derivs) { - if (deriv.allAnchored()) { - return ParseStatus.Core; - } - } - return ParseStatus.Induced; - } - // could check the chart here set partial, but no need for now - return ParseStatus.Nothing; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/InteractiveBeamParser.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/InteractiveBeamParser.java deleted file mode 100644 index 24c5e7b6a2..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/InteractiveBeamParser.java +++ /dev/null @@ -1,577 +0,0 @@ -package edu.stanford.nlp.sempre.interactive; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashSet; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Set; -import 
java.util.stream.Collectors; - -import com.beust.jcommander.internal.Lists; -import com.google.common.base.Joiner; -import com.google.common.collect.ImmutableList; - -import edu.stanford.nlp.sempre.ChartParserState; -import edu.stanford.nlp.sempre.Derivation; -import edu.stanford.nlp.sempre.DerivationStream; -import edu.stanford.nlp.sempre.Example; -import edu.stanford.nlp.sempre.Formula; -import edu.stanford.nlp.sempre.Json; -import edu.stanford.nlp.sempre.Params; -import edu.stanford.nlp.sempre.Parser; -import edu.stanford.nlp.sempre.ParserState; -import edu.stanford.nlp.sempre.Rule; -import edu.stanford.nlp.sempre.SemanticFn; -import edu.stanford.nlp.sempre.Trie; -import fig.basic.Evaluation; -import fig.basic.IOUtils; -import fig.basic.IntRef; -import fig.basic.LogInfo; -import fig.basic.Option; -import fig.basic.SetUtils; -import fig.basic.StopWatch; -import fig.basic.StopWatchSet; -import fig.exec.Execution; - -/** - * A modified version of the BeamParser, with consideration for use in the interactive setting - * - * @author Percy Liang, sidaw - */ -public class InteractiveBeamParser extends Parser { - public static class Options { - @Option - public int maxNewTreesPerSpan = Integer.MAX_VALUE; - @Option - public FloatStrategy floatStrategy = FloatStrategy.Never; - @Option(gloss = "track these categories") - public List trackedCats; - } - - public enum FloatStrategy { - Always, Never, NoParse - }; - - public static Options opts = new Options(); - - Trie trie; // For non-cat-unary rules - // so that duplicated rules are never added - Set allRules; - List interactiveCatUnaryRules; - public InteractiveBeamParser(Spec spec) { - super(spec); - if (opts.trackedCats != null) { - opts.trackedCats = opts.trackedCats.stream().map(s -> "$" + s).collect(Collectors.toList()); - LogInfo.logs("Mapped trackedCats to: %s", opts.trackedCats); - } - interactiveCatUnaryRules = new LinkedList<>(super.catUnaryRules); - allRules = new LinkedHashSet<>(super.catUnaryRules); - // Index 
the non-cat-unary rules - trie = new Trie(); - for (Rule rule : grammar.getRules()) { - addRule(rule); - } - if (Parser.opts.visualizeChartFilling) - this.chartFillOut = IOUtils.openOutAppendEasy(Execution.getFile("chartfill")); - } - - @Override - public synchronized void addRule(Rule rule) { - if (allRules.contains(rule)) - return; - - allRules.add(rule); - - if (!rule.isCatUnary()) { - trie.add(rule); - } else { - interactiveCatUnaryRules.add(rule); - } - } - - @Override - public List getCatUnaryRules() { - return interactiveCatUnaryRules; - } - - // for grammar induction, just need the formula, do not execute - public InteractiveBeamParserState parseWithoutExecuting(Params params, Example ex, boolean computeExpectedCounts) { - // Parse - StopWatch watch = new StopWatch(); - watch.start(); - InteractiveBeamParserState state = new InteractiveBeamParserState(this, params, ex); - state.infer(); - watch.stop(); - state.parseTime = watch.getCurrTimeLong(); - - ex.predDerivations = state.predDerivations; - Derivation.sortByScore(ex.predDerivations); - // Clean up temporary state used during parsing - return state; - } - - - @Override - public ParserState newParserState(Params params, Example ex, boolean computeExpectedCounts) { - InteractiveBeamParserState coarseState = null; - if (Parser.opts.coarsePrune) { - LogInfo.begin_track("Parser.coarsePrune"); - coarseState = new InteractiveBeamParserState(this, params, ex, computeExpectedCounts, - InteractiveBeamParserState.Mode.bool, null); - coarseState.infer(); - coarseState.keepTopDownReachable(); - LogInfo.end_track(); - } - return new InteractiveBeamParserState(this, params, ex, computeExpectedCounts, InteractiveBeamParserState.Mode.full, - coarseState); - } -} - -/** - * Stores BeamFloatingParser information about parsing a particular example. The - * actual parsing code lives here. 
- * - * @author Percy Liang - * @author Roy Frostig - * @author sidaw - */ -class InteractiveBeamParserState extends ChartParserState { - public final Mode mode; - - // Modes: - // 1) Bool: just check if cells (cat, start, end) are reachable (to prune - // chart) - // 2) Full: compute everything - public enum Mode { - bool, full - } - - private final InteractiveBeamParser parser; - private final InteractiveBeamParserState coarseState; // Used to prune - private final boolean execute; - - public List chartList; - - public InteractiveBeamParserState(InteractiveBeamParser parser, Params params, Example ex) { - super(parser, params, ex, false); - this.parser = parser; - this.mode = Mode.full; - this.coarseState = null; - this.execute = false; - } - - public InteractiveBeamParserState(InteractiveBeamParser parser, Params params, Example ex, boolean computeExpectedCounts, - Mode mode, InteractiveBeamParserState coarseState) { - super(parser, params, ex, computeExpectedCounts); - this.parser = parser; - this.mode = mode; - this.coarseState = coarseState; - this.execute = true; - } - - @Override - public void infer() { - if (numTokens == 0) - return; - - if (parser.verbose(2)) - LogInfo.begin_track("ParserState.infer"); - - // Base case - for (Derivation deriv : gatherTokenAndPhraseDerivations()) { - featurizeAndScoreDerivation(deriv); - addToChart(deriv); - } - - // Recursive case - for (int len = 1; len <= numTokens; len++) - for (int i = 0; i + len <= numTokens; i++) - build(i, i + len); - - if (parser.verbose(2)) - LogInfo.end_track(); - - // Visualize - if (parser.chartFillOut != null && Parser.opts.visualizeChartFilling && this.mode != Mode.bool) { - parser.chartFillOut.println( - Json.writeValueAsStringHard(new ChartFillingData(ex.id, chartFillingList, ex.utterance, ex.numTokens()))); - parser.chartFillOut.flush(); - } - - setPredDerivations(); - - for (Derivation deriv : predDerivations) { - deriv.getAnchoredTokens(); - } - - this.chartList = this.collectChart(); - 
- boolean parseFloat = false; - if (InteractiveBeamParser.opts.floatStrategy == InteractiveBeamParser.FloatStrategy.Always) - parseFloat = true; - else if (InteractiveBeamParser.opts.floatStrategy == InteractiveBeamParser.FloatStrategy.NoParse) - parseFloat = predDerivations.size() == 0; - else - parseFloat = false; - - if (mode == Mode.full) { - // Compute gradient with respect to the predicted derivations - if (this.execute) - ensureExecuted(); - if (computeExpectedCounts) { - expectedCounts = new HashMap<>(); - ParserState.computeExpectedCounts(predDerivations, expectedCounts); - } - } - - /* If Beam Parser failed to find derivations, try a floating parser */ - if (parseFloat) { - /* - * For every base span of the chart, add the derivations from nothing - * rules - */ - List nothingRules = new ArrayList(); - for (Rule rule : parser.grammar.getRules()) - if (rule.isFloating() && rule.rhs.size() == 1 && rule.isRhsTerminals()) - nothingRules.add(rule); - for (int i = 0; i < numTokens; i++) - for (Rule rule : nothingRules) - applyRule(i, i + 1, rule, chart[i][i + 1].get("$TOKEN")); - - /* Traverse the chart bottom up */ - for (int len = 1; len <= numTokens; len++) { - for (int i = 0; i + len <= numTokens; i++) { - buildFloating(i, i + len); - } - } - - /* Add unique derivations to predDerivations */ - List rootDerivs = chart[0][numTokens].get("$FROOT"); - if (rootDerivs == null) - rootDerivs = new ArrayList(Derivation.emptyList); - - List actionDerivs = new ArrayList(Derivation.emptyList); - if (actionDerivs != null) { - Set formulas = new HashSet(); - for (Derivation d : rootDerivs) { - Formula f = d.getFormula(); - if (!formulas.contains(f)) { - formulas.add(f); - predDerivations.add(d); - } - } - } - } - } - - private List collectChart() { - List chartList = Lists.newArrayList(); - for (int len = 1; len <= numTokens; ++len) { - for (int i = 0; i + len <= numTokens; ++i) { - for (String cat : chart[i][i + len].keySet()) { - if (Rule.specialCats.contains(cat)) - 
continue; - chartList.addAll(chart[i][i + len].get(cat)); - } - } - } - return chartList; - } - - // Create all the derivations for the span [start, end). - protected void build(int start, int end) { - applyNonCatUnaryRules(start, end, start, parser.trie, new ArrayList(), new IntRef(0)); - - Set cellsPruned = new HashSet<>(); - applyCatUnaryRules(start, end, cellsPruned); - - for (Map.Entry> entry : chart[start][end].entrySet()) - pruneCell(cellsPruned, entry.getKey(), start, end, entry.getValue()); - } - - private static String cellString(String cat, int start, int end) { - return cat + ":" + start + ":" + end; - } - - // Return number of new derivations added - private int applyRule(int start, int end, Rule rule, List children) { - if (Parser.opts.verbose >= 5) - LogInfo.logs("applyRule %s %s %s %s", start, end, rule, children); - try { - if (mode == Mode.full) { - StopWatchSet.begin(rule.getSemRepn()); - DerivationStream results = rule.sem.call(ex, - new SemanticFn.CallInfo(rule.lhs, start, end, rule, ImmutableList.copyOf(children))); - StopWatchSet.end(); - while (results.hasNext()) { - Derivation newDeriv = results.next(); - featurizeAndScoreDerivation(newDeriv); - addToChart(newDeriv); - } - return results.estimatedSize(); - } else if (mode == Mode.bool) { - Derivation deriv = new Derivation.Builder().cat(rule.lhs).start(start).end(end).rule(rule) - .children(ImmutableList.copyOf(children)).formula(Formula.nullFormula).createDerivation(); - addToChart(deriv); - return 1; - } else { - throw new RuntimeException("Invalid mode"); - } - } catch (Exception e) { - LogInfo.errors("Composition failed: rule = %s, children = %s", rule, children); - e.printStackTrace(); - throw new RuntimeException(e); - } - } - - // Don't prune the same cell more than once. 
- protected void pruneCell(Set cellsPruned, String cat, int start, int end, List derivations) { - String cell = cellString(cat, start, end); - if (cellsPruned.contains(cell)) - return; - - cellsPruned.add(cell); - pruneCell(cell, derivations); - } - - private boolean canBeRoot(int start, int end) {return start==0 && end==numTokens;}; - // Apply all unary rules with RHS category. - // Before applying each unary rule (rule.lhs -> rhsCat), we can prune the cell - // of rhsCat - // because we assume acyclicity, so rhsCat's cell will never grow. - private void applyCatUnaryRules(int start, int end, Set cellsPruned) { - for (Rule rule : parser.getCatUnaryRules()) { - if (!coarseAllows(rule.lhs, start, end)) - continue; - if (rule.lhs.equals(Rule.rootCat) && !canBeRoot(start, end)) - continue; - String rhsCat = rule.rhs.get(0); - List derivations = chart[start][end].get(rhsCat); - if (Parser.opts.verbose >= 5) - LogInfo.logs("applyCatUnaryRules %s %s %s %s", start, end, rule, chart[start][end]); - if (derivations == null) - continue; - - // Prune before applying rules to eliminate cruft! - pruneCell(cellsPruned, rhsCat, start, end, derivations); - - for (Derivation deriv : derivations) { - applyRule(start, end, rule, Collections.singletonList(deriv)); - } - } - } - - // Strategy: walk along the input on span (start:end) and traverse the trie - // to get the list of the rules that could apply by matching the RHS. - // start:end: span we're dealing with. - // i: current token position - // node: contains a link to the RHS that could apply. - // children: the derivations that't we're building up. 
- // numNew: Keep track of number of new derivations created - private void applyNonCatUnaryRules(int start, int end, int i, Trie node, ArrayList children, - IntRef numNew) { - if (node == null) - return; - if (!coarseAllows(node, start, end)) - return; - - if (Parser.opts.verbose >= 5) { - LogInfo.logs("applyNonCatUnaryRules(start=%d, end=%d, i=%d, children=[%s], %s rules)", start, end, i, - Joiner.on(", ").join(children), node.rules.size()); - } - - // Base case: our fencepost has walked to the end of the span, so - // apply the rule on all the children gathered during the walk. - if (i == end) { - Iterator ruleIterator = node.rules.iterator(); - while (ruleIterator.hasNext()) { - Rule rule = ruleIterator.next(); - if (coarseAllows(rule.lhs, start, end)) { - numNew.value += applyRule(start, end, rule, children); - } - } - return; - } - - // Advance terminal token - applyNonCatUnaryRules(start, end, i + 1, node.next(ex.token(i)), children, numNew); - - // Advance non-terminal category - for (int j = i + 1; j <= end; j++) { - for (Map.Entry> entry : chart[i][j].entrySet()) { - Trie nextNode = node.next(entry.getKey()); - for (Derivation arg : entry.getValue()) { - children.add(arg); - applyNonCatUnaryRules(start, end, j, nextNode, children, numNew); - children.remove(children.size() - 1); - if (mode != Mode.full) - break; // Only need one hypothesis - if (numNew.value >= InteractiveBeamParser.opts.maxNewTreesPerSpan) - return; - } - } - } - } - - /* For each span, apply applicable floating rules */ - protected void buildFloating(int start, int end) { - for (Rule rule : parser.grammar.getRules()) { - if (!rule.isFloating() || !coarseAllows(rule.lhs, start, end)) - continue; - - if (rule.rhs.size() == 1) { - /* Apply cat unary rules simply */ - String rhsCat = rule.rhs.get(0); - List derivs = chart[start][end].get(rhsCat); - - if (derivs == null) - continue; - - for (Derivation deriv : derivs) - applyRule(start, end, rule, Collections.singletonList(deriv)); - } else 
{ - /* Apply non-cat unary rules by traversing through the subspans */ - int derivsCreated = 0; - for (int i = start + 1; i < end; i++) { - derivsCreated += applyFloatingRule(rule, start, end, chart[start][i], chart[i][end]); - derivsCreated += applyFloatingRule(rule, start, end, chart[i][end], chart[start][i]); - } - - /* If no derivs created, propagate up */ - if (derivsCreated == 0) { - copyDerivs(chart[start][end - 1], chart[start][end]); - if (start != numTokens - 1) - copyDerivs(chart[start + 1][end], chart[start][end]); - } - } - } - // test prune - Set cellsPruned = new HashSet<>(); - for (Map.Entry> entry : chart[start][end].entrySet()) - pruneCell(cellsPruned, entry.getKey(), start, end, entry.getValue()); - } - - protected int applyFloatingRule(Rule rule, int start, int end, Map> first, - Map> second) { - List derivs1 = first.get(rule.rhs.get(0)); - List derivs2 = second.get(rule.rhs.get(1)); - - if (derivs1 == null || derivs2 == null) - return 0; - - int derivsCreated = 0; - - for (Derivation deriv1 : derivs1) { - for (Derivation deriv2 : derivs2) { - List children = new ArrayList(); - children.add(deriv1); - children.add(deriv2); - derivsCreated += applyRule(start, end, rule, children); - } - } - - return derivsCreated; - } - - protected void copyDerivs(Map> source, Map> dest) { - if (source == null || dest == null) - return; - - for (String cat : source.keySet()) { - List derivations = dest.get(cat); - if (derivations == null) - dest.put(cat, derivations = new ArrayList<>()); - - /* add only if the formula not already present to ensure no duplicates */ - Set formulas = new HashSet(); - for (Derivation deriv : derivations) - formulas.add(deriv.formula); - - for (Derivation deriv : source.get(cat)) { - if (!formulas.contains(deriv.formula)) { - derivations.add(deriv); - formulas.add(deriv.formula); - } - } - } - } - - protected void addDerivs(List source, List dest) { - if (dest == null || source == null) - return; - dest.addAll(source); - } - - // -- 
Coarse state pruning -- - - // Remove any (cat, start, end) which isn't reachable from the - // (Rule.rootCat, 0, numTokens) - public void keepTopDownReachable() { - if (numTokens == 0) - return; - - Set reachable = new HashSet<>(); - collectReachable(reachable, Rule.rootCat, 0, numTokens); - - // Remove all derivations associated with (cat, start, end) that aren't - // reachable. - for (int start = 0; start < numTokens; start++) { - for (int end = start + 1; end <= numTokens; end++) { - List toRemoveCats = new LinkedList<>(); - for (String cat : chart[start][end].keySet()) { - String key = catStartEndKey(cat, start, end); - if (!reachable.contains(key)) { - toRemoveCats.add(cat); - } - } - Collections.sort(toRemoveCats); - for (String cat : toRemoveCats) { - if (parser.verbose(4)) { - LogInfo.logs("Pruning chart %s(%s,%s)", cat, start, end); - } - chart[start][end].remove(cat); - } - } - } - } - - private void collectReachable(Set reachable, String cat, int start, int end) { - String key = catStartEndKey(cat, start, end); - if (reachable.contains(key)) - return; - - if (!chart[start][end].containsKey(cat)) { - // This should only happen for the root when there are no parses. 
- return; - } - - reachable.add(key); - for (Derivation deriv : chart[start][end].get(cat)) { - for (Derivation subderiv : deriv.children) { - collectReachable(reachable, subderiv.cat, subderiv.start, subderiv.end); - } - } - } - - private String catStartEndKey(String cat, int start, int end) { - return cat + ":" + start + ":" + end; - } - - // For pruning with the coarse state - protected boolean coarseAllows(Trie node, int start, int end) { - if (coarseState == null) - return true; - return SetUtils.intersects(node.cats, coarseState.chart[start][end].keySet()); - } - - protected boolean coarseAllows(String cat, int start, int end) { - if (coarseState == null) - return true; - return coarseState.chart[start][end].containsKey(cat); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/InteractiveMaster.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/InteractiveMaster.java deleted file mode 100644 index e72f56bf66..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/InteractiveMaster.java +++ /dev/null @@ -1,365 +0,0 @@ -package edu.stanford.nlp.sempre.interactive; - -import java.io.PrintWriter; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.List; -import java.util.stream.Collectors; - -import com.google.common.collect.Lists; - -import edu.stanford.nlp.sempre.interactive.lassie.LassieUtils; - -import edu.stanford.nlp.sempre.Builder; -import edu.stanford.nlp.sempre.ContextValue; -import edu.stanford.nlp.sempre.Derivation; -import edu.stanford.nlp.sempre.Example; -import edu.stanford.nlp.sempre.Formula; -import edu.stanford.nlp.sempre.Formulas; -import edu.stanford.nlp.sempre.Master; -import edu.stanford.nlp.sempre.Params; -import edu.stanford.nlp.sempre.Parser; -import edu.stanford.nlp.sempre.ParserState; -import edu.stanford.nlp.sempre.Rule; -import edu.stanford.nlp.sempre.RuleSource; -import edu.stanford.nlp.sempre.Session; -import 
edu.stanford.nlp.sempre.Json; -import fig.basic.IOUtils; -import fig.basic.LispTree; -import fig.basic.LogInfo; -import fig.basic.Option; -import fig.basic.Ref; - -/** - * An InteractiveMaster supports interactive commands, and grammar induction - * methods. - */ -public class InteractiveMaster extends Master { - public static class Options { - @Option(gloss = "Write out new grammar rules") - public String intOutputPath; - @Option(gloss = "each session gets a different model with its own parameters") - public boolean independentSessions = false; - @Option(gloss = "number of utterances to return for autocomplete") - public int autocompleteCount = 5; - @Option(gloss = "only allow interactive commands") - public boolean onlyInteractive = false; - - @Option(gloss = "try partial matches") - public boolean useAligner = true; - - @Option(gloss = "use the best formula when no match or not provided") - public int maxSequence = 20; - @Option(gloss = "path to the citations") - public int maxChars = 200; - - @Option(gloss = "allow regular commands specified in Master") - public boolean allowRegularCommands = false; - } - - public static Options opts = new Options(); - - public InteractiveMaster(Builder builder) { - super(builder); - } - - @Override - protected void printHelp() { - // interactive commands - LogInfo.log("Interactive commands"); - LogInfo.log( - " (:def head [[body1,bodyformula1],[body2,bodyformula2]]): provide a definition for the original utterance"); - LogInfo.log(" (:q |utterance|): provide a definition for the original utterance"); - LogInfo.log(" (:accept |formula1| |formula2|): accept any derivation with those corresponding formula"); - LogInfo.log(" (:reject |formula1| |formula2|): reject any derivations with those corresponding formula"); - LogInfo.log("Main commands:"); - super.printHelp(); - } - - @Override - public void runServer() { - InteractiveServer server = new InteractiveServer(this); - server.run(); - } - - @Override - public Response 
processQuery(Session session, String line) { - LogInfo.begin_track("InteractiveMaster.handleQuery"); - LogInfo.logs("session %s", session.id); - LogInfo.logs("query %s", line); - line = line.trim(); - Response response = new Response(); - if (line.startsWith("(:")) - handleCommand(session, line, response); - else if (line.startsWith("(") && opts.allowRegularCommands) - try { - // might be a command - super.processQuery(session, line); - } catch (Throwable t) { - // might just be an utterance starting with ( - handleCommand(session, String.format("(:q \"%s\")", line), response); - } - else - handleCommand(session, String.format("(:q \"%s\")", line), response); - LogInfo.end_track(); - return response; - } - - void handleCommand(Session session, String line, Response response) { - LispTree tree = LispTree.proto.parseFromString(line); - tree = builder.grammar.applyMacros(tree); - - String command = tree.child(0).value; - QueryStats stats = new QueryStats(response, command); - // Start of interactive commands - if (command.equals(":q")) { - // Create example - String utt = tree.children.get(1).value; - Example ex = exampleFromUtterance(utt, session); - - if (!utteranceAllowed(ex, response)) { - stats.error("utterance_too_expensive"); - // returns with size and error message - return; - } - - builder.parser.parse(builder.params, ex, false); - - stats.size(ex.predDerivations != null ? 
ex.predDerivations.size() : 0); - stats.status(InteractiveUtils.getParseStatus(ex)); - - LogInfo.logs("parse stats: %s", response.stats); - response.ex = ex; - - ex.logWithoutContext(); - if (ex.predDerivations.size() > 0) { - response.candidateIndex = 0; - printDerivation(response.getDerivation()); - } - - LogInfo.logs("Printing response to socket"); - LassieUtils.printToSocket("LassieLib.sempreResponse := " - + LassieUtils.json2sml(Json.writeValueAsStringHard(InteractiveServer.makeJson(response)))); - - } else if (command.equals(":qdbg")) { - // Create example - String utt = tree.children.get(1).value; - Example ex = exampleFromUtterance(utt, session); - - builder.parser.parse(builder.params, ex, false); - - Derivation.opts.showCat = true; - Derivation.opts.showRules = true; - for (Derivation d : ex.predDerivations) { - response.lines.add(d.toLispTree().toString()); - } - Derivation.opts.showCat = false; - Derivation.opts.showRules = false; - response.ex = ex; - } else if (command.equals(":reject")) { - stats.put("rejectSize", tree.children.size()); - } else if (command.equals(":accept")) { - String utt = tree.children.get(1).value; - List targetFormulas = new ArrayList<>(); - try { - targetFormulas = tree.children.subList(2, tree.children.size()).stream() - .map(t -> Formulas.fromLispTree(LispTree.proto.parseFromString(t.value))).collect(Collectors.toList()); - } catch (Exception e) { - e.printStackTrace(); - response.lines.add("cannot accept formula: "); - } - - Example ex = exampleFromUtterance(utt, session); - response.ex = ex; - - // Parse! 
- ((InteractiveBeamParser)builder.parser).parseWithoutExecuting(builder.params, ex, false); - - int rank = -1; - Derivation match = null; - for (int i = 0; i < ex.predDerivations.size(); i++) { - Derivation derivi = ex.predDerivations.get(i); - if (targetFormulas.contains(derivi.formula)) { - rank = i; - match = derivi; - break; - } - } - if (rank == -1) { - stats.error("unable to match on accept"); - } - stats.rank(rank); - stats.status(InteractiveUtils.getParseStatus(ex)); - stats.size(ex.predDerivations.size()); - - stats.put("formulas.size", targetFormulas.size()); - stats.put("len_formula", targetFormulas.get(0).toLispTree().toString().length()); - stats.put("len_utterance", ex.utterance.length()); - - if (match != null) { - if (session.isWritingCitation()) { - InteractiveUtils.cite(match, ex); - } - // ex.setTargetValue(match.value); // this is just for logging, not - // actually used for learning - if (session.isLearning()) { - LogInfo.begin_track("Updating parameters"); - learner.onlineLearnExampleByFormula(ex, targetFormulas); - LogInfo.end_track(); - } - } - } else if (command.startsWith(":def")) { - stats.put("type", "def"); // startsWith - if (tree.children.size() == 3) { - String head = tree.children.get(1).value; - String jsonDef = tree.children.get(2).value; - - List inducedRules = new ArrayList<>(); - stats.put("head_len", head.length()); - stats.put("json_len", jsonDef.length()); - try { - inducedRules.addAll(induceRulesHelper(command, head, jsonDef, builder.parser, builder.params, session, - new Ref(response))); - stats.put("num_rules", inducedRules.size()); - } catch (BadInteractionException e) { - stats.put("num_rules", 0); - stats.error(e.getMessage()); - response.lines.add(e.getMessage()); - return; - } - if (inducedRules.size() > 0) { - if (session.isLearning()) { - for (Rule rule : inducedRules) { - InteractiveUtils.addRuleInteractive(rule, builder.parser); - } - stats.put("total_rules", 
((InteractiveBeamParser)builder.parser).allRules.size()); - stats.put("total_unicat", ((InteractiveBeamParser)builder.parser).interactiveCatUnaryRules.size()); - } - // TODO : should not have to parse again, I guess just set the formula - // or something - // builder.parser.parse(builder.params, refExHead.value, false); - // write out the grammar - if (session.isWritingGrammar()) { - PrintWriter out = IOUtils - .openOutAppendHard(Paths.get(InteractiveMaster.opts.intOutputPath, "grammar.log.json").toString()); - for (Rule rule : inducedRules) { - out.println(rule.toJson()); - } - out.close(); - } - } else { - LogInfo.logs("No rule induced for head %s", head); - } - } else { - LogInfo.logs("Invalid format for def"); - } - } else if (command.equals(":printInfo")) { - LogInfo.logs("Printing and overriding grammar and parameters..."); - builder.params.write(Paths.get(InteractiveMaster.opts.intOutputPath, "params.params").toString()); - PrintWriter out = IOUtils - .openOutAppendHard(Paths.get(InteractiveMaster.opts.intOutputPath + "grammar.final.json").toString()); - for (Rule rule : builder.grammar.getRules()) { - out.println(rule.toJson()); - } - out.close(); - LogInfo.logs("Done printing and overriding grammar and parameters..."); - } else if (command.equals(":context")) { - if (tree.children.size() == 1) { - LogInfo.logs("%s", session.context); - } else { - session.context = ContextValue - .fromString(String.format("(context (graph NaiveKnowledgeGraph ((string \"%s\") (name b) (name c))))", - tree.children.get(1).toString())); - response.stats.put("context_length", tree.children.get(1).toString().length()); - } - } else { - LogInfo.log("Invalid command: " + tree); - } - LogInfo.log(Json.writeValueAsStringHard(InteractiveServer.makeJson(response))); - } - - private static Example exampleFromUtterance(String utt, Session session) { - Example.Builder b = new Example.Builder(); - b.setId(session.id); - b.setUtterance(utt); - b.setContext(session.context); - Example ex = 
b.createExample(); - ex.preprocess(); - return ex; - } - - public static List induceRulesHelper(String command, String head, String jsonDef, Parser parser, Params params, - Session session, Ref refResponse) throws BadInteractionException { - Example exHead = exampleFromUtterance(head, session); - LogInfo.logs("head: %s", exHead.getTokens()); - - if (exHead.getTokens() == null || exHead.getTokens().size() == 0) - throw BadInteractionException.headIsEmpty(head); - //isNonsense for Lassie, not used - //if (isNonsense(exHead)) - //throw BadInteractionException.nonSenseDefinition(head); - - InteractiveBeamParserState state = ((InteractiveBeamParser)parser).parseWithoutExecuting(params, exHead, false); - - if (GrammarInducer.getParseStatus(exHead) == GrammarInducer.ParseStatus.Core) - throw BadInteractionException.headIsCore(head); - - LogInfo.logs("num anchored: %d", state.chartList.size()); - List bodyList = InteractiveUtils.utterancefromJson(jsonDef, false); - LogInfo.logs("bodyutterances:\n %s", String.join("\t", bodyList)); - - Derivation bodyDeriv = InteractiveUtils - .combine(InteractiveUtils.derivsfromJson(jsonDef, parser, params, refResponse)); - if (refResponse != null) { - refResponse.value.ex = exHead; - } - - List inducedRules = new ArrayList<>(); - GrammarInducer grammarInducer = new GrammarInducer(exHead.getTokens(), bodyDeriv, state.chartList); - inducedRules.addAll(grammarInducer.getRules()); - - for (Rule rule : inducedRules) { - rule.source = new RuleSource(session.id, head, bodyList); - } - - if (opts.useAligner && bodyList.size() == 1) { - List alignedRules = DefinitionAligner.getRules(exHead.getTokens(), - InteractiveUtils.utterancefromJson(jsonDef, true), bodyDeriv, state.chartList); - for (Rule rule : alignedRules) { - rule.source = new RuleSource(session.id, head, bodyList); - rule.source.align = true; - } - inducedRules.addAll(alignedRules); - } - - exHead.predDerivations = Lists.newArrayList(bodyDeriv); - return inducedRules; - } - - private 
static boolean isNonsense(Example exHead) { - List tokens = exHead.getTokens(); - if (tokens.size() > 10) - return true; - if (tokens.size() == 0) - return true; - return tokens.stream().anyMatch(s -> s.length() > 15); - } - - private boolean utteranceAllowed(Example ex, Response response) { - if (ex.utterance.length() > opts.maxChars) { - response.lines.add(String.format("refused to execute: too many characters in one command (current: %d, max: %d)", - ex.utterance.length(), opts.maxChars)); - return false; - } - long approxSeq = ex.getLemmaTokens().stream().filter(s -> s.contains(";")).count(); - if (approxSeq >= opts.maxSequence) { - response.lines.add(String.format( - "refused to execute: too many steps in one command -- " - + "consider defining some of steps as one single step. (current: %d, max: %d)", - approxSeq, opts.maxSequence)); - return false; - } - return true; - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/InteractiveServer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/InteractiveServer.java deleted file mode 100644 index 94e1a4b419..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/InteractiveServer.java +++ /dev/null @@ -1,357 +0,0 @@ -package edu.stanford.nlp.sempre.interactive; - -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.io.PrintWriter; -import java.io.UnsupportedEncodingException; -import java.net.HttpCookie; -import java.net.InetSocketAddress; -import java.net.URI; -import java.net.URLDecoder; -import java.net.URLEncoder; -import java.time.LocalDateTime; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.ThreadPoolExecutor; -import 
java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicLong; - -import com.google.common.base.Strings; -import com.sun.net.httpserver.Headers; -import com.sun.net.httpserver.HttpExchange; -import com.sun.net.httpserver.HttpHandler; -import com.sun.net.httpserver.HttpServer; - -import edu.stanford.nlp.sempre.Derivation; -import edu.stanford.nlp.sempre.ErrorValue; -import edu.stanford.nlp.sempre.Example; -import edu.stanford.nlp.sempre.Json; -import edu.stanford.nlp.sempre.Master; -import edu.stanford.nlp.sempre.Session; -import edu.stanford.nlp.sempre.StringValue; -import edu.stanford.nlp.sempre.Value; -import fig.basic.IOUtils; -import fig.basic.LogInfo; -import fig.basic.MapUtils; -import fig.basic.Option; - -/** - * JsonServer, interactive learning queries run through this. All the logs are - * handled here. - * - * @author sidaw - */ -public class InteractiveServer { - public static class Options { - @Option - public int port = 8400; - @Option - public int numThreads = 4; - @Option - public int verbose = 1; - @Option - public int maxCandidates = Integer.MAX_VALUE; - @Option - public String queryLogPath = "./int-output/query.log"; - @Option - public String responseLogPath = "./int-output/response.log"; - @Option - public String fullResponseLogPath; - @Option - public int maxExecutionTime = 10; // in seconds - } - - public static Options opts = new Options(); - private static Object queryLogLock = new Object(); - private static Object responseLogLock = new Object(); - private static AtomicLong queryCounter = new AtomicLong(); - Master master; - - class Handler implements HttpHandler { - @Override - public void handle(HttpExchange exchange) { - try { - new ExchangeState(exchange); - } catch (Exception e) { - e.printStackTrace(); - } - } - } - - class ExchangeState { - // Input - HttpExchange exchange; - Map reqParams = new HashMap<>(); - String remoteHost; - - // For header - HttpCookie cookie; - boolean isNewSession; - - // For writing main 
content - - public ExchangeState(HttpExchange exchange) throws IOException { - this.exchange = exchange; - - URI uri = exchange.getRequestURI(); - this.remoteHost = exchange.getRemoteAddress().getHostName(); - - // Don't use uri.getQuery: it can't distinguish between '+' and '-' - String[] tokens = uri.toString().split("\\?"); - if (tokens.length == 2) { - for (String s : tokens[1].split("&")) { - String[] kv = s.split("=", 2); - try { - String key = URLDecoder.decode(kv[0], "UTF-8"); - String value = URLDecoder.decode(kv[1], "UTF-8"); - // logs("%s => %s", key, value); - reqParams.put(key, value); - } catch (UnsupportedEncodingException e) { - throw new RuntimeException(e); - } - } - } - // do not decode sessionId, keep it filename and lisptree friendly - String sessionId = URLEncoder.encode(MapUtils.get(reqParams, "sessionId", ""), "UTF-8"); - if (sessionId != null) { - isNewSession = false; - } else { - isNewSession = true; - } - - if (opts.verbose >= 2) - logs("GET %s from %s (%ssessionId=%s)", uri, remoteHost, isNewSession ? 
"new " : "", sessionId); - - String uriPath = uri.getPath(); - if (uriPath.equals("/")) - uriPath += "index.html"; - if (uriPath.equals("/sempre")) { - handleQuery(sessionId); - } else { - // getFile(opts.basePath + uriPath); security - } - exchange.close(); - } - - String getMimeType(String path) { - String[] tokens = path.split("\\."); - String ext = tokens[tokens.length - 1]; - if (ext.equals("html")) - return "text/html"; - if (ext.equals("css")) - return "text/css"; - if (ext.equals("jpeg")) - return "image/jpeg"; - if (ext.equals("gif")) - return "image/gif"; - return "text/plain"; - } - - void setHeaders(String mimeType) throws IOException { - Headers headers = exchange.getResponseHeaders(); - headers.set("Content-Type", mimeType); - headers.set("Access-Control-Allow-Origin", "*"); - if (isNewSession && cookie != null) - headers.set("Set-Cookie", cookie.toString()); - exchange.sendResponseHeaders(200, 0); - } - - // This should be concurrent - Master.Response processQuery(Session session, String query) { - String message = null; - Master.Response response = master.new Response(); - ExecutorService executor = Executors.newSingleThreadExecutor(); - Future future = executor.submit(() -> master.processQuery(session, query)); - long startTime = System.nanoTime(); - try { - // most exceptions should be handled in InteractiveMaster - // so the response can be more specific - response = future.get(opts.maxExecutionTime, TimeUnit.SECONDS); - } catch (Throwable e) { - e.printStackTrace(); - message = e.toString(); - response.lines.add(String.format("Exceeded the maximum allowed time: %ss", opts.maxExecutionTime)); - response.stats.put("uncaught_error", message); - LogInfo.flush(); - LogInfo.resetInfos(); - } finally { - future.cancel(true); - executor.shutdown(); - long endTime = System.nanoTime(); - response.stats.put("walltime", (endTime - startTime) / 1.0e9); - response.stats.put("count", queryCounter.get()); - } - return response; - } - - void handleQuery(String 
sessionId) throws IOException { - String query = reqParams.get("q"); - long queryNumber = queryCounter.incrementAndGet(); - Session session = master.getSession(sessionId); - session.reqParams = reqParams; - session.remoteHost = remoteHost; - session.format = "json"; - - LocalDateTime queryTime = LocalDateTime.now(); - synchronized (queryLogLock) { // write the query log - Map jsonMap = new LinkedHashMap<>(); - jsonMap.put("count", queryNumber); - jsonMap.put("q", query); - // jsonMap.put("remote", remoteHost); - // jsonMap.put("time", queryTime.toString()); - jsonMap.put("sessionId", sessionId); - reqParams.remove("q"); - jsonMap.putAll(reqParams); - if (session.isLogging()) { - logLine(opts.queryLogPath, Json.writeValueAsStringHard(jsonMap)); - } else { - logLine(opts.queryLogPath + ".sandbox", Json.writeValueAsStringHard(jsonMap)); - } - } - - // If JSON, don't store cookies. - - if (query == null) - query = "null"; - logs("Server.handleQuery %s: %s", session.id, query); - - // Print header - setHeaders("application/json"); - - Master.Response masterResponse = null; - if (query != null) { - masterResponse = processQuery(session, query); - } - - Map responseMap = null; - { - PrintWriter out = new PrintWriter(new OutputStreamWriter(exchange.getResponseBody())); - if (masterResponse != null) { - // Render answer - Example ex = masterResponse.getExample(); - responseMap = makeJson(masterResponse); - out.println(Json.writeValueAsStringHard(responseMap)); - } - out.close(); - } - - synchronized (responseLogLock) { // write the response log log - Map jsonMap = new LinkedHashMap<>(); - LocalDateTime responseTime = LocalDateTime.now(); - // jsonMap.put("responseTime", responseTime.toString()); - jsonMap.put("time", queryTime.toString()); - jsonMap.put("ms", String.format("%.3f", java.time.Duration.between(queryTime, responseTime).toNanos() / 1.0e6)); - jsonMap.put("sessionId", sessionId); - jsonMap.put("q", query); // backwards compatibility... 
- jsonMap.put("lines", responseMap.get("lines")); - if (session.isLogging()) { - logLine(opts.responseLogPath, Json.writeValueAsStringHard(jsonMap)); - if (!Strings.isNullOrEmpty(opts.fullResponseLogPath)) { - jsonMap.put("candidates", responseMap.get("candidates")); - logLine(opts.fullResponseLogPath, Json.writeValueAsStringHard(jsonMap)); - } - } else { - logLine(opts.responseLogPath + ".sandbox", Json.writeValueAsStringHard(jsonMap)); - if (!Strings.isNullOrEmpty(opts.fullResponseLogPath)) { - jsonMap.put("candidates", responseMap.get("candidates")); - logLine(opts.fullResponseLogPath + ".sandbox", Json.writeValueAsStringHard(jsonMap)); - } - // LogInfo.log(Json.writeValueAsStringHard(jsonMap)); - } - } - } - - void logLine(String path, String line) { - PrintWriter out; - try { - out = IOUtils.openOutAppend(path); - out.println(line); - out.close(); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } - } - - private void logs(String s, Object... 
args) { - }; - - public InteractiveServer(Master master) { - this.master = master; - } - - public void run() { - try { - String hostname = fig.basic.SysInfoUtils.getHostName(); - HttpServer server = HttpServer.create(new InetSocketAddress(opts.port), 10); - // generous timeout here - ExecutorService pool = new ThreadPoolExecutor(opts.numThreads, opts.numThreads, 120, TimeUnit.SECONDS, - new LinkedBlockingQueue()); - // Executors.newFixedThreadPool(opts.numThreads); - server.createContext("/", new Handler()); - server.setExecutor(pool); - server.start(); - LogInfo.logs("JSON Server (%d threads) started at http://%s:%s/sempre", opts.numThreads, hostname, opts.port); - LogInfo.log("Press Ctrl-D to terminate."); - LogInfo.begin_threads(); - while (LogInfo.stdin.readLine() != null) { - } - LogInfo.log("Shutting down server..."); - server.stop(0); - LogInfo.log("Shutting down executor pool..."); - pool.shutdown(); - LogInfo.end_threads(); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - public static Map makeJson(Master.Response response) { - Map json = new HashMap(); - json.put("stats", response.stats); - - if (response.lines != null) { - json.put("lines", response.lines); - } - if (response.getExample() != null) { - List items = new ArrayList(); - json.put("candidates", items); - List allCandidates = response.getExample().getPredDerivations(); - Derivation.sortByScore(allCandidates); - if (allCandidates != null) { - if (allCandidates.size() >= InteractiveServer.opts.maxCandidates) { - response.lines.add(String.format("Exceeded max options: (current: %d / max: %d) ", allCandidates.size(), - InteractiveServer.opts.maxCandidates)); - allCandidates = allCandidates.subList(0, InteractiveServer.opts.maxCandidates); - } - - for (Derivation deriv : allCandidates) { - Map item = new HashMap(); - Value value = deriv.getValue(); - if (value instanceof StringValue) - item.put("value", ((StringValue) value).value); - else if (value instanceof ErrorValue) - 
item.put("value", ((ErrorValue) value).sortString()); - else if (value != null) - item.put("value", value.sortString()); - else - item.put("value", "[[]]"); - item.put("score", deriv.getScore()); - item.put("prob", deriv.getProb()); - item.put("anchored", deriv.allAnchored); // used only anchored rules - item.put("formula", deriv.formula.toLispTree().toString()); - items.add(item); - } - } - } - return json; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/InteractiveUtils.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/InteractiveUtils.java deleted file mode 100644 index 944a505d73..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/InteractiveUtils.java +++ /dev/null @@ -1,199 +0,0 @@ -package edu.stanford.nlp.sempre.interactive; - -import java.util.ArrayList; -import java.util.List; -import java.util.stream.Collectors; - -import org.testng.collections.Lists; - -import com.google.common.collect.ImmutableList; - -import edu.stanford.nlp.sempre.ActionFormula; -import edu.stanford.nlp.sempre.BeamParser; -import edu.stanford.nlp.sempre.Derivation; -import edu.stanford.nlp.sempre.Example; -import edu.stanford.nlp.sempre.Formula; -import edu.stanford.nlp.sempre.Formulas; -import edu.stanford.nlp.sempre.IdentityFn; -import edu.stanford.nlp.sempre.Json; -import edu.stanford.nlp.sempre.Master; -import edu.stanford.nlp.sempre.Params; -import edu.stanford.nlp.sempre.Parser; -import edu.stanford.nlp.sempre.Rule; -import edu.stanford.nlp.sempre.SemanticFn; -import fig.basic.LispTree; -import fig.basic.LogInfo; -import fig.basic.Option; -import fig.basic.Ref; - -/** - * Utilities for interactive learning - * - * @author sidaw - */ -public final class InteractiveUtils { - public static class Options { - @Option(gloss = "use the best formula when no match or not provided") - public boolean useBestFormula = false; - - @Option(gloss = "path to the citations") - public String citationPath; - - 
@Option(gloss = "verbose") - public int verbose = 0; - } - - public static Options opts = new Options(); - - private InteractiveUtils() { - } - - // dont spam my log when reading things in the beginning... - public static boolean fakeLog = false; - - public static Derivation stripDerivation(Derivation deriv) { - while (deriv.rule.sem instanceof IdentityFn) { - deriv = deriv.child(0); - } - return deriv; - } - - public static Derivation stripBlock(Derivation deriv) { - if (opts.verbose > 0) - LogInfo.logs("StripBlock %s %s %s", deriv, deriv.rule, deriv.cat); - while ((deriv.rule.sem instanceof BlockFn || deriv.rule.sem instanceof IdentityFn) && deriv.children.size() == 1) { - deriv = deriv.child(0); - } - return deriv; - } - - public static List derivsfromJson(String jsonDef, Parser parser, Params params, - Ref refResponse) { - @SuppressWarnings("unchecked") - List body = Json.readValueHard(jsonDef, List.class); - // string together the body definition - List allDerivs = new ArrayList<>(); - int numFailed = 0; - for (Object obj : body) { - @SuppressWarnings("unchecked") - List pair = (List) obj; - String utt = pair.get(0); - String formula = pair.get(1); - - if (formula.equals("()")) { - LogInfo.logs("Error: Got empty formula"); - continue; - } - - Example.Builder b = new Example.Builder(); - // b.setId("session:" + sessionId); - b.setUtterance(utt); - Example ex = b.createExample(); - ex.preprocess(); - - LogInfo.logs("Parsing body: %s", ex.utterance); - ((InteractiveBeamParser)parser).parseWithoutExecuting(params, ex, false); - - boolean found = false; - Formula targetFormula = Formulas.fromLispTree(LispTree.proto.parseFromString(formula)); - for (Derivation d : ex.predDerivations) { - // LogInfo.logs("considering: %s", d.formula.toString()); - if (d.formula.equals(targetFormula)) { - found = true; - allDerivs.add(stripDerivation(d)); - break; - } - } - if (!found && !formula.equals("?")) { - LogInfo.errors("matching formula not found: %s :: %s", utt, formula); - 
numFailed++; - } - // just making testing easier, use top derivation when we formula is not - // given - if (!found && ex.predDerivations.size() > 0 && (formula.equals("?") || formula == null || opts.useBestFormula)) - allDerivs.add(stripDerivation(ex.predDerivations.get(0))); - else if (!found) { - Derivation res = new Derivation.Builder().formula(targetFormula) - // setting start to -1 is important, - // which grammarInducer interprets to mean we do not want partial - // rules - .withCallable(new SemanticFn.CallInfo("$ROOT", -1, -1, null, new ArrayList<>())).createDerivation(); - allDerivs.add(res); - } - } - if (refResponse != null) { - refResponse.value.stats.put("num_failed", numFailed); - refResponse.value.stats.put("num_body", body.size()); - } - // LogInfo.logs("returning deriv list %s, \n %s", allDerivs.toString(), - // jsonDef); - return allDerivs; - } - - public static List utterancefromJson(String jsonDef, boolean tokenize) { - @SuppressWarnings("unchecked") - List body = Json.readValueHard(jsonDef, List.class); - // string together the body definition - List utts = new ArrayList<>(); - for (int i = 0; i < body.size(); i++) { - Object obj = body.get(i); - @SuppressWarnings("unchecked") - List pair = (List) obj; - String utt = pair.get(0); - - Example.Builder b = new Example.Builder(); - // b.setId("session:" + sessionId); - b.setUtterance(utt); - Example ex = b.createExample(); - ex.preprocess(); - - if (tokenize) { - utts.addAll(ex.getTokens()); - if (i != body.size() - 1 && !utts.get(utts.size() - 1).equals(";")) - utts.add(";"); - } else { - utts.add(String.join(" ", ex.getTokens())); - } - - } - return utts; - } - - public static synchronized void addRuleInteractive(Rule rule, Parser parser) { - LogInfo.logs("addRuleInteractive: %s", rule); - if (parser instanceof InteractiveBeamParser) { - parser.addRule(rule); - } else { - throw new RuntimeException("interactively adding rule not supported for paser " + parser.getClass().toString()); - } - } - - 
static Rule blockRule(ActionFormula.Mode mode) { - BlockFn b = new BlockFn(mode); - b.init(LispTree.proto.parseFromString("(BlockFn sequential)")); - return new Rule("$ROOT", Lists.newArrayList("$ROOT", "$ROOT"), b); - } - - public static Derivation combine(List children) { - ActionFormula.Mode mode = ActionFormula.Mode.sequential; - if (children.size() == 1) { - return children.get(0); - } - Formula f = new ActionFormula(mode, children.stream().map(d -> d.formula).collect(Collectors.toList())); - Derivation res = new Derivation.Builder().formula(f) - // setting start to -1 is important, - // which grammarInducer interprets to mean we do not want partial rules - .withCallable(new SemanticFn.CallInfo("$ROOT", -1, -1, blockRule(mode), ImmutableList.copyOf(children))) - .createDerivation(); - return res; - } - - public static String getParseStatus(Example ex) { - return GrammarInducer.getParseStatus(ex).toString(); - } - - public static void cite(Derivation match, Example ex) { - CitationTracker tracker = new CitationTracker(ex.id, ex); - tracker.citeAll(match); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/Item.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/Item.java deleted file mode 100644 index 2265162680..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/Item.java +++ /dev/null @@ -1,16 +0,0 @@ -package edu.stanford.nlp.sempre.interactive; - -import java.util.Set; - -// Individual items with some properties -public abstract class Item { - public Set names; - - public abstract boolean selected(); // explicit global selection - - public abstract void select(boolean sel); - - public abstract void update(String rel, Object value); - - public abstract Object get(String rel); -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/QueryStats.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/QueryStats.java deleted file mode 100644 index 
b9b1507d1d..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/QueryStats.java +++ /dev/null @@ -1,41 +0,0 @@ -package edu.stanford.nlp.sempre.interactive; - -import edu.stanford.nlp.sempre.Master; - -public class QueryStats { - Master.Response response; - QueryType type; - - public enum QueryType { - q, def, accept, reject, other - }; - - public QueryStats(Master.Response response) { - this.response = response; - } - - public QueryStats(Master.Response response, String command) { - this.response = response; - put("type", command.substring(1)); - } - - public void put(String k, Object v) { - response.stats.put(k, v); - } - - public void size(int num) { - put("size", num); - } - - public void status(String status) { - put("status", status); - } - - public void rank(int r) { - put("rank", r); - } - - public void error(String msg) { - put("error", msg); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/SimulationAnalyzer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/SimulationAnalyzer.java deleted file mode 100644 index 804bd13c3a..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/SimulationAnalyzer.java +++ /dev/null @@ -1,90 +0,0 @@ -package edu.stanford.nlp.sempre.interactive; - -import java.io.PrintWriter; -import java.util.LinkedHashMap; -import java.util.Map; -import java.util.Map.Entry; - -import edu.stanford.nlp.sempre.Json; -import fig.basic.Evaluation; -import fig.basic.IOUtils; -import fig.basic.LogInfo; -import fig.exec.Execution; - -public class SimulationAnalyzer { - @SuppressWarnings("unchecked") - public static Map getStats(String jsonResponse) { - Map response = Json.readMapHard(jsonResponse); - if (response.containsKey("stats")) - return (Map) response.get("stats"); - return null; - } - - static Evaluation qEval = new Evaluation(); - static Evaluation acceptEval = new Evaluation(); - static Evaluation dEval = new Evaluation(); - static 
int queryCount = 0; - - // add stats to the query. - public synchronized static void addStats(Map query, String jsonResponse) { - Map stats = getStats(jsonResponse); - Map line = new LinkedHashMap(query); - LogInfo.logs("stats: %s", stats); - if (stats == null) { - LogInfo.logs("No stats"); - LogInfo.log(query); - LogInfo.log(jsonResponse); - return; - } - - // make sure no key conflict - for (Entry entry : stats.entrySet()) { - line.put("stats." + entry.getKey(), entry.getValue()); - } - line.put("queryCount", ++queryCount); - PrintWriter infoFile = IOUtils.openOutAppendHard(Execution.getFile("plotInfo.json")); - infoFile.println(Json.writeValueAsStringHard(line)); - infoFile.close(); - - if (!stats.containsKey("type")) - return; - - if (stats.get("type").equals("def") && !stats.containsKey("error")) { - qEval.add("def.head_len", (Integer) stats.get("head_len")); - qEval.add("def.json_len", (Integer) stats.get("json_len")); - qEval.add("def.num_failed", (Integer) stats.get("num_failed")); - qEval.add("def.num_body", (Integer) stats.get("num_body")); - qEval.add("def.num_rules", (Integer) stats.get("num_rules")); - qEval.add("def.time", (Integer) stats.get("count")); - } - - if (stats.get("type").equals("q") && !stats.containsKey("error")) { - GrammarInducer.ParseStatus status = GrammarInducer.ParseStatus.fromString(stats.get("status").toString()); - int size = (Integer) stats.get("size"); - qEval.add("q.size", size); - qEval.add("q.isCore", status == GrammarInducer.ParseStatus.Core); - qEval.add("q.isInduced", status == GrammarInducer.ParseStatus.Induced); - } - - if (stats.get("type").equals("accept") && !stats.containsKey("error")) { - GrammarInducer.ParseStatus status = GrammarInducer.ParseStatus.fromString(stats.get("status").toString()); - int size = (Integer) stats.get("size"); - int rank = (Integer) stats.get("rank"); - acceptEval.add("size", size); - if (rank != -1) - acceptEval.add("rank", rank); - - acceptEval.add("isCore", status == 
GrammarInducer.ParseStatus.Core); - acceptEval.add("isInduced", status == GrammarInducer.ParseStatus.Induced); - } - } - - public synchronized static void flush() { - // TODO Auto-generated method stub - qEval.logStats("q"); - qEval.putOutput("q"); - - acceptEval.logStats("accept"); - acceptEval.putOutput("accept"); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/Simulator.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/Simulator.java deleted file mode 100644 index fce84018c7..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/Simulator.java +++ /dev/null @@ -1,238 +0,0 @@ -package edu.stanford.nlp.sempre.interactive; - -import java.io.BufferedInputStream; -import java.io.BufferedReader; -import java.io.Closeable; -import java.io.DataOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.io.UncheckedIOException; -import java.io.UnsupportedEncodingException; -import java.net.HttpURLConnection; -import java.net.URL; -import java.net.URLEncoder; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; -import java.util.stream.Collectors; -import java.util.stream.Stream; -import java.util.zip.GZIPInputStream; - -import org.testng.collections.Lists; - -import edu.stanford.nlp.sempre.Json; -import fig.basic.LogInfo; -import fig.basic.Option; -import fig.basic.OptionsParser; -import fig.exec.Execution; - -/** - * utilites for simulating a session through the server - * - * @author sidaw - */ - -class GZIPFiles { - /** - * Get a lazily loaded stream of lines from a gzipped file, similar to - * {@link Files#lines(java.nio.file.Path)}. - * - * @param path - * The path to the gzipped file. 
- * @return stream with lines. - */ - public static Stream lines(Path path) { - InputStream fileIs = null; - BufferedInputStream bufferedIs = null; - GZIPInputStream gzipIs = null; - try { - fileIs = Files.newInputStream(path); - // Even though GZIPInputStream has a buffer it reads individual bytes - // when processing the header, better add a buffer in-between - bufferedIs = new BufferedInputStream(fileIs, 65535); - gzipIs = new GZIPInputStream(bufferedIs); - } catch (IOException e) { - closeSafely(gzipIs); - closeSafely(bufferedIs); - closeSafely(fileIs); - throw new UncheckedIOException(e); - } - BufferedReader reader = new BufferedReader(new InputStreamReader(gzipIs)); - return reader.lines().onClose(() -> closeSafely(reader)); - } - - private static void closeSafely(Closeable closeable) { - if (closeable != null) { - try { - closeable.close(); - } catch (IOException e) { - // Ignore - } - } - } -} - -public class Simulator implements Runnable { - - @Option - public static String serverURL = "http://localhost:8410"; - @Option - public static int numThreads = 1; - @Option - public static int verbose = 1; - @Option - public static boolean useThreads = false; - @Option - public static long maxQueries = Long.MAX_VALUE; - @Option - public static String reqParams = "grammar=0&cite=0&learn=0"; - @Option - public static List logFiles = null; - - public void readQueries() { - // T.printAllRules(); - // A.assertAll(); - for (String fileName : logFiles) { - long startTime = System.nanoTime(); - Stream stream; - try { - if (fileName.endsWith(".gz")) - stream = GZIPFiles.lines(Paths.get(fileName)); - else - stream = Files.lines(Paths.get(fileName)); - - List lines = stream.collect(Collectors.toList()); - LogInfo.logs("Reading %s (%d lines)", fileName, lines.size()); - int numLinesRead = 0; - // ExecutorService executor = new ThreadPoolExecutor(numThreads, - // numThreads, - // 15000, TimeUnit.MILLISECONDS, - // new LinkedBlockingQueue()); - ExecutorService executor = 
Executors.newSingleThreadExecutor(); - - for (String l : lines) { - numLinesRead++; - if (numLinesRead > maxQueries) - break; - LogInfo.logs("Line %d", numLinesRead); - if (!useThreads) { - executeLine(l); - } else { - Future future = executor.submit(() -> executeLine(l)); - try { - future.get(10, TimeUnit.MINUTES); - } catch (Throwable t) { - t.printStackTrace(); - } finally { - future.cancel(true); // may or may not desire this - long endTime = System.nanoTime(); - LogInfo.logs("Took %d ns or %.4f s", (endTime - startTime), (endTime - startTime) / 1.0e9); - } - } - } - } catch (IOException e) { - e.printStackTrace(); - } - - } - SimulationAnalyzer.flush(); - } - - static void executeLine(String l) { - Map json = null; - try { - json = Json.readMapHard(l); - } catch (RuntimeException e) { - LogInfo.logs("Json cannot be read from %s: %s", l, e.toString()); - return; - } - Object command = json.get("q"); - if (command == null) // to be backwards compatible - command = json.get("log"); - Object sessionId = json.get("sessionId"); - if (sessionId == null) // to be backwards compatible - sessionId = json.get("id"); - - try { - String response = sempreQuery(command.toString(), sessionId.toString()); - SimulationAnalyzer.addStats(json, response); - } catch (Throwable t) { - t.printStackTrace(); - } - } - - public static String sempreQuery(String query, String sessionId) throws UnsupportedEncodingException { - String params = "q=" + URLEncoder.encode(query, "UTF-8"); - params += String.format("&sessionId=%s&%s", sessionId, reqParams); - // params = URLEncoder.encode(params); - String url = String.format("%s/sempre?", serverURL); - // LogInfo.log(params); - // LogInfo.log(query); - String response = executePost(url + params, ""); - // LogInfo.log(response); - return response; - } - - public static String executePost(String targetURL, String urlParameters) { - HttpURLConnection connection = null; - - try { - // Create connection - URL url = new URL(targetURL); - connection = 
(HttpURLConnection) url.openConnection(); - connection.setRequestMethod("POST"); - connection.setRequestProperty("Content-Type", "application/x-www-form-urlencoded"); - - connection.setRequestProperty("Content-Length", Integer.toString(urlParameters.getBytes().length)); - connection.setRequestProperty("Content-Language", "en-US"); - - connection.setUseCaches(false); - connection.setDoOutput(true); - - // Send request - DataOutputStream wr = new DataOutputStream(connection.getOutputStream()); - wr.writeBytes(urlParameters); - wr.close(); - - // Get Response - InputStream is = connection.getInputStream(); - BufferedReader rd = new BufferedReader(new InputStreamReader(is)); - StringBuilder response = new StringBuilder(); // or StringBuffer if Java - // version 5+ - String line; - while ((line = rd.readLine()) != null) { - response.append(line); - response.append('\r'); - } - rd.close(); - return response.toString(); - } catch (Exception e) { - e.printStackTrace(); - return null; - } finally { - if (connection != null) { - connection.disconnect(); - } - } - } - - public static void main(String[] args) { - OptionsParser parser = new OptionsParser(); - Simulator simulator = new Simulator(); - // parser.register("", opts); - Execution.run(args, "Simulator", simulator, parser); - } - - @Override - public void run() { - LogInfo.logs("setting numThreads %d", numThreads); - readQueries(); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/World.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/World.java deleted file mode 100644 index e9a27d784e..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/World.java +++ /dev/null @@ -1,81 +0,0 @@ -package edu.stanford.nlp.sempre.interactive; - -import java.util.HashSet; -import java.util.Set; -import java.util.Collections; - -import edu.stanford.nlp.sempre.ContextValue; -import edu.stanford.nlp.sempre.interactive.lassie.TacticWorld; - -/** - * The world 
consists of Items, and tracks allItems: the whole world selected: - * the set of items in focus, usually, but not necessarily a subset of allItems - * previous: previously selected items, to handle more without variables - * implementation: voxelurn.VoxelWorld - * - * @author sidaw - **/ -public abstract class World { - // supports variables, and perhaps scoping - public Set allItems; - public Set selected; - public Set previous; - - //public static World fromContext(String worldname, ContextValue context) { - // if (worldname.equals("VoxelWorld")) - // return VoxelWorld.fromContext(context); - // throw new RuntimeException("World does not exist: " + worldname); - //} - - // there are some annoying issues with mutable objects. - // The current strategy is to keep allitems up to date on each mutable - // operation - public abstract String toJSON(); - - public abstract Set has(String rel, Set values); - - public abstract Set get(String rel, Set subset); - - public abstract void update(String rel, Object value, Set selected); - - public abstract void merge(); - // public abstract void select(Set set); - - public World() { - this.allItems = new HashSet<>(); - this.selected = new HashSet<>(); - this.previous = new HashSet<>(); - } - - // general actions, flatness means these actions can be performed on allitems - public void remove(Set selected) { - allItems = new HashSet<>(allItems); - allItems.removeAll(selected); - // this.selected.removeAll(selected); - } - - // it is bad to ever mutate select, which will break scoping - public void select(Set set) { - this.selected = set; - } - - public void noop() { - } - - public Set selected() { - return this.selected; - } - - public Set previous() { - return this.previous; - } - - public Set all() { - return allItems; - } - - public Set empty() { - return new HashSet<>(); - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/lassie/ChoiceFn.java 
b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/lassie/ChoiceFn.java deleted file mode 100644 index 8fa148b2bf..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/lassie/ChoiceFn.java +++ /dev/null @@ -1,85 +0,0 @@ -package edu.stanford.nlp.sempre.interactive.lassie; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.interactive.lassie.LassieUtils; -import edu.stanford.nlp.sempre.interactive.lassie.HOLOntology; - -import fig.basic.LispTree; -import fig.basic.LogInfo; -import fig.basic.Option; - -import java.util.List; -import java.util.LinkedList; -import java.util.Arrays; - -/** - * Given a set, returns an element of that set. Kills derivations which - * execute to empty sets. Sets which execute to something bigger than a - * singleton trigger a warning, written to the socket file, which - * locates and describes the ambiguity. In the case that SEMPRE returns - * no derivations, Lassie can use this information to describe the cause - * of missing derivations. The grammar rule using this SemanticFn might - * look like - * - * (rule $MyType ($MyTypeCandidates) (ChoiceFn)) - * - * where $MyTypeCandidates is a call formula returning a - * StringValue. $MyType will be a StringValue as well. 
- */ - -public class ChoiceFn extends SemanticFn { - public static class Options { - @Option(gloss = "Verbose") public int verbose = 0; - } - public static Options opts = new Options(); - - Formula formula; - String[] elements; - - public ChoiceFn() { } - - public ChoiceFn(Formula formula) { - this.formula = formula; - } - - // Get the string in the uttrance which is at the origin of this derivation - public static String getUttString(Callable c) { - if (c.getChildren().size() == 0) return ((ValueFormula) ((Derivation) c).formula).value.pureString(); - else { - String uttString = getUttString(c.child(0)); - for (int i = 1; i < c.getChildren().size(); i++) - uttString = uttString + " " + getUttString(c.child(i)); - return uttString; - } - } - - public DerivationStream call(final Example ex, final Callable c) { - Executor executor = new JavaExecutor(); - // c.child(0).printDerivationRecursively(); - if (this.formula == null) - this.formula = c.child(0).formula; - String candidates = executor.execute(this.formula, ex.context).value.pureString(); - elements = candidates.split(","); // representation of set is as a string (comma-separated) - if (elements.length > 1) { - LassieUtils.printToSocket("Lassie.AMBIGUITY_WARNING := SOME {set= " - + "[\"" + candidates.replace(",","\",\"") + "\"], " - + "span= \"" + getUttString((CallInfo) c) + "\"}"); - elements = new String[0]; - } - return new MultipleDerivationStream() { - private boolean chosen = false; - @Override - public Derivation createDerivation() { - if (elements.length == 0 || elements[0].equals("") || chosen) return null; - else { - Derivation res = new Derivation.Builder() - .withCallable(c) - .formula(formula) - .createDerivation(); - chosen = true; - return res; - } - } - }; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/lassie/Component.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/lassie/Component.java deleted file mode 100644 index 
469be89135..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/lassie/Component.java +++ /dev/null @@ -1,35 +0,0 @@ -package edu.stanford.nlp.sempre.interactive.lassie; - -import java.util.Set; -import java.util.HashSet; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; - -import edu.stanford.nlp.sempre.interactive.Item; - - -// Individual items with some properties -public class Component extends Item { - public Set names; - public Map> features; - - public Component(Set names, Map> features) { - this.names = names; - this.features = features; - } - - public Component(String name, Map> features) { - this.names = new HashSet(); - names.add(name); - this.features = features; - } - - public boolean selected() { return false; } // explicit global selection - public void select(boolean sel) {}; - public void update(String rel, Object value) {}; - - public Object get(String feature) { - return this.features.get(feature); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/lassie/HOLOntology.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/lassie/HOLOntology.java deleted file mode 100644 index 56a6fc18b2..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/lassie/HOLOntology.java +++ /dev/null @@ -1,185 +0,0 @@ -package edu.stanford.nlp.sempre.interactive.lassie; - -import java.io.IOException; -import java.io.PrintWriter; - -import java.util.Arrays; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedHashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.function.Function; -import java.util.stream.Collectors; - -import com.google.common.collect.Sets; - -import edu.stanford.nlp.sempre.ContextValue; -import edu.stanford.nlp.sempre.Json; -import edu.stanford.nlp.sempre.NaiveKnowledgeGraph; -import edu.stanford.nlp.sempre.StringValue; -import 
edu.stanford.nlp.sempre.interactive.Item; -import edu.stanford.nlp.sempre.interactive.World; - -import fig.basic.IOUtils; -import fig.basic.LogInfo; -import fig.basic.Option; - -import edu.stanford.nlp.sempre.interactive.lassie.Component; - -// Lassie's knowledge of HOL components -public class HOLOntology { - - public static class Options { - @Option(gloss = "Path to database file, contains components and their features") - public String dbPath = null; - @Option(gloss = "Path to lexicon file, temporary interface to inform SimpleLexiconFn of db") - public String lexPath = null; - // @Option(gloss = "Path to seed grammar, to be instantiated to all types") - // public String seedGrammarPath = null; - // @Option(gloss = "Path to generated grammar, result of generation from seed") - // public String genGrammarPath = null; - } - public static Options opts = new Options(); - - public Map> entities; // component -> features - public Map> features; // feature -> components - - private static HOLOntology theOntology; - public static HOLOntology getTheOntology() { - if (theOntology == null) theOntology = new HOLOntology(); - return theOntology; - } - - @SuppressWarnings("unchecked") - private HOLOntology() { - this.entities = new HashMap>(); - this.features = new HashMap>(); - readDB(); - writeLexicon(); - } - - private void logLine(String path, String line) { - PrintWriter out; - try { - out = IOUtils.openOutAppend(path); - out.println(line); - out.close(); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } - - private void insert(String component, Set features) { - for (String feat : features) { - //LogInfo.logs("inserting: %s, %s, %s", component, attribute, feat); - insert(component, feat); - } - } - - private void insert(String component, String feature) { - // From components to their features - this.entities.putIfAbsent(component, new HashSet()); - Set componentFeatures = this.entities.get(component); - 
componentFeatures.add(feature); - this.entities.put(component, componentFeatures); - // From features to components which possess them - this.features.putIfAbsent(feature, new HashSet()); - Set components = this.features.get(feature); - components.add(component); - //LogInfo.logs(" adding: %s :: %s", feature, component); - this.features.put(feature, components); - } - - private void readDB() { - LogInfo.begin_track("HOLOntology.readEntities: %s", opts.dbPath); - // Load up from database - for (String line : IOUtils.readLinesHard(opts.dbPath)) { - //LogInfo.logs("Processing line: %s", line); - if (line.startsWith("#")) continue; // Skip comment lines - String[] statements = line.split(",\\s*"); - String[] tokens = statements[0].split("\\s+"); - if (tokens.length == 0 || tokens[0].equals("")) continue; // Skip empty lines - // We expect triplets (at least), e.g. "POW_2 name power" - if (tokens.length >= 3) { - String component = tokens[0]; - String attribute = tokens[1]; - String feature = attribute + "." + tokens[2]; - for (int i = 3; i < tokens.length; i++) - feature = feature + " " + tokens[i]; - Set moreFeatures = new HashSet(); - moreFeatures.add(feature); - for (int i = 1; i < statements.length; i++) - moreFeatures.add(attribute + "." 
+ statements[i].replaceAll("\\s+", " ")); - insert(component, moreFeatures); - } else { - continue; // Skip lines with missing information - } - } - LogInfo.end_track(); - } - - private String typeOf(String c) { - for (String f : entities.get(c)) - if (f.startsWith("type.")) - return f.replace(" ","").replace("(","[").replace(")","]"); - throw new RuntimeException("Cannot find type: " + c); - } - - private String suffix(String f) { - try { - return f.substring(f.lastIndexOf('.') + 1, f.length()); - } catch (Exception e) { - throw new RuntimeException("Bad string: " + f); - } - } - - private String prefix(String f) { - try { - return f.substring(0, f.lastIndexOf('.')); - } catch (Exception e) { - throw new RuntimeException("Bad string: " + f); - } - } - - private String quot(String s) { return "\"" + s + "\""; } - - private void writeLexicon() { - try { - PrintWriter writer = new PrintWriter(opts.lexPath, "UTF-8"); - // Components (literal) - for (String c : this.entities.keySet()) { - Map jsonMap = new LinkedHashMap<>(); - jsonMap.put("lexeme", c); - jsonMap.put("formula", quot(c)); // force Formula to StringFormula in the Lisp interpreter - jsonMap.put("type", quot(suffix(typeOf(c)))); - writer.println(Json.writeValueAsStringHard(jsonMap)); - } - // Features - for (String f : this.features.keySet()) { - Map jsonMap = new LinkedHashMap<>(); - jsonMap.put("lexeme", suffix(f)); - jsonMap.put("formula", quot(f)); // may contain spaces, force Formula to StringFormula - jsonMap.put("type", quot(prefix(f))); - writer.println(Json.writeValueAsStringHard(jsonMap)); - } - writer.close(); - } catch (IOException e) { - throw new RuntimeException("Error writing to file " + opts.lexPath); - } - } - - // private void generateGrammar() { - // Set types = new HashSet(); - // for (String f : this.features) - // if (f.startsWith("type.")) - // types.add(f); - // String monotypeRules = - // for (String t : types) { - // } - // } - -} diff --git 
a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/lassie/LassieUtils.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/lassie/LassieUtils.java deleted file mode 100644 index eba744e2ee..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/interactive/lassie/LassieUtils.java +++ /dev/null @@ -1,66 +0,0 @@ -package edu.stanford.nlp.sempre.interactive.lassie; - -import com.google.common.base.Joiner; -import com.google.common.base.Strings; -import com.google.common.collect.Lists; - -import com.fasterxml.jackson.core.JsonProcessingException; - -import edu.stanford.nlp.sempre.*; -import fig.basic.*; -import java.util.*; -import java.io.*; - - -public class LassieUtils{ - - public static void printToSocket(String string) { - try (PrintWriter writer = new PrintWriter(new FileOutputStream (new File("interactive/sempre-out-socket.sml"), true))) { - writer.println("val _ = " + string + "\n"); - writer.close(); - } catch (IOException ex) { - System.err.println("Error writing to file interactive/sempre-out-socket.sml"); - } - } - - // Rudimentary translation of a json object into an SML record - public static String json2sml(String string) { - // dependent on knowing the fields in advance - String[] fields = {"score", "prob", "formula", //"candidates", "anchored", "formula", - "size", "status"}; //"stats", "lines"}; - String orig = string; - // unquote fields; subsitute `:` for `=` - for (String field : fields) { - string = string.replace("\"" + field + "\":", field + "= "); - } - - //string = string - // .replace("\"type\":", "cmd= ") // avoid reserved keywords of SML - // .replace("\"NaN\"", "~1.0") // force types of fields - // .replaceAll("\"anchored\":true,","") - // .replaceAll("\"anchored\":false,","") - // .replaceAll("\"candidates\":\\[(.*?)\\]","\\[ $1 \\]") - // .replaceAll("\"stats\":\\{(.*?)\\},","") - // .replaceAll(",\"lines\":\\[(.*?)\\]","") - // .replaceAll("\"value\":\"Tactic (.*?)\"", "value = \"Tactic 
$1\",result = Tactic ($1)") - // .replaceAll("\"value\":\"Command (.*?)\"", "value = \"Command $1\",result = Command ($1)") - // .replaceAll("\"value\":\"(.*?)\"", "value = \"$1\",result = Tactic ($1)") - // .replaceAll("\\{\\[(.*?)\\]\\}","\\[$1\\]"); - - //.replaceAll("\"value\":\"(.*?)\"","value= \"$1\",tactic= $1"); // cast the value as a tactic - - // escape backslashes in strings - // (we could do more fancy escaping, but quotes are already converted earlier by - // sempre and other characters requiring escaping are not expected to appear here) - String[] substrings = string.split("(?- " + parens(tac2); - } - public static String cons(String hd, String tl) { - if (hd.equals("") || tl.equals("")) return ""; - return hd + " , " + tl; - } - public static String list(String seq) { - if (seq.equals("")) return ""; - return "[ " + seq + " ]"; - } - public static String quote(String exp) { - if (exp.equals("")) return ""; - return "TERMSTART " + exp + " TERMEND"; - } - public static String parens(String exp) { - if (exp.equals("")) return ""; - return "( " + exp + " )"; - } - public static String op(String operator, String arg1, String arg2) { - if (operator.equals("") || arg1.equals("") || arg2.equals("")) return ""; - return arg1 + " " + operator + " " + arg2; - } - - public static String goalInt(String num) { - return "INTGOAL" + " " + num; - } - - public static String goalTerm(String tm) { - return "TERMGOAL" + " " + tm; - } - - public static Set fromFeature(String f) { - HOLOntology ontology = HOLOntology.getTheOntology(); - if (f.equals("top")) return ontology.entities.keySet(); - else if (f.equals("bot")) return new HashSet(); - else if (ontology.features.containsKey(f)) return ontology.features.get(f); - else throw new RuntimeException("Feature not recognized: " + f); - } - - // Set operations - public static Set intersect(Set s1, Set s2) { - return s1.stream().filter(i -> s2.contains(i)).collect(Collectors.toSet()); - } - - public static String 
set2string(Set s) { - return String.join(",", s); - } - - // Semantic side helper of ChoiceFn - // returns - public static String choice(Set s) { - if (s.size() > 1) { - - // Abduce simplest answer if its features are a subset of every other candidate's features - // (i.e. abduce if there is no disambiguation possible) - HOLOntology ontology = HOLOntology.getTheOntology(); - String smallest = "TOP_TACTIC"; - int smallestSize = Integer.MAX_VALUE; - for (String e : s) - if (ontology.entities.get(e).size() < smallestSize) { - smallest = e; - smallestSize = ontology.entities.get(e).size(); - } - boolean abduceable = true; - for (String e : s) - if (!ontology.entities.get(e).containsAll(ontology.entities - .get(smallest) - .stream() // (not required to share name) - .filter(x -> !x.startsWith("name")) - .collect(Collectors.toSet()))) - abduceable = false; - - if (abduceable) - return smallest; - } - // not abduceable, therefore ambiguous - return String.join(",", s); // send the set of candidates to alert ambiguity - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/overnight/Aligner.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/overnight/Aligner.java deleted file mode 100644 index 7265203994..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/overnight/Aligner.java +++ /dev/null @@ -1,146 +0,0 @@ -package edu.stanford.nlp.sempre.overnight; - -import com.google.common.base.Joiner; -import edu.stanford.nlp.io.IOUtils; -import edu.stanford.nlp.stats.ClassicCounter; -import edu.stanford.nlp.stats.Counter; -import edu.stanford.nlp.stats.Counters; -import fig.basic.LispTree; -import fig.basic.MapUtils; - -import java.io.IOException; -import java.io.PrintWriter; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; - -/** - * Word-aligns original utterances with their paraphrases - * Created by joberant on 2/20/15. 
- */ -public class Aligner { - - private Map> model = new HashMap<>(); - - public double getCondProb(String target, String source) { - if (model.containsKey(source)) { - if (model.get(source).containsKey(target)) - return model.get(source).getCount(target); - } - return 0d; - } - - public int size() { return model.size(); } - - //takes an example file and creates a model - public void heuristicAlign(String exampleFile, int threshold) { - - model.clear(); - Iterator iter = LispTree.proto.parseFromFile(exampleFile); - - while (iter.hasNext()) { - LispTree tree = iter.next(); - LispTree utteranceTree = tree.child(1); - LispTree originalTree = tree.child(2); - String utterance = preprocessUtterance(utteranceTree.child(1).value); - String original = preprocessUtterance(originalTree.child(1).value); - String[] utteranceTokens = utterance.split("\\s+"); - String[] originalTokens = original.split("\\s+"); - - align(utteranceTokens, originalTokens); - } - normalize(threshold); - } - - public void saveModel(String out) throws IOException { - PrintWriter writer = IOUtils.getPrintWriter(out); - for (String source: model.keySet()) { - Counter counts = model.get(source); - for (String target: counts.keySet()) { - writer.println(Joiner.on('\t').join(source, target, counts.getCount(target))); - } - } - } - - //normalize all the counts to get conditional probabilities - private void normalize(int threshold) { - for (String source: model.keySet()) { - Counter counts = model.get(source); - Counters.removeKeys(counts, Counters.keysBelow(counts, threshold)); - Counters.normalize(counts); - } - } - - //count every co-occurrence - private void align(String[] utteranceTokens, String[] originalTokens) { - for (String utteranceToken: utteranceTokens) { - for (String originalToken: originalTokens) { - MapUtils.putIfAbsent(model, utteranceToken.toLowerCase(), new ClassicCounter<>()); - MapUtils.putIfAbsent(model, originalToken.toLowerCase(), new ClassicCounter<>()); - 
model.get(utteranceToken.toLowerCase()).incrementCount(originalToken.toLowerCase()); - model.get(originalToken.toLowerCase()).incrementCount(utteranceToken.toLowerCase()); - } - } - } - - //remove '?' and '.' - public String preprocessUtterance(String utterance) { - if (utterance.endsWith("?")) - return utterance.substring(0, utterance.length() - 1); - if (utterance.endsWith(".")) - return utterance.substring(0, utterance.length() - 1); - return utterance; - } - - //read from serialized file - public static Aligner read(String path) { - Aligner res = new Aligner(); - for (String line: edu.stanford.nlp.io.IOUtils.readLines(path)) { - String[] tokens = line.split("\t"); - MapUtils.putIfAbsent(res.model, tokens[0], new ClassicCounter<>()); - res.model.get(tokens[0]).incrementCount(tokens[1], Double.parseDouble(tokens[2])); - } - return res; - } - - private void berkeleyAlign(String file, int threshold) { - for (String line: IOUtils.readLines(file)) { - String[] tokens = line.split("\t"); - String[] sourceTokens = tokens[0].split("\\s+"); - String[] targetTokens = tokens[1].split("\\s+"); - String[] alignmentTokens = tokens[2].split("\\s+"); - for (String alignmentToken: alignmentTokens) { - String[] alignment = alignmentToken.split("-"); - Integer source = Integer.parseInt(alignment[0]); - Integer target = Integer.parseInt(alignment[1]); - MapUtils.putIfAbsent(model, sourceTokens[source], new ClassicCounter<>()); - MapUtils.putIfAbsent(model, targetTokens[target], new ClassicCounter<>()); - model.get(sourceTokens[source]).incrementCount(targetTokens[target]); - model.get(targetTokens[target]).incrementCount(sourceTokens[source]); - } - } - normalize(threshold); - } - - //args[0] - example file with utterance and original - //args[1] - output file - //args[2] - heuristic or berkeley - //args[3] - threshold - public static void main(String[] args) { - Aligner aligner = new Aligner(); - int threshold = Integer.parseInt(args[3]); - if (args[2].equals("heuristic")) - 
aligner.heuristicAlign(args[0], threshold); - else if (args[2].equals("berkeley")) - aligner.berkeleyAlign(args[0], threshold); - else throw new RuntimeException("bad alignment mode: " + args[2]); - try { - aligner.saveModel(args[1]); - } catch (IOException e) { - e.printStackTrace(); - throw new RuntimeException(e); - } - } - - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/overnight/ConvertTargetValueFromListToString.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/overnight/ConvertTargetValueFromListToString.java deleted file mode 100644 index a9c3857092..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/overnight/ConvertTargetValueFromListToString.java +++ /dev/null @@ -1,59 +0,0 @@ -package edu.stanford.nlp.sempre.overnight; - -import edu.stanford.nlp.io.IOUtils; -import edu.stanford.nlp.sempre.StringValue; -import fig.basic.LispTree; - -import java.io.IOException; -import java.io.PrintWriter; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - -/** - * Created by joberant on 2/24/15. 
- * This converts - * (targetValue (list (name fb:en.place.walton_county))) - * to - * (targetValue (string [(name fb:en.place.walton_county)])) - * This has nothing to do with 'paraphrase', it's just here for no reason - */ -public final class ConvertTargetValueFromListToString { - private ConvertTargetValueFromListToString() { } - - public static void main(String[] args) { - - try { - PrintWriter writer = IOUtils.getPrintWriter(args[1]); - Iterator trees = LispTree.proto.parseFromFile(args[0]); - while (trees.hasNext()) { - LispTree tree = trees.next(); - - LispTree outTree = LispTree.proto.newList(); - outTree.addChild("example"); - outTree.addChild(tree.child(1)); - - List output = new ArrayList<>(); - LispTree targetValue = tree.child(3); - if (!targetValue.child(0).value.equals("targetValue")) - throw new RuntimeException("Expected a target value as second child: " + targetValue); - LispTree list = targetValue.child(1); - if (!list.child(0).value.equals("list")) - throw new RuntimeException("Expected a list as first child: " + list); - for (int i = 1; i < list.children.size(); ++i) - output.add(list.child(i).toString()); - StringValue newTargetValue = new StringValue(output.toString()); - LispTree newTargetValueTree = LispTree.proto.newList(); - newTargetValueTree.addChild("targetValue"); - newTargetValueTree.addChild(newTargetValue.toLispTree()); - outTree.addChild(newTargetValueTree); - outTree.print(120, 120, writer); - writer.println(); - } - writer.close(); - } catch (IOException e) { - e.printStackTrace(); - throw new RuntimeException(e); - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/overnight/CreateBerkeleyAlignerInputFromLispTree.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/overnight/CreateBerkeleyAlignerInputFromLispTree.java deleted file mode 100644 index 5c1630a5af..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/overnight/CreateBerkeleyAlignerInputFromLispTree.java +++ /dev/null @@ 
-1,53 +0,0 @@ -package edu.stanford.nlp.sempre.overnight; - -import edu.stanford.nlp.io.IOUtils; -import fig.basic.LispTree; -import fig.basic.LogInfo; - -import java.io.IOException; -import java.io.PrintWriter; -import java.util.Iterator; - -/** - * Created by joberant on 2/22/15. - * Takes a file with lisp trees of original and canonical utterances - * and creates the input for the berkley aligner - */ -public final class CreateBerkeleyAlignerInputFromLispTree { - private CreateBerkeleyAlignerInputFromLispTree() { } - - //args[0]: lisp tree file - //args[1] output directory - public static void main(String[] args) { - - Iterator trees = LispTree.proto.parseFromFile(args[0]); - try { - PrintWriter writerOriginal = IOUtils.getPrintWriter(args[1] + ".e"); - PrintWriter writerUtterance = IOUtils.getPrintWriter(args[1] + ".f"); - LogInfo.logs("output directory=%s", args[1]); - - int i = 0; - while (trees.hasNext()) { - i++; - LispTree tree = trees.next(); - LispTree utteranceTree = tree.child(1); - LispTree originalTree = tree.child(2); - if (!utteranceTree.child(0).value.equals("utterance")) - throw new RuntimeException("First child is not an utterance " + utteranceTree); - if (!originalTree.child(0).value.equals("original")) - throw new RuntimeException("second child is not the original " + originalTree); - String uttearnce = utteranceTree.child(1).value; - if (uttearnce.endsWith("?") || uttearnce.endsWith(".")) - uttearnce = uttearnce.substring(0, uttearnce.length() - 1); - String original = originalTree.child(1).value; - writerOriginal.println(original); - writerUtterance.println(uttearnce); - } - LogInfo.logs("Numebr of trees=%s", i); - writerOriginal.close(); - writerUtterance.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/overnight/GenerationMain.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/overnight/GenerationMain.java deleted file mode 100644 index 
7012cf4d69..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/overnight/GenerationMain.java +++ /dev/null @@ -1,50 +0,0 @@ -package edu.stanford.nlp.sempre.overnight; - -import fig.basic.Option; -import fig.exec.Execution; -import edu.stanford.nlp.sempre.*; - -/** - * Created by joberant on 1/27/15. - * Generating canonical utterances from grammar with various depths - */ -public class GenerationMain implements Runnable { - @Option - public boolean interactive = false; - @Option - public boolean varyMaxDepth = false; - - @Override - public void run() { - Builder builder = new Builder(); - builder.build(); - - Dataset dataset = new Dataset(); - dataset.read(); - - int currDepth = varyMaxDepth ? 1 : FloatingParser.opts.maxDepth; - int maxDepth = FloatingParser.opts.maxDepth; - - for (; currDepth < maxDepth + 1; currDepth++) { - FloatingParser.opts.maxDepth = currDepth; - //LogInfo.logs("Curr depth=%s", currDepth); - //LogInfo.logs("file = %s", FloatingParser.opts.predictedUtterancesFile); - //PrintWriter writer = IOUtils.openOutAppendEasy(Execution.getFile(FloatingParser.opts.predictedUtterancesFile)); - //writer.println(String.format("Depth=%s", currDepth)); - //writer.println(String.format("--------", currDepth)); - //writer.close(); - Learner learner = new Learner(builder.parser, builder.params, dataset); - learner.learn(); - } - - - if (interactive) { - Master master = new Master(builder); - master.runInteractivePrompt(); - } - } - - public static void main(String[] args) { - Execution.run(args, "GenerationMain", new GenerationMain(), Master.getOptionsParser()); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/overnight/OvernightDerivationPruningComputer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/overnight/OvernightDerivationPruningComputer.java deleted file mode 100644 index 658871b567..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/overnight/OvernightDerivationPruningComputer.java +++ 
/dev/null @@ -1,52 +0,0 @@ -package edu.stanford.nlp.sempre.overnight; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; - -/** - * Hard-coded hacks for pruning derivations in floating parser for overnight domains. - */ - -public class OvernightDerivationPruningComputer extends DerivationPruningComputer { - - public OvernightDerivationPruningComputer(DerivationPruner pruner) { - super(pruner); - } - - @Override - public Collection getAllStrategyNames() { - return Arrays.asList("violateHardConstraints"); - } - - @Override - public String isPruned(Derivation deriv) { - if (containsStrategy("violateHardConstraints") && violateHardConstraints(deriv)) return "violateHardConstraints"; - return null; - } - - // Check a few hard constraints on each derivation - private static boolean violateHardConstraints(Derivation deriv) { - if (deriv.value != null) { - if (deriv.value instanceof ErrorValue) return true; - if (deriv.value instanceof StringValue) { //empty denotation - if (((StringValue) deriv.value).value.equals("[]")) return true; - } - if (deriv.value instanceof ListValue) { - List values = ((ListValue) deriv.value).values; - // empty lists - if (values.size() == 0) return true; - // NaN - if (values.size() == 1 && values.get(0) instanceof NumberValue) { - if (Double.isNaN(((NumberValue) values.get(0)).value)) return true; - } - // If we are supposed to get a number but we get a string (some sparql weirdness) - if (deriv.type.equals(SemType.numberType) && - values.size() == 1 && - !(values.get(0) instanceof NumberValue)) return true; - } - } - return false; - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/overnight/OvernightFeatureComputer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/overnight/OvernightFeatureComputer.java deleted file mode 100644 index b1c1596a84..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/overnight/OvernightFeatureComputer.java +++ /dev/null @@ -1,609 +0,0 @@ -package 
edu.stanford.nlp.sempre.overnight; - -import com.google.common.base.Joiner; -import com.google.common.collect.Sets; -import edu.stanford.nlp.io.IOUtils; -import fig.basic.BipartiteMatcher; -import fig.basic.LogInfo; -import fig.basic.MapUtils; -import fig.basic.Option; -import edu.stanford.nlp.sempre.*; - -import java.util.*; - -/** - * Define features on the input utterance and a partial canonical utterance. - * - * Feature computation recipe: - * - For both the input and (partial) canonical utterance, extract a list of tokens - * (perhaps with POS tags). - * - Given a list of tokens, extract a set of items, where an item is a (tag, - * data) pair, where the tag specifies the "type" of the data, and is used - * to determine features. Example: ("bigram", "not contains"), ("unigram", - * "not"), ("unigram-RB", "not") - * - Given the input and canonical items, define recall features (how much of - * the input items is the canononical covering). - * This recipe allows us to decouple the extraction of items on one utterance - * from the computation of actual precision/recall features. 
- * - * @author Percy Liang - * @author Yushi Wang - */ -public class OvernightFeatureComputer implements FeatureComputer { - public static class Options { - @Option(gloss = "Set of paraphrasing feature domains to include") - public Set featureDomains = new HashSet<>(); - - @Option(gloss = "Whether or not to count intermediate categories for size feature") - public boolean countIntermediate = true; - - @Option(gloss = "Whether or not to do match/ppdb analysis") - public boolean itemAnalysis = true; - - @Option(gloss = "Whether or not to learn paraphrases") - public boolean learnParaphrase = true; - - @Option(gloss = "Verbose flag") - public int verbose = 0; - - @Option(gloss = "Path to alignment file") - public String wordAlignmentPath; - @Option(gloss = "Path to phrase alignment file") - public String phraseAlignmentPath; - @Option(gloss = "Threshold for phrase table co-occurrence") - public int phraseTableThreshold = 3; - } - - public static Options opts = new Options(); - - private static Aligner aligner; - private static Map> phraseTable; - public final SimpleLexicon simpleLexicon = SimpleLexicon.getSingleton(); - - @Override public void extractLocal(Example ex, Derivation deriv) { - if (deriv.rule.rhs == null) return; - - // Optimization: feature vector same as child, so don't do anything. - if (deriv.rule.isCatUnary()) { - if (deriv.isRootCat()) { - extractValueInFormulaFeature(deriv); - extractRootFeatures(ex, deriv); - return; - } - } - - // Important! We want to define the global feature vector for this - // derivation, but we can only specify the local feature vector. So to - // make things cancel out, we subtract out the unwanted feature vectors of - // descendents. 
- subtractDescendentsFeatures(deriv, deriv); - - deriv.addFeature("paraphrase", "size", derivationSize(deriv)); - extractRootFeatures(ex, deriv); - extractLexicalFeatures(ex, deriv); - extractPhraseAlignmentFeatures(ex, deriv); - extractLogicalFormFeatures(ex, deriv); - - if (!opts.itemAnalysis) return; - - List inputItems = computeInputItems(ex); - List candidateItems = computeCandidateItems(ex, deriv); - - for (Item input : inputItems) { - double match = 0; - double ppdb = 0; - double skipBigram = 0; - double skipPpdb = 0; - for (Item candidate : candidateItems) { - if (!input.tag.equals(candidate.tag)) continue; - if (input.tag.equals("skip-bigram")) { - skipBigram = Math.max(skipBigram, computeMatch(input.data, candidate.data)); - skipPpdb = Math.max(skipPpdb, computeParaphrase(input.data, candidate.data)); - } else { - - match = Math.max(match, computeMatch(input.data, candidate.data)); - ppdb = Math.max(ppdb, computeParaphrase(input.data, candidate.data)); - } - } - if (match > 0 && opts.featureDomains.contains("match")) deriv.addFeature("paraphrase", "match"); - if (ppdb > 0 && opts.featureDomains.contains("ppdb")) deriv.addFeature("paraphrase", "ppdb"); - if (skipBigram > 0 && opts.featureDomains.contains("skip-bigram")) deriv.addFeature("paraphrase", "skip-bigram"); - if (skipPpdb > 0 && opts.featureDomains.contains("skip-ppdb")) deriv.addFeature("paraphrase", "skip-ppdb"); - } - - HashMap features = new LinkedHashMap<>(); - deriv.incrementAllFeatureVector(+1, features); - if (opts.verbose >= 1) { - LogInfo.logs("category %s, %s %s", deriv.cat, inputItems, candidateItems); - FeatureVector.logFeatures(features); - } - } - - private void extractValueInFormulaFeature(Derivation deriv) { - if (!opts.featureDomains.contains("denotation")) return; - - if (deriv.value instanceof StringValue) { - - //get strings from value - List valueList = new ArrayList<>(); - - String value = ((StringValue) deriv.value).value; - - if (value.charAt(0) == '[') - value = 
value.substring(1, value.length() - 1); //strip "[]" - String[] tokens = value.split(","); - for (String token : tokens) { - token = token.trim(); //strip spaces - if (token.length() > 0) - valueList.add(token); - } - - //get strings from formula - List formulaList = deriv.formula.mapToList(formula -> { - List res = new ArrayList<>(); - if (formula instanceof ValueFormula) { - res.add(formula); - } - return res; - }, true); - - for (Formula f : formulaList) { - Value formulaValue = ((ValueFormula) f).value; - String valueStr = (formulaValue instanceof StringValue) ? ((StringValue) formulaValue).value : formulaValue.toString(); - if (valueList.contains(valueStr)) - deriv.addFeature("denotation", "value_in_formula"); - } - } - } - - private void extractRootFeatures(Example ex, Derivation deriv) { - if (!deriv.isRootCat()) return; - if (!opts.featureDomains.contains("root") && !opts.featureDomains.contains("root_lexical")) return; - - List derivTokens = Arrays.asList(deriv.canonicalUtterance.split("\\s+")); - List inputTokens = ex.getTokens(); - //alignment features - BipartiteMatcher bMatcher = new BipartiteMatcher(); - List filteredInputTokens = filterStopWords(inputTokens); - List filteredDerivTokens = filterStopWords(derivTokens); - - int[] assignment = bMatcher.findMaxWeightAssignment(buildAlignmentMatrix(filteredInputTokens, filteredDerivTokens)); - - if (opts.featureDomains.contains("root")) { - //number of unmathced words based on exact match and ppdb - int matches = 0; - for (int i = 0; i < filteredInputTokens.size(); ++i) { - if (assignment[i] != i) { - matches++; - } - } - deriv.addFeature("root", "unmatched_input", filteredInputTokens.size() - matches); - deriv.addFeature("root", "unmatched_deriv", filteredDerivTokens.size() - matches); - if (deriv.value != null) { - if (deriv.value instanceof ListValue) { - ListValue list = (ListValue) deriv.value; - deriv.addFeature("root", String.format("pos0=%s&returnType=%s", ex.posTag(0), 
list.values.get(0).getClass())); - } - } - } - - if (opts.featureDomains.contains("root_lexical")) { - for (int i = 0; i < assignment.length; ++i) { - if (assignment[i] == i) { - if (i < filteredInputTokens.size()) { - String inputToken = filteredInputTokens.get(i).toLowerCase(); - deriv.addFeature("root_lexical", "deleted_token=" + inputToken); - if (!simpleLexicon.lookup(inputToken).isEmpty()) { - deriv.addFeature("root_lexical", "deleted_entity"); - } - } - else { - String derivToken = filteredDerivTokens.get(i - filteredInputTokens.size()); - deriv.addFeature("root_lexical", "deleted_token=" + derivToken); - if (!simpleLexicon.lookup(derivToken).isEmpty()) { - deriv.addFeature("root_lexical", "deleted_entity"); - } - } - } - } - } - } - - private List getCallFormulas(Derivation deriv) { - return deriv.formula.mapToList(formula -> { - List res = new ArrayList<>(); - if (formula instanceof CallFormula) { - res.add(((CallFormula) formula).func); - } - return res; - }, true); - } - private void extractLogicalFormFeatures(Example ex, Derivation deriv) { - if (!opts.featureDomains.contains("lf")) return; - for (int i = 0; i < ex.numTokens(); ++i) { - List callFormulas = getCallFormulas(deriv); - if (ex.posTag(i).equals("JJS")) { - if (ex.token(i).equals("least") || ex.token(i).equals("most")) //at least and at most are not what we want - continue; - for (Formula callFormula: callFormulas) { - String callFormulaDesc = callFormula.toString(); - //LogInfo.logs("SUPER: utterance=%s, formula=%s", ex.utterance, deriv.formula); - deriv.addFeature("lf", callFormulaDesc + "& superlative"); - } - } - } - if (!opts.featureDomains.contains("simpleworld")) return; - //specific handling of simple world methods - if (deriv.formula instanceof CallFormula) { - CallFormula callFormula = (CallFormula) deriv.formula; - String desc = callFormula.func.toString(); - switch (desc) { - case "edu.stanford.nlp.sempre.overnight.SimpleWorld.filter": - deriv.addFeature("simpleworld", "filter&" + 
callFormula.args.get(1)); - break; - case "edu.stanford.nlp.sempre.overnight.SimpleWorld.getProperty": - deriv.addFeature("simpleworld", "getProperty&" + callFormula.args.get(1)); - break; - case "edu.stanford.nlp.sempre.overnight.SimpleWorld.superlative": - deriv.addFeature("simpleworld", "superlative&" + callFormula.args.get(1) + "&" + callFormula.args.get(2)); - break; - case "edu.stanford.nlp.sempre.overnight.SimpleWorld.countSuperlative": - deriv.addFeature("simpleworld", "countSuperlative&" + callFormula.args.get(1) + "&" + callFormula.args.get(2)); - break; - case "edu.stanford.nlp.sempre.overnight.SimpleWorld.countComparative": - deriv.addFeature("simpleworld", "countComparative&" + callFormula.args.get(2) + "&" + callFormula.args.get(1)); - break; - case "edu.stanford.nlp.sempre.overnight.SimpleWorld.aggregate": - deriv.addFeature("simpleworld", "countComparative&" + callFormula.args.get(0)); - break; - default: break; - } - } - } - - private void extractPhraseAlignmentFeatures(Example ex, Derivation deriv) { - - if (!opts.featureDomains.contains("alignment")) return; - if (phraseTable == null) phraseTable = loadPhraseTable(); - - //get the tokens - List derivTokens = Arrays.asList(deriv.canonicalUtterance.split("\\s+")); - Set inputSubspans = ex.languageInfo.getLowerCasedSpans(); - - for (int i = 0; i < derivTokens.size(); ++i) { - for (int j = i + 1; j <= derivTokens.size() && j <= i + 4; ++j) { - - String lhs = Joiner.on(' ').join(derivTokens.subList(i, j)); - if (entities.contains(lhs)) continue; //optimization - - if (phraseTable.containsKey(lhs)) { - Map rhsCandidates = phraseTable.get(lhs); - Set intersection = Sets.intersection(rhsCandidates.keySet(), inputSubspans); - for (String rhs: intersection) { - addAndFilterLexicalFeature(deriv, "alignment", rhs, lhs); - } - } - } - } - } - - private Map> loadPhraseTable() { - Map> res = new HashMap<>(); - int num = 0; - for (String line : IOUtils.readLines(opts.phraseAlignmentPath)) { - String[] tokens = 
line.split("\t"); - if (tokens.length != 3) throw new RuntimeException("Bad alignment line: " + line); - MapUtils.putIfAbsent(res, tokens[0], new HashMap<>()); - - double value = Double.parseDouble(tokens[2]); - if (value >= opts.phraseTableThreshold) { - res.get(tokens[0]).put(tokens[1], value); - num++; - } - } - LogInfo.logs("Number of entries=%s", num); - return res; - } - - - private void addAndFilterLexicalFeature(Derivation deriv, String domain, String str1, String str2) { - - String[] str1Tokens = str1.split("\\s+"); - String[] str2Tokens = str2.split("\\s+"); - for (String str1Token: str1Tokens) - if (entities.contains(str1Token)) return; - for (String str2Token: str2Tokens) - if (entities.contains(str2Token)) return; - - if (stopWords.contains(str1) || stopWords.contains(str2)) return; - deriv.addFeature(domain, str1 + "--" + str2); - } - - private void extractLexicalFeatures(Example ex, Derivation deriv) { - - if (!opts.featureDomains.contains("lexical")) return; - - List derivTokens = Arrays.asList(deriv.canonicalUtterance.split("\\s+")); - List inputTokens = ex.getTokens(); - //alignment features - BipartiteMatcher bMatcher = new BipartiteMatcher(); - List filteredInputTokens = filterStopWords(inputTokens); - List filteredDerivTokens = filterStopWords(derivTokens); - - double[][] alignmentMatrix = buildLexicalAlignmentMatrix(filteredInputTokens, filteredDerivTokens); - int[] assignment = bMatcher.findMaxWeightAssignment(alignmentMatrix); - for (int i = 0; i < filteredInputTokens.size(); ++i) { - if (assignment[i] != i) { - int derivIndex = assignment[i] - filteredInputTokens.size(); - String inputToken = filteredInputTokens.get(i).toLowerCase(); - - if (entities.contains(inputToken)) continue; //optimization - stop here - - String derivToken = filteredDerivTokens.get(derivIndex).toLowerCase(); - if (!inputToken.equals(derivToken)) { - addAndFilterLexicalFeature(deriv, "lexical", inputToken, derivToken); - extractStringSimilarityFeatures(deriv, 
inputToken, derivToken); - - //2:2 features - if (i < filteredInputTokens.size() - 1) { - if (assignment[i + 1] == assignment[i] + 1) { - String inputBigram = Joiner.on(' ').join(inputToken, filteredInputTokens.get(i + 1)).toLowerCase(); - String derivBigram = Joiner.on(' ').join(derivToken, filteredDerivTokens.get(derivIndex + 1)).toLowerCase(); - if (!inputBigram.equals(derivBigram)) { - addAndFilterLexicalFeature(deriv, "lexical", inputBigram, derivBigram); - } - } - } - //1:2 features - if (derivIndex > 0) { - addAndFilterLexicalFeature(deriv, "lexical", inputToken, - Joiner.on(' ').join(filteredDerivTokens.get(derivIndex - 1), filteredDerivTokens.get(derivIndex))); - } - if (derivIndex < filteredDerivTokens.size() - 1) { - addAndFilterLexicalFeature(deriv, "lexical", inputToken, - Joiner.on(' ').join(filteredDerivTokens.get(derivIndex), filteredDerivTokens.get(derivIndex + 1))); - } - } - } - } - } - - private void extractStringSimilarityFeatures(Derivation deriv, String inputToken, String derivToken) { - if (inputToken.startsWith(derivToken) || derivToken.startsWith(inputToken)) - deriv.addFeature("lexical", "starts_with"); - else if (inputToken.length() > 4 && derivToken.length() > 4) { - if (inputToken.substring(0, 4).equals(derivToken.substring(0, 4))) - deriv.addFeature("lexical", "common_prefix"); - } - } - - //return a list without wtop words - private List filterStopWords(List tokens) { - List res = new ArrayList<>(); - for (String token : tokens) { - if (!stopWords.contains(token)) - res.add(token); - } - return res; - } - - private double[][] buildAlignmentMatrix(List inputTokens, List derivTokens) { - - double[][] res = new double[inputTokens.size() + derivTokens.size()][inputTokens.size() + derivTokens.size()]; - for (int i = 0; i < inputTokens.size(); ++i) { - for (int j = 0; j < derivTokens.size(); ++j) { - String inputToken = inputTokens.get(i); - String derivToken = derivTokens.get(j); - - if (computeMatch(inputToken, derivToken) > 0d) { - 
res[i][inputTokens.size() + j] = 1d; - res[inputTokens.size() + j][i] = 1d; - } - else if (computeParaphrase(inputToken, derivToken) > 0d) { - res[i][inputTokens.size() + j] = 0.5d; - res[inputTokens.size() + j][i] = 0.5d; - } - } - } - for (int i = 0; i < res.length - 1; i++) { - for (int j = i + 1; j < res.length; j++) { - if (i != j && res[i][j] < 1) { - res[i][j] = Double.NEGATIVE_INFINITY; - res[j][i] = Double.NEGATIVE_INFINITY; - } - } - } - return res; - } - - private double[][] buildLexicalAlignmentMatrix(List inputTokens, List derivTokens) { - if (aligner == null) { - aligner = Aligner.read(opts.wordAlignmentPath); - } - - double[][] res = new double[inputTokens.size() + derivTokens.size()][inputTokens.size() + derivTokens.size()]; - //init with -infnty and low score on the diagonal - for (int i = 0; i < res.length - 1; i++) { - for (int j = i; j < res.length; j++) { - if (i == j) { - res[i][j] = 0d; - res[j][i] = 0d; - } - else { - res[i][j] = -1000d; - res[j][i] = -1000d; - } - } - } - - for (int i = 0; i < inputTokens.size(); ++i) { - for (int j = 0; j < derivTokens.size(); ++j) { - String inputToken = inputTokens.get(i).toLowerCase(); - String derivToken = derivTokens.get(j).toLowerCase(); - - if (computeMatch(inputToken, derivToken) > 0) { - res[i][inputTokens.size() + j] = 1d; - res[inputTokens.size() + j][i] = 1d; - } else if (computeParaphrase(inputToken, derivToken) > 0) { - res[i][inputTokens.size() + j] = 0.5d; - res[inputTokens.size() + j][i] = 0.5d; - } - else if (aligner.getCondProb(inputToken, derivToken) > 0d && - aligner.getCondProb(derivToken, inputToken) > 0d) { - double product = aligner.getCondProb(inputToken, derivToken) * aligner.getCondProb(derivToken, inputToken); - res[i][inputTokens.size() + j] = product; - res[inputTokens.size() + j][i] = product; - } - } - } - return res; - } - - // Represents a local pattern on an utterance. 
- private static class Item { - public final String tag; - public final String data; - public Item(String tag, String data) { - this.tag = tag; - this.data = data; - } - @Override public String toString() { - return tag + ":" + data; - } - } - - // Fetch items from the temporary state. - // If it doesn't exist, create one. - private static List getItems(Map tempState) { - List items = (List) tempState.get("items"); - if (items == null) - tempState.put("items", items = new ArrayList<>()); - return items; - } - private static void setItems(Map tempState, List items) { - tempState.put("items", items); - } - - // TODO(yushi): make this less hacky - private static final List stopWords = Arrays.asList("\' \" `` ` \'\' a an the that which . what ? is are am be of".split(" ")); - private static final Set entities = - new HashSet<>(Arrays.asList("alice", "bob", "greenberg", "greenberg cafe", "central office", - "sacramento", "austin", "california", "texas", "colorado", "colorado river", "red river", "lake tahoe", "tahoe", "lake huron", "huron", "mount whitney", "whitney", "mount rainier", "rainier", "death valley", "pacific ocean", "pacific", - "sesame", "mission ave", "mission", "chelsea", - "multivariate data analysis", "multivariate data", "data analysis", "multivariate", "data", "efron", "lakoff", "annals of statistics", "annals", "annals of", "of statistics", "statistics", "computational linguistics", "computational", "linguistics", - "thai cafe", "pizzeria juno", - "new york", "york", "beijing", "brown university", "ucla", "mckinsey", "google")); - - - private static boolean isStopWord(String token) { - return stopWords.contains(token); - } - - private static void populateItems(List tokens, List items) { - List prunedTokens = new ArrayList<>(); - // Populate items with unpruned tokens - for (int i = 0; i < tokens.size(); i++) { - items.add(new Item("unigram", tokens.get(i))); - if (i - 1 >= 0) { - items.add(new Item("bigram", tokens.get(i - 1) + " " + tokens.get(i))); 
- } - if (!isStopWord(tokens.get(i)) || (i > 0 && (tokens.get(i - 1).equals('`') || tokens.get(i - 1).equals("``")))) - prunedTokens.add(tokens.get(i)); - } - - // Populate items with skip words removed - for (int i = 1; i < prunedTokens.size(); i++) { - items.add(new Item("skip-bigram", prunedTokens.get(i - i) + " " + prunedTokens.get(i))); - } - } - - // Compute the items for the input utterance. - private static List computeInputItems(Example ex) { - List items = getItems(ex.getTempState()); - if (items.size() != 0) return items; - List tokens = new ArrayList<>(ex.getTokens()); - populateItems(tokens, items); - LogInfo.logs("input %s, items %s", ex, items); - return items; - } - - // Return the set of tokens (partial canonical utterance) produced by the - // derivation. - public static List extractTokens(Example ex, Derivation deriv, List tokens) { - int childIndex = 0; - if (deriv.rule.rhs != null) { - for (String p : deriv.rule.rhs) - if (Rule.isCat(p)) - extractTokens(ex, deriv.children.get(childIndex++), tokens); - else - tokens.add(p); - - } else if (deriv.start != -1 && deriv.end != -1) { - for (int i = deriv.start; i < deriv.end; i++) - tokens.add(ex.token(i)); - } - return tokens; - } - - // Compute the items for a partial canonical utterance. - private static List computeCandidateItems(Example ex, Derivation deriv) { - // Get tokens - List tokens = new ArrayList<>(); - extractTokens(ex, deriv, tokens); - // Compute items - List items = new ArrayList<>(); - populateItems(tokens, items); - return items; - } - - private static void subtractDescendentsFeatures(Derivation deriv, Derivation subderiv) { - if (subderiv.children != null) { - for (Derivation child : subderiv.children) { - deriv.getLocalFeatureVector().add(-1, child.getLocalFeatureVector()); - subtractDescendentsFeatures(deriv, child); - } - } - } - - // Return the "complexity" of the given derivation. 
- private static int derivationSize(Derivation deriv) { - int sum = 0; - if (opts.countIntermediate || !(deriv.rule.lhs.contains("Intermediate"))) sum++; - if (deriv.children != null) { - for (Derivation child : deriv.children) - sum += derivationSize(child); - } - return sum; - } - - private static double computeMatch(String a, String b) { - if (a.equals(b)) return 1; - if (LanguageInfo.LanguageUtils.stem(a).equals(LanguageInfo.LanguageUtils.stem(b))) return 1; - return 0; - } - - private static double computeParaphrase(String a, String b) { - if (computeMatch(a, b) > 0) return 0; - - String[] aGrams = a.split(" "); - String[] bGrams = b.split(" "); - if (aGrams.length != bGrams.length) return 0; - - PPDBModel model = PPDBModel.getSingleton(); - int numPpdb = 0; - int numMisses = 0; - for (int i = 0; i < aGrams.length; i++) { - if (computeMatch(aGrams[i], bGrams[i]) == 0d) { - if (model.get(aGrams[i], bGrams[i]) > 0d || - model.get(LanguageInfo.LanguageUtils.stem(aGrams[i]), - LanguageInfo.LanguageUtils.stem(bGrams[i])) > 0d) { - numPpdb++; - } - else { - numMisses++; - } - } - } - return (numMisses == 0 && numPpdb <= 1 ? 1d : 0d); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/overnight/PPDBModel.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/overnight/PPDBModel.java deleted file mode 100644 index e51dd23b56..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/overnight/PPDBModel.java +++ /dev/null @@ -1,90 +0,0 @@ -package edu.stanford.nlp.sempre.overnight; - -import fig.basic.IOUtils; -import fig.basic.LogInfo; -import fig.basic.MapUtils; -import fig.basic.Option; -import edu.stanford.nlp.sempre.*; - -import java.util.HashMap; -import java.util.Map; - -/** - * PPDBModel extracts and scores paraphrasing featues from derivations. 
- * This model is intended to be used with FloatingParser - * - * @author Yushi Wang - */ - -public final class PPDBModel { - public static class Options { - @Option(gloss = "Path to file with alignment table") - public String ppdbModelPath = "regex/regex-ppdb.txt"; - - @Option(gloss = "Using ppdb format") - public boolean ppdb = true; - } - - public static Options opts = new Options(); - - public static PPDBModel model; - - Map> table; - - // We should only have one paraphrase model - public static PPDBModel getSingleton() { - if (model == null) { - model = new PPDBModel(); - } - return model; - } - - private PPDBModel() { - table = loadPPDBModel(opts.ppdbModelPath); - } - - /** - * Loading ppdb model from file - */ - private Map> loadPPDBModel(String path) { - LogInfo.begin_track("Loading ppdb model"); - Map> res = new HashMap<>(); - for (String line: IOUtils.readLinesHard(path)) { - if (opts.ppdb) { - String[] tokens = line.split("\\|\\|\\|"); - String first = tokens[1].trim(); - String second = tokens[2].trim(); - String stemmedFirst = LanguageInfo.LanguageUtils.stem(first); - String stemmedSecond = LanguageInfo.LanguageUtils.stem(second); - - putParaphraseEntry(res, first, second); - if ((!stemmedFirst.equals(first) || !stemmedSecond.equals(second)) && - !stemmedFirst.equals(stemmedSecond)) - putParaphraseEntry(res, stemmedFirst, stemmedSecond); - } else { - String[] tokens = line.split("\t"); - MapUtils.putIfAbsent(res, tokens[0], new HashMap<>()); - for (String token : tokens) - LogInfo.logs("%s", token); - res.get(tokens[0]).put(tokens[1], 1.0); - } - } - LogInfo.logs("ParaphraseUtils.loadPhraseTable: number of entries=%s", res.size()); - LogInfo.end_track(); - return res; - } - - private void putParaphraseEntry(Map> res, String first, String second) { - MapUtils.putIfAbsent(res, first, new HashMap<>()); - res.get(first).put(second, 1.0); - } - - public boolean containsKey(String key) { - return table.containsKey(key); - } - - public Double get(String key, 
String token) { - if (!table.containsKey(key) || !table.get(key).containsKey(token)) return 0.0; - return table.get(key).get(token); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/overnight/SimpleWorld.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/overnight/SimpleWorld.java deleted file mode 100644 index fbc213c28b..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/overnight/SimpleWorld.java +++ /dev/null @@ -1,829 +0,0 @@ -package edu.stanford.nlp.sempre.overnight; - -import com.google.common.collect.Lists; -import fig.basic.*; -import edu.stanford.nlp.sempre.*; - -import java.util.*; - -/** - * Functions for supporting a simple database. - * This is very inefficient and it works only for small worlds. - * Example applications: calendar, blocks world - * - * Types: DateValue, TimeValue, Value - * All arguments are lists of Values. - * - * @author Jonathan Berant - * @author Yushi Wang - */ -public final class SimpleWorld { - public static class Options { - @Option(gloss = "Number of entity samples") - public int numOfValueSamples = 60; - @Option(gloss = "Domain specifies which predicates/entities exist in the world") - public String domain; - @Option(gloss = "Verbosity") - public int verbose = 0; - @Option(gloss = "Path to load up the DB from (triples)") - public String dbPath = null; - @Option(gloss = "When performing a join with getProperty, do we want to deduplicate?") - public boolean joinDedup = true; - } - public static Options opts = new Options(); - - private SimpleWorld() { } - - // en.person.alice => en.person - private static String extractType(String id) { - int i = id.lastIndexOf('.'); - if (id.charAt(i + 1) == '_') { //to deal with /fb:en.lake._st_clair and such - id = id.substring(0, i); - i = id.lastIndexOf('.'); - return id.substring(0, i); - } - else return id.substring(0, i); - } - - private static String getType(Value v) { - if (v instanceof NumberValue) { - String unit = ((NumberValue) 
v).unit; - return unit + "_number"; // So we can quickly tell if something is a number or not - } else if (v instanceof DateValue) { - return "en.date"; - } else if (v instanceof TimeValue) { - return "en.time"; - } else if (v instanceof BooleanValue) { - return "en.boolean"; - } else if (v instanceof NameValue) { - return extractType(((NameValue) v).id); - } else if (v instanceof ListValue) { - return getType(((ListValue) v).values.get(0)); - } else { - throw new RuntimeException("Can't get type of value " + v); - } - } - - // Make sure that the types of the objects in the two lists are the same - private static void checkTypeMatch(List l1, List l2) { - for (Value o1 : l1) { - for (Value o2 : l2) { - if (!getType(o1).equals(getType(o2))) - throw new RuntimeException("Intersecting objects with non-matching types, object 1: " + - o1 + ", object2: " + o2); - } - } - } - - private static boolean intersects(List l1, List l2) { - //optimization - if (l1.size() < l2.size()) { - for (T o1 : l1) { - if (l2.contains(o1)) - return true; - } - } - else { - for (T o2 : l2) { - if (l1.contains(o2)) - return true; - } - } - return false; - } - - // Modes of superlatives - private static final String MIN = "min"; - private static final String MAX = "max"; - - //////////////////////////////////////////////////////////// - // Methods exposed to the public. - - public static List singleton(Value value) { return Collections.singletonList(value); } - - // Return the set of entities who can be the first argument of property - public static List domain(String property) { - createWorld(); - String type1 = propertyToType1.get(property); - if (type1 == null) - throw new RuntimeException("Property " + property + " has no type1"); - return getProperty(singleton(new NameValue(type1)), reverse("type")); - } - - public static String reverse(String property) { - if (property.startsWith("!")) return property.substring(1); - return "!" + property; - } - - // Return the concatenation of two lists. 
- public static List concat(Value v1, Value v2) { - return concat(singleton(v1), singleton(v2)); - } - public static List concat(List l1, List l2) { - checkTypeMatch(l1, l2); - if (l1.equals(l2)) // Disallow 'alice' or 'alice' - throw new RuntimeException("Cannot concatenate two copies of the same list: " + l1); - List newList = new ArrayList(); - newList.addAll(l1); - newList.addAll(l2); - return newList; - } - - private static void checkType1(String property, Value e1) { - String type1 = propertyToType1.get(property); - String type2 = "?"; - if (type1 == null) - throw new RuntimeException("Property " + property + " has no type1"); - if (!getType(e1).equals(type1)) - throw new RuntimeException("Type check failed: " + property + " : (-> " + type1 + " " + type2 + ") doesn't match arg1 " + e1 + " : " + getType(e1)); - } - - private static void checkType2(String property, Value e2) { - String type1 = "?"; - String type2 = propertyToType2.get(property); - if (type2 == null) - throw new RuntimeException("Property " + property + " has no type2"); - if (!getType(e2).equals(type2)) - throw new RuntimeException("Type check failed: " + property + " : (-> " + type1 + " " + type2 + ") doesn't match arg2 " + e2 + " : " + getType(e2)); - } - - private static void ensureNonnumericType2(String property) { - createWorld(); - String type2 = propertyToType2.get(property); - if (type2 == null) - throw new RuntimeException("Property " + property + " has no type2"); - if (type2.endsWith("_number") || type2.equals("en.date") || type2.equals("en.time")) - throw new RuntimeException("Property " + property + " has numeric type2, which is not allowed"); - } - - public static String ensureNumericProperty(String property) { - createWorld(); - String type2 = propertyToType2.get(property); - if (type2.endsWith("number") || type2.equals("en.date") || type2.equals("en.time")) { - return property; - } - throw new RuntimeException("Property " + property + " has non-numeric type2, which is not 
allowed"); - } - - public static List ensureNumericEntity(Value value) { - return ensureNumericEntity(singleton(value)); - } - - public static List ensureNumericEntity(List list) { - createWorld(); - String type = getType(list.get(0)); - if (type.endsWith("number") || type.equals("en.date") || type.equals("en.time")) { - return list; - } - throw new RuntimeException("List " + list + " is non-numeric, which is not allowed"); - } - - - // Return the subset of |objects| whose |property| |compare| refValues. - public static List filter(List entities, String property) { // Unary properties - return filter(entities, property, "=", singleton(new BooleanValue(true))); - } - public static List filter(List entities, String property, String compare, Value refValue) { - return filter(entities, property, compare, singleton(refValue)); - } - public static List filter(List entities, String property, String compare, List refValues) { - List newEntities = new ArrayList<>(); - - for (Value v : refValues) - checkType2(property, v); - - for (Value obj : entities) { - if (!(obj instanceof NameValue)) continue; - NameValue e = (NameValue) obj; - - List values = lookupDB(e, property); - boolean match = false; - - checkType1(property, e); - - if (compare.equals("=")) { - match = intersects(values, refValues); - } else if (compare.equals("!=")) { - match = !intersects(values, refValues); // Note this is not the existential interpretation! 
- } else if (compare.equals("<")) { - match = getDegree(values, MIN) < getDegree(refValues, MAX); - } else if (compare.equals(">")) { - match = getDegree(values, MAX) > getDegree(refValues, MIN); - } else if (compare.equals("<=")) { - match = getDegree(values, MIN) <= getDegree(refValues, MAX); - } else if (compare.equals(">=")) { - match = getDegree(values, MAX) >= getDegree(refValues, MIN); - } - if (match) newEntities.add(e); - } - return newEntities; - } - - private static double getDouble(Value v) { - if (!(v instanceof NumberValue)) - throw new RuntimeException("Not a number: " + v); - return ((NumberValue) v).value; - } - - // Degree is used to compare (either take the max or min). - private static double getDegree(List values, String mode) { - double deg = Double.NaN; - for (Value v : values) { - double x = getDegree(v); - if (Double.isNaN(deg) || (mode.equals(MAX) ? x > deg : x < deg)) - deg = x; - } - return deg; - } - private static double getDegree(Value value) { - if (value instanceof TimeValue) { - TimeValue timeValue = (TimeValue) value; - return timeValue.hour; - } else if (value instanceof DateValue) { - DateValue dateValue = (DateValue) value; - double dValue = 0; - if (dateValue.year != -1) dValue += dateValue.year * 10000; - if (dateValue.month != -1) dValue += dateValue.month * 100; - if (dateValue.day != -1) dValue += dateValue.day; - return dValue; - } else if (value instanceof NumberValue) { - return ((NumberValue) value).value; - } else { - throw new RuntimeException("Can't get degree from " + value); - } - } - - // Return the subset of entities that obtain the min/max value of property. - public static List superlative(List entities, String mode, String property) { - List bestEntities = null; - double bestDegree = Double.NaN; - - for (Value e : entities) { - double degree = getDegree(lookupDB(e, property), mode); - checkType1(property, e); - if (bestEntities == null || (mode.equals(MAX) ? 
degree > bestDegree : degree < bestDegree)) { - bestEntities = new ArrayList(); - bestEntities.add(e); - bestDegree = degree; - } else if (degree == bestDegree) { - bestEntities.add(e); - } - } - return bestEntities; - } - - // Return the subset of entities that obtain the most/least number of values of property that fall in restrictors. - public static List countSuperlative(List entities, String mode, String property) { - return countSuperlative(entities, mode, property, null); - } - - //make sure lists are returned in a unique order - public static String sortAndToString(Object obj) { - if (obj instanceof List) { - List strList = new ArrayList<>(); - List list = (List) obj; - for (Object listObj: list) - strList.add(listObj.toString()); - Collections.sort(strList); - return strList.toString(); - } - return obj.toString(); - } - - - public static List countSuperlative(List entities, String mode, String property, List restrictors) { - List bestEntities = null; - double bestDegree = Double.NaN; - - if (restrictors != null) { - for (Value v : restrictors) - checkType2(property, v); - } - ensureNonnumericType2(property); - - for (Value e : entities) { - List values = lookupDB(e, property); - double degree = 0; - for (Value v : values) - if (restrictors == null || restrictors.contains(v)) - degree++; - - checkType1(property, e); - - if (bestEntities == null || (mode.equals(MAX) ? degree > bestDegree : degree < bestDegree)) { - bestEntities = new ArrayList(); - bestEntities.add(e); - bestDegree = degree; - } else if (degree == bestDegree) { - bestEntities.add(e); - } - } - return bestEntities; - } - - // Return subset of entities that have the number of values of property that meet the mode/threshold criteria (e.g. >= 3). 
- public static List countComparative(List entities, String property, String mode, NumberValue thresholdValue) { - return countComparative(entities, property, mode, thresholdValue, null); - } - public static List countComparative(List entities, String property, String mode, NumberValue thresholdValue, List restrictors) { - List newEntities = new ArrayList<>(); - double threshold = getDouble(thresholdValue); - - if (restrictors != null) { - for (Value v : restrictors) - checkType2(property, v); - } - ensureNonnumericType2(property); - - for (Value e : entities) { - List values = lookupDB(e, property); - double degree = 0; - for (Value v : values) - if (restrictors == null || restrictors.contains(v)) - degree++; - - checkType1(property, e); - - switch (mode) { - case "=": if (degree == threshold) newEntities.add(e); break; - case "<": if (degree < threshold) newEntities.add(e); break; - case ">": if (degree > threshold) newEntities.add(e); break; - case "<=": if (degree <= threshold) newEntities.add(e); break; - case ">=": if (degree >= threshold) newEntities.add(e); break; - default: throw new RuntimeException("Illegal mode: " + mode); - } - } - return newEntities; - } - - // Return sum of values. - public static List sum(List values) { - double sum = 0; - for (Value v : values) - sum += getDouble(v); - return Collections.singletonList(new NumberValue(sum)); - } - - // Return sum/mean/min/max of values. - public static List aggregate(String mode, List values) { - // Note: this is probably too strong to reject empty lists. - if (values.size() == 0) - throw new RuntimeException("Can't aggregate " + mode + " over empty list"); - double sum = 0; - for (Value v : values) { - // Note: we're leaving out dates and times, but sum/avg doesn't quite work with them anyway. 
- if (!(v instanceof NumberValue)) - throw new RuntimeException("Can only aggregate over numbers, but got " + v); - double x = getDouble(v); - sum += x; - } - double result; - switch (mode) { - case "avg": result = sum / values.size(); break; - case "sum": result = sum; break; - default: throw new RuntimeException("Bad mode: " + mode); - } - return Collections.singletonList(new NumberValue(result, ((NumberValue) values.get(0)).unit)); - } - - // Return the properties of the entities (database join). - public static List getProperty(Value inObject, String property) { return getProperty(singleton(inObject), property); } - public static List getProperty(List inObjects, String property) { - List outObjects = new ArrayList<>(); - Set outObjectsCache = new HashSet<>(); //optimization - run "contains" on set and not list - for (Value obj : inObjects) { - List values = lookupDB(obj, property); - checkType1(property, obj); - for (Value v : values) { - if (!opts.joinDedup || !outObjectsCache.contains(v)) { - outObjects.add(v); - outObjectsCache.add(v); - } - } - } - if (outObjects.size() == 0) - throw new RuntimeException("The property " + property + " does not appear in any of the objects " + inObjects); - return outObjects; - } - - private static double arithOp(String op, double v1, double v2) { - switch (op) { - case "+": return v1 + v2; - case "-": return v1 - v2; - case "*": return v1 * v2; - case "/": return v1 / v2; - default: throw new RuntimeException("Invalid operation: " + op); - } - } - public static List arithOp(String op, Value v1, Value v2) { - return singleton(new NumberValue(arithOp(op, getDouble(v1), getDouble(v2)))); - } - public static List arithOp(String op, List args1, List args2) { - // FUTURE: should pay attention to units - List result = new ArrayList(); - for (Value v1 : args1) - for (Value v2 : args2) - result.addAll(arithOp(op, v1, v2)); - return result; - } - - //////////////////////////////////////////////////////////// - // Internal state of 
the world - - private static final Random random = new Random(1); - - private static Set entities; // Keep track of all the entities - private static Set properties; // Keep track of all the properties - private static Map propertyToType1, propertyToType2; // types - private static Map, List> database; // Database consists of (e1, property, e2) triples - - public static int sizeofDB() { - return database.size(); - } - - public static List lookupDB(Value e, String property) { - createWorld(); - if (!entities.contains(e)) throw new RuntimeException("DB doesn't contain entity " + e); - if (!properties.contains(property)) throw new RuntimeException("DB doesn't contain property " + property); - List values = database.get(new Pair(e, property)); - if (values == null) return Collections.EMPTY_LIST; - return values; - } - - private static void insertDB(Value e1, String property) { // For unary properties - insertDB(e1, property, new BooleanValue(true)); - } - private static void insertDB(Value e1, String property, List e2s) { - for (Value e2 : e2s) insertDB(e1, property, e2); - } - private static void insertDB(Value e1, String property, Value e2) { - //LogInfo.logs("insertDB (%s, %s, %s)", e1, property, e2); - entities.add(e1); - properties.add(property); - properties.add(reverse(property)); - entities.add(e2); - MapUtils.addToList(database, new Pair(e1, property), e2); - MapUtils.addToList(database, new Pair(e2, reverse(property)), e1); - propertyToType1.put(property, getType(e1)); - propertyToType2.put(property, getType(e2)); - propertyToType1.put(reverse(property), getType(e2)); - propertyToType2.put(reverse(property), getType(e1)); - } - - public static void dumpDatabase() { - for (Pair pair : database.keySet()) - LogInfo.logs("%s %s %s", pair.getFirst(), pair.getSecond(), database.get(pair)); - } - - // Used for testing - public static void recreateWorld() { - database = null; - createWorld(); - } - - public static void createWorld() { - if (database != null) return; 
- entities = new HashSet<>(); - properties = new HashSet<>(); - database = new HashMap<>(); - propertyToType1 = new HashMap<>(); - propertyToType2 = new HashMap<>(); - - Domain domain = null; - switch (opts.domain) { - case "blocks": domain = new BlocksDomain(); break; - case "calendar": domain = new CalendarDomain(); break; - case "housing": domain = new HousingDomain(); break; - case "restaurants": domain = new RestaurantsDomain(); break; - case "publications": domain = new PublicationDomain(); break; - case "socialnetwork": domain = new SocialNetworkDomain(); break; - case "basketball": domain = new BasketballDomain(); break; - case "recipes": domain = new RecipesDomain(); break; - case "geo880": opts.dbPath = "lib/data/overnight/geo880.db"; domain = new ExternalDomain(); break; - case "lassie": opts.dbPath = "overnight/lassie.db"; domain = new ExternalDomain(); break; - case "external": domain = new ExternalDomain(); break; - default: throw new RuntimeException("Unknown domain: " + opts.domain); - } - domain.createEntities(opts.numOfValueSamples); - - // Dump the entire database - LogInfo.begin_track("SimpleWorld.createWorld: domain = %s (%d entity/property pairs)", opts.domain, database.size()); - if (opts.verbose >= 1) { - dumpDatabase(); - } - LogInfo.end_track(); - } - - private static List L(T... 
list) { - return Arrays.asList(list); - } - - // Convert the Object back to a Value - private static Value toValue(Object obj) { - if (obj instanceof Value) return (Value) obj; - if (obj instanceof Boolean) return new BooleanValue((Boolean) obj); - if (obj instanceof Integer) return new NumberValue((Integer) obj, "count"); - if (obj instanceof Double) return new NumberValue((Double) obj); - if (obj instanceof String) return new StringValue((String) obj); - if (obj instanceof List || obj instanceof Set) { - List list = Lists.newArrayList(); - for (Object elem : obj) - list.add(toValue(elem)); - return new ListValue(list); - } - throw new RuntimeException("Unhandled object: " + obj + " with class " + obj.getClass()); - } - - // Convert the Object to list value - public static ListValue listValue(Object obj) { - Value value = toValue(obj); - if (value instanceof ListValue) { - ListValue lv = (ListValue) value; - Collections.sort(lv.values, new Value.ValueComparator()); - return (ListValue) value; - } - return new ListValue(singleton(value)); - } - - public static int sampleInt(int min, int max) { - return random.nextInt(max - min) + min; - } - - public static boolean sampleBernoulli(double prob) { - return random.nextDouble() < prob; - } - - // Choose a single random element from the list - public static T sampleMultinomial(List list) { - return list.get(sampleInt(0, list.size())); - } - - // Choose n random elements from list (duplicates are possible). - public static List sampleMultinomial(List list, int n) { - List sublist = new ArrayList(); - if (list.size() > 0) { - for (int i = 0; i < n; i++) - sublist.add(sampleMultinomial(list)); - } - return sublist; - } - - // Keep each element with probability |prob|. 
- public static List subsample(List list, double prob) { - List sublist = new ArrayList(); - for (T x : list) - if (sampleBernoulli(prob)) - sublist.add(x); - return sublist; - } - - // With high probability, return one of the first few to avoid empty denotations. - public static T focusSampleMultinomial(List list) { - if (sampleBernoulli(0.5)) - return list.get(0); - return list.get(sampleInt(0, list.size())); - } - - public abstract static class Domain { - public abstract void createEntities(int numEntities); - } - - // Create |numEntities| entities, the first few have ids. - private static List makeValues(List ids) { return makeValues(ids.size(), ids); } - private static List makeValues(int numEntities, List ids) { - List values = new ArrayList(); - String type = extractType(ids.get(0)); - for (int i = 0; i < numEntities; i++) - values.add(makeValue(i < ids.size() ? ids.get(i) : type + "." + i, type)); - return values; - } - private static Value makeValue(String id) { return makeValue(id, extractType(id)); } - private static Value makeValue(String id, String type) { - Value e = new NameValue(id); - if (!entities.contains(e)) { - insertDB(e, "type", new NameValue(type)); - } - return e; - } - - // All dates for all domains are assumed to be in this range. - private static DateValue sampleDate() { - return new DateValue(sampleInt(2000, 2010), -1, -1); - } - - //////////////////////////////////////////////////////////// - // Specific domains (important to synchronize the constants with overnight/grammar!) 
- - public static class BlocksDomain extends Domain { - public void createEntities(int numEntities) { - List blocks = makeValues(numEntities, L("en.block.block1", "en.block.block2")); - List shapes = makeValues(L("en.shape.pyramid", "en.shape.cube")); - List colors = makeValues(L("en.color.red", "en.color.green")); - for (Value e : blocks) { - insertDB(e, "shape", sampleMultinomial(shapes)); - insertDB(e, "color", sampleMultinomial(colors)); - insertDB(e, "length", new NumberValue(sampleInt(2, 8), "en.inch")); - insertDB(e, "width", new NumberValue(sampleInt(2, 8), "en.inch")); - insertDB(e, "height", new NumberValue(sampleInt(2, 8), "en.inch")); - insertDB(e, "left", sampleMultinomial(blocks)); - insertDB(e, "right", sampleMultinomial(blocks)); - insertDB(e, "above", sampleMultinomial(blocks)); - insertDB(e, "below", sampleMultinomial(blocks)); - if (sampleBernoulli(0.5)) - insertDB(e, "is_special"); - } - } - } - - public static class CalendarDomain extends Domain { - private static DateValue sampleDate() { - return new DateValue(2015, 1, sampleInt(1, 5)); - } - private static TimeValue sampleTime() { - return new TimeValue(sampleInt(9, 16), 0); - } - public void createEntities(int numEntities) { - List meetings = makeValues(numEntities, L("en.meeting.weekly_standup", "en.meeting.annual_review")); - List people = makeValues(L("en.person.alice", "en.person.bob")); - List locations = makeValues(L("en.location.greenberg_cafe", "en.location.central_office")); - for (Value e : meetings) { - insertDB(e, "date", sampleDate()); - insertDB(e, "start_time", sampleTime()); - insertDB(e, "end_time", sampleTime()); - insertDB(e, "length", new NumberValue(sampleInt(1, 4), "en.hour")); - insertDB(e, "attendee", sampleMultinomial(people, 2)); - insertDB(e, "location", sampleMultinomial(locations)); - if (sampleBernoulli(0.5)) - insertDB(e, "is_important"); - } - } - } - - public static class RestaurantsDomain extends Domain { - public static final List RESTAURANT_VPS = 
Arrays.asList("reserve,credit,outdoor,takeout,delivery,waiter,kids,groups".split(",")); - public static final List RESTAURANT_CUISINES = Arrays.asList("en.cuisine.thai,en.cuisine.french,en.cuisine.italian".split(",")); - public static final List RESTAURANT_MEALS = Arrays.asList("en.food.breakfast,en.food.lunch,en.food.dinner".split(",")); - public static final List NEIGHBORHOODS = Arrays.asList("en.neighborhood.tribeca,en.neighborhood.midtown_west,en.neighborhood.chelsea".split(",")); - - public void createEntities(int numEntities) { - List restaurants = makeValues(numEntities, L("en.restaurant.thai_cafe", "en.restaurant.pizzeria_juno")); - List neighborhoods = makeValues(NEIGHBORHOODS); - List cuisines = makeValues(RESTAURANT_CUISINES); - List meals = makeValues(RESTAURANT_MEALS); - for (Value e : restaurants) { - insertDB(e, "star_rating", new NumberValue(sampleInt(0, 6), "en.star")); - insertDB(e, "price_rating", new NumberValue(sampleInt(1, 5), "en.dollar_sign")); - insertDB(e, "num_reviews", new NumberValue(sampleInt(20, 60), "en.review")); - insertDB(e, "neighborhood", sampleMultinomial(neighborhoods)); - insertDB(e, "cuisine", sampleMultinomial(cuisines)); - insertDB(e, "meals", subsample(meals, 0.5)); - for (String vp : RESTAURANT_VPS) { - if (sampleBernoulli(0.5)) - insertDB(e, vp); - } - } - } - } - - public static class HousingDomain extends Domain { - public static final List HOUSING_VPS = Arrays.asList("allows_cats,allows_dogs,has_private_bath,has_private_room".split(",")); - public static final List HOUSING_TYPES = Arrays.asList("en.housing.apartment,en.housing.condo,en.housing.house,en.housing.flat".split(",")); - public static final List NEIGHBORHOODS = Arrays.asList("en.neighborhood.tribeca,en.neighborhood.midtown_west,en.neighborhood.chelsea".split(",")); - - public void createEntities(int numEntities) { - List units = makeValues(numEntities, L("en.housing_unit.123_sesame_street", "en.housing_unit.900_mission_ave")); - List housingTypes = 
makeValues(HOUSING_TYPES); - List neighborhoods = makeValues(NEIGHBORHOODS); - for (Value e : units) { - insertDB(e, "rent", new NumberValue((double) sampleMultinomial(L(1500, sampleInt(1000, 3000))), "en.dollar")); - insertDB(e, "size", new NumberValue((double) sampleMultinomial(L(800, sampleInt(500, 1500))), "en.square_feet")); - insertDB(e, "posting_date", sampleDate()); - insertDB(e, "neighborhood", sampleMultinomial(neighborhoods)); - insertDB(e, "housing_type", sampleMultinomial(housingTypes)); - for (String vp : HOUSING_VPS) { - if (sampleBernoulli(0.5)) - insertDB(e, vp); - } - } - } - } - - public static class PublicationDomain extends Domain { - public void createEntities(int numEntities) { - List articles = makeValues(numEntities, L("en.article.multivariate_data_analysis")); - List people = makeValues(L("en.person.efron", "en.person.lakoff")); - List venues = makeValues(L("en.venue.computational_linguistics", "en.venue.annals_of_statistics")); - for (Value e : articles) { - insertDB(e, "author", sampleMultinomial(people, 2)); - insertDB(e, "venue", sampleMultinomial(venues)); - insertDB(e, "publication_date", sampleDate()); - insertDB(e, "cites", sampleMultinomial(articles, sampleInt(1, 10))); - insertDB(e, "won_award"); - } - } - } - - public static class SocialNetworkDomain extends Domain { - public void createEntities(int numEntities) { - List people = makeValues(numEntities, L("en.person.alice", "en.person.bob")); - List genders = makeValues(L("en.gender.male", "en.gender.female")); - List relationshipStatuses = makeValues(L("en.relationship_status.single", "en.relationship_status.married")); - List cities = makeValues(L("en.city.new_york", "en.city.beijing")); - List universities = makeValues(L("en.university.brown", "en.university.berkeley", "en.university.ucla")); - List fields = makeValues(L("en.field.computer_science", "en.field.economics", "en.field.history")); - List companies = makeValues(L("en.company.google", "en.company.mckinsey", 
"en.company.toyota")); - List jobTitles = makeValues(L("en.job_title.ceo", "en.job_title.software_engineer", "en.job_title.program_manager")); - for (Value e : people) { - insertDB(e, "gender", sampleMultinomial(genders)); - insertDB(e, "relationship_status", sampleMultinomial(relationshipStatuses)); - insertDB(e, "height", new NumberValue(sampleInt(150, 210), "en.cm")); - insertDB(e, "birthdate", sampleDate()); - insertDB(e, "birthplace", sampleMultinomial(cities)); - insertDB(e, "friend", sampleMultinomial(people, sampleInt(0, 3))); - if (sampleBernoulli(0.5)) - insertDB(e, "logged_in"); - } - for (Value e : makeValues(numEntities, L("en.education.0"))) { - insertDB(e, "student", focusSampleMultinomial(people)); - insertDB(e, "university", sampleMultinomial(universities)); - insertDB(e, "field_of_study", sampleMultinomial(fields)); - insertDB(e, "education_start_date", sampleDate()); - insertDB(e, "education_end_date", sampleDate()); - } - for (Value e : makeValues(numEntities, L("en.employment.0"))) { - insertDB(e, "employee", focusSampleMultinomial(people)); - insertDB(e, "employer", sampleMultinomial(companies)); - insertDB(e, "job_title", sampleMultinomial(jobTitles)); - insertDB(e, "employment_start_date", sampleDate()); - insertDB(e, "employment_end_date", sampleDate()); - } - } - } - - public static class BasketballDomain extends Domain { - public void createEntities(int numEntities) { - List players = makeValues(numEntities, L("en.player.kobe_bryant", "en.player.lebron_james")); - List teams = makeValues(L("en.team.lakers", "en.team.cavaliers")); - List positions = makeValues(L("en.position.point_guard", "en.position.forward")); - for (Value e : makeValues(numEntities, L("en.stats.0"))) { - insertDB(e, "player", focusSampleMultinomial(players)); - insertDB(e, "position", sampleMultinomial(positions)); - insertDB(e, "team", sampleMultinomial(teams)); - insertDB(e, "season", sampleDate()); - - insertDB(e, "num_points", new NumberValue(sampleInt(0, 10), 
"point")); - insertDB(e, "num_assists", new NumberValue(sampleInt(0, 10), "assist")); - insertDB(e, "num_steals", new NumberValue(sampleInt(0, 10), "steal")); - insertDB(e, "num_turnovers", new NumberValue(sampleInt(0, 10), "turnover")); - insertDB(e, "num_rebounds", new NumberValue(sampleInt(0, 10), "rebound")); - insertDB(e, "num_blocks", new NumberValue(sampleInt(0, 10), "block")); - insertDB(e, "num_fouls", new NumberValue(sampleInt(0, 10), "foul")); - insertDB(e, "num_games_played", new NumberValue(sampleInt(0, 10), "game")); - } - } - } - - public static class RecipesDomain extends Domain { - public void createEntities(int numEntities) { - List recipes = makeValues(numEntities, L("en.recipe.rice_pudding", "en.recipe.quiche")); - List cuisines = makeValues(L("en.cuisine.chinese", "en.cuisine.french")); - List ingredients = makeValues(L("en.ingredient.milk", "en.ingredient.spinach")); - List meals = makeValues(L("en.meal.lunch", "en.meal.dinner")); - for (Value e : recipes) { - insertDB(e, "preparation_time", new NumberValue(sampleInt(5, 30), "en.minute")); - insertDB(e, "cooking_time", new NumberValue(sampleInt(5, 30), "en.minute")); - insertDB(e, "cuisine", sampleMultinomial(cuisines)); - insertDB(e, "requires", sampleMultinomial(ingredients)); - insertDB(e, "meal", sampleMultinomial(meals)); - insertDB(e, "posting_date", sampleDate()); - } - } - } - - // Domain that corresponds to reading from a file - public static class ExternalDomain extends Domain { - public void createEntities(int numEntities) { - LogInfo.begin_track("ExternalDomain.createEntities: %s", opts.dbPath); - // Load up from database - for (String line : IOUtils.readLinesHard(opts.dbPath)) { - if (line.startsWith("#")) continue; // Comments - String[] tokens = line.split("\t"); - String pred = tokens[0]; - Value e = makeValue(tokens[1]); - if (tokens.length == 2) { // Unary - insertDB(e, pred); - } else if (tokens.length == 3) { // Binary - Value f; - if (tokens[2].startsWith("fb:en.") || 
tokens[2].startsWith("en.")) // Named entity - f = makeValue(tokens[2]); - else - f = Value.fromString(tokens[2]); // Number - insertDB(e, pred, f); - } else { - throw new RuntimeException("Unhandled: " + line); - } - } - LogInfo.end_track(); - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/overnight/test/SimpleWorldTest.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/overnight/test/SimpleWorldTest.java deleted file mode 100644 index 0ddb4cbfa2..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/overnight/test/SimpleWorldTest.java +++ /dev/null @@ -1,30 +0,0 @@ -package edu.stanford.nlp.sempre.overnight.test; - -import static org.testng.AssertJUnit.assertEquals; - -import org.testng.annotations.Test; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.overnight.*; -import fig.basic.*; - -/** - * Test simple world from overnight framework. - * Creates a small database using SimpleWorld, - * and does sanity checks on the induced knowledge graph - * @author Yushi Wang - */ -public class SimpleWorldTest { - @Test public void externalWorldTest() { - edu.stanford.nlp.sempre.overnight.SimpleWorld.opts.domain = "external"; - edu.stanford.nlp.sempre.overnight.SimpleWorld.opts.dbPath = "lib/data/overnight/test/unittest.db"; - edu.stanford.nlp.sempre.overnight.SimpleWorld.opts.verbose = 1; - edu.stanford.nlp.sempre.overnight.SimpleWorld.recreateWorld(); - - assertEquals(edu.stanford.nlp.sempre.overnight.SimpleWorld.sizeofDB(), 12); - } - - public static void main(String[] args) { - new SimpleWorldTest().externalWorldTest(); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/DenotationTypeInference.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/DenotationTypeInference.java deleted file mode 100644 index 8db4ff1815..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/DenotationTypeInference.java +++ /dev/null @@ -1,169 +0,0 @@ -package 
edu.stanford.nlp.sempre.tables; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import fig.basic.LispTree; -import fig.basic.LogInfo; -import fig.basic.Option; -import fig.basic.Pair; - -/** - * Infer the type of a Value object. - * - * @author ppasupat - */ -public class DenotationTypeInference { - public static class Options { - @Option(gloss = "Allow unknown Value type") - public boolean allowUnknownValueType = true; - } - public static Options opts = new Options(); - - private DenotationTypeInference() { }; - - /** - * Return the type of the given Value as a String. - * - * If the Value contains Values of different types (e.g., if the Value - * is a ListValue), return the least common ancestor. - * - * For a Value that represents a mapping (e.g., PairListValue), - * return the type of the "values" as opposed to the keys. - */ - public static String getValueType(Value value) { - if (value instanceof NumberValue) return CanonicalNames.NUMBER; - else if (value instanceof DateValue) return CanonicalNames.DATE; - else if (value instanceof TimeValue) return CanonicalNames.TIME; - else if (value instanceof StringValue) return CanonicalNames.TEXT; - else if (value instanceof BooleanValue) return CanonicalNames.BOOLEAN; - else if (value instanceof ErrorValue) return "ERROR"; - else if (value instanceof NameValue) { - SemType type = getNameValueSemType((NameValue) value); - if (type instanceof AtomicSemType) { - return ((AtomicSemType) type).name; - } - } else if (value instanceof ListValue) { - ListValue listValue = (ListValue) value; - if (listValue.values.isEmpty()) return "EMPTY"; - String commonType = null; - for (Value x : listValue.values) { - String type = getValueType(x); - if (commonType == null) - commonType = type; - else if (!commonType.equals(type)) - commonType = findLowestCommonAncestor(commonType, type); - } - return commonType; - } else if (value instanceof InfiniteListValue) { - LispTree tree = ((InfiniteListValue) value).toLispTree(); - 
if (tree.children.size() >= 2) { - // (comparison value) or (comparison value comparison value) - return getValueType(Values.fromLispTree(tree.child(1))); - } else { - // STAR = (*) - return CanonicalNames.ANY; - } - } else if (value instanceof PairListValue) { - PairListValue pairListValue = (PairListValue) value; - if (pairListValue.pairs.isEmpty()) return "EMPTY"; - String commonType = null; - for (Pair pair : pairListValue.pairs) { - String type = getValueType(pair.getSecond()); - if (commonType == null) - commonType = type; - else if (!commonType.equals(type)) - commonType = findLowestCommonAncestor(commonType, type); - } - return commonType; - } else if (value instanceof ScopedValue) { - return getValueType(((ScopedValue) value).relation); - } - if (opts.allowUnknownValueType) - return "UNKNOWN VALUE: " + value; - else - throw new RuntimeException("Unhandled value: " + value); - } - - public static String getKeyType(Value value) { - if (value instanceof NameValue) { - SemType type = getNameValueSemType((NameValue) value); - if (type instanceof FuncSemType) { - return type.getArgType().toString(); - } - } else if (value instanceof PairListValue) { - String commonType = null; - for (Pair pair : ((PairListValue) value).pairs) { - String type = getValueType(pair.getFirst()); - if (commonType == null) - commonType = type; - else if (!commonType.equals(type)) - commonType = findLowestCommonAncestor(commonType, type); - } - return commonType; - } else if (value instanceof ScopedValue) { - return getValueType(((ScopedValue) value).head); - } - if (opts.allowUnknownValueType) - return "UNKNOWN KEY: " + value; - else - throw new RuntimeException("Unhandled value: " + value); - } - - /** - * Helper function: get the type of NameValue using TypeLookup. 
- */ - public static SemType getNameValueSemType(NameValue value) { - String id = value.id; - TypeLookup typeLookup = TypeInference.getTypeLookup(); - if (CanonicalNames.isUnary(id)) { // Unary - SemType unaryType = typeLookup.getEntityType(id); - return unaryType == null ? SemType.entityType : unaryType; - } else { // Binary - // Careful of the reversal. - SemType propertyType = null; - if (CanonicalNames.SPECIAL_SEMTYPES.containsKey(id)) { - propertyType = CanonicalNames.SPECIAL_SEMTYPES.get(id); - } else if (!CanonicalNames.isReverseProperty(id)) { - propertyType = typeLookup.getPropertyType(id); - } else { - propertyType = typeLookup.getPropertyType(CanonicalNames.reverseProperty(id)); - if (propertyType != null) propertyType = propertyType.reverse(); - } - return propertyType == null ? SemType.anyAnyFunc : propertyType; - } - } - - /** - * Find the lowest common ancestor of the two given types using the type hierarchy. - */ - public static String findLowestCommonAncestor(String type1, String type2) { - SemTypeHierarchy hierarchy = SemTypeHierarchy.singleton; - Set sup1 = hierarchy.getSupertypes(type1), sup2 = hierarchy.getSupertypes(type2); - String lca = CanonicalNames.ANY; - for (String type : sup1) { - if (sup2.contains(type)) { - if (hierarchy.getSupertypes(type).contains(lca)) - lca = type; - } - } - return lca; - } - - /** - * Check if two objects have the same type. 
- */ - public static boolean typeCheck(Value v1, Value v2) { - String t1 = getValueType(v1), t2 = getValueType(v2); - if (t1 == null) { - LogInfo.logs("NULL type occurred: %s %s", v1, t1); - return false; - } - if (t2 == null) { - LogInfo.logs("NULL type occurred: %s %s", v2, t2); - return false; - } - return t1.equals(t2); - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/InfiniteListValue.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/InfiniteListValue.java deleted file mode 100644 index af7657d0a1..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/InfiniteListValue.java +++ /dev/null @@ -1,68 +0,0 @@ -package edu.stanford.nlp.sempre.tables; - -import java.util.*; - -import edu.stanford.nlp.sempre.Value; -import edu.stanford.nlp.sempre.Values; -import fig.basic.LispTree; - -/** - * Represent a list of infinitely many values. - * - * The list is represented by a List of Objects. - * - * @author ppasupat - */ -public class InfiniteListValue extends Value { - - final List representation; - final int hashCode; - - public InfiniteListValue(List representation) { - this.representation = representation; - this.hashCode = representation.hashCode(); - } - - public InfiniteListValue(String s) { - this(LispTree.proto.parseFromString(s)); - } - - public InfiniteListValue(LispTree tree) { - this.representation = new ArrayList<>(); - for (LispTree child : tree.children) { - try { - Value value = Values.fromLispTree(child); - representation.add(value); - } catch (Exception e) { - representation.add(child.toString()); - } - } - this.hashCode = representation.hashCode(); - } - - @Override - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - for (Object x : representation) { - if (x instanceof Value) - tree.addChild(((Value) x).toLispTree()); - else - tree.addChild(x.toString()); - } - return tree; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - 
if (o == null || getClass() != o.getClass()) return false; - InfiniteListValue that = (InfiniteListValue) o; - return representation.equals(that.representation); - } - - @Override - public int hashCode() { - return hashCode; - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/ScopedFormula.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/ScopedFormula.java deleted file mode 100644 index c39ddfa781..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/ScopedFormula.java +++ /dev/null @@ -1,72 +0,0 @@ -package edu.stanford.nlp.sempre.tables; - -import java.util.*; - -import com.google.common.base.Function; - -import edu.stanford.nlp.sempre.Formula; -import fig.basic.LispTree; - -/** - * Represent a binary with a restrict domain (scope). - * - * @author ppasupat - */ -public class ScopedFormula extends Formula { - public final Formula head; - public final Formula relation; - - public ScopedFormula(Formula head, Formula relation) { - this.head = head; - this.relation = relation; - } - - @Override - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("scoped"); - tree.addChild(head.toLispTree()); - tree.addChild(relation.toLispTree()); - return tree; - } - - @Override - public void forEach(Function func) { - if (!func.apply(this)) { head.forEach(func); relation.forEach(func); } - } - - @Override - public Formula map(Function func) { - Formula result = func.apply(this); - return result == null ? 
new ScopedFormula(head.map(func), relation.map(func)) : result; - } - - @Override - public List mapToList(Function> func, boolean alwaysRecurse) { - List res = func.apply(this); - if (res.isEmpty() || alwaysRecurse) { - res.addAll(head.mapToList(func, alwaysRecurse)); - res.addAll(relation.mapToList(func, alwaysRecurse)); - } - return res; - } - - @SuppressWarnings({"equalshashcode"}) - @Override - public boolean equals(Object thatObj) { - if (!(thatObj instanceof ScopedFormula)) return false; - ScopedFormula that = (ScopedFormula) thatObj; - if (!this.head.equals(that.head)) return false; - if (!this.relation.equals(that.relation)) return false; - return true; - } - - @Override - public int computeHashCode() { - int hash = 0x7e2a16; - hash = hash * 0xd3b2646c + head.hashCode(); - hash = hash * 0xd3b2646c + relation.hashCode(); - return hash; - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/ScopedValue.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/ScopedValue.java deleted file mode 100644 index ecbcf3f129..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/ScopedValue.java +++ /dev/null @@ -1,41 +0,0 @@ -package edu.stanford.nlp.sempre.tables; - -import edu.stanford.nlp.sempre.Value; -import fig.basic.LispTree; - -/** - * Represent a binary with a restrict domain (scope). 
- * - * @author ppasupat - */ -public class ScopedValue extends Value { - public final Value head, relation; - - public ScopedValue(Value head, Value relation) { - this.head = head; - this.relation = relation; - } - - @Override - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("scoped"); - tree.addChild(head.toLispTree()); - tree.addChild(relation.toLispTree()); - return tree; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - ScopedValue that = (ScopedValue) o; - return head.equals(that.head) && relation.equals(that.relation); - } - - @Override - public int hashCode() { - return head.hashCode() + relation.hashCode(); - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/StringNormalizationUtils.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/StringNormalizationUtils.java deleted file mode 100644 index fbb6eb76f8..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/StringNormalizationUtils.java +++ /dev/null @@ -1,446 +0,0 @@ -package edu.stanford.nlp.sempre.tables; - -import java.text.*; -import java.util.*; -import java.util.regex.*; - -import org.joda.time.DateTime; -import org.joda.time.format.DateTimeFormat; -import org.joda.time.format.DateTimeFormatter; - -import com.google.common.collect.ArrayListMultimap; -import com.google.common.collect.Multimap; - -import edu.stanford.nlp.sempre.*; -import fig.basic.*; - -/** - * Utilities for string normalization. 
- * - * @author ppasupat - */ -public final class StringNormalizationUtils { - public static class Options { - @Option(gloss = "Verbosity") public int verbose = 0; - @Option(gloss = "Use language analyzer") - public boolean useLanguageAnalyzer = true; - @Option(gloss = "NUMBER does not have to be at the beginning of the string") - public boolean numberCanStartAnywhere = false; - @Option(gloss = "NUM2 does not have to follow the pattern NUMBER DASH NUM2") - public boolean num2CanStartAnywhere = false; - } - public static Options opts = new Options(); - - private StringNormalizationUtils() { } // Should not be instantiated. - - /** - * Analyze the content of the cells in the same column, and then generate possible normalizations. - * Modify the property map in each cell. - * - * TODO(ice): Take the homogeneity of the cells into account. - */ - public static void analyzeColumn(TableColumn column) { - // Parts in the same column with the same string content gets the same id. - Map originalStringToPartId = new HashMap<>(); - for (TableCell cell : column.children) { - if (!cell.properties.metadata.isEmpty()) continue; // Already analyzed. 
- analyzeString(cell.properties.originalString, cell.properties.metadata, - column, originalStringToPartId); - } - } - - // ============================================================ - // Cell normalization - // ============================================================ - - public static final Pattern STRICT_DASH = Pattern.compile("\\s*[-‐‑⁃‒–—―]\\s*"); - public static final Pattern DASH = Pattern.compile("\\s*[-‐‑⁃‒–—―/,:;]\\s*"); - public static final Pattern COMMA = Pattern.compile("\\s*(,\\s|\\n|/)\\s*"); - public static final Pattern SPACE = Pattern.compile("\\s+"); - - public static void analyzeString(String o, Multimap metadata, - TableColumn column, Map originalStringToPartId) { - metadata.clear(); - Value value; - LanguageAnalyzer analyzer = LanguageAnalyzer.getSingleton(); - LanguageInfo languageInfo = analyzer.analyze(o); - // ===== Number: Also handle "2,000 ft." --> (number 2000) ===== - value = parseNumberLenient(o); - if (value == null && opts.useLanguageAnalyzer) - value = parseNumberWithLanguageAnalyzer(languageInfo); - if (value != null) metadata.put(TableTypeSystem.CELL_NUMBER_VALUE, value); - // ===== Date and Time ===== - value = parseDate(o); - if (value == null && opts.useLanguageAnalyzer) - value = parseDateWithLanguageAnalyzer(languageInfo); - if (value != null) metadata.put(TableTypeSystem.CELL_DATE_VALUE, value); - // ===== First and Second: "2-1" --> first = (number 2), second = (number 1) ===== - if (opts.num2CanStartAnywhere) { - value = parseNum2Lenient(o); - if (value != null) - metadata.put(TableTypeSystem.CELL_NUM2_VALUE, value); - } else { - String[] splitted = DASH.split(o); - if (splitted.length != 2) splitted = SPACE.split(o); - if (splitted.length == 2) { - NumberValue first = parseNumberStrict(splitted[0]), second = parseNumberStrict(splitted[1]); - if (first != null && second != null) { - metadata.put(TableTypeSystem.CELL_NUM2_VALUE, second); - } - } - } - // ===== List: "apple, banana, carrot" --> fb:part.apple, etc. 
===== - String[] splitted = COMMA.split(o); - if (splitted.length > 1) { - for (String partName : splitted) { - String normalizedPartName = StringNormalizationUtils.characterNormalize(partName).toLowerCase(); - String id = originalStringToPartId.get(normalizedPartName); - if (id == null) { - String canonicalName = TableTypeSystem.canonicalizeName(normalizedPartName); - id = TableTypeSystem.getUnusedName( - TableTypeSystem.getPartName(canonicalName, column.columnName), - originalStringToPartId.values()); - originalStringToPartId.put(normalizedPartName, id); - } - metadata.put(TableTypeSystem.CELL_PART_VALUE, new NameValue(id, partName)); - } - } - } - - // ============================================================ - // Type Conversion - // ============================================================ - - public static final NumberFormat numberFormat = NumberFormat.getInstance(Locale.US); - - /** - * Convert string to number. - * Partial match is allowed: "9,000 cakes" --> 9000 - */ - public static NumberValue parseNumberLenient(String s) { - try { - if (opts.numberCanStartAnywhere) - s = s.replaceAll("^[^0-9.]*", ""); - Number parsed = numberFormat.parse(s.replace(" ", "")); - return new NumberValue(parsed.doubleValue()); - } catch (ParseException e) { - return null; - } - } - - /** - * Get the second number - * Partial match is allowed: "9,000 cakes from 120 bakeries" --> 120 - */ - public static NumberValue parseNum2Lenient(String s) { - s = s.replace(" ", ""); - if (opts.numberCanStartAnywhere) - s = s.replaceAll("^[^0-9.]*", ""); - ParsePosition parsePosition = new ParsePosition(0); - Number parsed = numberFormat.parse(s, parsePosition); - if (parsed == null) return null; - s = s.substring(parsePosition.getIndex()); - s = s.replaceAll("^[^0-9.]*", ""); - parsePosition.setIndex(0); - parsed = numberFormat.parse(s, parsePosition); - if (parsed == null) return null; - return new NumberValue(parsed.doubleValue()); - } - - /** - * Convert string to number. 
- * Partial match is not allowed: "9,000 cakes" --> null - */ - public static NumberValue parseNumberStrict(String s) { - ParsePosition pos = new ParsePosition(0); - Number parsed = numberFormat.parse(s, pos); - if (parsed == null || s.length() != pos.getIndex()) return null; - return new NumberValue(parsed.doubleValue()); - } - - /** - * Convert string to number + unit. - * Must exactly match the pattern "number unit" (e.g., "9,000 cakes") - */ - public static NumberValue parseNumberWithUnitStrict(String s) { - String[] tokens = s.split(" "); - if (tokens.length != 2) return null; - ParsePosition pos = new ParsePosition(0); - Number parsed = numberFormat.parse(tokens[0], pos); - if (parsed == null || tokens[0].length() != pos.getIndex()) return null; - return new NumberValue(parsed.doubleValue(), tokens[1]); - } - - public static NumberValue parseNumberWithLanguageAnalyzer(LanguageInfo languageInfo) { - if (languageInfo.numTokens() == 0) return null; - String nerSpan; - nerSpan = languageInfo.getNormalizedNerSpan("NUMBER", 0, languageInfo.numTokens()); - if (nerSpan != null) { - try { - return new NumberValue(Double.parseDouble(nerSpan)); - } catch (NumberFormatException e) { } - } - nerSpan = languageInfo.getNormalizedNerSpan("ORDINAL", 0, languageInfo.numTokens()); - if (nerSpan != null) { - try { - return new NumberValue(Double.parseDouble(nerSpan)); - } catch (NumberFormatException e) { } - } - nerSpan = languageInfo.getNormalizedNerSpan("PERCENT", 0, languageInfo.numTokens()); - if (nerSpan != null) { - try { - return new NumberValue(Double.parseDouble(nerSpan.substring(1))); - } catch (NumberFormatException e) { } - } - nerSpan = languageInfo.getNormalizedNerSpan("MONEY", 0, languageInfo.numTokens()); - if (nerSpan != null) { - try { - return new NumberValue(Double.parseDouble(nerSpan.substring(1))); - } catch (NumberFormatException e) { } - } - return null; - } - - public static final DateTimeFormatter americanDateFormat = DateTimeFormat.forPattern("MMM d, 
yyyy"); - public static final Pattern suTimeDateFormat = Pattern.compile("([0-9X]{4})(?:-([0-9X]{2}))?(?:-([0-9X]{2}))?"); - - /** - * Convert string to DateValue. - */ - public static DateValue parseDate(String s) { - Matcher matcher = suTimeDateFormat.matcher(s.toUpperCase()); - if (matcher.matches()) { - String yS = matcher.group(1), mS = matcher.group(2), dS = matcher.group(3); - int y = -1, m = -1, d = -1; - if (!(yS == null || yS.isEmpty() || yS.contains("X"))) y = Integer.parseInt(yS); - if (!(mS == null || mS.isEmpty() || mS.contains("X"))) m = Integer.parseInt(mS); - if (!(dS == null || dS.isEmpty() || dS.contains("X"))) d = Integer.parseInt(dS); - if (y == -1 && m == -1 && d == -1) return null; - return new DateValue(y, m, d); - } - try { - DateTime date = americanDateFormat.parseDateTime(s); - return new DateValue(date.getYear(), date.getMonthOfYear(), date.getDayOfMonth()); - } catch (IllegalArgumentException e) { - return null; - } - } - - public static DateValue parseDateWithLanguageAnalyzer(LanguageInfo languageInfo) { - if (languageInfo.numTokens() == 0) return null; - String nerSpan = languageInfo.getNormalizedNerSpan("DATE", 0, languageInfo.numTokens()); - if (opts.verbose >= 2) - LogInfo.logs("%s %s %s %s", languageInfo.tokens, languageInfo.nerTags, languageInfo.nerValues, nerSpan); - if (nerSpan == null) return null; - Matcher matcher = suTimeDateFormat.matcher(nerSpan); - if (!matcher.matches()) return null; - String yS = matcher.group(1), mS = matcher.group(2), dS = matcher.group(3); - int y = -1, m = -1, d = -1; - if (!(yS == null || yS.isEmpty() || yS.contains("X"))) y = Integer.parseInt(yS); - if (!(mS == null || mS.isEmpty() || mS.contains("X"))) m = Integer.parseInt(mS); - if (!(dS == null || dS.isEmpty() || dS.contains("X"))) d = Integer.parseInt(dS); - if (y == -1 && m == -1 && d == -1) return null; - return new DateValue(y, m, d); - } - - // ============================================================ - // Generic String normalization 
- // ============================================================ - - /** - * newline (=> `\n`), backslash (`\` => `\\`), and pipe (`|` => `\p`) - */ - public static String escapeTSV(String x) { - return x.replace("\\", "\\\\").replace("\n", "\\n").replace("|", "\\p").replaceAll("\\s", " ").trim(); - } - - public static String unescapeTSV(String x) { - return x.replace("\\n", "\n").replace("\\p", "|").replace("\\\\", "\\"); - } - - /** - * Collapse multiple spaces into one. - */ - public static String whitespaceNormalize(String x) { - return x.replaceAll("\\s", " ").trim(); - } - - /** - * Remove ALL spaces and non-alphanumeric characters, then convert to lower case. - * Used for fuzzy matching. - */ - public static String collapseNormalize(String x) { - return Normalizer.normalize(x, Normalizer.Form.NFD).replaceAll("[^A-Za-z0-9]", "").toLowerCase(); - } - - /** - * String to number - */ - public static NumberValue toNumberValue(String description) { - if (description == null) return null; - try { - Number result = numberFormat.parse(description); - return new NumberValue(result.doubleValue()); - } catch (ParseException e) { - return null; - } - } - - public static NumberValue toNumberValue(Value value) { - if (value instanceof NumberValue) return (NumberValue) value; - if (value instanceof DateValue) { - DateValue date = (DateValue) value; - if (date.month == -1 && date.day == -1) - return new NumberValue(date.year); - } - if (value instanceof NameValue) return toNumberValue(((NameValue) value).description); - if (value instanceof DescriptionValue) return toNumberValue(((DescriptionValue) value).value); - return null; - } - - /** - * Character normalization. 
- */ - public static String characterNormalize(String string) { - // Remove diacritics // (Sorry European people) - string = Normalizer.normalize(string, Normalizer.Form.NFD).replaceAll("[\u0300-\u036F]", ""); - // Special symbols - string = string - .replaceAll("‚", ",") - .replaceAll("„", ",,") - .replaceAll("[·・]", ".") - .replaceAll("…", "...") - .replaceAll("ˆ", "^") - .replaceAll("˜", "~") - .replaceAll("‹", "<") - .replaceAll("›", ">") - .replaceAll("[‘’´`]", "'") - .replaceAll("[“”«»]", "\"") - .replaceAll("[•†‡]", "") - .replaceAll("[-‐‑–—]", "-"); - return string.replaceAll("\\s+", " ").trim(); - } - - /** - * Simple normalization. (Include whitespace normalization) - */ - public static String simpleNormalize(String string) { - string = characterNormalize(string); - // Citation - string = string.replaceAll("\\[(nb ?)?\\d+\\]", ""); - string = string.replaceAll("\\*+$", ""); - // Outside Quote - string = string.replaceAll("^\"(.*)\"$", "$1"); - return string.replaceAll("\\s+", " ").trim(); - } - - /** - * More aggressive normalization. (Include simple and whitespace normalization) - */ - public static String aggressiveNormalize(String string) { - // Dashed / Parenthesized information - string = simpleNormalize(string); - String oldString; - do { - oldString = string; - // Remove citations - string = string.trim().replaceAll("((? 
metadata = ArrayListMultimap.create(); - TableColumn column = new TableColumn("Test", "test", 0); - analyzeString(o, metadata, column, new HashMap<>()); - String aggressive = aggressiveNormalize(o).toLowerCase(); - String official = officialEvaluatorNormalize(o); - LogInfo.logs("%s %s | %s %s %s", o, metadata, official, aggressive, aggressive.equals(official)); - } - - public static void main(String[] args) { - LanguageAnalyzer.opts.languageAnalyzer = "corenlp.CoreNLPAnalyzer"; - opts.verbose = 2; - opts.numberCanStartAnywhere = true; - opts.num2CanStartAnywhere = true; - unitTest("2"); - unitTest("twenty three"); - unitTest("apple, banana, banana, BANANA"); - unitTest("apple\nbanana\norange"); - unitTest("0-1\n(4-5 p)"); - unitTest("\"HELLO\""); - unitTest("21st"); - unitTest("2001st"); - unitTest("2,000,000 ft."); - unitTest("2,000,000.3579"); - unitTest("1/2"); - unitTest("1-2"); - unitTest("50%"); - unitTest("$30"); - unitTest("1 104"); - unitTest("United States of America (USA)"); - unitTest("320 bhp diesel, 10 knots (19 km/h)"); - unitTest("January 3, 1993"); - unitTest("July 2008"); - // normalized-annotated-200.examples - unitTest("19 September 1984"); // Ex 90 - unitTest("July 9"); // Ex 139, 155 - unitTest("March 1983"); // Ex 167 - // Other things handled by SUTime - unitTest("Friday"); - unitTest("Every Friday"); - unitTest("7:00"); - unitTest("7pm"); - unitTest("January 2, 7am"); - unitTest("morning"); - unitTest("1993-95"); - unitTest("Jan 2-5"); - unitTest("Jan 2 - 5"); - unitTest("Jan 2 - Feb 5"); - unitTest("from dawn to dusk"); - unitTest("January 4 - February 10"); - unitTest("July 500"); - unitTest("July 500 B.C."); - unitTest("1800s"); - unitTest("19th century"); - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableCell.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableCell.java deleted file mode 100644 index abde2927b3..0000000000 --- 
a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableCell.java +++ /dev/null @@ -1,32 +0,0 @@ -package edu.stanford.nlp.sempre.tables; - -/** - * Represents a table cell. - * - * Information about the cell is kept in the |properties| field. - * - * @author ppasupat - */ -public final class TableCell { - public final TableColumn parentColumn; - public final TableRow parentRow; - public final TableCellProperties properties; - - private TableCell(TableCellProperties properties, TableColumn column, TableRow row) { - this.parentColumn = column; - this.parentRow = row; - this.properties = properties; - } - - public static TableCell createAndAddTo(TableCellProperties properties, TableColumn column, TableRow row) { - TableCell answer = new TableCell(properties, column, row); - column.children.add(answer); - row.children.add(answer); - return answer; - } - - @Override - public String toString() { - return properties.nameValue.toString(); - } -} \ No newline at end of file diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableCellProperties.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableCellProperties.java deleted file mode 100644 index 4ca733f5e7..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableCellProperties.java +++ /dev/null @@ -1,46 +0,0 @@ -package edu.stanford.nlp.sempre.tables; - -import com.google.common.collect.ArrayListMultimap; -import com.google.common.collect.Multimap; - -import edu.stanford.nlp.sempre.*; - -/** - * Store various properties of a cell. - * - * Contract: There is only one TableCellProperties for each unique id. 
- * - * @author ppasupat - */ -public class TableCellProperties { - public final String id; - public final String originalString; - public final NameValue nameValue; - public final Multimap metadata; - - public TableCellProperties(String id, String originalString) { - this.id = id; - this.originalString = originalString; - this.nameValue = new NameValue(id, originalString); - this.metadata = ArrayListMultimap.create(); - } - - /** Create a copy without the columns field. */ - public TableCellProperties(TableCellProperties old) { - this.id = old.id; - this.originalString = old.originalString; - this.nameValue = old.nameValue; - this.metadata = ArrayListMultimap.create(old.metadata); - } - - @Override - public boolean equals(Object o) { - if (!(o instanceof TableCellProperties)) return false; - return id.equals(((TableCellProperties) o).id); - } - - @Override - public int hashCode() { - return id.hashCode(); - } -} \ No newline at end of file diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableColumn.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableColumn.java deleted file mode 100644 index 536ccccce4..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableColumn.java +++ /dev/null @@ -1,79 +0,0 @@ -package edu.stanford.nlp.sempre.tables; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; - -/** - * Represents a table column. - * - * The column header is used as the relation name. 
- * - * @author ppasupat - */ -public class TableColumn { - public final List children; - public final String originalString; - public final String columnName; - public final int index; - // Relation Name - public final NameValue relationNameValue, relationConsecutiveNameValue; - // Children Cell's Type (EntitySemType) - public final String cellTypeString; - public final NameValue cellTypeValue; - public final SemType cellSemType; - - public TableColumn(String originalString, String columnName, int index) { - this.children = new ArrayList<>(); - this.originalString = originalString; - this.columnName = columnName; - this.index = index; - this.relationNameValue = new NameValue(TableTypeSystem.getRowPropertyName(columnName), originalString); - this.relationConsecutiveNameValue = new NameValue(TableTypeSystem.getRowConsecutivePropertyName(columnName), originalString); - this.cellTypeString = TableTypeSystem.getCellType(columnName); - this.cellTypeValue = new NameValue(this.cellTypeString, originalString); - this.cellSemType = SemType.newAtomicSemType(this.cellTypeString); - } - - /** Create a copy without the children field. 
*/ - public TableColumn(TableColumn old) { - this.children = new ArrayList<>(); - this.originalString = old.originalString; - this.columnName = old.columnName; - this.index = old.index; - this.relationNameValue = old.relationNameValue; - this.relationConsecutiveNameValue = old.relationConsecutiveNameValue; - this.cellTypeString = old.cellTypeString; - this.cellTypeValue = old.cellTypeValue; - this.cellSemType = old.cellSemType; - } - - public static Set getReservedFieldNames() { - Set usedNames = new HashSet<>(); - usedNames.add("next"); - usedNames.add("index"); - return usedNames; - } - - @Override - public String toString() { - return relationNameValue.toString(); - } - - public boolean hasConsecutive() { - NameValue previousCell = null; - for (TableCell child : children) { - if (child.properties.nameValue.equals(previousCell)) return true; - previousCell = child.properties.nameValue; - } - return false; - } - - public Collection getAllNormalization() { - Set normalizations = new HashSet<>(); - for (TableCell cell : children) { - normalizations.addAll(cell.properties.metadata.keySet()); - } - return normalizations; - } -} \ No newline at end of file diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableDerivationPruningComputer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableDerivationPruningComputer.java deleted file mode 100644 index 7dadd833c5..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableDerivationPruningComputer.java +++ /dev/null @@ -1,195 +0,0 @@ -package edu.stanford.nlp.sempre.tables; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.AggregateFormula.Mode; -import edu.stanford.nlp.sempre.tables.lambdadcs.LambdaDCSException; - -public class TableDerivationPruningComputer extends DerivationPruningComputer { - - public TableDerivationPruningComputer(DerivationPruner pruner) { - super(pruner); - } - - public static final String 
lambdaDCSError = "lambdaDCSError"; - public static final String emptyDenotation = DefaultDerivationPruningComputer.emptyDenotation; - public static final String badSummarizerHead = DefaultDerivationPruningComputer.badSummarizerHead; - public static final String sameMark = "sameMark"; - public static final String forwardBackward = "forwardBackward"; - public static final String doubleNext = "doubleNext"; - public static final String doubleCompares = "doubleCompares"; - public static final String emptyJoin = "emptyJoin"; - public static final String subsetMerge = "subsetMerge"; - public static final String typeRowMerge = "typeRowMerge"; - public static final String aggregateInfinite = "aggregateInfinite"; - public static final String aggregateUncomparable = "aggregateUncomparable"; - public static final String aggregateVariable = "aggregateVariable"; - public static final String superlativeIdentity = "superlativeIdentity"; - - @Override - public Collection getAllStrategyNames() { - return Arrays.asList( - lambdaDCSError, emptyDenotation, badSummarizerHead, sameMark, - forwardBackward, doubleNext, doubleCompares, emptyJoin, subsetMerge, typeRowMerge, - aggregateInfinite, aggregateUncomparable, aggregateVariable, superlativeIdentity); - } - - private static final String NEXT = TableTypeSystem.ROW_NEXT_VALUE.id, PREV = "!" 
+ NEXT; - private static final ValueFormula STAR = new ValueFormula<>(new NameValue("*")); - private final Formula TYPE_ROW = Formula.fromString("(fb:type.object.type fb:type.row)"); - private final Formula IDENTITY = Formula.fromString("(reverse (lambda x (var x)))"); - - @Override - public String isPruned(Derivation deriv) { - // lambdaDCSError: Prune unrecoverable LambdaDCSException - if (containsStrategy(lambdaDCSError)) { - deriv.ensureExecuted(parser.executor, ex.context); - if (deriv.value instanceof ErrorValue && LambdaDCSException.isUnrecoverable(((ErrorValue) deriv.value).type)) - return lambdaDCSError; - } - // emptyDenotation: Prune if the denotation is empty (for ScopedValue) - if (containsStrategy(emptyDenotation)) { - deriv.ensureExecuted(parser.executor, ex.context); - if (deriv.value instanceof PairListValue && ((PairListValue) deriv.value).pairs.isEmpty()) { - return emptyDenotation; - } - if (deriv.value instanceof ScopedValue) { - Value head = ((ScopedValue) deriv.value).head, relation = ((ScopedValue) deriv.value).relation; - if ((head instanceof ListValue && ((ListValue) head).values.isEmpty()) || - (relation instanceof PairListValue && ((PairListValue) relation).pairs.isEmpty())) - return emptyDenotation; - } - } - // badSummarizerHead: Prune if the head of a ScopedValue is empty or is a single object - if (containsStrategy(badSummarizerHead)) { - deriv.ensureExecuted(parser.executor, ex.context); - if (deriv.value instanceof ScopedValue) { - Value head = ((ScopedValue) deriv.value).head; - if ((head instanceof ListValue) && ((ListValue) head).values.size() == 1) - return badSummarizerHead; - } - } - // sameMark: Prune if mark does not filter out anything - if (containsStrategy(sameMark)) { - if (deriv.formula instanceof MergeFormula) { - MergeFormula merge = (MergeFormula) deriv.formula; - if (merge.mode == MergeFormula.Mode.and) { - Value head = null; - if (merge.child1 instanceof MarkFormula) { - head = 
parser.executor.execute(merge.child2, ex.context).value; - } else if (merge.child2 instanceof MarkFormula) { - head = parser.executor.execute(merge.child1, ex.context).value; - } - if (head != null && head.equals(deriv.value)) { - return sameMark; - } - } - } - } - // Prune JoinFormulas - if (containsStrategy(forwardBackward) || containsStrategy(doubleNext) - || containsStrategy(doubleCompares) || containsStrategy(emptyJoin)) { - Formula current = deriv.formula; - String rid1 = null, rid2 = null; - while (current instanceof JoinFormula) { - rid2 = rid1; - rid1 = Formulas.getBinaryId(((JoinFormula) current).relation); - if (rid1 != null && rid2 != null) { - // forwardBackward: Prune (!relation (relation (...))) - if (containsStrategy(forwardBackward) && (rid1.equals("!" + rid2) || rid2.equals("!" + rid1))) - return forwardBackward; - // doubleNext: Prune (next (next (...))) - if (containsStrategy(doubleNext) && - (rid1.equals(NEXT) || rid1.equals(PREV)) && (rid2.equals(NEXT) || rid2.equals(PREV))) - return doubleNext; - // doubleCompares: Prune (< (> ( ...))) - if (containsStrategy(doubleCompares) && - CanonicalNames.COMPARATORS.contains(rid1) && CanonicalNames.COMPARATORS.contains(rid2)) - return doubleCompares; - // emptyJoin: prune if the composition two consecutive joins always produce an empty set - if (containsStrategy(emptyJoin)) { - Formula test = new JoinFormula(rid1, new JoinFormula(rid2, STAR)); - try { - Value value = parser.executor.execute(test, ex.context).value; - if (value instanceof ListValue && ((ListValue) value).values.isEmpty()) - return emptyJoin; - } catch (RuntimeException e) { - // Do nothing. Don't prune the formula. 
- } - } - } - current = ((JoinFormula) current).child; - } - } - // Prune merge formulas - if (containsStrategy(subsetMerge) && deriv.formula instanceof MergeFormula) { - MergeFormula merge = (MergeFormula) deriv.formula; - Formula child1 = merge.child1, child2 = merge.child2; - // subsetMerge: Prune merge formulas where one child is a subset of the other - if (containsStrategy(subsetMerge)) { - Value d1 = parser.executor.execute(child1, ex.context).value; - Value d2 = parser.executor.execute(child2, ex.context).value; - if (d1 instanceof ListValue && d2 instanceof ListValue) { - Set v1 = new HashSet<>(((ListValue) d1).values); - Set v2 = new HashSet<>(((ListValue) d2).values); - if ((v1.size() >= v2.size() && v1.containsAll(v2)) || - (v2.size() > v1.size() && v2.containsAll(v1))) - return subsetMerge; - } - } - // typeRowMerge: Prune merge formulas where one child is (@type @row) [generally redundant] - if (containsStrategy(typeRowMerge) && - ((TYPE_ROW.equals(merge.child1) && !(merge.child2 instanceof MarkFormula)) || - (TYPE_ROW.equals(merge.child2) && !(merge.child1 instanceof MarkFormula)))) - return typeRowMerge; - } - // Prune aggregate formulas - else if (deriv.formula instanceof AggregateFormula) { - AggregateFormula aggregate = (AggregateFormula) deriv.formula; - Formula child = aggregate.child; - // aggregateInfinite: Prune aggregates when the child is an infinite set - if (containsStrategy(aggregateInfinite) && child instanceof JoinFormula) { - String rid = Formulas.getBinaryId(((JoinFormula) child).relation); - if (CanonicalNames.COMPARATORS.contains(rid) || "!=".equals(rid)) - return aggregateInfinite; - } - // aggregateUncomparable: Prune aggregates when the child's type is not number or date - if (containsStrategy(aggregateUncomparable) && aggregate.mode != AggregateFormula.Mode.count) { - SemType type = TypeInference.inferType(child, true); - if (!type.meet(SemType.numberOrDateType).isValid() || - (!type.meet(SemType.numberType).isValid() && 
(aggregate.mode == Mode.sum || aggregate.mode == Mode.avg))) - return aggregateUncomparable; - } - // aggregateVariable: Prune aggregates when the child is variable - if (containsStrategy(aggregateVariable) && child instanceof VariableFormula) { - return aggregateVariable; - } - } - // Prune superlative formulas - else if (deriv.formula instanceof SuperlativeFormula) { - SuperlativeFormula superlative = (SuperlativeFormula) deriv.formula; - Formula relation = superlative.relation; - // superlativeIdentity: Prune superlatives when the relation is exactly (lambda x (var x)) - if (containsStrategy(superlativeIdentity) && IDENTITY.equals(relation)) { - return superlativeIdentity; - } - } - // For ScopedFormula: recurse into the relation part - if (deriv.formula instanceof ScopedFormula) { - Formula relation = ((ScopedFormula) deriv.formula).relation; - if (relation instanceof LambdaFormula) { - relation = ((LambdaFormula) relation).body; - Derivation relationDeriv = new Derivation.Builder().formula(relation).createDerivation(); - String matchedStrategy; - for (DerivationPruningComputer computer : pruner.getPruningComputers()) { - if ((matchedStrategy = computer.isPruned(relationDeriv)) != null) { - return matchedStrategy; - } - } - } - } - return null; - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableFormulaEvaluator.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableFormulaEvaluator.java deleted file mode 100644 index f746af8149..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableFormulaEvaluator.java +++ /dev/null @@ -1,176 +0,0 @@ -package edu.stanford.nlp.sempre.tables; - -import java.io.File; -import java.util.*; -import java.util.concurrent.ExecutionException; - -import com.google.common.cache.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.tables.serialize.LazyLoadedExampleList; -import edu.stanford.nlp.sempre.tables.serialize.SerializedDataset; -import 
edu.stanford.nlp.sempre.tables.test.CustomExample; -import edu.stanford.nlp.sempre.tables.test.TableFormulaCanonicalizer; -import edu.stanford.nlp.sempre.tables.test.CustomExample.ExampleProcessor; -import fig.basic.*; - -/** - * Evaluate if the predicted formula matches one of the annotated formulas. - * - * Use example ID to look up the formula annotation. - * - * The annotationPath option can point to one of the following: - * - an annotated LispTree file with targetFormula / alternativeFormula fields - * (formula shorthands are allowed) - * - a gzip dump file created by tables.serialize.SerializedDumper - * - a directory containing gzip dump files created by tables.serialize.SerializedDumper - * - * @author ppasupat - */ -public class TableFormulaEvaluator extends TableValueEvaluator { - public static class Options { - @Option(gloss = "verbosity") public int verbose = 0; - @Option(gloss = "Path for formula annotation") - public String annotationPath = null; - @Option(gloss = "Whether to fall back to ValueEvaluator when the example id does not exist") - public boolean fallBackToValueEvaluator = true; - } - public static Options opts = new Options(); - - protected Collection availableIds; - - // Map from ID string to target formulas. - // Use only when the file is an annotated LispTree file. - protected Map> idToTargetFormulas; - - // Map from ID string to LazyLoadedExampleList and example index. 
- // Use only when the file is dumped from SerializedDumper - protected Map> idToSerializedIndex; - - public TableFormulaEvaluator() { - // Load annotation file - if (opts.annotationPath == null || opts.annotationPath.isEmpty()) - throw new RuntimeException("Annotation file not specified."); - // Determine file type - if (new File(opts.annotationPath).isDirectory() || opts.annotationPath.endsWith(".gz")) { - readSerializedFile(); - } else { - readAnnotationFile(); - } - } - - protected void readSerializedFile() { - idToSerializedIndex = new HashMap<>(); - SerializedDataset dataset = new SerializedDataset(); - if (new File(opts.annotationPath).isDirectory()) { - dataset.readDir(opts.annotationPath); - } else { - dataset.read("annotated", opts.annotationPath); - } - for (String group : dataset.groups()) { - LazyLoadedExampleList examples = dataset.examples(group); - List ids = examples.getAllIds(); - for (int i = 0; i < ids.size(); i++) - idToSerializedIndex.put(ids.get(i), new Pair<>(examples, i)); - } - availableIds = idToSerializedIndex.keySet(); - } - - protected void readAnnotationFile() { - LogInfo.begin_track("Reading annotated examples"); - idToTargetFormulas = new HashMap<>(); - CustomExample.getDataset(Arrays.asList(new Pair<>("annotated", opts.annotationPath)), new ExampleProcessor() { - @Override - public void run(CustomExample ex) { - List targetFormulas = new ArrayList<>(); - // Canonicalize formulas - if (ex.targetFormula != null) - targetFormulas.add(TableFormulaCanonicalizer.canonicalizeFormula(ex.targetFormula)); - if (ex.alternativeFormulas != null) { - for (Formula alternativeFormula : ex.alternativeFormulas) - targetFormulas.add(TableFormulaCanonicalizer.canonicalizeFormula(alternativeFormula)); - } - idToTargetFormulas.put(ex.id, targetFormulas); - } - }); - availableIds = idToTargetFormulas.keySet(); - LogInfo.end_track(); - } - - /** - * Get formula compatibility. Fall back to ValueEvaluator if the example - * is not in the annotation file. 
- */ - public double getCompatibility(Example targetEx, Derivation deriv) { - if (availableIds.contains(targetEx.id)) { - if (idToTargetFormulas == null) - return getCompatibilitySerializedStrict(targetEx, deriv.formula); - else - return getCompatibilityAnnotationStrict(targetEx, deriv.formula); - } else { - return opts.fallBackToValueEvaluator ? getCompatibility(targetEx.targetValue, deriv.value) : 0; - } - } - - // Add a little cache - LoadingCache> canonicalizedCache = CacheBuilder.newBuilder() - .maximumSize(20) - .build( - new CacheLoader> () { - @Override - public List load(Example ex) throws Exception { - LogInfo.logs("Canonicalizing %s", ex.id); - List canonicalized = new ArrayList<>(); - Pair identifier = idToSerializedIndex.get(ex.id); - Example annotated = identifier.getFirst().get(identifier.getSecond()); - for (Derivation targetDeriv : annotated.predDerivations) { - canonicalized.add(TableFormulaCanonicalizer.canonicalizeFormula(targetDeriv.formula)); - } - LogInfo.logs("Canonicalized %d formulas", canonicalized.size()); - return canonicalized; - } - } - ); - - public double getCompatibilitySerializedStrict(Example targetEx, Formula formula) { - try { - List canonicalized = canonicalizedCache.get(targetEx); - formula = TableFormulaCanonicalizer.canonicalizeFormula(formula); - for (Formula targetFormula : canonicalized) - if (targetFormula.equals(formula)) return 1; - return 0; - } catch (ExecutionException e) { - throw new RuntimeException(e.getCause()); - } - } - - public double getCompatibilityAnnotationStrict(Example targetEx, Formula formula) { - List targetFormulas = idToTargetFormulas.get(targetEx.id); - if (targetFormulas == null) - throw new RuntimeException("Example ID " + targetEx.id + " not found in annotated data"); - formula = TableFormulaCanonicalizer.canonicalizeFormula(formula); - for (Formula targetFormula : targetFormulas) - if (targetFormula.equals(formula)) return 1; - return 0; - } - - public boolean containsId(String id) { - 
return availableIds.contains(id); - } - - // ============================================================ - // DEBUG - // ============================================================ - - public void log(Example targetEx, Formula formula) { - List targetFormulas = idToTargetFormulas.get(targetEx.id); - if (targetFormulas.isEmpty()) - LogInfo.logs("Gold: NONE"); - else - for (Formula targetFormula : targetFormulas) - LogInfo.logs("Gold: %s", targetFormula); - formula = TableFormulaCanonicalizer.canonicalizeFormula(formula); - LogInfo.logs("Predicted: %s", formula); - } -} - diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableKnowledgeGraph.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableKnowledgeGraph.java deleted file mode 100644 index 9300eb33f9..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableKnowledgeGraph.java +++ /dev/null @@ -1,707 +0,0 @@ -package edu.stanford.nlp.sempre.tables; - -import java.io.*; -import java.nio.file.Paths; -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.FuzzyMatchFn.FuzzyMatchFnMode; -import edu.stanford.nlp.sempre.tables.lambdadcs.ExecutorCache; -import edu.stanford.nlp.sempre.tables.lambdadcs.InfiniteUnaryDenotation; -import edu.stanford.nlp.sempre.tables.lambdadcs.LambdaDCSException; -import edu.stanford.nlp.sempre.tables.lambdadcs.LambdaDCSException.Type; -import edu.stanford.nlp.sempre.tables.match.FuzzyMatcher; -import edu.stanford.nlp.sempre.tables.serialize.TableReader; -import edu.stanford.nlp.sempre.tables.serialize.TableWriter; -import fig.basic.*; - -/** - * A knowledge graph constructed from a table. 
- * - * - Each row becomes an entity - * - Each cell becomes an entity - * - Each column becomes a property between a row and a cell - * e.g., (row5 nationality canada) - * - Rows have several special properties (next, index) - * - * === Special Row Properties === - * - name = fb:row.row.next | type = (-> fb:type.row fb:type.row) - * - name = fb:row.row.index | type = (-> fb:type.int fb:type.row) - * - * === Special Cell Properties === - * - name = fb:cell.cell.number | type = (-> fb:type.number fb:type.cell) - * - * @author ppasupat - */ -public class TableKnowledgeGraph extends KnowledgeGraph implements FuzzyMatchable { - public static class Options { - @Option(gloss = "Verbosity") public int verbose = 0; - @Option(gloss = "Base directory for CSV files") - public String baseCSVDir = null; - @Option(gloss = "Whether to cache TableKnowledgeGraph") - public boolean cacheTableKnowledgeGraphs = true; - @Option(gloss = "Forbid row.row.next on multiple rows") - public boolean forbidNextOnManyRows = true; - @Option(gloss = "Set up executor cache for each graph (must manually clear, or else will get memory overflow)") - public boolean individualExecutorCache = false; - @Option(gloss = "Have the row index starts at 1 instead of 0") - public boolean rowIndexStartsAt1 = true; - } - public static Options opts = new Options(); - - // ============================================================ - // Fields - // ============================================================ - - public List rows; - public List columns; - public Set cellProperties; - public Set cellParts; - public final String filename; - - // "fb:row.r5" --> TableRow object - Map rowIdToTableRow; - // "fb:row.row.population" --> TableColumn object - Map relationIdToTableColumn; - // "fb:cell.palo_alto_ca" --> TableCellProperties object - Map cellIdToTableCellProperties; - // "fb:part.palo_alto" --> String - Map partIdToOriginalString; - - FuzzyMatcher fuzzyMatcher; - public ExecutorCache executorCache; - - 
@Override - public void clean() { - if (executorCache != null) executorCache.clearCache(this); - } - - // ============================================================ - // Constructor - // ============================================================ - - /** - * Construct a new TableKnowledgeGraph from a String matrix. - * Does not cache the data. - */ - public TableKnowledgeGraph(String filename, Iterable data) { - this.filename = filename; - // Used column names (no two columns have the same id) - Set usedColumnNames = new HashSet<>(); - // Cells in the same column with the same string content gets the same id. - Map, String> columnAndOriginalStringToCellId = new HashMap<>(); - // Go though the data - for (String[] record : data) { - if (columns == null) { - // Initialize - rows = new ArrayList<>(); - columns = new ArrayList<>(); - rowIdToTableRow = new HashMap<>(); - relationIdToTableColumn = new HashMap<>(); - cellIdToTableCellProperties = new HashMap<>(); - // Read the header row - for (String entry : record) { - String normalizedEntry = StringNormalizationUtils.characterNormalize(entry).toLowerCase(); - String canonicalName = TableTypeSystem.canonicalizeName(normalizedEntry); - String columnName = TableTypeSystem.getUnusedName(canonicalName, usedColumnNames); - TableColumn column = new TableColumn(entry, columnName, columns.size()); - columns.add(column); - usedColumnNames.add(columnName); - relationIdToTableColumn.put(column.relationNameValue.id, column); - } - } else { - // Read the content row - if (record.length != columns.size()) { - LogInfo.warnings("Table has %d columns but row has %d cells: %s | %s", columns.size(), - record.length, columns, Fmt.D(record)); - } - int rowIndex = opts.rowIndexStartsAt1 ? 
rows.size() + 1 : rows.size(); - TableRow currentRow = new TableRow(rowIndex); - rowIdToTableRow.put(currentRow.nameValue.id, currentRow); - rows.add(currentRow); - for (int i = 0; i < columns.size(); i++) { - TableColumn column = columns.get(i); - String cellName = (i < record.length) ? record[i] : ""; - // Create a NameValue - String normalizedCellName = StringNormalizationUtils.characterNormalize(cellName).toLowerCase(); - Pair columnAndOriginalString = new Pair<>(column, normalizedCellName); - String id = columnAndOriginalStringToCellId.get(columnAndOriginalString); - if (id == null) { - String canonicalName = TableTypeSystem.canonicalizeName(normalizedCellName); - id = TableTypeSystem.getUnusedName( - TableTypeSystem.getCellName(canonicalName, column.columnName), - cellIdToTableCellProperties.keySet()); - columnAndOriginalStringToCellId.put(columnAndOriginalString, id); - cellIdToTableCellProperties.put(id, new TableCellProperties(id, cellName)); - } - TableCellProperties properties = cellIdToTableCellProperties.get(id); - TableCell.createAndAddTo(properties, column, currentRow); - } - } - } - // Generate cell properties by analyzing cell content in each column - for (TableColumn column : columns) - StringNormalizationUtils.analyzeColumn(column); - // Collect cell properties for public access - cellProperties = new HashSet<>(cellIdToTableCellProperties.values()); - cellParts = new HashSet<>(); - partIdToOriginalString = new HashMap<>(); - for (TableCellProperties properties : cellProperties) { - for (Value part : properties.metadata.get(TableTypeSystem.CELL_PART_VALUE)) { - NameValue partNameValue = (NameValue) part; - cellParts.add(partNameValue); - partIdToOriginalString.put(partNameValue.id, partNameValue.description); - } - } - // Precompute normalized strings for fuzzy matching - fuzzyMatcher = FuzzyMatcher.getFuzzyMatcher(this); - executorCache = opts.individualExecutorCache ? new ExecutorCache() : null; - } - - /** - * Read CSV or TSV file. 
- */ - TableKnowledgeGraph(String filename) throws IOException { - this(filename, new TableReader(filename)); - } - - // Cache (don't create multiple graphs for the same CSV or TSV file) - static final Map filenameToGraph = new HashMap<>(); - - public static synchronized TableKnowledgeGraph fromRootedFilename(String filename) { - // Get from cache if possible - TableKnowledgeGraph graph = filenameToGraph.get(filename); - if (graph == null) { - if (opts.verbose >= 1) - LogInfo.logs("create new TableKnowledgeGraph from filename = %s", filename); - StopWatchSet.begin("TableKnowledgeGraph.new"); - try { - graph = new TableKnowledgeGraph(filename); - } catch (IOException e) { - throw new RuntimeException(e); - } - StopWatchSet.end(); - if (opts.cacheTableKnowledgeGraphs) - filenameToGraph.put(filename, graph); - } - return graph; - } - - public static TableKnowledgeGraph fromFilename(String filename) { - return fromRootedFilename(new File(opts.baseCSVDir, filename).getPath()); - } - - public static TableKnowledgeGraph fromLispTree(LispTree tree) { - if (tree.children.size() > 3 && "rooted-path".equals(tree.child(3).value)) - return fromRootedFilename(tree.child(2).value); - else - return fromFilename(tree.child(2).value); - - } - - // ============================================================ - // Construct from existing cells - // ============================================================ - - /** - * Construct a new TableKnowledgeGraph using the same columns as an old table - * but with different cell ordering. - * Does not cache the data. 
- */ - public TableKnowledgeGraph(String filename, List oldColumns, - List> oldCells, boolean cellsAreGroupedByColumn) { - this.filename = filename; - rows = new ArrayList<>(); - columns = new ArrayList<>(); - rowIdToTableRow = new HashMap<>(); - relationIdToTableColumn = new HashMap<>(); - cellIdToTableCellProperties = new HashMap<>(); - // Header row - for (TableColumn oldColumn : oldColumns) { - TableColumn column = new TableColumn(oldColumn); - columns.add(column); - relationIdToTableColumn.put(column.relationNameValue.id, column); - } - // Sanity check - int numRows, numColumns = columns.size(); - if (cellsAreGroupedByColumn) { // oldCells[column][row] - if (oldCells.size() != numColumns) - throw new RuntimeException("Mismatched sizes: oldCells has " + oldCells.size() + " != " + numColumns + " columns"); - numRows = oldCells.get(0).size(); - for (List oldCellsRow : oldCells) - if (oldCellsRow.size() != numRows) - throw new RuntimeException("Mismatched sizes: oldCells has " + oldCells.size() + " != " + numColumns + " rows"); - } else { // oldCells[row][column] - numRows = oldCells.size(); - for (List oldCellsColumn : oldCells) - if (oldCellsColumn.size() != numColumns) - throw new RuntimeException("Mismatched sizes: oldCells has " + oldCells.size() + " != " + numColumns + " columns"); - } - // Content rows - for (int i = 0; i < numRows; i++) { - TableRow currentRow = new TableRow(i); - rows.add(currentRow); - rowIdToTableRow.put(currentRow.nameValue.id, currentRow); - for (int j = 0; j < numColumns; j++) { - TableColumn column = columns.get(j); - TableCellProperties properties = new TableCellProperties( - cellsAreGroupedByColumn ? 
oldCells.get(j).get(i) : oldCells.get(i).get(j)); - cellIdToTableCellProperties.put(properties.id, properties); - TableCell.createAndAddTo(properties, column, currentRow); - } - } - // Finalize - cellProperties = new HashSet<>(cellIdToTableCellProperties.values()); - cellParts = new HashSet<>(); - for (TableCellProperties properties : cellProperties) - for (Value part : properties.metadata.get(TableTypeSystem.CELL_PART_VALUE)) - cellParts.add((NameValue) part); - // Precompute normalized strings for fuzzy matching - fuzzyMatcher = FuzzyMatcher.getFuzzyMatcher(this); - executorCache = opts.individualExecutorCache ? new ExecutorCache() : null; - } - - // ============================================================ - // Convert to other formats - // ============================================================ - - @Override - public LispTree toLispTree() { - if (filename != null) { - // short version: just print the filename - LispTree tree = LispTree.proto.newList(); - tree.addChild("graph"); - tree.addChild("tables.TableKnowledgeGraph"); - if (filename.startsWith(opts.baseCSVDir)) - tree.addChild(Paths.get(opts.baseCSVDir).relativize(Paths.get(filename)).toString()); - else { - tree.addChild(filename); - tree.addChild("rooted-path"); - } - return tree; - } - return toTableValue().toLispTree(); - } - - @Override - public LispTree toShortLispTree() { - return toLispTree(); - } - - public TableValue toTableValue() { - List tableValueHeader = new ArrayList<>(); - List> tableValueRows = new ArrayList<>(); - for (TableColumn column : columns) { - tableValueHeader.add(column.originalString); - } - for (TableRow row : rows) { - List tableValueRow = new ArrayList<>(); - for (TableCell cell : row.children) { - tableValueRow.add(cell.properties.nameValue); - } - tableValueRows.add(tableValueRow); - } - return new TableValue(tableValueHeader, tableValueRows); - } - - public void log() { - new TableWriter(this).log(); - } - - // 
============================================================ - // Fuzzy matching - // ============================================================ - - @Override - public Collection getFuzzyMatchedFormulas(String term, FuzzyMatchFn.FuzzyMatchFnMode mode) { - return fuzzyMatcher.getFuzzyMatchedFormulas(term, mode); - } - - @Override - public Collection getFuzzyMatchedFormulas( - List sentence, int startIndex, int endIndex, FuzzyMatchFnMode mode) { - return fuzzyMatcher.getFuzzyMatchedFormulas(sentence, startIndex, endIndex, mode); - } - - @Override - public Collection getAllFormulas(FuzzyMatchFn.FuzzyMatchFnMode mode) { - return fuzzyMatcher.getAllFormulas(mode); - } - - // ============================================================ - // Query - // ============================================================ - - public static final NameValue TYPE = new NameValue(CanonicalNames.TYPE); - public static final NameValue ROW_TYPE = new NameValue(TableTypeSystem.ROW_TYPE); - - /** Return all y such that x in firsts and (x,r,y) in graph */ - @Override - public List joinFirst(Value r, Collection firsts) { - return joinSecond(CanonicalNames.reverseProperty(r), firsts); - } - - /** Return all x such that y in seconds and (x,r,y) in graph */ - @Override - public List joinSecond(Value r, Collection seconds) { - List answer = new ArrayList<>(); - for (Pair pair : filterSecond(r, seconds)) - answer.add(pair.getFirst()); - return answer; - } - - /** Return all (x,y) such that x in firsts and (x,r,y) in graph */ - @Override - public List> filterFirst(Value r, Collection firsts) { - return getReversedPairs(filterSecond(CanonicalNames.reverseProperty(r), firsts)); - } - - /* - * - {one,many} to one: Each X maps to 1 Y: - * X-Y = row-row, row-primitive, primitive-row, row-cell, cell-primitive - * - {one,many} to many: Remove duplicates first, then each X maps to possibly many Y's - * X-Y = cell-row, primitive-cell - */ - /** Return all (x,y) such that y in seconds and (x,r,y) in graph 
*/ - // TODO(ice): Check correctness - @Override - public List> filterSecond(Value r, Collection seconds) { - List> answer = new ArrayList<>(); - if (CanonicalNames.isReverseProperty(r)) { - r = CanonicalNames.reverseProperty(r); - if (r.equals(TYPE)) { - //////////////////////////////////////////////////////////// - // (!fb:type.object.type fb:row.r5) --> fb:type.row - // Not handled right now. - throw new BadFormulaException("Unhandled! " + r); - } else if (r.equals(TableTypeSystem.ROW_NEXT_VALUE)) { - //////////////////////////////////////////////////////////// - // (!fb:row.row.next fb:row.r5) --> fb:row.r6 - if (opts.forbidNextOnManyRows && seconds.size() != 1 && seconds != InfiniteUnaryDenotation.STAR_UNARY) { - throw new LambdaDCSException(Type.nonSingletonList, "cannot call next on " + seconds.size() + " rows."); - } - if (seconds.size() == Integer.MAX_VALUE) { - for (int i = 0; i < rows.size() - 1; i++) { - if (!seconds.contains(rows.get(i).nameValue)) continue; - answer.add(new Pair<>(rows.get(i + 1).nameValue, rows.get(i).nameValue)); - } - } else { - for (Value value : seconds) { - if (!(value instanceof NameValue)) continue; - TableRow row = rowIdToTableRow.get(((NameValue) value).id); - if (row == null) continue; - int i = opts.rowIndexStartsAt1 ? 
row.index - 1 : row.index; - if (i + 1 >= rows.size()) continue; - answer.add(new Pair<>(rows.get(i + 1).nameValue, row.nameValue)); - } - } - } else if (r.equals(TableTypeSystem.ROW_INDEX_VALUE)) { - //////////////////////////////////////////////////////////// - // (!fb:row.row.index fb:row.r5) --> (number 5) - if (seconds.size() == Integer.MAX_VALUE) { - for (TableRow row : rows) { - if (!seconds.contains(row.nameValue)) continue; - answer.add(new Pair<>(row.indexValue, row.nameValue)); - } - } else { - for (Value value : seconds) { - if (!(value instanceof NameValue)) continue; - TableRow row = rowIdToTableRow.get(((NameValue) value).id); - if (row == null) continue; - answer.add(new Pair<>(row.indexValue, row.nameValue)); - } - } - } else if (TableTypeSystem.isCellProperty(r)) { - //////////////////////////////////////////////////////////// - // (!fb:cell.cell.number fb:cell_id.5) --> 5 - if (seconds.size() == Integer.MAX_VALUE) { - for (TableColumn column : columns) { - for (TableCell cell : column.children) { - for (Value property : cell.properties.metadata.get(r)) { - if (!seconds.contains(cell.properties.nameValue)) continue; - answer.add(new Pair<>(property, cell.properties.nameValue)); - } - } - } - } else { - for (Value value : seconds) { - if (!(value instanceof NameValue)) continue; - TableCellProperties properties = cellIdToTableCellProperties.get(((NameValue) value).id); - if (properties == null) continue; - for (Value property : properties.metadata.get(r)) { - answer.add(new Pair<>(property, properties.nameValue)); - } - } - } - } else if (TableTypeSystem.isRowProperty(r)) { - //////////////////////////////////////////////////////////// - // (!fb:row.row.nationality fb:row.r5) --> fb:cell.canada - if (seconds.size() == Integer.MAX_VALUE) { - for (int i = 0; i < columns.size(); i++) { - if (!r.equals(columns.get(i).relationNameValue)) continue; - for (TableRow row : rows) { - if (!seconds.contains(row.nameValue)) continue; - answer.add(new 
Pair<>(row.children.get(i).properties.nameValue, row.nameValue)); - } - } - } else { - for (int i = 0; i < columns.size(); i++) { - if (!r.equals(columns.get(i).relationNameValue)) continue; - for (Value value : seconds) { - if (!(value instanceof NameValue)) continue; - TableRow row = rowIdToTableRow.get(((NameValue) value).id); - if (row == null) continue; - answer.add(new Pair<>(row.children.get(i).properties.nameValue, row.nameValue)); - } - } - } - } else if (TableTypeSystem.isRowConsecutiveProperty(r)) { - //////////////////////////////////////////////////////////// - // (!fb:row.consecutive.nationality fb:row.r5) --> (number 2) - for (int i = 0; i < columns.size(); i++) { - if (!r.equals(columns.get(i).relationConsecutiveNameValue)) continue; - int count = 0; - NameValue lastCell = null; - for (TableRow row : rows) { - if (row.children.get(i).properties.nameValue.equals(lastCell)) - count++; - else { - count = 1; - lastCell = row.children.get(i).properties.nameValue; - } - if (!seconds.contains(row.nameValue)) continue; - answer.add(new Pair<>(new NumberValue(count), row.nameValue)); - } - } - } - } else { - if (r.equals(TYPE)) { - //////////////////////////////////////////////////////////// - // (fb:type.object.type fb:type.row) --> {fb:row.r1, fb:row.r2, ...} - for (Value second : seconds) { - if (second.equals(ROW_TYPE)) { - for (TableRow row : rows) - answer.add(new Pair<>(row.nameValue, second)); - } - } - } else if (r.equals(TableTypeSystem.ROW_NEXT_VALUE)) { - //////////////////////////////////////////////////////////// - // (fb:row.row.next fb:row.r5) --> fb:row.r4 - if (opts.forbidNextOnManyRows && seconds.size() != 1 && seconds != InfiniteUnaryDenotation.STAR_UNARY) { - throw new LambdaDCSException(Type.nonSingletonList, "cannot call next on " + seconds.size() + " rows."); - } - if (seconds.size() == Integer.MAX_VALUE) { - for (int i = 1; i < rows.size(); i++) { - if (!seconds.contains(rows.get(i).nameValue)) continue; - answer.add(new 
Pair<>(rows.get(i - 1).nameValue, rows.get(i).nameValue)); - } - } else { - for (Value value : seconds) { - if (!(value instanceof NameValue)) continue; - TableRow row = rowIdToTableRow.get(((NameValue) value).id); - if (row == null) continue; - int i = opts.rowIndexStartsAt1 ? row.index - 1 : row.index; - if (i - 1 < 0) continue; - answer.add(new Pair<>(rows.get(i - 1).nameValue, row.nameValue)); - } - } - } else if (r.equals(TableTypeSystem.ROW_INDEX_VALUE)) { - //////////////////////////////////////////////////////////// - // (fb:row.row.index (number 5)) --> fb:row.r5 - if (seconds.size() == Integer.MAX_VALUE) { - for (TableRow row : rows) { - if (!seconds.contains(row.indexValue)) continue; - answer.add(new Pair<>(row.nameValue, row.indexValue)); - } - } else { - for (Value value : seconds) { - if (!(value instanceof NumberValue)) continue; - double x = ((NumberValue) value).value; - if (Math.abs(x - Math.round(x)) > 1e-6) continue; // Ignore non-integers - int i = (int) x; - if (opts.rowIndexStartsAt1) i--; - if (i < 0 || i >= rows.size()) continue; - TableRow row = rows.get(i); - answer.add(new Pair<>(row.nameValue, row.indexValue)); - } - } - } else if (TableTypeSystem.isCellProperty(r)) { - //////////////////////////////////////////////////////////// - // (fb:cell.cell.number (number 5)) --> {fb:cell_id.5 fb:cell_population.5, ...} - // Possibly with repeated id (if there are multiple cells with that id) - for (TableColumn column : columns) { - for (TableCell cell : column.children) { - for (Value property : cell.properties.metadata.get(r)) { - if (!seconds.contains(property)) continue; - answer.add(new Pair<>(cell.properties.nameValue, property)); - } - } - } - } else if (TableTypeSystem.isRowProperty(r)) { - //////////////////////////////////////////////////////////// - // (fb:row.row.nationality fb:cell.canada) --> fb:row.r5 - for (int i = 0; i < columns.size(); i++) { - if (!r.equals(columns.get(i).relationNameValue)) continue; - for (TableRow row : 
rows) { - if (!seconds.contains(row.children.get(i).properties.nameValue)) continue; - answer.add(new Pair<>(row.nameValue, row.children.get(i).properties.nameValue)); - } - } - } else if (TableTypeSystem.isRowConsecutiveProperty(r)) { - //////////////////////////////////////////////////////////// - // (fb:row.consecutive.nationality (number 2)) --> fb:row.r5 - for (int i = 0; i < columns.size(); i++) { - if (!r.equals(columns.get(i).relationConsecutiveNameValue)) continue; - int count = 0; - NameValue lastCell = null; - for (TableRow row : rows) { - if (row.children.get(i).properties.nameValue.equals(lastCell)) - count++; - else { - count = 1; - lastCell = row.children.get(i).properties.nameValue; - } - if (!seconds.contains(new NumberValue(count))) continue; - answer.add(new Pair<>(row.nameValue, new NumberValue(count))); - } - } - } - } - return answer; - } - - // ============================================================ - // Methods specific to TableKnowledgeGraph - // ============================================================ - - public void populateStats(Evaluation evaluation) { - evaluation.add("rows", rows.size()); - evaluation.add("columns", columns.size()); - evaluation.add("cells", rows.size() * columns.size()); - } - - public int numRows() { return rows.size(); } - public int numColumns() { return columns.size(); } - public int numUniqueCells() { return cellProperties.size(); } - - public TableRow getRow(int rowIndex) { - return rows.get(rowIndex); - } - - public TableColumn getColumn(int columnIndex) { - return columns.get(columnIndex); - } - - public TableCell getCell(int rowIndex, int colIndex) { - return rows.get(rowIndex).children.get(colIndex); - } - - public List getAllColumnStrings() { - List columnStrings = new ArrayList<>(); - for (TableColumn column : columns) { - columnStrings.add(column.originalString); - } - return columnStrings; - } - - public List getAllCellStrings() { - List cellStrings = new ArrayList<>(); - for (TableColumn 
column : columns) { - for (TableCell cell : column.children) { - cellStrings.add(cell.properties.originalString); - } - } - return cellStrings; - } - - public String getOriginalString(Value value) { - return (value instanceof NameValue) ? getOriginalString(((NameValue) value).id) : null; - } - - public String getOriginalString(String nameValueId) { - if (nameValueId.startsWith("!")) nameValueId = nameValueId.substring(1); - if (cellIdToTableCellProperties.containsKey(nameValueId)) - return cellIdToTableCellProperties.get(nameValueId).originalString; - if (partIdToOriginalString.containsKey(nameValueId)) - return partIdToOriginalString.get(nameValueId); - if (relationIdToTableColumn.containsKey(nameValueId)) - return relationIdToTableColumn.get(nameValueId).originalString; - if (nameValueId.startsWith(TableTypeSystem.CELL_SPECIFIC_TYPE_PREFIX)) { - String property = nameValueId.replace(TableTypeSystem.CELL_SPECIFIC_TYPE_PREFIX, TableTypeSystem.ROW_PROPERTY_NAME_PREFIX); - if (relationIdToTableColumn.containsKey(property)) - return relationIdToTableColumn.get(property).originalString; - } - return null; - } - - public Value getNameValueWithOriginalString(NameValue value) { - if (value.description == null) - value = new NameValue(value.id, getOriginalString(value.id)); - return value; - } - - public ListValue getListValueWithOriginalStrings(ListValue answers) { - List values = new ArrayList<>(); - for (Value value : answers.values) { - if (value instanceof NameValue) { - NameValue name = (NameValue) value; - if (name.description == null) - value = new NameValue(name.id, getOriginalString(name.id)); - } - values.add(value); - } - return new ListValue(values); - } - - /** - * Return a list of rows that contain a cell with the specified NameValue ID. 
- */ - public List getRowsOfCellId(String nameValueId) { - String property = TableTypeSystem.getPropertyOfEntity(nameValueId); - if (property == null) return null; - TableColumn column = relationIdToTableColumn.get(property); - if (column == null) return null; - List answer = new ArrayList<>(); - for (int i = 0; i < column.children.size(); i++) { - if (column.children.get(i).properties.id.equals(nameValueId)) - answer.add(i); - } - return answer; - } - - /** - * Return the index of the column with the specified ID. Return -1 if not found. - */ - public int getColumnIndex(String nameValueId) { - if (nameValueId.startsWith("!")) - nameValueId = nameValueId.substring(1); - for (int j = 0; j < columns.size(); j++) { - if (columns.get(j).relationNameValue.id.equals(nameValueId)) return j; - } - return -1; - } - - // ============================================================ - // Test - // ============================================================ - - public static void main(String[] args) { - StringNormalizationUtils.opts.verbose = 5; - StringNormalizationUtils.opts.numberCanStartAnywhere = true; - StringNormalizationUtils.opts.num2CanStartAnywhere = true; - opts.baseCSVDir = "lib/data/WikiTableQuestions/"; - String filename = "csv/200-csv/0.csv"; - TableKnowledgeGraph graph = (TableKnowledgeGraph) KnowledgeGraph.fromLispTree( - LispTree.proto.parseFromString("(graph tables.TableKnowledgeGraph " + filename + ")")); - for (TableColumn column : graph.columns) { - LogInfo.begin_track("%s (%s)", column.columnName, column.originalString); - for (TableCell cell : column.children) { - LogInfo.logs("%s (%s) %s", cell.properties.nameValue, - cell.properties.originalString, cell.properties.metadata); - } - LogInfo.end_track(); - } - } - - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableRow.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableRow.java deleted file mode 100644 index 527db93cfc..0000000000 --- 
a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableRow.java +++ /dev/null @@ -1,32 +0,0 @@ -package edu.stanford.nlp.sempre.tables; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; - -/** - * Represents a table row. - * - * In the knowledge graph, a table row is a nameless node. - * The final denotation cannot be row nodes. - * - * @author ppasupat - */ -public class TableRow { - public final List children; - public final int index; - public final NumberValue indexValue; - public final NameValue nameValue; - - public TableRow(int index) { - this.children = new ArrayList<>(); - this.index = index; - this.indexValue = new NumberValue(index); - this.nameValue = new NameValue(TableTypeSystem.getRowName(index), "" + index); - } - - @Override - public String toString() { - return nameValue.toString(); - } -} \ No newline at end of file diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableTypeLookup.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableTypeLookup.java deleted file mode 100644 index 1c0d4f8b09..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableTypeLookup.java +++ /dev/null @@ -1,49 +0,0 @@ -package edu.stanford.nlp.sempre.tables; - -import edu.stanford.nlp.sempre.*; -import fig.basic.*; - -/** - * Look up types for entities and properties in TableKnowledgeGraph. - * (Delegate all decisions to TableTypeSystem.) 
- * - * @author ppasupat - */ -public class TableTypeLookup implements TypeLookup { - public static class Options { - @Option(gloss = "Verbosity") public int verbose = 0; - } - public static Options opts = new Options(); - - @Override - public SemType getEntityType(String entity) { - if (opts.verbose >= 1) - LogInfo.logs("TableTypeLookup.getEntityType %s", entity); - SemType type = TableTypeSystem.getEntityTypeFromId(entity); - if (type == null && opts.verbose >= 1) - LogInfo.logs("TableTypeLookup.getEntityType FAIL %s", entity); - return type; - } - - @Override - public SemType getPropertyType(String property) { - if (opts.verbose >= 1) - LogInfo.logs("TableTypeLookup.getPropertyType %s", property); - SemType type = TableTypeSystem.getPropertyTypeFromId(property); - if (type == null && opts.verbose >= 1) - LogInfo.logs("TableTypeLookup.getPropertyType FAIL %s", property); - return type; - } - - // Test cases - public static void main(String[] args) { - TypeLookup typeLookup = new TableTypeLookup(); - String formulaString = - "(lambda x ((reverse >) ((reverse fb:cell.cell.number) (var x))))"; - //"(lambda x (< (< ((reverse <) ((reverse fb:row.row.next) (var x))))))"; - Formula formula = Formulas.fromLispTree(LispTree.proto.parseFromString(formulaString)); - LogInfo.logs("%s", formulaString); - LogInfo.logs("%s", TypeInference.inferType(formula, typeLookup)); - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableTypeSystem.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableTypeSystem.java deleted file mode 100644 index 2653a81e13..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableTypeSystem.java +++ /dev/null @@ -1,188 +0,0 @@ -package edu.stanford.nlp.sempre.tables; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; - -/** - * Typing System for table. Affects naming convention and how the types of formulas are inferred. 
- * - * ROW: name = fb:row.r[index] | type = fb:type.row - * CELL: name = fb:cell_[fieldName].[string] | type = (union fb:type.cell fb:column.[fieldName]) - * PROPERTY: name = fb:row.row.[fieldName] | type = (-> (union fb:type.cell fb:column.[fieldName]) fb:type.row) - * - * Note that the same string in different columns are mapped to different names. - * - * @author ppasupat - */ -public abstract class TableTypeSystem { - - // Value names - public static final String ROW_NAME_PREFIX = "fb:row"; - public static final String CELL_NAME_PREFIX = "fb:cell"; - public static final String PART_NAME_PREFIX = "fb:part"; - - // Type names - public static final String ROW_TYPE = "fb:type.row"; - public static final SemType ROW_SEMTYPE = SemType.newAtomicSemType(ROW_TYPE); - public static final String CELL_GENERIC_TYPE = "fb:type.cell"; - public static final SemType CELL_GENERIC_SEMTYPE = SemType.newAtomicSemType(CELL_GENERIC_TYPE); - public static final String CELL_SPECIFIC_TYPE_PREFIX = "fb:column"; - public static final String PART_GENERIC_TYPE = "fb:type.part"; - public static final SemType PART_GENERIC_SEMTYPE = SemType.newAtomicSemType(PART_GENERIC_TYPE); - public static final String PART_SPECIFIC_TYPE_PREFIX = "fb:part"; - - // Row relations - public static final String ROW_PROPERTY_NAME_PREFIX = "fb:row.row"; - public static final NameValue ROW_NEXT_VALUE = new NameValue("fb:row.row.next"); - public static final NameValue ROW_INDEX_VALUE = new NameValue("fb:row.row.index"); - public static final Map ROW_RELATIONS = new HashMap<>(); - static { - ROW_RELATIONS.put(ROW_NEXT_VALUE, SemType.newFuncSemType(ROW_TYPE, ROW_TYPE)); - ROW_RELATIONS.put(ROW_INDEX_VALUE, SemType.newFuncSemType(CanonicalNames.INT, ROW_TYPE)); - } - - public static final String ROW_CONSECUTIVE_PROPERTY_NAME_PREFIX = "fb:row.consecutive"; - - // Cell properties - public static final String CELL_PROPERTY_NAME_PREFIX = "fb:cell.cell"; - public static final NameValue CELL_NUMBER_VALUE = new 
NameValue("fb:cell.cell.number"); - public static final NameValue CELL_DATE_VALUE = new NameValue("fb:cell.cell.date"); - public static final NameValue CELL_NUM2_VALUE = new NameValue("fb:cell.cell.num2"); - public static final NameValue CELL_PART_VALUE = new NameValue("fb:cell.cell.part"); - public static final Map CELL_PROPERTIES = new HashMap<>(); - static { - CELL_PROPERTIES.put(CELL_NUMBER_VALUE, SemType.newFuncSemType(CanonicalNames.NUMBER, CELL_GENERIC_TYPE)); - CELL_PROPERTIES.put(CELL_DATE_VALUE, SemType.newFuncSemType(CanonicalNames.DATE, CELL_GENERIC_TYPE)); - CELL_PROPERTIES.put(CELL_NUM2_VALUE, SemType.newFuncSemType(CanonicalNames.NUMBER, CELL_GENERIC_TYPE)); - CELL_PROPERTIES.put(CELL_PART_VALUE, SemType.newFuncSemType(PART_GENERIC_TYPE, CELL_GENERIC_TYPE)); - } - - // ============================================================ - // Helper Functions - // ============================================================ - - /** - * Convert string entry to an alpha-numeric name - */ - public static String canonicalizeName(String originalString) { - String id = originalString; - id = id.replaceAll("[^\\w]", "_"); // Replace abnormal characters with _ - id = id.replaceAll("_+", "_"); // Merge consecutive _'s - id = id.replaceAll("_$", ""); - id = id.toLowerCase(); - if (id.length() == 0) id = "null"; - return id; - } - - /** - * Add suffix to make the name unique. (Does not modify usedNames) - */ - public static String getUnusedName(String baseName, Collection usedNames, String sep) { - int suffix = 2; - String appendedId = baseName; - while (usedNames.contains(appendedId)) { - appendedId = baseName + sep + (suffix++); - } - return appendedId; - } - public static String getUnusedName(String baseName, Collection usedNames) { - return getUnusedName(baseName, usedNames, "_"); - } - - /** - * When id = [prefix]_[1].[2], get [1]. 
For example: - * - fb:row_[tableId].r[index] --> [tableId] - * - fb:cell_[fieldName].[string] --> [fieldName] - */ - public static String getIdAfterUnderscore(String id, String prefix) { - return id.substring(prefix.length() + 1).split("\\.", 2)[0]; - } - - /** - * When id = [prefix]_[1].[2], get [2]. For example: - * - fb:row_[tableId].r[index] --> r[index] - * - fb:cell.[string] or fb:cell_[fieldName].[string] --> [string] - */ - public static String getIdAfterPeriod(String id, String prefix) { - return id.substring(prefix.length()).split("\\.", 2)[1]; - } - - public static boolean isRowProperty(Value r) { - return r instanceof NameValue && ((NameValue) r).id.startsWith(ROW_PROPERTY_NAME_PREFIX); - } - - public static boolean isRowConsecutiveProperty(Value r) { - return r instanceof NameValue && ((NameValue) r).id.startsWith(ROW_CONSECUTIVE_PROPERTY_NAME_PREFIX); - } - - public static boolean isCellProperty(Value r) { - return r instanceof NameValue && ((NameValue) r).id.startsWith(CELL_PROPERTY_NAME_PREFIX); - } - - // ============================================================ - // Main Functions - // ============================================================ - - public static String getRowName(int index) { - return ROW_NAME_PREFIX + ".r" + index; - } - - public static String getCellName(String id, String fieldName) { - return CELL_NAME_PREFIX + "_" + fieldName + "." + id; - } - - public static String getPartName(String id, String fieldName) { - return PART_NAME_PREFIX + "_" + fieldName + "." + id; - } - - public static String getCellType(String fieldName) { - return CELL_SPECIFIC_TYPE_PREFIX + "." + fieldName; - } - - public static String getPartType(String fieldName) { - return PART_SPECIFIC_TYPE_PREFIX + "." + fieldName; - } - - public static String getRowPropertyName(String fieldName) { - return ROW_PROPERTY_NAME_PREFIX + "." 
+ fieldName; - } - - public static String getRowConsecutivePropertyName(String fieldName) { - return ROW_CONSECUTIVE_PROPERTY_NAME_PREFIX + "." + fieldName; - } - - public static SemType getEntityTypeFromId(String entity) { - if (entity.startsWith(CELL_NAME_PREFIX)) { - String fieldName = getIdAfterUnderscore(entity, CELL_NAME_PREFIX); - return SemType.newUnionSemType(CELL_GENERIC_TYPE, getCellType(fieldName)); - } else if (entity.startsWith(PART_NAME_PREFIX)) { - String fieldName = getIdAfterUnderscore(entity, PART_NAME_PREFIX); - return SemType.newUnionSemType(PART_GENERIC_TYPE, getPartType(fieldName)); - } - return null; - } - - public static SemType getPropertyTypeFromId(String property) { - if (property.startsWith(ROW_PROPERTY_NAME_PREFIX)) { - SemType rowPropertyType = ROW_RELATIONS.get(new NameValue(property)); - if (rowPropertyType != null) return rowPropertyType; - String fieldName = getIdAfterPeriod(property, ROW_PROPERTY_NAME_PREFIX); - return new FuncSemType(SemType.newUnionSemType(getCellType(fieldName), CELL_GENERIC_TYPE), ROW_SEMTYPE); - } - if (property.startsWith(CELL_PROPERTY_NAME_PREFIX)) { - SemType cellPropertyType = CELL_PROPERTIES.get(new NameValue(property)); - return cellPropertyType; - } - return null; - } - - public static String getPropertyOfEntity(String entity) { - if (entity.startsWith(CELL_NAME_PREFIX)) { - String fieldName = getIdAfterUnderscore(entity, CELL_NAME_PREFIX); - return getRowPropertyName(fieldName); - } - return null; - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableValueEvaluator.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableValueEvaluator.java deleted file mode 100644 index c825c7d540..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableValueEvaluator.java +++ /dev/null @@ -1,139 +0,0 @@ -package edu.stanford.nlp.sempre.tables; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import fig.basic.LogInfo; -import 
fig.basic.Option; - -/** - * Return 1 if |pred| and |target| represent the same list and 0 otherwise. - * - * This is similar to FreebaseValueEvaluator, but - * - does not give partial credits - * - also check the type of the values (number, date, ...) - * - * @author ppasupat - */ -public class TableValueEvaluator implements ValueEvaluator { - public static class Options { - @Option(gloss = "Allow type conversion on predicted values before comparison") - public boolean allowMismatchedTypes = false; - @Option(gloss = "Allow matching on normalized strings (e.g. remove parentheses)") - public boolean allowNormalizedStringMatch = true; - @Option(gloss = "When comparing number values, only consider the value and not the unit") - public boolean ignoreNumberValueUnits = true; - @Option(gloss = "Strict date evaluation (year, month, and date all have to match)") - public boolean strictDateEvaluation = false; - @Option(gloss = "Check if the normalized text matches the official evaluator") - public boolean checkStringNormalization = false; - } - public static Options opts = new Options(); - - public double getCompatibility(Value target, Value pred) { - List targetList = ((ListValue) target).values; - if (!(pred instanceof ListValue)) return 0; - List predList = ((ListValue) pred).values; - // Make unique - predList = new ArrayList<>(new HashSet<>(predList)); - - if (targetList.size() != predList.size()) return 0; - - for (Value targetValue : targetList) { - boolean found = false; - for (Value predValue : predList) { - if (getItemCompatibility(targetValue, predValue)) { - found = true; - break; - } - } - if (!found) return 0; - } - return 1; - } - - // ============================================================ - // Item Compatibility - // ============================================================ - - // Compare one element of the list. 
- protected boolean getItemCompatibility(Value target, Value pred) { - if (pred instanceof ErrorValue) return false; // Never award points for error - if (pred == null) { - LogInfo.warning("Predicted value is null!"); - return false; - } - - if (target instanceof DescriptionValue) { - String targetText = ((DescriptionValue) target).value; - if (pred instanceof NameValue || pred instanceof DescriptionValue) { - // Just has to match the description - String predText = (pred instanceof NameValue) ? ((NameValue) pred).description : ((DescriptionValue) pred).value; - if (predText == null) predText = ""; - if (opts.allowNormalizedStringMatch) { - targetText = StringNormalizationUtils.aggressiveNormalize(targetText).toLowerCase(); - predText = StringNormalizationUtils.aggressiveNormalize(predText).toLowerCase(); - if (opts.checkStringNormalization) { - String targetTextOfficial = StringNormalizationUtils.officialEvaluatorNormalize(targetText); - String predTextOfficial = StringNormalizationUtils.officialEvaluatorNormalize(predText); - if (!targetTextOfficial.equals(targetText) && !(targetTextOfficial + ".").equals(targetText)) - LogInfo.warnings("Different normalization: [%s][%s]", targetTextOfficial, targetText); - if (!predTextOfficial.equals(predText) && !(predTextOfficial + ".").equals(predText)) - LogInfo.warnings("Different normalization: [%s][%s]", predTextOfficial, predText); - } - } - return targetText.equals(predText); - } else if (pred instanceof NumberValue) { - if (opts.allowMismatchedTypes) { - NumberValue targetNumber = StringNormalizationUtils.parseNumberLenient(targetText); - return targetNumber != null && targetNumber.equals(pred); - } - } - } else if (target instanceof NumberValue) { - NumberValue targetNumber = (NumberValue) target; - if (pred instanceof NumberValue) { - // Compare number - return compareNumberValues(targetNumber, (NumberValue) pred); - } else if (pred instanceof DateValue) { - // Assume year - DateValue date = (DateValue) pred; - 
return date.year == targetNumber.value && date.month == -1 && date.day == -1; - } else if (pred instanceof NameValue || pred instanceof DescriptionValue) { - // Try converting NameValue String into NumberValue - if (opts.allowMismatchedTypes) { - NumberValue predNumber = StringNormalizationUtils.toNumberValue(pred); - return predNumber != null && compareNumberValues(targetNumber, predNumber); - } - } - } else if (target instanceof DateValue) { - DateValue targetDate = (DateValue) target; - if (pred instanceof DateValue) { - // Compare date and date - return compareDateValues(targetDate, (DateValue) pred); - } - } - - return target.equals(pred); - } - - protected boolean compareNumberValues(NumberValue target, NumberValue pred) { - if (opts.ignoreNumberValueUnits) { - return Math.abs(target.value - pred.value) < 1e-6; - } else { - return target.equals(pred); - } - } - - protected boolean compareDateValues(DateValue target, DateValue pred) { - if (opts.strictDateEvaluation) { - return target.equals(pred); - } else { - // If a field in target is not blank (-1), pred must match target on that field - if (target.year != -1 && target.year != pred.year) return false; - if (target.month != -1 && target.month != pred.month) return false; - if (target.day != -1 && target.day != pred.day) return false; - return true; - } - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableValuePreprocessor.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableValuePreprocessor.java deleted file mode 100644 index 4e62bfcab1..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/TableValuePreprocessor.java +++ /dev/null @@ -1,151 +0,0 @@ -package edu.stanford.nlp.sempre.tables; - -import java.io.BufferedReader; -import java.io.File; -import java.io.FileReader; -import java.io.IOException; -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import fig.basic.*; - -public class TableValuePreprocessor extends 
TargetValuePreprocessor { - public static class Options { - @Option(gloss = "Verbosity") public int verbose = 0; - @Option(gloss = "Read preprocessed values from these .tagged files") - public List taggedFiles = new ArrayList<>(); - } - public static Options opts = new Options(); - - @Override - public Value preprocess(Value value, Example ex) { - if (!opts.taggedFiles.isEmpty() && ex != null) { - return getFromTaggedFile(ex.id); - } - if (value instanceof ListValue) { - List values = new ArrayList<>(); - for (Value entry : ((ListValue) value).values) { - values.add(preprocessSingle(entry)); - } - return new ListValue(values); - } else { - return preprocessSingle(value); - } - } - - public Value preprocessSingle(Value origTarget) { - if (origTarget instanceof DescriptionValue) { - String origString = ((DescriptionValue) origTarget).value; - Value canonical = canonicalize(origString); - if (opts.verbose >= 1) - LogInfo.logs("Canonicalize %s --> %s", origString, canonical); - return canonical; - } else { - return origTarget; - } - } - - /* - * Most common origString patterns: - * - number (4, 20, "4,000", 1996, ".15") - * - number range ("1997/98", "2000-2005") - * - number + unit ("4 years", "82.6 m") - * - ordinal ("1st") - * - date ("January 4, 1994", "7 August 2004", "9-1-1990") - * - time -- point or amount ("4:47", " - * - short strings (yes, no, more, less, before, after) - * - string ("Poland", "World Championship") - */ - protected Value canonicalize(String origString) { - Value answer; - LanguageInfo languageInfo = LanguageAnalyzer.getSingleton().analyze(origString); - // Try converting to a number. - answer = StringNormalizationUtils.parseNumberStrict(origString); - if (answer != null) return answer; - //answer = StringNormalizationUtils.parseNumberWithLanguageAnalyzer(languageInfo); - //if (answer != null) return answer; - // Try converting to a date. 
- answer = StringNormalizationUtils.parseDate(origString); - if (answer != null) return answer; - answer = StringNormalizationUtils.parseDateWithLanguageAnalyzer(languageInfo); - if (answer != null) return answer; - // Maybe it's number + unit - answer = StringNormalizationUtils.parseNumberWithUnitStrict(origString); - if (answer != null) return answer; - // Just treat as a description string - return new DescriptionValue(origString); - } - - // ============================================================ - // Get preprocessed value from tagged file - // ============================================================ - - Map idToValue = null; - - public Value getFromTaggedFile(String id) { - if (idToValue == null) readTaggedFiles(); - return idToValue.get(id); - } - - protected void readTaggedFiles() { - LogInfo.begin_track("Reading .tagged files"); - idToValue = new HashMap<>(); - for (String path : opts.taggedFiles) { - File file = new File(path); - if (file.isDirectory()) { - for (File subpath : file.listFiles()) - readTaggedFile(subpath.toString()); - } else { - readTaggedFile(path); - } - } - LogInfo.logs("Read %d entries", idToValue.size()); - LogInfo.end_track(); - } - - protected void readTaggedFile(String path) { - LogInfo.begin_track("Reading %s", path); - try (BufferedReader reader = new BufferedReader(new FileReader(path))) { - // Read header - String[] header = reader.readLine().split("\t", -1); - int exIdIndex = 0, targetCanonIndex = 0; - while (!"id".equals(header[exIdIndex])) - exIdIndex++; - while (!"targetCanon".equals(header[targetCanonIndex])) - targetCanonIndex++; - // Read each line - String line; - while ((line = reader.readLine()) != null) { - String[] fields = line.split("\t", -1); // Include trailing spaces - String[] rawValues = fields[targetCanonIndex].split("\\|"); - List values = new ArrayList<>(); - for (String rawValue : rawValues) { - values.add(simpleCanonicalize(rawValue)); - } - idToValue.put(fields[exIdIndex], new 
ListValue(values)); - } - } catch (IOException e) { - throw new RuntimeException(e); - } - LogInfo.end_track(); - } - - /** - * Like canonicalize, but assume that the string is already well-formed: - * - A number should look like a float - * - A date should be in the ISO format - * - Otherwise, the value is treated as a string. - */ - protected Value simpleCanonicalize(String origString) { - Value answer; - // Try converting to a number. - answer = StringNormalizationUtils.parseNumberStrict(origString); - if (answer != null) return answer; - // Try converting to a date. - answer = StringNormalizationUtils.parseDate(origString); - if (answer != null) return answer; - // Just treat as a description string - return new DescriptionValue(origString); - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/AggregatedTurkData.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/AggregatedTurkData.java deleted file mode 100644 index e6ff5784c0..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/AggregatedTurkData.java +++ /dev/null @@ -1,99 +0,0 @@ -package edu.stanford.nlp.sempre.tables.alter; - -import java.io.*; -import java.util.*; - -import edu.stanford.nlp.sempre.DescriptionValue; -import edu.stanford.nlp.sempre.ListValue; -import edu.stanford.nlp.sempre.TargetValuePreprocessor; -import edu.stanford.nlp.sempre.Value; -import edu.stanford.nlp.sempre.tables.StringNormalizationUtils; -import fig.basic.LogInfo; -import fig.basic.MapUtils; - -/** - * Read aggregated Turked results from TSV file. 
- * - * File format: - * - HIT ID - * - Example ID - * - Alter table index - * - Flag (A2/A3 = agreed on an answer, B2/B3 = agreed on "no answer", X = disagreed) - * - Agreed answer (blank for B or X) - * - Individual answers; come in pairs of (worker id, answer) -- not used here - * - * @author ppasupat - */ -public class AggregatedTurkData { - - /** - * Map from Example ID -> altered table index -> agreed response - */ - Map> data = new HashMap<>(); - - /** - * List of all tables used regardless of agreement - */ - Map> allTurkedTables = new HashMap<>(); - - public AggregatedTurkData() { } - - public AggregatedTurkData(String filename) { - addFromFile(filename); - } - - public AggregatedTurkData addFromFile(String filename) { - LogInfo.begin_track("Reading Turked data from %s", filename); - try (BufferedReader reader = new BufferedReader(new FileReader(filename))) { - int count = 0; - String line; - while ((line = reader.readLine()) != null) { - String[] tokens = line.split("\t"); - String exampleId = tokens[1]; - int alteredTableIndex = Integer.parseInt(tokens[2]); - MapUtils.addToList(allTurkedTables, exampleId, alteredTableIndex); - char flagCode = tokens[3].charAt(0); - if (flagCode != 'A' && flagCode != 'B') continue; - String response = tokens[4]; - Value canonicalized = toValue(response); - MapUtils.set(data, exampleId, alteredTableIndex, canonicalized); - count++; - } - LogInfo.logs("Read %d records", count); - } catch (IOException e) { - throw new RuntimeException(e); - } - LogInfo.end_track(); - return this; - } - - public Map get(String exampleId) { - return data.get(exampleId); - } - - public Value get(String exampleId, int alteredTableIndex) { - return MapUtils.get(data, exampleId, alteredTableIndex, null); - } - - /** - * Get all Turked tables regardless of whether the answer is agreed upon. - */ - public List getAllTurkedTables(String exampleId) { - return allTurkedTables.get(exampleId); - } - - /** - * Return canonicalized Value. 
- */ - private Value toValue(String response) { - if (response.isEmpty()) { - // TODO: Distinguish empty list from ERROR - return ValueCanonicalizer.ERROR; - } - List values = new ArrayList<>(); - for (String x : response.split("\\|")) - values.add(new DescriptionValue(StringNormalizationUtils.unescapeTSV(x))); - return TargetValuePreprocessor.getSingleton().preprocess(new ListValue(values), null); - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/AlteredTablesExecutor.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/AlteredTablesExecutor.java deleted file mode 100644 index 318d0712d7..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/AlteredTablesExecutor.java +++ /dev/null @@ -1,221 +0,0 @@ -package edu.stanford.nlp.sempre.tables.alter; - -import java.io.*; -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.tables.TableKnowledgeGraph; -import edu.stanford.nlp.sempre.tables.serialize.SerializedDataset; -import edu.stanford.nlp.sempre.tables.test.CustomExample; -import fig.basic.IOUtils; -import fig.basic.LogInfo; -import fig.basic.StrUtils; -import fig.exec.Execution; - -/** - * Execute the formulas on the altered tables. - * - * Summarized turk data can be provided in - * BatchTableAlterer.opts.turkedDataPath, - * in which case only turked tables will be executed on. - * A "check" file, indicating whether the denotation matches - * the turked answers, will also be dumped. - * - * Annotated formulas can be provided in - * BatchTableAlterer.opts.annotatedFormulasPath, - * in which case the annotated formula will be executed - * (to a separate file). 
- * - * @author ppasupat - */ -public class AlteredTablesExecutor implements Runnable { - - public static void main(String[] args) { - Execution.run(args, "AlteredTablesExecutorMain", new AlteredTablesExecutor(), Master.getOptionsParser()); - } - - private Builder builder; - private TableAltererCache tableAltererCache = null; - - private boolean hasAnnotated = false; - private Map idToAnnotated = new HashMap<>(); - - private boolean hasTurk = false; - private AggregatedTurkData turkedData = null; - - @Override - public void run() { - builder = new Builder(); - builder.build(); - - // Read dataset - Dataset dataset; - if (Dataset.opts.inPaths.isEmpty() || Dataset.opts.inPaths.get(0).getSecond().endsWith(".gz")) { - LogInfo.logs("Loading SERIALIZED dataset"); - dataset = new SerializedDataset(); - } else { - LogInfo.logs("Loading USUAL dataset"); - dataset = new Dataset(); - } - dataset.read(); - List examples = dataset.examples("train"); - - // Alterer cache - tableAltererCache = new TableAltererCache(); - - // Read annotation file - if (!StrUtils.isEmpty(BatchTableAlterer.opts.annotatedFormulasPath)) { - hasAnnotated = true; - // Prevent verbose output - if (CustomExample.opts.verbose > 1) - CustomExample.opts.verbose = 1; - List annotated = CustomExample.getDataset(BatchTableAlterer.opts.annotatedFormulasPath); - for (CustomExample ex : annotated) { - if (ex == null || ex.targetFormula == null) continue; - // Check annotation - Value rawValue = builder.executor.execute(ex.targetFormula, ex.context).value; - if (rawValue instanceof ListValue) - rawValue = ((TableKnowledgeGraph) ex.context.graph).getListValueWithOriginalStrings((ListValue) rawValue); - if (builder.valueEvaluator.getCompatibility(ex.targetValue, rawValue) == 1) - idToAnnotated.put(ex.id, ex); - else - LogInfo.warnings("Wrong annotation [%s] expected %s; got %s", ex.id, ex.targetValue, rawValue); - } - } - - // Read Turked data - if (!StrUtils.isEmpty(BatchTableAlterer.opts.turkedDataPath)) { - 
hasTurk = true; - turkedData = new AggregatedTurkData(BatchTableAlterer.opts.turkedDataPath); - } - - Execution.putOutput("group", "train"); - int index = -1; - for (Example ex : examples) { - Execution.putOutput("example", ++index); - if (CustomExample.checkFilterExamples(index)) - process(ex); - ex.predDerivations.clear(); // Save memory - } - } - - - // For each example, execute on the turked tables, then dump to file - private void process(Example ex) { - LogInfo.begin_track("Processing %s", ex.id); - ex.log(); - if (ex.predDerivations == null) - ex.predDerivations = Collections.emptyList(); - LogInfo.logs("Read %d derivations", ex.predDerivations.size()); - DenotationData denotationData = new DenotationData(BatchTableAlterer.opts.numAlteredTables, ex.predDerivations.size()); - DenotationData checkData = new DenotationData(BatchTableAlterer.opts.numAlteredTables, ex.predDerivations.size()); - - // Get the relevant indices - List tableIndices; - if (hasTurk) { - tableIndices = turkedData.getAllTurkedTables(ex.id); - if (tableIndices == null) - tableIndices = new ArrayList<>(); - } else { - tableIndices = new ArrayList<>(BatchTableAlterer.opts.numAlteredTables + 1); - for (int i = 0; i <= BatchTableAlterer.opts.numAlteredTables; i++) - tableIndices.add(i); - } - - for (int tableIndex : tableIndices) { - LogInfo.begin_track("Executing on table %d", tableIndex); - TableKnowledgeGraph graph = tableAltererCache.load(ex.id, tableIndex); - Value target = hasTurk ? 
turkedData.get(ex.id, tableIndex) : null; - ContextValue context = new ContextValue(graph); - // Execute all formulas on the new graph - for (int k = 0; k < ex.predDerivations.size(); k++) { - Derivation deriv = ex.predDerivations.get(k); - Value value = builder.executor.execute(deriv.formula, context).value; - if (value instanceof ListValue) - value = graph.getListValueWithOriginalStrings((ListValue) value); - boolean correct = isCorrect(ex, target, value); - value = ValueCanonicalizer.canonicalize(value); - denotationData.addDenotation(k, tableIndex, value); - if (hasTurk) - checkData.addDenotation(k, tableIndex, getCheck(target, value, correct)); - } - // Annotated formula - if (hasAnnotated) { - Value annotatedValue = null; - Example annotatedEx = idToAnnotated.get(ex.id); - boolean annotatedCorrect = false; - if (annotatedEx != null && annotatedEx.targetFormula != null) { - annotatedValue = builder.executor.execute(annotatedEx.targetFormula, context).value; - if (annotatedValue instanceof ListValue) - annotatedValue = graph.getListValueWithOriginalStrings((ListValue) annotatedValue); - annotatedCorrect = isCorrect(ex, target, annotatedValue); - annotatedValue = ValueCanonicalizer.canonicalize(annotatedValue); - } - denotationData.addAnnotatedDenotation(tableIndex, annotatedValue); - if (hasTurk) - checkData.addAnnotatedDenotation(tableIndex, getCheck(target, annotatedValue, annotatedCorrect)); - } - LogInfo.end_track(); - } - - { - File dir = new File(Execution.getFile("actual-denotations")); - if (!dir.isDirectory()) dir.mkdir(); - PrintWriter writer = IOUtils.openOutHard(dir.toString() + "/" + ex.id + ".gz"); - denotationData.dump(writer); - writer.close(); - } - if (hasAnnotated) { - File dir = new File(Execution.getFile("actual-annotated-denotations")); - if (!dir.isDirectory()) dir.mkdir(); - PrintWriter writer = IOUtils.openOutHard(dir.toString() + "/" + ex.id + ".gz"); - denotationData.dumpAnnotated(writer); - writer.close(); - } - if (hasTurk) { - File 
dir = new File(Execution.getFile("check-denotations")); - if (!dir.isDirectory()) dir.mkdir(); - PrintWriter writer = IOUtils.openOutHard(dir.toString() + "/" + ex.id + ".gz"); - checkData.dump(writer); - writer.close(); - } - if (hasTurk && hasAnnotated) { - File dir = new File(Execution.getFile("check-annotated-denotations")); - if (!dir.isDirectory()) dir.mkdir(); - PrintWriter writer = IOUtils.openOutHard(dir.toString() + "/" + ex.id + ".gz"); - checkData.dumpAnnotated(writer); - writer.close(); - } - - LogInfo.end_track(); - } - - // See if a value matches the targetValue - boolean isCorrect(Example ex, Value target, Value pred) { - if (target == null || target instanceof ErrorValue) return false; - double result = 0; - try { - result = builder.valueEvaluator.getCompatibility(target, pred); - } catch (Exception e) { - LogInfo.logs("%s", e); - e.printStackTrace(); - } - return result == 1; - } - - private static final Value TURK_A_MATCHED = new NameValue("Ao"); - private static final Value TURK_A_MISMATCHED = new NameValue("Ax"); - private static final Value TURK_B_MATCHED = new NameValue("Bo"); - private static final Value TURK_B_MISMATCHED = new NameValue("Bx"); - private static final Value TURK_X = new NameValue("X"); - - private Value getCheck(Value target, Value pred, boolean correct) { - if (target == null) return TURK_X; - if (target instanceof ErrorValue) { - return pred instanceof ErrorValue ? TURK_B_MATCHED : TURK_B_MISMATCHED; - } else { - return correct ? 
TURK_A_MATCHED : TURK_A_MISMATCHED; - } - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/BatchTableAlterer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/BatchTableAlterer.java deleted file mode 100644 index f6d9f5b059..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/BatchTableAlterer.java +++ /dev/null @@ -1,528 +0,0 @@ -package edu.stanford.nlp.sempre.tables.alter; - -import java.io.*; -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.tables.*; -import edu.stanford.nlp.sempre.tables.serialize.SerializedDataset; -import edu.stanford.nlp.sempre.tables.serialize.SerializedDumper; -import edu.stanford.nlp.sempre.tables.test.CustomExample; -import fig.basic.*; -import fig.exec.Execution; - -/** - * For each example, - * - Generate altered tables - * - Execute the formulas on altered tables. - * - Select the most informative subset of tables to be sent to MTurk. - * - Dump formulas consistent with the Turked results. 
- * - * @author ppasupat - */ -public class BatchTableAlterer implements Runnable { - public static class Options { - @Option(gloss = "verbosity") - public int verbose = 2; - @Option(gloss = "number of tables to generate") - public int numAlteredTables = 30; - @Option(gloss = "do not execute formulas for speed (will also skip steps that require denotations)") - public boolean skipFormulaExecution = false; - // Serialize denotations - @Option(gloss = "dump all denotations") - public boolean dumpAllDenotations = false; - @Option(gloss = "dump annotated denotations") - public boolean dumpAnnotatedDenotations = false; - @Option(gloss = "load denotations from this directory") - public String loadDenotationsFromDir = null; - // Equivalent classes - @Option(gloss = "dump representative formula of each equivalent class") - public boolean dumpRepresentativeFormulas = false; - // Persistence - @Option(gloss = "if the altered tables are already saved to files, skip the example") - public boolean skipExistingSaveDirs = false; - @Option(gloss = "whether to overwrite the saved altered tables") - public boolean overwriteExistingSaveDirs = false; - // Choosing subset of altered tables - @Option(gloss = "which subset chooser to use") - public SubsetChooserSpec subsetChooser = SubsetChooserSpec.ENTROPY; - @Option(gloss = "number of tables to retain, not counting the original table (0 = retain all)") - public int numRetainedTables = 0; - @Option(gloss = "also try subsets of size 1, 2, ..., (numRetainedTables - 1)") - public boolean alsoTrySmallerSubsets = false; - // Checking with annotated formulas - @Option(gloss = "check with annotated formulas") - public String annotatedFormulasPath = null; - @Option(gloss = "Ignore agreed errors in Turk data") - public boolean ignoreTurkedAgreedErrors = true; - @Option(gloss = "dump all formulas in the same equivalent class as the annotation") - public boolean dumpAllMatchingAnnotated = false; - // Checking with Turked data - @Option(gloss = 
"Turked data path") - public String turkedDataPath = null; - } - public static Options opts = new Options(); - - public static enum SubsetChooserSpec { NONE, ENTROPY, CACHED, PURE } - - public static void main(String[] args) { - Execution.run(args, "BatchTableAltererMain", new BatchTableAlterer(), Master.getOptionsParser()); - } - - private Builder builder; - private Map idToAnnotated = new HashMap<>(); - private TableAltererCache tableAltererCache = null; - private SerializedDumper representativeDumper = null; - private PrintWriter retainedTablesOut = null; - private SubsetChooser subsetChooser = null; - private AggregatedTurkData turkedData = null; - private PrintWriter turkInfoWriter = null; - private SerializedDumper turkMatchDumper = null; - - @Override - public void run() { - if (opts.skipFormulaExecution && opts.numRetainedTables > 0) - LogInfo.fails("Cannot simultaneously skip formula execution and choose tables to retain."); - - builder = new Builder(); - builder.build(); - - // Read dataset - Dataset dataset; - if (Dataset.opts.inPaths.isEmpty() || Dataset.opts.inPaths.get(0).getSecond().endsWith(".gz")) { - LogInfo.logs("Loading SERIALIZED dataset"); - dataset = new SerializedDataset(); - } else { - LogInfo.logs("Loading USUAL dataset"); - dataset = new Dataset(); - } - dataset.read(); - List examples = dataset.examples("train"); - if (examples == null) { - // Representative? 
- LogInfo.logs("Reading representatives"); - examples = dataset.examples("representative"); - } - - // Read annotation file (optional) - if (opts.annotatedFormulasPath != null && !opts.annotatedFormulasPath.isEmpty()) { - // Prevent verbose output - if (CustomExample.opts.verbose > 1) - CustomExample.opts.verbose = 1; - List annotated = CustomExample.getDataset(opts.annotatedFormulasPath); - for (CustomExample ex : annotated) { - if (ex == null || ex.targetFormula == null) continue; - // Check annotation - Value rawValue = builder.executor.execute(ex.targetFormula, ex.context).value; - if (rawValue instanceof ListValue) - rawValue = ((TableKnowledgeGraph) ex.context.graph).getListValueWithOriginalStrings((ListValue) rawValue); - if (builder.valueEvaluator.getCompatibility(ex.targetValue, rawValue) == 1) - idToAnnotated.put(ex.id, ex); - else - LogInfo.warnings("Wrong annotation [%s] expected %s; got %s", ex.id, ex.targetValue, rawValue); - } - } - - // Alterer cache - tableAltererCache = new TableAltererCache(); - - // Dump equivalent classes (optional) - if (opts.dumpRepresentativeFormulas) { - representativeDumper = new SerializedDumper("representative", examples.size()); - } - - // Read Turked data (optional) - if (opts.turkedDataPath != null && !opts.turkedDataPath.isEmpty()) { - turkedData = new AggregatedTurkData(opts.turkedDataPath); - turkMatchDumper = new SerializedDumper("turk", examples.size()); - turkInfoWriter = IOUtils.openOutHard(Execution.getFile("turk-info.tsv")); - TurkEquivalentClassInfo.dumpHeader(turkInfoWriter); - } - - // Subset choosing - switch (opts.subsetChooser) { - case CACHED: - subsetChooser = new CachedSubsetChooser(); - break; - case ENTROPY: - if (opts.numRetainedTables > 0) - subsetChooser = new EntropySubsetChooser(opts.numAlteredTables, - opts.numRetainedTables, opts.alsoTrySmallerSubsets); - break; - case PURE: - if (opts.numRetainedTables > 0) - subsetChooser = new PureSubsetChooser(opts.numAlteredTables, - 
opts.numRetainedTables, opts.alsoTrySmallerSubsets); - break; - default: // Do nothing - } - if (opts.subsetChooser != null) - retainedTablesOut = IOUtils.openOutAppendEasy(Execution.getFile("retained-tables.tsv")); - - // Go through the dataset - Execution.putOutput("group", "train"); - int index = -1; - for (Example ex : examples) { - Execution.putOutput("example", ++index); - if (!CustomExample.checkFilterExamples(index) || - (opts.skipExistingSaveDirs && tableAltererCache.existsSaveDir(ex.id))) { - LogInfo.logs("SKIPPING %s", ex.id); - continue; - } - List graphs = process(ex); - if (!tableAltererCache.existsSaveDir(ex.id) || opts.overwriteExistingSaveDirs) - tableAltererCache.dump(graphs, ex.id); - ex.predDerivations.clear(); // Save memory - } - - if (representativeDumper != null) representativeDumper.closeFile(); - if (retainedTablesOut != null) retainedTablesOut.close(); - if (turkInfoWriter != null) turkInfoWriter.close(); - if (turkMatchDumper != null) turkMatchDumper.closeFile(); - } - - private List process(Example ex) { - LogInfo.begin_track("Processing %s", ex.id); - ex.log(); - if (ex.predDerivations == null) - ex.predDerivations = Collections.emptyList(); - LogInfo.logs("Read %d derivations", ex.predDerivations.size()); - - TableAlterer alterer = new TableAlterer(ex); - // altered tables (alteredGraphs[0] is always the original table) - List alteredGraphs = new ArrayList<>(); - boolean loadedDenotationData = false; - DenotationData denotationData = null; - if (opts.loadDenotationsFromDir != null && !opts.loadDenotationsFromDir.isEmpty()) { - File file = new File(opts.loadDenotationsFromDir, ex.id + ".gz"); - BufferedReader reader = IOUtils.openInEasy(file.toString()); - if (reader != null) { - try { - if (opts.verbose >= 1) - LogInfo.logs("Reading from " + file); - denotationData = DenotationData.load(reader); - loadedDenotationData = true; - } catch (Exception e) { - LogInfo.warnings("File " + file + " contains error: " + e); - e.printStackTrace(); 
- throw new RuntimeException(e); - } - } - } - if (denotationData == null) - denotationData = new DenotationData(opts.numAlteredTables, ex.predDerivations.size()); - - LogInfo.begin_track("Generating %d tables", opts.numAlteredTables); - for (int tableIndex = 0; tableIndex <= opts.numAlteredTables; tableIndex++) { - // Use the original table for table #0; an altered table otherwise - TableKnowledgeGraph graph; - if (tableIndex == 0) { - graph = alterer.oldGraph; - } else { - graph = tableAltererCache.load(ex.id, tableIndex); - if (graph == null) // Nothing in the cache ... - graph = alterer.constructAlteredGraph(tableIndex); - if (graph == null) // Something is wrong ... - throw new RuntimeException("Cannot generate graph " + ex.id + " " + tableIndex); - } - alteredGraphs.add(graph); - if (!opts.skipFormulaExecution) { - ContextValue context = new ContextValue(graph); - // Execute all formulas on the new graph - List denotationsForTable = new ArrayList<>(); - for (int k = 0; k < ex.predDerivations.size(); k++) { - Value value; - if (loadedDenotationData) { - value = denotationData.getDenotation(k, tableIndex); - } else { - Derivation deriv = ex.predDerivations.get(k); - value = builder.executor.execute(deriv.formula, context).value; - value = ValueCanonicalizer.canonicalize(value); - denotationData.addDenotation(k, tableIndex, value); - } - denotationsForTable.add(value); - } - // Annotated formula - Value annotatedValue = null; - Example annotatedEx = idToAnnotated.get(ex.id); - if (annotatedEx != null && annotatedEx.targetFormula != null) { - annotatedValue = builder.executor.execute(annotatedEx.targetFormula, context).value; - annotatedValue = ValueCanonicalizer.canonicalize(annotatedValue); - } - denotationData.addAnnotatedDenotation(tableIndex, annotatedValue); - // Log - if (opts.verbose >= 3) { - LogInfo.begin_track("Table %d", tableIndex); - graph.log(); - logGroups(DenotationData.groupByDenotation(denotationsForTable), annotatedValue, "ANNOTATED"); - 
LogInfo.end_track(); - } - } - } - LogInfo.end_track(); - - if (!opts.skipFormulaExecution) { - - if (opts.dumpAllDenotations) { - File dir = new File(Execution.getFile("denotations")); - if (!dir.isDirectory()) dir.mkdir(); - PrintWriter writer = IOUtils.openOutHard(Execution.getFile("denotations/" + ex.id + ".gz")); - denotationData.dump(writer); - writer.close(); - } - - if (opts.dumpAnnotatedDenotations && denotationData.isAnnotated()) { - File dir = new File(Execution.getFile("annotated-denotations")); - if (!dir.isDirectory()) dir.mkdir(); - PrintWriter writer = IOUtils.openOutHard(Execution.getFile("annotated-denotations/" + ex.id + ".gz")); - denotationData.dumpAnnotated(writer); - writer.close(); - } - - denotationData.computeGroups(ex.predDerivations); - - if (opts.dumpRepresentativeFormulas) { - List representatives = new ArrayList<>(); - for (int index : denotationData.getRepresentativeIndices()) - representatives.add(ex.predDerivations.get(index)); - LogInfo.logs("Dumping %d representatives", representatives.size()); - representativeDumper.dumpExample(ex, representatives); - } - - // Log the summary - if (opts.verbose >= 3) { - LogInfo.begin_track("Summary across %d tables", alteredGraphs.size()); - //logGroups(denotationData.groups, denotationData.getAnnotatedDenotations(), "OVERALL-ANNOTATED"); - if (opts.dumpAllMatchingAnnotated) { - LogInfo.begin_track("All formulas matching annotated formula on all tables"); - for (int k = 0; k < ex.predDerivations.size(); k++) { - if (denotationData.getDenotations(k).equals(denotationData.getAnnotatedDenotations())) - LogInfo.logs("%s", ex.predDerivations.get(k)); - } - LogInfo.end_track(); - } - LogInfo.end_track(); - } - - // Choosing the most informative subset of tables - // This will try all combinations. 
The complexity is (numAltered)^(numRetained) - if (subsetChooser != null && opts.turkedDataPath == null) { - Subset chosen = subsetChooser.chooseSubset(ex.id, denotationData); - if (chosen != null) { - LogInfo.logs("RETAINED TABLES: %s", chosen.indices); - testSubset(denotationData, chosen); - } else { - LogInfo.logs("RETAINED TABLES: null"); - chosen = new Subset(ex.id); - } - retainedTablesOut.println(chosen); - retainedTablesOut.flush(); - } - - // Check with Turked data - if (turkedData != null) { - Map turked = turkedData.get(ex.id); - if (turked != null && !turked.isEmpty()) { - LogInfo.logs("TURKED DATA: %s", turked.keySet()); - testWithTurkedData(ex, denotationData, turked, turkedData.getAllTurkedTables(ex.id)); - } else { - LogInfo.logs("TURKED DATA: null"); - testWithTurkedData(ex, denotationData, new HashMap<>(), new ArrayList<>()); - } - } - } - - LogInfo.end_track(); - return alteredGraphs; - } - - private void logGroups(Map> groups, T annotated, String prefix) { - // Sort by count - List>> entries = new ArrayList<>(groups.entrySet()); - entries.sort((o1, o2) -> o2.getValue().size() - o1.getValue().size()); - // Log the counts - LogInfo.begin_track("Denotation counts"); - int annotatedCount = 0, totalCount = 0; - for (Map.Entry> entry : entries) { - int size = entry.getValue().size(); - boolean matchAnnotated = entry.getKey().equals(annotated); - LogInfo.logs("%3s %5d : %s", matchAnnotated ? 
"[O]" : "", size, entry.getKey()); - totalCount += size; - if (matchAnnotated) annotatedCount = size; - } - if (annotated != null) { - LogInfo.logs("%s = %s", prefix, annotated); - LogInfo.logs("%s COUNT: %d / %d (%.3f%%)", prefix, - annotatedCount, totalCount, annotatedCount * 100.0 / totalCount); - } else { - LogInfo.logs("Example is NOT ANNOTATED"); - } - LogInfo.end_track(); - } - - // ============================================================ - // Test subset - // ============================================================ - - public void testSubset(DenotationData denotationData, Subset subset) { - LogInfo.begin_track("testSubset : %s %s", subset.id, subset.indices); - // denotations[i][j] for i in representativeDerivs and j in graphIndices - Map, Integer> counts = new HashMap<>(); - for (int i : denotationData.getRepresentativeIndices()) { - List denotationsForDeriv = new ArrayList<>(); - for (int j : subset.indices) - denotationsForDeriv.add(denotationData.getDenotation(i, j)); - MapUtils.incr(counts, denotationsForDeriv); - } - LogInfo.logs("subset test: %s | score = %8.3f from tables %s", subset.id, subset.score, subset.indices); - { - List values = new ArrayList<>(counts.values()); - Collections.sort(values); - Collections.reverse(values); - LogInfo.logs(" %s", values); - } - // Check annotated formulas - if (denotationData.isAnnotated()) { - List denotationsForDeriv = new ArrayList<>(); - for (int j : subset.indices) - denotationsForDeriv.add(denotationData.getAnnotatedDenotation(j)); - Integer count = counts.get(denotationsForDeriv); - if (count == null || count == 0) { - LogInfo.logs("subset annotation: %s | 0 ANNOTATED DENOTATIONS NOT FOUND!", subset.id); - } else if (count == 1) { - LogInfo.logs("subset annotation: %s | 1 ANNOTATED DENOTATIONS IS IN ITS OWN CLASS.", subset.id); - } else { - LogInfo.logs("subset annotation: %s | %d ANNOTATED DENOTATIONS MIX WITH OTHER THINGS!", subset.id, count); - } - } else { - LogInfo.logs("subset annotation: %s 
| X NOT ANNOTATED!", subset.id); - } - LogInfo.end_track(); - } - - // ============================================================ - // Test with Turked data - // ============================================================ - /* - * Output to 2 dump files - * - turk-info: TSV file - * - turk-match.gz: LispTrees file where each tree is a serialized example with matching formulas - */ - - public void testWithTurkedData(Example ex, DenotationData denotationData, - Map turked, List allTurkedTables) { - LogInfo.begin_track("testWithTurkedData: %s", ex.id); - TurkEquivalentClassInfo info = new TurkEquivalentClassInfo(); - info.id = ex.id; - info.numDerivs = ex.predDerivations.size(); - info.numClasses = denotationData.numClasses(); - info.allTurkedTables = allTurkedTables; - LogInfo.logs("ALL TURKED TABLES: %s", allTurkedTables); - for (Map.Entry entry : turked.entrySet()) - LogInfo.logs("%d : %s", entry.getKey(), entry.getValue()); - // sort by altered table index - info.agreedTurkedTables = new ArrayList<>(turked.keySet()); - Collections.sort(info.agreedTurkedTables); - List turkedValues = new ArrayList<>(); - for (int j : info.agreedTurkedTables) - turkedValues.add(turked.get(j)); - // denotations[i][j] for i in representativeDerivs and j in Turked keys - // Group equivalent classes based on turked data - Map, List> equivClassGroups = new HashMap<>(); - for (int i : denotationData.getRepresentativeIndices()) { - List denotationsForDeriv = new ArrayList<>(); - for (int j : info.agreedTurkedTables) - denotationsForDeriv.add(denotationData.getDenotation(i, j)); - MapUtils.addToList(equivClassGroups, denotationsForDeriv, i); - } - { - List values = new ArrayList<>(); - for (List equivClassGroupIndices : equivClassGroups.values()) - values.add(equivClassGroupIndices.size()); - Collections.sort(values); - Collections.reverse(values); - LogInfo.logs(" %s", values); - } - // Find out if even the answer for table 0 (original table) matches! 
- info.origTableTarget = ex.targetValue; - if (turked.containsKey(0)) { - info.origTableTurkedTarget = turked.get(0); - info.origTableFlag = isCompatible(info.origTableTarget, info.origTableTurkedTarget) ? "ok" : "mismatched"; - } else { - info.origTableTurkedTarget = null; - info.origTableFlag = "no turk"; - } - LogInfo.logs("original table: dataset = %s", info.origTableTarget); - LogInfo.logs("original table: turked = %s", info.origTableTurkedTarget); - LogInfo.logs("original table: flag = %s", info.origTableFlag); - // Find out how many equivalent classes match the annotation - List matchedDerivIndices = new ArrayList<>(); - List matchedDerivs = new ArrayList<>(); - for (Map.Entry, List> entry : equivClassGroups.entrySet()) { - boolean match = true; - List equivClassDenotations = entry.getKey(); - List equivClassGroupIndices = entry.getValue(); - for (int jj = 0; jj < turkedValues.size(); jj++) { - Value equivClassDenotation = equivClassDenotations.get(jj); - Value turkedDenotation = turkedValues.get(jj); - if (opts.ignoreTurkedAgreedErrors && turkedDenotation instanceof ErrorValue) - continue; - if (!isCompatible(turkedDenotation, equivClassDenotation)) { - match = false; - break; - } - } - if (match) { - LogInfo.logs("Matched %d classes: %s", equivClassGroupIndices.size(), equivClassDenotations); - info.numClassesMatched += equivClassGroupIndices.size(); - for (int equivClassGroupIndex : equivClassGroupIndices) { - for (int index : denotationData.getEquivClass(equivClassGroupIndex)) { - matchedDerivIndices.add(index); - Derivation matchedDeriv = ex.predDerivations.get(index); - matchedDerivs.add(matchedDeriv); - } - } - } - } - info.numDerivsMatched = matchedDerivs.size(); - LogInfo.logs("turk matching equivalent classes: %d", info.numClassesMatched); - LogInfo.logs("turk matching formulas: %d", info.numDerivsMatched); - // If there are multiple equivalent classes, find out which other tables we can turk from - if (subsetChooser != null) { - if 
(info.numClassesMatched > 1) { - DenotationData filtered = new DenotationData(opts.numAlteredTables, matchedDerivs.size()); - for (int i = 0; i < matchedDerivs.size(); i++) { - for (int j = 0; j <= opts.numAlteredTables; j++) - filtered.addDenotation(i, j, denotationData.getDenotation(matchedDerivIndices.get(i), j)); - } - filtered.computeGroups(matchedDerivs); - Subset chosen = subsetChooser.chooseSubset(ex.id, filtered, turked.keySet()); - if (chosen != null) { - LogInfo.logs("RETAINED TABLES: %s", chosen.indices); - retainedTablesOut.println(chosen); - retainedTablesOut.flush(); - } - } else { - LogInfo.logs("Not choosing subset since turk matching equivalent classes = %d", info.numClassesMatched); - } - } - // Dump stuff - info.dump(turkInfoWriter); - turkMatchDumper.dumpExample(ex, matchedDerivs); - LogInfo.end_track(); - } - - private boolean isCompatible(Value target, Value pred) { - if (target instanceof ErrorValue) { - return pred instanceof ErrorValue || (pred instanceof ListValue && ((ListValue) pred).values.isEmpty()); - } else if (pred instanceof ErrorValue) { - return false; - } - if (!(target instanceof ListValue)) - target = new ListValue(Collections.singletonList(target)); - if (!(pred instanceof ListValue)) - pred = new ListValue(Collections.singletonList(pred)); - return builder.valueEvaluator.getCompatibility(target, pred) == 1; - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/CachedSubsetChooser.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/CachedSubsetChooser.java deleted file mode 100644 index ce0c1215ce..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/CachedSubsetChooser.java +++ /dev/null @@ -1,47 +0,0 @@ -package edu.stanford.nlp.sempre.tables.alter; - -import java.io.BufferedReader; -import java.io.IOException; -import java.util.*; - -import fig.basic.*; - -public class CachedSubsetChooser implements SubsetChooser { - public static class Options 
{ - @Option(gloss = "read the list of retained table from these files") - public List retainedTablesFilenames = new ArrayList<>(); - } - public static Options opts = new Options(); - - Map cache = new HashMap<>(); - - public CachedSubsetChooser() { - for (String filename : opts.retainedTablesFilenames) - load(filename); - } - - private void load(String retainedTablesFilename) { - try { - BufferedReader reader = IOUtils.openInHard(retainedTablesFilename); - String line; - while ((line = reader.readLine()) != null) { - Subset subset = Subset.fromString(line); - if (subset.score > Double.NEGATIVE_INFINITY) - cache.put(subset.id, subset); - } - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - @Override - public Subset chooseSubset(String id, DenotationData denotationData) { - return cache.get(id); - } - - @Override - public Subset chooseSubset(String id, DenotationData denotationData, Collection forbiddenTables) { - throw new RuntimeException("CachedSubsetChooser.chooseSubset cannot take forbiddenTables"); - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/DenotationData.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/DenotationData.java deleted file mode 100644 index 95a07172ae..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/DenotationData.java +++ /dev/null @@ -1,215 +0,0 @@ -package edu.stanford.nlp.sempre.tables.alter; - -import java.io.*; -import java.util.*; - -import edu.stanford.nlp.sempre.Derivation; -import edu.stanford.nlp.sempre.Value; -import edu.stanford.nlp.sempre.Values; -import edu.stanford.nlp.sempre.tables.InfiniteListValue; -import fig.basic.MapUtils; - -public class DenotationData { - - public final int numAlteredTables, numDerivs; - private final List uniqueDenotations = new ArrayList<>(); - private final Map uniqueDenotationsToId = new HashMap<>(); - // derivation -> denotations of that derivation on altered tables - private final 
List> denotations; - // denotations of the annotated formula on altered tables - private List annotatedDenotations; - // denotations on altered tables -> indices of formulas that give those denotations - private Map, List> groups; - // representative derivation of each group - private List representativeDerivs; - - public DenotationData(int numAlteredTables, int numDerivs) { - this.numAlteredTables = numAlteredTables; - this.numDerivs = numDerivs; - denotations = new ArrayList<>(numDerivs); - for (int k = 0; k < numDerivs; k++) { - List denotationsForDeriv = new ArrayList<>(numAlteredTables + 1); - for (int j = 0; j < numAlteredTables + 1; j++) - denotationsForDeriv.add(null); - denotations.add(denotationsForDeriv); - } - } - - private int lookup(Value value) { - Integer id = uniqueDenotationsToId.get(value); - if (id == null) { - id = uniqueDenotations.size(); - uniqueDenotations.add(value); - uniqueDenotationsToId.put(value, id); - } - return id; - } - - public void addDenotation(int derivIndex, int tableIndex, Value value) { - denotations.get(derivIndex).set(tableIndex, lookup(value)); - } - - public Value getDenotation(int derivIndex, int tableIndex) { - return uniqueDenotations.get(denotations.get(derivIndex).get(tableIndex)); - } - - public List getDenotations(int derivIndex) { - List answer = new ArrayList<>(numAlteredTables); - for (int x : denotations.get(derivIndex)) - answer.add(uniqueDenotations.get(x)); - return answer; - } - - public int[][] toArray(List derivs) { - int[][] answer = new int[derivs.size()][numAlteredTables + 1]; - for (int i = 0; i < derivs.size(); i++) { - List derivDenotationIndices = denotations.get(derivs.get(i)); - for (int j = 0; j <= numAlteredTables; j++) - answer[i][j] = derivDenotationIndices.get(j); - } - return answer; - } - - public void addAnnotatedDenotation(int tableIndex, Value value) { - if (annotatedDenotations == null) { - annotatedDenotations = new ArrayList<>(numAlteredTables + 1); - for (int j = 0; j < 
numAlteredTables + 1; j++) - annotatedDenotations.add(null); - } - annotatedDenotations.set(tableIndex, lookup(value)); - } - - public Value getAnnotatedDenotation(int tableIndex) { - return uniqueDenotations.get(annotatedDenotations.get(tableIndex)); - } - - public List getAnnotatedDenotations() { - List answer = new ArrayList<>(numAlteredTables); - for (int x : annotatedDenotations) - answer.add(uniqueDenotations.get(x)); - return answer; - } - - public boolean isAnnotated() { - return annotatedDenotations != null; - } - - public void computeGroups(List derivs) { - groups = groupByDenotation(denotations); - // Get the representative derivation of each group - // Choose the smallest formula - representativeDerivs = new ArrayList<>(); - for (List equivClass : groups.values()) { - int bestIndex = 0, bestScore = Integer.MIN_VALUE; - for (int index : equivClass) { - Derivation deriv = derivs.get(index); - int score = -index; - try { - if (deriv.canonicalUtterance.startsWith("$ROOT:")) - score = 100 - Integer.parseInt(deriv.canonicalUtterance.substring("$ROOT:".length())); - } catch (NumberFormatException e) { } - if (score > bestScore) { - bestScore = score; - bestIndex = index; - } - } - representativeDerivs.add(bestIndex); - } - } - - /** - * Helper method: - * Group formulas that execute to the same denotation (or tuple of denotations). - * - * Return a map from denotations to lists of formula indices. 
- */ - public static Map> groupByDenotation(List denotations) { - Map> groups = new HashMap<>(); - for (int i = 0; i < denotations.size(); i++) - MapUtils.addToList(groups, denotations.get(i), i); - return groups; - } - - public int numClasses() { - if (groups == null) - throw new RuntimeException("Must call computeGroups(derivs) first"); - return groups.size(); - } - - public List getRepresentativeIndices() { - if (groups == null) - throw new RuntimeException("Must call computeGroups(derivs) first"); - return representativeDerivs; - } - - public List getEquivClass(int representativeIndex) { - if (groups == null) - throw new RuntimeException("Must call computeGroups(derivs) first"); - return groups.get(denotations.get(representativeIndex)); - } - - // ============================================================ - // Serialization - // ============================================================ - - public void dump(PrintWriter out) { - // # derivations, # altered tables, # unique denotations - out.println("" + numDerivs + " " + numAlteredTables + " " + uniqueDenotations.size()); - for (Value denotation : uniqueDenotations) - out.println(denotation); - for (List derivDenotationIndices : denotations) { - StringBuilder sb = new StringBuilder(); - for (Integer derivDenotationIndex : derivDenotationIndices) - sb.append(derivDenotationIndex == null ? -1 : derivDenotationIndex).append(" "); - out.println(sb.toString().trim()); - } - } - - public void dumpAnnotated(PrintWriter out) { - for (Integer annotatedDenotationIndex : annotatedDenotations) - out.println(annotatedDenotationIndex == null ? 
null : uniqueDenotations.get(annotatedDenotationIndex)); - } - - public static DenotationData load(BufferedReader in) { - try { - String line = in.readLine(); - String[] tokens = line.split(" "); - if (tokens.length != 3) - throw new RuntimeException("Expected 3 tokens; got " + tokens.length); - int numDerivs = Integer.parseInt(tokens[0]), - numAlteredTables = Integer.parseInt(tokens[1]), - numUniqueDenotations = Integer.parseInt(tokens[2]); - DenotationData denotationData = new DenotationData(numAlteredTables, numDerivs); - for (int i = 0; i < numUniqueDenotations; i++) { - line = in.readLine(); - Value value; - if ("ERROR".equals(line)) { - value = ValueCanonicalizer.ERROR; - } else if ("null".equals(line)) { - value = null; - } else { - try { - value = Values.fromString(line); - } catch (Exception e) { - // Probably InfiniteValue - value = new InfiniteListValue(line); - } - } - denotationData.uniqueDenotations.add(value); - denotationData.uniqueDenotationsToId.put(value, i); - } - for (int i = 0; i < numDerivs; i++) { - tokens = in.readLine().split(" "); - if (tokens.length != numAlteredTables + 1) - throw new RuntimeException("Expected " + (numAlteredTables + 1) + " tokens; got " + tokens.length); - for (int j = 0; j <= numAlteredTables; j++) { - denotationData.denotations.get(i).set(j, Integer.parseInt(tokens[j])); - } - } - return denotationData; - } catch (Exception e) { - throw new RuntimeException(e); - } - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/EntropySubsetChooser.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/EntropySubsetChooser.java deleted file mode 100644 index fbd83b1653..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/EntropySubsetChooser.java +++ /dev/null @@ -1,241 +0,0 @@ -package edu.stanford.nlp.sempre.tables.alter; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import fig.basic.*; - -/** - * Choose a subset based on diversity 
score (entropy). - */ -public class EntropySubsetChooser implements SubsetChooser { - public static class Options { - @Option(gloss = "check correctness") - public boolean entropyCheckCorrectness = false; - } - public static Options opts = new Options(); - - private final int numAlteredTables, numRetainedTables; - private final boolean alsoTrySmallerSubsets; - - public EntropySubsetChooser(int numAlteredTables, int numRetainedTables, boolean alsoTrySmallerSubsets) { - this.numAlteredTables = numAlteredTables; - this.numRetainedTables = numRetainedTables; - this.alsoTrySmallerSubsets = alsoTrySmallerSubsets; - } - - @Override - public Subset chooseSubset(String id, DenotationData denotationData) { - return chooseSubset(id, denotationData, Collections.emptyList()); - } - - @Override - public Subset chooseSubset(String id, DenotationData denotationData, Collection forbiddenTables) { - if (this.numRetainedTables == 0) return null; - List bestGraphIndices = null; - double bestScore = 0; - int n = denotationData.getRepresentativeIndices().size(); - EquivClassComputer computer1 = opts.entropyCheckCorrectness ? null : new EquivClassComputerNaive(denotationData); - EquivClassComputer computer2 = opts.entropyCheckCorrectness ? 
null : new EquivClassComputerGroup(denotationData); - EquivClassComputer computer3 = new EquivClassComputerFast(denotationData); - - Iterator> itr; - if (alsoTrySmallerSubsets) - itr = new Subset.SubsetSizeAtMostKIterator(numAlteredTables, numRetainedTables); - else - itr = new Subset.SubsetSizeKIterator(numAlteredTables, numRetainedTables); - while (itr.hasNext()) { - List graphIndices = itr.next(); - if (!Subset.areDisjoint(graphIndices, forbiddenTables)) continue; - // If N is the number of representatives, - // H(C) = sum_i c_i/N log(N/c_i) = log(N) - (1/N) sum_i c_i log(c_i) - // normalized = 1 - [sum_i c_i log(c_i)] / [N log(N)] - Collection groupSizes = computer3.getGroupSizes(graphIndices); - double accum = 0; - for (int c : groupSizes) - accum += c * Math.log(c); - double entropy = 1 - accum / (n * Math.log(n)); - // Update - if (entropy > bestScore) { - bestGraphIndices = graphIndices; - bestScore = entropy; - if (BatchTableAlterer.opts.verbose >= 2) { - LogInfo.logs("entropy = %8.3f from tables %s", bestScore, bestGraphIndices); - } - } - // Check - if (opts.entropyCheckCorrectness) { - List naive = new ArrayList<>(computer1.getGroupSizes(graphIndices)); - Collections.sort(naive); - List group = new ArrayList<>(computer2.getGroupSizes(graphIndices)); - Collections.sort(group); - List fast = new ArrayList<>(computer3.getGroupSizes(graphIndices)); - Collections.sort(fast); - if (!naive.equals(group)) { - LogInfo.logs("Incorrect (group) %s", graphIndices); - LogInfo.logs("%s", naive); - LogInfo.logs("%s", group); - throw new RuntimeException(); - } - if (!naive.equals(fast)) { - LogInfo.logs("Incorrect (fast) %s", graphIndices); - LogInfo.logs("%s", naive); - LogInfo.logs("%s", fast); - throw new RuntimeException(); - } - } - } - if (bestGraphIndices != null) { - bestGraphIndices.add(0, 0); - return new Subset(id, bestGraphIndices, bestScore); - } else { - return new Subset(id, numRetainedTables, 0.0); - } - } - - - /** - * Given a subset s = {s1,...,sl} of 
graph indices {1,...,k}: - * - Group i's by the values of (denotations[i][s1], ..., denotations[i][sl]) - * - Return the list of group sizes - * - * The computation is done in amortized n * (k choose l) - * where n is the number of possible i's (number of representative formulas). - */ - public interface EquivClassComputer { - public Collection getGroupSizes(List graphIndices); - } - - public static class EquivClassComputerNaive implements EquivClassComputer { - private final DenotationData denotationData; - - public EquivClassComputerNaive(DenotationData denotationData) { - this.denotationData = denotationData; - } - - public Collection getGroupSizes(List graphIndices) { - Map, Integer> counts = new HashMap<>(); - for (int i : denotationData.getRepresentativeIndices()) { - List denotationsForDeriv = new ArrayList<>(); - for (int j : graphIndices) - denotationsForDeriv.add(denotationData.getDenotation(i, j)); - MapUtils.incr(counts, denotationsForDeriv); - } - return counts.values(); - } - } - - public static class EquivClassComputerGroup implements EquivClassComputer { - private final int n, k; - private final int[][] uniqueIds; - private List previousGraphIndices = new ArrayList<>(); - private final List>> groupStack = new ArrayList<>(); - private final List> initialGroup = new ArrayList<>(); - - public EquivClassComputerGroup(DenotationData denotationData) { - n = denotationData.getRepresentativeIndices().size(); - k = denotationData.numAlteredTables; - uniqueIds = new int[n][k + 1]; - for (int j = 1; j <= k; j++) { - Map uniqueDenotationsForTable = new HashMap<>(); - for (int i = 0; i < n; i++) { - int derivIndex = denotationData.getRepresentativeIndices().get(i); - Value denotation = denotationData.getDenotation(derivIndex, j); - Integer uniqueId = uniqueDenotationsForTable.get(denotation); - if (uniqueId == null) { - uniqueDenotationsForTable.put(denotation, uniqueIds[i][j] = uniqueDenotationsForTable.size()); - } else { - uniqueIds[i][j] = uniqueId; - } - } 
- } - List group = new ArrayList<>(); - for (int i = 0; i < n; i++) group.add(i); - initialGroup.add(group); - } - - public Collection getGroupSizes(List graphIndices) { - // Reduce to common prefix - int sizeAgreed = 0; - while (sizeAgreed < previousGraphIndices.size() && sizeAgreed < graphIndices.size() - && previousGraphIndices.get(sizeAgreed) == graphIndices.get(sizeAgreed)) - sizeAgreed++; - while (groupStack.size() > sizeAgreed) - groupStack.remove(groupStack.size() - 1); - previousGraphIndices = new ArrayList(graphIndices); - // Group the rest - for (int j = sizeAgreed; j < graphIndices.size(); j++) { - int sj = graphIndices.get(j); - List> previousGroups = groupStack.isEmpty() ? initialGroup : groupStack.get(j - 1); - List> groups = new ArrayList<>(); - for (List group : previousGroups) { - if (group.size() == 1) { - groups.add(group); - continue; - } - Map> idToGroups = new HashMap<>(); - for (int index : group) - MapUtils.addToList(idToGroups, uniqueIds[index][sj], index); - groups.addAll(idToGroups.values()); - } - groupStack.add(groups); - } - List groupSizes = new ArrayList<>(); - for (List group : groupStack.get(groupStack.size() - 1)) - groupSizes.add(group.size()); - return groupSizes; - } - } - - public static class EquivClassComputerFast implements EquivClassComputer { - private final int n; - private final int[][] uniqueIds; - private List previousGraphIndices = new ArrayList<>(); - private final List groups = new ArrayList<>(); - private final List breakpointStack = new ArrayList<>(); - private final int[] initialBreakpoint; - - public EquivClassComputerFast(DenotationData denotationData) { - n = denotationData.getRepresentativeIndices().size(); - uniqueIds = denotationData.toArray(denotationData.getRepresentativeIndices()); - initialBreakpoint = new int[] {0, n}; - for (int i = 0; i < n; i++) groups.add(i); - } - - public Collection getGroupSizes(List graphIndices) { - // Reduce to common prefix - int sizeAgreed = 0; - while (sizeAgreed < 
previousGraphIndices.size() && sizeAgreed < graphIndices.size() - && previousGraphIndices.get(sizeAgreed) == graphIndices.get(sizeAgreed)) - sizeAgreed++; - while (breakpointStack.size() > sizeAgreed) - breakpointStack.remove(breakpointStack.size() - 1); - previousGraphIndices = new ArrayList(graphIndices); - // Group the rest - for (int j = sizeAgreed; j < graphIndices.size(); j++) { - int sj = graphIndices.get(j), top = 0; - int[] previousBreakpoints = breakpointStack.isEmpty() ? initialBreakpoint : breakpointStack.get(j - 1); - int[] newBreakpoints = new int[n + 1]; - newBreakpoints[top++] = 0; - for (int u = 0; previousBreakpoints[u] < n; u++) { - int s = previousBreakpoints[u], t = previousBreakpoints[u + 1]; - if (t > s + 1) { - Collections.sort(groups.subList(s, t), (x, y) -> uniqueIds[x][sj] - uniqueIds[y][sj]); - for (int i = s + 1; i < t; i++) { - if (uniqueIds[groups.get(i - 1)][sj] != uniqueIds[groups.get(i)][sj]) - newBreakpoints[top++] = i; - } - } - newBreakpoints[top++] = t; - } - breakpointStack.add(newBreakpoints); - } - List groupSizes = new ArrayList<>(); - int[] breakpoints = breakpointStack.get(breakpointStack.size() - 1); - for (int u = 0; breakpoints[u] < n; u++) - groupSizes.add(breakpoints[u + 1] - breakpoints[u]); - return groupSizes; - } - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/PureSubsetChooser.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/PureSubsetChooser.java deleted file mode 100644 index d9cce6e684..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/PureSubsetChooser.java +++ /dev/null @@ -1,52 +0,0 @@ -package edu.stanford.nlp.sempre.tables.alter; - -import java.util.*; - -public class PureSubsetChooser implements SubsetChooser { - - private final int numAlteredTables, numRetainedTables; - private final boolean alsoTrySmallerSubsets; - - public PureSubsetChooser(int numAlteredTables, int numRetainedTables, boolean 
alsoTrySmallerSubsets) { - this.numAlteredTables = numAlteredTables; - this.numRetainedTables = numRetainedTables; - this.alsoTrySmallerSubsets = alsoTrySmallerSubsets; - } - - @Override - public Subset chooseSubset(String id, DenotationData denotationData) { - return chooseSubset(id, denotationData, Collections.emptyList()); - } - - @Override - public Subset chooseSubset(String id, DenotationData denotationData, Collection forbiddenTables) { - if (this.numRetainedTables == 0 || !denotationData.isAnnotated()) return null; - Iterator> itr; - if (alsoTrySmallerSubsets) - itr = new Subset.SubsetSizeAtMostKIterator(numAlteredTables, numRetainedTables); - else - itr = new Subset.SubsetSizeKIterator(numAlteredTables, numRetainedTables); - while (itr.hasNext()) { - List graphIndices = itr.next(); - if (!Subset.areDisjoint(graphIndices, forbiddenTables)) continue; - int numGroupsMixingWithAnnotated = 0; - for (int i : denotationData.getRepresentativeIndices()) { - boolean match = true; - for (int j : graphIndices) { - if (!denotationData.getDenotation(i, j).equals(denotationData.getAnnotatedDenotation(j))) { - match = false; - break; - } - } - if (match) numGroupsMixingWithAnnotated++; - } - if (numGroupsMixingWithAnnotated == 1) { - graphIndices.add(0, 0); - return new Subset(id, graphIndices, -graphIndices.size()); - } - } - return null; - } - - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/Subset.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/Subset.java deleted file mode 100644 index cea4fb0585..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/Subset.java +++ /dev/null @@ -1,132 +0,0 @@ -package edu.stanford.nlp.sempre.tables.alter; - -import java.util.*; - -public class Subset { - public final String id; - public final List indices; - public final double score; - - public Subset(String id, List indices, double score) { - this.id = id; - this.indices = indices; - this.score = 
score; - } - - // Subset {0, 1, 2, ..., k} - public Subset(String id, int k, double score) { - this.id = id; - this.indices = new ArrayList<>(k + 1); - for (int i = 0; i <= k; i++) - this.indices.add(i); - this.score = score; - } - - // NULL subset with very negative score - public Subset(String id) { - this.id = id; - this.indices = new ArrayList<>(); - this.score = Double.NEGATIVE_INFINITY; - } - - // Format: ID score space-separated tables - public static Subset fromString(String line) { - String[] tokens = line.trim().split("\t"); - if (tokens.length != 3) - throw new RuntimeException("Expected 3 fields; got " + tokens.length); - String[] indicesString = tokens[2].split(" "); - List indices = new ArrayList<>(indicesString.length); - for (String x : indicesString) - indices.add(Integer.parseInt(x)); - return new Subset(tokens[0], indices, Double.parseDouble(tokens[1])); - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder().append(id) - .append("\t").append(score).append("\t"); - for (int graphIndex : indices) { - sb.append(graphIndex).append(" "); - } - return sb.toString().trim(); - } - - public static boolean areDisjoint(Collection x1, Collection x2) { - for (int x : x1) - if (x2.contains(x)) return false; - return true; - } - - /** - * Iterate all subsets of size k of {1,...,n} in lexicographic order. 
- */ - public static class SubsetSizeKIterator implements Iterator> { - - List next = new ArrayList<>(), last = new ArrayList<>(); - - public SubsetSizeKIterator(int numAlteredTables, int numRetainedTables) { - for (int i = 1; i <= numRetainedTables; i++) { - next.add(i); - last.add(numAlteredTables - numRetainedTables + i); - } - } - - @Override - public boolean hasNext() { - return next != null; - } - - @Override - public List next() { - List newNext = new ArrayList<>(), current = next; - int changeIndex, changeValue; - for (changeIndex = next.size() - 1; changeIndex >= 0; changeIndex--) - if (next.get(changeIndex) != last.get(changeIndex)) break; - if (changeIndex < 0) { - next = null; - } else { - changeValue = next.get(changeIndex) + 1; - for (int i = 0; i < changeIndex; i++) - newNext.add(next.get(i)); - for (int i = 0; newNext.size() < next.size(); i++) - newNext.add(changeValue + i); - next = newNext; - } - return current; - } - - } - - /** - * Iterate all subsets of size AT MOST k of {1,...,n}. - * Smaller subsets are generated first. 
- */ - public static class SubsetSizeAtMostKIterator implements Iterator> { - - int numAlteredTables, numRetainedTables, currentK; - SubsetSizeKIterator sizeKIterator; - - public SubsetSizeAtMostKIterator(int numAlteredTables, int numRetainedTables) { - this.numAlteredTables = numAlteredTables; - this.numRetainedTables = numRetainedTables; - this.currentK = 1; - this.sizeKIterator = new SubsetSizeKIterator(numAlteredTables, currentK); - } - - @Override - public boolean hasNext() { - return currentK < numRetainedTables || sizeKIterator.hasNext(); - } - - @Override - public List next() { - if (!sizeKIterator.hasNext()) { - currentK++; - sizeKIterator = new SubsetSizeKIterator(numAlteredTables, currentK); - } - return sizeKIterator.next(); - } - - } - -} \ No newline at end of file diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/SubsetChooser.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/SubsetChooser.java deleted file mode 100644 index 7b766b507f..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/SubsetChooser.java +++ /dev/null @@ -1,10 +0,0 @@ -package edu.stanford.nlp.sempre.tables.alter; - -import java.util.Collection; - -public interface SubsetChooser { - - public Subset chooseSubset(String id, DenotationData denotationData); - public Subset chooseSubset(String id, DenotationData denotationData, Collection forbiddenTables); - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/TableAlterer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/TableAlterer.java deleted file mode 100644 index 95cf16b68a..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/TableAlterer.java +++ /dev/null @@ -1,180 +0,0 @@ -package edu.stanford.nlp.sempre.tables.alter; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.tables.*; -import 
edu.stanford.nlp.sempre.tables.lambdadcs.DenotationUtils; -import fig.basic.*; - -/** - * Alter the given table. - * - * Given a table, the corresponding Example, and a seed (alteredTableIndex), return an altered table. - * - * @author ppasupat - */ -public class TableAlterer { - public static class Options { - @Option(gloss = "verbosity") - public int verbose = 0; - @Option(gloss = "parameter for the geometric distribution used to cut the number of rows") - public double altererGeomDistParam = 0.75; - @Option - public int maxNumRows = 50; - } - public static Options opts = new Options(); - - public final Example ex; - public final TableKnowledgeGraph oldGraph; - - public TableAlterer(Example ex) { - this.ex = ex; - this.oldGraph = (TableKnowledgeGraph) ex.context.graph; - } - - /** - * For each column, perform random draws with replacement until all rows are filled. - * - * Exceptions: - * - If the column has distinct cells, then just permute. - * - If the column is sorted, keep it sorted. 
- */ - public TableKnowledgeGraph constructAlteredGraph(int alteredTableIndex) { - Random altererRandom = new Random(); - int numRows = Math.min(oldGraph.numRows(), opts.maxNumRows); - numRows -= getGeometricRandom(numRows / 2, altererRandom); - List> cellsByColumn = new ArrayList<>(); - // Fuzzy Matching - Set fuzzyMatchedValues = new HashSet<>(); - for (int i = 0; i < ex.numTokens(); i++) { - for (int j = i + 1; j < ex.numTokens(); j++) { - for (Formula formula : oldGraph.getFuzzyMatchedFormulas( - ex.getTokens(), i, j, FuzzyMatchFn.FuzzyMatchFnMode.ENTITY)) { - if (formula instanceof ValueFormula) { - fuzzyMatchedValues.add(((ValueFormula) formula).value); - } - } - } - } - if (opts.verbose >= 2) { - LogInfo.logs("Fuzzy matched: %s", fuzzyMatchedValues); - } - // Go over each column - for (int j = 0; j < oldGraph.numColumns(); j++) { - altererRandom = new Random(); - TableColumn oldColumnCells = oldGraph.getColumn(j); - List oldColumn = new ArrayList<>(), newColumn = new ArrayList<>(); - for (TableCell cell : oldColumnCells.children) - oldColumn.add(cell.properties); - // Keep the entries that are fuzzy matched - Set fuzzyMatchedValuesInColumn = new HashSet<>(); - for (TableCellProperties properties : oldColumn) { - if (fuzzyMatchedValues.contains(properties.nameValue)) - fuzzyMatchedValuesInColumn.add(properties); - } - newColumn.addAll(fuzzyMatchedValuesInColumn); - while (newColumn.size() > numRows) - newColumn.remove(newColumn.size() - 1); - // Sample the cells - boolean isAllDistinct = isAllDistinct(oldColumn); - if (isAllDistinct) { - // Go from top to bottom, ignoring the ones already added - List nonFuzzyMatched = new ArrayList<>(oldColumn); - for (TableCellProperties properties : newColumn) - nonFuzzyMatched.remove(properties); - for (int i = 0; newColumn.size() < numRows; i++) - newColumn.add(nonFuzzyMatched.get(i)); - } else { - // Sample with replacement - while (newColumn.size() < numRows) - 
newColumn.add(oldColumn.get(altererRandom.nextInt(numRows))); - } - Collections.shuffle(newColumn, altererRandom); - // Sort? - String sorted = ""; - for (Pair> pair : COMPS) { - if (isSorted(oldColumn, pair.getSecond())) { - sorted = pair.getFirst(); - newColumn.sort(pair.getSecond()); - break; - } - } - // Done! - cellsByColumn.add(newColumn); - if (opts.verbose >= 2) { - LogInfo.logs("Column %3s%4s %s", isAllDistinct ? "[!]" : "", sorted, oldColumnCells.relationNameValue); - } - } - if (opts.verbose >= 1) - LogInfo.logs("numRows = %d | final size = %d columns x %d rows", - numRows, cellsByColumn.size(), cellsByColumn.get(0).size()); - return new TableKnowledgeGraph(null, oldGraph.columns, cellsByColumn, true); - } - - // ============================================================ - // Helper Functions - // ============================================================ - - int getGeometricRandom(int limit, Random random) { - int geometricRandom = 0; - while (geometricRandom < limit && random.nextDouble() < opts.altererGeomDistParam) - geometricRandom++; - return geometricRandom; - } - - private boolean isAllDistinct(List properties) { - Set ids = new HashSet<>(); - for (TableCellProperties x : properties) { - if (ids.contains(x.id)) return false; - ids.add(x.id); - } - return true; - } - - private static final Comparator NUMBER_COMP = new Comparator() { - @Override public int compare(TableCellProperties o1, TableCellProperties o2) { - Collection v1 = o1.metadata.get(TableTypeSystem.CELL_NUMBER_VALUE), - v2 = o2.metadata.get(TableTypeSystem.CELL_NUMBER_VALUE); - try { - return DenotationUtils.NumberProcessor.singleton.compareValues(v1.iterator().next(), v2.iterator().next()); - } catch (Exception e) { - throw new ClassCastException(); - } - } - }; - private static final Comparator NUMBER_COMP_REV = Collections.reverseOrder(NUMBER_COMP); - - private static final Comparator DATE_COMP = new Comparator() { - @Override public int compare(TableCellProperties o1, 
TableCellProperties o2) { - Collection v1 = o1.metadata.get(TableTypeSystem.CELL_DATE_VALUE), - v2 = o2.metadata.get(TableTypeSystem.CELL_DATE_VALUE); - try { - return DenotationUtils.DateProcessor.singleton.compareValues(v1.iterator().next(), v2.iterator().next()); - } catch (Exception e) { - throw new ClassCastException(); - } - } - }; - private static final Comparator DATE_COMP_REV = Collections.reverseOrder(DATE_COMP); - - private static final List>> COMPS = Arrays.asList( - new Pair<>("[N+]", NUMBER_COMP), - new Pair<>("[N-]", NUMBER_COMP_REV), - new Pair<>("[D+]", DATE_COMP), - new Pair<>("[D-]", DATE_COMP_REV) - ); - - private boolean isSorted(List properties, Comparator comparator) { - try { - for (int i = 0; i < properties.size() - 1; i++) { - if (comparator.compare(properties.get(i), properties.get(i+1)) > 0) - return false; - } - return true; - } catch (ClassCastException e) { - return false; - } - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/TableAltererCache.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/TableAltererCache.java deleted file mode 100644 index 1a0bdd3237..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/TableAltererCache.java +++ /dev/null @@ -1,104 +0,0 @@ -package edu.stanford.nlp.sempre.tables.alter; - -import java.io.BufferedReader; -import java.io.File; -import java.io.IOException; -import java.util.*; - -import edu.stanford.nlp.sempre.tables.StringNormalizationUtils; -import edu.stanford.nlp.sempre.tables.TableKnowledgeGraph; -import edu.stanford.nlp.sempre.tables.serialize.TableWriter; -import fig.basic.IOUtils; -import fig.basic.LogInfo; -import fig.basic.Option; - -public class TableAltererCache { - public static class Options { - @Option(gloss = "central path for saving altered tables") - public String baseAlteredTablesDir = null; - @Option(gloss = "path for altered table concat file (to reduce file server load)") - public String 
alteredTablesConcatCache = null; - } - public static Options opts = new Options(); - - private BufferedReader concatCache = null; - - public TableAltererCache() { - if (opts.alteredTablesConcatCache != null) { - if (opts.baseAlteredTablesDir != null) - throw new RuntimeException("Cannot specify both baseAlteredTablesDir and alteredTablesConcatCache"); - concatCache = IOUtils.openInHard(opts.alteredTablesConcatCache); - } - } - - public boolean existsSaveDir(String id) { - return opts.alteredTablesConcatCache != null - || (opts.baseAlteredTablesDir != null && new File(opts.baseAlteredTablesDir, id).isDirectory()); - } - - public TableKnowledgeGraph load(String id, int alteredTableIndex) { - return load(id, "" + alteredTableIndex); - } - - // Load table from |baseAlteredTablesDir|/nt-??/??.tsv - // or the next chunk of |alteredTablesConcatCache| - public TableKnowledgeGraph load(String id, String alteredTableIndex) { - if (opts.baseAlteredTablesDir == null) { - if (concatCache == null) return null; - try { - String line = concatCache.readLine(); - String[] metadata = line.split("\t"); - if (metadata.length != 3 || !id.equals(metadata[0]) || !alteredTableIndex.equals(metadata[1])) - throw new RuntimeException("Incorrect metadata. 
Expected " + id + " " + alteredTableIndex + " ___; found " + line); - int numLines = Integer.parseInt(metadata[2]); - if (BatchTableAlterer.opts.verbose >= 1) - LogInfo.logs("Reading %d lines from %s", numLines, opts.alteredTablesConcatCache); - List data = new ArrayList<>(); - for (int i = 0; i < numLines; i++) { - line = concatCache.readLine(); - String[] fields = line.split("\t", -1); // Include trailing spaces - for (int j = 0; j < fields.length; j++) - fields[j] = StringNormalizationUtils.unescapeTSV(fields[j]); - data.add(fields); - } - return new TableKnowledgeGraph(id + "/" + alteredTableIndex + ".tsv", data); - } catch (IOException e) { - e.printStackTrace(); - throw new RuntimeException("Error reading " + opts.alteredTablesConcatCache); - } - } else { - File tablePath = new File(new File(opts.baseAlteredTablesDir, id), alteredTableIndex + ".tsv"); - if (!tablePath.exists()) return null; - if (BatchTableAlterer.opts.verbose >= 1) - LogInfo.logs("Reading from %s", tablePath.getPath()); - try { - return TableKnowledgeGraph.fromRootedFilename(tablePath.getPath()); - } catch (Exception e) { - LogInfo.warnings("Error reading %s: %s", tablePath.getPath(), e); - return null; - } - } - } - - public void dump(TableKnowledgeGraph graph, String id, int alteredTableIndex) { - dump(graph, id, "" + alteredTableIndex); - } - - // Dump table to |baseAlteredTablesDir|/nt-??/??.tsv - public void dump(TableKnowledgeGraph graph, String id, String alteredTableIndex) { - if (opts.baseAlteredTablesDir == null) - throw new RuntimeException("cannot dump if baseAlteredTablesDir = null"); - File outDir = new File(opts.baseAlteredTablesDir, id); - outDir.mkdirs(); - new TableWriter(graph).writeTSV(new File(outDir, alteredTableIndex + ".tsv").getPath()); - } - - // Dump tables to |baseAlteredTablesDir|/nt-??/??.tsv - public void dump(List graphs, String id) { - for (int i = 0; i < graphs.size(); i++) { - dump(graphs.get(i), id, i); - } - } - - -} diff --git 
a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/TurkEquivalentClassInfo.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/TurkEquivalentClassInfo.java deleted file mode 100644 index 87c5c6324a..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/TurkEquivalentClassInfo.java +++ /dev/null @@ -1,124 +0,0 @@ -package edu.stanford.nlp.sempre.tables.alter; - -import java.io.*; -import java.util.*; - -import edu.stanford.nlp.sempre.Value; -import edu.stanford.nlp.sempre.Values; - -/** - * Store information about equivalent classes inferred by Turked data for a single Example. - * - * Fields: - * - Example ID - * - Number of derivations from dump file - * - Number of equivalent classes - * - All tables that have been sent to Turk - * - All tables with agreed answers in Turked data - * - Original table verification - * - Target value from dataset (as Value) - * - Target value from Turk (as Value, or null if disagreed) - * - Whether the target values match - * - Matching equivalent classes - * - Number of derivations matched - * - Number of classes matched - * - Tables that can be Turked to distinguish the remaining classes - * - * @author ppasupat - */ -public class TurkEquivalentClassInfo { - - public static final String[] FIELD_NAMES = new String[] { - "id", "numDerivs", "numClasses", "allTurkedTables", "agreedTurkedTables", - "origTableTarget", "origTableTurkedTarget", "origTableFlag", - "numDerivsMatched", "numClassesMatched" - }; - - public static String getHeader() { - return String.join("\t", FIELD_NAMES); - } - - public static void dumpHeader(PrintWriter writer) { - writer.println(getHeader()); - writer.flush(); - } - - public String id; - public List allTurkedTables, agreedTurkedTables; - public Value origTableTarget, origTableTurkedTarget; - public String origTableFlag; - public int numDerivs, numClasses, numDerivsMatched, numClassesMatched; - - @Override - public String toString() { - String[] 
fields = new String[FIELD_NAMES.length]; - fields[0] = id; - fields[1] = "" + numDerivs; - fields[2] = "" + numClasses; - fields[3] = "" + allTurkedTables; - fields[4] = "" + agreedTurkedTables; - fields[5] = origTableTarget == null ? "null" : origTableTarget.toString(); - fields[6] = origTableTurkedTarget == null ? "null" : origTableTurkedTarget.toString(); - fields[7] = origTableFlag; - fields[8] = "" + numDerivsMatched; - fields[9] = "" + numClassesMatched; - return String.join("\t", fields); - } - - public void dump(PrintWriter writer) { - writer.println(toString()); - writer.flush(); - } - - // ============================================================ - // Read from String or file - // ============================================================ - - public static TurkEquivalentClassInfo fromString(String line) { - TurkEquivalentClassInfo info = new TurkEquivalentClassInfo(); - String[] fields = line.split("\t"); - info.id = fields[0]; - info.numDerivs = Integer.parseInt(fields[1]); - info.numClasses = Integer.parseInt(fields[2]); - info.allTurkedTables = readIntegerList(fields[3]); - info.agreedTurkedTables = readIntegerList(fields[4]); - info.origTableTarget = readValue(fields[5]); - info.origTableTurkedTarget = readValue(fields[6]); - info.origTableFlag = fields[7]; - info.numDerivsMatched = Integer.parseInt(fields[8]); - info.numClassesMatched = Integer.parseInt(fields[9]); - return info; - } - - private static List readIntegerList(String x) { - x = x.replaceAll("\\[|\\]", "").trim(); - List answer = new ArrayList<>(); - if (!x.isEmpty()) - for (String y : x.split(",")) - answer.add(Integer.parseInt(y.trim())); - Collections.sort(answer); - return answer; - } - - private static Value readValue(String x) { - if ("null".equals(x)) return null; - if ("ERROR".equals(x)) return ValueCanonicalizer.ERROR; - return Values.fromString(x); - } - - public static Map fromFile(String filename) { - Map map = new HashMap<>(); - try (BufferedReader in = new 
BufferedReader(new FileReader(filename))) { - String line; - while ((line = in.readLine()) != null) { - if (line.startsWith("id\tnumDerivs")) continue; // Skip header - TurkEquivalentClassInfo info = TurkEquivalentClassInfo.fromString(line); - map.put(info.id, info); - } - } catch (IOException e) { - throw new RuntimeException(e); - } - return map; - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/ValueCanonicalizer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/ValueCanonicalizer.java deleted file mode 100644 index 740b6d8e83..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/alter/ValueCanonicalizer.java +++ /dev/null @@ -1,35 +0,0 @@ -package edu.stanford.nlp.sempre.tables.alter; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; - -public class ValueCanonicalizer { - - public static final ErrorValue ERROR = new ErrorValue("ERROR"); - - public static Value canonicalize(Value value) { - if (value instanceof ErrorValue) { - return ERROR; - } else if (value instanceof ListValue) { - List stuff = ((ListValue) value).values; - List canonical = new ArrayList<>(); - for (Value x : stuff) { - if (x instanceof DateValue) { - DateValue date = (DateValue) x; - if (date.month == -1 && date.day == -1) - canonical.add(new NumberValue(date.year)); - else - canonical.add(x); - } else { - canonical.add(x); - } - } - ListValue canonList = new ListValue(canonical).getUnique(); - return (canonList.values.size() == 1) ? 
canonList.values.get(0) : canonList; - } else { - return value; // Probably infinite value - } - } - -} \ No newline at end of file diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/baseline/TableBaselineFeatureComputer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/baseline/TableBaselineFeatureComputer.java deleted file mode 100644 index 628ec91fda..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/baseline/TableBaselineFeatureComputer.java +++ /dev/null @@ -1,111 +0,0 @@ -package edu.stanford.nlp.sempre.tables.baseline; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.tables.*; -import edu.stanford.nlp.sempre.tables.features.*; -import fig.basic.*; - -/** - * Compute features for BaselineParser - * - * @author ppasupat - */ -public class TableBaselineFeatureComputer implements FeatureComputer { - public static class Options { - @Option(gloss = "Verbosity") public int verbosity = 0; - } - public static Options opts = new Options(); - - @Override - public void extractLocal(Example ex, Derivation deriv) { - if (!deriv.isRoot(ex.numTokens())) return; - if (!FeatureExtractor.containsDomain("table-baseline")) return; - List phraseInfos = PhraseInfo.getPhraseInfos(ex); - // Find the list of all entities mentioned in the question - Set mentionedEntities = new HashSet<>(), mentionedProperties = new HashSet<>(); - for (PhraseInfo phraseInfo : phraseInfos) { - for (String s : phraseInfo.fuzzyMatchedPredicates) { - // s is either an ENTITY or a BINARY - SemType entityType = TableTypeSystem.getEntityTypeFromId(s); - SemType propertyType = TableTypeSystem.getPropertyTypeFromId(s); - if (entityType != null) mentionedEntities.add(s); - if (propertyType != null) mentionedProperties.add(s); - } - } - // Find the base cell(s) - TableKnowledgeGraph graph = (TableKnowledgeGraph) ex.context.graph; - List values = ((ListValue) deriv.value).values; - if (opts.verbosity >= 2) 
LogInfo.logs("%s", values); - if (values.get(0) instanceof NumberValue) { - values = graph.joinSecond(TableTypeSystem.CELL_NUMBER_VALUE, values); - } else if (values.get(0) instanceof DateValue) { - values = graph.joinSecond(TableTypeSystem.CELL_DATE_VALUE, values); - } else { - values = new ArrayList<>(values); - } - if (opts.verbosity >= 2) LogInfo.logs("%s", values); - List predictedEntities = new ArrayList<>(); - for (Value value : values) { - predictedEntities.add(((NameValue) value).id); - } - // Define features - for (String predicted : predictedEntities) { - String pProp = TableTypeSystem.getPropertyOfEntity(predicted); - List pRows = graph.getRowsOfCellId(predicted); - if (opts.verbosity >= 2) LogInfo.logs("[p] %s %s %s", predicted, pProp, pRows); - for (String mentioned : mentionedEntities) { - String mProp = TableTypeSystem.getPropertyOfEntity(mentioned); - List mRows = graph.getRowsOfCellId(mentioned); - if (opts.verbosity >= 2) LogInfo.logs("[m] %s %s %s", mentioned, mProp, mRows); - // Same column as ENTITY + offset - if (pProp != null && mProp != null && pProp.equals(mProp)) { - defineAllFeatures(deriv, "same-column", phraseInfos); - if (pRows != null && pRows.size() == 1 && mRows != null && mRows.size() == 1) { - defineAllFeatures(deriv, "same-column;offset=" + (pRows.get(0) - mRows.get(0)), phraseInfos); - } - } - // Same row as ENTITY - if (mRows != null && pRows != null) { - for (int pRow : pRows) { - if (mRows.contains(pRow)) { - defineAllFeatures(deriv, "same-row", phraseInfos); - break; - } - } - } - } - for (String mentioned : mentionedProperties) { - // match column name BINARY - if (opts.verbosity >= 2) LogInfo.logs("%s %s", pProp, mentioned); - if (mentioned.equals(pProp)) { - defineAllFeatures(deriv, "match-column-binary", phraseInfos); - } - } - // Row index (first or last) - if (pRows != null && pRows.contains(0)) - defineAllFeatures(deriv, "first-row", phraseInfos); - if (pRows != null && pRows.contains(graph.numRows() - 1)) - 
defineAllFeatures(deriv, "last-row", phraseInfos); - } - - } - - private void defineAllFeatures(Derivation deriv, String name, List phraseInfos) { - defineUnlexicalizedFeatures(deriv, name); - defineLexicalizedFeatures(deriv, name, phraseInfos); - } - - private void defineUnlexicalizedFeatures(Derivation deriv, String name) { - deriv.addFeature("table-baseline", name); - } - - private void defineLexicalizedFeatures(Derivation deriv, String name, List phraseInfos) { - for (PhraseInfo phraseInfo : phraseInfos) { - deriv.addFeature("table-baseline", "phrase=" + phraseInfo.lemmaText + ";" + name); - } - } - -} - diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/baseline/TableBaselineParser.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/baseline/TableBaselineParser.java deleted file mode 100644 index 5e7325fefa..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/baseline/TableBaselineParser.java +++ /dev/null @@ -1,75 +0,0 @@ -package edu.stanford.nlp.sempre.tables.baseline; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.tables.*; -import fig.basic.*; - -/** - * Baseline parser for table. - * - * Choose the answer from a table cell. - * - * @author ppasupat - */ -public class TableBaselineParser extends Parser { - - public TableBaselineParser(Spec spec) { - super(spec); - } - - @Override - public ParserState newParserState(Params params, Example ex, boolean computeExpectedCounts) { - return new TableBaselineParserState(this, params, ex, computeExpectedCounts); - } - -} - -/** - * Actual logic for generating candidates. 
- */ -class TableBaselineParserState extends ParserState { - - public TableBaselineParserState(Parser parser, Params params, Example ex, boolean computeExpectedCounts) { - super(parser, params, ex, computeExpectedCounts); - } - - @Override - public void infer() { - LogInfo.begin_track("TableBaselineParser.infer()"); - // Add all entities and possible normalizations to the list of candidates - TableKnowledgeGraph graph = (TableKnowledgeGraph) ex.context.graph; - for (Formula f : graph.getAllFormulas(FuzzyMatchFn.FuzzyMatchFnMode.ENTITY)) { - buildAllDerivations(f); - } - // Execute + Compute expected counts - ensureExecuted(); - if (computeExpectedCounts) { - expectedCounts = new HashMap<>(); - ParserState.computeExpectedCounts(predDerivations, expectedCounts); - } - LogInfo.end_track(); - } - - private void buildAllDerivations(Formula f) { - generateDerivation(f); - // Try number and date normalizations as well - generateDerivation(new JoinFormula(Formula.fromString("!" + TableTypeSystem.CELL_NUMBER_VALUE.id), f)); - generateDerivation(new JoinFormula(Formula.fromString("!" 
+ TableTypeSystem.CELL_DATE_VALUE.id), f)); - } - - private void generateDerivation(Formula f) { - Derivation deriv = new Derivation.Builder() - .cat(Rule.rootCat).start(-1).end(-1) - .formula(f).children(Collections.emptyList()) - .type(TypeInference.inferType(f)) - .createDerivation(); - deriv.ensureExecuted(parser.executor, ex.context); - if (deriv.value instanceof ErrorValue) return; - if (deriv.value instanceof ListValue && ((ListValue) deriv.value).values.isEmpty()) return; - if (!deriv.isFeaturizedAndScored()) featurizeAndScoreDerivation(deriv); - predDerivations.add(deriv); - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/dpd/DPDErrorValue.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/dpd/DPDErrorValue.java deleted file mode 100644 index aac01687dc..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/dpd/DPDErrorValue.java +++ /dev/null @@ -1,70 +0,0 @@ -package edu.stanford.nlp.sempre.tables.dpd; - -import edu.stanford.nlp.sempre.*; -import fig.basic.LispTree; - -/** - * Represent a partial formula (a lambda) obtained by applying the rule on the children. - * - * @author ppasupat - */ -public class DPDErrorValue extends Value { - - public final Rule rule; - public final Value child1, child2; - public final Formula formula; - public final int hashCode; - - public DPDErrorValue(Derivation deriv, Rule rule, Value child1, Value child2) { - if (child1 == null && child2 != null) - throw new RuntimeException("Cannot have child1 == null while child2 == " + child2); - this.rule = rule; - this.child1 = child1; - this.child2 = child2; - if (child1 == null && child2 == null) - // No children: Use the formula instead of the children - formula = deriv.formula; - else - formula = null; - this.hashCode = rule.hashCode() - + (child1 == null ? 0 : child1.hashCode()) * 359 - + (child2 == null ? 0 : child2.hashCode()) * 438 - + (formula == null ? 
0 : formula.hashCode()) * 502; - } - - public DPDErrorValue(Derivation deriv, Rule rule, Derivation child1, Derivation child2) { - this(deriv, rule, child1 == null ? null : child1.value, child2 == null ? null : child2.value); - } - - @Override - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("dperror"); - tree.addChild(rule.toLispTree()); - if (child1 != null) - tree.addChild(child1.toLispTree()); - if (child2 != null) - tree.addChild(child2.toLispTree()); - if (child1 == null && child2 == null) - tree.addChild(formula.toLispTree()); - return tree; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - DPDErrorValue that = (DPDErrorValue) o; - if (rule != that.rule) return false; - if ((child1 == null && that.child1 != null) || (child1 != null && !child1.equals(that.child1))) return false; - if ((child2 == null && that.child2 != null) || (child2 != null && !child2.equals(that.child2))) return false; - if ((formula == null && that.formula != null) || (formula != null && !formula.equals(that.formula))) return false; - return true; - } - - @Override - public int hashCode() { - return hashCode; - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/dpd/DPDParser.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/dpd/DPDParser.java deleted file mode 100644 index 8c2a8e96df..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/dpd/DPDParser.java +++ /dev/null @@ -1,1146 +0,0 @@ -package edu.stanford.nlp.sempre.tables.dpd; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.tables.DenotationTypeInference; -import edu.stanford.nlp.sempre.tables.InfiniteListValue; -import edu.stanford.nlp.sempre.tables.ScopedValue; -import edu.stanford.nlp.sempre.tables.TableDerivationPruningComputer; -import edu.stanford.nlp.sempre.tables.grow.ApplyFn; 
-import fig.basic.*; - -/** - * A DPDParser parses utterances like a FloatingParser, but the dynamic programming states - * also include the *denotation* in addition to (start,end) or depth/size. - * - * DPDParser makes 2 passes: - * - Pass 1: Find the parse paths that lead to the correct final denotation. - * - Pass 2: Use regular beam search (from FloatingParser), but only restrict - * the parse paths to the ones found in Pass 1. - * - * @author ppasupat - */ -public class DPDParser extends FloatingParser { - public static class Options { - @Option(gloss = "Use the targetValue at test time") - public boolean cheat = false; - @Option(gloss = "During training, use FloatingParser instead (don't use DPDParser at all)") - public boolean useFloatingParserForTrain = false; - @Option(gloss = "During training, combine the derivation list from FloatingParser") - public boolean combineFromFloatingParser = false; - @Option(gloss = "Random object for shuffling the derivation list") - public Random shuffleRandom = new Random(1); - @Option(gloss = "Custom maximum depth for DPDParser (default = FloatingParser's maxDepth)") - public int dpdParserMaxDepth = -1; - @Option(gloss = "Custom beam size for DPDParser (default = FloatingParser's beamSize)") - public int dpdParserBeamSize = -1; - @Option(gloss = "Prune the cells in first pass") - public int firstPassBeamSize = -1; - @Option(gloss = "Stop the current pass if the number of (cell, denotation) pairs exceeds this number") - public int maxNumCellDenotations = 5000000; - @Option(gloss = "Stop the current pass if it has used more than this amount of time (in seconds)") - public int maxDPDParsingTime = 600; - @Option(gloss = "Allowed pruning strategies in first pass (must not depend on actual formulas)") - public List allowedPrunersInFirstPass = new ArrayList<>(Arrays.asList( - DefaultDerivationPruningComputer.emptyDenotation, - DefaultDerivationPruningComputer.nonLambdaError, - DefaultDerivationPruningComputer.mistypedMerge, - 
DefaultDerivationPruningComputer.badSummarizerHead, - TableDerivationPruningComputer.lambdaDCSError, - TableDerivationPruningComputer.subsetMerge, - TableDerivationPruningComputer.sameMark, - TableDerivationPruningComputer.subsetMerge, - TableDerivationPruningComputer.aggregateInfinite, - TableDerivationPruningComputer.aggregateUncomparable - )); - @Option(gloss = "Use all pruning strategies if only one formula can produce the denotation") - public boolean aggressivelyPruneSingleFormulas = true; - // Debugging flags - @Option(gloss = "DEBUG: Put the cell name in the canonical utterance of final derivations") - public boolean putCellNameInCanonicalUtterance = false; - @Option(gloss = "DEBUG: Dump denotations after each pass") - public DumpSpec dumpDenotations = DumpSpec.NONE; - @Option(gloss = "DEBUG: Dump allowed ingredients after the first pass") - public boolean dumpAllowedIngredients = false; - @Option(gloss = "DEBUG: Summarize denotations after each pass") - public boolean summarizeDenotations = false; - @Option(gloss = "DEBUG: Count the number of useful unique-denotations and cell-denotations") - public boolean summarizeCountUseful = false; - @Option(gloss = "DEBUG: Do not do first pass; allow any ingredients in the second pass") - public boolean ignoreFirstPass = false; - } - public static Options opts = new Options(); - - public static enum DumpSpec { NONE, UNIQUE, NONERROR, ALL, FORMULA } - - public DPDParser(Spec spec) { - super(spec); - } - - @Override - public ParserState newParserState(Params params, Example ex, boolean computeExpectedCounts) { - if (computeExpectedCounts) { // Training - // Only use floating? - if (opts.useFloatingParserForTrain) - return super.newParserState(params, ex, computeExpectedCounts); - // Use mixture? 
- if (opts.combineFromFloatingParser) - return new DPDParserState(this, params, ex, computeExpectedCounts, - super.newParserState(params, ex, computeExpectedCounts)); - // Otherwise, just use DPDParser (look at denotation) - return new DPDParserState(this, params, ex, computeExpectedCounts); - } else { // Test - // Cheat by looking at denotation? - if (opts.cheat) - return new DPDParserState(this, params, ex, computeExpectedCounts); - // Otherwise, don't cheat and just use floating parser - return super.newParserState(params, ex, computeExpectedCounts); - } - } - -} - -/** - * Actual parsing logic. - */ -class DPDParserState extends ParserState { - - private final DerivationPruner pruner; - private final int maxDepth, beamSize; - private final CatSizeBound catSizeBound; - private final ParserState backoffParserState; - private long firstPassParseTime, secondPassParseTime; - private boolean timeout = false; - - private Map ruleTime; - - public DPDParserState(DPDParser parser, Params params, Example ex, boolean computeExpectedCounts) { - this(parser, params, ex, computeExpectedCounts, null); - } - - public DPDParserState(DPDParser parser, Params params, Example ex, boolean computeExpectedCounts, ParserState backoff) { - super(parser, params, ex, computeExpectedCounts); - pruner = new DerivationPruner(this); - maxDepth = DPDParser.opts.dpdParserMaxDepth > 0 ? DPDParser.opts.dpdParserMaxDepth : FloatingParser.opts.maxDepth; - beamSize = DPDParser.opts.dpdParserBeamSize > 0 ? 
DPDParser.opts.dpdParserBeamSize : Parser.opts.beamSize; - catSizeBound = new CatSizeBound(maxDepth, parser.grammar); - backoffParserState = backoff; - } - - @Override - protected int getBeamSize() { return beamSize; } - - protected void ensureExecuted(Derivation deriv) { - deriv.ensureExecuted(parser.executor, ex.context); - if (!deriv.isFeaturizedAndScored() && currentPass != ParsingPass.FIRST) - featurizeAndScoreDerivation(deriv); - } - - // ============================================================ - // Dynamic programming cells - // ============================================================ - - // Pass 1: Just try to reach the correct denotation - // state name => denotation => FirstPassData - private final Map> firstPassCells = new HashMap<>(); - // Pass 2: Using results from Pass 1 to prune the possible formulas - // state name => denotation => SecondPassData - private final Map> secondPassCells = new HashMap<>(); - - enum ParsingPass { FIRST, SECOND, DONE }; - ParsingPass currentPass = ParsingPass.FIRST; - - private Map> getCellsForCurrentPass() { - return currentPass == ParsingPass.FIRST ? firstPassCells : secondPassCells; - } - - // ============================================================ - // DenotationIngredient - // ============================================================ - - // Represents a possible method for creating a particular denotation in a particular cell. - class Ingredient { - public final String parentCell; - public final Rule rule; - public final Value child1, child2; - private final int hashCode; - - public Ingredient(String parentCell, Rule rule, Derivation deriv1, Derivation deriv2) { - this.parentCell = parentCell; - this.rule = rule; - if (deriv1 == null) { - this.child1 = null; - } else { - ensureExecuted(deriv1); - this.child1 = deriv1.value; - } - if (deriv2 == null) { - this.child2 = null; - } else { - ensureExecuted(deriv2); - this.child2 = deriv2.value; - } - hashCode = parentCell.hashCode() - + ((rule == null) ? 
0 : rule.hashCode() * 1729) - + ((child1 == null) ? 0 : child1.hashCode() * 42) - + ((child2 == null) ? 0 : child2.hashCode() * 345); - } - - public Ingredient(String parentCell) { - this(parentCell, null, null, null); - } - - @Override - public boolean equals(Object o) { - if (!(o instanceof Ingredient)) return false; - Ingredient that = (Ingredient) o; - if (!parentCell.equals(that.parentCell)) return false; - if (rule != that.rule) return false; // Rules must be the same object - if ((child1 == null && that.child1 != null) || (child1 != null && !child1.equals(that.child1))) return false; - if ((child2 == null && that.child2 != null) || (child2 != null && !child2.equals(that.child2))) return false; - return true; - } - - @Override - public int hashCode() { return hashCode; } - - @Override - public String toString() { - String cellName = parentCell; - if (cellName.contains(":")) { - String[] parts = cellName.split(":"); - assert parts.length == 2; - cellName = String.format("&%2s:%s", parts[1], parts[0]); - } - return new StringBuilder().append("[ ").append(cellName).append(" | ").append(rule) - .append(" | ").append(child1).append(" | ").append(child2).append(" ]").toString(); - } - } - - private final Set allowedIngredients = new HashSet<>(); - - // ============================================================ - // BackPointer - // ============================================================ - - // Back pointer for dynamic programming. Points to a child cell. 
- class BackPointer { - public final String cell; - public final Value value; - - public BackPointer(String cell, Value denotation) { - this.cell = cell; - this.value = denotation; - } - - @Override - public boolean equals(Object o) { - if (!(o instanceof BackPointer)) return false; - BackPointer that = (BackPointer) o; - return cell.equals(that.cell) && value.equals(that.value); - } - - @Override - public int hashCode() { - return cell.hashCode() * 100 + value.hashCode(); - } - - @Override - public String toString() { - return cell + " " + value; - } - - public boolean isSingleFormula() { - return getCellsForCurrentPass().get(cell).get(value).singleFormula; - } - } - - public BackPointer getBackPointer(String cell, Derivation child) { - if (cell == null || child == null) return null; - ensureExecuted(child); - return new BackPointer(cell, child.value); - } - - // ============================================================ - // Metadata - // ============================================================ - - // Stores derivations and other data - class Metadata { - public final Value value; - // List of possible parse paths to create this value - public Set possibleIngredients = new HashSet<>(); - // Backpointers for backtracking after the first pass - public Set backPointers = new HashSet<>(); - // All derivations containing formulas that execute to the denotation. - // For the FIRST pass, this has only 1 formula. - // For the SECOND pass, this will eventually be pruned to the beam size. - public List derivations = new ArrayList<>(); - // Whether there is only one possible formula that executes to the denotation. - // (only used during the first pass) - // If so, we can apply any pruning heuristic on the formulas built upon this formula. 
- public boolean singleFormula = true; - - public Metadata(Value value) { - this.value = value; - } - - public void add(Derivation deriv, Ingredient ingredient, BackPointer bp1, BackPointer bp2) { - if (currentPass == ParsingPass.FIRST) { - if (derivations.isEmpty()) { - if (Parser.opts.verbose >= 3) - LogInfo.logs("Metadata.add: %s %s", value, deriv); - derivations.add(deriv); - } else if (!derivations.get(0).formula.equals(deriv.formula)) { - singleFormula = false; - } - if (Parser.opts.verbose >= 3) { - LogInfo.logs("possibleIngredients.add: %s", ingredient); - } - possibleIngredients.add(ingredient); - if (bp1 != null) { - backPointers.add(bp1); - if (!bp1.isSingleFormula()) singleFormula = false; - } - if (bp2 != null) { - backPointers.add(bp2); - if (!bp2.isSingleFormula()) singleFormula = false; - } - } else if (currentPass == ParsingPass.SECOND) { - derivations.add(deriv); - singleFormula = (derivations.size() == 1); - } - } - } - - // ============================================================ - // Add to Chart - // ============================================================ - - private void addToChart(Derivation deriv, Ingredient ingredient, - BackPointer bp1, BackPointer bp2) { - if (Parser.opts.verbose >= 3) - LogInfo.logs("addToChart %s %s: %s", ingredient.parentCell, deriv.value, deriv); - ensureExecuted(deriv); - Map> cells = getCellsForCurrentPass(); - Map denotationToData = cells.get(ingredient.parentCell); - if (denotationToData == null) - cells.put(ingredient.parentCell, denotationToData = new HashMap<>()); - Metadata metadata = denotationToData.get(deriv.value); - if (metadata == null) - denotationToData.put(deriv.value, metadata = new Metadata(deriv.value)); - metadata.add(deriv, ingredient, bp1, bp2); - } - - private String anchoredCell(String cat, int start, int end) { - return (cat + "[" + start + "," + end + "]").intern(); - } - - private String floatingCell(String cat, int depth) { - return (cat + ":" + depth).intern(); - } - - // 
============================================================ - // Apply Rule - // ============================================================ - - private boolean isRootRule(Rule rule) { - return Rule.rootCat.equals(rule.lhs); - } - - private boolean applyRule(Rule rule, int start, int end, int depth, - String cell1, Derivation child1, String cell2, Derivation child2) { - if (timeout && !isRootRule(rule)) return false; - applyRuleActual(rule, start, end, depth, cell1, child1, cell2, child2); - return true; - } - - private void applyRuleActual(Rule rule, int start, int end, int depth, - String cell1, Derivation child1, String cell2, Derivation child2) { - if (Parser.opts.verbose >= 5) - LogInfo.logs("applyRule %s [%s:%s] depth=%s, %s %s", rule, start, end, depth, child1, child2); - - Ingredient anchoredIngredient = null, floatingIngredient = null; - if (depth == -1) { - anchoredIngredient = new Ingredient(anchoredCell(rule.lhs, start, end), rule, child1, child2); - floatingIngredient = new Ingredient(floatingCell(rule.lhs, 0), rule, child1, child2); - } else { - floatingIngredient = new Ingredient(floatingCell(rule.lhs, depth), rule, child1, child2); - } - if (currentPass == ParsingPass.SECOND && !DPDParser.opts.ignoreFirstPass) { - // Prune invalid ingredient - if (!allowedIngredients.contains(anchoredIngredient) - && !allowedIngredients.contains(floatingIngredient)) return; - } - BackPointer bp1 = getBackPointer(cell1, child1), bp2 = getBackPointer(cell2, child2); - boolean singleFormula = (bp1 == null || bp1.isSingleFormula()) && (bp2 == null || bp2.isSingleFormula()); - - List children; - if (child1 == null) // 0-ary - children = Collections.emptyList(); - else if (child2 == null) // 1-ary - children = Collections.singletonList(child1); - else { - children = ListUtils.newList(child1, child2); - // optionally: ensure that specific anchors are only used once (or K times) per final derivation - // Cannot impose useAnchorsOnce on the first pass without dropping 
correct derivations! - if (currentPass != ParsingPass.FIRST) { - if (FloatingParser.opts.useAnchorsOnce) { - if (FloatingRuleUtils.derivationAnchorsOverlap(child1, child2)) - return; - } else if (FloatingParser.opts.useMaxAnchors >= 0) { - if (FloatingRuleUtils.maxNumAnchorOverlaps(child1, child2) - > FloatingParser.opts.useMaxAnchors) - return; - } - } - } - - // Call the semantic function on the children and read the results - DerivationStream results = rule.sem.call(ex, - new SemanticFn.CallInfo(rule.lhs, start, end, rule, children)); - while (results.hasNext()) { - Derivation newDeriv = results.next(); - newDeriv = newDeriv.betaReduction(); - if (DPDParser.opts.aggressivelyPruneSingleFormulas) { - if (currentPass == ParsingPass.FIRST && singleFormula) { - if (pruner.isPruned(newDeriv, null)) continue; - } else { - if (pruner.isPruned(newDeriv)) continue; - } - } else { - if (pruner.isPruned(newDeriv)) continue; - } - if (newDeriv.value instanceof ErrorValue) { - // Assign canonical error value - newDeriv.value = new DPDErrorValue(newDeriv, rule, child1, child2); - } - if (depth == -1) { - // Anchored rule - addToChart(newDeriv, anchoredIngredient, bp1, bp2); - addToChart(newDeriv, floatingIngredient, bp1, bp2); - } else { - // Floating rule - addToChart(newDeriv, floatingIngredient, bp1, bp2); - } - } - } - - private boolean applyAnchoredRule(Rule rule, int start, int end) { - return applyRule(rule, start, end, -1, null, null, null, null); - } - private boolean applyAnchoredRule(Rule rule, int start, int end, - String cell1, Derivation child1) { - return applyRule(rule, start, end, -1, cell1, child1, null, null); - } - private boolean applyAnchoredRule(Rule rule, int start, int end, - String cell1, Derivation child1, String cell2, Derivation child2) { - return applyRule(rule, start, end, -1, cell1, child1, cell2, child2); - } - - private boolean applyFloatingRule(Rule rule, int depth) { - return applyRule(rule, -1, -1, depth, null, null, null, null); - } - 
private boolean applyFloatingRule(Rule rule, int depth, - String cell1, Derivation child1) { - return applyRule(rule, -1, -1, depth, cell1, child1, null, null); - } - private boolean applyFloatingRule(Rule rule, int depth, - String cell1, Derivation child1, String cell2, Derivation child2) { - return applyRule(rule, -1, -1, depth, cell1, child1, cell2, child2); - } - - // ============================================================ - // Get derivations - // ============================================================ - - private List getDerivations(Object cell) { - Map> cells = getCellsForCurrentPass(); - Map denotationToData = cells.get(cell); - if (denotationToData == null) return Collections.emptyList(); - List derivations = new ArrayList<>(); - for (Metadata metadata : denotationToData.values()) { - derivations.addAll(metadata.derivations); - } - return derivations; - } - - /** - * Return a collection of ChildDerivationsGroup. - * - * The rule should be applied on all derivations (or all pairs of derivations) in each ChildDerivationsGroup. - */ - private Collection getFilteredDerivations(Rule rule, String cell1, String cell2) { - List derivations1 = getDerivations(cell1), - derivations2 = (cell2 == null) ? 
null : getDerivations(cell2); - if (!FloatingParser.opts.filterChildDerivations) - return Collections.singleton(new ChildDerivationsGroup(derivations1, derivations2)); - // Try to filter down the number of partial logical forms - if (rule.getSem().supportFilteringOnTypeData()) - return rule.getSem().getFilteredDerivations(derivations1, derivations2); - return Collections.singleton(new ChildDerivationsGroup(derivations1, derivations2)); - } - - private Collection getFilteredDerivations(Rule rule, String cell) { - return getFilteredDerivations(rule, cell, null); - } - - // ============================================================ - // Build Anchored - // ============================================================ - - // Build derivations over span |start|, |end|. - private void buildAnchored(int start, int end) { - // Apply unary tokens on spans (rule $A (a)) - for (Rule rule : parser.grammar.getRules()) { - if (timeout && !isRootRule(rule)) continue; - if (!rule.isAnchored()) continue; - if (rule.rhs.size() != 1 || rule.isCatUnary()) continue; - boolean match = (end - start == 1) && ex.token(start).equals(rule.rhs.get(0)); - if (!match) continue; - - StopWatch stopWatch = new StopWatch().start(); - applyAnchoredRule(rule, start, end); - ruleTime.put(rule, ruleTime.getOrDefault(rule, 0L) + stopWatch.stop().ms); - } - - // Apply binaries on spans (rule $A ($B $C)), ... 
- for (int mid = start + 1; mid < end; mid++) { - for (Rule rule : parser.grammar.getRules()) { - if (timeout && !isRootRule(rule)) continue; - if (!rule.isAnchored()) continue; - if (rule.rhs.size() != 2) continue; - - StopWatch stopWatch = new StopWatch().start(); - String rhs1 = rule.rhs.get(0); - String rhs2 = rule.rhs.get(1); - boolean match1 = (mid - start == 1) && ex.token(start).equals(rhs1); - boolean match2 = (end - mid == 1) && ex.token(mid).equals(rhs2); - - if (!Rule.isCat(rhs1) && Rule.isCat(rhs2)) { // token $Cat - if (match1) { - String cell = anchoredCell(rhs2, mid, end); - List derivations = getDerivations(cell); - for (Derivation deriv : derivations) - if (!applyAnchoredRule(rule, start, end, cell, deriv)) break; - } - } else if (Rule.isCat(rhs1) && !Rule.isCat(rhs2)) { // $Cat token - if (match2) { - String cell = anchoredCell(rhs1, start, mid); - List derivations = getDerivations(cell); - for (Derivation deriv : derivations) - if (!applyAnchoredRule(rule, start, end, cell, deriv)) break; - } - } else if (!Rule.isCat(rhs1) && !Rule.isCat(rhs2)) { // token token - if (match1 && match2) - if (!applyAnchoredRule(rule, start, end)) break; - } else { // $Cat $Cat - String cell1 = anchoredCell(rhs1, start, mid); - String cell2 = anchoredCell(rhs2, mid, end); - List derivations1 = getDerivations(cell1); - List derivations2 = getDerivations(cell2); - derivLoop: - for (Derivation deriv1 : derivations1) - for (Derivation deriv2 : derivations2) - if (!applyAnchoredRule(rule, start, end, cell1, deriv1, cell2, deriv2)) break derivLoop; - } - ruleTime.put(rule, ruleTime.getOrDefault(rule, 0L) + stopWatch.stop().ms); - } - } - - // Apply unary categories on spans (rule $A ($B)) - // Important: do this in topologically sorted order and after all the binaries are done. 
- for (Rule rule : parser.getCatUnaryRules()) { - if (timeout && !isRootRule(rule)) continue; - if (!rule.isAnchored()) continue; - - StopWatch stopWatch = new StopWatch().start(); - String cell = anchoredCell(rule.rhs.get(0), start, end); - List derivations = getDerivations(cell); - for (Derivation deriv : derivations) { - if (!applyAnchoredRule(rule, start, end, cell, deriv)) break; - } - ruleTime.put(rule, ruleTime.getOrDefault(rule, 0L) + stopWatch.stop().ms); - } - } - - // ============================================================ - // Build Floating - // ============================================================ - - // Build floating derivations of exactly depth |depth|. - private void buildFloating(int depth) { - // Build a floating predicate from thin air - // (rule $A (a)); note that "a" is ignored - if (depth == 0) { - for (Rule rule : parser.grammar.getRules()) { - if (timeout && !isRootRule(rule)) continue; - if (!rule.isFloating()) continue; - if (rule.rhs.size() != 1 || rule.isCatUnary()) continue; - - StopWatch stopWatch = new StopWatch().start(); - applyFloatingRule(rule, depth); - ruleTime.put(rule, ruleTime.getOrDefault(rule, 0L) + stopWatch.stop().ms); - } - } - - // Apply unary categories on spans (rule $A ($B)) - for (Rule rule : parser.getCatUnaryRules()) { - if (timeout && !isRootRule(rule)) continue; - if (!rule.isFloating()) continue; - if (catSizeBound.getBound(rule.lhs) < depth) continue; - - StopWatch stopWatch = new StopWatch().start(); - String cell = floatingCell(rule.rhs.get(0), depth - 1); - derivLoop: - for (ChildDerivationsGroup group : getFilteredDerivations(rule, cell)) - for (Derivation deriv : group.derivations1) - if (!applyFloatingRule(rule, depth, cell, deriv)) break derivLoop; - ruleTime.put(rule, ruleTime.getOrDefault(rule, 0L) + stopWatch.stop().ms); - } - - // Apply binaries on spans (rule $A ($B $C)), ... 
- for (Rule rule : parser.grammar.getRules()) { - if (timeout && !isRootRule(rule)) continue; - if (!rule.isFloating()) continue; - if (rule.rhs.size() != 2) continue; - if (catSizeBound.getBound(rule.lhs) < depth) continue; - - StopWatch stopWatch = new StopWatch().start(); - String rhs1 = rule.rhs.get(0); - String rhs2 = rule.rhs.get(1); - if (!Rule.isCat(rhs1) || !Rule.isCat(rhs2)) - throw new RuntimeException("Floating rules with > 1 arguments cannot have tokens on the RHS: " + rule); - - if (FloatingParser.opts.useSizeInsteadOfDepth) { - derivLoop: - for (int depth1 = 0; depth1 < depth; depth1++) { // sizes must add up to depth-1 (actually size-1) - int depth2 = depth - 1 - depth1; - String cell1 = floatingCell(rhs1, depth1); - String cell2 = floatingCell(rhs2, depth2); - for (ChildDerivationsGroup group : getFilteredDerivations(rule, cell1, cell2)) - for (Derivation deriv1 : group.derivations1) - for (Derivation deriv2 : group.derivations2) - if (!applyFloatingRule(rule, depth, cell1, deriv1, cell2, deriv2)) break derivLoop; - } - } else { - { - derivLoop: - for (int subDepth = 0; subDepth < depth; subDepth++) { // depth-1 <=depth-1 - String cell1 = floatingCell(rhs1, depth - 1); - String cell2 = floatingCell(rhs2, subDepth); - for (ChildDerivationsGroup group : getFilteredDerivations(rule, cell1, cell2)) - for (Derivation deriv1 : group.derivations1) - for (Derivation deriv2 : group.derivations2) - if (!applyFloatingRule(rule, depth, cell1, deriv1, cell2, deriv2)) break derivLoop; - } - } - { - derivLoop: - for (int subDepth = 0; subDepth < depth - 1; subDepth++) { // (); - ParserState.computeExpectedCounts(predDerivations, expectedCounts); - } - StopWatchSet.end(); - LogInfo.end_track(); - } - - private void runParsingPass() { - // Create a parsing thread and run for some time - timeout = false; - Thread parsingThread = new Thread(new DPDParserParsingThread()); - parsingThread.start(); - try { - parsingThread.join(DPDParser.opts.maxDPDParsingTime * 1000); - 
if (parsingThread.isAlive()) { - // This will only interrupt first or second passes, not the final candidate collection. - LogInfo.warnings("Parsing time exceeded %d seconds. Will now interrupt ...", DPDParser.opts.maxDPDParsingTime); - timeout = true; - parsingThread.interrupt(); - parsingThread.join(); - } - } catch (InterruptedException e) { - e.printStackTrace(); - LogInfo.fails("DPDParser error: %s", e); - } - evaluation.add((currentPass == ParsingPass.FIRST ? "first" : "second") + "passTimeout", timeout); - } - - public class DPDParserParsingThread implements Runnable { - @Override - public void run() { - ruleTime = new HashMap<>(); - - Set categories = new HashSet(); - for (Rule rule : parser.grammar.getRules()) - categories.add(rule.lhs); - - // Set the pruner - if (currentPass == ParsingPass.FIRST) { - pruner.setCustomAllowedPruningStrategies(DPDParser.opts.allowedPrunersInFirstPass); - } else { - pruner.setCustomAllowedPruningStrategies(null); // All pruners allowed - } - - // Base case ($TOKEN, $PHRASE, $LEMMA_PHRASE) - // Denotations are StringValue - for (Derivation deriv : gatherTokenAndPhraseDerivations()) { - ensureExecuted(deriv); - addToChart(deriv, new Ingredient(anchoredCell(deriv.cat, deriv.start, deriv.end)), null, null); - } - - // Build up anchored derivations - int numTokens = ex.numTokens(); - for (int len = 1; len <= numTokens; len++) { - for (int i = 0; i + len <= numTokens; i++) { - buildAnchored(i, i + len); - for (String cat : categories) { - if (Rule.rootCat.equals(cat)) continue; - pruneBeam(anchoredCell(cat, i, i + len)); - } - } - } - - // Build up floating derivations - for (int depth = 0; depth <= maxDepth; depth++) { - if (Parser.opts.verbose >= 1) - LogInfo.begin_track("(%s) %s = %d", currentPass, FloatingParser.opts.useSizeInsteadOfDepth ? 
"SIZE" : "DEPTH", depth); - buildFloating(depth); - for (String cat : categories) { - if (Rule.rootCat.equals(cat)) continue; - pruneBeam(floatingCell(cat, depth)); - } - if (Parser.opts.verbose >= 1) { - Map statistics = countNumCells(getCellsForCurrentPass()); - LogInfo.logs("(%s) %d cells | %d unique-denotations | %d cell-denotations | %d derivations", - currentPass, statistics.get("Cells"), statistics.get("UniqueDenotations"), - statistics.get("CellDenotations"), statistics.get("Derivations")); - LogInfo.end_track(); - } - int numCellDenotations = getNumCellDenotations(), maxNumCellDenotations = DPDParser.opts.maxNumCellDenotations; - if (depth != maxDepth && maxNumCellDenotations >= 0 && maxNumCellDenotations < numCellDenotations) { - LogInfo.logs("Stop parsing: number of (cell, denotation) pairs is %d > %d", - numCellDenotations, maxNumCellDenotations); - break; - } - } - if (DPDParser.opts.dumpDenotations != DPDParser.DumpSpec.NONE) dumpDenotations(getCellsForCurrentPass()); - if (DPDParser.opts.summarizeDenotations) classifyUniqueDenotations(getCellsForCurrentPass()); - if (FloatingParser.opts.summarizeRuleTime) summarizeRuleTime(); - } - } - - // Prune to the beam size - private void pruneBeam(String cell) { - if (currentPass == ParsingPass.FIRST && DPDParser.opts.firstPassBeamSize > 0) { - Map> cells = getCellsForCurrentPass(); - Map denotationToData = cells.get(cell); - if (denotationToData == null || denotationToData.size() <= DPDParser.opts.firstPassBeamSize) return; - // TODO: Prune based on some criteria - if (Parser.opts.verbose >= 1) - LogInfo.logs("Pruning first pass beam: %d => %d", denotationToData.entrySet().size(), DPDParser.opts.firstPassBeamSize); - List> pruned = new ArrayList<>(denotationToData.entrySet()); - denotationToData = new HashMap<>(); - for (Map.Entry entry : pruned.subList(0, DPDParser.opts.firstPassBeamSize)) - denotationToData.put(entry.getKey(), entry.getValue()); - cells.put(cell, denotationToData); - } else if (currentPass 
== ParsingPass.SECOND) { - Map> cells = getCellsForCurrentPass(); - Map denotationToData = cells.get(cell); - if (denotationToData == null) return; - for (Metadata metadata : denotationToData.values()) { - pruneCell(cell, metadata.derivations); - } - } - } - - // ============================================================ - // Collect ingredients (after FIRST pass) - // ============================================================ - - private void collectPossibleIngredients() { - if (Parser.opts.verbose >= 4) - LogInfo.logs("DPDParserState.collectPossibleIngredients()"); - Set usedBps = new HashSet<>(); - collectPossibleIngredients(anchoredCell(Rule.rootCat, 0, numTokens), usedBps); - for (int depth = 1; depth <= maxDepth; depth++) - collectPossibleIngredients(floatingCell(Rule.rootCat, depth), usedBps); - if (Parser.opts.verbose >= 4 || DPDParser.opts.dumpAllowedIngredients) { - LogInfo.begin_track("allowedDenotationIngredients"); - Set sorted = new TreeSet<>(); - for (Ingredient ingredient : allowedIngredients) - sorted.add(ingredient.toString()); - for (String ingredient : sorted) - LogInfo.logs("%s", ingredient); - LogInfo.end_track(); - } - } - - private void collectPossibleIngredients(String cell, Set usedBps) { - if (Parser.opts.verbose >= 4) - LogInfo.logs("DPDParserState.collectPossibleIngredients(%s)", cell); - Map denotationToMetadata = firstPassCells.get(cell); - if (denotationToMetadata == null) return; - for (Value denotation : denotationToMetadata.keySet()) { - double compatibility = parser.valueEvaluator.getCompatibility(ex.targetValue, denotation); - if (compatibility != 1) continue; - if (Parser.opts.verbose >= 2) - LogInfo.logs("[%f] %s", compatibility, denotationToMetadata.get(denotation).derivations.get(0)); - BackPointer bp = new BackPointer(cell, denotation); - if (!usedBps.contains(bp)) - collectPossibleIngredients(bp, usedBps, 0); - } - } - - private void collectPossibleIngredients(BackPointer bp, Set usedBps, int depth) { - if 
(Parser.opts.verbose >= 4) - LogInfo.logs("DPDParserState.collectPossibleIngredients(%s)", bp); - usedBps.add(bp); - Map denotationToMetadata = firstPassCells.get(bp.cell); - if (denotationToMetadata == null) return; - Metadata metadata = denotationToMetadata.get(bp.value); - if (metadata == null) return; - allowedIngredients.addAll(metadata.possibleIngredients); - if (Parser.opts.verbose >= 4) - LogInfo.logs("Adding %s", metadata.possibleIngredients); - // Recurse - for (BackPointer childBp : metadata.backPointers) { - if (!usedBps.contains(childBp)) - collectPossibleIngredients(childBp, usedBps, depth + 1); - } - } - - // ============================================================ - // Collect final derivations (after SECOND pass) - // ============================================================ - - private void collectFinalDerivations() { - String cellName = anchoredCell(Rule.rootCat, 0, numTokens); - for (Derivation deriv : getDerivations(cellName)) { - if (DPDParser.opts.putCellNameInCanonicalUtterance) - deriv.canonicalUtterance = cellName; - predDerivations.add(deriv); - } - for (int depth = 0; depth <= maxDepth; depth++) { - cellName = floatingCell(Rule.rootCat, depth); - for (Derivation deriv : getDerivations(cellName)) { - if (DPDParser.opts.putCellNameInCanonicalUtterance) - deriv.canonicalUtterance = cellName; - predDerivations.add(deriv); - } - } - if (backoffParserState != null) { - // Also combine derivations from the backoff parser state - LogInfo.begin_track("Backoff ParserState"); - backoffParserState.infer(); - predDerivations.addAll(backoffParserState.predDerivations); - // Prevent oracles from always being at the front. 
- Collections.shuffle(predDerivations, DPDParser.opts.shuffleRandom); - LogInfo.end_track(); - } - } - - // ============================================================ - // Collect statistics - // ============================================================ - - // Collect the statistics and put them into the Evaluation object - @Override - protected void setEvaluation() { - super.setEvaluation(); - // Parse times - evaluation.add("firstPassParseTime", firstPassParseTime); - evaluation.add("secondPassParseTime", secondPassParseTime); - // Number of cells - for (Map.Entry entry : countNumCells(firstPassCells).entrySet()) - evaluation.add("firstPass" + entry.getKey(), entry.getValue()); - for (Map.Entry entry : countNumCells(secondPassCells).entrySet()) - evaluation.add("secondPass" + entry.getKey(), entry.getValue()); - // Number of possible ingredients - evaluation.add("allowedIngredients", allowedIngredients.size()); - } - - private Map countNumCells(Map> cells) { - int numAnchored = 0, numFloating = 0, numDenotations = 0, - numErrorDenotations = 0, numUniqueErrorDenotations = 0, numDerivations = 0; - Set uniqueDenotations = new HashSet<>(); - for (Map.Entry> entry : cells.entrySet()) { - if (entry.getKey().contains(",")) numAnchored++; else numFloating++; - for (Map.Entry subentry : entry.getValue().entrySet()) { - Value denotation = subentry.getKey(); - numDenotations++; - uniqueDenotations.add(denotation); - if (denotation instanceof ErrorValue) - numErrorDenotations++; - numDerivations += subentry.getValue().derivations.size(); - } - } - for (Value denotation : uniqueDenotations) { - if (denotation instanceof ErrorValue) - numUniqueErrorDenotations++; - } - Map statistics = new HashMap<>(); - statistics.put("Cells", cells.size()); - statistics.put("Anchored", numAnchored); - statistics.put("Floating", numFloating); - statistics.put("CellDenotations", numDenotations); - statistics.put("ErrorDenotations", numErrorDenotations); - 
statistics.put("UniqueDenotations", uniqueDenotations.size()); - statistics.put("UniqueErrorDenotations", numUniqueErrorDenotations); - statistics.put("Derivations", numDerivations); - return statistics; - } - - private int getNumCellDenotations() { - int numDenotations = 0; - for (Map value : getCellsForCurrentPass().values()) { - numDenotations += value.size(); - } - return numDenotations; - } - - // ============================================================ - // Debug: print all denotations in all cells - // ============================================================ - - protected void dumpDenotations(Map> cells) { - Map denotationToSampleFormula = new TreeMap<>(); - for (Map.Entry> entry : cells.entrySet()) { - String cellName = entry.getKey(); - if (DPDParser.opts.dumpDenotations == DPDParser.DumpSpec.NONERROR && Grammar.isIntermediate(cellName)) continue; - if (cellName.contains(":")) { - String[] parts = cellName.split(":"); - assert parts.length == 2; - cellName = String.format("&%2s:%s", parts[1], parts[0]); - } - for (Map.Entry subentry : entry.getValue().entrySet()) { - Value denotation = subentry.getKey(); - String key = null; - switch (DPDParser.opts.dumpDenotations) { - case UNIQUE: - key = denotation.toString(); - break; - case NONERROR: - if (denotation instanceof ErrorValue || denotation instanceof DPDErrorValue) continue; - key = cellName + " | " + denotation; - break; - case ALL: - key = cellName + " | " + denotation; - break; - default: - throw new RuntimeException("Unknown dump option: " + DPDParser.opts.dumpDenotations); - } - if (!denotationToSampleFormula.containsKey(key)) - denotationToSampleFormula.put(key, subentry.getValue().derivations.get(0).formula); - } - } - LogInfo.begin_track("%s DENOTATIONS", DPDParser.opts.dumpDenotations); - for (Map.Entry entry : denotationToSampleFormula.entrySet()) - LogInfo.logs("%s | %s", entry.getKey(), entry.getValue()); - LogInfo.end_track(); - } - - // 
============================================================ - // Debug: classify unique-denotations by attributes - // ============================================================ - - protected void classifyUniqueDenotations(Map> cells) { - Set denotations = new HashSet<>(); - Map attributeCounter = new TreeMap<>(); - for (Map.Entry> entry : cells.entrySet()) { - for (Map.Entry subentry : entry.getValue().entrySet()) { - Value denotation = subentry.getKey(); - String key = denotation.toString(); - if (denotations.contains(key)) continue; - MapUtils.incr(attributeCounter, getDenotationAttributes(denotation)); - denotations.add(key); - } - } - LogInfo.begin_track("Denotation Classification"); - for (Map.Entry entry : attributeCounter.entrySet()) { - LogInfo.logs("%7d (%6.2f%%) : %s", entry.getValue(), - entry.getValue() * 100.0 / denotations.size(), entry.getKey()); - } - LogInfo.end_track(); - } - - protected String getDenotationAttributes(Value denotation) { - StringBuilder sb = new StringBuilder(); - if (denotation instanceof ListValue || denotation instanceof InfiniteListValue) { - sb.append("L"); - if (denotation instanceof InfiniteListValue) { - sb.append("|size=Inf"); - sb.append("|type=").append(DenotationTypeInference.getValueType(denotation)); - } else { - // Size - List values = ((ListValue) denotation).values; - if (values.size() == 0) sb.append("|size=0"); - else if (values.size() == 1) sb.append("|size=1"); - else sb.append("|size=many"); - // Type - if (!values.isEmpty()) - sb.append("|type=").append(DenotationTypeInference.getValueType(denotation)); - } - } else if (denotation instanceof ScopedValue) { - sb.append("S"); - try { - ListValue head = (ListValue) ((ScopedValue) denotation).head; - PairListValue relation = (PairListValue) ((ScopedValue) denotation).relation; - // Head - if (head.values.size() == 1) sb.append("|hsize=1"); - else sb.append("|hsize=many"); - sb.append("|htype=").append(DenotationTypeInference.getKeyType(denotation)); - // 
Relation - int relationSize = 0; - for (Pair pair : relation.pairs) { - if (pair.getSecond() instanceof ListValue) { - relationSize = Math.max(relationSize, ((ListValue) pair.getSecond()).values.size()); - } else if (pair.getSecond() instanceof InfiniteListValue) { - relationSize = Integer.MAX_VALUE; - } else { - throw new RuntimeException(); - } - } - if (relationSize == 0) sb.append("|vsize=0"); - else if (relationSize == 1) sb.append("|vsize=1"); - else if (relationSize == Integer.MAX_VALUE) sb.append("|vsize=Inf"); - else sb.append("|vsize=many"); - sb.append("|vtype=").append(DenotationTypeInference.getValueType(denotation)); - } catch (Exception e) { - sb.append("|???=").append(e); - } - } else { - // Currently PairListValue and ErrorValue, which don't appear in grow grammar, - // are not handled explicitly. - sb.append("X=" + denotation.getClass().getSimpleName()); - } - return sb.toString(); - } - - // ============================================================ - // Debug: count the number of useful unique-denotations and cell-denotations - // ============================================================ - - private void countUseful() { - Set allUniqueDenotations = new HashSet<>(), usefulUniqueDenotations = new HashSet<>(), - allCellDenotations = new HashSet<>(), usefulCellDenotations = new HashSet<>(); - Set usedBps = new HashSet<>(); - // All cells and denotations - for (Map.Entry> entry : firstPassCells.entrySet()) { - for (Value value : entry.getValue().keySet()) { - // Unique-denotations - String denotation = value.toString(); - allUniqueDenotations.add(denotation); - // Cell-denotations - String cellDenotation = entry.getKey() + " | " + denotation; - allCellDenotations.add(cellDenotation); - } - } - // Useful unique-denotations and cell-denotations - findUseful(anchoredCell(Rule.rootCat, 0, numTokens), usedBps, usefulUniqueDenotations, usefulCellDenotations); - for (int depth = 1; depth <= maxDepth; depth++) - findUseful(floatingCell(Rule.rootCat, 
depth), usedBps, usefulUniqueDenotations, usefulCellDenotations); - // Summarize - LogInfo.begin_track("countUseful %s", ex.id); - LogInfo.logs("uniqueDenotations: %d / %d (%.3f%%)", usefulUniqueDenotations.size(), allUniqueDenotations.size(), - usefulUniqueDenotations.size() * 100.0 / allUniqueDenotations.size()); - LogInfo.logs("cellDenotations: %d / %d (%.3f%%)", usefulCellDenotations.size(), allCellDenotations.size(), - usefulCellDenotations.size() * 100.0 / allCellDenotations.size()); - LogInfo.end_track(); - } - - private void findUseful(String cell, Set usedBps, - Set usefulUniqueDenotations, Set usefulCellDenotations) { - Map denotationToMetadata = firstPassCells.get(cell); - if (denotationToMetadata == null) return; - for (Value denotation : denotationToMetadata.keySet()) { - BackPointer bp = new BackPointer(cell, denotation); - findUseful(bp, usedBps, usefulUniqueDenotations, usefulCellDenotations); - } - } - - private void findUseful(BackPointer bp, Set usedBps, - Set usefulUniqueDenotations, Set usefulCellDenotations) { - usedBps.add(bp); - // Unique-denotations - String denotation = bp.value.toString(); - usefulUniqueDenotations.add(denotation); - // Cell-denotations - String cellDenotation = bp.cell + " | " + denotation; - usefulCellDenotations.add(cellDenotation); - // Recurse - Map denotationToMetadata = firstPassCells.get(bp.cell); - if (denotationToMetadata == null) return; - Metadata metadata = denotationToMetadata.get(bp.value); - if (metadata == null) return; - for (BackPointer childBp : metadata.backPointers) { - if (!usedBps.contains(childBp)) - findUseful(childBp, usedBps, usefulUniqueDenotations, usefulCellDenotations); - } - } - - // ============================================================ - // Debug: summarize the time used in each rule - // ============================================================ - - private void summarizeRuleTime() { - List> entries = new ArrayList<>(ruleTime.entrySet()); - entries.sort(new 
ValueComparator<>(true)); - LogInfo.begin_track("(%s) Rule time", currentPass); - for (Map.Entry entry : entries) { - LogInfo.logs("%9d : %s", entry.getValue(), entry.getKey()); - } - LogInfo.end_track(); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/features/AnchorFeatureComputer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/features/AnchorFeatureComputer.java deleted file mode 100644 index 0b4f098415..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/features/AnchorFeatureComputer.java +++ /dev/null @@ -1,67 +0,0 @@ -package edu.stanford.nlp.sempre.tables.features; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.tables.StringNormalizationUtils; -import edu.stanford.nlp.sempre.tables.TableKnowledgeGraph; -import edu.stanford.nlp.sempre.tables.TableCell; -import edu.stanford.nlp.sempre.tables.TableColumn; -import fig.basic.LogInfo; - -public class AnchorFeatureComputer implements FeatureComputer { - - @Override - public void extractLocal(Example ex, Derivation deriv) { - if (!(FeatureExtractor.containsDomain("anchored-entity"))) return; - if (!(deriv.rule.sem instanceof FuzzyMatchFn)) return; - FuzzyMatchFn sem = (FuzzyMatchFn) deriv.rule.sem; - if (sem.getMatchAny() || sem.getMode() != FuzzyMatchFn.FuzzyMatchFnMode.ENTITY) return; - String phrase = ((StringValue) ((ValueFormula) deriv.child(0).formula).value).value; - NameValue predicate = (NameValue) ((ValueFormula) deriv.formula).value; - TableKnowledgeGraph graph = (TableKnowledgeGraph) ex.context.graph; - extractMatchingFeatures(graph, deriv, phrase, predicate); - } - - private void extractMatchingFeatures(TableKnowledgeGraph graph, - Derivation deriv, String phrase, NameValue predicate) { - String predicateString = graph.getOriginalString(predicate); - //LogInfo.logs("%s -> %s = %s", phrase, predicate, predicateString); - predicateString = 
StringNormalizationUtils.simpleNormalize(predicateString).toLowerCase(); - if (predicateString.equals(phrase)) { - deriv.addFeature("a-e", "exact"); - //LogInfo.logs("%s %s exact", phrase, predicateString); - } else if (predicateString.startsWith(phrase + " ")) { - deriv.addFeature("a-e", "prefix"); - //LogInfo.logs("%s %s prefix", phrase, predicateString); - } else if (predicateString.endsWith(" " + phrase)) { - deriv.addFeature("a-e", "suffix"); - //LogInfo.logs("%s %s suffix", phrase, predicateString); - } else if (predicateString.contains(" " + phrase + " ")){ - deriv.addFeature("a-e", "substring"); - //LogInfo.logs("%s %s substring", phrase, predicateString); - } else { - deriv.addFeature("a-e", "other"); - //LogInfo.logs("%s %s other", phrase, predicateString); - } - // Does the phrase match other cells? - Set matches = new HashSet<>(); - for (TableColumn column : graph.columns) { - for (TableCell cell : column.children) { - String s = StringNormalizationUtils.simpleNormalize(cell.properties.originalString).toLowerCase(); - if (s.contains(phrase) && !cell.properties.id.equals(predicate.id)) { - matches.add(s); - } - } - } - //LogInfo.logs(">> %s", matches); - if (matches.size() == 0) { - deriv.addFeature("a-e", "unique"); - } else if (matches.size() < 3) { - deriv.addFeature("a-e", "multiple;" + matches.size()); - } else { - deriv.addFeature("a-e", "multiple;>=3"); - } - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/features/ColumnCategoryInfo.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/features/ColumnCategoryInfo.java deleted file mode 100644 index d776fc2087..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/features/ColumnCategoryInfo.java +++ /dev/null @@ -1,84 +0,0 @@ -package edu.stanford.nlp.sempre.tables.features; - -import java.io.*; -import java.util.*; - -import edu.stanford.nlp.sempre.Example; -import edu.stanford.nlp.sempre.tables.TableKnowledgeGraph; -import 
fig.basic.*; - -public class ColumnCategoryInfo { - public static class Options { - @Option(gloss = "Read category information from this file") - public String tableCategoryInfo = null; - } - public static Options opts = new Options(); - - // ============================================================ - // Singleton access - // ============================================================ - - private static ColumnCategoryInfo singleton; - - public static ColumnCategoryInfo getSingleton() { - if (opts.tableCategoryInfo == null) - return null; - else if (singleton == null) - singleton = new ColumnCategoryInfo(); - return singleton; - } - - // ============================================================ - // Read data from file - // ============================================================ - - // tableId -> columnIndex -> list of (category, weight) - protected static Map>>> allCategoryInfo = null; - - private ColumnCategoryInfo() { - LogInfo.begin_track("Loading category information from %s", opts.tableCategoryInfo); - allCategoryInfo = new HashMap<>(); - try { - BufferedReader reader = IOUtils.openIn(opts.tableCategoryInfo); - String line; - while ((line = reader.readLine()) != null) { - String[] tokens = line.split("\t"); - String tableId = tokens[0]; - List>> categoryInfoForTable = allCategoryInfo.get(tableId); - if (categoryInfoForTable == null) - allCategoryInfo.put(tableId, categoryInfoForTable = new ArrayList<>()); - int columnIndex = Integer.parseInt(tokens[1]); - // Assume that the columns are ordered - assert categoryInfoForTable.size() == columnIndex; - // Read the category-weight pairs - List> categories = new ArrayList<>(); - for (int i = 2; i < tokens.length; i++) { - String[] pair = tokens[i].split(":"); - categories.add(new Pair<>(pair[0], Double.parseDouble(pair[1]))); - } - categoryInfoForTable.add(categories); - } - reader.close(); - } catch (IOException e) { - throw new RuntimeException(e); - } - LogInfo.end_track(); - } - - // 
============================================================ - // Getters - // ============================================================ - - public List> get(String tableId, int columnIndex) { - return allCategoryInfo.get(tableId).get(columnIndex); - } - - public List> get(Example ex, String columnId) { - TableKnowledgeGraph graph = (TableKnowledgeGraph) ex.context.graph; - String tableId = graph.filename; - int columnIndex = graph.getColumnIndex(columnId); - if (columnIndex == -1) return null; - return allCategoryInfo.get(tableId).get(columnIndex); - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/features/HeadwordInfo.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/features/HeadwordInfo.java deleted file mode 100644 index 0e780d87f3..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/features/HeadwordInfo.java +++ /dev/null @@ -1,94 +0,0 @@ -package edu.stanford.nlp.sempre.tables.features; - -import java.util.concurrent.ExecutionException; - -import com.google.common.cache.*; - -import edu.stanford.nlp.sempre.*; - -/** - * Information about the headword of the utterance. - * - * Examples: - * - Which person is the fastest? ==> (which, person) - * - Who is the fastest person? ==> (who, person) - * - How many cars are red? ==> (how many, red) - * - Who is the fastest? ==> [skipped] - * - * Currently a simple heuristic is used to find the headword. 
- * @author ppasupat - * - */ -public class HeadwordInfo { - - public final String questionWord; - public final String headword; - - public HeadwordInfo(String questionWord, String headword) { - this.questionWord = questionWord; - this.headword = headword; - } - - public String toString() { - return "Q=" + questionWord + ",H=" + headword; - } - - public String questionWordTuple() { - return "Q=" + questionWord; - } - - public String headwordTuple() { - return "H=" + headword; - } - - // Caching - private static final LoadingCache cache = CacheBuilder - .newBuilder().maximumSize(20) - .build(new CacheLoader() { - @Override - public HeadwordInfo load(Example ex) throws Exception { - LanguageInfo langInfo = ex.languageInfo; - String questionWord = "", headWord = ""; - for (int i = 0; i < langInfo.numTokens(); i++) { - String token = langInfo.lemmaTokens.get(i), posTag = langInfo.posTags.get(i); - if (posTag.startsWith("W")) { - if ("who".equals(token) || "where".equals(token) || "when".equals(token)) { - // These are treated as head words - headWord = token; - //LogInfo.logs("HEADWORD: %s => %s | %s", ex.utterance, questionWord, headWord); - return new HeadwordInfo(questionWord.trim(), headWord.trim()); - } - questionWord += " " + token; - if (token.equals("how")) { - // Possibly "how many", "how much", ... 
- if (i + 1 < langInfo.numTokens() && langInfo.posTags.get(i + 1).startsWith("J")) - questionWord += " " + langInfo.lemmaTokens.get(i + 1); - } - } else if (posTag.startsWith("N") && !questionWord.isEmpty()) { - if ("number".equals(token)) { - questionWord += " " + token; - } else { - headWord += " " + token; - while (i + 1 < langInfo.numTokens() && langInfo.posTags.get(i + 1).startsWith("N")) { - i++; - headWord += " " + langInfo.lemmaTokens.get(i); - } - //LogInfo.logs("HEADWORD: %s => %s | %s", ex.utterance, questionWord, headWord); - return new HeadwordInfo(questionWord.trim(), headWord.trim()); - } - } - } - //LogInfo.logs("HEADWORD: %s => NULL", ex.utterance); - return new HeadwordInfo("", ""); - } - }); - - public static HeadwordInfo getHeadwordInfo(Example ex) { - try { - return cache.get(ex); - } catch (ExecutionException e) { - throw new RuntimeException(e.getCause()); - } - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/features/PhraseDenotationFeatureComputer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/features/PhraseDenotationFeatureComputer.java deleted file mode 100644 index 895ef3960b..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/features/PhraseDenotationFeatureComputer.java +++ /dev/null @@ -1,166 +0,0 @@ -package edu.stanford.nlp.sempre.tables.features; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.tables.TableTypeSystem; -import fig.basic.*; - -/** - * Extract features based on (phrase, denotation) pairs. - * Intuition: "when" usually matches a date, which "how many" usually matches a number. 
- * - * @author ppasupat - */ -public class PhraseDenotationFeatureComputer implements FeatureComputer { - public static class Options { - @Option(gloss = "Verbosity") - public int verbose = 0; - } - public static Options opts = new Options(); - - @Override - public void extractLocal(Example ex, Derivation deriv) { - if (!(FeatureExtractor.containsDomain("custom-denotation") - || FeatureExtractor.containsDomain("phrase-denotation") - || FeatureExtractor.containsDomain("headword-denotation"))) return; - // Only compute features at the root. - if (!deriv.isRoot(ex.numTokens())) return; - Collection denotationTypes = tableTypes(deriv); - extractCustomDenotationFeatures(ex, deriv, denotationTypes); - extractPhraseDenotationFeatures(ex, deriv, denotationTypes); - extractHeadwordDenotationFeatures(ex, deriv, denotationTypes); - } - - public static Collection tableTypes(Derivation deriv) { - Set denotationTypes = new HashSet<>(); - // Type based on SemType - populateSemType("", deriv.type, denotationTypes); - // Look for the type under the first cell property - Formula formula = deriv.formula; - if (formula instanceof JoinFormula) { - JoinFormula join = (JoinFormula) formula; - String property = getCellProperty(join.relation); - if (property != null) { - populateSemType(property + "/", TypeInference.inferType(join.child), denotationTypes); - } - } - if (denotationTypes.isEmpty()) denotationTypes.add("OTHER"); - return denotationTypes; - } - - private static void populateSemType(String prefix, SemType type, Collection denotationTypes) { - LispTree tree = type.toLispTree(); - if (tree.isLeaf()) { - denotationTypes.add(prefix + tree.value); - } else { - for (LispTree subtree : tree.children) { - if (!subtree.isLeaf()) continue; - if (subtree.value.startsWith(TableTypeSystem.CELL_SPECIFIC_TYPE_PREFIX)) { - denotationTypes.add(prefix + subtree.value); - denotationTypes.add(prefix + TableTypeSystem.CELL_GENERIC_TYPE); - } - } - } - } - - private static String 
getCellProperty(Formula formula) { - LispTree tree = formula.toLispTree(); - if (tree.isLeaf()) { - String value = tree.value; - if (value.charAt(0) == '!' && value.substring(1).startsWith(TableTypeSystem.CELL_PROPERTY_NAME_PREFIX)) - return value; - } else { - if ("reverse".equals(tree.child(0).value) && tree.child(1).value.startsWith(TableTypeSystem.CELL_PROPERTY_NAME_PREFIX)) - return "!" + tree.child(1).value; - } - return null; - } - - // ============================================================ - // Custom Denotation Features - // ============================================================ - - private void extractCustomDenotationFeatures(Example ex, Derivation deriv, Collection denotationTypes) { - if (!FeatureExtractor.containsDomain("custom-denotation")) return; - - if (deriv.value instanceof ErrorValue) { - deriv.addFeature("custom-denotation", "error"); - return; - } else if (deriv.value instanceof ListValue) { - ListValue list = (ListValue) deriv.value; - int size = list.values.size(); - deriv.addFeature("custom-denotation", "size" + (size < 3 ? "=" + size : ">=" + 3)); - if (size == 1) { - Value value = list.values.get(0); - if (value instanceof NumberValue) { - double number = ((NumberValue) value).value; - deriv.addFeature("custom-denotation", "number" + (number > 0 ? ">0" : number == 0 ? "=0" : "<0")); - deriv.addFeature("custom-denotation", "number" + ((int) number == number ? 
"-int" : "-frac")); - } - } - } - } - - // ============================================================ - // Phrase - Denotation - // ============================================================ - - private void extractPhraseDenotationFeatures(Example ex, Derivation deriv, Collection denotationTypes) { - if (!FeatureExtractor.containsDomain("phrase-denotation")) return; - List phraseInfos = PhraseInfo.getPhraseInfos(ex); - if (opts.verbose >= 2) - LogInfo.logs("%s %s %s", deriv.value, deriv.type, denotationTypes); - for (String denotationType : denotationTypes) { - for (PhraseInfo phraseInfo : phraseInfos) { - if (PhraseInfo.opts.forbidBorderStopWordInLexicalizedFeatures && phraseInfo.isBorderStopWord) continue; - deriv.addFeature("p-d", phraseInfo.lemmaText + ";" + denotationType); - } - // Check original column text - String[] tokens = denotationType.split("/"); - String actualType = tokens[tokens.length - 1], suffix = (tokens.length == 1) ? "" : "(" + tokens[0] + ")"; - String originalColumn; - if ((originalColumn = PredicateInfo.getOriginalString(actualType, ex)) != null) { - originalColumn = PredicateInfo.getLemma(originalColumn); - for (PhraseInfo phraseInfo : phraseInfos) { - if (phraseInfo.lemmaText.equals(originalColumn)) { - if (opts.verbose >= 2) - LogInfo.logs("%s %s %s %s", phraseInfo, actualType, originalColumn, Arrays.asList(tokens)); - deriv.addFeature("p-d", "=" + suffix); - } - } - } - } - } - - // ============================================================ - // Headword - Denotation - // ============================================================ - - private void extractHeadwordDenotationFeatures(Example ex, Derivation deriv, Collection denotationTypes) { - if (!FeatureExtractor.containsDomain("headword-denotation")) return; - HeadwordInfo headwordInfo = HeadwordInfo.getHeadwordInfo(ex); - if (headwordInfo.questionWord.isEmpty() && headwordInfo.headword.isEmpty()) return; - if (opts.verbose >= 2) - LogInfo.logs("%s [%s] | %s %s %s", 
ex.utterance, headwordInfo, deriv.value, deriv.type, denotationTypes); - for (String denotationType : denotationTypes) { - deriv.addFeature("h-d", headwordInfo + ";" + denotationType); - deriv.addFeature("h-d", headwordInfo.questionWordTuple() + ";" + denotationType); - deriv.addFeature("h-d", headwordInfo.headwordTuple() + ";" + denotationType); - // Check original column text - String[] tokens = denotationType.split("/"); - String actualType = tokens[tokens.length - 1], suffix = (tokens.length == 1) ? "" : "(" + tokens[0] + ")"; - String originalColumn; - if ((originalColumn = PredicateInfo.getOriginalString(actualType, ex)) != null) { - originalColumn = PredicateInfo.getLemma(originalColumn); - if (headwordInfo.headword.equals(originalColumn)) { - if (opts.verbose >= 2) - LogInfo.logs("%s %s %s %s", headwordInfo, actualType, originalColumn, Arrays.asList(tokens)); - deriv.addFeature("h-d", "=" + suffix); - deriv.addFeature("h-d", headwordInfo.questionWordTuple() + "=" + suffix); - } - } - } - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/features/PhraseInfo.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/features/PhraseInfo.java deleted file mode 100644 index 86f06fc38c..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/features/PhraseInfo.java +++ /dev/null @@ -1,121 +0,0 @@ -package edu.stanford.nlp.sempre.tables.features; - -import java.util.*; -import java.util.concurrent.ExecutionException; -import java.util.regex.Pattern; - -import com.google.common.cache.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.FuzzyMatchFn.FuzzyMatchFnMode; -import edu.stanford.nlp.sempre.tables.TableKnowledgeGraph; -import fig.basic.*; - -/** - * Represents a phrase in the utterance. - * - * Also contains additional information such as POS and NER tags. 
- * - * @author ppasupat - */ -public class PhraseInfo { - public static class Options { - @Option(gloss = "Maximum number of tokens in a phrase") - public int maxPhraseLength = 3; - @Option(gloss = "Fuzzy match predicates") - public boolean computeFuzzyMatchPredicates = false; - @Option(gloss = "Do not produce lexicalized features if the phrase begins or ends with a stop word") - public boolean forbidBorderStopWordInLexicalizedFeatures = true; - } - public static Options opts = new Options(); - - public final int start, end, endOffset; - public final String text; - public final String lemmaText; - public final List tokens; - public final List lemmaTokens; - public final List posTags; - public final List nerTags; - public final String canonicalPosSeq; - public final List fuzzyMatchedPredicates; - public final boolean isBorderStopWord; // true if the first or last word is a stop word - - public PhraseInfo(Example ex, int start, int end) { - this.start = start; - this.end = end; - LanguageInfo languageInfo = ex.languageInfo; - this.endOffset = end - languageInfo.numTokens(); - tokens = languageInfo.tokens.subList(start, end); - lemmaTokens = languageInfo.tokens.subList(start, end); - posTags = languageInfo.posTags.subList(start, end); - nerTags = languageInfo.nerTags.subList(start, end); - text = languageInfo.phrase(start, end).toLowerCase(); - lemmaText = languageInfo.lemmaPhrase(start, end).toLowerCase(); - canonicalPosSeq = languageInfo.canonicalPosSeq(start, end); - fuzzyMatchedPredicates = opts.computeFuzzyMatchPredicates ? 
getFuzzyMatchedPredicates(ex.context) : null; - isBorderStopWord = isStopWord(languageInfo.lemmaTokens.get(start)) || isStopWord(languageInfo.lemmaTokens.get(end - 1)); - } - - private List getFuzzyMatchedPredicates(ContextValue context) { - if (context == null || context.graph == null || !(context.graph instanceof TableKnowledgeGraph)) - return null; - TableKnowledgeGraph graph = (TableKnowledgeGraph) context.graph; - List matchedPredicates = new ArrayList<>(); - // Assume everything is ValueFormula with NameValue inside - List formulas = new ArrayList<>(); - formulas.addAll(graph.getFuzzyMatchedFormulas(text, FuzzyMatchFnMode.ENTITY)); - formulas.addAll(graph.getFuzzyMatchedFormulas(text, FuzzyMatchFnMode.BINARY)); - for (Formula formula : formulas) { - if (formula instanceof ValueFormula) { - Value value = ((ValueFormula) formula).value; - if (value instanceof NameValue) { - matchedPredicates.add(((NameValue) value).id); - } - } - } - return matchedPredicates; - } - - static final Pattern ALL_PUNCT = Pattern.compile("^[^A-Za-z0-9]*$"); - static final Set STOP_WORDS = new HashSet<>(Arrays.asList( - "a", "an", "the", "be", "of", "in", "on", "do" - )); - - static boolean isStopWord(String x) { - if (ALL_PUNCT.matcher(x).matches()) return true; - if (STOP_WORDS.contains(x)) return true; - return false; - } - - @Override - public String toString() { - return "\"" + text + "\""; - } - - // Caching - private static final LoadingCache> cache = CacheBuilder.newBuilder() - .maximumSize(20) - .build( - new CacheLoader>() { - @Override - public List load(Example ex) throws Exception { - List phraseInfos = new ArrayList<>(); - List tokens = ex.languageInfo.tokens; - for (int s = 1; s <= opts.maxPhraseLength; s++) { - for (int i = 0; i <= tokens.size() - s; i++) { - phraseInfos.add(new PhraseInfo(ex, i, i + s)); - } - } - return phraseInfos; - } - }); - - public static List getPhraseInfos(Example ex) { - try { - return cache.get(ex); - } catch (ExecutionException e) { - throw 
new RuntimeException(e.getCause()); - } - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/features/PhrasePredicateFeatureComputer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/features/PhrasePredicateFeatureComputer.java deleted file mode 100644 index e61c2d27d4..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/features/PhrasePredicateFeatureComputer.java +++ /dev/null @@ -1,217 +0,0 @@ -package edu.stanford.nlp.sempre.tables.features; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.tables.features.PredicateInfo.PredicateType; -import fig.basic.*; - -/** - * Extract features based on (phrase, predicate) pairs. - * - * - |phrase| is an n-gram from the utterance (usually n = 1) - * - |predicate| is a predicate (LispTree leaf) from the formula - * Example: fb:cell_name.barack_obama, fb:row.row.name, argmax - * - * Properties of phrases: POS tags, length, word shapes, ... - * Properties of predicates: category (entity / binary / keyword), ... - * Properties of alignment: exact match, prefix match, suffix match, string contains, ... 
- * - * @author ppasupat - */ -public class PhrasePredicateFeatureComputer implements FeatureComputer { - public static class Options { - @Option(gloss = "Verbosity") - public int verbose = 0; - @Option(gloss = "Define features on partial derivations as well") - public boolean defineOnPartialDerivs = true; - @Option(gloss = "Also define features on prefix and suffix matches") - public boolean usePrefixSuffixMatch = true; - @Option(gloss = "Also define features with POS tags") - public boolean usePosFeatures = true; - @Option(gloss = "Define unlexicalized phrase-predicate features") - public boolean unlexicalizedPhrasePredicate = true; - @Option(gloss = "Define lexicalized phrase-predicate features") - public boolean lexicalizedPhrasePredicate = true; - @Option(gloss = "Maximum ngram length for lexicalize all pair features") - public int maxNforLexicalizeAllPairs = Integer.MAX_VALUE; - @Option(gloss = "phrase-category: Weight threshold") - public double phraseCategoryWeightThreshold = 0.8; - @Option(gloss = "phrase-category: Use binary features instead of continuous ones") - public boolean phraseCategoryBinary = true; - - } - public static Options opts = new Options(); - - public final int maxNforLexicalizeAllPairs; - - public PhrasePredicateFeatureComputer() { - maxNforLexicalizeAllPairs = Math.min(opts.maxNforLexicalizeAllPairs, PhraseInfo.opts.maxPhraseLength); - } - - @Override - public void extractLocal(Example ex, Derivation deriv) { - if (!(FeatureExtractor.containsDomain("phrase-predicate") - || FeatureExtractor.containsDomain("phrase-formula") - || FeatureExtractor.containsDomain("phrase-category"))) return; - // Only compute features at the root, except when the partial option is set. 
- if (!opts.defineOnPartialDerivs && !deriv.isRoot(ex.numTokens())) return; - List phraseInfos = PhraseInfo.getPhraseInfos(ex); - List predicateInfos = PredicateInfo.getPredicateInfos(ex, deriv); - if (opts.verbose >= 2) { - LogInfo.logs("Example: %s", ex.utterance); - LogInfo.logs("Phrases: %s", phraseInfos); - LogInfo.logs("Derivation: %s", deriv); - LogInfo.logs("Predicates: %s", predicateInfos); - } - if (FeatureExtractor.containsDomain("phrase-predicate") - || FeatureExtractor.containsDomain("phrase-category")) { - if (opts.defineOnPartialDerivs) { - deriv.getTempState().put("p-p", new ArrayList<>(predicateInfos)); - // Subtract predicates from children - Map predicateInfoCounts = new HashMap<>(); - for (PredicateInfo predicateInfo : predicateInfos) - MapUtils.incr(predicateInfoCounts, predicateInfo); - if (deriv.children != null) { - for (Derivation child : deriv.children) { - @SuppressWarnings("unchecked") - List childPredicateInfos = (List) child.getTempState().get("p-p"); - for (PredicateInfo predicateInfo : childPredicateInfos) - MapUtils.incr(predicateInfoCounts, predicateInfo, -1); - } - } - for (PhraseInfo phraseInfo : phraseInfos) { - for (Map.Entry entry : predicateInfoCounts.entrySet()) { - if (entry.getValue() != 0) - extractMatch(ex, deriv, phraseInfo, entry.getKey(), entry.getValue()); - } - } - } else { - for (PhraseInfo phraseInfo : phraseInfos) { - for (PredicateInfo predicateInfo : predicateInfos) { - extractMatch(ex, deriv, phraseInfo, predicateInfo, 1); - } - } - } - } - if (FeatureExtractor.containsDomain("missing-predicate")) { - extractMissing(ex, deriv, phraseInfos, predicateInfos); - } - } - - // ============================================================ - // Matching - // ============================================================ - - private void extractMatch(Example ex, Derivation deriv, PhraseInfo phraseInfo, PredicateInfo predicateInfo, double factor) { - if (predicateInfo.originalString != null) { - extractMatch(ex, deriv, 
phraseInfo, phraseInfo.lemmaText, "", - predicateInfo, predicateInfo.originalString, "(o)", factor); - } else { - extractMatch(ex, deriv, phraseInfo, phraseInfo.lemmaText, "", - predicateInfo, predicateInfo.predicate, "(i)", factor); - } - } - - private void extractMatch(Example ex, Derivation deriv, - PhraseInfo phraseInfo, String phraseString, String phraseType, - PredicateInfo predicateInfo, String predicateString, String predicateType, double factor) { - if (FeatureExtractor.containsDomain("phrase-predicate") && opts.unlexicalizedPhrasePredicate) { - if (phraseString.equals(predicateString)) { - defineFeatures(ex, deriv, phraseInfo, predicateInfo, phraseType + "=" + predicateType, - phraseString, predicateString, factor); - } else if (opts.usePrefixSuffixMatch) { - if (predicateString.startsWith(phraseString)) { - defineFeatures(ex, deriv, phraseInfo, predicateInfo, "*_" + phraseType + "=" + predicateType, - phraseString, predicateString, factor); - } - if (predicateString.endsWith(phraseString)) { - defineFeatures(ex, deriv, phraseInfo, predicateInfo, "_*" + phraseType + "=" + predicateType, - phraseString, predicateString, factor); - } - if (phraseString.startsWith(predicateString)) { - defineFeatures(ex, deriv, phraseInfo, predicateInfo, phraseType + "=_*" + predicateType, - phraseString, predicateString, factor); - } - if (phraseString.endsWith(predicateString)) { - defineFeatures(ex, deriv, phraseInfo, predicateInfo, phraseType + "=*_" + predicateType, - phraseString, predicateString, factor); - } - } - } - if (FeatureExtractor.containsDomain("phrase-predicate") && opts.lexicalizedPhrasePredicate - && phraseInfo.end - phraseInfo.start <= maxNforLexicalizeAllPairs - && (!PhraseInfo.opts.forbidBorderStopWordInLexicalizedFeatures || !phraseInfo.isBorderStopWord)) { - deriv.addFeature("p-p", - phraseType + phraseString + ";" + predicateType + predicateString, factor); - } - if (FeatureExtractor.containsDomain("phrase-category") && predicateInfo.type == 
PredicateType.BINARY - && (!PhraseInfo.opts.forbidBorderStopWordInLexicalizedFeatures || !phraseInfo.isBorderStopWord)) { - ColumnCategoryInfo catInfo = ColumnCategoryInfo.getSingleton(); - List> categories = catInfo.get(ex, predicateInfo.predicate); - if (categories != null) { - for (Pair pair : categories) { - if (pair.getSecond() >= opts.phraseCategoryWeightThreshold) { - if (opts.phraseCategoryBinary) - deriv.addFeature("p-c", phraseType + phraseString + ";" + pair.getFirst()); - else - deriv.addFeature("p-c", phraseType + phraseString + ";" + pair.getFirst(), pair.getSecond()); - } - } - } - } - } - - private void defineFeatures(Example ex, Derivation deriv, PhraseInfo phraseInfo, PredicateInfo predicateInfo, - String featurePrefix, String phraseString, String predicateString, double factor) { - defineFeatures(ex, deriv, phraseInfo, predicateInfo, featurePrefix, factor); - if (opts.usePosFeatures) - defineFeatures(ex, deriv, phraseInfo, predicateInfo, - featurePrefix + "," + phraseInfo.canonicalPosSeq, factor); - } - - private void defineFeatures(Example ex, Derivation deriv, PhraseInfo phraseInfo, PredicateInfo predicateInfo, - String featurePrefix, double factor) { - if (opts.verbose >= 2) LogInfo.logs("defineFeatures: %s %s %s %s", - featurePrefix, phraseInfo, predicateInfo, predicateInfo.type); - deriv.addFeature("p-p", featurePrefix, factor); - deriv.addFeature("p-p", featurePrefix + "," + predicateInfo.type, factor); - } - - // ============================================================ - // Missing predicate features - // ============================================================ - - private void extractMissing(Example ex, Derivation deriv, List phraseInfos, List predicateInfos) { - // Only makes sense at the root - if (!deriv.isRoot(ex.numTokens())) return; - // Get the list of all relevant predicates - Set relevantPredicates = new HashSet<>(); - for (PredicateInfo predicateInfo : predicateInfos) { - if (predicateInfo.type == PredicateType.BINARY || 
predicateInfo.type == PredicateType.ENTITY) { - String predicate = predicateInfo.predicate; - if (predicate.charAt(0) == '!') predicate = predicate.substring(1); - relevantPredicates.add(predicate); - } - } - // See which predicates are missing! - Set missingPredicates = new HashSet<>(); - for (PhraseInfo phraseInfo : phraseInfos) { - if (phraseInfo.fuzzyMatchedPredicates == null) continue; - for (String predicate : phraseInfo.fuzzyMatchedPredicates) { - if (!relevantPredicates.contains(predicate)) { - missingPredicates.add(predicate); - missingPredicates.add("type=" + PredicateInfo.inferType(predicate)); - } - } - } - if (opts.verbose >= 2) { - LogInfo.logs("have %s", relevantPredicates); - LogInfo.logs("missing %s", missingPredicates); - } - for (String missing : missingPredicates) { - deriv.addFeature("m-p", missing); - } - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/features/PredicateInfo.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/features/PredicateInfo.java deleted file mode 100644 index 0267f1b51b..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/features/PredicateInfo.java +++ /dev/null @@ -1,227 +0,0 @@ -package edu.stanford.nlp.sempre.tables.features; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.tables.*; -import fig.basic.*; - -/** - * Represents a predicate in the formula. - * - * Also contains additional information such as type and original string. 
- * - * @author ppasupat - */ -public class PredicateInfo { - public static class Options { - @Option(gloss = "Allow repreated predicates") - public boolean allowRepeats = false; - @Option(gloss = "Maximum length of predicate string") - public int maxPredicateLength = 40; - } - public static Options opts = new Options(); - - static enum PredicateType { KEYWORD, ENTITY, BINARY }; - - public final String predicate; - public final String originalString; - public final PredicateType type; - - public PredicateInfo(String predicate, ContextValue context) { - this.predicate = predicate; - this.type = inferType(predicate); - String s = getOriginalString(predicate, context); - this.originalString = (s == null) ? null : s.toLowerCase(); - } - - public static PredicateType inferType(String predicate) { - if (predicate.charAt(0) == '!') predicate = predicate.substring(1); - if (predicate.startsWith(CanonicalNames.PREFIX)) { - if (CanonicalNames.isUnary(predicate)) { - return PredicateType.ENTITY; - } else if (CanonicalNames.isBinary(predicate)) { - return PredicateType.BINARY; - } else { - throw new RuntimeException("Unrecognized predicate: " + predicate); - } - } else { - return PredicateType.KEYWORD; - } - } - - @Override - public String toString() { - return predicate + "(" + originalString + ")"; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || !(o instanceof PredicateInfo)) return false; - return predicate.equals(((PredicateInfo) o).predicate); - } - - @Override - public int hashCode() { - return predicate.hashCode(); - } - - // ============================================================ - // Get original strings and lemmas - // ============================================================ - - // Lemma cache - private static final Map lemmaCache = new HashMap<>(); - - // Helper function: get lemma form - public static synchronized String getLemma(String s) { - if (s == null || s.trim().isEmpty()) return null; - String 
lemma = lemmaCache.get(s); - if (lemma == null) { - LanguageInfo langInfo = LanguageAnalyzer.getSingleton().analyze(s); - lemma = (langInfo.numTokens() == 0) ? "" : langInfo.lemmaPhrase(0, langInfo.numTokens()); - lemmaCache.put(s, lemma); - } - return lemma; - } - - // Helper function: get original string from the table - public static String getOriginalString(String predicate, Example ex) { - return getOriginalString(predicate, ex.context); - } - - //Helper function: get original string from the table - public static String getOriginalString(String predicate, ContextValue context) { - if (context == null || context.graph == null || !(context.graph instanceof TableKnowledgeGraph)) - return null; - return getOriginalString(predicate, (TableKnowledgeGraph) context.graph); - } - - // Helper function: get original string from the table - public static String getOriginalString(String predicate, TableKnowledgeGraph graph) { - String s = graph.getOriginalString(predicate); - s = getLemma(s); - if (s != null && s.trim().isEmpty()) s = null; - return s; - } - - // ============================================================ - // Get the list of all PredicateInfos - // ============================================================ - - public static List getPredicateInfos(Example ex, Derivation deriv) { - Collection predicates; - Formula formula = deriv.formula; - FormulaTraverser traverser = new FormulaTraverser(ex); - traverser.traverse(formula); - predicates = traverser.predicates; - List answer = new ArrayList<>(); - for (PredicateInfo p : predicates) { - if (p.originalString == null || p.originalString.length() <= opts.maxPredicateLength) - answer.add(p); - } - return answer; - } - - private static class FormulaTraverser { - public final Collection predicates; - private final ContextValue context; - - public FormulaTraverser(Example ex) { - this.predicates = opts.allowRepeats ? 
new ArrayList<>() : new HashSet<>(); - this.context = ex.context; - } - - public void traverse(Formula formula) { - if (formula instanceof ValueFormula) { - Value value = ((ValueFormula) formula).value; - if (value instanceof NumberValue) { - NumberValue number = (NumberValue) value; - predicates.add(new PredicateInfo("number", context)); - predicates.add(new PredicateInfo(Fmt.D(number.value), context)); - - } else if (value instanceof DateValue) { - DateValue date = (DateValue) value; - predicates.add(new PredicateInfo("date", context)); - // Use prefixes to distinguish from numbers - predicates.add(new PredicateInfo("y:" + Fmt.D(date.year), context)); - predicates.add(new PredicateInfo("m:" + Fmt.D(date.month), context)); - predicates.add(new PredicateInfo("d:" + Fmt.D(date.day), context)); - - } else if (value instanceof StringValue) { - StringValue string = (StringValue) value; - predicates.add(new PredicateInfo("string", context)); - predicates.add(new PredicateInfo(string.value, context)); - - } else if (value instanceof NameValue) { - NameValue name = (NameValue) value; - String id = name.id; - predicates.add(new PredicateInfo(id, context)); - } - - } else if (formula instanceof JoinFormula) { - JoinFormula join = (JoinFormula) formula; - traverse(join.relation); traverse(join.child); - - } else if (formula instanceof ReverseFormula) { - ReverseFormula reverse = (ReverseFormula) formula; - if (reverse.child instanceof ValueFormula && ((ValueFormula) reverse.child).value instanceof NameValue) { - String id = ((NameValue) ((ValueFormula) reverse.child).value).id; - id = id.startsWith("!") ? id.substring(1) : ("!" 
+ id); - traverse(new ValueFormula<>(new NameValue(id))); - } else { - predicates.add(new PredicateInfo("reverse", context)); - traverse(reverse.child); - } - - } else if (formula instanceof MergeFormula) { - MergeFormula merge = (MergeFormula) formula; - predicates.add(new PredicateInfo(merge.mode.toString(), context)); - traverse(merge.child1); traverse(merge.child2); - - } else if (formula instanceof AggregateFormula) { - AggregateFormula aggregate = (AggregateFormula) formula; - predicates.add(new PredicateInfo(aggregate.mode.toString(), context)); - traverse(aggregate.child); - - } else if (formula instanceof SuperlativeFormula) { - SuperlativeFormula superlative = (SuperlativeFormula) formula; - predicates.add(new PredicateInfo(superlative.mode.toString(), context)); - // Skip the "(number 1) (number 1)" part - traverse(superlative.head); traverse(superlative.relation); - - } else if (formula instanceof ArithmeticFormula) { - ArithmeticFormula arithmetic = (ArithmeticFormula) formula; - predicates.add(new PredicateInfo(arithmetic.mode.toString(), context)); - traverse(arithmetic.child1); traverse(arithmetic.child2); - - } else if (formula instanceof VariableFormula) { - // Skip variables - - } else if (formula instanceof MarkFormula) { - MarkFormula mark = (MarkFormula) formula; - predicates.add(new PredicateInfo("mark", context)); - // Skip variable - traverse(mark.body); - - } else if (formula instanceof LambdaFormula) { - LambdaFormula lambda = (LambdaFormula) formula; - predicates.add(new PredicateInfo("lambda", context)); - // Skip variable - traverse(lambda.body); - - } else if (formula instanceof ScopedFormula) { - ScopedFormula scoped = (ScopedFormula) formula; - traverse(scoped.head); - Formula relation = scoped.relation; - if (relation instanceof LambdaFormula) - relation = ((LambdaFormula) relation).body; - traverse(relation); - - } else { - throw new RuntimeException("[PredicateInfo] Cannot handle formula " + formula); - } - } - } -} diff --git 
a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/grow/ApplyFn.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/grow/ApplyFn.java deleted file mode 100644 index a09b31d1b1..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/grow/ApplyFn.java +++ /dev/null @@ -1,185 +0,0 @@ -package edu.stanford.nlp.sempre.tables.grow; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.tables.DenotationTypeInference; -import edu.stanford.nlp.sempre.tables.ScopedFormula; -import edu.stanford.nlp.sempre.tables.ScopedValue; -import fig.basic.LispTree; -import fig.basic.MapUtils; -import fig.basic.Option; - -/** - * Apply the function on the children from left to right. - * - * Example: - * (lambda x (lambda y ((var x) (count (var y))))) - * on x = population, y = (type cake) - * ==> (population (count (type cake))) - * - * If some of the arguments are ScopeFormulas, they must have either - * the same denotation (when exactScopeHead is false) or the same - * head formula (when exactScopeHead is true). The function is applied - * on the relation part of the ScopeFormula. - * - * In addition to the function, a type can be specified. 
- * A parser can use this type information to save running time - * (e.g., by not subtracting things that are not numbers or dates) - * - * @author ppasupat - */ -public class ApplyFn extends SemanticFn { - public static class Options { - @Option(gloss = "verbosity") public int verbose = 0; - } - public static Options opts = new Options(); - - public static boolean exactScopeHead = true; - - Formula formula; - // Type information - boolean hasTypeInfo = false, sameType = false; - SemType arg1Type = null, arg2Type = null; - - public void init(LispTree tree) { - super.init(tree); - formula = Formulas.fromLispTree(tree.child(1)); - for (int i = 2; i < tree.children.size(); i++) { - hasTypeInfo = true; - LispTree typeInfo = tree.child(i); - if (typeInfo.isLeaf() && "same-type".equals(typeInfo.value)) { - sameType = true; - } else if (!typeInfo.isLeaf() && typeInfo.children.size() == 2 && "arg1-type".equals(typeInfo.child(0).value)) { - arg1Type = SemType.fromLispTree(typeInfo.child(1)); - } else if (!typeInfo.isLeaf() && typeInfo.children.size() == 2 && "arg2-type".equals(typeInfo.child(0).value)) { - arg2Type = SemType.fromLispTree(typeInfo.child(1)); - } else { - throw new RuntimeException("Cannot parse type information: " + typeInfo); - } - } - } - - public Formula getFormula() { - return formula; - } - - public boolean hasTypeInfo() { return hasTypeInfo; } - public boolean sameType() { return sameType; } - public SemType getArg1Type() { return arg1Type == null ? SemType.anyType : arg1Type; } - public SemType getArg2Type() { return arg2Type == null ? 
SemType.anyType : arg2Type; } - - @Override - public DerivationStream call(Example ex, Callable c) { - return new SingleDerivationStream() { - @Override - public Derivation createDerivation() { - Formula result = formula, head = null; - Value headValue = null; - // Check the scopes - for (Derivation child : c.getChildren()) { - if (!(child.formula instanceof ScopedFormula)) continue; - ScopedFormula scoped = (ScopedFormula) child.formula; - ScopedValue scopedValue = (ScopedValue) child.value; - if (head == null) { - head = scoped.head; - if (scopedValue != null) - headValue = scopedValue.head; - } else { - if ((exactScopeHead && !head.equals(scoped.head)) || - (!exactScopeHead && !headValue.equals(scopedValue.head))) - return null; - } - } - // Apply the function on the arguments - for (Derivation child : c.getChildren()) { - if (!(result instanceof LambdaFormula)) - throw new RuntimeException("Too many arguments: " + c.getChildren() + " for " + formula); - Formula argument = child.formula; - if (argument instanceof ScopedFormula) - argument = ((ScopedFormula) argument).relation; - result = Formulas.lambdaApply((LambdaFormula) result, argument); - } - // SUPER HACK: Resolve ((reverse (lambda x (r1 (r2 (var x))))) ...) => ((reverse r2) ((reverse r1) ...)) - if (result instanceof JoinFormula) - result = hackJoin(result); - else if (result instanceof LambdaFormula && ((LambdaFormula) result).body instanceof JoinFormula) { - LambdaFormula lambda = (LambdaFormula) result; - if (lambda.body instanceof JoinFormula) - result = new LambdaFormula(lambda.var, hackJoin(lambda.body)); - } - // END SUPER HACK - return new Derivation.Builder().withCallable(c) - .formula(head == null ? 
result : new ScopedFormula(head, result)) - .type(TypeInference.inferType(result)).createDerivation(); - } - }; - } - - private Formula hackJoin(Formula formula) { - if (formula instanceof JoinFormula) { - JoinFormula join = (JoinFormula) formula; - if (join.relation instanceof ReverseFormula) { - ReverseFormula reverse = (ReverseFormula) join.relation; - if (reverse.child instanceof LambdaFormula) { - LambdaFormula lambda = (LambdaFormula) reverse.child; - if (lambda.body instanceof JoinFormula) { - JoinFormula join2 = (JoinFormula) lambda.body; - if (join2.child instanceof JoinFormula) { - JoinFormula join3 = (JoinFormula) join2.child; - // YAY! - return new JoinFormula(new ReverseFormula(join3.relation), - new JoinFormula(new ReverseFormula(join2.relation), join.child)); - } - } - } - } - } - // AWW! - return formula; - } - - @Override - public boolean supportFilteringOnTypeData() { return true; } - - @Override - public Collection getFilteredDerivations(List derivations1, List derivations2) { - if (!hasTypeInfo) - return Collections.singleton(new ChildDerivationsGroup(derivations1, derivations2)); - // TODO: Currently this works only for lists of values, not mappings - Map> grouped1 = groupByType(derivations1, getArg1Type()), - grouped2 = (derivations2 == null) ? 
null : groupByType(derivations2, getArg2Type()); - List groups = new ArrayList<>(); - if (sameType) { // Matching pairs - for (String valueType : grouped1.keySet()) { - if (!grouped2.containsKey(valueType)) continue; - groups.add(new ChildDerivationsGroup(grouped1.get(valueType), grouped2.get(valueType))); - } - } else if (derivations2 != null) { // All pairs - for (List filtered1 : grouped1.values()) { - for (List filtered2 : grouped2.values()) { - groups.add(new ChildDerivationsGroup(filtered1, filtered2)); - } - } - } else { - for (List filtered : grouped1.values()) { - groups.add(new ChildDerivationsGroup(filtered)); - } - } - return groups; - } - - private Map> groupByType(List derivations, SemType parentType) { - Map> typeToDerivs = new HashMap<>(); - for (Derivation deriv : derivations) { - String valueType = DenotationTypeInference.getValueType(deriv.value); - MapUtils.addToList(typeToDerivs, valueType, deriv); - } - for (Iterator itr = typeToDerivs.keySet().iterator(); itr.hasNext();) { - String valueType = itr.next(); - if (!parentType.meet(SemType.newAtomicSemType(valueType)).isValid()) - itr.remove(); - } - return typeToDerivs; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/grow/BeginGrowFn.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/grow/BeginGrowFn.java deleted file mode 100644 index 07b44cfacf..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/grow/BeginGrowFn.java +++ /dev/null @@ -1,37 +0,0 @@ -package edu.stanford.nlp.sempre.tables.grow; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.tables.ScopedFormula; -import fig.basic.*; - -/** - * Formula s [finite set] ==> ScopedFormula(s, identity function) - * - * @author ppasupat - */ -public class BeginGrowFn extends SemanticFn { - public static class Options { - @Option(gloss = "verbosity") public int verbose = 0; - } - public static Options opts = new Options(); - - public void init(LispTree tree) { - 
super.init(tree); - } - - public static final Formula IDENTITY = new LambdaFormula("x", new VariableFormula("x")); - - @Override - public DerivationStream call(Example ex, Callable c) { - return new SingleDerivationStream() { - @Override - public Derivation createDerivation() { - if (c.getChildren().size() != 1) - throw new RuntimeException("Wrong number of argument: expected 1; got " + c.getChildren().size()); - ScopedFormula scoped = new ScopedFormula(c.child(0).formula, IDENTITY); - return new Derivation.Builder().withCallable(c) - .formula(scoped).type(TypeInference.inferType(scoped.relation)).createDerivation(); - } - }; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/grow/EndGrowFn.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/grow/EndGrowFn.java deleted file mode 100644 index b19c0d587d..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/grow/EndGrowFn.java +++ /dev/null @@ -1,49 +0,0 @@ -package edu.stanford.nlp.sempre.tables.grow; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.tables.ScopedFormula; -import fig.basic.*; - -/** - * Mapping(s, r) ==> f(s, r) - * - * @author ppasupat - */ -public class EndGrowFn extends SemanticFn { - public static class Options { - @Option(gloss = "verbosity") public int verbose = 0; - } - public static Options opts = new Options(); - - Formula formula; - - public void init(LispTree tree) { - super.init(tree); - formula = Formulas.fromLispTree(tree.child(1)); - if (!(formula instanceof LambdaFormula) || !(((LambdaFormula) formula).body instanceof LambdaFormula)) - throw new RuntimeException("Function for EndGrowFn must take 2 arguments (a set s and a relation r)"); - } - - public Formula getFormula() { - return formula; - } - - @Override - public DerivationStream call(Example ex, Callable c) { - return new SingleDerivationStream() { - @Override - public Derivation createDerivation() { - if (c.getChildren().size() != 1) - throw new 
RuntimeException("Wrong number of argument: expected 1; got " + c.getChildren().size()); - if (!(c.child(0).formula instanceof ScopedFormula)) - throw new RuntimeException("Wrong argument type: expected ScopedFormula; got " + c.child(0).formula); - ScopedFormula scoped = (ScopedFormula) c.child(0).formula; - Formula result = formula; - result = Formulas.lambdaApply((LambdaFormula) result, scoped.head); - result = Formulas.lambdaApply((LambdaFormula) result, scoped.relation); - return new Derivation.Builder().withCallable(c) - .formula(result).type(TypeInference.inferType(result)).createDerivation(); - } - }; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/BinaryDenotation.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/BinaryDenotation.java deleted file mode 100644 index 100892db0d..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/BinaryDenotation.java +++ /dev/null @@ -1,65 +0,0 @@ -package edu.stanford.nlp.sempre.tables.lambdadcs; - -import edu.stanford.nlp.sempre.PairListValue; -import fig.basic.LispTree; - -/** - * Binary denotation: a mapping from value to values. - * - * Share the implementation with MappingDenotation by using PairList. 
- * - * @author ppasupat - */ -public class BinaryDenotation implements Binarylike { - - protected final PL pairList; - - @Override - public String toString() { - return toLispTree().toString(); - } - - public BinaryDenotation(PL pairList) { - this.pairList = pairList; - } - - @Override - public LispTree toLispTree() { - return LispTree.proto.newList("binary", pairList.toLispTree()); - } - - @Override - public PairListValue toValue() { - return pairList.toValue(); - } - - public MappingDenotation asMapping(String domainVar) { - return new MappingDenotation(domainVar, pairList); - } - - @Override - public Binarylike reverse() { - return new BinaryDenotation<>(pairList.reverse()); - } - - @Override - public UnaryDenotation joinOnKey(UnaryDenotation keys) { - return pairList.joinOnKey(keys); - } - - @Override - public UnaryDenotation joinOnValue(UnaryDenotation values) { - return pairList.joinOnValue(values); - } - - @Override - public BinaryDenotation explicitlyFilterOnKey(UnaryDenotation keys) { - return new BinaryDenotation<>(pairList.explicitlyFilterOnKey(keys)); - } - - @Override - public BinaryDenotation explicitlyFilterOnValue(UnaryDenotation values) { - return new BinaryDenotation<>(pairList.explicitlyFilterOnValue(values)); - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/BinaryTypeHint.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/BinaryTypeHint.java deleted file mode 100644 index 9271a4a8bf..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/BinaryTypeHint.java +++ /dev/null @@ -1,56 +0,0 @@ -package edu.stanford.nlp.sempre.tables.lambdadcs; - -import edu.stanford.nlp.sempre.*; - -/** - * Impose that the result is a binary and: - * - the set of first pair entries should be a subset of |upperBoundFirst| - * - the set of second pair entries should be a subset of |upperBoundSecond| - * - * @author ppasupat - */ -public class BinaryTypeHint extends TypeHint { 
- - public final UnaryDenotation firstUpperBound, secondUpperBound; - - // Should only be called within this package - protected BinaryTypeHint(UnaryDenotation first, UnaryDenotation second, VariableMap map) { - firstUpperBound = (first == null) ? InfiniteUnaryDenotation.STAR_UNARY : first; - secondUpperBound = (second == null) ? InfiniteUnaryDenotation.STAR_UNARY : second; - variableMap = map; - } - - @Override - public String toString() { - return "BinaryTypeHint [" + firstUpperBound + "|" + secondUpperBound + "] " + variableMap; - } - - // ============================================================ - // Derive a new type hint - // ============================================================ - - public BinaryTypeHint withVar(String name, Value value) { - return new BinaryTypeHint(firstUpperBound, secondUpperBound, variableMap.plus(name, value)); - } - - public BinaryTypeHint withFreeVar(String name) { - return new BinaryTypeHint(firstUpperBound, secondUpperBound, variableMap.plusFreeVar(name)); - } - - public BinaryTypeHint reverse() { - return newRestrictedBinary(secondUpperBound, firstUpperBound); - } - - public UnarylikeTypeHint first() { - return new UnarylikeTypeHint(firstUpperBound, null, variableMap); - } - - public UnarylikeTypeHint second() { - return new UnarylikeTypeHint(secondUpperBound, null, variableMap); - } - - // Binary (a, b) = mapping from b to a - public UnarylikeTypeHint asMapping(String freeVar) { - return new UnarylikeTypeHint(firstUpperBound, secondUpperBound, variableMap.plusFreeVar(freeVar)); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/Binarylike.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/Binarylike.java deleted file mode 100644 index 34711235b4..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/Binarylike.java +++ /dev/null @@ -1,40 +0,0 @@ -package edu.stanford.nlp.sempre.tables.lambdadcs; - -import 
edu.stanford.nlp.sempre.Value; -import fig.basic.LispTree; - -/** - * Represents a BinaryDenotation. - * - * By LambdaDCS convention, (v <= k) means that k maps to v. - * - * The following operations must be handled: - * - Reverse - * - reverse(BL) - * - Compose - * - Join: BL.UL - * - Superlative: argmax(UL, BL), ... - * - * @author ppasupat - */ -public interface Binarylike { - - public LispTree toLispTree(); - public Value toValue(); - - /** Return all (y <= x) such that (x <= y) is in this binary */ - public Binarylike reverse(); - - /** Return all v such that for some k in keys, (v <= k) is in this binary */ - public UnaryDenotation joinOnKey(UnaryDenotation keys); - - /** Return all k such that for some v in values, (v <= k) is in this binary */ - public UnaryDenotation joinOnValue(UnaryDenotation values); - - /** Return all (v <= k) such that k is in keys and (v <= k) is in this binary */ - public BinaryDenotation explicitlyFilterOnKey(UnaryDenotation keys); - - /** Return all (v <= k) such that v in values and (v <= k) is in this binary */ - public BinaryDenotation explicitlyFilterOnValue(UnaryDenotation values); - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/DenotationUtils.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/DenotationUtils.java deleted file mode 100644 index f586b3b7c9..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/DenotationUtils.java +++ /dev/null @@ -1,471 +0,0 @@ -package edu.stanford.nlp.sempre.tables.lambdadcs; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.tables.lambdadcs.LambdaDCSException.Type; -import fig.basic.*; - -/** - * Utilities for denotations. - * - * Handle aggregation, superlative, and arithmetic operations on different types of values. 
- * - * @author ppasupat - */ -public final class DenotationUtils { - - private DenotationUtils() { } - - // ============================================================ - // Type Enforcer - // ============================================================ - - /** - * Try to convert to a number - */ - public static int convertToInteger(Value value) { - if (value instanceof NumberValue) { - return (int) ((NumberValue) value).value; - } else { - throw new LambdaDCSException(Type.typeMismatch, "Cannot convert %s to number", value); - } - } - - /** - * Try to convert to a number - */ - public static double convertToNumber(Value value) { - if (value instanceof NumberValue) { - return ((NumberValue) value).value; - } else { - throw new LambdaDCSException(Type.typeMismatch, "Cannot convert %s to number", value); - } - } - - /** - * Ensure that the unary only has a single positive integer and return that integer - */ - public static int getSinglePositiveInteger(UnaryDenotation unary) { - if (unary.size() != 1) - throw new LambdaDCSException(Type.nonSingletonList, "getSinglePositiveInteger(): denotation %s has != 1 elements", unary); - int amount = convertToInteger(unary.iterator().next()); - if (amount > 0) return amount; - throw new LambdaDCSException(Type.typeMismatch, "getSinglePositiveInteger(): denotation %s is not a positive integer", unary); - } - - /** - * Ensure that the unary only has a single number and return that number - */ - public static double getSingleNumber(UnaryDenotation unary) { - if (unary.size() != 1) - throw new LambdaDCSException(Type.nonSingletonList, "getSingleNumber(): denotation %s has != 1 elements", unary); - return convertToNumber(unary.iterator().next()); - } - - /** - * Ensure that the unary has only a single value and return that value - */ - public static Value getSingleValue(UnaryDenotation unary) { - if (unary.size() != 1) - throw new LambdaDCSException(Type.nonSingletonList, "getSingleValue(): denotation %s has != 1 elements", unary); 
- return unary.iterator().next(); - } - - // ============================================================ - // Operations on Denotations - // ============================================================ - - /** - * Join between a BinaryDenotation and a UnarylikeDenotation. - */ - public static Unarylike genericJoin(Binarylike b, Unarylike u) { - if (u instanceof UnaryDenotation) - return b.joinOnKey((UnaryDenotation) u); - Map> mapping = new HashMap<>(); - if (u.domain().size() != Integer.MAX_VALUE) { - for (Value value : u.domain()) - mapping.put(value, b.joinOnKey(u.get(value))); - return new MappingDenotation<>(u.getDomainVar(), new ExplicitPairList(mapping)); - } else { - ExplicitPairList binary = b.explicitlyFilterOnKey(u.range()).pairList; - for (Map.Entry entry : binary.mapping.entrySet()) { - for (Value key : u.inverseGet(entry.getKey())) { - if (!mapping.containsKey(key)) - mapping.put(key, new ArrayList<>(entry.getValue())); - else - mapping.get(key).addAll(entry.getValue()); - } - } - return new MappingDenotation<>(u.getDomainVar(), new ExplicitPairList(mapping)); - } - } - - /** - * Aggregate values of the same type. 
- */ - public static Value aggregate(Collection values, AggregateFormula.Mode mode) { - // Handle basic cases - if (mode == AggregateFormula.Mode.count) { - if (values.size() == Integer.MAX_VALUE) - throw new LambdaDCSException(Type.infiniteList, "Cannot call %s on an infinite list.", mode); - return new NumberValue(values.size()); - } - if (values.isEmpty()) { - if (LambdaDCSExecutor.opts.aggregatesFailOnEmptyLists) - throw new LambdaDCSException(Type.emptyList, "Cannot call %s on an empty list.", mode); - return new ListValue(Collections.emptyList()); - } - // General cases - TypeProcessor processor = getTypeProcessor(values); - switch (mode) { - case max: return processor.max(values); - case min: return processor.min(values); - case sum: return processor.sum(values); - case avg: return processor.avg(values); - default: throw new LambdaDCSException(Type.invalidFormula, "Unknown aggregate mode: %s", mode); - } - } - - /** - * Helper: Check if the two Unarylikes have the same domain variable and return it. - * Throw an exception if the domain variables are not the same. - */ - public static String checkDomainVars(Unarylike u1, Unarylike u2) { - if (u1 == null || u1.getDomainVar() == null) return u2.getDomainVar(); - if (u2 == null || u2.getDomainVar() == null) return u1.getDomainVar(); - if (u1.getDomainVar().equals(u2.getDomainVar())) return u1.getDomainVar(); - throw new LambdaDCSException(Type.invalidFormula, "Different domain variables: %s != %s", - u1.getDomainVar(), u2.getDomainVar()); - } - - /** - * Merge values. 
- */ - public static Unarylike merge(Unarylike u1, Unarylike u2, MergeFormula.Mode mode) { - if (u1 instanceof UnaryDenotation && u2 instanceof UnaryDenotation) - return ((UnaryDenotation) u1).merge((UnaryDenotation) u2, mode); - // MappingDenotation: Go over the union of the domains - String domainVar = checkDomainVars(u1, u2); - Map answer = new HashMap<>(); - for (Value key : u1.domain()) { - if (!answer.containsKey(key)) - answer.put(key, u1.get(key).merge(u2.get(key), mode)); - } - for (Value key : u2.domain()) { - if (!answer.containsKey(key)) - answer.put(key, u1.get(key).merge(u2.get(key), mode)); - } - if (!answer.containsKey(null)) - answer.put(null, u1.get(null).merge(u2.get(null), mode)); - answer.entrySet().removeIf(e -> e.getValue().size() == 0); - return new MappingDenotation<>(domainVar, new ExplicitPairList(answer)); - } - - /** - * Perform arithmetic operation. Currently each Value must be a NumberValue. - * - * If arithmeticsFailOnMultipleElements is specified, throw an error if - * both children contain more than 1 Value. 
- */ - public static Unarylike arithmetic(Unarylike u1, Unarylike u2, ArithmeticFormula.Mode mode) { - TypeProcessor processor = getTypeProcessor(u1.range(), u2.range()); - if (u1 instanceof UnaryDenotation && u2 instanceof UnaryDenotation) - return arithmeticUnary((UnaryDenotation) u1, (UnaryDenotation) u2, mode, processor); - // MappingDenotation: Go over the union of the domains - String domainVar = checkDomainVars(u1, u2); - Map answer = new HashMap<>(); - for (Value key : u1.domain()) { - if (!answer.containsKey(key)) - answer.put(key, arithmeticUnary(u1.get(key), u2.get(key), mode, processor)); - } - for (Value key : u2.domain()) { - if (!answer.containsKey(key)) - answer.put(key, arithmeticUnary(u1.get(key), u2.get(key), mode, processor)); - } - if (!answer.containsKey(null)) - answer.put(null, arithmeticUnary(u1.get(null), u2.get(null), mode, processor)); - answer.entrySet().removeIf(e -> e.getValue().size() == 0); - return new MappingDenotation<>(domainVar, new ExplicitPairList(answer)); - } - - public static UnaryDenotation arithmeticUnary(UnaryDenotation u1, UnaryDenotation u2, - ArithmeticFormula.Mode mode, TypeProcessor processor) { - if (LambdaDCSExecutor.opts.arithmeticsFailOnEmptyLists && (u1.size() == 0 || u2.size() == 0)) - throw new LambdaDCSException(Type.emptyList, "Cannot call %s on an empty list.", mode); - if (LambdaDCSExecutor.opts.arithmeticsFailOnMultipleElements && u1.size() > 1 && u2.size() > 1) - throw new LambdaDCSException(Type.nonSingletonList, "Cannot call %s when both denotations have > 1 values.", mode); - if (processor == null) processor = getTypeProcessor(u1, u2); - List answer = new ArrayList<>(); - switch (mode) { - case add: for (Value v1 : u1) for (Value v2 : u2) answer.add(processor.add(v1, v2)); break; - case sub: for (Value v1 : u1) for (Value v2 : u2) answer.add(processor.sub(v1, v2)); break; - case mul: for (Value v1 : u1) for (Value v2 : u2) answer.add(processor.mul(v1, v2)); break; - case div: for (Value v1 : u1) for 
(Value v2 : u2) answer.add(processor.div(v1, v2)); break; - default: - throw new LambdaDCSException(Type.invalidFormula, "Unknown arithmetic mode: %s", mode); - } - return new ExplicitUnaryDenotation(answer); - } - - /** - * Compute the superlative (argmax/min rank count head (reverse relation)). - * - * Note that |relation| is reversed so that |relation| can be directly joined with |head|. - * - * If opt.aggregateReturnAllTopTies is true, (argmin 1 1 ...) and (argmax 1 1 ...) will return - * the list of all values that produce the min or max. Otherwise, only 1 arbitrary value will be returned. - */ - public static Unarylike superlative(int rank, int count, Unarylike head, - Binarylike relation, SuperlativeFormula.Mode mode) { - // Filter the relation with the possible keys - BinaryDenotation filtered = relation.explicitlyFilterOnKey(head.range()); - TypeProcessor processor = getTypeProcessor(filtered.pairList.range()); - if (head instanceof UnaryDenotation) { - return superlativeUnary(rank, count, filtered.pairList.pairs, mode, processor); - } - // MappingDenotation: Go over the domain - Map answer = new HashMap<>(); - for (Value key : head.domain()) { - BinaryDenotation refiltered = filtered.explicitlyFilterOnKey(head.get(key)); - answer.put(key, superlativeUnary(rank, count, refiltered.pairList.pairs, mode, processor)); - } - return new MappingDenotation<>(head.getDomainVar(), new ExplicitPairList(answer)); - } - - /** - * Perform superlative on a list of (value, key). 
- */ - public static UnaryDenotation superlativeUnary(int rank, int count, List> pairs, - SuperlativeFormula.Mode mode, TypeProcessor processor) { - if (rank <= 0 || count <= 0 || rank >= 1000000 || count >= 100000) - LogInfo.fails("Invalid superlative (rank = %d, count = %d)", rank, count); - if (pairs.isEmpty()) { - if (LambdaDCSExecutor.opts.superlativesFailOnEmptyLists) - throw new LambdaDCSException(Type.emptyList, "Cannot call %s on an empty list.", mode); - return UnaryDenotation.EMPTY; - } - List values = new ArrayList<>(), answer = new ArrayList<>(); - for (Pair pair : pairs) - values.add(pair.getFirst()); - if (processor == null) processor = getTypeProcessor(values); - if (LambdaDCSExecutor.opts.superlativesReturnAllTopTies && rank == 1 && count == 1) { - // Special case: Return all ties at the top - Value topValue; - switch (mode) { - case argmax: topValue = processor.max(values); break; - case argmin: topValue = processor.min(values); break; - default: throw new LambdaDCSException(Type.invalidFormula, "Unknown superlative mode: %s", mode); - } - for (Pair pair : pairs) - if (topValue.equals(pair.getFirst()) && !answer.contains(pair.getSecond())) - answer.add(pair.getSecond()); - } else { - // Other cases - List indices; - switch (mode) { - case argmax: indices = processor.argsort(values); Collections.reverse(indices); break; - case argmin: indices = processor.argsort(values); break; - default: throw new LambdaDCSException(Type.invalidFormula, "Unknown superlative mode: %s", mode); - } - int from = Math.min(rank - 1, indices.size()), - to = Math.min(from + count, indices.size()); - for (int index : indices.subList(from, to)) - answer.add(pairs.get(index).getSecond()); - } - return new ExplicitUnaryDenotation(answer); - } - - // ============================================================ - // Processor for each data type - // ============================================================ - - /** - * Processor for each data type. 
- */ - public abstract static class TypeProcessor { - // Is the value v compatible with this processor? - public abstract boolean isCompatible(Value v); - // Is the collection sortable? (Is there a total order on the elements?) - public abstract boolean isSortable(Collection values); - // positive if v1 > v2 | negative if v1 < v2 | 0 if v1 == v2 - public int compareValues(Value v1, Value v2) { throw new LambdaDCSException(Type.typeMismatch, "Cannot compare values with " + getClass().getSimpleName()); } - public Value sum(Collection values) { throw new LambdaDCSException(Type.typeMismatch, "Cannot compute sum with " + getClass().getSimpleName()); } - public Value avg(Collection values) { throw new LambdaDCSException(Type.typeMismatch, "Cannot compute avg with " + getClass().getSimpleName()); } - public Value add(Value v1, Value v2) { throw new LambdaDCSException(Type.typeMismatch, "Cannot compute add with " + getClass().getSimpleName()); } - public Value sub(Value v1, Value v2) { throw new LambdaDCSException(Type.typeMismatch, "Cannot compute sub with " + getClass().getSimpleName()); } - public Value mul(Value v1, Value v2) { throw new LambdaDCSException(Type.typeMismatch, "Cannot compute mul with " + getClass().getSimpleName()); } - public Value div(Value v1, Value v2) { throw new LambdaDCSException(Type.typeMismatch, "Cannot compute div with " + getClass().getSimpleName()); } - - public Value max(Collection values) { - if (!isSortable(values)) - throw new LambdaDCSException(Type.typeMismatch, "Values cannot be sorted."); - Value max = null; - for (Value value : values) { - if (max == null || compareValues(max, value) < 0) - max = value; - } - return max; - } - - public Value min(Collection values) { - if (!isSortable(values)) - throw new LambdaDCSException(Type.typeMismatch, "Values cannot be sorted."); - Value min = null; - for (Value value : values) { - if (min == null || compareValues(min, value) > 0) - min = value; - } - return min; - } - - public List 
argsort(List values) { - if (!isSortable(values)) - throw new LambdaDCSException(Type.typeMismatch, "Values cannot be sorted."); - List indices = new ArrayList<>(); - for (int i = 0; i < values.size(); i++) - indices.add(i); - Collections.sort(indices, new Comparator() { - @Override - public int compare(Integer o1, Integer o2) { - return compareValues(values.get(o1), values.get(o2)); - } - }); - return indices; - } - } - - /** - * Handle NumberValue. All operations are possible. - */ - public static class NumberProcessor extends TypeProcessor { - public static TypeProcessor singleton = new NumberProcessor(); - - @Override - public boolean isCompatible(Value v) { - return v instanceof NumberValue; - } - - @Override - public boolean isSortable(Collection values) { - return true; - } - - @Override - public int compareValues(Value v1, Value v2) { - double x1 = ((NumberValue) v1).value, x2 = ((NumberValue) v2).value; - return (x1 > x2) ? 1 : (x1 < x2) ? -1 : 0; - } - - @Override - public Value sum(Collection values) { - double sum = 0; - for (Value value : values) - sum += ((NumberValue) value).value; - return new NumberValue(sum); - } - - @Override - public Value avg(Collection values) { - double sum = 0; - for (Value value : values) - sum += ((NumberValue) value).value; - return new NumberValue(sum / values.size()); - } - - @Override public Value add(Value v1, Value v2) { return new NumberValue(((NumberValue) v1).value + ((NumberValue) v2).value); } - @Override public Value sub(Value v1, Value v2) { return new NumberValue(((NumberValue) v1).value - ((NumberValue) v2).value); } - @Override public Value mul(Value v1, Value v2) { return new NumberValue(((NumberValue) v1).value * ((NumberValue) v2).value); } - @Override public Value div(Value v1, Value v2) { return new NumberValue(((NumberValue) v1).value / ((NumberValue) v2).value); } - } - - /** - * Handle DateValue. Only comparison is possible. 
- */ - public static class DateProcessor extends TypeProcessor { - public static TypeProcessor singleton = new DateProcessor(); - - @Override - public boolean isCompatible(Value v) { - return v instanceof DateValue; - } - - @Override - public boolean isSortable(Collection values) { - DateValue firstDate = null; - for (Value value : values) { - DateValue date = (DateValue) value; - if (firstDate == null) { - firstDate = date; - } else { - if ((firstDate.year == -1) != (date.year == -1)) return false; - if ((firstDate.month == -1) != (date.month == -1)) return false; - if ((firstDate.day == -1) != (date.day == -1)) return false; - } - } - return true; - } - - @Override - public int compareValues(Value v1, Value v2) { - DateValue d1 = ((DateValue) v1), d2 = ((DateValue) v2); - if (d1.year == -1 || d2.year == -1 || d1.year == d2.year) { - if (d1.month == -1 || d2.month == -1 || d1.month == d2.month) { - if (d1.day == -1 || d2.day == -1 || d1.day == d2.day) { - return 0; - } else { - return d1.day - d2.day; - } - } else { - return d1.month - d2.month; - } - } else { - return d1.year - d2.year; - } - } - } - - /** - * Get the TypeProcessor corresponding to the strictest type. - */ - public static TypeProcessor getTypeProcessor(Value value) { - return getTypeProcessor(Collections.singleton(value), Collections.emptySet()); - } - - /** - * Get the TypeProcessor corresponding to the strictest type. - */ - public static TypeProcessor getTypeProcessor(Value value1, Value value2) { - return getTypeProcessor(Collections.singleton(value1), Collections.singleton(value2)); - } - - /** - * Get the TypeProcessor corresponding to the strictest type. - */ - public static TypeProcessor getTypeProcessor(Collection values) { - return getTypeProcessor(values, Collections.emptySet()); - } - - /** - * Get the TypeProcessor corresponding to the strictest type. 
- */ - public static TypeProcessor getTypeProcessor(Collection values1, Collection values2) { - boolean canBeNumber = true, canBeDate = true; - for (Value value : values1) { - if (!(value instanceof NumberValue)) canBeNumber = false; - if (!(value instanceof DateValue)) canBeDate = false; - } - for (Value value : values2) { - if (!(value instanceof NumberValue)) canBeNumber = false; - if (!(value instanceof DateValue)) canBeDate = false; - } - if (canBeNumber) { - return NumberProcessor.singleton; - } else if (canBeDate) { - return DateProcessor.singleton; - } else { - throw new LambdaDCSException(Type.typeMismatch, "Cannot compare values"); - } - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/ExecutorCache.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/ExecutorCache.java deleted file mode 100644 index fdb0c6b494..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/ExecutorCache.java +++ /dev/null @@ -1,70 +0,0 @@ -package edu.stanford.nlp.sempre.tables.lambdadcs; - -import java.util.*; - -import fig.basic.*; - -/** - * Cache the executed values of an executor. - * - * The cache should be cleared once the parser finishes each example. - * The metakey Object specifies the current example. - * When the metakey is changed, the cache is cleared. 
- * - * @author ppasupat - */ -public final class ExecutorCache { - public static class Options { - @Option(gloss = "maximum number of values to retain") - public int maxCacheSize = 1000000; - @Option(gloss = "minimum number of values evicted before garbage collection") - public int cacheGCThreshold = 10000000; - @Option public int verbose = 0; - } - public static Options opts = new Options(); - - // Default cache - public static final ExecutorCache singleton = new ExecutorCache(); - - private Object currentMetakey = null; - private final Map cache; - private int accumSize = 0; - - public ExecutorCache() { - cache = new HashMap<>(); - } - - public synchronized Object get(Object metakey, Object key) { - if (currentMetakey != metakey) { - clearCache(metakey); - } - Object value = cache.get(key); - if (opts.verbose >= 1) - LogInfo.logs("[GET =>] %s => %s", key, value); - return value; - } - - public synchronized void put(Object metakey, Object key, Object value) { - if (currentMetakey != metakey) { - clearCache(metakey); - } - if (opts.verbose >= 1) - LogInfo.logs("[<= PUT] %s <= %s", key, value); - if (cache.size() < opts.maxCacheSize) - cache.put(key, value); - } - - public void clearCache(Object metakey) { - accumSize += cache.size(); - cache.clear(); - // Garbage collect - if (accumSize >= opts.cacheGCThreshold) { - System.gc(); - accumSize = 0; - } - if (opts.verbose >= 1) - LogInfo.logs("[clearCache] metakey = %s", metakey); - currentMetakey = metakey; - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/ExplicitPairList.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/ExplicitPairList.java deleted file mode 100644 index 1c5de7d9ec..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/ExplicitPairList.java +++ /dev/null @@ -1,203 +0,0 @@ -package edu.stanford.nlp.sempre.tables.lambdadcs; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import 
edu.stanford.nlp.sempre.AggregateFormula.Mode; -import fig.basic.LispTree; -import fig.basic.MapUtils; -import fig.basic.Pair; - -public class ExplicitPairList implements PairList { - - // Following LambdaDCS convention, pair (v, k) means k maps to v. - protected final List> pairs; - protected final Map mapping; - protected final Map reverseMapping; - - // ============================================================ - // Constructors - // ============================================================ - - public ExplicitPairList() { - pairs = Collections.emptyList(); - mapping = Collections.emptyMap(); - reverseMapping = Collections.emptyMap(); - } - - public ExplicitPairList(Value key, Value value) { - pairs = Collections.singletonList(new Pair<>(value, key)); - mapping = Collections.singletonMap(key, new ExplicitUnaryDenotation(value)); - reverseMapping = Collections.singletonMap(value, new ExplicitUnaryDenotation(key)); - } - - public ExplicitPairList(Pair pair) { - pairs = Collections.singletonList(pair); - mapping = Collections.singletonMap(pair.getSecond(), new ExplicitUnaryDenotation(pair.getFirst())); - reverseMapping = Collections.singletonMap(pair.getFirst(), new ExplicitUnaryDenotation(pair.getSecond())); - } - - public ExplicitPairList(List> pairs) { - this.pairs = pairs; - Map> mappingBuilder = new HashMap<>(), reverseMappingBuilder = new HashMap<>(); - for (Pair pair : pairs) { - MapUtils.addToList(mappingBuilder, pair.getSecond(), pair.getFirst()); - MapUtils.addToList(reverseMappingBuilder, pair.getFirst(), pair.getSecond()); - } - mapping = new HashMap<>(); - for (Map.Entry> entry : mappingBuilder.entrySet()) - mapping.put(entry.getKey(), new ExplicitUnaryDenotation(entry.getValue())); - reverseMapping = new HashMap<>(); - for (Map.Entry> entry : reverseMappingBuilder.entrySet()) - reverseMapping.put(entry.getKey(), new ExplicitUnaryDenotation(entry.getValue())); - } - - public > ExplicitPairList(Map keyToValues) { - pairs = new ArrayList<>(); - Map> 
reverseMappingBuilder = new HashMap<>(); - for (Map.Entry entry : keyToValues.entrySet()) { - for (Value value : entry.getValue()) { - pairs.add(new Pair<>(value, entry.getKey())); - MapUtils.addToList(reverseMappingBuilder, value, entry.getKey()); - } - } - mapping = new HashMap<>(); - for (Map.Entry entry : keyToValues.entrySet()) - mapping.put(entry.getKey(), new ExplicitUnaryDenotation(entry.getValue())); - reverseMapping = new HashMap<>(); - for (Map.Entry> entry : reverseMappingBuilder.entrySet()) - reverseMapping.put(entry.getKey(), new ExplicitUnaryDenotation(entry.getValue())); - } - - // ============================================================ - // Representation - // ============================================================ - - @Override - public String toString() { - return toLispTree().toString(); - } - - protected static final LispTree NULL_LEAF = LispTree.proto.newLeaf(null); - - @Override - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - for (Pair pair : pairs) { - Value first = pair.getFirst(), second = pair.getSecond(); - tree.addChild(LispTree.proto.newList( - first == null ? NULL_LEAF : first.toLispTree(), second == null ? NULL_LEAF : second.toLispTree())); - } - return tree; - } - - @Override - public PairListValue toValue() { - PairListValue result = new PairListValue(pairs); - if (LambdaDCSExecutor.opts.sortResults) - result = result.getSorted(); - return result; - } - - // ============================================================ - // Getter - // ============================================================ - - @Override - public UnaryDenotation domain() { - return new ExplicitUnaryDenotation(mapping.keySet()); - } - - @Override - public UnaryDenotation range() { - return new ExplicitUnaryDenotation(reverseMapping.keySet()); - } - - @Override - public UnaryDenotation get(Value key) { - UnaryDenotation values = mapping.get(key); - if (values == null) values = mapping.get(null); - return values == null ? 
UnaryDenotation.EMPTY : values; - } - - @Override - public UnaryDenotation inverseGet(Value value) { - UnaryDenotation keys = reverseMapping.get(value); - return keys == null ? UnaryDenotation.EMPTY : keys; - } - - // ============================================================ - // Operations - // ============================================================ - - - @Override - public PairList aggregate(Mode mode) { - Map aggregated = new HashMap<>(); - for (Map.Entry entry : mapping.entrySet()) { - aggregated.put(entry.getKey(), entry.getValue().aggregate(mode)); - } - if (mode == Mode.count && !aggregated.containsKey(null)) - aggregated.put(null, UnaryDenotation.ZERO); - return new ExplicitPairList(aggregated); - } - - @Override - public PairList filter(UnaryDenotation upperBound, UnaryDenotation domainUpperBound) { - return explicitlyFilter(upperBound, domainUpperBound); - } - - @Override - public ExplicitPairList reverse() { - List> reversed = new ArrayList<>(); - for (Pair pair : pairs) { - reversed.add(new Pair<>(pair.getSecond(), pair.getFirst())); - } - return new ExplicitPairList(reversed); - } - - @Override - public UnaryDenotation joinOnKey(UnaryDenotation keys) { - List values = new ArrayList<>(); - for (Map.Entry entry : mapping.entrySet()) { - if (keys.contains(entry.getKey())) values.addAll(entry.getValue()); - } - return new ExplicitUnaryDenotation(values); - } - - @Override - public UnaryDenotation joinOnValue(UnaryDenotation values) { - List keys = new ArrayList<>(); - for (Map.Entry entry : reverseMapping.entrySet()) { - if (values.contains(entry.getKey())) keys.addAll(entry.getValue()); - } - return new ExplicitUnaryDenotation(keys); - } - - @Override - public ExplicitPairList explicitlyFilterOnKey(UnaryDenotation keys) { - List> filtered = new ArrayList<>(); - for (Pair pair : pairs) { - if (keys.contains(pair.getSecond())) filtered.add(pair); - } - return new ExplicitPairList(filtered); - } - - @Override - public ExplicitPairList 
explicitlyFilterOnValue(UnaryDenotation values) { - List> filtered = new ArrayList<>(); - for (Pair pair : pairs) { - if (values.contains(pair.getFirst())) filtered.add(pair); - } - return new ExplicitPairList(filtered); - } - - public ExplicitPairList explicitlyFilter(UnaryDenotation values, UnaryDenotation keys) { - List> filtered = new ArrayList<>(); - for (Pair pair : pairs) { - if (values.contains(pair.getFirst()) && keys.contains(pair.getSecond())) filtered.add(pair); - } - return new ExplicitPairList(filtered); - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/ExplicitUnaryDenotation.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/ExplicitUnaryDenotation.java deleted file mode 100644 index 4d14543775..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/ExplicitUnaryDenotation.java +++ /dev/null @@ -1,116 +0,0 @@ -package edu.stanford.nlp.sempre.tables.lambdadcs; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.tables.lambdadcs.LambdaDCSException.Type; -import fig.basic.*; - -/** - * A unary with finite number of elements. Represented as a set of values. 
- * - * @author ppasupat - */ -public class ExplicitUnaryDenotation extends UnaryDenotation { - - protected final List values; - - public ExplicitUnaryDenotation() { - values = Collections.emptyList(); - } - - public ExplicitUnaryDenotation(Value value) { - values = Collections.singletonList(value); - } - - public ExplicitUnaryDenotation(Collection values) { - this.values = new ArrayList<>(values); - } - - @Override - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("unary"); - for (Value value : values) - tree.addChild(value.toLispTree()); - return tree; - } - - protected ListValue cachedValue; - - @Override - public ListValue toValue() { - if (cachedValue != null) return cachedValue; - ListValue result = new ListValue(values); - if (LambdaDCSExecutor.opts.sortResults) - result = result.getSorted(); - cachedValue = result; - return result; - } - - @Override - public String toString() { - return toLispTree().toString(); - } - - @Override - public boolean contains(Object o) { - return values.contains(o); - } - - @Override - public boolean containsAll(Collection c) { - return values.containsAll(c); - } - - @Override - public Iterator iterator() { - return values.iterator(); - } - - @Override - public Object[] toArray() { - return values.toArray(); - } - - @Override public T[] toArray(T[] a) { - return values.toArray(a); - } - - @Override - public int size() { - return values.size(); - } - - @Override - public UnaryDenotation merge(UnaryDenotation that, MergeFormula.Mode mode) { - if (that.size() == Integer.MAX_VALUE) return that.merge(this, mode); - Set merged = new HashSet<>(values); - switch (mode) { - case and: merged.retainAll(that); break; - case or: merged.addAll(that); break; - default: throw new LambdaDCSException(Type.invalidFormula, "Unknown merge mode: %s", mode); - } - return new ExplicitUnaryDenotation(merged); - } - - @Override - public UnaryDenotation aggregate(AggregateFormula.Mode mode) { - if (mode == 
AggregateFormula.Mode.count) { - // Count the set size, not the list size - return new ExplicitUnaryDenotation(new NumberValue(new HashSet<>(values).size())); - } - return new ExplicitUnaryDenotation(DenotationUtils.aggregate(this, mode)); - } - - @Override - public UnaryDenotation filter(UnaryDenotation upperBound) { - List filtered = new ArrayList<>(); - for (Value value : values) { - if (upperBound.contains(value)) - filtered.add(value); - } - return new ExplicitUnaryDenotation(filtered); - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/InfiniteUnaryDenotation.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/InfiniteUnaryDenotation.java deleted file mode 100644 index 4bb941ee11..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/InfiniteUnaryDenotation.java +++ /dev/null @@ -1,435 +0,0 @@ -package edu.stanford.nlp.sempre.tables.lambdadcs; - -import java.time.YearMonth; -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.tables.DenotationTypeInference; -import edu.stanford.nlp.sempre.tables.InfiniteListValue; -import edu.stanford.nlp.sempre.tables.lambdadcs.LambdaDCSException.Type; -import fig.basic.*; - -/** - * A unary with infinite number of elements such as (>= 4) and * [= anything] - * - * @author ppasupat - */ -public abstract class InfiniteUnaryDenotation extends UnaryDenotation { - public static class Options { - @Option(gloss = "(!= x) only contains things with the same type as x") - public boolean neqMustTypeCheck = true; - } - public static Options opts = new Options(); - - // Default implementation: calls |contains| on all elements of |c| - @Override - public boolean containsAll(Collection c) { - for (Object o : c) { - if (!contains(o)) return false; - } - return true; - } - - @Override - public Iterator iterator() { - throw new LambdaDCSException(Type.infiniteList, "Cannot iterate over an infinite unary"); - } - - 
@Override - public Object[] toArray() { - throw new LambdaDCSException(Type.infiniteList, "Cannot convert an infinite unary to array"); - } - - @Override public T[] toArray(T[] a) { - throw new LambdaDCSException(Type.infiniteList, "Cannot convert an infinite unary to array"); - } - - @Override - public int size() { - return Integer.MAX_VALUE; - } - - @Override - public UnaryDenotation aggregate(AggregateFormula.Mode mode) { - throw new LambdaDCSException(Type.infiniteList, "Cannot use aggregate mode %s on %s", mode, this); - } - - @Override - public UnaryDenotation filter(UnaryDenotation that) { - return merge(that, MergeFormula.Mode.and); - } - - // Create an InfiniteUnaryDenotation based on the specification - public static InfiniteUnaryDenotation create(String binary, UnaryDenotation second) { - try { - if (ComparisonUnaryDenotation.COMPARATORS.contains(binary)) { - if (second instanceof EverythingUnaryDenotation) { - return (EverythingUnaryDenotation) second; - } else if (second instanceof GenericDateUnaryDenotation) { - if ("<".equals(binary) || ">=".equals(binary)) - return new ComparisonUnaryDenotation(binary, ((GenericDateUnaryDenotation) second).getMin()); - if (">".equals(binary) || "<=".equals(binary)) - return new ComparisonUnaryDenotation(binary, ((GenericDateUnaryDenotation) second).getMax()); - } - return new ComparisonUnaryDenotation(binary, DenotationUtils.getSingleValue(second)); - } - } catch (LambdaDCSException e) { - throw e; - } catch (Exception e) { } - throw new LambdaDCSException(Type.invalidFormula, - "Cannot create an InfiniteUnaryDenotation: binary = %s, second = %s", binary, second); - } - - // ============================================================ - // Everything (*) - // ============================================================ - - static class EverythingUnaryDenotation extends InfiniteUnaryDenotation { - - @Override - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("unary"); - 
tree.addChild("*"); - return tree; - } - - @Override - public Value toValue() { - return new InfiniteListValue(Arrays.asList("*")); - } - - @Override - public boolean contains(Object o) { - return true; - } - - @Override - public UnaryDenotation merge(UnaryDenotation that, MergeFormula.Mode mode) { - switch (mode) { - case and: return that; - case or: return this; - default: throw new LambdaDCSException(Type.invalidFormula, "Unknown merge mode: %s", mode); - } - } - - } - public static final InfiniteUnaryDenotation STAR_UNARY = new EverythingUnaryDenotation(); - - // ============================================================ - // Comparison - // ============================================================ - - public static class ComparisonUnaryDenotation extends InfiniteUnaryDenotation { - - public static final List COMPARATORS = Arrays.asList("!=", "<", ">", "<=", ">="); - public final String comparator; - public final Value value; - private final DenotationUtils.TypeProcessor valueProcessor; - - public ComparisonUnaryDenotation(String comparator, Value value) { - this.comparator = comparator; - this.value = value; - this.valueProcessor = comparator.equals("!=") ? 
null : DenotationUtils.getTypeProcessor(value); - } - - @Override - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild(comparator); - tree.addChild(value.toLispTree()); - return tree; - } - - @Override - public Value toValue() { - return new InfiniteListValue(Arrays.asList(comparator, value)); - } - - @Override - public UnaryDenotation merge(UnaryDenotation that, MergeFormula.Mode mode) { - if (that.size() != Integer.MAX_VALUE) { - if (mode == MergeFormula.Mode.and) { - Set filtered = new HashSet<>(); - for (Value value : that) - if (contains(value)) - filtered.add(value); - return new ExplicitUnaryDenotation(filtered); - } - } else if (that instanceof EverythingUnaryDenotation || that instanceof RangeUnaryDenotation) { - return that.merge(this, mode); - } else if (mode == MergeFormula.Mode.and && that instanceof InfiniteUnaryDenotation) { - UnaryDenotation answer = RangeEnds.andMerge(this, (InfiniteUnaryDenotation) that); - if (answer != null) return answer; - } - throw new LambdaDCSException(Type.infiniteList, "Cannot use merge mode %s on %s and %s", mode, this, that); - } - - @Override - public boolean contains(Object o) { - if (!(o instanceof Value)) return false; - Value that = ((Value) o); - if (comparator.equals("!=")) { - if (InfiniteUnaryDenotation.opts.neqMustTypeCheck) { - return !that.equals(value) && DenotationTypeInference.typeCheck(that, value); - } else { - return !that.equals(value); - } - } - if (!valueProcessor.isCompatible(that)) - throw new LambdaDCSException(Type.typeMismatch, "Cannot compare %s with %s", value, that); - int comparison = valueProcessor.compareValues(that, value); - switch (comparator) { - case "<": return comparison < 0; - case ">": return comparison > 0; - case "<=": return comparison <= 0; - case ">=": return comparison >= 0; - default: throw new LambdaDCSException(Type.invalidFormula, "Unknown comparator: %s", comparator); - } - } - - } - - // 
============================================================ - // Range - // ============================================================ - - static class RangeEnds { - public final String leftComparator, rightComparator; - public final Value leftValue, rightValue; - - public RangeEnds(String leftComparator, Value leftValue, String rightComparator, Value rightValue) { - this.leftComparator = leftComparator; - this.leftValue = leftValue; - this.rightComparator = rightComparator; - this.rightValue = rightValue; - } - - public static RangeEnds getRangeEnds(InfiniteUnaryDenotation x) { - if (x instanceof RangeUnaryDenotation) { - return ((RangeUnaryDenotation) x).rangeEnds; - } else if (x instanceof ComparisonUnaryDenotation) { - ComparisonUnaryDenotation comparison = (ComparisonUnaryDenotation) x; - String leftComparator = ">", rightComparator = "<"; - Value leftValue = null, rightValue = null; - switch (comparison.comparator) { - case "<=": rightComparator = "<="; - case "<": rightValue = comparison.value; break; - case ">=": leftComparator = ">="; - case ">": leftValue = comparison.value; break; - default: return null; - } - return new RangeEnds(leftComparator, leftValue, rightComparator, rightValue); - } - return null; - } - - // Helper function for performing AND on ComparisonUnaryDenotation or RangeUnaryDenotation - public static UnaryDenotation andMerge(InfiniteUnaryDenotation xDeno, InfiniteUnaryDenotation yDeno) { - RangeEnds x = getRangeEnds(xDeno), y = getRangeEnds(yDeno); - if (x == null || y == null) return null; - String leftComparator, rightComparator; - Value leftValue, rightValue; - int comparison; - // Left - comparison = (x.leftValue == null) ? -1 : (y.leftValue == null) ? 
+1 : - DenotationUtils.getTypeProcessor(x.leftValue, y.leftValue).compareValues(x.leftValue, y.leftValue); - if (comparison > 0) { - leftValue = x.leftValue; leftComparator = x.leftComparator; - } else if (comparison < 0) { - leftValue = y.leftValue; leftComparator = y.leftComparator; - } else { - leftValue = x.leftValue; leftComparator = (">".equals(x.leftComparator) || ">".equals(y.leftComparator))? ">" : ">="; - } - // Right - comparison = (x.rightValue == null) ? 1 : (y.rightValue == null) ? -1 : - DenotationUtils.getTypeProcessor(x.rightValue, y.rightValue).compareValues(x.rightValue, y.rightValue); - if (comparison < 0) { - rightValue = x.rightValue; rightComparator = x.rightComparator; - } else if (comparison > 0) { - rightValue = y.rightValue; rightComparator = y.rightComparator; - } else { - rightValue = x.rightValue; rightComparator = ("<".equals(x.rightComparator) || "<".equals(y.rightComparator))? "<" : "<="; - } - // Return answer - if (leftValue == null) { - if (rightValue == null) return null; - return new ComparisonUnaryDenotation(rightComparator, rightValue); - } else if (rightValue == null) { - return new ComparisonUnaryDenotation(leftComparator, leftValue); - } else { - comparison = DenotationUtils.getTypeProcessor(leftValue, rightValue).compareValues(leftValue, rightValue); - if (comparison < 0) - return new RangeUnaryDenotation(leftComparator, leftValue, rightComparator, rightValue); - if (comparison == 0 && ">=".equals(leftComparator) && "<=".equals(rightComparator)) - return new ExplicitUnaryDenotation(leftValue); - else - return null; - } - } - } - - public static class RangeUnaryDenotation extends InfiniteUnaryDenotation { - - public final RangeEnds rangeEnds; - private final DenotationUtils.TypeProcessor valueProcessor; - - public RangeUnaryDenotation(String leftComparator, Value leftValue, String rightComparator, Value rightValue) { - this.rangeEnds = new RangeEnds(leftComparator, leftValue, rightComparator, rightValue); - 
this.valueProcessor = DenotationUtils.getTypeProcessor(leftValue, rightValue); - } - - @Override - public LispTree toLispTree() { - LispTree tree = LispTree.proto.newList(); - tree.addChild("and"); - tree.addChild(LispTree.proto.newList(rangeEnds.leftComparator, rangeEnds.leftValue.toLispTree())); - tree.addChild(LispTree.proto.newList(rangeEnds.rightComparator, rangeEnds.rightValue.toLispTree())); - return tree; - } - - @Override - public Value toValue() { - return new InfiniteListValue(Arrays.asList(rangeEnds.leftComparator, rangeEnds.leftValue, - rangeEnds.rightComparator, rangeEnds.rightValue)); - } - - @Override - public UnaryDenotation merge(UnaryDenotation that, MergeFormula.Mode mode) { - if (that.size() != Integer.MAX_VALUE) { - if (mode == MergeFormula.Mode.and) { - Set filtered = new HashSet<>(); - for (Value value : that) - if (contains(value)) - filtered.add(value); - return new ExplicitUnaryDenotation(filtered); - } - } else if (that instanceof EverythingUnaryDenotation) { - return that.merge(this, mode); - } else if (mode == MergeFormula.Mode.and && that instanceof InfiniteUnaryDenotation) { - UnaryDenotation answer = RangeEnds.andMerge(this, (InfiniteUnaryDenotation) that); - if (answer != null) return answer; - } - throw new LambdaDCSException(Type.infiniteList, "Cannot use merge mode %s on %s and %s", mode, this, that); - } - - @Override - public boolean contains(Object o) { - if (!(o instanceof Value)) return false; - Value that = ((Value) o); - if (!valueProcessor.isCompatible(that)) - throw new LambdaDCSException(Type.typeMismatch, "Cannot compare %s and %s with %s", rangeEnds.leftValue, rangeEnds.rightValue, that); - int comparison = valueProcessor.compareValues(that, rangeEnds.leftValue); - switch (rangeEnds.leftComparator) { - case ">": if (comparison <= 0) return false; break; - case ">=": if (comparison < 0) return false; break; - default: throw new LambdaDCSException(Type.invalidFormula, "Unknown leftComparator: %s", 
rangeEnds.leftComparator); - } - comparison = valueProcessor.compareValues(that, rangeEnds.rightValue); - switch (rangeEnds.rightComparator) { - case "<": if (comparison >= 0) return false; break; - case "<=": if (comparison > 0) return false; break; - default: throw new LambdaDCSException(Type.invalidFormula, "Unknown rightComparator: %s", rangeEnds.rightComparator); - } - return true; - } - - } - - // ============================================================ - // Generic Date (e.g., (date -1 5 -1) in the formula also matches (date -1 5 12) in knowledge graph) - // ============================================================ - - public static class GenericDateUnaryDenotation extends InfiniteUnaryDenotation { - DateValue date; - - public GenericDateUnaryDenotation(DateValue date) { - if (date.year == -1 && date.month == -1 && date.day == -1) - throw new LambdaDCSException(Type.invalidFormula, "Date cannot be (date -1 -1 -1)"); - this.date = date; - } - - @Override - public LispTree toLispTree() { - return date.toLispTree(); - } - - @Override - public ListValue toValue() { - return new ListValue(Collections.singletonList(date)); - } - - @Override - public UnaryDenotation merge(UnaryDenotation that, MergeFormula.Mode mode) { - if (that.size() != Integer.MAX_VALUE) { - if (mode == MergeFormula.Mode.and) { - Set filtered = new HashSet<>(); - for (Value value : that) - if (contains(value)) - filtered.add(value); - return new ExplicitUnaryDenotation(filtered); - } - } else if (that instanceof EverythingUnaryDenotation) { - return that.merge(this, mode); - } - throw new LambdaDCSException(Type.infiniteList, "Cannot use merge mode %s on %s and %s", mode, this, that); - } - - public DateValue getMin() { - if (date.day != -1) - return date; - if (date.month != -1) - return new DateValue(date.year, date.month, 1); - if (date.year != -1) - return new DateValue(date.year, 1, 1); - throw new LambdaDCSException(Type.unknown, "Invalid date: (-1 -1 -1)."); - } - - public 
DateValue getMax() { - if (date.day != -1) - return date; - if (date.month != -1) - return new DateValue(date.year, date.month, - YearMonth.of(date.year == -1 ? 2000 : date.year, date.month).lengthOfMonth()); - if (date.year != -1) - return new DateValue(date.year, 12, 31); - throw new LambdaDCSException(Type.unknown, "Invalid date: (-1 -1 -1)."); - } - - @Override - public boolean contains(Object o) { - if (!(o instanceof DateValue)) return false; - DateValue that = (DateValue) o; - return (date.year == -1 || date.year == that.year) && - (date.month == -1 || date.month == that.month) && - (date.day == -1 || date.day == that.day); - } - - /** - * If the provided value is a full date like (date 2015 10 21), return an ExplicitUnaryDenotation object. - * If instead it has a placeholder like (date -1 10 21), return a GenericDateUnaryDenotation object. - */ - public static UnaryDenotation get(DateValue value) { - if (value.year != -1 && value.month != -1 && value.day != -1) - return new ExplicitUnaryDenotation(value); - return new GenericDateUnaryDenotation(value); - } - - } - - // ============================================================ - // Test - // ============================================================ - - public static void main(String[] args) { - //UnaryDenotation x = new RangeUnaryDenotation(">", new NumberValue(2), "<=", new NumberValue(4)); - UnaryDenotation x = new ComparisonUnaryDenotation("<=", new NumberValue(10)); - LogInfo.logs("%s", x); - //UnaryDenotation y = new RangeUnaryDenotation(">=", new NumberValue(3), "<=", new NumberValue(5)); - UnaryDenotation y = new ComparisonUnaryDenotation("<", new NumberValue(4)); - LogInfo.logs("%s", y); - UnaryDenotation z = x.merge(y, MergeFormula.Mode.and); - LogInfo.logs("%s", z); - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/LambdaDCSException.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/LambdaDCSException.java deleted file mode 
100644 index 4b9166b401..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/LambdaDCSException.java +++ /dev/null @@ -1,87 +0,0 @@ -package edu.stanford.nlp.sempre.tables.lambdadcs; - -import java.util.*; - -import fig.basic.Option; - -public class LambdaDCSException extends RuntimeException { - public static class Options { - @Option(gloss = "do not generate message to save time") public boolean noErrorMessage = false; - } - public static Options opts = new Options(); - - private static final long serialVersionUID = -9174017483530966223L; - - public enum Type { - - // Unknown formula parameters (e.g., superlative that is not argmax or argmin) - // Should not occur. Otherwise there is a serious bug in the code. - invalidFormula, - - // Trying to perform an operation on unsupported denotations - emptyList, - nonSingletonList, - infiniteList, - - // Type mismatch - typeMismatch, - notUnary, - notBinary, - notMapping, - - // Other errors - unknown, - - // Placeholder (used to represent partial formulas in DPDParser) - placeholder, - - }; - - public final Type type; - public final String message; - - public LambdaDCSException(Type type, String message, Object... 
args) { - this.type = type; - if (opts.noErrorMessage) - this.message = ""; - else if (args.length == 0) - this.message = message; - else - this.message = String.format(message, args); - } - - @Override - public String toString() { - return "" + type + ": " + message; - } - - private static final Collection UNRECOVERABLE = new ArrayList<>(); - static { - UNRECOVERABLE.add(Type.invalidFormula); - UNRECOVERABLE.add(Type.typeMismatch); - } - - public static boolean isUnrecoverable(String error) { - try { - String typeString = error.substring(0, error.indexOf(":")); - Type type = Type.valueOf(typeString); - return UNRECOVERABLE.contains(type); - } catch (Exception e) { - return false; // Be conservative - } - } - - @Override - public boolean equals(Object o) { - if (!(o instanceof LambdaDCSException)) return false; - LambdaDCSException that = (LambdaDCSException) o; - return type == that.type && - ((message == null && that.message == null) || message.equals(that.message)); - } - - @Override - public int hashCode() { - return type.hashCode() + message.hashCode(); - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/LambdaDCSExecutor.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/LambdaDCSExecutor.java deleted file mode 100644 index 0d468b25a3..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/LambdaDCSExecutor.java +++ /dev/null @@ -1,453 +0,0 @@ -package edu.stanford.nlp.sempre.tables.lambdadcs; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.tables.ScopedFormula; -import edu.stanford.nlp.sempre.tables.ScopedValue; -import edu.stanford.nlp.sempre.tables.TableKnowledgeGraph; -import edu.stanford.nlp.sempre.tables.lambdadcs.LambdaDCSException.Type; -import fig.basic.*; - -/** - * Execute a Formula on the given KnowledgeGraph instance. 
- * - * @author ppasupat - */ -public class LambdaDCSExecutor extends Executor { - public static class Options { - @Option(gloss = "Verbosity") public int verbose = 0; - @Option(gloss = "Use caching") public boolean useCache = true; - @Option(gloss = "Sort the resulting values (may slow down execution)") - public boolean sortResults = true; - @Option(gloss = "Allow the return value to be an implicit value") - public boolean allowImplicitValues = true; - @Option(gloss = "Allow the root formula to be a binary") - public boolean executeBinary = false; - @Option(gloss = "Generic DateValue: (date -1 5 -1) in formula also matches (date -1 5 12)") - public boolean genericDateValue = false; - @Option(gloss = "If the result is empty, return an ErrorValue instead of an empty ListValue") - public boolean failOnEmptyLists = false; - @Option(gloss = "Return all ties on (argmax 1 1 ...) and (argmin 1 1 ...)") - public boolean superlativesReturnAllTopTies = true; - @Option(gloss = "Aggregates (sum, avg, max, min) throw an error on empty lists") - public boolean aggregatesFailOnEmptyLists = false; - @Option(gloss = "Superlatives (argmax, argmin) throw an error on empty lists") - public boolean superlativesFailOnEmptyLists = false; - @Option(gloss = "Arithmetics (+, -, *, /) throw an error on empty lists") - public boolean arithmeticsFailOnEmptyLists = false; - @Option(gloss = "Arithmetics (+, -, *, /) throw an error when both operants have > 1 values") - public boolean arithmeticsFailOnMultipleElements = true; - } - public static Options opts = new Options(); - - public final Evaluation stats = new Evaluation(); - - @Override - public Response execute(Formula formula, ContextValue context) { - LambdaDCSCoreLogic logic; - if (opts.verbose < 3) { - logic = new LambdaDCSCoreLogic(context, stats); - } else { - logic = new LambdaDCSCoreLogicWithVerbosity(context, stats); - } - StopWatch stopWatch = new StopWatch(); - stopWatch.start(); - formula = Formulas.betaReduction(formula); - 
Value answer = logic.execute(formula); - stopWatch.stop(); - stats.addCumulative("execTime", stopWatch.ms); - if (stopWatch.ms >= 10 && opts.verbose >= 1) - LogInfo.logs("long time (%d ms): %s => %s", stopWatch.ms, formula, answer); - return new Response(answer); - } - - public void summarize() { - LogInfo.begin_track("LambdaDCSExecutor: summarize"); - stats.logStats("LambdaDCSExecutor"); - LogInfo.end_track(); - } -} - -// ============================================================ -// Execution -// ============================================================ - -/** - * Main logic of Lambda DCS Executor. - * - * Find the denotation of a formula (logical form) with respect to the given knowledge graph. - * - * Assume that the denotation is either a unary or a binary, - * and the final denotation is a unary. - * - * Both unaries and binaries are lists (not sets). - * However, the following formula types will treat them as sets: - * - and, or - * - count (= count the number of distinct values) - * - * Note that (and (!weight (@type @row)) (@p.num (> (number 90)))) may give a wrong answer, - * but it can be rewritten as (!weight (and (@type @row) (weight (@p.num (> (number 90)))))) - * - * @author ppasupat - */ -class LambdaDCSCoreLogic { - - // Note: STAR does not work well with type checking - static final NameValue STAR = new NameValue("*"); - - final KnowledgeGraph graph; - final Evaluation stats; - ExecutorCache cache; - - public LambdaDCSCoreLogic(ContextValue context, Evaluation stats) { - graph = context.graph; - this.stats = stats; - if (graph == null) - throw new RuntimeException("Cannot call LambdaDCSExecutor when context graph is null"); - if (graph instanceof TableKnowledgeGraph) - cache = ((TableKnowledgeGraph) graph).executorCache; - if (cache == null) - cache = ExecutorCache.singleton; - } - - public Value execute(Formula formula) { - if (LambdaDCSExecutor.opts.verbose >= 2) - LogInfo.logs("%s", formula); - Value answer; - // Special case: 
ScopedFormula - if (formula instanceof ScopedFormula) { - ScopedFormula scoped = (ScopedFormula) formula; - try { - // Head - UnaryDenotation head = (UnaryDenotation) computeUnary(scoped.head, TypeHint.UNRESTRICTED_UNARY); - if (head.size() == Integer.MAX_VALUE) - throw new LambdaDCSException(Type.infiniteList, "Cannot have an infinite head: ", head); - ListValue headValue = ((ListValue) head.toValue()).getUnique(); - // Relation - LambdaFormula lambdaRelation = (LambdaFormula) scoped.relation; - List> collapsedPairs = new ArrayList<>(); - for (Value varValue : headValue.values) { - UnaryDenotation results = (UnaryDenotation) computeUnary(lambdaRelation.body, - TypeHint.UNRESTRICTED_UNARY.withVar(lambdaRelation.var, varValue)); - if (LambdaDCSExecutor.opts.useCache) { - cache.put(graph, - new Pair<>(lambdaRelation.body, new Pair<>(lambdaRelation.var, varValue)), results); - } - if (!results.isEmpty()) - collapsedPairs.add(new Pair<>(varValue, results.toValue())); - } - Value relationValue = new PairListValue(collapsedPairs); - answer = new ScopedValue(headValue, relationValue); - } catch (LambdaDCSException e) { - answer = new ErrorValue(e.toString()); - } - } else { - // Unaries and Binaries - try { - Unarylike denotation = computeUnary(formula, TypeHint.UNRESTRICTED_UNARY); - if (LambdaDCSExecutor.opts.useCache) { - cache.put(graph, formula, denotation); - } - answer = denotation.toValue(); - if (answer instanceof ListValue) { - answer = ((ListValue) answer).getUnique(); - if (LambdaDCSExecutor.opts.failOnEmptyLists && ((ListValue) answer).values.isEmpty()) - answer = ErrorValue.empty; - } - } catch (LambdaDCSException e) { - if (LambdaDCSExecutor.opts.executeBinary && e.type == Type.notUnary) { - try { - Binarylike denotation = computeBinary(formula, TypeHint.UNRESTRICTED_BINARY); - answer = denotation.toValue(); - } catch (LambdaDCSException e2) { - answer = new ErrorValue(e2.toString()); - } - } else { - answer = new ErrorValue(e.toString()); - } - } - } - if 
(LambdaDCSExecutor.opts.verbose >= 2) - LogInfo.logs("=> %s", answer); - return answer; - } - - public Unarylike computeUnary(Formula formula, UnarylikeTypeHint typeHint) { - assert typeHint != null; - if (formula instanceof LambdaFormula) { - throw new LambdaDCSException(Type.notUnary, "[Unary] Not a unary %s", formula); - } - - if (LambdaDCSExecutor.opts.useCache) { - Object object = cache.get(graph, formula); - if (object != null && object instanceof Unarylike) { - stats.addCumulative("normalCacheHit", true); - stats.addCumulative("scopedCacheHit", false); - return (Unarylike) object; - } else if (typeHint.getIfSingleVar() != null) { - object = cache.get(graph, new Pair<>(formula, typeHint.getIfSingleVar())); - if (object != null && object instanceof Unarylike) { - stats.addCumulative("normalCacheHit", false); - stats.addCumulative("scopedCacheHit", true); - return (Unarylike) object; - } - } - stats.addCumulative("normalCacheHit", false); - stats.addCumulative("scopedCacheHit", false); - } - - if (formula instanceof ValueFormula) { - // ============================================================ - // ValueFormula - // ============================================================ - Value value = ((ValueFormula) formula).value; - if (value instanceof BooleanValue || value instanceof NumberValue || - value instanceof StringValue || value instanceof DateValue || value instanceof NameValue) { - // Special case: * - if (STAR.equals(value)) return InfiniteUnaryDenotation.STAR_UNARY; - // Special case: generic date - if (LambdaDCSExecutor.opts.genericDateValue && value instanceof DateValue) - return typeHint.applyBound(InfiniteUnaryDenotation.GenericDateUnaryDenotation.get((DateValue) value)); - // Rule out binaries - if (CanonicalNames.isBinary(value) && LambdaDCSExecutor.opts.executeBinary) - throw new LambdaDCSException(Type.notUnary, "[Unary] Binary value %s", formula); - if (value instanceof NameValue && graph instanceof TableKnowledgeGraph) - value = 
((TableKnowledgeGraph) graph).getNameValueWithOriginalString((NameValue) value); - // Other cases - return typeHint.applyBound(new ExplicitUnaryDenotation(value)); - } - - } else if (formula instanceof VariableFormula) { - // ============================================================ - // Variable - // ============================================================ - String name = ((VariableFormula) formula).name; - Value value = typeHint.get(name); - if (value != null) - return typeHint.applyBound(new ExplicitUnaryDenotation(value)); - // Could be a mapping - if (name.equals(typeHint.getFreeVar())) - return typeHint.applyBound(new MappingDenotation<>(name, PredicatePairList.IDENTITY)); - - } else if (formula instanceof JoinFormula) { - // ============================================================ - // JoinFormula - // ============================================================ - JoinFormula join = (JoinFormula) formula; - try { - // Compute unary, then join binary - Unarylike childD = computeUnary(join.child, typeHint.unrestrictedUnary()); - Binarylike relationD = computeBinary(join.relation, typeHint.asFirstOfBinaryWithSecond(childD.range())); - return typeHint.applyBound(DenotationUtils.genericJoin(relationD, childD)); - } catch (LambdaDCSException e1) { - try { - // Compute binary, then join unary - Binarylike relationD = computeBinary(join.relation, typeHint.asFirstOfBinary()); - Unarylike childUpperBound = relationD.joinOnValue(typeHint.upperBound); - Unarylike childD = computeUnary(join.child, typeHint.restrictedUnary(childUpperBound.range())); - return typeHint.applyBound(DenotationUtils.genericJoin(relationD, childD)); - } catch (LambdaDCSException e2) { - Type errorType = (e1.type == e2.type) ? 
e1.type : Type.unknown; - throw new LambdaDCSException(errorType, "Cannot join | %s | %s", e1, e2); - } - } - - } else if (formula instanceof MergeFormula) { - // ============================================================ - // Merge - // ============================================================ - MergeFormula merge = (MergeFormula) formula; - try { - Unarylike child1D = computeUnary(merge.child1, typeHint); - Unarylike child2D = computeUnary(merge.child2, - merge.mode == MergeFormula.Mode.and ? typeHint.restrict(child1D) : typeHint); - return typeHint.applyBound(DenotationUtils.merge(child1D, child2D, merge.mode)); - } catch (LambdaDCSException e1) { - try { - Unarylike child2D = computeUnary(merge.child2, typeHint); - Unarylike child1D = computeUnary(merge.child1, - merge.mode == MergeFormula.Mode.and ? typeHint.restrict(child2D) : typeHint); - return typeHint.applyBound(DenotationUtils.merge(child2D, child1D, merge.mode)); - } catch (LambdaDCSException e2) { - Type errorType = (e1.type == e2.type) ? 
e1.type : Type.unknown; - throw new LambdaDCSException(errorType, "Cannot merge | %s | %s", e1, e2); - } - } - - } else if (formula instanceof AggregateFormula) { - // ============================================================ - // Aggregate - // ============================================================ - AggregateFormula aggregate = (AggregateFormula) formula; - Unarylike childD = computeUnary(aggregate.child, typeHint.unrestrictedUnary()); - return typeHint.applyBound(childD.aggregate(aggregate.mode)); - - } else if (formula instanceof SuperlativeFormula) { - // ============================================================ - // Superlative - // ============================================================ - SuperlativeFormula superlative = (SuperlativeFormula) formula; - int rank = DenotationUtils.getSinglePositiveInteger( - computeUnary(superlative.rank, typeHint.unrestrictedUnary()).range()); - int count = DenotationUtils.getSinglePositiveInteger( - computeUnary(superlative.count, typeHint.unrestrictedUnary()).range()); - if (rank != 1 || count != 1) { - LogInfo.logs("Superlative WTF: %s | rank %d | count %d", formula, rank, count); - } - Unarylike headD = computeUnary(superlative.head, typeHint); - Binarylike relationD; - if (superlative.relation instanceof ReverseFormula) { - relationD = computeBinary(((ReverseFormula) superlative.relation).child, - typeHint.restrictedBinary(null, headD.range())); - } else { - relationD = computeBinary(superlative.relation, - typeHint.restrictedBinary(headD.range(), null)).reverse(); - } - return typeHint.applyBound(DenotationUtils.superlative(rank, count, headD, relationD, superlative.mode)); - - } else if (formula instanceof ArithmeticFormula) { - // ============================================================ - // Arithmetic - // ============================================================ - ArithmeticFormula arithmetic = (ArithmeticFormula) formula; - Unarylike child1D = computeUnary(arithmetic.child1, 
typeHint.unrestrictedUnary()); - Unarylike child2D = computeUnary(arithmetic.child2, typeHint.unrestrictedUnary()); - return typeHint.applyBound(DenotationUtils.arithmetic(child1D, child2D, arithmetic.mode)); - - } else if (formula instanceof MarkFormula) { - // ============================================================ - // Mark - // ============================================================ - MarkFormula mark = (MarkFormula) formula; - LambdaFormula lambda = new LambdaFormula(mark.var, - new MergeFormula(MergeFormula.Mode.and, new VariableFormula(mark.var), mark.body)); - Binarylike lambdaD = computeBinary(lambda, typeHint.asFirstAndSecondOfBinary()); - return lambdaD.joinOnValue(InfiniteUnaryDenotation.STAR_UNARY); - - } else { - throw new LambdaDCSException(Type.notUnary, "[Unary] Not a valid unary %s", formula); - } - - // Catch-all error - throw new LambdaDCSException(Type.unknown, "[Unary] Cannot handle formula %s", formula); - } - - public Binarylike computeBinary(Formula formula, BinaryTypeHint typeHint) { - assert typeHint != null; - if (formula instanceof ValueFormula) { - // ============================================================ - // ValueFormula - // ============================================================ - Value value = ((ValueFormula) formula).value; - // Must be a binary - if (CanonicalNames.isBinary(value)) - return new BinaryDenotation<>(new PredicatePairList(value, graph)); - else - throw new LambdaDCSException(Type.notBinary, "[Binary] Unary value %s", formula); - - } else if (formula instanceof ReverseFormula) { - // ============================================================ - // Reverse - // ============================================================ - ReverseFormula reverse = (ReverseFormula) formula; - Binarylike childD = computeBinary(reverse.child, typeHint.reverse()); - return childD.reverse(); - - } else if (formula instanceof LambdaFormula) { - // ============================================================ - // Lambda 
- // ============================================================ - // Note: The variable's values become the SECOND argument of the binary pairs - LambdaFormula lambda = (LambdaFormula) formula; - String var = lambda.var; - // Assuming that the type hint has enough information ... - try { - List> pairs = new ArrayList<>(); - for (Value varValue : typeHint.secondUpperBound) { - Unarylike results = computeUnary(lambda.body, typeHint.first().withVar(var, varValue)); - if (!(results instanceof UnaryDenotation)) - throw new LambdaDCSException(Type.notUnary, "Not a unary denotation: %s", results); - for (Value result : (UnaryDenotation) results) { - pairs.add(new Pair<>(result, varValue)); - } - } - return new BinaryDenotation<>(new ExplicitPairList(pairs)); - } catch (LambdaDCSException e) { } - // Try the reverse - try { - Formula reversed = Formulas.reverseFormula(lambda); - if (reversed instanceof LambdaFormula && !reversed.equals(lambda)) { - List> pairs = new ArrayList<>(); - for (Value varValue : typeHint.firstUpperBound) { - Unarylike results = computeUnary(((LambdaFormula) reversed).body, - typeHint.second().withVar(var, varValue)); - if (!(results instanceof UnaryDenotation)) - throw new LambdaDCSException(Type.notUnary, "Not a unary denotation: %s", results); - for (Value result : (UnaryDenotation) results) { - pairs.add(new Pair<>(varValue, result)); - } - } - return new BinaryDenotation<>(new ExplicitPairList(pairs)); - } else { - throw new LambdaDCSException(Type.unknown, "Cannot compute reverse of %s", lambda); - } - } catch (LambdaDCSException e) { } - // Try to execute using a mapping. 
- if (LambdaDCSExecutor.opts.executeBinary) { - try { - Unarylike mapping = computeUnary(lambda.body, typeHint.asMapping(lambda.var)); - if (mapping instanceof MappingDenotation) - return ((MappingDenotation) mapping).asBinary(); - } catch (LambdaDCSException e) { } - } - - } else { - throw new LambdaDCSException(Type.notBinary, "[Binary] Not a valid binary %s", formula); - } - - // Catch-all error - throw new LambdaDCSException(Type.unknown, "[Binary] Cannot handle formula %s", formula); - } - -} - -// ============================================================ -// Debug Print -// ============================================================ - -class LambdaDCSCoreLogicWithVerbosity extends LambdaDCSCoreLogic { - - public LambdaDCSCoreLogicWithVerbosity(ContextValue context, Evaluation stats) { - super(context, stats); - } - - @Override - public Unarylike computeUnary(Formula formula, UnarylikeTypeHint typeHint) { - LogInfo.begin_track("UNARY %s [%s]", formula, typeHint); - try { - Unarylike denotation = super.computeUnary(formula, typeHint); - LogInfo.logs("%s", denotation); - LogInfo.end_track(); - return denotation; - } catch (Exception e) { - LogInfo.end_track(); - throw e; - } - } - - @Override - public Binarylike computeBinary(Formula formula, BinaryTypeHint typeHint) { - LogInfo.begin_track("BINARY %s [%s]", formula, typeHint); - try { - Binarylike denotation = super.computeBinary(formula, typeHint); - LogInfo.logs("%s", denotation); - LogInfo.end_track(); - return denotation; - } catch (Exception e) { - LogInfo.end_track(); - throw e; - } - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/LambdaDCSExecutorTest.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/LambdaDCSExecutorTest.java deleted file mode 100644 index 3cbe95d021..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/LambdaDCSExecutorTest.java +++ /dev/null @@ -1,273 +0,0 @@ -package 
edu.stanford.nlp.sempre.tables.lambdadcs; - -import java.util.*; - -import fig.basic.*; -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.corenlp.CoreNLPAnalyzer; -import edu.stanford.nlp.sempre.tables.TableKnowledgeGraph; - -import org.testng.annotations.Test; - -/** - * Test LambdaDCSExecutor: Execute a Formula on a given context graph. - * - * @author ppasupat - */ -public class LambdaDCSExecutorTest { - - // ============================================================ - // Value Checker (copied from SparqlExectutorTest) - // ============================================================ - - public static abstract class ValuesChecker { - void checkValue(Value value) { - if (value instanceof ListValue) - checkList(new HashSet<>(((ListValue) value).values)); - else if (value instanceof PairListValue) - checkPairList(new HashSet<>(((PairListValue) value).pairs)); - else - throw new RuntimeException("The answer is not a ListValue or a PairListValue."); - } - void checkList(Collection values) { - // Override this - throw new RuntimeException("Got a ListValue; expected something else: " + values); - } - void checkPairList(Collection> pairs) { - // Override this - throw new RuntimeException("Got a PairListValue; expected something else: " + pairs); - } - } - - public static ValuesChecker size(final int expectedNumResults) { - return new ValuesChecker() { - public void checkList(Collection values) { - if (values.size() != expectedNumResults) - throw new RuntimeException("Expected " + expectedNumResults + " results, but got " + values.size() + ": " + values); - } - public void checkPairList(Collection> pairs) { - if (pairs.size() != expectedNumResults) - throw new RuntimeException("Expected " + expectedNumResults + " results, but got " + pairs.size() + ": " + pairs); - } - }; - } - - public static ValuesChecker sizeAtLeast(final int expectedNumResults) { - return new ValuesChecker() { - public void checkList(Collection values) { - if (values.size() < 
expectedNumResults) - throw new RuntimeException("Expected at least " + expectedNumResults + " results, but got " + values.size() + ": " + values); - } - public void checkPairList(Collection> pairs) { - if (pairs.size() < expectedNumResults) - throw new RuntimeException("Expected at least " + expectedNumResults + " results, but got " + pairs.size() + ": " + pairs); - } - }; - } - - public static ValuesChecker matches(String expected) { - final Value expectedValue = Value.fromString(expected); - return new ValuesChecker() { - public void checkList(Collection values) { - if (values.size() != 1 || !(new ArrayList<>(values)).get(0).equals(expectedValue)) - throw new RuntimeException("Expected " + expectedValue + ", but got " + values); - } - }; - } - - public static ValuesChecker matchesAll(String... expected) { - final List expectedValues = new ArrayList<>(); - for (String x : expected) expectedValues.add(Value.fromString(x)); - return new ValuesChecker() { - public void checkList(Collection values) { - if (values.size() != expectedValues.size() || !expectedValues.containsAll(values)) - throw new RuntimeException("Expected " + new ListValue(expectedValues) + ", but got " + values); - } - }; - } - - public static ValuesChecker regexMatches(final String expectedPattern) { - return new ValuesChecker() { - public void checkList(Collection values) { - if (values.size() != 1 || !(new ArrayList<>(values)).get(0).toString().matches(expectedPattern)) - throw new RuntimeException("Expected " + expectedPattern + ", but got " + values); - } - }; - } - - // ============================================================ - // Processing - // ============================================================ - - LambdaDCSExecutor executor = new LambdaDCSExecutor(); - - protected static void runFormula(LambdaDCSExecutor executor, String formula, KnowledgeGraph graph) { - runFormula(executor, formula, graph, sizeAtLeast(0)); - } - - protected static void runFormula(LambdaDCSExecutor executor, 
String formula, KnowledgeGraph graph, ValuesChecker checker) { - ContextValue context = new ContextValue(graph); - LambdaDCSExecutor.opts.verbose = 5; - LambdaDCSExecutor.opts.executeBinary = true; - LogInfo.begin_track("formula: %s", formula); - Executor.Response response = executor.execute(Formulas.fromLispTree(LispTree.proto.parseFromString(formula)), context); - LogInfo.logs("RESULT: %s", response.value); - LogInfo.end_track(); - if (checker != null) - checker.checkValue(response.value); - } - - protected static KnowledgeGraph getKnowledgeGraph(String name) { - if ("simple".equals(name)) { - return KnowledgeGraph.fromLispTree(LispTree.proto.parseFromString( - "(graph NaiveKnowledgeGraph ((number 1) (number 2) (number 3)))")); - } else if ("prez".equals(name)) { - return KnowledgeGraph.fromLispTree(LispTree.proto.parseFromString( - "(graph NaiveKnowledgeGraph " + - "(fb:en.barack_obama fb:people.person.place_of_birth fb:en.honolulu)" + - "(fb:en.barack_obama fb:people.person.profession fb:en.politician)" + - "(fb:en.barack_obama fb:people.person.weight_kg (number 82))" + - "(fb:en.george_w_bush fb:people.person.place_of_birth fb:en.new_haven)" + - "(fb:en.george_w_bush fb:people.person.profession fb:en.politician)" + - "(fb:en.george_w_bush fb:people.person.weight_kg (number 86))" + - "(fb:en.bill_clinton fb:people.person.place_of_birth fb:en.hope_arkansas)" + - "(fb:en.bill_clinton fb:people.person.profession fb:en.lawyer)" + - "(fb:en.bill_clinton fb:people.person.profession fb:en.politician)" + - "(fb:en.bill_clinton fb:people.person.weight_kg (number 100))" + - "(fb:en.nicole_kidman fb:people.person.place_of_birth fb:en.honolulu)" + - "(fb:en.nicole_kidman fb:people.person.profession fb:en.actor)" + - "(fb:en.nicole_kidman fb:people.person.weight_kg (number 58))" + - "(fb:en.morgan_freeman fb:people.person.place_of_birth fb:en.memphis)" + - "(fb:en.morgan_freeman fb:people.person.profession fb:en.actor)" + - "(fb:en.morgan_freeman fb:people.person.weight_kg 
(number 91))" + - "(fb:en.ronald_reagan fb:people.person.place_of_birth fb:en.tampico)" + - "(fb:en.ronald_reagan fb:people.person.profession fb:en.politician)" + - "(fb:en.ronald_reagan fb:people.person.weight_kg (number 82))" + - "(fb:en.honolulu fb:location.location.containedby fb:en.hawaii)" + - "(fb:en.memphis fb:location.location.containedby fb:en.tennessee)" + - "(fb:en.new_haven fb:location.location.containedby fb:en.connecticut)" + - "(fb:en.hope_arkansas fb:location.location.containedby fb:en.arkansas)" + - "(fb:en.tampico fb:location.location.containedby fb:en.illinois)" + - ")")); - } else if ("csv".equals(name)) { - return TableKnowledgeGraph.fromFilename("tables/toy-examples/nikos_machlas.csv"); - } else if ("csv2".equals(name)) { - return TableKnowledgeGraph.fromFilename("tables/toy-examples/204-495.tsv"); - } else if ("csv3".equals(name)) { - return TableKnowledgeGraph.fromFilename("tables/toy-examples/203-839.tsv"); - } - throw new RuntimeException("Unknown graph name: " + name); - } - - // ============================================================ - // Actual Tests - // ============================================================ - - @Test(groups = "lambdaSimple") public void lambdaOnGraphDummyTest() { - KnowledgeGraph graph = getKnowledgeGraph("simple"); - runFormula(executor, "(number 3)", graph, matches("(number 3)")); - } - - @Test(groups = "lambdaPrez") public void lambdaOnGraphBasicTest() { - KnowledgeGraph graph = getKnowledgeGraph("prez"); - runFormula(executor, "(fb:people.person.place_of_birth fb:en.honolulu)", - graph, matchesAll("(name fb:en.barack_obama)", "(name fb:en.nicole_kidman)")); - runFormula(executor, "(!fb:people.person.place_of_birth (fb:people.person.place_of_birth fb:en.honolulu))", - graph, matches("(name fb:en.honolulu)")); - runFormula(executor, "(!fb:people.person.place_of_birth fb:en.barack_obama)", - graph, matches("(name fb:en.honolulu)")); - runFormula(executor, "(and (fb:people.person.place_of_birth 
fb:en.honolulu) (fb:people.person.profession fb:en.actor))", - graph, matches("(name fb:en.nicole_kidman)")); - runFormula(executor, "(or (fb:people.person.place_of_birth fb:en.honolulu) (fb:people.person.profession fb:en.actor))", - graph, matchesAll("(name fb:en.barack_obama)", "(name fb:en.nicole_kidman)", "(name fb:en.morgan_freeman)")); - runFormula(executor, "(count (or (fb:people.person.place_of_birth fb:en.honolulu) (fb:people.person.profession fb:en.actor)))", - graph, matches("(number 3)")); - } - - @Test(groups = "lambdaPrez") public void lambdaOnGraphInfiniteTest() { - KnowledgeGraph graph = getKnowledgeGraph("prez"); - runFormula(executor, "(!fb:people.person.place_of_birth *)", - graph, size(5)); - runFormula(executor, "(and * (fb:people.person.place_of_birth fb:en.honolulu))", - graph, matchesAll("(name fb:en.barack_obama)", "(name fb:en.nicole_kidman)")); - runFormula(executor, "(max (!fb:people.person.weight_kg *))", - graph, matches("(number 100)")); - runFormula(executor, "(sum (!fb:people.person.weight_kg *))", - graph, matches("(number 499)")); - runFormula(executor, "(argmax 1 1 * fb:people.person.weight_kg)", - graph, matches("(name fb:en.bill_clinton)")); - runFormula(executor, "(fb:people.person.weight_kg (> (number 95)))", - graph, matches("(name fb:en.bill_clinton)")); - runFormula(executor, "(fb:people.person.weight_kg (!= (!fb:people.person.weight_kg fb:en.barack_obama)))", - graph, size(4)); - runFormula(executor, "(fb:people.person.weight_kg ((reverse >) (number 82)))", - graph, matchesAll("(name fb:en.barack_obama)", "(name fb:en.ronald_reagan)", "(name fb:en.nicole_kidman)")); - runFormula(executor, "(fb:people.person.weight_kg (and (< (number 100)) (> (number 90))))", - graph, matches("(name fb:en.morgan_freeman)")); - } - - @Test(groups = "lambdaPrez") public void lambdaOnGraphLambdaTest() { - KnowledgeGraph graph = getKnowledgeGraph("prez"); - runFormula(executor, "((lambda x (fb:people.person.place_of_birth (var x))) 
fb:en.honolulu)", - graph, matchesAll("(name fb:en.barack_obama)", "(name fb:en.nicole_kidman)")); - runFormula(executor, "(argmax 1 1 (fb:location.location.containedby *) (reverse (lambda x (count (fb:people.person.place_of_birth (var x))))))", - graph, matches("(name fb:en.honolulu)")); - runFormula(executor, "(and (!fb:people.person.place_of_birth *) ((reverse (lambda x (fb:people.person.place_of_birth (var x)))) fb:en.barack_obama))", - graph, matches("(name fb:en.honolulu)")); - runFormula(executor, "(and ((reverse (lambda x (fb:people.person.place_of_birth (var x)))) fb:en.barack_obama) (!fb:people.person.place_of_birth *))", - graph, matches("(name fb:en.honolulu)")); - runFormula(executor, "((reverse (lambda x (fb:people.person.place_of_birth (var x)))) fb:en.barack_obama)", - graph, matches("(name fb:en.honolulu)")); - } - - @Test(groups = "floating") public void lambdaOnGraphFloatingLambdaTest() { - KnowledgeGraph graph = getKnowledgeGraph("prez"); - runFormula(executor, "(lambda x (fb:people.person.place_of_birth (var x)))", graph); - runFormula(executor, "(lambda x ((reverse fb:people.person.place_of_birth) (var x)))", graph); - runFormula(executor, "(lambda x (count ((reverse fb:people.person.place_of_birth) (var x))))", graph); - } - - @Test(groups = "lambdaCSV") public void lambdaOnGraphCSVTest() { - KnowledgeGraph graph = getKnowledgeGraph("csv"); - runFormula(executor, "(number 3)", graph, matches("(number 3)")); - runFormula(executor, "(!fb:row.row.score (fb:row.row.opponent fb:cell_opponent.austria))", - graph, matches("(name fb:cell_score.1_2)")); - runFormula(executor, "(count (fb:row.row.result fb:cell_result.win))", - graph, matches("(number 16)")); - // Depending on tie-breaking, one of these will be correct - try { - // Return all top ties - runFormula(executor, "(argmax 1 1 (!fb:row.row.opponent (fb:type.object.type fb:type.row)) " - + "(reverse (lambda x (count (fb:row.row.opponent (var x))))))", - graph, size(5)); - } catch (Exception e) 
{ - // Return only one item - runFormula(executor, "(count (fb:row.row.opponent (argmax 1 1 (!fb:row.row.opponent (fb:type.object.type fb:type.row)) " - + "(reverse (lambda x (count (fb:row.row.opponent (var x))))))))", - graph, matches("(number 2)")); - } - } - - @Test(groups = "lambdaCSV2") public void lambdaOnGraphCSV2Test() { - KnowledgeGraph graph = getKnowledgeGraph("csv2"); - runFormula(executor, - "(and (!= (and (!= fb:cell_venue.away) fb:cell_venue.home)) ((reverse fb:row.row.opponent) (fb:row.row.index (- (number 2) (number 1)))))", - graph, matches("(name fb:cell_opponent.derby_county)")); - } - - @Test(groups = "lambdaCSV3") public void lambdaOnGraphCSV3Test() { - LanguageAnalyzer.setSingleton(new CoreNLPAnalyzer()); - KnowledgeGraph graph = getKnowledgeGraph("csv3"); - runFormula(executor, - "(count (fb:type.object.type fb:type.row))", - graph, matches("(number 21)")); - runFormula(executor, - "(count (fb:row.row.opened (fb:cell.cell.date (< (date 1926 -1 -1)))))", - graph, matches("(number 6)")); - runFormula(executor, - "(- (number 1926) (argmax (number 1) (number 1) ((reverse fb:cell.cell.number) " - + "(or (or (or fb:cell_closed.1920 fb:cell_closed.1925) fb:cell_opened.1926) fb:cell_closed.1946)) " - + "(reverse (lambda x (sum ((reverse fb:cell.cell.number) (fb:cell.cell.number (var x))))))))", - graph, matches("(number 6)")); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/MappingDenotation.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/MappingDenotation.java deleted file mode 100644 index d35a269486..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/MappingDenotation.java +++ /dev/null @@ -1,60 +0,0 @@ -package edu.stanford.nlp.sempre.tables.lambdadcs; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.AggregateFormula.Mode; -import edu.stanford.nlp.sempre.tables.lambdadcs.LambdaDCSException.Type; -import fig.basic.LispTree; - 
-/** - * Mapping denotation: a mapping from variable assignment to values. - * - * Share the implementation with BinaryDenotation by using PairList. - * - * @author ppasupat - */ -public class MappingDenotation implements Unarylike { - - protected final String domainVar; - protected final PL pairList; - - protected MappingDenotation(String domainVar, PL pairList) { - this.domainVar = domainVar; - this.pairList = pairList; - } - - @Override - public String toString() { - return toLispTree().toString(); - } - - @Override - public LispTree toLispTree() { - return LispTree.proto.L("mapping", domainVar, pairList.toLispTree()); - } - - @Override - public ListValue toValue() { - throw new LambdaDCSException(Type.notUnary, "Expected Unary; Mapping found: %s", this); - } - - @Override - public String getDomainVar() { - return domainVar; - } - - public BinaryDenotation asBinary() { - return new BinaryDenotation(pairList); - } - - @Override public UnaryDenotation domain() { return pairList.domain(); } - @Override public UnaryDenotation range() { return pairList.range(); } - @Override public UnaryDenotation get(Value key) { return pairList.get(key); } - @Override public UnaryDenotation inverseGet(Value value) { return pairList.inverseGet(value); } - @Override public Unarylike aggregate(Mode mode) { - return new MappingDenotation<>(domainVar, pairList.aggregate(mode)); - } - @Override public Unarylike filter(UnaryDenotation upperBound, UnaryDenotation domainUpperBound) { - return new MappingDenotation<>(domainVar, pairList.filter(upperBound, domainUpperBound)); - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/PairList.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/PairList.java deleted file mode 100644 index d1adca9beb..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/PairList.java +++ /dev/null @@ -1,37 +0,0 @@ -package edu.stanford.nlp.sempre.tables.lambdadcs; - -import 
edu.stanford.nlp.sempre.*; -import fig.basic.LispTree; - -public interface PairList { - - // ============================================================ - // Representation - // ============================================================ - - public String toString(); - public LispTree toLispTree(); - public PairListValue toValue(); - - // ============================================================ - // Getter - // ============================================================ - - public UnaryDenotation domain(); - public UnaryDenotation range(); - public UnaryDenotation get(Value key); - public UnaryDenotation inverseGet(Value value); - - // ============================================================ - // Operations - // ============================================================ - - public PairList aggregate(AggregateFormula.Mode mode); - public PairList filter(UnaryDenotation upperBound, UnaryDenotation domainUpperBound); - public PairList reverse(); - public UnaryDenotation joinOnKey(UnaryDenotation keys); - public UnaryDenotation joinOnValue(UnaryDenotation values); - public ExplicitPairList explicitlyFilterOnKey(UnaryDenotation keys); - public ExplicitPairList explicitlyFilterOnValue(UnaryDenotation values); - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/PredicatePairList.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/PredicatePairList.java deleted file mode 100644 index 6b3df58ac4..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/PredicatePairList.java +++ /dev/null @@ -1,256 +0,0 @@ -package edu.stanford.nlp.sempre.tables.lambdadcs; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.AggregateFormula.Mode; -import edu.stanford.nlp.sempre.tables.lambdadcs.LambdaDCSException.Type; -import fig.basic.LispTree; -import fig.basic.Pair; - -/** - * Implicitly represent a pair list using a single predicate: - * - NORMAL: a 
relation (fb:people.person.parent) or its reverse - * - COMPARISON: != < > <= >= - * - COLON: (: u) is an empty unary if u is empty; STAR otherwise. - * - EQUAL: identity map - * @author ppasupat - * - */ -public class PredicatePairList implements PairList { - - enum PredicateType { NORMAL, COMPARISON, COLON, EQUAL } - protected final PredicateType type; - - // predicate fb:people.person.birthdate maps key (date 1961 8 4) to value fb:en.barack_obama - // predicate > maps key 3 to value 7 - // predicate : maps any non-empty key to everything (*) - // predicate = maps any key to itself - protected final Value predicate; - protected final String predicateId; - protected final KnowledgeGraph graph; - - // ============================================================ - // Constructors - // ============================================================ - - public PredicatePairList(Value predicate, KnowledgeGraph graph) { - this.predicate = predicate; - this.predicateId = (predicate instanceof NameValue) ? 
((NameValue) predicate).id : null; - this.graph = graph; - if ("=".equals(predicateId)) { - type = PredicateType.EQUAL; - } else if (CanonicalNames.COLON.equals(predicateId)) { - type = PredicateType.COLON; - } else if (CanonicalNames.COMPARATORS.contains(predicateId)) { - type = PredicateType.COMPARISON; - } else { - type = PredicateType.NORMAL; - assert graph != null; - } - } - - public static final PredicatePairList IDENTITY = new PredicatePairList(new NameValue("="), null); - - // ============================================================ - // Representation - // ============================================================ - - @Override - public String toString() { - return toLispTree().toString(); - } - - @Override - public LispTree toLispTree() { - return predicate.toLispTree(); - } - - @Override - public PairListValue toValue() { - return explicitlyFilterOnKey(InfiniteUnaryDenotation.STAR_UNARY).toValue(); - } - - // ============================================================ - // Getter - // ============================================================ - - // If needed, the explicit list of pairs are computed and cached - protected ExplicitPairList explicitPairListCache; - - protected ExplicitPairList getExplicit() { - if (explicitPairListCache == null) - explicitPairListCache = explicitlyFilterOnKey(InfiniteUnaryDenotation.STAR_UNARY); - return explicitPairListCache; - } - - @Override - public UnaryDenotation domain() { - switch (type) { - case EQUAL: case COLON: case COMPARISON: - return InfiniteUnaryDenotation.STAR_UNARY; - default: - return getExplicit().domain(); - } - } - - @Override - public UnaryDenotation range() { - switch (type) { - case EQUAL: case COLON: case COMPARISON: - return InfiniteUnaryDenotation.STAR_UNARY; - default: - return getExplicit().range(); - } - } - - @Override - public UnaryDenotation get(Value key) { - switch (type) { - case EQUAL: - return new ExplicitUnaryDenotation(key); - case COLON: - return 
InfiniteUnaryDenotation.STAR_UNARY; - case COMPARISON: - return new InfiniteUnaryDenotation.ComparisonUnaryDenotation(predicateId, key); - default: - return getExplicit().get(key); - } - } - - @Override - public UnaryDenotation inverseGet(Value value) { - switch (type) { - case EQUAL: - return new ExplicitUnaryDenotation(value); - case COLON: - throw new LambdaDCSException(Type.invalidFormula, "Cannot perform inverseGet on COLON"); - case COMPARISON: - return new InfiniteUnaryDenotation.ComparisonUnaryDenotation( - CanonicalNames.COMPARATOR_REVERSE.get(predicateId), value); - default: - return getExplicit().inverseGet(value); - } - } - - // ============================================================ - // Operations - // ============================================================ - - @Override - public PairList aggregate(Mode mode) { - switch (type) { - case EQUAL: - return this; - case COLON: case COMPARISON: - throw new LambdaDCSException(Type.infiniteList, "Cannot call aggregate on %s", this); - default: - return getExplicit().aggregate(mode); - } - } - - @Override - public PairList filter(UnaryDenotation upperBound, UnaryDenotation domainUpperBound) { - return explicitlyFilter(upperBound, domainUpperBound); - } - - @Override - public PairList reverse() { - switch (type) { - case EQUAL: - return this; - case COLON: - throw new LambdaDCSException(Type.invalidFormula, "Cannot perform reverse on COLON"); - default: - return new PredicatePairList(CanonicalNames.reverseProperty(predicate), graph); - } - } - - @Override - public UnaryDenotation joinOnKey(UnaryDenotation keys) { - switch (type) { - case EQUAL: - return keys; - case COLON: - return keys.isEmpty() ? 
UnaryDenotation.EMPTY : InfiniteUnaryDenotation.STAR_UNARY; - case COMPARISON: - return InfiniteUnaryDenotation.create(predicateId, keys); - default: - return new ExplicitUnaryDenotation(graph.joinSecond(predicate, keys)); - } - } - - @Override - public UnaryDenotation joinOnValue(UnaryDenotation values) { - switch (type) { - case EQUAL: - return values; - case COLON: - throw new LambdaDCSException(Type.invalidFormula, "Cannot perform reverse on COLON"); - case COMPARISON: - return InfiniteUnaryDenotation.create(CanonicalNames.COMPARATOR_REVERSE.get(predicateId), values); - default: - return new ExplicitUnaryDenotation(graph.joinFirst(predicate, values)); - } - } - - @Override - public ExplicitPairList explicitlyFilterOnKey(UnaryDenotation keys) { - switch (type) { - case EQUAL: - if (keys.size() == Integer.MAX_VALUE) - throw new LambdaDCSException(Type.infiniteList, "Cannot call explicitlyFilter* on %s", this); - List> pairs = new ArrayList<>(); - for (Value x : keys) pairs.add(new Pair<>(x, x)); - return new ExplicitPairList(pairs); - case COLON: case COMPARISON: - throw new LambdaDCSException(Type.infiniteList, "Cannot call explicitlyFilter* on %s", this); - default: - return new ExplicitPairList(graph.filterSecond(predicate, keys)); - } - } - - @Override - public ExplicitPairList explicitlyFilterOnValue(UnaryDenotation values) { - switch (type) { - case EQUAL: - if (values.size() == Integer.MAX_VALUE) - throw new LambdaDCSException(Type.infiniteList, "Cannot call explicitlyFilter* on %s", this); - List> pairs = new ArrayList<>(); - for (Value x : values) pairs.add(new Pair<>(x, x)); - return new ExplicitPairList(pairs); - case COLON: case COMPARISON: - throw new LambdaDCSException(Type.infiniteList, "Cannot call explicitlyFilter* on %s", this); - default: - return new ExplicitPairList(graph.filterFirst(predicate, values)); - } - } - - public ExplicitPairList explicitlyFilter(UnaryDenotation values, UnaryDenotation keys) { - List> pairs = new ArrayList<>(); - 
switch (type) { - case EQUAL: - UnaryDenotation domain = values.merge(keys, MergeFormula.Mode.and); - if (domain.size() == Integer.MAX_VALUE) - throw new LambdaDCSException(Type.infiniteList, "Cannot call explicitlyFilter* on %s", this); - for (Value x : domain) pairs.add(new Pair<>(x, x)); - return new ExplicitPairList(pairs); - case COLON: case COMPARISON: - throw new LambdaDCSException(Type.infiniteList, "Cannot call explicitlyFilter* on %s", this); - default: - try { - for (Pair pair : graph.filterSecond(predicate, keys)) - if (values.contains(pair.getFirst())) pairs.add(pair); - return new ExplicitPairList(pairs); - } catch (LambdaDCSException e) { - try { - for (Pair pair : graph.filterFirst(predicate, values)) - if (keys.contains(pair.getSecond())) pairs.add(pair); - return new ExplicitPairList(pairs); - } catch (LambdaDCSException e2) { - throw new LambdaDCSException(Type.infiniteList, "Cannot call explicitlyFilter* on %s", this); - } - } - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/TypeHint.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/TypeHint.java deleted file mode 100644 index c4ebed23aa..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/TypeHint.java +++ /dev/null @@ -1,140 +0,0 @@ -package edu.stanford.nlp.sempre.tables.lambdadcs; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.tables.lambdadcs.LambdaDCSException.Type; -import fig.basic.Pair; - -/** - * Impose some constraints on the possible denotation of a Formula. - * - * TypeHint is immutable, but one can create a new TypeHint object using the information - * from the original TypeHint. - * - * @author ppasupat - */ -public abstract class TypeHint { - - /** - * Immutable map from variable name to its value. - * At most one variable can be a free variable. - * The scope (domain) of the free variable can also be specified as a formula. 
- */ - protected static class VariableMap { - - protected final Map mapping; - public final String freeVar; - - public VariableMap() { - mapping = new HashMap<>(); - freeVar = null; - } - - private VariableMap(Map mapping, String freeVar) { - this.mapping = new HashMap<>(mapping); - this.freeVar = freeVar; - } - - public VariableMap plus(String name, Value value) { - VariableMap answer; - if (name.equals(freeVar)) { - answer = new VariableMap(mapping, null); - } else { - answer = new VariableMap(mapping, freeVar); - } - answer.mapping.put(name, value); - return answer; - } - - public VariableMap plusFreeVar(String name) { - if (freeVar != null) - throw new LambdaDCSException(Type.invalidFormula, - "TypeHint already has a free variable %s", freeVar); - if (mapping.containsKey(name)) - throw new LambdaDCSException(Type.invalidFormula, - "Variable %s is already bound to %s", name, mapping.get(name)); - VariableMap answer = new VariableMap(mapping, name); - return answer; - } - - public Value get(String name) { - Value value = mapping.get(name); - if (value == null && !name.equals(freeVar)) - throw new LambdaDCSException(Type.invalidFormula, "Unbound variable: " + name); - return value; - } - - public Pair getIfSingleVar() { - if (mapping.size() != 1) return null; - Map.Entry entry = mapping.entrySet().iterator().next(); - return new Pair<>(entry.getKey(), entry.getValue()); - } - - @Override - public String toString() { - StringBuilder builder = new StringBuilder(); - if (freeVar != null) { - builder.append("(").append(freeVar).append(")"); - } - for (Map.Entry entry : mapping.entrySet()) { - builder.append(", ").append(entry.getKey()).append(": ").append(entry.getValue()); - } - return "{" + builder.append("}").toString(); - } - } - - public VariableMap variableMap; - - public Value get(String name) { - return variableMap.get(name); - } - - public Pair getIfSingleVar() { - return variableMap.getIfSingleVar(); - } - - public String getFreeVar() { - return 
variableMap.freeVar; - } - - // Unrestricted type hints - - public static final UnarylikeTypeHint UNRESTRICTED_UNARY = new UnarylikeTypeHint(null, null, new VariableMap()); - public static final BinaryTypeHint UNRESTRICTED_BINARY = new BinaryTypeHint(null, null, new VariableMap()); - - public UnarylikeTypeHint unrestrictedUnary() { - return new UnarylikeTypeHint(null, null, variableMap); - } - - public BinaryTypeHint unrestrictedBinary() { - return new BinaryTypeHint(null, null, variableMap); - } - - // Restricted type hints - - public static UnarylikeTypeHint newRestrictedUnary(UnaryDenotation upperBound) { - return new UnarylikeTypeHint(upperBound, null, new VariableMap()); - } - - public static UnarylikeTypeHint newRestrictedUnary(UnaryDenotation upperBound, UnaryDenotation domainUpperBound) { - return new UnarylikeTypeHint(upperBound, domainUpperBound, new VariableMap()); - } - - public static BinaryTypeHint newRestrictedBinary(UnaryDenotation first, UnaryDenotation second) { - return new BinaryTypeHint(first, second, new VariableMap()); - } - - public UnarylikeTypeHint restrictedUnary(UnaryDenotation upperBound) { - return new UnarylikeTypeHint(upperBound, null, variableMap); - } - - public UnarylikeTypeHint restrictedUnary(UnaryDenotation upperBound, UnaryDenotation domainUpperBound) { - return new UnarylikeTypeHint(upperBound, domainUpperBound, variableMap); - } - - public BinaryTypeHint restrictedBinary(UnaryDenotation first, UnaryDenotation second) { - return new BinaryTypeHint(first, second, variableMap); - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/UnaryDenotation.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/UnaryDenotation.java deleted file mode 100644 index b56b96ada8..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/UnaryDenotation.java +++ /dev/null @@ -1,84 +0,0 @@ -package edu.stanford.nlp.sempre.tables.lambdadcs; - -import java.util.*; - 
-import edu.stanford.nlp.sempre.*; - -/** - * Unary denotation: a list of values. - * - * @author ppasupat - */ -public abstract class UnaryDenotation implements Unarylike, Collection { - - @Override - public String toString() { - return toLispTree().toString(); - } - - @Override - public String getDomainVar() { - return null; - } - - public static final UnaryDenotation EMPTY = new ExplicitUnaryDenotation(); - public static final UnaryDenotation NULL = new ExplicitUnaryDenotation((Value) null); - public static final UnaryDenotation ZERO = new ExplicitUnaryDenotation(new NumberValue(0)); - public static final UnaryDenotation ONE = new ExplicitUnaryDenotation(new NumberValue(1)); - - @Override - public UnaryDenotation get(Value key) { - // Any assignment yields the same answer. - return this; - } - - @Override - public UnaryDenotation inverseGet(Value value) { - return NULL; - } - - @Override - public UnaryDenotation domain() { - return NULL; - } - - @Override - public UnaryDenotation range() { - return this; - } - - public abstract UnaryDenotation merge(UnaryDenotation that, MergeFormula.Mode mode); - - @Override - public abstract UnaryDenotation aggregate(AggregateFormula.Mode mode); - - @Override - public UnaryDenotation filter(UnaryDenotation upperBound, UnaryDenotation domainUpperBound) { - // domainUpperBound information is not used - return filter(upperBound); - } - - public abstract UnaryDenotation filter(UnaryDenotation upperBound); - - // ============================================================ - // Collection interface - // ============================================================ - - // Don't support mutation - @Override public boolean add(Value e) { throw new UnsupportedOperationException("unsupported"); } - @Override public boolean addAll(Collection c) { throw new UnsupportedOperationException("unsupported"); } - @Override public void clear() { throw new UnsupportedOperationException("unsupported"); } - @Override public boolean remove(Object o) { 
throw new UnsupportedOperationException("unsupported"); } - @Override public boolean removeAll(Collection c) { throw new UnsupportedOperationException("unsupported"); } - @Override public boolean retainAll(Collection c) { throw new UnsupportedOperationException("unsupported"); } - - @Override public abstract boolean contains(Object o); - @Override public abstract boolean containsAll(Collection c); - @Override public abstract Iterator iterator(); - @Override public abstract Object[] toArray(); - @Override public abstract T[] toArray(T[] a); - @Override public abstract int size(); - @Override public boolean isEmpty() { return size() == 0; } - - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/Unarylike.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/Unarylike.java deleted file mode 100644 index 9da19d1f6c..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/Unarylike.java +++ /dev/null @@ -1,47 +0,0 @@ -package edu.stanford.nlp.sempre.tables.lambdadcs; - -import edu.stanford.nlp.sempre.*; -import fig.basic.LispTree; - -/** - * Represents a UnaryDenotation or a MappingDenotation. - * - * The following operations must be handled: - * - Map - * - count(UL) - * - Aggregate: sum(UL), ... - * - Combine - * - Merge: and(UL1, UL2), ... - * - Arithmetic: sub(UL1, UL2), ... - * - * Compose operations (join, superlative) are handled in BL. - * - * @author ppasupat - */ -public interface Unarylike { - - public LispTree toLispTree(); - public Value toValue(); - - /** Return the name of the free variable. */ - public String getDomainVar(); - - /** List of possible variable assignments */ - public UnaryDenotation domain(); - - /** List of possible values. */ - public UnaryDenotation range(); - - /** |key| => ??? */ - public UnaryDenotation get(Value key); - - /** ??? 
=> |value| */ - public UnaryDenotation inverseGet(Value value); - - /** count and other aggregate operations */ - public Unarylike aggregate(AggregateFormula.Mode mode); - - /** Return a new Unarylike where only the values found in |upperBound| - * and domain values found in |domainUpperBound| are kept */ - public Unarylike filter(UnaryDenotation upperBound, UnaryDenotation domainUpperBound); -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/UnarylikeTypeHint.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/UnarylikeTypeHint.java deleted file mode 100644 index ce4053142b..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/lambdadcs/UnarylikeTypeHint.java +++ /dev/null @@ -1,71 +0,0 @@ -package edu.stanford.nlp.sempre.tables.lambdadcs; - -import edu.stanford.nlp.sempre.*; - -/** - * Impose that the result is a unary and the set of values should be a subset of |upperBound|. - * - * @author ppasupat - */ -public class UnarylikeTypeHint extends TypeHint { - - public final UnaryDenotation upperBound; - public final UnaryDenotation domainUpperBound; - - // Should only be called within this package - protected UnarylikeTypeHint(UnaryDenotation u, UnaryDenotation domain, VariableMap map) { - upperBound = (u == null) ? InfiniteUnaryDenotation.STAR_UNARY : u; - domainUpperBound = (domain == null) ? InfiniteUnaryDenotation.STAR_UNARY : domain; - variableMap = map; - } - - @Override - public String toString() { - return "UnaryTypeHint [" + domainUpperBound + " => " + upperBound + "] " + variableMap; - } - - /** - * Keep only the values that appear in this upperBound. - * If a value occurs multiple times, keep the multiplicity. 
- */ - public Unarylike applyBound(Unarylike denotation) { - return denotation.filter(upperBound, domainUpperBound); - } - - // ============================================================ - // Derive a new type hint - // ============================================================ - - public UnarylikeTypeHint withVar(String name, Value value) { - return new UnarylikeTypeHint(upperBound, domainUpperBound, variableMap.plus(name, value)); - } - - public UnarylikeTypeHint withFreeVar(String name) { - return new UnarylikeTypeHint(upperBound, domainUpperBound, variableMap.plusFreeVar(name)); - } - - public BinaryTypeHint asFirstOfBinary() { - return new BinaryTypeHint(upperBound, null, variableMap); - } - - public BinaryTypeHint asFirstOfBinaryWithSecond(UnaryDenotation second) { - return new BinaryTypeHint(upperBound, second, variableMap); - } - - public BinaryTypeHint asSecondOfBinary() { - return new BinaryTypeHint(null, upperBound, variableMap); - } - - public BinaryTypeHint asSecondOfBinaryWithFirst(UnaryDenotation first) { - return new BinaryTypeHint(first, upperBound, variableMap); - } - - public BinaryTypeHint asFirstAndSecondOfBinary() { - return new BinaryTypeHint(upperBound, upperBound, variableMap); - } - - public UnarylikeTypeHint restrict(Unarylike child1d) { - return restrictedUnary(DenotationUtils.merge(upperBound, child1d, MergeFormula.Mode.and).range()); - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/match/ClosedClassFn.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/match/ClosedClassFn.java deleted file mode 100644 index bd8c3f4a4f..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/match/ClosedClassFn.java +++ /dev/null @@ -1,141 +0,0 @@ -package edu.stanford.nlp.sempre.tables.match; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.tables.*; -import fig.basic.LispTree; -import fig.basic.LogInfo; -import fig.basic.Option; - -/** - * 
Generate the closed class entities from the table, including: - * - [generic] Generic common entities (e.g., null = empty cell) - * - [column] If the number of unique entities in a column is <= the limit, - * and there is at least one repeated entity, generate all entities in the column. - * - * @author ppasupat - */ -public class ClosedClassFn extends SemanticFn { - public static class Options { - @Option(gloss = "verbosity") public int verbose = 0; - @Option(gloss = "maximum number of unique entities in a column to be considered a closed class") - public int maxNumClosedClassEntities = 3; - } - public static Options opts = new Options(); - - public enum ClosedClassFnMode { - GENERIC, COLUMN, - } - protected ClosedClassFnMode mode; - - public void init(LispTree tree) { - super.init(tree); - String value = tree.child(1).value; - if ("generic".equals(value)) this.mode = ClosedClassFnMode.GENERIC; - else if ("column".equals(value)) this.mode = ClosedClassFnMode.COLUMN; - else throw new RuntimeException("Invalid argument: " + value); - } - - @Override - public DerivationStream call(Example ex, Callable c) { - return new LazyClosedClassFnDerivs(ex, c, mode); - } - - // ============================================================ - // Derivation - // ============================================================ - - public static class LazyClosedClassFnDerivs extends MultipleDerivationStream { - final Example ex; - final TableKnowledgeGraph graph; - final Callable c; - final ClosedClassFnMode mode; - - int index = 0; - List formulas; - - public LazyClosedClassFnDerivs(Example ex, Callable c, ClosedClassFnMode mode) { - this.ex = ex; - this.graph = (TableKnowledgeGraph) ex.context.graph; - this.c = c; - this.mode = mode; - } - - @Override - public Derivation createDerivation() { - // Compute the formulas if not computed yet - if (formulas == null) { - switch (mode) { - case GENERIC: - formulas = new ArrayList<>(createGenericFormulas()); - break; - case COLUMN: - formulas = 
new ArrayList<>(createColumnFormulas()); - break; - default: - throw new RuntimeException("Invalid mode: " + mode); - } - } - - // Use the next formula to create a derivation - if (index >= formulas.size()) return null; - Formula formula = formulas.get(index++); - SemType type = TypeInference.inferType(formula); - - return new Derivation.Builder().withCallable(c) - .formula(formula).type(type).createDerivation(); - } - - protected Collection createGenericFormulas() { - List formulas = new ArrayList<>(); - // Find out if the table has a null cell - for (TableColumn column : graph.columns) { - for (TableCell cell : column.children) { - if (cell.properties.id.endsWith(".null")) { - formulas.add(new ValueFormula<>(cell.properties.nameValue)); - break; - - } - } - } - if (ClosedClassFn.opts.verbose >= 2) { - LogInfo.begin_track("ClosedClassFn(generic):"); - for (Formula formula : formulas) - LogInfo.logs("%s", formula); - LogInfo.end_track(); - } - return formulas; - } - - protected Collection createColumnFormulas() { - Set formulas = new HashSet<>(); - // Process the columns separately - for (TableColumn column : graph.columns) { - boolean hasRepeats = false; - Set values = new HashSet<>(); - for (TableCell cell : column.children) { - if (cell.properties.id.endsWith(".null")) continue; - if (values.contains(cell.properties.nameValue)) - hasRepeats = true; - else - values.add(cell.properties.nameValue); - } - if (values.size() <= ClosedClassFn.opts.maxNumClosedClassEntities && hasRepeats) { - for (Value value : values) { - formulas.add(new ValueFormula<>(value)); - } - } - } - if (ClosedClassFn.opts.verbose >= 2) { - LogInfo.begin_track("ClosedClassFn(column):"); - for (Formula formula : formulas) - LogInfo.logs("%s", formula); - LogInfo.end_track(); - } - return formulas; - } - - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/match/EditDistanceFuzzyMatcher.java 
b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/match/EditDistanceFuzzyMatcher.java deleted file mode 100644 index 9288b6ce0e..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/match/EditDistanceFuzzyMatcher.java +++ /dev/null @@ -1,412 +0,0 @@ -package edu.stanford.nlp.sempre.tables.match; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.FuzzyMatchFn.FuzzyMatchFnMode; -import edu.stanford.nlp.sempre.tables.*; -import fig.basic.*; - -/** - * Perform fuzzy matching on the table knowledge graph. - * - * @author ppasupat - */ -public class EditDistanceFuzzyMatcher extends FuzzyMatcher { - public static class Options { - @Option(gloss = "Verbosity") - public int verbose = 0; - @Option(gloss = "Also return the union of matched formulas") - public boolean alsoReturnUnion = false; - @Option(gloss = "Also return fb:row.consecutive...") - public boolean alsoReturnConsecutive = false; - @Option(gloss = "Also match parts") - public boolean alsoMatchPart = false; - @Option(gloss = "Also add normalization to matched binaries") - public boolean alsoAddNormalization = false; - @Option(gloss = "Maximum edit distance ratio") - public double fuzzyMatchMaxEditDistanceRatio = 0; - @Option(gloss = "Allow the query phrase to match part of the table cell content") - public boolean fuzzyMatchSubstring = false; - @Option(gloss = "Minimum query phrase length (number of characters) to invoke substring matching") - public int fuzzyMatchSubstringMinQueryLength = 3; - @Option(gloss = "If the number of cells matching the query exceeds this, don't return individual matches (but still return the union)") - public int fuzzyMatchMaxTotalMatches = 5; - @Option(gloss = "If the number of cells having the query as a substring exceeds this, don't perform substring matches") - public int fuzzyMatchMaxSubstringMatches = 5; - @Option(gloss = "Ignore cells with more than this number of characters when doing substring matching") - 
public int fuzzyMatchSubstringMaxCellLength = 70; - } - public static Options opts = new Options(); - - public EditDistanceFuzzyMatcher(TableKnowledgeGraph graph) { - super(graph); - precompute(); - } - - protected final Map> - phraseToEntityFormulas = new HashMap<>(), - phraseToUnaryFormulas = new HashMap<>(), - phraseToBinaryFormulas = new HashMap<>(); - protected final Map> - substringToEntityFormulas = new HashMap<>(), - substringToUnaryFormulas = new HashMap<>(), - substringToBinaryFormulas = new HashMap<>(); - protected final Set - allEntityFormulas = new HashSet<>(), - allUnaryFormulas = new HashSet<>(), - allBinaryFormulas = new HashSet<>(); - - protected void precompute() { - // unary and binary - for (TableColumn column : graph.columns) { - Formula unary = getUnaryFormula(column); - Formula binary = getBinaryFormula(column); - Formula consecutive = (opts.alsoReturnConsecutive && column.hasConsecutive()) ? getConsecutiveBinaryFormula(column) : null; - List normalizedBinaries = opts.alsoAddNormalization ? 
getNormalizedBinaryFormulas(column) : null; - allUnaryFormulas.add(unary); - allBinaryFormulas.add(binary); - if (consecutive != null) - allBinaryFormulas.add(consecutive); - if (normalizedBinaries != null) - allBinaryFormulas.addAll(normalizedBinaries); - for (String s : getAllCollapsedForms(column.originalString)) { - MapUtils.addToSet(phraseToUnaryFormulas, s, unary); - MapUtils.addToSet(phraseToBinaryFormulas, s, binary); - if (consecutive != null) - MapUtils.addToSet(phraseToBinaryFormulas, s, consecutive); - if (normalizedBinaries != null) - for (Formula f : normalizedBinaries) - MapUtils.addToSet(phraseToBinaryFormulas, s, f); - } - if (opts.fuzzyMatchSubstring) { - for (String s : getAllSubstringCollapsedForms(column.originalString)) { - MapUtils.addToSet(substringToUnaryFormulas, s, unary); - MapUtils.addToSet(substringToBinaryFormulas, s, binary); - if (consecutive != null) - MapUtils.addToSet(substringToBinaryFormulas, s, consecutive); - if (normalizedBinaries != null) - for (Formula f : normalizedBinaries) - MapUtils.addToSet(substringToBinaryFormulas, s, f); - } - } - } - // entity - for (TableCellProperties properties : graph.cellProperties) { - Formula entity = getEntityFormula(properties); - allEntityFormulas.add(entity); - for (String s : getAllCollapsedForms(properties.originalString)) - MapUtils.addToSet(phraseToEntityFormulas, s, entity); - if (opts.fuzzyMatchSubstring) { - for (String s : getAllSubstringCollapsedForms(properties.originalString)) { - MapUtils.addToSet(substringToEntityFormulas, s, entity); - } - } - } - // part (treated as extra entities) - if (opts.alsoMatchPart) { - for (NameValue value : graph.cellParts) { - Formula partEntity = getEntityFormula(value); - allEntityFormulas.add(partEntity); - for (String s : getAllCollapsedForms(value.description)) - MapUtils.addToSet(phraseToEntityFormulas, s, partEntity); - if (opts.fuzzyMatchSubstring) { - for (String s : getAllSubstringCollapsedForms(value.description)) { - 
MapUtils.addToSet(substringToEntityFormulas, s, partEntity); - } - } - } - } - // debug print - if (opts.verbose >= 5) { - debugPrint("phrase Entity", phraseToEntityFormulas); - debugPrint("phrase Unary", phraseToUnaryFormulas); - debugPrint("phrase Binary", phraseToBinaryFormulas); - debugPrint("substring Entity", substringToEntityFormulas); - debugPrint("substring Unary", substringToUnaryFormulas); - debugPrint("substring Binary", substringToBinaryFormulas); - } - } - - void debugPrint(String message, Map> target) { - LogInfo.begin_track("%s", message); - for (Map.Entry> entry : target.entrySet()) { - LogInfo.logs("[%s] %s", entry.getKey(), entry.getValue()); - } - LogInfo.end_track(); - } - - // ============================================================ - // Collapse Strings + Compute Substrings - // ============================================================ - - static Collection getAllSubstringCollapsedForms(String original) { - // Compute all substrings (based on spaces) - Set collapsedForms = new HashSet<>(); - if (original.length() > opts.fuzzyMatchSubstringMaxCellLength) return collapsedForms; - String[] tokens = original.trim().split("[^A-Za-z0-9]+"); - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < tokens.length; i++) { - sb.setLength(0); - for (int j = i; j < tokens.length; j++) { - String phrase = sb.append(" " + tokens[j]).toString(); - collapsedForms.addAll(getAllCollapsedForms(phrase)); - } - } - return collapsedForms; - } - - static Collection getAllCollapsedForms(String original) { - Set collapsedForms = new HashSet<>(); - collapsedForms.add(StringNormalizationUtils.collapseNormalize(original)); - String normalized = StringNormalizationUtils.aggressiveNormalize(original); - collapsedForms.add(StringNormalizationUtils.collapseNormalize(normalized)); - collapsedForms.remove(""); - return collapsedForms; - } - - static String getCanonicalCollapsedForm(String original) { - return StringNormalizationUtils.collapseNormalize(original); 
- } - - // ============================================================ - // Edit Distance - // ============================================================ - - static int editDistance(String a, String b) { - int m = a.length() + 1, n = b.length() + 1; - int[] dists = new int[m], newDists = new int[m]; - for (int i = 0; i < m; i++) dists[i] = i; - for (int j = 1; j < n; j++) { - newDists[0] = j; - for (int i = 1; i < m; i++) { - newDists[i] = Math.min(Math.min( - dists[i] + 1, // Insert - newDists[i-1] + 1), // Delete - dists[i-1] + (a.charAt(i-1) == b.charAt(j-1) ? 0 : 1)); // Replace - } - int[] swap = dists; dists = newDists; newDists = swap; - } - return dists[m-1]; - } - - static double editDistanceRatio(String a, String b) { - if (a.isEmpty() && b.isEmpty()) return 0.0; - return editDistance(a, b) * 2.0 / (a.length() + b.length()); - } - - // ============================================================ - // Caching fuzzy matches of a whole sentence - // ============================================================ - - Map, FuzzyMatchCache> cacheMap = new HashMap<>(); - - protected FuzzyMatchCache cacheSentence(List sentence, FuzzyMatchFnMode mode) { - String joined = String.join(" ", sentence); - FuzzyMatchCache cache = cacheMap.get(new Pair<>(joined, mode)); - if (cache != null) return cache; - // Compute a new FuzzyMatchCache - cache = new FuzzyMatchCache(); - // aggregateCache[i,j] = all formulas matched by sentence[i'<=i:j'>=j], (i',j') != (i,j) - FuzzyMatchCache aggregateCache = new FuzzyMatchCache(); - for (int s = sentence.size(); s >= 1; s--) { - for (int i = 0; i + s <= sentence.size(); i++) { - int j = i + s; - String term = String.join(" ", sentence.subList(i, j)); - Collection formulas = new HashSet<>(); - if (!(FuzzyMatcher.opts.ignorePunctuationBoundedQueries && !checkPunctuationBoundaries(term))) { - String normalized = getCanonicalCollapsedForm(term); - // Exact matches - Collection exactMatched = getFuzzyExactMatchedFormulas(normalized, mode); 
- if (exactMatched != null) formulas.addAll(exactMatched); - // Substring matches - if (opts.fuzzyMatchSubstring && normalized.length() >= opts.fuzzyMatchSubstringMinQueryLength) { - Collection substringMatched = getFuzzySubstringMatchedFormulas(normalized, mode); - if (substringMatched != null && substringMatched.size() <= opts.fuzzyMatchMaxSubstringMatches) - formulas.addAll(substringMatched); - } - } - cache.put(i, j, formulas); - if (s > 1) { - aggregateCache.addAll(i + 1, j, formulas); - aggregateCache.addAll(i + 1, j, aggregateCache.get(i, j)); - aggregateCache.addAll(i, j - 1, formulas); - aggregateCache.addAll(i, j - 1, aggregateCache.get(i, j)); - } - } - } - if (opts.verbose >= 3) { - LogInfo.begin_track("Caching[%s] %s", mode, sentence); - for (int s = 1; s <= sentence.size(); s++) { - for (int i = 0; i + s <= sentence.size(); i++) { - int j = i + s; - Collection formulas = cache.get(i, j); - if (formulas == null || formulas.isEmpty()) continue; - LogInfo.logs("{%s:%s} %s", i, j, sentence.subList(i, j)); - LogInfo.logs("%s", formulas); - } - } - LogInfo.end_track(); - } - // Filter: If sentence[i:j] and sentence[i'<=i:j'>=j], (i',j') != (i,j), - // both matches formula f, remove f from sentence[i:j] - // This is done to reduce over-generation - for (int s = sentence.size(); s >= 1; s--) { - for (int i = 0; i + s <= sentence.size(); i++) { - int j = i + s; - cache.removeAll(i, j, aggregateCache.get(i, j)); - Collection allMatched = cache.get(i, j); - if (allMatched == null) continue; - Collection unions = opts.alsoReturnUnion ? 
getUnions(allMatched) : null; - if (allMatched.size() > opts.fuzzyMatchMaxTotalMatches) - cache.clear(i, j); - if (unions != null) - cache.addAll(i, j, unions); - } - } - if (opts.verbose >= 3) { - LogInfo.begin_track("Caching[%s] %s", mode, sentence); - for (int s = 1; s <= sentence.size(); s++) { - for (int i = 0; i + s <= sentence.size(); i++) { - int j = i + s; - Collection formulas = cache.get(i, j); - if (formulas == null || formulas.isEmpty()) continue; - LogInfo.logs("[%s:%s] %s", i, j, sentence.subList(i, j)); - LogInfo.logs("%s", formulas); - } - } - LogInfo.end_track(); - } - cacheMap.put(new Pair<>(joined, mode), cache); - return cache; - } - - // ============================================================ - // Main fuzzy matching interface - // ============================================================ - - /** - * Helper: Get the union of all fb:cell... and the union of all fb:part... - */ - protected Collection getUnions(Collection formulas) { - Collection unions = new ArrayList<>(); - Formula formula; - // fb:cell... - if ((formula = getUnion(formulas, TableTypeSystem.CELL_NAME_PREFIX)) != null) - unions.add(formula); - // fb:part... 
- if ((formula = getUnion(formulas, TableTypeSystem.PART_NAME_PREFIX)) != null) - unions.add(formula); - return unions; - } - - protected Formula getUnion(Collection formulas, String prefix) { - if (formulas == null || formulas.size() <= 1) return null; - List sortedFormulaList = new ArrayList<>(); - for (Formula formula : formulas) - if (prefix == null || formula.toString().startsWith(prefix)) - sortedFormulaList.add(formula); - if (sortedFormulaList.size() <= 1) return null; - Collections.sort(sortedFormulaList, - (Formula v1, Formula v2) -> v1.toString().compareTo(v2.toString())); - Formula union = sortedFormulaList.get(0); - for (int i = 1; i < sortedFormulaList.size(); i++) { - if (union.toString().compareTo(sortedFormulaList.get(i).toString()) <= 0) - union = new MergeFormula(MergeFormula.Mode.or, union, sortedFormulaList.get(i)); - else - union = new MergeFormula(MergeFormula.Mode.or, sortedFormulaList.get(i), union); - } - return union; - } - - @Override - protected Collection getFuzzyMatchedFormulasInternal(String term, FuzzyMatchFnMode mode) { - String normalized = getCanonicalCollapsedForm(term); - Collection allMatched = new HashSet<>(); - // Exact matches - Collection exactMatched = getFuzzyExactMatchedFormulas(normalized, mode); - if (exactMatched != null) allMatched.addAll(exactMatched); - // Substring matches - if (opts.fuzzyMatchSubstring && normalized.length() >= opts.fuzzyMatchSubstringMinQueryLength) { - Collection substringMatched = getFuzzySubstringMatchedFormulas(normalized, mode); - if (substringMatched != null && substringMatched.size() <= opts.fuzzyMatchMaxSubstringMatches) { - allMatched.addAll(substringMatched); - } - } - Formula union = opts.alsoReturnUnion ? 
getUnion(allMatched, null) : null; - if (allMatched.size() > opts.fuzzyMatchMaxTotalMatches) allMatched.clear(); - if (union != null) allMatched.add(union); - return allMatched; - } - - protected Collection getFuzzyExactMatchedFormulas(String normalized, FuzzyMatchFnMode mode) { - Map> target; - switch (mode) { - case ENTITY: target = phraseToEntityFormulas; break; - case UNARY: target = phraseToUnaryFormulas; break; - case BINARY: target = phraseToBinaryFormulas; break; - default: throw new RuntimeException("Unknown FuzzyMatchMode " + mode); - } - Set filtered = filterFuzzyMatched(normalized, target); - // Debug print - if (opts.verbose >= 3 && filtered != null && !filtered.isEmpty()) { - LogInfo.begin_track("(EXACT) Normalized: %s (%d)", normalized, filtered.size()); - for (Formula formula : filtered) - LogInfo.logs("%s", formula); - LogInfo.end_track(); - } - return filtered; - } - - protected Collection getFuzzySubstringMatchedFormulas(String normalized, FuzzyMatchFnMode mode) { - Map> target; - switch (mode) { - case ENTITY: target = substringToEntityFormulas; break; - case UNARY: target = substringToUnaryFormulas; break; - case BINARY: target = substringToBinaryFormulas; break; - default: throw new RuntimeException("Unknown FuzzyMatchMode " + mode); - } - Set filtered = filterFuzzyMatched(normalized, target); - // Debug print - if (opts.verbose >= 3 && filtered != null && !filtered.isEmpty()) { - LogInfo.begin_track("(SUBSTRING) Normalized: %s (%d)", normalized, filtered.size()); - for (Formula formula : filtered) - LogInfo.logs("%s", formula); - LogInfo.end_track(); - } - return filtered; - } - - protected Set filterFuzzyMatched(String normalized, Map> phraseToFormulas) { - Set filtered; - if (opts.fuzzyMatchMaxEditDistanceRatio == 0) { - filtered = phraseToFormulas.get(normalized); - } else { - filtered = new HashSet<>(); - for (Map.Entry> entry : phraseToFormulas.entrySet()) { - if (editDistanceRatio(entry.getKey(), normalized) < 
opts.fuzzyMatchMaxEditDistanceRatio) { - filtered.addAll(entry.getValue()); - } - } - } - return filtered; - } - - @Override - protected Collection getAllFormulasInternal(FuzzyMatchFnMode mode) { - switch (mode) { - case ENTITY: return allEntityFormulas; - case UNARY: return allUnaryFormulas; - case BINARY: return allBinaryFormulas; - default: throw new RuntimeException("Unknown FuzzyMatchMode " + mode); - } - } - - // ============================================================ - // Test - // ============================================================ - - public static void main(String[] args) { - System.out.println(editDistance("unionist", "unionists")); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/match/FuzzyMatchCache.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/match/FuzzyMatchCache.java deleted file mode 100644 index 3753fb7cdf..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/match/FuzzyMatchCache.java +++ /dev/null @@ -1,44 +0,0 @@ -package edu.stanford.nlp.sempre.tables.match; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import fig.basic.Pair; - -public class FuzzyMatchCache { - - Map, Collection> entries = new HashMap<>(); - - public void put(int startIndex, int endIndex, Collection formulas) { - entries.put(new Pair<>(startIndex, endIndex), formulas); - } - - public void add(int startIndex, int endIndex, Formula formula) { - Collection current = entries.get(new Pair<>(startIndex, endIndex)); - if (current == null) entries.put(new Pair<>(startIndex, endIndex), current = new HashSet<>()); - current.add(formula); - } - - public void addAll(int startIndex, int endIndex, Collection formulas) { - Collection current = entries.get(new Pair<>(startIndex, endIndex)); - if (current == null) entries.put(new Pair<>(startIndex, endIndex), current = new HashSet<>()); - current.addAll(formulas); - } - - public void clear(int startIndex, int endIndex) { - entries.remove(new 
Pair<>(startIndex, endIndex)); - } - - public void removeAll(int startIndex, int endIndex, Collection formulas) { - Collection current = entries.get(new Pair<>(startIndex, endIndex)); - if (current == null) return; - current.removeAll(formulas); - if (current.isEmpty()) entries.remove(new Pair<>(startIndex, endIndex)); - } - - public Collection get(int startIndex, int endIndex) { - Collection answer = entries.get(new Pair<>(startIndex, endIndex)); - return answer == null ? Collections.emptySet() : answer; - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/match/FuzzyMatcher.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/match/FuzzyMatcher.java deleted file mode 100644 index ee290588bd..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/match/FuzzyMatcher.java +++ /dev/null @@ -1,148 +0,0 @@ -package edu.stanford.nlp.sempre.tables.match; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.FuzzyMatchFn.FuzzyMatchFnMode; -import edu.stanford.nlp.sempre.tables.*; -import fig.basic.*; - -/** - * Perform fuzzy matching on the table knowledge graph. - * - * @author ppasupat - */ -public abstract class FuzzyMatcher { - public static class Options { - // This would prevent "canada ?" from fuzzy matching: we already fuzzy match "canada" - @Option(gloss = "Ignore query strings where a boundary word is a punctuation (prevent overgeneration)") - public boolean ignorePunctuationBoundedQueries = true; - @Option(gloss = "Default fuzzy matcher to use") - public String fuzzyMatcher = "tables.match.EditDistanceFuzzyMatcher"; - } - public static Options opts = new Options(); - - /** - * Get a fuzzy matcher of the default class. 
- */ - public static FuzzyMatcher getFuzzyMatcher(TableKnowledgeGraph graph) { - return getFuzzyMatcher(opts.fuzzyMatcher, graph); - } - - public static FuzzyMatcher getFuzzyMatcher(String className, TableKnowledgeGraph graph) { - try { - Class classObject = Class.forName(SempreUtils.resolveClassName(className)); - return (FuzzyMatcher) classObject.getConstructor(TableKnowledgeGraph.class).newInstance(graph); - } catch (Exception e) { - e.printStackTrace(); - e.getCause().printStackTrace(); - throw new RuntimeException(e); - } - } - - // ============================================================ - // Precomputation - // ============================================================ - - public final TableKnowledgeGraph graph; - - public FuzzyMatcher(TableKnowledgeGraph graph) { - this.graph = graph; - } - - // ============================================================ - // Fuzzy Matching Main Interface - // ============================================================ - - /** - * Check if the first or the last token is a punctuation (no alphanumeric character). - */ - public boolean checkPunctuationBoundaries(String term) { - String[] tokens = term.trim().split("\\s+"); - if (tokens.length == 0) return false; - if (StringNormalizationUtils.collapseNormalize(tokens[0]).isEmpty()) return false; - if (tokens.length == 1) return true; - if (StringNormalizationUtils.collapseNormalize(tokens[tokens.length - 1]).isEmpty()) return false; - return true; - } - - /** - * If needed, compute the fuzzy matched predicates for all substrings of sentence and cache the result. - * Then, return all formulas of the specified mode that match the phrase formed by sentence[startIndex:endIndex]. - */ - public Collection getFuzzyMatchedFormulas( - List sentence, int startIndex, int endIndex, FuzzyMatchFnMode mode) { - FuzzyMatchCache cache = cacheSentence(sentence, mode); - Collection formulas = cache.get(startIndex, endIndex); - return formulas == null ? 
Collections.emptySet() : formulas; - } - - abstract protected FuzzyMatchCache cacheSentence(List sentence, FuzzyMatchFnMode mode); - - /** - * Return all formulas of the specified mode that match the phrase. - * Do not use any cached results. - */ - public Collection getFuzzyMatchedFormulas(String term, FuzzyMatchFnMode mode) { - if (opts.ignorePunctuationBoundedQueries && !checkPunctuationBoundaries(term)) - return Collections.emptySet(); - Collection formulas = getFuzzyMatchedFormulasInternal(term, mode); - return formulas == null ? Collections.emptySet() : formulas; - } - - abstract protected Collection getFuzzyMatchedFormulasInternal(String term, FuzzyMatchFnMode mode); - - /** Return all formulas of the specified mode. */ - public Collection getAllFormulas(FuzzyMatchFnMode mode) { - Collection formulas = getAllFormulasInternal(mode); - return formulas == null ? Collections.emptySet() : formulas; - } - - abstract protected Collection getAllFormulasInternal(FuzzyMatchFnMode mode); - - // ============================================================ - // Helper Functions: Construct Formulas - // ============================================================ - /* - * ENTITIY --> fb:cell.___ - * UNARY --> (!fb.row.row.___ (fb:type.object.type fb:type.row)) - * BINARY --> fb:row.row.___ - */ - - static Formula getEntityFormula(NameValue nameValue) { - return new ValueFormula<>(nameValue); - } - - static Formula getEntityFormula(TableCell cell) { - return new ValueFormula<>(cell.properties.nameValue); - } - - static Formula getEntityFormula(TableCellProperties properties) { - return new ValueFormula<>(properties.nameValue); - } - - static Formula getUnaryFormula(TableColumn column) { - return new JoinFormula( - new ValueFormula<>(CanonicalNames.reverseProperty(column.relationNameValue)), - new JoinFormula(new ValueFormula<>(new NameValue(CanonicalNames.TYPE)), - new ValueFormula<>(new NameValue(TableTypeSystem.ROW_TYPE)))); - } - - static Formula 
getBinaryFormula(TableColumn column) { - return new ValueFormula<>(column.relationNameValue); - } - - static Formula getConsecutiveBinaryFormula(TableColumn column) { - return new ValueFormula<>(column.relationConsecutiveNameValue); - } - - static List getNormalizedBinaryFormulas(TableColumn column) { - List formulas = new ArrayList<>(); - for (Value normalization : column.getAllNormalization()) { - formulas.add(new LambdaFormula("x", new JoinFormula( - new ValueFormula<>(column.relationNameValue), new JoinFormula( - new ValueFormula<>(normalization), new VariableFormula("x"))))); - } - return formulas; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/match/OriginalMatcher.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/match/OriginalMatcher.java deleted file mode 100644 index 035de85a62..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/match/OriginalMatcher.java +++ /dev/null @@ -1,131 +0,0 @@ -package edu.stanford.nlp.sempre.tables.match; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.FuzzyMatchFn.FuzzyMatchFnMode; -import edu.stanford.nlp.sempre.tables.StringNormalizationUtils; -import edu.stanford.nlp.sempre.tables.TableCell; -import edu.stanford.nlp.sempre.tables.TableColumn; -import edu.stanford.nlp.sempre.tables.TableKnowledgeGraph; -import edu.stanford.nlp.sempre.tables.TableTypeSystem; -import fig.basic.MapUtils; -import fig.basic.Option; -import fig.basic.Pair; - -/** - * Original matcher used in ACL 2015. Only does exact matches. 
- * - * @author ppasupat - */ -public class OriginalMatcher extends FuzzyMatcher { - public static class Options { - @Option(gloss = "Do not fuzzy match if the query matches more than this number of formulas (prevent overgeneration)") - public int maxMatchedCandidates = Integer.MAX_VALUE; - } - public static Options opts = new Options(); - - public OriginalMatcher(TableKnowledgeGraph graph) { - super(graph); - precomputeForMatching(); - } - - private static Collection getAllCollapsedForms(String original) { - Set collapsedForms = new HashSet<>(); - collapsedForms.add(StringNormalizationUtils.collapseNormalize(original)); - String normalized = StringNormalizationUtils.aggressiveNormalize(original); - collapsedForms.add(StringNormalizationUtils.collapseNormalize(normalized)); - collapsedForms.remove(""); - return collapsedForms; - } - - private static String getCanonicalCollapsedForm(String original) { - return StringNormalizationUtils.collapseNormalize(original); - } - - // Map normalized strings to Values - // ENTITIY --> ValueFormula fb:cell.___ or other primitive format - // UNARY --> JoinFormula (type fb:column.___) - // BINARY --> ValueFormula fb:row.row.___ - Set allEntityFormulas, allUnaryFormulas, allBinaryFormulas; - Map> phraseToEntityFormulas, phraseToUnaryFormulas, phraseToBinaryFormulas; - - protected void precomputeForMatching() { - allEntityFormulas = new HashSet<>(); - allUnaryFormulas = new HashSet<>(); - allBinaryFormulas = new HashSet<>(); - phraseToEntityFormulas = new HashMap<>(); - phraseToUnaryFormulas = new HashMap<>(); - phraseToBinaryFormulas = new HashMap<>(); - for (TableColumn column : graph.columns) { - // unary and binary - Formula unary = new JoinFormula( - new ValueFormula<>(CanonicalNames.reverseProperty(column.relationNameValue)), - new JoinFormula(new ValueFormula<>(new NameValue(CanonicalNames.TYPE)), - new ValueFormula<>(new NameValue(TableTypeSystem.ROW_TYPE))) - ); - Formula binary = new 
ValueFormula<>(column.relationNameValue); - allUnaryFormulas.add(unary); - allBinaryFormulas.add(binary); - for (String s : getAllCollapsedForms(column.originalString)) { - MapUtils.addToSet(phraseToUnaryFormulas, s, unary); - MapUtils.addToSet(phraseToBinaryFormulas, s, binary); - } - // entity - for (TableCell cell : column.children) { - Formula entity = new ValueFormula<>(cell.properties.nameValue); - allEntityFormulas.add(entity); - for (String s : getAllCollapsedForms(cell.properties.originalString)) - MapUtils.addToSet(phraseToEntityFormulas, s, entity); - } - } - } - - // ============================================================ - // Internal methods - // ============================================================ - - Map, FuzzyMatchCache> cacheMap = new HashMap<>(); - - @Override - protected FuzzyMatchCache cacheSentence(List sentence, FuzzyMatchFnMode mode) { - String joined = String.join(" ", sentence); - FuzzyMatchCache cache = cacheMap.get(new Pair<>(joined, mode)); - if (cache != null) return cache; - // Compute a new FuzzyMatchCache - cache = new FuzzyMatchCache(); - for (int i = 0; i < sentence.size(); i++) { - for (int j = i + 1; j < sentence.size(); j++) { - String term = String.join(" ", sentence.subList(i, j)); - cache.addAll(i, j, getFuzzyMatchedFormulasInternal(term, mode)); - } - } - cacheMap.put(new Pair<>(joined, mode), cache); - return cache; - } - - @Override - protected Collection getFuzzyMatchedFormulasInternal(String term, - FuzzyMatchFnMode mode) { - String normalized = getCanonicalCollapsedForm(term); - Set answer; - switch (mode) { - case ENTITY: answer = phraseToEntityFormulas.get(normalized); break; - case UNARY: answer = phraseToUnaryFormulas.get(normalized); break; - case BINARY: answer = phraseToBinaryFormulas.get(normalized); break; - default: throw new RuntimeException("Unknown FuzzyMatchMode " + mode); - } - return (answer == null || answer.size() > opts.maxMatchedCandidates) ? 
Collections.emptySet() : answer; - } - - @Override - protected Collection getAllFormulasInternal(FuzzyMatchFnMode mode) { - switch (mode) { - case ENTITY: return allEntityFormulas; - case UNARY: return allUnaryFormulas; - case BINARY: return allBinaryFormulas; - default: throw new RuntimeException("Unknown FuzzyMatchMode " + mode); - } - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/DumpFilterer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/DumpFilterer.java deleted file mode 100644 index 0806c0c8d9..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/DumpFilterer.java +++ /dev/null @@ -1,90 +0,0 @@ -package edu.stanford.nlp.sempre.tables.serialize; - -import java.io.BufferedReader; -import java.io.File; -import java.io.PrintWriter; -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import fig.basic.*; -import fig.exec.Execution; - -public class DumpFilterer implements Runnable { - public static class Options { - @Option(gloss = "verbosity") public int verbose = 0; - @Option(gloss = "input dump directory") - public String filtererInputDumpDirectory; - } - public static Options opts = new Options(); - - public static void main(String[] args) { - Execution.run(args, "DumpFiltererMain", new DumpFilterer(), Master.getOptionsParser()); - } - - Builder builder; - - @Override - public void run() { - builder = new Builder(); - builder.build(); - String outDir = Execution.getFile("filtered"); - new File(outDir).mkdirs(); - for (Pair pathPair : Dataset.opts.inPaths) { - String group = pathPair.getFirst(); - String path = pathPair.getSecond(); - // Read LispTrees - LogInfo.begin_track("Reading %s", path); - int maxExamples = Dataset.getMaxExamplesForGroup(group); - Iterator trees = LispTree.proto.parseFromFile(path); - // Go through the examples - int n = 0; - while (n < maxExamples) { - // Format: (example (id ...) (utterance ...) (targetFormula ...) 
(targetValue ...)) - LispTree tree = trees.next(); - if (tree == null) break; - if (tree.children.size() < 2 || !"example".equals(tree.child(0).value)) { - if ("metadata".equals(tree.child(0).value)) continue; - throw new RuntimeException("Invalid example: " + tree); - } - Example ex = Example.fromLispTree(tree, path + ":" + n); - ex.preprocess(); - LogInfo.logs("Example %s (%d): %s => %s", ex.id, n, ex.getTokens(), ex.targetValue); - n++; - processExample(ex); - } - LogInfo.end_track(); - } - } - - private void processExample(Example ex) { - File inPath = new File(opts.filtererInputDumpDirectory, ex.id + ".gz"); - File outPath = new File(Execution.getFile("filtered"), ex.id + ".gz"); - try { - BufferedReader reader = IOUtils.openInHard(inPath); - PrintWriter writer = IOUtils.openOutHard(outPath); - int inLines = 0, outLines = 0; - String line; - while ((line = reader.readLine()) != null) { - inLines++; - LispTree tree = LispTree.proto.parseFromString(line); - if (!"formula".equals(tree.child(1).child(0).value)) - throw new RuntimeException("Invalid tree: " + tree); - Formula formula = Formulas.fromLispTree(tree.child(1).child(1)); - Value value = builder.executor.execute(formula, ex.context).value; - double compatibility = builder.valueEvaluator.getCompatibility(ex.targetValue, value); - if (compatibility == 1.0) { - writer.println(tree); - outLines++; - } else if (opts.verbose >= 2) { - LogInfo.logs("Filtered out %s <= %s", value, formula); - } - } - LogInfo.logs("Filtered %d => %d", inLines, outLines); - reader.close(); - writer.close(); - } catch (Exception e) { - LogInfo.warnings("Got an error: %s", e); - } - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/LazyLoadedExampleList.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/LazyLoadedExampleList.java deleted file mode 100644 index 398f3b10bd..0000000000 --- 
a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/LazyLoadedExampleList.java +++ /dev/null @@ -1,320 +0,0 @@ -package edu.stanford.nlp.sempre.tables.serialize; - -import java.io.File; -import java.util.*; -import java.util.regex.Matcher; - -import edu.stanford.nlp.sempre.*; -import fig.basic.*; - -/** - * Lazily read and construct examples from a dump file. - * - * The process is fast if the examples are read sequentially. - * - * @author ppasupat - */ -public class LazyLoadedExampleList implements List { - public static class Options { - @Option(gloss = "whether to ensure thread safety (makes things slower)") - public boolean threadSafe = false; - } - public static Options opts = new Options(); - - private final List paths; - private final List sizes; - private final List offsets; - private final List exampleIndexToPathIndex; - private final int size; - // Whether each file contains only a single example (faster) - private final boolean single; - - private LazyLoadedExampleListIterator defaultIterator; - - public LazyLoadedExampleList(String path, int maxSize) { - this(Collections.singletonList(path), maxSize); - } - - public LazyLoadedExampleList(List paths, int maxSize) { - this(paths, maxSize, false); - } - - public LazyLoadedExampleList(List paths, int maxSize, boolean single) { - this.paths = new ArrayList<>(paths); - this.single = single; - // Combined the number of examples from all files - this.sizes = new ArrayList<>(); - this.offsets = new ArrayList<>(); - this.exampleIndexToPathIndex = new ArrayList<>(); - int size = 0; - for (int pathIndex = 0; pathIndex < paths.size(); pathIndex++) { - String path = paths.get(pathIndex); - if (single) { - sizes.add(1); - exampleIndexToPathIndex.add(pathIndex); - offsets.add(size); - size++; - } else { - int thisSize = readSizeFromMetadata(LispTree.proto.parseFromFile(path).next()); - sizes.add(thisSize); - for (int i = 0; i < thisSize; i++) - exampleIndexToPathIndex.add(pathIndex); - 
offsets.add(size); - size += thisSize; - } - } - this.size = Math.min(size, maxSize); - LogInfo.logs("(LazyLoadedExampleList) Dataset size: %d", this.size); - defaultIterator = new LazyLoadedExampleListIterator(); - } - - public List getPaths() { return paths; } - - @Override public int size() { return size; } - @Override public boolean isEmpty() { return size == 0; } - - // ============================================================ - // Iterator - // ============================================================ - - public class LazyLoadedExampleListIterator implements Iterator { - Iterator trees = null; - private int currentPathIndex = -1, currentIndex = -1; - private Example currentExample = null; - - @Override - public boolean hasNext() { - return currentIndex + 1 < size; // size could be affected by MaxExampleForGroup - } - - @Override - public Example next() { - currentIndex++; - while (trees == null || !trees.hasNext()) { - trees = LispTree.proto.parseFromFile(paths.get(++currentPathIndex)); - trees.next(); // Skip metadata - } - return currentExample = readExample(trees.next()); - } - - public Example seek(int index) { - if (index < 0 || index >= size) - throw new IndexOutOfBoundsException("Array size: " + size + "; No index " + index); - int pathIndex = exampleIndexToPathIndex.get(index); - if (pathIndex != currentPathIndex || currentIndex > index) { - currentPathIndex = pathIndex; - trees = LispTree.proto.parseFromFile(paths.get(currentPathIndex)); - trees.next(); // Skip metadata - currentIndex = offsets.get(pathIndex) - 1; - } - while (currentIndex < index) { - currentIndex++; - LispTree tree = trees.next(); - if (currentIndex == index) - currentExample = readExample(tree); - } - return currentExample; - } - - public int getCurrentIndex() { return currentIndex; } - public Example getCurrentExample() { return currentExample; } - } - - @Override - public Iterator iterator() { - return new LazyLoadedExampleListIterator(); - } - - @Override - public Example 
get(int index) { - if (opts.threadSafe) - return new LazyLoadedExampleListIterator().seek(index); - return defaultIterator.seek(index); - } - - public List loadAll() { - List examples = new ArrayList<>(); - Iterator itr = iterator(); - while (itr.hasNext()) - examples.add(itr.next()); - return examples; - } - - public List getAllIds() { - List ids = new ArrayList<>(); - for (String path : paths) { - if (single) { - Matcher matcher = SerializedDataset.GZ_PATTERN.matcher(new File(path).getName()); - matcher.matches(); - ids.add(matcher.group(3)); - } else { - Iterator trees = LispTree.proto.parseFromFile(path); - while (trees.hasNext()) { - LispTree tree = trees.next(); - String exampleId = getExampleId(tree); - if (exampleId != null) ids.add(exampleId); - } - } - } - return ids; - } - - // ============================================================ - // Read Metadata - // ============================================================ - - private int readSizeFromMetadata(LispTree tree) { - if (!"metadata".equals(tree.child(0).value)) - throw new RuntimeException("Not metadata: " + tree); - for (int i = 1; i < tree.children.size(); i++) { - LispTree arg = tree.child(i); - if ("size".equals(arg.child(0).value)) - return Integer.parseInt(arg.child(1).value); - } - throw new RuntimeException("Size not specified: " + tree); - } - - // ============================================================ - // LispTree --> Example - // ============================================================ - - private static final Set finalFields = new HashSet<>(Arrays.asList( - "id", "utterance", "targetFormula", "targetValue", "targetValues", "context")); - - private String getExampleId(LispTree tree) { - if (!"example".equals(tree.child(0).value)) return null; - for (int i = 1; i < tree.children.size(); i++) { - LispTree arg = tree.child(i); - if ("id".equals(arg.child(0).value)) { - return arg.child(1).value; - } - } - // The ID is missing. Throw an error. 
- String treeS = tree.toString(); - treeS = treeS.substring(0, Math.min(140, treeS.length())); - throw new RuntimeException("Example does not have an ID: " + treeS); - } - - private Example readExample(LispTree tree) { - Example.Builder b = new Example.Builder(); - if (!"example".equals(tree.child(0).value)) - LogInfo.fails("Not an example: %s", tree); - - // final fields - for (int i = 1; i < tree.children.size(); i++) { - LispTree arg = tree.child(i); - String label = arg.child(0).value; - if ("id".equals(label)) { - b.setId(arg.child(1).value); - } else if ("utterance".equals(label)) { - b.setUtterance(arg.child(1).value); - } else if ("targetFormula".equals(label)) { - b.setTargetFormula(Formulas.fromLispTree(arg.child(1))); - } else if ("targetValue".equals(label) || "targetValues".equals(label)) { - if (arg.children.size() != 2) - throw new RuntimeException("Expect one target value"); - b.setTargetValue(Values.fromLispTree(arg.child(1))); - } else if ("context".equals(label)) { - b.setContext(new ContextValue(arg)); - } - } - b.setLanguageInfo(new LanguageInfo()); - - Example ex = b.createExample(); - - // other fields - for (int i = 1; i < tree.children.size(); i++) { - LispTree arg = tree.child(i); - String label = arg.child(0).value; - if ("tokens".equals(label)) { - int n = arg.child(1).children.size(); - for (int j = 0; j < n; j++) - ex.languageInfo.tokens.add(arg.child(1).child(j).value); - } else if ("lemmaTokens".equals(label)) { - int n = arg.child(1).children.size(); - for (int j = 0; j < n; j++) - ex.languageInfo.lemmaTokens.add(arg.child(1).child(j).value); - } else if ("posTags".equals(label)) { - int n = arg.child(1).children.size(); - for (int j = 0; j < n; j++) - ex.languageInfo.posTags.add(arg.child(1).child(j).value); - } else if ("nerTags".equals(label)) { - int n = arg.child(1).children.size(); - for (int j = 0; j < n; j++) - ex.languageInfo.nerTags.add(arg.child(1).child(j).value); - } else if ("nerValues".equals(label)) { - int n = 
arg.child(1).children.size(); - for (int j = 0; j < n; j++) { - String value = arg.child(1).child(j).value; - if ("null".equals(value)) value = null; - ex.languageInfo.nerValues.add(value); - } - } else if ("derivations".equals(label)) { - ex.predDerivations = new ArrayList<>(); - for (int j = 1; j < arg.children.size(); j++) - ex.predDerivations.add(readDerivation(arg.child(j))); - } else if (!finalFields.contains(label)) { - throw new RuntimeException("Invalid example argument: " + arg); - } - } - - return ex; - } - - public static final String SERIALIZED_ROOT = "$SERIALIZED_ROOT"; - - private Derivation readDerivation(LispTree tree) { - Derivation.Builder b = new Derivation.Builder() - .cat(SERIALIZED_ROOT).start(-1).end(-1).localFeatureVector(new FeatureVector()) - .rule(Rule.nullRule).children(new ArrayList()); - if (!"derivation".equals(tree.child(0).value)) - LogInfo.fails("Not a derivation: %s", tree); - - for (int i = 1; i < tree.children.size(); i++) { - LispTree arg = tree.child(i); - String label = arg.child(0).value; - if ("formula".equals(label)) { - b.formula(Formulas.fromLispTree(arg.child(1))); - } else if ("type".equals(label)) { - b.type(SemType.fromLispTree(arg.child(1))); - } else if ("value".equals(label)) { - b.value(Values.fromLispTree(arg.child(1))); - } else if (label.endsWith("values")) { - List values = new ArrayList<>(); - for (int j = 1; j < arg.children.size(); j++) { - values.add(Values.fromLispTree(arg.child(j))); - } - b.value(new ListValue(values)); - } else if ("canonicalUtterance".equals(label)) { - b.canonicalUtterance(arg.child(1).value); - } else { - throw new RuntimeException("Invalid derivation argument: " + arg); - } - } - return b.createDerivation(); - } - - // ============================================================ - // Unimplemented methods - // ============================================================ - - @Override public boolean contains(Object o) { throw new RuntimeException("Not implemented!"); } - @Override 
public Object[] toArray() { throw new RuntimeException("Not implemented!"); } - @Override public T[] toArray(T[] a) { throw new RuntimeException("Not implemented!"); } - @Override public boolean add(Example e) { throw new RuntimeException("Not implemented!"); } - @Override public boolean remove(Object o) { throw new RuntimeException("Not implemented!"); } - @Override public boolean containsAll(Collection c) { throw new RuntimeException("Not implemented!"); } - @Override public boolean addAll(Collection c) { throw new RuntimeException("Not implemented!"); } - @Override public boolean addAll(int index, Collection c) { throw new RuntimeException("Not implemented!"); } - @Override public boolean removeAll(Collection c) { throw new RuntimeException("Not implemented!"); } - @Override public boolean retainAll(Collection c) { throw new RuntimeException("Not implemented!"); } - @Override public void clear() { throw new RuntimeException("Not implemented!"); } - @Override public Example set(int index, Example element) { throw new RuntimeException("Not implemented!"); } - @Override public void add(int index, Example element) { throw new RuntimeException("Not implemented!"); } - @Override public Example remove(int index) { throw new RuntimeException("Not implemented!"); } - @Override public int indexOf(Object o) { throw new RuntimeException("Not implemented!"); } - @Override public int lastIndexOf(Object o) { throw new RuntimeException("Not implemented!"); } - @Override public ListIterator listIterator() { throw new RuntimeException("Not implemented!"); } - @Override public ListIterator listIterator(int index) { throw new RuntimeException("Not implemented!"); } - @Override public List subList(int fromIndex, int toIndex) { throw new RuntimeException("Not implemented!"); } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/SerializedDataset.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/SerializedDataset.java deleted 
file mode 100644 index 9f75a38a9a..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/SerializedDataset.java +++ /dev/null @@ -1,128 +0,0 @@ -package edu.stanford.nlp.sempre.tables.serialize; - -import java.io.File; -import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.tables.TableKnowledgeGraph; -import fig.basic.*; - -/** - * A Dataset object created from serialized examples created by SerializedDumper. - * - * @author ppasupat - */ -public class SerializedDataset extends Dataset { - public static class Options { - @Option(gloss = "Directory with dumped-*.gz files; to load only specific files, use Dataset.inPaths") - public List dumpDirs = new ArrayList<>(); - } - public static Options opts = new Options(); - - private final Map> availableGroups = new LinkedHashMap<>(); - - @Override - public void read() { - LogInfo.begin_track_printAll("Dataset.read"); - if (Dataset.opts.trainFrac != 1) - LogInfo.warnings("Dataset.opts.trainFrac is ignored!"); - if (opts.dumpDirs != null && !opts.dumpDirs.isEmpty()) { - readDirs(opts.dumpDirs); - } else { - read(Dataset.opts.inPaths); - } - LogInfo.end_track(); - } - - public void read(String group, String inPath) { - MapUtils.addToList(availableGroups, group, inPath); - checkFiles(); - } - - public void read(List> inPaths) { - for (Pair pair : inPaths) - MapUtils.addToList(availableGroups, pair.getFirst(), pair.getSecond()); - checkFiles(); - } - - public void readDir(String dumpDir) { - readDirs(Collections.singleton(dumpDir)); - } - - public static final Pattern GZ_PATTERN = Pattern.compile("^dumped-([^-]+)(?:-(\\d+)(?:-(.*))?)?\\.gz$"); - - public void readDirs(Collection dumpDirs) { - // Get filenames - // File format is dumped-groupname[-offset][-examplename].gz - Set filenames = new HashSet<>(), groups = new HashSet<>(); - for (String dumpDir : dumpDirs) { - String[] filenamesInDumpDir = 
new File(dumpDir).list(); - for (String filename : filenamesInDumpDir) { - Matcher matcher = GZ_PATTERN.matcher(filename); - if (matcher.matches()) { - filenames.add(new File(dumpDir, filename).toString()); - groups.add(matcher.group(1)); - } - } - } - LogInfo.logs("Available groups: %s", groups); - for (String group : groups) { - List filenamesForGroup = new ArrayList<>(); - for (String filename : filenames) { - File file = new File(filename); - Matcher matcher = GZ_PATTERN.matcher(file.getName()); - if (matcher.matches() && group.equals(matcher.group(1))) - filenamesForGroup.add(file.toString()); - } - if (!filenamesForGroup.isEmpty()) { - Collections.sort(filenamesForGroup); - availableGroups.put(group, filenamesForGroup); - } - } - checkFiles(); - } - - private void checkFiles() { - // Check if all files exist - for (List filenamesForGroup : availableGroups.values()) { - for (String filename : filenamesForGroup) { - File file = new File(filename); - if (!file.isFile()) - throw new RuntimeException("Error reading dataset: " + file + " is not a file."); - } - } - } - - @Override - public Set groups() { return availableGroups.keySet(); } - - @Override - public LazyLoadedExampleList examples(String group) { - if (!availableGroups.containsKey(group)) return null; - return new LazyLoadedExampleList(availableGroups.get(group), - getMaxExamplesForGroup(group), group.startsWith("single_")); - } - - // ============================================================ - // Test - // ============================================================ - - public static void main(String[] args) { - TableKnowledgeGraph.opts.baseCSVDir = "lib/data/WikiTableQuestions/"; - SerializedDataset dataset = new SerializedDataset(); - dataset.readDir("out/sliced-dump-8-reps/representative-00-training-sliced-00000-00299/"); - LazyLoadedExampleList examples = dataset.examples("representative"); - for (int i : new int[]{ - 25, 20, 20, 20, 21, 31, 29, 45, 35, 36, 37, 31, 99, 99, 100, 100, 99, 1, 3, 10, 
23, 499 - }) { - LogInfo.logs("=== %d ===", i); - Example ex = examples.get(i); - LogInfo.logs("%s %d", ex.id, ex.predDerivations.size()); - if (!ex.id.equals("nt-" + i)) - throw new RuntimeException(String.format("Wrong ID: %s != nt-%d", ex.id, i)); - } - } - -} \ No newline at end of file diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/SerializedDumper.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/SerializedDumper.java deleted file mode 100644 index 6fb8713a2c..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/SerializedDumper.java +++ /dev/null @@ -1,284 +0,0 @@ -package edu.stanford.nlp.sempre.tables.serialize; - -import java.io.*; -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.tables.TableKnowledgeGraph; -import edu.stanford.nlp.sempre.tables.test.CustomExample; -import fig.basic.*; -import fig.exec.*; - -/** - * Dump examples and parsed derivations. - * - * This class can be run on its own, in which case the parser in Builder.parser will be used. - * Or it can be supplied the examples to dump in a streaming fashion. 
- * - * Syntax of the dumped files: - * - Filename: dumped-[prefix]-[numbering].gz - * - Content: - * - First line: (metadata (size [number_of_examples])) - * - Each example begins with a comment (########## Example [example_id] ##########), - * followed by an example LispTree with the following fields: - * id, utterance, targetFormula, targetValue, context, - * tokens, lemmaTokens, posTags, nerTags, nerValues, - * derivations (one derivation per line) - * - * @author ppasupat - */ -public class SerializedDumper implements Runnable { - public static class Options { - @Option(gloss = "Verbosity") public int verbosity = 0; - @Option(gloss = "Randomly shuffle dumped derivations") - public Random shuffleDerivsRandom = new Random(9); - @Option(gloss = "Skip if the table has more than this number of rows") - public int maxNumRowsToDump = 200; - @Option(gloss = "Number of examples per gzip file (0 = single file)") - public int numExamplesPerFile = 0; - @Option(gloss = "Custom dump file prefixes to use in standalone mode") - public String dumpedFilePrefix = ""; - } - public static Options opts = new Options(); - - String prefix; - String filename; - PrintWriter out; - int numExamples = -1, currentIndex = 0; - - public SerializedDumper(String prefix, int numExamples) { - reset(prefix, numExamples); - } - - public void reset(String prefix, int numExamples) { - closeFile(); - this.currentIndex = 0; - this.prefix = prefix; - this.numExamples = numExamples; - } - - public void openFile(String filenameSuffix) { - if (out != null) closeFile(); - filename = Execution.getFile("dumped-" + filenameSuffix + ".gz"); - LogInfo.logs("Opening %s", filename); - if (new File(filename).exists()) - LogInfo.warnings("File %s exists; will overwrite!", filename); - out = IOUtils.openOutHard(filename); - } - - public void closeFile() { - if (out != null) { - out.close(); - LogInfo.logs("Finished dumping to %s", filename); - out = null; - } - } - - public void dumpExample(Example ex) { - 
dumpExample(ex, ex.predDerivations); - } - - public void dumpExample(Example ex, List derivations) { - if (numExamples < 0) - throw new RuntimeException("numExamples must be specified via reset(group, numExamples)"); - if (currentIndex >= numExamples) - throw new RuntimeException("current example index exceeds numExamples"); - if (opts.numExamplesPerFile == 0) { - if (currentIndex == 0) { - openFile(String.format("%s-%06d", prefix, 0)); - writeMetadataLispTree(numExamples); - } - } else { - if (currentIndex % opts.numExamplesPerFile == 0) { - openFile(String.format("%s-%06d", prefix, currentIndex)); - writeMetadataLispTree(Math.min(opts.numExamplesPerFile, numExamples - currentIndex)); - } - } - out.printf("########## Example %s ##########\n", ex.id); - writeExampleLispTree(exampleToLispTree(ex, derivations)); - out.flush(); - currentIndex++; - if (currentIndex == numExamples || (opts.numExamplesPerFile > 0 && currentIndex % opts.numExamplesPerFile == 0)) - closeFile(); - } - - // ============================================================ - // Stand-alone mode - // ============================================================ - - private SerializedDumper() { } - - public static void main(String[] args) { - Execution.run(args, "SerializedDumperMain", new SerializedDumper(), Master.getOptionsParser()); - } - - @Override - public void run() { - Builder builder = new Builder(); - builder.build(); - Dataset dataset = new Dataset(); - dataset.read(); - if (dataset.groups().size() > 1 && !opts.dumpedFilePrefix.isEmpty()) { - LogInfo.warnings("Cannot use dumpedFilePrefix with more than one group; fall back to group names."); - opts.dumpedFilePrefix = ""; - } - for (String group : dataset.groups()) { - reset(opts.dumpedFilePrefix.isEmpty() ? 
group : opts.dumpedFilePrefix, dataset.examples(group).size()); - processExamples(dataset.examples(group), builder); - StopWatchSet.logStats(); - } - } - - private void processExamples(List examples, Builder builder) { - Evaluation evaluation = new Evaluation(); - if (examples.isEmpty()) return; - - final String logPrefix = "iter=0." + prefix; - Execution.putOutput("group", logPrefix); - LogInfo.begin_track_printAll("Processing %s: %s examples", logPrefix, examples.size()); - LogInfo.begin_track("Examples"); - - for (int e = 0; e < examples.size(); e++) { - if (!CustomExample.checkFilterExamples(e)) continue; - Example ex = examples.get(e); - LogInfo.begin_track_printAll("%s: example %s/%s: %s", logPrefix, e, examples.size(), ex.id); - ex.log(); - Execution.putOutput("example", e); - StopWatchSet.begin("Parser.parse"); - if (((TableKnowledgeGraph) ex.context.graph).numRows() > opts.maxNumRowsToDump) { - LogInfo.logs("SKIPPING Example %s (number of rows = %d > %d)", - ex.id, ((TableKnowledgeGraph) ex.context.graph).numRows(), opts.maxNumRowsToDump); - new DummyParserState(builder.parser, builder.params, ex, false); - } else { - builder.parser.parse(builder.params, ex, false); - } - StopWatchSet.end(); - dumpExample(ex); - LogInfo.logs("Current: %s", ex.evaluation.summary()); - evaluation.add(ex.evaluation); - LogInfo.logs("Cumulative(%s): %s", logPrefix, evaluation.summary()); - LogInfo.end_track(); - // Save memory - if (ex.predDerivations != null) { - ex.predDerivations.clear(); - System.gc(); - } - } - - LogInfo.end_track(); - LogInfo.logs("Stats for %s: %s", logPrefix, evaluation.summary()); - evaluation.logStats(logPrefix); - evaluation.putOutput(logPrefix); - LogInfo.end_track(); - } - - public static class DummyParserState extends ParserState { - - public DummyParserState(Parser parser, Params params, Example ex, boolean computeExpectedCounts) { - super(parser, params, ex, computeExpectedCounts); - ex.predDerivations = new ArrayList<>(); - ex.evaluation = new 
Evaluation(); - } - - @Override public void infer() { } // Unused. - - } - - // ============================================================ - // Conversion to LispTree - // ============================================================ - - private LispTree exampleToLispTree(Example ex, List preds) { - LispTree tree = LispTree.proto.newList(); - tree.addChild("example"); - - // Basic information - if (ex.id != null) - tree.addChild(LispTree.proto.newList("id", ex.id)); - if (ex.utterance != null) - tree.addChild(LispTree.proto.newList("utterance", ex.utterance)); - if (ex.targetFormula != null) - tree.addChild(LispTree.proto.newList("targetFormula", ex.targetFormula.toLispTree())); - if (ex.targetValue != null) - tree.addChild(LispTree.proto.newList("targetValue", ex.targetValue.toLispTree())); - if (ex.context != null) - tree.addChild(ex.context.toLispTree()); - - // Language info - if (ex.languageInfo != null) { - if (ex.languageInfo.tokens != null) - tree.addChild(LispTree.proto.newList("tokens", LispTree.proto.newList(ex.languageInfo.tokens))); - if (ex.languageInfo.lemmaTokens != null) - tree.addChild(LispTree.proto.newList("lemmaTokens", LispTree.proto.newList(ex.languageInfo.lemmaTokens))); - if (ex.languageInfo.posTags != null) - tree.addChild(LispTree.proto.newList("posTags", LispTree.proto.newList(ex.languageInfo.posTags))); - if (ex.languageInfo.nerTags != null) - tree.addChild(LispTree.proto.newList("nerTags", LispTree.proto.newList(ex.languageInfo.nerTags))); - if (ex.languageInfo.nerValues != null) - tree.addChild(LispTree.proto.newList("nerValues", LispTree.proto.newList(ex.languageInfo.nerValues))); - } - - // Derivations - List derivations = new ArrayList<>(); - for (int i = 0; i < preds.size(); i++) { - Derivation deriv = preds.get(i); - if (!isPruned(deriv)) { - derivations.add(deriv.toLispTree()); - } - } - Collections.shuffle(derivations, opts.shuffleDerivsRandom); - LispTree derivationsTree = LispTree.proto.newList(); - 
derivationsTree.addChild("derivations"); - for (LispTree derivation : derivations) - derivationsTree.addChild(derivation); - tree.addChild(derivationsTree); - return tree; - } - - // Decide whether we should skip the derivation - private boolean isPruned(Derivation derivation) { - if (!(derivation.value instanceof ListValue)) return true; - ListValue list = (ListValue) derivation.value; - // Check if there is at least one answer - if (list.values.isEmpty()) return true; - return false; - } - - // ============================================================ - // Writing LispTree to file - // ============================================================ - - private void writeMetadataLispTree(int size) { - LispTree tree = LispTree.proto.newList(); - tree.addChild("metadata"); - tree.addChild(LispTree.proto.newList("size", "" + size)); - tree.print(out); - out.println(); - out.flush(); - } - - private void writeExampleLispTree(LispTree tree) { - out.println("(example"); - for (LispTree subtree : tree.children.subList(1, tree.children.size())) { - if (!subtree.isLeaf() && "derivations".equals(subtree.children.get(0).value)) { - if (subtree.children.size() == 1) { - out.println(" (derivations)"); - } else { - out.println(" (derivations"); - for (LispTree derivation : subtree.children.subList(1, subtree.children.size())) { - out.write(" "); - derivation.print(Integer.MAX_VALUE, Integer.MAX_VALUE, out); - out.write("\n"); - } - out.println(" )"); - } - } else { - out.write(" "); - subtree.print(Integer.MAX_VALUE, Integer.MAX_VALUE, out); - out.write("\n"); - } - } - out.println(")"); - out.flush(); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/SerializedLoader.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/SerializedLoader.java deleted file mode 100644 index 6a47df7fa3..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/SerializedLoader.java +++ /dev/null @@ -1,27 +0,0 @@ 
-package edu.stanford.nlp.sempre.tables.serialize; - -import edu.stanford.nlp.sempre.*; -import fig.exec.Execution; - -/** - * Load the examples and derivations from SerializedDumper, and then run the learner. - */ -public class SerializedLoader implements Runnable { - public static void main(String[] args) { - Execution.run(args, "SerializedLoaderMain", new SerializedLoader(), Master.getOptionsParser()); - } - - @Override - public void run() { - Builder builder = new Builder(); - builder.build(); - - Dataset dataset = new SerializedDataset(); - dataset.read(); - - Learner learner = new Learner(builder.parser, builder.params, dataset); - learner.learn(); - } - -} - diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/SerializedParser.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/SerializedParser.java deleted file mode 100644 index 7f136841b8..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/SerializedParser.java +++ /dev/null @@ -1,130 +0,0 @@ -package edu.stanford.nlp.sempre.tables.serialize; - -import java.io.File; -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.tables.alter.TurkEquivalentClassInfo; -import fig.basic.LogInfo; -import fig.basic.Option; -import fig.basic.Pair; - -/** - * Parser used when loading serialized data. - * - * SerializedParser assumes that all candidate derivations were already computed in the dump file. 
- * So the parser skips the parsing step and just load the candidates - * - * @author ppasupat - */ -public class SerializedParser extends Parser { - public static class Options { - // Must be a gzip file or a directory of gzip files - @Option(gloss = "Path for formula annotation") - public String annotationPath = null; - // Skip the example if some criterion is met - @Option(gloss = "(optional) Path for turk-info.tsv") - public String turkInfoPath = null; - @Option(gloss = "(If turkInfoPath is present) Maximum number of numClassesMatched") - public int maxNumClassesMatched = 50; - @Option(gloss = "(If turkInfoPath is present) Maximum number of numDerivsMatched") - public int maxNumDerivsMatched = 50000; - } - public static Options opts = new Options(); - - // Map from ID string to LazyLoadedExampleList and example index. - protected Map> idToSerializedIndex = null; - // Map from ID string to TurkEquivalentClassInfo. - protected Map idToTurkInfo = null; - - public SerializedParser(Spec spec) { - super(spec); - if (opts.annotationPath != null) - readSerializedFile(opts.annotationPath); - if (opts.turkInfoPath != null) { - LogInfo.begin_track("Reading Turk info from %s", opts.turkInfoPath); - idToTurkInfo = TurkEquivalentClassInfo.fromFile(opts.turkInfoPath); - LogInfo.end_track(); - } - } - - // Don't do it. 
- @Override protected void computeCatUnaryRules() { - catUnaryRules = Collections.emptyList(); - }; - - protected void readSerializedFile(String annotationPath) { - idToSerializedIndex = new HashMap<>(); - SerializedDataset dataset = new SerializedDataset(); - if (new File(annotationPath).isDirectory()) { - dataset.readDir(annotationPath); - } else { - dataset.read("annotated", annotationPath); - } - for (String group : dataset.groups()) { - LazyLoadedExampleList examples = dataset.examples(group); - List ids = examples.getAllIds(); - for (int i = 0; i < ids.size(); i++) - idToSerializedIndex.put(ids.get(i), new Pair<>(examples, i)); - } - } - - @Override - public ParserState newParserState(Params params, Example ex, boolean computeExpectedCounts) { - return new SerializedParserState(this, params, ex, computeExpectedCounts); - } - -} - -class SerializedParserState extends ParserState { - - public SerializedParserState(Parser parser, Params params, Example ex, boolean computeExpectedCounts) { - super(parser, params, ex, computeExpectedCounts); - } - - @Override - public void infer() { - SerializedParser parser = (SerializedParser) this.parser; - if (parser.idToTurkInfo != null) { - TurkEquivalentClassInfo info = parser.idToTurkInfo.get(ex.id); - if (info != null) { - if (info.numClassesMatched > SerializedParser.opts.maxNumClassesMatched) { - LogInfo.logs("Skipped %s since numClassesMatched = %d > %d", - ex.id, info.numClassesMatched, SerializedParser.opts.maxNumClassesMatched); - if (computeExpectedCounts) expectedCounts = new HashMap<>(); - return; - } - if (info.numDerivsMatched > SerializedParser.opts.maxNumDerivsMatched) { - LogInfo.logs("Skipped %s since numDerivsMatched = %d > %d", - ex.id, info.numDerivsMatched, SerializedParser.opts.maxNumDerivsMatched); - if (computeExpectedCounts) expectedCounts = new HashMap<>(); - return; - } - } - } - if (parser.idToSerializedIndex != null) { - Pair pair = parser.idToSerializedIndex.get(ex.id); - if (pair != null) { - 
Example annotatedEx = pair.getFirst().get(pair.getSecond()); - for (Derivation deriv : annotatedEx.predDerivations) { - featurizeAndScoreDerivation(deriv); - predDerivations.add(deriv); - } - } - } else { - // Assume that the example already has all derivations. - if (ex.predDerivations == null) - ex.predDerivations = new ArrayList<>(); - for (Derivation deriv : ex.predDerivations) { - featurizeAndScoreDerivation(deriv); - predDerivations.add(deriv); - } - } - ensureExecuted(); - if (computeExpectedCounts) { - expectedCounts = new HashMap<>(); - ParserState.computeExpectedCounts(predDerivations, expectedCounts); - } - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/TSVGenerator.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/TSVGenerator.java deleted file mode 100644 index 2771da5f73..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/TSVGenerator.java +++ /dev/null @@ -1,71 +0,0 @@ -package edu.stanford.nlp.sempre.tables.serialize; - -import java.io.*; -import java.util.*; - -import edu.stanford.nlp.sempre.*; - -/** - * Generate a TSV file for the dataset release. - * - * @author ppasupat - */ -public class TSVGenerator { - protected PrintWriter out; - - protected void dump(String... 
stuff) { - out.println(String.join("\t", stuff)); - } - - protected static String serialize(String x) { - if (x == null || x.isEmpty()) return ""; - StringBuilder sb = new StringBuilder(); - for (char y : x.toCharArray()) { - if (y == '\n') sb.append("\\n"); - else if (y == '\\') sb.append("\\\\"); - else if (y == '|') sb.append("\\p"); - else sb.append(y); - } - return sb.toString().replaceAll("\\s", " ").trim(); - } - - protected static String serialize(List xs) { - List serialized = new ArrayList<>(); - for (String x : xs) serialized.add(serialize(x)); - return String.join("|", serialized); - } - - protected static String serialize(Value value) { - if (value instanceof ListValue) { - List xs = new ArrayList<>(); - for (Value v : ((ListValue) value).values) { - xs.add(serialize(v)); - } - return String.join("|", xs); - } else if (value instanceof DescriptionValue) { - return serialize(((DescriptionValue) value).value); - } else if (value instanceof NameValue) { - return serialize(((NameValue) value).description); - } else if (value instanceof NumberValue) { - return "" + ((NumberValue) value).value; - } else if (value instanceof DateValue) { - return ((DateValue) value).isoString(); - } else { - throw new RuntimeException("Unknown value type: " + value); - } - } - - protected static String serializeId(Value value) { - if (value instanceof ListValue) { - List xs = new ArrayList<>(); - for (Value v : ((ListValue) value).values) { - xs.add(serializeId(v)); - } - return String.join("|", xs); - } else if (value instanceof NameValue) { - return serialize(((NameValue) value).id); - } else { - throw new RuntimeException("Unknown value type: " + value); - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/TableReader.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/TableReader.java deleted file mode 100644 index deecf94d7b..0000000000 --- 
a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/TableReader.java +++ /dev/null @@ -1,108 +0,0 @@ -package edu.stanford.nlp.sempre.tables.serialize; - -import java.io.*; -import java.util.*; - -import au.com.bytecode.opencsv.CSVReader; -import edu.stanford.nlp.sempre.tables.StringNormalizationUtils; -import fig.basic.LogInfo; - -/** - * Read a table in either CSV or TSV format. - * - * For CSV, this class is just a wrapper for OpenCSV. - * Escape sequences for CSV: - * - \\ => \ - * - \" or "" => " - * Each cell can be quoted inside "...". Embed newlines must be quoted. - * - * For TSV, each line must represent one table row (no embed newlines). - * Escape sequences for TSV (custom): - * - \n => [newline] - * - \\ => \ - * - \p => | - * - * @author ppasupat - */ -public class TableReader implements Closeable, Iterable { - - enum DataType { CSV, TSV, UNKNOWN } - CSVReader csvReader = null; - List tsvData = null; - - public TableReader(String filename) throws IOException { - switch (guessDataType(filename)) { - case CSV: - csvReader = new CSVReader(new FileReader(filename)); - break; - case TSV: - parseTSV(filename); - break; - default: - throw new RuntimeException("Unknown data type for " + filename); - } - } - - private DataType guessDataType(String filename) { - if (filename.endsWith(".csv")) - return DataType.CSV; - else if (filename.endsWith(".tsv")) - return DataType.TSV; - // Guess from the first line of the file - try (BufferedReader reader = new BufferedReader(new FileReader(filename))) { - String line = reader.readLine(); - if (line.contains("\t")) - return DataType.TSV; - else if (line.contains(",") || line.startsWith("\"")) - return DataType.CSV; - } catch (IOException e) { - throw new RuntimeException("Unknown data type for " + filename); - } - return DataType.UNKNOWN; - } - - private void parseTSV(String filename) { - try (BufferedReader reader = new BufferedReader(new FileReader(filename))) { - String line; - tsvData = new 
ArrayList<>(); - while ((line = reader.readLine()) != null) { - String[] fields = line.split("\t", -1); // Include trailing spaces - for (int i = 0; i < fields.length; i++) - fields[i] = StringNormalizationUtils.unescapeTSV(fields[i]); - tsvData.add(fields); - } - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - @Override - public Iterator iterator() { - if (csvReader != null) return csvReader.iterator(); - else return tsvData.iterator(); - } - - @Override - public void close() throws IOException { - if (csvReader != null) csvReader.close(); - } - - // ============================================================ - // Test - // ============================================================ - - public static void main(String[] args) { - String filename = "t/csv/200-csv/0.tsv"; - LogInfo.logs("%s", filename); - try (TableReader tableReader = new TableReader(filename)) { - for (String[] x : tableReader) { - LogInfo.begin_track("ROW"); - for (String y : x) LogInfo.logs("|%s|", y); - LogInfo.end_track(); - } - } catch (Exception e) { - e.printStackTrace(); - } - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/TableWriter.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/TableWriter.java deleted file mode 100644 index 6683b83650..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/TableWriter.java +++ /dev/null @@ -1,172 +0,0 @@ -package edu.stanford.nlp.sempre.tables.serialize; - -import java.io.*; - -import au.com.bytecode.opencsv.CSVWriter; -import edu.stanford.nlp.sempre.tables.*; -import fig.basic.LogInfo; - -/** - * Write a table in either CSV or TSV format. - * All IOExceptions are thrown as RuntimeException. - * - * For CSV, this class is just a wrapper for OpenCSV. - * Escape sequences for CSV: - * - \\ => \ - * - \" or "" => " - * Each cell can be quoted inside "...". Embed newlines must be quoted. 
- * - * For TSV, each line must represent one table row (no embed newlines). - * Escape sequences for TSV (custom): - * - \n => [newline] - * - \\ => \ - * - \p => | - * - * @author ppasupat - */ -public class TableWriter { - - public final TableKnowledgeGraph graph; - - public TableWriter(TableKnowledgeGraph graph) { - this.graph = graph; - } - - /** - * If out is null, log using LogInfo. Otherwise, print line to out. - */ - private void write(PrintWriter out, String stuff) { - if (out == null) - LogInfo.logs("%s", stuff); - else - out.println(stuff); - } - - // ============================================================ - // CSV - // ============================================================ - - public void writeCSV() { - writeCSVActual(null); - } - - public void writeCSV(PrintWriter out) { - writeCSVActual(out); - } - - public void writeCSV(String filename) { - try (PrintWriter out = new PrintWriter(filename)) { - writeCSVActual(out); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - private void writeCSVActual(PrintWriter out) { - try (CSVWriter writer = new CSVWriter(out)) { - String[] record = new String[graph.numColumns()]; - // Print header - for (int j = 0; j < record.length; j++) - record[j] = graph.getColumn(j).originalString; - writer.writeNext(record); - // Print other rows - for (int i = 0; i < graph.numRows(); i++) { - for (int j = 0; j < record.length; j++) - record[j] = graph.getCell(i, j).properties.originalString; - writer.writeNext(record); - } - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - // ============================================================ - // TSV - // ============================================================ - - public void writeTSV() { - writeTSVActual(null); - } - - public void writeTSV(PrintWriter out) { - writeTSVActual(out); - } - - public void writeTSV(String filename) { - try (PrintWriter out = new PrintWriter(filename)) { - writeTSVActual(out); - } catch (IOException 
e) { - throw new RuntimeException(e); - } - } - - private void writeTSVActual(PrintWriter out) { - String[] record = new String[graph.numColumns()]; - // Print header - for (int j = 0; j < record.length; j++) - record[j] = StringNormalizationUtils.escapeTSV(graph.getColumn(j).originalString); - write(out, String.join("\t", record)); - // Print other rows - for (int i = 0; i < graph.numRows(); i++) { - for (int j = 0; j < record.length; j++) - record[j] = StringNormalizationUtils.escapeTSV(graph.getCell(i, j).properties.originalString); - write(out, String.join("\t", record)); - } - } - - // ============================================================ - // Human Readable Format - // ============================================================ - - public void log() { - writeHumanReadableActual(null); - } - - public void writeHumanReadable() { - writeHumanReadableActual(null); - } - - public void writeHumanReadable(PrintWriter out) { - writeHumanReadableActual(out); - } - - public void writeHumanReadable(String filename) { - try (PrintWriter out = new PrintWriter(filename)) { - writeHumanReadableActual(out); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - private void writeHumanReadableActual(PrintWriter out) { - int LIMIT = 20, numColumns = graph.numColumns(); - // Measure widths - int[] widths = new int[numColumns]; - for (int i = 0; i < numColumns; i++) - widths[i] = Math.max(widths[i], graph.getColumn(i).originalString.length()); - for (TableRow row : graph.rows) - for (int j = 0; j < numColumns; j++) - widths[j] = Math.max(widths[j], row.children.get(j).properties.originalString.length()); - for (int i = 0; i < numColumns; i++) - widths[i] = Math.max(1, Math.min(widths[i], LIMIT)); - // Print! 
- { - StringBuilder sb = new StringBuilder("|"); - for (int i = 0; i < numColumns; i++) - sb.append(String.format(" %-" + widths[i] + "s", cutoff(graph.columns.get(i).originalString, LIMIT))).append(" |"); - write(out, sb.toString()); - } - for (TableRow row : graph.rows) { - StringBuilder sb = new StringBuilder("|"); - for (int i = 0; i < numColumns; i++) - sb.append(String.format(" %-" + widths[i] + "s", cutoff(row.children.get(i).properties.originalString, LIMIT))).append(" |"); - write(out, sb.toString()); - } - } - - private String cutoff(String x, int limit) { - x = x.replace('\n', ' '); - if (x.length() < limit) return x; - return x.substring(0, limit - 3) + "..."; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/TaggedDatasetGenerator.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/TaggedDatasetGenerator.java deleted file mode 100644 index 360170bcbf..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/TaggedDatasetGenerator.java +++ /dev/null @@ -1,122 +0,0 @@ -package edu.stanford.nlp.sempre.tables.serialize; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.tables.TableKnowledgeGraph; -import fig.basic.*; -import fig.exec.Execution; - -/** - * Generate TSV files containing CoreNLP tags of the datasets. 
- * - * Field descriptions: - * - id: unique ID of the example - * - utterance: the question in its original format - * - context: the table used to answer the question - * - targetValue: the answer in its original format, possibly a `||`-separated list - * - tokens: the question, tokenized - * - lemmaTokens: the question, tokenized and lemmatized - * - posTags: the part of speech tag of each token - * - nerTags: the name entity tag of each token - * - nerValues: if the NER tag is numerical or temporal, the value of that - * NER span will be listed here - * - targetCanon: the answer, canonicalized - * - targetCanonType: type of the canonicalized answer (number, date, or string) - * - * @author ppasupat - */ -public class TaggedDatasetGenerator extends TSVGenerator implements Runnable { - - public static void main(String[] args) { - Execution.run(args, "TaggedDatasetGeneratorMain", new TaggedDatasetGenerator(), - Master.getOptionsParser()); - } - - @Override - public void run() { - // Read dataset - LogInfo.begin_track("Dataset.read"); - for (Pair pathPair : Dataset.opts.inPaths) { - String group = pathPair.getFirst(); - String path = pathPair.getSecond(); - // Open output file - String filename = Execution.getFile("tagged-" + group + ".tsv"); - out = IOUtils.openOutHard(filename); - dump(FIELDS); - // Read LispTrees - LogInfo.begin_track("Reading %s", path); - int maxExamples = Dataset.getMaxExamplesForGroup(group); - Iterator trees = LispTree.proto.parseFromFile(path); - // Go through the examples - int n = 0; - while (n < maxExamples) { - // Format: (example (id ...) (utterance ...) (targetFormula ...) 
(targetValue ...)) - LispTree tree = trees.next(); - if (tree == null) break; - if (tree.children.size() < 2 || !"example".equals(tree.child(0).value)) { - if ("metadata".equals(tree.child(0).value)) continue; - throw new RuntimeException("Invalid example: " + tree); - } - Example ex = Example.fromLispTree(tree, path + ":" + n); - ex.preprocess(); - LogInfo.logs("Example %s (%d): %s => %s", ex.id, n, ex.getTokens(), ex.targetValue); - n++; - dumpExample(ex, tree); - } - out.close(); - LogInfo.logs("Finished dumping to %s", filename); - LogInfo.end_track(); - } - LogInfo.end_track(); - } - - private static final String[] FIELDS = new String[] { - "id", "utterance", "context", "targetValue", - "tokens", "lemmaTokens", "posTags", "nerTags", "nerValues", - "targetCanon", "targetCanonType", - }; - - @Override - protected void dump(String... stuff) { - assert stuff.length == FIELDS.length; - super.dump(stuff); - } - - private void dumpExample(Example ex, LispTree tree) { - String[] fields = new String[FIELDS.length]; - // Get original information from the LispTree - for (int i = 1; i < tree.children.size(); i++) { - LispTree arg = tree.child(i); - String label = arg.child(0).value; - if ("id".equals(label)) { - fields[0] = serialize(arg.child(1).value); - } else if ("utterance".equals(label)) { - fields[1] = serialize(arg.child(1).value); - } else if ("targetValue".equals(label) || "targetValues".equals(label)) { - if (arg.children.size() != 2) - throw new RuntimeException("Expect one target value"); - fields[3] = serialize(Values.fromLispTree(arg.child(1))); - } - } - // Other information come from Example - fields[2] = serialize(((TableKnowledgeGraph) ex.context.graph).filename.replace("lib/data/WikiTableQuestions/", "")); - fields[4] = serialize(ex.languageInfo.tokens); - fields[5] = serialize(ex.languageInfo.lemmaTokens); - fields[6] = serialize(ex.languageInfo.posTags); - fields[7] = serialize(ex.languageInfo.nerTags); - fields[8] = 
serialize(ex.languageInfo.nerValues); - // Information from target value - fields[9] = serialize(ex.targetValue); - Value targetValue = ex.targetValue; - if (targetValue instanceof ListValue) - targetValue = ((ListValue) targetValue).values.get(0); - if (targetValue instanceof NumberValue) - fields[10] = "number"; - else if (targetValue instanceof DateValue) - fields[10] = "date"; - else - fields[10] = "string"; - dump(fields); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/TaggedFuzzyGenerator.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/TaggedFuzzyGenerator.java deleted file mode 100644 index 167f04e782..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/TaggedFuzzyGenerator.java +++ /dev/null @@ -1,109 +0,0 @@ -package edu.stanford.nlp.sempre.tables.serialize; - -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.FuzzyMatchFn.FuzzyMatchFnMode; -import edu.stanford.nlp.sempre.SemanticFn.CallInfo; -import edu.stanford.nlp.sempre.tables.TableKnowledgeGraph; -import fig.basic.*; -import fig.exec.Execution; - -/** - * Generate TSV files containing information about fuzzy matched objects. 
- * - * @author ppasupat - */ -public class TaggedFuzzyGenerator extends TSVGenerator implements Runnable { - - public static void main(String[] args) { - Execution.run(args, "TaggedFuzzyGeneratorMain", new TaggedFuzzyGenerator(), - Master.getOptionsParser()); - } - - private Grammar grammar = new Grammar(); - - @Override - public void run() { - // Read grammar - grammar.read(Grammar.opts.inPaths); - // Read dataset - LogInfo.begin_track("Dataset.read"); - for (Pair pathPair : Dataset.opts.inPaths) { - String group = pathPair.getFirst(); - String path = pathPair.getSecond(); - // Open output file - String filename = Execution.getFile("fuzzy-" + group + ".tsv"); - out = IOUtils.openOutHard(filename); - dump(FIELDS); - // Read LispTrees - LogInfo.begin_track("Reading %s", path); - int maxExamples = Dataset.getMaxExamplesForGroup(group); - Iterator trees = LispTree.proto.parseFromFile(path); - // Go through the examples - int n = 0; - while (n < maxExamples) { - // Format: (example (id ...) (utterance ...) (targetFormula ...) (targetValue ...)) - LispTree tree = trees.next(); - if (tree == null) break; - if (tree.children.size() < 2 || !"example".equals(tree.child(0).value)) { - if ("metadata".equals(tree.child(0).value)) continue; - throw new RuntimeException("Invalid example: " + tree); - } - Example ex = Example.fromLispTree(tree, path + ":" + n); - ex.preprocess(); - LogInfo.begin_track("Example %s (%d): %s => %s", ex.id, n, ex.getTokens(), ex.targetValue); - n++; - dumpExample(ex, tree); - LogInfo.end_track(); - } - out.close(); - LogInfo.logs("Finished dumping to %s", filename); - LogInfo.end_track(); - } - LogInfo.end_track(); - } - - private static final String[] FIELDS = new String[] { - "id", "type", "start", "end", "phrase", "fragment" - }; - - @Override - protected void dump(String... 
stuff) { - assert stuff.length == FIELDS.length; - super.dump(stuff); - } - - private void dumpExample(Example ex, LispTree tree) { - int n = ex.numTokens(); - for (int i = 0; i < n; i++) { - StringBuilder sb = new StringBuilder(ex.token(i)); - for (int j = i; j < n; j++) { - String term = sb.toString(); - Derivation deriv = - new Derivation.Builder() - .cat(Rule.phraseCat).start(i).end(j) - .rule(Rule.nullRule) - .children(Derivation.emptyList) - .withStringFormulaFrom(term) - .canonicalUtterance(term) - .createDerivation(); - List children = new ArrayList<>(); - children.add(deriv); - // Get the derived derivations - for (Rule rule : grammar.getRules()) { - CallInfo c = new CallInfo(rule.lhs, i, j + 1, rule, children); - Iterator itr = rule.sem.call(ex, c); - while (itr.hasNext()) { - deriv = itr.next(); - LogInfo.logs("Found %s %s -> %s", rule.lhs, term, deriv.formula); - dump(ex.id, rule.lhs.substring(1), "" + i, "" + (j + 1), term, deriv.formula.toString()); - } - } - if (j + 1 < n) - sb.append(" ").append(ex.token(j + 1)); - } - } - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/TaggedTableGenerator.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/TaggedTableGenerator.java deleted file mode 100644 index 36e8b65e45..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/serialize/TaggedTableGenerator.java +++ /dev/null @@ -1,148 +0,0 @@ -package edu.stanford.nlp.sempre.tables.serialize; - -import java.io.*; -import java.nio.file.*; -import java.nio.file.attribute.BasicFileAttributes; -import java.util.*; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.tables.*; -import fig.basic.IOUtils; -import fig.basic.LogInfo; -import fig.exec.Execution; - -/** - * Generate TSV files containing CoreNLP tags of the tables. 
- * - * Mandatory fields: - * - row: row index (-1 is the header row) - * - col: column index - * - id: unique ID of the cell. - * - Each header cell gets a unique ID even when the contents are identical - * - Non-header cells get the same ID <=> they have exactly the same content - * - content: the cell text (images and hidden spans are removed) - * - tokens: the cell text, tokenized - * - lemmaTokens: the cell text, tokenized and lemmatized - * - posTags: the part of speech tag of each token - * - nerTags: the name entity tag of each token - * - nerValues: if the NER tag is numerical or temporal, the value of that - * NER span will be listed here - * - * The following fields are optional: - * - number: interpretation as a number - * - For multiple numbers, the first number is extracted - * - date: interpretation as a date - * - num2: the second number in the cell (useful for scores like `1-2`) - * - list: interpretation as a list of items - * - listId: unique ID of list items - * - * @author ppasupat - */ -public class TaggedTableGenerator extends TSVGenerator implements Runnable { - - public static void main(String[] args) { - Execution.run(args, "TaggedTableGeneratorMain", new TaggedTableGenerator(), - Master.getOptionsParser()); - } - - public static final Pattern FILENAME_PATTERN = Pattern.compile("^.*/(\\d+)-csv/(\\d+).csv$"); - private LanguageAnalyzer analyzer; - - @Override - public void run() { - // Get the list of all tables - analyzer = LanguageAnalyzer.getSingleton(); - Path baseDir = Paths.get(TableKnowledgeGraph.opts.baseCSVDir); - try { - Files.walkFileTree(baseDir, new SimpleFileVisitor() { - @Override - public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { - Matcher matcher = FILENAME_PATTERN.matcher(file.toString()); - if (matcher.matches()) { - LogInfo.begin_track("Processing %s", file); - int batchIndex = Integer.parseInt(matcher.group(1)), - dataIndex = Integer.parseInt(matcher.group(2)); - 
TableKnowledgeGraph table = TableKnowledgeGraph.fromFilename(baseDir.relativize(file).toString()); - String outDir = Execution.getFile("tagged/" + batchIndex + "-tagged/"), - outFilename = new File(outDir, dataIndex + ".tagged").getPath(); - new File(outDir).mkdirs(); - out = IOUtils.openOutHard(outFilename); - dumpTable(table); - out.close(); - LogInfo.end_track(); - } - return super.visitFile(file, attrs); - } - }); - } catch (IOException e) { - e.printStackTrace(); - LogInfo.fails("%s", e); - } - } - - private static final String[] FIELDS = new String[] { - "row", "col", "id", "content", - "tokens", "lemmaTokens", "posTags", "nerTags", "nerValues", - "number", "date", "num2", "list", "listId", - }; - - @Override - protected void dump(String... stuff) { - assert stuff.length == FIELDS.length; - super.dump(stuff); - } - - private void dumpTable(TableKnowledgeGraph table) { - dump(FIELDS); - // header row - for (int j = 0; j < table.columns.size(); j++) { - dumpColumnHeader(j, table.columns.get(j)); - } - // other rows - for (int i = 0; i < table.rows.size(); i++) { - for (int j = 0; j < table.columns.size(); j++) { - dumpCell(i, j, table.rows.get(i).children.get(j)); - } - } - } - - private void dumpColumnHeader(int j, TableColumn column) { - String[] fields = new String[FIELDS.length]; - fields[0] = "-1"; - fields[1] = "" + j; - fields[2] = serialize(column.relationNameValue.id); - fields[3] = serialize(column.originalString); - LanguageInfo info = analyzer.analyze(column.originalString); - fields[4] = serialize(info.tokens); - fields[5] = serialize(info.lemmaTokens); - fields[6] = serialize(info.posTags); - fields[7] = serialize(info.nerTags); - fields[8] = serialize(info.nerValues); - fields[9] = fields[10] = fields[11] = fields[12] = fields[13] = ""; - dump(fields); - } - - private void dumpCell(int i, int j, TableCell cell) { - String[] fields = new String[FIELDS.length]; - fields[0] = "" + i; - fields[1] = "" + j; - fields[2] = 
serialize(cell.properties.nameValue.id); - fields[3] = serialize(cell.properties.originalString); - LanguageInfo info = analyzer.analyze(cell.properties.originalString); - fields[4] = serialize(info.tokens); - fields[5] = serialize(info.lemmaTokens); - fields[6] = serialize(info.posTags); - fields[7] = serialize(info.nerTags); - fields[8] = serialize(info.nerValues); - fields[9] = serialize(new ListValue(new ArrayList<>(cell.properties.metadata.get(TableTypeSystem.CELL_NUMBER_VALUE)))); - fields[10] = serialize(new ListValue(new ArrayList<>(cell.properties.metadata.get(TableTypeSystem.CELL_DATE_VALUE)))); - fields[11] = serialize(new ListValue(new ArrayList<>(cell.properties.metadata.get(TableTypeSystem.CELL_NUM2_VALUE)))); - ListValue parts = new ListValue(new ArrayList<>(cell.properties.metadata.get(TableTypeSystem.CELL_PART_VALUE))); - fields[12] = serialize(parts); - fields[13] = serializeId(parts); - dump(fields); - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/test/BatchTableExecutor.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/test/BatchTableExecutor.java deleted file mode 100644 index 1dab7f6414..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/test/BatchTableExecutor.java +++ /dev/null @@ -1,136 +0,0 @@ -package edu.stanford.nlp.sempre.tables.test; - -import java.io.*; -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.tables.TableKnowledgeGraph; -import edu.stanford.nlp.sempre.tables.TableValueEvaluator; -import edu.stanford.nlp.sempre.tables.lambdadcs.LambdaDCSExecutor; -import fig.basic.*; -import fig.exec.Execution; - -/** - * Execute the specified logical forms on the specified WikiTableQuestions context. 
- * - * @author ppasupat - */ -public class BatchTableExecutor implements Runnable { - public static class Options { - @Option(gloss = "TSV file containing table contexts and logical forms") - public String batchInput; - @Option(gloss = "Datasets for mapping example IDs to contexts") - public List batchDatasets = Arrays.asList("lib/data/tables/data/training.examples"); - } - public static Options opts = new Options(); - - public static void main(String[] args) { - Execution.run(args, "BatchTableExecutorMain", new BatchTableExecutor(), Master.getOptionsParser()); - } - - @Override - public void run() { - if (opts.batchInput == null || opts.batchInput.isEmpty()) { - LogInfo.logs("*******************************************************************************"); - LogInfo.logs("USAGE: ./run @mode=tables @class=execute -batchInput "); - LogInfo.logs(""); - LogInfo.logs("Input file format: Each line has something like"); - LogInfo.logs(" nt-218 [tab] (count (fb:type.object.type fb:type.row))"); - LogInfo.logs("or"); - LogInfo.logs(" csv/204-csv/23.csv [tab] (count (fb:type.object.type fb:type.row))"); - LogInfo.logs(""); - LogInfo.logs("Results will also be printed to state/execs/___.exec/denotations.tsv"); - LogInfo.logs("Output format:"); - LogInfo.logs(" nt-218 [tab] (count (fb:type.object.type fb:type.row)) [tab] (list (number 10)) [tab] false"); - LogInfo.logs("where the last column indicates whether the answer is consistent with the target answer"); - LogInfo.logs("(only available when the first column is nt-___)"); - LogInfo.logs("*******************************************************************************"); - System.exit(1); - } - LambdaDCSExecutor executor = new LambdaDCSExecutor(); - ValueEvaluator evaluator = new TableValueEvaluator(); - try { - BufferedReader reader = IOUtils.openIn(opts.batchInput); - PrintWriter output = IOUtils.openOut(Execution.getFile("denotations.tsv")); - String line; - while ((line = reader.readLine()) != null) { - String[] 
tokens = line.split("\t"); - String answer; - try { - Formula formula = Formula.fromString(tokens[1]); - if (tokens[0].startsWith("csv")) { - TableKnowledgeGraph graph = TableKnowledgeGraph.fromFilename(tokens[0]); - ContextValue context = new ContextValue(graph); - Value denotation = executor.execute(formula, context).value; - if (denotation instanceof ListValue) - denotation = addOriginalStrings((ListValue) denotation, graph); - answer = denotation.toString(); - } else { - Example ex = exIdToExample(tokens[0]); - Value denotation = executor.execute(formula, ex.context).value; - if (denotation instanceof ListValue) - denotation = addOriginalStrings((ListValue) denotation, (TableKnowledgeGraph) ex.context.graph); - answer = denotation.toString(); - boolean correct = evaluator.getCompatibility(ex.targetValue, denotation) == 1.; - answer = denotation.toString() + "\t" + correct; - } - } catch (Exception e) { - answer = "ERROR: " + e; - } - System.out.printf("%s\t%s\t%s\n", tokens[0], tokens[1], answer); - output.printf("%s\t%s\t%s\n", tokens[0], tokens[1], answer); - } - reader.close(); - output.close(); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - private Map exIdToExampleMap; - - private Example exIdToExample(String exId) { - if (exIdToExampleMap == null) { - exIdToExampleMap = new HashMap<>(); - try { - for (String filename : opts.batchDatasets) { - BufferedReader reader = IOUtils.openIn(filename); - String line; - while ((line = reader.readLine()) != null) { - LispTree tree = LispTree.proto.parseFromString(line); - if (!"id".equals(tree.child(1).child(0).value)) - throw new RuntimeException("Malformed example: " + line); - exIdToExampleMap.put(tree.child(1).child(1).value, tree); - } - } - } catch (IOException e) { - throw new RuntimeException(e); - } - } - Object obj = exIdToExampleMap.get(exId); - if (obj == null) return null; - Example ex; - if (obj instanceof LispTree) { - ex = Example.fromLispTree((LispTree) obj, exId); - 
ex.preprocess(); - exIdToExampleMap.put(exId, ex); - } else { - ex = (Example) obj; - } - return ex; - } - - ListValue addOriginalStrings(ListValue answers, TableKnowledgeGraph graph) { - List values = new ArrayList<>(); - for (Value value : answers.values) { - if (value instanceof NameValue) { - NameValue name = (NameValue) value; - if (name.description == null) - value = new NameValue(name.id, graph.getOriginalString(((NameValue) value).id)); - } - values.add(value); - } - return new ListValue(values); - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/test/CustomExample.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/test/CustomExample.java deleted file mode 100644 index 8b28b06013..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/test/CustomExample.java +++ /dev/null @@ -1,259 +0,0 @@ -package edu.stanford.nlp.sempre.tables.test; - -import java.io.*; -import java.util.*; -import java.util.regex.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.tables.StringNormalizationUtils; -import edu.stanford.nlp.sempre.tables.TableTypeSystem; -import fig.basic.*; -import fig.exec.Execution; - -/** - * Custom version of Example. 
- * - * - Allow additional keys "warning", "error", and "alternativeFormula" - * - Allow shorthand annotation for targetFormula - * - * @author ppasupat - */ -public class CustomExample extends Example { - public static class Options { - // Format: "3-5,10,12-20" - @Option(gloss = "Verbosity") public int verbose = 2; - @Option(gloss = "Filter only these examples") public String filterExamples = null; - @Option public boolean allowNoAnnotation = false; - } - public static Options opts = new Options(); - - public List warnings = new ArrayList<>(); - public List errors = new ArrayList<>(); - public List alternativeFormulas = new ArrayList<>(); - - public CustomExample(Example ex) { - // Copy everything from ex - super(ex.id, ex.utterance, ex.context, ex.targetFormula, ex.targetValue, ex.languageInfo); - } - - static final Set usefulTags = - new HashSet(Arrays.asList("id", "utterance", "targetFormula", "targetValue", "targetValues", "context")); - - /** - * Convert LispTree to Example with additional tags - */ - public static CustomExample fromLispTree(LispTree tree, String defaultId) { - Builder b = new Builder().setId(defaultId); - - for (int i = 1; i < tree.children.size(); i++) { - LispTree arg = tree.child(i); - String label = arg.child(0).value; - if ("id".equals(label)) { - b.setId(arg.child(1).value); - } else if ("utterance".equals(label)) { - b.setUtterance(arg.child(1).value); - } else if ("targetFormula".equals(label)) { - LispTree canonicalized = canonicalizeFormula(arg.child(1)); - b.setTargetFormula(Formulas.fromLispTree(canonicalized)); - } else if ("targetValue".equals(label) || "targetValues".equals(label)) { - if (arg.children.size() != 2) - throw new RuntimeException("Expect one target value"); - b.setTargetValue(Values.fromLispTree(arg.child(1))); - } else if ("context".equals(label)) { - b.setContext(new ContextValue(arg)); - } - } - b.setLanguageInfo(new LanguageInfo()); - - CustomExample ex = new CustomExample(b.createExample()); - boolean error 
= false; - - for (int i = 1; i < tree.children.size(); i++) { - LispTree arg = tree.child(i); - String label = arg.child(0).value; - if ("warning".equals(label)) { - ex.warnings.add(arg.child(1).value); - } else if ("error".equals(label)) { - error = true; - ex.errors.add(arg.child(1).value); - } else if ("alternativeFormula".equals(label)) { - LispTree canonicalized = canonicalizeFormula(arg.child(1)); - ex.alternativeFormulas.add(Formulas.fromLispTree(canonicalized)); - } else if (!usefulTags.contains(label)) { - throw new RuntimeException("Invalid example argument: " + arg); - } - } - - // Check formula and error - if (ex.targetFormula == null && !error && !opts.allowNoAnnotation) - throw new RuntimeException("Either error or targetFormula must be present."); - if (ex.targetFormula != null && error) - throw new RuntimeException("Cannot use error when targetFormula is present."); - - return ex; - } - - static final Map formulaMacros; - static { - formulaMacros = new HashMap<>(); - formulaMacros.put("@type", CanonicalNames.TYPE); - formulaMacros.put("@row", TableTypeSystem.ROW_TYPE); - formulaMacros.put("@next", TableTypeSystem.ROW_NEXT_VALUE.id); - formulaMacros.put("@!next", "!" + TableTypeSystem.ROW_NEXT_VALUE.id); - formulaMacros.put("@index", TableTypeSystem.ROW_INDEX_VALUE.id); - formulaMacros.put("@!index", "!" + TableTypeSystem.ROW_INDEX_VALUE.id); - formulaMacros.put("@p.num", TableTypeSystem.CELL_NUMBER_VALUE.id); - formulaMacros.put("@!p.num", "!" + TableTypeSystem.CELL_NUMBER_VALUE.id); - formulaMacros.put("@p.date", TableTypeSystem.CELL_DATE_VALUE.id); - formulaMacros.put("@!p.date", "!" + TableTypeSystem.CELL_DATE_VALUE.id); - formulaMacros.put("@p.num2", TableTypeSystem.CELL_NUM2_VALUE.id); - formulaMacros.put("@!p.num2", "!" + TableTypeSystem.CELL_NUM2_VALUE.id); - formulaMacros.put("@p.part", TableTypeSystem.CELL_PART_VALUE.id); - formulaMacros.put("@!p.part", "!" 
+ TableTypeSystem.CELL_PART_VALUE.id); - } - - static final Pattern regexProperty = Pattern.compile("r\\.(.*)"); - static final Pattern regexReversedProperty = Pattern.compile("!r\\.(.*)"); - static final Pattern regexEntity = Pattern.compile("c\\.(.*)"); - static final Pattern regexPart = Pattern.compile("q\\.(.*)"); - - /** - * Return a new LispTree representing the canonicalized version of the original formula - */ - public static LispTree canonicalizeFormula(LispTree orig) { - if (orig.isLeaf()) { - String value = orig.value; - // 45 --> (number 45) - if (StringNormalizationUtils.parseNumberStrict(value) != null) - return LispTree.proto.newList("number", value); - // value with "@" --> canonicalized name - if (value.contains("@")) { - String canonicalName = formulaMacros.get(value); - if (canonicalName == null) - throw new RuntimeException("Unrecognized macro: " + value); - return LispTree.proto.newLeaf(canonicalName); - } - // c.xxx or c_xxx.yyy --> canonicalized name - Matcher match; - if ((match = regexProperty.matcher(value)).matches()) - return LispTree.proto.newLeaf(TableTypeSystem.getRowPropertyName(match.group(1))); - if ((match = regexReversedProperty.matcher(value)).matches()) - return LispTree.proto.newLeaf("!" + TableTypeSystem.getRowPropertyName(match.group(1))); - if ((match = regexEntity.matcher(value)).matches()) - return LispTree.proto.newLeaf(TableTypeSystem.CELL_NAME_PREFIX + "." + match.group(1)); - if ((match = regexPart.matcher(value)).matches()) - return LispTree.proto.newLeaf(TableTypeSystem.PART_NAME_PREFIX + "." 
+ match.group(1)); - if (value.contains(".") && !(value.startsWith("fb:") || value.startsWith("!fb:"))) - throw new RuntimeException("Unhandled '.': " + value); - return orig; - } else { - LispTree answer = LispTree.proto.newList(); - // Handle special cases - LispTree head = orig.child(0); - if ("date".equals(head.value)) { - for (LispTree child : orig.children) { - answer.addChild(LispTree.proto.newLeaf(child.value)); - } - } else { - for (LispTree child : orig.children) { - answer.addChild(canonicalizeFormula(child)); - } - } - return answer; - } - } - - // ============================================================ - // Read dataset - // ============================================================ - - public interface ExampleProcessor { - void run(CustomExample ex); - } - - public static boolean checkFilterExamples(int n) { - if (opts.filterExamples == null || opts.filterExamples.isEmpty()) return true; - for (String range : opts.filterExamples.split(",")) { - String[] tokens = range.split("-"); - if (tokens.length == 1) - if (Integer.parseInt(tokens[0]) == n) - return true; - if (tokens.length == 2) - if (Integer.parseInt(tokens[0]) <= n && Integer.parseInt(tokens[1]) >= n) - return true; - } - return false; - } - - public static List getDataset(List> pathPairs, ExampleProcessor processor) { - LogInfo.begin_track_printAll("Dataset.read"); - Evaluation evaluation = new Evaluation(); - List examples = new ArrayList<>(); - for (Pair pathPair : pathPairs) { - String group = pathPair.getFirst(); - String path = pathPair.getSecond(); - Execution.putOutput("group", group); - - LogInfo.begin_track("Reading %s", path); - Iterator trees = LispTree.proto.parseFromFile(path); - - while (trees.hasNext()) { - // Format: (example (id ...) (utterance ...) (targetFormula ...) 
(targetValue ...)) - LispTree tree = trees.next(); - if ("metadata".equals(tree.child(0).value)) continue; - if (!checkFilterExamples(examples.size())) { // Skip -- for debugging - examples.add(null); - continue; - } - if (opts.verbose >= 2) - LogInfo.begin_track("Reading Example %s", examples.size()); - if (tree.children.size() < 2 && !"example".equals(tree.child(0).value)) - throw new RuntimeException("Invalid example: " + tree); - CustomExample ex = null; - Execution.putOutput("example", examples.size()); - try { - ex = CustomExample.fromLispTree(tree, path + ":" + examples.size()); // Specify a default id if it doesn't exist - ex.preprocess(); - } catch (Exception e) { - StringWriter sw = new StringWriter(); - e.printStackTrace(new PrintWriter(sw)); - LogInfo.warnings("Example %s: CONTAINS ERROR! %s:\n%s", ex == null ? ex : ex.id, e, sw); - } - if (opts.verbose >= 2) { - ex.log(); - } else if (opts.verbose >= 1) { - LogInfo.logs("Example %s (%d): %s => %s", - ex.id, examples.size(), ex.getTokens(), ex.targetValue); - } - examples.add(ex); - if (opts.verbose >= 2) { - for (String warning : ex.warnings) LogInfo.logs("WARNING: %s", warning); - for (String error : ex.errors) LogInfo.logs("ERROR: %s", error); - } - if (processor != null) processor.run(ex); - if (opts.verbose >= 2) - LogInfo.end_track(); - if (ex != null && ex.evaluation != null) { - LogInfo.logs("Current: %s", ex.evaluation.summary()); - evaluation.add(ex.evaluation); - LogInfo.logs("Cumulative(%s): %s", group, evaluation.summary()); - } - } - LogInfo.end_track(); - LogInfo.logs("Stats for %s: %s", group, evaluation.summary()); - evaluation.logStats(group); - evaluation.putOutput(group); - } - LogInfo.end_track(); - return examples; - } - - public static List getDataset(List> pathPairs) { - return getDataset(pathPairs, null); - } - - public static List getDataset(String path) { - return getDataset(Collections.singletonList(new Pair<>("train", path)), null); - } - -} diff --git 
a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/test/DPDParserChecker.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/test/DPDParserChecker.java deleted file mode 100644 index 0d8a0ec32f..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/test/DPDParserChecker.java +++ /dev/null @@ -1,25 +0,0 @@ -package edu.stanford.nlp.sempre.tables.test; - -import edu.stanford.nlp.sempre.*; -import fig.exec.Execution; - -/** - * Check 2 things: - * - Whether the annotated formula actually executes to the correct denotation. - * - Whether the formula is in the final beam of DPDParser. - * - * @author ppasupat - */ -public class DPDParserChecker implements Runnable { - public static void main(String[] args) { - Execution.run(args, "DPDParserCheckerMain", new DPDParserChecker(), Master.getOptionsParser()); - } - - @Override - public void run() { - DPDParserCheckerProcessor processor = new DPDParserCheckerProcessor(); - CustomExample.getDataset(Dataset.opts.inPaths, processor); - processor.summarize(); - } - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/test/DPDParserCheckerProcessor.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/test/DPDParserCheckerProcessor.java deleted file mode 100644 index f9e48a4618..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/test/DPDParserCheckerProcessor.java +++ /dev/null @@ -1,152 +0,0 @@ -package edu.stanford.nlp.sempre.tables.test; - -import java.io.PrintWriter; -import java.io.StringWriter; -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.tables.TableKnowledgeGraph; -import edu.stanford.nlp.sempre.tables.lambdadcs.LambdaDCSExecutor; -import edu.stanford.nlp.sempre.tables.test.CustomExample.ExampleProcessor; -import fig.basic.*; -import fig.exec.Execution; - -public class DPDParserCheckerProcessor implements ExampleProcessor { - public static class Options { - @Option(gloss = "Only 
check annotated formulas (Don't check DPDParser beam)") - public boolean onlyCheckAnnotatedFormulas = false; - } - public static Options opts = new Options(); - - int n = 0, annotated = 0, oracle = 0, beamHasCorrectFormula = 0, beamNoCorrectFormula = 0, noBeam = 0; - final Builder builder; - final PrintWriter eventsOut; - - public DPDParserCheckerProcessor() { - builder = new Builder(); - builder.build(); - eventsOut = IOUtils.openOutHard(Execution.getFile("checker.results")); - } - - @Override - public void run(CustomExample ex) { - n++; - String formulaFlag = "", beamFlag = ""; - if (ex.targetFormula == null) { - formulaFlag = "no"; - } else { - annotated++; - if (isAnnotatedFormulaCorrect(ex)) { - oracle++; - formulaFlag = "good"; - } else { - formulaFlag = "incorrect"; - } - } - if (!opts.onlyCheckAnnotatedFormulas) { - ParserState state = builder.parser.parse(builder.params, ex, false); - LogInfo.logs("utterance: %s", ex.utterance); - LogInfo.logs("targetFormula: %s", ex.targetFormula); - LogInfo.logs("targetValue: %s", ex.targetValue); - if (state.predDerivations.isEmpty()) { - noBeam++; - beamFlag = "no"; - } else { - Derivation correctDeriv = isCorrectFormulaOnBeam(ex, state.predDerivations); - if (correctDeriv != null) { - LogInfo.logs("Found correct formula: %s", correctDeriv); - beamHasCorrectFormula++; - beamFlag = "yes"; - } else { - beamNoCorrectFormula++; - beamFlag = "reach"; - } - } - LogInfo.logs("RESULT: %s %s %s", ex.id, formulaFlag, beamFlag); - eventsOut.printf("%s\t%s\t%s\n", ex.id.replaceAll("nt-", ""), formulaFlag, beamFlag); - eventsOut.flush(); - } - // Save memory - if (ex.predDerivations != null) { - ex.predDerivations.clear(); - System.gc(); - } - } - - // See if all annotated formulas (targetFormula, alternativeFormulas) are correct - boolean isAnnotatedFormulaCorrect(CustomExample ex) { - boolean isCorrect = isAnnotatedFormulaCorrect(ex, ex.targetFormula, "targetFormula"); - for (Formula formula : ex.alternativeFormulas) { - 
isCorrect = isCorrect && isAnnotatedFormulaCorrect(ex, formula, "alternativeFormula"); - } - return isCorrect; - } - - // See if a formula executes to the targetValue - boolean isAnnotatedFormulaCorrect(CustomExample ex, Formula formula, String prefix) { - LogInfo.begin_track("isAnnotatedFormulaCorrect(%s): Example %s", prefix, ex.id); - StopWatch watch = new StopWatch(); - watch.start(); - LogInfo.logs("TRUE: %s", ex.targetValue); - double result = 0; - try { - LogInfo.logs("Inferred Type: %s", TypeInference.inferType(formula)); - Value pred = builder.executor.execute(formula, ex.context).value; - if (pred instanceof ListValue) - pred = ((TableKnowledgeGraph) ex.context.graph).getListValueWithOriginalStrings((ListValue) pred); - LogInfo.logs("Example %s: %s", ex.id, ex.getTokens()); - LogInfo.logs(" targetFormula: %s", formula); - LogInfo.logs(" canonicalized: %s", TableFormulaCanonicalizer.canonicalizeFormula(formula)); - LogInfo.logs("TRUE: %s", ex.targetValue); - LogInfo.logs("PRED: %s", pred); - result = builder.valueEvaluator.getCompatibility(ex.targetValue, pred); - if (result != 1) { - LogInfo.warnings("TRUE != PRED. %s Either targetValue or %s is wrong.", ex.id, prefix); - } - } catch (Exception e) { - StringWriter sw = new StringWriter(); - e.printStackTrace(new PrintWriter(sw)); - LogInfo.logs("Example %s: %s", ex.id, ex.getTokens()); - LogInfo.logs(" targetFormula: %s", formula); - LogInfo.logs(" canonicalized: %s", TableFormulaCanonicalizer.canonicalizeFormula(formula)); - LogInfo.logs("TRUE: %s", ex.targetValue); - LogInfo.logs("PRED: ERROR %s\n%s", e, sw); - LogInfo.warnings("TRUE != PRED. 
%s Something was wrong during the execution.", ex.id); - } - watch.stop(); - LogInfo.logs("Parse Time: %s", watch); - LogInfo.end_track(); - return result == 1; - } - - Derivation isCorrectFormulaOnBeam(CustomExample ex, List predDerivations) { - List formulas = new ArrayList<>(); - if (ex.targetFormula != null) - formulas.add(TableFormulaCanonicalizer.canonicalizeFormula(ex.targetFormula)); - if (ex.alternativeFormulas != null) - for (Formula formula : ex.alternativeFormulas) - formulas.add(TableFormulaCanonicalizer.canonicalizeFormula(formula)); - for (Derivation deriv : predDerivations) { - for (Formula formula : formulas) - if (formula.equals(TableFormulaCanonicalizer.canonicalizeFormula(deriv.formula))) return deriv; - } - return null; - } - - public void summarize() { - LogInfo.logs("N = %d | Annotated = %d | Oracle = %d", n, annotated, oracle); - Execution.putOutput("train.oracle.mean", oracle * 1.0 / n); - Execution.putOutput("train.correct.count", n); - if (!opts.onlyCheckAnnotatedFormulas) { - LogInfo.logs("No Beam = %d", noBeam); - LogInfo.logs("Beam has correct formula = %d", beamHasCorrectFormula); - LogInfo.logs("Beam doesn't have correct formula = %d", beamNoCorrectFormula); - Execution.putOutput("train.correct.mean", beamHasCorrectFormula * 1.0 / n); - } - if (builder.executor instanceof LambdaDCSExecutor) { - ((LambdaDCSExecutor) builder.executor).summarize(); - } - StopWatchSet.logStats(); - } - -} \ No newline at end of file diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/test/TableColumnAnalyzer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/test/TableColumnAnalyzer.java deleted file mode 100644 index a777b37cc5..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/test/TableColumnAnalyzer.java +++ /dev/null @@ -1,237 +0,0 @@ -package edu.stanford.nlp.sempre.tables.test; - -import java.io.*; -import java.util.*; -import java.util.regex.*; - -import edu.stanford.nlp.sempre.*; -import 
edu.stanford.nlp.sempre.tables.*; -import fig.basic.*; -import fig.exec.Execution; - -/** - * Analyze table columns and print out any hard-to-process column. - * - * @author ppasupat - */ -public class TableColumnAnalyzer implements Runnable { - public static class Options { - @Option(gloss = "Maximum number of tables to process (for debugging)") - public int maxNumTables = Integer.MAX_VALUE; - @Option(gloss = "Load Wikipedia article titles from this file") - public String wikiTitles = null; - } - public static Options opts = new Options(); - - public static void main(String[] args) { - Execution.run(args, "TableColumnAnalyzerMain", new TableColumnAnalyzer(), Master.getOptionsParser()); - } - - PrintWriter out; - PrintWriter outCompact; - - @Override - public void run() { - out = IOUtils.openOutHard(Execution.getFile("column-stats.tsv")); - outCompact = IOUtils.openOutHard(Execution.getFile("column-compact.tsv")); - Map> tableIdToExIds = getTableIds(); - int tablesProcessed = 0; - for (Map.Entry> entry : tableIdToExIds.entrySet()) { - Execution.putOutput("example", tablesProcessed); - String tableId = entry.getKey(), - tableIdAbbrev = tableId.replaceAll("csv/(\\d+)-csv/(\\d+)\\.csv", "$1-$2"); - LogInfo.begin_track("Processing %s ...", tableId); - TableKnowledgeGraph graph = TableKnowledgeGraph.fromFilename(tableId); - out.printf("%s\tIDS\t%s\n", tableIdAbbrev, String.join(" ", entry.getValue())); - out.printf("%s\tCOLUMNS\t%d\n", tableIdAbbrev, graph.numColumns()); - for (int i = 0; i < graph.numColumns(); i++) { - analyzeColumn(graph, graph.columns.get(i), tableIdAbbrev + "\t" + i); - } - LogInfo.end_track(); - if (tablesProcessed++ >= opts.maxNumTables) break; - } - out.close(); - outCompact.close(); - } - - protected Map> getTableIds() { - Map> tableIdToExIds = new LinkedHashMap<>(); - LogInfo.begin_track_printAll("Collect table IDs"); - for (Pair pathPair : Dataset.opts.inPaths) { - String group = pathPair.getFirst(); - String path = pathPair.getSecond(); - 
Execution.putOutput("group", group); - LogInfo.begin_track("Reading %s", path); - Iterator trees = LispTree.proto.parseFromFile(path); - while (trees.hasNext()) { - LispTree tree = trees.next(); - if ("metadata".equals(tree.child(0).value)) continue; - String exId = null, tableId = null; - for (int i = 1; i < tree.children.size(); i++) { - LispTree arg = tree.child(i); - String label = arg.child(0).value; - if ("id".equals(label)) { - exId = arg.child(1).value; - } else if ("context".equals(label)) { - tableId = arg.child(1).child(2).value; - } - } - if (exId != null && tableId != null) { - List exIdsForTable = tableIdToExIds.get(tableId); - if (exIdsForTable == null) - tableIdToExIds.put(tableId, exIdsForTable = new ArrayList<>()); - exIdsForTable.add(exId); - } - } - LogInfo.end_track(); - } - LogInfo.end_track(); - LogInfo.logs("Got %d IDs", tableIdToExIds.size()); - return tableIdToExIds; - } - - protected void analyzeColumn(TableKnowledgeGraph graph, TableColumn column, String printPrefix) { - List escapedCells = new ArrayList<>(); - // Print the header - String h = column.originalString, escapedH = StringNormalizationUtils.escapeTSV(h); - out.printf("%s\t0\t%s\n", printPrefix, escapedH); - escapedCells.add(escapedH); - // Print the cells - Map typeCounts = new HashMap<>(); - for (int j = 0; j < column.children.size(); j++) { - TableCell cell = column.children.get(j); - String c = cell.properties.originalString, escapedC = StringNormalizationUtils.escapeTSV(c); - escapedCells.add(escapedC); - // Infer the type - List types = analyzeCell(c); - for (String type : types) - MapUtils.incr(typeCounts, type); - out.printf("%s\t%d\t%s\t%s\n", printPrefix, j + 1, String.join("|", types), escapedC); - } - // Analyze the common types - List commonTypes = new ArrayList<>(); - for (Map.Entry entry : typeCounts.entrySet()) { - if (entry.getValue() == column.children.size()) { - commonTypes.add(entry.getKey()); - } else if (entry.getValue() == column.children.size() - 1) { - 
commonTypes.add("ALMOST-" + entry.getKey()); - } - } - outCompact.printf("%s\t%s\t%s\n", String.join("|", commonTypes), printPrefix, String.join("\t", escapedCells)); - } - - // ============================================================ - // Cell analysis - // ============================================================ - - public static final Pattern ORDINAL = Pattern.compile("^(\\d+)(st|nd|rd|th)$"); - - protected List analyzeCell(String c) { - List types = new ArrayList<>(); - LanguageInfo languageInfo = LanguageAnalyzer.getSingleton().analyze(c); - { - // Integer - NumberValue n = StringNormalizationUtils.parseNumberStrict(c); - if (n != null) { - // Number - types.add("num"); - // Integer - double value = n.value; - if (Math.abs(value - Math.round(value)) < 1e-9) { - types.add("int"); - if (c.matches("^[12]\\d\\d\\d$")) { - // Year? - types.add("year"); - } - } - } - } - { - // Ordinal - Matcher m = ORDINAL.matcher(c); - if (m.matches()) { - types.add("ordinal"); - } - } - { - // Integer-Integer - String[] splitted = StringNormalizationUtils.STRICT_DASH.split(c); - if (splitted.length == 2 && splitted[0].matches("^[0-9]+$") && splitted[1].matches("^[0-9]+$")) { - types.add("2ints"); - } - } - { - // Date - DateValue date = StringNormalizationUtils.parseDateWithLanguageAnalyzer(languageInfo); - if (date != null) { - types.add("date"); - // Also more detailed date type - types.add("date-" - + (date.year != -1 ? "Y" : "") - + (date.month != -1 ? "M" : "") - + (date.day != -1 ? 
"D" : "")); - } - } - { - // Quoted text - if (c.matches("^[“”\"].*[“”\"]$")) { - types.add("quoted"); - } - } - if (opts.wikiTitles != null) { - // Wikipedia titles - WikipediaTitleLibrary library = WikipediaTitleLibrary.getSingleton(); - if (library.contains(c)) { - types.add("wiki"); - } - } - { - // POS and NER - types.add("POS=" + String.join("-", languageInfo.posTags)); - types.add("NER=" + String.join("-", languageInfo.nerTags)); - } - return types; - } - - // ============================================================ - // Helper class: Wikipedia titles - // ============================================================ - - public static class WikipediaTitleLibrary { - - private static WikipediaTitleLibrary _singleton = null; - - public static WikipediaTitleLibrary getSingleton() { - if (_singleton == null) - _singleton = new WikipediaTitleLibrary(); - return _singleton; - } - - Set titles = new HashSet<>(); - - private WikipediaTitleLibrary() { - assert opts.wikiTitles != null; - LogInfo.begin_track("Reading Wikipedia article titles from %s ...", opts.wikiTitles); - try { - BufferedReader reader = IOUtils.openIn(opts.wikiTitles); - String line; - while ((line = reader.readLine()) != null) { - titles.add(line); - if (titles.size() <= 10) { - LogInfo.logs("Example title: %s", line); - } - } - } catch (IOException e) { - throw new RuntimeException(e); - } - LogInfo.logs("Read %d titles", titles.size()); - LogInfo.end_track(); - } - - public boolean contains(String c) { - return titles.contains(c.toLowerCase().trim()); - } - } - - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/test/TableFormulaCanonicalizer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/test/TableFormulaCanonicalizer.java deleted file mode 100644 index 1f56dd186f..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/test/TableFormulaCanonicalizer.java +++ /dev/null @@ -1,151 +0,0 @@ -package edu.stanford.nlp.sempre.tables.test; 
- -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.tables.TableTypeSystem; -import fig.basic.LogInfo; - -public class TableFormulaCanonicalizer { - private TableFormulaCanonicalizer() {} - - public static Formula canonicalizeFormula(Formula formula) { - return canonicalizePredicates(Formulas.betaReduction(formula)); - } - - // Canonicalize the following: - // * !___ => (reverse ___) except != - // * (cell.cell.date (date ___ -1 -1)) => (cell.cell.number (number ___)) - // * variable names => x - // * (lambda x (relation (var x))) => relation - // * (lambda x ((reverse relation) (var x))) => (reverse relation) - // * (reverse (lambda x (relation (var x)))) => (reverse relation) - // * (reverse (lambda x ((reverse relation) (var x))) => relation - // * sort the children of merge formulas - static Formula canonicalizePredicates(Formula formula) { - if (formula instanceof ValueFormula) { - ValueFormula valueF = (ValueFormula) formula; - if (valueF.value instanceof NameValue) { - String id = ((NameValue) valueF.value).id; - if (id.startsWith("!") && !"!=".equals(id)) { - return new ReverseFormula(new ValueFormula(new NameValue(id.substring(1)))); - } else { - return new ValueFormula(new NameValue(id)); - } - } - return valueF; - } else if (formula instanceof JoinFormula) { - JoinFormula join = (JoinFormula) formula; - if (join.relation instanceof ValueFormula && join.child instanceof ValueFormula) { - Value relation = ((ValueFormula) join.relation).value, - child = ((ValueFormula) join.child).value; - if (relation.equals(TableTypeSystem.CELL_DATE_VALUE) && child instanceof DateValue) { - DateValue date = (DateValue) (((ValueFormula) join.child).value); - if (date.month == -1 && date.day == -1) { - return new JoinFormula(new ValueFormula(TableTypeSystem.CELL_NUMBER_VALUE), - new ValueFormula(new NumberValue(date.year))); - } - } - } - return new JoinFormula(canonicalizeFormula(join.relation), - canonicalizeFormula(join.child)); - } else if (formula 
instanceof MergeFormula) { - MergeFormula merge = (MergeFormula) formula; - Formula child1 = canonicalizeFormula(merge.child1), - child2 = canonicalizeFormula(merge.child2); - if (child1.toString().compareTo(child2.toString()) <= 0) - return new MergeFormula(merge.mode, child1, child2); - else - return new MergeFormula(merge.mode, child2, child1); - } else if (formula instanceof AggregateFormula) { - AggregateFormula aggregate = (AggregateFormula) formula; - return new AggregateFormula(aggregate.mode, canonicalizeFormula(aggregate.child)); - } else if (formula instanceof SuperlativeFormula) { - SuperlativeFormula superlative = (SuperlativeFormula) formula; - return new SuperlativeFormula(superlative.mode, superlative.rank, superlative.count, - canonicalizeFormula(superlative.head), canonicalizeFormula(superlative.relation)); - } else if (formula instanceof ArithmeticFormula) { - ArithmeticFormula arithmetic = (ArithmeticFormula) formula; - return new ArithmeticFormula(arithmetic.mode, canonicalizeFormula(arithmetic.child1), - canonicalizeFormula(arithmetic.child2)); - } else if (formula instanceof VariableFormula) { - return new VariableFormula("x"); - } else if (formula instanceof MarkFormula) { - MarkFormula mark = (MarkFormula) formula; - return new MarkFormula("x", canonicalizeFormula(mark.body)); - } else if (formula instanceof ReverseFormula) { - Formula singleRelation; - if ((singleRelation = isSingleRelationLambda(formula)) != null) - return singleRelation; - ReverseFormula reverse = (ReverseFormula) formula; - return new ReverseFormula(canonicalizeFormula(reverse.child)); - } else if (formula instanceof LambdaFormula) { - Formula singleRelation; - if ((singleRelation = isSingleRelationLambda(formula)) != null) - return singleRelation; - LambdaFormula lambda = (LambdaFormula) formula; - return new LambdaFormula("x", canonicalizeFormula(lambda.body)); - } else { - throw new RuntimeException("Unsupported formula " + formula); - } - } - - // Detect the 
following patterns - // * (lambda x (relation (var x))) => relation - // * (lambda x (!relation (var x))) => (reverse relation) - // * (lambda x ((reverse relation) (var x))) => (reverse relation) - // * reverse of any case above - // Otherwise, return null - static Formula isSingleRelationLambda(Formula formula) { - boolean isReversed = false; - ValueFormula valueF; - NameValue relation; - // Outer layer - if (formula instanceof ReverseFormula) { - isReversed = !isReversed; - formula = ((ReverseFormula) formula).child; - } - if (!(formula instanceof LambdaFormula)) return null; - formula = ((LambdaFormula) formula).body; - if (!(formula instanceof JoinFormula)) return null; - JoinFormula join = (JoinFormula) formula; - if (!(join.child instanceof VariableFormula)) return null; - // Detect relation - if (join.relation instanceof ValueFormula) { - valueF = (ValueFormula) join.relation; - } else if (join.relation instanceof ReverseFormula) { - ReverseFormula reverse = (ReverseFormula) join.relation; - if (!(reverse.child instanceof ValueFormula)) return null; - isReversed = !isReversed; - valueF = (ValueFormula) reverse.child; - } else { - return null; - } - if (!(valueF.value instanceof NameValue)) return null; - relation = (NameValue) valueF.value; - if (!CanonicalNames.isBinary(relation)) return null; - if (CanonicalNames.isReverseProperty(relation)) { - isReversed = !isReversed; - relation = CanonicalNames.reverseProperty(relation); - } - // Return the answer - if (isReversed) { - return new ReverseFormula(new ValueFormula(relation)); - } else { - return new ValueFormula(relation); - } - } - - // ============================================================ - // Test - // ============================================================ - - public static void main(String[] args) { - LogInfo.logs("%s", isSingleRelationLambda(Formula.fromString("(lambda x (fb:a.b.c (var x)))"))); - LogInfo.logs("%s", isSingleRelationLambda(Formula.fromString("(lambda x (!fb:a.b.c (var 
x)))"))); - LogInfo.logs("%s", isSingleRelationLambda(Formula.fromString("(lambda x ((reverse fb:a.b.c) (var x)))"))); - LogInfo.logs("%s", isSingleRelationLambda(Formula.fromString("(lambda x ((reverse fb:a.b.c) (fb:d.e.f (var x))))"))); - LogInfo.logs("%s", isSingleRelationLambda(Formula.fromString("(lambda x (!= (var x)))"))); - LogInfo.logs("%s", isSingleRelationLambda(Formula.fromString("(reverse (lambda x (fb:a.b.c (var x))))"))); - LogInfo.logs("%s", isSingleRelationLambda(Formula.fromString("(reverse (lambda x (!fb:a.b.c (var x))))"))); - LogInfo.logs("%s", isSingleRelationLambda(Formula.fromString("(reverse (lambda x ((reverse fb:a.b.c) (var x))))"))); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/test/TableStatsComputer.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/test/TableStatsComputer.java deleted file mode 100644 index a897a129dc..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/tables/test/TableStatsComputer.java +++ /dev/null @@ -1,154 +0,0 @@ -package edu.stanford.nlp.sempre.tables.test; - -import java.io.*; -import java.util.*; - -import edu.stanford.nlp.sempre.*; -import edu.stanford.nlp.sempre.FuzzyMatchFn.FuzzyMatchFnMode; -import edu.stanford.nlp.sempre.MergeFormula.Mode; -import edu.stanford.nlp.sempre.tables.StringNormalizationUtils; -import edu.stanford.nlp.sempre.tables.TableKnowledgeGraph; -import edu.stanford.nlp.sempre.tables.TableTypeSystem; -import edu.stanford.nlp.sempre.tables.test.CustomExample.ExampleProcessor; -import fig.basic.*; -import fig.exec.*; - -/** - * Compute various statistics about the dataset. - * - * - Table size (rows, columns, unique cells) - * - Answer type - * - Whether the answer is in the table - * - * Also aggregate column strings and cell word shapes. 
- * - * @author ppasupat - */ -public class TableStatsComputer implements Runnable { - public static class Options { - @Option(gloss = "Maximum string length to consider") - public int statsMaxStringLength = 70; - } - public static Options opts = new Options(); - - public static void main(String[] args) { - Execution.run(args, "TableStatsComputerMain", new TableStatsComputer(), Master.getOptionsParser()); - } - - @Override - public void run() { - PrintWriter out = IOUtils.openOutHard(Execution.getFile("table-stats.tsv")); - TableStatsComputerProcessor processor = new TableStatsComputerProcessor(out); - CustomExample.getDataset(Dataset.opts.inPaths, processor); - processor.analyzeTables(); - out.close(); - } - - static class TableStatsComputerProcessor implements ExampleProcessor { - PrintWriter out; - Evaluation evaluation = new Evaluation(); - Map tableCounts = new HashMap<>(); - Map columnStrings = new HashMap<>(), cellStrings = new HashMap<>(); - Builder builder; - - public TableStatsComputerProcessor(PrintWriter out) { - builder = new Builder(); - builder.build(); - this.out = out; - out.println(String.join("\t", new String[] { - "id", "context", "rows", "columns", "uniqueCells", "targetType", "inTable", - })); - } - - @Override - public void run(CustomExample ex) { - List outputFields = new ArrayList<>(); - outputFields.add(ex.id); - TableKnowledgeGraph graph = (TableKnowledgeGraph) ex.context.graph; - MapUtils.incr(tableCounts, graph); - outputFields.add(graph.toLispTree().child(2).value); - outputFields.add("" + graph.numRows()); - outputFields.add("" + graph.numColumns()); - outputFields.add("" + graph.numUniqueCells()); - // Answer type. 
For convenience, just use the first answer from the list - Value value = ((ListValue) ex.targetValue).values.get(0); - evaluation.add("value-number", value instanceof NumberValue); - evaluation.add("value-date", value instanceof DateValue); - evaluation.add("value-text", value instanceof DescriptionValue); - evaluation.add("value-partial-number", - value instanceof DescriptionValue && ((DescriptionValue) value).value.matches(".*[0-9].*")); - // Check if the value is in the table - boolean inTable = false; - if (value instanceof DescriptionValue) { - outputFields.add("text"); - Collection formulas = graph.getFuzzyMatchedFormulas(((DescriptionValue) value).value, FuzzyMatchFnMode.ENTITY); - inTable = !formulas.isEmpty(); - evaluation.add("value-text-in-table", inTable); - } else if (value instanceof NumberValue) { - outputFields.add("number"); - // (and (@type @cell) (@p.num ___)) - Formula formula = new MergeFormula(Mode.and, - new JoinFormula(Formula.fromString(CanonicalNames.TYPE), Formula.fromString(TableTypeSystem.CELL_GENERIC_TYPE)), - new JoinFormula(Formula.fromString(TableTypeSystem.CELL_NUMBER_VALUE.id), new ValueFormula(value))); - Value result = builder.executor.execute(formula, ex.context).value; - inTable = result instanceof ListValue && !((ListValue) result).values.isEmpty(); - evaluation.add("value-number-in-table", inTable); - } else if (value instanceof DateValue) { - outputFields.add("date"); - // (and (@type @cell) (@p.num ___)) - Formula formula = new MergeFormula(Mode.and, - new JoinFormula(Formula.fromString(CanonicalNames.TYPE), Formula.fromString(TableTypeSystem.CELL_GENERIC_TYPE)), - new JoinFormula(Formula.fromString(TableTypeSystem.CELL_DATE_VALUE.id), new ValueFormula(value))); - Value result = builder.executor.execute(formula, ex.context).value; - inTable = result instanceof ListValue && !((ListValue) result).values.isEmpty(); - evaluation.add("value-number-in-table", inTable); - } else { - outputFields.add("unknown"); - } - 
evaluation.add("value-any-in-table", inTable); - outputFields.add("" + inTable); - out.println(String.join("\t", outputFields)); - } - - public void analyzeTables() { - for (Map.Entry entry : tableCounts.entrySet()) { - TableKnowledgeGraph table = entry.getKey(); - evaluation.add("count", entry.getValue()); - table.populateStats(evaluation); - for (String columnString : table.getAllColumnStrings()) - addIfOK(columnString, columnStrings); - for (String cellString : table.getAllCellStrings()) - addIfOK(cellString, cellStrings); - } - for (Map.Entry entry : columnStrings.entrySet()) { - evaluation.add("column-strings", entry.getKey(), entry.getValue()); - } - for (Map.Entry entry : cellStrings.entrySet()) { - evaluation.add("cell-strings", entry.getKey(), entry.getValue()); - } - evaluation.logStats("tables"); - dumpCollection(columnStrings, "columns"); - dumpCollection(cellStrings, "cells"); - } - - void addIfOK(String x, Map collection) { - x = StringNormalizationUtils.characterNormalize(x).toLowerCase(); - if (x.length() <= TableStatsComputer.opts.statsMaxStringLength) - MapUtils.incr(collection, x); - } - - void dumpCollection(Map collection, String filename) { - List> entries = new ArrayList<>(collection.entrySet()); - Collections.sort(entries, new ValueComparator(true)); - String path = Execution.getFile(filename); - LogInfo.begin_track("Writing to %s (%d entries)", path, entries.size()); - PrintWriter out = IOUtils.openOutHard(path); - for (Map.Entry entry : entries) { - out.printf("%6d : %s\n", entry.getValue(), entry.getKey()); - } - out.close(); - LogInfo.end_track(); - } - - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/DerivationStreamTest.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/DerivationStreamTest.java deleted file mode 100644 index c04e932da1..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/DerivationStreamTest.java +++ /dev/null @@ -1,32 +0,0 @@ -package 
edu.stanford.nlp.sempre.test; - -import edu.stanford.nlp.sempre.Derivation; -import edu.stanford.nlp.sempre.SingleDerivationStream; -import org.testng.annotations.Test; - -import static org.testng.AssertJUnit.assertEquals; - -/** - * @author Percy Liang - */ -public class DerivationStreamTest { - @Test - public void single() { - SingleDerivationStream s = new SingleDerivationStream() { - public Derivation createDerivation() { - return new Derivation.Builder().cat("NP").createDerivation(); - } - }; - assertEquals(true, s.hasNext()); - assertEquals(true, s.hasNext()); - assertEquals(true, s.hasNext()); - assertEquals("NP", s.next().cat); - assertEquals(false, s.hasNext()); - assertEquals(false, s.hasNext()); - - s = new SingleDerivationStream() { - public Derivation createDerivation() { return null; } - }; - assertEquals(false, s.hasNext()); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/FormulaTest.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/FormulaTest.java deleted file mode 100644 index 3670e0d6c3..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/FormulaTest.java +++ /dev/null @@ -1,29 +0,0 @@ -package edu.stanford.nlp.sempre.test; - -import edu.stanford.nlp.sempre.Formula; -import edu.stanford.nlp.sempre.Formulas; -import org.testng.annotations.Test; - -import static org.testng.AssertJUnit.assertEquals; - -/** - * Test Formulas. 
- * @author Percy Liang - */ -public class FormulaTest { - private static Formula F(String s) { return Formula.fromString(s); } - - @Test - public void simpleFormula() { - assertEquals(F("(f a)"), - Formulas.betaReduction(F("((lambda x (f (var x))) a)"))); - - // Bound, shouldn't replace x - assertEquals(F("((lambda x (f (var x))) (var y))"), - Formulas.substituteVar(F("((lambda x (f (var x))) (var y))"), "x", F("a"))); - - // Free, should replace y - assertEquals(F("((lambda x (f (var x))) a)"), - Formulas.substituteVar(F("((lambda x (f (var x))) (var y))"), "y", F("a"))); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/GrammarTest.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/GrammarTest.java deleted file mode 100644 index 349c6a33d2..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/GrammarTest.java +++ /dev/null @@ -1,53 +0,0 @@ -package edu.stanford.nlp.sempre.test; - -import edu.stanford.nlp.sempre.Grammar; -import edu.stanford.nlp.sempre.Rule; -import org.testng.Assert; -import org.testng.annotations.Test; - -import java.util.List; - -/** - * Test that the grammar correctly parsers rules. 
- */ -public class GrammarTest { - - public static Grammar makeTernaryGrammar() { - Grammar g = new Grammar(); - g.addStatement("(rule $ROOT ($X) (IdentityFn))"); - g.addStatement("(rule $X ($A $B $C) (IdentityFn))"); - g.addStatement("(rule $A (a) (ConstantFn (string a)))"); - g.addStatement("(rule $B (b) (ConstantFn (string b)))"); - g.addStatement("(rule $C (c) (ConstantFn (string c)))"); - return g; - } - - /** - * Checks that each rule is one of the following: - * $Cat => token - * $Cat => $Cat - * $Cat => token token - * $Cat => token $Cat - * $Cat => $Cat token - * $Cat => $Cat $Cat - */ - public boolean isValidBinaryGrammar(Grammar g) { - for (Rule rule : g.getRules()) { - if (!Rule.isCat(rule.lhs)) return false; - if (rule.rhs.size() != 1 && rule.rhs.size() != 2) return false; - } - - return true; - } - - @Test - public void testBinarizationOfTernaryGrammar() { - Grammar g = makeTernaryGrammar(); - List rules = g.getRules(); - Assert.assertEquals(6, rules.size()); - Assert.assertTrue(isValidBinaryGrammar(g)); - } - - - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/GrammarValidityTest.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/GrammarValidityTest.java deleted file mode 100644 index 715d3aa065..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/GrammarValidityTest.java +++ /dev/null @@ -1,57 +0,0 @@ -package edu.stanford.nlp.sempre.test; - -import edu.stanford.nlp.sempre.*; - -import org.testng.annotations.Test; - -import fig.basic.LogInfo; - -import java.util.*; -import java.nio.file.*; - -import static org.testng.AssertJUnit.assertEquals; - -/** - * Attempt to load all grammars to test for validity. 
- * - * @author Yushi Wang - */ - -public class GrammarValidityTest { - private String[] dataPaths = new String[] {"data/", "freebase/", "tables/", "regex/", "overnight/"}; - - @Test(groups = {"grammar"}) - public void readGrammars() { - try { - List successes = new ArrayList<>(), failures = new ArrayList<>(); - for (String dataPath : dataPaths) { - Files.walk(Paths.get(dataPath)).forEach(filePath -> { - try { - if (filePath.toString().toLowerCase().endsWith(".grammar")) { - Grammar test = new Grammar(); - LogInfo.logs("Reading grammar file: %s", filePath.toString()); - test.read(filePath.toString()); - LogInfo.logs("Finished reading", filePath.toString()); - successes.add(filePath.toString()); - } - } - catch (Exception ex) { - failures.add(filePath.toString()); - } - }); - } - LogInfo.begin_track("Following grammar tests passed:"); - for (String path : successes) - LogInfo.logs("%s", path); - LogInfo.end_track(); - LogInfo.begin_track("Following grammar tests failed:"); - for (String path : failures) - LogInfo.logs("%s", path); - LogInfo.end_track(); - assertEquals(0, failures.size()); - } - catch (Exception ex) { - LogInfo.logs(ex.toString()); - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/JavaExecutorTest.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/JavaExecutorTest.java deleted file mode 100644 index e12a13e895..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/JavaExecutorTest.java +++ /dev/null @@ -1,43 +0,0 @@ -package edu.stanford.nlp.sempre.test; - -import edu.stanford.nlp.sempre.*; -import org.testng.annotations.Test; - -import static org.testng.AssertJUnit.assertEquals; - -/** - * Test JavaExecutor. 
- * @author Percy Liang - */ -public class JavaExecutorTest { - JavaExecutor executor = new JavaExecutor(); - - private static Formula F(String s) { return Formula.fromString(s); } - - private static Value V(double x) { return new NumberValue(x); } - private static Value V(String x) { return Values.fromString(x); } - - @Test public void numbers() { - assertEquals(V(8), executor.execute(F("(call + (number 3) (number 5))"), null).value); - assertEquals(V(6), executor.execute(F("(call + (call - (number 10) (number 9)) (number 5))"), null).value); - assertEquals(V(1), executor.execute(F("(call java.lang.Math.cos (number 0))"), null).value); - - assertEquals(V(1), executor.execute(F("((lambda x (call java.lang.Math.cos (var x))) (number 0))"), null).value); // Make sure beta reduction is called - } - - @Test public void conditionals() { - assertEquals(V("(string no)"), executor.execute(F("(call if (boolean false) (string yes) (string no))"), null).value); - assertEquals(V("(string yes)"), executor.execute(F("(call if (call < (number 3) (number 4)) (string yes) (string no))"), null).value); - } - - @Test public void strings() { - assertEquals(V(5), executor.execute(F("(call .length (string hello))"), null).value); - assertEquals(V("(string abcdef)"), executor.execute(F("(call .concat (string abc) (string def))"), null).value); - } - - @Test public void higherOrder() { - assertEquals(V("(list (number 10) (number 40))"), executor.execute(F("(call map (list (number 1) (number 4)) (lambda x (call * (number 10) (var x))))"), null).value); - assertEquals(V("(list (number 4))"), executor.execute(F("(call select (list (number 1) (number 4)) (lambda x (call == (number 0) (call % (var x) (number 2)))))"), null).value); - // assertEquals(V("(list (number 5))"), executor.execute(F("(call reduce (list (number 1) (number 4)) +)")).value); // Not implemented yet - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/JsonTest.java 
b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/JsonTest.java deleted file mode 100644 index 2d2c60263a..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/JsonTest.java +++ /dev/null @@ -1,73 +0,0 @@ -package edu.stanford.nlp.sempre.test; - -import edu.stanford.nlp.sempre.*; -import fig.basic.LogInfo; -import org.testng.annotations.Test; - -/** - * Test JSON serialization and deserialization. - */ -public class JsonTest { - public static String S(Object o) { - return Json.writeValueAsStringHard(o); - } - - public static T D(String s, Class klass) { - return Json.readValueHard(s, klass); - } - - public static T D(String s, Class klass, Class view) { - return Json.readValueHard(s, klass, view); - } - - public static boolean exampleEquals(Example a, Example b) { - if (!a.id.equals(b.id)) return false; - if (!a.utterance.equals(b.utterance)) return false; - if (a.context != b.context && - !a.context.equals(b.context)) - return false; - if (a.targetFormula != b.targetFormula && - !a.targetFormula.toString().equals(b.targetFormula.toString())) - return false; - if (a.targetValue != b.targetValue && - !a.targetValue.equals(b.targetValue)) - return false; - return true; - } - - public static Parser makeSimpleBeamParser() { - return new BeamParser(ParserTest.ABCTest().getParserSpec()); - } - - @Test - public void testExample() { - Builder builder = new Builder(); - builder.build(); - - Example.Builder b = new Example.Builder(); - Example ex = b - .setId("id") - .setUtterance("A is for Alice") - .setTargetValue(new StringValue("B is for Bob")) - .createExample(); - LogInfo.log(S(ex)); - assert exampleEquals(ex, D(S(ex), Example.class)); - - ex.preprocess(); - LogInfo.log(S(ex)); - assert ex.languageInfo != null; - assert !ex.languageInfo.tokens.isEmpty(); - assert exampleEquals(ex, D(S(ex), Example.class)); - - ex = TestUtils.makeSimpleExample("1 2 3"); - ex.preprocess(); - makeSimpleBeamParser().parse(new Params(), ex, true); - String there 
= S(ex); - Example back = D(there, Example.class); - String thereAgain = S(back); - LogInfo.log(there); - LogInfo.log(thereAgain); - assert there.equals(thereAgain); - assert exampleEquals(ex, back); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/L1RegularizationTest.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/L1RegularizationTest.java deleted file mode 100644 index 3fd124d3bc..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/L1RegularizationTest.java +++ /dev/null @@ -1,183 +0,0 @@ -package edu.stanford.nlp.sempre.test; - -import java.util.*; - -import org.testng.annotations.Test; - -import static org.testng.AssertJUnit.*; -import edu.stanford.nlp.sempre.Params; - -/** - * Test lazy L1 regularization. - * - * @author ppasupat - */ -public class L1RegularizationTest { - - private static final double EPSILON = 1e-3; - - class Options { - public double initStepSize = 1.0; - public String l1Reg = "none"; - public double l1RegCoeff = 0; - public Options initStepSize(double x) { initStepSize = x; return this; } - public Options l1Reg(String x) { l1Reg = x; return this; } - public Options l1RegCoeff(double x) { l1RegCoeff = x; return this; } - } - - private Options originalOptions = null; - - private void saveOptions() { - originalOptions = new Options() - .initStepSize(Params.opts.initStepSize) - .l1Reg(Params.opts.l1Reg) - .l1RegCoeff(Params.opts.l1RegCoeff); - } - - private void loadOptions(Options options) { - Params.opts.initStepSize = options.initStepSize; - Params.opts.l1Reg = options.l1Reg; - Params.opts.l1RegCoeff = options.l1RegCoeff; - } - - private Map constructGradient(double a, double b, double c, double d) { - Map gradient = new HashMap<>(); - if (a != 0) gradient.put("a", a); - if (b != 0) gradient.put("b", b); - if (c != 0) gradient.put("c", c); - if (d != 0) gradient.put("d", d); - return gradient; - } - - @Test - public void zeroLazyL1Test() { - saveOptions(); - { - loadOptions(new 
Options().l1Reg("none").l1RegCoeff(0)); - Params params = new Params(); - assertEquals(0.0, params.getWeight("a"), EPSILON); - assertEquals(0.0, params.getWeight("b"), EPSILON); - params.update(constructGradient(1.0, 0, 0, 0)); - assertEquals(1.0 / Math.sqrt(1), params.getWeight("a"), EPSILON); - assertEquals(0.0, params.getWeight("b"), EPSILON); - params.update(constructGradient(1.0, 0, 0, 0)); - assertEquals(1.0 / Math.sqrt(1) + 1.0 / Math.sqrt(2), params.getWeight("a"), EPSILON); - assertEquals(0.0, params.getWeight("b"), EPSILON); - params.update(constructGradient(0.0, -2.0, 0, 0)); - assertEquals(1.0 / Math.sqrt(1) + 1.0 / Math.sqrt(2), params.getWeight("a"), EPSILON); - assertEquals(-2.0 / Math.sqrt(4), params.getWeight("b"), EPSILON); - params.update(constructGradient(1.0, 2.0, 0, 0)); - assertEquals(1.0 / Math.sqrt(1) + 1.0 / Math.sqrt(2) + 1.0 / Math.sqrt(3), params.getWeight("a"), EPSILON); - assertEquals(-2.0 / Math.sqrt(4) + 2.0 / Math.sqrt(8), params.getWeight("b"), EPSILON); - } - { - loadOptions(new Options().l1Reg("nonlazy").l1RegCoeff(0)); - Params params = new Params(); - assertEquals(0.0, params.getWeight("a"), EPSILON); - assertEquals(0.0, params.getWeight("b"), EPSILON); - params.update(constructGradient(1.0, 0, 0, 0)); - // NONLAZY will give a different result as the denominator of the AdaGrad update is incremented by 1 - assertEquals(1.0 / Math.sqrt(2), params.getWeight("a"), EPSILON); - assertEquals(0.0, params.getWeight("b"), EPSILON); - params.update(constructGradient(1.0, 0, 0, 0)); - assertEquals(1.0 / Math.sqrt(2) + 1.0 / Math.sqrt(3), params.getWeight("a"), EPSILON); - assertEquals(0.0, params.getWeight("b"), EPSILON); - params.update(constructGradient(0.0, -2.0, 0, 0)); - assertEquals(1.0 / Math.sqrt(2) + 1.0 / Math.sqrt(3), params.getWeight("a"), EPSILON); - assertEquals(-2.0 / Math.sqrt(5), params.getWeight("b"), EPSILON); - params.update(constructGradient(1.0, 2.0, 0, 0)); - assertEquals(1.0 / Math.sqrt(2) + 1.0 / Math.sqrt(3) + 
1.0 / Math.sqrt(4), params.getWeight("a"), EPSILON); - assertEquals(-2.0 / Math.sqrt(5) + 2.0 / Math.sqrt(9), params.getWeight("b"), EPSILON); - } - { - loadOptions(new Options().l1Reg("lazy").l1RegCoeff(0)); - Params params = new Params(); - assertEquals(0.0, params.getWeight("a"), EPSILON); - assertEquals(0.0, params.getWeight("b"), EPSILON); - params.update(constructGradient(1.0, 0, 0, 0)); - // LAZY will give the same result as NONLAZY - assertEquals(1.0 / Math.sqrt(2), params.getWeight("a"), EPSILON); - assertEquals(0.0, params.getWeight("b"), EPSILON); - params.update(constructGradient(1.0, 0, 0, 0)); - assertEquals(1.0 / Math.sqrt(2) + 1.0 / Math.sqrt(3), params.getWeight("a"), EPSILON); - assertEquals(0.0, params.getWeight("b"), EPSILON); - params.update(constructGradient(0.0, -2.0, 0, 0)); - assertEquals(1.0 / Math.sqrt(2) + 1.0 / Math.sqrt(3), params.getWeight("a"), EPSILON); - assertEquals(-2.0 / Math.sqrt(5), params.getWeight("b"), EPSILON); - params.update(constructGradient(1.0, 2.0, 0, 0)); - assertEquals(1.0 / Math.sqrt(2) + 1.0 / Math.sqrt(3) + 1.0 / Math.sqrt(4), params.getWeight("a"), EPSILON); - assertEquals(-2.0 / Math.sqrt(5) + 2.0 / Math.sqrt(9), params.getWeight("b"), EPSILON); - } - loadOptions(originalOptions); - } - - @Test - public void nonZeroLazyL1Test() { - saveOptions(); - { - loadOptions(new Options().l1Reg("nonlazy").l1RegCoeff(1.0)); - Params params = new Params(); - assertEquals(0.0, params.getWeight("a"), EPSILON); - assertEquals(0.0, params.getWeight("b"), EPSILON); - params.update(constructGradient(2.0, 0, -3.14, 0)); - assertEquals(1.0 / Math.sqrt(5), params.getWeight("a"), EPSILON); - assertEquals(0.0, params.getWeight("b"), EPSILON); - params.update(constructGradient(1.0, 0, 0, 0)); - assertEquals(1.0 / Math.sqrt(5), params.getWeight("a"), EPSILON); - assertEquals(0.0, params.getWeight("b"), EPSILON); - params.update(constructGradient(0.0, -2.0, 0, 0)); - assertEquals(1.0 / Math.sqrt(5) - 1.0 / Math.sqrt(6), 
params.getWeight("a"), EPSILON); - assertEquals(-1.0 / Math.sqrt(5), params.getWeight("b"), EPSILON); - params.update(constructGradient(1.0, 2.0, 0, 0)); - assertEquals(1.0 / Math.sqrt(5) - 1.0 / Math.sqrt(6), params.getWeight("a"), EPSILON); - assertEquals(0.0, params.getWeight("b"), EPSILON); - params.update(constructGradient(0.0, 3.0, 0, 0)); - assertEquals(0.0, params.getWeight("a"), EPSILON); - assertEquals(2.0 / Math.sqrt(18), params.getWeight("b"), EPSILON); - params.update(constructGradient(0.0, 0.0, 0, 0)); - assertEquals(0.0, params.getWeight("a"), EPSILON); - assertEquals(1.0 / Math.sqrt(18), params.getWeight("b"), EPSILON); - params.update(constructGradient(-5.0, 0.0, 1.0, 0)); - assertEquals(-4.0 / Math.sqrt(32), params.getWeight("a"), EPSILON); - assertEquals(0.0, params.getWeight("b"), EPSILON); - params.update(constructGradient(0.0, 0.0, -1.0, 0)); - assertEquals(-3.0 / Math.sqrt(32), params.getWeight("a"), EPSILON); - assertEquals(0.0, params.getWeight("b"), EPSILON); - assertEquals(0.0, params.getWeight("c"), EPSILON); - } - // LAZY: Randomly access the features in between. 
- Random r = new Random(42); - for (double t = 1; t > 0; t -= 0.02) { - loadOptions(new Options().l1Reg("lazy").l1RegCoeff(1.0)); - Params params = new Params(); - if (r.nextDouble() < t) assertEquals(0.0, params.getWeight("a"), EPSILON); - if (r.nextDouble() < t) assertEquals(0.0, params.getWeight("b"), EPSILON); - params.update(constructGradient(2.0, 0, -3.14, 0)); - if (r.nextDouble() < t) assertEquals(1.0 / Math.sqrt(5), params.getWeight("a"), EPSILON); - if (r.nextDouble() < t) assertEquals(0.0, params.getWeight("b"), EPSILON); - params.update(constructGradient(1.0, 0, 0, 0)); - if (r.nextDouble() < t) assertEquals(1.0 / Math.sqrt(5), params.getWeight("a"), EPSILON); - if (r.nextDouble() < t) assertEquals(0.0, params.getWeight("b"), EPSILON); - params.update(constructGradient(0.0, -2.0, 0, 0)); - if (r.nextDouble() < t) assertEquals(1.0 / Math.sqrt(5) - 1.0 / Math.sqrt(6), params.getWeight("a"), EPSILON); - if (r.nextDouble() < t) assertEquals(-1.0 / Math.sqrt(5), params.getWeight("b"), EPSILON); - params.update(constructGradient(1.0, 2.0, 0, 0)); - if (r.nextDouble() < t) assertEquals(1.0 / Math.sqrt(5) - 1.0 / Math.sqrt(6), params.getWeight("a"), EPSILON); - if (r.nextDouble() < t) assertEquals(0.0, params.getWeight("b"), EPSILON); - params.update(constructGradient(0.0, 3.0, 0, 0)); - if (r.nextDouble() < t) assertEquals(0.0, params.getWeight("a"), EPSILON); - if (r.nextDouble() < t) assertEquals(2.0 / Math.sqrt(18), params.getWeight("b"), EPSILON); - params.update(constructGradient(0.0, 0.0, 0, 0)); - if (r.nextDouble() < t) assertEquals(0.0, params.getWeight("a"), EPSILON); - if (r.nextDouble() < t) assertEquals(1.0 / Math.sqrt(18), params.getWeight("b"), EPSILON); - params.update(constructGradient(-5.0, 0.0, 1.0, 0)); - if (r.nextDouble() < t) assertEquals(-4.0 / Math.sqrt(32), params.getWeight("a"), EPSILON); - if (r.nextDouble() < t) assertEquals(0.0, params.getWeight("b"), EPSILON); - params.update(constructGradient(0.0, 0.0, -1.0, 0)); - if 
(r.nextDouble() < t) assertEquals(-3.0 / Math.sqrt(32), params.getWeight("a"), EPSILON); - if (r.nextDouble() < t) assertEquals(0.0, params.getWeight("b"), EPSILON); - if (r.nextDouble() < t) assertEquals(0.0, params.getWeight("c"), EPSILON); - } - loadOptions(originalOptions); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/ParserTest.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/ParserTest.java deleted file mode 100644 index 84148e3779..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/ParserTest.java +++ /dev/null @@ -1,172 +0,0 @@ -package edu.stanford.nlp.sempre.test; - -import edu.stanford.nlp.sempre.*; -import fig.basic.LogInfo; -import org.testng.annotations.Test; - -import java.util.HashMap; -import java.util.Map; - -import static org.testng.AssertJUnit.assertEquals; - -/** - * Test parsers. - * - * @author Roy Frostig - * @author Percy Liang - */ -public class ParserTest { - // Collects a grammar, and some input/output test pairs - public abstract static class ParseTest { - public Grammar grammar; - ParseTest(Grammar g) { - this.grammar = g; - } - - public Parser.Spec getParserSpec() { - Executor executor = new JavaExecutor(); - FeatureExtractor extractor = new FeatureExtractor(executor); - FeatureExtractor.opts.featureDomains.add("rule"); - ValueEvaluator valueEvaluator = new ExactValueEvaluator(); - return new Parser.Spec(grammar, extractor, executor, valueEvaluator); - } - - public abstract void test(Parser parser); - } - - private static void checkNumDerivations(Parser parser, Params params, String utterance, String targetValue, int numExpected) { - Parser.opts.verbose = 5; - Example ex = TestUtils.makeSimpleExample(utterance, targetValue != null ? 
Value.fromString(targetValue) : null); - ParserState state = parser.parse(params, ex, targetValue != null); - - // Debug information - for (Derivation deriv : state.predDerivations) { - LogInfo.dbg(deriv.getAllFeatureVector()); - LogInfo.dbg(params.getWeights()); - LogInfo.dbgs("Score %f", deriv.computeScore(params)); - } - // parser.extractor.extractLocal(); - assertEquals(numExpected, ex.getPredDerivations().size()); - if (numExpected > 0 && targetValue != null) - assertEquals(targetValue, ex.getPredDerivations().get(0).value.toString()); - } - private static void checkNumDerivations(Parser parser, String utterance, String targetValue, int numExpected) { - checkNumDerivations(parser, new Params(), utterance, targetValue, numExpected); - } - - static ParseTest ABCTest() { - return new ParseTest(TestUtils.makeAbcGrammar()) { - @Override - public void test(Parser parser) { - checkNumDerivations(parser, "a +", null, 0); - checkNumDerivations(parser, "a", "(string a)", 1); - checkNumDerivations(parser, "a b", "(string a,b)", 1); - checkNumDerivations(parser, "a b c", "(string a,b,c)", 2); - checkNumDerivations(parser, "a b c a b c", "(string a,b,c,a,b,c)", 42); - } - }; - } - - static ParseTest ArithmeticTest() { - return new ParseTest(TestUtils.makeArithmeticGrammar()) { - @Override - public void test(Parser parser) { - checkNumDerivations(parser, "1 + ", null, 0); - checkNumDerivations(parser, "1 plus 2", "(number 3)", 1); - checkNumDerivations(parser, "2 times 3", "(number 6)", 1); - checkNumDerivations(parser, "1 plus times 3", null, 0); - checkNumDerivations(parser, "times", null, 0); - } - }; - }; - - // Create parsers - @Test public void checkBeamNumDerivationsForABCGrammar() { - Parser.opts.coarsePrune = false; - ParseTest p; - p = ABCTest(); - p.test(new BeamParser(p.getParserSpec())); - p = ArithmeticTest(); - p.test(new BeamParser(p.getParserSpec())); - } - @Test public void checkCoarseBeamNumDerivations() { - Parser.opts.coarsePrune = true; - ParseTest p; 
- p = ABCTest(); - p.test(new BeamParser(p.getParserSpec())); - p = ArithmeticTest(); - p.test(new BeamParser(p.getParserSpec())); - } - - @Test(groups = "reinforcement") public void checkReinforcementNumDerivations() { - ParseTest p; - p = ABCTest(); - p.test(new ReinforcementParser(p.getParserSpec())); - p = ArithmeticTest(); - p.test(new ReinforcementParser(p.getParserSpec())); - // TODO(chaganty): test more thoroughly - } - - @Test(groups = "floating") public void checkFloatingNumDerivations() { - FloatingParser.opts.defaultIsFloating = true; - FloatingParser.opts.useSizeInsteadOfDepth = true; - Parser parser = new FloatingParser(ABCTest().getParserSpec()); - FloatingParser.opts.maxDepth = 2; - checkNumDerivations(parser, "ignore", null, 3); - FloatingParser.opts.maxDepth = 4; - checkNumDerivations(parser, "ignore", null, 3 + 3 * 3); - } - - // TODO(chaganty): verify that things are ranked appropriately - public void checkRankingArithmetic(Parser parser) { - Params params = new Params(); - Map features = new HashMap<>(); - features.put("rule :: $Operator -> and (ConstantFn (lambda y (lambda x (call + (var x) (var y)))))", 1.0); - features.put("rule :: $Operator -> and (ConstantFn (lambda y (lambda x (call * (var x) (var y)))))", -1.0); - params.update(features); - checkNumDerivations(parser, params, "2 and 3", "(number 5)", 2); - - params = new Params(); - features.put("rule :: $Operator -> and (ConstantFn (lambda y (lambda x (call + (var x) (var y)))))", -1.0); - features.put("rule :: $Operator -> and (ConstantFn (lambda y (lambda x (call * (var x) (var y)))))", 1.0); - params.update(features); - checkNumDerivations(parser, params, "2 and 3", "(number 6)", 2); - } - @Test void checkRankingSimple() { - checkRankingArithmetic(new BeamParser(ArithmeticTest().getParserSpec())); - } - @Test(groups = "reinforcement") void checkRankingReinforcement() { - checkRankingArithmetic(new ReinforcementParser(ArithmeticTest().getParserSpec())); - } - - @Test(groups = 
"floating") public void checkRankingFloating() { - FloatingParser.opts.defaultIsFloating = true; - FloatingParser.opts.maxDepth = 4; - FloatingParser.opts.useAnchorsOnce = true; - Parser parser = new FloatingParser(new ParseTest(TestUtils.makeArithmeticFloatingGrammar()) { - @Override public void test(Parser parser) {} - }.getParserSpec()); - Params params = new Params(); - Map features = new HashMap<>(); - features.put("rule :: $Operator -> nothing (ConstantFn (lambda y (lambda x (call + (var x) (var y)))))", 1.0); - features.put("rule :: $Operator -> nothing (ConstantFn (lambda y (lambda x (call * (var x) (var y)))))", -1.0); - params.update(features); - /* - * Expected LFs: - * 2 3 - * 2 + 3 3 + 2 - * 2 * 3 3 * 2 - */ - checkNumDerivations(parser, params, "2 and 3", "(number 5)", 6); - - params = new Params(); - features.put("rule :: $Operator -> nothing (ConstantFn (lambda y (lambda x (call + (var x) (var y)))))", -1.0); - features.put("rule :: $Operator -> nothing (ConstantFn (lambda y (lambda x (call * (var x) (var y)))))", 1.0); - params.update(features); - checkNumDerivations(parser, params, "2 and 3", "(number 6)", 6); - } - - // TODO(chaganty): verify the parser gradients - - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/SemTypeTest.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/SemTypeTest.java deleted file mode 100644 index ccfd2e600c..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/SemTypeTest.java +++ /dev/null @@ -1,66 +0,0 @@ -package edu.stanford.nlp.sempre.test; - -import static org.testng.AssertJUnit.assertEquals; - -import org.testng.annotations.Test; - -import edu.stanford.nlp.sempre.*; -import fig.basic.*; - -/** - * Test type system. 
- * @author Percy Liang - * @author ppasupat - */ -public class SemTypeTest { - // For testing - private static SemType T(String str) { - return SemType.fromLispTree(LispTree.proto.parseFromString(str)); - } - - private static void verifyEquals(SemType predType, SemType wantedType) { - assertEquals(wantedType.toString(), predType.toString()); - } - - private static void verifyMeet(String t1, String t2) { verifyMeet(t1, t2, t2); } - private static void verifyMeet(String t1, String t2, String t) { - verifyEquals(T(t1).meet(T(t2)), T(t)); - verifyEquals(T(t2).meet(T(t1)), T(t)); - } - - @Test public void simpleSemType() { - SemTypeHierarchy.opts.failOnUnknownTypes = false; - verifyMeet("city", "city"); - verifyMeet("city", "country", "(union)"); - verifyMeet("city", "(union city country)", "city"); - verifyMeet("(union city country river)", "(union city country)"); - - verifyEquals(T("(-> city fb:type.int)").apply(T("(union city country)")), T("fb:type.int")); - verifyEquals(T("(-> city fb:type.int fb:type.float)").apply(T("(union city country)")).apply(T("fb:type.int")), T("fb:type.float")); - verifyEquals(T("fb:type.datetime").apply(T("fb:common.topic")), T("(union)")); - verifyEquals(T("(-> fb:type.int fb:type.datetime)").apply(T("fb:type.number")), T("fb:type.datetime")); - verifyEquals(T("(-> fb:type.number fb:type.datetime)").apply(T("fb:type.int")), T("fb:type.datetime")); - - verifyMeet("(-> fb:location.location fb:type.number)", "(-> fb:location.location fb:type.float)"); - verifyMeet("fb:common.topic", "fb:location.location"); - verifyMeet("fb:type.any", "fb:type.boolean"); - verifyMeet("fb:type.any", "fb:type.number"); - verifyMeet("fb:type.any", "fb:type.datetime"); - verifyMeet("fb:type.any", "fb:type.cvt"); - verifyMeet("fb:type.any", "fb:type.text"); - verifyMeet("fb:type.any", "fb:location.location"); - verifyMeet("fb:type.any", "fb:common.topic"); - - verifyMeet("fb:common.topic", "fb:common.topic"); - verifyMeet("top", "(-> t t)"); - 
verifyMeet("top", "fb:type.datetime"); - verifyMeet("top", "(union a b)"); - - verifyMeet("(-> (-> a b) top)", "(-> top (-> a b))", "(-> (-> a b) (-> a b))"); - verifyMeet("(-> (union city country) person)", "(-> city (union person dog))", "(-> city person)"); - } - - public static void main(String[] args) { - new SemTypeTest().simpleSemType(); - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/SemanticFnTest.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/SemanticFnTest.java deleted file mode 100644 index 9342dd5824..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/SemanticFnTest.java +++ /dev/null @@ -1,125 +0,0 @@ -package edu.stanford.nlp.sempre.test; - -import edu.stanford.nlp.sempre.*; -import fig.basic.LispTree; -import org.testng.annotations.Test; - -import java.util.Arrays; -import java.util.Collections; -import java.util.List; - -import static org.testng.AssertJUnit.assertEquals; - -/** - * Test Formulas. - * @author Percy Liang - */ -public class SemanticFnTest { - private static Formula F(String s) { return Formula.fromString(s); } - - void check(Formula target, DerivationStream derivations) { - if (!derivations.hasNext()) throw new RuntimeException("Expected 1 derivation, got " + derivations); - assertEquals(target, derivations.next().formula); - } - - void check(Formula target, String utterance, SemanticFn fn, List children) { - Example ex = TestUtils.makeSimpleExample(utterance); - check(target, fn.call(ex, new SemanticFn.CallInfo(null, 0, ex.numTokens(), Rule.nullRule, children))); - } - - void check(Formula target, String utterance, SemanticFn fn) { - List empty = Collections.emptyList(); - check(target, utterance, fn, empty); - } - - void checkNumDerivations(DerivationStream derivations, int num) { - assertEquals(num, derivations.estimatedSize()); - } - - @Test public void constantFn() { - LanguageAnalyzer.setSingleton(new SimpleAnalyzer()); - check(F("(number 3)"), "whatever", new 
ConstantFn(F("(number 3)"))); - } - - Derivation D(Formula f) { - return (new Derivation.Builder()) - .formula(f) - .prob(1.0) - .createDerivation(); - } - - LispTree T(String str) { - return LispTree.proto.parseFromString(str); - } - - // TODO(chaganty): Test bridge fn - requires freebase - - @Test public void concatFn() { - LanguageAnalyzer.setSingleton(new SimpleAnalyzer()); - check(F("(string \"a b\")"), "a b", new ConcatFn(" "), - Arrays.asList(D(F("(string a)")), D(F("(string b)")))); - } - - // TODO(chaganty): Test context fn - - @Test public void filterPosTagFn() { - LanguageAnalyzer.setSingleton(new SimpleAnalyzer()); - FilterPosTagFn filter = new FilterPosTagFn(); - filter.init(T("(FilterPosTagFn token NNP)")); - Derivation child = new Derivation.Builder().createDerivation(); - Example ex = TestUtils.makeSimpleExample("where is Obama"); - assertEquals(filter.call(ex, - new SemanticFn.CallInfo(null, 0, 1, Rule.nullRule, Collections.singletonList(child))).hasNext(), - false); - assertEquals(filter.call(ex, - new SemanticFn.CallInfo(null, 1, 2, Rule.nullRule, Collections.singletonList(child))).hasNext(), - false); - assertEquals(filter.call(ex, - new SemanticFn.CallInfo(null, 2, 3, Rule.nullRule, Collections.singletonList(child))).hasNext(), - true); - } - - @Test public void filterSpanLengthFn() { - LanguageAnalyzer.setSingleton(new SimpleAnalyzer()); - FilterSpanLengthFn filter = new FilterSpanLengthFn(); - filter.init(T("(FilterSpanLengthFn 2)")); - Derivation child = new Derivation.Builder().createDerivation(); - Example ex = TestUtils.makeSimpleExample("This is a sentence with some words"); - assertEquals( - filter.call(ex, new SemanticFn.CallInfo(null, 0, 1, Rule.nullRule, Collections.singletonList(child))).hasNext(), - false); - assertEquals( - filter.call(ex, new SemanticFn.CallInfo(null, 0, 2, Rule.nullRule, Collections.singletonList(child))).hasNext(), - true); - assertEquals( - filter.call(ex, new SemanticFn.CallInfo(null, 0, 2, Rule.nullRule, 
Collections.singletonList(child))).hasNext(), - true); - - filter = new FilterSpanLengthFn(); - filter.init(T("(FilterSpanLengthFn 2 4)")); - assertEquals( - filter.call(ex, new SemanticFn.CallInfo(null, 0, 1, Rule.nullRule, Collections.singletonList(child))).hasNext(), - false); - assertEquals( - filter.call(ex, new SemanticFn.CallInfo(null, 0, 2, Rule.nullRule, Collections.singletonList(child))).hasNext(), - true); - assertEquals( - filter.call(ex, new SemanticFn.CallInfo(null, 0, 3, Rule.nullRule, Collections.singletonList(child))).hasNext(), - true); - assertEquals( - filter.call(ex, new SemanticFn.CallInfo(null, 0, 4, Rule.nullRule, Collections.singletonList(child))).hasNext(), - true); - assertEquals( - filter.call(ex, new SemanticFn.CallInfo(null, 0, 5, Rule.nullRule, Collections.singletonList(child))).hasNext(), - false); - } - - // TODO(chaganty): Test fuzzy match fn - // TODO(chaganty): Test identity fn - // TODO(chaganty): Test join fn - // TODO(chaganty): Test lexicon fn - // TODO(chaganty): Test merge fn - // TODO(chaganty): Test select fn - // TODO(chaganty): Test simple lexicon fn - -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/SystemSanityTest.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/SystemSanityTest.java deleted file mode 100644 index 603540532a..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/SystemSanityTest.java +++ /dev/null @@ -1,65 +0,0 @@ -package edu.stanford.nlp.sempre.test; - -import com.google.common.collect.Iterables; -import com.google.common.collect.Maps; - -import edu.stanford.nlp.sempre.*; -import fig.basic.Pair; -import fig.basic.Evaluation; - -import org.testng.annotations.Test; - -import java.util.Collections; -import java.util.List; -import java.util.Map; - -import static org.testng.AssertJUnit.assertEquals; - -/** - * Various end-to-end sanity checks. 
- * - * @author Roy Frostig - * @author Percy Liang - */ -public class SystemSanityTest { - private static Builder makeBuilder(String grammarPath) { - Grammar g = new Grammar(); - g.read(grammarPath); - - Builder b = new Builder(); - b.grammar = g; - b.executor = new FormulaMatchExecutor(); - b.buildUnspecified(); - return b; - } - - private static Dataset makeDataset() { - Dataset d = new Dataset(); - d.readFromPathPairs(Collections.singletonList( - Pair.newPair("train", "freebase/data/unittest-learn.examples"))); - return d; - } - - private static Map> learn(Builder builder, Dataset dataset) { - Map> evals = Maps.newHashMap(); - new Learner(builder.parser, builder.params, dataset).learn(3, evals); - return evals; - } - - @Test(groups = { "sparql", "corenlp" }) - public void easyEndToEnd() { - LanguageAnalyzer.setSingleton(new SimpleAnalyzer()); - // Make sure learning works - Dataset dataset = makeDataset(); - String[] grammarPaths = new String[] { - "freebase/data/unittest-learn.grammar", - "freebase/data/unittest-learn-ccg.grammar", - }; - for (String grammarPath : grammarPaths) { - Builder builder = makeBuilder(grammarPath); - FeatureExtractor.opts.featureDomains.add("rule"); - Map> evals = learn(builder, dataset); - assertEquals(1.0d, Iterables.getLast(evals.get("train")).getFig("correct").min()); - } - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/TestUtils.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/TestUtils.java deleted file mode 100644 index bff60d11c8..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/TestUtils.java +++ /dev/null @@ -1,91 +0,0 @@ -package edu.stanford.nlp.sempre.test; - -import edu.stanford.nlp.sempre.*; - -/** - * Useful utilities and dummy system components for writing tests. 
- * - * @author Roy Frostig - */ -public final class TestUtils { - private TestUtils() { } - - public static Grammar makeAbcGrammar() { - Grammar g = new Grammar(); - g.addStatement("(rule $X (a) (ConstantFn (string a)))"); - g.addStatement("(rule $X (b) (ConstantFn (string b)))"); - g.addStatement("(rule $X (c) (ConstantFn (string c)))"); - g.addStatement("(rule $X ($X $X) (ConcatFn ,))"); - g.addStatement("(rule $ROOT ($X) (IdentityFn))"); - return g; - } - - public static Grammar makeArithmeticGrammar() { - Grammar g = new Grammar(); - g.addStatement("(rule $Expr ($TOKEN) (NumberFn))"); - g.addStatement("(rule $Expr ($Expr $Partial) (JoinFn backward))"); - g.addStatement("(rule $Partial ($Operator $Expr) (JoinFn forward))"); - g.addStatement("(rule $Operator (plus) (ConstantFn (lambda y (lambda x (call + (var x) (var y))))))"); - g.addStatement("(rule $Operator (times) (ConstantFn (lambda y (lambda x (call * (var x) (var y))))))"); - g.addStatement("(rule $Operator (and) (ConstantFn (lambda y (lambda x (call + (var x) (var y))))))"); - g.addStatement("(rule $Operator (and) (ConstantFn (lambda y (lambda x (call * (var x) (var y))))))"); - g.addStatement("(rule $ROOT ($Expr) (IdentityFn))"); - return g; - } - - public static Grammar makeArithmeticFloatingGrammar() { - Grammar g = new Grammar(); - g.addStatement("(rule $Expr ($TOKEN) (NumberFn) (anchored 1))"); - g.addStatement("(rule $Expr ($Expr $Partial) (JoinFn backward))"); - g.addStatement("(rule $Partial ($Operator $Expr) (JoinFn forward))"); - g.addStatement("(rule $Operator (nothing) (ConstantFn (lambda y (lambda x (call + (var x) (var y))))))"); - g.addStatement("(rule $Operator (nothing) (ConstantFn (lambda y (lambda x (call * (var x) (var y))))))"); - g.addStatement("(rule $ROOT ($Expr) (IdentityFn))"); - return g; - } - - public static Grammar makeNumberConcatGrammar() { - Grammar g = new Grammar(); - g.addStatement("(rule $Number ($TOKEN) (NumberFn))"); - g.addStatement("(rule $Number ($Number 
$Number) (ConcatFn ,))"); - g.addStatement("(rule $ROOT ($Number) (IdentityFn))"); - return g; - } - - public static Builder makeSimpleBuilder() { - Builder builder = new Builder(); - builder.grammar = makeNumberConcatGrammar(); - builder.executor = new FormulaMatchExecutor(); - builder.buildUnspecified(); - return builder; - } - - public static Dataset makeSimpleDataset() { - return new Dataset(); - } - - public static Learner makeSimpleLearner(Parser parser, Params params, Dataset dataset) { - return new Learner(parser, params, dataset); - } - - public static Learner makeSimpleLearner(Builder builder, Dataset dataset) { - return makeSimpleLearner(builder.parser, builder.params, dataset); - } - - public static Learner makeSimpleLearner() { - return makeSimpleLearner(makeSimpleBuilder(), makeSimpleDataset()); - } - - public static Example makeSimpleExample(String utterance) { return makeSimpleExample(utterance, null); } - public static Example makeSimpleExample(String utterance, Value targetValue) { - Builder builder = new Builder(); - builder.build(); - Example ex = new Example.Builder() - .setId("_id") - .setUtterance(utterance) - .setTargetValue(targetValue) - .createExample(); - ex.preprocess(); - return ex; - } -} diff --git a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/TypeInferenceTest.java b/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/TypeInferenceTest.java deleted file mode 100644 index 2de343f853..0000000000 --- a/examples/lassie/sempre/src/edu/stanford/nlp/sempre/test/TypeInferenceTest.java +++ /dev/null @@ -1,117 +0,0 @@ -package edu.stanford.nlp.sempre.test; - -import static org.testng.AssertJUnit.assertEquals; - -import org.testng.annotations.Test; - -import edu.stanford.nlp.sempre.*; - -/** - * Test type inference. 
- * @author Percy Liang - * @author ppasupat - */ -public class TypeInferenceTest { - // For testing - private static final CustomTypeLookup typeLookup = new CustomTypeLookup(); - private static Formula F(String str) { return Formula.fromString(str); } - private static SemType T(String str) { return SemType.fromString(str); } - private static SemType FT(String str) { return TypeInference.inferType(F(str), typeLookup); } - - static class CustomTypeLookup implements TypeLookup { - - - @Override - public SemType getEntityType(String entity) { - return null; - } - - @Override - public SemType getPropertyType(String property) { - switch (property) { - case "fb:location.location.area": - return T("(-> fb:type.number fb:location.location)"); - case "fb:people.person.date_of_birth": - return T("(-> fb:type.datetime fb:people.person)"); - case "fb:people.person.parents": - return T("(-> fb:people.person fb:people.person)"); - case "fb:people.person.place_of_birth": - return T("(-> fb:location.location fb:people.person)"); - case "fb:people.person.profession": - return T("(-> fb:people.profession fb:people.person)"); - default: - return null; - } - } - - } - - void check(String fstr, String tstr) { - System.out.println("check " + fstr + " " + tstr); - assertEquals(T(tstr).toString(), FT(fstr).toString()); - } - - @Test public void simpleSemType() { - check("(fb:location.location.area (>= (number 200)))", "fb:location.location"); - - check("(number 3)", "fb:type.number"); - check("(string foo)", "fb:type.text"); - check("(date 1981 1 1)", "fb:type.datetime"); - check("fb:en.barack_obama", "fb:common.topic"); // Don't have getEntityTypes - check("fb:people.person.place_of_birth", "(-> fb:location.location fb:people.person)"); - - // Join - check("(fb:type.object.type fb:location.location)", "fb:location.location"); - check("(fb:people.person.place_of_birth (fb:type.object.type fb:location.location))", "fb:people.person"); - check("(!fb:people.person.place_of_birth 
(fb:type.object.type fb:location.location))", "(union)"); - - // Merge - check("(and (fb:type.object.type fb:common.topic) (fb:people.person.place_of_birth fb:en.seattle))", "fb:people.person"); - check("(and (fb:type.object.type fb:location.location) (fb:people.person.place_of_birth fb:en.seattle))", "(union)"); - - // Mark - check("(mark x (fb:people.person.parents (var x)))", "fb:people.person"); - check("(mark x (fb:people.person.place_of_birth (var x)))", "(union)"); - - // Lambda - check("(lambda x (fb:people.person.place_of_birth (var x)))", "(-> fb:location.location fb:people.person)"); - check("(lambda x (!fb:people.person.place_of_birth (var x)))", "(-> fb:people.person fb:location.location)"); - check("(lambda x (fb:people.person.place_of_birth (var x)))", "(-> fb:location.location fb:people.person)"); - check("(lambda x (!fb:people.person.place_of_birth (var x)))", "(-> fb:people.person fb:location.location)"); - check("(lambda x (!fb:people.person.profession (fb:people.person.place_of_birth (var x))))", "(-> fb:location.location fb:people.profession)"); - check("(lambda b ((var b) (fb:type.object.type fb:people.person)))", "(-> (-> fb:people.person top) top)"); - // Note: and the other way doesn't work, since we don't propagate everything. 
- check("(lambda b (and (fb:type.object.type fb:location.location) ((var b) (fb:type.object.type fb:people.person))))", "(-> (-> fb:people.person fb:location.location) fb:location.location)"); - check("(lambda x (lambda y ((var x) (var y))))", "(-> (-> top top) (-> top top))"); - check("(lambda x (lambda x (fb:people.person.place_of_birth (var x))))", "(-> top (-> fb:location.location fb:people.person))"); // No variable capture - - // Aggregation - check("(lambda x (not (var x)))", "(-> fb:type.any fb:type.any)"); - check("(lambda x (count (var x)))", "(-> fb:type.any fb:type.number)"); - check("(lambda x (count (fb:people.person.place_of_birth (var x))))", "(-> fb:location.location fb:type.number)"); - - // Arithmetic - check("(+ (number 3) (number 4))", "fb:type.number"); - check("(+ (date 1981 1 1) (string 4))", "(union)"); - check("(- (date 1982 1 1) (date 1981 1 1))", "fb:type.datetime"); // Future: should be a different duration type - - // Reverse - check("(reverse fb:people.person.place_of_birth)", "(-> fb:people.person fb:location.location)"); - - // Superlative - check("(argmax 1 1 (fb:type.object.type fb:people.person) fb:people.person.date_of_birth)", "fb:people.person"); - check("(argmax 1 1 (fb:type.object.type fb:common.topic) fb:people.person.date_of_birth)", "fb:people.person"); - check("(argmax 1 1 (fb:type.object.type fb:common.topic) (reverse (lambda x (number 3))))", "fb:common.topic"); - check("(lambda x (lambda y (argmax 1 1 (var x) (var y))))", "(-> fb:type.any (-> (-> (union fb:type.number fb:type.datetime) fb:type.any) fb:type.any))"); - - // Call - check("(call Math.cos (number 0))", "fb:type.float"); - check("(call Math.cos (string abc))", "(union)"); - check("(lambda x (lambda y (call .concat (var x) (var y))))", "(-> fb:type.text (-> fb:type.text fb:type.text))"); - check("(lambda x (call .length (var x)))", "(-> fb:type.text fb:type.int)"); - } - - public static void main(String[] args) { - new TypeInferenceTest().simpleSemType(); - 
} -} diff --git a/examples/lassie/sempre/testng.xml b/examples/lassie/sempre/testng.xml deleted file mode 100644 index a17e9c4635..0000000000 --- a/examples/lassie/sempre/testng.xml +++ /dev/null @@ -1,17 +0,0 @@ - - - - - - - - - - - - - - - - - diff --git a/examples/lassie/sempre/unittest-files/README b/examples/lassie/sempre/unittest-files/README deleted file mode 100644 index 3997f8a2bb..0000000000 --- a/examples/lassie/sempre/unittest-files/README +++ /dev/null @@ -1 +0,0 @@ -This directory contains small files which are read by unit tests. diff --git a/examples/lassie/sempre/unittest-files/binaryInfoStringAndAlignment.txt b/examples/lassie/sempre/unittest-files/binaryInfoStringAndAlignment.txt deleted file mode 100644 index 4b5e33d261..0000000000 --- a/examples/lassie/sempre/unittest-files/binaryInfoStringAndAlignment.txt +++ /dev/null @@ -1,5 +0,0 @@ -{"formula":"(lambda x (fb:people.person.education (fb:education.education.institution (var x))))","source":"ALIGNMENT","features":{"FB_typed_size":1740.0,"Intersection_size_typed":53.0,"NL-size":18096.0,"NL_typed_size":65.0},"lexeme":"bear in"} -{"formula":"fb:people.deceased_person.place_of_cremation","source":"ALIGNMENT","features":{"FB_typed_size":3.0,"Intersection_size_typed":1.0,"NL-size":18096.0,"NL_typed_size":7156.0},"lexeme":"bear in"} -{"formula":"fb:location.location.people_born_here","source":"ALIGNMENT","features":{"FB_typed_size":351.0,"Intersection_size_typed":1.0,"NL-size":18096.0,"NL_typed_size":1.0},"lexeme":"bear in"} -{"formula":"!fb:location.location.people_born_here","source":"ALIGNMENT","features":{"FB_typed_size":16184.0,"Intersection_size_typed":13856.0,"NL-size":18096.0,"NL_typed_size":15765.0},"lexeme":"bear in"} -{"formula":"(lambda x (!fb:tv.tv_producer_term.producer (!fb:tv.tv_program.tv_producer (var x))))","source":"ALIGNMENT","features":{"FB_typed_size":156.0,"Intersection_size_typed":1.0,"NL-size":10.0,"NL_typed_size":1.0},"lexeme":"hilarious in"} diff --git 
a/examples/lassie/sempre/unittest-files/unaryInfoStringAndAlignment.txt b/examples/lassie/sempre/unittest-files/unaryInfoStringAndAlignment.txt deleted file mode 100644 index 6f657e199d..0000000000 --- a/examples/lassie/sempre/unittest-files/unaryInfoStringAndAlignment.txt +++ /dev/null @@ -1,4 +0,0 @@ -{"formula":"(fb:people.person.profession fb:en.attorney)","source":"STRING_MATCH","features":{},"lexeme":"lawyer"} -{"formula":"(fb:people.person.profession fb:en.attorney)","source":"ALIGNMENT","features":{"fb_size":847.0,"intersection":26.0,"nl_size":57.0},"lexeme":"lawyer"} -{"formula":"(fb:type.object.type fb:location.continent)","source":"STRING_MATCH","features":{},"lexeme":"continent"} -{"formula":"(fb:type.object.type fb:location.continent)","source":"ALIGNMENT","features":{"fb_size":6.0,"intersection":5.0,"nl_size":15.0},"lexeme":"continent"} diff --git a/examples/lassie/src/AssocMap.sml b/examples/lassie/src/AssocMap.sml deleted file mode 100644 index b8310c747e..0000000000 --- a/examples/lassie/src/AssocMap.sml +++ /dev/null @@ -1,25 +0,0 @@ -structure AssocMap = -struct - - datatype ('a, 'b) tree = - Leaf - | Node of 'a * 'b * ('a, 'b) tree * ('a, 'b) tree; - - fun append (kN:'a) (vN:'b) (tr:('a, 'b) tree) (cmp:'a * 'a -> order) = - case tr of - Leaf => Node (kN, vN, Leaf, Leaf) - | Node (k, v, tr1, tr2) => - case cmp (kN, k) of - LESS => Node (k, v, append kN vN tr1 cmp, tr2) - | _ => Node (k, v, tr1, append kN vN tr2 cmp); - - fun lookup (k1:'a) (tr:('a, 'b) tree) (cmp:'a * 'a -> order) :'b option = - case tr of - Leaf => NONE - | Node (k2, v, tr1, tr2) => - case cmp (k1,k2) of - EQUAL => SOME v - | LESS => lookup k1 tr1 cmp - | GREATER => lookup k1 tr2 cmp; - -end; diff --git a/examples/lassie/src/Holmakefile b/examples/lassie/src/Holmakefile deleted file mode 100644 index b0676d80a3..0000000000 --- a/examples/lassie/src/Holmakefile +++ /dev/null @@ -1,20 +0,0 @@ -CLINE_OPTIONS=-j1 -TACTIC_WORLD = 
../sempre/classes/interactive/edu/stanford/nlp/sempre/interactive/lassie/TacticWorld.class - -all: LassieTestTheory.sml -.PHONY: all - -$(TACTIC_WORLD): - export LASSIEDIR=../ &&\ - ./init.sh &&\ - cd ../sempre &&\ - ant core interactive &&\ - cd ../src - -LassieTestTheory.sml: $(TACTIC_WORLD) - -EXTRA_CLEANS = ../sempre/classes/ ../sempre/fig/ ../sempre/int-output/ \ - ../sempre/interactive/lassie.lexicon \ - ../sempre/interactive/sempre-out-socket.sml ../sempre/lib/ \ - ../sempre/libsempre/ ../sempre/module-classes.txt \ - ../sempre/state/ diff --git a/examples/lassie/src/LassieLib.sml b/examples/lassie/src/LassieLib.sml deleted file mode 100644 index f2c3eef0b0..0000000000 --- a/examples/lassie/src/LassieLib.sml +++ /dev/null @@ -1,403 +0,0 @@ -(** - Structure LassieLib - - Implements the main communication interface between HOL4 and SEMPRE -**) -structure LassieLib = -struct - - open Abbrev Tactical Manager proofManagerLib; - open LassieUtilsLib LassieParserLib; - - exception LassieException of string; - - type sempre_response = - { formula: string, - result: SempreParse, - descr: string}; - - type ambiguity_warning = - { set : string list, - span: string }; - - datatype AmbiguityWarning = - Warning of ambiguity_warning; - - datatype GoalPart = - All | Sub of int; - - val map = List.map - fun mem x l = List.exists (fn x' => x = x') l - val LASSIEPROMPT = "|>"; - val LASSIESEP = ref "."; - - val knownJargon :(string * (unit->unit)) list ref= ref []; - - val sempreResponse :sempre_response list ref = ref []; - - val ambiguityWarning : AmbiguityWarning option ref = ref NONE; - - val lastUtterance = ref ""; - - (* val HOLDIR = - let val lDir = getOSVar "HOLDIR" in - if (endsWith lDir #"/") then lDir else (lDir ^ "/") end; *) - - val LASSIEDIR = Globals.HOLDIR ^ ("/examples/lassie"); - - val historyPath = LASSIEDIR ^ "sempre/interactive/last-sempre-output.sml"; - - (**************************************) - (* Communication *) - 
(**************************************) - val logging = ref false; - - (* wait for the SEMPRE prompt; signifies end of execution - returns the complete string read from SEMPRE *) - fun waitSempre instream :string = - let - val s = TextIO.input(instream); - val _ = if !logging then print s else () - in - if String.isSuffix "#SEMPRE# " s orelse s = "#SEMPRE# " then s - (* else if s = "" then raise LassieException "Reached EOS? Empty string was read." *) - else s ^ (waitSempre instream) - end; - - (* run SEMPRE as a subprocess through the run script - returns in- and outstream of its shell *) - fun launchSempre () = - let - val currDir = OS.FileSys.getDir(); - (* SEMPRE's run script is dependent on being at the top of its directory *) - val _ = OS.FileSys.chDir (LASSIEDIR ^ "/sempre") - val instream' = - Unix.textInstreamOf - (Unix.executeInEnv("interactive/run",["-n","@mode=lassie"], - Posix.ProcEnv.environ())) - val execCommand = TextIO.input(instream') - val (instr,outstr) = - case String.tokens Char.isSpace execCommand of - [] => raise Fail "Run script returned no arguments" - | cmd::args => Unix.streamsOf(Unix.execute(cmd,args)) - val _ = waitSempre(instr); - val _ = OS.FileSys.chDir currDir; - in - (ref instr, ref outstr) - end; - - (* Start SEMPRE when the Lib file is loaded - TODO: Box into a function? 
*) - val (instream, outstream) = launchSempre(); - - (* send a string to sempre *) - fun writeSempre (cmd : string) = - let - (* not needed anymore as we do not load from the socket file - val _ = if OS.FileSys.access (socketPath, []) then OS.FileSys.remove socketPath else () *) - val _ = lastUtterance := cmd - val _ = if !logging then (print "Writing "; print cmd; print "\n") else () - val _ = TextIO.output(!outstream, cmd ^ "\n") - in - () - end; - - (* Splits the response of SEMPRE into separate components based on matching - pairs of { and } *) - fun prepareResponse s = - List.foldl - (fn (xs, ys) => - ys @ (LassieUtilsLib.string_split xs #"}")) [] (LassieUtilsLib.string_split s #"{"); - - (* Extracts text starting with descr from list xs *) - fun getPart descr xs = - case xs of - [] => NONE - | x::[] => NONE - | x::y::xs => - if (String.isSuffix descr x) then - SOME(y,xs) - else getPart descr (y::xs); - - (* Removes a trailing quotation mark " from s *) - fun strip_quotmark s = - let val xs = explode s in - if hd xs = #"\"" then implode (tl xs) else s end; - - (* read SEMPRE's response from stdin *) - (* returns a derivation (i.e. the first candidate) of type sempre_response *) - (* TODO: Ambiguities ? 
*) - fun readSempre () :sempre_response= - let - val response = waitSempre (!instream) |> prepareResponse; - val (theFormula,theResponse) = - case getPart "Top formula " response of - NONE => raise LassieException "Could not extract formula" - | SOME (formula,remainder) => - case getPart "Top value " remainder of - NONE => raise LassieException "Could not extract value" - | SOME (response,remainder) => - (String.map (fn c => if (c = #"\n") then #" " else c) formula, response) - val cleanedResponse = - LassieUtilsLib.get_suffix_after_match "(string " theResponse - |> explode |> List.rev |> implode - |> LassieUtilsLib.get_suffix_after_match ")" (* TODO: This may be too fragile...*) - |> explode |> List.rev |> implode - |> strip_quotmark |> explode |> List.rev |> implode - |> strip_quotmark |> explode |> List.rev |> implode - |> String.map (fn c => if (c = #"$") then #" " else c) - val _ = if !logging then (print "\n"; print cleanedResponse; print "\n") else (); - val res = LassieParserLib.parse cleanedResponse; - in - { formula= theFormula, result = fst res, descr = snd res} - end; - - (* send a NL query to sempre and return at least a derivation *) - fun sempre utt = (writeSempre utt; readSempre ()); - - (*************************************) - (* Main interface *) - (*************************************) - fun find_matching_goal tq gl = - let val (id,found) = - foldl (fn (g,(id, found)) => - if found then (id,found) else - let val _ = rename1 tq g in (id,true) end handle Feedback.HOL_ERR e => (id+1,false)) - (1, false) - gl - in - if found then id else raise (LassieException "No matching subgoal found") - end; - - (* parse and apply most likely tactic *) - fun nltac (utt:'a frag list) g : goal list * validation= - let - (* preprocess the input string *) - val uttStr = - case utt of - [QUOTE s] => LassieUtilsLib.preprocess s - | _ => raise LassieException "Illegal input to nltac"; - val _ = - if (not (String.isSuffix (! 
LASSIESEP) uttStr)) then - raise LassieException "Tactics must end with LASSIESEP" - else (); - val theStrings = LassieUtilsLib.string_split uttStr #" "; - val (gls1,vld1) = ALL_TAC g; - val (str, pos, gls, vld) = - (List.foldl - (fn (str, (strAcc, goalpos, gl, vld)) => - if not (String.isSuffix (! LASSIESEP) str) then - (strAcc ^ " " ^ str, goalpos, gl, vld) - else - let - val theString = strAcc ^ " " ^ (removeTrailing (! LASSIESEP) str); - val t = sempre theString; - in - case #result t of - HOLTactic t => - (case goalpos of - All => - let val (gls, vld2) = ALLGOALS t gl in - ("", goalpos, gls, vld o vld2) end - | Sub i => - let val (gls, vld2) = NTH_GOAL t i gl in - ("", goalpos, gls, vld o vld2) end) - | Subgoal n => if n = ~1 then ("", All, gl, vld) else ("", Sub n, gl, vld) - | Termgoal t => - let val id = find_matching_goal t gl in - ("", Sub id, gl, vld) end - | Command c => raise LassieException "Command found during tactic" - end - (* The Lassie separator was a HOL4 level token *) - handle LassieException diag => - if (diag = "Could not extract formula") - then (strAcc ^ " " ^ str, goalpos, gl, vld) - else raise LassieException diag) - ("", All, gls1, vld1) theStrings) - in - if (str = "") then (gls, vld) - else raise LassieException ("Could not parse string "^str^"\n") - end; - - (* define an utterance in terms of a list of utterances*) - local - fun define ndum niens : string = - let - fun extract s = case hd s of QUOTE s => LassieUtilsLib.preprocess s | _ => raise LassieException "Illegal Quote" - (* for each utterance of the definition, get its logical form *) - fun getFormula u = [u, (u |> sempre |> #formula |> escape |> escape)] - (* formatting *) - fun quot s = "\"" ^ s ^ "\"" - fun quot' s = "\\\"" ^ s ^ "\\\"" - fun list2string l = "[" ^ (String.concatWith "," l) ^ "]" - fun stripAllSpaces s = explode s |> stripSpaces |> explode |> List.rev - |> stripSpaces |> explode |> List.rev |> implode; - val definiens = - niens |> (map extract) - |> (map 
getFormula) - |> map (map stripAllSpaces) - |> (map (map quot')) - |> (map list2string) - |> list2string - val theDef = "(:def " ^ (quot (extract ndum)) ^ " " ^ (quot definiens) ^ ")" - val _ = if (!logging) then (print "Defining:\n"; print theDef; print "\n\n") else () - val _ = writeSempre ("(:def " ^ (quot (extract ndum)) ^ " " ^ (quot definiens) ^ ")") - val res = waitSempre(!instream) - val _ = (if (!logging) then print res else ()) - in - res - end; - in - fun def cmd def = define cmd [def]; - end; - - fun addRule lhs rhs sem anchoring : unit = - let - fun paren str = - let - val clist = String.explode str - in - if (hd clist = #"(" andalso last clist = #")") then str - else "(" ^ str ^ ")" - end - in - (writeSempre ("(rule " ^ lhs ^ " " ^ paren rhs ^ " " ^ paren sem ^ " " ^ paren anchoring ^ ")"); - waitSempre (!instream); ()) - end; - - fun addIDRule (cat:string) (str:string) (anchoring:string) : unit = - addRule cat str ("ConstantFn ( string \"" ^ str ^ "\")") anchoring; - - (** Adding a custom SML tactic to the grammar **) - fun addCustomTactic tac str : unit = - (addIDRule "$tactic" str "anchored 1"; - LassieParserLib.addCustomTactic tac str); - - (** Adding a custom SML thm tactic to the grammar **) - fun addCustomThmTactic thmtac str: unit = - (addIDRule "$thm->tactic" str "anchored 1"; - LassieParserLib.addCustomThmTactic thmtac str) - - (** Adding a custom SML thmlist tactic to the grammar **) - (** - fun addCustomThmlistTactic tac : unit = - addIDRule "$thmlist->tactic" tac "anchored 1"; - - fun addCustomTermTactic tac : unit = - addIDRule "$term->tactic" tac "anchored 1"; - **) - - fun printGrammar () : unit = - let - val prev = !logging; - val _ = logging := true; - val _ = writeSempre ("(grammar)"); - val _ = waitSempre (!instream); - val _ = logging := prev; - in - () end; - - (** Jargon Management **) - - fun registerJargon (name:string) (loader:unit->unit) = - knownJargon := (name, loader):: !knownJargon; - - fun knownJargons () = 
!knownJargon; - - fun loadJargon (n:string) = - case List.find (fn (s,f) => s = n) (!knownJargon) of - SOME (s,f) => f() - | NONE => raise LassieException ("Jargon " ^ n ^ " not found. Did you 'open' the correct library?"); - - (** Interactive mode **) - local - fun printHelp () = - ( - map (fn x => print (x ^"\n")) - [ "", - "=======================================", - "======= Lassie Interactive Mode =======", - "=======================================", - " ", - "Send natural language commands with the same keybinding as the one", - "used to send code to you running HOL4 session.", - "The commands will be send to Lassie and evaluated.", - "HOL4 keybindings still work as before.", - "Sending \"exit\" quits the session and clears the goal state,", - "\"pause\" quits the session and keeps the goal state.", - "" - ]; ()); - fun getAll instream = - case TextIO.canInput (instream,1) of - NONE => "" - | SOME _ => - (case TextIO.input1 instream of - NONE => "" - | SOME c => implode [c] ^ (getAll instream)) - fun proofLoop () = - let - (* Set up prompt; wait for input *) - val _ = print ("\n"^LASSIEPROMPT); - val theText = - case (TextIO.inputLine (TextIO.stdIn)) of - NONE => raise LassieException "Error getting input" - | SOME s => LassieUtilsLib.preprocess (s ^ (getAll (TextIO.stdIn))) - val theTrueText = - LassieUtilsLib.preprocess theText - |> LassieUtilsLib.removeTrailing ((!LASSIESEP)^"; ") - in - (* Handle exit keyword separately TODO: Make command? *) - if (theTrueText = "exit") - then (print " Exiting\n") (* ProofRecorderLib.reset()) *) - (* Handle pause keyword separately TODO: Make command? 
*) - else if (theTrueText = "pause") - then (print "Pausing proof.\nReturn with LassieLib.nlexplain().\n") - (* help keyword *) - else if (theTrueText = "help") - then (printHelp(); proofLoop()) - (* Proof step or command was given, parse with SEMPRE *) - else - let - (* Remove semicolons and line-breaks from string *) - val theString = String.translate - (fn x => if ((x = #"\n") orelse (x = #";")) then "" else implode [x]) - theTrueText; - (* Get a tactic from SEMPRE *) - val res = theString |> sempre - val theTactic = #descr res; - val theResult = #result res; - val _ = case theResult of - Command c => (c (); ()) - | Subgoal _ => (print "Subgoals are not supported in verbose prove mode."; ()) - | Termgoal _ => (print "Subgoals are not supported in verbose prove mode."; ()) - | HOLTactic t => (et (theTactic, t); ()); - (* first print the current goal *) - val _ = print "\n"; - val t = proofManagerLib.pp_proof (proofManagerLib.p()); - val _ = PolyML.prettyPrint (print, 80) t; - (* - val done = - (let val _ = proofManagerLib.top_goal(); in false end - handle HOL_ERR _=> true); *) - in - (* - if (done) - then (print ("Finished proof;\nPrinting proofscript\n\n" ^ - ProofRecorderLib.pp_finished (hd(! 
ProofRecorderLib.finished))); - ProofRecorderLib.reset()) - else *) - (proofLoop()) - end - end - in - fun nlexplain () = - let - val (asms,gl) = proofManagerLib.initial_goal(); - val _ = proofManagerLib.drop(); - val _ = proofManagerLib.gt (‘^gl’); - in - proofLoop() - end; - end; - -end diff --git a/examples/lassie/src/LassieParserLib.sml b/examples/lassie/src/LassieParserLib.sml deleted file mode 100644 index a83af94080..0000000000 --- a/examples/lassie/src/LassieParserLib.sml +++ /dev/null @@ -1,303 +0,0 @@ -(** - structure LassieParserLib - Implements a parser from the intermediate language produced by - SEMPRE into HOL4 tactics by looking them up in a map provided by - TacticMap.sml -**) -structure LassieParserLib = -struct - - open Abbrev Tactical Manager Conv BoundedRewrites proofManagerLib; - open LassieUtilsLib TacticMap; - - exception NoParseException of string; - - datatype SempreParse = - HOLTactic of tactic - | Command of unit -> proof - | Subgoal of int - | Termgoal of term frag list; - - val tacticMap = ref TacticMap.stdTree; - - val thmModifs = ["Once", "GSYM"]; - - datatype token = - Subg of int - | Id of string - | TermStart - | TermEnd - | LBrac - | RBrac - | ListStart - | ListEnd - | ListSep; - - fun lex (strs:string list) : (token * string list) option = - case strs of - [] => NONE - | s1 :: strs => - case s1 of - "(" => SOME (LBrac, strs) - | ")" => SOME (RBrac, strs) - | "[" => SOME (ListStart, strs) - | "]" => SOME (ListEnd, strs) - | "," => SOME (ListSep, strs) - | "TERMSTART" => SOME (TermStart, strs) - | "TERMEND" => SOME (TermEnd, strs) - | "ALLGOALS" => SOME (Subg (~ 1), strs) - | "TERMGOAL" => SOME (Subg (~2), strs) - | "INTGOAL" => - (case strs of - [] => NONE - | s2 :: strs2 => - (case Int.fromString s2 of - NONE => NONE - | SOME i => SOME (Subg i, strs))) - | _ => SOME (Id s1, strs); - - fun findThm (name:string) :thm option = - let val spl = LassieUtilsLib.string_split name #"." 
- val cmp = - if (List.length spl = 1) then - fn ((theory, theorem), stmt) => hd spl = theorem - else - fn ((theory, theorem), stmt) => - (LassieUtilsLib.get_prefix_before_match "Theory" (hd spl)) = theory andalso - hd (tl spl) = theorem - in - case List.find cmp (DB.listDB()) of - NONE => NONE - | SOME (_, (th, _, _)) => SOME th - end; - - fun parseThm (strs:string list) : (string * thm * string list) = - case lex strs of - NONE => raise NoParseException "No theorem identifier found where theorem was expected" - | SOME (Id s, strs1) => - if (List.exists (fn a => a = s) thmModifs) then - let val (thStr, th, strs2) = parseThm strs1 - val txt = s ^ " " ^ thStr in - case s of - "Once" => (txt, Once th, strs2) - | "GSYM" => (txt, GSYM th, strs2) - | _ => raise NoParseException ("Invalid theorem modifier "^s^" found\n") - end - else - (case findThm s of - NONE => raise NoParseException ("Could not find theorem " ^ s ^ " in current context") - | SOME th => (s, th, strs1)) - | _ => raise NoParseException ("Could not parse a theorem where a theorem was expected\n"); - - fun peek (s:string list) :token = - case lex s of - SOME (tok, strs) => tok - | _ => raise (NoParseException "Could not look into next token when expecting a token\n") - - (** Function tokToString should only be used for list pretty printing, nothing - else. Thus we exclude other tokens from prettyprinting. 
**) - fun tokToString t = - case t of - TermStart => "`" - | TermEnd => "`" - | LBrac => "(" - | RBrac => ")" - | ListStart => "[" - | ListEnd => "]" - | ListSep => "," - | _ => ""; - - local - (* Generic list parsing function, takes as input the strings to be parsed, a - function parsing a single element of the list, and tokens describing the - end and function parsing the separator of the list *) - fun readList strs singleton endTok sep : (string * 'a list * string list) = - if (case lex strs of - NONE => false - | SOME (tok, strs2) => tok = endTok) then (tokToString endTok,[], snd (valOf (lex strs))) - else - let val (strdescr, th, strs2) = singleton strs in - if (case lex strs2 of - NONE => false - | SOME (tok, strs3) => tok = endTok) then - (strdescr ^ " " ^ tokToString endTok, [th], snd (valOf (lex strs2))) - else - let - val (strSep, strs3) = sep strs2 - val (strs, ths, strs4) = readList strs3 singleton endTok sep - val descr = strdescr ^ strSep ^ " " ^ strs - in - (descr, th :: ths, strs4) - end - end - in - fun parseList (strs:string list) singleton startTok endTok sep = - case lex strs of - SOME (tok,strs) => - if (tok = startTok) then - let val (strdescr, lst, strs2) = readList strs singleton endTok sep - in (tokToString startTok ^ " " ^ strdescr, lst, strs2) end - else raise NoParseException "No valid list" - | _ => raise NoParseException "No valid theorem list" - end; - - fun consumeListSep strs = - case lex strs of - SOME (ListSep, strs2) => (",", strs2) - | _ => raise NoParseException "No list separator found"; - - fun parseThmList strs = parseList strs parseThm ListStart ListEnd consumeListSep; - - fun consumeTmSep strs = ("", strs); - - fun parseTm (strs:string list) :(string * term frag list * string list) = - let - val (strdescr, tm, strs) = - parseList strs (fn (ss:string list) => (hd ss, hd ss, tl ss)) TermStart TermEnd consumeTmSep - val fullTm = foldl (fn (s1,s2) => if s2 = "" then s1 else s1 ^ " " ^ s2) "" (List.rev tm) - in - (strdescr, 
[QUOTE fullTm], strs) - end; - - fun parseTmList strs = - parseList strs parseTm ListStart ListEnd consumeListSep; - - fun parseThmTactic strs = - case lex strs of - SOME (LBrac, strs1) => - let val (descr, thmtac, strs2) = parseThmTactic strs1 in - case lex strs2 of - SOME (RBrac, strs3) => (descr, thmtac, strs3) - | _ => raise NoParseException ("Imbalanced parenthesis\n") - end - | SOME (Id id, strs) => - (case TacticMap.lookupTac id (!tacticMap) of - SOME (ThmTactic th) => (id, th, strs) - | SOME (QuotSpecThmTactic t) => - let val (tmStr, tm, strs2) = parseTm strs - val (thmTac, thmtac, strs3) = parseThmTactic strs2 in - (id ^" "^ tmStr ^" "^ thmTac, t tm thmtac, strs3) end - | SOME (QuotListSpecThmTactic t) => - let val (tmsStr, tm, strs2) = parseTmList strs - val (thmTac, thmtac, strs3) = parseThmTactic strs2 in - (id ^" "^ tmsStr ^" "^ thmTac, t tm thmtac, strs3) end - | _ => raise NoParseException ("Id " ^ id ^ " not found \n")) - | _ => raise NoParseException ("No theorem tactic found where it was expected\n"); - - local - fun parsePartial (inp:string list) :(string * tactic * string list) = - case lex inp of - SOME (Id str, strs) => - (case TacticMap.lookupTac str (!tacticMap) of - SOME (Tactic t) => (str, t,strs) - | SOME (Tactical tt) => - let val (descr, t, strs) = parsePartial strs in - (str ^" "^ descr, tt t, strs) - end - | SOME (ThmTactic th) => - let val (thTacDescr,thTac, strs) = parseThmTactic inp - val (thmDescr, th, strs) = parseThm strs in - (thTacDescr ^ " " ^ thmDescr, thTac th, strs) - end - | SOME (ThmListTactic thsTac) => - let val (thmsDescr, thms, strs) = parseThmList strs in - (str ^ " " ^ thmsDescr, thsTac thms, strs) - end - | SOME (QuotTactic qt) => - let val (tmDescr, tm, strs) = parseTm strs in - (str ^" "^tmDescr, qt tm, strs) - end - | SOME (AsmTestTactic t) => - let val (thmTacDescr, thTac, strs) = parseThmTactic strs in - (str ^" "^ thmTacDescr, t thTac, strs) - end - | SOME (AsmMatchTactic t) => - let val (tmDescr, tm, strs2) 
= parseTm strs - val (thmTacDescr, thTac, strs3) = parseThmTactic strs2 - in - (str ^" "^tmDescr^" "^thmTacDescr, t tm thTac, strs3) - end - | SOME (QuotSpecThmTactic t) => - let - val (tmDescr, tm, strs2) = parseTm strs - val (thTacDescr, thTac, strs3) = parseThmTactic strs2 - val (thmDescr, thm, strs4) = parseThm strs3 - in - (str ^" "^tmDescr^" "^thTacDescr^" "^thmDescr, t tm thTac thm, strs4) - end - | SOME (QuotListSpecThmTactic t) => - let - val (tmsDescr, tms, strs2) = parseTmList strs - val (thTacDescr, thTac, strs3) = parseThmTactic strs2 - val (thmDescr, thm, strs4) = parseThm strs3 - in - (str ^" "^tmsDescr^" "^thTacDescr^" "^thmDescr, t tms thTac thm, strs4) - end - | _ => raise NoParseException ("Id " ^ str ^ " not found\n")) - | _ => raise NoParseException ("Unparsable string found\n"); - fun parseFull (inp:string list) : (string * tactic * string list) = - let val (strDescr1, t1, strs1) = - (case peek inp of - TermStart => - let val (tmDescr, tm, strs2) = parseTm inp in - case lex strs2 of - SOME (Id str, strs3) => - (case TacticMap.lookupTac str (!tacticMap) of - SOME (TermComb tc) => - let val (tacDescr, tac, strs4) = parseFull strs3 in - (tmDescr ^" "^str^" "^tacDescr,tc (tm,tac), strs4) - end - | _ => raise NoParseException ("Term combinator " ^ str ^ " not found\n")) - | _ => raise NoParseException ("Unsupported tactic structure in " ^ (foldl (fn (a,b) => b ^ a) "" inp)) - end - | LBrac => - let val (tacDescr, t1, strs1) = parseFull (snd (valOf (lex inp))) in - case peek strs1 of - RBrac => ("(" ^ tacDescr^")", t1, snd (valOf (lex strs1))) - | _ => raise NoParseException "Unmatched parenthesis" - end - | _ => parsePartial inp) - in - case lex strs1 of - SOME (Id str, strs2) => - (case TacticMap.lookupTac str (!tacticMap) of - SOME (TacticComb t) => - let val (tacDescr, t2, strs3) = parseFull strs2 in - (strDescr1 ^" "^ str ^" "^ tacDescr, t (t1, t2), strs3) - end - | _ => raise NoParseException ("Tactic combinator " ^ str ^ " not found\n")) - | 
_ => (strDescr1, t1, strs1) - end; - in - fun parse (sempreResp:string) :(SempreParse * string)= - let - val inp = LassieUtilsLib.string_split sempreResp #" " - val inp = List.rev (foldl (fn (s,ss) => if s = "" then ss else s::ss) [] inp) - in - case peek inp of - Subg n => - if n = ~2 then - let - val (_, strs) = Option.valOf (lex inp) - val (descr, tm, strs1) = parseTm (snd (Option.valOf (lex inp))) in - (Termgoal tm, "Subgoal " ^ descr) end - else (Subgoal n, "Subgoal " ^ (Int.toString n)) - | Id s => - if s = "back" then (Command b,"") else - let val res = parseFull inp in - ((HOLTactic (#2 res)),#1res) - end - | _ => - let val res = parseFull inp in - ((HOLTactic (#2 res)),#1res) - end - end; - end; - - fun addCustomTactic (tac:tactic) (str:string) = - tacticMap := insTac (str, tac) (!tacticMap); - - fun addCustomThmTactic (tac:thm_tactic) (str:string) = - tacticMap := insThmTac (str, tac) (!tacticMap) - -end; diff --git a/examples/lassie/src/LassieTacticsLib.sml b/examples/lassie/src/LassieTacticsLib.sml deleted file mode 100644 index 44dd5bce60..0000000000 --- a/examples/lassie/src/LassieTacticsLib.sml +++ /dev/null @@ -1,98 +0,0 @@ -structure LassieTacticsLib = -struct - - open LassieUtilsLib; - -(* Exception to be raised if no valid pattern can be found *) -exception PATGENERROR of string; - -(* Turn string sl into a "pattern" for Q tactics *) -fun mk_tm_quote sl :term quotation = [QUOTE (String.concatWith " " sl)]; - -(* Utility function, check if pattern p is unique for assumptions asl and - conclusion tm - The check is implemented by running qpat_x_assum twice. - The first call must succeed, and the second fail for the pattern to be unique. 
-*) -fun is_unique p asl tm = - let - (* mp_tac to keep variables of the matched agains assumption in context *) - val (gls, _) = qpat_x_assum p mp_tac (asl, tm); - val r = - let val _ = qpat_x_assum p kall_tac (hd(gls)) in false end - handle HOL_ERR _ => true; - in r end - handle HOL_ERR _ => false; - -(* Pattern generation algorithm - Generates a list of patterns for the pattern sl given as a list of strings. - Parameter n determines from which index to start replacing variables/values by - _, asl is the assumption list from the proof and tm the conclusion *) -fun gen_pats sl (n:int) asl tm : string list list= - (* The initially given pattern must be unique to be generalized *) - if (is_unique (mk_tm_quote sl) asl tm) - then - if (Int.< (n,(List.length sl))) (* check that we can replace a part by _ *) - then - let - val thePat = LassieUtilsLib.list_replace n "_" sl - val r1 = gen_pats thePat (n+1) asl tm - handle PATGENERROR s => [] - val r2 = gen_pats sl (n+1) asl tm - handle PATGENERROR s => [] - in - r1 @ r2 - end - else (* List length exceeded, pattern was unique -> return *) - [sl] - else (* pattern was not unique -> fail with an error *) - raise PATGENERROR "Pattern not unique"; - -(* Takes the nth assumption, generates a most-general pattern from it and uses - that as input to ttac *) -fun get_tac (n:int) (ttac:term quotation -> tactic) : tactic = - fn (g as (asl, tm)) => - let - val theAsm = List.nth (asl, n) - val strList = LassieUtilsLib.string_split (term_to_string theAsm) #" " - val finalList = LassieUtilsLib.rejoin_pars strList - val pats = gen_pats strList n asl tm - val thePat = hd pats - val _ = print ( - "\nAssumption " ^ (Int.toString n) ^ " can be obtained with pattern " - ^ (String.concatWith " " thePat) ^ "\n" - ^ "using the tactic qpat_x_assum " ^ (String.concatWith " " thePat) - ^ "ttac\n"); - val theQuote = (mk_tm_quote (hd pats)) (* TODO: Find good heuristic for picking a quotation *) - in - ttac theQuote g - end; - -rpt strip_tac 
-get_tac 0 (fn p => qpat_x_assum p mp_tac) - -(* -val quot_ls = ["f", "a", "b"]; -val tmquot_test :Term.term bossLib.quotation = mk_tm_quote quot_ls; -val asms = [``(f:'a -> 'b -> bool) a b``, ``(g:'a -> 'b -> bool) a c``]; -val gl = ``T``; - -val (r,_) = qpat_x_assum tmquot_test kall_tac (asms, gl); -val (r, _) = qpat_x_assum tmquot_test kall_tac (hd r); - - -gen_pats ["f", "a", "b"] 0 asms ``T``; - -g `f a b /\ g a b==> T` -rpt strip_tac - -get_tac 0 (fn (t) => qpat_x_assum (mk_tm_quote (string_split (term_to_string t) #" ")) mp_tac) - -get 0 (fn tm => - (fn g as (asl, gl) => - let - val pat = gen_pat (string_split (term_to_string tm) #" ") 0 asl gl; - val _ = map (map print) pat; - in qpat_x_assum (mk_tm_quote (hd pat)) mp_tac g end)) -*) -end diff --git a/examples/lassie/src/LassieTestScript.sml b/examples/lassie/src/LassieTestScript.sml deleted file mode 100644 index 144bee4e56..0000000000 --- a/examples/lassie/src/LassieTestScript.sml +++ /dev/null @@ -1,38 +0,0 @@ -open BasicProvers Defn HolKernel Parse SatisfySimps Tactic monadsyntax boolTheory bossLib; -open LassieLib; - -val _ = new_theory "LassieTest"; - -val this_can_never_be_a_thm = Q.store_thm ("test", `∀ (n:num). T`, fs[]); - -val tg:(term list * term) = ([], “∀ (n:num). T”); - -val t = LassieLib.nltac ‘cheat. cheat. cheat. cheat.’ tg; - -val t = LassieLib.nltac ‘Cases.’ tg; - -val t = LassieLib.nltac ‘Cases_on ' n '.’ tg; - -val t = LassieLib.nltac ‘fs [ arithmeticTheory.ADD_ASSOC ].’ tg; - -val t = LassieLib.def `test123` `cheat`; - -val t = LassieLib.nltac ‘test123.’ tg; - -val t = LassieLib.nltac ‘imp_res_tac test.’ tg; - -val t = LassieLib.nltac ‘(qspec_then ' x ' irule test).’ tg; - -val t = LassieLib.def `resolve_with test` `imp_res_tac test`; - -val t = LassieLib.nltac ‘resolve_with CONJ_COMM.’ tg; - -val t = LassieLib.nltac ‘fs [ test , test ].’ tg; - -val t = LassieLib.nltac ‘cheat THEN cheat.’ tg; - -val t = LassieLib.nltac ‘' T ' by cheat.’ tg; - -val t = LassieLib.nltac ‘Goal 1. 
cheat.’ tg; - -val _ = export_theory(); diff --git a/examples/lassie/src/LassieUtilsLib.sml b/examples/lassie/src/LassieUtilsLib.sml deleted file mode 100644 index e31778d67d..0000000000 --- a/examples/lassie/src/LassieUtilsLib.sml +++ /dev/null @@ -1,234 +0,0 @@ -structure LassieUtilsLib = -struct - - open Lib; -(*********************************) -(* Utils *) -(*********************************) -fun sleep t = - let - val wakeUp = Time.+ (Time.now(), Time.fromReal(t)) - fun wait () = if Time.> (Time.now(), wakeUp) then () else wait () - in - wait () - end; - -fun flushStream instream = - case TextIO.canInput(instream, 5000) of - NONE => () - | SOME n => - if n = 0 then () - else (TextIO.input(instream); flushStream(instream)); - -(* some string editing to remove long package names esp. in call formulas *) -fun simplifyAbsoluteNames str = - let - fun isSep s = mem s [#" ", #"(", #")", #"\""] - fun append s l = - case l of - [] => [s] - | hd::tl => (s ^ hd)::tl - val tokens = - List.foldl - (fn (c,l) => if isSep c then ""::(String.str c)::l else append (String.str c) l) - [] - (List.rev (String.explode str)) - fun isNotEmpty s = not (s = "") - fun getLocalName s = List.hd (List.rev (String.tokens (fn c => c = #".") s)) - in - String.concat (map getLocalName (List.filter isNotEmpty tokens)) - end; - -(* escape quotes and backslashes before writing to a string *) -fun escape str = - let - val escEsc = map (fn c => if c = "\\" then "\\\\" else c) - val escQuotes = map (fn c => if c = "\"" then "\\\"" else c) - in - str |> String.explode - |> map String.str - |> escEsc - |> escQuotes - |> String.concat - end; - -(* normalize a string representing an HOL4 expression for viewing *) -fun normalize str = - let - (* space out function applications through direct parens e.g. 
map(f)lst *) - fun injectSpc sl = - case sl of - s1::s2::tl => - if (s2 = "(" andalso not (mem s1 ["("," ",")"])) orelse - (s1 = ")" andalso not (mem s2 [")"," "])) - then injectSpc (s1::" "::s2::tl) - else s1::(injectSpc (s2::tl)) - | other => other - (* rewrite string with a minimal number of parentheses *) - fun paren str b = if b then ("("::str) @ [")"] else str - fun rmParens left p right = - case right of - [] => (left, false, []) (* base case *) - | c::tail => - if c = ")" then (left, p, tail) (* base case of rec calls *) - else if c = "(" - then - let (* inductive case *) - val (left', p', right') = rmParens [] false tail (* rec *) - (* if nothing on left do not parenthesize, applications are left associative *) - val left' = if left = [] then left' else paren left' p' - val left' = if left' = [] then ["(",")"] else left' (* unit *) - in - rmParens (left @ left') p right' - end (* continue *) - else rmParens (left@[c]) (p orelse c = " ") tail - val (retStr, _, _) = - rmParens [] false ( str |> String.explode - |> (map String.str) - |> injectSpc ) - in - String.concat retStr - end; - -exception VariableUndefined of string; - -fun endsWith (s:string) (c:char) : bool = - let - val sl = explode s; - in - (hd (List.rev sl) = c) - end; - -fun getOSVar name = - case OS.Process.getEnv name of - NONE => raise VariableUndefined ("Variable " ^ name ^ " not defined in environment") - | SOME s => s; - -fun string_split s cr = - let - fun nextStr cr [] strAcc = ([],List.rev strAcc) | - nextStr cr (c::res) strAcc = - if c = cr - then (res, List.rev strAcc) - else nextStr cr res (c :: strAcc); - fun splitAll cr [] acc = List.rev acc | - splitAll cr chrl acc = - let val (res, nextStr) = nextStr cr chrl [] in splitAll cr res (implode nextStr::acc) end; - in - splitAll cr (explode s) [] - end; - -exception NotFoundException; - -fun get_suffix_after_match str1 str2 = - let - fun get_suffix_after_match_list flag ls1 ls2 = - case (ls1,ls2) of - ([], _) => if flag then ls2 else 
raise NotFoundException - | (_, []) => raise NotFoundException - | (c1::ls1, c2::ls2) => - if (c1 = c2) then - get_suffix_after_match_list true ls1 ls2 - else if flag then - raise NotFoundException - else get_suffix_after_match_list false (c1::ls1) ls2 - in - implode (get_suffix_after_match_list false (explode str1) (explode str2)) - end; - -fun get_prefix_before_match str1 str2 = - let - fun get_prefix_before_match_akk str1 str2 akk = - if (String.isPrefix str1 str2) then implode (List.rev akk) - else - if (str2 = "") then raise NotFoundException - else get_prefix_before_match_akk str1 (implode (tl (explode str2))) (hd(explode str2)::akk); - in - get_prefix_before_match_akk str1 str2 [] - end; - -fun list_replace n x l = - if (n = 0) - then - case l of - [] => [] - | y::l' => x::l' - else - case l of - [] => [] - | y :: l' => y :: (list_replace (n-1) x l'); - -fun matchRBrack [] = NONE | - matchRBrack (s::sl) = - if (String.isPrefix "(" s) - then - if (String.isSuffix ")" s) - then case matchRBrack sl of - NONE => NONE - | SOME (sNew, rs) => SOME (s ^ sNew, rs) - else - case matchRBrack sl of - NONE => NONE - | SOME (sNew1, rs1) => - case matchRBrack rs1 of - NONE => NONE - | SOME (sNew2, rs2) => - SOME (s^ sNew1 ^ sNew2, rs2) - else if (String.isSuffix ")" s) - then SOME (s, sl) - else - case matchRBrack sl of - NONE => NONE - | SOME (sNew, rs) => - SOME (s ^ sNew, rs); - -fun rejoin_pars [] = [] | - rejoin_pars (s::sl) = - if (String.isPrefix "(" s) - then - if (String.isSuffix ")" s) - then s :: rejoin_pars sl - else - let val (sNew, rs) = valOf (matchRBrack sl) in - (s ^ sNew) :: rejoin_pars rs - end - else s :: (rejoin_pars sl); - - fun stripSpaces s = - case s of - [] => "" - | c::cs => if (c = #" ") - then stripSpaces cs - else implode (c::cs); - - fun preprocess s = - let - val strs = string_split s #")" - val remainder = - if (String.isPrefix "(*#loc" (stripSpaces (explode (hd (strs))))) - then tl (strs) - else strs - val noBreaks = - List.map 
(String.translate - (fn c => if c = #"\n" then " " else if Char.isCntrl c then "" else implode [c])) - remainder - val res = - if String.isPrefix " " (hd noBreaks) - then String.concatWith ")" (stripSpaces (explode (hd noBreaks)) :: (tl noBreaks)) - else String.concatWith ")"noBreaks - in - if (String.isSuffix ")" s) - then (res^")") - else res - end; - - fun listStrip ls1 ls2 = - case (ls1, ls2) of - ([], _) => ls2 - | (i1::ls1, i2::ls2) => if (i1 = i2) then listStrip ls1 ls2 else [] - | (_,_) => []; - - fun removeTrailing str fullStr = - implode (rev (listStrip (List.rev (explode str)) (List.rev (explode fullStr)))); - -end diff --git a/examples/lassie/src/TacticMap.sml b/examples/lassie/src/TacticMap.sml deleted file mode 100644 index 848f3a43d0..0000000000 --- a/examples/lassie/src/TacticMap.sml +++ /dev/null @@ -1,89 +0,0 @@ -(** - Structure: TacticMap - - Uses the association map implemented in AssocMap.sml that maps strings to - tactic "closures". - Lassie uses it internally to map the SEMPRE returned intermediate language - into concrete HOL4 tactics code -**) -structure TacticMap = -struct - - open Lib Tactic Tactical Rewrite bossLib mesonLib; - - datatype tacticClos = - Tactic of tactic - | Tactical of (tactic -> tactic) - | TacticComb of (tactic * tactic -> tactic) - | TermComb of (term quotation * tactic -> tactic) - | ThmTactic of (thm -> tactic) - | QuotTactic of (term quotation -> tactic) - | ThmListTactic of (thm list -> tactic) - (* first_x_assum ,... *) - | AsmTestTactic of (thm_tactic -> Tactical.tactic) (* TODO: Fix overloading? *) - (* qpat_assum, ... *) - | AsmMatchTactic of (term quotation -> (thm -> tactic) -> tactic) - (* qspec_then, ... *) - | QuotSpecThmTactic of (term quotation -> (thm -> tactic) -> thm -> tactic) - (* qspecl_then, ... 
*) - | QuotListSpecThmTactic of (term quotation list -> (thm -> tactic) -> thm -> tactic); - - fun empty (_:unit) = AssocMap.Leaf; - - fun lookupTac (s:string) (tr:(string,tacticClos) AssocMap.tree) = - AssocMap.lookup s tr String.compare; - - fun insertTac (s:string) (t:tacticClos) (tr:(string,tacticClos) AssocMap.tree) = - AssocMap.append s t tr String.compare; - - fun insTac (s,t) = insertTac s (Tactic t); - fun insTact (s, tt) = insertTac s (Tactical tt); - fun insTacComb (s, tc) = insertTac s (TacticComb tc); - fun insTmComb (s, tc) = insertTac s (TermComb tc); - fun insThmTac (s,t) = insertTac s (ThmTactic t); - fun insQuotTac (s,t) = insertTac s (QuotTactic t); - fun insThmsTac (s,t) = insertTac s (ThmListTactic t); - fun insAsmTt (s,t) = insertTac s (AsmTestTactic t); - fun insAsmMt (s,t) = insertTac s (AsmMatchTactic t); - fun insQuotSpecTac (s,t) = insertTac s (QuotSpecThmTactic t) - fun insQuotListSpecTac (s,t) = insertTac s (QuotListSpecThmTactic t); - - fun appendTacs tf s = fn t => foldl (fn (e,t) => tf e t) t s; - - (* Define a standard Lassie Tree that has rudimentary support for the most - common tactics *) - val stdTree = - appendTacs insTac - [("cheat", cheat), ("strip_tac", strip_tac), ("gen_tac", gen_tac), - ("Cases", Cases), ("Induct", Induct), ("res_tac", res_tac), - ("conj_tac", conj_tac), ("all_tac", all_tac), ("NO_TAC", NO_TAC), - ("EQ_TAC", EQ_TAC), ("CCONTR_TAC", CCONTR_TAC), - ("AP_THM_TAC", AP_THM_TAC), ("AP_TERM_TAC", AP_TERM_TAC)] - (empty()) - |> appendTacs insTact [("rpt", rpt), ("TRY", TRY)] - |> appendTacs insTacComb [("THEN",op THEN), ("ORELSE", op ORELSE)] - |> appendTacs insTmComb [("by", op by), ("suffices_by", op suffices_by)] - |> appendTacs insThmTac - [("imp_res_tac", imp_res_tac), ("assume_tac", assume_tac), - ("irule", irule), ("drule", drule), ("match_mp_tac", match_mp_tac), - ("mp_tac", mp_tac)] - |> appendTacs insQuotTac - [("Cases_on", Cases_on), ("Induct_on", Induct_on), - ("completeInduct_on", completeInduct_on), 
("qexists_tac", qexists_tac), - ("sg", sg), ("subgoal", subgoal)] - |> appendTacs insThmsTac - [("asm_rewrite_tac", asm_rewrite_tac), ("rewrite_tac", rewrite_tac), - ("once_rewrite_tac", once_rewrite_tac), - ("once_asm_rewrite_tac", once_asm_rewrite_tac), ("simp", simp), - ("fs", fs), ("rfs", rfs), ("rw", rw), ("metis_tac", metis_tac), - ("MESON_TAC", MESON_TAC)] - |> appendTacs insAsmTt - [("first_x_assum", first_x_assum), ("first_assum", first_assum), - ("last_x_assum", last_x_assum), ("last_assum", last_assum), - ("spose_not_then", spose_not_then), ("pop_assum", pop_assum) ] - |> appendTacs insAsmMt - [("qpat_x_assum", qpat_x_assum), ("qpat_assum", qpat_assum)] - |> appendTacs insQuotSpecTac [("qspec_then", qspec_then)] - |> appendTacs insQuotListSpecTac [("qspecl_then", qspecl_then)]; - -end; diff --git a/examples/lassie/src/init.sh b/examples/lassie/src/init.sh deleted file mode 100755 index f883cb762f..0000000000 --- a/examples/lassie/src/init.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -if [ "$LASSIEDIR" == "" ]; then - export LASSIEDIR=../ #recovery attempt -fi - -if [[ -r "$LASSIEDIR/sempre/interactive/lassie.lexicon" ]] -then - echo "SEMPRE already initialized skipping intialization" -else - cd $LASSIEDIR/sempre - echo "Initialization running" - ./pull-dependencies core interactive - #ant core interactive - echo "Initialization done" - cd $LASSIEDIR/src -fi diff --git a/src/parallel_builds/core/Holmakefile b/src/parallel_builds/core/Holmakefile index f071ceac5b..c1c98b233e 100644 --- a/src/parallel_builds/core/Holmakefile +++ b/src/parallel_builds/core/Holmakefile @@ -40,7 +40,6 @@ EXDIRS = arm/arm6-verification arm/armv8-memory-model arm/experimental \ imperative ind_def \ l3-machine-code/common \ lambda \ -# lassie \ logic logic/folcompactness logic/modal-models logic/modal-tableaux \ logic/ncfolproofs logic/propositional_logic logic/relevant-logic \ misc parity rings set-theory/zfset set-theory/vbg zipper