diff --git a/lib/lucene_query_parser/parser.rb b/lib/lucene_query_parser/parser.rb index c3c115b..253928e 100644 --- a/lib/lucene_query_parser/parser.rb +++ b/lib/lucene_query_parser/parser.rb @@ -29,7 +29,7 @@ def initialize(args={}) # must define :term rule at run-time so that it can include # the term_re_str self.class.rule :term do - match[term_re_str].repeat(1).as(:term) >> (fuzzy | boost).maybe + ( (escape_special_words | match[term_re_str]).repeat(1) ).as(:term) >> (fuzzy | boost).maybe end else self.class.rule :term do @@ -44,16 +44,16 @@ def initialize(args={}) rule :expr do space.maybe >> - operand >> (space >> (operator >> space >> operand | operand)).repeat >> + operand >> (space.maybe >> (operator >> space.maybe >> operand | operand)).repeat >> space.maybe end rule :operator do - str('AND').as(:op) | str('OR').as(:op) + str('AND').as(:op) | str('OR').as(:op) | str('&&').as(:op) | str('||').as(:op) end rule :operand do - unary_operator.maybe >> ( + unary_operator.maybe >> space.maybe >> ( group | field | term | @@ -70,12 +70,9 @@ def initialize(args={}) (distance | boost).maybe end - rule :distance do - str('~') >> match['0-9'].repeat(1).as(:distance) - end - rule :group do - str('(') >> space.maybe >> expr.as(:group) >> space.maybe >> str(')') + str('(') >> space.maybe >> expr.as(:group) >> space.maybe >> str(')') >> + boost.maybe end rule :field do @@ -102,16 +99,21 @@ def initialize(args={}) rule :unary_operator do str('+').as(:required) | str('-').as(:prohibited) | + str('!').as(:prohibited) | (str('NOT').as(:op) >> space) end + rule :distance do + space.maybe >> str('~') >> space.maybe >> match['0-9'].repeat(1).as(:distance) + end + rule :fuzzy do - str('~') >> + space.maybe >> str('~') >> ( str('0.') >> match['0-9'].repeat(1) | match['01'] ).maybe.as(:similarity) end rule :boost do - str('^') >> ( + space.maybe >> str('^') >> space.maybe >> ( str('0.') >> match['0-9'].repeat(1) | match['0-9'].repeat(1) ).as(:boost) @@ -130,7 +132,7 @@ def initialize(args={}) end rule :space do - match["\n \t"].repeat(1) + match["\n \t\u00a0\u200B"].repeat(1) end end diff --git a/spec/lucene_query_parser/parser_spec.rb b/spec/lucene_query_parser/parser_spec.rb index c487102..f0aeb61 100644 --- a/spec/lucene_query_parser/parser_spec.rb +++ b/spec/lucene_query_parser/parser_spec.rb @@ -62,12 +62,56 @@ def show_err(input, location) ) end + it "parses a nearness query (forgiving)" do + should parse(%q("foo bar" ~2)).as( + {:phrase => "foo bar", :distance => "2"} + ) + end + + it "parses a nearness query (even more forgiving)" do + should parse(%q("foo bar" ~ 2)).as( + {:phrase => "foo bar", :distance => "2"} + ) + end + it "parses a paren grouping" do should parse(%q((foo bar))).as( {:group => [{:term => "foo"}, {:term => "bar"}]} ) end + it "parses grouping side by side with space" do + should parse('(foo bar) (lorem ipsum)').as([ + {:group => [{:term => "foo"}, {:term => "bar"}]}, + {:group => [{:term => "lorem"}, {:term => "ipsum"}]} + ]) + end + + it "parses grouping side by side with no space" do + should parse('(foo bar)(lorem ipsum)').as([ + {:group => [{:term => "foo"}, {:term => "bar"}]}, + {:group => [{:term => "lorem"}, {:term => "ipsum"}]} + ]) + end + + it "parses boosts in groupings" do + should parse('(foo bar)^5').as( + {:group => [{:term => "foo"}, {:term => "bar"}], :boost => "5"} + ) + end + + it "parses boosts in groupings (forgiving)" do + should parse('(foo bar) ^5').as( + {:group => [{:term => "foo"}, {:term => "bar"}], :boost => "5"} + ) + end + + it "parses boosts in groupings (even more forgiving)" do + should parse('(foo bar) ^ 5').as( + {:group => [{:term => "foo"}, {:term => "bar"}], :boost => "5"} + ) + end + it "parses nested paren groups" do should parse(%q((foo (bar (baz))))).as( {:group => [ @@ -84,10 +128,32 @@ def show_err(input, location) should parse("+foo").as({:term => "foo", :required => "+"}) end + it "parses a required term (lenient)" do + should parse("+ foo").as({:term => "foo", :required => "+"}) + end + + it "parses a required term (lenient) v2" do + should parse("foo + bar").as([ + {:term => "foo"}, + {:term => "bar", :required => "+"} + ]) + end + it "parses a prohibited term" do should parse("-foo").as({:term => "foo", :prohibited => "-"}) end + it "parses a prohibited term (lenient)" do + should parse("- foo").as({:term => "foo", :prohibited => "-"}) + end + + it "parses a prohibited term (lenient) v2" do + should parse("foo - bar").as([ + {:term => "foo"}, + {:term => "bar", :prohibited => "-"} + ]) + end + it "parses prohibited groups and phrases" do should parse(%q(+(foo bar) -"mumble stuff")).as [ {:group => [{:term => "foo"}, {:term => "bar"}], :required => "+"}, @@ -114,6 +180,20 @@ def show_err(input, location) ] end + it "parses && groupings" do + should parse(%q(foo && bar)).as [ + {:term => "foo"}, + {:op => "&&", :term => "bar"} + ] + end + + it "parses || groupings" do + should parse(%q(foo || bar)).as [ + {:term => "foo"}, + {:op => "||", :term => "bar"} + ] + end + it "parses a sequence of AND and OR" do should parse(%q(foo AND bar OR baz OR mumble)).as [ {:term => "foo"}, @@ -130,6 +210,41 @@ def show_err(input, location) ] end + it "parses NOTs with a group" do + should parse("foo NOT (bar coca)").as [ + {:term => "foo"}, + {:group => [{:term => "bar"}, {:term => "coca"}], :op => "NOT"} + ] + end + + it "parses negation in terms" do + should parse("foo !bar").as [ + {:term => "foo"}, + {:term => "bar", :prohibited => "!"} + ] + end + + it "parses negation in groupings" do + should parse('!(foo bar)^5').as( + {:group => [{:term => "foo"}, {:term => "bar"}], :prohibited => "!", :boost => "5"} + ) + end + + it "parses negation in phrases" do + q = %q(!"foo bar" isn't one) + should parse(q).as [ + {:phrase => "foo bar", :prohibited => "!"}, + {:term => "isn't"}, + {:term => "one"} + ] + end + + it "parses negation in field:value" do + should parse("!title:foo").as( + {:field => "title", :term => "foo", :prohibited => "!"} + ) + end + it "parses field:value" do should parse("title:foo").as( {:field => "title", :term => "foo"} @@ -175,6 +290,18 @@ def show_err(input, location) ) end + it "parses a boost on phrase" do + should parse('"some phrase"^3').as( + {:phrase => "some phrase", :boost => "3"} + ) + end + + it "parses a boost on phrase (forgiving)" do + should parse('"some phrase" ^3').as( + {:phrase => "some phrase", :boost => "3"} + ) + end + it { should parse('year:[2010 TO 2011]').as( {:field => "year", :inclusive_range => {:from => "2010", :to => "2011"}} ) } @@ -249,6 +376,12 @@ def show_err(input, location) should parse('fo?').as( {:term => 'fo?'} ) end + it "parses non-breaking space" do + should parse("foo bar").as [ # do not be fooled, there is a non-breaking space between foo and bar + {:term => "foo"}, + {:term => "bar"}, + ] + end end describe "#error_location" do