Added more tests; fixed run file path clash.

castorini · Dec 20, 2023 · de53f6c · de53f6c
1 parent c359d44
commit de53f6c
Show file tree

Hide file tree

Showing 8 changed files with 156 additions and 57 deletions.
diff --git a/docs/regressions/regressions-msmarco-passage-ca.md b/docs/regressions/regressions-msmarco-passage-ca.md
@@ -21,11 +21,11 @@ Typical indexing command:
 ```
 target/appassembler/bin/IndexCollection \
   -collection JsonCollection \
-  -input /path/to/msmarco-wp \
+  -input /path/to/msmarco-passage \
   -generator DefaultLuceneDocumentGenerator \
   -index indexes/lucene-index.msmarco-passage-ca/ \
   -threads 9 -storePositions -storeDocvectors -storeRaw -analyzeWithHuggingFaceTokenizer bert-base-uncased -useCompositeAnalyzer \
-  >& logs/log.msmarco-wp &
+  >& logs/log.msmarco-passage &
 ```
 
 The directory `/path/to/msmarco-passage/` should be a directory containing the corpus in Anserini's jsonl format.
@@ -44,17 +44,17 @@ target/appassembler/bin/SearchCollection \
   -index indexes/lucene-index.msmarco-passage-ca/ \
   -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \
   -topicReader TsvInt \
-  -output runs/run.msmarco-wp.bm25-default.topics.msmarco-passage.dev-subset.txt \
-  -bm25  -analyzeWithHuggingFaceTokenizer bert-base-uncased -useCompositeAnalyzer &
+  -output runs/run.msmarco-passage.bm25-default.topics.msmarco-passage.dev-subset.txt \
+  -bm25 -analyzeWithHuggingFaceTokenizer bert-base-uncased -useCompositeAnalyzer &
 ```
 
 Evaluation can be performed using `trec_eval`:
 
 ```
-tools/eval/trec_eval.9.0.4/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-wp.bm25-default.topics.msmarco-passage.dev-subset.txt
-tools/eval/trec_eval.9.0.4/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-wp.bm25-default.topics.msmarco-passage.dev-subset.txt
-tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-wp.bm25-default.topics.msmarco-passage.dev-subset.txt
-tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-wp.bm25-default.topics.msmarco-passage.dev-subset.txt
+tools/eval/trec_eval.9.0.4/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.bm25-default.topics.msmarco-passage.dev-subset.txt
+tools/eval/trec_eval.9.0.4/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.bm25-default.topics.msmarco-passage.dev-subset.txt
+tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.bm25-default.topics.msmarco-passage.dev-subset.txt
+tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.bm25-default.topics.msmarco-passage.dev-subset.txt
 ```
 
 ## Effectiveness

diff --git a/docs/regressions/regressions-msmarco-passage-hgf-wp.md b/docs/regressions/regressions-msmarco-passage-hgf-wp.md
@@ -23,11 +23,11 @@ Typical indexing command:
 ```
 target/appassembler/bin/IndexCollection \
   -collection JsonCollection \
-  -input /path/to/msmarco-wp \
+  -input /path/to/msmarco-passage \
   -generator DefaultLuceneDocumentGenerator \
   -index indexes/lucene-index.msmarco-passage-hgf-wp/ \
   -threads 9 -storePositions -storeDocvectors -storeRaw -analyzeWithHuggingFaceTokenizer bert-base-uncased \
-  >& logs/log.msmarco-wp &
+  >& logs/log.msmarco-passage &
 ```
 
 The directory `/path/to/msmarco-passage/` should be a directory containing the corpus in Anserini's jsonl format.
@@ -46,17 +46,17 @@ target/appassembler/bin/SearchCollection \
   -index indexes/lucene-index.msmarco-passage-hgf-wp/ \
   -topics tools/topics-and-qrels/topics.msmarco-passage.dev-subset.txt \
   -topicReader TsvInt \
-  -output runs/run.msmarco-wp.bm25-default.topics.msmarco-passage.dev-subset.txt \
-  -bm25  -analyzeWithHuggingFaceTokenizer  bert-base-uncased &
+  -output runs/run.msmarco-passage.bm25-default.topics.msmarco-passage.dev-subset.txt \
+  -bm25 -analyzeWithHuggingFaceTokenizer  bert-base-uncased &
 ```
 
 Evaluation can be performed using `trec_eval`:
 
 ```
-tools/eval/trec_eval.9.0.4/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-wp.bm25-default.topics.msmarco-passage.dev-subset.txt
-tools/eval/trec_eval.9.0.4/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-wp.bm25-default.topics.msmarco-passage.dev-subset.txt
-tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-wp.bm25-default.topics.msmarco-passage.dev-subset.txt
-tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-wp.bm25-default.topics.msmarco-passage.dev-subset.txt
+tools/eval/trec_eval.9.0.4/trec_eval -c -m map tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.bm25-default.topics.msmarco-passage.dev-subset.txt
+tools/eval/trec_eval.9.0.4/trec_eval -c -M 10 -m recip_rank tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.bm25-default.topics.msmarco-passage.dev-subset.txt
+tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.100 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.bm25-default.topics.msmarco-passage.dev-subset.txt
+tools/eval/trec_eval.9.0.4/trec_eval -c -m recall.1000 tools/topics-and-qrels/qrels.msmarco-passage.dev-subset.txt runs/run.msmarco-passage.bm25-default.topics.msmarco-passage.dev-subset.txt
 ```
 
 ## Effectiveness

diff --git a/src/main/java/io/anserini/search/SearchCollection.java b/src/main/java/io/anserini/search/SearchCollection.java
@@ -235,7 +235,7 @@ public static class Args extends BaseSearchArgs {
     // ----------------------------------------------------------
 
     @Option(name = "-impact",
-        forbids = {"-bm25", "-qld", "-qljm", "-inl2", "-spl", "-f2exp", "-f2log"},
+        forbids = {"-bm25", "-bm25.accurate", "-qld", "-qljm", "-inl2", "-spl", "-f2exp", "-f2log"},
         usage = "ranking model: impact scores")
     public boolean impact = false;
 
@@ -244,11 +244,13 @@ public static class Args extends BaseSearchArgs {
     // -------------------
 
     @Option(name = "-bm25",
-        forbids = {"-impact", "-qld", "-qljm", "-inl2", "-spl", "-f2exp", "-f2log"},
+        forbids = {"-impact", "-bm25.accurate", "-qld", "-qljm", "-inl2", "-spl", "-f2exp", "-f2log"},
         usage = "ranking model: BM25")
     public boolean bm25 = false;
 
-    @Option(name = "-bm25.accurate", usage = "BM25: use accurate document lengths")
+    @Option(name = "-bm25.accurate",
+        forbids = {"-impact", "-bm25", "-qld", "-qljm", "-inl2", "-spl", "-f2exp", "-f2log"},
+        usage = "BM25: use accurate document lengths")
     public boolean bm25Accurate = false;
 
     // BM25 parameters: Robertson et al. (TREC 4) propose the range of 1.0-2.0 for k1 and 0.6-0.75 for b, with k1 = 1.2
@@ -269,7 +271,7 @@ public static class Args extends BaseSearchArgs {
     // --------------------------------------------------------
 
     @Option(name = "-qld",
-        forbids = {"-impact", "-bm25", "-qljm", "-inl2", "-spl", "-f2exp", "-f2log"},
+        forbids = {"-impact", "-bm25", "-bm25.accurate", "-qljm", "-inl2", "-spl", "-f2exp", "-f2log"},
         usage = "ranking model: query likelihood with Dirichlet smoothing")
     public boolean qld = false;
 
@@ -289,7 +291,7 @@ public static class Args extends BaseSearchArgs {
     // -------------------------------------------------------------
 
     @Option(name = "-qljm",
-        forbids = {"-impact", "-bm25", "-qld", "-inl2", "-spl", "-f2exp", "-f2log"},
+        forbids = {"-impact", "-bm25", "-bm25.accurate", "-qld", "-inl2", "-spl", "-f2exp", "-f2log"},
         usage = "ranking model: query likelihood with Jelinek-Mercer smoothing")
     public boolean qljm = false;
 
@@ -301,31 +303,31 @@ public static class Args extends BaseSearchArgs {
     // -----------------------------------------
 
     @Option(name = "-inl2",
-        forbids = {"-impact", "bm25", "-qld", "-qljm", "-spl", "-f2exp", "-f2log"},
+        forbids = {"-impact", "-bm25", "-bm25.accurate", "-qld", "-qljm", "-spl", "-f2exp", "-f2log"},
         usage = "use I(n)L2 scoring model")
     public boolean inl2 = false;
 
     @Option(name = "-inl2.c", metaVar = "[value]", usage = "I(n)L2 c parameter")
     public String[] inl2_c = new String[]{"0.1"};
 
     @Option(name = "-spl",
-        forbids = {"-impact", "bm25", "-qld", "-qljm", "-inl2", "-f2exp", "-f2log"},
+        forbids = {"-impact", "-bm25", "-bm25.accurate", "-qld", "-qljm", "-inl2", "-f2exp", "-f2log"},
         usage = "use SPL scoring model")
     public boolean spl = false;
 
     @Option(name = "-spl.c", metaVar = "[value]", usage = "SPL c parameter")
     public String[] spl_c = new String[]{"0.1"};
 
     @Option(name = "-f2exp",
-        forbids = {"-impact", "bm25", "-qld", "-qljm", "-inl2", "-spl", "-f2log"},
+        forbids = {"-impact", "-bm25", "-bm25.accurate", "-qld", "-qljm", "-inl2", "-spl", "-f2log"},
         usage = "use F2Exp scoring model")
     public boolean f2exp = false;
 
     @Option(name = "-f2exp.s", metaVar = "[value]", usage = "F2Exp s parameter")
     public String[] f2exp_s = new String[]{"0.5"};
 
     @Option(name = "-f2log",
-        forbids = {"-impact", "bm25", "-qld", "-qljm", "-inl2", "-spl", "-f2exp"},
+        forbids = {"-impact", "-bm25", "-bm25.accurate", "-qld", "-qljm", "-inl2", "-spl", "-f2exp"},
         usage = "use F2Log scoring model")
     public boolean f2log = false;
 
@@ -503,6 +505,7 @@ public static class Args extends BaseSearchArgs {
     public Args impact() {
       this.impact = true;
       this.bm25 = false;
+      this.bm25Accurate = false;
       this.qld = false;
       this.qljm = false;
       this.inl2 = false;
@@ -516,6 +519,21 @@ public Args impact() {
     public Args bm25() {
       this.impact = false;
       this.bm25 = true;
+      this.bm25Accurate = false;
+      this.qld = false;
+      this.qljm = false;
+      this.inl2 = false;
+      this.spl = false;
+      this.f2exp = false;
+      this.f2log = false;
+
+      return this;
+    }
+
+    public Args bm25Accurate() {
+      this.impact = false;
+      this.bm25 = false;
+      this.bm25Accurate = true;
       this.qld = false;
       this.qljm = false;
       this.inl2 = false;
@@ -529,6 +547,7 @@ public Args bm25() {
     public Args qld() {
       this.impact = false;
       this.bm25 = false;
+      this.bm25Accurate = false;
       this.qld = true;
       this.qljm = false;
       this.inl2 = false;
@@ -542,6 +561,7 @@ public Args qld() {
     public Args qljm() {
       this.impact = false;
       this.bm25 = false;
+      this.bm25Accurate = false;
       this.qld = false;
       this.qljm = true;
       this.inl2 = false;
@@ -555,6 +575,7 @@ public Args qljm() {
     public Args inl2() {
       this.impact = false;
       this.bm25 = false;
+      this.bm25Accurate = false;
       this.qld = false;
       this.qljm = false;
       this.inl2 = true;
@@ -568,6 +589,7 @@ public Args inl2() {
     public Args spl() {
       this.impact = false;
       this.bm25 = false;
+      this.bm25Accurate = false;
       this.qld = false;
       this.qljm = false;
       this.inl2 = false;
@@ -581,6 +603,7 @@ public Args spl() {
     public Args f2exp() {
       this.impact = false;
       this.bm25 = false;
+      this.bm25Accurate = false;
       this.qld = false;
       this.qljm = false;
       this.inl2 = false;
@@ -594,6 +617,7 @@ public Args f2exp() {
     public Args f2log() {
       this.impact = false;
       this.bm25 = false;
+      this.bm25Accurate = false;
       this.qld = false;
       this.qljm = false;
       this.inl2 = false;

diff --git a/src/main/python/run_regression.py b/src/main/python/run_regression.py
@@ -137,8 +137,10 @@ def construct_indexing_command(yaml_data, args):
     return index_command
 
 
-def construct_runfile_path(corpus, id, model_name):
-    return os.path.join('runs/', 'run.{0}.{1}.{2}'.format(corpus, id, model_name))
+def construct_runfile_path(index, id, model_name):
+    # 'indexes/lucene-index.msmarco-passage-ca/' -> 'msmarco-passage-ca'; split on the first '.' only, so dotted names (e.g., 'beir-v1.0.0-arguana.flat') stay distinct.
+    index_part = os.path.basename(os.path.normpath(index)).split('.', 1)[-1]
+    return os.path.join('runs/', 'run.{0}.{1}.{2}'.format(index_part, id, model_name))
 
 
 def construct_search_commands(yaml_data):
@@ -148,7 +150,7 @@ def construct_search_commands(yaml_data):
             '-index', construct_index_path(yaml_data),
             '-topics', os.path.join('tools/topics-and-qrels', topic_set['path']),
             '-topicReader', topic_set['topic_reader'] if 'topic_reader' in topic_set and topic_set['topic_reader'] else yaml_data['topic_reader'],
-            '-output', construct_runfile_path(yaml_data['corpus'], topic_set['id'], model['name']),
+            '-output', construct_runfile_path(yaml_data['index_path'], topic_set['id'], model['name']),
             model['params']
         ]
         for (model, topic_set) in list(itertools.product(yaml_data['models'], yaml_data['topics']))
@@ -162,8 +164,8 @@ def construct_convert_commands(yaml_data):
             conversion['command'],
             '--index', construct_index_path(yaml_data),
             '--topics', topic_set['id'],
-            '--input', construct_runfile_path(yaml_data['corpus'], topic_set['id'], model['name']) + conversion['in_file_ext'],
-            '--output', construct_runfile_path(yaml_data['corpus'], topic_set['id'], model['name']) + conversion['out_file_ext'],
+            '--input', construct_runfile_path(yaml_data['index_path'], topic_set['id'], model['name']) + conversion['in_file_ext'],
+            '--output', construct_runfile_path(yaml_data['index_path'], topic_set['id'], model['name']) + conversion['out_file_ext'],
             conversion['params'] if 'params' in conversion and conversion['params'] else '',
             topic_set['convert_params'] if 'convert_params' in topic_set and topic_set['convert_params'] else '',
         ]
@@ -186,7 +188,7 @@ def evaluate_and_verify(yaml_data, dry_run):
                 eval_cmd = [
                   os.path.join(metric['command']), metric['params'] if 'params' in metric and metric['params'] else '',
                   os.path.join('tools/topics-and-qrels', topic_set['qrel']) if 'qrel' in topic_set and topic_set['qrel'] else '',
-                  construct_runfile_path(yaml_data['corpus'], topic_set['id'], model['name']) + (yaml_data['conversions'][-1]['out_file_ext'] if 'conversions' in yaml_data and yaml_data['conversions'][-1]['out_file_ext'] else '')
+                  construct_runfile_path(yaml_data['index_path'], topic_set['id'], model['name']) + (yaml_data['conversions'][-1]['out_file_ext'] if 'conversions' in yaml_data and yaml_data['conversions'][-1]['out_file_ext'] else '')
                 ]
                 if dry_run:
                     logger.info(' '.join(eval_cmd))

diff --git a/src/main/resources/regression/msmarco-passage-ca.yaml b/src/main/resources/regression/msmarco-passage-ca.yaml
@@ -1,5 +1,5 @@
 ---
-corpus: msmarco-wp
+corpus: msmarco-passage
 corpus_path: collections/msmarco/passage
 
 index_path: indexes/lucene-index.msmarco-passage-ca/
@@ -52,7 +52,7 @@ topics:
 models:
   - name: bm25-default
     display: BM25 (default)
-    params: -bm25  -analyzeWithHuggingFaceTokenizer bert-base-uncased -useCompositeAnalyzer
+    params: -bm25 -analyzeWithHuggingFaceTokenizer bert-base-uncased -useCompositeAnalyzer
     results:
       AP@1000:
         - 0.1968

diff --git a/src/main/resources/regression/msmarco-passage-hgf-wp.yaml b/src/main/resources/regression/msmarco-passage-hgf-wp.yaml
@@ -1,5 +1,5 @@
 ---
-corpus: msmarco-wp
+corpus: msmarco-passage
 corpus_path: collections/msmarco/passage
 
 index_path: indexes/lucene-index.msmarco-passage-hgf-wp/
@@ -52,7 +52,7 @@ topics:
 models:
   - name: bm25-default
     display: BM25 (default)
-    params: -bm25  -analyzeWithHuggingFaceTokenizer  bert-base-uncased
+    params: -bm25 -analyzeWithHuggingFaceTokenizer  bert-base-uncased
     results:
       AP@1000:
         - 0.1836

diff --git a/src/test/java/io/anserini/integration/TrecEndToEndTest.java b/src/test/java/io/anserini/integration/TrecEndToEndTest.java
@@ -18,6 +18,7 @@
 
 import io.anserini.collection.TrecCollection;
 import io.anserini.index.IndexCollection;
+import io.anserini.search.SearchCollection;
 
 import java.util.Arrays;
 import java.util.List;
@@ -70,12 +71,9 @@ protected void setCheckIndexGroundTruth() {
             "text\n" +
             "</TEXT>"));
 
-    referenceDocTokens.put("TREC_DOC_1", Map.of(
-        "contents", Arrays.asList(new String[]{null, null, "head", "veri", "simpl", "text"})));
-    referenceDocTokens.put("WSJ_1", Map.of(
-        "contents", List.of("head", "text", "01", "30", "03", "content")));
-    referenceDocTokens.put("DOC222", Map.of(
-        "contents", List.of("head", "simpl", "enough", "text", "text", "text")));
+    referenceDocTokens.put("TREC_DOC_1", Map.of("contents", Arrays.asList(null, null, "head", "veri", "simpl", "text")));
+    referenceDocTokens.put("WSJ_1", Map.of("contents", List.of("head", "text", "01", "30", "03", "content")));
+    referenceDocTokens.put("DOC222", Map.of("contents", List.of("head", "simpl", "enough", "text", "text", "text")));
 
     fieldNormStatusTotalFields = 1;  // text
     termIndexStatusTermCount = 12;   // Note that standard analyzer ignores stopwords; includes docids.
@@ -97,6 +95,36 @@ protected void setSearchGroundTruth() {
         "1 Q0 TREC_DOC_1 2 0.333400 Anserini",
         "1 Q0 WSJ_1 3 0.068700 Anserini"});
 
+    SearchCollection.Args argsRm3 = createDefaultSearchArgs().bm25();
+    argsRm3.rm3 = true;
+    testQueries.put("bm25.rm3", argsRm3);
+    referenceRunOutput.put("bm25.rm3", new String[]{
+        "1 Q0 DOC222 1 0.085800 Anserini",
+        "1 Q0 TREC_DOC_1 2 0.083400 Anserini",
+        "1 Q0 WSJ_1 3 0.017200 Anserini"});
+
+    SearchCollection.Args argsRocchio = createDefaultSearchArgs().bm25();
+    argsRocchio.rocchio = true;
+    testQueries.put("bm25.rocchio", argsRocchio);
+    referenceRunOutput.put("bm25.rocchio", new String[]{
+        "1 Q0 DOC222 1 0.242700 Anserini",
+        "1 Q0 TREC_DOC_1 2 0.235800 Anserini",
+        "1 Q0 WSJ_1 3 0.048500 Anserini"});
+
+    SearchCollection.Args argsBM25prf = createDefaultSearchArgs().bm25();
+    argsBM25prf.bm25prf = true;
+    testQueries.put("bm25.bm25prf", argsBM25prf);
+    referenceRunOutput.put("bm25.bm25prf", new String[]{
+        "1 Q0 DOC222 1 1.942500 Anserini",
+        "1 Q0 TREC_DOC_1 2 1.572300 Anserini",
+        "1 Q0 WSJ_1 3 1.200600 Anserini"});
+
+    testQueries.put("bm25Accurate", createDefaultSearchArgs().bm25Accurate());
+    referenceRunOutput.put("bm25Accurate", new String[]{
+        "1 Q0 DOC222 1 0.343200 Anserini",
+        "1 Q0 TREC_DOC_1 2 0.333400 Anserini",
+        "1 Q0 WSJ_1 3 0.068700 Anserini"});
+
     testQueries.put("qld", createDefaultSearchArgs().qld());
     referenceRunOutput.put("qld", new String[]{
         "1 Q0 DOC222 1 0.002500 Anserini",