Skip to content

Commit 7b5b648

Browse files
committed
Fixed bglinking.
1 parent aeea053 commit 7b5b648

File tree

7 files changed

+26
-20
lines changed

7 files changed

+26
-20
lines changed

docs/regressions/regressions-backgroundlinking18.md

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -47,21 +47,21 @@ target/appassembler/bin/SearchCollection \
4747
-topics tools/topics-and-qrels/topics.backgroundlinking18.txt \
4848
-topicReader BackgroundLinking \
4949
-output runs/run.wapo.v2.bm25.topics.backgroundlinking18.txt \
50-
-backgroundlinking -backgroundlinking.k 100 -bm25 -hits 100 &
50+
-backgroundLinking -backgroundLinking.k 100 -bm25 -hits 100 &
5151
5252
target/appassembler/bin/SearchCollection \
5353
-index indexes/lucene-index.wapo.v2/ \
5454
-topics tools/topics-and-qrels/topics.backgroundlinking18.txt \
5555
-topicReader BackgroundLinking \
5656
-output runs/run.wapo.v2.bm25+rm3.topics.backgroundlinking18.txt \
57-
-backgroundlinking -backgroundlinking.k 100 -bm25 -rm3 -hits 100 &
57+
-backgroundLinking -backgroundLinking.k 100 -bm25 -rm3 -hits 100 &
5858
5959
target/appassembler/bin/SearchCollection \
6060
-index indexes/lucene-index.wapo.v2/ \
6161
-topics tools/topics-and-qrels/topics.backgroundlinking18.txt \
6262
-topicReader BackgroundLinking \
6363
-output runs/run.wapo.v2.bm25+rm3+df.topics.backgroundlinking18.txt \
64-
-backgroundlinking -backgroundlinking.datefilter -backgroundlinking.k 100 -bm25 -rm3 -hits 100 &
64+
-backgroundLinking -backgroundLinking.dateFilter -backgroundLinking.k 100 -bm25 -rm3 -hits 100 &
6565
```
6666

6767
Evaluation can be performed using `trec_eval`:

docs/regressions/regressions-backgroundlinking19.md

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -47,21 +47,21 @@ target/appassembler/bin/SearchCollection \
4747
-topics tools/topics-and-qrels/topics.backgroundlinking19.txt \
4848
-topicReader BackgroundLinking \
4949
-output runs/run.wapo.v2.bm25.topics.backgroundlinking19.txt \
50-
-backgroundlinking -backgroundlinking.k 100 -bm25 -hits 100 &
50+
-backgroundLinking -backgroundLinking.k 100 -bm25 -hits 100 &
5151
5252
target/appassembler/bin/SearchCollection \
5353
-index indexes/lucene-index.wapo.v2/ \
5454
-topics tools/topics-and-qrels/topics.backgroundlinking19.txt \
5555
-topicReader BackgroundLinking \
5656
-output runs/run.wapo.v2.bm25+rm3.topics.backgroundlinking19.txt \
57-
-backgroundlinking -backgroundlinking.k 100 -bm25 -rm3 -hits 100 &
57+
-backgroundLinking -backgroundLinking.k 100 -bm25 -rm3 -hits 100 &
5858
5959
target/appassembler/bin/SearchCollection \
6060
-index indexes/lucene-index.wapo.v2/ \
6161
-topics tools/topics-and-qrels/topics.backgroundlinking19.txt \
6262
-topicReader BackgroundLinking \
6363
-output runs/run.wapo.v2.bm25+rm3+df.topics.backgroundlinking19.txt \
64-
-backgroundlinking -backgroundlinking.datefilter -backgroundlinking.k 100 -bm25 -rm3 -hits 100 &
64+
-backgroundLinking -backgroundLinking.dateFilter -backgroundLinking.k 100 -bm25 -rm3 -hits 100 &
6565
```
6666

6767
Evaluation can be performed using `trec_eval`:

docs/regressions/regressions-backgroundlinking20.md

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -47,21 +47,21 @@ target/appassembler/bin/SearchCollection \
4747
-topics tools/topics-and-qrels/topics.backgroundlinking20.txt \
4848
-topicReader BackgroundLinking \
4949
-output runs/run.wapo.v3.bm25.topics.backgroundlinking20.txt \
50-
-backgroundlinking -backgroundlinking.k 100 -bm25 -hits 100 &
50+
-backgroundLinking -backgroundLinking.k 100 -bm25 -hits 100 &
5151
5252
target/appassembler/bin/SearchCollection \
5353
-index indexes/lucene-index.wapo.v3/ \
5454
-topics tools/topics-and-qrels/topics.backgroundlinking20.txt \
5555
-topicReader BackgroundLinking \
5656
-output runs/run.wapo.v3.bm25+rm3.topics.backgroundlinking20.txt \
57-
-backgroundlinking -backgroundlinking.k 100 -bm25 -rm3 -hits 100 &
57+
-backgroundLinking -backgroundLinking.k 100 -bm25 -rm3 -hits 100 &
5858
5959
target/appassembler/bin/SearchCollection \
6060
-index indexes/lucene-index.wapo.v3/ \
6161
-topics tools/topics-and-qrels/topics.backgroundlinking20.txt \
6262
-topicReader BackgroundLinking \
6363
-output runs/run.wapo.v3.bm25+rm3+df.topics.backgroundlinking20.txt \
64-
-backgroundlinking -backgroundlinking.datefilter -backgroundlinking.k 100 -bm25 -rm3 -hits 100 &
64+
-backgroundLinking -backgroundLinking.dateFilter -backgroundLinking.k 100 -bm25 -rm3 -hits 100 &
6565
```
6666

6767
Evaluation can be performed using `trec_eval`:

src/main/java/io/anserini/rerank/lib/NewsBackgroundLinkingReranker.java

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,13 +48,18 @@ public class NewsBackgroundLinkingReranker implements Reranker {
4848
private final Class parser;
4949

5050
public NewsBackgroundLinkingReranker(Analyzer analyzer, Class parser) {
51+
assert analyzer != null;
52+
assert parser != null;
53+
5154
this.analyzer = analyzer;
5255
this.parser = parser;
53-
5456
}
5557

5658
@Override
5759
public ScoredDocs rerank(ScoredDocs docs, RerankerContext context) {
60+
assert docs != null;
61+
assert context != null;
62+
5863
IndexReader reader = context.getIndexSearcher().getIndexReader();
5964
String queryDocId = context.getQueryDocId();
6065
final Map<String, Long> queryTermsMap = convertDocVectorToMap(reader, queryDocId);
@@ -99,12 +104,14 @@ public ScoredDocs rerank(ScoredDocs docs, RerankerContext context) {
99104
ScoredDocs scoredDocs = new ScoredDocs();
100105
int resSize = docs.lucene_documents.length - toRemove.size();
101106
scoredDocs.lucene_documents = new Document[resSize];
107+
scoredDocs.docids = new String[resSize];
102108
scoredDocs.lucene_docids = new int[resSize];
103109
scoredDocs.scores = new float[resSize];
104110
int idx = 0;
105111
for (int i = 0; i < docs.lucene_documents.length; i++) {
106112
if (!toRemove.contains(i)) {
107113
scoredDocs.lucene_documents[idx] = docs.lucene_documents[i];
114+
scoredDocs.docids[idx] = docs.docids[i];
108115
scoredDocs.scores[idx] = docs.scores[i];
109116
scoredDocs.lucene_docids[idx] = docs.lucene_docids[i];
110117
idx++;

src/main/java/io/anserini/search/BaseSearcher.java

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,6 @@
2020
import org.apache.lucene.document.Document;
2121
import org.apache.lucene.search.IndexSearcher;
2222
import org.apache.lucene.search.TopDocs;
23-
import org.jetbrains.annotations.NotNull;
2423

2524
import java.io.IOException;
2625
import java.util.ArrayList;
@@ -119,10 +118,9 @@ public ScoredDoc[] processLuceneTopDocs(K qid, TopDocs docs) {
119118
* @param keepLuceneDocument whether to retain references to the original Lucene docs
120119
* @return processed ranked list
121120
*/
122-
public ScoredDoc[] processLuceneTopDocs(K qid, @NotNull TopDocs docs, boolean keepLuceneDocument) {
121+
public ScoredDoc[] processLuceneTopDocs(K qid, TopDocs docs, boolean keepLuceneDocument) {
123122
List<ScoredDoc> results = new ArrayList<>();
124-
// For removing duplicate docids.
125-
Set<String> docids = new HashSet<>();
123+
Set<String> docids = new HashSet<>(); // For removing duplicate docids.
126124

127125
int rank = 1;
128126
for (int i = 0; i < docs.scoreDocs.length; i++) {
@@ -194,7 +192,12 @@ public ScoredDoc[] processScoredDocs(K qid, ScoredDocs docs) {
194192
* @param keepLuceneDocument whether to retain references to the original Lucene docs
195193
* @return processed ranked list
196194
*/
197-
public ScoredDoc[] processScoredDocs(K qid, @NotNull ScoredDocs docs, boolean keepLuceneDocument) {
195+
public ScoredDoc[] processScoredDocs(K qid, ScoredDocs docs, boolean keepLuceneDocument) {
196+
assert docs.docids != null;
197+
assert docs.lucene_docids != null;
198+
assert docs.lucene_documents != null;
199+
assert docs.scores != null;
200+
198201
List<ScoredDoc> results = new ArrayList<>();
199202
// For removing duplicate docids.
200203
Set<String> docids = new HashSet<>();

src/main/java/io/anserini/search/ScoredDocs.java

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,6 @@
2828
import org.apache.lucene.search.Query;
2929
import org.apache.lucene.search.TermQuery;
3030
import org.apache.lucene.search.TopDocs;
31-
import org.mockito.internal.matchers.Null;
3231

3332
import java.io.IOException;
3433
import java.util.ArrayList;
@@ -52,8 +51,6 @@ public class ScoredDocs {
5251
public float[] scores;
5352

5453
public static ScoredDocs fromTopDocs(TopDocs rs, IndexSearcher searcher) {
55-
assert rs != null;
56-
5754
ScoredDocs scoredDocs = new ScoredDocs();
5855
scoredDocs.docids = new String[rs.scoreDocs.length];
5956
scoredDocs.lucene_documents = new Document[rs.scoreDocs.length];

src/main/java/io/anserini/search/SearchCollection.java

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -943,7 +943,7 @@ public void run() {
943943
out.writeTopic((T) arr[0], results.get(Integer.parseInt(arr[0])));
944944
}
945945
} catch (IOException e) {
946-
e.printStackTrace();
946+
throw new RuntimeException(String.format("Error writing output to %s", outputPath));
947947
}
948948
} else {
949949
results.forEach((qid, hits) -> out.writeTopic(qid, results.get(qid)));
@@ -1304,7 +1304,6 @@ private Analyzer getAnalyzer() {
13041304
}
13051305

13061306
@Override
1307-
@SuppressWarnings("unchecked")
13081307
public void run() {
13091308
final String runTag = args.runtag == null ? "Anserini" : args.runtag;
13101309
LOG.info("runtag: " + runTag);

0 commit comments

Comments
 (0)