diff --git a/pyterrier/__init__.py b/pyterrier/__init__.py
index ee4fe2ea..a0a86002 100644
--- a/pyterrier/__init__.py
+++ b/pyterrier/__init__.py
@@ -29,6 +29,8 @@
 # will be set in terrier.terrier.java once java is loaded
 IndexRef = None
 
+# will be set in utils.set_tqdm() once _() runs
+tqdm = None
 
 
 # deprecated functions explored to the main namespace, which will be removed in a future version
diff --git a/pyterrier/apply_base.py b/pyterrier/apply_base.py
index 186a11e9..65a0dbab 100644
--- a/pyterrier/apply_base.py
+++ b/pyterrier/apply_base.py
@@ -1,7 +1,7 @@
 from typing import Callable, Any, Union, Optional, Iterable
 import itertools
 import more_itertools
-import numpy as np
+import numpy.typing as npt
 import pandas as pd
 import pyterrier as pt
 
@@ -92,7 +92,7 @@ def transform(self, inp: pd.DataFrame) -> pd.DataFrame:
         # batching
         iterator = pt.model.split_df(inp, batch_size=self.batch_size)
         if self.verbose:
-            iterator = pt.tqdm(iterator, desc="pt.apply", unit='row')
+            iterator = pt.tqdm(iterator, desc="pt.apply", unit='row') # type: ignore
         return pd.concat([self._apply_df(chunk_df) for chunk_df in iterator])
 
     def _apply_df(self, inp: pd.DataFrame) -> pd.DataFrame:
@@ -148,7 +148,7 @@ def transform(self, res: pd.DataFrame) -> pd.DataFrame:
         it = res.groupby("qid")
         lastqid = None
         if self.verbose:
-            it = pt.tqdm(it, unit='query')
+            it = pt.tqdm(it, unit='query') # type: ignore
         try:
             if self.batch_size is None:
                 query_dfs = []
@@ -275,7 +275,7 @@ def transform(self, inp: pd.DataFrame) -> pd.DataFrame:
 
         iterator = pt.model.split_df(outputRes, batch_size=self.batch_size)
         if self.verbose:
-            iterator = pt.tqdm(iterator, desc="pt.apply", unit='row')
+            iterator = pt.tqdm(iterator, desc="pt.apply", unit='row') # type: ignore
         rtr = pd.concat([self._transform_batchwise(chunk_df) for chunk_df in iterator])
         rtr = pt.model.add_ranks(rtr)
         return rtr
@@ -294,7 +294,7 @@ def _feature_fn(row):
             pipe = pt.terrier.Retriever(index) >> pt.apply.doc_features(_feature_fn) >> pt.LTRpipeline(xgBoost())
     """
     def __init__(self,
-            fn: Callable[[Union[pd.Series, pt.model.IterDictRecord]], np.array],
+            fn: Callable[[Union[pd.Series, pt.model.IterDictRecord]], npt.NDArray],
             *,
             verbose: bool = False
     ):
@@ -313,7 +313,7 @@ def transform_iter(self, inp: pt.model.IterDict) -> pt.model.IterDict:
         # we assume that the function can take a dictionary as well as a pandas.Series. As long as [""] notation is used
         # to access fields, both should work
         if self.verbose:
-            inp = pt.tqdm(inp, desc="pt.apply.doc_features")
+            inp = pt.tqdm(inp, desc="pt.apply.doc_features") # type: ignore
         for row in inp:
             row["features"] = self.fn(row)
             yield row
@@ -322,7 +322,7 @@ def transform(self, inp: pd.DataFrame) -> pd.DataFrame:
         fn = self.fn
         outputRes = inp.copy()
         if self.verbose:
-            pt.tqdm.pandas(desc="pt.apply.doc_features", unit="d")
+            pt.tqdm.pandas(desc="pt.apply.doc_features", unit="d") # type: ignore
             outputRes["features"] = outputRes.progress_apply(fn, axis=1)
         else:
             outputRes["features"] = outputRes.apply(fn, axis=1)
@@ -368,7 +368,7 @@ def transform_iter(self, inp: pt.model.IterDict) -> pt.model.IterDict:
         # we assume that the function can take a dictionary as well as a pandas.Series. As long as [""] notation is used
         # to access fields, both should work
         if self.verbose:
-            inp = pt.tqdm(inp, desc="pt.apply.query")
+            inp = pt.tqdm(inp, desc="pt.apply.query") # type: ignore
         for row in inp:
             row = row.copy()
             if "query" in row:
@@ -384,7 +384,7 @@ def transform(self, inp: pd.DataFrame) -> pd.DataFrame:
         outputRes = inp.copy()
         try:
             if self.verbose:
-                pt.tqdm.pandas(desc="pt.apply.query", unit="d")
+                pt.tqdm.pandas(desc="pt.apply.query", unit="d") # type: ignore
                 outputRes["query"] = outputRes.progress_apply(self.fn, axis=1)
             else:
                 outputRes["query"] = outputRes.apply(self.fn, axis=1)
@@ -444,7 +444,7 @@ def transform(self, inp: pd.DataFrame) -> pd.DataFrame:
         # batching
         iterator = pt.model.split_df(inp, batch_size=self.batch_size)
         if self.verbose:
-            iterator = pt.tqdm(iterator, desc="pt.apply", unit='row')
+            iterator = pt.tqdm(iterator, desc="pt.apply", unit='row') # type: ignore
         rtr = pd.concat([self.fn(chunk_df) for chunk_df in iterator])
         return rtr
 
diff --git a/pyterrier/datasets.py b/pyterrier/datasets.py
index 714f6c6a..61682816 100644
--- a/pyterrier/datasets.py
+++ b/pyterrier/datasets.py
@@ -4,7 +4,7 @@
 import pandas as pd
 from .transformer import is_lambda
 import types
-from typing import Union, Tuple, Iterator, Dict, Any, List, Literal
+from typing import Union, Tuple, Iterator, Dict, Any, List, Literal, Optional
 from warnings import warn
 import requests
 from .io import autoopen, touch
@@ -139,7 +139,7 @@ def download(URLs : Union[str,List[str]], filename : str, **kwargs):
                 r = requests.get(url, allow_redirects=True, stream=True, **kwargs)
                 r.raise_for_status()
                 total = int(r.headers.get('content-length', 0))
-                with pt.io.finalized_open(filename, 'b') as file, pt.tqdm(
+                with pt.io.finalized_open(filename, 'b') as file, pt.tqdm( # type: ignore
                     desc=basename,
                     total=total,
                     unit='iB',
@@ -507,7 +507,7 @@ def get_results(self, variant=None) -> pd.DataFrame:
         result.sort_values(by=['qid', 'score', 'docno'], ascending=[True, False, True], inplace=True) # ensure data is sorted by qid, -score, did
         # result doesn't yet contain queries (only qids) so load and merge them in
         topics = self.get_topics(variant)
-        result = pd.merge(result, topics, how='left', on='qid', copy=False)
+        result = pd.merge(result, topics, how='left', on='qid')
         return result
 
     def _describe_component(self, component):
@@ -610,7 +610,7 @@ def transform(self, inp: pd.DataFrame) -> pd.DataFrame:
         set_docnos = set(docnos)
         it = (tuple(getattr(doc, f) for f in fields) for doc in docstore.get_many_iter(set_docnos))
         if self.verbose:
-            it = pd.tqdm(it, unit='d', total=len(set_docnos), desc='IRDSTextLoader')
+            it = pt.tqdm(it, unit='d', total=len(set_docnos), desc='IRDSTextLoader') # type: ignore
         metadata = pd.DataFrame(list(it), columns=fields).set_index('doc_id')
         metadata_frame = metadata.loc[docnos].reset_index(drop=True)
 
@@ -1104,7 +1104,7 @@ def _merge_years(self, component, variant):
     "corpus_iter" : lambda dataset, **kwargs : pt.index.treccollection2textgen(dataset.get_corpus(), num_docs=11429, verbose=kwargs.get("verbose", False))
 }
 
-DATASET_MAP = {
+DATASET_MAP : Dict[str, Dataset] = {
    # used for UGlasgow teaching
     "50pct" : RemoteDataset("50pct", FIFTY_PCT_FILES),
     # umass antique corpus - see http://ciir.cs.umass.edu/downloads/Antique/
@@ -1210,7 +1210,7 @@ def list_datasets(en_only=True):
 def transformer_from_dataset(
         dataset : Union[str, Dataset],
         clz,
-        variant: str = None,
+        variant: Optional[str] = None,
         version: str = 'latest',
         **kwargs) -> pt.Transformer:
     """Returns a Transformer instance of type ``clz`` for the provided index of variant ``variant``."""
diff --git a/pyterrier/debug.py b/pyterrier/debug.py
index 6704072a..0ff5ea1a 100644
--- a/pyterrier/debug.py
+++ b/pyterrier/debug.py
@@ -1,7 +1,7 @@
 from . import Transformer
-from typing import List
+from typing import List, Optional
 
-def print_columns(by_query : bool = False, message : str = None) -> Transformer:
+def print_columns(by_query : bool = False, message : Optional[str] = None) -> Transformer:
     """
     Returns a transformer that can be inserted into pipelines that can print the column names of the dataframe
     at this stage in the pipeline:
@@ -82,8 +82,8 @@ def print_rows(
         by_query : bool = True,
         jupyter: bool = True,
         head : int = 2,
-        message : str = None,
-        columns : List[str] = None) -> Transformer:
+        message : Optional[str] = None,
+        columns : Optional[List[str]] = None) -> Transformer:
     """
     Returns a transformer that can be inserted into pipelines that can print some of the dataframe
     at this stage in the pipeline:
diff --git a/pyterrier/java/_core.py b/pyterrier/java/_core.py
index c3490c0e..307d8da8 100644
--- a/pyterrier/java/_core.py
+++ b/pyterrier/java/_core.py
@@ -153,7 +153,7 @@ def add_jar(jar_path):
 
 
 @before_init
-def add_package(org_name: str = None, package_name: str = None, version: str = None, file_type='jar'):
+def add_package(org_name : str, package_name : str, version : Optional[str] = None, file_type : str = 'jar'):
     if version is None or version == 'snapshot':
         version = mavenresolver.latest_version_num(org_name, package_name)
     file_name = mavenresolver.get_package_jar(org_name, package_name, version, artifact=file_type)
diff --git a/pyterrier/java/_utils.py b/pyterrier/java/_utils.py
index ee1107f9..a053b5b2 100644
--- a/pyterrier/java/_utils.py
+++ b/pyterrier/java/_utils.py
@@ -387,7 +387,7 @@ def register_config(name, config: Dict[str, Any]):
 class JavaClasses:
     def __init__(self, **mapping: Union[str, Callable[[], str]]):
         self._mapping = mapping
-        self._cache = {}
+        self._cache : Dict[str, Callable] = {}
 
     def __dir__(self):
         return list(self._mapping.keys())
diff --git a/pyterrier/new.py b/pyterrier/new.py
index 67d3e27d..da3a5d81 100644
--- a/pyterrier/new.py
+++ b/pyterrier/new.py
@@ -1,5 +1,5 @@
-from typing import Sequence, Union
+from typing import Sequence, Union, Optional, cast, Iterable
 import pandas as pd
 from .model import add_ranks
 
 
@@ -9,7 +9,7 @@ def empty_Q() -> pd.DataFrame:
     """
     return pd.DataFrame(columns=["qid", "query"])
 
-def queries(queries : Union[str, Sequence[str]], qid : Union[str, Sequence[str]] = None, **others) -> pd.DataFrame:
+def queries(queries : Union[str, Sequence[str]], qid : Optional[Union[str, Iterable[str]]] = None, **others) -> pd.DataFrame:
     """
     Creates a new queries dataframe. Will return a dataframe with the columns `["qid", "query"]`.
     Any further lists in others will also be added.
@@ -40,7 +40,7 @@ def queries(queries : Union[str, Sequence[str]], qid : Union[str, Sequence[str]
         assert type(qid) == str
         return pd.DataFrame({"qid" : [qid], "query" : [queries], **others})
     if qid is None:
-        qid = map(str, range(1, len(queries)+1))
+        qid = cast(Iterable[str], map(str, range(1, len(queries)+1))) # noqa: PT100 (this is typing.cast, not jnius.cast)
     return pd.DataFrame({"qid" : qid, "query" : queries, **others})
 
 Q = queries
@@ -53,8 +53,8 @@ def empty_R() -> pd.DataFrame:
 
 def ranked_documents(
         scores : Sequence[Sequence[float]],
-        qid : Sequence[str] = None,
-        docno=None,
+        qid : Optional[Sequence[str]] = None,
+        docno : Optional[Sequence[Sequence[str]]] = None,
         **others) -> pd.DataFrame:
     """
     Creates a new ranked documents dataframe. Will return a dataframe with the columns `["qid", "docno", "score", "rank"]`.
@@ -120,4 +120,4 @@ def ranked_documents(
         raise ValueError("We assume multiple documents, for now")
     return add_ranks(rtr)
 
-R = ranked_documents
\ No newline at end of file
+R = ranked_documents
diff --git a/pyterrier/pipelines.py b/pyterrier/pipelines.py
index 77821390..a680109a 100644
--- a/pyterrier/pipelines.py
+++ b/pyterrier/pipelines.py
@@ -6,6 +6,7 @@
 from . import Transformer
 from .model import coerce_dataframe_types
 import ir_measures
+import tqdm as tqdm_module
 from ir_measures.measures import BaseMeasure
 import pyterrier as pt
 MEASURE_TYPE=Union[str,BaseMeasure]
@@ -107,7 +108,7 @@ def _ir_measures_to_dict(
         rev_mapping : Dict[BaseMeasure,str],
         num_q : int,
         perquery : bool = True,
-        backfill_qids : Sequence[str] = None):
+        backfill_qids : Optional[Sequence[str]] = None):
     from collections import defaultdict
     if perquery:
         # qid -> measure -> value
@@ -135,32 +136,32 @@ def _ir_measures_to_dict(
         for m in seq:
             metric = m.measure
             metric = rev_mapping.get(metric, str(metric))
-            rtr[metric].add(m.value)
+            rtr[metric].add(m.value) # type: ignore # there is no typing for aggregators in ir_measures
         for m in rtr:
-            rtr[m] = rtr[m].result()
+            rtr[m] = rtr[m].result() # type: ignore # there is no typing for aggregators in ir_measures
         return rtr
 
 
 def _run_and_evaluate(
         system : SYSTEM_OR_RESULTS_TYPE,
-        topics : pd.DataFrame,
+        topics : Optional[pd.DataFrame],
         qrels: pd.DataFrame,
         metrics : MEASURES_TYPE,
-        pbar = None,
-        save_mode : SAVEMODE_TYPE = None,
-        save_file : str = None,
+        pbar : Optional[tqdm_module.tqdm] = None,
+        save_mode : Optional[SAVEMODE_TYPE] = None,
+        save_file : Optional[str] = None,
         perquery : bool = False,
         batch_size : Optional[int] = None,
-        backfill_qids : Sequence[str] = None):
+        backfill_qids : Optional[Sequence[str]] = None):
 
     from .io import read_results, write_results
     if pbar is None:
-        pbar = pt.tqdm(disable=True)
+        pbar = pt.tqdm(disable=True) # type: ignore
 
     metrics, rev_mapping = _convert_measures(metrics)
     qrels = qrels.rename(columns={'qid': 'query_id', 'docno': 'doc_id', 'label': 'relevance'})
     from timeit import default_timer as timer
-    runtime = 0
+    runtime : float = 0.
     num_q = qrels['query_id'].nunique()
     if save_file is not None and os.path.exists(save_file):
         if save_mode == 'reuse':
@@ -178,12 +179,16 @@ def _run_and_evaluate(
         else:
             raise ValueError("Unknown save_mode argument '%s', valid options are 'error', 'warn', 'reuse' or 'overwrite'." % save_mode)
 
+    res : pd.DataFrame
     # if its a DataFrame, use it as the results
     if isinstance(system, pd.DataFrame):
         res = system
         res = coerce_dataframe_types(res)
         if len(res) == 0:
-            raise ValueError("%d topics, but no results in dataframe" % len(topics))
+            if topics is None:
+                raise ValueError("No topics specified, and no results in dataframe")
+            else:
+                raise ValueError("%d topics, but no results in dataframe" % len(topics))
         evalMeasuresDict = _ir_measures_to_dict(
             ir_measures.iter_calc(metrics, qrels, res.rename(columns=_irmeasures_columns)),
             metrics,
@@ -194,6 +199,8 @@
         pbar.update()
 
     elif batch_size is None:
+
+        assert topics is not None, "topics must be specified"
         #transformer, evaluate all queries at once
 
         starttime = timer()
@@ -219,12 +226,15 @@
             backfill_qids)
         pbar.update()
     else:
+        assert topics is not None, "topics must be specified"
+
        #transformer, evaluate queries in batches
         assert batch_size > 0
         starttime = timer()
         evalMeasuresDict = {}
         remaining_qrel_qids = set(qrels.query_id)
         try:
+            batch_topics : pd.DataFrame
             for i, (res, batch_topics) in enumerate( system.transform_gen(topics, batch_size=batch_size, output_topics=True)):
                 if len(res) == 0:
                     raise ValueError("batch of %d topics, but no results received in batch %d from %s" % (len(batch_topics), i, str(system) ) )
@@ -282,20 +292,20 @@ def Experiment(
         topics : pd.DataFrame,
         qrels : pd.DataFrame,
         eval_metrics : MEASURES_TYPE,
-        names : Sequence[str] = None,
+        names : Optional[Sequence[str]] = None,
         perquery : bool = False,
         dataframe : bool = True,
         batch_size : Optional[int] = None,
         filter_by_qrels : bool = False,
         filter_by_topics : bool = True,
-        baseline : int = None,
+        baseline : Optional[int] = None,
         test : Union[str,TEST_FN_TYPE] = "t",
-        correction : str = None,
+        correction : Optional[str] = None,
         correction_alpha : float = 0.05,
-        highlight : str = None,
-        round : Union[int,Dict[str,int]] = None,
+        highlight : Optional[str] = None,
+        round : Optional[Union[int,Dict[str,int]]] = None,
         verbose : bool = False,
-        save_dir : str = None,
+        save_dir : Optional[str] = None,
         save_mode : SAVEMODE_TYPE = 'warn',
         **kwargs):
     """
@@ -420,10 +430,13 @@ def _apply_round(measure, value):
         raise ValueError('There is no overlap between the qids found in the topics and qrels. If this is intentional, set filter_by_topics=False and filter_by_qrels=False.')
 
     from scipy import stats
+    test_fn : TEST_FN_TYPE
     if test == "t":
-        test = stats.ttest_rel
-    if test == "wilcoxon":
-        test = stats.wilcoxon
+        test_fn = stats.ttest_rel
+    elif test == "wilcoxon":
+        test_fn = stats.wilcoxon
+    else:
+        test_fn = test
 
     # obtain system names if not specified
     if names is None:
@@ -469,7 +482,7 @@ def _apply_round(measure, value):
             # round number of batches up for each system
             tqdm_args['total'] = math.ceil((len(topics) / batch_size)) * len(retr_systems)
 
-    with pt.tqdm(**tqdm_args) as pbar:
+    with pt.tqdm(**tqdm_args) as pbar: # type: ignore
         # run and evaluate each system
         for name, system in zip(names, retr_systems):
             save_file = None
@@ -518,7 +531,7 @@ def _apply_round(measure, value):
     if dataframe:
         if perquery:
             df = pd.DataFrame(evalsRows, columns=["name", "qid", "measure", "value"]).sort_values(['name', 'qid'])
-            if round is not None:
+            if round is not None and isinstance(round, int):
                 df["value"] = df["value"].round(round)
             return df
 
@@ -526,7 +539,7 @@ def _apply_round(measure, value):
     if mrt_needed:
         highlight_cols["mrt"] = "-"
 
-    p_col_names=[]
+    p_col_names : List[str] = []
     if baseline is not None:
         assert len(evalDictsPerQ) == len(retr_systems)
         baselinePerQuery={}
@@ -547,7 +560,7 @@ def _apply_round(measure, value):
                 perQuery = np.array( [ evalDictsPerQ[i][q][m] for q in evalDictsPerQ[baseline] ])
                 delta_plus = (perQuery > baselinePerQuery[m]).sum()
                 delta_minus = (perQuery < baselinePerQuery[m]).sum()
-                p = test(perQuery, baselinePerQuery[m])[1]
+                p = test_fn(perQuery, baselinePerQuery[m])[1]
                 additionals.extend([delta_plus, delta_minus, p])
             evalsRows[i].extend(additionals)
         delta_names=[]
@@ -565,12 +578,12 @@ def _apply_round(measure, value):
 
     # multiple testing correction. This adds two new columns for each measure experience statistical significance testing
     if baseline is not None and correction is not None:
-        import statsmodels.stats.multitest
+        import statsmodels.stats.multitest # type: ignore
         for pcol in p_col_names:
             pcol_reject = pcol.replace("p-value", "reject")
             pcol_corrected = pcol + " corrected"
             reject, corrected, _, _ = statsmodels.stats.multitest.multipletests(df[pcol].drop(df.index[baseline]), alpha=correction_alpha, method=correction)
-            insert_pos = df.columns.get_loc(pcol)
+            insert_pos : int = df.columns.get_loc(pcol)
             # add reject/corrected values for the baseline
             reject = np.insert(reject, baseline, False)
             corrected = np.insert(corrected, baseline, np.nan)
@@ -597,6 +610,12 @@ def _apply_round(measure, value):
     List[GRID_SCAN_PARAM_SETTING]
 ]
 
+GRID_SEARCH_RETURN_TYPE_BOTH = Tuple[
+    Transformer,
+    float,
+    List[GRID_SCAN_PARAM_SETTING]
+]
+
 def _save_state(param_dict):
     rtr = []
     for tran, param_set in param_dict.items():
@@ -634,7 +653,7 @@ def KFoldGridSearch(
         jobs : int = 1,
         backend='joblib',
         verbose: bool = False,
-        batch_size = None) -> Tuple[pd.DataFrame, GRID_SEARCH_RETURN_TYPE_SETTING]:
+        batch_size : Optional[int] = None) -> Tuple[pd.DataFrame, GRID_SEARCH_RETURN_TYPE_SETTING]:
     """
     Applies a GridSearch using different folds. It returns the *results* of the tuned transformer pipeline on the test topics.
     The number of topics dataframes passed
@@ -731,9 +750,9 @@ def GridSearch(
         jobs : int = 1,
         backend='joblib',
         verbose: bool = False,
-        batch_size = None,
-        return_type : str = "opt_pipeline"
-    ) -> Union[Transformer,GRID_SEARCH_RETURN_TYPE_SETTING]:
+        batch_size : Optional[int] = None,
+        return_type : Literal['opt_pipeline', 'best_setting', 'both'] = "opt_pipeline"
+    ) -> Union[Transformer,GRID_SEARCH_RETURN_TYPE_SETTING,GRID_SEARCH_RETURN_TYPE_BOTH]:
     """
     GridSearch is essentially, an argmax GridScan(), i.e. it returns an instance of the pipeline to tune
     with the best parameter settings among params, that were found that were obtained using the specified
@@ -775,7 +794,7 @@ def GridSearch(
     assert len(grid_outcomes) > 0, "GridScan returned 0 rows"
     max_measure = grid_outcomes[0][1][metric]
     max_setting = grid_outcomes[0][0]
-    for setting, measures in grid_outcomes:
+    for setting, measures in grid_outcomes: # TODO what is the type of this iteration?
         if measures[metric] > max_measure:
             max_measure = measures[metric]
             max_setting = setting
@@ -793,7 +812,7 @@ def GridSearch(
         for tran, param, value in max_setting:
             tran.set_parameter(param, value)
         return (pipeline, max_measure, max_setting)
-
+    raise ValueError("Unknown return_type option %s" % return_type)
 
 def GridScan(
         pipeline : Transformer,
@@ -806,7 +825,7 @@
         verbose: bool = False,
         batch_size = None,
         dataframe = True,
-    ) -> Union[pd.DataFrame, List [ Tuple [ List[ GRID_SCAN_PARAM_SETTING ] , Dict[str,float] ] ] ]:
+    ) -> Union[pd.DataFrame, List [ Tuple [ List[ GRID_SCAN_PARAM_SETTING ], Dict[str,float] ] ] ]:
     """
     GridScan applies a set of named parameters on a given pipeline and evaluates the outcome. The topics and qrels
     must be specified. The trec_eval measure names can be optionally specified.
@@ -899,7 +918,7 @@ def _evaluate_several_settings(inputs : List[Tuple]):
     eval_list = []
     #for each combination of parameter values
     if jobs == 1:
-        for v in pt.tqdm(combinations, total=len(combinations), desc="GridScan", mininterval=0.3) if verbose else combinations:
+        for v in pt.tqdm(combinations, total=len(combinations), desc="GridScan", mininterval=0.3) if verbose else combinations: # type: ignore
             parameter_list, eval_scores = _evaluate_one_setting(keys, v)
             eval_list.append( (parameter_list, eval_scores) )
     else:
diff --git a/pyterrier/terrier/retriever.py b/pyterrier/terrier/retriever.py
index 5fb0a95d..d58af4dd 100644
--- a/pyterrier/terrier/retriever.py
+++ b/pyterrier/terrier/retriever.py
@@ -1,4 +1,4 @@
-from typing import Union
+from typing import Union, Optional
 import pandas as pd
 import numpy as np
 from deprecated import deprecated
@@ -124,7 +124,7 @@ def matchop(t, w=1):
 
     @staticmethod
     def from_dataset(dataset : Union[str,Dataset],
-            variant : str = None,
+            variant : Optional[str] = None,
             version='latest',
             **kwargs):
         """
@@ -657,7 +657,7 @@ def __setstate__(self, d):
 
     @staticmethod
     def from_dataset(dataset : Union[str,Dataset],
-            variant : str = None,
+            variant : Optional[str] = None,
             version='latest',
             **kwargs):
         return pt.datasets.transformer_from_dataset(dataset, variant=variant, version=version, clz=FeaturesRetriever, **kwargs)
diff --git a/pyterrier/text.py b/pyterrier/text.py
index 2eb0fe1e..d9a3b393 100644
--- a/pyterrier/text.py
+++ b/pyterrier/text.py
@@ -63,6 +63,7 @@ def get_text(
     if not isinstance(indexlike, HasTextLoader):
         raise ValueError('indexlike must provide a .text_loader() method.')
 
+    result : pt.Transformer
     result = indexlike.text_loader(metadata, verbose=verbose and not by_query, **kwargs)
 
     if by_query:
diff --git a/pyterrier/utils.py b/pyterrier/utils.py
index 2ee631e8..a6e63693 100644
--- a/pyterrier/utils.py
+++ b/pyterrier/utils.py
@@ -1,6 +1,6 @@
 import inspect
 import sys
-from typing import Callable, Tuple, List, Callable
+from typing import Callable, Tuple, List, Dict
 import platform
 from functools import wraps
 from importlib.metadata import EntryPoint
@@ -39,7 +39,7 @@ def convert_qrels_to_dataframe(qrels_dict) -> pd.DataFrame:
     Returns:
         pd.DataFrame: columns=['qid', 'docno', 'label']
     """
-    result = {'qid': [], 'docno': [], 'label': []}
+    result : Dict[str, List] = {'qid': [], 'docno': [], 'label': []}
     for qid in qrels_dict:
         for docno, label in qrels_dict[qid]:
             result['qid'].append(qid)