-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add lookup verb and verbs-taking-arguments infrastructure
- Verbs can now be defined with a class that encapsulates the CLI and notebook interfaces.
- Verbs use a registry similar to models and datasets, but without support for external verbs.
- Lookup verb with a CLI and notebook interface.
- Inference now creates an index of object_id -> batch to facilitate lookups in the batch numpy files.
- Backwards compatibility: Lookup verb will regenerate the object_id -> batch index if it does not exist.
- Stub implementation of similarity search verb.
- Loading branch information
Showing
8 changed files
with
364 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
from .verb_registry import VERB_REGISTRY, all_class_verbs, all_verbs, fetch_verb_class, is_verb_class

# Every name listed in __all__ must actually be bound in this module;
# otherwise a star-import of this package raises AttributeError.
__all__ = ["VERB_REGISTRY", "is_verb_class", "fetch_verb_class", "all_class_verbs", "all_verbs"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
import logging | ||
import re | ||
from argparse import ArgumentParser, Namespace | ||
from pathlib import Path | ||
from typing import Optional, Union | ||
|
||
import numpy as np | ||
|
||
from fibad.config_utils import find_most_recent_results_dir | ||
from fibad.infer import save_batch_index | ||
|
||
from .verb_registry import Verb, fibad_verb | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
@fibad_verb
class Lookup(Verb):
    """Look up an inference result using the ID of a data member

    The verb reads ``batch_index.npy`` (fields ``id`` and ``batch_num``) from a
    results directory to find which ``batch_<N>.npy`` file holds the requested
    ID, then returns the matching ``tensor`` entry from that batch file.
    """

    cli_name = "lookup"
    add_parser_kwargs = {}

    @staticmethod
    def setup_parser(parser: ArgumentParser):
        """Set up our arguments by configuring a subparser

        Parameters
        ----------
        parser : ArgumentParser
            The sub-parser to configure
        """
        parser.add_argument("-i", "--id", type=str, required=True, help="ID of image")
        parser.add_argument(
            "-r", "--results-dir", type=str, required=False, help="Directory containing inference results."
        )

    def run_cli(self, args: Optional[Namespace] = None):
        """Entrypoint to Lookup from the CLI.

        Logs whether a result was found and prints the vector when one exists.

        Parameters
        ----------
        args : Optional[Namespace], optional
            The parsed command line arguments

        Raises
        ------
        RuntimeError
            If ``args`` is None.
        """
        logger.info("Lookup run from cli")
        if args is None:
            raise RuntimeError("Run CLI called with no arguments.")

        # This is where we map from CLI parsed args to a
        # self.run (args) call.
        vector = self.run(id=args.id, results_dir=args.results_dir)
        if vector is None:
            logger.info("No inference result found")
        else:
            logger.info("Inference result found")
            print(vector)

    def run(self, id: str, results_dir: Optional[Union[Path, str]] = None) -> Optional[np.ndarray]:
        """Lookup the latent-space representation of a particular ID

        Requires the relevant dataset to be configured, and for inference to have been run.

        Parameters
        ----------
        id : str
            The ID of the input data to look up the inference result.
            It is converted with ``int(id)`` before comparison against the index.
        results_dir : str, Optional
            The directory containing the inference results. When None, the
            ``[results] inference_dir`` config value is used if set; otherwise the
            most recent ``infer`` results directory is looked up.

        Returns
        -------
        Optional[np.ndarray]
            The output tensor of the model for the given input, or None when no
            results directory or no entry for ``id`` could be found.

        Raises
        ------
        ValueError
            If ``id`` cannot be converted to an integer.
        """
        if results_dir is None:
            if self.config["results"]["inference_dir"]:
                results_dir = self.config["results"]["inference_dir"]
            else:
                results_dir = find_most_recent_results_dir(self.config, verb="infer")
                # Only announce the directory when one was actually found; the
                # not-found case is reported as an error just below.
                if results_dir is not None:
                    msg = f"Using most recent results dir {results_dir} for lookup. "
                    msg += "Use the [results] inference_dir config to set a directory or pass it to this verb."
                    logger.info(msg)

        if results_dir is None:
            msg = "Could not find a results directory. Run infer or use "
            msg += "[results] inference_dir config to specify a directory"
            logger.error(msg)
            return None

        if isinstance(results_dir, str):
            results_dir = Path(results_dir)

        # Open the batch index numpy file, rebuilding it from the batch files
        # first if it does not exist (results written before the index existed).
        batch_index_path = results_dir / "batch_index.npy"
        if not batch_index_path.exists():
            self.create_index(results_dir)

        batch_index = np.load(batch_index_path)
        target_id = int(id)
        batch_num = batch_index[batch_index["id"] == target_id]["batch_num"]
        if len(batch_num) == 0:
            return None
        batch_num = batch_num[0]

        recarray = np.load(results_dir / f"batch_{batch_num}.npy")
        tensor = recarray[recarray["id"] == target_id]["tensor"]
        if len(tensor) == 0:
            return None

        return np.array(tensor[0])

    def create_index(self, results_dir: Path):
        """Recreate the index into the batch numpy files

        Scans ``results_dir`` for ``batch_<N>.npy`` files, collects the ``id``
        field of each together with its batch number, and hands both arrays to
        ``save_batch_index``.

        Parameters
        ----------
        results_dir : Path
            Path to the batch numpy files
        """
        ids = []
        batch_nums = []
        # The whole file name must match: the dot is escaped and the match is
        # anchored so names such as "batch_1_npy.npy" are not taken for batch 1.
        batch_file_pattern = re.compile(r"batch_([0-9]+)\.npy")

        # Use the batched numpy files to assemble an index.
        logger.info("Recreating index...")
        for file in results_dir.glob("batch_*.npy"):
            print(".", end="", flush=True)
            m = batch_file_pattern.fullmatch(file.name)
            if m is None:
                logger.warning(f"Could not find batch number for {file}")
                continue
            batch_num = int(m[1])
            recarray = np.load(file)
            batch_ids = recarray["id"]
            ids.extend(batch_ids)
            batch_nums.extend([batch_num] * len(batch_ids))

        save_batch_index(results_dir, np.array(ids), np.array(batch_nums))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
import logging | ||
from argparse import ArgumentParser, Namespace | ||
from typing import Optional | ||
|
||
from .verb_registry import Verb, fibad_verb | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
@fibad_verb
class Search(Verb):
    """Stub of similarity search"""

    cli_name = "search"
    add_parser_kwargs = {}

    @staticmethod
    def setup_parser(parser: ArgumentParser):
        """Register the single required ``-i/--image-file`` argument on the sub-parser."""
        parser.add_argument("-i", "--image-file", required=True, type=str, help="Path to image file")

    # Design note: if both of these move to the Verb superclass, then writing a
    # new verb is basically:
    #
    # If you want no args, just make the class and define run(self).
    # If you want args:
    #   1) write setup_parser (which sets up the ArgumentParser and the
    #      name/type info for the cli run)
    #   2) write run(self, <your args>) to do what you want
    #
    # Open question: should there be a version of run_cli on the base class
    # which uses a dict on the Verb superclass to build the call to run, based
    # on what the subclass verb defined in setup_parser?
    def run_cli(self, args: Optional[Namespace] = None):
        """Stub CLI entry point: forward the parsed ``image_file`` argument to ``run``.

        Raises
        ------
        RuntimeError
            If ``args`` is None.
        """
        logger.info("Search run from cli")
        if args is None:
            raise RuntimeError("Run CLI called with no arguments.")

        # Map from the CLI parsed args to a self.run(...) call.
        image_path = args.image_file
        return self.run(image_file=image_path)

    def run(self, image_file: str):
        """Stub of the search itself; currently only logs the image file it was given.

        Parameters
        ----------
        image_file : str
            Value of the ``--image-file`` argument (help text: "Path to image file").
        """
        logger.info(f"Got Image {image_file}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
import logging | ||
from abc import ABC | ||
from typing import Optional | ||
|
||
from fibad.config_utils import ConfigDict | ||
from fibad.plugin_utils import update_registry | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class Verb(ABC):
    """Common base class that every class-based fibad verb derives from"""

    # Keyword arguments a verb wants passed to subparser.add_parser() when its
    # parser is attached to the main parser. This empty default covers verbs
    # that do not define any.
    add_parser_kwargs: dict[str, str] = {}

    def __init__(self, config: ConfigDict) -> None:
        """Store the config on the instance; shared initialization for all verbs

        Parameters
        ----------
        config : ConfigDict
            The configuration made available to the verb as ``self.config``
        """
        self.config = config
|
||
|
||
# Registry of verb name -> implementing class.
# A value of None marks a verb with no class; those are assumed to have a
# function in fibad.py which performs their function. All other verbs should be
# defined by named classes in fibad.verbs and use the @fibad_verb decorator.
VERB_REGISTRY: dict[str, Optional[type[Verb]]] = {
    function_verb: None
    for function_verb in (
        "train",
        "infer",
        "download",
        "prepare",
        "rebuild_manifest",
    )
}
|
||
|
||
def fibad_verb(cls: type[Verb]) -> type[Verb]:
    """Class decorator that registers a fibad verb under its ``cli_name``

    Parameters
    ----------
    cls : type[Verb]
        The verb class to register; it must define ``cli_name``

    Returns
    -------
    type[Verb]
        The same class, unmodified, so the decorator can be stacked or ignored
    """
    verb_name = cls.cli_name  # type: ignore[attr-defined]
    update_registry(VERB_REGISTRY, verb_name, cls)
    return cls
|
||
|
||
def all_verbs() -> list[str]:
    """Return the names of all currently registered verbs, in registry order"""
    return list(VERB_REGISTRY)
|
||
|
||
def all_class_verbs() -> list[str]:
    """Return the names of registered verbs whose registry entry is a class (not None)"""
    return [name for name, verb_cls in VERB_REGISTRY.items() if verb_cls is not None]
|
||
|
||
def is_verb_class(cli_name: str) -> bool:
    """Tell whether the named verb is implemented by a class

    Parameters
    ----------
    cli_name : str
        The name of the verb on the command line interface

    Returns
    -------
    bool
        True if the verb is registered with a class-based implementation;
        False if it is unregistered or registered with None
    """
    # .get() yields None both for unknown names and for class-less verbs, so a
    # single lookup covers the membership test as well.
    verb_cls = VERB_REGISTRY.get(cli_name)
    return verb_cls is not None
|
||
|
||
def fetch_verb_class(cli_name: str) -> Optional[type[Verb]]:
    """Look up the class object registered for the named verb

    Parameters
    ----------
    cli_name : str
        The name of the verb on the command line interface

    Returns
    -------
    Optional[type[Verb]]
        The verb class, or None if the name is unregistered or has no class.
    """
    verb_cls = VERB_REGISTRY.get(cli_name)
    return verb_cls
Oops, something went wrong.