Skip to content

Commit

Permalink
Add lookup verb and verbs-taking-arguments infrastructure
Browse files Browse the repository at this point in the history
- Verbs can now be defined with a class that encapsulates the
  cli and notebook interfaces.
- Verbs use a registry similar to models and datasets, but
  without support for external verbs
- Lookup verb with a CLI and notebook interface
- Inference now creates an index of object_id -> batch
  to facilitate lookups in the batch numpy files
- Backwards compatibility: Lookup verb will regenerate
  the object_id -> batch index if it does not exist.
- Stub implementation of similarity search verb.
  • Loading branch information
mtauraso committed Jan 27, 2025
1 parent 50ba387 commit c38b5bf
Show file tree
Hide file tree
Showing 8 changed files with 364 additions and 9 deletions.
25 changes: 22 additions & 3 deletions src/fibad/fibad.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,17 @@
from typing import Optional, Union

from .config_utils import ConfigManager
from .verbs.verb_registry import all_class_verbs, fetch_verb_class, is_verb_class


class Fibad:
"""
Overall class that represents an interface into fibad. Currently this encapsulates a configuration and is
the external interface to all verbs in a programmatic context.
the external interface to all verbs in a programmatic or notebook context.
CLI functions in fibad_cli are implemented by calling this class
"""

verbs = ["train", "infer", "download", "prepare", "rebuild_manifest"]

def __init__(self, *, config_file: Optional[Union[Path, str]] = None, setup_logging: bool = True):
"""Initialize fibad. Always applies the default config, and merges it with any provided config file.
Expand Down Expand Up @@ -193,3 +192,23 @@ def rebuild_manifest(self, **kwargs):
from .rebuild_manifest import run

return run(config=self.config, **kwargs)

# Python notebook interface to class verbs
# we need both __dir__ and __getattr__ so that the
# functions from the various verb classes appear to be
# methods on the fibad object
def __dir__(self):
    """List attribute names, including the class-based verbs served by ``__getattr__``.

    Returns
    -------
    list[str]
        Sorted, de-duplicated attribute names of this object plus the names of
        all verbs that are registered with a class-based implementation.
    """
    # object.__dir__ covers the instance __dict__ and the attributes of the
    # actual class (including subclasses), rather than hard-coding one class.
    names = set(object.__dir__(self))
    names.update(all_class_verbs())
    return sorted(names)

Check warning on line 201 in src/fibad/fibad.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/fibad.py#L201

Added line #L201 was not covered by tests

def __getattr__(self, name):
    """Expose class-based verbs as if they were methods on this object.

    Python only calls ``__getattr__`` after normal attribute lookup has failed,
    so this handles just the names that are not real attributes.

    Parameters
    ----------
    name : str
        The attribute name being looked up.

    Returns
    -------
    Callable
        The bound ``run`` method of a freshly created instance of the verb class
        registered under ``name``.

    Raises
    ------
    AttributeError
        If ``name`` is not a verb with a class-based implementation.
    """
    if not is_verb_class(name):
        # Raising (rather than returning None) keeps hasattr(), getattr() with a
        # default, and attribute typos behaving the way Python callers expect.
        raise AttributeError(f"{type(self).__name__!r} object has no attribute {name!r}")

    # We return the run function on the verb class after
    # just-in-time creating the verb so that a notebook user
    # sees the function signature and help.
    #
    # It may be possible to do this with functools.partial techniques
    # but should be tested.
    verb_inst = fetch_verb_class(name)(config=self.config)
    return verb_inst.run

Check warning on line 214 in src/fibad/fibad.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/fibad.py#L213-L214

Added lines #L213 - L214 were not covered by tests
4 changes: 4 additions & 0 deletions src/fibad/fibad_default_config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -192,3 +192,7 @@ split = false

# Whether to generate a chromadb vector database of inference results
chromadb = true

[results]
# Path to inference results to use for visualization and lookups. Uses latest inference run if none provided.
inference_dir = false
38 changes: 35 additions & 3 deletions src/fibad/infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def run(config: ConfigDict):
},
)

# These are values the _save_batch callback needs to run
write_index = 0
batch_index = 0
object_ids: list[int] = []
Expand Down Expand Up @@ -93,18 +94,49 @@ def _save_batch(batch_results: Tensor):
filename = f"batch_{batch_index}.npy"
savepath = results_dir / filename
if savepath.exists():
RuntimeError("The path to save results for object {object_id} already exists.")
RuntimeError(f"The path to save results for objects in batch {batch_index} already exists.")

Check warning on line 97 in src/fibad/infer.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/infer.py#L97

Added line #L97 was not covered by tests

np.save(savepath, structured_batch, allow_pickle=False)

batch_index += 1
write_index += batch_len

# Run inference
evaluator = create_evaluator(model, _save_batch)
evaluator.run(data_loader)

logger.info(f"Results saved in {results_dir}")
logger.info("finished evaluating...")
# Write out a dictionary to map IDs->Batch
batch_size = config["data_loader"]["batch_size"]
batch_nums = np.array([np.full(batch_size, i) for i in range(0, batch_index)]).ravel()
save_batch_index(results_dir, np.array(object_ids), batch_nums[: len(object_ids)])

Check warning on line 111 in src/fibad/infer.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/infer.py#L109-L111

Added lines #L109 - L111 were not covered by tests

# Log completion
logger.info(f"Inference Results saved in {results_dir}")

Check warning on line 114 in src/fibad/infer.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/infer.py#L114

Added line #L114 was not covered by tests


def save_batch_index(results_dir: Path, ids: np.ndarray, batch_nums: np.ndarray):
    """Save a batch index in the result directory provided

    The index is written to ``batch_index.npy`` as a structured array with the
    integer fields ``id`` and ``batch_num``, sorted by ``id``.

    Parameters
    ----------
    results_dir : Path
        The results directory
    ids : np.ndarray
        All IDs to write out.
    batch_nums : np.ndarray
        The corresponding batch numbers for the IDs provided.

    Raises
    ------
    RuntimeError
        If ``batch_index.npy`` already exists in ``results_dir``.
    """
    savepath = results_dir / "batch_index.npy"
    if savepath.exists():
        # Refuse to clobber an existing index; the exception was previously
        # constructed but never raised, so the file was silently overwritten.
        raise RuntimeError("The path to save batch index already exists.")

    batch_index_dtype = np.dtype([("id", np.int64), ("batch_num", np.int64)])
    batch_index = np.zeros(len(ids), batch_index_dtype)
    batch_index["id"] = np.asarray(ids)
    batch_index["batch_num"] = np.asarray(batch_nums)
    batch_index.sort(order="id")

    np.save(savepath, batch_index, allow_pickle=False)

Check warning on line 139 in src/fibad/infer.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/infer.py#L135-L139

Added lines #L135 - L139 were not covered by tests


def load_model_weights(config: ConfigDict, model):
Expand Down
3 changes: 3 additions & 0 deletions src/fibad/verbs/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# VERB_REGISTRY is listed in __all__, so it must be imported here; otherwise
# ``from fibad.verbs import *`` fails because the module has no such attribute.
from .verb_registry import VERB_REGISTRY, all_class_verbs, all_verbs, fetch_verb_class, is_verb_class

__all__ = ["VERB_REGISTRY", "is_verb_class", "fetch_verb_class", "all_class_verbs", "all_verbs"]
137 changes: 137 additions & 0 deletions src/fibad/verbs/lookup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
import logging
import re
from argparse import ArgumentParser, Namespace
from pathlib import Path
from typing import Optional, Union

Check warning on line 5 in src/fibad/verbs/lookup.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/verbs/lookup.py#L1-L5

Added lines #L1 - L5 were not covered by tests

import numpy as np

Check warning on line 7 in src/fibad/verbs/lookup.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/verbs/lookup.py#L7

Added line #L7 was not covered by tests

from fibad.config_utils import find_most_recent_results_dir
from fibad.infer import save_batch_index

Check warning on line 10 in src/fibad/verbs/lookup.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/verbs/lookup.py#L9-L10

Added lines #L9 - L10 were not covered by tests

from .verb_registry import Verb, fibad_verb

Check warning on line 12 in src/fibad/verbs/lookup.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/verbs/lookup.py#L12

Added line #L12 was not covered by tests

logger = logging.getLogger(__name__)

Check warning on line 14 in src/fibad/verbs/lookup.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/verbs/lookup.py#L14

Added line #L14 was not covered by tests


@fibad_verb
class Lookup(Verb):
    """Look up an inference result using the ID of a data member"""

    cli_name = "lookup"
    add_parser_kwargs = {}

    @staticmethod
    def setup_parser(parser: ArgumentParser):
        """Set up our arguments by configuring a subparser

        Parameters
        ----------
        parser : ArgumentParser
            The sub-parser to configure
        """
        parser.add_argument("-i", "--id", type=str, required=True, help="ID of image")
        parser.add_argument(
            "-r", "--results-dir", type=str, required=False, help="Directory containing inference results."
        )

    def run_cli(self, args: Optional[Namespace] = None):
        """Entrypoint to Lookup from the CLI.

        Prints the looked-up vector to stdout when one is found.

        Parameters
        ----------
        args : Optional[Namespace], optional
            The parsed command line arguments

        Raises
        ------
        RuntimeError
            If ``args`` is None.
        """
        logger.info("Lookup run from cli")
        if args is None:
            raise RuntimeError("Run CLI called with no arguments.")

        # This is where we map from CLI parsed args to a
        # self.run (args) call.
        vector = self.run(id=args.id, results_dir=args.results_dir)
        if vector is None:
            logger.info("No inference result found")
        else:
            logger.info("Inference result found")
            print(vector)

    def run(self, id: str, results_dir: Optional[Union[Path, str]] = None) -> Optional[np.ndarray]:
        """Lookup the latent-space representation of a particular ID

        Requires the relevant dataset to be configured, and for inference to have been run.

        Parameters
        ----------
        id : str
            The ID of the input data to look up the inference result. It is converted
            with ``int()`` before being compared against the index.
        results_dir : str or Path, Optional
            The directory containing the inference results. When None, the
            ``[results] inference_dir`` config value is used if set, otherwise the
            most recent ``infer`` results directory.

        Returns
        -------
        Optional[np.ndarray]
            The output tensor of the model for the given input, or None when no
            results directory can be determined or the ID is not present.

        Raises
        ------
        ValueError
            If ``id`` cannot be converted to an integer.
        """
        if results_dir is None:
            if self.config["results"]["inference_dir"]:
                results_dir = self.config["results"]["inference_dir"]
            else:
                results_dir = find_most_recent_results_dir(self.config, verb="infer")
                # Trailing space keeps the two sentences from running together in the log.
                msg = f"Using most recent results dir {results_dir} for lookup. "
                msg += "Use the [results] inference_dir config to set a directory or pass it to this verb."
                logger.info(msg)

        if results_dir is None:
            msg = "Could not find a results directory. Run infer or use "
            msg += "[results] inference_dir config to specify a directory"
            logger.error(msg)
            return None

        results_dir = Path(results_dir)

        # Open the batch index numpy file, creating it first from the batch
        # files when it does not exist.
        batch_index_path = results_dir / "batch_index.npy"
        if not batch_index_path.exists():
            self.create_index(results_dir)

        # Convert once; both the index and the batch files are matched on the integer ID.
        object_id = int(id)

        batch_index = np.load(batch_index_path)
        batch_num = batch_index[batch_index["id"] == object_id]["batch_num"]
        if len(batch_num) == 0:
            return None
        batch_num = batch_num[0]

        recarray = np.load(results_dir / f"batch_{batch_num}.npy")
        tensor = recarray[recarray["id"] == object_id]["tensor"]
        if len(tensor) == 0:
            return None

        return np.array(tensor[0])

    def create_index(self, results_dir: Path):
        """Recreate the index into the batch numpy files

        Reads the ``id`` field of every ``batch_<number>.npy`` file in the directory
        and passes the collected IDs and batch numbers to ``save_batch_index``.

        Parameters
        ----------
        results_dir : Path
            Path to the batch numpy files
        """
        ids = []
        batch_nums = []
        printed_progress = False

        # Use the batched numpy files to assemble an index.
        logger.info("Recreating index...")
        for file in results_dir.glob("batch_*.npy"):
            print(".", end="", flush=True)
            printed_progress = True
            # fullmatch with an escaped dot so only names of the exact form
            # batch_<digits>.npy contribute a batch number.
            m = re.fullmatch(r"batch_([0-9]+)\.npy", file.name)
            if m is None:
                logger.warning(f"Could not find batch number for {file}")
                continue
            batch_num = int(m[1])
            batch_ids = np.load(file)["id"]
            ids.extend(batch_ids)
            batch_nums.extend([batch_num] * len(batch_ids))

        if printed_progress:
            # Finish the line of progress dots so later output starts on a fresh line.
            print(flush=True)

        save_batch_index(results_dir, np.array(ids), np.array(batch_nums))

Check warning on line 137 in src/fibad/verbs/lookup.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/verbs/lookup.py#L137

Added line #L137 was not covered by tests
49 changes: 49 additions & 0 deletions src/fibad/verbs/search.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import logging
from argparse import ArgumentParser, Namespace
from typing import Optional

Check warning on line 3 in src/fibad/verbs/search.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/verbs/search.py#L1-L3

Added lines #L1 - L3 were not covered by tests

from .verb_registry import Verb, fibad_verb

Check warning on line 5 in src/fibad/verbs/search.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/verbs/search.py#L5

Added line #L5 was not covered by tests

logger = logging.getLogger(__name__)

Check warning on line 7 in src/fibad/verbs/search.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/verbs/search.py#L7

Added line #L7 was not covered by tests


@fibad_verb
class Search(Verb):
    """Stub of similarity search"""

    cli_name = "search"
    add_parser_kwargs = {}

    @staticmethod
    def setup_parser(parser: ArgumentParser):
        """Register the single required ``--image-file`` argument on the sub-parser."""
        parser.add_argument(
            "-i",
            "--image-file",
            required=True,
            type=str,
            help="Path to image file",
        )

    # Design note: if setup_parser and run_cli moved to the Verb superclass,
    # defining a new verb would reduce to:
    #
    # If you want no args, just make the class, define run(self)
    # If you want args
    #   1) write setup_parser (which sets up for ArgumentParser and name/type info for cli run)
    #   2) write run(self, <your args>) to do what you want
    #
    # Open question: should the base class provide a run_cli that uses a dict on the
    # Verb superclass to build the call to run from what the subclass declared in
    # setup_parser?
    def run_cli(self, args: Optional[Namespace] = None):
        """Stub CLI entrypoint: forward the parsed ``image_file`` argument to ``run``.

        Raises
        ------
        RuntimeError
            If ``args`` is None.
        """
        logger.info("Search run from cli")
        if args is None:
            raise RuntimeError("Run CLI called with no arguments.")

        # Map the parsed CLI namespace onto the keyword arguments of run().
        image_path = args.image_file
        return self.run(image_file=image_path)

    def run(self, image_file: str):
        """Stub search implementation; currently only logs the image it was given.

        Parameters
        ----------
        image_file : str
            Value received for the ``--image-file`` argument.
        """
        logger.info(f"Got Image {image_file}")

Check warning on line 49 in src/fibad/verbs/search.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/verbs/search.py#L49

Added line #L49 was not covered by tests
82 changes: 82 additions & 0 deletions src/fibad/verbs/verb_registry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import logging
from abc import ABC
from typing import Optional

from fibad.config_utils import ConfigDict
from fibad.plugin_utils import update_registry

logger = logging.getLogger(__name__)


class Verb(ABC):
    """Common base class that every class-based fibad verb derives from."""

    # Keyword arguments a verb wants passed to subparser.add_parser() when its
    # parser is attached to the main parser. This empty default is used by
    # verbs that do not define any of their own.
    add_parser_kwargs: dict[str, str] = {}

    def __init__(self, config: ConfigDict):
        """Store the configuration on the verb instance.

        Parameters
        ----------
        config : ConfigDict
            The configuration the verb keeps as ``self.config``.
        """
        self.config = config

Check warning on line 21 in src/fibad/verbs/verb_registry.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/verbs/verb_registry.py#L21

Added line #L21 was not covered by tests


# Registry of verbs, keyed by verb name; the value is the implementing class or None.
# Verbs with no class are assumed to have a function in fibad.py which
# performs their function. All other verbs should be defined by named classes
# in fibad.verbs and use the @fibad_verb decorator
VERB_REGISTRY: dict[str, Optional[type[Verb]]] = {
    "train": None,
    "infer": None,
    "download": None,
    "prepare": None,
    "rebuild_manifest": None,
}


def fibad_verb(cls: type[Verb]) -> type[Verb]:
    """Class decorator that registers a fibad verb under its ``cli_name``.

    Parameters
    ----------
    cls : type[Verb]
        The verb class to register; it must define a ``cli_name`` attribute.

    Returns
    -------
    type[Verb]
        The same class, unchanged, so the decorator can be stacked or used inline.
    """
    verb_name = cls.cli_name  # type: ignore[attr-defined]
    update_registry(VERB_REGISTRY, verb_name, cls)
    return cls

Check warning on line 39 in src/fibad/verbs/verb_registry.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/verbs/verb_registry.py#L38-L39

Added lines #L38 - L39 were not covered by tests


def all_verbs() -> list[str]:
    """Return the names of every registered verb, in registry order."""
    return list(VERB_REGISTRY)

Check warning on line 44 in src/fibad/verbs/verb_registry.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/verbs/verb_registry.py#L44

Added line #L44 was not covered by tests


def all_class_verbs() -> list[str]:
    """Return the names of registered verbs whose registry entry is a class (not None)."""
    return [name for name, verb_cls in VERB_REGISTRY.items() if verb_cls is not None]

Check warning on line 49 in src/fibad/verbs/verb_registry.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/verbs/verb_registry.py#L49

Added line #L49 was not covered by tests


def is_verb_class(cli_name: str) -> bool:
    """Report whether a verb is implemented by a class.

    Parameters
    ----------
    cli_name : str
        The name of the verb on the command line interface

    Returns
    -------
    bool
        True if the verb is registered and its registry entry is not None.
    """
    # .get() yields None both for unknown verbs and for function-based verbs,
    # so a single comparison covers the membership test as well.
    return VERB_REGISTRY.get(cli_name) is not None

Check warning on line 65 in src/fibad/verbs/verb_registry.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/verbs/verb_registry.py#L65

Added line #L65 was not covered by tests


def fetch_verb_class(cli_name: str) -> Optional[type[Verb]]:
    """Look up the class registered for the named verb.

    Parameters
    ----------
    cli_name : str
        The name of the verb on the command line interface

    Returns
    -------
    Optional[type[Verb]]
        The registry entry for the verb, or None when the name is not registered
        or the verb has no class-based implementation.
    """
    verb_cls = VERB_REGISTRY.get(cli_name, None)
    return verb_cls

Check warning on line 82 in src/fibad/verbs/verb_registry.py

View check run for this annotation

Codecov / codecov/patch

src/fibad/verbs/verb_registry.py#L82

Added line #L82 was not covered by tests
Loading

0 comments on commit c38b5bf

Please sign in to comment.