Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

90 feat(backend): simplify database tables with JSON columns #95

Draft
wants to merge 12 commits into
base: trunk
Choose a base branch
from
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ click = "*"
psycopg2-binary = "*"
PyYAML = "*"
transitions = "*"
pydantic = "^2.9.2"

[tool.poetry.dev-dependencies]
ipykernel = "*"
Expand Down
34 changes: 20 additions & 14 deletions spiderexpress/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,14 @@
- Refine verbs/commands for the CLI
- find a mechanism for stopping/starting collections
"""

import sys
from importlib.metadata import entry_points
from pathlib import Path
from loguru import logger as log

import click
import yaml
import sys
from loguru import logger as log

from .spider import CONNECTOR_GROUP, STRATEGY_GROUP, Spider
from .types import Configuration
Expand All @@ -34,22 +35,21 @@ def cli(ctx):
@cli.command()
@click.argument("config", type=click.Path(path_type=Path, exists=True))
@click.option("-v", "--verbose", count=True)
@click.option("-l", "--logfile", type=click.Path(dir_okay=False, writable=True, path_type=str))
@click.option(
"-l", "--logfile", type=click.Path(dir_okay=False, writable=True, path_type=str)
)
@click.pass_context
def start(ctx: click.Context, config: Path, verbose: int, logfile: str):
"""start a job"""
logging_level = max(50 - (10 * verbose), 0) # Allows logging level to be between 0 and 50.
logging_level = max(
50 - (10 * verbose), 0
) # Allows logging level to be between 0 and 50.
logging_configuration = {
"handlers": [
{
"sink": logfile or sys.stdout,
"level": logging_level
}
],
"extra": {}
"handlers": [{"sink": logfile or sys.stdout, "level": logging_level}],
"extra": {},
}
log.configure(**logging_configuration)
log.debug(f"Starting logging with verbosity {logging_level}.")
log.debug(f"Starting logging with verbosity {logging_level}.")
ctx.obj.start(config)


Expand All @@ -62,7 +62,13 @@ def create(config: str, interactive: bool):

if interactive:
for key, description in [
("seeds", "add seeds?"),
("project_name", "Name of your project?"),
("db_url", "URL of your database?"),
("max_iteration", "How many iterations should be done?"),
(
"empty_seeds",
"What should happen if seeds are empty? Can be 'stop' or 'retry'",
),
("seed_file", "do you wish to read a file for seeds?"),
]:
args[key] = click.prompt(description)
Expand All @@ -76,7 +82,7 @@ def create(config: str, interactive: bool):
@cli.command()
def list(): # pylint: disable=W0622
"""list all plugins"""
click.echo("--- connectors ---", color="blue")
click.echo("--- connectors ---")
for connector in entry_points(group=CONNECTOR_GROUP):
click.echo(connector.name)
click.echo("--- strategies ---")
Expand Down
14 changes: 9 additions & 5 deletions spiderexpress/connectors/csv.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
"""A CSV-reading, network-rippin' connector for your testing purposes."""

import dataclasses
from typing import Dict, List, Optional, Tuple, Union

import pandas as pd

from spiderexpress.types import PlugIn, fromdict
from spiderexpress.types import PlugIn, from_dict

_cache = {}


@dataclasses.dataclass
class CSVConnectorConfiguration:
"""Configuration items for the csv_connector."""
Expand All @@ -23,7 +25,7 @@ def csv_connector(
) -> Tuple[pd.DataFrame, pd.DataFrame]:
"""The CSV connector!"""
if isinstance(configuration, dict):
configuration = fromdict(CSVConnectorConfiguration, configuration)
configuration = from_dict(CSVConnectorConfiguration, configuration)

if configuration.cache:
if configuration.edge_list_location not in _cache:
Expand Down Expand Up @@ -61,9 +63,11 @@ def csv_connector(

return (
edge_return,
nodes.loc[nodes.name.isin(node_ids), :]
if nodes is not None
else pd.DataFrame(),
(
nodes.loc[nodes.name.isin(node_ids), :]
if nodes is not None
else pd.DataFrame()
),
)


Expand Down
Loading
Loading