From 2d3f3445d00d54a5a516a98bf3a4a1029a7b8a0c Mon Sep 17 00:00:00 2001
From: nsthorat
Date: Thu, 6 Jul 2023 16:34:02 -0400
Subject: [PATCH 1/7] save

---
 Dockerfile    |  3 +++
 src/server.py | 11 +++++++++++
 2 files changed, 14 insertions(+)

diff --git a/Dockerfile b/Dockerfile
index 8c728d9d4..d68612487 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -27,4 +27,7 @@ COPY /web/blueprint/build ./web/blueprint/build
 # Copy python files.
 COPY /src ./src/
 
+# Copy the entrypoint file.
+COPY docker_entrypoint.sh .
+
 CMD ["uvicorn", "src.server:app", "--host", "0.0.0.0", "--port", "5432"]
diff --git a/src/server.py b/src/server.py
index 2a7f841de..970517ff9 100644
--- a/src/server.py
+++ b/src/server.py
@@ -2,6 +2,7 @@
 
 import logging
 import os
+from contextlib import asynccontextmanager
 from typing import Any
 
 from fastapi import APIRouter, FastAPI
@@ -53,6 +54,16 @@ def custom_generate_unique_id(route: APIRoute) -> str:
 app.mount('/', StaticFiles(directory=os.path.join(DIST_PATH), html=True, check_dir=False))
 
 
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+  """The lifespan hook for the server."""
+  # Setup.
+
+  yield
+
+  # Teardown.
+
+
 @app.on_event('shutdown')
 async def shutdown_event() -> None:
   """Kill the task manager when FastAPI shuts down."""

From 92f8a90c44dd7dfe0b3d06e2ca63c4abeb498ba9 Mon Sep 17 00:00:00 2001
From: nsthorat
Date: Fri, 7 Jul 2023 11:52:10 -0400
Subject: [PATCH 2/7] save

---
 .env                  | 12 +++++++++++
 Dockerfile            |  9 --------
 README.md             | 24 ++++++++++++++++++++--
 scripts/__init__.py   |  0
 scripts/deploy_hf.py  | 45 +++++++++++++++++++++++++++++-----------
 src/router_dataset.py | 36 ++------------------------------
 src/server.py         | 48 +++++++++++++++++++++++++++++++++--------
 src/utils.py          | 37 +++++++++++++++++++++++++++++++++
 8 files changed, 146 insertions(+), 65 deletions(-)
 create mode 100644 scripts/__init__.py

diff --git a/.env b/.env
index b361ad2d1..e06e7fc3e 100644
--- a/.env
+++ b/.env
@@ -17,3 +17,15 @@ DUCKDB_USE_VIEWS=0
 
 # Get key from https://platform.openai.com/account/api-keys
 # OPENAI_API_KEY=
+
+# For authenticating with HuggingFace to read private data from the hub for the huggingface
+# demo.
+# HF_USERNAME=
+# https://huggingface.co/settings/tokens
+# HF_ACCESS_TOKEN=
+
+# The repo to use for the huggingface demo.
+# HF_STAGING_DEMO_REPO='HF_ORG/HF_REPO_NAME'
+
+# To sync data from huggingface before the server boots.
+# LILAC_DL_DATA_FROM_HF_SPACE='HF_ORG/HF_REPO_NAME'
diff --git a/Dockerfile b/Dockerfile
index d68612487..7a2a6a50b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -12,12 +12,6 @@ WORKDIR /server
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
 
-# Copy the data to /data, the HF persistent storage. We do this after pip install to avoid
-# re-installing dependencies if the data changes, which is likely more often.
-WORKDIR /
-COPY /data /data
-WORKDIR /server
-
 COPY .env .
 COPY LICENSE .
 
@@ -27,7 +21,4 @@ COPY /web/blueprint/build ./web/blueprint/build
 # Copy python files.
 COPY /src ./src/
 
-# Copy the entrypoint file.
-COPY docker_entrypoint.sh .
-
 CMD ["uvicorn", "src.server:app", "--host", "0.0.0.0", "--port", "5432"]
diff --git a/README.md b/README.md
index 452f859e4..9cb4f1948 100644
--- a/README.md
+++ b/README.md
@@ -38,13 +38,33 @@ Details can be found at [Managing Spaces with Github Actions](https://huggingfac
 
 We use the HuggingFace git server, [follow the instructions](https://huggingface.co/docs/hub/repositories-getting-started) to use your git SSH keys to talk to HuggingFace.
 
+###### Staging demo
+
+Make sure you have created a HuggingFace space: [huggingface.co/spaces](https://huggingface.co/spaces)
+
+Set .env.local environment variables so you can upload data to the space:
+
+```sh
+# The repo to use for the huggingface demo.
+HF_STAGING_DEMO_REPO='lilacai/your-space'
+# To authenticate with HuggingFace for uploading to the space.
+HF_USERNAME='your-username'
+```
+
+Set the variables on the HuggingFace space from the UI to authenticate the binary running on HuggingFace to read private space data:
+
+- `LILAC_DL_DATA_FROM_HF_SPACE`: lilacai/your-space
+- `HF_ACCESS_TOKEN`: yourtoken
+
+NOTE: `HF_ACCESS_TOKEN` can be generated from [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens). Create a read-only token for this step.
+
 To deploy to huggingface:
 
 ```
 poetry run python -m scripts.deploy_hf \
-  --hf_username=$HF_USERNAME \
-  --hf_space=$HF_ORG/$HF_SPACE \
   --dataset=$DATASET_NAMESPACE/$DATASET_NAME
+
+# --hf_username and --hf_space are optional and can override the ENV for local uploading.
 ```
 
 #### Deployment
diff --git a/scripts/__init__.py b/scripts/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/scripts/deploy_hf.py b/scripts/deploy_hf.py
index 7c28927d0..af31a8322 100644
--- a/scripts/deploy_hf.py
+++ b/scripts/deploy_hf.py
@@ -1,24 +1,25 @@
 """Deploy to a huggingface space."""
 import os
 import subprocess
+from typing import Optional
 
 import click
 from huggingface_hub import HfApi
 
+from src.config import CONFIG, data_path
+from src.utils import get_dataset_output_dir
+
 HF_SPACE_DIR = 'hf_spaces'
 
 
 @click.command()
 @click.option(
-  '--hf_username',
-  help='The huggingface username to use to authenticate for the space.',
-  type=str,
-  required=True)
+  '--hf_username', help='The huggingface username to use to authenticate for the space.', type=str)
 @click.option(
   '--hf_space',
-  help='The huggingface space. Should be formatted like `SPACE_ORG/SPACE_NAME`',
-  type=str,
-  required=True)
+  help='The huggingface space. Defaults to env.HF_STAGING_DEMO_REPO. '
+  'Should be formatted like `SPACE_ORG/SPACE_NAME`.',
+  type=str)
 @click.option(
   '--skip_build',
   help='Skip building the web server TypeScript. '
@@ -26,17 +27,27 @@
   type=bool,
   default=False)
 @click.option('--dataset', help='The name of a dataset to upload', type=str, multiple=True)
-def main(hf_username: str, hf_space: str, dataset: list[str], skip_build: bool) -> None:
+def main(hf_username: Optional[str], hf_space: Optional[str], dataset: list[str],
+         skip_build: bool) -> None:
   """Generate the huggingface space app."""
+  hf_username = hf_username or CONFIG['HF_USERNAME']
+  if not hf_username:
+    raise ValueError('Must specify --hf_username or set env.HF_USERNAME')
+
+  hf_space = hf_space or CONFIG['HF_STAGING_DEMO_REPO']
+  if not hf_space:
+    raise ValueError('Must specify --hf_space or set env.HF_STAGING_DEMO_REPO')
+
   # Upload datasets to HuggingFace.
   # NOTE(nsthorat): This currently doesn't write to persistent storage and does not work because of
   # a bug in HuggingFace.
   hf_api = HfApi()
   for d in dataset:
-    dataset_path = os.path.join('data', 'datasets', d)
+    namespace, name = d.split('/')
+
     hf_api.upload_folder(
-      folder_path=os.path.abspath(dataset_path),
-      path_in_repo='/' + dataset_path,
+      folder_path=get_dataset_output_dir(data_path(), namespace, name),
+      path_in_repo=get_dataset_output_dir('data', namespace, name),
       repo_id=hf_space,
       repo_type='space',
       # Delete all data on the server.
@@ -54,14 +65,24 @@ def main(hf_username: Optional[str], hf_space: Optional[str], dataset: list[str]
 
   run(f'poetry export --without-hashes > {repo_basedir}/requirements.txt')
 
+  # Create a .gitignore to avoid uploading unnecessary files.
+  with open(f'{repo_basedir}/.gitignore', 'w') as f:
+    f.write("""**/__pycache__
+**/*.pyc
+**/*.pyo
+**/*.pyd
+**/*_test.py
+""")
+
   # Copy source code.
   copy_dirs = ['src', 'web/blueprint/build']
   for dir in copy_dirs:
+    run(f'rm -rf {repo_basedir}/{dir}')
     run(f'mkdir -p {repo_basedir}/{dir}')
     run(f'cp -vaR ./{dir}/* {repo_basedir}/{dir}')
 
   # Copy a subset of root files.
-  copy_files = ['.env', 'Dockerfile', 'LICENSE']
+  copy_files = ['.dockerignore', '.env', 'Dockerfile', 'LICENSE']
   for file in copy_files:
     run(f'cp ./{file} {repo_basedir}/{file}')
diff --git a/src/router_dataset.py b/src/router_dataset.py
index 723a842cc..059320eae 100644
--- a/src/router_dataset.py
+++ b/src/router_dataset.py
@@ -1,5 +1,4 @@
 """Router for the dataset database."""
-import os
 from typing import Optional, Sequence, Union, cast
 from urllib.parse import unquote
 
@@ -39,7 +38,7 @@
 )
 from .signals.substring_search import SubstringSignal
 from .tasks import TaskId, task_manager
-from .utils import DATASETS_DIR_NAME
+from .utils import DatasetInfo, list_datasets
 
 router = APIRouter(route_class=RouteErrorHandler)
 
@@ -47,41 +46,10 @@
 set_default_dataset_cls(DatasetDuckDB)
 
 
-class DatasetInfo(BaseModel):
-  """Information about a dataset."""
-  namespace: str
-  dataset_name: str
-  description: Optional[str]
-
-
 @router.get('/', response_model_exclude_none=True)
 def get_datasets() -> list[DatasetInfo]:
   """List the datasets."""
-  datasets_path = os.path.join(data_path(), DATASETS_DIR_NAME)
-  # Skip if 'datasets' doesn't exist.
-  if not os.path.isdir(datasets_path):
-    return []
-
-  dataset_infos: list[DatasetInfo] = []
-  for namespace in os.listdir(datasets_path):
-    dataset_dir = os.path.join(datasets_path, namespace)
-    # Skip if namespace is not a directory.
-    if not os.path.isdir(dataset_dir):
-      continue
-    if namespace.startswith('.'):
-      continue
-
-    for dataset_name in os.listdir(dataset_dir):
-      # Skip if dataset_name is not a directory.
-      dataset_path = os.path.join(dataset_dir, dataset_name)
-      if not os.path.isdir(dataset_path):
-        continue
-      if dataset_name.startswith('.'):
-        continue
-
-      dataset_infos.append(DatasetInfo(namespace=namespace, dataset_name=dataset_name))
-
-  return dataset_infos
+  return list_datasets(data_path())
 
 
 class WebManifest(BaseModel):
diff --git a/src/server.py b/src/server.py
index 970517ff9..d06883d32 100644
--- a/src/server.py
+++ b/src/server.py
@@ -2,17 +2,21 @@
 
 import logging
 import os
-from contextlib import asynccontextmanager
+import shutil
+import subprocess
 from typing import Any
 
 from fastapi import APIRouter, FastAPI
 from fastapi.responses import ORJSONResponse
 from fastapi.routing import APIRoute
 from fastapi.staticfiles import StaticFiles
+from huggingface_hub import snapshot_download
 
 from . import router_concept, router_data_loader, router_dataset, router_signal, router_tasks
+from .config import CONFIG, data_path
 from .router_utils import RouteErrorHandler
 from .tasks import task_manager
+from .utils import get_dataset_output_dir, list_datasets
 
 DIST_PATH = os.path.abspath(os.path.join('web', 'blueprint', 'build'))
 
@@ -54,14 +58,42 @@ def custom_generate_unique_id(route: APIRoute) -> str:
 app.mount('/', StaticFiles(directory=os.path.join(DIST_PATH), html=True, check_dir=False))
 
 
-@asynccontextmanager
-async def lifespan(app: FastAPI):
-  """The lifespan hook for the server."""
+@app.on_event('startup')
+def startup() -> None:
+  """Download dataset files from the HF space that is uploaded before building the image."""
   # Setup.
-
-  yield
-
-  # Teardown.
+  repo_id = CONFIG.get('LILAC_DL_DATA_FROM_HF_SPACE', None)
+
+  if repo_id:
+    # Download the huggingface space data. This includes code and datasets, so we move the datasets
+    # alone to the data directory.
+    spaces_download_dir = os.path.join(data_path(), '.hf-spaces', repo_id)
+    snapshot_download(
+      repo_id=repo_id,
+      repo_type='space',
+      local_dir=spaces_download_dir,
+      local_dir_use_symlinks=False,
+      token=CONFIG['HF_ACCESS_TOKEN'])
+
+    datasets = list_datasets(os.path.join(spaces_download_dir, 'data'))
+    for dataset in datasets:
+      spaces_dataset_output_dir = get_dataset_output_dir(
+        os.path.join(spaces_download_dir, 'data'), dataset.namespace, dataset.dataset_name)
+      persistent_output_dir = get_dataset_output_dir(data_path(), dataset.namespace,
+                                                     dataset.dataset_name)
+
+      shutil.rmtree(persistent_output_dir, ignore_errors=True)
+      print('~~~~moving', os.path.join(spaces_download_dir, dataset.namespace,
+                                       dataset.dataset_name), 'to', persistent_output_dir)
+      shutil.move(spaces_dataset_output_dir, persistent_output_dir)
+
+    run('ls -al')
+    run(f'ls {data_path()}')
+
+
+def run(cmd: str) -> subprocess.CompletedProcess[bytes]:
+  """Run a command and return the result."""
+  return subprocess.run(cmd, shell=True, check=True)
 
 
 @app.on_event('shutdown')
diff --git a/src/utils.py b/src/utils.py
index eb99ceaba..79b13ca35 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -106,6 +106,43 @@ def get_dataset_output_dir(base_dir: Union[str, pathlib.Path], namespace: str,
   return os.path.join(get_datasets_dir(base_dir), namespace, dataset_name)
 
 
+class DatasetInfo(BaseModel):
+  """Information about a dataset."""
+  namespace: str
+  dataset_name: str
+  description: Optional[str]
+
+
+def list_datasets(base_dir: Union[str, pathlib.Path]) -> list[DatasetInfo]:
+  """List the datasets in a data directory."""
+  datasets_path = get_datasets_dir(base_dir)
+
+  # Skip if 'datasets' doesn't exist.
+  if not os.path.isdir(datasets_path):
+    return []
+
+  dataset_infos: list[DatasetInfo] = []
+  for namespace in os.listdir(datasets_path):
+    dataset_dir = os.path.join(datasets_path, namespace)
+    # Skip if namespace is not a directory.
+    if not os.path.isdir(dataset_dir):
+      continue
+    if namespace.startswith('.'):
+      continue
+
+    for dataset_name in os.listdir(dataset_dir):
+      # Skip if dataset_name is not a directory.
+      dataset_path = os.path.join(dataset_dir, dataset_name)
+      if not os.path.isdir(dataset_path):
+        continue
+      if dataset_name.startswith('.'):
+        continue
+
+      dataset_infos.append(DatasetInfo(namespace=namespace, dataset_name=dataset_name))
+
+  return dataset_infos
+
+
 class CopyRequest(BaseModel):
   """A request to copy a file from source to destination path.
 
   Used to copy media files to GCS."""
   from_path: str

From 2a0661e0563b1e764e04c546cd80c780866c887f Mon Sep 17 00:00:00 2001
From: nsthorat
Date: Sun, 9 Jul 2023 18:40:56 -0400
Subject: [PATCH 3/7] save

---
 .env                 |  2 +-
 scripts/deploy_hf.py | 18 +++++++++---------
 src/server.py        | 20 +++++++++++---------
 3 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/.env b/.env
index a67ad5c57..bf277f11a 100644
--- a/.env
+++ b/.env
@@ -29,4 +29,4 @@ DUCKDB_USE_VIEWS=0
 # Get a token from https://huggingface.co/settings/tokens
 # HF_ACCESS_TOKEN=
 # To sync data from huggingface before the server boots.
-# LILAC_DL_DATA_FROM_HF_SPACE='HF_ORG/HF_REPO_NAME'
+# LILAC_DATA_FROM_HF_SPACE='HF_ORG/HF_REPO_NAME'
diff --git a/scripts/deploy_hf.py b/scripts/deploy_hf.py
index af31a8322..e1a83a751 100644
--- a/scripts/deploy_hf.py
+++ b/scripts/deploy_hf.py
@@ -65,15 +65,6 @@ def main(hf_username: Optional[str], hf_space: Optional[str], dataset: list[str]
 
   run(f'poetry export --without-hashes > {repo_basedir}/requirements.txt')
 
-  # Create a .gitignore to avoid uploading unnecessary files.
-  with open(f'{repo_basedir}/.gitignore', 'w') as f:
-    f.write("""**/__pycache__
-**/*.pyc
-**/*.pyo
-**/*.pyd
-**/*_test.py
-""")
-
   # Copy source code.
   copy_dirs = ['src', 'web/blueprint/build']
   for dir in copy_dirs:
@@ -86,6 +77,15 @@ def main(hf_username: Optional[str], hf_space: Optional[str], dataset: list[str]
   for file in copy_files:
     run(f'cp ./{file} {repo_basedir}/{file}')
 
+  # Create a .gitignore to avoid uploading unnecessary files.
+  with open(f'{repo_basedir}/.gitignore', 'w') as f:
+    f.write("""**/__pycache__
+**/*.pyc
+**/*.pyo
+**/*.pyd
+**/*_test.py
+""")
+
   # Create the huggingface README.
   with open(f'{repo_basedir}/README.md', 'w') as f:
     f.write("""---
diff --git a/src/server.py b/src/server.py
index d06883d32..6f17744f3 100644
--- a/src/server.py
+++ b/src/server.py
@@ -7,7 +7,7 @@
 from typing import Any
 
 from fastapi import APIRouter, FastAPI
-from fastapi.responses import ORJSONResponse
+from fastapi.responses import FileResponse, ORJSONResponse
 from fastapi.routing import APIRoute
 from fastapi.staticfiles import StaticFiles
 from huggingface_hub import snapshot_download
@@ -54,15 +54,22 @@ def custom_generate_unique_id(route: APIRoute) -> str:
 
 app.include_router(v1_router, prefix='/api/v1')
 
+
+@app.api_route('/{path_name}', include_in_schema=False)
+def catch_all() -> FileResponse:
+  """Catch any other requests and serve index for HTML5 history."""
+  return FileResponse(path=os.path.join(DIST_PATH, 'index.html'))
+
+
 # Serve static files in production mode.
-app.mount('/', StaticFiles(directory=os.path.join(DIST_PATH), html=True, check_dir=False))
+app.mount('/', StaticFiles(directory=DIST_PATH, html=True, check_dir=False))
 
 
 @app.on_event('startup')
 def startup() -> None:
-  """Download dataset files from the HF space that is uploaded before building the image."""
+  """Download dataset files from the HF space that was uploaded before building the image."""
   # Setup.
-  repo_id = CONFIG.get('LILAC_DL_DATA_FROM_HF_SPACE', None)
+  repo_id = CONFIG.get('LILAC_DATA_FROM_HF_SPACE', None)
 
   if repo_id:
     # Download the huggingface space data. This includes code and datasets, so we move the datasets
     # alone to the data directory.

From c3ea658e52edcd39200832aae93b853d0f799c52 Mon Sep 17 00:00:00 2001
From: nsthorat
Date: Sun, 9 Jul 2023 19:31:17 -0400
Subject: [PATCH 4/7] save

---
 .env                                              |  6 +++++-
 .gitignore                                        |  2 --
 README.md                                         |  3 ++-
 scripts/deploy_hf.py                              |  9 +++++++--
 src/server.py                                     |  4 ++--
 .../lib/components/datasetView/SearchPanel.svelte | 14 ++++++++++++++
 web/blueprint/src/routes/+layout.svelte           |  5 +++++
 7 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/.env b/.env
index bf277f11a..ba2995c32 100644
--- a/.env
+++ b/.env
@@ -20,13 +20,17 @@ DUCKDB_USE_VIEWS=0
 # Get key from https://makersuite.google.com/app/apikey
 # PALM_API_KEY=
 
+# HuggingFace demos: machine that uploads to HuggingFace.
+
 # For authenticating with HuggingFace to deploy to a Space.
 # HF_USERNAME=
 # The default repo to deploy to for a staging demo. Can be overridden by a command line flag.
 # HF_STAGING_DEMO_REPO='HF_ORG/HF_REPO_NAME'
 
+# HuggingFace demos: HuggingFace machine that runs the demo.
+
 # To read private uploaded data from the server (running on HF spaces) for the demo.
 # Get a token from https://huggingface.co/settings/tokens
 # HF_ACCESS_TOKEN=
 # To sync data from huggingface before the server boots.
-# LILAC_DATA_FROM_HF_SPACE='HF_ORG/HF_REPO_NAME'
+# HF_DATA_FROM_SPACE='HF_ORG/HF_REPO_NAME'
diff --git a/.gitignore b/.gitignore
index e191b8620..fb437b73c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,8 +8,6 @@ cloned_repos/
 py_coverage_html/
 *.deps.txt
 requirements.txt
-# Cloned huggingface spaces repos for pushing demos.
-hf_spaces/
 
 # Mac OS.
 .DS_Store
diff --git a/README.md b/README.md
index 9cb4f1948..e6c8d8efd 100644
--- a/README.md
+++ b/README.md
@@ -51,7 +51,8 @@ HF_STAGING_DEMO_REPO='lilacai/your-space'
 HF_USERNAME='your-username'
 ```
 
-Set the variables on the HuggingFace space from the UI to authenticate the binary running on HuggingFace to read private space data:
+Set the environment variables on the HuggingFace space from the HuggingFace Space Settings UI to
+authenticate the binary running on HuggingFace to read private space data:
 
 - `LILAC_DL_DATA_FROM_HF_SPACE`: lilacai/your-space
 - `HF_ACCESS_TOKEN`: yourtoken
diff --git a/scripts/deploy_hf.py b/scripts/deploy_hf.py
index e1a83a751..a1861b523 100644
--- a/scripts/deploy_hf.py
+++ b/scripts/deploy_hf.py
@@ -9,7 +9,7 @@
 from src.config import CONFIG, data_path
 from src.utils import get_dataset_output_dir
 
-HF_SPACE_DIR = 'hf_spaces'
+HF_SPACE_DIR = os.path.join(data_path(), '.hf_spaces')
 
 
 @click.command()
@@ -23,7 +23,7 @@
 @click.option(
   '--skip_build',
   help='Skip building the web server TypeScript. '
-  'Useful if you are only changing python or are only changing data.',
+  'Useful to speed up the build if you are only changing python or data.',
   type=bool,
   default=False)
 @click.option('--dataset', help='The name of a dataset to upload', type=str, multiple=True)
@@ -53,6 +53,7 @@ def main(hf_username: Optional[str], hf_space: Optional[str], dataset: list[str]
       # Delete all data on the server.
       delete_patterns='*')
 
+  # Build the web server Svelte & TypeScript.
   if not skip_build:
     run('sh ./scripts/build_server_prod.sh')
 
@@ -63,6 +64,10 @@ def main(hf_username: Optional[str], hf_space: Optional[str], dataset: list[str]
   run(f'rm -rf {repo_basedir}')
   run(f'git clone https://{hf_username}@huggingface.co/spaces/{hf_space} {repo_basedir}')
 
+  # Clear out the repo.
+  run(f'rm -rf {repo_basedir}/*')
+
+  # Export the requirements file so it can be pip installed in the docker container.
   run(f'poetry export --without-hashes > {repo_basedir}/requirements.txt')
 
   # Copy source code.
diff --git a/src/server.py b/src/server.py
index 6f17744f3..4e9f9ff11 100644
--- a/src/server.py
+++ b/src/server.py
@@ -69,12 +69,12 @@ def catch_all() -> FileResponse:
 def startup() -> None:
   """Download dataset files from the HF space that was uploaded before building the image."""
   # Setup.
-  repo_id = CONFIG.get('LILAC_DATA_FROM_HF_SPACE', None)
+  repo_id = CONFIG.get('HF_DATA_FROM_SPACE', None)
 
   if repo_id:
     # Download the huggingface space data. This includes code and datasets, so we move the datasets
     # alone to the data directory.
-    spaces_download_dir = os.path.join(data_path(), '.hf-spaces', repo_id)
+    spaces_download_dir = os.path.join(data_path(), '.hf_spaces', repo_id)
     snapshot_download(
       repo_id=repo_id,
       repo_type='space',
diff --git a/web/blueprint/src/lib/components/datasetView/SearchPanel.svelte b/web/blueprint/src/lib/components/datasetView/SearchPanel.svelte
index 18fe7009b..a0d768ccd 100644
--- a/web/blueprint/src/lib/components/datasetView/SearchPanel.svelte
+++ b/web/blueprint/src/lib/components/datasetView/SearchPanel.svelte
@@ -252,6 +252,20 @@
     }
     datasetViewStore.setSortOrder(sort?.order === 'ASC' ? 'DESC' : 'ASC');
   };
+  // const pageClickHandler = () => console.log('clicky');
+  // onMount(() => {
+  //   if (parent) {
+  //     console.log(parent, '=', parent);
+  //     window.parent = window;
+  //     parent.addEventListener('click', pageClickHandler);
+  //   }
+
+  //   return () => {
+  //     if (parent) {
+  //       parent.removeEventListener('click', pageClickHandler);
+  //     }
+  //   };
+  // });
diff --git a/web/blueprint/src/routes/+layout.svelte b/web/blueprint/src/routes/+layout.svelte
index 4f1d91e77..e43c36550 100644
--- a/web/blueprint/src/routes/+layout.svelte
+++ b/web/blueprint/src/routes/+layout.svelte
@@ -18,6 +18,11 @@
   let showError: ApiError | undefined = undefined;
 
   onMount(() => {
+    // This fixes a cross-origin error when the app is embedded in an iframe. Some carbon
+    // components attach listeners to window.parent, which is not allowed in an iframe, so we set
+    // the parent to window.
+    window.parent = window;
+
     urlHash.set(location.hash);
     history.pushState = function (_state, _unused, url) {
       if (url instanceof URL) {

From fc66f83264f94118f1f7396a88745c01b72aad2b Mon Sep 17 00:00:00 2001
From: nsthorat
Date: Sun, 9 Jul 2023 19:33:15 -0400
Subject: [PATCH 5/7] save

---
 src/server.py                                     |  3 ++-
 .../lib/components/datasetView/SearchPanel.svelte | 14 --------------
 2 files changed, 2 insertions(+), 15 deletions(-)

diff --git a/src/server.py b/src/server.py
index 4e9f9ff11..15c53c89e 100644
--- a/src/server.py
+++ b/src/server.py
@@ -68,7 +68,6 @@ def catch_all() -> FileResponse:
 @app.on_event('startup')
 def startup() -> None:
   """Download dataset files from the HF space that was uploaded before building the image."""
-  # Setup.
   repo_id = CONFIG.get('HF_DATA_FROM_SPACE', None)
 
   if repo_id:
@@ -89,6 +88,8 @@ def startup() -> None:
       persistent_output_dir = get_dataset_output_dir(data_path(), dataset.namespace,
                                                      dataset.dataset_name)
 
+      # Huggingface doesn't let you selectively download files, so we just copy the data directory
+      # out of the cloned space.
       shutil.rmtree(persistent_output_dir, ignore_errors=True)
       shutil.move(spaces_dataset_output_dir, persistent_output_dir)
 
diff --git a/web/blueprint/src/lib/components/datasetView/SearchPanel.svelte b/web/blueprint/src/lib/components/datasetView/SearchPanel.svelte
index a0d768ccd..18fe7009b 100644
--- a/web/blueprint/src/lib/components/datasetView/SearchPanel.svelte
+++ b/web/blueprint/src/lib/components/datasetView/SearchPanel.svelte
@@ -252,20 +252,6 @@
     }
     datasetViewStore.setSortOrder(sort?.order === 'ASC' ? 'DESC' : 'ASC');
   };
-  // const pageClickHandler = () => console.log('clicky');
-  // onMount(() => {
-  //   if (parent) {
-  //     console.log(parent, '=', parent);
-  //     window.parent = window;
-  //     parent.addEventListener('click', pageClickHandler);
-  //   }
-
-  //   return () => {
-  //     if (parent) {
-  //       parent.removeEventListener('click', pageClickHandler);
-  //     }
-  //   };
-  // });
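
The patch below replaces the Dockerfile's single-process `uvicorn` CMD with `gunicorn` supervising uvicorn workers. As a rough local sketch of the equivalent invocation (assuming `gunicorn` and `uvicorn` are installed in the environment, e.g. via the poetry dependency this patch adds):

```sh
# Serve the FastAPI app with gunicorn managing uvicorn workers,
# mirroring the new Dockerfile CMD.
gunicorn src.server:app \
  --bind 0.0.0.0:5432 \
  -k uvicorn.workers.UvicornWorker
```
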
From ff79ad7b3a158d4b27d4b8d646ebc1ed870b961c Mon Sep 17 00:00:00 2001
From: nsthorat
Date: Mon, 10 Jul 2023 12:32:31 -0400
Subject: [PATCH 6/7] save

---
 Dockerfile           |  6 +++++-
 README.md            | 11 ++++++-----
 mypy.ini             |  5 +++++
 poetry.lock          | 22 +++++++++++++++++++++-
 pyproject.toml       |  1 +
 scripts/deploy_hf.py |  3 +--
 6 files changed, 39 insertions(+), 9 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 7a2a6a50b..e6a9569af 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -21,4 +21,8 @@ COPY /web/blueprint/build ./web/blueprint/build
 # Copy python files.
 COPY /src ./src/
 
-CMD ["uvicorn", "src.server:app", "--host", "0.0.0.0", "--port", "5432"]
+CMD [ \
+  "gunicorn", "src.server:app", \
+  "--bind", "0.0.0.0:5432", \
+  "-k", "uvicorn.workers.UvicornWorker" \
+  ]
diff --git a/README.md b/README.md
index e6c8d8efd..4b611303c 100644
--- a/README.md
+++ b/README.md
@@ -40,6 +40,7 @@ We use the HuggingFace git server, [follow the instructions](https://huggingface
 
 ###### Staging demo
 
+Create a HuggingFace space.
 Make sure you have created a HuggingFace space: [huggingface.co/spaces](https://huggingface.co/spaces)
 
 Set .env.local environment variables so you can upload data to the space:
@@ -51,14 +52,14 @@ HF_STAGING_DEMO_REPO='lilacai/your-space'
 HF_USERNAME='your-username'
 ```
 
-Set the environment variables on the HuggingFace space from the HuggingFace Space Settings UI to
-authenticate the binary running on HuggingFace to read private space data:
+- Generate a read-only token from [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens) which will be used as `HF_ACCESS_TOKEN` below.
+- Open the HuggingFace space in your browser and click "Settings".
+- Set these two environment variables from the settings UI to
+  authenticate the binary running on HuggingFace to read private space data:
 
 - `LILAC_DL_DATA_FROM_HF_SPACE`: lilacai/your-space
 - `HF_ACCESS_TOKEN`: yourtoken
 
-NOTE: `HF_ACCESS_TOKEN` can be generated from [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens). Create a read-only token for this step.
-
 To deploy to huggingface:
 
 ```
@@ -73,7 +74,7 @@ poetry run python -m scripts.deploy_hf \
 To build the docker image:
 
 ```sh
-./build_docker.sh
+./scripts/build_docker.sh
 ```
 
 To run the docker image locally:
diff --git a/mypy.ini b/mypy.ini
index 8bf6a9b15..6561d2d15 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -116,3 +116,8 @@ follow_imports = skip
 [mypy-google.generativeai.*]
 ignore_missing_imports = True
 follow_imports = skip
+
+[mypy-huggingface_hub.*]
+ignore_missing_imports = True
+follow_imports = skip
+
diff --git a/poetry.lock b/poetry.lock
index 4efc8f454..f01b90575 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1887,6 +1887,26 @@ googleapis-common-protos = ">=1.5.5"
 grpcio = ">=1.56.0"
 protobuf = ">=4.21.6"
 
+[[package]]
+name = "gunicorn"
+version = "20.1.0"
+description = "WSGI HTTP Server for UNIX"
+optional = false
+python-versions = ">=3.5"
+files = [
+    {file = "gunicorn-20.1.0-py3-none-any.whl", hash = "sha256:9dcc4547dbb1cb284accfb15ab5667a0e5d1881cc443e0677b4882a4067a807e"},
+    {file = "gunicorn-20.1.0.tar.gz", hash = "sha256:e0a968b5ba15f8a328fdfd7ab1fcb5af4470c28aaf7e55df02a99bc13138e6e8"},
+]
+
+[package.dependencies]
+setuptools = ">=3.0"
+
+[package.extras]
+eventlet = ["eventlet (>=0.24.1)"]
+gevent = ["gevent (>=1.4.0)"]
+setproctitle = ["setproctitle"]
+tornado = ["tornado (>=0.2)"]
+
 [[package]]
 name = "h11"
 version = "0.14.0"
@@ -6211,4 +6231,4 @@ testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more
 [metadata]
 lock-version = "2.0"
 python-versions = "~3.9"
-content-hash = "2ef9495d9487c43879081a097f41955de910032019a1369f8c946b36c68379e5"
+content-hash = "3a2070f9d45f19db63333a16c0034a3584c07d9c3013728b1f90b27fb87a2cba"
diff --git a/pyproject.toml b/pyproject.toml
index 0b93623bc..75c87bed2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -50,6 +50,7 @@ email-reply-parser = "^0.5.12"
 
 # For text statistics.
 textacy = "^0.13.0"
+gunicorn = "^20.1.0"
 
 [tool.poetry.group.dev]
 # Deps for development.
 optional = true
diff --git a/scripts/deploy_hf.py b/scripts/deploy_hf.py
index a1861b523..809c05d12 100644
--- a/scripts/deploy_hf.py
+++ b/scripts/deploy_hf.py
@@ -62,7 +62,7 @@ def main(hf_username: Optional[str], hf_space: Optional[str], dataset: list[str]
   # Clone the HuggingFace spaces repo.
   repo_basedir = os.path.join(HF_SPACE_DIR, hf_space)
   run(f'rm -rf {repo_basedir}')
-  run(f'git clone https://{hf_username}@huggingface.co/spaces/{hf_space} {repo_basedir}')
+  run(f'git clone https://{hf_username}@huggingface.co/spaces/{hf_space} {repo_basedir} --depth 1')
 
   # Clear out the repo.
   run(f'rm -rf {repo_basedir}/*')
@@ -73,7 +73,6 @@ def main(hf_username: Optional[str], hf_space: Optional[str], dataset: list[str]
   # Copy source code.
   copy_dirs = ['src', 'web/blueprint/build']
   for dir in copy_dirs:
-    run(f'rm -rf {repo_basedir}/{dir}')
     run(f'mkdir -p {repo_basedir}/{dir}')
     run(f'cp -vaR ./{dir}/* {repo_basedir}/{dir}')

From 53336b64721c9786888dda106a0ccc3786b91ab0 Mon Sep 17 00:00:00 2001
From: nsthorat
Date: Mon, 10 Jul 2023 12:36:17 -0400
Subject: [PATCH 7/7] save

---
 README.md | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index 4b611303c..d31e673d4 100644
--- a/README.md
+++ b/README.md
@@ -40,10 +40,10 @@ We use the HuggingFace git server, [follow the instructions](https://huggingface
 
 ###### Staging demo
 
-Create a HuggingFace space.
-Make sure you have created a HuggingFace space: [huggingface.co/spaces](https://huggingface.co/spaces)
+1. Create a HuggingFace space.
+   Create a HuggingFace space from your browser: [huggingface.co/spaces](https://huggingface.co/spaces)
 
-Set .env.local environment variables so you can upload data to the space:
+2. Set .env.local environment variables so you can upload data to the space:
 
 ```sh
 # The repo to use for the huggingface demo.
 HF_STAGING_DEMO_REPO='lilacai/your-space'
 # To authenticate with HuggingFace for uploading to the space.
 HF_USERNAME='your-username'
 ```
 
-- Generate a read-only token from [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens) which will be used as `HF_ACCESS_TOKEN` below.
-- Open the HuggingFace space in your browser and click "Settings".
-- Set these two environment variables from the settings UI to
-  authenticate the binary running on HuggingFace to read private space data:
+3. Generate a read-only token from [huggingface.co/settings/tokens](https://huggingface.co/settings/tokens) which will be used as `HF_ACCESS_TOKEN` below.
+
+4. Open the HuggingFace space in your browser and click "Settings".
+
+5. Set these two environment variables from the settings UI to
+   authenticate the binary running on HuggingFace to read private space data:
 
 - `LILAC_DL_DATA_FROM_HF_SPACE`: lilacai/your-space
 - `HF_ACCESS_TOKEN`: yourtoken
 
-To deploy to huggingface:
+6. Deploy to your HuggingFace Space:
 
 ```
 poetry run python -m scripts.deploy_hf \