Skip to content

Commit

Permalink
Add ability to skip toplevel keys & only sync whitelisted releases (#918)
Browse files Browse the repository at this point in the history

* Add release sync script to production dockerfile.

* Add the ability to skip existing toplevel keys.

* Implement an optional whitelist
  • Loading branch information
bhearsum authored May 14, 2019
1 parent deb64cf commit 5d76fea
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 3 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ RUN apt-get install -q --yes gcc && \
COPY auslib/ /app/auslib/
COPY ui/ /app/ui/
COPY uwsgi/ /app/uwsgi/
COPY scripts/manage-db.py scripts/run-batch-deletes.sh scripts/run.sh scripts/reset-stage-db.sh scripts/get-prod-db-dump.py /app/scripts/
COPY scripts/manage-db.py scripts/run-batch-deletes.sh scripts/run.sh scripts/reset-stage-db.sh scripts/get-prod-db-dump.py scripts/releases-history-to-gcs.py /app/scripts/
COPY version.json /app/

WORKDIR /app
Expand Down
28 changes: 26 additions & 2 deletions scripts/releases-history-to-gcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import base64
from collections import defaultdict
import hashlib
import os
import ssl
import sys

Expand Down Expand Up @@ -87,7 +88,7 @@ async def process_release(r, session, balrog_api, bucket, sem, loop):
return releases, uploads


async def main(loop, balrog_api, bucket_name, limit_to, concurrency):
async def main(loop, balrog_api, bucket_name, limit_to, concurrency, skip_toplevel_keys, whitelist):
# limit the number of connections at any one time
sem = asyncio.Semaphore(concurrency)
releases = defaultdict(int)
Expand All @@ -100,10 +101,28 @@ async def main(loop, balrog_api, bucket_name, limit_to, concurrency):
storage = Storage(session=session)
bucket = storage.get_bucket(bucket_name)

toplevel_keys = []
if skip_toplevel_keys:
batch = await storage.list_objects(bucket_name, params={"delimiter": "/"})
while batch:
toplevel_keys.extend([name.rstrip("/") for name in batch.get("prefixes")])
if batch.get("nextPageToken"):
batch = await storage.list_objects(bucket_name, params={"delimiter": "/", "pageToken": batch["nextPageToken"]})
else:
batch = None

to_process = (await (await session.get("{}/releases".format(balrog_api))).json())["releases"]
for r in to_process:
release_name = r["name"]

if skip_toplevel_keys and release_name in toplevel_keys:
print("Skipping {} because it is an existing toplevel key".format(release_name), flush=True)
continue

if whitelist and release_name not in whitelist:
print("Skipping {} because it is not in the whitelist".format(release_name), flush=True)
continue

if limit_to and n >= limit_to:
break

Expand Down Expand Up @@ -143,6 +162,11 @@ async def main(loop, balrog_api, bucket_name, limit_to, concurrency):
concurrency = int(sys.argv[4])
else:
concurrency = 5

skip_toplevel_keys = bool(int(os.environ.get("SKIP_TOPLEVEL_KEYS", True)))
whitelist = os.environ.get("ONLY_SYNC_RELEASES", None)
if whitelist:
whitelist = whitelist.split()
loop = asyncio.get_event_loop()
ignore_aiohttp_ssl_error(loop)
loop.run_until_complete(main(loop, balrog_api, bucket_name, limit_to, concurrency))
loop.run_until_complete(main(loop, balrog_api, bucket_name, limit_to, concurrency, skip_toplevel_keys, whitelist))

0 comments on commit 5d76fea

Please sign in to comment.