41 changes: 37 additions & 4 deletions mcserver/tasks.py
@@ -1,6 +1,7 @@
 import os
 import json
 import requests
+import time
 from http import HTTPStatus
 from django.conf import settings
 from django.core.files import File
@@ -58,10 +59,10 @@ def download_session_archive(self, session_id, user_id=None):
     try:
         session_dir_path = SessionDirectoryConstructor().build(session_id)
         session_zip_path = zipdir(session_dir_path)
-        with open(session_zip_path, "rb") as archive:
-            log = DownloadLog.objects.create(task_id=str(self.request.id), user_id=user_id)
-            log.media.save(os.path.basename(session_zip_path), archive)
-        os.remove(session_zip_path)
+
+        create_download_log(session_zip_path, self.request.id, user_id)
+        os_remove_with_retry(session_zip_path)
+
     except Exception as e:
         # Delete files and send the traceback to Sentry if something went wrong
         if session_dir_path and os.path.isfile(session_dir_path):
@@ -197,6 +198,38 @@ def submit_cloudwatch_metrics():
     from mcserver.utils import submit_number_of_pending_trials_to_cloudwatch
     submit_number_of_pending_trials_to_cloudwatch()
 
+# Helper functions
+def create_download_log(zip_path, task_id, user_id,
+                        max_retries=5, backoff=0.1):
+    archive = None
+    for attempt in range(max_retries):
+        try:
+            archive = open(zip_path, "rb")
+            break
+
+        except (FileNotFoundError, PermissionError, OSError) as e:
+            if attempt == max_retries - 1:
+                raise
+            time.sleep(backoff * (2**attempt))
+
+    if archive is not None:
+        with archive:
+            log = DownloadLog.objects.create(task_id=task_id, user_id=user_id)
+            log.media.save(os.path.basename(zip_path), archive)
+
+def os_remove_with_retry(path, max_retries=5, backoff=0.1):
+    """
+    os.remove(path) with exponential backoff for robustness in cloud
+    """
+    for attempt in range(max_retries):
+        try:
+            os.remove(path)
+            return
+
+        except (FileNotFoundError, PermissionError, OSError) as e:
+            if attempt == max_retries - 1:
+                raise
+            time.sleep(backoff * (2**attempt))
+
 # TODO: temporary disabled - need testing
 # @shared_task
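Both new helpers sleep backoff * (2 ** attempt) seconds between attempts and re-raise on the last one, so with the defaults (max_retries=5, backoff=0.1) a persistently failing call waits 0.1 + 0.2 + 0.4 + 0.8 = 1.5 s in total before giving up. A minimal standalone sketch of that schedule (illustration only, not part of the diff):

```python
# Illustration of the backoff schedule used by create_download_log / os_remove_with_retry.
max_retries, backoff = 5, 0.1

# No sleep follows the final attempt; it re-raises instead.
delays = [backoff * (2 ** attempt) for attempt in range(max_retries - 1)]
print(delays)       # [0.1, 0.2, 0.4, 0.8]
print(sum(delays))  # 1.5 seconds of waiting in the worst case
```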
54 changes: 44 additions & 10 deletions mcserver/zipsession_v2.py
@@ -1,6 +1,7 @@
 import os
 import pathlib
 import shutil
+import time
 import pickle
 import boto3
 import zipfile
@@ -316,16 +317,49 @@ def zipdir(dir_path, delete_directory_after_zip=True):
     if os.path.isfile(zip_path):
         os.remove(zip_path)
 
-    zip_file = zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED)
-    for root, dirs, files in os.walk(dir_path):
-        for f in files:
-            zip_file.write(
-                os.path.join(root, f),
-                os.path.relpath(os.path.join(root, f), os.path.join(dir_path, '..'))
-            )
-    zip_file.close()
+    zipdir_contents_with_retry(dir_path, zip_path)
 
-    if delete_directory_after_zip and os.path.exists(dir_path):
-        shutil.rmtree(dir_path)
+    if delete_directory_after_zip:
+        rmtree_with_retry(dir_path)
 
     return zip_path
+
+# Helper functions
+def rmtree_with_retry(path, max_retries=5, backoff=0.1):
+    """
+    Retries shutil.rmtree() to handle both race conditions inherent in the
+    function itself (prior to Python 3.13) and in containers with possible
+    file system delays or other race conditions.
+    """
+    for attempt in range(max_retries):
+        try:
+            shutil.rmtree(path)
+            return
+
+        except (FileNotFoundError, PermissionError, OSError) as e:
+            if attempt == max_retries - 1:
+                raise
+            time.sleep(backoff * (2**attempt))
+
+def zipdir_contents_with_retry(dir_path, zip_path, max_retries=5, backoff=0.1):
+    """
+    Given a source dir_path and a target zip_path, zip the folder with
+    retrying with exponential backoff to avoid temporary issues with
+    cloud storage.
+    """
+    for attempt in range(max_retries):
+        try:
+            zip_file = zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED)
+            for root, dirs, files in os.walk(dir_path):
+                for f in files:
+                    zip_file.write(
+                        os.path.join(root, f),
+                        os.path.relpath(os.path.join(root, f), os.path.join(dir_path, '..'))
+                    )
+            zip_file.close()
+            return
+
+        except (FileNotFoundError, PermissionError, OSError) as e:
+            if attempt == max_retries - 1:
+                raise
+            time.sleep(backoff * (2 ** attempt))
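For a quick local check of the reworked zipdir() path, something along these lines could exercise both retrying helpers against a throwaway directory. This is a hedged sketch, assuming a configured Django environment where mcserver.zipsession_v2 is importable; the temporary-directory layout, the trial.trc file name, and the assertions are illustrative and not part of this PR or its test suite.

```python
# Hypothetical smoke test for the retrying helpers; not part of this PR.
import os
import tempfile
import zipfile

from mcserver.zipsession_v2 import rmtree_with_retry, zipdir_contents_with_retry

src_dir = tempfile.mkdtemp(prefix="session-")       # stand-in for a session directory
with open(os.path.join(src_dir, "trial.trc"), "w") as f:
    f.write("dummy marker data")

zip_path = src_dir + ".zip"
zipdir_contents_with_retry(src_dir, zip_path)       # zips the folder, retrying on OSError

with zipfile.ZipFile(zip_path) as zf:
    assert any(name.endswith("trial.trc") for name in zf.namelist())

rmtree_with_retry(src_dir)                          # retried cleanup of the source directory
os.remove(zip_path)
```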