Skip to content

Commit c7f8da5

Browse files
authored
Merge pull request #5 from LCOGT/feature/implement_median
Feature/implement median
2 parents 5b0fd9d + 2cdf3f6 commit c7f8da5

File tree

4 files changed

+122
-1
lines changed

4 files changed

+122
-1
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,3 +158,6 @@ cython_debug/
158158
# and can be added to the global gitignore or merged into this file. For a more nuclear
159159
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
160160
#.idea/
161+
162+
# vscode
163+
.vscode

datalab/datalab_session/data_operations/median.py

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,16 @@
1+
from io import BytesIO
2+
import logging
3+
import os
4+
import tempfile
5+
6+
import numpy as np
7+
from astropy.io import fits
8+
19
from datalab.datalab_session.data_operations.data_operation import BaseDataOperation
10+
from datalab.datalab_session.util import store_fits_output, get_archive_from_basename
11+
12+
log = logging.getLogger()
13+
log.setLevel(logging.INFO)
214

315

416
class Median(BaseDataOperation):
@@ -31,4 +43,62 @@ def wizard_description():
3143
}
3244

3345
def operate(self):
    """Compute the pixel-wise median of the input FITS images and store the result.

    Fetches each input file's 'SCI' extension from the science archive,
    spools the pixel data to on-disk memmaps, crops all images to the
    smallest common per-axis shape, stacks them, and takes the median
    along the stack axis. The resulting FITS file is written to the
    operation output bucket under this operation's cache key.

    Progress is reported via set_percent_completion; the (currently empty)
    output manifest is recorded via set_output.
    """
    input_files = self.input_data.get('input_files', [])
    file_count = len(input_files)

    if file_count == 0:
        return {'output_files': []}

    log.info(f'Executing median operation on {file_count} files')

    with tempfile.TemporaryDirectory() as temp_dir:
        memmap_paths = []
        # Track each image's shape and dtype so the memmaps can be
        # re-opened exactly as written (the original code re-opened every
        # memmap as float32 with the last image's shape, corrupting data
        # when images differed in dtype or size).
        memmap_shapes = []
        memmap_dtypes = []

        for index, file_info in enumerate(input_files):
            basename = file_info.get('basename', 'No basename found')
            archive_record = get_archive_from_basename(basename)

            try:
                fits_url = archive_record[0].get('url', 'No URL found')
            except IndexError:
                # No archive record for this basename; skip the file.
                continue

            with fits.open(fits_url, use_fsspec=True) as hdu_list:
                data = hdu_list['SCI'].data
                memmap_path = os.path.join(temp_dir, f'memmap_{index}.dat')
                memmap_array = np.memmap(memmap_path, dtype=data.dtype, mode='w+', shape=data.shape)
                memmap_array[:] = data[:]
                memmap_paths.append(memmap_path)
                memmap_shapes.append(data.shape)
                memmap_dtypes.append(data.dtype)

            self.set_percent_completion(index / file_count)

        # If every archive lookup failed there is nothing to median.
        if not memmap_paths:
            return {'output_files': []}

        image_data_list = [
            np.memmap(path, dtype=dtype, mode='r', shape=shape)
            for path, shape, dtype in zip(memmap_paths, memmap_dtypes and memmap_shapes, memmap_dtypes)
        ]

        # Crop fits image data to be the same shape then stack.
        # min() over shape tuples compares lexicographically, which can pick
        # a shape that is not minimal on every axis; take the per-axis
        # minimum instead.
        min_shape = tuple(min(dims) for dims in zip(*(arr.shape for arr in image_data_list)))
        cropped_data_list = [arr[:min_shape[0], :min_shape[1]] for arr in image_data_list]
        stacked_data = np.stack(cropped_data_list, axis=2)

        # Calculate a Median along the z axis
        median = np.median(stacked_data, axis=2)

        cache_key = self.generate_cache_key()
        header = fits.Header([('KEY', cache_key)])
        primary_hdu = fits.PrimaryHDU(header=header)
        image_hdu = fits.ImageHDU(median)
        hdu_list = fits.HDUList([primary_hdu, image_hdu])

        fits_buffer = BytesIO()
        hdu_list.writeto(fits_buffer)
        fits_buffer.seek(0)

        # Write the HDU List to the output FITS file in the bucket.
        store_fits_output(cache_key, fits_buffer)

    # TODO: No output yet, need to build a thumbnail service
    output = {'output_files': []}
    self.set_percent_completion(1.0)
    self.set_output(output)

datalab/datalab_session/util.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import requests
2+
import logging
3+
4+
import boto3
5+
6+
from django.conf import settings
7+
8+
log = logging.getLogger()
9+
log.setLevel(logging.INFO)
10+
11+
def store_fits_output(item_key: str, fits_buffer: object) -> object:
    """
    Stores a fits into the operation bucket in S3

    Keyword Arguments:
    item_key -- name under which to store the fits file
    fits_buffer -- the fits file to add to the bucket
    """
    log.info(f'Adding {item_key} to {settings.DATALAB_OPERATION_BUCKET}')

    bucket = boto3.resource('s3').Bucket(settings.DATALAB_OPERATION_BUCKET)
    return bucket.put_object(Key=item_key, Body=fits_buffer.getvalue())
24+
25+
def get_archive_from_basename(basename: str) -> dict:
    """
    Queries and returns an archive file from the Archive

    Keyword Arguments:
    basename -- name to query

    Returns the list of frame records matching the basename (may be empty).
    Raises FileNotFoundError if the archive response cannot be parsed.
    """
    query_params = {'basename_exact': basename}

    response = requests.get(settings.ARCHIVE_API + '/frames/', params=query_params)

    try:
        image_data = response.json()
        results = image_data.get('results', None)
    except ValueError:
        # response.json() raises ValueError (json.JSONDecodeError) on an
        # unparseable body. The original code caught IndexError here, which
        # neither .json() nor .get() raises, so this error branch was
        # unreachable and parse failures propagated as raw exceptions.
        log.error(f"No image found with specified basename: {basename}")
        raise FileNotFoundError

    return results

datalab/settings.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,11 @@ def get_list_from_env(variable, default=None):
130130
# AdminMiddleware is enabled. The default value is 'default'.
131131
DRAMATIQ_TASKS_DATABASE = 'default'
132132

133+
# AWS S3 Bucket
134+
DATALAB_OPERATION_BUCKET = os.getenv('DATALAB_OPERATION_BUCKET', 'datalab-operation-output-bucket')
135+
136+
# Datalab Archive
137+
ARCHIVE_API = os.getenv('ARCHIVE_API', 'https://datalab-archive.photonranch.org')
133138

134139
# Database
135140
# https://docs.djangoproject.com/en/4.2/ref/settings/#databases

0 commit comments

Comments
 (0)