Skip to content

Commit

Permalink
Merge branch 'develop' into add-ML-wisp-finder
Browse files Browse the repository at this point in the history
  • Loading branch information
bhilbert4 authored Jan 17, 2025
2 parents 52f148d + 3c7e1a4 commit dd91ace
Show file tree
Hide file tree
Showing 7 changed files with 250 additions and 67 deletions.
50 changes: 25 additions & 25 deletions jwql/jwql_monitors/monitor_filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,22 +49,27 @@
import numpy as np
from sqlalchemy.exc import DataError

from jwql.database.database_interface import engine
from jwql.database.database_interface import session
from jwql.database.database_interface import FilesystemCharacteristics
from jwql.database.database_interface import FilesystemGeneral
from jwql.database.database_interface import FilesystemInstrument
from jwql.database.database_interface import CentralStore
from jwql.utils.logging_functions import log_info, log_fail
from jwql.utils.permissions import set_permissions
from jwql.utils.constants import FILESYSTEM_MONITOR_SUBDIRS, FILE_SUFFIX_TYPES, FILTERS_PER_INSTRUMENT, INSTRUMENT_SERVICE_MATCH
from jwql.utils.constants import JWST_INSTRUMENT_NAMES, JWST_INSTRUMENT_NAMES_MIXEDCASE, JWST_INSTRUMENT_NAMES_MIXEDCASE
from jwql.utils.constants import ON_GITHUB_ACTIONS, ON_READTHEDOCS
from jwql.utils.utils import filename_parser
from jwql.utils.utils import get_config
from jwql.utils.monitor_utils import initialize_instrument_monitor, update_monitor_table
from jwql.utils.protect_module import lock_module
from jwql.website.apps.jwql.data_containers import get_instrument_proposals

if not ON_GITHUB_ACTIONS and not ON_READTHEDOCS:
# Need to set up django apps before we can access the models
import django # noqa: E402 (module level import not at top of file)
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "jwql.website.jwql_proj.settings")
django.setup()

# Import * is okay here because this module specifically only contains database models
# for this monitor
from jwql.website.apps.jwql.monitor_models.common import * # noqa: E402 (module level import not at top of file)

SETTINGS = get_config()
FILESYSTEM = SETTINGS['filesystem']
PROPRIETARY_FILESYSTEM = os.path.join(FILESYSTEM, 'proprietary')
Expand All @@ -74,6 +79,7 @@
PREVIEW_IMAGES = SETTINGS['preview_image_filesystem']
THUMBNAILS = SETTINGS['thumbnail_filesystem']
LOGS = SETTINGS['log_dir']
WORKING = SETTINGS['working']


def files_per_filter():
Expand Down Expand Up @@ -232,7 +238,8 @@ def get_area_stats(central_storage_dict):
'logs': LOGS,
'preview_images': PREVIEW_IMAGES,
'thumbnails': THUMBNAILS,
'all': CENTRAL}
'all': CENTRAL,
'working':WORKING}

counteddirs = []

Expand Down Expand Up @@ -368,7 +375,7 @@ def initialize_results_dicts():
A dictionary for the ``central_storage`` database table
"""

now = datetime.datetime.now()
now = datetime.datetime.now(datetime.timezone.utc)

general_results_dict = {}
general_results_dict['date'] = now
Expand Down Expand Up @@ -430,9 +437,9 @@ def update_central_store_database(central_storage_dict):
new_record['size'] = central_storage_dict[area]['size']
new_record['used'] = central_storage_dict[area]['used']
new_record['available'] = central_storage_dict[area]['available']
with engine.begin() as connection:
connection.execute(CentralStore.__table__.insert(), new_record)
session.close()

entry = CentralStorage(**new_record)
entry.save()


def update_characteristics_database(char_info):
Expand All @@ -447,7 +454,7 @@ def update_characteristics_database(char_info):
using that filter/pupil.
"""
logging.info('\tUpdating the characteristics database')
now = datetime.datetime.now()
now = datetime.datetime.now(datetime.timezone.utc)

# Add data to filesystem_instrument table
for instrument in ['nircam', 'niriss', 'nirspec', 'miri']:
Expand All @@ -458,11 +465,9 @@ def update_characteristics_database(char_info):
new_record['instrument'] = instrument
new_record['filter_pupil'] = optics
new_record['obs_per_filter_pupil'] = values
with engine.begin() as connection:
connection.execute(
FilesystemCharacteristics.__table__.insert(), new_record)

session.close()
entry = FilesystemCharacteristics(**new_record)
entry.save()


def update_database(general_results_dict, instrument_results_dict, central_storage_dict):
Expand All @@ -478,8 +483,8 @@ def update_database(general_results_dict, instrument_results_dict, central_stora
"""
logging.info('\tUpdating the database')

with engine.begin() as connection:
connection.execute(FilesystemGeneral.__table__.insert(), general_results_dict)
fs_general_entry = FilesystemGeneral(**general_results_dict)
fs_general_entry.save()

# Add data to filesystem_instrument table
for instrument in JWST_INSTRUMENT_NAMES:
Expand All @@ -493,13 +498,8 @@ def update_database(general_results_dict, instrument_results_dict, central_stora

# Protect against updated enum options that have not been propagated to
# the table definition
try:
with engine.begin() as connection:
connection.execute(FilesystemInstrument.__table__.insert(), new_record)
except DataError as e:
logging.error(e)

session.close()
fs_instrument_entry = FilesystemInstrument(**new_record)
fs_instrument_entry.save()


@lock_module
Expand Down
38 changes: 38 additions & 0 deletions jwql/tests/test_archive_database_update.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#! /usr/bin/env python

"""Tests for the ``archive_database_update`` module.
Authors
-------
- Bryan Hilbert
Use
---
These tests can be run via the command line (omit the ``-s`` to
suppress verbose output to stdout):
::
pytest -s test_archive_database_update.py
"""


import pytest

from jwql.website.apps.jwql import archive_database_update


def test_filter_rootnames():
    """Check that ``filter_rootnames`` removes source-based level 2 files.

    The input mixes names that should be dropped with names that should
    survive; the surviving names are expected back in their original order.
    """
    candidates = ['jw06434-c1021_s000001510_nircam_f444w-grismr.fits',
                  'jw01068004001_02102_00001_nrcb4_rate.fits',
                  'jw06434-c1021_t000_nircam_clear-f090w_segm.fits',
                  'jw06434-o001_t000_nircam_clear-f090w_segm.fits',
                  'jw02183117001_03103_00001-seg001_nrca1_rate.fits']

    # Only these two entries should pass through the filter
    survivors = ['jw01068004001_02102_00001_nrcb4_rate.fits',
                 'jw02183117001_03103_00001-seg001_nrca1_rate.fits']

    assert archive_database_update.filter_rootnames(candidates) == survivors
1 change: 1 addition & 0 deletions jwql/utils/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,7 @@
FILE_PROG_ID_LEN = 5
FILE_SEG_LEN = 3
FILE_SOURCE_ID_LEN = 5
FILE_SOURCE_ID_LONG_LEN = 9
FILE_TARG_ID_LEN = 3
FILE_VISIT_GRP_LEN = 2
FILE_VISIT_LEN = 3
Expand Down
116 changes: 96 additions & 20 deletions jwql/website/apps/jwql/archive_database_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,32 +43,45 @@
import logging
import os
import argparse
import re

import numpy as np
import django

from django.apps import apps
from jwql.utils.protect_module import lock_module
from jwql.utils.constants import DEFAULT_MODEL_CHARFIELD

# These lines are needed in order to use the Django models in a standalone
# script (as opposed to code run as a result of a webpage request). If these
# lines are not run, the script will crash when attempting to import the
# Django models in the line below.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "jwql.website.jwql_proj.settings")
django.setup()

from jwql.website.apps.jwql.models import Archive, Observation, Proposal, RootFileInfo # noqa
from jwql.utils.constants import JWST_INSTRUMENT_NAMES_MIXEDCASE # noqa
from jwql.utils.logging_functions import log_info, log_fail # noqa
from jwql.utils.monitor_utils import initialize_instrument_monitor # noqa
from jwql.utils.constants import MAST_QUERY_LIMIT # noqa
from jwql.utils.utils import filename_parser, filesystem_path, get_config # noqa
from jwql.website.apps.jwql.data_containers import create_archived_proposals_context # noqa
from jwql.website.apps.jwql.data_containers import get_instrument_proposals, get_filenames_by_instrument # noqa
from jwql.website.apps.jwql.data_containers import get_proposal_info, mast_query_filenames_by_instrument, mast_query_by_rootname # noqa

FILESYSTEM = get_config()['filesystem']
from jwql.utils.constants import (DEFAULT_MODEL_CHARFIELD,
FILE_PROG_ID_LEN,
FILE_AC_O_ID_LEN,
FILE_AC_CAR_ID_LEN,
FILE_SOURCE_ID_LONG_LEN,
FILE_TARG_ID_LEN,
JWST_INSTRUMENT_NAMES_MIXEDCASE,
MAST_QUERY_LIMIT,
ON_GITHUB_ACTIONS,
ON_READTHEDOCS
)
from jwql.utils.logging_functions import log_info, log_fail
from jwql.utils.monitor_utils import initialize_instrument_monitor
from jwql.utils.utils import filename_parser, filesystem_path, get_config
from jwql.website.apps.jwql.data_containers import create_archived_proposals_context
from jwql.website.apps.jwql.data_containers import get_instrument_proposals, get_filenames_by_instrument
from jwql.website.apps.jwql.data_containers import (get_proposal_info,
mast_query_filenames_by_instrument,
mast_query_by_rootname
)


if not ON_GITHUB_ACTIONS and not ON_READTHEDOCS:
# These lines are needed in order to use the Django models in a standalone
# script (as opposed to code run as a result of a webpage request). If these
# lines are not run, the script will crash when attempting to import the
# Django models in the line below.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "jwql.website.jwql_proj.settings")
django.setup()

from jwql.website.apps.jwql.models import Archive, Observation, Proposal, RootFileInfo # noqa
FILESYSTEM = get_config()['filesystem']


@log_info
Expand Down Expand Up @@ -113,6 +126,11 @@ def get_updates(update_database):

# Get set of unique rootnames
all_rootnames = set(['_'.join(f.split('/')[-1].split('_')[:-1]) for f in filenames])

# Filter source-based level 2 files out of the rootnames and filenames
all_rootnames = filter_rootnames(all_rootnames)
filenames = filter_filenames(filenames, all_rootnames)

rootnames = []
for rootname in all_rootnames:
filename_dict = filename_parser(rootname)
Expand Down Expand Up @@ -510,6 +528,64 @@ def fill_empty_rootfileinfo(rootfileinfo_set):
logging.info(f'\tSaved {saved_rootfileinfos} Root File Infos')


def filter_filenames(fnames, roots):
"""Filter out filenames from ``fnames`` that don't match the names in ``roots``
Parameters
----------
fnames : list
List of filenames
roots : list
List of rootnames
Returns
-------
filtered_fnames : list
Filtered list of filenames
"""
filtered_fnames = []
for fname in fnames:
for root in roots:
if root in fname:
filtered_fnames.append(fname)
break
return filtered_fnames


def filter_rootnames(rootnames):
    """Remove rootnames that are known to be unparsable by ``filename_parser``.

    The screening is done here rather than inside the filename parser
    because ``archive_database_update`` can hand the parser thousands of
    unrecognized names (e.g. source-based WFSS files), which would produce
    thousands of logging statements and massive log files. Dropping the
    obviously unparsable rootnames first avoids that.

    Examples of names that are filtered out:
    ``jw06434-c1021_s000001510_nircam_f444w-grismr`` and
    ``jw06434-c1021_t000_nircam_clear-f090w_segm.fits``

    Parameters
    ----------
    rootnames : list
        List of rootnames

    Returns
    -------
    good_rootnames : list
        The rootnames that do not match the source-based pattern, in the
        order in which they were iterated
    """
    # Fragments of the pattern, joined below into a single regular expression.
    # The field widths come from the module-level FILE_*_LEN constants.
    pattern_parts = [
        r"jw",
        r"(?P<program_id>\d{", f"{FILE_PROG_ID_LEN}", "})",
        r"-(?P<ac_id>(o\d{", f"{FILE_AC_O_ID_LEN}", r"}|(c|a|r)\d{", f"{FILE_AC_CAR_ID_LEN}", "}))",
        r"_(?P<target_id>(s\d{", f"{FILE_SOURCE_ID_LONG_LEN}", r"}|(t)\d{", f"{FILE_TARG_ID_LEN}", "}))",
        r"_(?P<instrument>(nircam|niriss|miri))",
        r"_(?P<optical_elements>((?!_)[\w-])+)",
        r"-",
    ]
    source_based = re.compile("".join(pattern_parts))

    good_rootnames = []
    for candidate in rootnames:
        # match() anchors at the start of the string only; anything that
        # begins like a source-based product name is discarded.
        if source_based.match(candidate) is not None:
            continue
        good_rootnames.append(candidate)
    return good_rootnames


@lock_module
def protected_code(update_database, fill_empty_list):
"""Protected code ensures only 1 instance of module will run at any given time
Expand Down
2 changes: 1 addition & 1 deletion jwql/website/apps/jwql/data_containers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2084,7 +2084,7 @@ def text_scrape(prop_id):

links = html.findAll('a')

proposal_type = links[0].contents[0]
proposal_type = links[3].contents[0]

program_meta['prop_type'] = proposal_type

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Generated by Django 5.1.4 on 2025-01-16 21:35

import django.contrib.postgres.fields
from django.db import migrations, models


class Migration(migrations.Migration):
    # Schema migration for the ``jwql`` app that alters five existing fields
    # on the ``filesystemcharacteristics`` and ``filesysteminstrument`` models.

    # This migration is ordered after migration 0027 of the ``jwql`` app.
    dependencies = [
        ('jwql', '0027_alter_fgsbadpixelstats_source_files_and_more'),
    ]

    operations = [
        # filesystemcharacteristics.filter_pupil: nullable/blank array of
        # short strings (max_length=7, default 'empty').
        migrations.AlterField(
            model_name='filesystemcharacteristics',
            name='filter_pupil',
            field=django.contrib.postgres.fields.ArrayField(base_field=models.CharField(default='empty', help_text='filter and/or pupil name', max_length=7), blank=True, null=True, size=None),
        ),
        # filesystemcharacteristics.instrument: CharField declared with no
        # arguments.
        # NOTE(review): no max_length is given here -- presumably this relies
        # on the PostgreSQL backend allowing unbounded CharFields; confirm.
        migrations.AlterField(
            model_name='filesystemcharacteristics',
            name='instrument',
            field=models.CharField(),
        ),
        # filesystemcharacteristics.obs_per_filter_pupil: nullable/blank array
        # of integers.
        migrations.AlterField(
            model_name='filesystemcharacteristics',
            name='obs_per_filter_pupil',
            field=django.contrib.postgres.fields.ArrayField(base_field=models.IntegerField(), blank=True, null=True, size=None),
        ),
        # filesysteminstrument.filetype: CharField declared with no arguments
        # (same max_length caveat as noted for ``instrument`` above).
        migrations.AlterField(
            model_name='filesysteminstrument',
            name='filetype',
            field=models.CharField(),
        ),
        # filesysteminstrument.instrument: short string (max_length=7) with
        # default 'empty'.
        migrations.AlterField(
            model_name='filesysteminstrument',
            name='instrument',
            field=models.CharField(default='empty', help_text='JWST instrument name', max_length=7),
        ),
    ]
Loading

0 comments on commit dd91ace

Please sign in to comment.