Skip to content

Commit

Permalink
Switch build_table() to use Django models
Browse files Browse the repository at this point in the history
  • Loading branch information
bhilbert4 committed Jan 16, 2025
1 parent e9669fd commit 3e192cd
Show file tree
Hide file tree
Showing 5 changed files with 145 additions and 73 deletions.
43 changes: 43 additions & 0 deletions jwql/tests/test_bokeh_dashboard.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/usr/bin/env python

"""Tests for the ``bokeh_dashboard`` module in the ``jwql`` web
application.
Authors
-------
- Bryan Hilbert
Use
---
These tests can be run via the command line (omit the -s to
suppress verbose output to stdout):
::
pytest -s test_bokeh_dashboard.py
"""

import os

from django import setup
import pandas as pd
import pytest

from jwql.utils.constants import DEFAULT_MODEL_CHARFIELD, ON_GITHUB_ACTIONS, ON_READTHEDOCS

os.environ.setdefault("DJANGO_SETTINGS_MODULE", "jwql.website.jwql_proj.settings")

# Import the module under test. Tests that need database access are skipped on Github Actions via pytest.mark.skipif below.
from jwql.website.apps.jwql import bokeh_dashboard # noqa: E402 (module level import not at top of file)

if not ON_GITHUB_ACTIONS and not ON_READTHEDOCS:
setup()


@pytest.mark.skipif(ON_GITHUB_ACTIONS, reason='Requires access to django models.')
def test_build_table_latest_entry():
    """Check that ``build_table_latest_entry`` returns a populated
    dataframe.

    The test exercises ``bokeh_dashboard.build_table_latest_entry``
    itself, rather than the ``build_table`` function that
    ``bokeh_dashboard`` merely imports from ``data_containers``.
    """
    tab = bokeh_dashboard.build_table_latest_entry('FilesystemCharacteristics')
    assert isinstance(tab, pd.DataFrame)
    assert len(tab['date']) > 0
2 changes: 1 addition & 1 deletion jwql/tests/test_data_containers.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@

@pytest.mark.skipif(ON_GITHUB_ACTIONS, reason='Requires access to django models.')
def test_build_table():
    """Check that ``build_table`` returns a populated dataframe.

    ``build_table`` looks tables up by Django model class name, so the
    table is requested as ``'FilesystemGeneral'`` rather than by the
    former database table name ``'filesystem_general'``.
    """
    tab = data_containers.build_table('FilesystemGeneral')
    assert isinstance(tab, pd.DataFrame)
    assert len(tab['date']) > 0

Expand Down
30 changes: 30 additions & 0 deletions jwql/website/apps/jwql/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#! /usr/bin/env python

"""
apps.py is the standard and recommended way to configure application-specific settings
in Django, including tasks like importing additional modules during initialization.
Author
------
B. Hilbert
"""

from django.apps import AppConfig


class JwqlAppConfig(AppConfig):
    """Application configuration for the ``jwql`` Django app."""

    name = 'jwql'
    default_auto_field = 'django.db.models.BigAutoField'

    def ready(self):
        """Import the model modules that live outside of ``models.py``.

        Importing the monitor model modules here makes their models
        available to the ``build_table()`` function.
        """
        from jwql.website.apps.jwql.monitor_models import (  # noqa: F401
            bad_pixel,
            bias,
            claw,
            common,
            dark_current,
            readnoise,
            ta,
        )
86 changes: 41 additions & 45 deletions jwql/website/apps/jwql/bokeh_dashboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
placed in the ``jwql`` directory.
"""

from collections import defaultdict
from datetime import datetime as dt
from math import pi
from operator import itemgetter
Expand All @@ -40,17 +41,30 @@
from bokeh.models.layouts import TabPanel, Tabs
from bokeh.plotting import figure
from bokeh.transform import cumsum
from django import setup
from django.db.models import OuterRef, Subquery
import numpy as np
import pandas as pd
from sqlalchemy import func, and_

import jwql.database.database_interface as di
from jwql.database.database_interface import CentralStore
from jwql.utils.constants import ANOMALY_CHOICES_PER_INSTRUMENT, FILTERS_PER_INSTRUMENT, JWST_INSTRUMENT_NAMES_MIXEDCASE
from jwql.utils.constants import (ANOMALY_CHOICES_PER_INSTRUMENT,
FILTERS_PER_INSTRUMENT,
JWST_INSTRUMENT_NAMES_MIXEDCASE,
ON_GITHUB_ACTIONS,
ON_READTHEDOCS
)
from jwql.utils.utils import get_base_url, get_config
from jwql.website.apps.jwql.data_containers import build_table
from jwql.website.apps.jwql.data_containers import build_table, import_all_models
from jwql.website.apps.jwql.models import Anomalies

if not ON_GITHUB_ACTIONS and not ON_READTHEDOCS:
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "jwql.website.jwql_proj.settings")
setup()

from jwql.website.apps.jwql.models import get_model_column_names


def build_table_latest_entry(tablename):
"""Create Pandas dataframe from the most recent entry of a JWQLDB table.
Expand All @@ -65,46 +79,29 @@ def build_table_latest_entry(tablename):
table_meta_data : pandas.DataFrame
Pandas data frame version of JWQL database table.
"""
# Make dictionary of tablename : class object
# This matches what the user selects in the select element
# in the webform to the python object on the backend.
tables_of_interest = {}
for item in di.__dict__.keys():
table = getattr(di, item)
if hasattr(table, '__tablename__'):
tables_of_interest[table.__tablename__] = table

session, _, _, _ = di.load_connection(get_config()['connection_string'])
table_object = tables_of_interest[tablename] # Select table object

subq = session.query(table_object.instrument,
func.max(table_object.date).label('maxdate')
).group_by(table_object.instrument).subquery('t2')

result = session.query(table_object).join(
subq,
and_(
table_object.instrument == subq.c.instrument,
table_object.date == subq.c.maxdate
)
)

# Turn query result into list of dicts
result_dict = [row.__dict__ for row in result.all()]
column_names = table_object.__table__.columns.keys()

# Build list of column data based on column name.
data = []
for column in column_names:
column_data = list(map(itemgetter(column), result_dict))
data.append(column_data)

data = dict(zip(column_names, data))
all_models = import_all_models()
table_object = all_models.get(tablename)
column_names = get_model_column_names(table_object)

if 'instrument' not in column_names:
raise ValueError(f"No 'instrument' column name in {tablename}. Unable to get latest entry by instrument.")

# Create a subquery to get the latest date for each instrument
subquery = table_object.objects.filter(instrument=OuterRef('instrument')).order_by('-date').values('date')[:1]

# Query the model with the subquery
most_recent_entries = table_object.objects.filter(date=Subquery(subquery))

# Convert the QuerySet into a dictionary
rows = most_recent_entries.values()
data = defaultdict(list)

for row in rows:
for key, value in row.items():
data[key].append(value)

# Build table.
table_meta_data = pd.DataFrame(data)

session.close()
return table_meta_data


Expand Down Expand Up @@ -360,7 +357,7 @@ def dashboard_filetype_bar_chart(self):

# Make Pandas DF for filesystem_instrument
# If time delta exists, filter data based on that.
data = build_table('filesystem_instrument')
data = build_table('FilesystemInstrument')

# Keep only the rows containing the most recent timestamp
data = data[data['date'] == data['date'].max()]
Expand Down Expand Up @@ -390,8 +387,7 @@ def dashboard_instrument_pie_chart(self):
plot : bokeh.plotting.figure
Pie chart figure
"""
# Replace with jwql.website.apps.jwql.data_containers.build_table
data = build_table('filesystem_instrument')
data = build_table('FilesystemInstrument')

# Keep only the rows containing the most recent timestamp
data = data[data['date'] == data['date'].max()]
Expand Down Expand Up @@ -439,7 +435,7 @@ def dashboard_files_per_day(self):
A figure with tabs for each instrument.
"""

source = build_table('filesystem_general')
source = build_table('FilesystemGeneral')
if not pd.isnull(self.delta_t):
source = source[(source['date'] >= self.date - self.delta_t) & (source['date'] <= self.date)]

Expand Down Expand Up @@ -495,7 +491,7 @@ def dashboard_monitor_tracking(self):
Numpy array of column values from monitor table.
"""

data = build_table('monitor')
data = build_table('Monitor')

if not pd.isnull(self.delta_t):
data = data[(data['start_time'] >= self.date - self.delta_t) & (data['start_time'] <= self.date)]
Expand Down Expand Up @@ -551,7 +547,7 @@ def dashboard_exposure_count_by_filter(self):
"""
# build_table_latest_entry will return only the database entries with the latest date. This should
# correspond to one row/entry per instrument
data = build_table_latest_entry('filesystem_characteristics')
data = build_table_latest_entry('FilesystemCharacteristics')

# Sort by instrument name so that the order of the tabs will always be the same
data = data.sort_values('instrument')
Expand Down
57 changes: 30 additions & 27 deletions jwql/website/apps/jwql/data_containers.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
import os
import re
import tempfile
from collections import OrderedDict
from collections import defaultdict, OrderedDict
from datetime import datetime
from operator import getitem, itemgetter

Expand All @@ -46,13 +46,12 @@
from astroquery.mast import Mast
from bs4 import BeautifulSoup
from django import forms, setup
from django.apps import apps
from django.conf import settings
from django.contrib import messages
from django.core.exceptions import ObjectDoesNotExist
from django.db.models.query import QuerySet

from jwql.database import database_interface as di
from jwql.database.database_interface import load_connection
from jwql.edb.engineering_database import get_mnemonic, get_mnemonic_info, mnemonic_inventory
from jwql.utils.constants import (
DEFAULT_MODEL_COMMENT,
Expand Down Expand Up @@ -83,6 +82,7 @@
get_rootnames_for_instrument_proposal,
)


# Increase the limit on the number of entries that can be returned by
# a MAST query.
Mast._portal_api_connection.PAGESIZE = MAST_QUERY_LIMIT
Expand All @@ -96,7 +96,7 @@
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "jwql.website.jwql_proj.settings")
setup()

from jwql.website.apps.jwql.models import Anomalies, Observation, Proposal, RootFileInfo
from jwql.website.apps.jwql.models import Anomalies, get_model_column_names, Observation, Proposal, RootFileInfo

from .forms import (
InstrumentAnomalySubmitForm,
Expand Down Expand Up @@ -139,36 +139,23 @@ def build_table(tablename):
table_meta_data : pandas.DataFrame
Pandas data frame version of JWQL database table.
"""
# Make dictionary of tablename : class object
# This matches what the user selects in the select element
# in the webform to the python object on the backend.
tables_of_interest = {}
for item in di.__dict__.keys():
table = getattr(di, item)
if hasattr(table, '__tablename__'):
tables_of_interest[table.__tablename__] = table

session, _, _, _ = load_connection(get_config()['connection_string'])
table_object = tables_of_interest[tablename] # Select table object

result = session.query(table_object)
all_models = import_all_models()
table_object = all_models.get(tablename)

# Turn query result into list of dicts
result_dict = [row.__dict__ for row in result.all()]
column_names = table_object.__table__.columns.keys()
result = table_object.objects.all()
column_names = get_model_column_names(table_object)

# Build list of column data based on column name.
data = []
for column in column_names:
column_data = list(map(itemgetter(column), result_dict))
data.append(column_data)
# Convert the QuerySet into a dictionary
rows = result.values()
data = defaultdict(list)

data = dict(zip(column_names, data))
for row in rows:
for key, value in row.items():
data[key].append(value)

# Build table.
table_meta_data = pd.DataFrame(data)

session.close()
return table_meta_data


Expand Down Expand Up @@ -1970,6 +1957,22 @@ def get_thumbnail_by_rootname(rootname):
return thumbnail_basename


def import_all_models():
    """Collect the Django models registered under the ``jwql`` app.

    Returns
    -------
    models : dict
        Keys are model class names (as strings), values are the
        corresponding model classes.
    """
    jwql_app = apps.get_app_config('jwql')
    return {model_class.__name__: model_class for model_class in jwql_app.get_models()}


def log_into_mast(request):
"""Login via astroquery.mast if user authenticated in web app.
Expand Down

0 comments on commit 3e192cd

Please sign in to comment.