Commit 00792f9

Merge pull request #10 from drewmee/develop
Develop
2 parents 3de48b4 + 6c0cdd4 commit 00792f9

14 files changed: +80,650 additions, −1,020 deletions

README.md

Lines changed: 2 additions & 2 deletions
@@ -3,7 +3,7 @@
 [![pypi version](https://img.shields.io/pypi/v/pyeem.svg 'pypi version')](https://pypi.org/project/pyeem/)
 [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pyeem.svg)](https://pypi.org/project/pyeem/)
 [![CircleCI](https://circleci.com/gh/drewmee/PyEEM.svg?style=shield&circle-token=ccdb16078dcb8ee4e4c9b923f547fc7cb2742aae)](https://app.circleci.com/pipelines/github/drewmee/PyEEM)
-[![Read the Docs](https://readthedocs.org/projects/drewmee-demo/badge/?version=latest)](https://pyeem.readthedocs.io/)
+[![Read the Docs](https://readthedocs.org/projects/pyeem/badge/?version=latest)](https://pyeem.readthedocs.io/)
 [![Binder](https://mybinder.org/badge.svg)](https://mybinder.org/v2/gh/drewmee/PyEEM/master?filepath=docs%2Fsource%2Ftutorials%2Fnotebooks)
 [![license](https://img.shields.io/github/license/mashape/apistatus.svg)](https://github.com/drewmee/PyEEM/blob/master/LICENSE)
 <!--- Badge for codecov -->
@@ -22,7 +22,7 @@ Install via pip:
 
 ## Dependencies for Local Development
 
-If you wish to build the local documentation or run unit tests, there are a few additional dependencies that are required including:
+If you wish to build the local documentation or run unit tests, there are a few additional dependencies. Those can be installed by:
 
 pip install -e ".[docs, tests]"
 

docs/source/_static/logo.png

-20.7 KB

docs/source/tutorials/notebooks/tutorial_1.ipynb

Lines changed: 80382 additions & 1003 deletions
Large diffs are not rendered by default.

pyeem/analysis/models/rutherfordnet.py

Lines changed: 54 additions & 4 deletions
@@ -5,8 +5,15 @@
 import numpy as np
 import pandas as pd
 import tensorflow as tf
-from tensorflow.keras.layers import (Activation, Conv2D, Dense, Dropout,
-                                     Flatten, MaxPooling2D)
+from scipy import stats
+from tensorflow.keras.layers import (
+    Activation,
+    Conv2D,
+    Dense,
+    Dropout,
+    Flatten,
+    MaxPooling2D,
+)
 from tensorflow.keras.models import Sequential
 
 
@@ -97,6 +104,8 @@ def get_training_data(self, dataset, ss_results_df, mix_results_df):
         X, y = [], []
 
         aug_df.index = aug_df.index.droplevel(drop_indices)
+        # shuffle
+        aug_df = aug_df.sample(frac=1)
         for concentrations, eem_df in aug_df.groupby(
             sources + ["source"], as_index=False
         ):
@@ -110,7 +119,15 @@ def get_training_data(self, dataset, ss_results_df, mix_results_df):
             X.append(eem_np)
             y.append(concentrations[:-1])
 
-        return np.asarray(X), np.asarray(y)
+        X = np.asarray(X)
+        y = np.asarray(y)
+
+        randomize = np.arange(len(X))
+        np.random.shuffle(randomize)
+        X = X[randomize]
+        y = y[randomize]
+
+        return X, y
 
     def _isolate_test_samples(self, dataset, routine_results_df):
         # Isolate test samples from the metadata
@@ -239,8 +256,41 @@ def train(self, X, y, fit_kws={}):
             tensorflow.python.keras.callbacks.History: [description]
         """
         default_fit_kws = dict(
-            batch_size=32, epochs=5, validation_split=0.5, shuffle=True
+            batch_size=32, epochs=5, validation_split=0.3, shuffle=True
         )
         fit_kws = dict(default_fit_kws, **fit_kws)
         history = self.model.fit(X, y, **fit_kws)
         return history
+
+    def get_prediction_results(self, dataset, predictions, y):
+        cal_sources = list(dataset.calibration_sources.keys())
+        true_df = pd.DataFrame(y, columns=cal_sources)
+        pred_df = pd.DataFrame(predictions, columns=cal_sources)
+
+        results_df = pd.DataFrame()
+        for source, units in dataset.calibration_sources.items():
+            tmp_df = pd.concat(
+                [
+                    true_df[source].to_frame(name="true_concentration"),
+                    pred_df[source].to_frame(name="predicted_concentration"),
+                ],
+                axis=1,
+            )
+            tmp_df[["source", "units"]] = source, units
+            (
+                tmp_df["slope"],
+                tmp_df["intercept"],
+                tmp_df["r_value"],
+                _,
+                _,
+            ) = stats.linregress(
+                tmp_df["true_concentration"], tmp_df["predicted_concentration"]
+            )
+            tmp_df["r_squared"] = tmp_df["r_value"] ** 2
+            tmp_df = tmp_df.set_index(
+                ["source", "units", "slope", "intercept", "r_squared"]
+            )
+            tmp_df = tmp_df.drop(columns="r_value")
+            results_df = pd.concat([results_df, tmp_df])
+
+        return results_df
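
Two small techniques carry this change: the training pairs are shuffled in unison by indexing both arrays with one shared permutation, and per-source parity statistics are computed with `scipy.stats.linregress`. A minimal, self-contained sketch of the unison shuffle on toy data (the shapes are illustrative, not real EEM dimensions):

```python
import numpy as np

# Toy stand-ins for the EEM tensors (X) and concentration labels (y).
X = np.arange(5, dtype=float).reshape(5, 1)  # 5 "samples"
y = X[:, 0] * 10                             # matching labels

# One shared permutation keeps each sample aligned with its label.
randomize = np.arange(len(X))
np.random.shuffle(randomize)
X, y = X[randomize], y[randomize]

# Pairs survive the shuffle intact.
assert np.allclose(y, X[:, 0] * 10)
```

The same idea underlies `aug_df.sample(frac=1)`, which reorders the augmented DataFrame's rows before grouping.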

pyeem/instruments/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -3,4 +3,4 @@
 
 supported, _supported = get_supported_instruments()
 
-__all__ = ["agilent", "horiba", "supported"]
+__all__ = ["agilent", "horiba", "get_supported_instruments", "supported"]

pyeem/plots/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -1,6 +1,7 @@
 import matplotlib.pyplot as plt
 from pkg_resources import resource_filename
 
+from .analysis import model_history, prediction_parity_plots
 from .augmentations import (
     mixture_animation,
     plot_prototypical_spectra,
@@ -18,5 +19,5 @@
     "single_source_animation",
     "eem_plot",
     "plot_calibration_curves",
-    "plot_preprocessing"
+    "plot_preprocessing",
 ]

pyeem/plots/analysis.py

Lines changed: 130 additions & 0 deletions
@@ -0,0 +1,130 @@
+import matplotlib.pyplot as plt
+import numpy as np
+
+from .base import _get_subplot_dims
+
+
+def model_history(history):
+    """[summary]
+
+    Args:
+        history ([type]): [description]
+
+    Returns:
+        [type]: [description]
+    """
+    fig, axes = plt.subplots(figsize=(8, 4), ncols=2, sharex=True)
+    for i, metric in enumerate(["accuracy", "loss"]):
+        ax = axes[i]
+        ax.plot(history.history[metric])
+        ax.plot(history.history["val_%s" % metric])
+        ax.set_title("Model %s" % metric.title())
+        ax.set_ylabel(metric.title())
+        ax.set_xlabel("Epoch")
+        ax.legend(["Train", "Val."], loc="upper left", fontsize=11)
+
+    plt.tight_layout()
+    return axes
+
+
+def prediction_parity_plots(
+    dataset, test_df, train_df=None, subplots=False, fig_kws={}, **kwargs
+):
+    """[summary]
+
+    Args:
+        dataset ([type]): [description]
+        test_df ([type]): [description]
+        train_df ([type], optional): [description]. Defaults to None.
+        subplots (bool, optional): [description]. Defaults to False.
+        fig_kws (dict, optional): [description]. Defaults to {}.
+
+    Returns:
+        [type]: [description]
+    """
+    colors = plt.rcParams["axes.prop_cycle"]()
+    sources = dataset.calibration_sources
+
+    nsources = len(sources)
+    nrows, ncols = _get_subplot_dims(nsources)
+    nplots = nrows * ncols
+
+    default_fig_kws = dict(figsize=(ncols ** 2, nrows * ncols), squeeze=False)
+    fig_kws = dict(default_fig_kws, **fig_kws)
+
+    fig, axes = plt.subplots(1, nsources, **fig_kws)
+
+    def _get_regression_metric(source_df, metric):
+        return source_df.index.get_level_values(level=metric).unique().item()
+
+    pred_dfs = {"test": test_df, "train": train_df}
+
+    ax_idx = 0
+    lines = []
+    labels = []
+    for source in sources:
+        for key, df in pred_dfs.items():
+            if df is None:
+                continue
+
+            if key == "test":
+                marker_color = next(colors)["color"]
+                line_color = "black"
+                alpha = 1
+                zorder = 1
+            else:
+                marker_color = "lightblue"
+                line_color = "grey"
+                alpha = 0.25
+                zorder = -1
+
+            source_df = df.xs(source, level="source")
+            source_units = _get_regression_metric(source_df, "units")
+            slope = _get_regression_metric(source_df, "slope")
+            y_intercept = _get_regression_metric(source_df, "intercept")
+            r_squared = _get_regression_metric(source_df, "r_squared")
+            cal_poly = np.poly1d([slope, y_intercept])
+
+            x = source_df["true_concentration"]
+            y = source_df["predicted_concentration"]
+            axes.flat[ax_idx].scatter(
+                x, y, label=key, color=marker_color, alpha=alpha, zorder=zorder
+            )
+
+            x = np.linspace(
+                source_df["true_concentration"].min(),
+                source_df["true_concentration"].max(),
+            )
+            axes.flat[ax_idx].plot(
+                x,
+                cal_poly(x),
+                label="y = %s\n$R^2=%.2f$"
+                % (str(cal_poly).replace("\n", ""), r_squared),
+                color=line_color,
+                linestyle="--",
+                zorder=zorder,
+            )
+
+        formatted_source_str = source.replace("_", " ").title()
+        xlabel_str = "True Conc., %s" % source_units
+        ylabel_str = "Predicted Conc., %s" % source_units
+        axes.flat[ax_idx].set_xlabel(xlabel_str, fontsize=14)
+        axes.flat[ax_idx].set_ylabel(ylabel_str, fontsize=14)
+        axes.flat[ax_idx].tick_params(axis="both", which="major", labelsize=12)
+        axes.flat[ax_idx].set_title(
+            "Parity Plot for\n%s Concentration" % formatted_source_str,
+            pad=10,
+            fontsize=16,
+        )
+        ax_line, ax_label = axes.flat[ax_idx].get_legend_handles_labels()
+        lines.extend(ax_line)
+        labels.extend(ax_label)
+        axes.flat[ax_idx].legend(
+            loc="upper center", bbox_to_anchor=(0.5, -0.25), ncol=2, fontsize=11
+        )
+        ax_idx += 1
+
+    hspace = kwargs.get("subplot_hspace", 0)
+    wspace = kwargs.get("subplot_wspace", 0.3)
+    plt.subplots_adjust(wspace=wspace, hspace=hspace)
+    return axes
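
Both helpers are re-exported from `pyeem.plots` (see the `__init__.py` change above), so downstream code can call them directly. A plausible usage sketch after training; the variable names (`rutherfordnet`, `history`, `x_test`, `y_test`, `dataset`) are assumptions standing in for objects produced in earlier steps, not names fixed by this diff:

```python
import matplotlib.pyplot as plt
import pyeem

# history comes from rutherfordnet.train(x_train, y_train) (a Keras History);
# predictions are the raw model outputs for the held-out test EEMs.
predictions = rutherfordnet.model.predict(x_test)
test_results_df = rutherfordnet.get_prediction_results(dataset, predictions, y_test)

pyeem.plots.model_history(history)                             # accuracy/loss vs. epoch
pyeem.plots.prediction_parity_plots(dataset, test_results_df)  # per-source parity plots
plt.show()
```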

pyeem/plots/preprocessing.py

Lines changed: 4 additions & 1 deletion
@@ -219,7 +219,10 @@ def _get_regression_metric(source_df, metric):
         ax_line, ax_label = axes.flat[ax_idx].get_legend_handles_labels()
         lines.extend(ax_line)
         labels.extend(ax_label)
-        axes.flat[ax_idx].legend(loc="upper left", fontsize=11)
+        #axes.flat[ax_idx].legend(loc="upper left", fontsize=11)
+        axes.flat[ax_idx].legend(
+            loc="upper center", bbox_to_anchor=(0.5, -0.25), ncol=2, fontsize=11
+        )
         ax_idx += 1
 
     hspace = kwargs.get("subplot_hspace", 0)

pyeem/preprocessing/calibration/calibration.py

Lines changed: 14 additions & 5 deletions
@@ -124,10 +124,19 @@ def calibration_summary_info(cal_df):
 
     def _get_summary_info(row):
         source_df = cal_df.xs(row["source"], level="source")
-        row["Number of Samples"] = source_df.shape[0]
-        row["Min. Concentration"] = source_df["concentration"].min()
-        row["Max. Concentration"] = source_df["concentration"].max()
-        return row
+        num_samples = source_df.shape[0]
+        min_conc = source_df["concentration"].min()
+        max_conc = source_df["concentration"].max()
+        return pd.Series(
+            {
+                "Number of Samples": num_samples,
+                "Min. Concentration": min_conc,
+                "Max. Concentration": max_conc,
+            }
+        )
+
+    summary_df[
+        ["Number of Samples", "Min. Concentration", "Max. Concentration"]
+    ] = summary_df.apply(_get_summary_info, axis=1)
 
-    summary_df = summary_df.apply(_get_summary_info, axis=1)
     return summary_df
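
The refactor changes `_get_summary_info` to return a `pd.Series`, so a single `apply` fills several named columns at once instead of mutating each row. A minimal, self-contained sketch of that pandas pattern on made-up data (the toy values and index names are illustrative, not from PyEEM):

```python
import pandas as pd

# Toy calibration table indexed by (source, sample).
cal_df = pd.DataFrame(
    {
        "source": ["a", "a", "b"],
        "sample": [1, 2, 1],
        "concentration": [0.1, 2.0, 5.0],
    }
).set_index(["source", "sample"])

summary_df = pd.DataFrame({"source": ["a", "b"]})

def _get_summary_info(row):
    source_df = cal_df.xs(row["source"], level="source")
    # Returning a Series lets one .apply() populate several named columns.
    return pd.Series(
        {
            "Number of Samples": source_df.shape[0],
            "Min. Concentration": source_df["concentration"].min(),
            "Max. Concentration": source_df["concentration"].max(),
        }
    )

summary_df[
    ["Number of Samples", "Min. Concentration", "Max. Concentration"]
] = summary_df.apply(_get_summary_info, axis=1)
print(summary_df)
```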

pyeem/preprocessing/corrections/corrections.py

Lines changed: 2 additions & 0 deletions
@@ -133,6 +133,8 @@ def raman_normalization(eem_df, raman_source_type, raman_source, method="gradien
     # peak boundary definition (Murphy and others, 2011)
     # raman_sources = ['water_raman', 'blank', 'metadata']
 
+    # This really oughta be refactored ASAP
+
     if raman_source_type in ["blank", "water_raman"]:
         a = 371  # lower limit
         b = 428  # upper limit
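
For context on those boundaries: Raman normalization in the EEM literature (Murphy et al., 2011) integrates the water Raman scatter peak of a blank between roughly 371 and 428 nm emission and divides the EEM by that area, expressing intensities in Raman units. A generic sketch of that idea, not PyEEM's actual implementation:

```python
import numpy as np

def raman_peak_area(em_nm, intensity, a=371, b=428):
    """Trapezoidal area of a water Raman emission scan between a and b (nm)."""
    mask = (em_nm >= a) & (em_nm <= b)
    return np.trapz(intensity[mask], em_nm[mask])

# Dividing every EEM intensity by this area puts the EEM in Raman units (R.U.).
```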

tests/conftest.py

Lines changed: 18 additions & 2 deletions
@@ -8,8 +8,8 @@
 @pytest.fixture(scope="session", autouse=True)
 def tmp_dir_fixture(tmpdir_factory):
     # setup section
-    tmp_data_dir = tmpdir_factory.mktemp("demo_data")
-    # tmp_data_dir = "local_test_data"
+    #tmp_data_dir = tmpdir_factory.mktemp("demo_data")
+    tmp_data_dir = "local_test_data"
     yield tmp_data_dir
     # teardown section
     if tmp_data_dir != "local_test_data":
@@ -102,3 +102,19 @@ def demo_augmentation(tmp_dir_fixture, demo_preprocessed_dataset, demo_calibrati
         dataset, cal_df, conc_range=(0.01, 6.3), num_steps=5
     )
     return proto_results_df, ss_results_df, mix_results_df
+
+
+@pytest.fixture(scope="session", autouse=True)
+def demo_rutherfordnet(
+    tmp_dir_fixture, demo_preprocessed_dataset, demo_calibration, demo_augmentation
+):
+    dataset, routine_results_df = demo_preprocessed_dataset
+    cal_df = demo_calibration
+    (_, ss_results_df, mix_results_df,) = demo_augmentation
+
+    rutherfordnet = pyeem.analysis.models.RutherfordNet()
+    (x_train, y_train), (x_test, y_test) = rutherfordnet.prepare_data(
+        dataset, ss_results_df, mix_results_df, routine_results_df
+    )
+    rutherfordnet.train(x_train, y_train)
+    return rutherfordnet
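
Because the fixture is session-scoped, the network is trained once and then shared by any test that names it as an argument. A hypothetical consumer, not part of this commit:

```python
# Hypothetical test sketch; not included in this diff.
def test_demo_rutherfordnet_is_trained(demo_rutherfordnet):
    # The fixture hands back a RutherfordNet whose Keras model has already been fit.
    assert demo_rutherfordnet.model is not None
    assert demo_rutherfordnet.model.built
```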

tests/test_analysis.py

Lines changed: 3 additions & 0 deletions
@@ -60,4 +60,7 @@ def setup(self, demo_datasets):
         self.demo_datasets = demo_datasets
 
     def testFluorescenceRegionalIntegration(self):
+        #eem_df =
+        #integ_result = pyeem.analysis.basic.fluorescence_regional_integration(eem_df)
+        #assert integ_result ==
         return
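
The commented lines sketch the intended shape of the test. One way it might eventually be filled in, assuming `fluorescence_regional_integration` accepts an EEM DataFrame as in the commented call; the toy DataFrame and the deliberately weak assertion are illustrative only:

```python
import numpy as np
import pandas as pd

    def testFluorescenceRegionalIntegration(self):
        # Toy EEM: emission wavelengths as the index, excitation wavelengths as columns.
        em, ex = np.arange(300, 310), np.arange(250, 255)
        eem_df = pd.DataFrame(np.ones((len(em), len(ex))), index=em, columns=ex)

        # Hypothetical expectation; the real return type is not shown in this diff.
        integ_result = pyeem.analysis.basic.fluorescence_regional_integration(eem_df)
        assert integ_result is not None
```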
