WIP Initial scoping for upgrade to Python 3 #3

Draft · wants to merge 39 commits into base: tests_framework
39 commits
8a0ff35  initial commit of python2 deployment ci (ma595, Jan 10, 2023)
3c07bc5  syntax fix (ma595, Jan 10, 2023)
fb670c8  syntax fix (ma595, Jan 10, 2023)
3d3aa88  change branch (ma595, Jan 10, 2023)
5703c1f  change python version (ma595, Jan 10, 2023)
eb9d918  added simple partitioning_nt test run and pytest example (ma595, Jan 11, 2023)
4e1f587  updated build to point to frozen requirements (ma595, Jan 11, 2023)
03318a5  removed software health tools (ma595, Jan 11, 2023)
53d948b  fix syntax (ma595, Jan 11, 2023)
79b88d8  add setuptools to frozen-requirements.txt (ma595, Jan 11, 2023)
2e1e23c  upgrade pip (ma595, Jan 11, 2023)
4ea607b  add wheel to prevent source build of pandas (ma595, Jan 11, 2023)
fc5e796  try forcing pandas wheel collection (ma595, Jan 11, 2023)
ea44b8b  add pytest and bump pandas (ma595, Jan 11, 2023)
0030188  attempt with old build system (ma595, Jan 11, 2023)
e15d25c  amend pytest example (ma595, Jan 11, 2023)
7667c93  remove whitespace (ma595, Jan 11, 2023)
58833b2  removed partition_nt test code (ma595, Jan 26, 2023)
5528e2f  placeholder files for tests (ma595, Jan 26, 2023)
f9a4be6  partition_nt test now passes with data setup function implemented (ma595, Jan 26, 2023)
d9fded5  started unit testing (ma595, Jan 27, 2023)
3dbac46  changed directory structure and updated integration test so equality … (ma595, Jan 30, 2023)
3b9ed73  add pythonpath to workflow (ma595, Jan 30, 2023)
8a56f66  remove unfinished rm fn (ma595, Jan 30, 2023)
241e422  removed unnecessary comments from code (ma595, Apr 4, 2023)
9642ef6  lowered upper bound on statsmodels (ma595, Apr 4, 2023)
535e026  lowered upper bound on statsmodels (ma595, Apr 4, 2023)
d39c927  removed partitioning unit tests (ma595, Apr 4, 2023)
8f4c664  neaten CI (ma595, Apr 4, 2023)
cbfe9f3  test_context.py now converted to pytest (ma595, Apr 4, 2023)
3445efe  removed unnecessary files and fixed test, now expected to fail (ma595, Apr 4, 2023)
e1fec90  still fails - immediate fail due to extra columns in generated output… (ma595, Apr 4, 2023)
8e919f2  reverted to 1.25 (ma595, Apr 4, 2023)
09aa9aa  upgrading libraries for python 3.12.0 compatibility, note not backwar… (AmyOctoCat, Jun 14, 2024)
d93da54  further python 3 updates (AmyOctoCat, Jun 14, 2024)
9dd6697  think the dtype parameter needs to be changed due to python 2/3 chang… (AmyOctoCat, Jun 21, 2024)
cd8fbf5  Move datatype for numpy genfromtxt for string variables into separate… (AmyOctoCat, Jun 21, 2024)
ffce7db  run the 2to3 tool (AmyOctoCat, Jun 21, 2024)
073abd1  replace second instance of numpy importing to byte object (AmyOctoCat, Jun 21, 2024)
38 changes: 38 additions & 0 deletions .github/workflows/python-app.yaml
@@ -0,0 +1,38 @@
name: ONEFlux CI

on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]

permissions:
  contents: read

jobs:
  build:
    runs-on: ubuntu-20.04
    steps:
    - uses: actions/checkout@v3
    - name: Set up Python 2.7.18
      uses: actions/setup-python@v3
      with:
        python-version: "2.7.18"
    - name: Install OneFLUX
      run: |
        python -m pip install --upgrade pip
        pip install setuptools wheel pytest
        make
    - name: Download data
      run: |
        # get US-ARc_sample data for tests
        mkdir -p ./tests/data/test_input
        mkdir -p ./tests/data/test_output
        wget ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_input.zip
        wget ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_output.zip
        unzip US-ARc_sample_input.zip -d ./tests/data/test_input
        unzip US-ARc_sample_output.zip -d ./tests/data/test_output
    - name: Run pytest
      run: |
        export PYTHONPATH=/home/runner/work/ONEFlux/ONEFlux:$PYTHONPATH
        pytest tests/python
5 changes: 3 additions & 2 deletions oneflux/partition/auxiliary.py
@@ -28,6 +28,7 @@
#FLOAT_PREC = 'f8'
FLOAT_PREC = 'f4'
DOUBLE_PREC = 'f8'
+STRING_VARIABLE_LENGTH = 'U12'


_log = logging.getLogger(__name__)
@@ -111,7 +112,7 @@ def compare_col_to_pvwave(py_array, filename, label=None, diff=False, show_plot=
s_string = s_string.replace(' ', '')
s_string = s_string.replace('-1.#IND000', '-9999')
s_string = s_string.replace('\r', '')
-u_string = unicode(s_string)
+u_string = str(s_string)
pw_array = numpy.genfromtxt(StringIO(u_string), dtype=FLOAT_PREC, delimiter=',', skip_header=0, missing_values='-9999,-9999.0,-6999,-6999.0, ', usemask=True)
pw_array = numpy.ma.filled(pw_array, numpy.NaN)
# **************************************************************************************************************************************************
@@ -204,7 +205,7 @@ def compare_col_to_pvwave(py_array, filename, label=None, diff=False, show_plot=
figure_basename = figure_basename.replace('_PW', '') # remove _PW from PW data source filename

record_interval = (timedelta(minutes=30) if resolution == 'hh' else timedelta(minutes=60))
-timestamp_list = [datetime(year, 1, 1, 0, 0) + (record_interval * i) for i in xrange(1, py_array.size + 1)]
+timestamp_list = [datetime(year, 1, 1, 0, 0) + (record_interval * i) for i in range(1, py_array.size + 1)]

_log.debug("Using year={y}, resolution={r}, first timestamp={f}, last timestamp={l}".format(y=year, r=resolution, f=timestamp_list[0], l=timestamp_list[-1]))

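The hunk above keeps the existing read-masked-then-fill idiom around the new str() call; a minimal sketch of that numpy idiom (sample data assumed, not taken from the PR):

    from io import StringIO
    import numpy

    # Sentinel values such as -9999 are masked on read, then replaced with
    # NaN, mirroring the genfromtxt/ma.filled pair in auxiliary.py.
    raw = u"1.5,2.5\n-9999,3.0\n"
    arr = numpy.genfromtxt(StringIO(raw), dtype='f4', delimiter=',',
                           missing_values='-9999,-9999.0,-6999,-6999.0, ',
                           usemask=True)
    arr = numpy.ma.filled(arr, numpy.nan)
    assert numpy.isnan(arr[1, 0]) and arr[1, 1] == 3.0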
10 changes: 5 additions & 5 deletions oneflux/partition/daytime.py
@@ -909,7 +909,7 @@ def estimate_parasets(data, winsize, fguess, trimperc, name_out, dt_output_dir,
###############################################

#### Creating the arrays we're going to use
-n_parasets = long(365 / winsize) * 2
+n_parasets = int(365 / winsize) * 2
params = numpy.zeros((3, 2 * len(fguess), n_parasets), dtype=FLOAT_PREC)
params_ok = numpy.zeros((2 * len(fguess), n_parasets), dtype=FLOAT_PREC)
params_nok = numpy.zeros((2 * len(fguess), n_parasets), dtype=FLOAT_PREC)
@@ -1041,7 +1041,7 @@ def estimate_parasets(data, winsize, fguess, trimperc, name_out, dt_output_dir,
#ind[i, :, :] = long((day_begin + winsize / 2.0) * 48.0)

#### Calculate the first index of the window we're using now
-ind[:, :, i] = long((day_begin + winsize / 2.0) * 48.0)
+ind[:, :, i] = int((day_begin + winsize / 2.0) * 48.0)

'''
#print("ind[:, :, i]")
@@ -1938,12 +1938,12 @@ def percentiles_fn(data, columns, values=[0.0, 0.25, 0.5, 0.75, 1.0], remove_mis

#### Setting ind to the percentile wanted
if values[i] <= 0.5:
-ind = long(values[i] * n_elements)
+ind = int(values[i] * n_elements)
else:
-ind = long(values[i] * (n_elements + 1))
+ind = int(values[i] * (n_elements + 1))

if ind >= n_elements:
-ind = n_elements - long(1)
+ind = n_elements - int(1)

if i == 0:
result = data[columns[0]][sorted_index_arr[ind]]
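One subtlety behind these long()-to-int() swaps: the int/long unification also changed division semantics, so it is worth confirming the converted expressions still floor as before (a quick illustrative check; the winsize value is assumed):

    # Python 2's long(365 / winsize) floor-divided two ints, while Python 3's
    # / is true division, so int() now truncates a float. For positive winsize
    # both agree; 365 // winsize would make the floor explicit.
    winsize = 7
    assert int(365 / winsize) * 2 == (365 // winsize) * 2 == 104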
6 changes: 3 additions & 3 deletions oneflux/partition/library.py
@@ -23,7 +23,7 @@
from oneflux import ONEFluxError
from oneflux.partition.ecogeo import lloyd_taylor, lloyd_taylor_dt, hlrc_lloyd, hlrc_lloydvpd
from oneflux.partition.ecogeo import hlrc_lloyd_afix, hlrc_lloydvpd_afix, lloydt_e0fix
-from oneflux.partition.auxiliary import FLOAT_PREC, DOUBLE_PREC, NAN, nan, not_nan
+from oneflux.partition.auxiliary import FLOAT_PREC, DOUBLE_PREC, STRING_VARIABLE_LENGTH, NAN, nan, not_nan

from oneflux.graph.compare import plot_comparison
from oneflux.utils.files import file_exists_not_empty
@@ -76,7 +76,7 @@ def load_output(filename, delimiter=',', skip_header=1):
_log.debug("Finished loading headers: {h}".format(h=headers))

_log.debug("Started loading data")
-dtype = [(i, ('a25' if i.lower() in STRING_HEADERS else FLOAT_PREC)) for i in headers]
+dtype = [(i, (STRING_VARIABLE_LENGTH if i.lower() in STRING_HEADERS else FLOAT_PREC)) for i in headers]
vfill = [('' if i.lower() in STRING_HEADERS else numpy.NaN) for i in headers]
data = numpy.genfromtxt(fname=filename, dtype=dtype, names=headers, delimiter=delimiter, skip_header=skip_header, missing_values='-9999,-9999.0,-6999,-6999.0, ', usemask=True)
data = numpy.ma.filled(data, vfill)
@@ -1299,7 +1299,7 @@ def load_outputs(filename, delimiter=',', skip_header=1, is_not_hourly=True, is_
_log.debug("Loaded headers: {h}".format(h=headers))

_log.debug("Started loading data")
-dtype = [(i, ('a25' if i.lower() in STRING_HEADERS else FLOAT_PREC)) for i in headers]
+dtype = [(i, (STRING_VARIABLE_LENGTH if i.lower() in STRING_HEADERS else FLOAT_PREC)) for i in headers]
vfill = [('' if i.lower() in STRING_HEADERS else numpy.NaN) for i in headers]
data = numpy.genfromtxt(fname=filename, dtype=dtype, names=headers, delimiter=delimiter, skip_header=skip_header, missing_values='-9999,-9999.0,-6999,-6999.0, ', usemask=True)
data = numpy.ma.filled(data, vfill)
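A hedged sketch of why 'a25' gives way to the new STRING_VARIABLE_LENGTH ('U12') dtype (sample row assumed, not taken from the PR): on Python 3, numpy's 'a'/'S' kinds read text fields as bytes, so comparisons against str values quietly fail, while a 'U' dtype yields str:

    from io import StringIO
    import numpy

    row = u"US-ARc,1.5"
    as_bytes = numpy.genfromtxt(StringIO(row), delimiter=',',
                                dtype=[('site', 'a25'), ('val', 'f4')])
    as_str = numpy.genfromtxt(StringIO(row), delimiter=',',
                              dtype=[('site', 'U12'), ('val', 'f4')])
    assert as_bytes['site'] == b'US-ARc'   # bytes on Python 3
    assert as_str['site'] == 'US-ARc'      # str, as the STRING_HEADERS checks expect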
2 changes: 1 addition & 1 deletion oneflux/partition/nighttime.py
@@ -246,7 +246,7 @@ def flux_partition(data, lat, tempvar='tair', nomsg=False, temp_output_filename=
julmin, julmax = int(juldays[0]), int(numpy.max(juldays)) ### first/last day of year
n_regr = 0 ### counter of number of regressions/optimizations

-window_steps = range(julmin, julmax + 1, STEP_SIZE)
+window_steps = list(range(julmin, julmax + 1, STEP_SIZE))

# TODO: (potential) add e0_1_list, e0_2_list, e0_3_list, and corresponding se and idx to track individual

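The list() wrapper restores the Python 2 behavior where range() returned a concrete list; a minimal illustration (STEP_SIZE and the year bounds are assumed values):

    STEP_SIZE = 5
    julmin, julmax = 1, 365
    # range() is a lazy sequence in Python 3; list() materializes it so the
    # result can be mutated like the Python 2 list it replaces.
    window_steps = list(range(julmin, julmax + 1, STEP_SIZE))
    assert window_steps[0] == 1 and window_steps[-1] == 361
    window_steps.append(julmax)   # a plain range object would not allow this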
2 changes: 1 addition & 1 deletion oneflux_steps/Makefile
@@ -31,7 +31,7 @@ CC := gcc -O3
MKDIR = mkdir -p

# copy file command (verbose, keep file metadata)
-COPY = cp -av
+COPY = cp -v

SRCDIR := $(shell pwd)/
TGTDIR := ${HOME}/bin/oneflux/
5 changes: 5 additions & 0 deletions pytest.ini
@@ -0,0 +1,5 @@
[pytest]
log_cli = 1
log_cli_level = INFO
log_cli_format = %(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s)
log_cli_date_format=%Y-%m-%d %H:%M:%S
5 changes: 3 additions & 2 deletions requirements.txt
@@ -1,4 +1,5 @@
-numpy>=1.11.0,<1.16.0
+numpy<2,>=1.18
scipy>=0.17.0
matplotlib>=1.5.1
-statsmodels>=0.8.0
+statsmodels==0.14.1
+pytest==8.2.2
2 changes: 1 addition & 1 deletion runoneflux.py
@@ -81,7 +81,7 @@
# start execution
try:
# check arguments
-print os.path.join(args.datadir, args.sitedir)
+print(os.path.join(args.datadir, args.sitedir))
if not os.path.isdir(os.path.join(args.datadir, args.sitedir)):
raise ONEFluxError("Site dir not found: {d}".format(d=args.sitedir))

19 changes: 0 additions & 19 deletions tests/context.py

This file was deleted.

File renamed without changes.
125 changes: 125 additions & 0 deletions tests/python/integration/test_partitioning.py
@@ -0,0 +1,125 @@
import pytest
import os, glob
import errno
import urllib.request
from shutil import copytree
import logging
import time

_log = logging.getLogger(__name__)


@pytest.fixture(scope="module")
def get_data():
'''
Utilising Python to obtain sample test data; currently unused
as a fixture in this class.
'''
if os.path.isdir('tests/data'):
_log.info('Skipping sample data retrieval as sample test data directory '
'already exists: ./tests/data')
return

from zipfile import ZipFile
input_zip_name, headers = urllib.request.urlretrieve('ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_input.zip')
output_zip_name, headers = urllib.request.urlretrieve('ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_output.zip')

_log.info('successfully downloaded sample data zip files. Extracting...')

with ZipFile(input_zip_name) as zi, ZipFile(output_zip_name) as zo:
zi.extractall(path='tests/data/test_input')
zo.extractall(path='tests/data/test_output')

_log.info('sample data successfully extracted from zip files')


def equal_csv(csv_1, csv_2):
'''
Check equality of two csv files.
'''
_log.info("Check csv equality")
start = time.time()
with open(csv_1, 'r') as t1, open(csv_2, 'r') as t2:
fileone = t1.readlines()
filetwo = t2.readlines()
for line in filetwo:
if line not in fileone:
return False

_log.info("total time", start - time.time())

return True


@pytest.fixture
def setup_data(get_data):
'''
Set up input data for run_partition_nt test.

Create the data directory for tests ('tests/integration/data/step_10'), copy the
sample input into 'tests/python/integration/input/step_10/US-ARc_sample_input', and
copy the required reference steps from 'tests/data/test_output/US-ARc_sample_output'.
'''
try:
os.mkdir('tests/integration/data/step_10')
except OSError as e:
if e.errno == errno.EEXIST:
print("directory exists")

testdata = 'tests/python/integration/input/step_10/US-ARc_sample_input'

copytree('tests/data/test_input/', testdata, dirs_exist_ok=True)

refoutdir = 'tests/data/test_output/US-ARc_sample_output'

copytree(os.path.join(refoutdir, '07_meteo_proc'), \
os.path.join(testdata, '07_meteo_proc'), dirs_exist_ok=True)
copytree(os.path.join(refoutdir, '08_nee_proc'), \
os.path.join(testdata, '08_nee_proc/'), dirs_exist_ok=True)
copytree(os.path.join(refoutdir, '02_qc_auto'), \
os.path.join(testdata, '02_qc_auto/'), dirs_exist_ok=True)


def test_run_partition_nt(setup_data):
'''
Run partition_nt on single percentile.
'''
datadir = "./tests/python/integration/input/step_10/"
refoutdir = "./tests/data/test_output/"
siteid = "US-ARc"
sitedir = "US-ARc_sample_input"
years = [2005] # years = [2005, 2006]
# PROD_TO_COMPARE = ['c', 'y']
PROD_TO_COMPARE = ['y', ]
# PERC_TO_COMPARE = ['1.25', '3.75',]
PERC_TO_COMPARE = ['1.25', ]

from oneflux.tools.partition_nt import remove_previous_run, run_python
remove_previous_run(datadir=datadir, siteid=siteid, sitedir=sitedir, python=True,
prod_to_compare=PROD_TO_COMPARE,
perc_to_compare=PERC_TO_COMPARE,
years_to_compare=years)

run_python(datadir=datadir, siteid=siteid, sitedir=sitedir,
prod_to_compare=PROD_TO_COMPARE,
perc_to_compare=PERC_TO_COMPARE, years_to_compare=years)

# check whether csv of "output" is same as csv of reference

# the generated output is actually in the "input" directory.
rootdir = os.path.join(datadir, sitedir, "10_nee_partition_nt")
nee_y_files = glob.glob(os.path.join(rootdir, "nee_y_1.25_US-ARc_2005*"))
nee_y_files = sorted(f for f in nee_y_files if not f.endswith('_orig.csv'))  # filter() is lazy in Py3; len() below needs a list

# paths to the "reference" output data
refoutdir = os.path.join(refoutdir, "US-ARc_sample_output", "10_nee_partition_nt")
ref_nee_y_files = sorted(glob.glob(os.path.join(refoutdir, "nee_y_1.25_US-ARc_2005*")))  # sort so zip pairs matching files

assert len(nee_y_files) == len(ref_nee_y_files)
retval = True
for f, b in zip(nee_y_files, ref_nee_y_files):
print(f, b)
assert equal_csv(f, b)

# clean up data.
# shutil.rmtree(datadir)
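For local runs outside the CI job, the same test can be driven directly with pytest; a convenience sketch (paths assumed from the layout added in this PR, run from the repository root):

    import sys
    import pytest

    sys.path.insert(0, ".")  # equivalent of the PYTHONPATH export in the CI job
    sys.exit(pytest.main(["tests/python/integration/test_partitioning.py", "-v"]))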
14 changes: 14 additions & 0 deletions tests/python/test_context.py
@@ -0,0 +1,14 @@
'''
For license information:
see LICENSE file or headers in oneflux.__init__.py

Simple context/import setup test

@author: Gilberto Pastorello
@contact: gzpastorello@lbl.gov
@date: 2017-01-31
'''

def test_import_oneflux():
import oneflux
assert hasattr(oneflux, '__version__')
21 changes: 0 additions & 21 deletions tests/test_context.py

This file was deleted.
