WIP Initial scoping for upgrade to Python 3 #3

Draft · wants to merge 39 commits into base: tests_framework
39 commits
8a0ff35  initial commit of python2 deployment ci (ma595, Jan 10, 2023)
3c07bc5  syntax fix (ma595, Jan 10, 2023)
fb670c8  syntax fix (ma595, Jan 10, 2023)
3d3aa88  change branch (ma595, Jan 10, 2023)
5703c1f  change python version (ma595, Jan 10, 2023)
eb9d918  added simple partitioning_nt test run and pytest example (ma595, Jan 11, 2023)
4e1f587  updated build to point to frozen requirements (ma595, Jan 11, 2023)
03318a5  removed software health tools (ma595, Jan 11, 2023)
53d948b  fix syntax (ma595, Jan 11, 2023)
79b88d8  add setuptools to frozen-requirements.txt (ma595, Jan 11, 2023)
2e1e23c  upgrade pip (ma595, Jan 11, 2023)
4ea607b  add wheel to prevent source build of pandas (ma595, Jan 11, 2023)
fc5e796  try forcing pandas wheel collection (ma595, Jan 11, 2023)
ea44b8b  add pytest and bump pandas (ma595, Jan 11, 2023)
0030188  attempt with old build system (ma595, Jan 11, 2023)
e15d25c  amend pytest example (ma595, Jan 11, 2023)
7667c93  remove whitespace (ma595, Jan 11, 2023)
58833b2  removed partition_nt test code (ma595, Jan 26, 2023)
5528e2f  placeholder files for tests (ma595, Jan 26, 2023)
f9a4be6  partition_nt test now passes with data setup function implemented (ma595, Jan 26, 2023)
d9fded5  started unit testing (ma595, Jan 27, 2023)
3dbac46  changed directory structure and updated integration test so equality … (ma595, Jan 30, 2023)
3b9ed73  add pythonpath to workflow (ma595, Jan 30, 2023)
8a56f66  remove unfinished rm fn (ma595, Jan 30, 2023)
241e422  removed unnecessary comments from code (ma595, Apr 4, 2023)
9642ef6  lowered upper bound on statsmodels (ma595, Apr 4, 2023)
535e026  lowered upper bound on statsmodels (ma595, Apr 4, 2023)
d39c927  removed partitioning unit tests (ma595, Apr 4, 2023)
8f4c664  neaten CI (ma595, Apr 4, 2023)
cbfe9f3  test_context.py now converted to pytest (ma595, Apr 4, 2023)
3445efe  removed unnecessary files and fixed test, now expected to fail (ma595, Apr 4, 2023)
e1fec90  still fails - immediate fail due to extra columns in generated output… (ma595, Apr 4, 2023)
8e919f2  reverted to 1.25 (ma595, Apr 4, 2023)
09aa9aa  upgrading libraries for python 3.12.0 compatibility, note not backwar… (AmyOctoCat, Jun 14, 2024)
d93da54  further python 3 updates (AmyOctoCat, Jun 14, 2024)
9dd6697  think the dtype parameter needs to be changed due to python 2/3 chang… (AmyOctoCat, Jun 21, 2024)
cd8fbf5  Move datatype for numpy genfromtxt for string variables into separate… (AmyOctoCat, Jun 21, 2024)
ffce7db  run the 2to3 tool (AmyOctoCat, Jun 21, 2024)
073abd1  replace second instance of numpy importing to byte object (AmyOctoCat, Jun 21, 2024)
38 changes: 38 additions & 0 deletions .github/workflows/python-app.yaml
@@ -0,0 +1,38 @@
name: ONEFlux CI

on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]

permissions:
  contents: read

jobs:
  build:
    runs-on: ubuntu-20.04
    steps:
    - uses: actions/checkout@v3
    - name: Set up Python 2.7.18
      uses: actions/setup-python@v3
      with:
        python-version: "2.7.18"
    - name: Install OneFLUX
      run: |
        python -m pip install --upgrade pip
        pip install setuptools wheel pytest
        make
    - name: Download data
      run: |
        # get US-ARc_sample data for tests
        mkdir -p ./tests/data/test_input
        mkdir -p ./tests/data/test_output
        wget ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_input.zip
        wget ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_output.zip
        unzip US-ARc_sample_input.zip -d ./tests/data/test_input
        unzip US-ARc_sample_output.zip -d ./tests/data/test_output
    - name: Run pytest
      run: |
        export PYTHONPATH=/home/runner/work/ONEFlux/ONEFlux:$PYTHONPATH
        pytest tests/python
5 changes: 3 additions & 2 deletions oneflux/partition/auxiliary.py
@@ -28,6 +28,7 @@
#FLOAT_PREC = 'f8'
FLOAT_PREC = 'f4'
DOUBLE_PREC = 'f8'
+STRING_VARIABLE_LENGTH = 'U12'


_log = logging.getLogger(__name__)
@@ -111,7 +112,7 @@ def compare_col_to_pvwave(py_array, filename, label=None, diff=False, show_plot=
s_string = s_string.replace(' ', '')
s_string = s_string.replace('-1.#IND000', '-9999')
s_string = s_string.replace('\r', '')
-u_string = unicode(s_string)
+u_string = str(s_string)
pw_array = numpy.genfromtxt(StringIO(u_string), dtype=FLOAT_PREC, delimiter=',', skip_header=0, missing_values='-9999,-9999.0,-6999,-6999.0, ', usemask=True)
pw_array = numpy.ma.filled(pw_array, numpy.NaN)
# **************************************************************************************************************************************************
@@ -204,7 +205,7 @@ def compare_col_to_pvwave(py_array, filename, label=None, diff=False, show_plot=
figure_basename = figure_basename.replace('_PW', '') # remove _PW from PW data source filename

record_interval = (timedelta(minutes=30) if resolution == 'hh' else timedelta(minutes=60))
-timestamp_list = [datetime(year, 1, 1, 0, 0) + (record_interval * i) for i in xrange(1, py_array.size + 1)]
+timestamp_list = [datetime(year, 1, 1, 0, 0) + (record_interval * i) for i in range(1, py_array.size + 1)]

_log.debug("Using year={y}, resolution={r}, first timestamp={f}, last timestamp={l}".format(y=year, r=resolution, f=timestamp_list[0], l=timestamp_list[-1]))

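The hunk above keeps the existing read-masked-then-fill idiom around the new str() call; a minimal sketch of that numpy idiom (sample data assumed, not taken from the PR):

    from io import StringIO
    import numpy

    # Sentinel values such as -9999 are masked on read, then replaced with
    # NaN, mirroring the genfromtxt/ma.filled pair in auxiliary.py.
    raw = u"1.5,2.5\n-9999,3.0\n"
    arr = numpy.genfromtxt(StringIO(raw), dtype='f4', delimiter=',',
                           missing_values='-9999,-9999.0,-6999,-6999.0, ',
                           usemask=True)
    arr = numpy.ma.filled(arr, numpy.nan)
    assert numpy.isnan(arr[1, 0]) and arr[1, 1] == 3.0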
10 changes: 5 additions & 5 deletions oneflux/partition/daytime.py
@@ -909,7 +909,7 @@ def estimate_parasets(data, winsize, fguess, trimperc, name_out, dt_output_dir,
###############################################

#### Creating the arrays we're going to use
-n_parasets = long(365 / winsize) * 2
+n_parasets = int(365 / winsize) * 2
params = numpy.zeros((3, 2 * len(fguess), n_parasets), dtype=FLOAT_PREC)
params_ok = numpy.zeros((2 * len(fguess), n_parasets), dtype=FLOAT_PREC)
params_nok = numpy.zeros((2 * len(fguess), n_parasets), dtype=FLOAT_PREC)
@@ -1041,7 +1041,7 @@ def estimate_parasets(data, winsize, fguess, trimperc, name_out, dt_output_dir,
#ind[i, :, :] = long((day_begin + winsize / 2.0) * 48.0)

#### Calculate the first index of the window we're using now
-ind[:, :, i] = long((day_begin + winsize / 2.0) * 48.0)
+ind[:, :, i] = int((day_begin + winsize / 2.0) * 48.0)

'''
#print("ind[:, :, i]")
@@ -1938,12 +1938,12 @@ def percentiles_fn(data, columns, values=[0.0, 0.25, 0.5, 0.75, 1.0], remove_mis

#### Setting ind to the percentile wanted
if values[i] <= 0.5:
-ind = long(values[i] * n_elements)
+ind = int(values[i] * n_elements)
else:
-ind = long(values[i] * (n_elements + 1))
+ind = int(values[i] * (n_elements + 1))

if ind >= n_elements:
-ind = n_elements - long(1)
+ind = n_elements - int(1)

if i == 0:
result = data[columns[0]][sorted_index_arr[ind]]
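One subtlety behind these long()-to-int() swaps: the int/long unification also changed division semantics, so it is worth confirming the converted expressions still floor as before (a quick illustrative check; the winsize value is assumed):

    # Python 2's long(365 / winsize) floor-divided two ints, while Python 3's
    # / is true division, so int() now truncates a float. For positive winsize
    # both agree; 365 // winsize would make the floor explicit.
    winsize = 7
    assert int(365 / winsize) * 2 == (365 // winsize) * 2 == 104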
6 changes: 3 additions & 3 deletions oneflux/partition/library.py
@@ -23,7 +23,7 @@
from oneflux import ONEFluxError
from oneflux.partition.ecogeo import lloyd_taylor, lloyd_taylor_dt, hlrc_lloyd, hlrc_lloydvpd
from oneflux.partition.ecogeo import hlrc_lloyd_afix, hlrc_lloydvpd_afix, lloydt_e0fix
-from oneflux.partition.auxiliary import FLOAT_PREC, DOUBLE_PREC, NAN, nan, not_nan
+from oneflux.partition.auxiliary import FLOAT_PREC, DOUBLE_PREC, STRING_VARIABLE_LENGTH, NAN, nan, not_nan

from oneflux.graph.compare import plot_comparison
from oneflux.utils.files import file_exists_not_empty
@@ -76,7 +76,7 @@ def load_output(filename, delimiter=',', skip_header=1):
_log.debug("Finished loading headers: {h}".format(h=headers))

_log.debug("Started loading data")
-dtype = [(i, ('a25' if i.lower() in STRING_HEADERS else FLOAT_PREC)) for i in headers]
+dtype = [(i, (STRING_VARIABLE_LENGTH if i.lower() in STRING_HEADERS else FLOAT_PREC)) for i in headers]
vfill = [('' if i.lower() in STRING_HEADERS else numpy.NaN) for i in headers]
data = numpy.genfromtxt(fname=filename, dtype=dtype, names=headers, delimiter=delimiter, skip_header=skip_header, missing_values='-9999,-9999.0,-6999,-6999.0, ', usemask=True)
data = numpy.ma.filled(data, vfill)
@@ -1299,7 +1299,7 @@ def load_outputs(filename, delimiter=',', skip_header=1, is_not_hourly=True, is_
_log.debug("Loaded headers: {h}".format(h=headers))

_log.debug("Started loading data")
-dtype = [(i, ('a25' if i.lower() in STRING_HEADERS else FLOAT_PREC)) for i in headers]
+dtype = [(i, (STRING_VARIABLE_LENGTH if i.lower() in STRING_HEADERS else FLOAT_PREC)) for i in headers]
vfill = [('' if i.lower() in STRING_HEADERS else numpy.NaN) for i in headers]
data = numpy.genfromtxt(fname=filename, dtype=dtype, names=headers, delimiter=delimiter, skip_header=skip_header, missing_values='-9999,-9999.0,-6999,-6999.0, ', usemask=True)
data = numpy.ma.filled(data, vfill)
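A hedged sketch of why 'a25' gives way to the new STRING_VARIABLE_LENGTH ('U12') dtype (sample row assumed, not taken from the PR): on Python 3, numpy's 'a'/'S' kinds read text fields as bytes, so comparisons against str values quietly fail, while a 'U' dtype yields str:

    from io import StringIO
    import numpy

    row = u"US-ARc,1.5"
    as_bytes = numpy.genfromtxt(StringIO(row), delimiter=',',
                                dtype=[('site', 'a25'), ('val', 'f4')])
    as_str = numpy.genfromtxt(StringIO(row), delimiter=',',
                              dtype=[('site', 'U12'), ('val', 'f4')])
    assert as_bytes['site'] == b'US-ARc'   # bytes on Python 3
    assert as_str['site'] == 'US-ARc'      # str, as the STRING_HEADERS checks expect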
2 changes: 1 addition & 1 deletion oneflux/partition/nighttime.py
@@ -246,7 +246,7 @@ def flux_partition(data, lat, tempvar='tair', nomsg=False, temp_output_filename=
julmin, julmax = int(juldays[0]), int(numpy.max(juldays)) ### first/last day of year
n_regr = 0 ### counter of number of regressions/optimizations

-window_steps = range(julmin, julmax + 1, STEP_SIZE)
+window_steps = list(range(julmin, julmax + 1, STEP_SIZE))

# TODO: (potential) add e0_1_list, e0_2_list, e0_3_list, and corresponding se and idx to track individual

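The list() wrapper restores the Python 2 behavior where range() returned a concrete list; a minimal illustration (STEP_SIZE and the year bounds are assumed values):

    STEP_SIZE = 5
    julmin, julmax = 1, 365
    # range() is a lazy sequence in Python 3; list() materializes it so the
    # result can be mutated like the Python 2 list it replaces.
    window_steps = list(range(julmin, julmax + 1, STEP_SIZE))
    assert window_steps[0] == 1 and window_steps[-1] == 361
    window_steps.append(julmax)   # a plain range object would not allow this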
2 changes: 1 addition & 1 deletion oneflux_steps/Makefile
@@ -31,7 +31,7 @@ CC := gcc -O3
MKDIR = mkdir -p

# copy file command (verbose, keep file metadata)
-COPY = cp -av
+COPY = cp -v

SRCDIR := $(shell pwd)/
TGTDIR := ${HOME}/bin/oneflux/
5 changes: 5 additions & 0 deletions pytest.ini
@@ -0,0 +1,5 @@
[pytest]
log_cli = 1
log_cli_level = INFO
log_cli_format = %(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s)
log_cli_date_format=%Y-%m-%d %H:%M:%S
5 changes: 3 additions & 2 deletions requirements.txt
@@ -1,4 +1,5 @@
-numpy>=1.11.0,<1.16.0
+numpy<2,>=1.18
scipy>=0.17.0
matplotlib>=1.5.1
-statsmodels>=0.8.0
+statsmodels==0.14.1
+pytest==8.2.2
2 changes: 1 addition & 1 deletion runoneflux.py
@@ -81,7 +81,7 @@
# start execution
try:
# check arguments
-print os.path.join(args.datadir, args.sitedir)
+print(os.path.join(args.datadir, args.sitedir))
if not os.path.isdir(os.path.join(args.datadir, args.sitedir)):
raise ONEFluxError("Site dir not found: {d}".format(d=args.sitedir))

19 changes: 0 additions & 19 deletions tests/context.py

This file was deleted.

File renamed without changes.
125 changes: 125 additions & 0 deletions tests/python/integration/test_partitioning.py
@@ -0,0 +1,125 @@
import pytest
import os, glob
import errno
import urllib.request
from shutil import copytree
import logging
import time

_log = logging.getLogger(__name__)


@pytest.fixture(scope="module")
def get_data():
'''
Utilising Python to obtain sample test data; currently unused
as a fixture in this class.
'''
if os.path.isdir('tests/data'):
_log.info('Skipping sample data retrieval as sample test data directory '
'already exists: ./tests/data')
return

from zipfile import ZipFile
input_zip_name, headers = urllib.request.urlretrieve('ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_input.zip')
output_zip_name, headers = urllib.request.urlretrieve('ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_output.zip')

_log.info('successfully downloaded sample data zip files. Extracting...')

with ZipFile(input_zip_name) as zi, ZipFile(output_zip_name) as zo:
zi.extractall(path='tests/data/test_input')
zo.extractall(path='tests/data/test_output')

_log.info('sample data successfully extracted from zip files')


def equal_csv(csv_1, csv_2):
'''
Check equality of two csv files.
'''
_log.info("Check csv equality")
start = time.time()
with open(csv_1, 'r') as t1, open(csv_2, 'r') as t2:
fileone = t1.readlines()
filetwo = t2.readlines()
for line in filetwo:
if line not in fileone:
return False

_log.info("total time", start - time.time())

return True


@pytest.fixture
def setup_data(get_data):
'''
Set up input data for run_partition_nt test.

Create the data directory for tests ('tests/integration/data/step_10'), copy the
sample input into 'tests/python/integration/input/step_10/US-ARc_sample_input', and
copy the required reference steps from 'tests/data/test_output/US-ARc_sample_output'.
'''
try:
os.mkdir('tests/integration/data/step_10')
except OSError as e:
if e.errno == errno.EEXIST:
print("directory exists")

testdata = 'tests/python/integration/input/step_10/US-ARc_sample_input'

copytree('tests/data/test_input/', testdata, dirs_exist_ok=True)

refoutdir = 'tests/data/test_output/US-ARc_sample_output'

copytree(os.path.join(refoutdir, '07_meteo_proc'), \
os.path.join(testdata, '07_meteo_proc'), dirs_exist_ok=True)
copytree(os.path.join(refoutdir, '08_nee_proc'), \
os.path.join(testdata, '08_nee_proc/'), dirs_exist_ok=True)
copytree(os.path.join(refoutdir, '02_qc_auto'), \
os.path.join(testdata, '02_qc_auto/'), dirs_exist_ok=True)


def test_run_partition_nt(setup_data):
'''
Run partition_nt on single percentile.
'''
datadir = "./tests/python/integration/input/step_10/"
refoutdir = "./tests/data/test_output/"
siteid = "US-ARc"
sitedir = "US-ARc_sample_input"
years = [2005] # years = [2005, 2006]
# PROD_TO_COMPARE = ['c', 'y']
PROD_TO_COMPARE = ['y', ]
# PERC_TO_COMPARE = ['1.25', '3.75',]
PERC_TO_COMPARE = ['1.25', ]

from oneflux.tools.partition_nt import remove_previous_run, run_python
remove_previous_run(datadir=datadir, siteid=siteid, sitedir=sitedir, python=True,
prod_to_compare=PROD_TO_COMPARE,
perc_to_compare=PERC_TO_COMPARE,
years_to_compare=years)

run_python(datadir=datadir, siteid=siteid, sitedir=sitedir,
prod_to_compare=PROD_TO_COMPARE,
perc_to_compare=PERC_TO_COMPARE, years_to_compare=years)

# check whether csv of "output" is same as csv of reference

# the generated output is actually in the "input" directory.
rootdir = os.path.join(datadir, sitedir, "10_nee_partition_nt")
nee_y_files = glob.glob(os.path.join(rootdir, "nee_y_1.25_US-ARc_2005*"))
nee_y_files = sorted(f for f in nee_y_files if not f.endswith('_orig.csv'))  # filter() is lazy in Py3; len() below needs a list

# paths to the "reference" output data
refoutdir = os.path.join(refoutdir, "US-ARc_sample_output", "10_nee_partition_nt")
ref_nee_y_files = sorted(glob.glob(os.path.join(refoutdir, "nee_y_1.25_US-ARc_2005*")))  # sort so zip pairs matching files

assert len(nee_y_files) == len(ref_nee_y_files)
retval = True
for f, b in zip(nee_y_files, ref_nee_y_files):
print(f, b)
assert equal_csv(f, b)

# clean up data.
# shutil.rmtree(datadir)
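For local runs outside the CI job, the same test can be driven directly with pytest; a convenience sketch (paths assumed from the layout added in this PR, run from the repository root):

    import sys
    import pytest

    sys.path.insert(0, ".")  # equivalent of the PYTHONPATH export in the CI job
    sys.exit(pytest.main(["tests/python/integration/test_partitioning.py", "-v"]))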
14 changes: 14 additions & 0 deletions tests/python/test_context.py
@@ -0,0 +1,14 @@
'''
For license information:
see LICENSE file or headers in oneflux.__init__.py

Simple context/import setup test

@author: Gilberto Pastorello
@contact: gzpastorello@lbl.gov
@date: 2017-01-31
'''

def test_import_oneflux():
import oneflux
assert hasattr(oneflux, '__version__')
21 changes: 0 additions & 21 deletions tests/test_context.py

This file was deleted.
