From 8a0ff35a3f3723c9ced3bac100f761b8efe88080 Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Tue, 10 Jan 2023 18:45:25 +0000
Subject: [PATCH 01/39] initial commit of python2 deployment ci

---
 .github/workflows/python-app.yaml | 40 +++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 .github/workflows/python-app.yaml

diff --git a/.github/workflows/python-app.yaml b/.github/workflows/python-app.yaml
new file mode 100644
index 00000000..e91f9648
--- /dev/null
+++ b/.github/workflows/python-app.yaml
@@ -0,0 +1,40 @@
+# This workflow will install Python dependencies, run tests and lint with a single version of Python
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
+
+name: Python application
+
+on:
+  push:
+    branches: [ "main" ]
+  pull_request:
+    branches: [ "main" ]
+
+permissions:
+  contents: read
+
+jobs:
+  build:
+
+	# 20.04 currently still supports old version of python (ubuntu-latest doesn't)
+    runs-on: ubuntu-20.04
+
+    steps:
+    - uses: actions/checkout@v3
+    - name: Set up Python 2.7.10
+      uses: actions/setup-python@v3
+      with:
+        python-version: "2.7.10"
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install flake8 pytest
+        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+    # - name: Lint with flake8
+    #   run: |
+    #     # stop the build if there are Python syntax errors or undefined names
+    #     flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+    #     # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+    #     flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+    # - name: Test with pytest
+    #   run: |
+    #     pytest

From 3c07bc5e240ebe9a42545ddc358d8a036dda7cc1 Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Tue, 10 Jan 2023 18:46:50 +0000
Subject: [PATCH 02/39] syntax fix: replace tab indent with spaces in comment

---
 .github/workflows/python-app.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-app.yaml b/.github/workflows/python-app.yaml
index e91f9648..cfce1c5d 100644
--- a/.github/workflows/python-app.yaml
+++ b/.github/workflows/python-app.yaml
@@ -15,7 +15,7 @@ permissions:
 jobs:
   build:
 
-	# 20.04 currently still supports old version of python (ubuntu-latest doesn't)
+    # 20.04 currently still supports old version of python (ubuntu-latest doesn't)
     runs-on: ubuntu-20.04
 
     steps:

From fb670c89523bf8b649f07c85daf21fdecf7a2ac4 Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Tue, 10 Jan 2023 18:49:48 +0000
Subject: [PATCH 03/39] syntax fix: drop comment and blank lines around runs-on

---
 .github/workflows/python-app.yaml | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/.github/workflows/python-app.yaml b/.github/workflows/python-app.yaml
index cfce1c5d..2c30a65e 100644
--- a/.github/workflows/python-app.yaml
+++ b/.github/workflows/python-app.yaml
@@ -14,10 +14,7 @@ permissions:
 
 jobs:
   build:
-
-    # 20.04 currently still supports old version of python (ubuntu-latest doesn't)
     runs-on: ubuntu-20.04
-
     steps:
     - uses: actions/checkout@v3
     - name: Set up Python 2.7.10

From 3d3aa88d0829204c8cf3c262078f7f7450ae32a4 Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Tue, 10 Jan 2023 18:52:25 +0000
Subject: [PATCH 04/39] change branch

---
 .github/workflows/python-app.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/python-app.yaml b/.github/workflows/python-app.yaml
index 2c30a65e..40d7a49e 100644
--- a/.github/workflows/python-app.yaml
+++ b/.github/workflows/python-app.yaml
@@ -5,9 +5,9 @@ name: Python application
 
 on:
   push:
-    branches: [ "main" ]
+    branches: [ "ci" ]
   pull_request:
-    branches: [ "main" ]
+    branches: [ "ci" ]
 
 permissions:
   contents: read

From 5703c1fccb58eea84bd90a80e0278a7a909046e2 Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Tue, 10 Jan 2023 18:53:41 +0000
Subject: [PATCH 05/39] change python version

---
 .github/workflows/python-app.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/python-app.yaml b/.github/workflows/python-app.yaml
index 40d7a49e..e09f8231 100644
--- a/.github/workflows/python-app.yaml
+++ b/.github/workflows/python-app.yaml
@@ -17,10 +17,10 @@ jobs:
     runs-on: ubuntu-20.04
     steps:
     - uses: actions/checkout@v3
-    - name: Set up Python 2.7.10
+    - name: Set up Python 2.7.18
       uses: actions/setup-python@v3
       with:
-        python-version: "2.7.10"
+        python-version: "2.7.18"
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip

From eb9d91813dc7a096af05df46db6f0a70f41cc60c Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Wed, 11 Jan 2023 11:58:33 +0000
Subject: [PATCH 06/39] added simple partitioning_nt test run and pytest
 example

---
 .github/workflows/python-app.yaml | 24 +++++++++++--
 frozen-requirements.txt           | 14 ++++++++
 oneflux/tools/partition_nt.py     | 56 +++++++++++++++++++++++++++++++
 oneflux_steps/Makefile            |  2 +-
 4 files changed, 93 insertions(+), 3 deletions(-)
 create mode 100644 frozen-requirements.txt

diff --git a/.github/workflows/python-app.yaml b/.github/workflows/python-app.yaml
index e09f8231..b079628a 100644
--- a/.github/workflows/python-app.yaml
+++ b/.github/workflows/python-app.yaml
@@ -21,11 +21,31 @@ jobs:
       uses: actions/setup-python@v3
       with:
         python-version: "2.7.18"
-    - name: Install dependencies
+    - name: Install OneFLUX
       run: |
         python -m pip install --upgrade pip
         pip install flake8 pytest
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+        # if [ -f requirements-frozen.txt ]; then pip install -r requirements-frozen.txt; fi
+        make
+    - name: Run OneFLUX partitioning_nt
+      run: |
+        # get necessary data
+        wget ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_input.zip
+        wget ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_output.zip
+        unzip US-ARc_sample_input.zip -d ./datadir/test_input/
+        unzip US-ARc_sample_output.zip -d ./datadir/test_output/
+        # copy necessary output data to force partitioning_nt to run
+        cp -r ./datadir/test_output/US-ARc_sample_output/02_qc_auto/ ./datadir/test_input/US-ARc_sample_input/
+        cp -r ./datadir/test_output/US-ARc_sample_output/07_meteo_proc/ ./datadir/test_input/US-ARc_sample_input/
+        cp -r ./datadir/test_output/US-ARc_sample_output/08_nee_proc/ ./datadir/test_input/US-ARc_sample_input/
+        python runoneflux.py partition_nt ./datadir/test_input/ US-ARc US-ARc_sample_input 2005 2006 -l fluxnet_pipeline_US-ARc.log --recint hh
+
+    - name: Run pytest
+      run: |
+        pytest oneflux/tools/partition_nt.py
+
+
+
     # - name: Lint with flake8
     #   run: |
     #     # stop the build if there are Python syntax errors or undefined names
diff --git a/frozen-requirements.txt b/frozen-requirements.txt
new file mode 100644
index 00000000..af23b1fa
--- /dev/null
+++ b/frozen-requirements.txt
@@ -0,0 +1,14 @@
+backports.functools-lru-cache==1.6.4
+cycler==0.10.0
+kiwisolver==1.1.0
+matplotlib==2.2.5
+numpy==1.15.4
+pandas==0.20.0
+patsy==0.5.2
+pyparsing==2.4.7
+python-dateutil==2.8.2
+pytz==2022.4
+scipy==1.2.3
+six==1.16.0
+statsmodels==0.8.0
+subprocess32==3.5.4
diff --git a/oneflux/tools/partition_nt.py b/oneflux/tools/partition_nt.py
index 92034b8d..3e8ce31c 100644
--- a/oneflux/tools/partition_nt.py
+++ b/oneflux/tools/partition_nt.py
@@ -12,6 +12,7 @@
 '''
 import sys
 import os
+import glob
 import logging
 import time
 import numpy
@@ -19,6 +20,7 @@
 import socket
 import numpy
 import calendar
+import pytest
 
 from datetime import datetime
 from io import StringIO
@@ -133,6 +135,60 @@ def run_partition_nt(datadir, siteid, sitedir, years_to_compare,
     remove_previous_run(datadir=datadir, siteid=siteid, sitedir=sitedir, python=py_remove_old, prod_to_compare=prod_to_compare, perc_to_compare=perc_to_compare, years_to_compare=years_to_compare)
     run_python(datadir=datadir, siteid=siteid, sitedir=sitedir, prod_to_compare=prod_to_compare, perc_to_compare=perc_to_compare, years_to_compare=years_to_compare)
 
+@pytest.fixture
+def get_data():
+    pass
+
+def equal_csv(csv_1, csv_2):
+    with open(csv_1, 'r') as t1, open(csv_2, 'r') as t2:
+        fileone = t1.readlines()
+        filetwo = t2.readlines()
+        for line in filetwo:
+            if line not in fileone:
+                return False
+    
+# deal with fixtures for running nt_test
+# step 10
+def test_run_partition_nt():
+    
+    datadir = "../datadir/"
+    siteid = "US-ARc"
+    sitedir = "US-ARc_sample_input"
+    years = [2005] # years = [2005, 2006]
+    PROD_TO_COMPARE = ['c', 'y']
+    # PERC_TO_COMPARE = ['1.25', '3.75',]
+    PERC_TO_COMPARE = ['1.25',]
+    remove_previous_run(datadir=datadir, siteid=siteid, sitedir=sitedir, python=True, 
+                        prod_to_compare=PROD_TO_COMPARE, perc_to_compare=PERC_TO_COMPARE,
+                        years_to_compare=years)
+
+    run_python(datadir=datadir, siteid=siteid, sitedir=sitedir, prod_to_compare=PROD_TO_COMPARE,
+               perc_to_compare=PERC_TO_COMPARE, years_to_compare=years)
+    
+    # now do simple check of output 
+    rootdir = os.path.join(datadir, sitedir, "10_nee_partition_nt")
+    nee_y_files = glob.glob(os.path.join(rootdir, "nee_y_1.25_US-ARc_2005*"))
+    compare_y_files = glob.glob(os.path.join('saved', "nee_y_1.25_US-ARc_2005*"))
+    
+    # log.info(nee_y_files)
+    # log.info(compare_y_files)
+    for f, b in zip(nee_y_files, compare_y_files):
+        if not equal_csv(f, b):
+            return False
+
+    # glob the files with this root
+    # for file in glob.glob(nee_y_files):
+    #     print(file)
+    #     log.info(file)
+    #     if not equal_csv(file, )
+        
+        
+    
+    
+
+    # with open('saved/nee_y_1.25_US-ARc_2005.csv', 'r') as t1, open(nee_y, 'r') as t2:
+
+    
 
 if __name__ == '__main__':
     raise ONEFluxError('Not executable')
diff --git a/oneflux_steps/Makefile b/oneflux_steps/Makefile
index d56115bf..a5e22cc9 100644
--- a/oneflux_steps/Makefile
+++ b/oneflux_steps/Makefile
@@ -31,7 +31,7 @@ CC := gcc -O3
 MKDIR = mkdir -p
 
 # copy file command (verbose, keep file metadata)
-COPY = cp -av
+COPY = cp -v
 
 SRCDIR := $(shell pwd)/
 TGTDIR := ${HOME}/bin/oneflux/

From 4e1f5876c105812d61f39ffa3d8f714a9922ca07 Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Wed, 11 Jan 2023 12:03:08 +0000
Subject: [PATCH 07/39] updated build to point to frozen requirements

---
 .github/workflows/python-app.yaml | 2 +-
 Makefile                          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/python-app.yaml b/.github/workflows/python-app.yaml
index b079628a..6ea4379b 100644
--- a/.github/workflows/python-app.yaml
+++ b/.github/workflows/python-app.yaml
@@ -25,7 +25,7 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         pip install flake8 pytest
-        # if [ -f requirements-frozen.txt ]; then pip install -r requirements-frozen.txt; fi
+        # if [ -f frozen-requirement.txt ]; then pip install -r frozen-requirements.txt; fi
         make
     - name: Run OneFLUX partitioning_nt
       run: |
diff --git a/Makefile b/Makefile
index 58665241..4d258567 100644
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,7 @@ buildpy:
 	@echo "Installing Python dependencies..."
 ifeq ($(PYPACKAGE),pip)
 	@echo "Using pip to install dependencies..."
-	pip install -r requirements.txt
+	pip install -r frozen-requirements.txt
 else
 ifeq ($(PYPACKAGE),conda)
 	@echo "Using conda to install dependencies..."

From 03318a5a9110e4476b8764e86091d89ebf108527 Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Wed, 11 Jan 2023 14:06:00 +0000
Subject: [PATCH 08/39] removed software health tools; split out Get data step

---
 .github/workflows/python-app.yaml | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/python-app.yaml b/.github/workflows/python-app.yaml
index 6ea4379b..7d92bf0b 100644
--- a/.github/workflows/python-app.yaml
+++ b/.github/workflows/python-app.yaml
@@ -23,17 +23,21 @@ jobs:
         python-version: "2.7.18"
     - name: Install OneFLUX
       run: |
-        python -m pip install --upgrade pip
-        pip install flake8 pytest
+        # python -m pip install --upgrade pip
+        # pip install flake8 pytest
         # if [ -f frozen-requirement.txt ]; then pip install -r frozen-requirements.txt; fi
         make
-    - name: Run OneFLUX partitioning_nt
+    -name: Get data
       run: |
+        mkdir datadir
         # get necessary data
         wget ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_input.zip
         wget ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_output.zip
         unzip US-ARc_sample_input.zip -d ./datadir/test_input/
         unzip US-ARc_sample_output.zip -d ./datadir/test_output/
+
+    - name: Run OneFLUX partitioning_nt
+      run: |
         # copy necessary output data to force partitioning_nt to run
         cp -r ./datadir/test_output/US-ARc_sample_output/02_qc_auto/ ./datadir/test_input/US-ARc_sample_input/
         cp -r ./datadir/test_output/US-ARc_sample_output/07_meteo_proc/ ./datadir/test_input/US-ARc_sample_input/

From 53d948b44edba159468b83cc3f81d528764c7b39 Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Wed, 11 Jan 2023 14:07:20 +0000
Subject: [PATCH 09/39] fix syntax: add missing space in "- name: Get data"

---
 .github/workflows/python-app.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-app.yaml b/.github/workflows/python-app.yaml
index 7d92bf0b..b4003efe 100644
--- a/.github/workflows/python-app.yaml
+++ b/.github/workflows/python-app.yaml
@@ -27,7 +27,7 @@ jobs:
         # pip install flake8 pytest
         # if [ -f frozen-requirement.txt ]; then pip install -r frozen-requirements.txt; fi
         make
-    -name: Get data
+    - name: Get data
       run: |
         mkdir datadir
         # get necessary data

From 79b88d8241a32d74faf465354fc86fc0e6724d10 Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Wed, 11 Jan 2023 14:11:44 +0000
Subject: [PATCH 10/39] add setuptools to frozen-requirements.txt

---
 frozen-requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/frozen-requirements.txt b/frozen-requirements.txt
index af23b1fa..6280a83b 100644
--- a/frozen-requirements.txt
+++ b/frozen-requirements.txt
@@ -12,3 +12,4 @@ scipy==1.2.3
 six==1.16.0
 statsmodels==0.8.0
 subprocess32==3.5.4
+setuptools==44.1.1

From 2e1e23cd3203996ce77f7c65062789cf988d33d3 Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Wed, 11 Jan 2023 14:13:45 +0000
Subject: [PATCH 11/39] upgrade pip

---
 .github/workflows/python-app.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/python-app.yaml b/.github/workflows/python-app.yaml
index b4003efe..b40bcffe 100644
--- a/.github/workflows/python-app.yaml
+++ b/.github/workflows/python-app.yaml
@@ -23,7 +23,8 @@ jobs:
         python-version: "2.7.18"
     - name: Install OneFLUX
       run: |
-        # python -m pip install --upgrade pip
+        python -m pip install --upgrade pip
+        pip install setuptools==44.1.1
         # pip install flake8 pytest
         # if [ -f frozen-requirement.txt ]; then pip install -r frozen-requirements.txt; fi
         make

From 4ea607b98a645f695f2bfb88fe633cc2c925d8ab Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Wed, 11 Jan 2023 14:18:36 +0000
Subject: [PATCH 12/39] add wheel to prevent source build of pandas

---
 .github/workflows/python-app.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python-app.yaml b/.github/workflows/python-app.yaml
index b40bcffe..e0db1c2f 100644
--- a/.github/workflows/python-app.yaml
+++ b/.github/workflows/python-app.yaml
@@ -24,7 +24,7 @@ jobs:
     - name: Install OneFLUX
       run: |
         python -m pip install --upgrade pip
-        pip install setuptools==44.1.1
+        pip install setuptools==44.1.1 wheel
         # pip install flake8 pytest
         # if [ -f frozen-requirement.txt ]; then pip install -r frozen-requirements.txt; fi
         make

From fc5e796d11a522d802cefafe955429dd64694659 Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Wed, 11 Jan 2023 14:24:32 +0000
Subject: [PATCH 13/39] try forcing pandas wheel collection (drop numpy pin)

---
 frozen-requirements.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/frozen-requirements.txt b/frozen-requirements.txt
index 6280a83b..e4c5b7b5 100644
--- a/frozen-requirements.txt
+++ b/frozen-requirements.txt
@@ -2,7 +2,6 @@ backports.functools-lru-cache==1.6.4
 cycler==0.10.0
 kiwisolver==1.1.0
 matplotlib==2.2.5
-numpy==1.15.4
 pandas==0.20.0
 patsy==0.5.2
 pyparsing==2.4.7

From ea44b8b034d586cafaae08ca761ed1e152dd09c1 Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Wed, 11 Jan 2023 14:39:45 +0000
Subject: [PATCH 14/39] add pytest and bump pandas

---
 .github/workflows/python-app.yaml | 2 +-
 frozen-requirements.txt           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/python-app.yaml b/.github/workflows/python-app.yaml
index e0db1c2f..7bd47a52 100644
--- a/.github/workflows/python-app.yaml
+++ b/.github/workflows/python-app.yaml
@@ -24,7 +24,7 @@ jobs:
     - name: Install OneFLUX
       run: |
         python -m pip install --upgrade pip
-        pip install setuptools==44.1.1 wheel
+        pip install setuptools==44.1.1 wheel pytest
         # pip install flake8 pytest
         # if [ -f frozen-requirement.txt ]; then pip install -r frozen-requirements.txt; fi
         make
diff --git a/frozen-requirements.txt b/frozen-requirements.txt
index e4c5b7b5..d92e10cb 100644
--- a/frozen-requirements.txt
+++ b/frozen-requirements.txt
@@ -2,7 +2,7 @@ backports.functools-lru-cache==1.6.4
 cycler==0.10.0
 kiwisolver==1.1.0
 matplotlib==2.2.5
-pandas==0.20.0
+pandas==0.20.1
 patsy==0.5.2
 pyparsing==2.4.7
 python-dateutil==2.8.2

From 0030188f9e104f6a63a34b502741eba18fdcc2e8 Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Wed, 11 Jan 2023 14:45:54 +0000
Subject: [PATCH 15/39] attempt with old build system

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 4d258567..58665241 100644
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,7 @@ buildpy:
 	@echo "Installing Python dependencies..."
 ifeq ($(PYPACKAGE),pip)
 	@echo "Using pip to install dependencies..."
-	pip install -r frozen-requirements.txt
+	pip install -r requirements.txt
 else
 ifeq ($(PYPACKAGE),conda)
 	@echo "Using conda to install dependencies..."

From e15d25c158dc32e4a886224c8452b695e075d376 Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Wed, 11 Jan 2023 15:20:16 +0000
Subject: [PATCH 16/39] amend pytest example

---
 Makefile                      |  2 +-
 oneflux/tools/partition_nt.py | 16 ++++++----------
 pytest.ini                    |  5 +++++
 3 files changed, 12 insertions(+), 11 deletions(-)
 create mode 100644 pytest.ini

diff --git a/Makefile b/Makefile
index 58665241..4d258567 100644
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,7 @@ buildpy:
 	@echo "Installing Python dependencies..."
 ifeq ($(PYPACKAGE),pip)
 	@echo "Using pip to install dependencies..."
-	pip install -r requirements.txt
+	pip install -r frozen-requirements.txt
 else
 ifeq ($(PYPACKAGE),conda)
 	@echo "Using conda to install dependencies..."
diff --git a/oneflux/tools/partition_nt.py b/oneflux/tools/partition_nt.py
index 3e8ce31c..60240df2 100644
--- a/oneflux/tools/partition_nt.py
+++ b/oneflux/tools/partition_nt.py
@@ -147,11 +147,11 @@ def equal_csv(csv_1, csv_2):
             if line not in fileone:
                 return False
     
-# deal with fixtures for running nt_test
+# TODO: deal with fixtures for running nt_test
 # step 10
 def test_run_partition_nt():
-    
-    datadir = "../datadir/"
+    datadir = "./datadir/test_input"
+    data_output = "./datadir/test_output"
     siteid = "US-ARc"
     sitedir = "US-ARc_sample_input"
     years = [2005] # years = [2005, 2006]
@@ -168,11 +168,12 @@ def test_run_partition_nt():
     # now do simple check of output 
     rootdir = os.path.join(datadir, sitedir, "10_nee_partition_nt")
     nee_y_files = glob.glob(os.path.join(rootdir, "nee_y_1.25_US-ARc_2005*"))
-    compare_y_files = glob.glob(os.path.join('saved', "nee_y_1.25_US-ARc_2005*"))
+    ref_output = os.path.join(data_output, sitedir, "10_nee_partition_nt")
+    ref_y_files = glob.glob(os.path.join(ref_output, "nee_y_1.25_US-ARc_2005*"))
     
     # log.info(nee_y_files)
     # log.info(compare_y_files)
-    for f, b in zip(nee_y_files, compare_y_files):
+    for f, b in zip(nee_y_files, ref_y_files):
         if not equal_csv(f, b):
             return False
 
@@ -181,11 +182,6 @@ def test_run_partition_nt():
     #     print(file)
     #     log.info(file)
     #     if not equal_csv(file, )
-        
-        
-    
-    
-
     # with open('saved/nee_y_1.25_US-ARc_2005.csv', 'r') as t1, open(nee_y, 'r') as t2:
 
     
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 00000000..f5276a23
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,5 @@
+[pytest]
+log_cli = 1
+log_cli_level = INFO
+log_cli_format = %(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s)
+log_cli_date_format=%Y-%m-%d %H:%M:%S

From 7667c936ce6aa0104deaf14d04a7204e24f261a7 Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Wed, 11 Jan 2023 15:28:49 +0000
Subject: [PATCH 17/39] remove whitespace

---
 .github/workflows/python-app.yaml | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/.github/workflows/python-app.yaml b/.github/workflows/python-app.yaml
index 7bd47a52..30effcdd 100644
--- a/.github/workflows/python-app.yaml
+++ b/.github/workflows/python-app.yaml
@@ -36,7 +36,6 @@ jobs:
         wget ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_output.zip
         unzip US-ARc_sample_input.zip -d ./datadir/test_input/
         unzip US-ARc_sample_output.zip -d ./datadir/test_output/
-
     - name: Run OneFLUX partitioning_nt
       run: |
         # copy necessary output data to force partitioning_nt to run
@@ -44,13 +43,9 @@ jobs:
         cp -r ./datadir/test_output/US-ARc_sample_output/07_meteo_proc/ ./datadir/test_input/US-ARc_sample_input/
         cp -r ./datadir/test_output/US-ARc_sample_output/08_nee_proc/ ./datadir/test_input/US-ARc_sample_input/
         python runoneflux.py partition_nt ./datadir/test_input/ US-ARc US-ARc_sample_input 2005 2006 -l fluxnet_pipeline_US-ARc.log --recint hh
-
     - name: Run pytest
       run: |
         pytest oneflux/tools/partition_nt.py
-
-
-
     # - name: Lint with flake8
     #   run: |
     #     # stop the build if there are Python syntax errors or undefined names

From 58833b27e57d1c006bb3914aca7a4545d96c05b3 Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Thu, 26 Jan 2023 13:47:36 +0000
Subject: [PATCH 18/39] removed partition_nt test code

---
 oneflux/tools/partition_nt.py | 54 -----------------------------------
 1 file changed, 54 deletions(-)

diff --git a/oneflux/tools/partition_nt.py b/oneflux/tools/partition_nt.py
index 60240df2..0aa507eb 100644
--- a/oneflux/tools/partition_nt.py
+++ b/oneflux/tools/partition_nt.py
@@ -134,57 +134,3 @@ def run_partition_nt(datadir, siteid, sitedir, years_to_compare,
     """
     remove_previous_run(datadir=datadir, siteid=siteid, sitedir=sitedir, python=py_remove_old, prod_to_compare=prod_to_compare, perc_to_compare=perc_to_compare, years_to_compare=years_to_compare)
     run_python(datadir=datadir, siteid=siteid, sitedir=sitedir, prod_to_compare=prod_to_compare, perc_to_compare=perc_to_compare, years_to_compare=years_to_compare)
-
-@pytest.fixture
-def get_data():
-    pass
-
-def equal_csv(csv_1, csv_2):
-    with open(csv_1, 'r') as t1, open(csv_2, 'r') as t2:
-        fileone = t1.readlines()
-        filetwo = t2.readlines()
-        for line in filetwo:
-            if line not in fileone:
-                return False
-    
-# TODO: deal with fixtures for running nt_test
-# step 10
-def test_run_partition_nt():
-    datadir = "./datadir/test_input"
-    data_output = "./datadir/test_output"
-    siteid = "US-ARc"
-    sitedir = "US-ARc_sample_input"
-    years = [2005] # years = [2005, 2006]
-    PROD_TO_COMPARE = ['c', 'y']
-    # PERC_TO_COMPARE = ['1.25', '3.75',]
-    PERC_TO_COMPARE = ['1.25',]
-    remove_previous_run(datadir=datadir, siteid=siteid, sitedir=sitedir, python=True, 
-                        prod_to_compare=PROD_TO_COMPARE, perc_to_compare=PERC_TO_COMPARE,
-                        years_to_compare=years)
-
-    run_python(datadir=datadir, siteid=siteid, sitedir=sitedir, prod_to_compare=PROD_TO_COMPARE,
-               perc_to_compare=PERC_TO_COMPARE, years_to_compare=years)
-    
-    # now do simple check of output 
-    rootdir = os.path.join(datadir, sitedir, "10_nee_partition_nt")
-    nee_y_files = glob.glob(os.path.join(rootdir, "nee_y_1.25_US-ARc_2005*"))
-    ref_output = os.path.join(data_output, sitedir, "10_nee_partition_nt")
-    ref_y_files = glob.glob(os.path.join(ref_output, "nee_y_1.25_US-ARc_2005*"))
-    
-    # log.info(nee_y_files)
-    # log.info(compare_y_files)
-    for f, b in zip(nee_y_files, ref_y_files):
-        if not equal_csv(f, b):
-            return False
-
-    # glob the files with this root
-    # for file in glob.glob(nee_y_files):
-    #     print(file)
-    #     log.info(file)
-    #     if not equal_csv(file, )
-    # with open('saved/nee_y_1.25_US-ARc_2005.csv', 'r') as t1, open(nee_y, 'r') as t2:
-
-    
-
-if __name__ == '__main__':
-    raise ONEFluxError('Not executable')

From 5528e2f446b6266b22a75db75edc536fa405aaec Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Thu, 26 Jan 2023 14:02:27 +0000
Subject: [PATCH 19/39] placeholder files for tests

---
 tests/integration/test_partitioning.py | 55 ++++++++++++++++++++++++++
 tests/test_basic.py                    |  5 +++
 tests/test_partitioning.py             | 44 +++++++++++++++++++++
 3 files changed, 104 insertions(+)
 create mode 100644 tests/integration/test_partitioning.py
 create mode 100644 tests/test_basic.py
 create mode 100644 tests/test_partitioning.py

diff --git a/tests/integration/test_partitioning.py b/tests/integration/test_partitioning.py
new file mode 100644
index 00000000..bbe91267
--- /dev/null
+++ b/tests/integration/test_partitioning.py
@@ -0,0 +1,55 @@
+import oneflux.tools.partition_nt
+
+# not sure about the existence of this test. Not really a unit test
+@pytest.fixture
+def get_data():
+    pass
+
+def equal_csv(csv_1, csv_2):
+    with open(csv_1, 'r') as t1, open(csv_2, 'r') as t2:
+        fileone = t1.readlines()
+        filetwo = t2.readlines()
+        for line in filetwo:
+            if line not in fileone:
+                return False
+   
+    
+# TODO: deal with fixtures for running nt_test
+# step 10
+def test_run_partition_nt():
+    datadir = "./datadir/test_input"
+    data_output = "./datadir/test_output"
+    siteid = "US-ARc"
+    sitedir = "US-ARc_sample_input"
+    years = [2005] # years = [2005, 2006]
+    PROD_TO_COMPARE = ['c', 'y']
+    # PERC_TO_COMPARE = ['1.25', '3.75',]
+    PERC_TO_COMPARE = ['1.25',]
+    remove_previous_run(datadir=datadir, siteid=siteid, sitedir=sitedir, python=True, 
+                        prod_to_compare=PROD_TO_COMPARE, perc_to_compare=PERC_TO_COMPARE,
+                        years_to_compare=years)
+
+    run_python(datadir=datadir, siteid=siteid, sitedir=sitedir, prod_to_compare=PROD_TO_COMPARE,
+               perc_to_compare=PERC_TO_COMPARE, years_to_compare=years)
+    
+    # now do simple check of output 
+    rootdir = os.path.join(datadir, sitedir, "10_nee_partition_nt")
+    nee_y_files = glob.glob(os.path.join(rootdir, "nee_y_1.25_US-ARc_2005*"))
+    ref_output = os.path.join(data_output, sitedir, "10_nee_partition_nt")
+    ref_y_files = glob.glob(os.path.join(ref_output, "nee_y_1.25_US-ARc_2005*"))
+    
+    # log.info(nee_y_files)
+    # log.info(compare_y_files)
+    for f, b in zip(nee_y_files, ref_y_files):
+        if not equal_csv(f, b):
+            return False
+
+    # glob the files with this root
+    # for file in glob.glob(nee_y_files):
+    #     print(file)
+    #     log.info(file)
+    #     if not equal_csv(file, )
+    # with open('saved/nee_y_1.25_US-ARc_2005.csv', 'r') as t1, open(nee_y, 'r') as t2:
+
+if __name__ == '__main__':
+    raise ONEFluxError('Not executable')
diff --git a/tests/test_basic.py b/tests/test_basic.py
new file mode 100644
index 00000000..13ce93d2
--- /dev/null
+++ b/tests/test_basic.py
@@ -0,0 +1,5 @@
+import pytest
+
+def test_import_oneflux():
+    import oneflux
+    assert oneflux.VERSION == "0.4.1-rc"
\ No newline at end of file
diff --git a/tests/test_partitioning.py b/tests/test_partitioning.py
new file mode 100644
index 00000000..ba51737e
--- /dev/null
+++ b/tests/test_partitioning.py
@@ -0,0 +1,44 @@
+import oneflux.partition.nighttime
+import oneflux.partition.library # optimisation related functions
+import oneflux.partition.ecogeo
+
+
+# LIBRARY
+# looks like a good candidate for unit testing - lots of maths functions:
+# get_first_last_ts
+# cov2cor
+# root_mean_sq_error
+# least_squares () - calls scipy function 
+# check_parameters (checks values within given threshold)
+
+# loading data function:
+# load outputs
+
+# Less easy:
+# create_data_structures - very long function
+#  nlinlts1 (non-linear least-squares driver function)
+
+# function is used in daytime at present - so ignore for now
+# array consisting of 6 elements
+def test_check_parameters():
+    from oneflux.partition.library import check_parameter
+   
+    #  
+    p1 = [0, 0, 0, 50, 0, 0]
+    
+    p2 = [0.2199999, 250, 0, 0, 0, 0]
+
+# flux partition in nighttime.py is a huge function (250+ lines)
+
+def test_load_output():
+    from oneflux.partition.library import load_output
+    pass
+
+def test_compu():
+    pass
+
+def test_get_first_last_ts():
+    pass
+
+
+# partitioning_nt -> flux_partition -> nlinlts -> 
\ No newline at end of file

From f9a4be6a34907c2ae9ef7b749a185c78293e08fe Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Thu, 26 Jan 2023 17:29:02 +0000
Subject: [PATCH 20/39] partition_nt test now passes with data setup function
 implemented

---
 tests/integration/test_partitioning.py | 50 ++++++++++++++++++++------
 1 file changed, 39 insertions(+), 11 deletions(-)

diff --git a/tests/integration/test_partitioning.py b/tests/integration/test_partitioning.py
index bbe91267..dd4eaeac 100644
--- a/tests/integration/test_partitioning.py
+++ b/tests/integration/test_partitioning.py
@@ -1,8 +1,14 @@
-import oneflux.tools.partition_nt
+import pytest
+import os, glob
+import errno
+import shutil
+from distutils.dir_util import copy_tree
+
 
-# not sure about the existence of this test. Not really a unit test
 @pytest.fixture
 def get_data():
+    urllib.request.urlretrieve('ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_output.zip') 
+    urllib.request.urlretrieve('ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_input.zip') 
     pass
 
 def equal_csv(csv_1, csv_2):
@@ -12,13 +18,33 @@ def equal_csv(csv_1, csv_2):
         for line in filetwo:
             if line not in fileone:
                 return False
-   
+
+
+# create dataset for step 10
+def setup_data():
+    try:
+        os.mkdir('tests/integration/step_10')
+    except OSError as e:
+        if e.errno == errno.EEXIST:
+            print("dir exists")
+    
+    copy_tree('datadir/test_input/', 'tests/integration/step_10')
+    copy_tree('datadir/test_output/US-ARc_sample_output/07_meteo_proc/', \
+        'tests/integration/step_10/US-ARc_sample_input/07_meteo_proc/')
+    copy_tree('datadir/test_output/US-ARc_sample_output/08_nee_proc/', \
+        'tests/integration/step_10/US-ARc_sample_input/08_nee_proc/')
+    copy_tree('datadir/test_output/US-ARc_sample_output/02_qc_auto/', \
+        'tests/integration/step_10/US-ARc_sample_input/02_qc_auto/')
+    
     
-# TODO: deal with fixtures for running nt_test
 # step 10
+# TODO: deal with fixtures for running nt_test
+# TODO: Does not work without output of step 7 - run step 7 first 
 def test_run_partition_nt():
-    datadir = "./datadir/test_input"
-    data_output = "./datadir/test_output"
+    from oneflux.tools.partition_nt import remove_previous_run, run_python
+    setup_data()
+    datadir = "./tests/integration/step_10/"
+    data_output = "./tests/integration/step_10/test_output"
     siteid = "US-ARc"
     sitedir = "US-ARc_sample_input"
     years = [2005] # years = [2005, 2006]
@@ -37,19 +63,21 @@ def test_run_partition_nt():
     nee_y_files = glob.glob(os.path.join(rootdir, "nee_y_1.25_US-ARc_2005*"))
     ref_output = os.path.join(data_output, sitedir, "10_nee_partition_nt")
     ref_y_files = glob.glob(os.path.join(ref_output, "nee_y_1.25_US-ARc_2005*"))
-    
+   
+    retval = True 
     # log.info(nee_y_files)
     # log.info(compare_y_files)
     for f, b in zip(nee_y_files, ref_y_files):
         if not equal_csv(f, b):
-            return False
+            retval = False
 
+    # clean up data
+    shutil.rmtree('./tests/integration/step_10')
+    
+    return retval 
     # glob the files with this root
     # for file in glob.glob(nee_y_files):
     #     print(file)
     #     log.info(file)
     #     if not equal_csv(file, )
     # with open('saved/nee_y_1.25_US-ARc_2005.csv', 'r') as t1, open(nee_y, 'r') as t2:
-
-if __name__ == '__main__':
-    raise ONEFluxError('Not executable')

From d9fded5d8c9fe66c713fd27a8415cdce98857cac Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Fri, 27 Jan 2023 10:59:30 +0000
Subject: [PATCH 21/39] started unit testing

---
 tests/test_partitioning.py | 64 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 62 insertions(+), 2 deletions(-)

diff --git a/tests/test_partitioning.py b/tests/test_partitioning.py
index ba51737e..ee29713e 100644
--- a/tests/test_partitioning.py
+++ b/tests/test_partitioning.py
@@ -1,7 +1,10 @@
 import oneflux.partition.nighttime
 import oneflux.partition.library # optimisation related functions
 import oneflux.partition.ecogeo
+from oneflux.partition.library import QC_AUTO_DIR, METEO_PROC_DIR, NEE_PROC_DIR, NT_OUTPUT_DIR, HEADER_SEPARATOR, EXTRA_FILENAME, NT_STR
+import os
 
+from distutils.dir_util import copy_tree
 
 # LIBRARY
 # looks like a good candidate for unit testing - lots of maths functions:
@@ -20,10 +23,11 @@
 
 # function is used in daytime at present - so ignore for now
 # array consisting of 6 elements
+
+# this function isn't actually used much
 def test_check_parameters():
     from oneflux.partition.library import check_parameter
    
-    #  
     p1 = [0, 0, 0, 50, 0, 0]
     
     p2 = [0.2199999, 250, 0, 0, 0, 0]
@@ -32,10 +36,66 @@ def test_check_parameters():
 
 def test_load_output():
     from oneflux.partition.library import load_output
+    datadir = "./datadir/test_input"
+    sitedir = "US-ARc_sample_input"
+    siteid = "US-ARc"
+    sitedir_full = os.path.join(datadir, sitedir)
+    meteo_proc_dir = os.path.join(sitedir_full, METEO_PROC_DIR)
+    meteo_proc_f = os.path.join(meteo_proc_dir, '{s}_meteo_hh.csv'.format(s=siteid))
+    whole_dataset_meteo, headers_meteo, timestamp_list_meteo, year_list_meteo = load_output(meteo_proc_f)
+    
+    
+    
+# create dataset for step 10
+def setup_data():
+    try:
+        os.mkdir('tests/data/step_10')
+    except OSError as e:
+        if e.errno == errno.EEXIST:
+            print("dir exists")
+    
+    copy_tree('datadir/test_input/', 'tests/data/step_10')
+   
+    # copy data from assumed output as input to suite of partitioning tests 
+    copy_tree('datadir/test_output/US-ARc_sample_output/07_meteo_proc/', \
+        'tests/data/step_10/US-ARc_sample_input/07_meteo_proc/')
+    copy_tree('datadir/test_output/US-ARc_sample_output/08_nee_proc/', \
+        'tests/data/step_10/US-ARc_sample_input/08_nee_proc/')
+    copy_tree('datadir/test_output/US-ARc_sample_output/02_qc_auto/', \
+        'tests/data/step_10/US-ARc_sample_input/02_qc_auto/')
+    
+
+
+def test_create_data_structures():
+    return True
+    ustar_type = ['c'] 
+    create_data_structures(ustar_type=ustar_type, whole_dataset_nee=whole_dataset_nee, whole_dataset_meteo=whole_dataset_meteo)
     pass
 
 def test_compu():
-    pass
+    from oneflux.partition.nighttime import compu
+    from oneflux.partition.nighttime import load_output
+    from oneflux.partition.nighttime import create_data_structures
+    from oneflux.partition.compu import compu_qcnee_filter
+    ustar_type = ['y']
+   
+
+    meteo_proc_dir = os.path.join(sitedir_full, METEO_PROC_DIR)
+    meteo_proc_f = os.path.join(meteo_proc_dir, '{s}_meteo_hh.csv'.format(s=siteid))
+    nee_proc_percentiles_f = os.path.join(nee_proc_dir, '{s}_NEE_percentiles_{u}_hh.csv'.format(s=siteid, u=ustar_type))
+   
+    whole_dataset_meteo, headers_meteo, timestamp_list_meteo, year_list_meteo = load_output(meteo_proc_f)
+    whole_dataset_nee, headers_nee, timestamp_list_nee, year_list_nee = load_output(nee_proc_percentiles_f)
+    
+    data = working_year_data = create_data_structures(ustar_type=ustar_type, whole_dataset_nee=whole_dataset_nee, whole_dataset_meteo=whole_dataset_meteo)
+    func = compu_qcnee_filter
+    columns = None
+    parameters = None
+    skip_if_present = False
+    no_missing = False
+    new_ = False
+    
+    compu(data, func, columns, parameters, skip_if_present, no_missing, new_)
 
 def test_get_first_last_ts():
     pass

From 3dbac463ad59d279c12af03af11022e2fbf07c69 Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Mon, 30 Jan 2023 20:22:53 +0000
Subject: [PATCH 22/39] changed directory structure and updated integration
 test so equality is actually tested

---
 .github/workflows/python-app.yaml             |  28 ++---
 tests/integration/test_partitioning.py        |  83 --------------
 tests/{ => python}/__init__.py                |   0
 tests/{ => python}/context.py                 |   0
 tests/python/integration/test_partitioning.py | 107 ++++++++++++++++++
 tests/python/integration/test_qcauto.py       |   7 ++
 tests/python/test_basic.py                    |   6 +
 tests/{ => python}/test_context.py            |   0
 .../unit/test_partitioning_unit.py}           |  60 ++++++----
 tests/test_basic.py                           |   5 -
 10 files changed, 174 insertions(+), 122 deletions(-)
 delete mode 100644 tests/integration/test_partitioning.py
 rename tests/{ => python}/__init__.py (100%)
 rename tests/{ => python}/context.py (100%)
 create mode 100644 tests/python/integration/test_partitioning.py
 create mode 100644 tests/python/integration/test_qcauto.py
 create mode 100644 tests/python/test_basic.py
 rename tests/{ => python}/test_context.py (100%)
 rename tests/{test_partitioning.py => python/unit/test_partitioning_unit.py} (66%)
 delete mode 100644 tests/test_basic.py

diff --git a/.github/workflows/python-app.yaml b/.github/workflows/python-app.yaml
index 30effcdd..57bc832f 100644
--- a/.github/workflows/python-app.yaml
+++ b/.github/workflows/python-app.yaml
@@ -5,9 +5,9 @@ name: Python application
 
 on:
   push:
-    branches: [ "ci" ]
+    branches: [ "main" ]
   pull_request:
-    branches: [ "ci" ]
+    branches: [ "main" ]
 
 permissions:
   contents: read
@@ -30,22 +30,17 @@ jobs:
         make
     - name: Get data
       run: |
-        mkdir datadir
+        mkdir -p ./tests/data/test_input
+        mkdir -p ./tests/data/test_output
         # get necessary data
         wget ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_input.zip
         wget ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_output.zip
-        unzip US-ARc_sample_input.zip -d ./datadir/test_input/
-        unzip US-ARc_sample_output.zip -d ./datadir/test_output/
-    - name: Run OneFLUX partitioning_nt
-      run: |
-        # copy necessary output data to force partitioning_nt to run
-        cp -r ./datadir/test_output/US-ARc_sample_output/02_qc_auto/ ./datadir/test_input/US-ARc_sample_input/
-        cp -r ./datadir/test_output/US-ARc_sample_output/07_meteo_proc/ ./datadir/test_input/US-ARc_sample_input/
-        cp -r ./datadir/test_output/US-ARc_sample_output/08_nee_proc/ ./datadir/test_input/US-ARc_sample_input/
-        python runoneflux.py partition_nt ./datadir/test_input/ US-ARc US-ARc_sample_input 2005 2006 -l fluxnet_pipeline_US-ARc.log --recint hh
+        unzip US-ARc_sample_input.zip -d ./tests/data/test_input/
+        unzip US-ARc_sample_output.zip -d ./tests/data/test_output/
     - name: Run pytest
       run: |
-        pytest oneflux/tools/partition_nt.py
+        pytest tests/python/
+
     # - name: Lint with flake8
     #   run: |
     #     # stop the build if there are Python syntax errors or undefined names
@@ -55,3 +50,10 @@ jobs:
     # - name: Test with pytest
     #   run: |
     #     pytest
+    # - name: Run OneFLUX partitioning_nt
+    #   run: |
+    #     # copy necessary output data to force partitioning_nt to run
+    #     cp -r ./datadir/test_output/US-ARc_sample_output/02_qc_auto/ ./datadir/test_input/US-ARc_sample_input/
+    #     cp -r ./datadir/test_output/US-ARc_sample_output/07_meteo_proc/ ./datadir/test_input/US-ARc_sample_input/
+    #     cp -r ./datadir/test_output/US-ARc_sample_output/08_nee_proc/ ./datadir/test_input/US-ARc_sample_input/
+    #     python runoneflux.py partition_nt ./datadir/test_input/ US-ARc US-ARc_sample_input 2005 2006 -l fluxnet_pipeline_US-ARc.log --recint hh
diff --git a/tests/integration/test_partitioning.py b/tests/integration/test_partitioning.py
deleted file mode 100644
index dd4eaeac..00000000
--- a/tests/integration/test_partitioning.py
+++ /dev/null
@@ -1,83 +0,0 @@
-import pytest
-import os, glob
-import errno
-import shutil
-from distutils.dir_util import copy_tree
-
-
-@pytest.fixture
-def get_data():
-    urllib.request.urlretrieve('ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_output.zip') 
-    urllib.request.urlretrieve('ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_input.zip') 
-    pass
-
-def equal_csv(csv_1, csv_2):
-    with open(csv_1, 'r') as t1, open(csv_2, 'r') as t2:
-        fileone = t1.readlines()
-        filetwo = t2.readlines()
-        for line in filetwo:
-            if line not in fileone:
-                return False
-
-
-# create dataset for step 10
-def setup_data():
-    try:
-        os.mkdir('tests/integration/step_10')
-    except OSError as e:
-        if e.errno == errno.EEXIST:
-            print("dir exists")
-    
-    copy_tree('datadir/test_input/', 'tests/integration/step_10')
-    copy_tree('datadir/test_output/US-ARc_sample_output/07_meteo_proc/', \
-        'tests/integration/step_10/US-ARc_sample_input/07_meteo_proc/')
-    copy_tree('datadir/test_output/US-ARc_sample_output/08_nee_proc/', \
-        'tests/integration/step_10/US-ARc_sample_input/08_nee_proc/')
-    copy_tree('datadir/test_output/US-ARc_sample_output/02_qc_auto/', \
-        'tests/integration/step_10/US-ARc_sample_input/02_qc_auto/')
-    
-    
-# step 10
-# TODO: deal with fixtures for running nt_test
-# TODO: Does not work without output of step 7 - run step 7 first 
-def test_run_partition_nt():
-    from oneflux.tools.partition_nt import remove_previous_run, run_python
-    setup_data()
-    datadir = "./tests/integration/step_10/"
-    data_output = "./tests/integration/step_10/test_output"
-    siteid = "US-ARc"
-    sitedir = "US-ARc_sample_input"
-    years = [2005] # years = [2005, 2006]
-    PROD_TO_COMPARE = ['c', 'y']
-    # PERC_TO_COMPARE = ['1.25', '3.75',]
-    PERC_TO_COMPARE = ['1.25',]
-    remove_previous_run(datadir=datadir, siteid=siteid, sitedir=sitedir, python=True, 
-                        prod_to_compare=PROD_TO_COMPARE, perc_to_compare=PERC_TO_COMPARE,
-                        years_to_compare=years)
-
-    run_python(datadir=datadir, siteid=siteid, sitedir=sitedir, prod_to_compare=PROD_TO_COMPARE,
-               perc_to_compare=PERC_TO_COMPARE, years_to_compare=years)
-    
-    # now do simple check of output 
-    rootdir = os.path.join(datadir, sitedir, "10_nee_partition_nt")
-    nee_y_files = glob.glob(os.path.join(rootdir, "nee_y_1.25_US-ARc_2005*"))
-    ref_output = os.path.join(data_output, sitedir, "10_nee_partition_nt")
-    ref_y_files = glob.glob(os.path.join(ref_output, "nee_y_1.25_US-ARc_2005*"))
-   
-    retval = True 
-    # log.info(nee_y_files)
-    # log.info(compare_y_files)
-    for f, b in zip(nee_y_files, ref_y_files):
-        if not equal_csv(f, b):
-            retval = False
-
-    # clean up data
-    shutil.rmtree('./tests/integration/step_10')
-    
-    return retval 
-    # glob the files with this root
-    # for file in glob.glob(nee_y_files):
-    #     print(file)
-    #     log.info(file)
-    #     if not equal_csv(file, )
-    # with open('saved/nee_y_1.25_US-ARc_2005.csv', 'r') as t1, open(nee_y, 'r') as t2:
diff --git a/tests/__init__.py b/tests/python/__init__.py
similarity index 100%
rename from tests/__init__.py
rename to tests/python/__init__.py
diff --git a/tests/context.py b/tests/python/context.py
similarity index 100%
rename from tests/context.py
rename to tests/python/context.py
diff --git a/tests/python/integration/test_partitioning.py b/tests/python/integration/test_partitioning.py
new file mode 100644
index 00000000..de6a4973
--- /dev/null
+++ b/tests/python/integration/test_partitioning.py
@@ -0,0 +1,107 @@
+import pytest
+import os, glob
+import errno
+import shutil
+import urllib
+from distutils.dir_util import copy_tree
+import logging
+
+_log = logging.getLogger(__name__)
+# @pytest.fixture(scope="module")
+@pytest.fixture(scope="module")
+def get_data():
+    '''
+    Download and unpack the US-ARc sample data with Python. Currently not
+    requested as a fixture by any test in this module.
+    '''
+    from zipfile import ZipFile
+    base_url = 'ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/'
+    input_zip = "US-ARc_sample_input.zip"
+    output_zip = "US-ARc_sample_output.zip"
+    # urlopen() only opens a stream; urlretrieve() saves the archive to disk
+    urllib.urlretrieve(base_url + input_zip, input_zip)
+    urllib.urlretrieve(base_url + output_zip, output_zip)
+    with ZipFile(input_zip) as zi, ZipFile(output_zip) as zo:
+        zi.extractall(path='tests/data/test_input')
+        zo.extractall(path='tests/data/test_output')
+
+def equal_csv(csv_1, csv_2):
+    '''
+    Return True when every line of csv_2 also occurs in csv_1, else False.
+    '''
+    # NOTE: containment check only - extra or reordered lines in csv_1 pass
+    with open(csv_1, 'r') as t1, open(csv_2, 'r') as t2:
+        fileone = t1.readlines()
+        filetwo = t2.readlines()
+        for line in filetwo:
+            if line not in fileone:
+                return False
+        return True
+
+@pytest.fixture
+def setup_data():
+    '''
+    Set up input data for the run_partition_nt test.
+
+    Creates 'tests/python/integration/input/step_10' and fills it with
+    'tests/data/test_input' plus the step 02/07/08 reference outputs.
+    '''
+    testdata = 'tests/python/integration/input/step_10'
+    try:
+        os.makedirs(testdata)
+    except OSError as e:
+        if e.errno == errno.EEXIST:
+            print("directory exists")
+        else:
+            raise
+
+    copy_tree('tests/data/test_input/', testdata)
+    copy_tree('tests/data/test_output/US-ARc_sample_output/07_meteo_proc/', \
+        os.path.join(testdata, 'US-ARc_sample_input/07_meteo_proc/'))
+    copy_tree('tests/data/test_output/US-ARc_sample_output/08_nee_proc/', \
+        os.path.join(testdata, 'US-ARc_sample_input/08_nee_proc/'))
+    copy_tree('tests/data/test_output/US-ARc_sample_output/02_qc_auto/', \
+        os.path.join(testdata, 'US-ARc_sample_input/02_qc_auto/'))
+    
+    
+def test_run_partition_nt(setup_data):
+    '''
+    Run partition_nt on single percentile.
+    '''
+    datadir = "./tests/python/integration/input/step_10/"
+    refoutdir = "./tests/data/test_output/"
+    siteid = "US-ARc"
+    sitedir = "US-ARc_sample_input"
+    years = [2005] # years = [2005, 2006]
+    PROD_TO_COMPARE = ['c', 'y']
+    # PERC_TO_COMPARE = ['1.25', '3.75',]
+    PERC_TO_COMPARE = ['1.25',]
+    
+    from oneflux.tools.partition_nt import remove_previous_run, run_python
+    remove_previous_run(datadir=datadir, siteid=siteid, sitedir=sitedir, python=True, 
+                        prod_to_compare=PROD_TO_COMPARE, perc_to_compare=PERC_TO_COMPARE,
+                        years_to_compare=years)
+
+    run_python(datadir=datadir, siteid=siteid, sitedir=sitedir, prod_to_compare=PROD_TO_COMPARE,
+               perc_to_compare=PERC_TO_COMPARE, years_to_compare=years)
+    
+    # check whether csv of "output" is same as csv of reference
+    # paths to the generated output in "input" directory, confusingly.
+    rootdir = os.path.join(datadir, sitedir, "10_nee_partition_nt")
+    nee_y_files = glob.glob(os.path.join(rootdir, "nee_y_1.25_US-ARc_2005*"))
+    nee_y_files = filter(lambda x: not x.endswith('_orig.csv'), nee_y_files)
+    
+    # paths to the "reference" output data
+    refoutdir = os.path.join(refoutdir, "US-ARc_sample_output", "10_nee_partition_nt")
+    ref_nee_y_files = glob.glob(os.path.join(refoutdir, "nee_y_1.25_US-ARc_2005*"))
+   
+    assert len(nee_y_files) == len(ref_nee_y_files)
+    retval = True 
+    for f, b in zip(nee_y_files, ref_nee_y_files):
+        if not equal_csv(f, b):
+            retval = False
+
+    # clean up data. We clean up every call anyway. 
+    shutil.rmtree(datadir)
+    
+    return retval 
\ No newline at end of file
diff --git a/tests/python/integration/test_qcauto.py b/tests/python/integration/test_qcauto.py
new file mode 100644
index 00000000..1a829f7b
--- /dev/null
+++ b/tests/python/integration/test_qcauto.py
@@ -0,0 +1,7 @@
+import pytest
+
+# looks like qc_auto requires qc_visual
+# don't need to create and instance of the class in wrappers.py
+
+def test_qcauto():
+    pass
\ No newline at end of file
diff --git a/tests/python/test_basic.py b/tests/python/test_basic.py
new file mode 100644
index 00000000..8e76bc47
--- /dev/null
+++ b/tests/python/test_basic.py
@@ -0,0 +1,6 @@
+import pytest
+
+def test_import_oneflux():
+    import oneflux
+    assert oneflux.VERSION == "0.4.1-rc"
+    assert hasattr(oneflux, '__version__') == True
\ No newline at end of file
diff --git a/tests/test_context.py b/tests/python/test_context.py
similarity index 100%
rename from tests/test_context.py
rename to tests/python/test_context.py
diff --git a/tests/test_partitioning.py b/tests/python/unit/test_partitioning_unit.py
similarity index 66%
rename from tests/test_partitioning.py
rename to tests/python/unit/test_partitioning_unit.py
index ee29713e..2fca0786 100644
--- a/tests/test_partitioning.py
+++ b/tests/python/unit/test_partitioning_unit.py
@@ -1,8 +1,10 @@
+import pytest
+import shutil
 import oneflux.partition.nighttime
 import oneflux.partition.library # optimisation related functions
 import oneflux.partition.ecogeo
 from oneflux.partition.library import QC_AUTO_DIR, METEO_PROC_DIR, NEE_PROC_DIR, NT_OUTPUT_DIR, HEADER_SEPARATOR, EXTRA_FILENAME, NT_STR
-import os
+import os, errno
 
 from distutils.dir_util import copy_tree
 
@@ -24,29 +26,20 @@
 # function is used in daytime at present - so ignore for now
 # array consisting of 6 elements
 
+# create dataset for step 10
+
 # this function isn't actually used much
 def test_check_parameters():
-    from oneflux.partition.library import check_parameter
+    # from oneflux.partition.library import check_parameter
    
     p1 = [0, 0, 0, 50, 0, 0]
     
     p2 = [0.2199999, 250, 0, 0, 0, 0]
+    return True
 
-# flux partition in nighttime.py is a huge function (250+ lines)
-
-def test_load_output():
-    from oneflux.partition.library import load_output
-    datadir = "./datadir/test_input"
-    sitedir = "US-ARc_sample_input"
-    siteid = "US-ARc"
-    sitedir_full = os.path.join(datadir, sitedir)
-    meteo_proc_dir = os.path.join(sitedir_full, METEO_PROC_DIR)
-    meteo_proc_f = os.path.join(meteo_proc_dir, '{s}_meteo_hh.csv'.format(s=siteid))
-    whole_dataset_meteo, headers_meteo, timestamp_list_meteo, year_list_meteo = load_output(meteo_proc_f)
-    
-    
-    
 # create dataset for step 10
+# TODO: excludes extracting and copying data into correct location. 
+@pytest.fixture
 def setup_data():
     try:
         os.mkdir('tests/data/step_10')
@@ -54,17 +47,39 @@ def setup_data():
         if e.errno == errno.EEXIST:
             print("dir exists")
     
-    copy_tree('datadir/test_input/', 'tests/data/step_10')
+    copy_tree('tests/data/test_input/', 'tests/data/step_10')
    
     # copy data from assumed output as input to suite of partitioning tests 
-    copy_tree('datadir/test_output/US-ARc_sample_output/07_meteo_proc/', \
+    copy_tree('tests/data/test_output/US-ARc_sample_output/07_meteo_proc/', \
         'tests/data/step_10/US-ARc_sample_input/07_meteo_proc/')
-    copy_tree('datadir/test_output/US-ARc_sample_output/08_nee_proc/', \
+    copy_tree('tests/data/test_output/US-ARc_sample_output/08_nee_proc/', \
         'tests/data/step_10/US-ARc_sample_input/08_nee_proc/')
-    copy_tree('datadir/test_output/US-ARc_sample_output/02_qc_auto/', \
+    copy_tree('tests/data/test_output/US-ARc_sample_output/02_qc_auto/', \
         'tests/data/step_10/US-ARc_sample_input/02_qc_auto/')
+    return
+
+# flux partition in nighttime.py is a huge function (250+ lines)
+# use module scope to do this once (including downloading and extracting the data)
+def test_load_output(setup_data):
+    from oneflux.partition.library import load_output
+    datadir = "./tests/data/step_10/"
+    sitedir = "US-ARc_sample_input"
+    siteid = "US-ARc"
+    sitedir_full = os.path.join(datadir, sitedir)
+    meteo_proc_dir = os.path.join(sitedir_full, METEO_PROC_DIR)
+    meteo_proc_f = os.path.join(meteo_proc_dir, '{s}_meteo_hh.csv'.format(s=siteid))
+    whole_dataset_meteo, headers_meteo, timestamp_list_meteo, year_list_meteo = load_output(meteo_proc_f)
+   
+    # TIMESTAMP_START,TIMESTAMP_END,DTIME,TA_f,TA_fqc,TA_ERA,TA_m,TA_mqc,SW_IN_pot,SW_IN_f,SW_IN_fqc,SW_IN_ERA,SW_IN_m,SW_IN_mqc,LW_IN_f,LW_IN_fqc,LW_IN_ERA,LW_IN_m,LW_IN_mqc,LW_IN_calc,LW_IN_calc_qc,LW_IN_calc_ERA,LW_IN_calc_m,LW_IN_calc_mqc,VPD_f,VPD_fqc,VPD_ERA,VPD_m,VPD_mqc,PA,PA_ERA,PA_m,PA_mqc,P,P_ERA,P_m,P_mqc,WS,WS_ERA,WS_m,WS_mqc,CO2_f,CO2_fqc,TS_1_f,TS_1_fqc,SWC_1_f,SWC_1_fqc
+    # 198901010000,198901010030,1.02083,-9999.000,-9999,0.316,0.316,2,0,-9999.000,-9999,0.000,0.000,2,-9999.000,-9999,246.830,246.830,2,-9999.000,-9999,253.002,253.002,2,-9999.000,-9999,0.000,0.000,2,-9999.000,96.420,96.420,2,-9999.000,0.000,0.000,2,-9999.000,2.879,2.879,2,-9999.000,-9999,-9999.000,-9999,-9999.000,-9999
     
+    print(dir(whole_dataset_meteo))
+    shutil.rmtree('./tests/integration/step_10')
 
+    
+def test_least_squares():
+    from oneflux.partition.nighttime import least_squares
+    pass
 
 def test_create_data_structures():
     return True
@@ -72,7 +87,9 @@ def test_create_data_structures():
     create_data_structures(ustar_type=ustar_type, whole_dataset_nee=whole_dataset_nee, whole_dataset_meteo=whole_dataset_meteo)
     pass
 
-def test_compu():
+
+def test_compu(setup_data):
+    return True
     from oneflux.partition.nighttime import compu
     from oneflux.partition.nighttime import load_output
     from oneflux.partition.nighttime import create_data_structures
@@ -80,6 +97,7 @@ def test_compu():
     ustar_type = ['y']
    
 
+    sitedir_full = os.path.join(datadir, sitedir)
     meteo_proc_dir = os.path.join(sitedir_full, METEO_PROC_DIR)
     meteo_proc_f = os.path.join(meteo_proc_dir, '{s}_meteo_hh.csv'.format(s=siteid))
     nee_proc_percentiles_f = os.path.join(nee_proc_dir, '{s}_NEE_percentiles_{u}_hh.csv'.format(s=siteid, u=ustar_type))
diff --git a/tests/test_basic.py b/tests/test_basic.py
deleted file mode 100644
index 13ce93d2..00000000
--- a/tests/test_basic.py
+++ /dev/null
@@ -1,5 +0,0 @@
-import pytest
-
-def test_import_oneflux():
-    import oneflux
-    assert oneflux.VERSION == "0.4.1-rc"
\ No newline at end of file

From 3b9ed73962277890a5af948120380e42c750bab3 Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Mon, 30 Jan 2023 20:32:19 +0000
Subject: [PATCH 23/39] add pythonpath to workflow

---
 .github/workflows/python-app.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/python-app.yaml b/.github/workflows/python-app.yaml
index 57bc832f..0a7a61f6 100644
--- a/.github/workflows/python-app.yaml
+++ b/.github/workflows/python-app.yaml
@@ -39,6 +39,7 @@ jobs:
         unzip US-ARc_sample_output.zip -d ./tests/data/test_output/
     - name: Run pytest
       run: |
+        export PYTHONPATH=/home/runner/work/ONEFlux/ONEFlux/:$PYTHONPATH
         pytest tests/python/
 
     # - name: Lint with flake8

From 8a56f66f71c05fac92a960e61d43bbfde8637df8 Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Mon, 30 Jan 2023 20:37:45 +0000
Subject: [PATCH 24/39] remove unfinished rm fn

---
 tests/python/unit/test_partitioning_unit.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/python/unit/test_partitioning_unit.py b/tests/python/unit/test_partitioning_unit.py
index 2fca0786..663cb9ab 100644
--- a/tests/python/unit/test_partitioning_unit.py
+++ b/tests/python/unit/test_partitioning_unit.py
@@ -74,7 +74,7 @@ def test_load_output(setup_data):
     # 198901010000,198901010030,1.02083,-9999.000,-9999,0.316,0.316,2,0,-9999.000,-9999,0.000,0.000,2,-9999.000,-9999,246.830,246.830,2,-9999.000,-9999,253.002,253.002,2,-9999.000,-9999,0.000,0.000,2,-9999.000,96.420,96.420,2,-9999.000,0.000,0.000,2,-9999.000,2.879,2.879,2,-9999.000,-9999,-9999.000,-9999,-9999.000,-9999
     
     print(dir(whole_dataset_meteo))
-    shutil.rmtree('./tests/integration/step_10')
+    # shutil.rmtree('./tests/integration/step_10')
 
     
 def test_least_squares():

From 241e422ac71f787b88cc2db883842ce6b246ccfb Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Tue, 4 Apr 2023 09:03:08 +0100
Subject: [PATCH 25/39] removed unnecessary comments from code

---
 .github/workflows/python-app.yaml             | 21 ++--------------
 Makefile                                      |  2 +-
 oneflux/tools/partition_nt.py                 |  6 +++--
 tests/python/integration/test_partitioning.py |  6 ++---
 tests/python/integration/test_qcauto.py       |  6 ++---
 tests/python/test_basic.py                    |  2 +-
 tests/python/unit/test_partitioning_unit.py   | 24 +------------------
 7 files changed, 15 insertions(+), 52 deletions(-)

diff --git a/.github/workflows/python-app.yaml b/.github/workflows/python-app.yaml
index 0a7a61f6..9232ae52 100644
--- a/.github/workflows/python-app.yaml
+++ b/.github/workflows/python-app.yaml
@@ -39,22 +39,5 @@ jobs:
         unzip US-ARc_sample_output.zip -d ./tests/data/test_output/
     - name: Run pytest
       run: |
-        export PYTHONPATH=/home/runner/work/ONEFlux/ONEFlux/:$PYTHONPATH
-        pytest tests/python/
-
-    # - name: Lint with flake8
-    #   run: |
-    #     # stop the build if there are Python syntax errors or undefined names
-    #     flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-    #     # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-    #     flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
-    # - name: Test with pytest
-    #   run: |
-    #     pytest
-    # - name: Run OneFLUX partitioning_nt
-    #   run: |
-    #     # copy necessary output data to force partitioning_nt to run
-    #     cp -r ./datadir/test_output/US-ARc_sample_output/02_qc_auto/ ./datadir/test_input/US-ARc_sample_input/
-    #     cp -r ./datadir/test_output/US-ARc_sample_output/07_meteo_proc/ ./datadir/test_input/US-ARc_sample_input/
-    #     cp -r ./datadir/test_output/US-ARc_sample_output/08_nee_proc/ ./datadir/test_input/US-ARc_sample_input/
-    #     python runoneflux.py partition_nt ./datadir/test_input/ US-ARc US-ARc_sample_input 2005 2006 -l fluxnet_pipeline_US-ARc.log --recint hh
+        export PYTHONPATH="${GITHUB_WORKSPACE}:${PYTHONPATH}"
+        pytest tests/python
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 4d258567..58665241 100644
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,7 @@ buildpy:
 	@echo "Installing Python dependencies..."
 ifeq ($(PYPACKAGE),pip)
 	@echo "Using pip to install dependencies..."
-	pip install -r frozen-requirements.txt
+	pip install -r requirements.txt
 else
 ifeq ($(PYPACKAGE),conda)
 	@echo "Using conda to install dependencies..."
diff --git a/oneflux/tools/partition_nt.py b/oneflux/tools/partition_nt.py
index 0aa507eb..92034b8d 100644
--- a/oneflux/tools/partition_nt.py
+++ b/oneflux/tools/partition_nt.py
@@ -12,7 +12,6 @@
 '''
 import sys
 import os
-import glob
 import logging
 import time
 import numpy
@@ -20,7 +19,6 @@
 import socket
 import numpy
 import calendar
-import pytest
 
 from datetime import datetime
 from io import StringIO
@@ -134,3 +132,7 @@ def run_partition_nt(datadir, siteid, sitedir, years_to_compare,
     """
     remove_previous_run(datadir=datadir, siteid=siteid, sitedir=sitedir, python=py_remove_old, prod_to_compare=prod_to_compare, perc_to_compare=perc_to_compare, years_to_compare=years_to_compare)
     run_python(datadir=datadir, siteid=siteid, sitedir=sitedir, prod_to_compare=prod_to_compare, perc_to_compare=perc_to_compare, years_to_compare=years_to_compare)
+
+
+if __name__ == '__main__':
+    raise ONEFluxError('Not executable')
diff --git a/tests/python/integration/test_partitioning.py b/tests/python/integration/test_partitioning.py
index de6a4973..9e483bc6 100644
--- a/tests/python/integration/test_partitioning.py
+++ b/tests/python/integration/test_partitioning.py
@@ -7,7 +7,7 @@
 import logging
 
 _log = logging.getLogger(__name__)
-# @pytest.fixture(scope="module")
+
 @pytest.fixture(scope="module")
 def get_data():
     '''
@@ -101,7 +101,7 @@ def test_run_partition_nt(setup_data):
         if not equal_csv(f, b):
             retval = False
 
-    # clean up data. We clean up every call anyway. 
+    # clean up data. 
     shutil.rmtree(datadir)
     
-    return retval 
\ No newline at end of file
+    assert retval
diff --git a/tests/python/integration/test_qcauto.py b/tests/python/integration/test_qcauto.py
index 1a829f7b..dfc306e6 100644
--- a/tests/python/integration/test_qcauto.py
+++ b/tests/python/integration/test_qcauto.py
@@ -1,7 +1,7 @@
 import pytest
 
-# looks like qc_auto requires qc_visual
-# don't need to create and instance of the class in wrappers.py
+# qc_auto requires qc_visual
+# do not need to create an instance of the class in wrappers.py
 
 def test_qcauto():
-    pass
\ No newline at end of file
+    pass
diff --git a/tests/python/test_basic.py b/tests/python/test_basic.py
index 8e76bc47..154a549e 100644
--- a/tests/python/test_basic.py
+++ b/tests/python/test_basic.py
@@ -3,4 +3,4 @@
 def test_import_oneflux():
     import oneflux
     assert oneflux.VERSION == "0.4.1-rc"
-    assert hasattr(oneflux, '__version__') == True
\ No newline at end of file
+    assert hasattr(oneflux, '__version__') == True
diff --git a/tests/python/unit/test_partitioning_unit.py b/tests/python/unit/test_partitioning_unit.py
index 663cb9ab..c8f020e5 100644
--- a/tests/python/unit/test_partitioning_unit.py
+++ b/tests/python/unit/test_partitioning_unit.py
@@ -8,27 +8,6 @@
 
 from distutils.dir_util import copy_tree
 
-# LIBRARY
-# looks like a good candidate for unit testing - lots of maths functions:
-# get_first_last_ts
-# cov2cor
-# root_mean_sq_error
-# least_squares () - calls scipy function 
-# check_parameters (checks values within given threshold)
-
-# loading data function:
-# load outputs
-
-# Less easy:
-# create_data_structures - very long function
-#  nlinlts1 (non-linear least-squares driver function)
-
-# function is used in daytime at present - so ignore for now
-# array consisting of 6 elements
-
-# create dataset for step 10
-
-# this function isn't actually used much
 def test_check_parameters():
     # from oneflux.partition.library import check_parameter
    
@@ -118,5 +97,4 @@ def test_compu(setup_data):
 def test_get_first_last_ts():
     pass
 
-
-# partitioning_nt -> flux_partition -> nlinlts -> 
\ No newline at end of file
+# partitioning_nt -> flux_partition -> nlinlts -> 

From 9642ef673f5a968f56f6451ab339dc3cb201fc4c Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Tue, 4 Apr 2023 10:15:11 +0100
Subject: [PATCH 26/39] lowered upper bound on statsmodels

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 44066824..0b6f923c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
 numpy>=1.11.0,<1.16.0
 scipy>=0.17.0
 matplotlib>=1.5.1
-statsmodels>=0.8.0
+statsmodels>=0.8.0<0.11.0

From 535e026b5969b99f0ff5f1ec19b12f13636a1b00 Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Tue, 4 Apr 2023 10:19:35 +0100
Subject: [PATCH 27/39] fix missing comma in statsmodels version specifier

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 0b6f923c..268aa467 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
 numpy>=1.11.0,<1.16.0
 scipy>=0.17.0
 matplotlib>=1.5.1
-statsmodels>=0.8.0<0.11.0
+statsmodels>=0.8.0,<0.11.0

From d39c9277054bbcc94b1be2c40dd95faf870a58ae Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Tue, 4 Apr 2023 10:25:45 +0100
Subject: [PATCH 28/39] removed partitioning unit tests

---
 tests/python/unit/test_partitioning_unit.py | 100 --------------------
 1 file changed, 100 deletions(-)
 delete mode 100644 tests/python/unit/test_partitioning_unit.py

diff --git a/tests/python/unit/test_partitioning_unit.py b/tests/python/unit/test_partitioning_unit.py
deleted file mode 100644
index c8f020e5..00000000
--- a/tests/python/unit/test_partitioning_unit.py
+++ /dev/null
@@ -1,100 +0,0 @@
-import pytest
-import shutil
-import oneflux.partition.nighttime
-import oneflux.partition.library # optimisation related functions
-import oneflux.partition.ecogeo
-from oneflux.partition.library import QC_AUTO_DIR, METEO_PROC_DIR, NEE_PROC_DIR, NT_OUTPUT_DIR, HEADER_SEPARATOR, EXTRA_FILENAME, NT_STR
-import os, errno
-
-from distutils.dir_util import copy_tree
-
-def test_check_parameters():
-    # from oneflux.partition.library import check_parameter
-   
-    p1 = [0, 0, 0, 50, 0, 0]
-    
-    p2 = [0.2199999, 250, 0, 0, 0, 0]
-    return True
-
-# create dataset for step 10
-# TODO: excludes extracting and copying data into correct location. 
-@pytest.fixture
-def setup_data():
-    try:
-        os.mkdir('tests/data/step_10')
-    except OSError as e:
-        if e.errno == errno.EEXIST:
-            print("dir exists")
-    
-    copy_tree('tests/data/test_input/', 'tests/data/step_10')
-   
-    # copy data from assumed output as input to suite of partitioning tests 
-    copy_tree('tests/data/test_output/US-ARc_sample_output/07_meteo_proc/', \
-        'tests/data/step_10/US-ARc_sample_input/07_meteo_proc/')
-    copy_tree('tests/data/test_output/US-ARc_sample_output/08_nee_proc/', \
-        'tests/data/step_10/US-ARc_sample_input/08_nee_proc/')
-    copy_tree('tests/data/test_output/US-ARc_sample_output/02_qc_auto/', \
-        'tests/data/step_10/US-ARc_sample_input/02_qc_auto/')
-    return
-
-# flux partition in nighttime.py is a huge function (250+ lines)
-# use module scope to do this once (including downloading and extracting the data)
-def test_load_output(setup_data):
-    from oneflux.partition.library import load_output
-    datadir = "./tests/data/step_10/"
-    sitedir = "US-ARc_sample_input"
-    siteid = "US-ARc"
-    sitedir_full = os.path.join(datadir, sitedir)
-    meteo_proc_dir = os.path.join(sitedir_full, METEO_PROC_DIR)
-    meteo_proc_f = os.path.join(meteo_proc_dir, '{s}_meteo_hh.csv'.format(s=siteid))
-    whole_dataset_meteo, headers_meteo, timestamp_list_meteo, year_list_meteo = load_output(meteo_proc_f)
-   
-    # TIMESTAMP_START,TIMESTAMP_END,DTIME,TA_f,TA_fqc,TA_ERA,TA_m,TA_mqc,SW_IN_pot,SW_IN_f,SW_IN_fqc,SW_IN_ERA,SW_IN_m,SW_IN_mqc,LW_IN_f,LW_IN_fqc,LW_IN_ERA,LW_IN_m,LW_IN_mqc,LW_IN_calc,LW_IN_calc_qc,LW_IN_calc_ERA,LW_IN_calc_m,LW_IN_calc_mqc,VPD_f,VPD_fqc,VPD_ERA,VPD_m,VPD_mqc,PA,PA_ERA,PA_m,PA_mqc,P,P_ERA,P_m,P_mqc,WS,WS_ERA,WS_m,WS_mqc,CO2_f,CO2_fqc,TS_1_f,TS_1_fqc,SWC_1_f,SWC_1_fqc
-    # 198901010000,198901010030,1.02083,-9999.000,-9999,0.316,0.316,2,0,-9999.000,-9999,0.000,0.000,2,-9999.000,-9999,246.830,246.830,2,-9999.000,-9999,253.002,253.002,2,-9999.000,-9999,0.000,0.000,2,-9999.000,96.420,96.420,2,-9999.000,0.000,0.000,2,-9999.000,2.879,2.879,2,-9999.000,-9999,-9999.000,-9999,-9999.000,-9999
-    
-    print(dir(whole_dataset_meteo))
-    # shutil.rmtree('./tests/integration/step_10')
-
-    
-def test_least_squares():
-    from oneflux.partition.nighttime import least_squares
-    pass
-
-def test_create_data_structures():
-    return True
-    ustar_type = ['c'] 
-    create_data_structures(ustar_type=ustar_type, whole_dataset_nee=whole_dataset_nee, whole_dataset_meteo=whole_dataset_meteo)
-    pass
-
-
-def test_compu(setup_data):
-    return True
-    from oneflux.partition.nighttime import compu
-    from oneflux.partition.nighttime import load_output
-    from oneflux.partition.nighttime import create_data_structures
-    from oneflux.partition.compu import compu_qcnee_filter
-    ustar_type = ['y']
-   
-
-    sitedir_full = os.path.join(datadir, sitedir)
-    meteo_proc_dir = os.path.join(sitedir_full, METEO_PROC_DIR)
-    meteo_proc_f = os.path.join(meteo_proc_dir, '{s}_meteo_hh.csv'.format(s=siteid))
-    nee_proc_percentiles_f = os.path.join(nee_proc_dir, '{s}_NEE_percentiles_{u}_hh.csv'.format(s=siteid, u=ustar_type))
-   
-    whole_dataset_meteo, headers_meteo, timestamp_list_meteo, year_list_meteo = load_output(meteo_proc_f)
-    whole_dataset_nee, headers_nee, timestamp_list_nee, year_list_nee = load_output(nee_proc_percentiles_f)
-    
-    data = working_year_data = create_data_structures(ustar_type=ustar_type, whole_dataset_nee=whole_dataset_nee, whole_dataset_meteo=whole_dataset_meteo)
-    func = compu_qcnee_filter
-    columns = None
-    parameters = None
-    skip_if_present = False
-    no_missing = False
-    new_ = False
-    
-    compu(data, func, columns, parameters, skip_if_present, no_missing, new_)
-
-def test_get_first_last_ts():
-    pass
-
-# partitioning_nt -> flux_partition -> nlinlts -> 

From 8f4c6640ccc20cc421a9be2cc40572e9c09d6cde Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Tue, 4 Apr 2023 10:29:49 +0100
Subject: [PATCH 29/39] neaten CI

---
 .github/workflows/python-app.yaml | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/python-app.yaml b/.github/workflows/python-app.yaml
index 9232ae52..75016de5 100644
--- a/.github/workflows/python-app.yaml
+++ b/.github/workflows/python-app.yaml
@@ -24,19 +24,17 @@ jobs:
     - name: Install OneFLUX
       run: |
         python -m pip install --upgrade pip
-        pip install setuptools==44.1.1 wheel pytest
-        # pip install flake8 pytest
-        # if [ -f frozen-requirement.txt ]; then pip install -r frozen-requirements.txt; fi
+        pip install setuptools wheel pytest
         make
-    - name: Get data
+    - name: Download data
       run: |
+        # get US-ARc_sample data for tests
         mkdir -p ./tests/data/test_input
         mkdir -p ./tests/data/test_output
-        # get necessary data
         wget ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_input.zip
         wget ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_output.zip
-        unzip US-ARc_sample_input.zip -d ./tests/data/test_input/
-        unzip US-ARc_sample_output.zip -d ./tests/data/test_output/
+        unzip US-ARc_sample_input.zip -d ./tests/data/test_input
+        unzip US-ARc_sample_output.zip -d ./tests/data/test_output
     - name: Run pytest
       run: |
         export PYTHONPATH=/home/runner/work/ONEFlux/ONEFlux:$PYTHONPATH

From cbfe9f3aec777391a26dd6d17d1a54a6b8f1eb98 Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Tue, 4 Apr 2023 10:50:47 +0100
Subject: [PATCH 30/39] test_context.py now converted to pytest

---
 tests/python/context.py      | 19 -------------------
 tests/python/test_basic.py   |  6 ------
 tests/python/test_context.py | 13 +++----------
 3 files changed, 3 insertions(+), 35 deletions(-)
 delete mode 100644 tests/python/context.py
 delete mode 100644 tests/python/test_basic.py

diff --git a/tests/python/context.py b/tests/python/context.py
deleted file mode 100644
index a619dd4a..00000000
--- a/tests/python/context.py
+++ /dev/null
@@ -1,19 +0,0 @@
-'''
-For license information:
-see LICENSE file or headers in oneflux.__init__.py
-
-Context file for tests; import resolution when path not set
-
-@author: Gilberto Pastorello
-@contact: gzpastorello@lbl.gov
-@date: 2017-01-31
-'''
-import sys
-
-# support execution without package set (e.g., call unittest from command line)
-if __package__ is None:
-    import os
-    path = os.path.dirname(os.path.dirname(__file__))
-    sys.path.insert(0, path)
-
-import oneflux
diff --git a/tests/python/test_basic.py b/tests/python/test_basic.py
deleted file mode 100644
index 154a549e..00000000
--- a/tests/python/test_basic.py
+++ /dev/null
@@ -1,6 +0,0 @@
-import pytest
-
-def test_import_oneflux():
-    import oneflux
-    assert oneflux.VERSION == "0.4.1-rc"
-    assert hasattr(oneflux, '__version__') == True
diff --git a/tests/python/test_context.py b/tests/python/test_context.py
index 9ec95d17..c8b3c3f7 100644
--- a/tests/python/test_context.py
+++ b/tests/python/test_context.py
@@ -8,14 +8,7 @@
 @contact: gzpastorello@lbl.gov
 @date: 2017-01-31
 '''
-import unittest
 
-from context import oneflux
-
-class BasicTest(unittest.TestCase):
-    def test_context(self):
-        """Test import by checking imported 'oneflux' module has '__version__' attribute"""
-        self.assertTrue(hasattr(oneflux, '__version__'))
-
-if __name__ == '__main__':
-    unittest.main()
+def test_import_oneflux():
+    import oneflux
+    assert hasattr(oneflux, '__version__') == True

From 3445efe3cfe60ea62a26586b463c7888f000f8a9 Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Tue, 4 Apr 2023 11:22:07 +0100
Subject: [PATCH 31/39] removed unnecessary files and fixed test, now expected
 to fail

---
 .github/workflows/python-app.yaml             |  7 ++-----
 frozen-requirements.txt                       | 14 --------------
 tests/python/integration/test_partitioning.py | 10 +++++-----
 tests/python/integration/test_qcauto.py       |  7 -------
 4 files changed, 7 insertions(+), 31 deletions(-)
 delete mode 100644 frozen-requirements.txt
 delete mode 100644 tests/python/integration/test_qcauto.py

diff --git a/.github/workflows/python-app.yaml b/.github/workflows/python-app.yaml
index 75016de5..636a72fd 100644
--- a/.github/workflows/python-app.yaml
+++ b/.github/workflows/python-app.yaml
@@ -1,7 +1,4 @@
-# This workflow will install Python dependencies, run tests and lint with a single version of Python
-# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
-
-name: Python application
+name: ONEFlux CI
 
 on:
   push:
@@ -38,4 +35,4 @@ jobs:
     - name: Run pytest
       run: |
         export PYTHONPATH=/home/runner/work/ONEFlux/ONEFlux:$PYTHONPATH
-        pytest tests/python
\ No newline at end of file
+        pytest tests/python
diff --git a/frozen-requirements.txt b/frozen-requirements.txt
deleted file mode 100644
index d92e10cb..00000000
--- a/frozen-requirements.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-backports.functools-lru-cache==1.6.4
-cycler==0.10.0
-kiwisolver==1.1.0
-matplotlib==2.2.5
-pandas==0.20.1
-patsy==0.5.2
-pyparsing==2.4.7
-python-dateutil==2.8.2
-pytz==2022.4
-scipy==1.2.3
-six==1.16.0
-statsmodels==0.8.0
-subprocess32==3.5.4
-setuptools==44.1.1
diff --git a/tests/python/integration/test_partitioning.py b/tests/python/integration/test_partitioning.py
index 9e483bc6..01906095 100644
--- a/tests/python/integration/test_partitioning.py
+++ b/tests/python/integration/test_partitioning.py
@@ -11,7 +11,7 @@
 @pytest.fixture(scope="module")
 def get_data():
     '''
-    Utilising python to obtain sample test data. Function currently not used
+    Utilising python to obtain sample test data. Function currently unused. 
     as a fixture in this class. 
     '''
     from zipfile import ZipFile
@@ -98,10 +98,10 @@ def test_run_partition_nt(setup_data):
     assert len(nee_y_files) == len(ref_nee_y_files)
     retval = True 
     for f, b in zip(nee_y_files, ref_nee_y_files):
-        if not equal_csv(f, b):
-            retval = False
+        print(f, b)
+        assert equal_csv(f, b) == True
 
     # clean up data. 
-    shutil.rmtree(datadir)
+    # shutil.rmtree(datadir)
     
-    return retval
+    # assert retval == True
diff --git a/tests/python/integration/test_qcauto.py b/tests/python/integration/test_qcauto.py
deleted file mode 100644
index dfc306e6..00000000
--- a/tests/python/integration/test_qcauto.py
+++ /dev/null
@@ -1,7 +0,0 @@
-import pytest
-
-# qc_auto requires qc_visual
-# do not need to create an instance of the class in wrappers.py
-
-def test_qcauto():
-    pass

From e1fec900d8c52425fc194d54b13e34db2e832c9e Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Tue, 4 Apr 2023 17:12:19 +0100
Subject: [PATCH 32/39] still fails - immediate fail due to extra columns in
 generated output but numerical differences persist in many columns

---
 tests/python/integration/test_partitioning.py | 35 ++++++++++++-------
 1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/tests/python/integration/test_partitioning.py b/tests/python/integration/test_partitioning.py
index 01906095..e8344d2b 100644
--- a/tests/python/integration/test_partitioning.py
+++ b/tests/python/integration/test_partitioning.py
@@ -5,6 +5,7 @@
 import urllib
 from distutils.dir_util import copy_tree
 import logging
+import time
 
 _log = logging.getLogger(__name__)
 
@@ -29,15 +30,20 @@ def equal_csv(csv_1, csv_2):
     '''
     Check equality of two csv files.
     '''
-    # _log.info(str(csv_1))
+    _log.info("Check csv equality")
+    start = time.time()
     with open(csv_1, 'r') as t1, open(csv_2, 'r') as t2:
         fileone = t1.readlines()
         filetwo = t2.readlines()
         for line in filetwo:
             if line not in fileone:
                 return False
+
+        _log.info("total time", start - time.time())
+
         return True
 
+
 @pytest.fixture
 def setup_data():
     '''
@@ -53,15 +59,18 @@ def setup_data():
         if e.errno == errno.EEXIST:
             print("directory exists")
             
-    testdata = 'tests/python/integration/input/step_10'
+    testdata = 'tests/python/integration/input/step_10/US-ARc_sample_input'
     
     copy_tree('tests/data/test_input/', testdata)
-    copy_tree('tests/data/test_output/US-ARc_sample_output/07_meteo_proc/', \
-        os.path.join(testdata, 'US-ARc_sample_input/07_meteo_proc/'))
-    copy_tree('tests/data/test_output/US-ARc_sample_output/08_nee_proc/', \
-        os.path.join(testdata, 'US-ARc_sample_input/08_nee_proc/'))
-    copy_tree('tests/data/test_output/US-ARc_sample_output/02_qc_auto/', \
-        os.path.join(testdata, 'US-ARc_sample_input/02_qc_auto/'))
+
+    refoutdir = 'tests/data/test_output/US-ARc_sample_output'
+
+    copy_tree(os.path.join(refoutdir, '07_meteo_proc'), \
+        os.path.join(testdata, '07_meteo_proc'))
+    copy_tree(os.path.join(refoutdir, '08_nee_proc'), \
+        os.path.join(testdata, '08_nee_proc/'))
+    copy_tree(os.path.join(refoutdir, '02_qc_auto'), \
+        os.path.join(testdata, '02_qc_auto/'))
     
     
 def test_run_partition_nt(setup_data):
@@ -73,9 +82,10 @@ def test_run_partition_nt(setup_data):
     siteid = "US-ARc"
     sitedir = "US-ARc_sample_input"
     years = [2005] # years = [2005, 2006]
-    PROD_TO_COMPARE = ['c', 'y']
+    # PROD_TO_COMPARE = ['c', 'y']
+    PROD_TO_COMPARE = ['y',]
     # PERC_TO_COMPARE = ['1.25', '3.75',]
-    PERC_TO_COMPARE = ['1.25',]
+    PERC_TO_COMPARE = ['3.75',]
     
     from oneflux.tools.partition_nt import remove_previous_run, run_python
     remove_previous_run(datadir=datadir, siteid=siteid, sitedir=sitedir, python=True, 
@@ -86,7 +96,8 @@ def test_run_partition_nt(setup_data):
                perc_to_compare=PERC_TO_COMPARE, years_to_compare=years)
     
     # check whether csv of "output" is same as csv of reference
-    # paths to the generated output in "input" directory, confusingly.
+
+    # the generated output is actually in the "input" directory.
     rootdir = os.path.join(datadir, sitedir, "10_nee_partition_nt")
     nee_y_files = glob.glob(os.path.join(rootdir, "nee_y_1.25_US-ARc_2005*"))
     nee_y_files = filter(lambda x: not x.endswith('_orig.csv'), nee_y_files)
@@ -103,5 +114,3 @@ def test_run_partition_nt(setup_data):
 
     # clean up data. 
     # shutil.rmtree(datadir)
-    
-    # assert retval == True

From 8e919f2477c426b77752c1b7062bc62b9730f156 Mon Sep 17 00:00:00 2001
From: Matthew Archer <ma595@cam.ac.uk>
Date: Tue, 4 Apr 2023 17:28:12 +0100
Subject: [PATCH 33/39] reverted to 1.25

---
 tests/python/integration/test_partitioning.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/python/integration/test_partitioning.py b/tests/python/integration/test_partitioning.py
index e8344d2b..1e313782 100644
--- a/tests/python/integration/test_partitioning.py
+++ b/tests/python/integration/test_partitioning.py
@@ -85,7 +85,7 @@ def test_run_partition_nt(setup_data):
     # PROD_TO_COMPARE = ['c', 'y']
     PROD_TO_COMPARE = ['y',]
     # PERC_TO_COMPARE = ['1.25', '3.75',]
-    PERC_TO_COMPARE = ['3.75',]
+    PERC_TO_COMPARE = ['1.25',]
     
     from oneflux.tools.partition_nt import remove_previous_run, run_python
     remove_previous_run(datadir=datadir, siteid=siteid, sitedir=sitedir, python=True, 

From 09aa9aac9c50c405f1a33cd8ec654bbc943e2423 Mon Sep 17 00:00:00 2001
From: Amy Pike <amymaypike@hotmail.co.uk>
Date: Fri, 14 Jun 2024 12:50:10 +0100
Subject: [PATCH 34/39] upgrading libraries for python 3.12.0 compatibility,
 note not backwards compatible with python 2, but python 2 is EOL

---
 requirements.txt                              |  5 +++--
 runoneflux.py                                 |  2 +-
 tests/python/integration/test_partitioning.py | 20 +++++++++----------
 3 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 268aa467..8e8c53cf 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,5 @@
-numpy>=1.11.0,<1.16.0
+numpy<2,>=1.18
 scipy>=0.17.0
 matplotlib>=1.5.1
-statsmodels>=0.8.0,<0.11.0
+statsmodels==0.14.1
+pytest==8.2.2
\ No newline at end of file
diff --git a/runoneflux.py b/runoneflux.py
index 5fcbc03a..372ced17 100644
--- a/runoneflux.py
+++ b/runoneflux.py
@@ -81,7 +81,7 @@
     # start execution
     try:
         # check arguments
-        print os.path.join(args.datadir, args.sitedir)
+        print(os.path.join(args.datadir, args.sitedir))
         if not os.path.isdir(os.path.join(args.datadir, args.sitedir)):
             raise ONEFluxError("Site dir not found: {d}".format(d=args.sitedir))
 
diff --git a/tests/python/integration/test_partitioning.py b/tests/python/integration/test_partitioning.py
index 1e313782..93edc5e7 100644
--- a/tests/python/integration/test_partitioning.py
+++ b/tests/python/integration/test_partitioning.py
@@ -1,12 +1,12 @@
 import pytest
 import os, glob
 import errno
-import shutil
-import urllib
-from distutils.dir_util import copy_tree
+import urllib.request
+from shutil import copytree
 import logging
 import time
 
+
 _log = logging.getLogger(__name__)
 
 @pytest.fixture(scope="module")
@@ -16,8 +16,8 @@ def get_data():
     as a fixture in this class. 
     '''
     from zipfile import ZipFile
-    urllib.urlopen('ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_output.zip') 
-    urllib.urlopen('ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_input.zip') 
+    urllib.request.urlopen('ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_output.zip')
+    urllib.request.urlopen('ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_input.zip')
   
     input_zip = "US-ARc_sample_input.zip"
     output_zip = "US-ARc_sample_output.zip"
@@ -61,19 +61,19 @@ def setup_data():
             
     testdata = 'tests/python/integration/input/step_10/US-ARc_sample_input'
     
-    copy_tree('tests/data/test_input/', testdata)
+    copytree('tests/data/test_input/', testdata)
 
     refoutdir = 'tests/data/test_output/US-ARc_sample_output'
 
-    copy_tree(os.path.join(refoutdir, '07_meteo_proc'), \
+    copytree(os.path.join(refoutdir, '07_meteo_proc'), \
         os.path.join(testdata, '07_meteo_proc'))
-    copy_tree(os.path.join(refoutdir, '08_nee_proc'), \
+    copytree(os.path.join(refoutdir, '08_nee_proc'), \
         os.path.join(testdata, '08_nee_proc/'))
-    copy_tree(os.path.join(refoutdir, '02_qc_auto'), \
+    copytree(os.path.join(refoutdir, '02_qc_auto'), \
         os.path.join(testdata, '02_qc_auto/'))
     
     
-def test_run_partition_nt(setup_data):
+def test_run_partition_nt(get_data, setup_data):
     '''
     Run partition_nt on single percentile.
     '''

From d93da54ba568a42ea02e74fd1489d6e92b7a1fb1 Mon Sep 17 00:00:00 2001
From: Amy Pike <amymaypike@hotmail.co.uk>
Date: Fri, 14 Jun 2024 15:27:42 +0100
Subject: [PATCH 35/39] further python 3 updates

---
 tests/python/integration/test_partitioning.py | 67 +++++++++++--------
 1 file changed, 38 insertions(+), 29 deletions(-)

diff --git a/tests/python/integration/test_partitioning.py b/tests/python/integration/test_partitioning.py
index 93edc5e7..0d3b3a40 100644
--- a/tests/python/integration/test_partitioning.py
+++ b/tests/python/integration/test_partitioning.py
@@ -6,26 +6,33 @@
 import logging
 import time
 
-
 _log = logging.getLogger(__name__)
 
+
 @pytest.fixture(scope="module")
 def get_data():
     '''
     Utilising python to obtain sample test data. Function currently unused. 
     as a fixture in this class. 
     '''
+    if os.path.isdir('tests/data'):
+        _log.info('Skipping sample data retrieval as sample test data directory '
+                  'already exists: ./tests/data')
+        return
+
     from zipfile import ZipFile
-    urllib.request.urlopen('ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_output.zip')
-    urllib.request.urlopen('ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_input.zip')
-  
-    input_zip = "US-ARc_sample_input.zip"
-    output_zip = "US-ARc_sample_output.zip"
-    
-    with ZipFile(input_zip) as zi, ZipFile(output_zip) as zo:
+    input_zip_name, headers = urllib.request.urlretrieve('ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_input.zip')
+    output_zip_name, headers = urllib.request.urlretrieve('ftp://ftp.fluxdata.org/.ameriflux_downloads/.test/US-ARc_sample_output.zip')
+
+    _log.info('successfully downloaded sample data zip files. Extracting...')
+
+    with ZipFile(input_zip_name) as zi, ZipFile(output_zip_name) as zo:
         zi.extractall(path='tests/data/test_input')
         zo.extractall(path='tests/data/test_output')
 
+    _log.info('sample data successfully extracted from zip files')
+
+
 def equal_csv(csv_1, csv_2):
     '''
     Check equality of two csv files.
@@ -45,7 +52,7 @@ def equal_csv(csv_1, csv_2):
 
 
 @pytest.fixture
-def setup_data():
+def setup_data(get_data):
     '''
     Set up input data for run_partition_nt test. 
     
@@ -58,22 +65,22 @@ def setup_data():
     except OSError as e:
         if e.errno == errno.EEXIST:
             print("directory exists")
-            
+
     testdata = 'tests/python/integration/input/step_10/US-ARc_sample_input'
-    
-    copytree('tests/data/test_input/', testdata)
+
+    copytree('tests/data/test_input/', testdata, dirs_exist_ok=True)
 
     refoutdir = 'tests/data/test_output/US-ARc_sample_output'
 
     copytree(os.path.join(refoutdir, '07_meteo_proc'), \
-        os.path.join(testdata, '07_meteo_proc'))
+             os.path.join(testdata, '07_meteo_proc'), dirs_exist_ok=True)
     copytree(os.path.join(refoutdir, '08_nee_proc'), \
-        os.path.join(testdata, '08_nee_proc/'))
+             os.path.join(testdata, '08_nee_proc/'), dirs_exist_ok=True)
     copytree(os.path.join(refoutdir, '02_qc_auto'), \
-        os.path.join(testdata, '02_qc_auto/'))
-    
-    
-def test_run_partition_nt(get_data, setup_data):
+             os.path.join(testdata, '02_qc_auto/'), dirs_exist_ok=True)
+
+
+def test_run_partition_nt(setup_data):
     '''
     Run partition_nt on single percentile.
     '''
@@ -81,33 +88,35 @@ def test_run_partition_nt(get_data, setup_data):
     refoutdir = "./tests/data/test_output/"
     siteid = "US-ARc"
     sitedir = "US-ARc_sample_input"
-    years = [2005] # years = [2005, 2006]
+    years = [2005]  # years = [2005, 2006]
     # PROD_TO_COMPARE = ['c', 'y']
-    PROD_TO_COMPARE = ['y',]
+    PROD_TO_COMPARE = ['y', ]
     # PERC_TO_COMPARE = ['1.25', '3.75',]
-    PERC_TO_COMPARE = ['1.25',]
-    
+    PERC_TO_COMPARE = ['1.25', ]
+
     from oneflux.tools.partition_nt import remove_previous_run, run_python
-    remove_previous_run(datadir=datadir, siteid=siteid, sitedir=sitedir, python=True, 
-                        prod_to_compare=PROD_TO_COMPARE, perc_to_compare=PERC_TO_COMPARE,
+    remove_previous_run(datadir=datadir, siteid=siteid, sitedir=sitedir, python=True,
+                        prod_to_compare=PROD_TO_COMPARE,
+                        perc_to_compare=PERC_TO_COMPARE,
                         years_to_compare=years)
 
-    run_python(datadir=datadir, siteid=siteid, sitedir=sitedir, prod_to_compare=PROD_TO_COMPARE,
+    run_python(datadir=datadir, siteid=siteid, sitedir=sitedir,
+               prod_to_compare=PROD_TO_COMPARE,
                perc_to_compare=PERC_TO_COMPARE, years_to_compare=years)
-    
+
     # check whether csv of "output" is same as csv of reference
 
     # the generated output is actually in the "input" directory.
     rootdir = os.path.join(datadir, sitedir, "10_nee_partition_nt")
     nee_y_files = glob.glob(os.path.join(rootdir, "nee_y_1.25_US-ARc_2005*"))
     nee_y_files = filter(lambda x: not x.endswith('_orig.csv'), nee_y_files)
-    
+
     # paths to the "reference" output data
     refoutdir = os.path.join(refoutdir, "US-ARc_sample_output", "10_nee_partition_nt")
     ref_nee_y_files = glob.glob(os.path.join(refoutdir, "nee_y_1.25_US-ARc_2005*"))
-   
+
     assert len(nee_y_files) == len(ref_nee_y_files)
-    retval = True 
+    retval = True
     for f, b in zip(nee_y_files, ref_nee_y_files):
         print(f, b)
         assert equal_csv(f, b) == True

From 9dd669790381698a4ebd40752261e47484bd3c52 Mon Sep 17 00:00:00 2001
From: Amy Pike <amymaypike@hotmail.co.uk>
Date: Fri, 21 Jun 2024 11:48:02 +0100
Subject: [PATCH 36/39] change load_output string dtype from 'a25' to 'U12'; I
 think the dtype needs to change due to python 2/3 changes to the representation
 of byte objects, but I don't think U12 is the right thing to use either

---
 oneflux/partition/library.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/oneflux/partition/library.py b/oneflux/partition/library.py
index 6a5829c1..0daa0824 100644
--- a/oneflux/partition/library.py
+++ b/oneflux/partition/library.py
@@ -76,7 +76,7 @@ def load_output(filename, delimiter=',', skip_header=1):
     _log.debug("Finished loading headers: {h}".format(h=headers))
 
     _log.debug("Started loading data")
-    dtype = [(i, ('a25' if i.lower() in STRING_HEADERS else FLOAT_PREC)) for i in headers]
+    dtype = [(i, ('U12' if i.lower() in STRING_HEADERS else FLOAT_PREC)) for i in headers]
     vfill = [('' if i.lower() in STRING_HEADERS else numpy.NaN) for i in headers]
     data = numpy.genfromtxt(fname=filename, dtype=dtype, names=headers, delimiter=delimiter, skip_header=skip_header, missing_values='-9999,-9999.0,-6999,-6999.0, ', usemask=True)
     data = numpy.ma.filled(data, vfill)

From cd8fbf59bb2e41555d3b57ef63ab55ccedf26ce0 Mon Sep 17 00:00:00 2001
From: Amy Pike <amymaypike@hotmail.co.uk>
Date: Fri, 21 Jun 2024 13:48:45 +0100
Subject: [PATCH 37/39] Move datatype for numpy genfromtxt for string variables
 into separate constant

---
 oneflux/partition/auxiliary.py | 1 +
 oneflux/partition/library.py   | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/oneflux/partition/auxiliary.py b/oneflux/partition/auxiliary.py
index 710d5001..74f5dfb5 100644
--- a/oneflux/partition/auxiliary.py
+++ b/oneflux/partition/auxiliary.py
@@ -28,6 +28,7 @@
 #FLOAT_PREC = 'f8'
 FLOAT_PREC = 'f4'
 DOUBLE_PREC = 'f8'
+STRING_VARIABLE_LENGTH = 'U12'
 
 
 _log = logging.getLogger(__name__)
diff --git a/oneflux/partition/library.py b/oneflux/partition/library.py
index 0daa0824..3ec4abee 100644
--- a/oneflux/partition/library.py
+++ b/oneflux/partition/library.py
@@ -23,7 +23,7 @@
 from oneflux import ONEFluxError
 from oneflux.partition.ecogeo import lloyd_taylor, lloyd_taylor_dt, hlrc_lloyd, hlrc_lloydvpd
 from oneflux.partition.ecogeo import hlrc_lloyd_afix, hlrc_lloydvpd_afix, lloydt_e0fix
-from oneflux.partition.auxiliary import FLOAT_PREC, DOUBLE_PREC, NAN, nan, not_nan
+from oneflux.partition.auxiliary import FLOAT_PREC, DOUBLE_PREC, STRING_VARIABLE_LENGTH, NAN, nan, not_nan
 
 from oneflux.graph.compare import plot_comparison
 from oneflux.utils.files import file_exists_not_empty
@@ -76,7 +76,7 @@ def load_output(filename, delimiter=',', skip_header=1):
     _log.debug("Finished loading headers: {h}".format(h=headers))
 
     _log.debug("Started loading data")
-    dtype = [(i, ('U12' if i.lower() in STRING_HEADERS else FLOAT_PREC)) for i in headers]
+    dtype = [(i, (STRING_VARIABLE_LENGTH if i.lower() in STRING_HEADERS else FLOAT_PREC)) for i in headers]
     vfill = [('' if i.lower() in STRING_HEADERS else numpy.NaN) for i in headers]
     data = numpy.genfromtxt(fname=filename, dtype=dtype, names=headers, delimiter=delimiter, skip_header=skip_header, missing_values='-9999,-9999.0,-6999,-6999.0, ', usemask=True)
     data = numpy.ma.filled(data, vfill)

From ffce7db15f938c8945753de5a8922f2a0278e3d4 Mon Sep 17 00:00:00 2001
From: Amy Pike <amymaypike@hotmail.co.uk>
Date: Fri, 21 Jun 2024 14:20:27 +0100
Subject: [PATCH 38/39] run the 2to3 tool

---
 oneflux/partition/auxiliary.py |  4 ++--
 oneflux/partition/daytime.py   | 10 +++++-----
 oneflux/partition/nighttime.py |  2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/oneflux/partition/auxiliary.py b/oneflux/partition/auxiliary.py
index 74f5dfb5..6aeeb019 100644
--- a/oneflux/partition/auxiliary.py
+++ b/oneflux/partition/auxiliary.py
@@ -112,7 +112,7 @@ def compare_col_to_pvwave(py_array, filename, label=None, diff=False, show_plot=
     s_string = s_string.replace(' ', '')
     s_string = s_string.replace('-1.#IND000', '-9999')
     s_string = s_string.replace('\r', '')
-    u_string = unicode(s_string)
+    u_string = str(s_string)
     pw_array = numpy.genfromtxt(StringIO(u_string), dtype=FLOAT_PREC, delimiter=',', skip_header=0, missing_values='-9999,-9999.0,-6999,-6999.0, ', usemask=True)
     pw_array = numpy.ma.filled(pw_array, numpy.NaN)
     # **************************************************************************************************************************************************
@@ -205,7 +205,7 @@ def compare_col_to_pvwave(py_array, filename, label=None, diff=False, show_plot=
             figure_basename = figure_basename.replace('_PW', '') # remove _PW from PW data source filename
 
         record_interval = (timedelta(minutes=30) if resolution == 'hh' else timedelta(minutes=60))
-        timestamp_list = [datetime(year, 1, 1, 0, 0) + (record_interval * i) for i in xrange(1, py_array.size + 1)]
+        timestamp_list = [datetime(year, 1, 1, 0, 0) + (record_interval * i) for i in range(1, py_array.size + 1)]
 
         _log.debug("Using year={y}, resolution={r}, first timestamp={f}, last timestamp={l}".format(y=year, r=resolution, f=timestamp_list[0], l=timestamp_list[-1]))
 
diff --git a/oneflux/partition/daytime.py b/oneflux/partition/daytime.py
index 613582c4..eecc2946 100644
--- a/oneflux/partition/daytime.py
+++ b/oneflux/partition/daytime.py
@@ -909,7 +909,7 @@ def estimate_parasets(data, winsize, fguess, trimperc, name_out, dt_output_dir,
     ###############################################
 
     #### Creating the arrays we're going to use
-    n_parasets = long(365 / winsize) * 2
+    n_parasets = int(365 / winsize) * 2
     params = numpy.zeros((3, 2 * len(fguess), n_parasets), dtype=FLOAT_PREC)
     params_ok = numpy.zeros((2 * len(fguess), n_parasets), dtype=FLOAT_PREC)
     params_nok = numpy.zeros((2 * len(fguess), n_parasets), dtype=FLOAT_PREC)
@@ -1041,7 +1041,7 @@ def estimate_parasets(data, winsize, fguess, trimperc, name_out, dt_output_dir,
         #ind[i, :, :] = long((day_begin + winsize / 2.0) * 48.0)
 
         #### Calculate the first index of the window we're using now
-        ind[:, :, i] = long((day_begin + winsize / 2.0) * 48.0)
+        ind[:, :, i] = int((day_begin + winsize / 2.0) * 48.0)
 
         '''
         #print("ind[:, :, i]")
@@ -1938,12 +1938,12 @@ def percentiles_fn(data, columns, values=[0.0, 0.25, 0.5, 0.75, 1.0], remove_mis
 
         #### Setting ind to the percentile wanted
         if values[i] <= 0.5:
-            ind = long(values[i] * n_elements)
+            ind = int(values[i] * n_elements)
         else:
-            ind = long(values[i] * (n_elements + 1))
+            ind = int(values[i] * (n_elements + 1))
 
         if ind >= n_elements:
-            ind = n_elements - long(1)
+            ind = n_elements - int(1)
 
         if i == 0:
             result = data[columns[0]][sorted_index_arr[ind]]
diff --git a/oneflux/partition/nighttime.py b/oneflux/partition/nighttime.py
index a2d7c5bd..0791b69b 100644
--- a/oneflux/partition/nighttime.py
+++ b/oneflux/partition/nighttime.py
@@ -246,7 +246,7 @@ def flux_partition(data, lat, tempvar='tair', nomsg=False, temp_output_filename=
     julmin, julmax = int(juldays[0]), int(numpy.max(juldays))  ### first/last day of year
     n_regr = 0                                                 ### counter of number of regressions/optimizations
 
-    window_steps = range(julmin, julmax + 1, STEP_SIZE)
+    window_steps = list(range(julmin, julmax + 1, STEP_SIZE))
 
     # TODO: (potential) add e0_1_list, e0_2_list, e0_3_list, and corresponding se and idx to track individual
 

From 073abd1a77a5e249ef52a56acd31c04cef5eb76f Mon Sep 17 00:00:00 2001
From: Amy Pike <amymaypike@hotmail.co.uk>
Date: Fri, 21 Jun 2024 14:23:25 +0100
Subject: [PATCH 39/39] replace second instance of numpy importing strings as
 byte objects

---
 oneflux/partition/library.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/oneflux/partition/library.py b/oneflux/partition/library.py
index 3ec4abee..f9afe641 100644
--- a/oneflux/partition/library.py
+++ b/oneflux/partition/library.py
@@ -1299,7 +1299,7 @@ def load_outputs(filename, delimiter=',', skip_header=1, is_not_hourly=True, is_
     _log.debug("Loaded headers: {h}".format(h=headers))
 
     _log.debug("Started loading data")
-    dtype = [(i, ('a25' if i.lower() in STRING_HEADERS else FLOAT_PREC)) for i in headers]
+    dtype = [(i, (STRING_VARIABLE_LENGTH if i.lower() in STRING_HEADERS else FLOAT_PREC)) for i in headers]
     vfill = [('' if i.lower() in STRING_HEADERS else numpy.NaN) for i in headers]
     data = numpy.genfromtxt(fname=filename, dtype=dtype, names=headers, delimiter=delimiter, skip_header=skip_header, missing_values='-9999,-9999.0,-6999,-6999.0, ', usemask=True)
     data = numpy.ma.filled(data, vfill)