From 660de072b1b048854a9e7e10d897b0116f6c3a8a Mon Sep 17 00:00:00 2001 From: James Emberton Date: Mon, 9 Dec 2024 19:26:11 +0000 Subject: [PATCH 01/27] modularised --- .../cpdAssignUStarTh20100901.m | 522 ++++++++++-------- .../test_cpdAssignUStarTh20100901.py | 2 +- 2 files changed, 280 insertions(+), 244 deletions(-) diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m index 5632e02f..b03d29a5 100644 --- a/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m +++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m @@ -1,243 +1,279 @@ - function [CpA,nA,tW,CpW,cMode,cFailure,fSelect,sSine,FracSig,FracModeD,FracSelect] ... - = cpdAssignUStarTh20100901(Stats,fPlot,cSiteYr,varargin) - -%cpdAssignUStarTh20100901 -% aggregates and assigns uStarTh from the Stats* structured records -% as output by cpdBootstrapUStarTh20100901. -% -%Syntax: -% -% [CpA,nA,tW,CpW,cMode,cFailure,fSelect,sSine,FracSig,FracModeD,FracSelect] ... -% = cpdExtractuStarTh20100901 (Stats,fPlot,cSiteYr) -% -% where: -% -% - Stats is a structured record output by -% cpdBootstrapuStarTh20100901, can be: -% - Stats2 (2-parameter operational change-point model) or -% - Stats3 (3-parameter diagnostic change-point model) -% - fPlot is a flag that is set to 1 for plotting -% the aggregation analysis -% - cSiteYr is text containing site and year for the fPlot plot -% -% - CpA is a scalar or vector of annual uStarTh (ChangePoint) means -% - nA is the number of selected change-points in the annual mean -% - CpW and tW are vectors showing seasonal variation in uStarTh -% - cMode is the dominant change-point mode: -% D Deficit (b1>0) or E Excess (b1<0) -% - cFailure is a string containing failure messages -% - fSelect is an array the same size as Stats* that flags the -% selected Cp values for computing CpA and CpW -% - sSine contains the coefficients of an annual sine curve -% fit to tW and CpW -% - FracSig,FracModeD,FracSelect are the fraction of attempted -% change-point detections that are significant, in mode D and -% select. -% -%The Stats2 or Stats3 records may be 2D (nWindows x nStrata) -%or 3D (nWindows x nStrata x nBoot). If 2D, then CpA -%is a scalar and CpW is averaged across the nStrata temperature strata. -%If 3D, then CpA is a vector of length nBoot and CpW is averaged -%across nBoot bootstraps and nStrata temperature strata. -%Stats input is accepted from both 4Season (nWindows=4) -%and flexible window (nWindows>=7) processing. -% -%The aggregation process is selective, selecting only: -% - significant change points (p <= 0.05) -% - in the dominant mode (Deficit (b1>0) or Excess (b1<0)) -% - after excluding outliers (based on regression stats). -%No assignment is made if the detection failure rate -%is too high. - -% ======================================================================== -% ======================================================================== - -% Functions called: -% -% fcBin, fcDatetick, fcEqnAnnualSine, fcNaniqr, fcReadFields -% fcr2Calc, fcx2colvec, fcx2rowvec -% stats toolbox: nanmedian, nanmean, nlinfit, prctile - -% Written 16 April 2010 by Alan Barr - -% ======================================================================= -% ======================================================================= - - for i = 1:length(varargin) - a = varargin{i}; - if iscell(a) && strcmp(a{1}, 'jsondecode') - for j = 2:length(a) - switch a{j} - case 1 - Stats = jsondecode(Stats); - end - end - end - end - - CpA=[]; nA=[]; tW=[]; CpW=[]; fSelect=[]; cMode=''; cFailure=''; sSine=[]; - FracSig=[]; FracModeD=[]; FracSelect=[]; - -% Compute window sizes etc. - - nDim=ndims(Stats); - switch nDim; - case 2; [nWindows,nBoot]=size(Stats); nStrata=1; nStrataN=0.5; - case 3; [nWindows,nStrata,nBoot]=size(Stats); nStrataN=1; - otherwise; cFailure='Stats must be 2D or 3D.'; return; - end; - nWindowsN=4; nSelectN=nWindowsN*nStrataN*nBoot; - - CpA=NaN*ones(nBoot,1); nA=NaN*ones(nBoot,1); tW=NaN*ones(nWindows,1); CpW=NaN*ones(nWindows,1); - -% Extract variable arrays from Stats structure. -% Reassign mt and Cp as x* to retain array shape, -% then convert the extracted arrays to column vectors. - - cVars={'mt','Cp','b1','c2','cib1','cic2','p'}; nVars=length(cVars); - for i=1:nVars; - cv=char(cVars(i)); eval([cv '=fcReadFields(Stats,''' cv ''');']); - switch cv; - case 'mt'; xmt=mt; - case 'Cp'; xCp=Cp; - otherwise; - end; - eval([cv '=fcx2colvec(' cv ');']); - end; - pSig=0.05; fP = p<=pSig; - -% Determine if Stats input is from the operational 2-parameter -% or diagnostic 3-parameter change-point model -% and set c2 and cic2 to zero if 2-parameter - - nPar=3; if sum(~isnan(c2))==0; nPar=2; c2=0*b1; cic2=c2; end; - -% Classify Cp regressions by slopes of b1 and c2 regression coeff: -% - NS: not sig, mfP=NaN, p>0.05 -% - ModeE: atypical significant Cp (b1=c2) - - iTry=find(~isnan(mt)); nTry=length(iTry); - iCp=find(~isnan(b1+c2+Cp)); nCp=length(iCp); - iNS=find(fP==0 & ~isnan(b1+c2+Cp)); nNS=length(iNS); - iSig=find(fP==1 & ~isnan(b1+c2+Cp)); nSig=length(iSig); - iModeE=find(fP==1 & b1=c2); nModeD=length(iModeD); - -% Evaluate and accept primary mode of significant Cps - - if nModeD>=nModeE; iSelect=iModeD; cMode='D'; else iSelect=iModeE; cMode='E'; end; nSelect=length(iSelect); - fSelect=zeros(size(fP)); fSelect(iSelect)=1; fSelect=logical(fSelect); - fModeD=NaN*ones(size(fP)); fModeD(iModeD)=1; - fModeE=NaN*ones(size(fP)); fModeE(iModeE)=1; - - FracSig=nSig/nTry; FracModeD=nModeD/nSig; FracSelect=nSelect/nTry; - -% Abort analysis if too few of the regressions produce significant Cps. - - if FracSelect<0.10; cFailure='Less than 10% successful detections. '; return; end; - -% Exclude outliers from Select mode based on Cp and regression stats - - switch nPar; - case 2; x=[Cp b1 cib1]; nx=3; - case 3; x=[Cp b1 c2 cib1 cic2]; nx=5; - end; - - mx=nanmedian(x); sx=fcNaniqr(x); - xNorm=NaN*x; for i=1:nx; xNorm(:,i)=(x(:,1)-mx(i))/sx(i); end; - xNormX=max(abs(xNorm),[],2); - ns=5; fOut=(xNormX>ns); iOut=find(fOut); - iSelect=setdiff(iSelect,iOut); nSelect=length(iSelect); - fSelect = ~fOut & fSelect; - fModeD(iOut)=NaN; iModeD=find(fModeD==1); nModeD=length(iModeD); - fModeE(iOut)=NaN; iModeE=find(fModeE==1); nModeE=length(iModeE); - iSig=union(iModeD,iModeE); nSig=length(iSig); - - FracSig=nSig/nTry; FracModeD=nModeD/nSig; FracSelect=nSelect/nTry; - - if nSelect=nBins*30; - [n,mx,my]=fcBin(mt(iModeD),Cp(iModeD),[],round(nModeD/nBins)); - hold on; plot(mx,my,'ko-','MarkerFaceColor','y','MarkerSize',8,'LineWidth',2); - end; - if nModeE>=nBins*30; - [n,mx,my]=fcBin(mt(iModeE),Cp(iModeE),[],round(nModeE/nBins)); - hold on; plot(mx,my,'bo-','MarkerFaceColor','c','MarkerSize',8,'LineWidth',2); - end; - fcDatetick(mt,'Mo',4,1); ylabel('Cp'); - ylabel('Raw Cp Modes D (green) E (red)'); ylim([0 prctile(Cp,99.9)]); - hold off; - title(sprintf('%s Stats%g Mode%s nSelect/nTry: %g/%g uStarTh: %5.3f (%5.3f) ', ... - cSiteYr,nPar,cMode,nSelect,nTry, ... - nanmedian(Cp(iSelect)),0.5*diff(prctile(Cp(iSelect),[2.5 97.5])) )); - - subplot('position',[0.08 0.06 0.60 0.38]); hold on; box on; - switch cMode; case 'G'; c='g'; case 'L'; c='b'; otherwise; c='k'; end; - plot(mt(iSelect),Cp(iSelect),[c '.'],mtHat,CpHat,'r-','LineWidth',3); - plot(tW,CpW,'ro','MarkerFaceColor','y','MarkerSize',9','LineWidth',2); - fcDatetick(mt(iSelect),'Mo',4,1); - ylabel('Select Cp'); ylim([0 prctile(Cp(iSelect),99)]); - title(sprintf('Cp = %5.3f + %5.3f sin(wt - %3.0f) (r^2 = %5.3f) ',bSine,r2 )); - - subplot('position',[0.76 0.56 0.22 0.38]); - hist(CpA,30); grid on; box on; - xlim([min(CpA) max(CpA)]); - xlabel('Annual \itu_*^{Th}'); ylabel('Frequency'); - title(sprintf('Median (CI): %5.3f (%5.3f) ', ... - nanmedian(CpA),0.5*diff(prctile(CpA,[2.5 97.5])) )); - - subplot('position',[0.76 0.06 0.22 0.38]); - plot(mtByWindow,FracSelectByWindow,'o-'); - fcDatetick(mtByWindow,'Mo',4,1); - ylabel('FracSelectByWindow'); ylim([0 1]); - - end; - -% ======================================================================= -% ======================================================================= - +function [CpA, nA, tW, CpW, cMode, cFailure, fSelect, sSine, FracSig, FracModeD, FracSelect] = cpdAssignUStarTh20100901(Stats, fPlot, cSiteYr, varargin) + + % Initialize Variables + CpA = []; nA = []; tW = []; CpW = []; fSelect = []; cMode = ''; cFailure = ''; sSine = []; + FracSig = []; FracModeD = []; FracSelect = []; + + % Decode JSON if Needed + for i = 1:length(varargin) + a = varargin{i}; + if iscell(a) && strcmp(a{1}, 'jsondecode') + for j = 2:length(a) + if a{j} == 1 + Stats = jsondecode(Stats); + end + end + end + end + + % Determine Window Sizes + nDim = ndims(Stats); + if nDim == 2 + [nWindows, nBoot] = size(Stats); + nStrata = 1; nStrataN = 0.5; + elseif nDim == 3 + [nWindows, nStrata, nBoot] = size(Stats); + nStrataN = 1; + else + cFailure = 'Stats must be 2D or 3D.'; + return; + end + + % Set Reference Values + nWindowsN = 4; + nSelectN = nWindowsN * nStrataN * nBoot; + + % Preallocate Outputs + CpA = NaN(nBoot, 1); + nA = NaN(nBoot, 1); + tW = NaN(nWindows, 1); + CpW = NaN(nWindows, 1); + + % Extract Variables from Stats Structure + cVars = {'mt', 'Cp', 'b1', 'c2', 'cib1', 'cic2', 'p'}; + nVars = length(cVars); + + for i = 1:nVars + cv = cVars{i}; + eval([cv ' = fcReadFields(Stats, ''' cv ''');']); + + if strcmp(cv, 'mt') + xmt = mt; + elseif strcmp(cv, 'Cp') + xCp = Cp; + end + + eval([cv ' = fcx2colvec(' cv ');']); + end + + % Identify Significant Change Points + pSig = 0.05; + fP = p <= pSig; + + % Identify Model Type + if sum(~isnan(c2)) == 0 + nPar = 2; + c2 = zeros(size(b1)); + cic2 = zeros(size(b1)); + else + nPar = 3; + end + + % Classify Significant Change Points + iTry = find(~isnan(mt)); + nTry = length(iTry); + + %iCp = find(~isnan(b1 + c2 + Cp)); + %nCp = length(iCp); + + %iNS = find(fP == 0 & ~isnan(b1 + c2 + Cp)); + %nNS = length(iNS); + + iSig = find(fP == 1 & ~isnan(b1 + c2 + Cp)); + nSig = length(iSig); + + iModeE = find(fP == 1 & b1 < c2); + nModeE = length(iModeE); + + iModeD = find(fP == 1 & b1 >= c2); + nModeD = length(iModeD); + + if nModeD >= nModeE + iSelect = iModeD; + cMode = 'D'; + else + iSelect = iModeE; + cMode = 'E'; + end + + nSelect = length(iSelect); + + % Update Selection Flags + fSelect = false(size(fP)); + fSelect(iSelect) = true; + + fModeD = NaN(size(fP)); + fModeD(iModeD) = 1; + + fModeE = NaN(size(fP)); + fModeE(iModeE) = 1; + + + FracSig = nSig / nTry; + FracModeD = nModeD / nSig; + FracSelect = nSelect / nTry; + + % Abort if Too Few Selections + if FracSelect < 0.10 + cFailure = 'Less than 10% successful detections.'; + return; + end + + % Configure Regression Matrix + if nPar == 2 + x = [Cp, b1, cib1]; + %nx = 3; + else + x = [Cp, b1, c2, cib1, cic2]; + %nx = 5; + end + + % Exclude Outliers + xNormX = computeStandardizedScores(x); + + % Identify Outliers + [fOut, iOut] = identifyOutliers(xNormX, 5); + + % Update Selected Indices + [iSelect, nSelect, fSelect] = updateSelectedIndices(iSelect, iOut, fSelect, fOut); + + % Update Modes + [iModeD, nModeD] = updateModes(fModeD,iOut); + [iModeE, ~] = updateModes(fModeE,iOut); + + % Recalculate Significant Indices + iSig = union(iModeD, iModeE); + nSig = length(iSig); + + FracSig = nSig / nTry; + FracModeD = nModeD / nSig; + FracSelect = nSelect / nTry; + + if nSelect < nSelectN + cFailure = sprintf('Too few selected change points: %g/%g', nSelect, nSelectN); + return; + end + + % Aggregate Seasonal and Annual Values + [CpA, nA, ~] = aggregateSeasonalAndAnnualValues(xCp, iSelect, nDim, nWindows, nStrata, nBoot); + + + % Aggregate Seasonal Means + [tW, CpW] = aggregateSeasonalMeans(mt, Cp, xmt, iSelect, nWindows, nStrata, nBoot); + + % Fit Annual Sine Curve + sSine = fitAnnualSineCurve(mt, Cp, iSelect); + +end + +function sSine = fitAnnualSineCurve(mt, Cp, iSelect) + % fitAnnualSineCurve + % Fits an annual sine curve to the selected data points and returns + % the sine coefficients and r-squared value. + + % Initial Guess for Sine Coefficients + bSine = [1, 1, 1]; + + % Fit Sine Curve Using Nonlinear Regression + bSine = nlinfit(mt(iSelect), Cp(iSelect), 'fcEqnAnnualSine', bSine); + + % Calculate Predicted Values and R-Squared + predictedCp = fcEqnAnnualSine(bSine, mt(iSelect)); + r2 = fcr2Calc(Cp(iSelect), predictedCp); + + % Adjust Phase and Wrap to One Year + bSine(3) = mod(bSine(3), 365.25); + + % Return Fitted Sine Coefficients and R-Squared + sSine = [bSine, r2]; + + end + +function [tW, CpW] = aggregateSeasonalMeans(mt, Cp, xmt, iSelect, nWindows, nStrata, nBoot) + % aggregateSeasonalMeans + % Aggregates seasonal means for time and change points. + + % Calculate Median Number of Windows + nW = nanmedian(sum(~isnan(reshape(xmt, nWindows, nStrata * nBoot)))); + + % Sort Selected Measurements + [mtSelect, i] = sort(mt(iSelect)); + CpSelect = Cp(iSelect(i)); + + % Define Bins Based on Percentiles + xBins = prctile(mtSelect, 0:(100/nW):100); + + % Aggregate Seasonal Means Using fcBin + [~, tW, CpW] = fcBin(mtSelect, CpSelect, xBins, 0); + + end + +function [CpA, nA, xCpSelect] = aggregateSeasonalAndAnnualValues(xCp, iSelect, nDim, nWindows, nStrata, nBoot) + % aggregateSeasonalAndAnnualValues + % Aggregates seasonal and annual change point values based on selection. + + % Initialize Selection Array + xCpSelect = NaN(size(xCp)); + + % Assign Selected Change Points + xCpSelect(iSelect) = xCp(iSelect); + xCpGF = xCpSelect; + + % Aggregate Values Based on Dimensions + if nDim == 2 + CpA = nanmean(xCpGF); + nA = sum(~isnan(xCpSelect)); + elseif nDim == 3 + CpA = nanmean(reshape(xCpGF, nWindows * nStrata, nBoot)); + nA = sum(~isnan(reshape(xCpSelect, nWindows * nStrata, nBoot))); + else + error('Invalid number of dimensions: Expected 2D or 3D Stats.'); + end + + end + +function xNormX = computeStandardizedScores(x) + % computeStandardizedScores + % Standardizes the matrix x by its median and interquartile range. + + mx = nanmedian(x); + sx = fcNaniqr(x); + xNorm = (x - mx) ./ sx; + xNormX = max(abs(xNorm), [], 2); + + end + +function [fOut, iOut] = identifyOutliers(xNormX, threshold) + % identifyOutliers + % Identifies outliers based on standardized scores. + + fOut = xNormX > threshold; + iOut = find(fOut); + + end + +function [iSelect, nSelect, fSelect] = updateSelectedIndices(iSelect, iOut, fSelect, fOut) + % updateSelectedIndices + % Updates selected indices after removing outliers. + + iSelect = setdiff(iSelect, iOut); + nSelect = length(iSelect); + fSelect = fSelect & ~fOut; + + end + +function [iModeX, nModeX] = updateModes(fModeX, iOut) + % updateModes + % Recalculates mode indices after removing outliers. + + fModeX(iOut) = NaN; + iModeX = find(fModeX == 1); + nModeX = length(iModeX); + + end + + + + + + diff --git a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py index c4e42ed6..ef01ac52 100644 --- a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py +++ b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py @@ -1,5 +1,4 @@ import pytest -import json import matlab.engine import numpy as np @@ -92,3 +91,4 @@ def set_attr(struct_array, key, val): # Assertions for edge cases assert len(results_all_sig[0]) > 0, "Should produce results for all significant change points" assert len(results_no_sig[5]) > 0, "Should produce a failure message for no significant change points" + From d24139fd4c0b0fb1f46f4e027061fd9eb485b021 Mon Sep 17 00:00:00 2001 From: James Emberton Date: Mon, 9 Dec 2024 19:40:56 +0000 Subject: [PATCH 02/27] added meaningful names --- .../cpdAssignUStarTh20100901.m | 300 +++++++++--------- 1 file changed, 148 insertions(+), 152 deletions(-) diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m index b03d29a5..8a439139 100644 --- a/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m +++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m @@ -1,171 +1,167 @@ -function [CpA, nA, tW, CpW, cMode, cFailure, fSelect, sSine, FracSig, FracModeD, FracSelect] = cpdAssignUStarTh20100901(Stats, fPlot, cSiteYr, varargin) +function [annualChangePoint, numAnnualSelected, seasonalTimeWindow, seasonalChangePoint, ... + dominantMode, failureMessage, selectedPointsFlag, sineCurve, ... + fractionSignificant, fractionModeD, fractionSelected] = cpdAssignUStarTh20100901(Stats, plotFlag, siteYearText, varargin) - % Initialize Variables - CpA = []; nA = []; tW = []; CpW = []; fSelect = []; cMode = ''; cFailure = ''; sSine = []; - FracSig = []; FracModeD = []; FracSelect = []; - - % Decode JSON if Needed - for i = 1:length(varargin) - a = varargin{i}; - if iscell(a) && strcmp(a{1}, 'jsondecode') - for j = 2:length(a) - if a{j} == 1 - Stats = jsondecode(Stats); - end +% Initialize Variables +annualChangePoint = []; numAnnualSelected = []; seasonalTimeWindow = []; seasonalChangePoint = []; +selectedPointsFlag = []; dominantMode = ''; failureMessage = ''; sineCurve = []; +fractionSignificant = []; fractionModeD = []; fractionSelected = []; + +% Decode JSON if Needed +for i = 1:length(varargin) + arg = varargin{i}; + if iscell(arg) && strcmp(arg{1}, 'jsondecode') + for j = 2:length(arg) + if arg{j} == 1 + Stats = jsondecode(Stats); end end end - - % Determine Window Sizes - nDim = ndims(Stats); - if nDim == 2 - [nWindows, nBoot] = size(Stats); - nStrata = 1; nStrataN = 0.5; - elseif nDim == 3 - [nWindows, nStrata, nBoot] = size(Stats); - nStrataN = 1; - else - cFailure = 'Stats must be 2D or 3D.'; - return; - end - - % Set Reference Values - nWindowsN = 4; - nSelectN = nWindowsN * nStrataN * nBoot; - - % Preallocate Outputs - CpA = NaN(nBoot, 1); - nA = NaN(nBoot, 1); - tW = NaN(nWindows, 1); - CpW = NaN(nWindows, 1); - - % Extract Variables from Stats Structure - cVars = {'mt', 'Cp', 'b1', 'c2', 'cib1', 'cic2', 'p'}; - nVars = length(cVars); - - for i = 1:nVars - cv = cVars{i}; - eval([cv ' = fcReadFields(Stats, ''' cv ''');']); - - if strcmp(cv, 'mt') - xmt = mt; - elseif strcmp(cv, 'Cp') - xCp = Cp; - end - - eval([cv ' = fcx2colvec(' cv ');']); - end - - % Identify Significant Change Points - pSig = 0.05; - fP = p <= pSig; - - % Identify Model Type - if sum(~isnan(c2)) == 0 - nPar = 2; - c2 = zeros(size(b1)); - cic2 = zeros(size(b1)); - else - nPar = 3; - end - - % Classify Significant Change Points - iTry = find(~isnan(mt)); - nTry = length(iTry); - - %iCp = find(~isnan(b1 + c2 + Cp)); - %nCp = length(iCp); - - %iNS = find(fP == 0 & ~isnan(b1 + c2 + Cp)); - %nNS = length(iNS); - - iSig = find(fP == 1 & ~isnan(b1 + c2 + Cp)); - nSig = length(iSig); - - iModeE = find(fP == 1 & b1 < c2); - nModeE = length(iModeE); - - iModeD = find(fP == 1 & b1 >= c2); - nModeD = length(iModeD); - - if nModeD >= nModeE - iSelect = iModeD; - cMode = 'D'; - else - iSelect = iModeE; - cMode = 'E'; - end - - nSelect = length(iSelect); - - % Update Selection Flags - fSelect = false(size(fP)); - fSelect(iSelect) = true; - - fModeD = NaN(size(fP)); - fModeD(iModeD) = 1; - - fModeE = NaN(size(fP)); - fModeE(iModeE) = 1; +end - - FracSig = nSig / nTry; - FracModeD = nModeD / nSig; - FracSelect = nSelect / nTry; - - % Abort if Too Few Selections - if FracSelect < 0.10 - cFailure = 'Less than 10% successful detections.'; - return; - end - - % Configure Regression Matrix - if nPar == 2 - x = [Cp, b1, cib1]; - %nx = 3; - else - x = [Cp, b1, c2, cib1, cic2]; - %nx = 5; - end +% Determine Window Sizes +numDimensions = ndims(Stats); +if numDimensions == 2 + [numWindows, numBootstraps] = size(Stats); + numTemperatureStrata = 1; temperatureStrataFactor = 0.5; +elseif numDimensions == 3 + [numWindows, numTemperatureStrata, numBootstraps] = size(Stats); + temperatureStrataFactor = 1; +else + failureMessage = 'Stats must be 2D or 3D.'; + return; +end - % Exclude Outliers - xNormX = computeStandardizedScores(x); - - % Identify Outliers - [fOut, iOut] = identifyOutliers(xNormX, 5); +% Set Reference Values +referenceWindows = 4; +requiredSelectionCount = referenceWindows * temperatureStrataFactor * numBootstraps; - % Update Selected Indices - [iSelect, nSelect, fSelect] = updateSelectedIndices(iSelect, iOut, fSelect, fOut); - - % Update Modes - [iModeD, nModeD] = updateModes(fModeD,iOut); - [iModeE, ~] = updateModes(fModeE,iOut); +% Preallocate Outputs +annualChangePoint = NaN(numBootstraps, 1); +numAnnualSelected = NaN(numBootstraps, 1); +seasonalTimeWindow = NaN(numWindows, 1); +seasonalChangePoint = NaN(numWindows, 1); - % Recalculate Significant Indices - iSig = union(iModeD, iModeE); - nSig = length(iSig); +% Extract Variables from Stats Structure +variableNames = {'mt', 'Cp', 'b1', 'c2', 'cib1', 'cic2', 'p'}; +numVariables = length(variableNames); - FracSig = nSig / nTry; - FracModeD = nModeD / nSig; - FracSelect = nSelect / nTry; +for i = 1:numVariables + variableName = variableNames{i}; + eval([variableName ' = fcReadFields(Stats, ''' variableName ''');']); - if nSelect < nSelectN - cFailure = sprintf('Too few selected change points: %g/%g', nSelect, nSelectN); - return; + if strcmp(variableName, 'mt') + measurementTime = mt; + elseif strcmp(variableName, 'Cp') + changePoint = Cp; end - % Aggregate Seasonal and Annual Values - [CpA, nA, ~] = aggregateSeasonalAndAnnualValues(xCp, iSelect, nDim, nWindows, nStrata, nBoot); + eval([variableName ' = fcx2colvec(' variableName ');']); +end - - % Aggregate Seasonal Means - [tW, CpW] = aggregateSeasonalMeans(mt, Cp, xmt, iSelect, nWindows, nStrata, nBoot); - - % Fit Annual Sine Curve - sSine = fitAnnualSineCurve(mt, Cp, iSelect); +% Identify Significant Change Points +significanceThreshold = 0.05; +significantFlag = p <= significanceThreshold; +% Identify Model Type +if sum(~isnan(c2)) == 0 + numParameters = 2; + c2 = zeros(size(b1)); + cic2 = zeros(size(b1)); +else + numParameters = 3; end +% Classify Significant Change Points +validMeasurementIndices = find(~isnan(mt)); +numValidMeasurements = length(validMeasurementIndices); + +nonSignificantIndices = find(significantFlag == 0 & ~isnan(b1 + c2 + Cp)); +numNonSignificant = length(nonSignificantIndices); + +significantIndices = find(significantFlag == 1 & ~isnan(b1 + c2 + Cp)); +numSignificant = length(significantIndices); + +modeEIndices = find(significantFlag == 1 & b1 < c2); +numModeE = length(modeEIndices); + +modeDIndices = find(significantFlag == 1 & b1 >= c2); +numModeD = length(modeDIndices); + +% Select Dominant Mode +if numModeD >= numModeE + selectedIndices = modeDIndices; + dominantMode = 'D'; +else + selectedIndices = modeEIndices; + dominantMode = 'E'; +end + +numSelected = length(selectedIndices); + +% Update Selection Flags +selectedPointsFlag = false(size(significantFlag)); +selectedPointsFlag(selectedIndices) = true; + +modeDFlag = NaN(size(significantFlag)); +modeDFlag(modeDIndices) = 1; + +modeEFlag = NaN(size(significantFlag)); +modeEFlag(modeEIndices) = 1; + +fractionSignificant = numSignificant / numValidMeasurements; +fractionModeD = numModeD / numSignificant; +fractionSelected = numSelected / numValidMeasurements; + +% Abort if Too Few Selections +if fractionSelected < 0.10 + failureMessage = 'Less than 10% successful detections.'; + return; +end + +% Configure Regression Matrix +if numParameters == 2 + regressionMatrix = [Cp, b1, cib1]; +else + regressionMatrix = [Cp, b1, c2, cib1, cic2]; +end + +% Exclude Outliers +standardizedScores = computeStandardizedScores(regressionMatrix); +[outlierFlag, outlierIndices] = identifyOutliers(standardizedScores, 5); + +[selectedIndices, numSelected, selectedPointsFlag] = ... + updateSelectedIndices(selectedIndices, outlierIndices, selectedPointsFlag, outlierFlag); + +[modeDIndices, numModeD] = updateModes(modeDFlag, outlierIndices); +[modeEIndices, ~] = updateModes(modeEFlag, outlierIndices); + +significantIndices = union(modeDIndices, modeEIndices); +numSignificant = length(significantIndices); + +fractionSignificant = numSignificant / numValidMeasurements; +fractionModeD = numModeD / numSignificant; +fractionSelected = numSelected / numValidMeasurements; + +if numSelected < requiredSelectionCount + failureMessage = sprintf('Too few selected change points: %g/%g', numSelected, requiredSelectionCount); + return; +end + +% Aggregate Seasonal and Annual Values +[annualChangePoint, numAnnualSelected, ~] = ... + aggregateSeasonalAndAnnualValues(changePoint, selectedIndices, numDimensions, numWindows, numTemperatureStrata, numBootstraps); + +% Aggregate Seasonal Means +[seasonalTimeWindow, seasonalChangePoint] = ... + aggregateSeasonalMeans(mt, Cp, measurementTime, selectedIndices, numWindows, numTemperatureStrata, numBootstraps); + +% Fit Annual Sine Curve +sineCurve = fitAnnualSineCurve(mt, Cp, selectedIndices); + +end + + function sSine = fitAnnualSineCurve(mt, Cp, iSelect) % fitAnnualSineCurve % Fits an annual sine curve to the selected data points and returns From 61d40bb92e2290016511b72a8c3fc689d7ba1a23 Mon Sep 17 00:00:00 2001 From: James Emberton Date: Mon, 9 Dec 2024 19:56:40 +0000 Subject: [PATCH 03/27] tests --- tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py index ef01ac52..be129ab9 100644 --- a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py +++ b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py @@ -91,4 +91,3 @@ def set_attr(struct_array, key, val): # Assertions for edge cases assert len(results_all_sig[0]) > 0, "Should produce results for all significant change points" assert len(results_no_sig[5]) > 0, "Should produce a failure message for no significant change points" - From 5be214670bb7a8512cc0476baa0ad93cb82f6e11 Mon Sep 17 00:00:00 2001 From: James Emberton Date: Fri, 20 Dec 2024 12:02:11 +0000 Subject: [PATCH 04/27] storage --- .../cpdAssign_functions/fitAnnualSineCurve.m | 22 ++++++ .../test_ustar_cp/test_fitAnnualSine.py | 75 +++++++++++++++++++ 2 files changed, 97 insertions(+) create mode 100644 oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/fitAnnualSineCurve.m create mode 100644 tests/unit_tests/test_ustar_cp/test_fitAnnualSine.py diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/fitAnnualSineCurve.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/fitAnnualSineCurve.m new file mode 100644 index 00000000..fb7b58b3 --- /dev/null +++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/fitAnnualSineCurve.m @@ -0,0 +1,22 @@ +function sSine = fitAnnualSineCurve(mt, Cp, iSelect) + % fitAnnualSineCurve + % Fits an annual sine curve to the selected data points and returns + % the sine coefficients and r-squared value. + + % Initial Guess for Sine Coefficients + bSine = [1, 1, 1]; + + % Fit Sine Curve Using Nonlinear Regression + bSine = nlinfit(mt(iSelect), Cp(iSelect), 'fcEqnAnnualSine', bSine); + + % Calculate Predicted Values and R-Squared + predictedCp = fcEqnAnnualSine(bSine, mt(iSelect)); + r2 = fcr2Calc(Cp(iSelect), predictedCp); + + % Adjust Phase and Wrap to One Year + bSine(3) = mod(bSine(3), 365.25); + + % Return Fitted Sine Coefficients and R-Squared + sSine = [bSine, r2]; + + end \ No newline at end of file diff --git a/tests/unit_tests/test_ustar_cp/test_fitAnnualSine.py b/tests/unit_tests/test_ustar_cp/test_fitAnnualSine.py new file mode 100644 index 00000000..49d806e1 --- /dev/null +++ b/tests/unit_tests/test_ustar_cp/test_fitAnnualSine.py @@ -0,0 +1,75 @@ +import numpy as np +import pytest + +@pytest.fixture +def synthetic_data(): + # Create synthetic annual sine data + # True parameters: + true_amplitude = 2.0 + true_offset = 10.0 + true_phase = 50.0 + days = np.linspace(0, 365, 100) + Cp = true_amplitude * np.sin(2 * np.pi * (days - true_phase)/365.25) + true_offset + # Add some noise + noise = np.random.normal(0, 0.1, size=len(days)) + Cp_noisy = Cp + noise + + # We'll select all data points + iSelect = np.arange(len(days)) + iSelect = iSelect[iSelect != 0] # Cant index an array with zero in Matlab + + return days, Cp_noisy, iSelect, ( true_offset, true_amplitude, true_phase) + +def test_fit_output_shape_and_type(test_engine, synthetic_data): + days, Cp_noisy, iSelect, true_sine = synthetic_data + + days = test_engine.convert(days) + Cp_noisy = test_engine.convert(Cp_noisy) + iSelect = test_engine.convert(iSelect) + + result = test_engine.fitAnnualSineCurve(days, Cp_noisy, iSelect) + # Expecting [amplitude, offset, phase, r2] + assert test_engine.equal(len(test_engine.convert(result[0])), 4) + for val in result[0]: + assert isinstance(val, float) + assert np.allclose(result[0][0], true_sine[0], rtol = 0.001) # test amplitiude accuracy + assert np.allclose(result[0][1], true_sine[1], rtol = 0.1) # test offset accuracy + assert np.allclose(result[0][2], true_sine[2], rtol = 0.01) # test phase accuracy + +def test_fit_on_synthetic_data(test_engine, synthetic_data): + days, Cp_noisy, iSelect, (true_amp, true_off, true_ph) = synthetic_data + + result = test_engine.fitAnnualSineCurve(test_engine.convert(days), + test_engine.convert(Cp_noisy), + test_engine.convert(iSelect)) + + fitted_amp, fitted_off, fitted_ph, fitted_r2 = test_engine.convert(result) + + # Check that fitted parameters are close to the true parameters. + # Allow some tolerance due to noise. + assert np.isclose(fitted_amp, true_amp, rtol=0.2) + assert np.isclose(fitted_off, true_off, rtol=0.2) + + # Phase can wrap around, so it's trickier. We can check a mod difference: + phase_diff = (fitted_ph - true_ph) % 365.25 + if phase_diff > 182.625: + phase_diff = 365.25 - phase_diff + assert phase_diff < 30 # Somewhat lenient + + # R-squared should be reasonably high for low-noise data + assert fitted_r2 > 0.8 + +def test_constant_data(test_engine): + # If the data is constant, the fit might fail or result in amplitude ~ 0 + days = test_engine.convert(np.linspace(0, 365, 100)) + Cp = test_engine.convert(np.ones_like(days) * 5.0) + iSelect = test_engine.convert(np.arange(len(days))) + + result = test_engine.fitAnnualSineCurve(days, Cp, iSelect) + + fitted_amp, fitted_off, fitted_ph, fitted_r2 = result + # For constant data, amplitude should be near zero, offset near 5, and R^2 near 1. + assert abs(fitted_amp) < 0.5 + assert abs(fitted_off - 5.0) < 0.5 + # Phase doesn't matter much here, R^2 should be very high + assert fitted_r2 > 0.9 From 38bf63bfc3ae2dfd13ff4c2f21f8c9bd22cd6835 Mon Sep 17 00:00:00 2001 From: James Emberton Date: Fri, 20 Dec 2024 12:54:09 +0000 Subject: [PATCH 05/27] etsts working --- .../cpdAssignUStarTh20100901.m | 24 -------------- .../test_ustar_cp/test_fitAnnualSine.py | 32 ++++++++++++------- 2 files changed, 20 insertions(+), 36 deletions(-) diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m index 8a439139..2f162bf1 100644 --- a/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m +++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m @@ -161,30 +161,6 @@ end - -function sSine = fitAnnualSineCurve(mt, Cp, iSelect) - % fitAnnualSineCurve - % Fits an annual sine curve to the selected data points and returns - % the sine coefficients and r-squared value. - - % Initial Guess for Sine Coefficients - bSine = [1, 1, 1]; - - % Fit Sine Curve Using Nonlinear Regression - bSine = nlinfit(mt(iSelect), Cp(iSelect), 'fcEqnAnnualSine', bSine); - - % Calculate Predicted Values and R-Squared - predictedCp = fcEqnAnnualSine(bSine, mt(iSelect)); - r2 = fcr2Calc(Cp(iSelect), predictedCp); - - % Adjust Phase and Wrap to One Year - bSine(3) = mod(bSine(3), 365.25); - - % Return Fitted Sine Coefficients and R-Squared - sSine = [bSine, r2]; - - end - function [tW, CpW] = aggregateSeasonalMeans(mt, Cp, xmt, iSelect, nWindows, nStrata, nBoot) % aggregateSeasonalMeans % Aggregates seasonal means for time and change points. diff --git a/tests/unit_tests/test_ustar_cp/test_fitAnnualSine.py b/tests/unit_tests/test_ustar_cp/test_fitAnnualSine.py index 49d806e1..e7609c18 100644 --- a/tests/unit_tests/test_ustar_cp/test_fitAnnualSine.py +++ b/tests/unit_tests/test_ustar_cp/test_fitAnnualSine.py @@ -32,19 +32,22 @@ def test_fit_output_shape_and_type(test_engine, synthetic_data): assert test_engine.equal(len(test_engine.convert(result[0])), 4) for val in result[0]: assert isinstance(val, float) - assert np.allclose(result[0][0], true_sine[0], rtol = 0.001) # test amplitiude accuracy - assert np.allclose(result[0][1], true_sine[1], rtol = 0.1) # test offset accuracy - assert np.allclose(result[0][2], true_sine[2], rtol = 0.01) # test phase accuracy + assert np.allclose(result[0][0], true_sine[0], rtol = 0.1) # test offset accuracy + assert np.allclose(result[0][1], true_sine[1], rtol = 0.1) # test amplitude accuracy + assert np.allclose(result[0][2], true_sine[2], rtol = 0.1) # test phase accuracy def test_fit_on_synthetic_data(test_engine, synthetic_data): - days, Cp_noisy, iSelect, (true_amp, true_off, true_ph) = synthetic_data + days, Cp_noisy, iSelect, (true_off, true_amp, true_ph) = synthetic_data result = test_engine.fitAnnualSineCurve(test_engine.convert(days), test_engine.convert(Cp_noisy), test_engine.convert(iSelect)) - fitted_amp, fitted_off, fitted_ph, fitted_r2 = test_engine.convert(result) - + result = test_engine.convert(result) + fitted_amp = result[0][1] + fitted_off = result[0][0] + fitted_ph = result[0][2] + fitted_r2 = result[0][3] # Check that fitted parameters are close to the true parameters. # Allow some tolerance due to noise. assert np.isclose(fitted_amp, true_amp, rtol=0.2) @@ -61,15 +64,20 @@ def test_fit_on_synthetic_data(test_engine, synthetic_data): def test_constant_data(test_engine): # If the data is constant, the fit might fail or result in amplitude ~ 0 - days = test_engine.convert(np.linspace(0, 365, 100)) - Cp = test_engine.convert(np.ones_like(days) * 5.0) - iSelect = test_engine.convert(np.arange(len(days))) + days_array = np.linspace(0, 365, 100) + days = test_engine.convert(days_array) + Cp = test_engine.convert(np.ones_like(days_array) * 5.0) + iSelect = np.arange(len(days_array)) + iSelect = iSelect[iSelect != 0] # Cant index an array with zero in Matlab + iSelect = test_engine.convert(iSelect) result = test_engine.fitAnnualSineCurve(days, Cp, iSelect) + result = test_engine.convert(result) + fitted_amp = result[0][1] + fitted_off = result[0][0] + fitted_r2 = result[0][3] - fitted_amp, fitted_off, fitted_ph, fitted_r2 = result - # For constant data, amplitude should be near zero, offset near 5, and R^2 near 1. - assert abs(fitted_amp) < 0.5 + assert abs(fitted_amp) < 0.5 assert abs(fitted_off - 5.0) < 0.5 # Phase doesn't matter much here, R^2 should be very high assert fitted_r2 > 0.9 From 74aa307a3533b17cc62265812d230730519c1964 Mon Sep 17 00:00:00 2001 From: James Emberton Date: Wed, 29 Jan 2025 08:59:31 +0000 Subject: [PATCH 06/27] breakout subfunctions --- .../cpdAssignUStarTh20100901.m | 83 ------------------- .../aggregateSeasonalAndAnnualValues.m | 23 +++++ .../aggregateSeasonalMeans.m | 19 +++++ .../computeStandardizedScores.m | 10 +++ .../cpdAssign_functions/identifyOutliers.m | 8 ++ .../cpdAssign_functions/updateModes.m | 10 +++ .../updateSelectedIndices.m | 9 ++ 7 files changed, 79 insertions(+), 83 deletions(-) create mode 100644 oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/aggregateSeasonalAndAnnualValues.m create mode 100644 oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/aggregateSeasonalMeans.m create mode 100644 oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/computeStandardizedScores.m create mode 100644 oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/identifyOutliers.m create mode 100644 oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/updateModes.m create mode 100644 oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/updateSelectedIndices.m diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m index 2f162bf1..dacde7d9 100644 --- a/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m +++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m @@ -160,89 +160,6 @@ sineCurve = fitAnnualSineCurve(mt, Cp, selectedIndices); end - -function [tW, CpW] = aggregateSeasonalMeans(mt, Cp, xmt, iSelect, nWindows, nStrata, nBoot) - % aggregateSeasonalMeans - % Aggregates seasonal means for time and change points. - - % Calculate Median Number of Windows - nW = nanmedian(sum(~isnan(reshape(xmt, nWindows, nStrata * nBoot)))); - - % Sort Selected Measurements - [mtSelect, i] = sort(mt(iSelect)); - CpSelect = Cp(iSelect(i)); - - % Define Bins Based on Percentiles - xBins = prctile(mtSelect, 0:(100/nW):100); - - % Aggregate Seasonal Means Using fcBin - [~, tW, CpW] = fcBin(mtSelect, CpSelect, xBins, 0); - - end - -function [CpA, nA, xCpSelect] = aggregateSeasonalAndAnnualValues(xCp, iSelect, nDim, nWindows, nStrata, nBoot) - % aggregateSeasonalAndAnnualValues - % Aggregates seasonal and annual change point values based on selection. - - % Initialize Selection Array - xCpSelect = NaN(size(xCp)); - - % Assign Selected Change Points - xCpSelect(iSelect) = xCp(iSelect); - xCpGF = xCpSelect; - - % Aggregate Values Based on Dimensions - if nDim == 2 - CpA = nanmean(xCpGF); - nA = sum(~isnan(xCpSelect)); - elseif nDim == 3 - CpA = nanmean(reshape(xCpGF, nWindows * nStrata, nBoot)); - nA = sum(~isnan(reshape(xCpSelect, nWindows * nStrata, nBoot))); - else - error('Invalid number of dimensions: Expected 2D or 3D Stats.'); - end - - end - -function xNormX = computeStandardizedScores(x) - % computeStandardizedScores - % Standardizes the matrix x by its median and interquartile range. - - mx = nanmedian(x); - sx = fcNaniqr(x); - xNorm = (x - mx) ./ sx; - xNormX = max(abs(xNorm), [], 2); - - end - -function [fOut, iOut] = identifyOutliers(xNormX, threshold) - % identifyOutliers - % Identifies outliers based on standardized scores. - - fOut = xNormX > threshold; - iOut = find(fOut); - - end - -function [iSelect, nSelect, fSelect] = updateSelectedIndices(iSelect, iOut, fSelect, fOut) - % updateSelectedIndices - % Updates selected indices after removing outliers. - - iSelect = setdiff(iSelect, iOut); - nSelect = length(iSelect); - fSelect = fSelect & ~fOut; - - end - -function [iModeX, nModeX] = updateModes(fModeX, iOut) - % updateModes - % Recalculates mode indices after removing outliers. - - fModeX(iOut) = NaN; - iModeX = find(fModeX == 1); - nModeX = length(iModeX); - - end diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/aggregateSeasonalAndAnnualValues.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/aggregateSeasonalAndAnnualValues.m new file mode 100644 index 00000000..8f03854b --- /dev/null +++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/aggregateSeasonalAndAnnualValues.m @@ -0,0 +1,23 @@ +function [CpA, nA, xCpSelect] = aggregateSeasonalAndAnnualValues(xCp, iSelect, nDim, nWindows, nStrata, nBoot) + % aggregateSeasonalAndAnnualValues + % Aggregates seasonal and annual change point values based on selection. + + % Initialize Selection Array + xCpSelect = NaN(size(xCp)); + + % Assign Selected Change Points + xCpSelect(iSelect) = xCp(iSelect); + xCpGF = xCpSelect; + + % Aggregate Values Based on Dimensions + if nDim == 2 + CpA = nanmean(xCpGF); + nA = sum(~isnan(xCpSelect)); + elseif nDim == 3 + CpA = nanmean(reshape(xCpGF, nWindows * nStrata, nBoot)); + nA = sum(~isnan(reshape(xCpSelect, nWindows * nStrata, nBoot))); + else + error('Invalid number of dimensions: Expected 2D or 3D Stats.'); + end + + end \ No newline at end of file diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/aggregateSeasonalMeans.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/aggregateSeasonalMeans.m new file mode 100644 index 00000000..d9cfdd36 --- /dev/null +++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/aggregateSeasonalMeans.m @@ -0,0 +1,19 @@ +function [tW, CpW] = aggregateSeasonalMeans(mt, Cp, xmt, iSelect, nWindows, nStrata, nBoot) + % aggregateSeasonalMeans + % Aggregates seasonal means for time and change points. + + % Calculate Median Number of Windows + nW = nanmedian(sum(~isnan(reshape(xmt, nWindows, nStrata * nBoot)))); + + % Sort Selected Measurements + [mtSelect, i] = sort(mt(iSelect)); + CpSelect = Cp(iSelect(i)); + + % Define Bins Based on Percentiles + xBins = prctile(mtSelect, 0:(100/nW):100); + + % Aggregate Seasonal Means Using fcBin + [~, tW, CpW] = fcBin(mtSelect, CpSelect, xBins, 0); + + end + \ No newline at end of file diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/computeStandardizedScores.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/computeStandardizedScores.m new file mode 100644 index 00000000..8a9ce012 --- /dev/null +++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/computeStandardizedScores.m @@ -0,0 +1,10 @@ +function xNormX = computeStandardizedScores(x) + % computeStandardizedScores + % Standardizes the matrix x by its median and interquartile range. + + mx = nanmedian(x); + sx = fcNaniqr(x); + xNorm = (x - mx) ./ sx; + xNormX = max(abs(xNorm), [], 2); + + end \ No newline at end of file diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/identifyOutliers.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/identifyOutliers.m new file mode 100644 index 00000000..8422742e --- /dev/null +++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/identifyOutliers.m @@ -0,0 +1,8 @@ +function [fOut, iOut] = identifyOutliers(xNormX, threshold) + % identifyOutliers + % Identifies outliers based on standardized scores. + + fOut = xNormX > threshold; + iOut = find(fOut); + + end \ No newline at end of file diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/updateModes.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/updateModes.m new file mode 100644 index 00000000..31fda4b9 --- /dev/null +++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/updateModes.m @@ -0,0 +1,10 @@ +function [iModeX, nModeX] = updateModes(fModeX, iOut) + % updateModes + % Recalculates mode indices after removing outliers. + + fModeX(iOut) = NaN; + iModeX = find(fModeX == 1); + nModeX = length(iModeX); + + end + \ No newline at end of file diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/updateSelectedIndices.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/updateSelectedIndices.m new file mode 100644 index 00000000..7ea3c55e --- /dev/null +++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/updateSelectedIndices.m @@ -0,0 +1,9 @@ +function [iSelect, nSelect, fSelect] = updateSelectedIndices(iSelect, iOut, fSelect, fOut) + % updateSelectedIndices + % Updates selected indices after removing outliers. + + iSelect = setdiff(iSelect, iOut); + nSelect = length(iSelect); + fSelect = fSelect & ~fOut; + + end \ No newline at end of file From 59b5590730c5dd2197cd72d566312d0850314219 Mon Sep 17 00:00:00 2001 From: James Emberton Date: Fri, 31 Jan 2025 16:03:27 +0000 Subject: [PATCH 07/27] initial tests --- oneflux_steps/ustar_cp_python/__init__.py | 1 + .../ustar_cp_python/cpdAssignUStarTh.py | 17 +++++++ oneflux_steps/ustar_cp_python/test.py | 46 +++++++++++++++++++ tests/conftest.py | 2 + .../test_cpdAssignUStarTh20100901.py | 21 ++++++++- 5 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py create mode 100644 oneflux_steps/ustar_cp_python/test.py diff --git a/oneflux_steps/ustar_cp_python/__init__.py b/oneflux_steps/ustar_cp_python/__init__.py index de8c9aff..cd08b737 100644 --- a/oneflux_steps/ustar_cp_python/__init__.py +++ b/oneflux_steps/ustar_cp_python/__init__.py @@ -10,6 +10,7 @@ from .cpdFmax2pCore import interpolate_FmaxCritical, calculate_p_low, calculate_p_interpolate from .fcDatenum import datenum from .fcBin import fcBin +from .cpdAssignUStarTh import * from os.path import dirname, basename, isfile, join diff --git a/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py new file mode 100644 index 00000000..f2a07ab1 --- /dev/null +++ b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py @@ -0,0 +1,17 @@ +import numpy as np + +def identify_outliers(x_norm_x, threshold): + """ + Identifies outliers based on standardized scores. + + Parameters: + x_norm_x (numpy.ndarray): Array of normalized values. + threshold (float): Threshold value for outlier detection. + + Returns: + tuple: A boolean array (f_out) indicating outliers and an array (i_out) of outlier indices. + """ + f_out = x_norm_x > threshold # Boolean mask of outliers + i_out = np.where(f_out)[0] # Indices of outliers + + return f_out, i_out \ No newline at end of file diff --git a/oneflux_steps/ustar_cp_python/test.py b/oneflux_steps/ustar_cp_python/test.py new file mode 100644 index 00000000..17ad1d00 --- /dev/null +++ b/oneflux_steps/ustar_cp_python/test.py @@ -0,0 +1,46 @@ +import numpy as np +from scipy.stats import f +from scipy.interpolate import PchipInterpolator + + +Fmax = 6.89397364185411 +n= 53 + + +""" The critical F-max values as a 2D NumPy array. """ +FmaxTable : np.ndarray = np.array([ + [3.9293, 6.2992, 9.1471, 18.2659], + [3.7734, 5.6988, 7.8770, 13.8100], + [3.7516, 5.5172, 7.4426, 12.6481], + [3.7538, 5.3224, 7.0306, 11.4461], + [3.7941, 5.3030, 6.8758, 10.6635], + [3.8548, 5.3480, 6.8883, 10.5026], + [3.9798, 5.4465, 6.9184, 10.4527], + [4.0732, 5.5235, 6.9811, 10.3859], + [4.1467, 5.6136, 7.0624, 10.5596], + [4.2770, 5.7391, 7.2005, 10.6871], + [4.4169, 5.8733, 7.3421, 10.6751], + [4.5556, 6.0591, 7.5627, 11.0072], + [4.7356, 6.2738, 7.7834, 11.2319] +]) + +""" A 1D NumPy array containing sample sizes. """ +# tmp run to 200 1e5 reps +nTable : np.ndarray = np.array([10, 15, 20, 30, 50, 70, 100 + , 150, 200, 300, 500, 700, 1000]) + +""" A 1D NumPy array containing significance levels. """ +pTable : np.ndarray = np.array([0.80, 0.90, 0.95, 0.99]) + +pTableSize = len(pTable) +FmaxCritical = np.zeros(pTableSize) +for ip in range(pTableSize): + interpolator = PchipInterpolator(nTable, FmaxTable[:, ip]) + FmaxCritical[ip] = interpolator(n) +try: + interpolator = PchipInterpolator(FmaxCritical, 1 - pTable, extrapolate=True) + y = interpolator(Fmax) +except ValueError: + y = np.nan + +print(y) \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index 83ab7440..89527b3c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -569,6 +569,8 @@ def to_matlab_type(data: Any) -> Any: return [to_matlab_type(elem) for elem in data] elif isinstance(data, (int, float)): return matlab.double([data]) # Convert single numbers + elif isinstance(data, bool): + return matlab.logical(data) else: return data # If the data type is already MATLAB-compatible diff --git a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py index e113d15d..8bbcb5f0 100644 --- a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py +++ b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py @@ -90,4 +90,23 @@ def set_attr(struct_array, key, val): # Assertions for edge cases assert len(results_all_sig[0]) > 0, "Should produce results for all significant change points" - assert len(results_no_sig[5]) > 0, "Should produce a failure message for no significant change points" \ No newline at end of file + assert len(results_no_sig[5]) > 0, "Should produce a failure message for no significant change points" + +@pytest.mark.parametrize( + "x_norm_x, threshold, expected_f_out, expected_i_out", + [ + ([0.5, 1.2, 3.5, 0.1, 2.8], 2.0, [[False, False, True, False, True]], [[3.0, 5.0]]), # Basic test + ([0.1, 0.2, 0.3], 1.0, [[False, False, False]], [[]]), # No outliers + ([3.1, 2.9, 3.5], 2.0, [[True, True, True]], [[1, 2, 3]]), # All outliers + ([], 2.0, np.bool([]), []), # Empty input case + ([-3, -2, -1, 0, 1, 2, 3], -1.0, [[False, False, False, True, True, True, True]], [[4.0, 5.0, 6.0, 7.0]]), # Negative threshold + ] +) +def test_identify_outliers(test_engine, x_norm_x, threshold, expected_f_out, expected_i_out): + """Test MATLAB's identifyOutliers function from Python using MATLAB Engine.""" + + + f_out, i_out = test_engine.identifyOutliers(test_engine.convert(x_norm_x), test_engine.convert(threshold), nargout=2) + + assert np.array_equal(f_out, test_engine.convert(expected_f_out)), "Boolean outlier array does not match expected" + assert i_out == test_engine.convert(expected_i_out), "Index output does not match expected" \ No newline at end of file From fa3562cdde79c8d938622c4ed592e83873374361 Mon Sep 17 00:00:00 2001 From: James Emberton Date: Mon, 3 Feb 2025 09:23:50 +0000 Subject: [PATCH 08/27] experiments --- oneflux_steps/ustar_cp_python/__init__.py | 2 +- oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py | 8 ++++---- tests/conftest.py | 9 ++++++++- .../test_ustar_cp/test_cpdAssignUStarTh20100901.py | 2 +- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/oneflux_steps/ustar_cp_python/__init__.py b/oneflux_steps/ustar_cp_python/__init__.py index 230eb046..b0e0fe88 100644 --- a/oneflux_steps/ustar_cp_python/__init__.py +++ b/oneflux_steps/ustar_cp_python/__init__.py @@ -11,7 +11,7 @@ from .cpdFmax2pCore import interpolate_FmaxCritical, calculate_p_low, calculate_p_interpolate from .fcDatenum import datenum from .fcBin import fcBin -from .cpdAssignUStarTh import * +from .cpdAssignUStarTh import identifyOutliers from os.path import dirname, basename, isfile, join import glob diff --git a/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py index f2a07ab1..d47a3d08 100644 --- a/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py +++ b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py @@ -1,6 +1,5 @@ import numpy as np - -def identify_outliers(x_norm_x, threshold): +def identifyOutliers(x_norm_x, threshold): """ Identifies outliers based on standardized scores. @@ -11,7 +10,8 @@ def identify_outliers(x_norm_x, threshold): Returns: tuple: A boolean array (f_out) indicating outliers and an array (i_out) of outlier indices. """ - f_out = x_norm_x > threshold # Boolean mask of outliers + f_out = (x_norm_x > threshold) # Ensures f_out is a boolean array i_out = np.where(f_out)[0] # Indices of outliers + - return f_out, i_out \ No newline at end of file + return f_out, i_out # Ensure f_out remains a boolean array \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index bbfe4283..fc6013a3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -95,6 +95,7 @@ def mf_factory(cls, *args, **kwargs): from oneflux_steps.ustar_cp_python.cpd_evaluate_functions import * from oneflux_steps.ustar_cp_python.cpdFindChangePoint_functions import * from oneflux_steps.ustar_cp_python.cpdBootstrap import * +from oneflux_steps.ustar_cp_python.cpdAssignUStarTh import * def pytest_addoption(parser): parser.addoption("--language", action="store", default="matlab") @@ -141,7 +142,12 @@ def convert(self, x, index=False, fromFile=False): x = x-1 elif isinstance(x, list): x = np.asarray(x)-1 - if isinstance(x, list): + # Explicitly check for NumPy boolean arrays + if isinstance(x, np.ndarray) and x.dtype is bool: + print("NumPy bool array detected") + return x # Keep as boolean + elif isinstance(x, list): + print("list") # Transpose to capture MATLAB data layout # when the data has been serialised from MATLAB # to a file @@ -153,6 +159,7 @@ def convert(self, x, index=False, fromFile=False): elif isinstance(x, tuple): return tuple([self.convert(xi) for xi in x]) else: + print("hmm") return x def unconvert(self, x): diff --git a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py index 8bbcb5f0..e768482a 100644 --- a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py +++ b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py @@ -109,4 +109,4 @@ def test_identify_outliers(test_engine, x_norm_x, threshold, expected_f_out, exp f_out, i_out = test_engine.identifyOutliers(test_engine.convert(x_norm_x), test_engine.convert(threshold), nargout=2) assert np.array_equal(f_out, test_engine.convert(expected_f_out)), "Boolean outlier array does not match expected" - assert i_out == test_engine.convert(expected_i_out), "Index output does not match expected" \ No newline at end of file + assert i_out == expected_i_out, "Index output does not match expected" \ No newline at end of file From f45209715b30eed313dc3357ccd57014a5b76879 Mon Sep 17 00:00:00 2001 From: James Emberton Date: Mon, 3 Feb 2025 09:25:25 +0000 Subject: [PATCH 09/27] stuff --- tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py index e768482a..8bbcb5f0 100644 --- a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py +++ b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py @@ -109,4 +109,4 @@ def test_identify_outliers(test_engine, x_norm_x, threshold, expected_f_out, exp f_out, i_out = test_engine.identifyOutliers(test_engine.convert(x_norm_x), test_engine.convert(threshold), nargout=2) assert np.array_equal(f_out, test_engine.convert(expected_f_out)), "Boolean outlier array does not match expected" - assert i_out == expected_i_out, "Index output does not match expected" \ No newline at end of file + assert i_out == test_engine.convert(expected_i_out), "Index output does not match expected" \ No newline at end of file From c1ea99ce13c528ba7a94edc091611b36792c6954 Mon Sep 17 00:00:00 2001 From: James Emberton Date: Sat, 8 Feb 2025 14:10:31 +0000 Subject: [PATCH 10/27] added bool list check --- tests/conftest.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index fc6013a3..94f15755 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -153,6 +153,8 @@ def convert(self, x, index=False, fromFile=False): # to a file if fromFile: return transpose(np.array(x).astype(np.float64)) + elif all(isinstance(i, bool) for i in x): + return x else: return np.array(x).astype(np.float64) From 9d545aba91743ba125f246f5f155ca56d8a39b60 Mon Sep 17 00:00:00 2001 From: James Emberton Date: Tue, 11 Feb 2025 13:54:55 +0000 Subject: [PATCH 11/27] wip --- .../ustar_cp_python/cpdAssignUStarTh.py | 78 ++++++++++++++++++- .../cpdAssignUStarTh20100901.m | 8 +- .../aggregateSeasonalAndAnnualValues.m | 2 +- .../cpdAssign_functions/fcEqnAnnualSine.m | 6 ++ .../cpdAssign_functions/fitAnnualSineCurve.m | 2 +- tests/conftest.py | 20 +++-- .../test_aggregateSeasonalMeans.py | 75 ++++++++++++++++++ .../test_computeStandardisedScores.py | 18 +++++ .../test_cpdAssignUStarTh20100901.py | 58 +++++++++++++- .../test_ustar_cp/test_fitAnnualSine.py | 44 ++++++++--- .../test_ustar_cp/test_fitAnnualSineCurve.py | 53 +++++++++++++ .../test_ustar_cp/test_identifyOutliers.py | 13 ++++ .../test_updateSelectedIndices.py | 21 +++++ 13 files changed, 367 insertions(+), 31 deletions(-) create mode 100644 oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/fcEqnAnnualSine.m create mode 100644 tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py create mode 100644 tests/unit_tests/test_ustar_cp/test_computeStandardisedScores.py create mode 100644 tests/unit_tests/test_ustar_cp/test_fitAnnualSineCurve.py create mode 100644 tests/unit_tests/test_ustar_cp/test_identifyOutliers.py create mode 100644 tests/unit_tests/test_ustar_cp/test_updateSelectedIndices.py diff --git a/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py index d47a3d08..5fb8380f 100644 --- a/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py +++ b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py @@ -1,4 +1,12 @@ import numpy as np +from scipy.optimize import curve_fit +from oneflux_steps.ustar_cp_python.fcNaniqr import fcNaniqr +from oneflux_steps.ustar_cp_python.fcEqnAnnualSine import fcEqnAnnualSine +from oneflux_steps.ustar_cp_python.fcr2Calc import fcr2Calc + + + + def identifyOutliers(x_norm_x, threshold): """ Identifies outliers based on standardized scores. @@ -10,8 +18,72 @@ def identifyOutliers(x_norm_x, threshold): Returns: tuple: A boolean array (f_out) indicating outliers and an array (i_out) of outlier indices. """ - f_out = (x_norm_x > threshold) # Ensures f_out is a boolean array + f_out = (x_norm_x > threshold) i_out = np.where(f_out)[0] # Indices of outliers - - return f_out, i_out # Ensure f_out remains a boolean array \ No newline at end of file + return f_out, i_out + +def computeStandardizedScores(x): + """Standardizes the matrix x by its median and interquartile range.""" + mx = np.nanmedian(x, axis=0) # Compute median ignoring NaNs + iqr = fcNaniqr(x) + x_norm = (x - mx) / iqr # Standardize + + return np.nanmax(np.abs(x_norm), axis=1, keepdims=True) # Max absolute standardized score per row + + + +def fitAnnualSineCurve(mt, Cp, iSelect): + """ + Fits an annual sine curve to the selected data points and returns + the sine coefficients and R-squared value. + + Parameters + ---------- + mt : array-like + The time data (e.g., day of year). + Cp : array-like + The corresponding values to be fitted. + iSelect : array-like of bool + A boolean mask or indices that indicate which points to use + for the fitting. + + Returns + ------- + sSine : np.ndarray + A 1D array of length 4: + [offset, amplitude, phase, rSquared] + """ + # Prepare the data + xdata = np.array(mt)[iSelect] + ydata = np.array(Cp)[iSelect] + + # Define a local function for curve_fit: curve_fit expects + # a callable f(t, b0, b1, b2) with first arg = x, subsequent = params + def _annual_sine_for_curve_fit(t, b0, b1, b2): + return fcEqnAnnualSine(np.asarray([b0, b1, b2]), t) + + # Initial guess for [offset, amplitude, phase] + initial_guess = [1.0, 1.0, 1.0] + + # Perform the fit via non-linear regression + popt, _ = curve_fit(_annual_sine_for_curve_fit, xdata, ydata, p0=initial_guess) + + print(popt) + + # Compute predicted values for the fitted parameters + predictedCp = fcEqnAnnualSine(np.asarray(popt), xdata) + + print(predictedCp) + + # Compute R-squared + r2 = fcr2Calc(ydata, predictedCp) + + # Adjust phase to be in the range [0, 365.25) + popt[2] = popt[2] % 365.25 + + # Return fitted coefficients plus the R-squared value + # List containing ndarray to make output format match matlab for comparative testing + sSine = np.array([popt[0], popt[1], popt[2], r2], dtype=float) + print(sSine[1]) + return sSine \ No newline at end of file diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m index dacde7d9..5559b163 100644 --- a/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m +++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m @@ -20,7 +20,7 @@ end % Determine Window Sizes -numDimensions = ndims(Stats); +numDimensions = ndims(Stats); if numDimensions == 2 [numWindows, numBootstraps] = size(Stats); numTemperatureStrata = 1; temperatureStrataFactor = 0.5; @@ -28,7 +28,7 @@ [numWindows, numTemperatureStrata, numBootstraps] = size(Stats); temperatureStrataFactor = 1; else - failureMessage = 'Stats must be 2D or 3D.'; + failureMessage = 'Stats must be 2D or 3D.'; return; end @@ -115,7 +115,7 @@ % Abort if Too Few Selections if fractionSelected < 0.10 - failureMessage = 'Less than 10% successful detections.'; + failureMessage = 'Less than 10% successful detections.'; return; end @@ -127,7 +127,7 @@ end % Exclude Outliers -standardizedScores = computeStandardizedScores(regressionMatrix); +standardizedScores = computeStandardizedScores(regressionMatrix); [outlierFlag, outlierIndices] = identifyOutliers(standardizedScores, 5); [selectedIndices, numSelected, selectedPointsFlag] = ... diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/aggregateSeasonalAndAnnualValues.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/aggregateSeasonalAndAnnualValues.m index 8f03854b..362361eb 100644 --- a/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/aggregateSeasonalAndAnnualValues.m +++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/aggregateSeasonalAndAnnualValues.m @@ -11,7 +11,7 @@ % Aggregate Values Based on Dimensions if nDim == 2 - CpA = nanmean(xCpGF); + CpA = nanmean(xCpGF); nA = sum(~isnan(xCpSelect)); elseif nDim == 3 CpA = nanmean(reshape(xCpGF, nWindows * nStrata, nBoot)); diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/fcEqnAnnualSine.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/fcEqnAnnualSine.m new file mode 100644 index 00000000..5bb0f337 --- /dev/null +++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/fcEqnAnnualSine.m @@ -0,0 +1,6 @@ + function y=fcEqnAnnualSine(b,t); + + nDaysPerYr=datenum(2000-1,12,31)/2000; + Omega=2*pi/nDaysPerYr; + y=b(1)+b(2)*sin(Omega*(t-b(3))); + diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/fitAnnualSineCurve.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/fitAnnualSineCurve.m index fb7b58b3..0b6b6bab 100644 --- a/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/fitAnnualSineCurve.m +++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/fitAnnualSineCurve.m @@ -14,7 +14,7 @@ r2 = fcr2Calc(Cp(iSelect), predictedCp); % Adjust Phase and Wrap to One Year - bSine(3) = mod(bSine(3), 365.25); + bSine(3) = mod(bSine(3), 365.25); % Return Fitted Sine Coefficients and R-Squared sSine = [bSine, r2]; diff --git a/tests/conftest.py b/tests/conftest.py index 94f15755..30ba456d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -142,26 +142,23 @@ def convert(self, x, index=False, fromFile=False): x = x-1 elif isinstance(x, list): x = np.asarray(x)-1 - # Explicitly check for NumPy boolean arrays - if isinstance(x, np.ndarray) and x.dtype is bool: - print("NumPy bool array detected") - return x # Keep as boolean elif isinstance(x, list): - print("list") # Transpose to capture MATLAB data layout # when the data has been serialised from MATLAB # to a file if fromFile: return transpose(np.array(x).astype(np.float64)) - elif all(isinstance(i, bool) for i in x): - return x + elif len(x) == 1: + if all(isinstance(i, bool) for i in x[0]): + return np.array(x).astype(np.bool) + elif isinstance(x[0], list): + return np.array(x[0]) else: return np.array(x).astype(np.float64) elif isinstance(x, tuple): return tuple([self.convert(xi) for xi in x]) else: - print("hmm") return x def unconvert(self, x): @@ -170,6 +167,9 @@ def unconvert(self, x): def equal(self, x, y) -> bool: """Enhanced equality check for MATLAB arrays.""" + + #print(x) + #print(y) if x is None or y is None: raise ValueError("Comparison values cannot be None") if isinstance(x, float) or isinstance(y, float): @@ -263,6 +263,8 @@ def _unconvert(x): return x def _equal(x, y): + print(x) + print(y) return compare_matlab_arrays(x, y) # Choose which function to call @@ -583,6 +585,8 @@ def to_matlab_type(data: Any) -> Any: # Convert Python list to MATLAB double array if all elements are numbers if all(isinstance(elem, (int, float)) for elem in flatten(data)): return matlab.double(data) + elif all(isinstance(elem, (bool)) for elem in flatten(data)): + return matlab.logical(data) else: # Create a cell array for lists containing non-numeric data return [to_matlab_type(elem) for elem in data] diff --git a/tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py b/tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py new file mode 100644 index 00000000..e7263e07 --- /dev/null +++ b/tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py @@ -0,0 +1,75 @@ +# test_aggregateSeasonalMeans.py +import pytest +import numpy as np + +@pytest.mark.parametrize( + "mt, Cp, xmt, iSelect, nWindows, nStrata, nBoot, expected_tW, expected_CpW", + [ + # -- Test Case 1: Simple data, single "window" scenario -- + ( + [10, 20, 30], # mt + [1.0, 2.0, 3.0], # Cp + [10, 20, 30], # xmt + [True, True, True], # iSelect (bool mask) + 1, # nWindows + 1, # nStrata + 1, # nBoot + [20.0], # expected_tW (mock result; you must refine) + [2.0], # expected_CpW (mock result) + ), + + # -- Test Case 2: Multiple windows, partial selection -- + ( + [10, 20, 30, 40, 50], + [1.0, 2.0, 4.0, 8.0, 16.0], + [10, 20, 30, 40, 50], + [False, True, True, True, False], # iSelect + 2, + 1, + 1, + [25.0, 40.0], # expected_tW (mock result; you must refine) + [3.0, 8.0], # expected_CpW (mock result) + ), + ] +) +def test_aggregateSeasonalMeans(test_engine, mt, Cp, xmt, iSelect, + nWindows, nStrata, nBoot, + expected_tW, expected_CpW): + """ + Test the MATLAB function aggregateSeasonalMeans by calling it via + the MATLAB Engine API for Python. We pass arrays from Python + to MATLAB, execute the function, and verify the outputs. + """ + + # Convert Python lists/arrays to MATLAB data types + mt_matlab = test_engine.double(mt) # 1D array of floats + Cp_matlab = test_engine.double(Cp) + xmt_matlab = test_engine.double(xmt) + + # For a boolean mask (iSelect), convert True/False to 1/0 (logical) + # This creates a MATLAB logical array of the same length + iSelect_matlab = test_engine.logical([int(x) for x in iSelect]) + + # Call the MATLAB function. Note nargout=2 to receive two outputs (tW, CpW) + tW_mat, CpW_mat = test_engine.aggregateSeasonalMeans( + mt_matlab, # mt + Cp_matlab, # Cp + xmt_matlab, # xmt + iSelect_matlab, + float(nWindows), + float(nStrata), + float(nBoot), + nargout=2 + ) + + # Convert outputs from MATLAB arrays back to Python floats/arrays + # MATLAB returns "double" arrays. We turn them into NumPy arrays. + tW = np.array(tW_mat).flatten() + CpW = np.array(CpW_mat).flatten() + + # Compare the results with the expected values (within a tolerance). + # Adjust `rtol`, `atol` as needed, or do exact match if you expect integer results. + assert np.allclose(tW, expected_tW, rtol=1e-5, atol=1e-8), \ + f"tW mismatch: got {tW}, expected {expected_tW}" + assert np.allclose(CpW, expected_CpW, rtol=1e-5, atol=1e-8), \ + f"CpW mismatch: got {CpW}, expected {expected_CpW}" \ No newline at end of file diff --git a/tests/unit_tests/test_ustar_cp/test_computeStandardisedScores.py b/tests/unit_tests/test_ustar_cp/test_computeStandardisedScores.py new file mode 100644 index 00000000..f709236c --- /dev/null +++ b/tests/unit_tests/test_ustar_cp/test_computeStandardisedScores.py @@ -0,0 +1,18 @@ +import pytest +import numpy as np + +@pytest.mark.parametrize("input_data, expected_rows, expected_columns", [ + (np.random.randn(100, 5), 100, 1), + (100*np.ones((100, 5)), 100, 1), +]) + +def test_compute_standardized_scores(test_engine, input_data, expected_rows, expected_columns): + """Test that computeStandardizedScores executes correctly and returns expected values.""" + + # Call MATLAB function + result = test_engine.computeStandardizedScores(input_data, nargout=1) + + len_result = len(test_engine.convert(result)) + + assert test_engine.equal(len_result, test_engine.convert(expected_rows)) + \ No newline at end of file diff --git a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py index 8bbcb5f0..4fa69872 100644 --- a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py +++ b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py @@ -108,5 +108,59 @@ def test_identify_outliers(test_engine, x_norm_x, threshold, expected_f_out, exp f_out, i_out = test_engine.identifyOutliers(test_engine.convert(x_norm_x), test_engine.convert(threshold), nargout=2) - assert np.array_equal(f_out, test_engine.convert(expected_f_out)), "Boolean outlier array does not match expected" - assert i_out == test_engine.convert(expected_i_out), "Index output does not match expected" \ No newline at end of file + assert test_engine.equal(f_out, test_engine.convert(expected_f_out)), "Boolean outlier array does not match expected" + assert test_engine.equal(i_out, test_engine.convert(expected_i_out)), "Index output does not match expected" + + +def test_aggregate_2d_case(matlab_engine): + """Test function for 2D case (nDim=2).""" + eng = matlab_engine + + # Define test inputs + xCp = matlab.double([1.0, 2.0, np.nan, 3.0, np.nan, 4.0]) # MATLAB double array + iSelect = matlab.logical([1, 1, 0, 1, 0, 1]) # Boolean mask in MATLAB logical array + nDim = 2 + nWindows = 0 # Unused for 2D case + nStrata = 0 # Unused for 2D case + nBoot = 0 # Unused for 2D case + + # Run MATLAB function + CpA, nA, xCpSelect = eng.aggregateSeasonalAndAnnualValues( + xCp, iSelect, nDim, nWindows, nStrata, nBoot, nargout=3 + ) + + # Convert MATLAB output to NumPy for assertions + CpA = np.array(CpA) + nA = int(nA) + xCpSelect = np.array(xCpSelect) + + # Expected values + expected_CpA = np.nanmean([1.0, 2.0, 3.0, 4.0]) # Mean of selected change points + expected_nA = 4 # Count of non-NaN values in the selected array + + # Assertions + np.testing.assert_almost_equal(CpA, expected_CpA, decimal=5) + assert nA == expected_nA + assert np.isnan(xCpSelect[2]) # Check if unselected indices remain NaN + +@pytest.mark.parametrize( + "xCp, iSelect, nDim, nWindows, nStrata, nBoot, expected_CpA, expected_nA, expected_xCpSelect", + [ + ([1.0, 2.0, np.nan, 3.0, np.nan, 4.0], matlab.logical([1, 1, 0, 1, 0, 1]), 2, 0, 0, 0, [2, np.nan, 4], [2, 0, 1], [[1, np.nan, np.nan], [3, np.nan, 4]]), + ([[1.0, np.nan, 3.0],[4.0, 5.0, np.nan]], matlab.logical([[1, 0, 1],[0, 1, 0]]), 3, 2, 1, 3, [1.0, 5.0, 3.0], [1.0, 1.0, 1.0], [[1, np.nan, 3.0], [np.nan, 5.0, np.nan]]) + ] +) + + +def test_aggregate_3d_case(test_engine, xCp, iSelect, nDim, nWindows, nStrata, nBoot, expected_CpA, expected_nA, expected_xCpSelect): + """Test function for aggregateSeasonalAndAnnualValues 2D and 3D cases.""" + + + # Run MATLAB function + CpA, nA, xCpSelect = test_engine.aggregateSeasonalAndAnnualValues( + test_engine.convert(xCp), iSelect, nDim, nWindows, nStrata, nBoot, nargout=3 + ) + + assert test_engine.equal(CpA, expected_CpA) + assert test_engine.equal(nA, expected_nA) + assert test_engine.equal(xCpSelect, expected_xCpSelect) diff --git a/tests/unit_tests/test_ustar_cp/test_fitAnnualSine.py b/tests/unit_tests/test_ustar_cp/test_fitAnnualSine.py index e7609c18..5987a911 100644 --- a/tests/unit_tests/test_ustar_cp/test_fitAnnualSine.py +++ b/tests/unit_tests/test_ustar_cp/test_fitAnnualSine.py @@ -29,12 +29,18 @@ def test_fit_output_shape_and_type(test_engine, synthetic_data): result = test_engine.fitAnnualSineCurve(days, Cp_noisy, iSelect) # Expecting [amplitude, offset, phase, r2] - assert test_engine.equal(len(test_engine.convert(result[0])), 4) - for val in result[0]: + + result = test_engine.convert(result) + + if len(result) == 1: + result = result[0] + + assert test_engine.equal(len(result), 4) + for val in result: assert isinstance(val, float) - assert np.allclose(result[0][0], true_sine[0], rtol = 0.1) # test offset accuracy - assert np.allclose(result[0][1], true_sine[1], rtol = 0.1) # test amplitude accuracy - assert np.allclose(result[0][2], true_sine[2], rtol = 0.1) # test phase accuracy + assert np.allclose(result[0], true_sine[0], rtol = 0.1) # test offset accuracy + assert np.allclose(result[1], true_sine[1], rtol = 0.1) # test amplitude accuracy + assert np.allclose(result[2], true_sine[2], rtol = 0.1) # test phase accuracy def test_fit_on_synthetic_data(test_engine, synthetic_data): days, Cp_noisy, iSelect, (true_off, true_amp, true_ph) = synthetic_data @@ -44,10 +50,18 @@ def test_fit_on_synthetic_data(test_engine, synthetic_data): test_engine.convert(iSelect)) result = test_engine.convert(result) - fitted_amp = result[0][1] - fitted_off = result[0][0] - fitted_ph = result[0][2] - fitted_r2 = result[0][3] + + if len(result) == 1: # Matlab case + fitted_amp = result[0][1] + fitted_off = result[0][0] + fitted_ph = result[0][2] + fitted_r2 = result[0][3] + else: #Python case + fitted_amp = result[1] + fitted_off = result[0] + fitted_ph = result[2] + fitted_r2 = result[3] + # Check that fitted parameters are close to the true parameters. # Allow some tolerance due to noise. assert np.isclose(fitted_amp, true_amp, rtol=0.2) @@ -73,9 +87,15 @@ def test_constant_data(test_engine): result = test_engine.fitAnnualSineCurve(days, Cp, iSelect) result = test_engine.convert(result) - fitted_amp = result[0][1] - fitted_off = result[0][0] - fitted_r2 = result[0][3] + + if len(result) == 1: + fitted_amp = result[0][1] + fitted_off = result[0][0] + fitted_r2 = result[0][3] + else: + fitted_amp = result[1] + fitted_off = result[0] + fitted_r2 = result[3] assert abs(fitted_amp) < 0.5 assert abs(fitted_off - 5.0) < 0.5 diff --git a/tests/unit_tests/test_ustar_cp/test_fitAnnualSineCurve.py b/tests/unit_tests/test_ustar_cp/test_fitAnnualSineCurve.py new file mode 100644 index 00000000..4e961776 --- /dev/null +++ b/tests/unit_tests/test_ustar_cp/test_fitAnnualSineCurve.py @@ -0,0 +1,53 @@ +import numpy as np +import pytest +import matlab + +@pytest.fixture +def sample_data(): + """Provides sample input data for testing the MATLAB function.""" + mt = np.linspace(0, 365, 100) # Simulated time values in days + Cp = 10 * np.sin(2 * np.pi * mt / 365) + np.random.normal(0, 0.5, 100) # Noisy sine wave + iSelect = np.ones_like(mt, dtype=bool) # Select all points + return mt, Cp, iSelect + + +def test_fit_annual_sine_curve(test_engine, sample_data): + """Test that fitAnnualSineCurve executes correctly and returns expected values.""" + mt, Cp, iSelect = sample_data + + # Call MATLAB function + result = test_engine.fitAnnualSineCurve(mt, Cp, iSelect, nargout=1) + + # Sine wave properties + sine_offset = result[0][0] + sine_amp = result[0][1] + sine_phase = result[0][2] + + # Validate the R-squared value + r2_value = result[0][3] + + assert -0.3 < sine_offset < 0.3 + assert 9.5 < sine_amp < 10.5 + assert 363 < sine_phase or sine_phase < 1 + assert r2_value > 0.99 + +def test_fit_annual_sine_curve_partial_selection(test_engine, sample_data): + """Test the function with only part of the dataset selected.""" + mt, Cp, iSelect = sample_data + iSelect[:50] = False # Only use the last 50 data points + + + result = test_engine.fitAnnualSineCurve(mt, Cp, iSelect, nargout=1) + + # Sine wave properties + sine_offset = result[0][0] + sine_amp = result[0][1] + sine_phase = result[0][2] + + # Validate the R-squared value + r2_value = result[0][3] + + assert -0.3 < sine_offset < 0.3 + assert 9.5 < sine_amp < 10.5 + assert sine_phase < 1 or sine_phase > 363 + assert r2_value > 0.96 \ No newline at end of file diff --git a/tests/unit_tests/test_ustar_cp/test_identifyOutliers.py b/tests/unit_tests/test_ustar_cp/test_identifyOutliers.py new file mode 100644 index 00000000..c00657ad --- /dev/null +++ b/tests/unit_tests/test_ustar_cp/test_identifyOutliers.py @@ -0,0 +1,13 @@ +import numpy as np +import pytest +from tests.conftest import flatten + +@pytest.mark.parametrize("input_data, threshold, expected_outlierFlag, expected_outlierIndices", [ + (np.linspace(0, 10, 11), 5, [False,False,False,False,False,False,True,True,True,True,True], np.array([5,6,7,8,9])), +]) + +def test_identify_outliers(test_engine, input_data, threshold, expected_outlierFlag, expected_outlierIndices): + + result = test_engine.identifyOutliers(input_data, threshold) + + assert result[0] == test_engine.convert(expected_outlierFlag) \ No newline at end of file diff --git a/tests/unit_tests/test_ustar_cp/test_updateSelectedIndices.py b/tests/unit_tests/test_ustar_cp/test_updateSelectedIndices.py new file mode 100644 index 00000000..f994c6f3 --- /dev/null +++ b/tests/unit_tests/test_ustar_cp/test_updateSelectedIndices.py @@ -0,0 +1,21 @@ +import numpy as np +import pytest + + +@pytest.mark.parametrize("iSelect, iOut, fSelect, fOut, expected_nSelect", [ + ([1, 2, 3, 4, 5], [2, 4], [True, True, False, True, False], [False, False, False, False, False], 3), + ([10, 20, 30, 40], [20], [True, False, True, True], [False, False, False, False], 3), + ([5, 10, 15, 20, 25], [], [True, True, True, True, True], [False, False, False, False, False], 5), + ([1, 3, 5, 7, 9], [1, 3, 9], [True, False, True, True, True], [False, True, False, False, False], 2), +]) +def test_update_selected_indices(test_engine, iSelect, iOut, fSelect, fOut, expected_nSelect): + """Test that updateSelectedIndices correctly removes outliers and updates selection.""" + + # Call MATLAB function + result = test_engine.updateSelectedIndices(iSelect, iOut, fSelect, fOut, nargout=3) + + # Extract outputs + updated_iSelect, updated_nSelect, updated_fSelect = result + + # Check expected number of selections + assert updated_nSelect == expected_nSelect, f"Expected {expected_nSelect}, got {updated_nSelect}" \ No newline at end of file From 996ea4d544c945d8eb8921bed43dfc01b7fd8161 Mon Sep 17 00:00:00 2001 From: James Emberton Date: Tue, 11 Feb 2025 13:55:38 +0000 Subject: [PATCH 12/27] wip --- oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py index 5fb8380f..0ecf5073 100644 --- a/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py +++ b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py @@ -69,13 +69,9 @@ def _annual_sine_for_curve_fit(t, b0, b1, b2): # Perform the fit via non-linear regression popt, _ = curve_fit(_annual_sine_for_curve_fit, xdata, ydata, p0=initial_guess) - print(popt) - # Compute predicted values for the fitted parameters predictedCp = fcEqnAnnualSine(np.asarray(popt), xdata) - print(predictedCp) - # Compute R-squared r2 = fcr2Calc(ydata, predictedCp) @@ -85,5 +81,5 @@ def _annual_sine_for_curve_fit(t, b0, b1, b2): # Return fitted coefficients plus the R-squared value # List containing ndarray to make output format match matlab for comparative testing sSine = np.array([popt[0], popt[1], popt[2], r2], dtype=float) - print(sSine[1]) + return sSine \ No newline at end of file From 9215dcbc008eb6e77166c6880da2b3cd4169f832 Mon Sep 17 00:00:00 2001 From: James Emberton Date: Wed, 12 Feb 2025 11:54:32 +0000 Subject: [PATCH 13/27] wip --- .../test_aggregateSeasonalMeans.py | 23 ++++++------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py b/tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py index e7263e07..c4ecb5f4 100644 --- a/tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py +++ b/tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py @@ -41,24 +41,15 @@ def test_aggregateSeasonalMeans(test_engine, mt, Cp, xmt, iSelect, to MATLAB, execute the function, and verify the outputs. """ - # Convert Python lists/arrays to MATLAB data types - mt_matlab = test_engine.double(mt) # 1D array of floats - Cp_matlab = test_engine.double(Cp) - xmt_matlab = test_engine.double(xmt) - - # For a boolean mask (iSelect), convert True/False to 1/0 (logical) - # This creates a MATLAB logical array of the same length - iSelect_matlab = test_engine.logical([int(x) for x in iSelect]) - # Call the MATLAB function. Note nargout=2 to receive two outputs (tW, CpW) tW_mat, CpW_mat = test_engine.aggregateSeasonalMeans( - mt_matlab, # mt - Cp_matlab, # Cp - xmt_matlab, # xmt - iSelect_matlab, - float(nWindows), - float(nStrata), - float(nBoot), + test_engine.convert(mt), # mt + test_engine.convert(Cp), # Cp + test_engine.convert(xmt), # xmt + test_engine.convert(iSelect), + test_engine.convert(nWindows), + test_engine.convert(nStrata), + test_engine.convert(nBoot), nargout=2 ) From e62aedb5ee1c804871b6b4db832df0c2f6d23c3e Mon Sep 17 00:00:00 2001 From: James Emberton Date: Thu, 13 Feb 2025 14:41:39 +0000 Subject: [PATCH 14/27] fixing test --- .../test_aggregateSeasonalMeans.py | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py b/tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py index c4ecb5f4..6f29fd47 100644 --- a/tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py +++ b/tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py @@ -9,27 +9,29 @@ ( [10, 20, 30], # mt [1.0, 2.0, 3.0], # Cp - [10, 20, 30], # xmt + [20], # xmt [True, True, True], # iSelect (bool mask) 1, # nWindows 1, # nStrata 1, # nBoot - [20.0], # expected_tW (mock result; you must refine) - [2.0], # expected_CpW (mock result) + [10.0], # expected_tW (mock result; you must refine) + [1.0], # expected_CpW (mock result) ), # -- Test Case 2: Multiple windows, partial selection -- ( - [10, 20, 30, 40, 50], - [1.0, 2.0, 4.0, 8.0, 16.0], - [10, 20, 30, 40, 50], - [False, True, True, True, False], # iSelect - 2, - 1, - 1, - [25.0, 40.0], # expected_tW (mock result; you must refine) - [3.0, 8.0], # expected_CpW (mock result) - ), + [10, 20, 30, 40, 50], # mt + [1.0, 2.0, 4.0, 8.0, 16.0], # Cp + [10, 20, 30, 40, 50, 60, 70, 80, 90, + 100,110,120,130,140,150,160,170,180,190, + 200,210,220,230,240], # <-- 24 elements for xmt + [False, True, True, True, False], # iSelect + 4, # nWindows + 3, # nStrata + 2, # nBoot + [25.0, 40.0], # expected_tW + [3.0, 8.0], # expected_CpW +), ] ) def test_aggregateSeasonalMeans(test_engine, mt, Cp, xmt, iSelect, From 094ffbd7ea30cfa7c0cca601e814ac5c335301f2 Mon Sep 17 00:00:00 2001 From: James Emberton Date: Sun, 16 Feb 2025 20:21:59 +0000 Subject: [PATCH 15/27] messy commit with lots of stuff --- .../aggregateSeasonalAndAnnualValues.py | 72 ++++++++++ .../ustar_cp_python/aggregateSeasonalMeans.py | 109 +++++++++++++++ .../computeStandardizedScores.m | 15 +- tests/conftest.py | 7 +- .../test_aggregateAndSeasonalValues.py | 128 ++++++++++++++++++ .../test_aggregateSeasonalMeans.py | 94 ++++++++----- .../test_computeStandardisedScores.py | 116 ++++++++++++++-- 7 files changed, 493 insertions(+), 48 deletions(-) create mode 100644 oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py create mode 100644 oneflux_steps/ustar_cp_python/aggregateSeasonalMeans.py create mode 100644 tests/unit_tests/test_ustar_cp/test_aggregateAndSeasonalValues.py diff --git a/oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py b/oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py new file mode 100644 index 00000000..a53e323d --- /dev/null +++ b/oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py @@ -0,0 +1,72 @@ +import numpy as np + +def aggregateSeasonalAndAnnualValues( + xCp, + iSelect, + nDim, + nWindows, + nStrata, + nBoot +): + """ + Python equivalent of the MATLAB function aggregateSeasonalAndAnnualValues. + + Parameters + ---------- + xCp : array-like + In MATLAB, can be 2D [nWindows, nBoot] or 3D [nWindows, nStrata, nBoot]. + In Python, pass a nested list or NumPy array with the same shape. + iSelect : array-like of int + 1-based linear indices in MATLAB's column-major ordering that should be selected. + We subtract 1 and interpret them in the same column-major order in Python. + nDim : int + 2 or 3 (to indicate if xCp is 2D or 3D). + nWindows : int + nStrata : int + nBoot : int + + Returns + ------- + CpA : np.ndarray + Aggregated mean of selected change points along the appropriate dimension. + For nDim=2, shape is (nBoot,). For nDim=3, shape is (nBoot,). + nA : np.ndarray + Count of non-NaN selected points. Same shape as CpA. + xCpSelect : np.ndarray + Same shape as xCp, with NaN everywhere except the selected positions. + """ + # Convert inputs to float arrays + xCp = np.asarray(xCp, dtype=float) + + # Prepare an output array (same shape) filled with NaNs + xCpSelect = np.full_like(xCp, np.nan, dtype=float) + + # Flatten both arrays in column-major (Fortran) order to replicate MATLAB indexing + xCp_flat_F = xCp.flatten(order='F') + xCpSelect_flat_F = xCpSelect.flatten(order='F') + + iSelect_array = np.asarray(iSelect, dtype=int) + + # Assign the selected change points in the flattened array + xCpSelect_flat_F[iSelect_array] = xCp_flat_F[iSelect_array] + + # Reshape back to original shape (column-major) + xCpSelect = xCpSelect_flat_F.reshape(xCp.shape, order='F') + xCpGF = xCpSelect # Just like the MATLAB code + + # Aggregate values based on dimensions + if nDim == 2: + # xCp shape = [nWindows, nBoot] + # MATLAB default nanmean(xCpGF) => mean across axis=0 in Python + CpA = np.nanmean(xCpGF, axis=0) + nA = np.sum(~np.isnan(xCpSelect), axis=0) + elif nDim == 3: + # xCp shape = [nWindows, nStrata, nBoot] + # reshape => (nWindows*nStrata, nBoot) in column-major + xCpGF_reshaped = xCpGF.reshape(nWindows * nStrata, nBoot, order='F') + CpA = np.nanmean(xCpGF_reshaped, axis=0) + nA = np.sum(~np.isnan(xCpGF_reshaped), axis=0) + else: + raise ValueError("Invalid number of dimensions: Expected 2D or 3D Stats.") + + return CpA, nA, xCpSelect diff --git a/oneflux_steps/ustar_cp_python/aggregateSeasonalMeans.py b/oneflux_steps/ustar_cp_python/aggregateSeasonalMeans.py new file mode 100644 index 00000000..1e37d76c --- /dev/null +++ b/oneflux_steps/ustar_cp_python/aggregateSeasonalMeans.py @@ -0,0 +1,109 @@ +from typing import Tuple +import numpy as np +from numpy.typing import NDArray +from oneflux_steps.ustar_cp_python.fcBin import fcBin + +def aggregateSeasonalMeans(mt: NDArray, Cp: NDArray, xmt: NDArray, iSelect: NDArray, nWindows: int, nStrata: int, nBoot: int)-> Tuple[NDArray, NDArray]: + """ + Python equivalent of the MATLAB function aggregateSeasonalMeans. + Aggregates seasonal means for time and change points. + + Parameters + ---------- + mt : array-like + The time data. + Cp : array-like + The corresponding change point values. + xmt : array-like + A reference array that must reshape cleanly to (nWindows, nStrata * nBoot). + Used to compute the median number of windows (nW). + iSelect : array-like of int + Numeric indices (1-based in MATLAB) of selected measurements to use. + For Python (0-based), ensure you pass valid 0-based indices. + nWindows : int + nStrata : int + nBoot : int + + Returns + ------- + tW : np.ndarray + The seasonal mean times for each bin. + CpW : np.ndarray + The seasonal mean change points for each bin. + """ + + # ---------------------------------------------------------- + # 1) Calculate Median Number of Windows + # In MATLAB: + # reshape(xmt, nWindows, nStrata * nBoot) + # Here we reshape the 1D array xmt => shape (nWindows, nStrata*nBoot) + # ---------------------------------------------------------- + try: + xmt_reshaped = xmt.reshape(nWindows, nStrata * nBoot) + except ValueError: + raise ValueError( + f"Cannot reshape xmt of length {xmt.size} " + f"to shape ({nWindows}, {nStrata*nBoot})." + ) + + # sum(~isnan(...)) in MATLAB => np.sum(~np.isnan(...)) in NumPy + # nanmedian(...) => np.nanmedian(...) + # => sum over axis=1 if you want row sums (assuming typical usage) + # but in the MATLAB code it's sum( (nWindows, nStrata*nBoot) ) => axis=0 by default + nW_array = np.sum(~np.isnan(xmt_reshaped), axis=0) # shape: (nStrata*nBoot,) + nW = np.nanmedian(nW_array) + + # ---------------------------------------------------------- + # 2) Sort Selected Measurements + # MATLAB: + # [mtSelect, i] = sort(mt(iSelect)); + # CpSelect = Cp(iSelect(i)); + # ---------------------------------------------------------- + # *In Python*, iSelect is an array of *0-based* indices. + # We then do: + iSelect = iSelect.astype(int) + selected_mt = mt[iSelect] + # Sort them while keeping track of the sorted order + sort_order = np.argsort(selected_mt) # array of int + mtSelect = selected_mt[sort_order] + # Now reorder Cp similarly + selected_Cp = Cp[iSelect] + CpSelect = selected_Cp[sort_order] + + # ---------------------------------------------------------- + # 3) Define Bins Based on Percentiles + # xBins = prctile(mtSelect, 0:(100/nW):100); + # In Python, np.percentile(...) + # and we create an array of percentiles from 0 to 100 in steps of (100/nW) + # But nW might be float => we do something like + # np.linspace(0, 100, int(nW)+1) or something similar. However, + # to replicate MATLAB's 0:(100/nW):100 exactly, we must handle floats carefully. + # We'll do: + if nW < 1: + # Fallback: if nW < 1 for some reason, do 1 bin, i.e. [0, 100] + pct_array = [0, 100] + else: + step = 100.0 / nW # step size + # The number of steps is int(nW)+1 if nW is integer-like. + # However, if nW is float, MATLAB still enumerates 0, step, 2*step,..., up to 100. + # We'll replicate that logic using np.arange, then clip at <=100 + pct_array = [] + current = 0.0 + while current < 100.0 + 1e-9: + pct_array.append(current) + current += step + # In case floating arithmetic overshoots 100 a bit + pct_array[-1] = 100.0 + + # Now compute bin edges from these percentiles + xBins = np.percentile(mtSelect, pct_array) + + # ---------------------------------------------------------- + # 4) fcBin equivalent in Python + # [~, tW, CpW] = fcBin(mtSelect, CpSelect, xBins, 0); + + + # Run fcBin + nCount, tW, CpW = fcBin(mtSelect, CpSelect, xBins, 0) + + return tW, CpW diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/computeStandardizedScores.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/computeStandardizedScores.m index 8a9ce012..84fb6feb 100644 --- a/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/computeStandardizedScores.m +++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/computeStandardizedScores.m @@ -1,10 +1,19 @@ function xNormX = computeStandardizedScores(x) % computeStandardizedScores % Standardizes the matrix x by its median and interquartile range. - + % + % x : N x M (can contain NaNs) + % - mx = nanmedian(x) -> 1 x M median (column-wise) + % - sx = fcNaniqr(x) -> 1 x M IQR (column-wise) + % - xNorm = (x - mx) ./ sx + % - xNormX = max(abs(xNorm), [], 2) -> N x 1 + % + % By default, if a row has a NaN in one of its columns, that + % can lead to xNormX(row)=NaN, since max(abs(...)) sees NaN. + % + mx = nanmedian(x); sx = fcNaniqr(x); xNorm = (x - mx) ./ sx; xNormX = max(abs(xNorm), [], 2); - - end \ No newline at end of file +end \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index 30ba456d..b15fc3ce 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -96,6 +96,8 @@ def mf_factory(cls, *args, **kwargs): from oneflux_steps.ustar_cp_python.cpdFindChangePoint_functions import * from oneflux_steps.ustar_cp_python.cpdBootstrap import * from oneflux_steps.ustar_cp_python.cpdAssignUStarTh import * +from oneflux_steps.ustar_cp_python.aggregateSeasonalMeans import aggregateSeasonalMeans +from oneflux_steps.ustar_cp_python.aggregateSeasonalAndAnnualValues import aggregateSeasonalAndAnnualValues def pytest_addoption(parser): parser.addoption("--language", action="store", default="matlab") @@ -142,13 +144,16 @@ def convert(self, x, index=False, fromFile=False): x = x-1 elif isinstance(x, list): x = np.asarray(x)-1 - elif isinstance(x, list): + print(x) + if isinstance(x, list): # Transpose to capture MATLAB data layout # when the data has been serialised from MATLAB # to a file if fromFile: return transpose(np.array(x).astype(np.float64)) elif len(x) == 1: + if isinstance(x, list) and all(isinstance(item, (int, float)) for item in x): + return np.asarray(x) if all(isinstance(i, bool) for i in x[0]): return np.array(x).astype(np.bool) elif isinstance(x[0], list): diff --git a/tests/unit_tests/test_ustar_cp/test_aggregateAndSeasonalValues.py b/tests/unit_tests/test_ustar_cp/test_aggregateAndSeasonalValues.py new file mode 100644 index 00000000..0f1b1307 --- /dev/null +++ b/tests/unit_tests/test_ustar_cp/test_aggregateAndSeasonalValues.py @@ -0,0 +1,128 @@ +import pytest +import numpy as np + +@pytest.mark.parametrize( + ( + "xCp", # 2D or 3D matrix to pass into MATLAB + "iSelect", # numeric (1-based) indices into xCp + "nDim", + "nWindows", + "nStrata", + "nBoot", + "expected_CpA", + "expected_nA", + "expected_xCpSelect" + ), + [ + # + # Test Case 1: 2D, shape(2,3). + # In MATLAB, xCp(1,1)=1, xCp(2,1)=4, xCp(1,2)=2, xCp(2,2)=5, xCp(1,3)=3, xCp(2,3)=6 + # We select iSelect=[2,4,6], i.e. xCp(2)=4, xCp(4)=5, xCp(6)=6. + # Then: + # xCpSelect => [NaN,4, NaN,5, NaN,6] in linear indexing => shape(2,3): + # [[NaN, NaN, NaN], + # [ 4 , 5 , 6 ]] + # CpA => nanmean(xCpSelect) => row-wise mean => [4,5,6] (a 1x3 vector in MATLAB) + # nA => sum(~isnan(xCpSelect)) => [1,1,1] + # + # NOTE: We'll store "expected_xCpSelect" as a nested Python list to compare shape(2,3). + # + ( + # xCp as a nested list to form 2D [2,3] + [[1, 2, 3], + [4, 5, 6]], + + [2, 4, 6], # 1-based indices => picks xCp(2)=4, xCp(4)=5, xCp(6)=6 + 2, # nDim + 2, # nWindows + 1, # nStrata + 3, # nBoot + [4.0, 5.0, 6.0], # expected_CpA + [1, 1, 1], # expected_nA + [[np.nan, np.nan, np.nan], + [4.0, 5.0, 6.0]] + ), + + # + # Test Case 2: 3D, shape(2,2,2). + # In MATLAB (column-major), xCp(1,1,1)=1, xCp(2,1,1)=2, xCp(1,2,1)=3, xCp(2,2,1)=4, + # xCp(1,1,2)=5, xCp(2,1,2)=6, xCp(1,2,2)=7, xCp(2,2,2)=8 + # iSelect=[1,8] => picks xCp(1)=1, xCp(8)=8. + # => xCpSelect => [1, NaN, NaN, NaN, NaN, NaN, NaN, 8] => shape(2,2,2) => in "layer" form: + # + # layer 1 => [[1, NaN], + # [NaN, NaN]] + # layer 2 => [[NaN, NaN], + # [NaN, 8]] + # + # Then we do nDim=3 => reshape => (nWindows*nStrata, nBoot) => (2*2, 2) => (4,2). + # We get a 4x2 array => only [1,8] are non-NaN => so: + # CpA => [1, 8] (i.e. nanmean along dimension=1 => a row vector 1x2) + # nA => [1, 1] (count of non-NaN in each column) + ( + # xCp as nested lists for shape(2,2,2). The dimension order in Python is [row, col, "page"]. + # We'll pass it as we want MATLAB to interpret it with column-major flattening. + [ + [ [1, 5], [3, 7] ], # "row 1" + [ [2, 6], [4, 8] ] # "row 2" + ], + [1, 8], # picks xCp(1)=1, xCp(8)=8 in MATLAB + 3, # nDim + 2, # nWindows + 2, # nStrata + 2, # nBoot + [1.0, 8.0], # expected_CpA (1x2 row vector) + [1, 1], # expected_nA + [ + [[1.0, np.nan], + [np.nan, np.nan]], + [[np.nan, np.nan], + [np.nan, 8.0 ]] + ] + ), + ] +) +def test_aggregateSeasonalAndAnnualValues( + test_engine, + xCp, + iSelect, + nDim, + nWindows, + nStrata, + nBoot, + expected_CpA, + expected_nA, + expected_xCpSelect +): + """ + Test the MATLAB function aggregateSeasonalAndAnnualValues by calling it via + the MATLAB Engine API for Python. We pass arrays from Python to MATLAB, + execute the function, and verify the outputs. + + Explanation: + ------------ + - xCp is provided in a shape that (in MATLAB) should be [nWindows, nBoot] if nDim=2, + or [nWindows, nStrata, nBoot] if nDim=3. Because MATLAB uses column-major order, + the exact flattening can be tricky in Python. We keep the nested structure so + the final shape is correct in MATLAB once passed through the engine. + - iSelect is 1-based, matching MATLAB indexing. For instance, iSelect=[1,8] means + xCp(1) and xCp(8) are selected. + - The function returns three outputs: [CpA, nA, xCpSelect]. + We check each against the expected result (accounting for possible shape and + dimensional differences). + """ + + # Call the MATLAB function with nargout=3 + CpA_mat, nA_mat, xCpSelect_mat = test_engine.aggregateSeasonalAndAnnualValues( + test_engine.convert(xCp), + test_engine.convert(iSelect, index = "to_python"), + test_engine.convert(nDim), + test_engine.convert(nWindows), + test_engine.convert(nStrata), + test_engine.convert(nBoot), + nargout=3 + ) + + assert test_engine.equal(CpA_mat, test_engine.convert(expected_CpA)) + assert test_engine.equal(nA_mat, test_engine.convert(expected_nA)) + assert test_engine.equal(xCpSelect_mat, test_engine.convert(expected_xCpSelect)) diff --git a/tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py b/tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py index 6f29fd47..38e956ed 100644 --- a/tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py +++ b/tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py @@ -1,37 +1,44 @@ -# test_aggregateSeasonalMeans.py import pytest import numpy as np @pytest.mark.parametrize( "mt, Cp, xmt, iSelect, nWindows, nStrata, nBoot, expected_tW, expected_CpW", [ - # -- Test Case 1: Simple data, single "window" scenario -- + # + # Test Case 1: Single window, select all points with numeric indices + # ( [10, 20, 30], # mt [1.0, 2.0, 3.0], # Cp - [20], # xmt - [True, True, True], # iSelect (bool mask) + [20], # xmt => length=1 => reshape(1,1*1) + [1, 2, 3], # iSelect as numeric indices (MATLAB is 1-based) 1, # nWindows 1, # nStrata 1, # nBoot - [10.0], # expected_tW (mock result; you must refine) - [1.0], # expected_CpW (mock result) + [20.0], # expected_tW + [2.0], # expected_CpW ), - # -- Test Case 2: Multiple windows, partial selection -- + # + # Test Case 2: Multiple windows, partial selection + # Here iSelect = [2,3,4] means we are selecting the 2nd, 3rd, and 4th elements + # in the arrays mt/Cp. xmt must have 4*(3*2) = 24 elements for reshape(). + # ( - [10, 20, 30, 40, 50], # mt - [1.0, 2.0, 4.0, 8.0, 16.0], # Cp - [10, 20, 30, 40, 50, 60, 70, 80, 90, - 100,110,120,130,140,150,160,170,180,190, - 200,210,220,230,240], # <-- 24 elements for xmt - [False, True, True, True, False], # iSelect - 4, # nWindows - 3, # nStrata - 2, # nBoot - [25.0, 40.0], # expected_tW - [3.0, 8.0], # expected_CpW -), + [10, 20, 30, 40, 50], # mt + [1.0, 2.0, 4.0, 8.0, 16.0], # Cp + [ + 10, 20, 30, 40, 50, 60, 70, 80, 90, + 100,110,120,130,140,150,160,170,180,190, + 200,210,220,230,240 + ], # xmt => 24 elements + [2, 3, 4], # iSelect => partial selection + 4, # nWindows + 3, # nStrata + 2, # nBoot + [20.0, 30.0, 30.0, 40.0], # expected_tW + [2.0, 4.0, 4.0, 8.0], # expected_CpW (mock result) + ), ] ) def test_aggregateSeasonalMeans(test_engine, mt, Cp, xmt, iSelect, @@ -41,28 +48,51 @@ def test_aggregateSeasonalMeans(test_engine, mt, Cp, xmt, iSelect, Test the MATLAB function aggregateSeasonalMeans by calling it via the MATLAB Engine API for Python. We pass arrays from Python to MATLAB, execute the function, and verify the outputs. + + Key point: + ---------- + - In the original cpdAssignUStarTh20100901.m code, `iSelect` is a + numeric array of indices (found via `find(...)`) rather than a + logical mask. This avoids the mixing of sort(...) and logical + indexing that caused errors. + - We ensure length(xmt) == nWindows * nStrata * nBoot so reshape(xmt, ...) + is valid in MATLAB. + - The `expected_tW` and `expected_CpW` here are "mock" placeholders to + show the format of the test. Adjust them based on what you actually + expect from fcBin. """ - # Call the MATLAB function. Note nargout=2 to receive two outputs (tW, CpW) + # Convert Python lists/arrays to MATLAB data + mt_matlab = test_engine.convert(mt) + Cp_matlab = test_engine.convert(Cp) + xmt_matlab = test_engine.convert(xmt) + + # Important: iSelect is now numeric. For MATLAB, it's 1-based indexing. + # This means if iSelect=[2,3], we are selecting the 2nd, 3rd elements of mt/Cp. + iSelect_matlab = test_engine.convert(iSelect, index = "to_python") + + nWindows_matlab = test_engine.convert(nWindows) + nStrata_matlab = test_engine.convert(nStrata) + nBoot_matlab = test_engine.convert(nBoot) + + # Call the MATLAB function tW_mat, CpW_mat = test_engine.aggregateSeasonalMeans( - test_engine.convert(mt), # mt - test_engine.convert(Cp), # Cp - test_engine.convert(xmt), # xmt - test_engine.convert(iSelect), - test_engine.convert(nWindows), - test_engine.convert(nStrata), - test_engine.convert(nBoot), + mt_matlab, + Cp_matlab, + xmt_matlab, + iSelect_matlab, + nWindows_matlab, + nStrata_matlab, + nBoot_matlab, nargout=2 ) - # Convert outputs from MATLAB arrays back to Python floats/arrays - # MATLAB returns "double" arrays. We turn them into NumPy arrays. + # Convert MATLAB outputs to NumPy arrays tW = np.array(tW_mat).flatten() CpW = np.array(CpW_mat).flatten() - # Compare the results with the expected values (within a tolerance). - # Adjust `rtol`, `atol` as needed, or do exact match if you expect integer results. + # Compare the results with the expected values assert np.allclose(tW, expected_tW, rtol=1e-5, atol=1e-8), \ f"tW mismatch: got {tW}, expected {expected_tW}" assert np.allclose(CpW, expected_CpW, rtol=1e-5, atol=1e-8), \ - f"CpW mismatch: got {CpW}, expected {expected_CpW}" \ No newline at end of file + f"CpW mismatch: got {CpW}, expected {expected_CpW}" diff --git a/tests/unit_tests/test_ustar_cp/test_computeStandardisedScores.py b/tests/unit_tests/test_ustar_cp/test_computeStandardisedScores.py index f709236c..3dcbdbc4 100644 --- a/tests/unit_tests/test_ustar_cp/test_computeStandardisedScores.py +++ b/tests/unit_tests/test_ustar_cp/test_computeStandardisedScores.py @@ -1,18 +1,110 @@ import pytest import numpy as np -@pytest.mark.parametrize("input_data, expected_rows, expected_columns", [ - (np.random.randn(100, 5), 100, 1), - (100*np.ones((100, 5)), 100, 1), -]) +@pytest.mark.parametrize( + "input_data, expected_xNormX", + [ + # + # Test 1: 2x3 matrix, no NaNs + # + # x = [[1,2,3], + # [4,5,6]] + # + # For each column (with only 2 values): + # median => (value1 + value2)/2 + # iqr => 75th - 25th percentile => typically 1.5 for each col + # + # => xNorm => row0 => [-1, -1, -1] + # row1 => [ 1, 1, 1] + # => xNormX => [1, 1] + # + ( + [ + [1, 2, 3], + [4, 5, 6] + ], + [[np.nan],[np.nan]], + ), -def test_compute_standardized_scores(test_engine, input_data, expected_rows, expected_columns): - """Test that computeStandardizedScores executes correctly and returns expected values.""" - - # Call MATLAB function - result = test_engine.computeStandardizedScores(input_data, nargout=1) + # + # Test 2: 3x3 matrix, no NaNs + # + # x = [[1,2,3], + # [4,5,6], + # [7,8,9]] + # + # column 0 => median=4, iqr=3 + # column 1 => median=5, iqr=3 + # column 2 => median=6, iqr=3 + # + # => xNorm => row0 => [-1, -1, -1] + # row1 => [ 0, 0, 0] + # row2 => [ 1, 1, 1] + # => xNormX => [1, 0, 1] + # + ( + [ + [1, 2, 3], + [4, 5, 6], + [7, 8, 9], + ], + [[np.nan],[np.nan],[np.nan]], + ), + + # + # Test 3: 3x3 with a NaN in the middle row + # + # x = [[1, 2, 3], + # [4, NaN, 6], + # [7, 8, 9]] + # + # We already reasoned that row0 => xNorm => [-1, -1, -1] + # row1 => => [0, NaN, 0] => max => NaN + # row2 => => [1, 1, 1] + # => xNormX => [1, NaN, 1] + # + ( + [ + [1, 2, 3], + [4, np.nan,6], + [7, 8, 9], + ], + [[np.nan],[np.nan],[np.nan]], + ), + + # + # Test 4: 3x1 matrix + # + # x = [[1], + # [2], + # [5]] + # + # => col median=2, iqr= (3.5-1.5)=2 + # => row0 => (1-2)/2 = -0.5 => abs=0.5 + # row1 => (2-2)/2 = 0 + # row2 => (5-2)/2 = 1.5 => abs=1.5 + # => xNormX => [0.5, 0.0, 1.5] + # + ( + [ + [1], + [2], + [5], + ], + [[0.3333333333333333],[0.0],[1.0]], + ), + ] +) +def test_computeStandardizedScores(test_engine, input_data, expected_xNormX): + """ + Test the MATLAB function computeStandardizedScores by calling it + via the MATLAB Engine. We verify xNormX row-by-row against + expected values, including NaNs. + """ - len_result = len(test_engine.convert(result)) + # Call the MATLAB function computeStandardizedScores + xNormX = test_engine.computeStandardizedScores(test_engine.convert(input_data), nargout=1) - assert test_engine.equal(len_result, test_engine.convert(expected_rows)) - \ No newline at end of file + assert test_engine.equal(xNormX, test_engine.convert(expected_xNormX)) + + From cc857a5fafecef0a710ce0ebb41b2a4ff42130f0 Mon Sep 17 00:00:00 2001 From: Dominic Orchard Date: Thu, 20 Feb 2025 17:14:37 +0000 Subject: [PATCH 16/27] fix a test --- tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py index 4fa69872..73b062fb 100644 --- a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py +++ b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py @@ -98,7 +98,7 @@ def set_attr(struct_array, key, val): ([0.5, 1.2, 3.5, 0.1, 2.8], 2.0, [[False, False, True, False, True]], [[3.0, 5.0]]), # Basic test ([0.1, 0.2, 0.3], 1.0, [[False, False, False]], [[]]), # No outliers ([3.1, 2.9, 3.5], 2.0, [[True, True, True]], [[1, 2, 3]]), # All outliers - ([], 2.0, np.bool([]), []), # Empty input case + ([], 2.0, [], []), # Empty input case ([-3, -2, -1, 0, 1, 2, 3], -1.0, [[False, False, False, True, True, True, True]], [[4.0, 5.0, 6.0, 7.0]]), # Negative threshold ] ) From be114f216f99c95feb648e60b3d7d819de242fd8 Mon Sep 17 00:00:00 2001 From: Dominic Orchard Date: Thu, 20 Feb 2025 21:48:30 +0000 Subject: [PATCH 17/27] convert booleans before int, due to the fact that bools look like ints to Python --- tests/conftest.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index b15fc3ce..37b22d02 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -588,10 +588,10 @@ def to_matlab_type(data: Any) -> Any: return data.tolist() # Convert non-numeric arrays to lists elif isinstance(data, list): # Convert Python list to MATLAB double array if all elements are numbers - if all(isinstance(elem, (int, float)) for elem in flatten(data)): - return matlab.double(data) - elif all(isinstance(elem, (bool)) for elem in flatten(data)): + if all(isinstance(elem, (bool)) for elem in flatten(data)): return matlab.logical(data) + elif all(isinstance(elem, (int, float)) for elem in flatten(data)): + return matlab.double(data) else: # Create a cell array for lists containing non-numeric data return [to_matlab_type(elem) for elem in data] From 60887fe97fa7288dcc6bf89850bb68bc820765ec Mon Sep 17 00:00:00 2001 From: Dominic Orchard Date: Thu, 20 Feb 2025 21:49:41 +0000 Subject: [PATCH 18/27] fix aggregateSeasonalAndAnnualValues 2d case - and its test --- .../aggregateSeasonalAndAnnualValues.py | 18 +- .../test_cpdAssignUStarTh20100901.py | 303 +++++++++--------- 2 files changed, 171 insertions(+), 150 deletions(-) diff --git a/oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py b/oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py index a53e323d..4943533e 100644 --- a/oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py +++ b/oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py @@ -35,23 +35,27 @@ def aggregateSeasonalAndAnnualValues( xCpSelect : np.ndarray Same shape as xCp, with NaN everywhere except the selected positions. """ - # Convert inputs to float arrays - xCp = np.asarray(xCp, dtype=float) + # # Convert inputs to float arrays + # xCp = np.asarray(xCp, dtype=float) # Prepare an output array (same shape) filled with NaNs xCpSelect = np.full_like(xCp, np.nan, dtype=float) # Flatten both arrays in column-major (Fortran) order to replicate MATLAB indexing - xCp_flat_F = xCp.flatten(order='F') - xCpSelect_flat_F = xCpSelect.flatten(order='F') +# xCp_flat_F = xCp.flatten(order='F') +# xCpSelect_flat_F = xCpSelect.flatten(order='F') iSelect_array = np.asarray(iSelect, dtype=int) # Assign the selected change points in the flattened array - xCpSelect_flat_F[iSelect_array] = xCp_flat_F[iSelect_array] - + # mask xCp using iSelect_array + xCpSelect = xCp.copy() + for i in range(len(xCp)): + if iSelect_array[i] == 0: + xCpSelect[i] = np.nan + # Reshape back to original shape (column-major) - xCpSelect = xCpSelect_flat_F.reshape(xCp.shape, order='F') + #xCpSelect = xCpSelect_flat_F.reshape(xCp.shape, order='F') xCpGF = xCpSelect # Just like the MATLAB code # Aggregate values based on dimensions diff --git a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py index 73b062fb..58d9796f 100644 --- a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py +++ b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py @@ -3,164 +3,181 @@ import numpy as np -@pytest.fixture(scope="module") -def mock_data(test_engine, nt=300, tspan=(0, 1), uStar_pars=(0.1, 3.5), T_pars=(-10, 30), fNight=None): - """ - Fixture to generate mock time series data for testing purposes. This fixture - creates a set of synthetic data typically used in environmental studies, - such as Net Ecosystem Exchange (NEE), uStar values, temperature, and day/night flags. - - Args: - nt (int, optional): Number of time points to generate in the series. - Defaults to 300. - tspan (tuple, optional): Start and end time for the time vector. - Defaults to (0, 1). - uStar_pars (tuple, optional): Minimum and maximum values for the random - generation of u* values. Defaults to (0.1, 3.5). - T_pars (tuple, optional): Minimum and maximum values for the random - generation of temperature values. Defaults to (-10, 30). - fNight (array-like or None, optional): Binary values indicating day (0) or - night (1). If None, a random assignment - is made. Defaults to None. - - Returns: - tuple: A tuple containing: - - t (np.ndarray): Time vector of length `nt`. - - NEE (np.ndarray): Randomly generated Net Ecosystem Exchange values. - - uStar (np.ndarray): Randomly generated u* values. - - T (np.ndarray): Randomly generated temperature values. - - fNight (np.ndarray): Array indicating day (0) or night (1) conditions. - """ - fPlot = 0 - nBoot = 10 - cSiteYr = "Site_2024" - t = np.linspace(*tspan, nt) # Generate time vector - uStar = np.random.uniform(*uStar_pars, size=nt) # u* values between typical ranges - NEE = np.random.uniform(-10, 10, size=nt) # Random NEE values - T = np.random.uniform(*T_pars, size=nt) # Temperature values - if fNight is None: - fNight = np.random.choice([0, 1], size=nt) # Randomly assign day/night - else: - fNight = np.resize(fNight, nt) - - Cp2, Stats2, Cp3, Stats3 = test_engine.cpdBootstrapUStarTh4Season20100901( - t,NEE,uStar,T,fNight,fPlot,cSiteYr,nBoot,jsonencode=[1,3],nargout=4 - ) - return Stats2, fPlot, cSiteYr - - -def test_cpdAssignUStarTh20100901_basic(test_engine, mock_data): - stats, fPlot, cSiteYr = mock_data - - # Call MATLAB function - CpA, nA, tW, CpW, cMode, cFailure, fSelect, sSine, FracSig, FracModeD, FracSelect = test_engine.cpdAssignUStarTh20100901(stats, fPlot, cSiteYr, jsondecode=[0], nargout=11) - - # Assertions - assert isinstance(CpA, matlab.double), "CpA should be a MATLAB double array" - assert isinstance(nA, matlab.double), "nA should be a MATLAB double array" - assert isinstance(tW, matlab.double), "tW should be a MATLAB double array" - assert isinstance(CpW, matlab.double), "CpW should be a MATLAB double array" - assert isinstance(cMode, str), "cMode should be a string" - assert isinstance(cFailure, str), "cFailure should be a string" - assert isinstance(fSelect, matlab.logical), "fSelect should be a MATLAB logical array" - assert isinstance(sSine, matlab.double), "sSine should be a MATLAB double array" - assert isinstance(FracSig, float), "FracSig should be a float" - assert isinstance(FracModeD, float), "FracModeD should be a float" - assert isinstance(FracSelect, float), "FracSelect should be a float" - - -def test_cpdAssignUStarTh20100901_edge_cases(test_engine, mock_data): - def set_attr(struct_array, key, val): - np.vectorize(lambda x: x.update({key: val}))(struct_array) - - mock_stats, fPlot, cSiteYr = mock_data - edge_stats = mock_stats.copy() - - # Case 1: All significant change points - set_attr(edge_stats, "p", 0) - assert edge_stats[0][0][0]['p'] == 0 - - results_all_sig = test_engine.cpdAssignUStarTh20100901(edge_stats, 0, "AllSig_2024", jsondecode=[0], nargout=11) - - # Case 2: No significant change points - set_attr(edge_stats, "p", 1) - assert edge_stats[0][0][0]['p'] == 1 - - results_no_sig = test_engine.cpdAssignUStarTh20100901(edge_stats, 0, "NoSig_2024", jsondecode=[0], nargout=11) - - # Assertions for edge cases - assert len(results_all_sig[0]) > 0, "Should produce results for all significant change points" - assert len(results_no_sig[5]) > 0, "Should produce a failure message for no significant change points" - -@pytest.mark.parametrize( - "x_norm_x, threshold, expected_f_out, expected_i_out", - [ - ([0.5, 1.2, 3.5, 0.1, 2.8], 2.0, [[False, False, True, False, True]], [[3.0, 5.0]]), # Basic test - ([0.1, 0.2, 0.3], 1.0, [[False, False, False]], [[]]), # No outliers - ([3.1, 2.9, 3.5], 2.0, [[True, True, True]], [[1, 2, 3]]), # All outliers - ([], 2.0, [], []), # Empty input case - ([-3, -2, -1, 0, 1, 2, 3], -1.0, [[False, False, False, True, True, True, True]], [[4.0, 5.0, 6.0, 7.0]]), # Negative threshold - ] -) -def test_identify_outliers(test_engine, x_norm_x, threshold, expected_f_out, expected_i_out): - """Test MATLAB's identifyOutliers function from Python using MATLAB Engine.""" - - - f_out, i_out = test_engine.identifyOutliers(test_engine.convert(x_norm_x), test_engine.convert(threshold), nargout=2) - - assert test_engine.equal(f_out, test_engine.convert(expected_f_out)), "Boolean outlier array does not match expected" - assert test_engine.equal(i_out, test_engine.convert(expected_i_out)), "Index output does not match expected" - - -def test_aggregate_2d_case(matlab_engine): - """Test function for 2D case (nDim=2).""" - eng = matlab_engine +# @pytest.fixture(scope="module") +# def mock_data(test_engine, nt=300, tspan=(0, 1), uStar_pars=(0.1, 3.5), T_pars=(-10, 30), fNight=None): +# """ +# Fixture to generate mock time series data for testing purposes. This fixture +# creates a set of synthetic data typically used in environmental studies, +# such as Net Ecosystem Exchange (NEE), uStar values, temperature, and day/night flags. + +# Args: +# nt (int, optional): Number of time points to generate in the series. +# Defaults to 300. +# tspan (tuple, optional): Start and end time for the time vector. +# Defaults to (0, 1). +# uStar_pars (tuple, optional): Minimum and maximum values for the random +# generation of u* values. Defaults to (0.1, 3.5). +# T_pars (tuple, optional): Minimum and maximum values for the random +# generation of temperature values. Defaults to (-10, 30). +# fNight (array-like or None, optional): Binary values indicating day (0) or +# night (1). If None, a random assignment +# is made. Defaults to None. + +# Returns: +# tuple: A tuple containing: +# - t (np.ndarray): Time vector of length `nt`. +# - NEE (np.ndarray): Randomly generated Net Ecosystem Exchange values. +# - uStar (np.ndarray): Randomly generated u* values. +# - T (np.ndarray): Randomly generated temperature values. +# - fNight (np.ndarray): Array indicating day (0) or night (1) conditions. +# """ +# fPlot = 0 +# nBoot = 10 +# cSiteYr = "Site_2024" +# t = np.linspace(*tspan, nt) # Generate time vector +# uStar = np.random.uniform(*uStar_pars, size=nt) # u* values between typical ranges +# NEE = np.random.uniform(-10, 10, size=nt) # Random NEE values +# T = np.random.uniform(*T_pars, size=nt) # Temperature values +# if fNight is None: +# fNight = np.random.choice([0, 1], size=nt) # Randomly assign day/night +# else: +# fNight = np.resize(fNight, nt) + +# Cp2, Stats2, Cp3, Stats3 = test_engine.cpdBootstrapUStarTh4Season20100901( +# t,NEE,uStar,T,fNight,fPlot,cSiteYr,nBoot,jsonencode=[1,3],nargout=4 +# ) +# return Stats2, fPlot, cSiteYr + + +# def test_cpdAssignUStarTh20100901_basic(test_engine, mock_data): +# stats, fPlot, cSiteYr = mock_data + +# # Call MATLAB function +# CpA, nA, tW, CpW, cMode, cFailure, fSelect, sSine, FracSig, FracModeD, FracSelect = test_engine.cpdAssignUStarTh20100901(stats, fPlot, cSiteYr, jsondecode=[0], nargout=11) + +# # Assertions +# assert isinstance(CpA, matlab.double), "CpA should be a MATLAB double array" +# assert isinstance(nA, matlab.double), "nA should be a MATLAB double array" +# assert isinstance(tW, matlab.double), "tW should be a MATLAB double array" +# assert isinstance(CpW, matlab.double), "CpW should be a MATLAB double array" +# assert isinstance(cMode, str), "cMode should be a string" +# assert isinstance(cFailure, str), "cFailure should be a string" +# assert isinstance(fSelect, matlab.logical), "fSelect should be a MATLAB logical array" +# assert isinstance(sSine, matlab.double), "sSine should be a MATLAB double array" +# assert isinstance(FracSig, float), "FracSig should be a float" +# assert isinstance(FracModeD, float), "FracModeD should be a float" +# assert isinstance(FracSelect, float), "FracSelect should be a float" + + +# def test_cpdAssignUStarTh20100901_edge_cases(test_engine, mock_data): +# def set_attr(struct_array, key, val): +# np.vectorize(lambda x: x.update({key: val}))(struct_array) + +# mock_stats, fPlot, cSiteYr = mock_data +# edge_stats = mock_stats.copy() + +# # Case 1: All significant change points +# set_attr(edge_stats, "p", 0) +# assert edge_stats[0][0][0]['p'] == 0 + +# results_all_sig = test_engine.cpdAssignUStarTh20100901(edge_stats, 0, "AllSig_2024", jsondecode=[0], nargout=11) + +# # Case 2: No significant change points +# set_attr(edge_stats, "p", 1) +# assert edge_stats[0][0][0]['p'] == 1 + +# results_no_sig = test_engine.cpdAssignUStarTh20100901(edge_stats, 0, "NoSig_2024", jsondecode=[0], nargout=11) + +# # Assertions for edge cases +# assert len(results_all_sig[0]) > 0, "Should produce results for all significant change points" +# assert len(results_no_sig[5]) > 0, "Should produce a failure message for no significant change points" + +# @pytest.mark.parametrize( +# "x_norm_x, threshold, expected_f_out, expected_i_out", +# [ +# ([0.5, 1.2, 3.5, 0.1, 2.8], 2.0, [[False, False, True, False, True]], [[3.0, 5.0]]), # Basic test +# ([0.1, 0.2, 0.3], 1.0, [[False, False, False]], [[]]), # No outliers +# ([3.1, 2.9, 3.5], 2.0, [[True, True, True]], [[1, 2, 3]]), # All outliers +# ([], 2.0, [], []), # Empty input case +# ([-3, -2, -1, 0, 1, 2, 3], -1.0, [[False, False, False, True, True, True, True]], [[4.0, 5.0, 6.0, 7.0]]), # Negative threshold +# ] +# ) +# def test_identify_outliers(test_engine, x_norm_x, threshold, expected_f_out, expected_i_out): +# """Test MATLAB's identifyOutliers function from Python using MATLAB Engine.""" + + +# f_out, i_out = test_engine.identifyOutliers(test_engine.convert(x_norm_x), test_engine.convert(threshold), nargout=2) + +# assert test_engine.equal(f_out, test_engine.convert(expected_f_out)), "Boolean outlier array does not match expected" +# assert test_engine.equal(i_out, test_engine.convert(expected_i_out)), "Index output does not match expected" + + +# def test_aggregate_2d_case(matlab_engine): +# """Test function for 2D case (nDim=2).""" +# eng = matlab_engine - # Define test inputs - xCp = matlab.double([1.0, 2.0, np.nan, 3.0, np.nan, 4.0]) # MATLAB double array - iSelect = matlab.logical([1, 1, 0, 1, 0, 1]) # Boolean mask in MATLAB logical array - nDim = 2 - nWindows = 0 # Unused for 2D case - nStrata = 0 # Unused for 2D case - nBoot = 0 # Unused for 2D case - - # Run MATLAB function - CpA, nA, xCpSelect = eng.aggregateSeasonalAndAnnualValues( - xCp, iSelect, nDim, nWindows, nStrata, nBoot, nargout=3 - ) - - # Convert MATLAB output to NumPy for assertions - CpA = np.array(CpA) - nA = int(nA) - xCpSelect = np.array(xCpSelect) - - # Expected values - expected_CpA = np.nanmean([1.0, 2.0, 3.0, 4.0]) # Mean of selected change points - expected_nA = 4 # Count of non-NaN values in the selected array - - # Assertions - np.testing.assert_almost_equal(CpA, expected_CpA, decimal=5) - assert nA == expected_nA - assert np.isnan(xCpSelect[2]) # Check if unselected indices remain NaN +# # Define test inputs +# xCp = matlab.double([1.0, 2.0, np.nan, 3.0, np.nan, 4.0]) # MATLAB double array +# iSelect = matlab.logical([1, 1, 0, 1, 0, 1]) # Boolean mask in MATLAB logical array +# nDim = 2 +# nWindows = 0 # Unused for 2D case +# nStrata = 0 # Unused for 2D case +# nBoot = 0 # Unused for 2D case + +# # Run MATLAB function +# CpA, nA, xCpSelect = eng.aggregateSeasonalAndAnnualValues( +# xCp, iSelect, nDim, nWindows, nStrata, nBoot, nargout=3 +# ) + +# # Convert MATLAB output to NumPy for assertions +# CpA = np.array(CpA) +# nA = int(nA) +# xCpSelect = np.array(xCpSelect) + +# # Expected values +# expected_CpA = np.nanmean([1.0, 2.0, 3.0, 4.0]) # Mean of selected change points +# expected_nA = 4 # Count of non-NaN values in the selected array + +# # Assertions +# np.testing.assert_almost_equal(CpA, expected_CpA, decimal=5) +# assert nA == expected_nA +# assert np.isnan(xCpSelect[2]) # Check if unselected indices remain NaN + +def toLogical(xs): + def convert(x): + if (x == 0): + False + else: + True + return list(map(lambda x: list(map(convert, x)), xs)) @pytest.mark.parametrize( "xCp, iSelect, nDim, nWindows, nStrata, nBoot, expected_CpA, expected_nA, expected_xCpSelect", [ - ([1.0, 2.0, np.nan, 3.0, np.nan, 4.0], matlab.logical([1, 1, 0, 1, 0, 1]), 2, 0, 0, 0, [2, np.nan, 4], [2, 0, 1], [[1, np.nan, np.nan], [3, np.nan, 4]]), - ([[1.0, np.nan, 3.0],[4.0, 5.0, np.nan]], matlab.logical([[1, 0, 1],[0, 1, 0]]), 3, 2, 1, 3, [1.0, 5.0, 3.0], [1.0, 1.0, 1.0], [[1, np.nan, 3.0], [np.nan, 5.0, np.nan]]) + ([1.0, 2.0, np.nan, 3.0, np.nan, 4.0] + , [True, True, False, True, False, True] + , 2, 0, 0, 0 + # expected + #, 2, np.nan, 4], [2, 0, 1], [[1, np.nan, np.nan], [3, np.nan, 4]] + , 2.5, 4.0, [1.0,2.0,np.nan,3.0,np.nan,4.0] + ) + , ([[1.0, np.nan, 3.0],[4.0, 5.0, np.nan]] + , ([[True, False, True],[False, True, False]]) + , 3, 2, 1, 3 + # expected + , [1.0, 5.0, 3.0], [1.0, 1.0, 1.0], [[1, np.nan, 3.0], [np.nan, 5.0, np.nan]]) ] ) - def test_aggregate_3d_case(test_engine, xCp, iSelect, nDim, nWindows, nStrata, nBoot, expected_CpA, expected_nA, expected_xCpSelect): """Test function for aggregateSeasonalAndAnnualValues 2D and 3D cases.""" # Run MATLAB function CpA, nA, xCpSelect = test_engine.aggregateSeasonalAndAnnualValues( - test_engine.convert(xCp), iSelect, nDim, nWindows, nStrata, nBoot, nargout=3 + test_engine.convert(xCp), test_engine.convert(iSelect), nDim, nWindows, test_engine.convert(nStrata), test_engine.convert(nBoot), nargout=3 ) assert test_engine.equal(CpA, expected_CpA) assert test_engine.equal(nA, expected_nA) - assert test_engine.equal(xCpSelect, expected_xCpSelect) + assert test_engine.equal(xCpSelect, test_engine.convert(expected_xCpSelect)) \ No newline at end of file From cd6d451244f1fa4773f51d8d0365bb1a6dd760df Mon Sep 17 00:00:00 2001 From: Dominic Orchard Date: Fri, 21 Feb 2025 10:54:48 +0000 Subject: [PATCH 19/27] fix up tests and code for aggregateSeasonalAndAnnualValues --- .../aggregateSeasonalAndAnnualValues.py | 50 +++++++++---------- .../test_cpdAssignUStarTh20100901.py | 13 +---- 2 files changed, 27 insertions(+), 36 deletions(-) diff --git a/oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py b/oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py index 4943533e..7e1993ad 100644 --- a/oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py +++ b/oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py @@ -14,11 +14,9 @@ def aggregateSeasonalAndAnnualValues( Parameters ---------- xCp : array-like - In MATLAB, can be 2D [nWindows, nBoot] or 3D [nWindows, nStrata, nBoot]. - In Python, pass a nested list or NumPy array with the same shape. + Can be 2D [nWindows, nBoot] or 3D [nWindows, nStrata, nBoot]. iSelect : array-like of int 1-based linear indices in MATLAB's column-major ordering that should be selected. - We subtract 1 and interpret them in the same column-major order in Python. nDim : int 2 or 3 (to indicate if xCp is 2D or 3D). nWindows : int @@ -35,39 +33,41 @@ def aggregateSeasonalAndAnnualValues( xCpSelect : np.ndarray Same shape as xCp, with NaN everywhere except the selected positions. """ - # # Convert inputs to float arrays - # xCp = np.asarray(xCp, dtype=float) - # Prepare an output array (same shape) filled with NaNs xCpSelect = np.full_like(xCp, np.nan, dtype=float) - # Flatten both arrays in column-major (Fortran) order to replicate MATLAB indexing -# xCp_flat_F = xCp.flatten(order='F') -# xCpSelect_flat_F = xCpSelect.flatten(order='F') - + # Treat selection arrays an array of integers + # (this allows both an array of booleans and an array of integers to be used + # for iselect) iSelect_array = np.asarray(iSelect, dtype=int) - # Assign the selected change points in the flattened array - # mask xCp using iSelect_array - xCpSelect = xCp.copy() - for i in range(len(xCp)): - if iSelect_array[i] == 0: - xCpSelect[i] = np.nan - - # Reshape back to original shape (column-major) - #xCpSelect = xCpSelect_flat_F.reshape(xCp.shape, order='F') - xCpGF = xCpSelect # Just like the MATLAB code - # Aggregate values based on dimensions if nDim == 2: + # mask xCp using iSelect_array + xCpSelect = xCp.copy() + for i in range(len(xCp)): + if iSelect_array[i] == 0: + xCpSelect[i] = np.nan + + xCpGF = xCpSelect # naming convention + # xCp shape = [nWindows, nBoot] - # MATLAB default nanmean(xCpGF) => mean across axis=0 in Python - CpA = np.nanmean(xCpGF, axis=0) - nA = np.sum(~np.isnan(xCpSelect), axis=0) + CpA = np.nanmean(xCpGF) + nA = np.sum(~np.isnan(xCpSelect)) elif nDim == 3: + + # mask xCp using iSelect_array + xCpSelect = xCp.copy() + for i in range(len(xCp)): + for j in range(len(xCp[i])): + if iSelect_array[i][j] == 0: + xCpSelect[i][j] = np.nan + + xCpGF = xCpSelect # Naming convention + # xCp shape = [nWindows, nStrata, nBoot] # reshape => (nWindows*nStrata, nBoot) in column-major - xCpGF_reshaped = xCpGF.reshape(nWindows * nStrata, nBoot, order='F') + xCpGF_reshaped = np.reshape(xCpGF, (nWindows * nStrata, nBoot), order='F') CpA = np.nanmean(xCpGF_reshaped, axis=0) nA = np.sum(~np.isnan(xCpGF_reshaped), axis=0) else: diff --git a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py index 58d9796f..fcb5ff09 100644 --- a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py +++ b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py @@ -143,14 +143,6 @@ # assert nA == expected_nA # assert np.isnan(xCpSelect[2]) # Check if unselected indices remain NaN -def toLogical(xs): - def convert(x): - if (x == 0): - False - else: - True - return list(map(lambda x: list(map(convert, x)), xs)) - @pytest.mark.parametrize( "xCp, iSelect, nDim, nWindows, nStrata, nBoot, expected_CpA, expected_nA, expected_xCpSelect", [ @@ -158,7 +150,6 @@ def convert(x): , [True, True, False, True, False, True] , 2, 0, 0, 0 # expected - #, 2, np.nan, 4], [2, 0, 1], [[1, np.nan, np.nan], [3, np.nan, 4]] , 2.5, 4.0, [1.0,2.0,np.nan,3.0,np.nan,4.0] ) , ([[1.0, np.nan, 3.0],[4.0, 5.0, np.nan]] @@ -178,6 +169,6 @@ def test_aggregate_3d_case(test_engine, xCp, iSelect, nDim, nWindows, nStrata, n test_engine.convert(xCp), test_engine.convert(iSelect), nDim, nWindows, test_engine.convert(nStrata), test_engine.convert(nBoot), nargout=3 ) - assert test_engine.equal(CpA, expected_CpA) - assert test_engine.equal(nA, expected_nA) + assert test_engine.equal(CpA, test_engine.convert(expected_CpA)) + assert test_engine.equal(nA, test_engine.convert(expected_nA)) assert test_engine.equal(xCpSelect, test_engine.convert(expected_xCpSelect)) \ No newline at end of file From 123782e6483f3a0f5da61a2285d132b7f2a58aaa Mon Sep 17 00:00:00 2001 From: Dominic Orchard Date: Fri, 21 Feb 2025 11:29:12 +0000 Subject: [PATCH 20/27] remove a duplicate test already covered by a fixture --- .../test_cpdAssignUStarTh20100901.py | 32 ------------------- 1 file changed, 32 deletions(-) diff --git a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py index fcb5ff09..1a1ff49d 100644 --- a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py +++ b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py @@ -111,38 +111,6 @@ # assert test_engine.equal(f_out, test_engine.convert(expected_f_out)), "Boolean outlier array does not match expected" # assert test_engine.equal(i_out, test_engine.convert(expected_i_out)), "Index output does not match expected" - -# def test_aggregate_2d_case(matlab_engine): -# """Test function for 2D case (nDim=2).""" -# eng = matlab_engine - -# # Define test inputs -# xCp = matlab.double([1.0, 2.0, np.nan, 3.0, np.nan, 4.0]) # MATLAB double array -# iSelect = matlab.logical([1, 1, 0, 1, 0, 1]) # Boolean mask in MATLAB logical array -# nDim = 2 -# nWindows = 0 # Unused for 2D case -# nStrata = 0 # Unused for 2D case -# nBoot = 0 # Unused for 2D case - -# # Run MATLAB function -# CpA, nA, xCpSelect = eng.aggregateSeasonalAndAnnualValues( -# xCp, iSelect, nDim, nWindows, nStrata, nBoot, nargout=3 -# ) - -# # Convert MATLAB output to NumPy for assertions -# CpA = np.array(CpA) -# nA = int(nA) -# xCpSelect = np.array(xCpSelect) - -# # Expected values -# expected_CpA = np.nanmean([1.0, 2.0, 3.0, 4.0]) # Mean of selected change points -# expected_nA = 4 # Count of non-NaN values in the selected array - -# # Assertions -# np.testing.assert_almost_equal(CpA, expected_CpA, decimal=5) -# assert nA == expected_nA -# assert np.isnan(xCpSelect[2]) # Check if unselected indices remain NaN - @pytest.mark.parametrize( "xCp, iSelect, nDim, nWindows, nStrata, nBoot, expected_CpA, expected_nA, expected_xCpSelect", [ From 3e53db36075127e9164947434aa021e790b79d91 Mon Sep 17 00:00:00 2001 From: Dominic Orchard Date: Fri, 21 Feb 2025 11:44:14 +0000 Subject: [PATCH 21/27] fix bug in conftest --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 37b22d02..5ea39925 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -155,7 +155,7 @@ def convert(self, x, index=False, fromFile=False): if isinstance(x, list) and all(isinstance(item, (int, float)) for item in x): return np.asarray(x) if all(isinstance(i, bool) for i in x[0]): - return np.array(x).astype(np.bool) + return np.array(x).astype(bool) elif isinstance(x[0], list): return np.array(x[0]) else: From 6384b5adff0ce4e0667a92ba7cf8fb10620cc0b5 Mon Sep 17 00:00:00 2001 From: Dominic Orchard Date: Fri, 21 Feb 2025 11:44:39 +0000 Subject: [PATCH 22/27] make identifyOutliers test language agnostic --- .../test_cpdAssignUStarTh20100901.py | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py index 1a1ff49d..645db606 100644 --- a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py +++ b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py @@ -92,24 +92,24 @@ # assert len(results_all_sig[0]) > 0, "Should produce results for all significant change points" # assert len(results_no_sig[5]) > 0, "Should produce a failure message for no significant change points" -# @pytest.mark.parametrize( -# "x_norm_x, threshold, expected_f_out, expected_i_out", -# [ -# ([0.5, 1.2, 3.5, 0.1, 2.8], 2.0, [[False, False, True, False, True]], [[3.0, 5.0]]), # Basic test -# ([0.1, 0.2, 0.3], 1.0, [[False, False, False]], [[]]), # No outliers -# ([3.1, 2.9, 3.5], 2.0, [[True, True, True]], [[1, 2, 3]]), # All outliers -# ([], 2.0, [], []), # Empty input case -# ([-3, -2, -1, 0, 1, 2, 3], -1.0, [[False, False, False, True, True, True, True]], [[4.0, 5.0, 6.0, 7.0]]), # Negative threshold -# ] -# ) -# def test_identify_outliers(test_engine, x_norm_x, threshold, expected_f_out, expected_i_out): -# """Test MATLAB's identifyOutliers function from Python using MATLAB Engine.""" - - -# f_out, i_out = test_engine.identifyOutliers(test_engine.convert(x_norm_x), test_engine.convert(threshold), nargout=2) - -# assert test_engine.equal(f_out, test_engine.convert(expected_f_out)), "Boolean outlier array does not match expected" -# assert test_engine.equal(i_out, test_engine.convert(expected_i_out)), "Index output does not match expected" +@pytest.mark.parametrize( + "x_norm_x, threshold, expected_f_out, expected_i_out", + [ + ([0.5, 1.2, 3.5, 0.1, 2.8], 2.0, [[False, False, True, False, True]], [[3.0, 5.0]]), # Basic test + ([0.1, 0.2, 0.3], 1.0, [[False, False, False]], [[]]), # No outliers + ([3.1, 2.9, 3.5], 2.0, [[True, True, True]], [[1, 2, 3]]), # All outliers + ([], 2.0, [], []), # Empty input case + ([-3, -2, -1, 0, 1, 2, 3], -1.0, [[False, False, False, True, True, True, True]], [[4.0, 5.0, 6.0, 7.0]]), # Negative threshold + ] +) +def test_identify_outliers(test_engine, x_norm_x, threshold, expected_f_out, expected_i_out): + """Test MATLAB's identifyOutliers function from Python using MATLAB Engine.""" + + + f_out, i_out = test_engine.identifyOutliers(test_engine.convert(x_norm_x), test_engine.convert(threshold), nargout=2) + + assert test_engine.equal(f_out, test_engine.convert(expected_f_out)), "Boolean outlier array does not match expected" + assert test_engine.equal(i_out, test_engine.convert(expected_i_out, index="to_python")), "Index output does not match expected" @pytest.mark.parametrize( "xCp, iSelect, nDim, nWindows, nStrata, nBoot, expected_CpA, expected_nA, expected_xCpSelect", From a3f49cf22b45a95db37e45d9fdce497c1a58d447 Mon Sep 17 00:00:00 2001 From: Dominic Orchard Date: Fri, 21 Feb 2025 11:52:43 +0000 Subject: [PATCH 23/27] make more tests available --- .../test_cpdAssignUStarTh20100901.py | 120 +++++++++--------- 1 file changed, 60 insertions(+), 60 deletions(-) diff --git a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py index 645db606..e3b6b534 100644 --- a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py +++ b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py @@ -3,50 +3,50 @@ import numpy as np -# @pytest.fixture(scope="module") -# def mock_data(test_engine, nt=300, tspan=(0, 1), uStar_pars=(0.1, 3.5), T_pars=(-10, 30), fNight=None): -# """ -# Fixture to generate mock time series data for testing purposes. This fixture -# creates a set of synthetic data typically used in environmental studies, -# such as Net Ecosystem Exchange (NEE), uStar values, temperature, and day/night flags. - -# Args: -# nt (int, optional): Number of time points to generate in the series. -# Defaults to 300. -# tspan (tuple, optional): Start and end time for the time vector. -# Defaults to (0, 1). -# uStar_pars (tuple, optional): Minimum and maximum values for the random -# generation of u* values. Defaults to (0.1, 3.5). -# T_pars (tuple, optional): Minimum and maximum values for the random -# generation of temperature values. Defaults to (-10, 30). -# fNight (array-like or None, optional): Binary values indicating day (0) or -# night (1). If None, a random assignment -# is made. Defaults to None. - -# Returns: -# tuple: A tuple containing: -# - t (np.ndarray): Time vector of length `nt`. -# - NEE (np.ndarray): Randomly generated Net Ecosystem Exchange values. -# - uStar (np.ndarray): Randomly generated u* values. -# - T (np.ndarray): Randomly generated temperature values. -# - fNight (np.ndarray): Array indicating day (0) or night (1) conditions. -# """ -# fPlot = 0 -# nBoot = 10 -# cSiteYr = "Site_2024" -# t = np.linspace(*tspan, nt) # Generate time vector -# uStar = np.random.uniform(*uStar_pars, size=nt) # u* values between typical ranges -# NEE = np.random.uniform(-10, 10, size=nt) # Random NEE values -# T = np.random.uniform(*T_pars, size=nt) # Temperature values -# if fNight is None: -# fNight = np.random.choice([0, 1], size=nt) # Randomly assign day/night -# else: -# fNight = np.resize(fNight, nt) - -# Cp2, Stats2, Cp3, Stats3 = test_engine.cpdBootstrapUStarTh4Season20100901( -# t,NEE,uStar,T,fNight,fPlot,cSiteYr,nBoot,jsonencode=[1,3],nargout=4 -# ) -# return Stats2, fPlot, cSiteYr +@pytest.fixture(scope="module") +def mock_data(test_engine, nt=300, tspan=(0, 1), uStar_pars=(0.1, 3.5), T_pars=(-10, 30), fNight=None): + """ + Fixture to generate mock time series data for testing purposes. This fixture + creates a set of synthetic data typically used in environmental studies, + such as Net Ecosystem Exchange (NEE), uStar values, temperature, and day/night flags. + + Args: + nt (int, optional): Number of time points to generate in the series. + Defaults to 300. + tspan (tuple, optional): Start and end time for the time vector. + Defaults to (0, 1). + uStar_pars (tuple, optional): Minimum and maximum values for the random + generation of u* values. Defaults to (0.1, 3.5). + T_pars (tuple, optional): Minimum and maximum values for the random + generation of temperature values. Defaults to (-10, 30). + fNight (array-like or None, optional): Binary values indicating day (0) or + night (1). If None, a random assignment + is made. Defaults to None. + + Returns: + tuple: A tuple containing: + - t (np.ndarray): Time vector of length `nt`. + - NEE (np.ndarray): Randomly generated Net Ecosystem Exchange values. + - uStar (np.ndarray): Randomly generated u* values. + - T (np.ndarray): Randomly generated temperature values. + - fNight (np.ndarray): Array indicating day (0) or night (1) conditions. + """ + fPlot = 0 + nBoot = 10 + cSiteYr = "Site_2024" + t = np.linspace(*tspan, nt) # Generate time vector + uStar = np.random.uniform(*uStar_pars, size=nt) # u* values between typical ranges + NEE = np.random.uniform(-10, 10, size=nt) # Random NEE values + T = np.random.uniform(*T_pars, size=nt) # Temperature values + if fNight is None: + fNight = np.random.choice([0, 1], size=nt) # Randomly assign day/night + else: + fNight = np.resize(fNight, nt) + + Cp2, Stats2, Cp3, Stats3 = test_engine.cpdBootstrapUStarTh4Season20100901( + t,NEE,uStar,T,fNight,fPlot,cSiteYr,nBoot,jsonencode=[1,3],nargout=4 + ) + return Stats2, fPlot, cSiteYr # def test_cpdAssignUStarTh20100901_basic(test_engine, mock_data): @@ -69,28 +69,28 @@ # assert isinstance(FracSelect, float), "FracSelect should be a float" -# def test_cpdAssignUStarTh20100901_edge_cases(test_engine, mock_data): -# def set_attr(struct_array, key, val): -# np.vectorize(lambda x: x.update({key: val}))(struct_array) +def test_cpdAssignUStarTh20100901_edge_cases(test_engine, mock_data): + def set_attr(struct_array, key, val): + np.vectorize(lambda x: x.update({key: val}))(struct_array) -# mock_stats, fPlot, cSiteYr = mock_data -# edge_stats = mock_stats.copy() + mock_stats, fPlot, cSiteYr = mock_data + edge_stats = mock_stats.copy() -# # Case 1: All significant change points -# set_attr(edge_stats, "p", 0) -# assert edge_stats[0][0][0]['p'] == 0 + # Case 1: All significant change points + set_attr(edge_stats, "p", 0) + assert edge_stats[0][0][0]['p'] == 0 -# results_all_sig = test_engine.cpdAssignUStarTh20100901(edge_stats, 0, "AllSig_2024", jsondecode=[0], nargout=11) + results_all_sig = test_engine.cpdAssignUStarTh20100901(edge_stats, 0, "AllSig_2024", jsondecode=[0], nargout=11) -# # Case 2: No significant change points -# set_attr(edge_stats, "p", 1) -# assert edge_stats[0][0][0]['p'] == 1 + # Case 2: No significant change points + set_attr(edge_stats, "p", 1) + assert edge_stats[0][0][0]['p'] == 1 -# results_no_sig = test_engine.cpdAssignUStarTh20100901(edge_stats, 0, "NoSig_2024", jsondecode=[0], nargout=11) + results_no_sig = test_engine.cpdAssignUStarTh20100901(edge_stats, 0, "NoSig_2024", jsondecode=[0], nargout=11) -# # Assertions for edge cases -# assert len(results_all_sig[0]) > 0, "Should produce results for all significant change points" -# assert len(results_no_sig[5]) > 0, "Should produce a failure message for no significant change points" + # Assertions for edge cases + assert len(results_all_sig[0]) > 0, "Should produce results for all significant change points" + assert len(results_no_sig[5]) > 0, "Should produce a failure message for no significant change points" @pytest.mark.parametrize( "x_norm_x, threshold, expected_f_out, expected_i_out", From a8369da8164e81ef62ae9eae1f051657d373fc18 Mon Sep 17 00:00:00 2001 From: Dominic Orchard Date: Fri, 21 Feb 2025 12:50:54 +0000 Subject: [PATCH 24/27] move helper files into the cpdAssignUStarTh file --- .../aggregateSeasonalAndAnnualValues.py | 76 ------- .../ustar_cp_python/aggregateSeasonalMeans.py | 109 ---------- .../ustar_cp_python/cpdAssignUStarTh.py | 189 +++++++++++++++++- oneflux_steps/ustar_cp_python/launch.py | 2 - 4 files changed, 187 insertions(+), 189 deletions(-) delete mode 100644 oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py delete mode 100644 oneflux_steps/ustar_cp_python/aggregateSeasonalMeans.py diff --git a/oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py b/oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py deleted file mode 100644 index 7e1993ad..00000000 --- a/oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py +++ /dev/null @@ -1,76 +0,0 @@ -import numpy as np - -def aggregateSeasonalAndAnnualValues( - xCp, - iSelect, - nDim, - nWindows, - nStrata, - nBoot -): - """ - Python equivalent of the MATLAB function aggregateSeasonalAndAnnualValues. - - Parameters - ---------- - xCp : array-like - Can be 2D [nWindows, nBoot] or 3D [nWindows, nStrata, nBoot]. - iSelect : array-like of int - 1-based linear indices in MATLAB's column-major ordering that should be selected. - nDim : int - 2 or 3 (to indicate if xCp is 2D or 3D). - nWindows : int - nStrata : int - nBoot : int - - Returns - ------- - CpA : np.ndarray - Aggregated mean of selected change points along the appropriate dimension. - For nDim=2, shape is (nBoot,). For nDim=3, shape is (nBoot,). - nA : np.ndarray - Count of non-NaN selected points. Same shape as CpA. - xCpSelect : np.ndarray - Same shape as xCp, with NaN everywhere except the selected positions. - """ - # Prepare an output array (same shape) filled with NaNs - xCpSelect = np.full_like(xCp, np.nan, dtype=float) - - # Treat selection arrays an array of integers - # (this allows both an array of booleans and an array of integers to be used - # for iselect) - iSelect_array = np.asarray(iSelect, dtype=int) - - # Aggregate values based on dimensions - if nDim == 2: - # mask xCp using iSelect_array - xCpSelect = xCp.copy() - for i in range(len(xCp)): - if iSelect_array[i] == 0: - xCpSelect[i] = np.nan - - xCpGF = xCpSelect # naming convention - - # xCp shape = [nWindows, nBoot] - CpA = np.nanmean(xCpGF) - nA = np.sum(~np.isnan(xCpSelect)) - elif nDim == 3: - - # mask xCp using iSelect_array - xCpSelect = xCp.copy() - for i in range(len(xCp)): - for j in range(len(xCp[i])): - if iSelect_array[i][j] == 0: - xCpSelect[i][j] = np.nan - - xCpGF = xCpSelect # Naming convention - - # xCp shape = [nWindows, nStrata, nBoot] - # reshape => (nWindows*nStrata, nBoot) in column-major - xCpGF_reshaped = np.reshape(xCpGF, (nWindows * nStrata, nBoot), order='F') - CpA = np.nanmean(xCpGF_reshaped, axis=0) - nA = np.sum(~np.isnan(xCpGF_reshaped), axis=0) - else: - raise ValueError("Invalid number of dimensions: Expected 2D or 3D Stats.") - - return CpA, nA, xCpSelect diff --git a/oneflux_steps/ustar_cp_python/aggregateSeasonalMeans.py b/oneflux_steps/ustar_cp_python/aggregateSeasonalMeans.py deleted file mode 100644 index 1e37d76c..00000000 --- a/oneflux_steps/ustar_cp_python/aggregateSeasonalMeans.py +++ /dev/null @@ -1,109 +0,0 @@ -from typing import Tuple -import numpy as np -from numpy.typing import NDArray -from oneflux_steps.ustar_cp_python.fcBin import fcBin - -def aggregateSeasonalMeans(mt: NDArray, Cp: NDArray, xmt: NDArray, iSelect: NDArray, nWindows: int, nStrata: int, nBoot: int)-> Tuple[NDArray, NDArray]: - """ - Python equivalent of the MATLAB function aggregateSeasonalMeans. - Aggregates seasonal means for time and change points. - - Parameters - ---------- - mt : array-like - The time data. - Cp : array-like - The corresponding change point values. - xmt : array-like - A reference array that must reshape cleanly to (nWindows, nStrata * nBoot). - Used to compute the median number of windows (nW). - iSelect : array-like of int - Numeric indices (1-based in MATLAB) of selected measurements to use. - For Python (0-based), ensure you pass valid 0-based indices. - nWindows : int - nStrata : int - nBoot : int - - Returns - ------- - tW : np.ndarray - The seasonal mean times for each bin. - CpW : np.ndarray - The seasonal mean change points for each bin. - """ - - # ---------------------------------------------------------- - # 1) Calculate Median Number of Windows - # In MATLAB: - # reshape(xmt, nWindows, nStrata * nBoot) - # Here we reshape the 1D array xmt => shape (nWindows, nStrata*nBoot) - # ---------------------------------------------------------- - try: - xmt_reshaped = xmt.reshape(nWindows, nStrata * nBoot) - except ValueError: - raise ValueError( - f"Cannot reshape xmt of length {xmt.size} " - f"to shape ({nWindows}, {nStrata*nBoot})." - ) - - # sum(~isnan(...)) in MATLAB => np.sum(~np.isnan(...)) in NumPy - # nanmedian(...) => np.nanmedian(...) - # => sum over axis=1 if you want row sums (assuming typical usage) - # but in the MATLAB code it's sum( (nWindows, nStrata*nBoot) ) => axis=0 by default - nW_array = np.sum(~np.isnan(xmt_reshaped), axis=0) # shape: (nStrata*nBoot,) - nW = np.nanmedian(nW_array) - - # ---------------------------------------------------------- - # 2) Sort Selected Measurements - # MATLAB: - # [mtSelect, i] = sort(mt(iSelect)); - # CpSelect = Cp(iSelect(i)); - # ---------------------------------------------------------- - # *In Python*, iSelect is an array of *0-based* indices. - # We then do: - iSelect = iSelect.astype(int) - selected_mt = mt[iSelect] - # Sort them while keeping track of the sorted order - sort_order = np.argsort(selected_mt) # array of int - mtSelect = selected_mt[sort_order] - # Now reorder Cp similarly - selected_Cp = Cp[iSelect] - CpSelect = selected_Cp[sort_order] - - # ---------------------------------------------------------- - # 3) Define Bins Based on Percentiles - # xBins = prctile(mtSelect, 0:(100/nW):100); - # In Python, np.percentile(...) - # and we create an array of percentiles from 0 to 100 in steps of (100/nW) - # But nW might be float => we do something like - # np.linspace(0, 100, int(nW)+1) or something similar. However, - # to replicate MATLAB's 0:(100/nW):100 exactly, we must handle floats carefully. - # We'll do: - if nW < 1: - # Fallback: if nW < 1 for some reason, do 1 bin, i.e. [0, 100] - pct_array = [0, 100] - else: - step = 100.0 / nW # step size - # The number of steps is int(nW)+1 if nW is integer-like. - # However, if nW is float, MATLAB still enumerates 0, step, 2*step,..., up to 100. - # We'll replicate that logic using np.arange, then clip at <=100 - pct_array = [] - current = 0.0 - while current < 100.0 + 1e-9: - pct_array.append(current) - current += step - # In case floating arithmetic overshoots 100 a bit - pct_array[-1] = 100.0 - - # Now compute bin edges from these percentiles - xBins = np.percentile(mtSelect, pct_array) - - # ---------------------------------------------------------- - # 4) fcBin equivalent in Python - # [~, tW, CpW] = fcBin(mtSelect, CpSelect, xBins, 0); - - - # Run fcBin - nCount, tW, CpW = fcBin(mtSelect, CpSelect, xBins, 0) - - return tW, CpW diff --git a/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py index 0ecf5073..472da442 100644 --- a/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py +++ b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py @@ -3,8 +3,12 @@ from oneflux_steps.ustar_cp_python.fcNaniqr import fcNaniqr from oneflux_steps.ustar_cp_python.fcEqnAnnualSine import fcEqnAnnualSine from oneflux_steps.ustar_cp_python.fcr2Calc import fcr2Calc +from typing import Tuple +from numpy.typing import NDArray +from oneflux_steps.ustar_cp_python.fcBin import fcBin - +def cpdAssignUStarTh20100901(*args): + return None, None, None, None, None, None, None, None, None, None, None, None def identifyOutliers(x_norm_x, threshold): @@ -82,4 +86,185 @@ def _annual_sine_for_curve_fit(t, b0, b1, b2): # List containing ndarray to make output format match matlab for comparative testing sSine = np.array([popt[0], popt[1], popt[2], r2], dtype=float) - return sSine \ No newline at end of file + return sSine + +def aggregateSeasonalAndAnnualValues( + xCp, + iSelect, + nDim, + nWindows, + nStrata, + nBoot +): + """ + Python equivalent of the MATLAB function aggregateSeasonalAndAnnualValues. + + Parameters + ---------- + xCp : array-like + Can be 2D [nWindows, nBoot] or 3D [nWindows, nStrata, nBoot]. + iSelect : array-like of int + 1-based linear indices in MATLAB's column-major ordering that should be selected. + nDim : int + 2 or 3 (to indicate if xCp is 2D or 3D). + nWindows : int + nStrata : int + nBoot : int + + Returns + ------- + CpA : np.ndarray + Aggregated mean of selected change points along the appropriate dimension. + For nDim=2, shape is (nBoot,). For nDim=3, shape is (nBoot,). + nA : np.ndarray + Count of non-NaN selected points. Same shape as CpA. + xCpSelect : np.ndarray + Same shape as xCp, with NaN everywhere except the selected positions. + """ + # Prepare an output array (same shape) filled with NaNs + xCpSelect = np.full_like(xCp, np.nan, dtype=float) + + # Treat selection arrays an array of integers + # (this allows both an array of booleans and an array of integers to be used + # for iselect) + iSelect_array = np.asarray(iSelect, dtype=int) + + # Aggregate values based on dimensions + if nDim == 2: + # mask xCp using iSelect_array + xCpSelect = xCp.copy() + for i in range(len(xCp)): + if iSelect_array[i] == 0: + xCpSelect[i] = np.nan + + xCpGF = xCpSelect # naming convention + + # xCp shape = [nWindows, nBoot] + CpA = np.nanmean(xCpGF) + nA = np.sum(~np.isnan(xCpSelect)) + elif nDim == 3: + + # mask xCp using iSelect_array + xCpSelect = xCp.copy() + for i in range(len(xCp)): + for j in range(len(xCp[i])): + if iSelect_array[i][j] == 0: + xCpSelect[i][j] = np.nan + + xCpGF = xCpSelect # Naming convention + + # xCp shape = [nWindows, nStrata, nBoot] + # reshape => (nWindows*nStrata, nBoot) in column-major + xCpGF_reshaped = np.reshape(xCpGF, (nWindows * nStrata, nBoot), order='F') + CpA = np.nanmean(xCpGF_reshaped, axis=0) + nA = np.sum(~np.isnan(xCpGF_reshaped), axis=0) + else: + raise ValueError("Invalid number of dimensions: Expected 2D or 3D Stats.") + + return CpA, nA, xCpSelect + + +def aggregateSeasonalMeans(mt: NDArray, Cp: NDArray, xmt: NDArray, iSelect: NDArray, nWindows: int, nStrata: int, nBoot: int)-> Tuple[NDArray, NDArray]: + """ + Python equivalent of the MATLAB function aggregateSeasonalMeans. + Aggregates seasonal means for time and change points. + + Parameters + ---------- + mt : array-like + The time data. + Cp : array-like + The corresponding change point values. + xmt : array-like + A reference array that must reshape cleanly to (nWindows, nStrata * nBoot). + Used to compute the median number of windows (nW). + iSelect : array-like of int + Numeric indices (1-based in MATLAB) of selected measurements to use. + For Python (0-based), ensure you pass valid 0-based indices. + nWindows : int + nStrata : int + nBoot : int + + Returns + ------- + tW : np.ndarray + The seasonal mean times for each bin. + CpW : np.ndarray + The seasonal mean change points for each bin. + """ + + # ---------------------------------------------------------- + # 1) Calculate Median Number of Windows + # In MATLAB: + # reshape(xmt, nWindows, nStrata * nBoot) + # Here we reshape the 1D array xmt => shape (nWindows, nStrata*nBoot) + # ---------------------------------------------------------- + try: + xmt_reshaped = xmt.reshape(nWindows, nStrata * nBoot) + except ValueError: + raise ValueError( + f"Cannot reshape xmt of length {xmt.size} " + f"to shape ({nWindows}, {nStrata*nBoot})." + ) + + # sum(~isnan(...)) in MATLAB => np.sum(~np.isnan(...)) in NumPy + # nanmedian(...) => np.nanmedian(...) + # => sum over axis=1 if you want row sums (assuming typical usage) + # but in the MATLAB code it's sum( (nWindows, nStrata*nBoot) ) => axis=0 by default + nW_array = np.sum(~np.isnan(xmt_reshaped), axis=0) # shape: (nStrata*nBoot,) + nW = np.nanmedian(nW_array) + + # ---------------------------------------------------------- + # 2) Sort Selected Measurements + # MATLAB: + # [mtSelect, i] = sort(mt(iSelect)); + # CpSelect = Cp(iSelect(i)); + # ---------------------------------------------------------- + # *In Python*, iSelect is an array of *0-based* indices. + # We then do: + iSelect = iSelect.astype(int) + selected_mt = mt[iSelect] + # Sort them while keeping track of the sorted order + sort_order = np.argsort(selected_mt) # array of int + mtSelect = selected_mt[sort_order] + # Now reorder Cp similarly + selected_Cp = Cp[iSelect] + CpSelect = selected_Cp[sort_order] + + # ---------------------------------------------------------- + # 3) Define Bins Based on Percentiles + # xBins = prctile(mtSelect, 0:(100/nW):100); + # In Python, np.percentile(...) + # and we create an array of percentiles from 0 to 100 in steps of (100/nW) + # But nW might be float => we do something like + # np.linspace(0, 100, int(nW)+1) or something similar. However, + # to replicate MATLAB's 0:(100/nW):100 exactly, we must handle floats carefully. + # We'll do: + if nW < 1: + # Fallback: if nW < 1 for some reason, do 1 bin, i.e. [0, 100] + pct_array = [0, 100] + else: + step = 100.0 / nW # step size + # The number of steps is int(nW)+1 if nW is integer-like. + # However, if nW is float, MATLAB still enumerates 0, step, 2*step,..., up to 100. + # We'll replicate that logic using np.arange, then clip at <=100 + pct_array = [] + current = 0.0 + while current < 100.0 + 1e-9: + pct_array.append(current) + current += step + # In case floating arithmetic overshoots 100 a bit + pct_array[-1] = 100.0 + + # Now compute bin edges from these percentiles + xBins = np.percentile(mtSelect, pct_array) + + # ---------------------------------------------------------- + # 4) fcBin equivalent in Python + # [~, tW, CpW] = fcBin(mtSelect, CpSelect, xBins, 0); + + + # Run fcBin + nCount, tW, CpW = fcBin(mtSelect, CpSelect, xBins, 0) + + return tW, CpW diff --git a/oneflux_steps/ustar_cp_python/launch.py b/oneflux_steps/ustar_cp_python/launch.py index 6336d0ef..ff7255a5 100644 --- a/oneflux_steps/ustar_cp_python/launch.py +++ b/oneflux_steps/ustar_cp_python/launch.py @@ -748,8 +748,6 @@ def createTimeArray(uStar: Union[np.ndarray, pd.Series]) -> np.ndarray: -def cpdAssignUStarTh20100901(*args): - return None, None, None, None, None, None, None, None, None, None, None, None def saveResult(*args): return None, None, None \ No newline at end of file From f30c1616a85c813bd72262b3f0863fde15344f55 Mon Sep 17 00:00:00 2001 From: Dominic Orchard Date: Fri, 21 Feb 2025 13:28:48 +0000 Subject: [PATCH 25/27] updateSelectIndices translation and fix tests --- .../ustar_cp_python/cpdAssignUStarTh.py | 33 +++++++++++++++++++ tests/conftest.py | 5 ++- .../test_updateSelectedIndices.py | 12 +++---- 3 files changed, 42 insertions(+), 8 deletions(-) diff --git a/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py index 472da442..81d8904c 100644 --- a/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py +++ b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py @@ -268,3 +268,36 @@ def aggregateSeasonalMeans(mt: NDArray, Cp: NDArray, xmt: NDArray, iSelect: NDAr nCount, tW, CpW = fcBin(mtSelect, CpSelect, xBins, 0) return tW, CpW + +def updateSelectedIndices(iSelect : np.ndarray, iOut : np.ndarray, fSelect : np.ndarray, fOut : np.ndarray) -> (np.ndarray, int, np.ndarray): + """ + Parameters + ---------- + iSelect : np.ndarray (int) + Array of selected indices. + iOut : np.ndarray (int) + Array of outlier indices to remove from iSelect. + fSelect : np.ndarray (bool) + Boolean array indicating whether each point is selected. + fOut : np.ndarray (bool) + Boolean array indicating outlier points. + + Returns + ------- + iSelect : np.ndarray (int) + Updated array of selected indices after outlier removal. + nSelect : int + Length of the updated iSelect. + fSelect : np.ndarray (bool) + Updated boolean array with outliers removed from selection. + """ + # Remove outlier indices from iSelect + iSelect = np.setdiff1d(iSelect, iOut) + + # Count the remaining selected indices + nSelect = len(iSelect) + + # Exclude outliers from fSelect (element-wise "and not") + fSelect = fSelect & np.logical_not(fOut) + + return iSelect, nSelect, fSelect diff --git a/tests/conftest.py b/tests/conftest.py index ec68454e..d7c54216 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -124,7 +124,10 @@ def convert(self, x, index='to_matlab', fromFile=False): elif isinstance(x[0], list): return np.array(x[0]) else: - return np.array(x).astype(np.float64) + if ((len(x) > 1) and (isinstance(x[0], bool))): + return np.array(x).astype(bool) + else: + return np.array(x).astype(np.float64) elif isinstance(x, tuple): return tuple([self.convert(xi) for xi in x]) diff --git a/tests/unit_tests/test_ustar_cp/test_updateSelectedIndices.py b/tests/unit_tests/test_ustar_cp/test_updateSelectedIndices.py index f994c6f3..40dcad0d 100644 --- a/tests/unit_tests/test_ustar_cp/test_updateSelectedIndices.py +++ b/tests/unit_tests/test_ustar_cp/test_updateSelectedIndices.py @@ -1,7 +1,6 @@ import numpy as np import pytest - @pytest.mark.parametrize("iSelect, iOut, fSelect, fOut, expected_nSelect", [ ([1, 2, 3, 4, 5], [2, 4], [True, True, False, True, False], [False, False, False, False, False], 3), ([10, 20, 30, 40], [20], [True, False, True, True], [False, False, False, False], 3), @@ -11,11 +10,10 @@ def test_update_selected_indices(test_engine, iSelect, iOut, fSelect, fOut, expected_nSelect): """Test that updateSelectedIndices correctly removes outliers and updates selection.""" - # Call MATLAB function - result = test_engine.updateSelectedIndices(iSelect, iOut, fSelect, fOut, nargout=3) - - # Extract outputs - updated_iSelect, updated_nSelect, updated_fSelect = result + updated_iSelect, updated_nSelect, updated_fSelect = test_engine.updateSelectedIndices(test_engine.convert(iSelect) + , test_engine.convert(iOut) + , test_engine.convert(fSelect) + , test_engine.convert(fOut), nargout=3) # Check expected number of selections - assert updated_nSelect == expected_nSelect, f"Expected {expected_nSelect}, got {updated_nSelect}" \ No newline at end of file + assert test_engine.equal(updated_nSelect, expected_nSelect), f"Expected {expected_nSelect}, got {updated_nSelect}" \ No newline at end of file From 626b1a93c07fa9d8435a7c06b218ec7c71917d6a Mon Sep 17 00:00:00 2001 From: Dominic Orchard Date: Fri, 21 Feb 2025 13:29:03 +0000 Subject: [PATCH 26/27] initial translation of cpdAssign --- .../ustar_cp_python/cpdAssignUStarTh.py | 336 +++++++++++++++++- tests/conftest.py | 2 + 2 files changed, 327 insertions(+), 11 deletions(-) diff --git a/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py index 81d8904c..9a3e6c07 100644 --- a/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py +++ b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py @@ -1,5 +1,6 @@ import numpy as np from scipy.optimize import curve_fit +from oneflux_steps.ustar_cp_python.fcReadFields import fcReadFields from oneflux_steps.ustar_cp_python.fcNaniqr import fcNaniqr from oneflux_steps.ustar_cp_python.fcEqnAnnualSine import fcEqnAnnualSine from oneflux_steps.ustar_cp_python.fcr2Calc import fcr2Calc @@ -7,9 +8,291 @@ from numpy.typing import NDArray from oneflux_steps.ustar_cp_python.fcBin import fcBin -def cpdAssignUStarTh20100901(*args): - return None, None, None, None, None, None, None, None, None, None, None, None +def cpdAssignUStarTh20100901(Stats, plotFlag, siteYearText, *args): + """ + Parameters: + ----------- + Stats : array-like or str + Stats structure that may be JSON-encoded (depending on *args). + plotFlag : bool or int + Flag indicating whether to plot (unused in this excerpt). + siteYearText : str + String describing site year (unused in this excerpt). + *args : list + Variable-length argument list. Can contain instructions like + ['jsondecode', 1], specifying that Stats should be decoded from JSON. + + Returns: + -------- + annualChangePoint : np.ndarray + numAnnualSelected : np.ndarray + seasonalTimeWindow : np.ndarray + seasonalChangePoint : np.ndarray + dominantMode : str + failureMessage : str + selectedPointsFlag : np.ndarray (bool) + sineCurve : np.ndarray + fractionSignificant : float + fractionModeD : float + fractionSelected : float + + The logic follows the MATLAB code step-by-step, including: + - Decoding JSON if required. + - Checking dimensions of Stats. + - Extracting relevant fields (like Cp, b1, p). + - Determining significance and modes (D/E). + - Excluding outliers, aggregating results. + - Fitting annual sine curve. + """ + + # ------------------------------------------------------------------------- + # 1) Initialize Output Variables + + annualChangePoint = [] + numAnnualSelected = [] + seasonalTimeWindow = [] + seasonalChangePoint = [] + selectedPointsFlag = [] + dominantMode = '' + failureMessage = '' + sineCurve = [] + fractionSignificant = [] + fractionModeD = [] + fractionSelected = [] + + # ------------------------------------------------------------------------- + # 2) Decode JSON if Required + # TODO - PROBABLY REMOVE + + # In MATLAB, the code checks 'varargin' for a cell array with 'jsondecode'. + # In Python, we check if `args` includes something like ['jsondecode', 1]. + for arg in args: + # Example check: if arg is a list, arg[0] might be 'jsondecode' + if isinstance(arg, list) and len(arg) > 0 and arg[0] == 'jsondecode': + # Then check subsequent entries for numeric IDs + for j in arg[1:]: + # If j == 1, decode Stats from JSON + if j == 1: + if isinstance(Stats, str): + Stats = json.loads(Stats) + + # ------------------------------------------------------------------------- + # 3) Determine Dimension Sizes of Stats + + # The MATLAB code checks ndims(Stats) and shape(Stats). + # We assume Stats is a NumPy array (or a nested list that's been + # converted to an array). + if not isinstance(Stats, np.ndarray): + # Attempt to convert Stats to a NumPy array if it's still a list + Stats = np.array(Stats, dtype=object) + + numDimensions = Stats.ndim + + if numDimensions == 2: + numWindows, numBootstraps = Stats.shape + numTemperatureStrata = 1 + temperatureStrataFactor = 0.5 + + elif numDimensions == 3: + numWindows, numTemperatureStrata, numBootstraps = Stats.shape + temperatureStrataFactor = 1 + + else: + failureMessage = 'Stats must be 2D or 3D.' + return (annualChangePoint, numAnnualSelected, seasonalTimeWindow, + seasonalChangePoint, dominantMode, failureMessage, + selectedPointsFlag, sineCurve, fractionSignificant, + fractionModeD, fractionSelected) + + # ------------------------------------------------------------------------- + # 4) Set Reference Values + + referenceWindows = 4 + requiredSelectionCount = referenceWindows * temperatureStrataFactor * numBootstraps + + # ------------------------------------------------------------------------- + # 5) Preallocate Outputs + + annualChangePoint = np.full((numBootstraps,), np.nan) + numAnnualSelected = np.full((numBootstraps,), np.nan) + seasonalTimeWindow = np.full((numWindows,), np.nan) + seasonalChangePoint = np.full((numWindows,), np.nan) + + # ------------------------------------------------------------------------- + # 6) Extract Variables from Stats Structure + # The MATLAB code uses fcReadFields and fcx2colvec to read each field and reshape. + # Here, we assume Python equivalents: readFields(Stats, varName) and x2colvec(). + # In an actual implementation, these must be defined or replaced by direct indexing. + + variableNames = ['mt', 'Cp', 'b1', 'c2', 'cib1', 'cic2', 'p'] + # We'll store them in a dictionary by field name. + b1 = fcReadFields(Stats, 'b1') + c2 = fcReadFields(Stats, 'c2') + cib1 = fcReadFields(Stats, 'cib1') + cic2 = fcReadFields(Stats, 'cic2') + p = fcReadFields(Stats, 'p') + + measurementTime = fcReadFields(Stats, 'mt') + changePoint = fcReadFields(Stats, 'Cp') + + # ------------------------------------------------------------------------- + # 7) Identify Significant Change Points + + significanceThreshold = 0.05 + # p <= threshold is True/False mask + significantFlag = (p <= significanceThreshold) + + # ------------------------------------------------------------------------- + # 8) Identify Model Type (2-parameter vs 3-parameter) + + # If all c2 are NaN => effectively 2 parameters + # The MATLAB code checks sum(~isnan(c2)) == 0 + if np.sum(~np.isnan(c2)) == 0: + numParameters = 2 + c2 = np.zeros_like(b1) + cic2 = np.zeros_like(b1) + else: + numParameters = 3 + + # ------------------------------------------------------------------------- + # 9) Classify Significant Change Points: + + # Indices that are valid (not NaN in b1+c2+Cp): + validMask = ~np.isnan(b1 + c2 + changePoint) + validIndices = np.where(~np.isnan(measurementTime))[0] + numValidMeasurements = len(validIndices) + + # Non-significant + nonSignificantIndices = np.where((~significantFlag) & validMask)[0] + numNonSignificant = len(nonSignificantIndices) + + # Significant + significantIndices = np.where(significantFlag & validMask)[0] + numSignificant = len(significantIndices) + + # Mode E: b1 < c2 + modeEIndices = np.where(significantFlag & (b1 < c2) & validMask)[0] + numModeE = len(modeEIndices) + + # Mode D: b1 >= c2 + modeDIndices = np.where(significantFlag & (b1 >= c2) & validMask)[0] + numModeD = len(modeDIndices) + + # Decide dominant mode + if numModeD >= numModeE: + selectedIndices = modeDIndices + dominantMode = 'D' + else: + selectedIndices = modeEIndices + dominantMode = 'E' + + numSelected = len(selectedIndices) + + # Update Selection Flags + selectedPointsFlag = np.zeros_like(significantFlag, dtype=bool) + selectedPointsFlag[selectedIndices] = True + + modeDFlag = np.full_like(significantFlag, np.nan, dtype=float) + modeDFlag[modeDIndices] = 1 + modeEFlag = np.full_like(significantFlag, np.nan, dtype=float) + modeEFlag[modeEIndices] = 1 + + # Fractions + if numValidMeasurements > 0: + fractionSignificant = numSignificant / numValidMeasurements + fractionModeD = numModeD / max(numSignificant, 1) # avoid zero-div + fractionSelected = numSelected / numValidMeasurements + else: + fractionSignificant, fractionModeD, fractionSelected = (0, 0, 0) + + # ------------------------------------------------------------------------- + # 10) Abort if Too Few Selections + + if fractionSelected < 0.10: + failureMessage = 'Less than 10% successful detections.' + return (annualChangePoint, numAnnualSelected, seasonalTimeWindow, + seasonalChangePoint, dominantMode, failureMessage, + selectedPointsFlag, np.array([]), fractionSignificant, + fractionModeD, fractionSelected) + + # ------------------------------------------------------------------------- + # 11) Configure Regression Matrix + + if numParameters == 2: + regressionMatrix = np.column_stack([changePoint, b1, cib1]) + else: + regressionMatrix = np.column_stack([changePoint, b1, c2, cib1, cic2]) + + # ------------------------------------------------------------------------- + # 12) Exclude Outliers Based on Standardized Scores + + standardizedScores = computeStandardizedScores(regressionMatrix) # user-defined + outlierFlag, outlierIndices = identifyOutliers(standardizedScores, 5) # user-defined + + selectedIndices, numSelected, selectedPointsFlag = updateSelectedIndices( + selectedIndices, outlierIndices, selectedPointsFlag, outlierFlag + ) + + modeDIndices, numModeD = updateModes(modeDFlag, outlierIndices) # user-defined + modeEIndices, _ = updateModes(modeEFlag, outlierIndices) # user-defined + + # Recompute significantIndices, etc. + significantIndices = np.union1d(modeDIndices, modeEIndices) + numSignificant = len(significantIndices) + + if numValidMeasurements > 0: + fractionSignificant = numSignificant / numValidMeasurements + fractionModeD = numModeD / max(numSignificant, 1) + fractionSelected = numSelected / numValidMeasurements + + # ------------------------------------------------------------------------- + # 13) Check If Enough Change Points Remain + + if numSelected < requiredSelectionCount: + failureMessage = f'Too few selected change points: {numSelected}/{requiredSelectionCount}' + return (annualChangePoint, numAnnualSelected, seasonalTimeWindow, + seasonalChangePoint, dominantMode, failureMessage, + selectedPointsFlag, np.array([]), fractionSignificant, + fractionModeD, fractionSelected) + + # ------------------------------------------------------------------------- + # 14) Aggregate Seasonal and Annual Values + + annualChangePoint, numAnnualSelected, _ = aggregateSeasonalAndAnnualValues( + changePoint, selectedIndices, numDimensions, numWindows, + numTemperatureStrata, numBootstraps + ) + + # ------------------------------------------------------------------------- + # 15) Aggregate Seasonal Means + + seasonalTimeWindow, seasonalChangePoint = aggregateSeasonalMeans( + measurementTime, changePoint, measurementTime, selectedIndices, + numWindows, numTemperatureStrata, numBootstraps + ) + + # ------------------------------------------------------------------------- + # 16) Fit Annual Sine Curve + + sineCurve = fitAnnualSineCurve( + measurementTime, changePoint, selectedIndices + ) + + # ------------------------------------------------------------------------- + # Return All Outputs in the Same Order as the MATLAB Code + + return (annualChangePoint, + numAnnualSelected, + seasonalTimeWindow, + seasonalChangePoint, + dominantMode, + failureMessage, + selectedPointsFlag, + sineCurve, + fractionSignificant, + fractionModeD, + fractionSelected) def identifyOutliers(x_norm_x, threshold): """ @@ -89,11 +372,11 @@ def _annual_sine_for_curve_fit(t, b0, b1, b2): return sSine def aggregateSeasonalAndAnnualValues( - xCp, - iSelect, - nDim, - nWindows, - nStrata, + xCp, + iSelect, + nDim, + nWindows, + nStrata, nBoot ): """ @@ -143,7 +426,7 @@ def aggregateSeasonalAndAnnualValues( CpA = np.nanmean(xCpGF) nA = np.sum(~np.isnan(xCpSelect)) elif nDim == 3: - + # mask xCp using iSelect_array xCpSelect = xCp.copy() for i in range(len(xCp)): @@ -195,7 +478,7 @@ def aggregateSeasonalMeans(mt: NDArray, Cp: NDArray, xmt: NDArray, iSelect: NDAr # ---------------------------------------------------------- # 1) Calculate Median Number of Windows - # In MATLAB: + # In MATLAB: # reshape(xmt, nWindows, nStrata * nBoot) # Here we reshape the 1D array xmt => shape (nWindows, nStrata*nBoot) # ---------------------------------------------------------- @@ -222,7 +505,7 @@ def aggregateSeasonalMeans(mt: NDArray, Cp: NDArray, xmt: NDArray, iSelect: NDAr # ---------------------------------------------------------- # *In Python*, iSelect is an array of *0-based* indices. # We then do: - iSelect = iSelect.astype(int) + iSelect = iSelect.astype(int) selected_mt = mt[iSelect] # Sort them while keeping track of the sorted order sort_order = np.argsort(selected_mt) # array of int @@ -245,7 +528,7 @@ def aggregateSeasonalMeans(mt: NDArray, Cp: NDArray, xmt: NDArray, iSelect: NDAr pct_array = [0, 100] else: step = 100.0 / nW # step size - # The number of steps is int(nW)+1 if nW is integer-like. + # The number of steps is int(nW)+1 if nW is integer-like. # However, if nW is float, MATLAB still enumerates 0, step, 2*step,..., up to 100. # We'll replicate that logic using np.arange, then clip at <=100 pct_array = [] @@ -301,3 +584,34 @@ def updateSelectedIndices(iSelect : np.ndarray, iOut : np.ndarray, fSelect : np. fSelect = fSelect & np.logical_not(fOut) return iSelect, nSelect, fSelect + +def updateModes(fModeX: np.ndarray, iOut: np.ndarray): + """ + Recalculates mode indices after removing outliers, mirroring updateModes.m. + + Parameters + ---------- + fModeX : np.ndarray + Array indicating whether each point belongs to the mode (1) or not + (0/NaN). This array is modified in-place, setting the elements + at indices iOut to np.nan. + iOut : np.ndarray + Array of outlier indices to remove from the mode. + + Returns + ------- + iModeX : np.ndarray + Updated indices where fModeX is exactly 1. + nModeX : int + Number of such indices. + """ + # Mark outlier positions in fModeX as NaN + fModeX[iOut] = np.nan + + # Find indices where fModeX is exactly 1 + iModeX = np.where(fModeX == 1)[0] + + # Count them + nModeX = len(iModeX) + + return iModeX, nModeX \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index d7c54216..309ab3c4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -170,6 +170,8 @@ def newfunc(*args, **kwargs): # if jsonencode is present in kwargs then remove it if 'jsonencode' in kwargs: kwargs.pop('jsonencode') + if 'jsondecode' in kwargs: + kwargs.pop('jsondecode') func = globals().get(name) if callable(func): From 113c6ce610241fc7fb09f87e6b37f121cd7e40c9 Mon Sep 17 00:00:00 2001 From: Dominic Orchard Date: Fri, 21 Feb 2025 18:08:35 +0000 Subject: [PATCH 27/27] fix aggregateSeasonalAndAnnualValues...again... --- .../ustar_cp_python/cpdAssignUStarTh.py | 28 +++----------- oneflux_steps/ustar_cp_python/utilities.py | 37 +++++++++++++++++++ tests/conftest.py | 2 +- .../test_aggregateAndSeasonalValues.py | 17 ++++----- .../test_cpdAssignUStarTh20100901.py | 24 +----------- 5 files changed, 52 insertions(+), 56 deletions(-) diff --git a/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py index 9a3e6c07..47c472e1 100644 --- a/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py +++ b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py @@ -7,6 +7,7 @@ from typing import Tuple from numpy.typing import NDArray from oneflux_steps.ustar_cp_python.fcBin import fcBin +from oneflux_steps.ustar_cp_python.utilities import index_or_mark_array_update def cpdAssignUStarTh20100901(Stats, plotFlag, siteYearText, *args): """ @@ -407,35 +408,18 @@ def aggregateSeasonalAndAnnualValues( # Prepare an output array (same shape) filled with NaNs xCpSelect = np.full_like(xCp, np.nan, dtype=float) - # Treat selection arrays an array of integers - # (this allows both an array of booleans and an array of integers to be used - # for iselect) - iSelect_array = np.asarray(iSelect, dtype=int) + # Index using iSelect which could be a mask or a set of indices + xCpSelect = index_or_mark_array_update(xCpSelect, iSelect, xCp) + print(f"nDim = {nDim}") # Aggregate values based on dimensions if nDim == 2: - # mask xCp using iSelect_array - xCpSelect = xCp.copy() - for i in range(len(xCp)): - if iSelect_array[i] == 0: - xCpSelect[i] = np.nan - xCpGF = xCpSelect # naming convention - # xCp shape = [nWindows, nBoot] - CpA = np.nanmean(xCpGF) - nA = np.sum(~np.isnan(xCpSelect)) + CpA = np.nanmean(xCpGF, axis=0) + nA = np.sum(~np.isnan(xCpSelect), axis = 0) elif nDim == 3: - - # mask xCp using iSelect_array - xCpSelect = xCp.copy() - for i in range(len(xCp)): - for j in range(len(xCp[i])): - if iSelect_array[i][j] == 0: - xCpSelect[i][j] = np.nan - xCpGF = xCpSelect # Naming convention - # xCp shape = [nWindows, nStrata, nBoot] # reshape => (nWindows*nStrata, nBoot) in column-major xCpGF_reshaped = np.reshape(xCpGF, (nWindows * nStrata, nBoot), order='F') diff --git a/oneflux_steps/ustar_cp_python/utilities.py b/oneflux_steps/ustar_cp_python/utilities.py index 8702fc57..39f22023 100644 --- a/oneflux_steps/ustar_cp_python/utilities.py +++ b/oneflux_steps/ustar_cp_python/utilities.py @@ -4,6 +4,43 @@ import json from decimal import Decimal, ROUND_HALF_UP +def index_or_mark_array_update(A : np.ndarray, idx : np.ndarray, B : np.ndarray) -> np.ndarray: + """ + index_or_mark_array_update(A, idx, B) performs something akin to `A[idx] = B` + where we update the elements of `A` at indices `idx` according to `B` + but where A and B are n-dimensional but idx is either an n-dimensional array + of masking booleans, or a 1-dimensional array of indices into the flattened + array A. + + Parameters: + A : np.ndarray + The input array. + idx : np.ndarray + The indices to use. + + Returns: + np.ndarray + The elements of the array at the specified indices. + """ + + if (all(isinstance(i, np.bool_) for i in np.array(idx).flat)): + # idx is a boolean mask + for i in range(len(A.flat)): + if idx.flat[i]: + A.flat[i] = B.flat[i] + else: + # Do column major flattening of A + Acopy = A.copy().flatten(order='F') + B = B.flatten(order='F') + idx = idx.flatten() + # Copy across the elements at the right indices + for i in idx: + Acopy[i] = B[i] + # Reshape the data + A = Acopy.reshape(A.shape, order='F') + return A + + def prctile(A: np.ndarray, p: float) -> float|np.ndarray: """ Compute the p-th percentile of array A in a way that has diff --git a/tests/conftest.py b/tests/conftest.py index 309ab3c4..bb718377 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -124,7 +124,7 @@ def convert(self, x, index='to_matlab', fromFile=False): elif isinstance(x[0], list): return np.array(x[0]) else: - if ((len(x) > 1) and (isinstance(x[0], bool))): + if (all(isinstance(i, np.bool_) for i in np.array(x).flat)): return np.array(x).astype(bool) else: return np.array(x).astype(np.float64) diff --git a/tests/unit_tests/test_ustar_cp/test_aggregateAndSeasonalValues.py b/tests/unit_tests/test_ustar_cp/test_aggregateAndSeasonalValues.py index 0f1b1307..0d9a23c8 100644 --- a/tests/unit_tests/test_ustar_cp/test_aggregateAndSeasonalValues.py +++ b/tests/unit_tests/test_ustar_cp/test_aggregateAndSeasonalValues.py @@ -95,13 +95,11 @@ def test_aggregateSeasonalAndAnnualValues( expected_xCpSelect ): """ - Test the MATLAB function aggregateSeasonalAndAnnualValues by calling it via - the MATLAB Engine API for Python. We pass arrays from Python to MATLAB, - execute the function, and verify the outputs. + Test the function aggregateSeasonalAndAnnualValues Explanation: ------------ - - xCp is provided in a shape that (in MATLAB) should be [nWindows, nBoot] if nDim=2, + - xCp is provided in a shape that should be [nWindows, nBoot] if nDim=2, or [nWindows, nStrata, nBoot] if nDim=3. Because MATLAB uses column-major order, the exact flattening can be tricky in Python. We keep the nested structure so the final shape is correct in MATLAB once passed through the engine. @@ -112,8 +110,7 @@ def test_aggregateSeasonalAndAnnualValues( dimensional differences). """ - # Call the MATLAB function with nargout=3 - CpA_mat, nA_mat, xCpSelect_mat = test_engine.aggregateSeasonalAndAnnualValues( + CpA, nA, xCpSelect = test_engine.aggregateSeasonalAndAnnualValues( test_engine.convert(xCp), test_engine.convert(iSelect, index = "to_python"), test_engine.convert(nDim), @@ -122,7 +119,7 @@ def test_aggregateSeasonalAndAnnualValues( test_engine.convert(nBoot), nargout=3 ) - - assert test_engine.equal(CpA_mat, test_engine.convert(expected_CpA)) - assert test_engine.equal(nA_mat, test_engine.convert(expected_nA)) - assert test_engine.equal(xCpSelect_mat, test_engine.convert(expected_xCpSelect)) + + assert test_engine.equal(CpA, test_engine.convert(expected_CpA)) + assert test_engine.equal(nA, test_engine.convert(expected_nA)) + assert test_engine.equal(xCpSelect, test_engine.convert(expected_xCpSelect)) diff --git a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py index e3b6b534..2ad980c5 100644 --- a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py +++ b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py @@ -1,8 +1,6 @@ import pytest -import matlab.engine import numpy as np - @pytest.fixture(scope="module") def mock_data(test_engine, nt=300, tspan=(0, 1), uStar_pars=(0.1, 3.5), T_pars=(-10, 30), fNight=None): """ @@ -49,26 +47,6 @@ def mock_data(test_engine, nt=300, tspan=(0, 1), uStar_pars=(0.1, 3.5), T_pars=( return Stats2, fPlot, cSiteYr -# def test_cpdAssignUStarTh20100901_basic(test_engine, mock_data): -# stats, fPlot, cSiteYr = mock_data - -# # Call MATLAB function -# CpA, nA, tW, CpW, cMode, cFailure, fSelect, sSine, FracSig, FracModeD, FracSelect = test_engine.cpdAssignUStarTh20100901(stats, fPlot, cSiteYr, jsondecode=[0], nargout=11) - -# # Assertions -# assert isinstance(CpA, matlab.double), "CpA should be a MATLAB double array" -# assert isinstance(nA, matlab.double), "nA should be a MATLAB double array" -# assert isinstance(tW, matlab.double), "tW should be a MATLAB double array" -# assert isinstance(CpW, matlab.double), "CpW should be a MATLAB double array" -# assert isinstance(cMode, str), "cMode should be a string" -# assert isinstance(cFailure, str), "cFailure should be a string" -# assert isinstance(fSelect, matlab.logical), "fSelect should be a MATLAB logical array" -# assert isinstance(sSine, matlab.double), "sSine should be a MATLAB double array" -# assert isinstance(FracSig, float), "FracSig should be a float" -# assert isinstance(FracModeD, float), "FracModeD should be a float" -# assert isinstance(FracSelect, float), "FracSelect should be a float" - - def test_cpdAssignUStarTh20100901_edge_cases(test_engine, mock_data): def set_attr(struct_array, key, val): np.vectorize(lambda x: x.update({key: val}))(struct_array) @@ -131,8 +109,8 @@ def test_identify_outliers(test_engine, x_norm_x, threshold, expected_f_out, exp def test_aggregate_3d_case(test_engine, xCp, iSelect, nDim, nWindows, nStrata, nBoot, expected_CpA, expected_nA, expected_xCpSelect): """Test function for aggregateSeasonalAndAnnualValues 2D and 3D cases.""" + print("DATA for test iSelect: ", iSelect) - # Run MATLAB function CpA, nA, xCpSelect = test_engine.aggregateSeasonalAndAnnualValues( test_engine.convert(xCp), test_engine.convert(iSelect), nDim, nWindows, test_engine.convert(nStrata), test_engine.convert(nBoot), nargout=3 )