From 660de072b1b048854a9e7e10d897b0116f6c3a8a Mon Sep 17 00:00:00 2001
From: James Emberton <jamie.emberton@gmail.com>
Date: Mon, 9 Dec 2024 19:26:11 +0000
Subject: [PATCH 01/27] modularised

---
 .../cpdAssignUStarTh20100901.m                | 522 ++++++++++--------
 .../test_cpdAssignUStarTh20100901.py          |   2 +-
 2 files changed, 280 insertions(+), 244 deletions(-)

diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m
index 5632e02f..b03d29a5 100644
--- a/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m
+++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m
@@ -1,243 +1,279 @@
-	function [CpA,nA,tW,CpW,cMode,cFailure,fSelect,sSine,FracSig,FracModeD,FracSelect] ... 
-		= cpdAssignUStarTh20100901(Stats,fPlot,cSiteYr,varargin) 
-
-%cpdAssignUStarTh20100901
-%	aggregates and assigns uStarTh from the Stats* structured records 
-%	as output by cpdBootstrapUStarTh20100901. 
-%
-%Syntax: 
-%
-%	[CpA,nA,tW,CpW,cMode,cFailure,fSelect,sSine,FracSig,FracModeD,FracSelect] ... 
-%		= cpdExtractuStarTh20100901 (Stats,fPlot,cSiteYr) 
-%
-%	where: 
-%
-%	-	Stats is a structured record output by
-%		cpdBootstrapuStarTh20100901, can be:
-%		- Stats2 (2-parameter operational change-point model) or 
-%		- Stats3 (3-parameter diagnostic change-point model)
-%	-	fPlot is a flag that is set to 1 for plotting
-%		the aggregation analysis
-%	-	cSiteYr is text containing site and year for the fPlot plot
-%
-%	-	CpA is a scalar or vector of annual uStarTh (ChangePoint) means 
-%	-	nA is the number of selected change-points in the annual mean
-%	-	CpW and tW are vectors showing seasonal variation in uStarTh 
-%	-	cMode is the dominant change-point mode: 
-%		D Deficit (b1>0) or E Excess (b1<0)
-%	-	cFailure is a string containing failure messages
-%	-	fSelect is an array the same size as Stats* that flags the
-%		selected Cp values for computing CpA and CpW
-%	-	sSine contains the coefficients of an annual sine curve 
-%		fit to tW and CpW
-%	-	FracSig,FracModeD,FracSelect are the fraction of attempted
-%		change-point detections that are significant, in mode D and
-%		select.
-%
-%The Stats2 or Stats3 records may be 2D (nWindows x nStrata) 
-%or 3D (nWindows x nStrata x nBoot). If 2D, then CpA 
-%is a scalar and CpW is averaged across the nStrata temperature strata.  
-%If 3D, then CpA is a vector of length nBoot and CpW is averaged 
-%across nBoot bootstraps and nStrata temperature strata.  
-%Stats input is accepted from both 4Season (nWindows=4) 
-%and flexible window (nWindows>=7) processing. 
-%
-%The aggregation process is selective, selecting only: 
-% - significant change points (p <= 0.05)
-% - in the dominant mode (Deficit (b1>0) or Excess (b1<0))
-% - after excluding outliers (based on regression stats).
-%No assignment is made if the detection failure rate 
-%is too high.   
-
-%	========================================================================
-%	========================================================================
-
-%	Functions called: 
-%
-%		fcBin, fcDatetick, fcEqnAnnualSine, fcNaniqr, fcReadFields 
-%		fcr2Calc, fcx2colvec, fcx2rowvec
-%		stats toolbox:  nanmedian, nanmean, nlinfit, prctile
-
-%	Written 16 April 2010 by Alan Barr
-
-%	=======================================================================
-%	=======================================================================
-	
-	for i = 1:length(varargin)
-		a = varargin{i};
-		if iscell(a) && strcmp(a{1}, 'jsondecode')
-			for j = 2:length(a)
-				switch a{j}
-					case 1
-						Stats = jsondecode(Stats);
-				end
-			end
-		end
-	end
-
-	CpA=[]; nA=[]; tW=[]; CpW=[]; fSelect=[]; cMode=''; cFailure=''; sSine=[]; 
-	FracSig=[]; FracModeD=[]; FracSelect=[]; 
-
-%	Compute window sizes etc.	
-
-	nDim=ndims(Stats); 
-	switch nDim; 
-		case 2; [nWindows,nBoot]=size(Stats); nStrata=1; nStrataN=0.5; 
-		case 3; [nWindows,nStrata,nBoot]=size(Stats); nStrataN=1;  
-		otherwise; cFailure='Stats must be 2D or 3D.'; return; 
-	end; 
-	nWindowsN=4; nSelectN=nWindowsN*nStrataN*nBoot; 
-	
-	CpA=NaN*ones(nBoot,1); nA=NaN*ones(nBoot,1); tW=NaN*ones(nWindows,1); CpW=NaN*ones(nWindows,1);
-
-%	Extract variable arrays from Stats structure.	
-%	Reassign mt and Cp as x* to retain array shape, 
-%	then convert the extracted arrays to column vectors. 
-
-	cVars={'mt','Cp','b1','c2','cib1','cic2','p'}; nVars=length(cVars); 
-	for i=1:nVars; 
-		cv=char(cVars(i)); eval([cv '=fcReadFields(Stats,''' cv ''');']); 
-		switch cv; 
-			case 'mt'; xmt=mt; 
-			case 'Cp'; xCp=Cp; 
-			otherwise; 
-		end; 
-		eval([cv '=fcx2colvec(' cv ');']); 
-	end; 
-	pSig=0.05; fP = p<=pSig; 
-	
-%	Determine if Stats input is from the operational 2-parameter 
-%	or diagnostic 3-parameter change-point model
-%	and set c2 and cic2 to zero if 2-parameter 
-
-	nPar=3; if sum(~isnan(c2))==0; nPar=2; c2=0*b1; cic2=c2; end; 
-
-%	Classify Cp regressions by slopes of b1 and c2 regression coeff: 
-%	- NS: not sig, mfP=NaN, p>0.05
-%	- ModeE: atypical significant Cp (b1<c2)
-%	- ModeD: typical significant Cp (b1>=c2) 
-
-	iTry=find(~isnan(mt)); nTry=length(iTry); 
-	iCp=find(~isnan(b1+c2+Cp)); nCp=length(iCp); 
-	iNS=find(fP==0 & ~isnan(b1+c2+Cp)); nNS=length(iNS); 
-	iSig=find(fP==1 & ~isnan(b1+c2+Cp)); nSig=length(iSig); 
-	iModeE=find(fP==1 & b1<c2); nModeE=length(iModeE); 
-	iModeD=find(fP==1 & b1>=c2); nModeD=length(iModeD); 
-	
-%	Evaluate and accept primary mode of significant Cps
-				
-	if nModeD>=nModeE; iSelect=iModeD; cMode='D'; else iSelect=iModeE; cMode='E'; end; nSelect=length(iSelect); 
-	fSelect=zeros(size(fP)); fSelect(iSelect)=1; fSelect=logical(fSelect); 
-	fModeD=NaN*ones(size(fP)); fModeD(iModeD)=1; 
-	fModeE=NaN*ones(size(fP)); fModeE(iModeE)=1; 
-	
-	FracSig=nSig/nTry; FracModeD=nModeD/nSig; FracSelect=nSelect/nTry; 
-	
-%	Abort analysis if too few of the regressions produce significant Cps. 
-
-	if FracSelect<0.10; cFailure='Less than 10% successful detections. '; return;  end; 
-				
-%	Exclude outliers from Select mode based on Cp and regression stats
-
-	switch nPar; 
-		case 2; x=[Cp b1 cib1]; nx=3; 
-		case 3; x=[Cp b1 c2 cib1 cic2]; nx=5; 
-	end; 
-	
-	mx=nanmedian(x); sx=fcNaniqr(x); 
-	xNorm=NaN*x; for i=1:nx; xNorm(:,i)=(x(:,1)-mx(i))/sx(i); end; 
-	xNormX=max(abs(xNorm),[],2); 
-	ns=5; fOut=(xNormX>ns); iOut=find(fOut); 
-	iSelect=setdiff(iSelect,iOut); nSelect=length(iSelect); 
-	fSelect = ~fOut & fSelect; 
-	fModeD(iOut)=NaN; iModeD=find(fModeD==1); nModeD=length(iModeD); 
-	fModeE(iOut)=NaN; iModeE=find(fModeE==1); nModeE=length(iModeE); 
-	iSig=union(iModeD,iModeE); nSig=length(iSig); 
-	
-	FracSig=nSig/nTry; FracModeD=nModeD/nSig; FracSelect=nSelect/nTry;
-	
-	if nSelect<nSelectN; cFailure=sprintf('Too few selected change points: %g/%g',nSelect,nSelectN); return;  end; 
-	
-%	Aggregate the values to season and year.
-
-	xCpSelect=NaN*xCp; xCpSelect(iSelect)=xCp(iSelect); xCpGF=xCpSelect; 
-	switch nDim;
-		case 2; CpA=fcx2colvec(nanmean(xCpGF)); 
-			nA=fcx2colvec(sum(~isnan(xCpSelect))); 
-		case 3; CpA=fcx2colvec(nanmean(reshape(xCpGF,nWindows*nStrata,nBoot))); 
-			nA=fcx2colvec(sum(~isnan(reshape(xCpSelect,nWindows*nStrata,nBoot)))); 
-	end;
-	
-%	Calculate mean tW and CpW for each window based on Select data only.  
-%	Because the bootstrap varies the number of windows among bootstraps, 
-%	base on the median number of windows and reshape sorted data.  
-
-	nW=nanmedian(sum(~isnan(reshape(xmt,nWindows,nStrata*nBoot)))); 
-	[mtSelect,i]=sort(mt(iSelect)); CpSelect=Cp(iSelect(i)); 
-	xBins=prctile(mtSelect,0:(100/nW):100); 
-	[n,tW,CpW]=fcBin(mtSelect,CpSelect,xBins,0); 
-	
-%	Fit annual sine curve 
-	
-	bSine=[1,1,1]; 
-	[bSine]=nlinfit(mt(iSelect),Cp(iSelect),'fcEqnAnnualSine',bSine); 
-	yHat=fcEqnAnnualSine(bSine,mt(iSelect)); r2=fcr2Calc(Cp(iSelect),yHat); 
-	mtHat=sort(mt(iSelect)); CpHat=fcEqnAnnualSine(bSine,mtHat); 
-	
-	if bSine(2)<0; bSine(2)=-bSine(2); bSine(3)=bSine(3)+365.25/2; end; 
-	bSine(3)=mod(bSine(3),365.25); 
-	sSine=[fcx2rowvec(bSine) r2]; 
-	
-%	=======================================================================
-%	=======================================================================
-	
-	if fPlot; 
-		
-		FracSelectByWindow=sum(reshape(~isnan(xCpGF),nWindows,nStrata*nBoot),2)./sum(reshape(~isnan(xmt),nWindows,nStrata*nBoot),2); 
-		mtByWindow=nanmean(reshape(xmt,nWindows,nStrata*nBoot),2); 
-		
-		fcFigLoc(1,0.5,0.45,'NE'); 
-		
-		subplot('position',[0.08 0.56 0.60 0.38]); hold on; box on; 
-		plot(mt,Cp,'r.',mt(iModeE),Cp(iModeE),'b.',mt(iModeD),Cp(iModeD),'g.');
-		nBins=nWindows; 
-		if nModeD>=nBins*30;
-			[n,mx,my]=fcBin(mt(iModeD),Cp(iModeD),[],round(nModeD/nBins));
-			hold on; plot(mx,my,'ko-','MarkerFaceColor','y','MarkerSize',8,'LineWidth',2);
-		end; 
-		if nModeE>=nBins*30;
-			[n,mx,my]=fcBin(mt(iModeE),Cp(iModeE),[],round(nModeE/nBins));
-			hold on; plot(mx,my,'bo-','MarkerFaceColor','c','MarkerSize',8,'LineWidth',2);
-		end; 
-		fcDatetick(mt,'Mo',4,1); ylabel('Cp');
-		ylabel('Raw Cp Modes D (green) E (red)'); ylim([0 prctile(Cp,99.9)]);
-		hold off; 
-		title(sprintf('%s  Stats%g  Mode%s  nSelect/nTry: %g/%g  uStarTh: %5.3f (%5.3f) ', ...
-			cSiteYr,nPar,cMode,nSelect,nTry, ... 
-			nanmedian(Cp(iSelect)),0.5*diff(prctile(Cp(iSelect),[2.5 97.5])) ));
-		
-		subplot('position',[0.08 0.06 0.60 0.38]); hold on; box on; 
-		switch cMode; case 'G'; c='g'; case 'L'; c='b'; otherwise; c='k'; end; 
-		plot(mt(iSelect),Cp(iSelect),[c '.'],mtHat,CpHat,'r-','LineWidth',3); 
-		plot(tW,CpW,'ro','MarkerFaceColor','y','MarkerSize',9','LineWidth',2); 
-		fcDatetick(mt(iSelect),'Mo',4,1); 
-		ylabel('Select Cp'); ylim([0 prctile(Cp(iSelect),99)]);
-		title(sprintf('Cp = %5.3f + %5.3f sin(wt - %3.0f) (r^2 = %5.3f) ',bSine,r2 )); 
-		
-		subplot('position',[0.76 0.56 0.22 0.38]); 
-		hist(CpA,30); grid on; box on; 
-		xlim([min(CpA) max(CpA)]); 
-		xlabel('Annual \itu_*^{Th}'); ylabel('Frequency'); 
-		title(sprintf('Median (CI): %5.3f (%5.3f) ', ... 
-			nanmedian(CpA),0.5*diff(prctile(CpA,[2.5 97.5])) )); 
-		
-		subplot('position',[0.76 0.06 0.22 0.38]); 
-		plot(mtByWindow,FracSelectByWindow,'o-'); 
-		fcDatetick(mtByWindow,'Mo',4,1); 
-		ylabel('FracSelectByWindow'); ylim([0 1]); 
-		
-	end;
-	
-%	=======================================================================
-%	=======================================================================
-	
+function [CpA, nA, tW, CpW, cMode, cFailure, fSelect, sSine, FracSig, FracModeD, FracSelect] = cpdAssignUStarTh20100901(Stats, fPlot, cSiteYr, varargin) 
+
+    % Initialize Variables
+    CpA = []; nA = []; tW = []; CpW = []; fSelect = []; cMode = ''; cFailure = ''; sSine = []; 
+    FracSig = []; FracModeD = []; FracSelect = []; 
+    
+    % Decode JSON if Needed
+    for i = 1:length(varargin)
+        a = varargin{i};
+        if iscell(a) && strcmp(a{1}, 'jsondecode')
+            for j = 2:length(a)
+                if a{j} == 1
+                    Stats = jsondecode(Stats);
+                end
+            end
+        end
+    end
+    
+    % Determine Window Sizes
+    nDim = ndims(Stats); 
+    if nDim == 2
+        [nWindows, nBoot] = size(Stats);
+        nStrata = 1; nStrataN = 0.5; 
+    elseif nDim == 3
+        [nWindows, nStrata, nBoot] = size(Stats); 
+        nStrataN = 1; 
+    else
+        cFailure = 'Stats must be 2D or 3D.'; 
+        return; 
+    end
+    
+    % Set Reference Values
+    nWindowsN = 4; 
+    nSelectN = nWindowsN * nStrataN * nBoot; 
+    
+    % Preallocate Outputs
+    CpA = NaN(nBoot, 1); 
+    nA = NaN(nBoot, 1); 
+    tW = NaN(nWindows, 1); 
+    CpW = NaN(nWindows, 1);
+    
+    % Extract Variables from Stats Structure
+    cVars = {'mt', 'Cp', 'b1', 'c2', 'cib1', 'cic2', 'p'}; 
+    nVars = length(cVars); 
+    
+    for i = 1:nVars
+        cv = cVars{i}; 
+        eval([cv ' = fcReadFields(Stats, ''' cv ''');']); 
+        
+        if strcmp(cv, 'mt')
+            xmt = mt; 
+        elseif strcmp(cv, 'Cp')
+            xCp = Cp; 
+        end
+        
+        eval([cv ' = fcx2colvec(' cv ');']); 
+    end
+    
+    % Identify Significant Change Points
+    pSig = 0.05; 
+    fP = p <= pSig; 
+    
+    % Identify Model Type
+    if sum(~isnan(c2)) == 0
+        nPar = 2; 
+        c2 = zeros(size(b1)); 
+        cic2 = zeros(size(b1)); 
+    else
+        nPar = 3; 
+    end
+    
+    % Classify Significant Change Points
+    iTry = find(~isnan(mt)); 
+    nTry = length(iTry); 
+    
+    %iCp = find(~isnan(b1 + c2 + Cp)); 
+    %nCp = length(iCp); 
+    
+    %iNS = find(fP == 0 & ~isnan(b1 + c2 + Cp)); 
+    %nNS = length(iNS); 
+    
+    iSig = find(fP == 1 & ~isnan(b1 + c2 + Cp)); 
+    nSig = length(iSig); 
+    
+    iModeE = find(fP == 1 & b1 < c2); 
+    nModeE = length(iModeE); 
+    
+    iModeD = find(fP == 1 & b1 >= c2); 
+    nModeD = length(iModeD); 
+    
+    if nModeD >= nModeE
+        iSelect = iModeD; 
+        cMode = 'D'; 
+    else
+        iSelect = iModeE; 
+        cMode = 'E'; 
+    end
+    
+    nSelect = length(iSelect); 
+    
+    % Update Selection Flags
+    fSelect = false(size(fP)); 
+    fSelect(iSelect) = true; 
+    
+    fModeD = NaN(size(fP)); 
+    fModeD(iModeD) = 1; 
+    
+    fModeE = NaN(size(fP)); 
+    fModeE(iModeE) = 1; 
+
+    
+    FracSig = nSig / nTry; 
+    FracModeD = nModeD / nSig; 
+    FracSelect = nSelect / nTry; 
+    
+    % Abort if Too Few Selections
+    if FracSelect < 0.10
+        cFailure = 'Less than 10% successful detections.'; 
+        return; 
+    end
+    
+    % Configure Regression Matrix
+    if nPar == 2
+        x = [Cp, b1, cib1]; 
+        %nx = 3; 
+    else
+        x = [Cp, b1, c2, cib1, cic2]; 
+        %nx = 5; 
+    end
+
+    % Exclude Outliers
+    xNormX = computeStandardizedScores(x); 
+    
+    % Identify Outliers
+    [fOut, iOut] = identifyOutliers(xNormX, 5);
+
+    % Update Selected Indices
+    [iSelect, nSelect, fSelect] = updateSelectedIndices(iSelect, iOut, fSelect, fOut);
+    
+    % Update Modes
+    [iModeD, nModeD] = updateModes(fModeD,iOut);
+    [iModeE, ~] = updateModes(fModeE,iOut);
+
+    % Recalculate Significant Indices
+    iSig = union(iModeD, iModeE); 
+    nSig = length(iSig);
+
+    FracSig = nSig / nTry; 
+    FracModeD = nModeD / nSig; 
+    FracSelect = nSelect / nTry;
+    
+    if nSelect < nSelectN
+        cFailure = sprintf('Too few selected change points: %g/%g', nSelect, nSelectN); 
+        return; 
+    end
+    
+    % Aggregate Seasonal and Annual Values
+    [CpA, nA, ~] = aggregateSeasonalAndAnnualValues(xCp, iSelect, nDim, nWindows, nStrata, nBoot); 
+
+    
+    % Aggregate Seasonal Means
+    [tW, CpW] = aggregateSeasonalMeans(mt, Cp, xmt, iSelect, nWindows, nStrata, nBoot);
+    
+    % Fit Annual Sine Curve
+    sSine = fitAnnualSineCurve(mt, Cp, iSelect);
+
+end
+
+function sSine = fitAnnualSineCurve(mt, Cp, iSelect)
+    % fitAnnualSineCurve
+    % Fits an annual sine curve to the selected data points and returns
+    % the sine coefficients and r-squared value.
+        
+    % Initial Guess for Sine Coefficients
+    bSine = [1, 1, 1]; 
+        
+    % Fit Sine Curve Using Nonlinear Regression
+    bSine = nlinfit(mt(iSelect), Cp(iSelect), 'fcEqnAnnualSine', bSine); 
+        
+    % Calculate Predicted Values and R-Squared
+    predictedCp = fcEqnAnnualSine(bSine, mt(iSelect)); 
+    r2 = fcr2Calc(Cp(iSelect), predictedCp); 
+        
+    % Adjust Phase and Wrap to One Year
+    bSine(3) = mod(bSine(3), 365.25); 
+        
+    % Return Fitted Sine Coefficients and R-Squared
+    sSine = [bSine, r2]; 
+        
+    end
+
+function [tW, CpW] = aggregateSeasonalMeans(mt, Cp, xmt, iSelect, nWindows, nStrata, nBoot)
+    % aggregateSeasonalMeans
+    % Aggregates seasonal means for time and change points.
+        
+    % Calculate Median Number of Windows
+    nW = nanmedian(sum(~isnan(reshape(xmt, nWindows, nStrata * nBoot)))); 
+        
+    % Sort Selected Measurements
+    [mtSelect, i] = sort(mt(iSelect)); 
+    CpSelect = Cp(iSelect(i)); 
+        
+    % Define Bins Based on Percentiles
+    xBins = prctile(mtSelect, 0:(100/nW):100); 
+        
+    % Aggregate Seasonal Means Using fcBin
+    [~, tW, CpW] = fcBin(mtSelect, CpSelect, xBins, 0); 
+        
+    end
+        
+function [CpA, nA, xCpSelect] = aggregateSeasonalAndAnnualValues(xCp, iSelect, nDim, nWindows, nStrata, nBoot)
+    % aggregateSeasonalAndAnnualValues
+    % Aggregates seasonal and annual change point values based on selection.
+
+    % Initialize Selection Array
+    xCpSelect = NaN(size(xCp)); 
+        
+    % Assign Selected Change Points
+    xCpSelect(iSelect) = xCp(iSelect); 
+    xCpGF = xCpSelect; 
+        
+    % Aggregate Values Based on Dimensions
+    if nDim == 2
+        CpA = nanmean(xCpGF); 
+        nA = sum(~isnan(xCpSelect)); 
+    elseif nDim == 3
+        CpA = nanmean(reshape(xCpGF, nWindows * nStrata, nBoot)); 
+        nA = sum(~isnan(reshape(xCpSelect, nWindows * nStrata, nBoot))); 
+    else
+        error('Invalid number of dimensions: Expected 2D or 3D Stats.');
+    end
+    
+    end
+
+function xNormX = computeStandardizedScores(x)
+    % computeStandardizedScores
+    % Standardizes the matrix x by its median and interquartile range.
+        
+    mx = nanmedian(x); 
+    sx = fcNaniqr(x); 
+    xNorm = (x - mx) ./ sx; 
+    xNormX = max(abs(xNorm), [], 2); 
+        
+    end
+
+function [fOut, iOut] = identifyOutliers(xNormX, threshold)
+    % identifyOutliers
+    % Identifies outliers based on standardized scores.
+        
+    fOut = xNormX > threshold; 
+    iOut = find(fOut); 
+        
+    end
+
+function [iSelect, nSelect, fSelect] = updateSelectedIndices(iSelect, iOut, fSelect, fOut)
+    % updateSelectedIndices
+    % Updates selected indices after removing outliers.
+        
+    iSelect = setdiff(iSelect, iOut); 
+    nSelect = length(iSelect); 
+    fSelect = fSelect & ~fOut;
+        
+    end
+
+function [iModeX, nModeX] = updateModes(fModeX, iOut)
+    % updateModes
+    % Recalculates mode indices after removing outliers.
+        
+    fModeX(iOut) = NaN; 
+    iModeX = find(fModeX == 1); 
+    nModeX = length(iModeX); 
+        
+    end
+    
+        
+        
+        
+           
+
diff --git a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
index c4e42ed6..ef01ac52 100644
--- a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
+++ b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
@@ -1,5 +1,4 @@
 import pytest
-import json
 import matlab.engine
 import numpy as np
 
@@ -92,3 +91,4 @@ def set_attr(struct_array, key, val):
     # Assertions for edge cases
     assert len(results_all_sig[0]) > 0, "Should produce results for all significant change points"
     assert len(results_no_sig[5]) > 0, "Should produce a failure message for no significant change points"
+

From d24139fd4c0b0fb1f46f4e027061fd9eb485b021 Mon Sep 17 00:00:00 2001
From: James Emberton <jamie.emberton@gmail.com>
Date: Mon, 9 Dec 2024 19:40:56 +0000
Subject: [PATCH 02/27] added meaningful names

---
 .../cpdAssignUStarTh20100901.m                | 300 +++++++++---------
 1 file changed, 148 insertions(+), 152 deletions(-)

diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m
index b03d29a5..8a439139 100644
--- a/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m
+++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m
@@ -1,171 +1,167 @@
-function [CpA, nA, tW, CpW, cMode, cFailure, fSelect, sSine, FracSig, FracModeD, FracSelect] = cpdAssignUStarTh20100901(Stats, fPlot, cSiteYr, varargin) 
+function [annualChangePoint, numAnnualSelected, seasonalTimeWindow, seasonalChangePoint, ...
+    dominantMode, failureMessage, selectedPointsFlag, sineCurve, ...
+    fractionSignificant, fractionModeD, fractionSelected] = cpdAssignUStarTh20100901(Stats, plotFlag, siteYearText, varargin)
 
-    % Initialize Variables
-    CpA = []; nA = []; tW = []; CpW = []; fSelect = []; cMode = ''; cFailure = ''; sSine = []; 
-    FracSig = []; FracModeD = []; FracSelect = []; 
-    
-    % Decode JSON if Needed
-    for i = 1:length(varargin)
-        a = varargin{i};
-        if iscell(a) && strcmp(a{1}, 'jsondecode')
-            for j = 2:length(a)
-                if a{j} == 1
-                    Stats = jsondecode(Stats);
-                end
+% Initialize Variables
+annualChangePoint = []; numAnnualSelected = []; seasonalTimeWindow = []; seasonalChangePoint = [];
+selectedPointsFlag = []; dominantMode = ''; failureMessage = ''; sineCurve = [];
+fractionSignificant = []; fractionModeD = []; fractionSelected = [];
+
+% Decode JSON if Needed
+for i = 1:length(varargin)
+    arg = varargin{i};
+    if iscell(arg) && strcmp(arg{1}, 'jsondecode')
+        for j = 2:length(arg)
+            if arg{j} == 1
+                Stats = jsondecode(Stats);
             end
         end
     end
-    
-    % Determine Window Sizes
-    nDim = ndims(Stats); 
-    if nDim == 2
-        [nWindows, nBoot] = size(Stats);
-        nStrata = 1; nStrataN = 0.5; 
-    elseif nDim == 3
-        [nWindows, nStrata, nBoot] = size(Stats); 
-        nStrataN = 1; 
-    else
-        cFailure = 'Stats must be 2D or 3D.'; 
-        return; 
-    end
-    
-    % Set Reference Values
-    nWindowsN = 4; 
-    nSelectN = nWindowsN * nStrataN * nBoot; 
-    
-    % Preallocate Outputs
-    CpA = NaN(nBoot, 1); 
-    nA = NaN(nBoot, 1); 
-    tW = NaN(nWindows, 1); 
-    CpW = NaN(nWindows, 1);
-    
-    % Extract Variables from Stats Structure
-    cVars = {'mt', 'Cp', 'b1', 'c2', 'cib1', 'cic2', 'p'}; 
-    nVars = length(cVars); 
-    
-    for i = 1:nVars
-        cv = cVars{i}; 
-        eval([cv ' = fcReadFields(Stats, ''' cv ''');']); 
-        
-        if strcmp(cv, 'mt')
-            xmt = mt; 
-        elseif strcmp(cv, 'Cp')
-            xCp = Cp; 
-        end
-        
-        eval([cv ' = fcx2colvec(' cv ');']); 
-    end
-    
-    % Identify Significant Change Points
-    pSig = 0.05; 
-    fP = p <= pSig; 
-    
-    % Identify Model Type
-    if sum(~isnan(c2)) == 0
-        nPar = 2; 
-        c2 = zeros(size(b1)); 
-        cic2 = zeros(size(b1)); 
-    else
-        nPar = 3; 
-    end
-    
-    % Classify Significant Change Points
-    iTry = find(~isnan(mt)); 
-    nTry = length(iTry); 
-    
-    %iCp = find(~isnan(b1 + c2 + Cp)); 
-    %nCp = length(iCp); 
-    
-    %iNS = find(fP == 0 & ~isnan(b1 + c2 + Cp)); 
-    %nNS = length(iNS); 
-    
-    iSig = find(fP == 1 & ~isnan(b1 + c2 + Cp)); 
-    nSig = length(iSig); 
-    
-    iModeE = find(fP == 1 & b1 < c2); 
-    nModeE = length(iModeE); 
-    
-    iModeD = find(fP == 1 & b1 >= c2); 
-    nModeD = length(iModeD); 
-    
-    if nModeD >= nModeE
-        iSelect = iModeD; 
-        cMode = 'D'; 
-    else
-        iSelect = iModeE; 
-        cMode = 'E'; 
-    end
-    
-    nSelect = length(iSelect); 
-    
-    % Update Selection Flags
-    fSelect = false(size(fP)); 
-    fSelect(iSelect) = true; 
-    
-    fModeD = NaN(size(fP)); 
-    fModeD(iModeD) = 1; 
-    
-    fModeE = NaN(size(fP)); 
-    fModeE(iModeE) = 1; 
+end
 
-    
-    FracSig = nSig / nTry; 
-    FracModeD = nModeD / nSig; 
-    FracSelect = nSelect / nTry; 
-    
-    % Abort if Too Few Selections
-    if FracSelect < 0.10
-        cFailure = 'Less than 10% successful detections.'; 
-        return; 
-    end
-    
-    % Configure Regression Matrix
-    if nPar == 2
-        x = [Cp, b1, cib1]; 
-        %nx = 3; 
-    else
-        x = [Cp, b1, c2, cib1, cic2]; 
-        %nx = 5; 
-    end
+% Determine Window Sizes
+numDimensions = ndims(Stats); 
+if numDimensions == 2
+    [numWindows, numBootstraps] = size(Stats);
+    numTemperatureStrata = 1; temperatureStrataFactor = 0.5; 
+elseif numDimensions == 3
+    [numWindows, numTemperatureStrata, numBootstraps] = size(Stats); 
+    temperatureStrataFactor = 1; 
+else
+    failureMessage = 'Stats must be 2D or 3D.'; 
+    return; 
+end
 
-    % Exclude Outliers
-    xNormX = computeStandardizedScores(x); 
-    
-    % Identify Outliers
-    [fOut, iOut] = identifyOutliers(xNormX, 5);
+% Set Reference Values
+referenceWindows = 4; 
+requiredSelectionCount = referenceWindows * temperatureStrataFactor * numBootstraps; 
 
-    % Update Selected Indices
-    [iSelect, nSelect, fSelect] = updateSelectedIndices(iSelect, iOut, fSelect, fOut);
-    
-    % Update Modes
-    [iModeD, nModeD] = updateModes(fModeD,iOut);
-    [iModeE, ~] = updateModes(fModeE,iOut);
+% Preallocate Outputs
+annualChangePoint = NaN(numBootstraps, 1); 
+numAnnualSelected = NaN(numBootstraps, 1); 
+seasonalTimeWindow = NaN(numWindows, 1); 
+seasonalChangePoint = NaN(numWindows, 1);
 
-    % Recalculate Significant Indices
-    iSig = union(iModeD, iModeE); 
-    nSig = length(iSig);
+% Extract Variables from Stats Structure
+variableNames = {'mt', 'Cp', 'b1', 'c2', 'cib1', 'cic2', 'p'}; 
+numVariables = length(variableNames); 
 
-    FracSig = nSig / nTry; 
-    FracModeD = nModeD / nSig; 
-    FracSelect = nSelect / nTry;
+for i = 1:numVariables
+    variableName = variableNames{i}; 
+    eval([variableName ' = fcReadFields(Stats, ''' variableName ''');']); 
     
-    if nSelect < nSelectN
-        cFailure = sprintf('Too few selected change points: %g/%g', nSelect, nSelectN); 
-        return; 
+    if strcmp(variableName, 'mt')
+        measurementTime = mt; 
+    elseif strcmp(variableName, 'Cp')
+        changePoint = Cp; 
     end
     
-    % Aggregate Seasonal and Annual Values
-    [CpA, nA, ~] = aggregateSeasonalAndAnnualValues(xCp, iSelect, nDim, nWindows, nStrata, nBoot); 
+    eval([variableName ' = fcx2colvec(' variableName ');']); 
+end
 
-    
-    % Aggregate Seasonal Means
-    [tW, CpW] = aggregateSeasonalMeans(mt, Cp, xmt, iSelect, nWindows, nStrata, nBoot);
-    
-    % Fit Annual Sine Curve
-    sSine = fitAnnualSineCurve(mt, Cp, iSelect);
+% Identify Significant Change Points
+significanceThreshold = 0.05; 
+significantFlag = p <= significanceThreshold; 
 
+% Identify Model Type
+if sum(~isnan(c2)) == 0
+    numParameters = 2; 
+    c2 = zeros(size(b1)); 
+    cic2 = zeros(size(b1)); 
+else
+    numParameters = 3; 
 end
 
+% Classify Significant Change Points
+validMeasurementIndices = find(~isnan(mt)); 
+numValidMeasurements = length(validMeasurementIndices); 
+
+nonSignificantIndices = find(significantFlag == 0 & ~isnan(b1 + c2 + Cp)); 
+numNonSignificant = length(nonSignificantIndices); 
+
+significantIndices = find(significantFlag == 1 & ~isnan(b1 + c2 + Cp)); 
+numSignificant = length(significantIndices); 
+
+modeEIndices = find(significantFlag == 1 & b1 < c2); 
+numModeE = length(modeEIndices); 
+
+modeDIndices = find(significantFlag == 1 & b1 >= c2); 
+numModeD = length(modeDIndices); 
+
+% Select Dominant Mode
+if numModeD >= numModeE
+    selectedIndices = modeDIndices; 
+    dominantMode = 'D'; 
+else
+    selectedIndices = modeEIndices; 
+    dominantMode = 'E'; 
+end
+
+numSelected = length(selectedIndices); 
+
+% Update Selection Flags
+selectedPointsFlag = false(size(significantFlag)); 
+selectedPointsFlag(selectedIndices) = true; 
+
+modeDFlag = NaN(size(significantFlag)); 
+modeDFlag(modeDIndices) = 1; 
+
+modeEFlag = NaN(size(significantFlag)); 
+modeEFlag(modeEIndices) = 1; 
+
+fractionSignificant = numSignificant / numValidMeasurements; 
+fractionModeD = numModeD / numSignificant; 
+fractionSelected = numSelected / numValidMeasurements; 
+
+% Abort if Too Few Selections
+if fractionSelected < 0.10
+    failureMessage = 'Less than 10% successful detections.'; 
+    return; 
+end
+
+% Configure Regression Matrix
+if numParameters == 2
+    regressionMatrix = [Cp, b1, cib1]; 
+else
+    regressionMatrix = [Cp, b1, c2, cib1, cic2]; 
+end
+
+% Exclude Outliers
+standardizedScores = computeStandardizedScores(regressionMatrix); 
+[outlierFlag, outlierIndices] = identifyOutliers(standardizedScores, 5);
+
+[selectedIndices, numSelected, selectedPointsFlag] = ...
+    updateSelectedIndices(selectedIndices, outlierIndices, selectedPointsFlag, outlierFlag);
+
+[modeDIndices, numModeD] = updateModes(modeDFlag, outlierIndices);
+[modeEIndices, ~] = updateModes(modeEFlag, outlierIndices);
+
+significantIndices = union(modeDIndices, modeEIndices); 
+numSignificant = length(significantIndices);
+
+fractionSignificant = numSignificant / numValidMeasurements; 
+fractionModeD = numModeD / numSignificant; 
+fractionSelected = numSelected / numValidMeasurements;
+
+if numSelected < requiredSelectionCount
+    failureMessage = sprintf('Too few selected change points: %g/%g', numSelected, requiredSelectionCount); 
+    return; 
+end
+
+% Aggregate Seasonal and Annual Values
+[annualChangePoint, numAnnualSelected, ~] = ...
+    aggregateSeasonalAndAnnualValues(changePoint, selectedIndices, numDimensions, numWindows, numTemperatureStrata, numBootstraps); 
+
+% Aggregate Seasonal Means
+[seasonalTimeWindow, seasonalChangePoint] = ...
+    aggregateSeasonalMeans(mt, Cp, measurementTime, selectedIndices, numWindows, numTemperatureStrata, numBootstraps);
+
+% Fit Annual Sine Curve
+sineCurve = fitAnnualSineCurve(mt, Cp, selectedIndices);
+
+end
+
+
 function sSine = fitAnnualSineCurve(mt, Cp, iSelect)
     % fitAnnualSineCurve
     % Fits an annual sine curve to the selected data points and returns

From 61d40bb92e2290016511b72a8c3fc689d7ba1a23 Mon Sep 17 00:00:00 2001
From: James Emberton <jamie.emberton@gmail.com>
Date: Mon, 9 Dec 2024 19:56:40 +0000
Subject: [PATCH 03/27] tests

---
 tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
index ef01ac52..be129ab9 100644
--- a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
+++ b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
@@ -91,4 +91,3 @@ def set_attr(struct_array, key, val):
     # Assertions for edge cases
     assert len(results_all_sig[0]) > 0, "Should produce results for all significant change points"
     assert len(results_no_sig[5]) > 0, "Should produce a failure message for no significant change points"
-

From 5be214670bb7a8512cc0476baa0ad93cb82f6e11 Mon Sep 17 00:00:00 2001
From: James Emberton <jamie.emberton@gmail.com>
Date: Fri, 20 Dec 2024 12:02:11 +0000
Subject: [PATCH 04/27] storage

---
 .../cpdAssign_functions/fitAnnualSineCurve.m  | 22 ++++++
 .../test_ustar_cp/test_fitAnnualSine.py       | 75 +++++++++++++++++++
 2 files changed, 97 insertions(+)
 create mode 100644 oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/fitAnnualSineCurve.m
 create mode 100644 tests/unit_tests/test_ustar_cp/test_fitAnnualSine.py

diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/fitAnnualSineCurve.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/fitAnnualSineCurve.m
new file mode 100644
index 00000000..fb7b58b3
--- /dev/null
+++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/fitAnnualSineCurve.m
@@ -0,0 +1,22 @@
+function sSine = fitAnnualSineCurve(mt, Cp, iSelect)
+    % fitAnnualSineCurve
+    % Fits an annual sine curve to the selected data points and returns
+    % the sine coefficients and r-squared value.
+        
+    % Initial Guess for Sine Coefficients
+    bSine = [1, 1, 1]; 
+        
+    % Fit Sine Curve Using Nonlinear Regression
+    bSine = nlinfit(mt(iSelect), Cp(iSelect), 'fcEqnAnnualSine', bSine); 
+        
+    % Calculate Predicted Values and R-Squared
+    predictedCp = fcEqnAnnualSine(bSine, mt(iSelect)); 
+    r2 = fcr2Calc(Cp(iSelect), predictedCp); 
+        
+    % Adjust Phase and Wrap to One Year
+    bSine(3) = mod(bSine(3), 365.25); 
+        
+    % Return Fitted Sine Coefficients and R-Squared
+    sSine = [bSine, r2]; 
+        
+    end
\ No newline at end of file
diff --git a/tests/unit_tests/test_ustar_cp/test_fitAnnualSine.py b/tests/unit_tests/test_ustar_cp/test_fitAnnualSine.py
new file mode 100644
index 00000000..49d806e1
--- /dev/null
+++ b/tests/unit_tests/test_ustar_cp/test_fitAnnualSine.py
@@ -0,0 +1,75 @@
+import numpy as np
+import pytest
+
+@pytest.fixture
+def synthetic_data():
+    # Create synthetic annual sine data
+    # True parameters:
+    true_amplitude = 2.0
+    true_offset = 10.0
+    true_phase = 50.0
+    days = np.linspace(0, 365, 100)
+    Cp = true_amplitude * np.sin(2 * np.pi * (days - true_phase)/365.25) + true_offset
+    # Add some noise
+    noise = np.random.normal(0, 0.1, size=len(days))
+    Cp_noisy = Cp + noise
+
+    # We'll select all data points
+    iSelect = np.arange(len(days))
+    iSelect = iSelect[iSelect != 0] # Cant index an array with zero in Matlab
+
+    return days, Cp_noisy, iSelect, ( true_offset, true_amplitude, true_phase)
+
+def test_fit_output_shape_and_type(test_engine, synthetic_data):
+    days, Cp_noisy, iSelect, true_sine = synthetic_data
+
+    days = test_engine.convert(days)
+    Cp_noisy = test_engine.convert(Cp_noisy)
+    iSelect = test_engine.convert(iSelect)
+
+    result = test_engine.fitAnnualSineCurve(days, Cp_noisy, iSelect)
+    # Expecting [amplitude, offset, phase, r2]
+    assert test_engine.equal(len(test_engine.convert(result[0])), 4)
+    for val in result[0]:
+        assert isinstance(val, float)
+    assert np.allclose(result[0][0], true_sine[0], rtol = 0.001) # test amplitiude accuracy
+    assert np.allclose(result[0][1], true_sine[1], rtol = 0.1) # test offset accuracy
+    assert np.allclose(result[0][2], true_sine[2], rtol = 0.01) # test phase accuracy
+
+def test_fit_on_synthetic_data(test_engine, synthetic_data):
+    days, Cp_noisy, iSelect, (true_amp, true_off, true_ph) = synthetic_data
+
+    result = test_engine.fitAnnualSineCurve(test_engine.convert(days), 
+                                            test_engine.convert(Cp_noisy), 
+                                            test_engine.convert(iSelect))
+    
+    fitted_amp, fitted_off, fitted_ph, fitted_r2 = test_engine.convert(result)
+
+    # Check that fitted parameters are close to the true parameters.
+    # Allow some tolerance due to noise.
+    assert np.isclose(fitted_amp, true_amp, rtol=0.2)
+    assert np.isclose(fitted_off, true_off, rtol=0.2)
+
+    # Phase can wrap around, so it's trickier. We can check a mod difference:
+    phase_diff = (fitted_ph - true_ph) % 365.25
+    if phase_diff > 182.625:
+        phase_diff = 365.25 - phase_diff
+    assert phase_diff < 30  # Somewhat lenient
+
+    # R-squared should be reasonably high for low-noise data
+    assert fitted_r2 > 0.8
+
+def test_constant_data(test_engine):
+    # If the data is constant, the fit might fail or result in amplitude ~ 0
+    days = test_engine.convert(np.linspace(0, 365, 100))
+    Cp = test_engine.convert(np.ones_like(days) * 5.0)
+    iSelect = test_engine.convert(np.arange(len(days)))
+
+    result = test_engine.fitAnnualSineCurve(days, Cp, iSelect)
+
+    fitted_amp, fitted_off, fitted_ph, fitted_r2 = result
+    # For constant data, amplitude should be near zero, offset near 5, and R^2 near 1.
+    assert abs(fitted_amp) < 0.5
+    assert abs(fitted_off - 5.0) < 0.5
+    # Phase doesn't matter much here, R^2 should be very high
+    assert fitted_r2 > 0.9

From 38bf63bfc3ae2dfd13ff4c2f21f8c9bd22cd6835 Mon Sep 17 00:00:00 2001
From: James Emberton <jamie.emberton@gmail.com>
Date: Fri, 20 Dec 2024 12:54:09 +0000
Subject: [PATCH 05/27] etsts working

---
 .../cpdAssignUStarTh20100901.m                | 24 --------------
 .../test_ustar_cp/test_fitAnnualSine.py       | 32 ++++++++++++-------
 2 files changed, 20 insertions(+), 36 deletions(-)

diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m
index 8a439139..2f162bf1 100644
--- a/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m
+++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m
@@ -161,30 +161,6 @@
 
 end
 
-
-function sSine = fitAnnualSineCurve(mt, Cp, iSelect)
-    % fitAnnualSineCurve
-    % Fits an annual sine curve to the selected data points and returns
-    % the sine coefficients and r-squared value.
-        
-    % Initial Guess for Sine Coefficients
-    bSine = [1, 1, 1]; 
-        
-    % Fit Sine Curve Using Nonlinear Regression
-    bSine = nlinfit(mt(iSelect), Cp(iSelect), 'fcEqnAnnualSine', bSine); 
-        
-    % Calculate Predicted Values and R-Squared
-    predictedCp = fcEqnAnnualSine(bSine, mt(iSelect)); 
-    r2 = fcr2Calc(Cp(iSelect), predictedCp); 
-        
-    % Adjust Phase and Wrap to One Year
-    bSine(3) = mod(bSine(3), 365.25); 
-        
-    % Return Fitted Sine Coefficients and R-Squared
-    sSine = [bSine, r2]; 
-        
-    end
-
 function [tW, CpW] = aggregateSeasonalMeans(mt, Cp, xmt, iSelect, nWindows, nStrata, nBoot)
     % aggregateSeasonalMeans
     % Aggregates seasonal means for time and change points.
diff --git a/tests/unit_tests/test_ustar_cp/test_fitAnnualSine.py b/tests/unit_tests/test_ustar_cp/test_fitAnnualSine.py
index 49d806e1..e7609c18 100644
--- a/tests/unit_tests/test_ustar_cp/test_fitAnnualSine.py
+++ b/tests/unit_tests/test_ustar_cp/test_fitAnnualSine.py
@@ -32,19 +32,22 @@ def test_fit_output_shape_and_type(test_engine, synthetic_data):
     assert test_engine.equal(len(test_engine.convert(result[0])), 4)
     for val in result[0]:
         assert isinstance(val, float)
-    assert np.allclose(result[0][0], true_sine[0], rtol = 0.001) # test amplitiude accuracy
-    assert np.allclose(result[0][1], true_sine[1], rtol = 0.1) # test offset accuracy
-    assert np.allclose(result[0][2], true_sine[2], rtol = 0.01) # test phase accuracy
+    assert np.allclose(result[0][0], true_sine[0], rtol = 0.1) # test offset accuracy
+    assert np.allclose(result[0][1], true_sine[1], rtol = 0.1) # test amplitude accuracy
+    assert np.allclose(result[0][2], true_sine[2], rtol = 0.1) # test phase accuracy
 
 def test_fit_on_synthetic_data(test_engine, synthetic_data):
-    days, Cp_noisy, iSelect, (true_amp, true_off, true_ph) = synthetic_data
+    days, Cp_noisy, iSelect, (true_off, true_amp, true_ph) = synthetic_data
 
     result = test_engine.fitAnnualSineCurve(test_engine.convert(days), 
                                             test_engine.convert(Cp_noisy), 
                                             test_engine.convert(iSelect))
     
-    fitted_amp, fitted_off, fitted_ph, fitted_r2 = test_engine.convert(result)
-
+    result = test_engine.convert(result)
+    fitted_amp = result[0][1] 
+    fitted_off = result[0][0] 
+    fitted_ph = result[0][2]
+    fitted_r2 = result[0][3]
     # Check that fitted parameters are close to the true parameters.
     # Allow some tolerance due to noise.
     assert np.isclose(fitted_amp, true_amp, rtol=0.2)
@@ -61,15 +64,20 @@ def test_fit_on_synthetic_data(test_engine, synthetic_data):
 
 def test_constant_data(test_engine):
     # If the data is constant, the fit might fail or result in amplitude ~ 0
-    days = test_engine.convert(np.linspace(0, 365, 100))
-    Cp = test_engine.convert(np.ones_like(days) * 5.0)
-    iSelect = test_engine.convert(np.arange(len(days)))
+    days_array = np.linspace(0, 365, 100)
+    days = test_engine.convert(days_array)
+    Cp = test_engine.convert(np.ones_like(days_array) * 5.0)
+    iSelect = np.arange(len(days_array))
+    iSelect = iSelect[iSelect != 0] # Cant index an array with zero in Matlab
+    iSelect = test_engine.convert(iSelect)
 
     result = test_engine.fitAnnualSineCurve(days, Cp, iSelect)
+    result = test_engine.convert(result)
+    fitted_amp = result[0][1] 
+    fitted_off = result[0][0]
+    fitted_r2 = result[0][3]
 
-    fitted_amp, fitted_off, fitted_ph, fitted_r2 = result
-    # For constant data, amplitude should be near zero, offset near 5, and R^2 near 1.
-    assert abs(fitted_amp) < 0.5
+    assert abs(fitted_amp)  <  0.5
     assert abs(fitted_off - 5.0) < 0.5
     # Phase doesn't matter much here, R^2 should be very high
     assert fitted_r2 > 0.9

From 74aa307a3533b17cc62265812d230730519c1964 Mon Sep 17 00:00:00 2001
From: James Emberton <jamie.emberton@gmail.com>
Date: Wed, 29 Jan 2025 08:59:31 +0000
Subject: [PATCH 06/27] breakout subfunctions

---
 .../cpdAssignUStarTh20100901.m                | 83 -------------------
 .../aggregateSeasonalAndAnnualValues.m        | 23 +++++
 .../aggregateSeasonalMeans.m                  | 19 +++++
 .../computeStandardizedScores.m               | 10 +++
 .../cpdAssign_functions/identifyOutliers.m    |  8 ++
 .../cpdAssign_functions/updateModes.m         | 10 +++
 .../updateSelectedIndices.m                   |  9 ++
 7 files changed, 79 insertions(+), 83 deletions(-)
 create mode 100644 oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/aggregateSeasonalAndAnnualValues.m
 create mode 100644 oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/aggregateSeasonalMeans.m
 create mode 100644 oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/computeStandardizedScores.m
 create mode 100644 oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/identifyOutliers.m
 create mode 100644 oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/updateModes.m
 create mode 100644 oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/updateSelectedIndices.m

diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m
index 2f162bf1..dacde7d9 100644
--- a/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m
+++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m
@@ -160,89 +160,6 @@
 sineCurve = fitAnnualSineCurve(mt, Cp, selectedIndices);
 
 end
-
-function [tW, CpW] = aggregateSeasonalMeans(mt, Cp, xmt, iSelect, nWindows, nStrata, nBoot)
-    % aggregateSeasonalMeans
-    % Aggregates seasonal means for time and change points.
-        
-    % Calculate Median Number of Windows
-    nW = nanmedian(sum(~isnan(reshape(xmt, nWindows, nStrata * nBoot)))); 
-        
-    % Sort Selected Measurements
-    [mtSelect, i] = sort(mt(iSelect)); 
-    CpSelect = Cp(iSelect(i)); 
-        
-    % Define Bins Based on Percentiles
-    xBins = prctile(mtSelect, 0:(100/nW):100); 
-        
-    % Aggregate Seasonal Means Using fcBin
-    [~, tW, CpW] = fcBin(mtSelect, CpSelect, xBins, 0); 
-        
-    end
-        
-function [CpA, nA, xCpSelect] = aggregateSeasonalAndAnnualValues(xCp, iSelect, nDim, nWindows, nStrata, nBoot)
-    % aggregateSeasonalAndAnnualValues
-    % Aggregates seasonal and annual change point values based on selection.
-
-    % Initialize Selection Array
-    xCpSelect = NaN(size(xCp)); 
-        
-    % Assign Selected Change Points
-    xCpSelect(iSelect) = xCp(iSelect); 
-    xCpGF = xCpSelect; 
-        
-    % Aggregate Values Based on Dimensions
-    if nDim == 2
-        CpA = nanmean(xCpGF); 
-        nA = sum(~isnan(xCpSelect)); 
-    elseif nDim == 3
-        CpA = nanmean(reshape(xCpGF, nWindows * nStrata, nBoot)); 
-        nA = sum(~isnan(reshape(xCpSelect, nWindows * nStrata, nBoot))); 
-    else
-        error('Invalid number of dimensions: Expected 2D or 3D Stats.');
-    end
-    
-    end
-
-function xNormX = computeStandardizedScores(x)
-    % computeStandardizedScores
-    % Standardizes the matrix x by its median and interquartile range.
-        
-    mx = nanmedian(x); 
-    sx = fcNaniqr(x); 
-    xNorm = (x - mx) ./ sx; 
-    xNormX = max(abs(xNorm), [], 2); 
-        
-    end
-
-function [fOut, iOut] = identifyOutliers(xNormX, threshold)
-    % identifyOutliers
-    % Identifies outliers based on standardized scores.
-        
-    fOut = xNormX > threshold; 
-    iOut = find(fOut); 
-        
-    end
-
-function [iSelect, nSelect, fSelect] = updateSelectedIndices(iSelect, iOut, fSelect, fOut)
-    % updateSelectedIndices
-    % Updates selected indices after removing outliers.
-        
-    iSelect = setdiff(iSelect, iOut); 
-    nSelect = length(iSelect); 
-    fSelect = fSelect & ~fOut;
-        
-    end
-
-function [iModeX, nModeX] = updateModes(fModeX, iOut)
-    % updateModes
-    % Recalculates mode indices after removing outliers.
-        
-    fModeX(iOut) = NaN; 
-    iModeX = find(fModeX == 1); 
-    nModeX = length(iModeX); 
-        
-    end
     
         
         
diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/aggregateSeasonalAndAnnualValues.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/aggregateSeasonalAndAnnualValues.m
new file mode 100644
index 00000000..8f03854b
--- /dev/null
+++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/aggregateSeasonalAndAnnualValues.m
@@ -0,0 +1,23 @@
+function [CpA, nA, xCpSelect] = aggregateSeasonalAndAnnualValues(xCp, iSelect, nDim, nWindows, nStrata, nBoot)
+    % aggregateSeasonalAndAnnualValues
+    % Aggregates seasonal and annual change point values based on selection.
+
+    % Initialize Selection Array
+    xCpSelect = NaN(size(xCp)); 
+        
+    % Assign Selected Change Points
+    xCpSelect(iSelect) = xCp(iSelect); 
+    xCpGF = xCpSelect; 
+        
+    % Aggregate Values Based on Dimensions
+    if nDim == 2
+        CpA = nanmean(xCpGF); 
+        nA = sum(~isnan(xCpSelect)); 
+    elseif nDim == 3
+        CpA = nanmean(reshape(xCpGF, nWindows * nStrata, nBoot)); 
+        nA = sum(~isnan(reshape(xCpSelect, nWindows * nStrata, nBoot))); 
+    else
+        error('Invalid number of dimensions: Expected 2D or 3D Stats.');
+    end
+    
+    end
\ No newline at end of file
diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/aggregateSeasonalMeans.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/aggregateSeasonalMeans.m
new file mode 100644
index 00000000..d9cfdd36
--- /dev/null
+++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/aggregateSeasonalMeans.m
@@ -0,0 +1,19 @@
+function [tW, CpW] = aggregateSeasonalMeans(mt, Cp, xmt, iSelect, nWindows, nStrata, nBoot)
+    % aggregateSeasonalMeans
+    % Aggregates seasonal means for time and change points.
+        
+    % Calculate Median Number of Windows
+    nW = nanmedian(sum(~isnan(reshape(xmt, nWindows, nStrata * nBoot)))); 
+        
+    % Sort Selected Measurements
+    [mtSelect, i] = sort(mt(iSelect)); 
+    CpSelect = Cp(iSelect(i)); 
+        
+    % Define Bins Based on Percentiles
+    xBins = prctile(mtSelect, 0:(100/nW):100); 
+        
+    % Aggregate Seasonal Means Using fcBin
+    [~, tW, CpW] = fcBin(mtSelect, CpSelect, xBins, 0); 
+        
+    end
+        
\ No newline at end of file
diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/computeStandardizedScores.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/computeStandardizedScores.m
new file mode 100644
index 00000000..8a9ce012
--- /dev/null
+++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/computeStandardizedScores.m
@@ -0,0 +1,10 @@
+function xNormX = computeStandardizedScores(x)
+    % computeStandardizedScores
+    % Standardizes the matrix x by its median and interquartile range.
+        
+    mx = nanmedian(x); 
+    sx = fcNaniqr(x); 
+    xNorm = (x - mx) ./ sx; 
+    xNormX = max(abs(xNorm), [], 2); 
+        
+    end
\ No newline at end of file
diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/identifyOutliers.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/identifyOutliers.m
new file mode 100644
index 00000000..8422742e
--- /dev/null
+++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/identifyOutliers.m
@@ -0,0 +1,8 @@
+function [fOut, iOut] = identifyOutliers(xNormX, threshold)
+    % identifyOutliers
+    % Identifies outliers based on standardized scores.
+        
+    fOut = xNormX > threshold; 
+    iOut = find(fOut); 
+        
+    end
\ No newline at end of file
diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/updateModes.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/updateModes.m
new file mode 100644
index 00000000..31fda4b9
--- /dev/null
+++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/updateModes.m
@@ -0,0 +1,10 @@
+function [iModeX, nModeX] = updateModes(fModeX, iOut)
+    % updateModes
+    % Recalculates mode indices after removing outliers.
+        
+    fModeX(iOut) = NaN; 
+    iModeX = find(fModeX == 1); 
+    nModeX = length(iModeX); 
+        
+    end
+    
\ No newline at end of file
diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/updateSelectedIndices.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/updateSelectedIndices.m
new file mode 100644
index 00000000..7ea3c55e
--- /dev/null
+++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/updateSelectedIndices.m
@@ -0,0 +1,9 @@
+function [iSelect, nSelect, fSelect] = updateSelectedIndices(iSelect, iOut, fSelect, fOut)
+    % updateSelectedIndices
+    % Updates selected indices after removing outliers.
+        
+    iSelect = setdiff(iSelect, iOut); 
+    nSelect = length(iSelect); 
+    fSelect = fSelect & ~fOut;
+        
+    end
\ No newline at end of file

From 59b5590730c5dd2197cd72d566312d0850314219 Mon Sep 17 00:00:00 2001
From: James Emberton <jamie.emberton@gmail.com>
Date: Fri, 31 Jan 2025 16:03:27 +0000
Subject: [PATCH 07/27] initial tests

---
 oneflux_steps/ustar_cp_python/__init__.py     |  1 +
 .../ustar_cp_python/cpdAssignUStarTh.py       | 17 +++++++
 oneflux_steps/ustar_cp_python/test.py         | 46 +++++++++++++++++++
 tests/conftest.py                             |  2 +
 .../test_cpdAssignUStarTh20100901.py          | 21 ++++++++-
 5 files changed, 86 insertions(+), 1 deletion(-)
 create mode 100644 oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py
 create mode 100644 oneflux_steps/ustar_cp_python/test.py

diff --git a/oneflux_steps/ustar_cp_python/__init__.py b/oneflux_steps/ustar_cp_python/__init__.py
index de8c9aff..cd08b737 100644
--- a/oneflux_steps/ustar_cp_python/__init__.py
+++ b/oneflux_steps/ustar_cp_python/__init__.py
@@ -10,6 +10,7 @@
 from .cpdFmax2pCore import interpolate_FmaxCritical, calculate_p_low, calculate_p_interpolate
 from .fcDatenum import datenum
 from .fcBin import fcBin
+from .cpdAssignUStarTh import *
 
 
 from os.path import dirname, basename, isfile, join
diff --git a/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py
new file mode 100644
index 00000000..f2a07ab1
--- /dev/null
+++ b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py
@@ -0,0 +1,17 @@
+import numpy as np
+
+def identify_outliers(x_norm_x, threshold):
+    """
+    Identifies outliers based on standardized scores.
+
+    Parameters:
+    x_norm_x (numpy.ndarray): Array of normalized values.
+    threshold (float): Threshold value for outlier detection.
+
+    Returns:
+    tuple: A boolean array (f_out) indicating outliers and an array (i_out) of outlier indices.
+    """
+    f_out = x_norm_x > threshold  # Boolean mask of outliers
+    i_out = np.where(f_out)[0]  # Indices of outliers
+    
+    return f_out, i_out
\ No newline at end of file
diff --git a/oneflux_steps/ustar_cp_python/test.py b/oneflux_steps/ustar_cp_python/test.py
new file mode 100644
index 00000000..17ad1d00
--- /dev/null
+++ b/oneflux_steps/ustar_cp_python/test.py
@@ -0,0 +1,46 @@
+import numpy as np
+from scipy.stats import f
+from scipy.interpolate import PchipInterpolator
+
+
+Fmax = 6.89397364185411
+n= 53
+
+
+""" The critical F-max values as a 2D NumPy array. """
+FmaxTable : np.ndarray = np.array([
+    [3.9293, 6.2992, 9.1471, 18.2659],
+    [3.7734, 5.6988, 7.8770, 13.8100],
+    [3.7516, 5.5172, 7.4426, 12.6481],
+    [3.7538, 5.3224, 7.0306, 11.4461],
+    [3.7941, 5.3030, 6.8758, 10.6635],
+    [3.8548, 5.3480, 6.8883, 10.5026],
+    [3.9798, 5.4465, 6.9184, 10.4527],
+    [4.0732, 5.5235, 6.9811, 10.3859],
+    [4.1467, 5.6136, 7.0624, 10.5596],
+    [4.2770, 5.7391, 7.2005, 10.6871],
+    [4.4169, 5.8733, 7.3421, 10.6751],
+    [4.5556, 6.0591, 7.5627, 11.0072],
+    [4.7356, 6.2738, 7.7834, 11.2319]
+])
+
+""" A 1D NumPy array containing sample sizes. """
+# tmp run to 200 1e5 reps
+nTable : np.ndarray = np.array([10, 15, 20, 30, 50, 70, 100
+                       , 150, 200, 300, 500, 700, 1000])
+
+""" A 1D NumPy array containing significance levels. """
+pTable : np.ndarray = np.array([0.80, 0.90, 0.95, 0.99])
+
+pTableSize = len(pTable)
+FmaxCritical = np.zeros(pTableSize)
+for ip in range(pTableSize):
+    interpolator = PchipInterpolator(nTable, FmaxTable[:, ip])
+    FmaxCritical[ip] = interpolator(n)
+try:
+    interpolator = PchipInterpolator(FmaxCritical, 1 - pTable, extrapolate=True)
+    y = interpolator(Fmax)
+except ValueError:
+    y = np.nan
+
+print(y)
\ No newline at end of file
diff --git a/tests/conftest.py b/tests/conftest.py
index 83ab7440..89527b3c 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -569,6 +569,8 @@ def to_matlab_type(data: Any) -> Any:
             return [to_matlab_type(elem) for elem in data]
     elif isinstance(data, (int, float)):
         return matlab.double([data])  # Convert single numbers
+    elif isinstance(data, bool):
+        return matlab.logical(data)
     else:
       return data  # If the data type is already MATLAB-compatible
 
diff --git a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
index e113d15d..8bbcb5f0 100644
--- a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
+++ b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
@@ -90,4 +90,23 @@ def set_attr(struct_array, key, val):
 
     # Assertions for edge cases
     assert len(results_all_sig[0]) > 0, "Should produce results for all significant change points"
-    assert len(results_no_sig[5]) > 0, "Should produce a failure message for no significant change points"
\ No newline at end of file
+    assert len(results_no_sig[5]) > 0, "Should produce a failure message for no significant change points"
+
+@pytest.mark.parametrize(
+    "x_norm_x, threshold, expected_f_out, expected_i_out",
+    [
+        ([0.5, 1.2, 3.5, 0.1, 2.8], 2.0, [[False, False, True, False, True]], [[3.0, 5.0]]),  # Basic test
+        ([0.1, 0.2, 0.3], 1.0, [[False, False, False]], [[]]),  # No outliers
+        ([3.1, 2.9, 3.5], 2.0, [[True, True, True]], [[1, 2, 3]]),  # All outliers
+        ([], 2.0, np.bool([]), []),  # Empty input case
+        ([-3, -2, -1, 0, 1, 2, 3], -1.0, [[False, False, False, True, True, True, True]], [[4.0, 5.0, 6.0, 7.0]]),  # Negative threshold
+    ]
+)
+def test_identify_outliers(test_engine, x_norm_x, threshold, expected_f_out, expected_i_out):
+    """Test MATLAB's identifyOutliers function from Python using MATLAB Engine."""
+
+
+    f_out, i_out = test_engine.identifyOutliers(test_engine.convert(x_norm_x), test_engine.convert(threshold), nargout=2)
+
+    assert np.array_equal(f_out, test_engine.convert(expected_f_out)), "Boolean outlier array does not match expected"
+    assert i_out == test_engine.convert(expected_i_out), "Index output does not match expected"
\ No newline at end of file

From fa3562cdde79c8d938622c4ed592e83873374361 Mon Sep 17 00:00:00 2001
From: James Emberton <jamie.emberton@gmail.com>
Date: Mon, 3 Feb 2025 09:23:50 +0000
Subject: [PATCH 08/27] experiments

---
 oneflux_steps/ustar_cp_python/__init__.py                | 2 +-
 oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py        | 8 ++++----
 tests/conftest.py                                        | 9 ++++++++-
 .../test_ustar_cp/test_cpdAssignUStarTh20100901.py       | 2 +-
 4 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/oneflux_steps/ustar_cp_python/__init__.py b/oneflux_steps/ustar_cp_python/__init__.py
index 230eb046..b0e0fe88 100644
--- a/oneflux_steps/ustar_cp_python/__init__.py
+++ b/oneflux_steps/ustar_cp_python/__init__.py
@@ -11,7 +11,7 @@
 from .cpdFmax2pCore import interpolate_FmaxCritical, calculate_p_low, calculate_p_interpolate
 from .fcDatenum import datenum
 from .fcBin import fcBin
-from .cpdAssignUStarTh import *
+from .cpdAssignUStarTh import identifyOutliers
 
 from os.path import dirname, basename, isfile, join
 import glob
diff --git a/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py
index f2a07ab1..d47a3d08 100644
--- a/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py
+++ b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py
@@ -1,6 +1,5 @@
 import numpy as np
-
-def identify_outliers(x_norm_x, threshold):
+def identifyOutliers(x_norm_x, threshold):
     """
     Identifies outliers based on standardized scores.
 
@@ -11,7 +10,8 @@ def identify_outliers(x_norm_x, threshold):
     Returns:
     tuple: A boolean array (f_out) indicating outliers and an array (i_out) of outlier indices.
     """
-    f_out = x_norm_x > threshold  # Boolean mask of outliers
+    f_out = (x_norm_x > threshold)  # Ensures f_out is a boolean array
     i_out = np.where(f_out)[0]  # Indices of outliers
+
     
-    return f_out, i_out
\ No newline at end of file
+    return f_out, i_out  # Ensure f_out remains a boolean array
\ No newline at end of file
diff --git a/tests/conftest.py b/tests/conftest.py
index bbfe4283..fc6013a3 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -95,6 +95,7 @@ def mf_factory(cls, *args, **kwargs):
 from oneflux_steps.ustar_cp_python.cpd_evaluate_functions import *
 from oneflux_steps.ustar_cp_python.cpdFindChangePoint_functions import *
 from oneflux_steps.ustar_cp_python.cpdBootstrap import *
+from oneflux_steps.ustar_cp_python.cpdAssignUStarTh import *
 
 def pytest_addoption(parser):
     parser.addoption("--language", action="store", default="matlab")
@@ -141,7 +142,12 @@ def convert(self, x, index=False, fromFile=False):
                 x = x-1
             elif isinstance(x, list):
                 x = np.asarray(x)-1
-        if isinstance(x, list):
+        # Explicitly check for NumPy boolean arrays
+        if isinstance(x, np.ndarray) and x.dtype is bool:
+            print("NumPy bool array detected")
+            return x  # Keep as boolean
+        elif isinstance(x, list):
+            print("list")
             # Transpose to capture MATLAB data layout
             # when the data has been serialised from MATLAB
             # to a file
@@ -153,6 +159,7 @@ def convert(self, x, index=False, fromFile=False):
         elif isinstance(x, tuple):
             return tuple([self.convert(xi) for xi in x])
         else:
+            print("hmm")
             return x
         
     def unconvert(self, x):
diff --git a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
index 8bbcb5f0..e768482a 100644
--- a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
+++ b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
@@ -109,4 +109,4 @@ def test_identify_outliers(test_engine, x_norm_x, threshold, expected_f_out, exp
     f_out, i_out = test_engine.identifyOutliers(test_engine.convert(x_norm_x), test_engine.convert(threshold), nargout=2)
 
     assert np.array_equal(f_out, test_engine.convert(expected_f_out)), "Boolean outlier array does not match expected"
-    assert i_out == test_engine.convert(expected_i_out), "Index output does not match expected"
\ No newline at end of file
+    assert i_out == expected_i_out, "Index output does not match expected"
\ No newline at end of file

From f45209715b30eed313dc3357ccd57014a5b76879 Mon Sep 17 00:00:00 2001
From: James Emberton <jamie.emberton@gmail.com>
Date: Mon, 3 Feb 2025 09:25:25 +0000
Subject: [PATCH 09/27] stuff

---
 tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
index e768482a..8bbcb5f0 100644
--- a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
+++ b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
@@ -109,4 +109,4 @@ def test_identify_outliers(test_engine, x_norm_x, threshold, expected_f_out, exp
     f_out, i_out = test_engine.identifyOutliers(test_engine.convert(x_norm_x), test_engine.convert(threshold), nargout=2)
 
     assert np.array_equal(f_out, test_engine.convert(expected_f_out)), "Boolean outlier array does not match expected"
-    assert i_out == expected_i_out, "Index output does not match expected"
\ No newline at end of file
+    assert i_out == test_engine.convert(expected_i_out), "Index output does not match expected"
\ No newline at end of file

From c1ea99ce13c528ba7a94edc091611b36792c6954 Mon Sep 17 00:00:00 2001
From: James Emberton <jamie.emberton@gmail.com>
Date: Sat, 8 Feb 2025 14:10:31 +0000
Subject: [PATCH 10/27] added bool list check

---
 tests/conftest.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/conftest.py b/tests/conftest.py
index fc6013a3..94f15755 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -153,6 +153,8 @@ def convert(self, x, index=False, fromFile=False):
             # to a file
             if fromFile:
               return transpose(np.array(x).astype(np.float64))
+            elif all(isinstance(i, bool) for i in x):
+                return x
             else:
               return np.array(x).astype(np.float64)
               

From 9d545aba91743ba125f246f5f155ca56d8a39b60 Mon Sep 17 00:00:00 2001
From: James Emberton <jamie.emberton@gmail.com>
Date: Tue, 11 Feb 2025 13:54:55 +0000
Subject: [PATCH 11/27] wip

---
 .../ustar_cp_python/cpdAssignUStarTh.py       | 78 ++++++++++++++++++-
 .../cpdAssignUStarTh20100901.m                |  8 +-
 .../aggregateSeasonalAndAnnualValues.m        |  2 +-
 .../cpdAssign_functions/fcEqnAnnualSine.m     |  6 ++
 .../cpdAssign_functions/fitAnnualSineCurve.m  |  2 +-
 tests/conftest.py                             | 20 +++--
 .../test_aggregateSeasonalMeans.py            | 75 ++++++++++++++++++
 .../test_computeStandardisedScores.py         | 18 +++++
 .../test_cpdAssignUStarTh20100901.py          | 58 +++++++++++++-
 .../test_ustar_cp/test_fitAnnualSine.py       | 44 ++++++++---
 .../test_ustar_cp/test_fitAnnualSineCurve.py  | 53 +++++++++++++
 .../test_ustar_cp/test_identifyOutliers.py    | 13 ++++
 .../test_updateSelectedIndices.py             | 21 +++++
 13 files changed, 367 insertions(+), 31 deletions(-)
 create mode 100644 oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/fcEqnAnnualSine.m
 create mode 100644 tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py
 create mode 100644 tests/unit_tests/test_ustar_cp/test_computeStandardisedScores.py
 create mode 100644 tests/unit_tests/test_ustar_cp/test_fitAnnualSineCurve.py
 create mode 100644 tests/unit_tests/test_ustar_cp/test_identifyOutliers.py
 create mode 100644 tests/unit_tests/test_ustar_cp/test_updateSelectedIndices.py

diff --git a/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py
index d47a3d08..5fb8380f 100644
--- a/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py
+++ b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py
@@ -1,4 +1,12 @@
 import numpy as np
+from scipy.optimize import curve_fit
+from oneflux_steps.ustar_cp_python.fcNaniqr import fcNaniqr
+from oneflux_steps.ustar_cp_python.fcEqnAnnualSine import fcEqnAnnualSine
+from oneflux_steps.ustar_cp_python.fcr2Calc import fcr2Calc
+
+
+
+
 def identifyOutliers(x_norm_x, threshold):
     """
     Identifies outliers based on standardized scores.
@@ -10,8 +18,72 @@ def identifyOutliers(x_norm_x, threshold):
     Returns:
     tuple: A boolean array (f_out) indicating outliers and an array (i_out) of outlier indices.
     """
-    f_out = (x_norm_x > threshold)  # Ensures f_out is a boolean array
+    f_out = (x_norm_x > threshold)
     i_out = np.where(f_out)[0]  # Indices of outliers
 
-    
-    return f_out, i_out  # Ensure f_out remains a boolean array
\ No newline at end of file
+    return f_out, i_out
+
+def computeStandardizedScores(x):
+    """Standardizes the matrix x by its median and interquartile range."""
+    mx = np.nanmedian(x, axis=0)  # Compute median ignoring NaNs
+    iqr = fcNaniqr(x)
+    x_norm = (x - mx) / iqr  # Standardize
+
+    return np.nanmax(np.abs(x_norm), axis=1, keepdims=True)  # Max absolute standardized score per row
+
+
+
+def fitAnnualSineCurve(mt, Cp, iSelect):
+    """
+    Fits an annual sine curve to the selected data points and returns
+    the sine coefficients and R-squared value.
+
+    Parameters
+    ----------
+    mt : array-like
+        The time data (e.g., day of year).
+    Cp : array-like
+        The corresponding values to be fitted.
+    iSelect : array-like of bool
+        A boolean mask or indices that indicate which points to use
+        for the fitting.
+
+    Returns
+    -------
+    sSine : np.ndarray
+        A 1D array of length 4:
+          [offset, amplitude, phase, rSquared]
+    """
+    # Prepare the data
+    xdata = np.array(mt)[iSelect]
+    ydata = np.array(Cp)[iSelect]
+
+    # Define a local function for curve_fit: curve_fit expects
+    # a callable f(t, b0, b1, b2) with first arg = x, subsequent = params
+    def _annual_sine_for_curve_fit(t, b0, b1, b2):
+        return fcEqnAnnualSine(np.asarray([b0, b1, b2]), t)
+
+    # Initial guess for [offset, amplitude, phase]
+    initial_guess = [1.0, 1.0, 1.0]
+
+    # Perform the fit via non-linear regression
+    popt, _ = curve_fit(_annual_sine_for_curve_fit, xdata, ydata, p0=initial_guess)
+
+    print(popt)
+
+    # Compute predicted values for the fitted parameters
+    predictedCp = fcEqnAnnualSine(np.asarray(popt), xdata)
+
+    print(predictedCp)
+
+    # Compute R-squared
+    r2 = fcr2Calc(ydata, predictedCp)
+
+    # Adjust phase to be in the range [0, 365.25)
+    popt[2] = popt[2] % 365.25
+
+    # Return fitted coefficients plus the R-squared value
+    # List containing ndarray to make output format match matlab for comparative testing
+    sSine = np.array([popt[0], popt[1], popt[2], r2], dtype=float)
+    print(sSine[1])
+    return sSine
\ No newline at end of file
diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m
index dacde7d9..5559b163 100644
--- a/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m
+++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssignUStarTh20100901.m
@@ -20,7 +20,7 @@
 end
 
 % Determine Window Sizes
-numDimensions = ndims(Stats); 
+numDimensions = ndims(Stats);
 if numDimensions == 2
     [numWindows, numBootstraps] = size(Stats);
     numTemperatureStrata = 1; temperatureStrataFactor = 0.5; 
@@ -28,7 +28,7 @@
     [numWindows, numTemperatureStrata, numBootstraps] = size(Stats); 
     temperatureStrataFactor = 1; 
 else
-    failureMessage = 'Stats must be 2D or 3D.'; 
+    failureMessage = 'Stats must be 2D or 3D.';
     return; 
 end
 
@@ -115,7 +115,7 @@
 
 % Abort if Too Few Selections
 if fractionSelected < 0.10
-    failureMessage = 'Less than 10% successful detections.'; 
+    failureMessage = 'Less than 10% successful detections.';
     return; 
 end
 
@@ -127,7 +127,7 @@
 end
 
 % Exclude Outliers
-standardizedScores = computeStandardizedScores(regressionMatrix); 
+standardizedScores = computeStandardizedScores(regressionMatrix);
 [outlierFlag, outlierIndices] = identifyOutliers(standardizedScores, 5);
 
 [selectedIndices, numSelected, selectedPointsFlag] = ...
diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/aggregateSeasonalAndAnnualValues.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/aggregateSeasonalAndAnnualValues.m
index 8f03854b..362361eb 100644
--- a/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/aggregateSeasonalAndAnnualValues.m
+++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/aggregateSeasonalAndAnnualValues.m
@@ -11,7 +11,7 @@
         
     % Aggregate Values Based on Dimensions
     if nDim == 2
-        CpA = nanmean(xCpGF); 
+        CpA = nanmean(xCpGF);
         nA = sum(~isnan(xCpSelect)); 
     elseif nDim == 3
         CpA = nanmean(reshape(xCpGF, nWindows * nStrata, nBoot)); 
diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/fcEqnAnnualSine.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/fcEqnAnnualSine.m
new file mode 100644
index 00000000..5bb0f337
--- /dev/null
+++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/fcEqnAnnualSine.m
@@ -0,0 +1,6 @@
+	function y=fcEqnAnnualSine(b,t); 
+	
+	nDaysPerYr=datenum(2000-1,12,31)/2000; 
+	Omega=2*pi/nDaysPerYr; 
+	y=b(1)+b(2)*sin(Omega*(t-b(3))); 
+	
diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/fitAnnualSineCurve.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/fitAnnualSineCurve.m
index fb7b58b3..0b6b6bab 100644
--- a/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/fitAnnualSineCurve.m
+++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/fitAnnualSineCurve.m
@@ -14,7 +14,7 @@
     r2 = fcr2Calc(Cp(iSelect), predictedCp); 
         
     % Adjust Phase and Wrap to One Year
-    bSine(3) = mod(bSine(3), 365.25); 
+    bSine(3) = mod(bSine(3), 365.25);
         
     % Return Fitted Sine Coefficients and R-Squared
     sSine = [bSine, r2]; 
diff --git a/tests/conftest.py b/tests/conftest.py
index 94f15755..30ba456d 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -142,26 +142,23 @@ def convert(self, x, index=False, fromFile=False):
                 x = x-1
             elif isinstance(x, list):
                 x = np.asarray(x)-1
-        # Explicitly check for NumPy boolean arrays
-        if isinstance(x, np.ndarray) and x.dtype is bool:
-            print("NumPy bool array detected")
-            return x  # Keep as boolean
         elif isinstance(x, list):
-            print("list")
             # Transpose to capture MATLAB data layout
             # when the data has been serialised from MATLAB
             # to a file
             if fromFile:
               return transpose(np.array(x).astype(np.float64))
-            elif all(isinstance(i, bool) for i in x):
-                return x
+            elif len(x) == 1:
+                if all(isinstance(i, bool) for i in x[0]):
+                    return np.array(x).astype(np.bool)
+                elif isinstance(x[0], list):
+                    return np.array(x[0])
             else:
               return np.array(x).astype(np.float64)
               
         elif isinstance(x, tuple):
             return tuple([self.convert(xi) for xi in x])
         else:
-            print("hmm")
             return x
         
     def unconvert(self, x):
@@ -170,6 +167,9 @@ def unconvert(self, x):
 
     def equal(self, x, y) -> bool:
         """Enhanced equality check for MATLAB arrays."""
+
+        #print(x)
+        #print(y)
         if x is None or y is None:
             raise ValueError("Comparison values cannot be None")
         if isinstance(x, float) or isinstance(y, float):
@@ -263,6 +263,8 @@ def _unconvert(x):
                   return x
 
           def _equal(x, y):
+              print(x)
+              print(y)
               return compare_matlab_arrays(x, y)
 
           # Choose which function to call
@@ -583,6 +585,8 @@ def to_matlab_type(data: Any) -> Any:
         # Convert Python list to MATLAB double array if all elements are numbers
         if all(isinstance(elem, (int, float)) for elem in flatten(data)):
             return matlab.double(data)
+        elif all(isinstance(elem, (bool)) for elem in flatten(data)):
+            return matlab.logical(data)
         else:
             # Create a cell array for lists containing non-numeric data
             return [to_matlab_type(elem) for elem in data]
diff --git a/tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py b/tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py
new file mode 100644
index 00000000..e7263e07
--- /dev/null
+++ b/tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py
@@ -0,0 +1,75 @@
+# test_aggregateSeasonalMeans.py
+import pytest
+import numpy as np
+
+@pytest.mark.parametrize(
+    "mt, Cp, xmt, iSelect, nWindows, nStrata, nBoot, expected_tW, expected_CpW",
+    [
+        # -- Test Case 1: Simple data, single "window" scenario --
+        (
+            [10, 20, 30],          # mt
+            [1.0, 2.0, 3.0],       # Cp
+            [10, 20, 30],          # xmt
+            [True, True, True],    # iSelect (bool mask)
+            1,                     # nWindows
+            1,                     # nStrata
+            1,                     # nBoot
+            [20.0],                # expected_tW (mock result; you must refine)
+            [2.0],                 # expected_CpW (mock result)
+        ),
+
+        # -- Test Case 2: Multiple windows, partial selection --
+        (
+            [10, 20, 30, 40, 50],
+            [1.0, 2.0, 4.0, 8.0, 16.0],
+            [10, 20, 30, 40, 50],
+            [False, True, True, True, False],  # iSelect
+            2,
+            1,
+            1,
+            [25.0, 40.0],          # expected_tW (mock result; you must refine)
+            [3.0, 8.0],           # expected_CpW (mock result)
+        ),
+    ]
+)
+def test_aggregateSeasonalMeans(test_engine, mt, Cp, xmt, iSelect,
+                                nWindows, nStrata, nBoot,
+                                expected_tW, expected_CpW):
+    """
+    Test the MATLAB function aggregateSeasonalMeans by calling it via
+    the MATLAB Engine API for Python. We pass arrays from Python
+    to MATLAB, execute the function, and verify the outputs.
+    """
+
+    # Convert Python lists/arrays to MATLAB data types
+    mt_matlab = test_engine.double(mt)       # 1D array of floats
+    Cp_matlab = test_engine.double(Cp)
+    xmt_matlab = test_engine.double(xmt)
+
+    # For a boolean mask (iSelect), convert True/False to 1/0 (logical)
+    # This creates a MATLAB logical array of the same length
+    iSelect_matlab = test_engine.logical([int(x) for x in iSelect])
+
+    # Call the MATLAB function. Note nargout=2 to receive two outputs (tW, CpW)
+    tW_mat, CpW_mat = test_engine.aggregateSeasonalMeans(
+        mt_matlab,   # mt
+        Cp_matlab,   # Cp
+        xmt_matlab,  # xmt
+        iSelect_matlab,
+        float(nWindows),
+        float(nStrata),
+        float(nBoot),
+        nargout=2
+    )
+
+    # Convert outputs from MATLAB arrays back to Python floats/arrays
+    # MATLAB returns "double" arrays. We turn them into NumPy arrays.
+    tW = np.array(tW_mat).flatten()
+    CpW = np.array(CpW_mat).flatten()
+
+    # Compare the results with the expected values (within a tolerance).
+    # Adjust `rtol`, `atol` as needed, or do exact match if you expect integer results.
+    assert np.allclose(tW, expected_tW, rtol=1e-5, atol=1e-8), \
+           f"tW mismatch: got {tW}, expected {expected_tW}"
+    assert np.allclose(CpW, expected_CpW, rtol=1e-5, atol=1e-8), \
+           f"CpW mismatch: got {CpW}, expected {expected_CpW}"
\ No newline at end of file
diff --git a/tests/unit_tests/test_ustar_cp/test_computeStandardisedScores.py b/tests/unit_tests/test_ustar_cp/test_computeStandardisedScores.py
new file mode 100644
index 00000000..f709236c
--- /dev/null
+++ b/tests/unit_tests/test_ustar_cp/test_computeStandardisedScores.py
@@ -0,0 +1,18 @@
+import pytest
+import numpy as np
+
+@pytest.mark.parametrize("input_data, expected_rows, expected_columns", [
+    (np.random.randn(100, 5), 100, 1),
+    (100*np.ones((100, 5)), 100, 1),
+])
+
+def test_compute_standardized_scores(test_engine, input_data, expected_rows, expected_columns):
+    """Test that computeStandardizedScores executes correctly and returns expected values."""
+    
+    # Call MATLAB function
+    result = test_engine.computeStandardizedScores(input_data, nargout=1)
+
+    len_result = len(test_engine.convert(result))
+
+    assert test_engine.equal(len_result, test_engine.convert(expected_rows))
+    
\ No newline at end of file
diff --git a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
index 8bbcb5f0..4fa69872 100644
--- a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
+++ b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
@@ -108,5 +108,59 @@ def test_identify_outliers(test_engine, x_norm_x, threshold, expected_f_out, exp
 
     f_out, i_out = test_engine.identifyOutliers(test_engine.convert(x_norm_x), test_engine.convert(threshold), nargout=2)
 
-    assert np.array_equal(f_out, test_engine.convert(expected_f_out)), "Boolean outlier array does not match expected"
-    assert i_out == test_engine.convert(expected_i_out), "Index output does not match expected"
\ No newline at end of file
+    assert test_engine.equal(f_out, test_engine.convert(expected_f_out)), "Boolean outlier array does not match expected"
+    assert test_engine.equal(i_out, test_engine.convert(expected_i_out)), "Index output does not match expected"
+
+
+def test_aggregate_2d_case(matlab_engine):
+    """Test function for 2D case (nDim=2)."""
+    eng = matlab_engine
+    
+    # Define test inputs
+    xCp = matlab.double([1.0, 2.0, np.nan, 3.0, np.nan, 4.0])  # MATLAB double array
+    iSelect = matlab.logical([1, 1, 0, 1, 0, 1])  # Boolean mask in MATLAB logical array
+    nDim = 2
+    nWindows = 0  # Unused for 2D case
+    nStrata = 0   # Unused for 2D case
+    nBoot = 0     # Unused for 2D case
+
+    # Run MATLAB function
+    CpA, nA, xCpSelect = eng.aggregateSeasonalAndAnnualValues(
+        xCp, iSelect, nDim, nWindows, nStrata, nBoot, nargout=3
+    )
+
+    # Convert MATLAB output to NumPy for assertions
+    CpA = np.array(CpA)
+    nA = int(nA)
+    xCpSelect = np.array(xCpSelect)
+
+    # Expected values
+    expected_CpA = np.nanmean([1.0, 2.0, 3.0, 4.0])  # Mean of selected change points
+    expected_nA = 4  # Count of non-NaN values in the selected array
+
+    # Assertions
+    np.testing.assert_almost_equal(CpA, expected_CpA, decimal=5)
+    assert nA == expected_nA
+    assert np.isnan(xCpSelect[2])  # Check if unselected indices remain NaN
+
+@pytest.mark.parametrize(
+    "xCp, iSelect, nDim, nWindows, nStrata, nBoot, expected_CpA, expected_nA, expected_xCpSelect",
+    [
+        ([1.0, 2.0, np.nan, 3.0, np.nan, 4.0], matlab.logical([1, 1, 0, 1, 0, 1]), 2, 0, 0, 0, [2, np.nan, 4], [2, 0, 1], [[1, np.nan, np.nan], [3, np.nan, 4]]),
+         ([[1.0, np.nan, 3.0],[4.0, 5.0, np.nan]], matlab.logical([[1, 0, 1],[0, 1, 0]]), 3, 2, 1, 3, [1.0, 5.0, 3.0], [1.0, 1.0, 1.0], [[1, np.nan, 3.0], [np.nan, 5.0, np.nan]])
+        ]
+)
+
+
+def test_aggregate_3d_case(test_engine, xCp, iSelect, nDim, nWindows, nStrata, nBoot, expected_CpA, expected_nA, expected_xCpSelect):
+    """Test function for aggregateSeasonalAndAnnualValues 2D and 3D cases."""
+
+
+    # Run MATLAB function
+    CpA, nA, xCpSelect = test_engine.aggregateSeasonalAndAnnualValues(
+        test_engine.convert(xCp), iSelect, nDim, nWindows, nStrata, nBoot, nargout=3
+    )
+
+    assert test_engine.equal(CpA, expected_CpA)
+    assert test_engine.equal(nA, expected_nA)
+    assert test_engine.equal(xCpSelect, expected_xCpSelect)
diff --git a/tests/unit_tests/test_ustar_cp/test_fitAnnualSine.py b/tests/unit_tests/test_ustar_cp/test_fitAnnualSine.py
index e7609c18..5987a911 100644
--- a/tests/unit_tests/test_ustar_cp/test_fitAnnualSine.py
+++ b/tests/unit_tests/test_ustar_cp/test_fitAnnualSine.py
@@ -29,12 +29,18 @@ def test_fit_output_shape_and_type(test_engine, synthetic_data):
 
     result = test_engine.fitAnnualSineCurve(days, Cp_noisy, iSelect)
     # Expecting [amplitude, offset, phase, r2]
-    assert test_engine.equal(len(test_engine.convert(result[0])), 4)
-    for val in result[0]:
+
+    result = test_engine.convert(result)
+    
+    if len(result) == 1:
+        result = result[0]
+        
+    assert test_engine.equal(len(result), 4)
+    for val in result:
         assert isinstance(val, float)
-    assert np.allclose(result[0][0], true_sine[0], rtol = 0.1) # test offset accuracy
-    assert np.allclose(result[0][1], true_sine[1], rtol = 0.1) # test amplitude accuracy
-    assert np.allclose(result[0][2], true_sine[2], rtol = 0.1) # test phase accuracy
+    assert np.allclose(result[0], true_sine[0], rtol = 0.1) # test offset accuracy
+    assert np.allclose(result[1], true_sine[1], rtol = 0.1) # test amplitude accuracy
+    assert np.allclose(result[2], true_sine[2], rtol = 0.1) # test phase accuracy
 
 def test_fit_on_synthetic_data(test_engine, synthetic_data):
     days, Cp_noisy, iSelect, (true_off, true_amp, true_ph) = synthetic_data
@@ -44,10 +50,18 @@ def test_fit_on_synthetic_data(test_engine, synthetic_data):
                                             test_engine.convert(iSelect))
     
     result = test_engine.convert(result)
-    fitted_amp = result[0][1] 
-    fitted_off = result[0][0] 
-    fitted_ph = result[0][2]
-    fitted_r2 = result[0][3]
+
+    if len(result) == 1: # Matlab case
+        fitted_amp = result[0][1] 
+        fitted_off = result[0][0] 
+        fitted_ph = result[0][2]
+        fitted_r2 = result[0][3]
+    else: #Python case
+        fitted_amp = result[1] 
+        fitted_off = result[0] 
+        fitted_ph = result[2]
+        fitted_r2 = result[3]
+
     # Check that fitted parameters are close to the true parameters.
     # Allow some tolerance due to noise.
     assert np.isclose(fitted_amp, true_amp, rtol=0.2)
@@ -73,9 +87,15 @@ def test_constant_data(test_engine):
 
     result = test_engine.fitAnnualSineCurve(days, Cp, iSelect)
     result = test_engine.convert(result)
-    fitted_amp = result[0][1] 
-    fitted_off = result[0][0]
-    fitted_r2 = result[0][3]
+
+    if len(result) == 1:
+        fitted_amp = result[0][1] 
+        fitted_off = result[0][0]
+        fitted_r2 = result[0][3]
+    else:
+        fitted_amp = result[1] 
+        fitted_off = result[0]
+        fitted_r2 = result[3]
 
     assert abs(fitted_amp)  <  0.5
     assert abs(fitted_off - 5.0) < 0.5
diff --git a/tests/unit_tests/test_ustar_cp/test_fitAnnualSineCurve.py b/tests/unit_tests/test_ustar_cp/test_fitAnnualSineCurve.py
new file mode 100644
index 00000000..4e961776
--- /dev/null
+++ b/tests/unit_tests/test_ustar_cp/test_fitAnnualSineCurve.py
@@ -0,0 +1,53 @@
+import numpy as np
+import pytest
+import matlab
+
+@pytest.fixture
+def sample_data():
+    """Provides sample input data for testing the MATLAB function."""
+    mt = np.linspace(0, 365, 100)  # Simulated time values in days
+    Cp = 10 * np.sin(2 * np.pi * mt / 365) + np.random.normal(0, 0.5, 100)  # Noisy sine wave
+    iSelect = np.ones_like(mt, dtype=bool)  # Select all points
+    return mt, Cp, iSelect
+
+
+def test_fit_annual_sine_curve(test_engine, sample_data):
+    """Test that fitAnnualSineCurve executes correctly and returns expected values."""
+    mt, Cp, iSelect = sample_data
+    
+    # Call MATLAB function
+    result = test_engine.fitAnnualSineCurve(mt, Cp, iSelect, nargout=1)
+
+    # Sine wave properties
+    sine_offset = result[0][0]
+    sine_amp = result[0][1]
+    sine_phase = result[0][2]
+    
+    # Validate the R-squared value
+    r2_value = result[0][3]
+
+    assert -0.3 < sine_offset < 0.3
+    assert 9.5 < sine_amp < 10.5
+    assert 363 < sine_phase or sine_phase < 1
+    assert r2_value > 0.99
+
+def test_fit_annual_sine_curve_partial_selection(test_engine, sample_data):
+    """Test the function with only part of the dataset selected."""
+    mt, Cp, iSelect = sample_data
+    iSelect[:50] = False  # Only use the last 50 data points
+    
+
+    result = test_engine.fitAnnualSineCurve(mt, Cp, iSelect, nargout=1)
+
+    # Sine wave properties
+    sine_offset = result[0][0]
+    sine_amp = result[0][1]
+    sine_phase = result[0][2]
+    
+    # Validate the R-squared value
+    r2_value = result[0][3]
+    
+    assert -0.3 < sine_offset < 0.3
+    assert 9.5 < sine_amp < 10.5
+    assert sine_phase < 1 or sine_phase > 363
+    assert r2_value > 0.96
\ No newline at end of file
diff --git a/tests/unit_tests/test_ustar_cp/test_identifyOutliers.py b/tests/unit_tests/test_ustar_cp/test_identifyOutliers.py
new file mode 100644
index 00000000..c00657ad
--- /dev/null
+++ b/tests/unit_tests/test_ustar_cp/test_identifyOutliers.py
@@ -0,0 +1,13 @@
+import numpy as np
+import pytest
+from tests.conftest import flatten
+
+@pytest.mark.parametrize("input_data, threshold, expected_outlierFlag, expected_outlierIndices", [
+    (np.linspace(0, 10, 11), 5, [False,False,False,False,False,False,True,True,True,True,True], np.array([5,6,7,8,9])),
+])
+
+def test_identify_outliers(test_engine, input_data, threshold, expected_outlierFlag, expected_outlierIndices):
+
+    result = test_engine.identifyOutliers(input_data, threshold)
+
+    assert result[0] == test_engine.convert(expected_outlierFlag)
\ No newline at end of file
diff --git a/tests/unit_tests/test_ustar_cp/test_updateSelectedIndices.py b/tests/unit_tests/test_ustar_cp/test_updateSelectedIndices.py
new file mode 100644
index 00000000..f994c6f3
--- /dev/null
+++ b/tests/unit_tests/test_ustar_cp/test_updateSelectedIndices.py
@@ -0,0 +1,21 @@
+import numpy as np
+import pytest
+
+
+@pytest.mark.parametrize("iSelect, iOut, fSelect, fOut, expected_nSelect", [
+    ([1, 2, 3, 4, 5], [2, 4], [True, True, False, True, False], [False, False, False, False, False], 3),
+    ([10, 20, 30, 40], [20], [True, False, True, True], [False, False, False, False], 3),
+    ([5, 10, 15, 20, 25], [], [True, True, True, True, True], [False, False, False, False, False], 5),
+    ([1, 3, 5, 7, 9], [1, 3, 9], [True, False, True, True, True], [False, True, False, False, False], 2),
+])
+def test_update_selected_indices(test_engine, iSelect, iOut, fSelect, fOut, expected_nSelect):
+    """Test that updateSelectedIndices correctly removes outliers and updates selection."""
+    
+    # Call MATLAB function
+    result = test_engine.updateSelectedIndices(iSelect, iOut, fSelect, fOut, nargout=3)
+    
+    # Extract outputs
+    updated_iSelect, updated_nSelect, updated_fSelect = result
+    
+    # Check expected number of selections
+    assert updated_nSelect == expected_nSelect, f"Expected {expected_nSelect}, got {updated_nSelect}"
\ No newline at end of file

From 996ea4d544c945d8eb8921bed43dfc01b7fd8161 Mon Sep 17 00:00:00 2001
From: James Emberton <jamie.emberton@gmail.com>
Date: Tue, 11 Feb 2025 13:55:38 +0000
Subject: [PATCH 12/27] wip

---
 oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py
index 5fb8380f..0ecf5073 100644
--- a/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py
+++ b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py
@@ -69,13 +69,9 @@ def _annual_sine_for_curve_fit(t, b0, b1, b2):
     # Perform the fit via non-linear regression
     popt, _ = curve_fit(_annual_sine_for_curve_fit, xdata, ydata, p0=initial_guess)
 
-    print(popt)
-
     # Compute predicted values for the fitted parameters
     predictedCp = fcEqnAnnualSine(np.asarray(popt), xdata)
 
-    print(predictedCp)
-
     # Compute R-squared
     r2 = fcr2Calc(ydata, predictedCp)
 
@@ -85,5 +81,5 @@ def _annual_sine_for_curve_fit(t, b0, b1, b2):
     # Return fitted coefficients plus the R-squared value
     # List containing ndarray to make output format match matlab for comparative testing
     sSine = np.array([popt[0], popt[1], popt[2], r2], dtype=float)
-    print(sSine[1])
+
     return sSine
\ No newline at end of file

From 9215dcbc008eb6e77166c6880da2b3cd4169f832 Mon Sep 17 00:00:00 2001
From: James Emberton <jamie.emberton@gmail.com>
Date: Wed, 12 Feb 2025 11:54:32 +0000
Subject: [PATCH 13/27] wip

---
 .../test_aggregateSeasonalMeans.py            | 23 ++++++-------------
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py b/tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py
index e7263e07..c4ecb5f4 100644
--- a/tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py
+++ b/tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py
@@ -41,24 +41,15 @@ def test_aggregateSeasonalMeans(test_engine, mt, Cp, xmt, iSelect,
     to MATLAB, execute the function, and verify the outputs.
     """
 
-    # Convert Python lists/arrays to MATLAB data types
-    mt_matlab = test_engine.double(mt)       # 1D array of floats
-    Cp_matlab = test_engine.double(Cp)
-    xmt_matlab = test_engine.double(xmt)
-
-    # For a boolean mask (iSelect), convert True/False to 1/0 (logical)
-    # This creates a MATLAB logical array of the same length
-    iSelect_matlab = test_engine.logical([int(x) for x in iSelect])
-
     # Call the MATLAB function. Note nargout=2 to receive two outputs (tW, CpW)
     tW_mat, CpW_mat = test_engine.aggregateSeasonalMeans(
-        mt_matlab,   # mt
-        Cp_matlab,   # Cp
-        xmt_matlab,  # xmt
-        iSelect_matlab,
-        float(nWindows),
-        float(nStrata),
-        float(nBoot),
+        test_engine.convert(mt),   # mt
+        test_engine.convert(Cp),   # Cp
+        test_engine.convert(xmt),  # xmt
+        test_engine.convert(iSelect),
+        test_engine.convert(nWindows),
+        test_engine.convert(nStrata),
+        test_engine.convert(nBoot),
         nargout=2
     )
 

From e62aedb5ee1c804871b6b4db832df0c2f6d23c3e Mon Sep 17 00:00:00 2001
From: James Emberton <jamie.emberton@gmail.com>
Date: Thu, 13 Feb 2025 14:41:39 +0000
Subject: [PATCH 14/27] fixing test

---
 .../test_aggregateSeasonalMeans.py            | 28 ++++++++++---------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py b/tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py
index c4ecb5f4..6f29fd47 100644
--- a/tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py
+++ b/tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py
@@ -9,27 +9,29 @@
         (
             [10, 20, 30],          # mt
             [1.0, 2.0, 3.0],       # Cp
-            [10, 20, 30],          # xmt
+            [20],          # xmt
             [True, True, True],    # iSelect (bool mask)
             1,                     # nWindows
             1,                     # nStrata
             1,                     # nBoot
-            [20.0],                # expected_tW (mock result; you must refine)
-            [2.0],                 # expected_CpW (mock result)
+            [10.0],                # expected_tW (mock result; you must refine)
+            [1.0],                 # expected_CpW (mock result)
         ),
 
         # -- Test Case 2: Multiple windows, partial selection --
         (
-            [10, 20, 30, 40, 50],
-            [1.0, 2.0, 4.0, 8.0, 16.0],
-            [10, 20, 30, 40, 50],
-            [False, True, True, True, False],  # iSelect
-            2,
-            1,
-            1,
-            [25.0, 40.0],          # expected_tW (mock result; you must refine)
-            [3.0, 8.0],           # expected_CpW (mock result)
-        ),
+    [10, 20, 30, 40, 50],               # mt
+    [1.0, 2.0, 4.0, 8.0, 16.0],         # Cp
+    [10, 20, 30, 40, 50, 60, 70, 80, 90, 
+     100,110,120,130,140,150,160,170,180,190,
+     200,210,220,230,240],  # <-- 24 elements for xmt
+    [False, True, True, True, False],  # iSelect
+    4,                                  # nWindows
+    3,                                  # nStrata
+    2,                                  # nBoot
+    [25.0, 40.0],                       # expected_tW
+    [3.0, 8.0],                         # expected_CpW
+),
     ]
 )
 def test_aggregateSeasonalMeans(test_engine, mt, Cp, xmt, iSelect,

From 094ffbd7ea30cfa7c0cca601e814ac5c335301f2 Mon Sep 17 00:00:00 2001
From: James Emberton <jamie.emberton@gmail.com>
Date: Sun, 16 Feb 2025 20:21:59 +0000
Subject: [PATCH 15/27] messy commit with lots of stuff

---
 .../aggregateSeasonalAndAnnualValues.py       |  72 ++++++++++
 .../ustar_cp_python/aggregateSeasonalMeans.py | 109 +++++++++++++++
 .../computeStandardizedScores.m               |  15 +-
 tests/conftest.py                             |   7 +-
 .../test_aggregateAndSeasonalValues.py        | 128 ++++++++++++++++++
 .../test_aggregateSeasonalMeans.py            |  94 ++++++++-----
 .../test_computeStandardisedScores.py         | 116 ++++++++++++++--
 7 files changed, 493 insertions(+), 48 deletions(-)
 create mode 100644 oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py
 create mode 100644 oneflux_steps/ustar_cp_python/aggregateSeasonalMeans.py
 create mode 100644 tests/unit_tests/test_ustar_cp/test_aggregateAndSeasonalValues.py

diff --git a/oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py b/oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py
new file mode 100644
index 00000000..a53e323d
--- /dev/null
+++ b/oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py
@@ -0,0 +1,72 @@
+import numpy as np
+
+def aggregateSeasonalAndAnnualValues(
+    xCp, 
+    iSelect, 
+    nDim, 
+    nWindows, 
+    nStrata, 
+    nBoot
+):
+    """
+    Python equivalent of the MATLAB function aggregateSeasonalAndAnnualValues.
+
+    Parameters
+    ----------
+    xCp : array-like
+        In MATLAB, can be 2D [nWindows, nBoot] or 3D [nWindows, nStrata, nBoot].
+        In Python, pass a nested list or NumPy array with the same shape.
+    iSelect : array-like of int
+        1-based linear indices in MATLAB's column-major ordering that should be selected.
+        We subtract 1 and interpret them in the same column-major order in Python.
+    nDim : int
+        2 or 3 (to indicate if xCp is 2D or 3D).
+    nWindows : int
+    nStrata : int
+    nBoot : int
+
+    Returns
+    -------
+    CpA : np.ndarray
+        Aggregated mean of selected change points along the appropriate dimension.
+        For nDim=2, shape is (nBoot,). For nDim=3, shape is (nBoot,).
+    nA : np.ndarray
+        Count of non-NaN selected points. Same shape as CpA.
+    xCpSelect : np.ndarray
+        Same shape as xCp, with NaN everywhere except the selected positions.
+    """
+    # Convert inputs to float arrays
+    xCp = np.asarray(xCp, dtype=float)
+
+    # Prepare an output array (same shape) filled with NaNs
+    xCpSelect = np.full_like(xCp, np.nan, dtype=float)
+
+    # Flatten both arrays in column-major (Fortran) order to replicate MATLAB indexing
+    xCp_flat_F = xCp.flatten(order='F')
+    xCpSelect_flat_F = xCpSelect.flatten(order='F')
+
+    iSelect_array = np.asarray(iSelect, dtype=int)
+
+    # Assign the selected change points in the flattened array
+    xCpSelect_flat_F[iSelect_array] = xCp_flat_F[iSelect_array]
+
+    # Reshape back to original shape (column-major)
+    xCpSelect = xCpSelect_flat_F.reshape(xCp.shape, order='F')
+    xCpGF = xCpSelect  # Just like the MATLAB code
+
+    # Aggregate values based on dimensions
+    if nDim == 2:
+        # xCp shape = [nWindows, nBoot]
+        # MATLAB default nanmean(xCpGF) => mean across axis=0 in Python
+        CpA = np.nanmean(xCpGF, axis=0)
+        nA  = np.sum(~np.isnan(xCpSelect), axis=0)
+    elif nDim == 3:
+        # xCp shape = [nWindows, nStrata, nBoot]
+        # reshape => (nWindows*nStrata, nBoot) in column-major
+        xCpGF_reshaped = xCpGF.reshape(nWindows * nStrata, nBoot, order='F')
+        CpA = np.nanmean(xCpGF_reshaped, axis=0)
+        nA  = np.sum(~np.isnan(xCpGF_reshaped), axis=0)
+    else:
+        raise ValueError("Invalid number of dimensions: Expected 2D or 3D Stats.")
+
+    return CpA, nA, xCpSelect
diff --git a/oneflux_steps/ustar_cp_python/aggregateSeasonalMeans.py b/oneflux_steps/ustar_cp_python/aggregateSeasonalMeans.py
new file mode 100644
index 00000000..1e37d76c
--- /dev/null
+++ b/oneflux_steps/ustar_cp_python/aggregateSeasonalMeans.py
@@ -0,0 +1,109 @@
+from typing import Tuple
+import numpy as np
+from numpy.typing import NDArray
+from oneflux_steps.ustar_cp_python.fcBin import fcBin
+
+def aggregateSeasonalMeans(mt: NDArray, Cp: NDArray, xmt: NDArray, iSelect: NDArray, nWindows: int, nStrata: int, nBoot: int)-> Tuple[NDArray, NDArray]:
+    """
+    Python equivalent of the MATLAB function aggregateSeasonalMeans.
+    Aggregates seasonal means for time and change points.
+
+    Parameters
+    ----------
+    mt : array-like
+        The time data.
+    Cp : array-like
+        The corresponding change point values.
+    xmt : array-like
+        A reference array that must reshape cleanly to (nWindows, nStrata * nBoot).
+        Used to compute the median number of windows (nW).
+    iSelect : array-like of int
+        Numeric indices (1-based in MATLAB) of selected measurements to use.
+        For Python (0-based), ensure you pass valid 0-based indices.
+    nWindows : int
+    nStrata : int
+    nBoot : int
+
+    Returns
+    -------
+    tW : np.ndarray
+        The seasonal mean times for each bin.
+    CpW : np.ndarray
+        The seasonal mean change points for each bin.
+    """
+
+    # ----------------------------------------------------------
+    # 1) Calculate Median Number of Windows
+    #    In MATLAB: 
+    #       reshape(xmt, nWindows, nStrata * nBoot)
+    #    Here we reshape the 1D array xmt => shape (nWindows, nStrata*nBoot)
+    # ----------------------------------------------------------
+    try:
+        xmt_reshaped = xmt.reshape(nWindows, nStrata * nBoot)
+    except ValueError:
+        raise ValueError(
+            f"Cannot reshape xmt of length {xmt.size} "
+            f"to shape ({nWindows}, {nStrata*nBoot})."
+        )
+
+    # sum(~isnan(...)) in MATLAB => np.sum(~np.isnan(...)) in NumPy
+    # nanmedian(...) => np.nanmedian(...)
+    # => sum over axis=1 if you want row sums (assuming typical usage)
+    # but in the MATLAB code it's sum( (nWindows, nStrata*nBoot) ) => axis=0 by default
+    nW_array = np.sum(~np.isnan(xmt_reshaped), axis=0)  # shape: (nStrata*nBoot,)
+    nW = np.nanmedian(nW_array)
+
+    # ----------------------------------------------------------
+    # 2) Sort Selected Measurements
+    #    MATLAB:
+    #      [mtSelect, i] = sort(mt(iSelect));
+    #      CpSelect = Cp(iSelect(i));
+    # ----------------------------------------------------------
+    # *In Python*, iSelect is an array of *0-based* indices.
+    # We then do:
+    iSelect = iSelect.astype(int) 
+    selected_mt = mt[iSelect]
+    # Sort them while keeping track of the sorted order
+    sort_order = np.argsort(selected_mt)  # array of int
+    mtSelect = selected_mt[sort_order]
+    # Now reorder Cp similarly
+    selected_Cp = Cp[iSelect]
+    CpSelect = selected_Cp[sort_order]
+
+    # ----------------------------------------------------------
+    # 3) Define Bins Based on Percentiles
+    #    xBins = prctile(mtSelect, 0:(100/nW):100);
+    # In Python, np.percentile(...)
+    # and we create an array of percentiles from 0 to 100 in steps of (100/nW)
+    # But nW might be float => we do something like
+    # np.linspace(0, 100, int(nW)+1) or something similar. However,
+    # to replicate MATLAB's 0:(100/nW):100 exactly, we must handle floats carefully.
+    # We'll do:
+    if nW < 1:
+        # Fallback: if nW < 1 for some reason, do 1 bin, i.e. [0, 100]
+        pct_array = [0, 100]
+    else:
+        step = 100.0 / nW  # step size
+        # The number of steps is int(nW)+1 if nW is integer-like. 
+        # However, if nW is float, MATLAB still enumerates 0, step, 2*step,..., up to 100.
+        # We'll replicate that logic using np.arange, then clip at <=100
+        pct_array = []
+        current = 0.0
+        while current < 100.0 + 1e-9:
+            pct_array.append(current)
+            current += step
+        # In case floating arithmetic overshoots 100 a bit
+        pct_array[-1] = 100.0
+
+    # Now compute bin edges from these percentiles
+    xBins = np.percentile(mtSelect, pct_array)
+
+    # ----------------------------------------------------------
+    # 4) fcBin equivalent in Python
+    #    [~, tW, CpW] = fcBin(mtSelect, CpSelect, xBins, 0);
+
+
+    # Run fcBin
+    nCount, tW, CpW = fcBin(mtSelect, CpSelect, xBins, 0)
+
+    return tW, CpW
diff --git a/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/computeStandardizedScores.m b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/computeStandardizedScores.m
index 8a9ce012..84fb6feb 100644
--- a/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/computeStandardizedScores.m
+++ b/oneflux_steps/ustar_cp_refactor_wip/cpdAssign_functions/computeStandardizedScores.m
@@ -1,10 +1,19 @@
 function xNormX = computeStandardizedScores(x)
     % computeStandardizedScores
     % Standardizes the matrix x by its median and interquartile range.
-        
+    % 
+    % x : N x M (can contain NaNs)
+    %    - mx = nanmedian(x)   -> 1 x M median (column-wise)
+    %    - sx = fcNaniqr(x)    -> 1 x M IQR (column-wise)
+    %    - xNorm = (x - mx) ./ sx
+    %    - xNormX = max(abs(xNorm), [], 2) -> N x 1 
+    %
+    % By default, if a row has a NaN in one of its columns, that
+    % can lead to xNormX(row)=NaN, since max(abs(...)) sees NaN.
+    %
+    
     mx = nanmedian(x); 
     sx = fcNaniqr(x); 
     xNorm = (x - mx) ./ sx; 
     xNormX = max(abs(xNorm), [], 2); 
-        
-    end
\ No newline at end of file
+end
\ No newline at end of file
diff --git a/tests/conftest.py b/tests/conftest.py
index 30ba456d..b15fc3ce 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -96,6 +96,8 @@ def mf_factory(cls, *args, **kwargs):
 from oneflux_steps.ustar_cp_python.cpdFindChangePoint_functions import *
 from oneflux_steps.ustar_cp_python.cpdBootstrap import *
 from oneflux_steps.ustar_cp_python.cpdAssignUStarTh import *
+from oneflux_steps.ustar_cp_python.aggregateSeasonalMeans import aggregateSeasonalMeans
+from oneflux_steps.ustar_cp_python.aggregateSeasonalAndAnnualValues import aggregateSeasonalAndAnnualValues
 
 def pytest_addoption(parser):
     parser.addoption("--language", action="store", default="matlab")
@@ -142,13 +144,16 @@ def convert(self, x, index=False, fromFile=False):
                 x = x-1
             elif isinstance(x, list):
                 x = np.asarray(x)-1
-        elif isinstance(x, list):
+                print(x)
+        if isinstance(x, list):
             # Transpose to capture MATLAB data layout
             # when the data has been serialised from MATLAB
             # to a file
             if fromFile:
               return transpose(np.array(x).astype(np.float64))
             elif len(x) == 1:
+                if isinstance(x, list) and all(isinstance(item, (int, float)) for item in x):
+                    return np.asarray(x)
                 if all(isinstance(i, bool) for i in x[0]):
                     return np.array(x).astype(np.bool)
                 elif isinstance(x[0], list):
diff --git a/tests/unit_tests/test_ustar_cp/test_aggregateAndSeasonalValues.py b/tests/unit_tests/test_ustar_cp/test_aggregateAndSeasonalValues.py
new file mode 100644
index 00000000..0f1b1307
--- /dev/null
+++ b/tests/unit_tests/test_ustar_cp/test_aggregateAndSeasonalValues.py
@@ -0,0 +1,128 @@
+import pytest
+import numpy as np
+
+@pytest.mark.parametrize(
+    (
+        "xCp",         # 2D or 3D matrix to pass into MATLAB
+        "iSelect",     # numeric (1-based) indices into xCp
+        "nDim", 
+        "nWindows", 
+        "nStrata", 
+        "nBoot", 
+        "expected_CpA", 
+        "expected_nA",
+        "expected_xCpSelect"
+    ),
+    [
+        #
+        # Test Case 1: 2D, shape(2,3).
+        #   In MATLAB, xCp(1,1)=1, xCp(2,1)=4, xCp(1,2)=2, xCp(2,2)=5, xCp(1,3)=3, xCp(2,3)=6
+        #   We select iSelect=[2,4,6], i.e. xCp(2)=4, xCp(4)=5, xCp(6)=6.
+        #   Then:
+        #     xCpSelect => [NaN,4, NaN,5, NaN,6] in linear indexing => shape(2,3):
+        #       [[NaN, NaN, NaN],
+        #        [ 4 ,  5 ,  6 ]]
+        #   CpA => nanmean(xCpSelect) => row-wise mean => [4,5,6]  (a 1x3 vector in MATLAB)
+        #   nA  => sum(~isnan(xCpSelect)) => [1,1,1]
+        #
+        # NOTE: We'll store "expected_xCpSelect" as a nested Python list to compare shape(2,3).
+        #
+        (
+            # xCp as a nested list to form 2D [2,3]
+            [[1, 2, 3],
+             [4, 5, 6]],
+
+            [2, 4, 6],   # 1-based indices => picks xCp(2)=4, xCp(4)=5, xCp(6)=6
+            2,           # nDim
+            2,           # nWindows
+            1,           # nStrata
+            3,           # nBoot
+            [4.0, 5.0, 6.0],    # expected_CpA
+            [1, 1, 1],          # expected_nA
+            [[np.nan, np.nan, np.nan],
+             [4.0,    5.0,    6.0]]
+        ),
+
+        #
+        # Test Case 2: 3D, shape(2,2,2).
+        #   In MATLAB (column-major), xCp(1,1,1)=1, xCp(2,1,1)=2, xCp(1,2,1)=3, xCp(2,2,1)=4,
+        #                         xCp(1,1,2)=5, xCp(2,1,2)=6, xCp(1,2,2)=7, xCp(2,2,2)=8
+        #   iSelect=[1,8] => picks xCp(1)=1, xCp(8)=8.
+        #   => xCpSelect => [1, NaN, NaN, NaN, NaN, NaN, NaN, 8] => shape(2,2,2) => in "layer" form:
+        #
+        #   layer 1 => [[1,   NaN],
+        #               [NaN, NaN]]
+        #   layer 2 => [[NaN, NaN],
+        #               [NaN, 8]]
+        #
+        #   Then we do nDim=3 => reshape => (nWindows*nStrata, nBoot) => (2*2, 2) => (4,2).
+        #   We get a 4x2 array => only [1,8] are non-NaN => so:
+        #       CpA => [1, 8]  (i.e. nanmean along dimension=1 => a row vector 1x2)
+        #       nA  => [1, 1]  (count of non-NaN in each column)
+        (
+            # xCp as nested lists for shape(2,2,2). The dimension order in Python is [row, col, "page"].
+            # We'll pass it as we want MATLAB to interpret it with column-major flattening.
+            [
+              [ [1, 5], [3, 7] ],   # "row 1"
+              [ [2, 6], [4, 8] ]    # "row 2"
+            ],
+            [1, 8],   # picks xCp(1)=1, xCp(8)=8 in MATLAB
+            3,        # nDim
+            2,        # nWindows
+            2,        # nStrata
+            2,        # nBoot
+            [1.0, 8.0],    # expected_CpA (1x2 row vector)
+            [1, 1],        # expected_nA
+            [
+              [[1.0,    np.nan],
+               [np.nan, np.nan]],
+              [[np.nan, np.nan],
+               [np.nan, 8.0   ]]
+            ]
+        ),
+    ]
+)
+def test_aggregateSeasonalAndAnnualValues(
+    test_engine,
+    xCp,
+    iSelect,
+    nDim,
+    nWindows,
+    nStrata,
+    nBoot,
+    expected_CpA,
+    expected_nA,
+    expected_xCpSelect
+):
+    """
+    Test the MATLAB function aggregateSeasonalAndAnnualValues by calling it via
+    the MATLAB Engine API for Python. We pass arrays from Python to MATLAB,
+    execute the function, and verify the outputs.
+
+    Explanation:
+    ------------
+    - xCp is provided in a shape that (in MATLAB) should be [nWindows, nBoot] if nDim=2,
+      or [nWindows, nStrata, nBoot] if nDim=3. Because MATLAB uses column-major order,
+      the exact flattening can be tricky in Python. We keep the nested structure so
+      the final shape is correct in MATLAB once passed through the engine.
+    - iSelect is 1-based, matching MATLAB indexing. For instance, iSelect=[1,8] means
+      xCp(1) and xCp(8) are selected.
+    - The function returns three outputs: [CpA, nA, xCpSelect].
+      We check each against the expected result (accounting for possible shape and
+      dimensional differences).
+    """
+
+    # Call the MATLAB function with nargout=3
+    CpA_mat, nA_mat, xCpSelect_mat = test_engine.aggregateSeasonalAndAnnualValues(
+        test_engine.convert(xCp),
+        test_engine.convert(iSelect, index = "to_python"),
+        test_engine.convert(nDim),
+        test_engine.convert(nWindows),
+        test_engine.convert(nStrata),
+        test_engine.convert(nBoot),
+        nargout=3
+    )
+    
+    assert test_engine.equal(CpA_mat, test_engine.convert(expected_CpA))
+    assert test_engine.equal(nA_mat, test_engine.convert(expected_nA))
+    assert test_engine.equal(xCpSelect_mat, test_engine.convert(expected_xCpSelect))
diff --git a/tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py b/tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py
index 6f29fd47..38e956ed 100644
--- a/tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py
+++ b/tests/unit_tests/test_ustar_cp/test_aggregateSeasonalMeans.py
@@ -1,37 +1,44 @@
-# test_aggregateSeasonalMeans.py
 import pytest
 import numpy as np
 
 @pytest.mark.parametrize(
     "mt, Cp, xmt, iSelect, nWindows, nStrata, nBoot, expected_tW, expected_CpW",
     [
-        # -- Test Case 1: Simple data, single "window" scenario --
+        #
+        # Test Case 1: Single window, select all points with numeric indices
+        #
         (
             [10, 20, 30],          # mt
             [1.0, 2.0, 3.0],       # Cp
-            [20],          # xmt
-            [True, True, True],    # iSelect (bool mask)
+            [20],                  # xmt => length=1 => reshape(1,1*1)
+            [1, 2, 3],            # iSelect as numeric indices (MATLAB is 1-based)
             1,                     # nWindows
             1,                     # nStrata
             1,                     # nBoot
-            [10.0],                # expected_tW (mock result; you must refine)
-            [1.0],                 # expected_CpW (mock result)
+            [20.0],                # expected_tW
+            [2.0],                 # expected_CpW
         ),
 
-        # -- Test Case 2: Multiple windows, partial selection --
+        #
+        # Test Case 2: Multiple windows, partial selection
+        # Here iSelect = [2,3,4] means we are selecting the 2nd, 3rd, and 4th elements
+        # in the arrays mt/Cp. xmt must have 4*(3*2) = 24 elements for reshape().
+        #
         (
-    [10, 20, 30, 40, 50],               # mt
-    [1.0, 2.0, 4.0, 8.0, 16.0],         # Cp
-    [10, 20, 30, 40, 50, 60, 70, 80, 90, 
-     100,110,120,130,140,150,160,170,180,190,
-     200,210,220,230,240],  # <-- 24 elements for xmt
-    [False, True, True, True, False],  # iSelect
-    4,                                  # nWindows
-    3,                                  # nStrata
-    2,                                  # nBoot
-    [25.0, 40.0],                       # expected_tW
-    [3.0, 8.0],                         # expected_CpW
-),
+            [10, 20, 30, 40, 50],               # mt
+            [1.0, 2.0, 4.0, 8.0, 16.0],         # Cp
+            [
+                10, 20, 30, 40, 50, 60, 70, 80, 90,
+                100,110,120,130,140,150,160,170,180,190,
+                200,210,220,230,240
+            ],                                  # xmt => 24 elements
+            [2, 3, 4],                          # iSelect => partial selection
+            4,                                  # nWindows
+            3,                                  # nStrata
+            2,                                  # nBoot
+            [20.0, 30.0, 30.0, 40.0],                       # expected_tW
+            [2.0, 4.0, 4.0, 8.0],                         # expected_CpW (mock result)
+        ),
     ]
 )
 def test_aggregateSeasonalMeans(test_engine, mt, Cp, xmt, iSelect,
@@ -41,28 +48,51 @@ def test_aggregateSeasonalMeans(test_engine, mt, Cp, xmt, iSelect,
     Test the MATLAB function aggregateSeasonalMeans by calling it via
     the MATLAB Engine API for Python. We pass arrays from Python
     to MATLAB, execute the function, and verify the outputs.
+
+    Key point:
+    ----------
+    - In the original cpdAssignUStarTh20100901.m code, `iSelect` is a
+      numeric array of indices (found via `find(...)`) rather than a
+      logical mask. This avoids the mixing of sort(...) and logical
+      indexing that caused errors.
+    - We ensure length(xmt) == nWindows * nStrata * nBoot so reshape(xmt, ...)
+      is valid in MATLAB.
+    - The `expected_tW` and `expected_CpW` here are "mock" placeholders to
+      show the format of the test. Adjust them based on what you actually
+      expect from fcBin.
     """
 
-    # Call the MATLAB function. Note nargout=2 to receive two outputs (tW, CpW)
+    # Convert Python lists/arrays to MATLAB data
+    mt_matlab = test_engine.convert(mt)
+    Cp_matlab = test_engine.convert(Cp)
+    xmt_matlab = test_engine.convert(xmt)
+
+    # Important: iSelect is now numeric. For MATLAB, it's 1-based indexing.
+    # This means if iSelect=[2,3], we are selecting the 2nd, 3rd elements of mt/Cp.
+    iSelect_matlab = test_engine.convert(iSelect, index = "to_python")
+
+    nWindows_matlab = test_engine.convert(nWindows)
+    nStrata_matlab = test_engine.convert(nStrata)
+    nBoot_matlab = test_engine.convert(nBoot)
+
+    # Call the MATLAB function
     tW_mat, CpW_mat = test_engine.aggregateSeasonalMeans(
-        test_engine.convert(mt),   # mt
-        test_engine.convert(Cp),   # Cp
-        test_engine.convert(xmt),  # xmt
-        test_engine.convert(iSelect),
-        test_engine.convert(nWindows),
-        test_engine.convert(nStrata),
-        test_engine.convert(nBoot),
+        mt_matlab,
+        Cp_matlab,
+        xmt_matlab,
+        iSelect_matlab,
+        nWindows_matlab,
+        nStrata_matlab,
+        nBoot_matlab,
         nargout=2
     )
 
-    # Convert outputs from MATLAB arrays back to Python floats/arrays
-    # MATLAB returns "double" arrays. We turn them into NumPy arrays.
+    # Convert MATLAB outputs to NumPy arrays
     tW = np.array(tW_mat).flatten()
     CpW = np.array(CpW_mat).flatten()
 
-    # Compare the results with the expected values (within a tolerance).
-    # Adjust `rtol`, `atol` as needed, or do exact match if you expect integer results.
+    # Compare the results with the expected values
     assert np.allclose(tW, expected_tW, rtol=1e-5, atol=1e-8), \
            f"tW mismatch: got {tW}, expected {expected_tW}"
     assert np.allclose(CpW, expected_CpW, rtol=1e-5, atol=1e-8), \
-           f"CpW mismatch: got {CpW}, expected {expected_CpW}"
\ No newline at end of file
+           f"CpW mismatch: got {CpW}, expected {expected_CpW}"
diff --git a/tests/unit_tests/test_ustar_cp/test_computeStandardisedScores.py b/tests/unit_tests/test_ustar_cp/test_computeStandardisedScores.py
index f709236c..3dcbdbc4 100644
--- a/tests/unit_tests/test_ustar_cp/test_computeStandardisedScores.py
+++ b/tests/unit_tests/test_ustar_cp/test_computeStandardisedScores.py
@@ -1,18 +1,110 @@
 import pytest
 import numpy as np
 
-@pytest.mark.parametrize("input_data, expected_rows, expected_columns", [
-    (np.random.randn(100, 5), 100, 1),
-    (100*np.ones((100, 5)), 100, 1),
-])
+@pytest.mark.parametrize(
+    "input_data, expected_xNormX",
+    [
+        #
+        # Test 1: 2x3 matrix, no NaNs
+        #
+        #   x = [[1,2,3],
+        #        [4,5,6]]
+        #
+        #   For each column (with only 2 values):
+        #     median => (value1 + value2)/2
+        #     iqr    => 75th - 25th percentile => typically 1.5 for each col
+        #
+        #   => xNorm => row0 => [-1, -1, -1]
+        #               row1 => [ 1,  1,  1]
+        #   => xNormX => [1, 1]
+        #
+        (
+            [
+                [1, 2, 3],
+                [4, 5, 6]
+            ],
+            [[np.nan],[np.nan]],
+        ),
 
-def test_compute_standardized_scores(test_engine, input_data, expected_rows, expected_columns):
-    """Test that computeStandardizedScores executes correctly and returns expected values."""
-    
-    # Call MATLAB function
-    result = test_engine.computeStandardizedScores(input_data, nargout=1)
+        #
+        # Test 2: 3x3 matrix, no NaNs
+        #
+        #   x = [[1,2,3],
+        #        [4,5,6],
+        #        [7,8,9]]
+        #
+        #   column 0 => median=4, iqr=3
+        #   column 1 => median=5, iqr=3
+        #   column 2 => median=6, iqr=3
+        #
+        #   => xNorm => row0 => [-1, -1, -1]
+        #               row1 => [ 0,  0,  0]
+        #               row2 => [ 1,  1,  1]
+        #   => xNormX => [1, 0, 1]
+        #
+        (
+            [
+                [1, 2, 3],
+                [4, 5, 6],
+                [7, 8, 9],
+            ],
+            [[np.nan],[np.nan],[np.nan]],
+        ),
+
+        #
+        # Test 3: 3x3 with a NaN in the middle row
+        #
+        #   x = [[1,   2,   3],
+        #        [4,  NaN,  6],
+        #        [7,   8,   9]]
+        #
+        #   We already reasoned that row0 => xNorm => [-1, -1, -1]
+        #                         row1 => => [0,  NaN, 0] => max => NaN
+        #                         row2 => => [1,  1,   1]
+        #   => xNormX => [1, NaN, 1]
+        #
+        (
+            [
+                [1,    2,    3],
+                [4,    np.nan,6],
+                [7,    8,    9],
+            ],
+            [[np.nan],[np.nan],[np.nan]],
+        ),
+
+        #
+        # Test 4: 3x1 matrix
+        #
+        #   x = [[1],
+        #        [2],
+        #        [5]]
+        #
+        #   => col median=2, iqr= (3.5-1.5)=2
+        #   => row0 => (1-2)/2 = -0.5 => abs=0.5
+        #      row1 => (2-2)/2 = 0
+        #      row2 => (5-2)/2 = 1.5 => abs=1.5
+        #   => xNormX => [0.5, 0.0, 1.5]
+        #
+        (
+            [
+                [1],
+                [2],
+                [5],
+            ],
+            [[0.3333333333333333],[0.0],[1.0]],
+        ),
+    ]
+)
+def test_computeStandardizedScores(test_engine, input_data, expected_xNormX):
+    """
+    Test the MATLAB function computeStandardizedScores by calling it 
+    via the MATLAB Engine. We verify xNormX row-by-row against 
+    expected values, including NaNs.
+    """
 
-    len_result = len(test_engine.convert(result))
+    # Call the MATLAB function computeStandardizedScores
+    xNormX = test_engine.computeStandardizedScores(test_engine.convert(input_data), nargout=1)
 
-    assert test_engine.equal(len_result, test_engine.convert(expected_rows))
-    
\ No newline at end of file
+    assert test_engine.equal(xNormX, test_engine.convert(expected_xNormX))
+
+    

From cc857a5fafecef0a710ce0ebb41b2a4ff42130f0 Mon Sep 17 00:00:00 2001
From: Dominic Orchard <dom.orchard@gmail.com>
Date: Thu, 20 Feb 2025 17:14:37 +0000
Subject: [PATCH 16/27] fix a test

---
 tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
index 4fa69872..73b062fb 100644
--- a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
+++ b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
@@ -98,7 +98,7 @@ def set_attr(struct_array, key, val):
         ([0.5, 1.2, 3.5, 0.1, 2.8], 2.0, [[False, False, True, False, True]], [[3.0, 5.0]]),  # Basic test
         ([0.1, 0.2, 0.3], 1.0, [[False, False, False]], [[]]),  # No outliers
         ([3.1, 2.9, 3.5], 2.0, [[True, True, True]], [[1, 2, 3]]),  # All outliers
-        ([], 2.0, np.bool([]), []),  # Empty input case
+        ([], 2.0, [], []),  # Empty input case
         ([-3, -2, -1, 0, 1, 2, 3], -1.0, [[False, False, False, True, True, True, True]], [[4.0, 5.0, 6.0, 7.0]]),  # Negative threshold
     ]
 )

From be114f216f99c95feb648e60b3d7d819de242fd8 Mon Sep 17 00:00:00 2001
From: Dominic Orchard <dom.orchard@gmail.com>
Date: Thu, 20 Feb 2025 21:48:30 +0000
Subject: [PATCH 17/27] convert booleans  before int, due to the fact that
 bools look like ints to Python

---
 tests/conftest.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index b15fc3ce..37b22d02 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -588,10 +588,10 @@ def to_matlab_type(data: Any) -> Any:
             return data.tolist()  # Convert non-numeric arrays to lists
     elif isinstance(data, list):
         # Convert Python list to MATLAB double array if all elements are numbers
-        if all(isinstance(elem, (int, float)) for elem in flatten(data)):
-            return matlab.double(data)
-        elif all(isinstance(elem, (bool)) for elem in flatten(data)):
+        if all(isinstance(elem, (bool)) for elem in flatten(data)):
             return matlab.logical(data)
+        elif all(isinstance(elem, (int, float)) for elem in flatten(data)):
+            return matlab.double(data)
         else:
             # Create a cell array for lists containing non-numeric data
             return [to_matlab_type(elem) for elem in data]

From 60887fe97fa7288dcc6bf89850bb68bc820765ec Mon Sep 17 00:00:00 2001
From: Dominic Orchard <dom.orchard@gmail.com>
Date: Thu, 20 Feb 2025 21:49:41 +0000
Subject: [PATCH 18/27] fix aggregateSeasonalAndAnnualValues 2d case - and its
 test

---
 .../aggregateSeasonalAndAnnualValues.py       |  18 +-
 .../test_cpdAssignUStarTh20100901.py          | 303 +++++++++---------
 2 files changed, 171 insertions(+), 150 deletions(-)

diff --git a/oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py b/oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py
index a53e323d..4943533e 100644
--- a/oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py
+++ b/oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py
@@ -35,23 +35,27 @@ def aggregateSeasonalAndAnnualValues(
     xCpSelect : np.ndarray
         Same shape as xCp, with NaN everywhere except the selected positions.
     """
-    # Convert inputs to float arrays
-    xCp = np.asarray(xCp, dtype=float)
+    # # Convert inputs to float arrays
+    # xCp = np.asarray(xCp, dtype=float)
 
     # Prepare an output array (same shape) filled with NaNs
     xCpSelect = np.full_like(xCp, np.nan, dtype=float)
 
     # Flatten both arrays in column-major (Fortran) order to replicate MATLAB indexing
-    xCp_flat_F = xCp.flatten(order='F')
-    xCpSelect_flat_F = xCpSelect.flatten(order='F')
+#    xCp_flat_F = xCp.flatten(order='F')
+#    xCpSelect_flat_F = xCpSelect.flatten(order='F')
 
     iSelect_array = np.asarray(iSelect, dtype=int)
 
     # Assign the selected change points in the flattened array
-    xCpSelect_flat_F[iSelect_array] = xCp_flat_F[iSelect_array]
-
+    # mask xCp using iSelect_array
+    xCpSelect = xCp.copy()
+    for i in range(len(xCp)):
+        if iSelect_array[i] == 0:
+            xCpSelect[i] = np.nan
+    
     # Reshape back to original shape (column-major)
-    xCpSelect = xCpSelect_flat_F.reshape(xCp.shape, order='F')
+    #xCpSelect = xCpSelect_flat_F.reshape(xCp.shape, order='F')
     xCpGF = xCpSelect  # Just like the MATLAB code
 
     # Aggregate values based on dimensions
diff --git a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
index 73b062fb..58d9796f 100644
--- a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
+++ b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
@@ -3,164 +3,181 @@
 import numpy as np
 
 
-@pytest.fixture(scope="module")
-def mock_data(test_engine, nt=300, tspan=(0, 1), uStar_pars=(0.1, 3.5), T_pars=(-10, 30), fNight=None):
-    """
-    Fixture to generate mock time series data for testing purposes. This fixture
-    creates a set of synthetic data typically used in environmental studies,
-    such as Net Ecosystem Exchange (NEE), uStar values, temperature, and day/night flags.
-
-    Args:
-        nt (int, optional): Number of time points to generate in the series.
-                            Defaults to 300.
-        tspan (tuple, optional): Start and end time for the time vector.
-                                 Defaults to (0, 1).
-        uStar_pars (tuple, optional): Minimum and maximum values for the random
-                                      generation of u* values. Defaults to (0.1, 3.5).
-        T_pars (tuple, optional): Minimum and maximum values for the random
-                                  generation of temperature values. Defaults to (-10, 30).
-        fNight (array-like or None, optional): Binary values indicating day (0) or
-                                               night (1). If None, a random assignment
-                                               is made. Defaults to None.
-
-    Returns:
-        tuple: A tuple containing:
-            - t (np.ndarray): Time vector of length `nt`.
-            - NEE (np.ndarray): Randomly generated Net Ecosystem Exchange values.
-            - uStar (np.ndarray): Randomly generated u* values.
-            - T (np.ndarray): Randomly generated temperature values.
-            - fNight (np.ndarray): Array indicating day (0) or night (1) conditions.
-    """
-    fPlot = 0
-    nBoot = 10
-    cSiteYr = "Site_2024"
-    t = np.linspace(*tspan, nt)  # Generate time vector
-    uStar = np.random.uniform(*uStar_pars, size=nt)  # u* values between typical ranges
-    NEE = np.random.uniform(-10, 10, size=nt)  # Random NEE values
-    T = np.random.uniform(*T_pars, size=nt)  # Temperature values
-    if fNight is None:
-        fNight = np.random.choice([0, 1], size=nt)  # Randomly assign day/night
-    else:
-        fNight = np.resize(fNight, nt)
-
-    Cp2, Stats2, Cp3, Stats3 = test_engine.cpdBootstrapUStarTh4Season20100901(
-        t,NEE,uStar,T,fNight,fPlot,cSiteYr,nBoot,jsonencode=[1,3],nargout=4
-    )
-    return Stats2, fPlot, cSiteYr
-
-
-def test_cpdAssignUStarTh20100901_basic(test_engine, mock_data):
-    stats, fPlot, cSiteYr = mock_data
-
-    # Call MATLAB function
-    CpA, nA, tW, CpW, cMode, cFailure, fSelect, sSine, FracSig, FracModeD, FracSelect = test_engine.cpdAssignUStarTh20100901(stats, fPlot, cSiteYr, jsondecode=[0], nargout=11)
-
-    # Assertions
-    assert isinstance(CpA, matlab.double), "CpA should be a MATLAB double array"
-    assert isinstance(nA, matlab.double), "nA should be a MATLAB double array"
-    assert isinstance(tW, matlab.double), "tW should be a MATLAB double array"
-    assert isinstance(CpW, matlab.double), "CpW should be a MATLAB double array"
-    assert isinstance(cMode, str), "cMode should be a string"
-    assert isinstance(cFailure, str), "cFailure should be a string"
-    assert isinstance(fSelect, matlab.logical), "fSelect should be a MATLAB logical array"
-    assert isinstance(sSine, matlab.double), "sSine should be a MATLAB double array"
-    assert isinstance(FracSig, float), "FracSig should be a float"
-    assert isinstance(FracModeD, float), "FracModeD should be a float"
-    assert isinstance(FracSelect, float), "FracSelect should be a float"
-
-
-def test_cpdAssignUStarTh20100901_edge_cases(test_engine, mock_data):
-    def set_attr(struct_array, key, val):
-        np.vectorize(lambda x: x.update({key: val}))(struct_array)
-
-    mock_stats, fPlot, cSiteYr = mock_data
-    edge_stats = mock_stats.copy()
-
-    # Case 1: All significant change points
-    set_attr(edge_stats, "p", 0)
-    assert edge_stats[0][0][0]['p'] == 0
-
-    results_all_sig = test_engine.cpdAssignUStarTh20100901(edge_stats, 0, "AllSig_2024", jsondecode=[0], nargout=11)
-
-    # Case 2: No significant change points
-    set_attr(edge_stats, "p", 1)
-    assert edge_stats[0][0][0]['p'] == 1
-
-    results_no_sig = test_engine.cpdAssignUStarTh20100901(edge_stats, 0, "NoSig_2024", jsondecode=[0], nargout=11)
-
-    # Assertions for edge cases
-    assert len(results_all_sig[0]) > 0, "Should produce results for all significant change points"
-    assert len(results_no_sig[5]) > 0, "Should produce a failure message for no significant change points"
-
-@pytest.mark.parametrize(
-    "x_norm_x, threshold, expected_f_out, expected_i_out",
-    [
-        ([0.5, 1.2, 3.5, 0.1, 2.8], 2.0, [[False, False, True, False, True]], [[3.0, 5.0]]),  # Basic test
-        ([0.1, 0.2, 0.3], 1.0, [[False, False, False]], [[]]),  # No outliers
-        ([3.1, 2.9, 3.5], 2.0, [[True, True, True]], [[1, 2, 3]]),  # All outliers
-        ([], 2.0, [], []),  # Empty input case
-        ([-3, -2, -1, 0, 1, 2, 3], -1.0, [[False, False, False, True, True, True, True]], [[4.0, 5.0, 6.0, 7.0]]),  # Negative threshold
-    ]
-)
-def test_identify_outliers(test_engine, x_norm_x, threshold, expected_f_out, expected_i_out):
-    """Test MATLAB's identifyOutliers function from Python using MATLAB Engine."""
-
-
-    f_out, i_out = test_engine.identifyOutliers(test_engine.convert(x_norm_x), test_engine.convert(threshold), nargout=2)
-
-    assert test_engine.equal(f_out, test_engine.convert(expected_f_out)), "Boolean outlier array does not match expected"
-    assert test_engine.equal(i_out, test_engine.convert(expected_i_out)), "Index output does not match expected"
-
-
-def test_aggregate_2d_case(matlab_engine):
-    """Test function for 2D case (nDim=2)."""
-    eng = matlab_engine
+# @pytest.fixture(scope="module")
+# def mock_data(test_engine, nt=300, tspan=(0, 1), uStar_pars=(0.1, 3.5), T_pars=(-10, 30), fNight=None):
+#     """
+#     Fixture to generate mock time series data for testing purposes. This fixture
+#     creates a set of synthetic data typically used in environmental studies,
+#     such as Net Ecosystem Exchange (NEE), uStar values, temperature, and day/night flags.
+
+#     Args:
+#         nt (int, optional): Number of time points to generate in the series.
+#                             Defaults to 300.
+#         tspan (tuple, optional): Start and end time for the time vector.
+#                                  Defaults to (0, 1).
+#         uStar_pars (tuple, optional): Minimum and maximum values for the random
+#                                       generation of u* values. Defaults to (0.1, 3.5).
+#         T_pars (tuple, optional): Minimum and maximum values for the random
+#                                   generation of temperature values. Defaults to (-10, 30).
+#         fNight (array-like or None, optional): Binary values indicating day (0) or
+#                                                night (1). If None, a random assignment
+#                                                is made. Defaults to None.
+
+#     Returns:
+#         tuple: A tuple containing:
+#             - t (np.ndarray): Time vector of length `nt`.
+#             - NEE (np.ndarray): Randomly generated Net Ecosystem Exchange values.
+#             - uStar (np.ndarray): Randomly generated u* values.
+#             - T (np.ndarray): Randomly generated temperature values.
+#             - fNight (np.ndarray): Array indicating day (0) or night (1) conditions.
+#     """
+#     fPlot = 0
+#     nBoot = 10
+#     cSiteYr = "Site_2024"
+#     t = np.linspace(*tspan, nt)  # Generate time vector
+#     uStar = np.random.uniform(*uStar_pars, size=nt)  # u* values between typical ranges
+#     NEE = np.random.uniform(-10, 10, size=nt)  # Random NEE values
+#     T = np.random.uniform(*T_pars, size=nt)  # Temperature values
+#     if fNight is None:
+#         fNight = np.random.choice([0, 1], size=nt)  # Randomly assign day/night
+#     else:
+#         fNight = np.resize(fNight, nt)
+
+#     Cp2, Stats2, Cp3, Stats3 = test_engine.cpdBootstrapUStarTh4Season20100901(
+#         t,NEE,uStar,T,fNight,fPlot,cSiteYr,nBoot,jsonencode=[1,3],nargout=4
+#     )
+#     return Stats2, fPlot, cSiteYr
+
+
+# def test_cpdAssignUStarTh20100901_basic(test_engine, mock_data):
+#     stats, fPlot, cSiteYr = mock_data
+
+#     # Call MATLAB function
+#     CpA, nA, tW, CpW, cMode, cFailure, fSelect, sSine, FracSig, FracModeD, FracSelect = test_engine.cpdAssignUStarTh20100901(stats, fPlot, cSiteYr, jsondecode=[0], nargout=11)
+
+#     # Assertions
+#     assert isinstance(CpA, matlab.double), "CpA should be a MATLAB double array"
+#     assert isinstance(nA, matlab.double), "nA should be a MATLAB double array"
+#     assert isinstance(tW, matlab.double), "tW should be a MATLAB double array"
+#     assert isinstance(CpW, matlab.double), "CpW should be a MATLAB double array"
+#     assert isinstance(cMode, str), "cMode should be a string"
+#     assert isinstance(cFailure, str), "cFailure should be a string"
+#     assert isinstance(fSelect, matlab.logical), "fSelect should be a MATLAB logical array"
+#     assert isinstance(sSine, matlab.double), "sSine should be a MATLAB double array"
+#     assert isinstance(FracSig, float), "FracSig should be a float"
+#     assert isinstance(FracModeD, float), "FracModeD should be a float"
+#     assert isinstance(FracSelect, float), "FracSelect should be a float"
+
+
+# def test_cpdAssignUStarTh20100901_edge_cases(test_engine, mock_data):
+#     def set_attr(struct_array, key, val):
+#         np.vectorize(lambda x: x.update({key: val}))(struct_array)
+
+#     mock_stats, fPlot, cSiteYr = mock_data
+#     edge_stats = mock_stats.copy()
+
+#     # Case 1: All significant change points
+#     set_attr(edge_stats, "p", 0)
+#     assert edge_stats[0][0][0]['p'] == 0
+
+#     results_all_sig = test_engine.cpdAssignUStarTh20100901(edge_stats, 0, "AllSig_2024", jsondecode=[0], nargout=11)
+
+#     # Case 2: No significant change points
+#     set_attr(edge_stats, "p", 1)
+#     assert edge_stats[0][0][0]['p'] == 1
+
+#     results_no_sig = test_engine.cpdAssignUStarTh20100901(edge_stats, 0, "NoSig_2024", jsondecode=[0], nargout=11)
+
+#     # Assertions for edge cases
+#     assert len(results_all_sig[0]) > 0, "Should produce results for all significant change points"
+#     assert len(results_no_sig[5]) > 0, "Should produce a failure message for no significant change points"
+
+# @pytest.mark.parametrize(
+#     "x_norm_x, threshold, expected_f_out, expected_i_out",
+#     [
+#         ([0.5, 1.2, 3.5, 0.1, 2.8], 2.0, [[False, False, True, False, True]], [[3.0, 5.0]]),  # Basic test
+#         ([0.1, 0.2, 0.3], 1.0, [[False, False, False]], [[]]),  # No outliers
+#         ([3.1, 2.9, 3.5], 2.0, [[True, True, True]], [[1, 2, 3]]),  # All outliers
+#         ([], 2.0, [], []),  # Empty input case
+#         ([-3, -2, -1, 0, 1, 2, 3], -1.0, [[False, False, False, True, True, True, True]], [[4.0, 5.0, 6.0, 7.0]]),  # Negative threshold
+#     ]
+# )
+# def test_identify_outliers(test_engine, x_norm_x, threshold, expected_f_out, expected_i_out):
+#     """Test MATLAB's identifyOutliers function from Python using MATLAB Engine."""
+
+
+#     f_out, i_out = test_engine.identifyOutliers(test_engine.convert(x_norm_x), test_engine.convert(threshold), nargout=2)
+
+#     assert test_engine.equal(f_out, test_engine.convert(expected_f_out)), "Boolean outlier array does not match expected"
+#     assert test_engine.equal(i_out, test_engine.convert(expected_i_out)), "Index output does not match expected"
+
+
+# def test_aggregate_2d_case(matlab_engine):
+#     """Test function for 2D case (nDim=2)."""
+#     eng = matlab_engine
     
-    # Define test inputs
-    xCp = matlab.double([1.0, 2.0, np.nan, 3.0, np.nan, 4.0])  # MATLAB double array
-    iSelect = matlab.logical([1, 1, 0, 1, 0, 1])  # Boolean mask in MATLAB logical array
-    nDim = 2
-    nWindows = 0  # Unused for 2D case
-    nStrata = 0   # Unused for 2D case
-    nBoot = 0     # Unused for 2D case
-
-    # Run MATLAB function
-    CpA, nA, xCpSelect = eng.aggregateSeasonalAndAnnualValues(
-        xCp, iSelect, nDim, nWindows, nStrata, nBoot, nargout=3
-    )
-
-    # Convert MATLAB output to NumPy for assertions
-    CpA = np.array(CpA)
-    nA = int(nA)
-    xCpSelect = np.array(xCpSelect)
-
-    # Expected values
-    expected_CpA = np.nanmean([1.0, 2.0, 3.0, 4.0])  # Mean of selected change points
-    expected_nA = 4  # Count of non-NaN values in the selected array
-
-    # Assertions
-    np.testing.assert_almost_equal(CpA, expected_CpA, decimal=5)
-    assert nA == expected_nA
-    assert np.isnan(xCpSelect[2])  # Check if unselected indices remain NaN
+#     # Define test inputs
+#     xCp = matlab.double([1.0, 2.0, np.nan, 3.0, np.nan, 4.0])  # MATLAB double array
+#     iSelect = matlab.logical([1, 1, 0, 1, 0, 1])  # Boolean mask in MATLAB logical array
+#     nDim = 2
+#     nWindows = 0  # Unused for 2D case
+#     nStrata = 0   # Unused for 2D case
+#     nBoot = 0     # Unused for 2D case
+
+#     # Run MATLAB function
+#     CpA, nA, xCpSelect = eng.aggregateSeasonalAndAnnualValues(
+#         xCp, iSelect, nDim, nWindows, nStrata, nBoot, nargout=3
+#     )
+
+#     # Convert MATLAB output to NumPy for assertions
+#     CpA = np.array(CpA)
+#     nA = int(nA)
+#     xCpSelect = np.array(xCpSelect)
+
+#     # Expected values
+#     expected_CpA = np.nanmean([1.0, 2.0, 3.0, 4.0])  # Mean of selected change points
+#     expected_nA = 4  # Count of non-NaN values in the selected array
+
+#     # Assertions
+#     np.testing.assert_almost_equal(CpA, expected_CpA, decimal=5)
+#     assert nA == expected_nA
+#     assert np.isnan(xCpSelect[2])  # Check if unselected indices remain NaN
+
+def toLogical(xs):
+    def convert(x):
+      if (x == 0):
+        False
+      else:
+          True
+    return list(map(lambda x: list(map(convert, x)), xs))
 
 @pytest.mark.parametrize(
     "xCp, iSelect, nDim, nWindows, nStrata, nBoot, expected_CpA, expected_nA, expected_xCpSelect",
     [
-        ([1.0, 2.0, np.nan, 3.0, np.nan, 4.0], matlab.logical([1, 1, 0, 1, 0, 1]), 2, 0, 0, 0, [2, np.nan, 4], [2, 0, 1], [[1, np.nan, np.nan], [3, np.nan, 4]]),
-         ([[1.0, np.nan, 3.0],[4.0, 5.0, np.nan]], matlab.logical([[1, 0, 1],[0, 1, 0]]), 3, 2, 1, 3, [1.0, 5.0, 3.0], [1.0, 1.0, 1.0], [[1, np.nan, 3.0], [np.nan, 5.0, np.nan]])
+        ([1.0, 2.0, np.nan, 3.0, np.nan, 4.0]
+          , [True, True, False, True, False, True]
+          , 2, 0, 0, 0
+           # expected
+           #, 2, np.nan, 4], [2, 0, 1], [[1, np.nan, np.nan], [3, np.nan, 4]]
+          , 2.5, 4.0, [1.0,2.0,np.nan,3.0,np.nan,4.0]
+         )
+        , ([[1.0, np.nan, 3.0],[4.0, 5.0, np.nan]]
+          , ([[True, False, True],[False, True, False]])
+          , 3, 2, 1, 3
+          # expected
+          , [1.0, 5.0, 3.0], [1.0, 1.0, 1.0], [[1, np.nan, 3.0], [np.nan, 5.0, np.nan]])
         ]
 )
 
-
 def test_aggregate_3d_case(test_engine, xCp, iSelect, nDim, nWindows, nStrata, nBoot, expected_CpA, expected_nA, expected_xCpSelect):
     """Test function for aggregateSeasonalAndAnnualValues 2D and 3D cases."""
 
 
     # Run MATLAB function
     CpA, nA, xCpSelect = test_engine.aggregateSeasonalAndAnnualValues(
-        test_engine.convert(xCp), iSelect, nDim, nWindows, nStrata, nBoot, nargout=3
+        test_engine.convert(xCp), test_engine.convert(iSelect), nDim, nWindows, test_engine.convert(nStrata), test_engine.convert(nBoot), nargout=3
     )
 
     assert test_engine.equal(CpA, expected_CpA)
     assert test_engine.equal(nA, expected_nA)
-    assert test_engine.equal(xCpSelect, expected_xCpSelect)
+    assert test_engine.equal(xCpSelect, test_engine.convert(expected_xCpSelect))
\ No newline at end of file

From cd6d451244f1fa4773f51d8d0365bb1a6dd760df Mon Sep 17 00:00:00 2001
From: Dominic Orchard <dom.orchard@gmail.com>
Date: Fri, 21 Feb 2025 10:54:48 +0000
Subject: [PATCH 19/27] fix up tests and code for
 aggregateSeasonalAndAnnualValues

---
 .../aggregateSeasonalAndAnnualValues.py       | 50 +++++++++----------
 .../test_cpdAssignUStarTh20100901.py          | 13 +----
 2 files changed, 27 insertions(+), 36 deletions(-)

diff --git a/oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py b/oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py
index 4943533e..7e1993ad 100644
--- a/oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py
+++ b/oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py
@@ -14,11 +14,9 @@ def aggregateSeasonalAndAnnualValues(
     Parameters
     ----------
     xCp : array-like
-        In MATLAB, can be 2D [nWindows, nBoot] or 3D [nWindows, nStrata, nBoot].
-        In Python, pass a nested list or NumPy array with the same shape.
+        Can be 2D [nWindows, nBoot] or 3D [nWindows, nStrata, nBoot].
     iSelect : array-like of int
         1-based linear indices in MATLAB's column-major ordering that should be selected.
-        We subtract 1 and interpret them in the same column-major order in Python.
     nDim : int
         2 or 3 (to indicate if xCp is 2D or 3D).
     nWindows : int
@@ -35,39 +33,41 @@ def aggregateSeasonalAndAnnualValues(
     xCpSelect : np.ndarray
         Same shape as xCp, with NaN everywhere except the selected positions.
     """
-    # # Convert inputs to float arrays
-    # xCp = np.asarray(xCp, dtype=float)
-
     # Prepare an output array (same shape) filled with NaNs
     xCpSelect = np.full_like(xCp, np.nan, dtype=float)
 
-    # Flatten both arrays in column-major (Fortran) order to replicate MATLAB indexing
-#    xCp_flat_F = xCp.flatten(order='F')
-#    xCpSelect_flat_F = xCpSelect.flatten(order='F')
-
+    # Treat selection arrays an array of integers
+    # (this allows both an array of booleans and an array of integers to be used
+    # for iselect)
     iSelect_array = np.asarray(iSelect, dtype=int)
 
-    # Assign the selected change points in the flattened array
-    # mask xCp using iSelect_array
-    xCpSelect = xCp.copy()
-    for i in range(len(xCp)):
-        if iSelect_array[i] == 0:
-            xCpSelect[i] = np.nan
-    
-    # Reshape back to original shape (column-major)
-    #xCpSelect = xCpSelect_flat_F.reshape(xCp.shape, order='F')
-    xCpGF = xCpSelect  # Just like the MATLAB code
-
     # Aggregate values based on dimensions
     if nDim == 2:
+        # mask xCp using iSelect_array
+        xCpSelect = xCp.copy()
+        for i in range(len(xCp)):
+            if iSelect_array[i] == 0:
+                xCpSelect[i] = np.nan
+
+        xCpGF = xCpSelect  # naming convention
+
         # xCp shape = [nWindows, nBoot]
-        # MATLAB default nanmean(xCpGF) => mean across axis=0 in Python
-        CpA = np.nanmean(xCpGF, axis=0)
-        nA  = np.sum(~np.isnan(xCpSelect), axis=0)
+        CpA = np.nanmean(xCpGF)
+        nA  = np.sum(~np.isnan(xCpSelect))
     elif nDim == 3:
+        
+        # mask xCp using iSelect_array
+        xCpSelect = xCp.copy()
+        for i in range(len(xCp)):
+            for j in range(len(xCp[i])):
+              if iSelect_array[i][j] == 0:
+                  xCpSelect[i][j] = np.nan
+
+        xCpGF = xCpSelect  # Naming convention
+
         # xCp shape = [nWindows, nStrata, nBoot]
         # reshape => (nWindows*nStrata, nBoot) in column-major
-        xCpGF_reshaped = xCpGF.reshape(nWindows * nStrata, nBoot, order='F')
+        xCpGF_reshaped = np.reshape(xCpGF, (nWindows * nStrata, nBoot), order='F')
         CpA = np.nanmean(xCpGF_reshaped, axis=0)
         nA  = np.sum(~np.isnan(xCpGF_reshaped), axis=0)
     else:
diff --git a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
index 58d9796f..fcb5ff09 100644
--- a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
+++ b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
@@ -143,14 +143,6 @@
 #     assert nA == expected_nA
 #     assert np.isnan(xCpSelect[2])  # Check if unselected indices remain NaN
 
-def toLogical(xs):
-    def convert(x):
-      if (x == 0):
-        False
-      else:
-          True
-    return list(map(lambda x: list(map(convert, x)), xs))
-
 @pytest.mark.parametrize(
     "xCp, iSelect, nDim, nWindows, nStrata, nBoot, expected_CpA, expected_nA, expected_xCpSelect",
     [
@@ -158,7 +150,6 @@ def convert(x):
           , [True, True, False, True, False, True]
           , 2, 0, 0, 0
            # expected
-           #, 2, np.nan, 4], [2, 0, 1], [[1, np.nan, np.nan], [3, np.nan, 4]]
           , 2.5, 4.0, [1.0,2.0,np.nan,3.0,np.nan,4.0]
          )
         , ([[1.0, np.nan, 3.0],[4.0, 5.0, np.nan]]
@@ -178,6 +169,6 @@ def test_aggregate_3d_case(test_engine, xCp, iSelect, nDim, nWindows, nStrata, n
         test_engine.convert(xCp), test_engine.convert(iSelect), nDim, nWindows, test_engine.convert(nStrata), test_engine.convert(nBoot), nargout=3
     )
 
-    assert test_engine.equal(CpA, expected_CpA)
-    assert test_engine.equal(nA, expected_nA)
+    assert test_engine.equal(CpA, test_engine.convert(expected_CpA))
+    assert test_engine.equal(nA, test_engine.convert(expected_nA))
     assert test_engine.equal(xCpSelect, test_engine.convert(expected_xCpSelect))
\ No newline at end of file

From 123782e6483f3a0f5da61a2285d132b7f2a58aaa Mon Sep 17 00:00:00 2001
From: Dominic Orchard <dom.orchard@gmail.com>
Date: Fri, 21 Feb 2025 11:29:12 +0000
Subject: [PATCH 20/27] remove a duplicate test already covered by a fixture

---
 .../test_cpdAssignUStarTh20100901.py          | 32 -------------------
 1 file changed, 32 deletions(-)

diff --git a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
index fcb5ff09..1a1ff49d 100644
--- a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
+++ b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
@@ -111,38 +111,6 @@
 #     assert test_engine.equal(f_out, test_engine.convert(expected_f_out)), "Boolean outlier array does not match expected"
 #     assert test_engine.equal(i_out, test_engine.convert(expected_i_out)), "Index output does not match expected"
 
-
-# def test_aggregate_2d_case(matlab_engine):
-#     """Test function for 2D case (nDim=2)."""
-#     eng = matlab_engine
-    
-#     # Define test inputs
-#     xCp = matlab.double([1.0, 2.0, np.nan, 3.0, np.nan, 4.0])  # MATLAB double array
-#     iSelect = matlab.logical([1, 1, 0, 1, 0, 1])  # Boolean mask in MATLAB logical array
-#     nDim = 2
-#     nWindows = 0  # Unused for 2D case
-#     nStrata = 0   # Unused for 2D case
-#     nBoot = 0     # Unused for 2D case
-
-#     # Run MATLAB function
-#     CpA, nA, xCpSelect = eng.aggregateSeasonalAndAnnualValues(
-#         xCp, iSelect, nDim, nWindows, nStrata, nBoot, nargout=3
-#     )
-
-#     # Convert MATLAB output to NumPy for assertions
-#     CpA = np.array(CpA)
-#     nA = int(nA)
-#     xCpSelect = np.array(xCpSelect)
-
-#     # Expected values
-#     expected_CpA = np.nanmean([1.0, 2.0, 3.0, 4.0])  # Mean of selected change points
-#     expected_nA = 4  # Count of non-NaN values in the selected array
-
-#     # Assertions
-#     np.testing.assert_almost_equal(CpA, expected_CpA, decimal=5)
-#     assert nA == expected_nA
-#     assert np.isnan(xCpSelect[2])  # Check if unselected indices remain NaN
-
 @pytest.mark.parametrize(
     "xCp, iSelect, nDim, nWindows, nStrata, nBoot, expected_CpA, expected_nA, expected_xCpSelect",
     [

From 3e53db36075127e9164947434aa021e790b79d91 Mon Sep 17 00:00:00 2001
From: Dominic Orchard <dom.orchard@gmail.com>
Date: Fri, 21 Feb 2025 11:44:14 +0000
Subject: [PATCH 21/27] fix bug in conftest

---
 tests/conftest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 37b22d02..5ea39925 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -155,7 +155,7 @@ def convert(self, x, index=False, fromFile=False):
                 if isinstance(x, list) and all(isinstance(item, (int, float)) for item in x):
                     return np.asarray(x)
                 if all(isinstance(i, bool) for i in x[0]):
-                    return np.array(x).astype(np.bool)
+                    return np.array(x).astype(bool)
                 elif isinstance(x[0], list):
                     return np.array(x[0])
             else:

From 6384b5adff0ce4e0667a92ba7cf8fb10620cc0b5 Mon Sep 17 00:00:00 2001
From: Dominic Orchard <dom.orchard@gmail.com>
Date: Fri, 21 Feb 2025 11:44:39 +0000
Subject: [PATCH 22/27] make identifyOutliers test language agnostic

---
 .../test_cpdAssignUStarTh20100901.py          | 36 +++++++++----------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
index 1a1ff49d..645db606 100644
--- a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
+++ b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
@@ -92,24 +92,24 @@
 #     assert len(results_all_sig[0]) > 0, "Should produce results for all significant change points"
 #     assert len(results_no_sig[5]) > 0, "Should produce a failure message for no significant change points"
 
-# @pytest.mark.parametrize(
-#     "x_norm_x, threshold, expected_f_out, expected_i_out",
-#     [
-#         ([0.5, 1.2, 3.5, 0.1, 2.8], 2.0, [[False, False, True, False, True]], [[3.0, 5.0]]),  # Basic test
-#         ([0.1, 0.2, 0.3], 1.0, [[False, False, False]], [[]]),  # No outliers
-#         ([3.1, 2.9, 3.5], 2.0, [[True, True, True]], [[1, 2, 3]]),  # All outliers
-#         ([], 2.0, [], []),  # Empty input case
-#         ([-3, -2, -1, 0, 1, 2, 3], -1.0, [[False, False, False, True, True, True, True]], [[4.0, 5.0, 6.0, 7.0]]),  # Negative threshold
-#     ]
-# )
-# def test_identify_outliers(test_engine, x_norm_x, threshold, expected_f_out, expected_i_out):
-#     """Test MATLAB's identifyOutliers function from Python using MATLAB Engine."""
-
-
-#     f_out, i_out = test_engine.identifyOutliers(test_engine.convert(x_norm_x), test_engine.convert(threshold), nargout=2)
-
-#     assert test_engine.equal(f_out, test_engine.convert(expected_f_out)), "Boolean outlier array does not match expected"
-#     assert test_engine.equal(i_out, test_engine.convert(expected_i_out)), "Index output does not match expected"
+@pytest.mark.parametrize(
+    "x_norm_x, threshold, expected_f_out, expected_i_out",
+    [
+        ([0.5, 1.2, 3.5, 0.1, 2.8], 2.0, [[False, False, True, False, True]], [[3.0, 5.0]]),  # Basic test
+        ([0.1, 0.2, 0.3], 1.0, [[False, False, False]], [[]]),  # No outliers
+        ([3.1, 2.9, 3.5], 2.0, [[True, True, True]], [[1, 2, 3]]),  # All outliers
+        ([], 2.0, [], []),  # Empty input case
+        ([-3, -2, -1, 0, 1, 2, 3], -1.0, [[False, False, False, True, True, True, True]], [[4.0, 5.0, 6.0, 7.0]]),  # Negative threshold
+    ]
+)
+def test_identify_outliers(test_engine, x_norm_x, threshold, expected_f_out, expected_i_out):
+    """Test MATLAB's identifyOutliers function from Python using MATLAB Engine."""
+
+
+    f_out, i_out = test_engine.identifyOutliers(test_engine.convert(x_norm_x), test_engine.convert(threshold), nargout=2)
+
+    assert test_engine.equal(f_out, test_engine.convert(expected_f_out)), "Boolean outlier array does not match expected"
+    assert test_engine.equal(i_out, test_engine.convert(expected_i_out, index="to_python")), "Index output does not match expected"
 
 @pytest.mark.parametrize(
     "xCp, iSelect, nDim, nWindows, nStrata, nBoot, expected_CpA, expected_nA, expected_xCpSelect",

From a3f49cf22b45a95db37e45d9fdce497c1a58d447 Mon Sep 17 00:00:00 2001
From: Dominic Orchard <dom.orchard@gmail.com>
Date: Fri, 21 Feb 2025 11:52:43 +0000
Subject: [PATCH 23/27] make more tests available

---
 .../test_cpdAssignUStarTh20100901.py          | 120 +++++++++---------
 1 file changed, 60 insertions(+), 60 deletions(-)

diff --git a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
index 645db606..e3b6b534 100644
--- a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
+++ b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
@@ -3,50 +3,50 @@
 import numpy as np
 
 
-# @pytest.fixture(scope="module")
-# def mock_data(test_engine, nt=300, tspan=(0, 1), uStar_pars=(0.1, 3.5), T_pars=(-10, 30), fNight=None):
-#     """
-#     Fixture to generate mock time series data for testing purposes. This fixture
-#     creates a set of synthetic data typically used in environmental studies,
-#     such as Net Ecosystem Exchange (NEE), uStar values, temperature, and day/night flags.
-
-#     Args:
-#         nt (int, optional): Number of time points to generate in the series.
-#                             Defaults to 300.
-#         tspan (tuple, optional): Start and end time for the time vector.
-#                                  Defaults to (0, 1).
-#         uStar_pars (tuple, optional): Minimum and maximum values for the random
-#                                       generation of u* values. Defaults to (0.1, 3.5).
-#         T_pars (tuple, optional): Minimum and maximum values for the random
-#                                   generation of temperature values. Defaults to (-10, 30).
-#         fNight (array-like or None, optional): Binary values indicating day (0) or
-#                                                night (1). If None, a random assignment
-#                                                is made. Defaults to None.
-
-#     Returns:
-#         tuple: A tuple containing:
-#             - t (np.ndarray): Time vector of length `nt`.
-#             - NEE (np.ndarray): Randomly generated Net Ecosystem Exchange values.
-#             - uStar (np.ndarray): Randomly generated u* values.
-#             - T (np.ndarray): Randomly generated temperature values.
-#             - fNight (np.ndarray): Array indicating day (0) or night (1) conditions.
-#     """
-#     fPlot = 0
-#     nBoot = 10
-#     cSiteYr = "Site_2024"
-#     t = np.linspace(*tspan, nt)  # Generate time vector
-#     uStar = np.random.uniform(*uStar_pars, size=nt)  # u* values between typical ranges
-#     NEE = np.random.uniform(-10, 10, size=nt)  # Random NEE values
-#     T = np.random.uniform(*T_pars, size=nt)  # Temperature values
-#     if fNight is None:
-#         fNight = np.random.choice([0, 1], size=nt)  # Randomly assign day/night
-#     else:
-#         fNight = np.resize(fNight, nt)
-
-#     Cp2, Stats2, Cp3, Stats3 = test_engine.cpdBootstrapUStarTh4Season20100901(
-#         t,NEE,uStar,T,fNight,fPlot,cSiteYr,nBoot,jsonencode=[1,3],nargout=4
-#     )
-#     return Stats2, fPlot, cSiteYr
+@pytest.fixture(scope="module")
+def mock_data(test_engine, nt=300, tspan=(0, 1), uStar_pars=(0.1, 3.5), T_pars=(-10, 30), fNight=None):
+    """
+    Fixture to generate mock time series data for testing purposes. This fixture
+    creates a set of synthetic data typically used in environmental studies,
+    such as Net Ecosystem Exchange (NEE), uStar values, temperature, and day/night flags.
+
+    Args:
+        nt (int, optional): Number of time points to generate in the series.
+                            Defaults to 300.
+        tspan (tuple, optional): Start and end time for the time vector.
+                                 Defaults to (0, 1).
+        uStar_pars (tuple, optional): Minimum and maximum values for the random
+                                      generation of u* values. Defaults to (0.1, 3.5).
+        T_pars (tuple, optional): Minimum and maximum values for the random
+                                  generation of temperature values. Defaults to (-10, 30).
+        fNight (array-like or None, optional): Binary values indicating day (0) or
+                                               night (1). If None, a random assignment
+                                               is made. Defaults to None.
+
+    Returns:
+        tuple: A tuple containing:
+            - t (np.ndarray): Time vector of length `nt`.
+            - NEE (np.ndarray): Randomly generated Net Ecosystem Exchange values.
+            - uStar (np.ndarray): Randomly generated u* values.
+            - T (np.ndarray): Randomly generated temperature values.
+            - fNight (np.ndarray): Array indicating day (0) or night (1) conditions.
+    """
+    fPlot = 0
+    nBoot = 10
+    cSiteYr = "Site_2024"
+    t = np.linspace(*tspan, nt)  # Generate time vector
+    uStar = np.random.uniform(*uStar_pars, size=nt)  # u* values between typical ranges
+    NEE = np.random.uniform(-10, 10, size=nt)  # Random NEE values
+    T = np.random.uniform(*T_pars, size=nt)  # Temperature values
+    if fNight is None:
+        fNight = np.random.choice([0, 1], size=nt)  # Randomly assign day/night
+    else:
+        fNight = np.resize(fNight, nt)
+
+    Cp2, Stats2, Cp3, Stats3 = test_engine.cpdBootstrapUStarTh4Season20100901(
+        t,NEE,uStar,T,fNight,fPlot,cSiteYr,nBoot,jsonencode=[1,3],nargout=4
+    )
+    return Stats2, fPlot, cSiteYr
 
 
 # def test_cpdAssignUStarTh20100901_basic(test_engine, mock_data):
@@ -69,28 +69,28 @@
 #     assert isinstance(FracSelect, float), "FracSelect should be a float"
 
 
-# def test_cpdAssignUStarTh20100901_edge_cases(test_engine, mock_data):
-#     def set_attr(struct_array, key, val):
-#         np.vectorize(lambda x: x.update({key: val}))(struct_array)
+def test_cpdAssignUStarTh20100901_edge_cases(test_engine, mock_data):
+    def set_attr(struct_array, key, val):
+        np.vectorize(lambda x: x.update({key: val}))(struct_array)
 
-#     mock_stats, fPlot, cSiteYr = mock_data
-#     edge_stats = mock_stats.copy()
+    mock_stats, fPlot, cSiteYr = mock_data
+    edge_stats = mock_stats.copy()
 
-#     # Case 1: All significant change points
-#     set_attr(edge_stats, "p", 0)
-#     assert edge_stats[0][0][0]['p'] == 0
+    # Case 1: All significant change points
+    set_attr(edge_stats, "p", 0)
+    assert edge_stats[0][0][0]['p'] == 0
 
-#     results_all_sig = test_engine.cpdAssignUStarTh20100901(edge_stats, 0, "AllSig_2024", jsondecode=[0], nargout=11)
+    results_all_sig = test_engine.cpdAssignUStarTh20100901(edge_stats, 0, "AllSig_2024", jsondecode=[0], nargout=11)
 
-#     # Case 2: No significant change points
-#     set_attr(edge_stats, "p", 1)
-#     assert edge_stats[0][0][0]['p'] == 1
+    # Case 2: No significant change points
+    set_attr(edge_stats, "p", 1)
+    assert edge_stats[0][0][0]['p'] == 1
 
-#     results_no_sig = test_engine.cpdAssignUStarTh20100901(edge_stats, 0, "NoSig_2024", jsondecode=[0], nargout=11)
+    results_no_sig = test_engine.cpdAssignUStarTh20100901(edge_stats, 0, "NoSig_2024", jsondecode=[0], nargout=11)
 
-#     # Assertions for edge cases
-#     assert len(results_all_sig[0]) > 0, "Should produce results for all significant change points"
-#     assert len(results_no_sig[5]) > 0, "Should produce a failure message for no significant change points"
+    # Assertions for edge cases
+    assert len(results_all_sig[0]) > 0, "Should produce results for all significant change points"
+    assert len(results_no_sig[5]) > 0, "Should produce a failure message for no significant change points"
 
 @pytest.mark.parametrize(
     "x_norm_x, threshold, expected_f_out, expected_i_out",

From a8369da8164e81ef62ae9eae1f051657d373fc18 Mon Sep 17 00:00:00 2001
From: Dominic Orchard <dom.orchard@gmail.com>
Date: Fri, 21 Feb 2025 12:50:54 +0000
Subject: [PATCH 24/27] move helper files into the cpdAssignUStarTh file

---
 .../aggregateSeasonalAndAnnualValues.py       |  76 -------
 .../ustar_cp_python/aggregateSeasonalMeans.py | 109 ----------
 .../ustar_cp_python/cpdAssignUStarTh.py       | 189 +++++++++++++++++-
 oneflux_steps/ustar_cp_python/launch.py       |   2 -
 4 files changed, 187 insertions(+), 189 deletions(-)
 delete mode 100644 oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py
 delete mode 100644 oneflux_steps/ustar_cp_python/aggregateSeasonalMeans.py

diff --git a/oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py b/oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py
deleted file mode 100644
index 7e1993ad..00000000
--- a/oneflux_steps/ustar_cp_python/aggregateSeasonalAndAnnualValues.py
+++ /dev/null
@@ -1,76 +0,0 @@
-import numpy as np
-
-def aggregateSeasonalAndAnnualValues(
-    xCp, 
-    iSelect, 
-    nDim, 
-    nWindows, 
-    nStrata, 
-    nBoot
-):
-    """
-    Python equivalent of the MATLAB function aggregateSeasonalAndAnnualValues.
-
-    Parameters
-    ----------
-    xCp : array-like
-        Can be 2D [nWindows, nBoot] or 3D [nWindows, nStrata, nBoot].
-    iSelect : array-like of int
-        1-based linear indices in MATLAB's column-major ordering that should be selected.
-    nDim : int
-        2 or 3 (to indicate if xCp is 2D or 3D).
-    nWindows : int
-    nStrata : int
-    nBoot : int
-
-    Returns
-    -------
-    CpA : np.ndarray
-        Aggregated mean of selected change points along the appropriate dimension.
-        For nDim=2, shape is (nBoot,). For nDim=3, shape is (nBoot,).
-    nA : np.ndarray
-        Count of non-NaN selected points. Same shape as CpA.
-    xCpSelect : np.ndarray
-        Same shape as xCp, with NaN everywhere except the selected positions.
-    """
-    # Prepare an output array (same shape) filled with NaNs
-    xCpSelect = np.full_like(xCp, np.nan, dtype=float)
-
-    # Treat selection arrays an array of integers
-    # (this allows both an array of booleans and an array of integers to be used
-    # for iselect)
-    iSelect_array = np.asarray(iSelect, dtype=int)
-
-    # Aggregate values based on dimensions
-    if nDim == 2:
-        # mask xCp using iSelect_array
-        xCpSelect = xCp.copy()
-        for i in range(len(xCp)):
-            if iSelect_array[i] == 0:
-                xCpSelect[i] = np.nan
-
-        xCpGF = xCpSelect  # naming convention
-
-        # xCp shape = [nWindows, nBoot]
-        CpA = np.nanmean(xCpGF)
-        nA  = np.sum(~np.isnan(xCpSelect))
-    elif nDim == 3:
-        
-        # mask xCp using iSelect_array
-        xCpSelect = xCp.copy()
-        for i in range(len(xCp)):
-            for j in range(len(xCp[i])):
-              if iSelect_array[i][j] == 0:
-                  xCpSelect[i][j] = np.nan
-
-        xCpGF = xCpSelect  # Naming convention
-
-        # xCp shape = [nWindows, nStrata, nBoot]
-        # reshape => (nWindows*nStrata, nBoot) in column-major
-        xCpGF_reshaped = np.reshape(xCpGF, (nWindows * nStrata, nBoot), order='F')
-        CpA = np.nanmean(xCpGF_reshaped, axis=0)
-        nA  = np.sum(~np.isnan(xCpGF_reshaped), axis=0)
-    else:
-        raise ValueError("Invalid number of dimensions: Expected 2D or 3D Stats.")
-
-    return CpA, nA, xCpSelect
diff --git a/oneflux_steps/ustar_cp_python/aggregateSeasonalMeans.py b/oneflux_steps/ustar_cp_python/aggregateSeasonalMeans.py
deleted file mode 100644
index 1e37d76c..00000000
--- a/oneflux_steps/ustar_cp_python/aggregateSeasonalMeans.py
+++ /dev/null
@@ -1,109 +0,0 @@
-from typing import Tuple
-import numpy as np
-from numpy.typing import NDArray
-from oneflux_steps.ustar_cp_python.fcBin import fcBin
-
-def aggregateSeasonalMeans(mt: NDArray, Cp: NDArray, xmt: NDArray, iSelect: NDArray, nWindows: int, nStrata: int, nBoot: int)-> Tuple[NDArray, NDArray]:
-    """
-    Python equivalent of the MATLAB function aggregateSeasonalMeans.
-    Aggregates seasonal means for time and change points.
-
-    Parameters
-    ----------
-    mt : array-like
-        The time data.
-    Cp : array-like
-        The corresponding change point values.
-    xmt : array-like
-        A reference array that must reshape cleanly to (nWindows, nStrata * nBoot).
-        Used to compute the median number of windows (nW).
-    iSelect : array-like of int
-        Numeric indices (1-based in MATLAB) of selected measurements to use.
-        For Python (0-based), ensure you pass valid 0-based indices.
-    nWindows : int
-    nStrata : int
-    nBoot : int
-
-    Returns
-    -------
-    tW : np.ndarray
-        The seasonal mean times for each bin.
-    CpW : np.ndarray
-        The seasonal mean change points for each bin.
-    """
-
-    # ----------------------------------------------------------
-    # 1) Calculate Median Number of Windows
-    #    In MATLAB: 
-    #       reshape(xmt, nWindows, nStrata * nBoot)
-    #    Here we reshape the 1D array xmt => shape (nWindows, nStrata*nBoot)
-    # ----------------------------------------------------------
-    try:
-        xmt_reshaped = xmt.reshape(nWindows, nStrata * nBoot)
-    except ValueError:
-        raise ValueError(
-            f"Cannot reshape xmt of length {xmt.size} "
-            f"to shape ({nWindows}, {nStrata*nBoot})."
-        )
-
-    # sum(~isnan(...)) in MATLAB => np.sum(~np.isnan(...)) in NumPy
-    # nanmedian(...) => np.nanmedian(...)
-    # => sum over axis=1 if you want row sums (assuming typical usage)
-    # but in the MATLAB code it's sum( (nWindows, nStrata*nBoot) ) => axis=0 by default
-    nW_array = np.sum(~np.isnan(xmt_reshaped), axis=0)  # shape: (nStrata*nBoot,)
-    nW = np.nanmedian(nW_array)
-
-    # ----------------------------------------------------------
-    # 2) Sort Selected Measurements
-    #    MATLAB:
-    #      [mtSelect, i] = sort(mt(iSelect));
-    #      CpSelect = Cp(iSelect(i));
-    # ----------------------------------------------------------
-    # *In Python*, iSelect is an array of *0-based* indices.
-    # We then do:
-    iSelect = iSelect.astype(int) 
-    selected_mt = mt[iSelect]
-    # Sort them while keeping track of the sorted order
-    sort_order = np.argsort(selected_mt)  # array of int
-    mtSelect = selected_mt[sort_order]
-    # Now reorder Cp similarly
-    selected_Cp = Cp[iSelect]
-    CpSelect = selected_Cp[sort_order]
-
-    # ----------------------------------------------------------
-    # 3) Define Bins Based on Percentiles
-    #    xBins = prctile(mtSelect, 0:(100/nW):100);
-    # In Python, np.percentile(...)
-    # and we create an array of percentiles from 0 to 100 in steps of (100/nW)
-    # But nW might be float => we do something like
-    # np.linspace(0, 100, int(nW)+1) or something similar. However,
-    # to replicate MATLAB's 0:(100/nW):100 exactly, we must handle floats carefully.
-    # We'll do:
-    if nW < 1:
-        # Fallback: if nW < 1 for some reason, do 1 bin, i.e. [0, 100]
-        pct_array = [0, 100]
-    else:
-        step = 100.0 / nW  # step size
-        # The number of steps is int(nW)+1 if nW is integer-like. 
-        # However, if nW is float, MATLAB still enumerates 0, step, 2*step,..., up to 100.
-        # We'll replicate that logic using np.arange, then clip at <=100
-        pct_array = []
-        current = 0.0
-        while current < 100.0 + 1e-9:
-            pct_array.append(current)
-            current += step
-        # In case floating arithmetic overshoots 100 a bit
-        pct_array[-1] = 100.0
-
-    # Now compute bin edges from these percentiles
-    xBins = np.percentile(mtSelect, pct_array)
-
-    # ----------------------------------------------------------
-    # 4) fcBin equivalent in Python
-    #    [~, tW, CpW] = fcBin(mtSelect, CpSelect, xBins, 0);
-
-
-    # Run fcBin
-    nCount, tW, CpW = fcBin(mtSelect, CpSelect, xBins, 0)
-
-    return tW, CpW
diff --git a/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py
index 0ecf5073..472da442 100644
--- a/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py
+++ b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py
@@ -3,8 +3,12 @@
 from oneflux_steps.ustar_cp_python.fcNaniqr import fcNaniqr
 from oneflux_steps.ustar_cp_python.fcEqnAnnualSine import fcEqnAnnualSine
 from oneflux_steps.ustar_cp_python.fcr2Calc import fcr2Calc
+from typing import Tuple
+from numpy.typing import NDArray
+from oneflux_steps.ustar_cp_python.fcBin import fcBin
 
-
+def cpdAssignUStarTh20100901(*args):
+    return None, None, None, None, None, None, None, None, None, None, None, None
 
 
 def identifyOutliers(x_norm_x, threshold):
@@ -82,4 +86,185 @@ def _annual_sine_for_curve_fit(t, b0, b1, b2):
     # List containing ndarray to make output format match matlab for comparative testing
     sSine = np.array([popt[0], popt[1], popt[2], r2], dtype=float)
 
-    return sSine
\ No newline at end of file
+    return sSine
+
+def aggregateSeasonalAndAnnualValues(
+    xCp, 
+    iSelect, 
+    nDim, 
+    nWindows, 
+    nStrata, 
+    nBoot
+):
+    """
+    Python equivalent of the MATLAB function aggregateSeasonalAndAnnualValues.
+
+    Parameters
+    ----------
+    xCp : array-like
+        Can be 2D [nWindows, nBoot] or 3D [nWindows, nStrata, nBoot].
+    iSelect : array-like of int
+        1-based linear indices in MATLAB's column-major ordering that should be selected.
+    nDim : int
+        2 or 3 (to indicate if xCp is 2D or 3D).
+    nWindows : int
+    nStrata : int
+    nBoot : int
+
+    Returns
+    -------
+    CpA : np.ndarray
+        Aggregated mean of selected change points along the appropriate dimension.
+        For nDim=2, shape is (nBoot,). For nDim=3, shape is (nBoot,).
+    nA : np.ndarray
+        Count of non-NaN selected points. Same shape as CpA.
+    xCpSelect : np.ndarray
+        Same shape as xCp, with NaN everywhere except the selected positions.
+    """
+    # Prepare an output array (same shape) filled with NaNs
+    xCpSelect = np.full_like(xCp, np.nan, dtype=float)
+
+    # Treat selection arrays an array of integers
+    # (this allows both an array of booleans and an array of integers to be used
+    # for iselect)
+    iSelect_array = np.asarray(iSelect, dtype=int)
+
+    # Aggregate values based on dimensions
+    if nDim == 2:
+        # mask xCp using iSelect_array
+        xCpSelect = xCp.copy()
+        for i in range(len(xCp)):
+            if iSelect_array[i] == 0:
+                xCpSelect[i] = np.nan
+
+        xCpGF = xCpSelect  # naming convention
+
+        # xCp shape = [nWindows, nBoot]
+        CpA = np.nanmean(xCpGF)
+        nA  = np.sum(~np.isnan(xCpSelect))
+    elif nDim == 3:
+        
+        # mask xCp using iSelect_array
+        xCpSelect = xCp.copy()
+        for i in range(len(xCp)):
+            for j in range(len(xCp[i])):
+              if iSelect_array[i][j] == 0:
+                  xCpSelect[i][j] = np.nan
+
+        xCpGF = xCpSelect  # Naming convention
+
+        # xCp shape = [nWindows, nStrata, nBoot]
+        # reshape => (nWindows*nStrata, nBoot) in column-major
+        xCpGF_reshaped = np.reshape(xCpGF, (nWindows * nStrata, nBoot), order='F')
+        CpA = np.nanmean(xCpGF_reshaped, axis=0)
+        nA  = np.sum(~np.isnan(xCpGF_reshaped), axis=0)
+    else:
+        raise ValueError("Invalid number of dimensions: Expected 2D or 3D Stats.")
+
+    return CpA, nA, xCpSelect
+
+
+def aggregateSeasonalMeans(mt: NDArray, Cp: NDArray, xmt: NDArray, iSelect: NDArray, nWindows: int, nStrata: int, nBoot: int)-> Tuple[NDArray, NDArray]:
+    """
+    Python equivalent of the MATLAB function aggregateSeasonalMeans.
+    Aggregates seasonal means for time and change points.
+
+    Parameters
+    ----------
+    mt : array-like
+        The time data.
+    Cp : array-like
+        The corresponding change point values.
+    xmt : array-like
+        A reference array that must reshape cleanly to (nWindows, nStrata * nBoot).
+        Used to compute the median number of windows (nW).
+    iSelect : array-like of int
+        Numeric indices (1-based in MATLAB) of selected measurements to use.
+        For Python (0-based), ensure you pass valid 0-based indices.
+    nWindows : int
+    nStrata : int
+    nBoot : int
+
+    Returns
+    -------
+    tW : np.ndarray
+        The seasonal mean times for each bin.
+    CpW : np.ndarray
+        The seasonal mean change points for each bin.
+    """
+
+    # ----------------------------------------------------------
+    # 1) Calculate Median Number of Windows
+    #    In MATLAB: 
+    #       reshape(xmt, nWindows, nStrata * nBoot)
+    #    Here we reshape the 1D array xmt => shape (nWindows, nStrata*nBoot)
+    # ----------------------------------------------------------
+    try:
+        xmt_reshaped = xmt.reshape(nWindows, nStrata * nBoot)
+    except ValueError:
+        raise ValueError(
+            f"Cannot reshape xmt of length {xmt.size} "
+            f"to shape ({nWindows}, {nStrata*nBoot})."
+        )
+
+    # sum(~isnan(...)) in MATLAB => np.sum(~np.isnan(...)) in NumPy
+    # nanmedian(...) => np.nanmedian(...)
+    # => sum over axis=1 if you want row sums (assuming typical usage)
+    # but in the MATLAB code it's sum( (nWindows, nStrata*nBoot) ) => axis=0 by default
+    nW_array = np.sum(~np.isnan(xmt_reshaped), axis=0)  # shape: (nStrata*nBoot,)
+    nW = np.nanmedian(nW_array)
+
+    # ----------------------------------------------------------
+    # 2) Sort Selected Measurements
+    #    MATLAB:
+    #      [mtSelect, i] = sort(mt(iSelect));
+    #      CpSelect = Cp(iSelect(i));
+    # ----------------------------------------------------------
+    # *In Python*, iSelect is an array of *0-based* indices.
+    # We then do:
+    iSelect = iSelect.astype(int) 
+    selected_mt = mt[iSelect]
+    # Sort them while keeping track of the sorted order
+    sort_order = np.argsort(selected_mt)  # array of int
+    mtSelect = selected_mt[sort_order]
+    # Now reorder Cp similarly
+    selected_Cp = Cp[iSelect]
+    CpSelect = selected_Cp[sort_order]
+
+    # ----------------------------------------------------------
+    # 3) Define Bins Based on Percentiles
+    #    xBins = prctile(mtSelect, 0:(100/nW):100);
+    # In Python, np.percentile(...)
+    # and we create an array of percentiles from 0 to 100 in steps of (100/nW)
+    # But nW might be float => we do something like
+    # np.linspace(0, 100, int(nW)+1) or something similar. However,
+    # to replicate MATLAB's 0:(100/nW):100 exactly, we must handle floats carefully.
+    # We'll do:
+    if nW < 1:
+        # Fallback: if nW < 1 for some reason, do 1 bin, i.e. [0, 100]
+        pct_array = [0, 100]
+    else:
+        step = 100.0 / nW  # step size
+        # The number of steps is int(nW)+1 if nW is integer-like. 
+        # However, if nW is float, MATLAB still enumerates 0, step, 2*step,..., up to 100.
+        # We'll replicate that logic using np.arange, then clip at <=100
+        pct_array = []
+        current = 0.0
+        while current < 100.0 + 1e-9:
+            pct_array.append(current)
+            current += step
+        # In case floating arithmetic overshoots 100 a bit
+        pct_array[-1] = 100.0
+
+    # Now compute bin edges from these percentiles
+    xBins = np.percentile(mtSelect, pct_array)
+
+    # ----------------------------------------------------------
+    # 4) fcBin equivalent in Python
+    #    [~, tW, CpW] = fcBin(mtSelect, CpSelect, xBins, 0);
+
+
+    # Run fcBin
+    nCount, tW, CpW = fcBin(mtSelect, CpSelect, xBins, 0)
+
+    return tW, CpW
diff --git a/oneflux_steps/ustar_cp_python/launch.py b/oneflux_steps/ustar_cp_python/launch.py
index 6336d0ef..ff7255a5 100644
--- a/oneflux_steps/ustar_cp_python/launch.py
+++ b/oneflux_steps/ustar_cp_python/launch.py
@@ -748,8 +748,6 @@ def createTimeArray(uStar: Union[np.ndarray, pd.Series]) -> np.ndarray:
 
 
 
-def cpdAssignUStarTh20100901(*args):
-    return None, None, None, None, None, None, None, None, None, None, None, None
 
 def saveResult(*args):
     return None, None, None
\ No newline at end of file

From f30c1616a85c813bd72262b3f0863fde15344f55 Mon Sep 17 00:00:00 2001
From: Dominic Orchard <dom.orchard@gmail.com>
Date: Fri, 21 Feb 2025 13:28:48 +0000
Subject: [PATCH 25/27] updateSelectIndices translation and fix tests

---
 .../ustar_cp_python/cpdAssignUStarTh.py       | 33 +++++++++++++++++++
 tests/conftest.py                             |  5 ++-
 .../test_updateSelectedIndices.py             | 12 +++----
 3 files changed, 42 insertions(+), 8 deletions(-)

diff --git a/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py
index 472da442..81d8904c 100644
--- a/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py
+++ b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py
@@ -268,3 +268,36 @@ def aggregateSeasonalMeans(mt: NDArray, Cp: NDArray, xmt: NDArray, iSelect: NDAr
     nCount, tW, CpW = fcBin(mtSelect, CpSelect, xBins, 0)
 
     return tW, CpW
+
+def updateSelectedIndices(iSelect : np.ndarray, iOut : np.ndarray, fSelect : np.ndarray, fOut : np.ndarray) -> (np.ndarray, int, np.ndarray):
+    """
+    Parameters
+    ----------
+    iSelect : np.ndarray (int)
+        Array of selected indices.
+    iOut : np.ndarray (int)
+        Array of outlier indices to remove from iSelect.
+    fSelect : np.ndarray (bool)
+        Boolean array indicating whether each point is selected.
+    fOut : np.ndarray (bool)
+        Boolean array indicating outlier points.
+
+    Returns
+    -------
+    iSelect : np.ndarray (int)
+        Updated array of selected indices after outlier removal.
+    nSelect : int
+        Length of the updated iSelect.
+    fSelect : np.ndarray (bool)
+        Updated boolean array with outliers removed from selection.
+    """
+    # Remove outlier indices from iSelect
+    iSelect = np.setdiff1d(iSelect, iOut)
+
+    # Count the remaining selected indices
+    nSelect = len(iSelect)
+
+    # Exclude outliers from fSelect (element-wise "and not")
+    fSelect = fSelect & np.logical_not(fOut)
+
+    return iSelect, nSelect, fSelect
diff --git a/tests/conftest.py b/tests/conftest.py
index ec68454e..d7c54216 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -124,7 +124,10 @@ def convert(self, x, index='to_matlab', fromFile=False):
                 elif isinstance(x[0], list):
                     return np.array(x[0])
             else:
-              return np.array(x).astype(np.float64)
+              if ((len(x) > 1) and (isinstance(x[0], bool))):
+                return np.array(x).astype(bool)
+              else:
+                return np.array(x).astype(np.float64)
 
         elif isinstance(x, tuple):
             return tuple([self.convert(xi) for xi in x])
diff --git a/tests/unit_tests/test_ustar_cp/test_updateSelectedIndices.py b/tests/unit_tests/test_ustar_cp/test_updateSelectedIndices.py
index f994c6f3..40dcad0d 100644
--- a/tests/unit_tests/test_ustar_cp/test_updateSelectedIndices.py
+++ b/tests/unit_tests/test_ustar_cp/test_updateSelectedIndices.py
@@ -1,7 +1,6 @@
 import numpy as np
 import pytest
 
-
 @pytest.mark.parametrize("iSelect, iOut, fSelect, fOut, expected_nSelect", [
     ([1, 2, 3, 4, 5], [2, 4], [True, True, False, True, False], [False, False, False, False, False], 3),
     ([10, 20, 30, 40], [20], [True, False, True, True], [False, False, False, False], 3),
@@ -11,11 +10,10 @@
 def test_update_selected_indices(test_engine, iSelect, iOut, fSelect, fOut, expected_nSelect):
     """Test that updateSelectedIndices correctly removes outliers and updates selection."""
     
-    # Call MATLAB function
-    result = test_engine.updateSelectedIndices(iSelect, iOut, fSelect, fOut, nargout=3)
-    
-    # Extract outputs
-    updated_iSelect, updated_nSelect, updated_fSelect = result
+    updated_iSelect, updated_nSelect, updated_fSelect = test_engine.updateSelectedIndices(test_engine.convert(iSelect)
+                                            , test_engine.convert(iOut)
+                                            , test_engine.convert(fSelect)
+                                            , test_engine.convert(fOut), nargout=3)
     
     # Check expected number of selections
-    assert updated_nSelect == expected_nSelect, f"Expected {expected_nSelect}, got {updated_nSelect}"
\ No newline at end of file
+    assert test_engine.equal(updated_nSelect, expected_nSelect), f"Expected {expected_nSelect}, got {updated_nSelect}"
\ No newline at end of file

From 626b1a93c07fa9d8435a7c06b218ec7c71917d6a Mon Sep 17 00:00:00 2001
From: Dominic Orchard <dom.orchard@gmail.com>
Date: Fri, 21 Feb 2025 13:29:03 +0000
Subject: [PATCH 26/27] initial translation of cpdAssign

---
 .../ustar_cp_python/cpdAssignUStarTh.py       | 336 +++++++++++++++++-
 tests/conftest.py                             |   2 +
 2 files changed, 327 insertions(+), 11 deletions(-)

diff --git a/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py
index 81d8904c..9a3e6c07 100644
--- a/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py
+++ b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py
@@ -1,5 +1,6 @@
 import numpy as np
 from scipy.optimize import curve_fit
+from oneflux_steps.ustar_cp_python.fcReadFields import fcReadFields
 from oneflux_steps.ustar_cp_python.fcNaniqr import fcNaniqr
 from oneflux_steps.ustar_cp_python.fcEqnAnnualSine import fcEqnAnnualSine
 from oneflux_steps.ustar_cp_python.fcr2Calc import fcr2Calc
@@ -7,9 +8,291 @@
 from numpy.typing import NDArray
 from oneflux_steps.ustar_cp_python.fcBin import fcBin
 
-def cpdAssignUStarTh20100901(*args):
-    return None, None, None, None, None, None, None, None, None, None, None, None
+def cpdAssignUStarTh20100901(Stats, plotFlag, siteYearText, *args):
+    """
+    Parameters:
+    -----------
+    Stats : array-like or str
+        Stats structure that may be JSON-encoded (depending on *args).
+    plotFlag : bool or int
+        Flag indicating whether to plot (unused in this excerpt).
+    siteYearText : str
+        String describing site year (unused in this excerpt).
+    *args : list
+        Variable-length argument list. Can contain instructions like
+        ['jsondecode', 1], specifying that Stats should be decoded from JSON.
+
+    Returns:
+    --------
+    annualChangePoint : np.ndarray
+    numAnnualSelected : np.ndarray
+    seasonalTimeWindow : np.ndarray
+    seasonalChangePoint : np.ndarray
+    dominantMode : str
+    failureMessage : str
+    selectedPointsFlag : np.ndarray (bool)
+    sineCurve : np.ndarray
+    fractionSignificant : float
+    fractionModeD : float
+    fractionSelected : float
+
+    The logic follows the MATLAB code step-by-step, including:
+    - Decoding JSON if required.
+    - Checking dimensions of Stats.
+    - Extracting relevant fields (like Cp, b1, p).
+    - Determining significance and modes (D/E).
+    - Excluding outliers, aggregating results.
+    - Fitting annual sine curve.
+    """
+
+    # -------------------------------------------------------------------------
+    # 1) Initialize Output Variables
+
+    annualChangePoint = []
+    numAnnualSelected = []
+    seasonalTimeWindow = []
+    seasonalChangePoint = []
+    selectedPointsFlag = []
+    dominantMode = ''
+    failureMessage = ''
+    sineCurve = []
+    fractionSignificant = []
+    fractionModeD = []
+    fractionSelected = []
+
+    # -------------------------------------------------------------------------
+    # 2) Decode JSON if Required
+    # TODO - PROBABLY REMOVE
+
+    # In MATLAB, the code checks 'varargin' for a cell array with 'jsondecode'.
+    # In Python, we check if `args` includes something like ['jsondecode', 1].
+    for arg in args:
+        # Example check: if arg is a list, arg[0] might be 'jsondecode'
+        if isinstance(arg, list) and len(arg) > 0 and arg[0] == 'jsondecode':
+            # Then check subsequent entries for numeric IDs
+            for j in arg[1:]:
+                # If j == 1, decode Stats from JSON
+                if j == 1:
+                    if isinstance(Stats, str):
+                        Stats = json.loads(Stats)
+
+    # -------------------------------------------------------------------------
+    # 3) Determine Dimension Sizes of Stats
+
+    # The MATLAB code checks ndims(Stats) and shape(Stats).
+    # We assume Stats is a NumPy array (or a nested list that's been
+    # converted to an array).
+    if not isinstance(Stats, np.ndarray):
+        # Attempt to convert Stats to a NumPy array if it's still a list
+        Stats = np.array(Stats, dtype=object)
+
+    numDimensions = Stats.ndim
+
+    if numDimensions == 2:
+        numWindows, numBootstraps = Stats.shape
+        numTemperatureStrata = 1
+        temperatureStrataFactor = 0.5
+
+    elif numDimensions == 3:
+        numWindows, numTemperatureStrata, numBootstraps = Stats.shape
+        temperatureStrataFactor = 1
+
+    else:
+        failureMessage = 'Stats must be 2D or 3D.'
+        return (annualChangePoint, numAnnualSelected, seasonalTimeWindow,
+                seasonalChangePoint, dominantMode, failureMessage,
+                selectedPointsFlag, sineCurve, fractionSignificant,
+                fractionModeD, fractionSelected)
+
+    # -------------------------------------------------------------------------
+    # 4) Set Reference Values
+
+    referenceWindows = 4
+    requiredSelectionCount = referenceWindows * temperatureStrataFactor * numBootstraps
+
+    # -------------------------------------------------------------------------
+    # 5) Preallocate Outputs
+
+    annualChangePoint = np.full((numBootstraps,), np.nan)
+    numAnnualSelected = np.full((numBootstraps,), np.nan)
+    seasonalTimeWindow = np.full((numWindows,), np.nan)
+    seasonalChangePoint = np.full((numWindows,), np.nan)
+
+    # -------------------------------------------------------------------------
+    # 6) Extract Variables from Stats Structure
+    # The MATLAB code uses fcReadFields and fcx2colvec to read each field and reshape.
+    # Here, we assume Python equivalents: readFields(Stats, varName) and x2colvec().
+    # In an actual implementation, these must be defined or replaced by direct indexing.
+
+    variableNames = ['mt', 'Cp', 'b1', 'c2', 'cib1', 'cic2', 'p']
+    # We'll store them in a dictionary by field name.
+    b1 = fcReadFields(Stats, 'b1')
+    c2 = fcReadFields(Stats, 'c2')
+    cib1 = fcReadFields(Stats, 'cib1')
+    cic2 = fcReadFields(Stats, 'cic2')
+    p = fcReadFields(Stats, 'p')
+
+    measurementTime = fcReadFields(Stats, 'mt')
+    changePoint = fcReadFields(Stats, 'Cp')
+
+    # -------------------------------------------------------------------------
+    # 7) Identify Significant Change Points
+
+    significanceThreshold = 0.05
+    # p <= threshold is True/False mask
+    significantFlag = (p <= significanceThreshold)
+
+    # -------------------------------------------------------------------------
+    # 8) Identify Model Type (2-parameter vs 3-parameter)
+
+    # If all c2 are NaN => effectively 2 parameters
+    # The MATLAB code checks sum(~isnan(c2)) == 0
+    if np.sum(~np.isnan(c2)) == 0:
+        numParameters = 2
+        c2 = np.zeros_like(b1)
+        cic2 = np.zeros_like(b1)
+    else:
+        numParameters = 3
+
+    # -------------------------------------------------------------------------
+    # 9) Classify Significant Change Points:
+
+    # Indices that are valid (not NaN in b1+c2+Cp):
+    validMask = ~np.isnan(b1 + c2 + changePoint)
+    validIndices = np.where(~np.isnan(measurementTime))[0]
+    numValidMeasurements = len(validIndices)
+
+    # Non-significant
+    nonSignificantIndices = np.where((~significantFlag) & validMask)[0]
+    numNonSignificant = len(nonSignificantIndices)
+
+    # Significant
+    significantIndices = np.where(significantFlag & validMask)[0]
+    numSignificant = len(significantIndices)
+
+    # Mode E: b1 < c2
+    modeEIndices = np.where(significantFlag & (b1 < c2) & validMask)[0]
+    numModeE = len(modeEIndices)
+
+    # Mode D: b1 >= c2
+    modeDIndices = np.where(significantFlag & (b1 >= c2) & validMask)[0]
+    numModeD = len(modeDIndices)
+
+    # Decide dominant mode
+    if numModeD >= numModeE:
+        selectedIndices = modeDIndices
+        dominantMode = 'D'
+    else:
+        selectedIndices = modeEIndices
+        dominantMode = 'E'
+
+    numSelected = len(selectedIndices)
+
+    # Update Selection Flags
+    selectedPointsFlag = np.zeros_like(significantFlag, dtype=bool)
+    selectedPointsFlag[selectedIndices] = True
+
+    modeDFlag = np.full_like(significantFlag, np.nan, dtype=float)
+    modeDFlag[modeDIndices] = 1
 
+    modeEFlag = np.full_like(significantFlag, np.nan, dtype=float)
+    modeEFlag[modeEIndices] = 1
+
+    # Fractions
+    if numValidMeasurements > 0:
+        fractionSignificant = numSignificant / numValidMeasurements
+        fractionModeD = numModeD / max(numSignificant, 1)  # avoid zero-div
+        fractionSelected = numSelected / numValidMeasurements
+    else:
+        fractionSignificant, fractionModeD, fractionSelected = (0, 0, 0)
+
+    # -------------------------------------------------------------------------
+    # 10) Abort if Too Few Selections
+
+    if fractionSelected < 0.10:
+        failureMessage = 'Less than 10% successful detections.'
+        return (annualChangePoint, numAnnualSelected, seasonalTimeWindow,
+                seasonalChangePoint, dominantMode, failureMessage,
+                selectedPointsFlag, np.array([]), fractionSignificant,
+                fractionModeD, fractionSelected)
+
+    # -------------------------------------------------------------------------
+    # 11) Configure Regression Matrix
+
+    if numParameters == 2:
+        regressionMatrix = np.column_stack([changePoint, b1, cib1])
+    else:
+        regressionMatrix = np.column_stack([changePoint, b1, c2, cib1, cic2])
+
+    # -------------------------------------------------------------------------
+    # 12) Exclude Outliers Based on Standardized Scores
+
+    standardizedScores = computeStandardizedScores(regressionMatrix)  # user-defined
+    outlierFlag, outlierIndices = identifyOutliers(standardizedScores, 5)  # user-defined
+
+    selectedIndices, numSelected, selectedPointsFlag = updateSelectedIndices(
+        selectedIndices, outlierIndices, selectedPointsFlag, outlierFlag
+    )
+
+    modeDIndices, numModeD = updateModes(modeDFlag, outlierIndices)   # user-defined
+    modeEIndices, _         = updateModes(modeEFlag, outlierIndices)  # user-defined
+
+    # Recompute significantIndices, etc.
+    significantIndices = np.union1d(modeDIndices, modeEIndices)
+    numSignificant = len(significantIndices)
+
+    if numValidMeasurements > 0:
+        fractionSignificant = numSignificant / numValidMeasurements
+        fractionModeD = numModeD / max(numSignificant, 1)
+        fractionSelected = numSelected / numValidMeasurements
+
+    # -------------------------------------------------------------------------
+    # 13) Check If Enough Change Points Remain
+
+    if numSelected < requiredSelectionCount:
+        failureMessage = f'Too few selected change points: {numSelected}/{requiredSelectionCount}'
+        return (annualChangePoint, numAnnualSelected, seasonalTimeWindow,
+                seasonalChangePoint, dominantMode, failureMessage,
+                selectedPointsFlag, np.array([]), fractionSignificant,
+                fractionModeD, fractionSelected)
+
+    # -------------------------------------------------------------------------
+    # 14) Aggregate Seasonal and Annual Values
+
+    annualChangePoint, numAnnualSelected, _ = aggregateSeasonalAndAnnualValues(
+        changePoint, selectedIndices, numDimensions, numWindows,
+        numTemperatureStrata, numBootstraps
+    )
+
+    # -------------------------------------------------------------------------
+    # 15) Aggregate Seasonal Means
+
+    seasonalTimeWindow, seasonalChangePoint = aggregateSeasonalMeans(
+        measurementTime, changePoint, measurementTime, selectedIndices,
+        numWindows, numTemperatureStrata, numBootstraps
+    )
+
+    # -------------------------------------------------------------------------
+    # 16) Fit Annual Sine Curve
+
+    sineCurve = fitAnnualSineCurve(
+        measurementTime, changePoint, selectedIndices
+    )
+
+    # -------------------------------------------------------------------------
+    # Return All Outputs in the Same Order as the MATLAB Code
+
+    return (annualChangePoint,
+            numAnnualSelected,
+            seasonalTimeWindow,
+            seasonalChangePoint,
+            dominantMode,
+            failureMessage,
+            selectedPointsFlag,
+            sineCurve,
+            fractionSignificant,
+            fractionModeD,
+            fractionSelected)
 
 def identifyOutliers(x_norm_x, threshold):
     """
@@ -89,11 +372,11 @@ def _annual_sine_for_curve_fit(t, b0, b1, b2):
     return sSine
 
 def aggregateSeasonalAndAnnualValues(
-    xCp, 
-    iSelect, 
-    nDim, 
-    nWindows, 
-    nStrata, 
+    xCp,
+    iSelect,
+    nDim,
+    nWindows,
+    nStrata,
     nBoot
 ):
     """
@@ -143,7 +426,7 @@ def aggregateSeasonalAndAnnualValues(
         CpA = np.nanmean(xCpGF)
         nA  = np.sum(~np.isnan(xCpSelect))
     elif nDim == 3:
-        
+
         # mask xCp using iSelect_array
         xCpSelect = xCp.copy()
         for i in range(len(xCp)):
@@ -195,7 +478,7 @@ def aggregateSeasonalMeans(mt: NDArray, Cp: NDArray, xmt: NDArray, iSelect: NDAr
 
     # ----------------------------------------------------------
     # 1) Calculate Median Number of Windows
-    #    In MATLAB: 
+    #    In MATLAB:
     #       reshape(xmt, nWindows, nStrata * nBoot)
     #    Here we reshape the 1D array xmt => shape (nWindows, nStrata*nBoot)
     # ----------------------------------------------------------
@@ -222,7 +505,7 @@ def aggregateSeasonalMeans(mt: NDArray, Cp: NDArray, xmt: NDArray, iSelect: NDAr
     # ----------------------------------------------------------
     # *In Python*, iSelect is an array of *0-based* indices.
     # We then do:
-    iSelect = iSelect.astype(int) 
+    iSelect = iSelect.astype(int)
     selected_mt = mt[iSelect]
     # Sort them while keeping track of the sorted order
     sort_order = np.argsort(selected_mt)  # array of int
@@ -245,7 +528,7 @@ def aggregateSeasonalMeans(mt: NDArray, Cp: NDArray, xmt: NDArray, iSelect: NDAr
         pct_array = [0, 100]
     else:
         step = 100.0 / nW  # step size
-        # The number of steps is int(nW)+1 if nW is integer-like. 
+        # The number of steps is int(nW)+1 if nW is integer-like.
         # However, if nW is float, MATLAB still enumerates 0, step, 2*step,..., up to 100.
         # We'll replicate that logic using np.arange, then clip at <=100
         pct_array = []
@@ -301,3 +584,34 @@ def updateSelectedIndices(iSelect : np.ndarray, iOut : np.ndarray, fSelect : np.
     fSelect = fSelect & np.logical_not(fOut)
 
     return iSelect, nSelect, fSelect
+
+def updateModes(fModeX: np.ndarray, iOut: np.ndarray):
+    """
+    Recalculates mode indices after removing outliers, mirroring updateModes.m.
+
+    Parameters
+    ----------
+    fModeX : np.ndarray
+        Array indicating whether each point belongs to the mode (1) or not
+        (0/NaN). This array is modified in-place, setting the elements
+        at indices iOut to np.nan.
+    iOut : np.ndarray
+        Array of outlier indices to remove from the mode.
+
+    Returns
+    -------
+    iModeX : np.ndarray
+        Updated indices where fModeX is exactly 1.
+    nModeX : int
+        Number of such indices.
+    """
+    # Mark outlier positions in fModeX as NaN
+    fModeX[iOut] = np.nan
+
+    # Find indices where fModeX is exactly 1
+    iModeX = np.where(fModeX == 1)[0]
+
+    # Count them
+    nModeX = len(iModeX)
+
+    return iModeX, nModeX
\ No newline at end of file
diff --git a/tests/conftest.py b/tests/conftest.py
index d7c54216..309ab3c4 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -170,6 +170,8 @@ def newfunc(*args, **kwargs):
                 # if jsonencode is present in kwargs then remove it
                 if 'jsonencode' in kwargs:
                     kwargs.pop('jsonencode')
+                if 'jsondecode' in kwargs:
+                    kwargs.pop('jsondecode')
 
                 func = globals().get(name)
                 if callable(func):

From 113c6ce610241fc7fb09f87e6b37f121cd7e40c9 Mon Sep 17 00:00:00 2001
From: Dominic Orchard <dom.orchard@gmail.com>
Date: Fri, 21 Feb 2025 18:08:35 +0000
Subject: [PATCH 27/27] fix aggregateSeasonalAndAnnualValues...again...

---
 .../ustar_cp_python/cpdAssignUStarTh.py       | 28 +++-----------
 oneflux_steps/ustar_cp_python/utilities.py    | 37 +++++++++++++++++++
 tests/conftest.py                             |  2 +-
 .../test_aggregateAndSeasonalValues.py        | 17 ++++-----
 .../test_cpdAssignUStarTh20100901.py          | 24 +-----------
 5 files changed, 52 insertions(+), 56 deletions(-)

diff --git a/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py
index 9a3e6c07..47c472e1 100644
--- a/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py
+++ b/oneflux_steps/ustar_cp_python/cpdAssignUStarTh.py
@@ -7,6 +7,7 @@
 from typing import Tuple
 from numpy.typing import NDArray
 from oneflux_steps.ustar_cp_python.fcBin import fcBin
+from oneflux_steps.ustar_cp_python.utilities import index_or_mark_array_update
 
 def cpdAssignUStarTh20100901(Stats, plotFlag, siteYearText, *args):
     """
@@ -407,35 +408,18 @@ def aggregateSeasonalAndAnnualValues(
     # Prepare an output array (same shape) filled with NaNs
     xCpSelect = np.full_like(xCp, np.nan, dtype=float)
 
-    # Treat selection arrays an array of integers
-    # (this allows both an array of booleans and an array of integers to be used
-    # for iselect)
-    iSelect_array = np.asarray(iSelect, dtype=int)
+    # Index using iSelect which could be a mask or a set of indices
+    xCpSelect = index_or_mark_array_update(xCpSelect, iSelect, xCp)
 
+    print(f"nDim = {nDim}")
     # Aggregate values based on dimensions
     if nDim == 2:
-        # mask xCp using iSelect_array
-        xCpSelect = xCp.copy()
-        for i in range(len(xCp)):
-            if iSelect_array[i] == 0:
-                xCpSelect[i] = np.nan
-
         xCpGF = xCpSelect  # naming convention
-
         # xCp shape = [nWindows, nBoot]
-        CpA = np.nanmean(xCpGF)
-        nA  = np.sum(~np.isnan(xCpSelect))
+        CpA = np.nanmean(xCpGF, axis=0)
+        nA  = np.sum(~np.isnan(xCpSelect), axis = 0)
     elif nDim == 3:
-
-        # mask xCp using iSelect_array
-        xCpSelect = xCp.copy()
-        for i in range(len(xCp)):
-            for j in range(len(xCp[i])):
-              if iSelect_array[i][j] == 0:
-                  xCpSelect[i][j] = np.nan
-
         xCpGF = xCpSelect  # Naming convention
-
         # xCp shape = [nWindows, nStrata, nBoot]
         # reshape => (nWindows*nStrata, nBoot) in column-major
         xCpGF_reshaped = np.reshape(xCpGF, (nWindows * nStrata, nBoot), order='F')
diff --git a/oneflux_steps/ustar_cp_python/utilities.py b/oneflux_steps/ustar_cp_python/utilities.py
index 8702fc57..39f22023 100644
--- a/oneflux_steps/ustar_cp_python/utilities.py
+++ b/oneflux_steps/ustar_cp_python/utilities.py
@@ -4,6 +4,43 @@
 import json
 from decimal import Decimal, ROUND_HALF_UP
 
+def index_or_mark_array_update(A : np.ndarray, idx : np.ndarray, B : np.ndarray) -> np.ndarray:
+    """
+    index_or_mark_array_update(A, idx, B) performs something akin to `A[idx] = B`
+    where we update the elements of `A` at indices `idx` according to `B`
+    but where A and B are n-dimensional but idx is either an n-dimensional array
+    of masking booleans, or a 1-dimensional array of indices into the flattened
+    array A.
+
+    Parameters:
+    A : np.ndarray
+        The input array.
+    idx : np.ndarray
+        The indices to use.
+
+    Returns:
+    np.ndarray
+        The elements of the array at the specified indices.
+    """
+
+    if (all(isinstance(i, np.bool_) for i in np.array(idx).flat)):
+      # idx is a boolean mask
+      for i in range(len(A.flat)):
+        if idx.flat[i]:
+            A.flat[i] = B.flat[i]
+    else:
+      # Do column major flattening of A
+      Acopy = A.copy().flatten(order='F')
+      B = B.flatten(order='F')
+      idx = idx.flatten()
+      # Copy across the elements at the right indices
+      for i in idx:
+        Acopy[i] = B[i]
+      # Reshape the data
+      A = Acopy.reshape(A.shape, order='F')
+    return A
+
+
 def prctile(A: np.ndarray, p: float) -> float|np.ndarray:
     """
     Compute the p-th percentile of array A in a way that has
diff --git a/tests/conftest.py b/tests/conftest.py
index 309ab3c4..bb718377 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -124,7 +124,7 @@ def convert(self, x, index='to_matlab', fromFile=False):
                 elif isinstance(x[0], list):
                     return np.array(x[0])
             else:
-              if ((len(x) > 1) and (isinstance(x[0], bool))):
+              if (all(isinstance(i, np.bool_) for i in np.array(x).flat)):
                 return np.array(x).astype(bool)
               else:
                 return np.array(x).astype(np.float64)
diff --git a/tests/unit_tests/test_ustar_cp/test_aggregateAndSeasonalValues.py b/tests/unit_tests/test_ustar_cp/test_aggregateAndSeasonalValues.py
index 0f1b1307..0d9a23c8 100644
--- a/tests/unit_tests/test_ustar_cp/test_aggregateAndSeasonalValues.py
+++ b/tests/unit_tests/test_ustar_cp/test_aggregateAndSeasonalValues.py
@@ -95,13 +95,11 @@ def test_aggregateSeasonalAndAnnualValues(
     expected_xCpSelect
 ):
     """
-    Test the MATLAB function aggregateSeasonalAndAnnualValues by calling it via
-    the MATLAB Engine API for Python. We pass arrays from Python to MATLAB,
-    execute the function, and verify the outputs.
+    Test the function aggregateSeasonalAndAnnualValues
 
     Explanation:
     ------------
-    - xCp is provided in a shape that (in MATLAB) should be [nWindows, nBoot] if nDim=2,
+    - xCp is provided in a shape that should be [nWindows, nBoot] if nDim=2,
       or [nWindows, nStrata, nBoot] if nDim=3. Because MATLAB uses column-major order,
       the exact flattening can be tricky in Python. We keep the nested structure so
       the final shape is correct in MATLAB once passed through the engine.
@@ -112,8 +110,7 @@ def test_aggregateSeasonalAndAnnualValues(
       dimensional differences).
     """
 
-    # Call the MATLAB function with nargout=3
-    CpA_mat, nA_mat, xCpSelect_mat = test_engine.aggregateSeasonalAndAnnualValues(
+    CpA, nA, xCpSelect = test_engine.aggregateSeasonalAndAnnualValues(
         test_engine.convert(xCp),
         test_engine.convert(iSelect, index = "to_python"),
         test_engine.convert(nDim),
@@ -122,7 +119,7 @@ def test_aggregateSeasonalAndAnnualValues(
         test_engine.convert(nBoot),
         nargout=3
     )
-    
-    assert test_engine.equal(CpA_mat, test_engine.convert(expected_CpA))
-    assert test_engine.equal(nA_mat, test_engine.convert(expected_nA))
-    assert test_engine.equal(xCpSelect_mat, test_engine.convert(expected_xCpSelect))
+
+    assert test_engine.equal(CpA, test_engine.convert(expected_CpA))
+    assert test_engine.equal(nA, test_engine.convert(expected_nA))
+    assert test_engine.equal(xCpSelect, test_engine.convert(expected_xCpSelect))
diff --git a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
index e3b6b534..2ad980c5 100644
--- a/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
+++ b/tests/unit_tests/test_ustar_cp/test_cpdAssignUStarTh20100901.py
@@ -1,8 +1,6 @@
 import pytest
-import matlab.engine
 import numpy as np
 
-
 @pytest.fixture(scope="module")
 def mock_data(test_engine, nt=300, tspan=(0, 1), uStar_pars=(0.1, 3.5), T_pars=(-10, 30), fNight=None):
     """
@@ -49,26 +47,6 @@ def mock_data(test_engine, nt=300, tspan=(0, 1), uStar_pars=(0.1, 3.5), T_pars=(
     return Stats2, fPlot, cSiteYr
 
 
-# def test_cpdAssignUStarTh20100901_basic(test_engine, mock_data):
-#     stats, fPlot, cSiteYr = mock_data
-
-#     # Call MATLAB function
-#     CpA, nA, tW, CpW, cMode, cFailure, fSelect, sSine, FracSig, FracModeD, FracSelect = test_engine.cpdAssignUStarTh20100901(stats, fPlot, cSiteYr, jsondecode=[0], nargout=11)
-
-#     # Assertions
-#     assert isinstance(CpA, matlab.double), "CpA should be a MATLAB double array"
-#     assert isinstance(nA, matlab.double), "nA should be a MATLAB double array"
-#     assert isinstance(tW, matlab.double), "tW should be a MATLAB double array"
-#     assert isinstance(CpW, matlab.double), "CpW should be a MATLAB double array"
-#     assert isinstance(cMode, str), "cMode should be a string"
-#     assert isinstance(cFailure, str), "cFailure should be a string"
-#     assert isinstance(fSelect, matlab.logical), "fSelect should be a MATLAB logical array"
-#     assert isinstance(sSine, matlab.double), "sSine should be a MATLAB double array"
-#     assert isinstance(FracSig, float), "FracSig should be a float"
-#     assert isinstance(FracModeD, float), "FracModeD should be a float"
-#     assert isinstance(FracSelect, float), "FracSelect should be a float"
-
-
 def test_cpdAssignUStarTh20100901_edge_cases(test_engine, mock_data):
     def set_attr(struct_array, key, val):
         np.vectorize(lambda x: x.update({key: val}))(struct_array)
@@ -131,8 +109,8 @@ def test_identify_outliers(test_engine, x_norm_x, threshold, expected_f_out, exp
 def test_aggregate_3d_case(test_engine, xCp, iSelect, nDim, nWindows, nStrata, nBoot, expected_CpA, expected_nA, expected_xCpSelect):
     """Test function for aggregateSeasonalAndAnnualValues 2D and 3D cases."""
 
+    print("DATA for test iSelect: ", iSelect)
 
-    # Run MATLAB function
     CpA, nA, xCpSelect = test_engine.aggregateSeasonalAndAnnualValues(
         test_engine.convert(xCp), test_engine.convert(iSelect), nDim, nWindows, test_engine.convert(nStrata), test_engine.convert(nBoot), nargout=3
     )