From c7e687b312a71310a8f4deed6061a97aa9f8b6e1 Mon Sep 17 00:00:00 2001
From: toonhasenack <toon.hasenack@outlook.com>
Date: Thu, 16 May 2024 12:14:43 +0200
Subject: [PATCH 1/8] unweight script

---
 .../src/validphys/scripts/vp_unweight.py      | 155 ++++++++++++++++++
 1 file changed, 155 insertions(+)
 create mode 100644 validphys2/src/validphys/scripts/vp_unweight.py

diff --git a/validphys2/src/validphys/scripts/vp_unweight.py b/validphys2/src/validphys/scripts/vp_unweight.py
new file mode 100644
index 0000000000..e91b7e2dd0
--- /dev/null
+++ b/validphys2/src/validphys/scripts/vp_unweight.py
@@ -0,0 +1,155 @@
+import numpy as np
+import pandas as pd
+from scipy.special import xlogy
+from typing import Tuple
+from tqdm import tqdm
+import argparse
+
+class Unweight:
+    def __init__(self, weights: np.ndarray, *chis: Tuple[int, np.ndarray]) -> None:
+        """
+        Initialize the Unweight class.
+
+        Args:
+            weights (np.ndarray): Array of weights.
+            *chis (Tuple[int, np.ndarray]): Variable number of tuples containing power `n` and `chi` array.
+
+        Raises:
+            AssertionError: If lengths of `chi` arrays are not consistent.
+        """
+        self.chis = chis
+        
+        length = len(chis[0][1])
+        for chi in chis:
+            assert len(chi[1]) == length, "Not all chis have the same length!"
+
+        if weights is None:
+            self.weights = np.ones(length)
+        else:
+            self.weights = weights
+
+    def entropy(self, p1, p2):
+        """
+        Calculate the entropy between two probability distributions.
+
+        Args:
+            p1 (np.ndarray): Probability distribution 1.
+            p2 (np.ndarray): Probability distribution 2.
+
+        Returns:
+            float: Entropy value.
+        """
+        log = xlogy(p1, p1/p2)
+        log[np.isnan(log)] = 0
+        entropy = np.sum(log)
+        return entropy
+    
+    def reweight(self, i: int = 0, thresh: float = 1e-12) -> None:
+        """
+        Perform reweighting.
+
+        Args:
+            i (int): Index of the chi array to reweight.
+            thresh (float, optional): Threshold value for setting small weights to zero. Defaults to 1e-12.
+        """
+
+        n, chi = self.chis[i]
+        exp = (n-1)*np.log(chi) - 1/2*np.power(chi,2.0)
+        self.reweights = np.exp(exp - np.mean(exp))
+        self.reweights = len(self.reweights)*self.reweights/np.sum(self.reweights)
+        self.reweights[self.reweights <= thresh] = 0
+        self.reprobs = self.reweights/len(self.reweights)
+
+    def unweight(self, Np: int) -> None:
+        """
+        Perform unweighting.
+
+        Args:
+            Np (int): Number of points.
+        """
+        pcum = np.zeros(len(self.reweights) + 1)
+        pcum[1:] = np.cumsum(self.reprobs)
+        unweights = np.zeros(len(self.reweights), dtype="int")
+        for k in range(len(self.reweights)):
+            for j in range(Np):
+                condition_one = j/Np - pcum[k] >= 0
+                condition_two = pcum[k+1] - j/Np >= 0
+                if condition_one and condition_two:
+                    unweights[k] += 1
+        
+        self.unweights = unweights
+        self.unprobs = unweights/np.sum(unweights)
+
+    def effective_replicas(self, weights: np.ndarray) -> int:
+        """
+        Calculate the effective number of replicas.
+
+        Args:
+            weights (np.ndarray): Array of weights.
+
+        Returns:
+            int: Effective number of replicas.
+        """
+        N = len(weights)
+        Neff = int(np.exp(-1/N*np.sum(xlogy(weights,weights/N))))
+        return Neff
+
+    def optimize(self, thresh: float, earlystopping: bool = True):
+        """
+        Optimize the unweighting process based on entropy threshold.
+
+        Args:
+            thresh (float): Entropy threshold value.
+            earlystopping (bool, optional): Whether to stop optimization early if threshold is reached. Defaults to True.
+
+        Returns:
+            Tuple[np.ndarray, np.ndarray, int]: Tuple containing arrays of Nps, entropies, and optimal Np value.
+        """
+        Nps = np.logspace(1, np.log10(len(self.weights))+1, 50, dtype=np.int64)
+        entropies = np.zeros(len(Nps))
+        for i in tqdm(range(len(Nps))):
+            self.unweight(Nps[i])
+            entropies[i] = self.entropy(self.unprobs, self.reprobs)
+            if entropies[i] <= thresh and earlystopping:
+                loc = i
+                break
+
+        if i == len(Nps)-1:
+            try:
+                loc = np.where(entropies <= thresh)[0][0]
+            except:
+                print("Failed minimisation procedure! Defaulting to lowest entropy.")
+                loc = -1
+                
+        Nopt = Nps[loc]
+
+        return Nps, entropies, Nopt
+    
+def main(chi2, N, store = True):
+    u = Unweight(None, (N, np.sqrt(chi2)))
+    u.reweight()
+    Neff = u.effective_replicas(u.reweights)
+    u.unweight(Neff)
+
+    weights = pd.DataFrame()
+    weights["unweight"] = u.unweights
+    weights["reweight"] = u.reweights
+    weights["nrep"] = np.arange(1, len(weights)+1)
+    weights = weights.set_index("nrep", drop = True)
+
+    if store:
+        weights.to_csv("weights.csv")
+
+    return weights
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Unweighting using chi squared values.")
+    parser.add_argument("chi2_name", help = "Add the filename of the chi2 dataset (.csv)")
+    parser.add_argument("N", help = "Add the amount of experimental datapoints that the chi2 is based on")
+    args = parser.parse_args()
+    chi2 = pd.read_csv(args.chi2_name).values
+
+    main(chi2, args.N)
+    
+
+

From e6d3c515cc757dffdd99b394af10728137beba33 Mon Sep 17 00:00:00 2001
From: toonhasenack <toon.hasenack@outlook.com>
Date: Thu, 16 May 2024 12:40:31 +0200
Subject: [PATCH 2/8] working.

---
 pyproject.toml                                  | 5 +++--
 validphys2/src/validphys/scripts/vp_unweight.py | 5 +----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 083545744f..db19508512 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry_dynamic_versioning.backend"
 
 [tool.poetry]
 name = "nnpdf"
-version = "0.0.0"
+version = "4.0.9.post745.dev0+c7e687b31.dirty"
 description = "An open-source machine learning framework for global analyses of parton distributions."
 readme = "README.md"
 authors = [
@@ -47,6 +47,7 @@ postfit = "validphys.scripts.postfit:main"
 # validphys helpers and scripts
 vp-upload = "validphys.scripts.vp_upload:main"
 wiki-upload = "validphys.scripts.wiki_upload:main"
+vp-unweight= "validphys.scripts.vp_unweight:main"
 vp-get = "validphys.scripts.vp_get:main"
 vp-comparefits = "validphys.scripts.vp_comparefits:main"
 vp-fitrename = "validphys.scripts.vp_fitrename:main"
@@ -106,7 +107,7 @@ qed = ["fiatlux"]
 nolha = ["pdfflow", "lhapdf-management"]
 
 [tool.poetry-dynamic-versioning]
-enable = true
+enable = false
 vcs = "git"
 metadata = true
 dirty = true
diff --git a/validphys2/src/validphys/scripts/vp_unweight.py b/validphys2/src/validphys/scripts/vp_unweight.py
index e91b7e2dd0..5bed168a8e 100644
--- a/validphys2/src/validphys/scripts/vp_unweight.py
+++ b/validphys2/src/validphys/scripts/vp_unweight.py
@@ -149,7 +149,4 @@ def main(chi2, N, store = True):
     args = parser.parse_args()
     chi2 = pd.read_csv(args.chi2_name).values
 
-    main(chi2, args.N)
-    
-
-
+    main(chi2, args.N)
\ No newline at end of file

From 2fd2f04686ab3129ecc382c09082725588f11e08 Mon Sep 17 00:00:00 2001
From: toonhasenack <toon.hasenack@outlook.com>
Date: Thu, 16 May 2024 13:08:55 +0200
Subject: [PATCH 3/8] small weights fix

---
 validphys2/src/validphys/scripts/vp_unweight.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/validphys2/src/validphys/scripts/vp_unweight.py b/validphys2/src/validphys/scripts/vp_unweight.py
index 5bed168a8e..cc7d61a6ef 100644
--- a/validphys2/src/validphys/scripts/vp_unweight.py
+++ b/validphys2/src/validphys/scripts/vp_unweight.py
@@ -80,17 +80,19 @@ def unweight(self, Np: int) -> None:
         self.unweights = unweights
         self.unprobs = unweights/np.sum(unweights)
 
-    def effective_replicas(self, weights: np.ndarray) -> int:
+    def effective_replicas(self, weights: np.ndarray, thresh: float = 1e-12) -> int:
         """
         Calculate the effective number of replicas.
 
         Args:
             weights (np.ndarray): Array of weights.
+            thresh (float, optional): Threshold value neglecting small weights. Defaults to 1e-12.
 
         Returns:
             int: Effective number of replicas.
         """
         N = len(weights)
+        weights = weights[weights > thresh]
         Neff = int(np.exp(-1/N*np.sum(xlogy(weights,weights/N))))
         return Neff
 

From 5dbadcdd8e8427bd22bbf65733b972eeb4049247 Mon Sep 17 00:00:00 2001
From: toonhasenack <toon.hasenack@outlook.com>
Date: Mon, 20 May 2024 17:03:44 +0200
Subject: [PATCH 4/8] Juan's comments

---
 pyproject.toml                                |   2 +-
 .../src/validphys/scripts/vp_unweight.py      | 156 ++++++++++++------
 2 files changed, 103 insertions(+), 55 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index db19508512..dbba402b88 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -107,7 +107,7 @@ qed = ["fiatlux"]
 nolha = ["pdfflow", "lhapdf-management"]
 
 [tool.poetry-dynamic-versioning]
-enable = false
+enable = true
 vcs = "git"
 metadata = true
 dirty = true
diff --git a/validphys2/src/validphys/scripts/vp_unweight.py b/validphys2/src/validphys/scripts/vp_unweight.py
index cc7d61a6ef..b28ebcc1c8 100644
--- a/validphys2/src/validphys/scripts/vp_unweight.py
+++ b/validphys2/src/validphys/scripts/vp_unweight.py
@@ -2,23 +2,28 @@
 import pandas as pd
 from scipy.special import xlogy
 from typing import Tuple
-from tqdm import tqdm
 import argparse
 
+
 class Unweight:
     def __init__(self, weights: np.ndarray, *chis: Tuple[int, np.ndarray]) -> None:
         """
         Initialize the Unweight class.
 
-        Args:
-            weights (np.ndarray): Array of weights.
-            *chis (Tuple[int, np.ndarray]): Variable number of tuples containing power `n` and `chi` array.
-
-        Raises:
-            AssertionError: If lengths of `chi` arrays are not consistent.
+        Parameters
+        ----------
+        weights : np.ndarray
+            Array of weights.
+        *chis : Tuple[int, np.ndarray]
+            Variable number of tuples containing power `n` and `chi` array.
+
+        Raises
+        ------
+        AssertionError
+            If lengths of `chi` arrays are not consistent.
         """
         self.chis = chis
-        
+
         length = len(chis[0][1])
         for chi in chis:
             assert len(chi[1]) == length, "Not all chis have the same length!"
@@ -28,106 +33,146 @@ def __init__(self, weights: np.ndarray, *chis: Tuple[int, np.ndarray]) -> None:
         else:
             self.weights = weights
 
-    def entropy(self, p1, p2):
+    def entropy(self, p1: np.ndarray, p2: np.ndarray) -> float:
         """
         Calculate the entropy between two probability distributions.
 
-        Args:
-            p1 (np.ndarray): Probability distribution 1.
-            p2 (np.ndarray): Probability distribution 2.
-
-        Returns:
-            float: Entropy value.
+        Parameters
+        ----------
+        p1 : np.ndarray
+            Probability distribution 1.
+        p2 : np.ndarray
+            Probability distribution 2.
+
+        Returns
+        -------
+        float
+            Entropy value.
         """
-        log = xlogy(p1, p1/p2)
+        log = xlogy(p1, p1 / p2)
         log[np.isnan(log)] = 0
         entropy = np.sum(log)
         return entropy
-    
+
     def reweight(self, i: int = 0, thresh: float = 1e-12) -> None:
         """
         Perform reweighting.
 
-        Args:
-            i (int): Index of the chi array to reweight.
-            thresh (float, optional): Threshold value for setting small weights to zero. Defaults to 1e-12.
+        Parameters
+        ----------
+        i : int, optional
+            Index of the chi array to reweight.
+        thresh : float, optional
+            Threshold value for setting small weights to zero. Defaults to 1e-12.
         """
 
         n, chi = self.chis[i]
-        exp = (n-1)*np.log(chi) - 1/2*np.power(chi,2.0)
+        exp = (n - 1) * np.log(chi) - 1 / 2 * np.power(chi, 2.0)
         self.reweights = np.exp(exp - np.mean(exp))
-        self.reweights = len(self.reweights)*self.reweights/np.sum(self.reweights)
+        self.reweights = len(self.reweights) * self.reweights / np.sum(self.reweights)
         self.reweights[self.reweights <= thresh] = 0
-        self.reprobs = self.reweights/len(self.reweights)
+        self.reprobs = self.reweights / len(self.reweights)
 
     def unweight(self, Np: int) -> None:
         """
         Perform unweighting.
 
-        Args:
-            Np (int): Number of points.
+        Parameters
+        ----------
+        Np : int
+            Number of points.
         """
         pcum = np.zeros(len(self.reweights) + 1)
         pcum[1:] = np.cumsum(self.reprobs)
         unweights = np.zeros(len(self.reweights), dtype="int")
         for k in range(len(self.reweights)):
             for j in range(Np):
-                condition_one = j/Np - pcum[k] >= 0
-                condition_two = pcum[k+1] - j/Np >= 0
+                condition_one = j / Np - pcum[k] >= 0
+                condition_two = pcum[k + 1] - j / Np >= 0
                 if condition_one and condition_two:
                     unweights[k] += 1
-        
+
         self.unweights = unweights
-        self.unprobs = unweights/np.sum(unweights)
+        self.unprobs = unweights / np.sum(unweights)
 
     def effective_replicas(self, weights: np.ndarray, thresh: float = 1e-12) -> int:
         """
         Calculate the effective number of replicas.
 
-        Args:
-            weights (np.ndarray): Array of weights.
-            thresh (float, optional): Threshold value neglecting small weights. Defaults to 1e-12.
-
-        Returns:
-            int: Effective number of replicas.
+        Parameters
+        ----------
+        weights : np.ndarray
+            Array of weights.
+        thresh : float, optional
+            Threshold value neglecting small weights. Defaults to 1e-12.
+
+        Returns
+        -------
+        int
+            Effective number of replicas.
         """
         N = len(weights)
         weights = weights[weights > thresh]
-        Neff = int(np.exp(-1/N*np.sum(xlogy(weights,weights/N))))
+        Neff = int(np.exp(-1 / N * np.sum(xlogy(weights, weights / N))))
         return Neff
 
-    def optimize(self, thresh: float, earlystopping: bool = True):
+    def optimize(
+        self, thresh: float, earlystopping: bool = True
+    ) -> Tuple[np.ndarray, np.ndarray, int]:
         """
         Optimize the unweighting process based on entropy threshold.
 
-        Args:
-            thresh (float): Entropy threshold value.
-            earlystopping (bool, optional): Whether to stop optimization early if threshold is reached. Defaults to True.
-
-        Returns:
-            Tuple[np.ndarray, np.ndarray, int]: Tuple containing arrays of Nps, entropies, and optimal Np value.
+        Parameters
+        ----------
+        thresh : float
+            Entropy threshold value.
+        earlystopping : bool, optional
+            Whether to stop optimization early if threshold is reached. Defaults to True.
+
+        Returns
+        -------
+        Tuple[np.ndarray, np.ndarray, int]
+            Tuple containing arrays of Nps, entropies, and optimal Np value.
         """
-        Nps = np.logspace(1, np.log10(len(self.weights))+1, 50, dtype=np.int64)
+        Nps = np.logspace(1, np.log10(len(self.weights)) + 1, 50, dtype=np.int64)
         entropies = np.zeros(len(Nps))
-        for i in tqdm(range(len(Nps))):
+        for i in range(len(Nps)):
             self.unweight(Nps[i])
             entropies[i] = self.entropy(self.unprobs, self.reprobs)
             if entropies[i] <= thresh and earlystopping:
                 loc = i
                 break
 
-        if i == len(Nps)-1:
+        if i == len(Nps) - 1:
             try:
                 loc = np.where(entropies <= thresh)[0][0]
             except:
                 print("Failed minimisation procedure! Defaulting to lowest entropy.")
                 loc = -1
-                
+
         Nopt = Nps[loc]
 
         return Nps, entropies, Nopt
-    
-def main(chi2, N, store = True):
+
+
+def main(chi2: np.ndarray, N: int, store: bool = True) -> pd.DataFrame:
+    """
+    Perform the unweighting process and store the results.
+
+    Parameters
+    ----------
+    chi2 : np.ndarray
+        Array of chi-squared values.
+    N : int
+        Number of experimental data points that the chi2 is based on.
+    store : bool, optional
+        Whether to store the resulting weights in a CSV file. Defaults to True.
+
+    Returns
+    -------
+    pd.DataFrame
+        DataFrame containing the unweighted and reweighted values.
+    """
     u = Unweight(None, (N, np.sqrt(chi2)))
     u.reweight()
     Neff = u.effective_replicas(u.reweights)
@@ -136,19 +181,22 @@ def main(chi2, N, store = True):
     weights = pd.DataFrame()
     weights["unweight"] = u.unweights
     weights["reweight"] = u.reweights
-    weights["nrep"] = np.arange(1, len(weights)+1)
-    weights = weights.set_index("nrep", drop = True)
+    weights["nrep"] = np.arange(1, len(weights) + 1)
+    weights = weights.set_index("nrep", drop=True)
 
     if store:
         weights.to_csv("weights.csv")
 
     return weights
 
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Unweighting using chi squared values.")
-    parser.add_argument("chi2_name", help = "Add the filename of the chi2 dataset (.csv)")
-    parser.add_argument("N", help = "Add the amount of experimental datapoints that the chi2 is based on")
+    parser.add_argument("chi2_name", help="Add the filename of the chi2 dataset (.csv)")
+    parser.add_argument(
+        "N", help="Add the amount of experimental datapoints that the chi2 is based on"
+    )
     args = parser.parse_args()
     chi2 = pd.read_csv(args.chi2_name).values
 
-    main(chi2, args.N)
\ No newline at end of file
+    main(chi2, args.N)

From 9c6feab4d978fe0ecb5c58676757d960c7be2d30 Mon Sep 17 00:00:00 2001
From: toonhasenack <toon.hasenack@outlook.com>
Date: Tue, 21 May 2024 15:32:22 +0200
Subject: [PATCH 5/8] add plot_entropy functionality

---
 .../src/validphys/scripts/vp_unweight.py      | 45 ++++++++++++++-----
 1 file changed, 35 insertions(+), 10 deletions(-)

diff --git a/validphys2/src/validphys/scripts/vp_unweight.py b/validphys2/src/validphys/scripts/vp_unweight.py
index b28ebcc1c8..0d3dafc7b8 100644
--- a/validphys2/src/validphys/scripts/vp_unweight.py
+++ b/validphys2/src/validphys/scripts/vp_unweight.py
@@ -3,6 +3,7 @@
 from scipy.special import xlogy
 from typing import Tuple
 import argparse
+import matplotlib.pyplot as plt
 
 
 class Unweight:
@@ -116,16 +117,16 @@ def effective_replicas(self, weights: np.ndarray, thresh: float = 1e-12) -> int:
         Neff = int(np.exp(-1 / N * np.sum(xlogy(weights, weights / N))))
         return Neff
 
-    def optimize(
-        self, thresh: float, earlystopping: bool = True
+    def iterate(
+        self, thresh: float = 0, earlystopping: bool = True
     ) -> Tuple[np.ndarray, np.ndarray, int]:
         """
-        Optimize the unweighting process based on entropy threshold.
+        Itarate the unweighting process based on entropy threshold.
 
         Parameters
         ----------
         thresh : float
-            Entropy threshold value.
+            Entropy threshold value. Defaults to 0.
         earlystopping : bool, optional
             Whether to stop optimization early if threshold is reached. Defaults to True.
 
@@ -134,7 +135,7 @@ def optimize(
         Tuple[np.ndarray, np.ndarray, int]
             Tuple containing arrays of Nps, entropies, and optimal Np value.
         """
-        Nps = np.logspace(1, np.log10(len(self.weights)) + 1, 50, dtype=np.int64)
+        Nps = np.logspace(0, np.log10(len(self.weights)) + 1, 50, dtype=np.int64)
         entropies = np.zeros(len(Nps))
         for i in range(len(Nps)):
             self.unweight(Nps[i])
@@ -153,9 +154,27 @@ def optimize(
         Nopt = Nps[loc]
 
         return Nps, entropies, Nopt
-
-
-def main(chi2: np.ndarray, N: int, store: bool = True) -> pd.DataFrame:
+    
+    def plot_entropy(
+        self, Neff: int
+    ) -> None:
+        """
+        Plot the entropy as a function of the new number of replicas.
+        Parameters
+        ----------
+        Neff : int
+            Number of effective replicas
+        """
+        N, E, _ = self.iterate()
+        fig = plt.figure()
+        ax = plt.axes(xscale="log")
+        ax.axvline(Neff, c="r", linestyle=":")
+        ax.plot(N,E)
+        ax.set_xlabel(r"Replica Number $N'_{rep}$",size=18)
+        ax.set_ylabel(r"Entropy $H$",size=18)
+        fig.savefig("Entropy.jpg")
+
+def main(chi2: np.ndarray, N: int, store: bool = True, plot_entropy: bool = False) -> pd.DataFrame:
     """
     Perform the unweighting process and store the results.
 
@@ -167,7 +186,9 @@ def main(chi2: np.ndarray, N: int, store: bool = True) -> pd.DataFrame:
         Number of experimental data points that the chi2 is based on.
     store : bool, optional
         Whether to store the resulting weights in a CSV file. Defaults to True.
-
+    plot_entropy: bool, optional
+        Whether to plot and save the entropy as a function of the number of new replicas. Defaults to false.
+        
     Returns
     -------
     pd.DataFrame
@@ -186,6 +207,9 @@ def main(chi2: np.ndarray, N: int, store: bool = True) -> pd.DataFrame:
 
     if store:
         weights.to_csv("weights.csv")
+    
+    if plot_entropy:
+        u.plot_entropy(Neff)
 
     return weights
 
@@ -196,7 +220,8 @@ def main(chi2: np.ndarray, N: int, store: bool = True) -> pd.DataFrame:
     parser.add_argument(
         "N", help="Add the amount of experimental datapoints that the chi2 is based on"
     )
+    parser.add_argument("--plot_entropy", action="store_true", help="Call flag to enable entropy plotting.")
     args = parser.parse_args()
     chi2 = pd.read_csv(args.chi2_name).values
 
-    main(chi2, args.N)
+    main(chi2, args.N, plot_entropy=args.plot_entropy)

From bb3598b6f55a04982ae5ae88055c3bca126fe971 Mon Sep 17 00:00:00 2001
From: toonhasenack <toon.hasenack@outlook.com>
Date: Tue, 21 May 2024 15:45:58 +0200
Subject: [PATCH 6/8] figure layout

---
 validphys2/src/validphys/scripts/vp_unweight.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/validphys2/src/validphys/scripts/vp_unweight.py b/validphys2/src/validphys/scripts/vp_unweight.py
index 0d3dafc7b8..f7d2720e79 100644
--- a/validphys2/src/validphys/scripts/vp_unweight.py
+++ b/validphys2/src/validphys/scripts/vp_unweight.py
@@ -172,7 +172,7 @@ def plot_entropy(
         ax.plot(N,E)
         ax.set_xlabel(r"Replica Number $N'_{rep}$",size=18)
         ax.set_ylabel(r"Entropy $H$",size=18)
-        fig.savefig("Entropy.jpg")
+        fig.savefig("Entropy.jpg", bbox_inches='tight', pad_inches=0.2)
 
 def main(chi2: np.ndarray, N: int, store: bool = True, plot_entropy: bool = False) -> pd.DataFrame:
     """

From 2e51982971f5dc61235e360520aec805c6b7a3ec Mon Sep 17 00:00:00 2001
From: toonhasenack <toon.hasenack@outlook.com>
Date: Fri, 24 May 2024 10:24:56 +0200
Subject: [PATCH 7/8] better plotting

---
 .../src/validphys/scripts/vp_unweight.py      | 26 ++++++++++++-------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/validphys2/src/validphys/scripts/vp_unweight.py b/validphys2/src/validphys/scripts/vp_unweight.py
index f7d2720e79..5271fdc37a 100644
--- a/validphys2/src/validphys/scripts/vp_unweight.py
+++ b/validphys2/src/validphys/scripts/vp_unweight.py
@@ -154,10 +154,8 @@ def iterate(
         Nopt = Nps[loc]
 
         return Nps, entropies, Nopt
-    
-    def plot_entropy(
-        self, Neff: int
-    ) -> None:
+
+    def plot_entropy(self, Neff: int) -> None:
         """
         Plot the entropy as a function of the new number of replicas.
         Parameters
@@ -169,11 +167,17 @@ def plot_entropy(
         fig = plt.figure()
         ax = plt.axes(xscale="log")
         ax.axvline(Neff, c="r", linestyle=":")
-        ax.plot(N,E)
-        ax.set_xlabel(r"Replica Number $N'_{rep}$",size=18)
-        ax.set_ylabel(r"Entropy $H$",size=18)
+        ax.plot(N, E)
+        ax.set_xlabel(r"Replica Number $N'_{rep}$", size=18)
+        ax.set_ylabel(r"Entropy $H$", size=18)
+        ax.tick_params(axis='x', direction='in', bottom=True, top=True)
+        ax.tick_params(axis='y', direction='in', left=True, right=True)
+        ax.minorticks_on()
+        ax.tick_params(axis='x', which='minor', direction='in', bottom=True, top=True)
+        ax.tick_params(axis='y', which='minor', direction='in', left=True, right=True)
         fig.savefig("Entropy.jpg", bbox_inches='tight', pad_inches=0.2)
 
+
 def main(chi2: np.ndarray, N: int, store: bool = True, plot_entropy: bool = False) -> pd.DataFrame:
     """
     Perform the unweighting process and store the results.
@@ -188,7 +192,7 @@ def main(chi2: np.ndarray, N: int, store: bool = True, plot_entropy: bool = Fals
         Whether to store the resulting weights in a CSV file. Defaults to True.
     plot_entropy: bool, optional
         Whether to plot and save the entropy as a function of the number of new replicas. Defaults to false.
-        
+
     Returns
     -------
     pd.DataFrame
@@ -207,7 +211,7 @@ def main(chi2: np.ndarray, N: int, store: bool = True, plot_entropy: bool = Fals
 
     if store:
         weights.to_csv("weights.csv")
-    
+
     if plot_entropy:
         u.plot_entropy(Neff)
 
@@ -220,7 +224,9 @@ def main(chi2: np.ndarray, N: int, store: bool = True, plot_entropy: bool = Fals
     parser.add_argument(
         "N", help="Add the amount of experimental datapoints that the chi2 is based on"
     )
-    parser.add_argument("--plot_entropy", action="store_true", help="Call flag to enable entropy plotting.")
+    parser.add_argument(
+        "--plot_entropy", action="store_true", help="Call flag to enable entropy plotting."
+    )
     args = parser.parse_args()
     chi2 = pd.read_csv(args.chi2_name).values
 

From 71271be249fc4c5d86afd2270136957554d2a3e1 Mon Sep 17 00:00:00 2001
From: toonhasenack <toon.hasenack@outlook.com>
Date: Fri, 14 Jun 2024 13:25:26 +0200
Subject: [PATCH 8/8] pass minmax kinematics when loading data

---
 validphys2/src/validphys/commondataparser.py |  4 ++--
 validphys2/src/validphys/coredata.py         | 12 +++++++++++-
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/validphys2/src/validphys/commondataparser.py b/validphys2/src/validphys/commondataparser.py
index 5d26d93900..ab2617ceb9 100644
--- a/validphys2/src/validphys/commondataparser.py
+++ b/validphys2/src/validphys/commondataparser.py
@@ -857,7 +857,7 @@ def load_commondata_new(metadata):
     # the uncertainties
     uncertainties_df = metadata.load_uncertainties()
     # and the kinematics
-    kin_df = metadata.load_kinematics()
+    kin_df = metadata.load_kinematics(drop_minmax=False)
 
     # Once we have loaded all uncertainty files, let's check how many sys we have
     nsys = len(
@@ -915,7 +915,7 @@ def load_commondata_new(metadata):
         setname=metadata.name,
         ndata=metadata.ndata,
         commondataproc=procname,
-        nkin=3,
+        nkin=9,
         nsys=nsys,
         commondata_table=commondata_table,
         systype_table=systype_table,
diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py
index d74f197a77..58a50e1361 100644
--- a/validphys2/src/validphys/coredata.py
+++ b/validphys2/src/validphys/coredata.py
@@ -13,7 +13,17 @@
 from reportengine.compat import yaml
 from validphys.utils import generate_path_filtered_data
 
-KIN_NAMES = ["kin1", "kin2", "kin3"]
+KIN_NAMES = [
+    "kin1min",
+    "kin1mid",
+    "kin1max",
+    "kin2min",
+    "kin2mid",
+    "kin2max",
+    "kin3min",
+    "kin3mid",
+    "kin3max",
+]
 log = logging.getLogger(__name__)