update nnpdf40 runcard and add theory 40_000_000 #2082

Merged · 5 commits · May 20, 2024
16 changes: 8 additions & 8 deletions .github/workflows/fitbot.yml
@@ -10,7 +10,7 @@ on:
env:
N3FIT_MAXNREP: 20 # total number of replicas to fit
POSTFIT_NREP: 16 # requested replicas for postfit
-REFERENCE_SET: NNBOT-489c196ac-2024-04-18 # reference set for exact results
+REFERENCE_SET: NNBOT-c0b68779b-2024-05-16 # reference set for exact results
STABLE_REFERENCE_SET: NNBOT-c0f99b7b3-2024-02-28 # reference set for last tag
CONDA_PY: 312
PYTHONHASHSEED: "0"
@@ -120,13 +120,13 @@ jobs:
url=$(vp-upload output/ 2>&1 | grep https)
echo "REPORT_URL=$url" >> $GITHUB_ENV
vp-comparefits $RUNCARD $STABLE_REFERENCE_SET \
---title "Automatic check fit $RUNCARD with respect to latest tag" \
---author bot \
---keywords run-fit-bot \
---thcovmat_if_present \
--o tagged_output
-url=$(vp-upload tagged_output/ 2>&1 | grep https)
-echo "REPORT_URL_STABLE=$url" >> $GITHUB_ENV
+--title "Automatic check fit $RUNCARD with respect to latest tag" \
+--author bot \
+--keywords run-fit-bot \
+--thcovmat_if_present \
+-o tagged_output
+url=$(vp-upload tagged_output/ 2>&1 | grep https)
+echo "REPORT_URL_STABLE=$url" >> $GITHUB_ENV
# write reminder
- name: Write summary on PR
uses: unsplash/comment-on-pr@master
Expand Down
2 changes: 1 addition & 1 deletion n3fit/runcards/examples/developing.yml
@@ -50,7 +50,7 @@ datacuts:

############################################################
theory:
-theoryid: 708 # database id
+theoryid: 40000000

sampling:
separate_multiplicative: true
27 changes: 15 additions & 12 deletions n3fit/runcards/examples/nnpdf40-like.yml
@@ -2,7 +2,7 @@
# Configuration file for n3fit
#
################################################################################
-description: NNPDF4.0 NNLO baseline fit with theory 708 (nFONLL, new commondata)
+description: NNPDF4.0 NNLO baseline fit (nFONLL). Comparable to NNPDF40_nnlo_as_01180_qcd

################################################################################
dataset_inputs:
@@ -91,16 +91,18 @@ dataset_inputs:

################################################################################
datacuts:
-t0pdfset: 240329-01-rs-nnpdf40like-baseline
+t0pdfset: NNPDF40_nnlo_as_01180
q2min: 3.49
w2min: 12.5

################################################################################
# NNLO QCD TRN evolution
theory:
-theoryid: 708
+theoryid: 40000000

+# For fits <= 4.0 multiplicative and additive uncertainties were sampled separately
sampling:
+separate_multiplicative: false
@scarlehoff (Member, Author) commented on May 17, 2024:

This is necessary to get the exact comparison of my comment, but I believe this should be the default going forward.

@andreab1997 @RoyStegeman

(It should not be a big or relevant effect, and I guess you are using it anyway, but it could have an effect in the closure tests.)

A Contributor replied:

Yes, I believe we should remove this key now and just enforce the correct behaviour.

A Member replied:

From this conversation I understood the default would be set to False so that these lines could be removed from the runcard, but instead only a comment was added.

@scarlehoff (Member, Author) replied:

I've added a comment in this PR and we can take the default out in a different one (actually, @andreab1997, if you want to do the honors :P).
-separate_multiplicative: False

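As background for the `separate_multiplicative` flag discussed above, the sketch below illustrates the difference between drawing multiplicative and additive uncertainties as independent noise sources versus folding them into a single covariance and drawing once. This is an illustration with made-up numbers and variable names, not the actual n3fit pseudodata-sampling code:

```python
import numpy as np

rng = np.random.default_rng(0)

data = np.array([10.0, 20.0, 30.0])      # central values (made up)
add_std = np.array([0.5, 0.4, 0.6])      # additive uncertainties
mult_rel = np.array([0.02, 0.02, 0.02])  # relative multiplicative uncertainties

# separate_multiplicative: true -- draw the two noise sources independently
pseudo_sep = data * (1.0 + rng.normal(0.0, mult_rel)) + rng.normal(0.0, add_std)

# separate_multiplicative: false -- fold both into one total standard deviation
total_std = np.sqrt(add_std**2 + (mult_rel * data) ** 2)
pseudo_joint = data + rng.normal(0.0, total_std)
```

As the comment in the runcard notes, fits up to and including 4.0 used the separate sampling; the two procedures agree on the total variance but differ replica by replica, which is why the flag matters for exact reproduction.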
################################################################################
trvlseed: 591866982
@@ -131,15 +133,16 @@ parameters: # This defines the parameter dictionary that is passed to the Model

fitting:
fitbasis: EVOL
+savepseudodata: True
basis:
-- {fl: sng, trainable: false, smallx: [1.092, 1.118], largex: [1.479, 3.037]}
-- {fl: g, trainable: false, smallx: [0.8048, 1.053], largex: [2.828, 5.503]}
-- {fl: v, trainable: false, smallx: [0.4678, 0.744], largex: [1.547, 3.319]}
-- {fl: v3, trainable: false, smallx: [0.1268, 0.4549], largex: [1.725, 3.38]}
-- {fl: v8, trainable: false, smallx: [0.5864, 0.7822], largex: [1.533, 3.282]}
-- {fl: t3, trainable: false, smallx: [-0.436, 1.0], largex: [1.771, 3.47]}
-- {fl: t8, trainable: false, smallx: [0.5878, 0.874], largex: [1.548, 3.265]}
-- {fl: t15, trainable: false, smallx: [1.087, 1.146], largex: [1.493, 3.441]}
+- {fl: sng, trainable: false, smallx: [1.091, 1.119], largex: [1.471, 3.021]}
+- {fl: g, trainable: false, smallx: [0.7795, 1.095], largex: [2.742, 5.547]}
+- {fl: v, trainable: false, smallx: [0.472, 0.7576], largex: [1.571, 3.559]}
+- {fl: v3, trainable: false, smallx: [0.07483, 0.4501], largex: [1.714, 3.467]}
+- {fl: v8, trainable: false, smallx: [0.5731, 0.779], largex: [1.555, 3.465]}
+- {fl: t3, trainable: false, smallx: [-0.5498, 1.0], largex: [1.778, 3.5]}
+- {fl: t8, trainable: false, smallx: [0.5469, 0.857], largex: [1.555, 3.391]}
+- {fl: t15, trainable: false, smallx: [1.081, 1.142], largex: [1.491, 3.092]}
RoyStegeman marked this conversation as resolved.

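For context, each `smallx`/`largex` pair above gives the range from which the small-x exponent α and large-x exponent β of that flavour's preprocessing are sampled; in NNPDF fits these dress the network output with a factor x^(1−α)(1−x)^β. A rough numerical sketch of that factor, using the ranges from the new `sng` entry (illustrative only, not the n3fit implementation):

```python
import numpy as np

rng = np.random.default_rng(42)

# ranges copied from the new `sng` entry: smallx -> alpha, largex -> beta
alpha = rng.uniform(1.091, 1.119)
beta = rng.uniform(1.471, 3.021)

x = np.array([1e-5, 1e-3, 0.1, 0.5, 0.9])
# preprocessing factor that multiplies the network output
prefactor = x ** (1 - alpha) * (1 - x) ** beta

# alpha slightly above 1 -> mild small-x growth; beta > 0 -> vanishes at x = 1
```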
################################################################################
positivity:
41 changes: 41 additions & 0 deletions nnpdf_data/nnpdf_data/theory_cards/40000000.yaml
@@ -0,0 +1,41 @@
ID: 40_000_000
Comments: NNLO nFONLL theory. Equivalent to 700/708. 4.0 baseline.
PTO: 2
FNS: FONLL-C
DAMP: 0
IC: 1
Q0: 1.65
ModEv: TRN
XIR: 1.0
XIF: 1.0
NfFF: 5
QED: 0
HQ: POLE
mc: 1.51
Qmc: 1.51
kcThr: 1.0
mb: 4.92
Qmb: 4.92
kbThr: 1.0
mt: 172.5
Qmt: 172.5
ktThr: 1.0
CKM:
- 0.97428
- 0.2253
- 0.00347
- 0.2252
- 0.97345
- 0.041
- 0.00862
- 0.0403
- 0.999152
MZ: 91.1876
MW: 80.398
GF: 1.1663787e-05
SIN2TW: 0.23126
TMC: 1
MP: 0.938
Qref: 91.2
alphas: 0.118
alphaqed: 0.0077553
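Theory cards like the one above are plain YAML, so they can be inspected with PyYAML. A minimal sketch, inlining a few keys from the 40000000 card rather than reading the file from disk:

```python
import yaml

# excerpt of nnpdf_data/nnpdf_data/theory_cards/40000000.yaml
card_text = """
PTO: 2          # perturbative order: NNLO
FNS: FONLL-C
Q0: 1.65
Qref: 91.2
alphas: 0.118
"""

theory = yaml.safe_load(card_text)
print(theory["FNS"], theory["alphas"])  # FONLL-C 0.118
```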
41 changes: 29 additions & 12 deletions validphys2/src/validphys/loader.py
@@ -4,22 +4,22 @@
"""

+import functools
-from functools import cached_property
import logging
import mimetypes
import os
import os.path as osp
import pathlib
import pkgutil
import re
import shutil
import sys
import tarfile
import tempfile
from typing import List
import urllib.parse as urls

import requests

+from nnpdf_data import legacy_to_new_mapping, path_vpdata
from reportengine import filefinder
from reportengine.compat import yaml
from validphys import lhaindex
@@ -40,7 +40,6 @@
TheoryIDSpec,
peek_commondata_metadata,
)
-from nnpdf_data import legacy_to_new_mapping, path_vpdata
from validphys.utils import generate_path_filtered_data, tempfile_cleaner

log = logging.getLogger(__name__)
@@ -516,7 +515,9 @@ def theorydb_folder(self):
"""Checks theory db file exists and returns path to it"""
dbpath = self.datapath / "theory_cards"
if not dbpath.is_dir():
-raise TheoryDataBaseNotFound(f"could not find theory db folder. Directory not found at {dbpath}")
+raise TheoryDataBaseNotFound(
+    f"could not find theory db folder. Directory not found at {dbpath}"
+)
return dbpath

def get_commondata(self, setname, sysnum):
@@ -629,7 +630,7 @@ def get_posset(self, theoryID, setname, postlambda):

def check_fit(self, fitname):
resultspath = self.resultspath
-if fitname != osp.basename(fitname):
+if fitname != pathlib.Path(fitname).name:
raise FitNotFound(
f"Could not find fit '{fitname}' in '{resultspath} "
"because the name doesn't correspond to a valid filename"
@@ -646,7 +647,7 @@ def check_fit(self, fitname):
def check_hyperscan(self, hyperscan_name):
"""Obtain a hyperscan run"""
resultspath = self.hyperscan_resultpath
-if hyperscan_name != osp.basename(hyperscan_name):
+if hyperscan_name != pathlib.Path(hyperscan_name).name:
raise HyperscanNotFound(
f"Could not find fit '{hyperscan_name}' in '{resultspath} "
"because the name doesn't correspond to a valid filename"
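The old `osp.basename` comparison and the new `pathlib.Path(...).name` one serve the same purpose in both `check_fit` and `check_hyperscan`: rejecting names that smuggle in a directory component. A quick sketch of the check (function name is ours, for illustration):

```python
import pathlib

def is_plain_name(name: str) -> bool:
    # True only when `name` carries no directory component,
    # i.e. it is usable as a bare filename under resultspath
    return name == pathlib.Path(name).name

assert is_plain_name("NNPDF40_nnlo_as_01180")
assert not is_plain_name("subdir/fit")
assert not is_plain_name("../escape")
```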
@@ -939,7 +940,7 @@ def download_file(url, stream_or_path, make_parents=False):
_download_and_show(response, stream_or_path)


-def download_and_extract(url, local_path):
+def download_and_extract(url, local_path, target_name=None):
"""Download a compressed archive and then extract it to the given path"""
local_path = pathlib.Path(local_path)
if not local_path.is_dir():
@@ -951,12 +952,28 @@ def download_and_extract(url, local_path):
download_file(url, t)
log.info("Extracting archive to %s", local_path)
try:
-shutil.unpack_archive(t.name, extract_dir=local_path)
-except:
+with tarfile.open(archive_dest.name) as res_tar:
+    # Extract to a temporary directory
+    folder_dest = tempfile.TemporaryDirectory(dir=local_path, suffix=name)
+    dest_path = pathlib.Path(folder_dest.name)
+    res_tar.extractall(path=dest_path, filter="data")
+
+    # Check there is no more than one item at the top level
+    top_level_stuff = list(dest_path.glob("*"))
+    if len(top_level_stuff) > 1:
+        raise RemoteLoaderError(f"More than one item in the top level directory of {url}")
+
+    if target_name is None:
+        target_path = local_path
+    else:
+        target_path = local_path / target_name
+    shutil.move(top_level_stuff[0], target_path)
+
+except Exception as e:
log.error(
    f"The original archive at {t.name} was only extracted partially at \n{local_path}"
)
-raise
+raise e
else:
os.unlink(archive_dest.name)

@@ -1079,7 +1096,7 @@ def remote_theories(self):
def remote_nnpdf_pdfs(self):
return self.remote_files(self.nnpdf_pdfs_urls, self.nnpdf_pdfs_index, thing="PDFs")

-@cached_property
+@functools.cached_property
def remote_keywords(self):
root = self.nnprofile['reports_root_url']
url = urls.urljoin(root, 'index.json')
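The `cached_property` change is purely an import-style cleanup (`functools.cached_property` instead of the bare imported name); the semantics are unchanged: the property body runs once per instance and the result is cached. A tiny illustration with made-up names:

```python
import functools

class RemoteIndex:
    def __init__(self):
        self.fetches = 0

    @functools.cached_property
    def keywords(self):
        self.fetches += 1  # a real implementation would hit the network here
        return ["run-fit-bot"]

idx = RemoteIndex()
idx.keywords
idx.keywords
assert idx.fetches == 1  # computed once, cached afterwards
```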
@@ -1274,7 +1291,7 @@ def download_theoryID(self, thid):
remote = self.remote_theories
if thid not in remote:
raise TheoryNotFound("Theory %s not available." % thid)
-download_and_extract(remote[thid], self._theories_path)
+download_and_extract(remote[thid], self._theories_path, target_name=f"theory_{thid}")

def download_vp_output_file(self, filename, **kwargs):
try: