Skip to content

Commit

Permalink
Merge pull request #2172 from NNPDF/hessian2mc
Browse files Browse the repository at this point in the history
Hessian 2 MC
  • Loading branch information
comane authored Oct 18, 2024
2 parents 16ffb5a + 8f3bf62 commit bf8c890
Show file tree
Hide file tree
Showing 7 changed files with 287 additions and 0 deletions.
33 changes: 33 additions & 0 deletions doc/sphinx/source/tutorials/hessian2mc.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
.. _hessian2mc:
How to transform a Hessian set into the corresponding Monte Carlo PDF
=====================================================================

A Hessian PDF set can be transformed into a Monte Carlo replica set using the
method described in Eq. (4.3) of `PDF4LHC21 <https://arxiv.org/pdf/2203.05506>`_ using a runcard
like the one below.
In this example ``Neig`` are the number of basis eigenvectors of the Hessian PDF.
``mc_pdf_name`` is the name of the new MC replica set that will be created.
``watt_thorne_rnd_seed`` is the random seed used to generate the MC replicas as shown in the equation below.

.. math::
f^{MC, (k)}(x,Q) = f^{H}_{0}(x,Q) + \frac{1}{2} \sum_{j=1}^{Neig} \left(f^{H}_{j,+}(x,Q) - f^{H}_{j,-}(x,Q)\right) R_j^{(k)}
where :math:`f^{MC, (k)}(x,Q)` is the :math:`k`-th Monte Carlo replica, :math:`f^{H}_{0}(x,Q)` the central Hessian member,
:math:`f^{H}_{j,\pm}(x,Q)` the positive and negative eigenvector directions corresponding to the :math:`j`-th Hessian eigenvector, and :math:`R_j^{(k)}`
are random standard normal numbers.

The new MC replica set will be saved in the LHAPDF folder.

.. code:: yaml
pdf: MSTW2008nlo68cl
mc_pdf_name: MSTW2008nlo68cl_mc
num_members: 100
watt_thorne_rnd_seed: 1
actions_:
- write_hessian_to_mc_watt_thorne
1 change: 1 addition & 0 deletions doc/sphinx/source/tutorials/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ Special PDF sets
./reproduce
./bundle-pdfs.rst
./mc2hessian.rst
./hessian2mc.rst
Miscellaneous
-------------
Expand Down
16 changes: 16 additions & 0 deletions validphys2/examples/hessian_2mc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
meta:
author: Lazy Person
title: runcard for conversion of msht20 hessian set to MC
keywords: [hessian Monte carlo]


pdf: MSTW2008nlo68cl

mc_pdf_name: MSTW2008nlo68cl_mc

num_members: 100

watt_thorne_rnd_seed: 1

actions_:
- write_hessian_to_mc_watt_thorne
1 change: 1 addition & 0 deletions validphys2/src/validphys/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
"validphys.mc2hessian",
"reportengine.report",
"validphys.overfit_metric",
"validphys.hessian2mc",
]

log = logging.getLogger(__name__)
Expand Down
9 changes: 9 additions & 0 deletions validphys2/src/validphys/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,15 @@ def check_pdf_is_montecarlo_or_hessian(pdf, **kwargs):
)


@make_argcheck
def check_pdf_is_hessian(pdf, **kwargs):
etype = pdf.error_type
check(
etype in {'symmhessian', 'hessian'},
f"Error type of PDF {pdf} must be 'symmhessian' or 'hessian' and not '{etype}'",
)


@make_argcheck
def check_using_theory_covmat(use_theorycovmat):
"""Check that the `use_theorycovmat` is set to True"""
Expand Down
145 changes: 145 additions & 0 deletions validphys2/src/validphys/hessian2mc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
"""
validphys.hessian2mc.py
This module contains the functions that can be used to convert Hessian sets
like MSHT20 and CT18 to Monte Carlo sets.
The functions implemented here follow equations (4.3) of the paper arXiv:2203.05506
"""

import pathlib
import lhapdf
import os
import logging
import numpy as np

from validphys.lhio import load_all_replicas, rep_matrix, write_replica
from validphys.checks import check_pdf_is_hessian

log = logging.getLogger(__name__)


def write_new_lhapdf_info_file_from_previous_pdf(
path_old_pdfset,
name_old_pdfset,
path_new_pdfset,
name_new_pdfset,
num_members,
description_set="MC representation of hessian PDF set",
errortype="replicas",
):
"""
Writes a new LHAPDF set info file based on an existing set.
"""

# write LHAPDF info file for a new pdf set
with open(path_old_pdfset / f"{name_old_pdfset}.info", "r") as in_stream, open(
path_new_pdfset / f"{name_new_pdfset}.info", "w"
) as out_stream:
for l in in_stream.readlines():
if l.find("SetDesc:") >= 0:
out_stream.write(f'SetDesc: "{description_set}"\n')
elif l.find("NumMembers:") >= 0:
out_stream.write(f"NumMembers: {num_members+1}\n")
elif l.find("ErrorType:") >= 0:
out_stream.write(f"ErrorType: {errortype}\n")
elif l.find("ErrorConfLevel") >= 0:
# remove ErrorConfLevel line
pass
else:
out_stream.write(l)
log.info(f"Info file written to {path_new_pdfset / f'{name_new_pdfset}.info'}")


def write_mc_watt_thorne_replicas(Rjk_std_normal, replicas_df, mc_pdf_path):
"""
Writes the Monte Carlo representation of a PDF set that is in Hessian form
using the Watt-Thorne prescription described in Eq. 4.3 of arXiv:2203.05506.
Parameters
----------
Rjk_std_normal: np.ndarray
Array of shape (num_members, n_eig) containing random standard normal numbers.
replicas_df: pd.DataFrame
DataFrame containing replicas of the hessian set at all scales.
mc_pdf_path: pathlib.Path
Path to the new Monte Carlo PDF set.
"""

for i, rnd_std_norm_vec in enumerate(Rjk_std_normal):

# Odd eigenvectors: negative direction, even eigenvectors: positive direction
df_odd = replicas_df.loc[:, 2::2]
df_even = replicas_df.loc[:, 3::2]
new_column_names = range(1, len(df_even.columns) + 1)

df_even.columns = new_column_names
df_odd.columns = new_column_names

central_member, hess_diff_cov = replicas_df.loc[:, [1]], df_even - df_odd

# Eq. 4.3 of arXiv:2203.05506
mc_replica = central_member.dot([1]) + 0.5 * hess_diff_cov.dot(rnd_std_norm_vec)

headers = f"PdfType: replica\nFormat: lhagrid1\nFromHessReplica: {i}\n"
write_replica(i + 1, mc_pdf_path, headers.encode("UTF-8"), mc_replica)

# Write central replica from hessian set to mc set
headers = f"PdfType: replica\nFormat: lhagrid1\nFromHessReplica: {i}\n"
log.info(f"Writing central replica to {mc_pdf_path}")
write_replica(0, mc_pdf_path, headers.encode("UTF-8"), central_member)


@check_pdf_is_hessian
def write_hessian_to_mc_watt_thorne(pdf, mc_pdf_name, num_members, watt_thorne_rnd_seed=1):
"""
Writes the Monte Carlo representation of a PDF set that is in Hessian form
using the Watt-Thorne (MSHT20) prescription described in Eq. 4.3 of arXiv:2203.05506.
Parameters
----------
pdf: validphys.core.PDF
The Hessian PDF set that is to be converted to Monte Carlo.
mc_pdf_name: str
The name of the new Monte Carlo PDF set.
"""
hessian_set = pdf

lhapdf_path = pathlib.Path(lhapdf.paths()[-1])

# path to hessian lhapdf set
hessian_pdf_path = lhapdf_path / str(hessian_set)

# path to new pdf set
mc_pdf_path = lhapdf_path / mc_pdf_name

# create new pdf set folder in lhapdf path if it does not exist
if not mc_pdf_path.exists():
os.makedirs(mc_pdf_path)

# write LHAPDF info file for new pdf set
write_new_lhapdf_info_file_from_previous_pdf(
path_old_pdfset=hessian_pdf_path,
name_old_pdfset=str(hessian_set),
path_new_pdfset=mc_pdf_path,
name_new_pdfset=mc_pdf_name,
num_members=num_members,
description_set=f"MC representation of {str(hessian_set)}",
errortype="replicas",
)

# load replicas from basis set at all scales
_, grids = load_all_replicas(hessian_set)
replicas_df = rep_matrix(grids)

# Eg for MSHT20, mem=0 => central value; mem=1-64 => 32 eigenvector sets (+/- directions)
n_eig = int((replicas_df.shape[1] - 1) / 2)

np.random.seed(watt_thorne_rnd_seed)
Rjk_std_normal = np.random.standard_normal(size=(num_members, n_eig))

# write replicas to new pdf set
write_mc_watt_thorne_replicas(Rjk_std_normal, replicas_df, mc_pdf_path)
82 changes: 82 additions & 0 deletions validphys2/src/validphys/tests/test_hessian2mc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import numpy as np
import pandas as pd
from unittest import mock
from validphys.hessian2mc import write_mc_watt_thorne_replicas, write_hessian_to_mc_watt_thorne
import pathlib


@mock.patch("validphys.hessian2mc.write_replica")
@mock.patch("validphys.hessian2mc.log.info")
def test_write_mc_watt_thorne_replicas(mock_log_info, mock_write_replica):
# Set up mock inputs
n_eig = 5
n_all_eig = 1 + 2 * n_eig # includes central member, so 1 + 2*n_eig
num_members = 50 # new MC members
num_nx_nq = 100 # xgrid and Q points

replicas_df = pd.DataFrame(
np.random.standard_normal(size=(num_nx_nq, n_all_eig)), columns=range(1, n_all_eig + 1)
)

Rjk_std_normal = np.random.standard_normal(size=(num_members, n_eig))

# import IPython; IPython.embed()
mc_pdf_path = mock.Mock() # Mock the path

# Call the function being tested
write_mc_watt_thorne_replicas(Rjk_std_normal, replicas_df, mc_pdf_path)

# Verify that write_replica was called the correct number of times
assert (
mock_write_replica.call_count == num_members + 1
) # for Rjk members + 1 for the central replica

# Check if the log message was correct for the central replica
mock_log_info.assert_any_call(f"Writing central replica to {mc_pdf_path}")


@mock.patch("validphys.hessian2mc.write_mc_watt_thorne_replicas")
@mock.patch("validphys.hessian2mc.load_all_replicas")
@mock.patch("validphys.hessian2mc.rep_matrix")
@mock.patch("validphys.hessian2mc.write_new_lhapdf_info_file_from_previous_pdf")
@mock.patch("validphys.hessian2mc.os.makedirs")
@mock.patch("validphys.hessian2mc.lhapdf.paths")
def test_write_hessian_to_mc_watt_thorne(
mock_lhapdf_paths,
mock_makedirs,
mock_write_info_file,
mock_rep_matrix,
mock_load_all_replicas,
mock_write_mc_replicas,
):
# Set up mock inputs
msht_like_hessian_pdf = "MSHT20"
mc_pdf_name = "MSHT20_MC"
num_members = 100

mock_load_all_replicas.return_value = (None, None)

mock_lhapdf_paths.return_value = [pathlib.Path("/path/to/lhapdf")]

mock_rep_matrix.return_value = np.random.randn(5, 7) # Mocked replica matrix

# Call the function being tested
write_hessian_to_mc_watt_thorne(msht_like_hessian_pdf, mc_pdf_name, num_members)

# Verify that the necessary directories were created
mc_pdf_path = pathlib.Path("/path/to/lhapdf") / mc_pdf_name
mock_makedirs.assert_called_once_with(mc_pdf_path)

# Verify that the LHAPDF info file was written
mock_write_info_file.assert_called_once_with(
path_old_pdfset=pathlib.Path("/path/to/lhapdf") / msht_like_hessian_pdf,
name_old_pdfset=msht_like_hessian_pdf,
path_new_pdfset=mc_pdf_path,
name_new_pdfset=mc_pdf_name,
num_members=num_members,
description_set=f"MC representation of {msht_like_hessian_pdf}",
errortype="replicas",
)

# Verify that the replicas were written
mock_write_mc_replicas.assert_called_once()

0 comments on commit bf8c890

Please sign in to comment.