diff --git a/Combine/Checks/README.md b/Combine/Checks/README.md new file mode 100644 index 00000000..37dd7c70 --- /dev/null +++ b/Combine/Checks/README.md @@ -0,0 +1,57 @@ +# Impacts + +Impacts documentation to come here. + +# Bias studies + +Here we provide a script to perform a simple bias study. + +## Inputs + +The only pre-requisite is a workspace with a single category, +and therefore a single multipdf (the object that controls the envelope method) +and a single pdf index corresponding to the choice of functional form. + +To create this, it's simple to use the existing combineCards functionality, for example: +``` +combineCards.py Datacard.txt --ic cat_name > Datacard_cat_name.txt +``` + +This creates a .txt datacard with only categories matching the reg exp `cat_name` included. +Be careful: you will probably have to manually delete some pdfindex lines at the bottom; +the script does not know that these correspond to the analysis categories, +and therefore will leave them all in (you only want the one corresponding to the category you are studying). + +Once that is done, you can run your usual `text2workspace` command to generate the `-d, --datacard` input for this script. + +## Usage + +The script is split into three different stages: + * `-t, --toys`: throw and save a total of `-n,--nToys` toys for each of the candidate functions included in the envelope + * `-f, --fits`: fit each of those toys and extract the uncertainty + * `-p, --plots`: plot the pull distribution of the resulting fits + +You can then inspect the output plots and hope to see an approximately gaussian shape with zero mean and unit width. +Normally, provided that the absolute value of the mean is less than 0.14, this is considered satisfactory. + +The three steps can be run in one go, but it's probably safer to run them one-by-one. +Here is an example: + +``` +./RunBiasStudy.py -d Datacard_mu_ggH_cat0.root -t +./RunBiasStudy.py -d Datacard_mu_ggH_cat0.root -f -c "--cminDefaultMinimizerStrategy 0 --X-rtd MINIMIZER_freezeDisassociatedParams --X-rtd MINIMIZER_multiMin_hideConstants --X-rtd MINIMIZER_multiMin_maskConstraints --X-rtd MINIMIZER_multiMin_maskChannels=2 --freezeParameters MH" +./RunBiasStudy.py -d Datacard_mu_ggH_cat0.root -p --gaussianFit +``` +The options for the second step are passed to combine; these are recommended to get the fit to converge. +The additional option on the plotting is fairly self-explanatory; it adds a gaussian fit to the output plot. + +## More options + +There are various things one can tweak for these studies. +Here is a list of the common options: + * `-n,--nToys`: the default number of toys is 1000 per function, but can be lowered or raised. + * `-e,--expectSignal`: the injected signal strength is 1 by default, but zero can also be checked, or higher values for searches. + * `-s,--seed`: the default value of -1 finds a random seed; you can fix this for reproducility if you prefer. + * `--poi`: if your parameter of interest is called something other than `r`, say so here. + * `--split`: default number of toys to be thrown or fits to be performed in one go. Set to 500 but may need to be lowered for memory reasons if you have a more complicated fit. + * `--selectFunction`: you can specify a string here to only select certain functions for these studies (e.g. `bern` to match all Bernstein polynomials, `exp1` to match just the first-order exponential). diff --git a/Combine/Checks/RunBiasStudy.py b/Combine/Checks/RunBiasStudy.py new file mode 100755 index 00000000..86ffbcd2 --- /dev/null +++ b/Combine/Checks/RunBiasStudy.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python + +from biasUtils import * + +from optparse import OptionParser +parser = OptionParser() +parser.add_option("-d","--datacard",default="Datacard.root") +parser.add_option("-w","--workspace",default="w") +parser.add_option("-t","--toys",action="store_true", default=False) +parser.add_option("-n","--nToys",default=1000,type="int") +parser.add_option("-f","--fits",action="store_true", default=False) +parser.add_option("-p","--plots",action="store_true", default=False) +parser.add_option("-e","--expectSignal",default=1.,type="float") +parser.add_option("-m","--mH",default=125.,type="float") +parser.add_option("-c","--combineOptions",default="") +parser.add_option("-s","--seed",default=-1,type="int") +parser.add_option("--dryRun",action="store_true", default=False) +parser.add_option("--poi",default="r") +parser.add_option("--split",default=500,type="int") +parser.add_option("--selectFunction",default=None) +parser.add_option("--gaussianFit",action="store_true", default=False) +(opts,args) = parser.parse_args() +print +if opts.nToys>opts.split and not opts.nToys%opts.split==0: raise RuntimeError('The number of toys %g needs to be smaller than or divisible by the split number %g'%(opts.nToys, opts.split)) + +import ROOT as r +r.gROOT.SetBatch(True) +r.gStyle.SetOptStat(2211) + +ws = r.TFile(opts.datacard).Get(opts.workspace) + +pdfs = rooArgSetToList(ws.allPdfs()) +multipdfName = None +for pdf in pdfs: + if pdf.InheritsFrom("RooMultiPdf"): + if multipdfName is not None: raiseMultiError() + multipdfName = pdf.GetName() + print 'Conduct bias study for multipdf called %s'%multipdfName +multipdf = ws.pdf(multipdfName) +print + +varlist = rooArgSetToList(ws.allCats()) +indexName = None +for var in varlist: + if var.GetName().startswith('pdfindex'): + if indexName is not None: raiseMultiError() + indexName = var.GetName() + print 'Found index called %s'%indexName +print + +from collections import OrderedDict as od +indexNameMap = od() +for ipdf in range(multipdf.getNumPdfs()): + if opts.selectFunction is not None: + if not multipdf.getPdf(ipdf).GetName().count(opts.selectFunction): continue + indexNameMap[ipdf] = multipdf.getPdf(ipdf).GetName() + +if opts.toys: + if not path.isdir('BiasToysn'): system('mkdir -p BiasToys') + toyCmdBase = 'combine -m %.4f -d %s -M GenerateOnly --expectSignal %.4f -s %g --saveToys %s '%(opts.mH, opts.datacard, opts.expectSignal, opts.seed, opts.combineOptions) + for ipdf,pdfName in indexNameMap.iteritems(): + name = shortName(pdfName) + if opts.nToys > opts.split: + for isplit in range(opts.nToys//opts.split): + toyCmd = toyCmdBase + ' -t %g -n _%s_split%g --setParameters %s=%g --freezeParameters %s'%(opts.split, name, isplit, indexName, ipdf, indexName) + run(toyCmd, dry=opts.dryRun) + system('mv higgsCombine_%s* %s'%(name, toyName(name,split=isplit))) + else: + toyCmd = toyCmdBase + ' -t %g -n _%s --setParameters %s=%g --freezeParameters %s'%(opts.nToys, name, indexName, ipdf, indexName) + run(toyCmd, dry=opts.dryRun) + system('mv higgsCombine_%s* %s'%(name, toyName(name))) +print + +if opts.fits: + if not path.isdir('BiasFits'): system('mkdir -p BiasFits') + fitCmdBase = 'combine -m %.4f -d %s -M MultiDimFit -P %s --algo singles %s '%(opts.mH, opts.datacard, opts.poi, opts.combineOptions) + for ipdf,pdfName in indexNameMap.iteritems(): + name = shortName(pdfName) + if opts.nToys > opts.split: + for isplit in range(opts.nToys//opts.split): + fitCmd = fitCmdBase + ' -t %g -n _%s_split%g --toysFile=%s'%(opts.split, name, isplit, toyName(name,split=isplit)) + run(fitCmd, dry=opts.dryRun) + system('mv higgsCombine_%s* %s'%(name, fitName(name,split=isplit))) + run('hadd %s BiasFits/*%s*split*.root'%(fitName(name),name), dry=opts.dryRun) + else: + fitCmd = fitCmdBase + ' -t %g -n _%s --toysFile=%s'%(opts.nToys, name, toyName(name)) + run(fitCmd, dry=opts.dryRun) + system('mv higgsCombine_%s* %s'%(name, fitName(name))) + +if opts.plots: + if not path.isdir('BiasPlots'): system('mkdir -p BiasPlots') + for ipdf,pdfName in indexNameMap.iteritems(): + name = shortName(pdfName) + tfile = r.TFile(fitName(name)) + tree = tfile.Get('limit') + pullHist = r.TH1F('pullsForTruth_%s'%name, 'Pull distribution using the envelope to fit %s'%name, 80, -4., 4.) + pullHist.GetXaxis().SetTitle('Pull') + pullHist.GetYaxis().SetTitle('Entries') + for itoy in range(opts.nToys): + tree.GetEntry(3*itoy) + if not getattr(tree,'quantileExpected')==-1: + raiseFailError(itoy,True) + continue + bf = getattr(tree, 'r') + tree.GetEntry(3*itoy+1) + if not abs(getattr(tree,'quantileExpected')--0.32)<0.001: + raiseFailError(itoy,True) + continue + lo = getattr(tree, 'r') + tree.GetEntry(3*itoy+2) + if not abs(getattr(tree,'quantileExpected')-0.32)<0.001: + raiseFailError(itoy,True) + continue + hi = getattr(tree, 'r') + diff = bf - opts.expectSignal + unc = 0.5 * (hi-lo) + if unc > 0.: + pullHist.Fill(diff/unc) + canv = r.TCanvas() + pullHist.Draw() + if opts.gaussianFit: + r.gStyle.SetOptFit(111) + pullHist.Fit('gaus') + canv.SaveAs('%s.pdf'%plotName(name)) + canv.SaveAs('%s.png'%plotName(name)) diff --git a/Combine/Checks/biasUtils.py b/Combine/Checks/biasUtils.py new file mode 100644 index 00000000..6cf638c2 --- /dev/null +++ b/Combine/Checks/biasUtils.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python + +def rooArgSetToList(argset): ## taken from Andrea Marini's great repo here: https://github.com/amarini/rfwsutils/blob/master/wsutils.py#L300-L313 + """creates a python list with the contents of argset (which should be a RooArgSet)""" + it = argset.createIterator() + + retval = [] + while True: + obj = it.Next() + + if obj == None: + break + + retval.append(obj) + + return retval + +def raiseMultiError(lax=False): + raise RuntimeError('Found more than one multipdf here - please create a workspace with just one for these bias studies. You can use "combineCards.py Datacard.txt --ic cat_name" for this)') + +def raiseFailError(itoy, lax=False): + text = 'some fits have failed, wrong quantile for toy number %g'%itoy + if not lax: raise RuntimeError('ERROR %s'%text) + else: print 'WARNING %s'%text + +def shortName(name): + return name.split('_')[-1] + +def toyName(name, split=None): + retval = 'BiasToys/biasStudy_%s_toys.root'%name + if split is not None: + split = int(split) + retval = retval.replace(name,'%s_split%g'%(name,split)) + return retval + +def fitName(name, split=None): + retval = 'BiasFits/biasStudy_%s_fits.root'%name + if split is not None: + split = int(split) + retval = retval.replace(name,'%s_split%g'%(name,split)) + return retval + +def plotName(name): + return 'BiasPlots/biasStudy_%s_pulls'%name + +def run(cmd, dry=False): + print cmd + if not dry: system(cmd)