Source code for bamboo.analysisutils

""" The :py:mod:`bamboo.analysisutils` module bundles a number of more
specific helper methods that use the tree decorators and integrate with
other components, connect to external services, or are factored out of the
classes in :py:mod:`bamboo.analysismodules` to facilitate reuse.
"""
import copy
import logging
import os.path
import subprocess
import urllib.parse
import warnings

import yaml

logger = logging.getLogger(__name__)

_SAMADhi_found = False
try:
    from cp3_llbb.SAMADhi.SAMADhi import Sample, SAMADhiDB
    _SAMADhi_found = True
except ImportError:
    pass

bamboo_cachedir = os.path.join(os.getenv(
    "XDG_CACHE_HOME", os.path.join(os.path.expanduser("~"), ".cache")), "bamboo")


[docs] def addLumiMask(sel, jsonName, runRange=None, runAndLS=None, name="goodlumis"):
    """ Refine selection with a luminosity block filter

    Typically applied directly to the root selection (for data).
    runAndLS should be a tuple of expressions with the run number and luminosity block ID.
    The run range is used to limit the part of the JSON file to consider,
    see the LumiMask helper class for details.
    """
    from . import treefunctions as op
    if runAndLS is None:
        raise RuntimeError(
            "Cannot construct a filter for the good lumi blocks without accessors "
            "(backend.create(..., runAndLS=XXX)), tree->(run, LS)")
    lumiSel = op.define(
        "LumiMask", 'const auto <<name>> = LumiMask::fromJSON("{}"{});'.format(
            jsonName, (", {:d}, {:d}".format(*runRange) if runRange is not None else "")))
    return sel.refine(name, cut=lumiSel.accept(*runAndLS))
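
# Example usage (a minimal sketch, not from this module): the JSON file name and
# run range below are hypothetical placeholders for a certified-luminosity JSON.
#
#     noSel = addLumiMask(noSel, "Cert_Run2018_GoldenJSON.txt",
#                         runRange=(315252, 325175),
#                         runAndLS=(t.run, t.luminosityBlock))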
def _dasLFNtoPFN(lfn, dasConfig):
    localPFN = os.path.join(dasConfig["storageroot"], lfn.lstrip("/"))
    if dasConfig.get("checklocalfiles", False) and "xrootdredirector" in dasConfig:
        if os.path.isfile(localPFN):
            return localPFN
        else:
            xrootdPFN = "root://{redirector}//{lfn}".format(
                redirector=dasConfig["xrootdredirector"], lfn=lfn)
            logger.warning(f"PFN {localPFN} not available, falling back to xrootd with {xrootdPFN}")
            return xrootdPFN
    else:
        return localPFN


def sample_resolveFiles(smpName, smpCfg, dbCache=None, envConfig=None, cfgDir="."):
    import os.path
    listfile = None
    if "files" in smpCfg and str(smpCfg["files"]) == smpCfg["files"]:
        listfile = (smpCfg["files"] if os.path.isabs(smpCfg["files"])
                    else os.path.join(cfgDir, smpCfg["files"]))
    if "db" in smpCfg and dbCache is not None:
        if listfile:  # warn if old entry and files are still there
            logger.warning(
                f"'dbcache' specified at file level and both 'db' and 'files: {listfile}' "
                "at config level. The last one will be ignored "
                "(without 'db' the list from the text file will be used)")
        if not os.path.isabs(dbCache):
            dbCache = os.path.join(cfgDir, dbCache)
        if not os.path.exists(dbCache):
            import os
            os.makedirs(dbCache)
        listfile = os.path.join(dbCache, f"{smpName}.txt")
    # read cache, if it's there
    fromFile, cacheID = [], None
    if listfile and os.path.isfile(listfile):
        with open(listfile) as smpF:
            for ln in smpF:
                if ln.startswith("#"):
                    if cacheID is None:
                        cacheID = ln.lstrip("#").strip()  # first commented line, ignore any others
                elif ln.strip():
                    fromFile.append(ln.strip())
        if len(fromFile) == 0:
            raise RuntimeError(f"No file names read from {listfile}")
    files = []
    if "db" in smpCfg:
        if isinstance(smpCfg["db"], str):
            newCacheID = smpCfg["db"]
            dbEntries = [smpCfg["db"]]
        else:
            newCacheID = ";;".join(smpCfg["db"])
            dbEntries = smpCfg["db"]
        if newCacheID == cacheID:
            files = fromFile
        else:  # do queries
            for dbEntry in dbEntries:  # convert to list if string
                if ":" not in dbEntry:
                    raise RuntimeError(
                        "'db' entry should be of the format 'protocol:location', "
                        "e.g. 'das:/SingleMuon/Run2016E-03Feb2017-v1/MINIAOD'")
                protocol, dbLoc = dbEntry.split(":")
                if protocol == "das":
                    dasConfig = envConfig["das"]
                    dasQuery = f"file dataset={dbLoc}"
                    logger.debug(f"Querying DAS: '{dasQuery}'")
                    try:
                        entryFiles = [
                            _dasLFNtoPFN(lfn, dasConfig) for lfn in [
                                ln.strip() for ln in subprocess.check_output(
                                    ["dasgoclient", "-query", dasQuery],
                                    stderr=subprocess.STDOUT
                                ).decode().split()]
                            if len(lfn) > 0]
                    except subprocess.CalledProcessError as ex:
                        raise RuntimeError(
                            f"Error from \"{' '.join(ex.cmd)}\": "
                            f"{ex.output.decode().strip()}") from None
                    files += entryFiles
                    if len(entryFiles) == 0:
                        raise RuntimeError(f"No files found with DAS query {dasQuery}")
                elif protocol == "samadhi":
                    if not _SAMADhi_found:
                        raise RuntimeError(
                            f"SAMADhi could not be found, cannot resolve '{dbEntry}'. "
                            "Please install the SAMADhi library "
                            "(see https://github.com/cp3-llbb/SAMADhi) "
                            "if you want to use the database to locate samples")
                    samaCred = "~/.samadhi"
                    if "SAMADhi" in envConfig and "credentials" in envConfig["SAMADhi"]:
                        samaCred = envConfig["SAMADhi"]["credentials"]
                    with SAMADhiDB(credentials=samaCred):
                        if dbLoc.isnumeric():
                            descr = f"id {dbLoc}"
                            sample = Sample.get_or_none(Sample.id == int(dbLoc))
                        else:
                            descr = f"name '{dbLoc}'"
                            sample = Sample.get_or_none(Sample.name == dbLoc)
                        if not sample:
                            raise RuntimeError(f"Could not find sample with {descr} in SAMADhi")
                        entryFiles = [f.pfn for f in sample.files]
                        if len(entryFiles) == 0:
                            raise RuntimeError(f"No files found with SAMADhi {descr}")
                    files += entryFiles
                else:
                    raise RuntimeError(f"Unsupported protocol in '{dbEntry}': {protocol}")
            if listfile:
                with open(listfile, "w") as listF:
                    listF.write(f"# {newCacheID}\n")
                    listF.write("\n".join(files))
    elif "files" not in smpCfg:
        raise RuntimeError(f"Cannot load files for {smpName}: neither 'db' nor 'files' specified")
    elif listfile:
        files = fromFile
    else:  # list in yml
        files = [(fn if os.path.isabs(fn) or urllib.parse.urlparse(fn).scheme != ""
                  else os.path.join(cfgDir, fn))
                 for fn in smpCfg["files"]]
    return files
[docs] class YMLIncludeLoader(yaml.SafeLoader):
    """ Custom yaml loading to support including config files.

    Use `!include (file)` to insert content of `file` at that position.
    """

    def __init__(self, stream):
        super().__init__(stream)
        self._root = os.path.split(stream.name)[0]

    def include(self, node):
        filename = os.path.join(self._root, self.construct_scalar(node))
        with open(filename) as f:
            return yaml.load(f, YMLIncludeLoader)
YMLIncludeLoader.add_constructor('!include', YMLIncludeLoader.include)


def parseAnalysisConfig(anaCfgName):
    with open(anaCfgName) as anaCfgF:
        analysisCfg = yaml.load(anaCfgF, YMLIncludeLoader)
    return analysisCfg
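
# Example (a minimal sketch with hypothetical file names): with the loader above,
# an analysis YAML file can pull in a shared samples file,
#
#     # analysis.yml
#     samples: !include samples_2018.yml
#
# and the whole configuration is then read with
#
#     cfg = parseAnalysisConfig("analysis.yml")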
[docs] def getAFileFromAnySample(samples, resolveFiles=None, cfgDir="."):
    """ Helper method: get a file from any sample (minimizing the risk of errors)

    Tries to find any samples with:

    - a list of files
    - a cache file
    - a SAMADhi path
    - a DAS path

    If successful, a single read / query is sufficient to retrieve a file
    """
    # list of files -> return 1st
    for smpNm, smpCfg in samples.items():
        if ("files" in smpCfg) and (str(smpCfg["files"]) != smpCfg["files"]):
            return (smpNm, smpCfg,
                    (smpCfg["files"][0]
                     if (os.path.isabs(smpCfg["files"][0])
                         or urllib.parse.urlparse(smpCfg["files"][0]).scheme)
                     else os.path.join(cfgDir, smpCfg["files"][0])))
    # try to get them from a cache file or database (ordered by less-to-more risky)
    failed_names = set()
    for method, condition in [
            (" from cache file", (
                lambda smpCfg: "files" in smpCfg and isinstance(smpCfg["files"], str))),
            (" from SAMADhi", (
                lambda smpCfg: ("db" in smpCfg and (
                    smpCfg["db"] if isinstance(smpCfg["db"], str)
                    else smpCfg["db"][0]).startswith("samadhi:")))),
            (" from DAS", (
                lambda smpCfg: ("db" in smpCfg and (
                    smpCfg["db"] if isinstance(smpCfg["db"], str)
                    else smpCfg["db"][0]).startswith("das:")))),
            ("", (lambda smpCfg: True))]:
        for smpNm, smpCfg in samples.items():
            if smpNm not in failed_names and condition(smpCfg):
                try:
                    files = resolveFiles(smpNm, smpCfg)
                    return smpNm, smpCfg, files[0]
                except Exception as ex:
                    failed_names.add(smpNm)
                    logger.warning(f"Problem while resolving files for {smpNm}{method}: {ex!s}")
    raise RuntimeError(
        "Failed to resolve a file from any sample "
        "(see the warnings above for more information)")
[docs] def readEnvConfig(explName=None):
    """ Read computing environment config file (batch system, storage site etc.)

    For using a batch cluster, the [batch] section should have a 'backend' key,
    and there should be a section with the name of the backend (slurm, htcondor...),
    see bamboo.batch_<backend> for details.
    The storage site information needed to resolve the PFNs for datasets retrieved
    from DAS should be specified under the [das] section (sitename and storageroot).
    """
    import os
    from configparser import ConfigParser

    def readFromFile(name):
        cfgp = ConfigParser()
        cfgp.optionxform = str
        cfgp.read(name)
        cfg = {sName: dict(cfgp[sName]) for sName in cfgp.sections()}
        return cfg

    xdgCfg = os.getenv("XDG_CONFIG_HOME", os.path.join(os.path.expanduser("~"), ".config"))
    toTry = ["bamboo.ini", "bamboorc", os.path.join(xdgCfg, "bamboorc")]
    if explName:
        toTry.insert(0, explName)
    for iniName in toTry:
        if os.path.exists(iniName) and os.path.isfile(iniName):
            try:
                res = readFromFile(iniName)
                logger.info(f"Read config from file {iniName}")
                return res
            except Exception as ex:
                logger.warning(f"Problem reading config file {iniName}: {ex}")
    logger.error(
        "No valid environment config file found, please copy one from the .ini files "
        "from the examples directory to ~/.config/bamboorc, or pass the --envConfig option")
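
# Example ~/.config/bamboorc (a minimal sketch; the batch backend, site name and
# storage root below are hypothetical placeholders, only the keys documented above
# are used):
#
#     [batch]
#     backend = slurm
#
#     [das]
#     sitename = T2_XY_Example
#     storageroot = /storage/data/cms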
_yieldsTexPreface = "\n".join( f"% {ln}" for ln in r"""Yields table generated automatically by bamboo (same as plotIt). Needed packages: \usepackage{booktabs} Use the following if building a CMS document \makeatletter \newcommand{\thickhline}{% \noalign {\ifnum 0=`}\fi \hrule height .08em \futurelet \reserved@a \@xhline } \newcommand{\thinhline}{% \noalign {\ifnum 0=`}\fi \hrule height .05em \futurelet \reserved@a \@xhline } \makeatother \newcommand{\toprule}{\noalign{\vskip0pt}\thickhline\noalign{\vskip.65ex}} \newcommand{\midrule}{\noalign{\vskip.4ex}\thinhline\noalign{\vskip.65ex}} \newcommand{\bottomrule}{\noalign{\vskip.4ex}\thickhline\noalign{\vskip0pt}} """.split("\n")) def _texProcName(procName): if "$" in procName: procName = procName.replace("_", "\\_") if "#" in procName: # assume ROOT LaTeX procName = "${}$".format(procName.replace("#", "\\")) return procName def _makeYieldsTexTable( report, samples, entryPlots, stretch=1.5, orientation="v", align="c", yieldPrecision=1, ratioPrecision=2): if orientation not in ("v", "h"): raise RuntimeError(f"Unsupported table orientation: {orientation} (valid: 'h' and 'v')") import plotit.plotit from plotit.plotit import Stack import numpy as np from itertools import repeat def getHist(smp, plot): try: h = smp.getHist(plot) h.contents # check return h except KeyError: return None def colEntriesFromCFREntryHists(report, entryHists, precision=1, showUncert=True): stacks_t = [] colEntries = [] for entries in report.titles.values(): s_entries = [] for eName in entries: eh = entryHists[eName] if eh is not None: if (not isinstance(eh, Stack)) or eh.entries: s_entries.append(eh) st_t = Stack(entries=s_entries) if s_entries: uncert = (fr" \pm {{:.{precision}f}}".format(np.sqrt(st_t.sumw2 + st_t.syst2)[1]) if showUncert else "") colEntries.append(f"${{0:.{precision:d}f}}{{1}}$".format(st_t.contents[1], uncert)) stacks_t.append(st_t) else: colEntries.append("---") stacks_t.append(None) return stacks_t, colEntries smp_signal = [smp for smp in samples if smp.cfg.type == "SIGNAL"] smp_mc = [smp for smp in samples if smp.cfg.type == "MC"] smp_data = [smp for smp in samples if smp.cfg.type == "DATA"] sepStr = "|l|" smpHdrs = [] titles = list(report.titles.keys()) entries_smp = [] stTotMC, stTotData = None, None if smp_signal: sepStr += "|" for sigSmp in smp_signal: _, colEntries = colEntriesFromCFREntryHists( report, {eName: getHist(sigSmp, p) for eName, p in entryPlots.items()}, precision=yieldPrecision) sepStr += f"{align}|" smpHdrs.append(f"{_texProcName(sigSmp.cfg.yields_group)} {sigSmp.cfg.cross_section:f}pb") entries_smp.append(colEntries) if smp_mc: sepStr += "|" for mcSmp in smp_mc: stTotMC, colEntries = colEntriesFromCFREntryHists( report, {eName: getHist(mcSmp, p) for eName, p in entryPlots.items()}, precision=yieldPrecision) sepStr += f"{align}|" if isinstance(mcSmp, plotit.plotit.Group): smpHdrs.append(_texProcName(mcSmp.name)) else: smpHdrs.append(_texProcName(mcSmp.cfg.yields_group)) entries_smp.append(colEntries) if len(smp_mc) > 1: sepStr += f"|{align}|" smpHdrs.append("Tot. 
MC") stTotMC, colEntries = colEntriesFromCFREntryHists( report, {eName: Stack(entries=[h for h in (getHist(smp, p) for smp in smp_mc) if h]) for eName, p in entryPlots.items()}, precision=yieldPrecision) entries_smp.append(colEntries) if smp_data: sepStr += f"|{align}|" smpHdrs.append("Data") stTotData, colEntries = colEntriesFromCFREntryHists( report, {eName: Stack(entries=[h for h in (getHist(smp, p) for smp in smp_data) if h]) for eName, p in entryPlots.items()}, precision=0, showUncert=False) entries_smp.append(colEntries) if smp_data and smp_mc: sepStr += f"|{align}|" smpHdrs.append("Data/MC") colEntries = [] import numpy.ma as ma for stData, stMC in zip(stTotData, stTotMC): if stData is not None and stMC is not None: dtCont = stData.contents mcCont = ma.array(stMC.contents) ratio = dtCont / mcCont ratioErr = ( np.sqrt(mcCont**2 * stData.sumw2 + dtCont**2 * (stMC.sumw2 + stMC.syst2)) / mcCont**2) if mcCont[1] != 0.: colEntries.append(r"${{0:.{0}f}} \pm {{1:.{0}f}}$".format( ratioPrecision).format(ratio[1], ratioErr[1])) else: colEntries.append("---") else: colEntries.append("---") entries_smp.append(colEntries) c_bySmp = entries_smp c_byHdr = [[smpEntries[i] for smpEntries in entries_smp] for i in range(len(titles))] if orientation == "v": rowHdrs = titles # selections colHdrs = ["Cat."] + smpHdrs # samples c_byRow = c_byHdr c_byCol = c_bySmp else: # horizontal sepStr = "|l|{}|".format("|".join(repeat(align, len(titles)))) rowHdrs = smpHdrs # samples colHdrs = ["Cat."] + titles # selections c_byRow = c_bySmp c_byCol = c_byHdr if entries_smp: colWidths = ([max(len(rh) for rh in rowHdrs) + 1] + [max(len(hdr), max(len(c) for c in col)) + 1 for hdr, col in zip(colHdrs[1:], c_byCol)]) return "\n".join([ f"\\renewcommand{{\\arraystretch}}{{{stretch}}}", f"\\begin{{tabular}}{{ {sepStr} }}", " \\hline", " {} \\\\".format(" & ".join(h.ljust(cw) for cw, h in zip(colWidths, colHdrs))), " \\hline"] + [ " {} \\\\".format(" & ".join(en.rjust(cw) for cw, en in zip(colWidths, [rh] + rowEntries))) for rh, rowEntries in zip(rowHdrs, c_byRow) ] + [ " \\hline", "\\end{tabular}" ])
[docs] def printCutFlowReports( config, reportList, workdir=".", resultsdir=".", suffix=None, readCounters=lambda f: -1., eras=("all", None), verbose=False): """ Print yields to the log file, and write a LaTeX yields table for each Samples can be grouped (only for the LaTeX table) by specifying the ``yields-group`` key (overriding the regular ``groups`` used for plots). The sample (or group) name to use in this table should be specified through the ``yields-title`` sample key. In addition, the following options in the ``plotIt`` section of the YAML configuration file influence the layout of the LaTeX yields table: - ``yields-table-stretch``: ``\\arraystretch`` value, 1.15 by default - ``yields-table-align``: orientation, ``h`` (default), samples in rows, or ``v``, samples in columns - ``yields-table-text-align``: alignment of text in table cells (default: ``c``) - ``yields-table-numerical-precision-yields``: number of digits after the decimal point for yields (default: 1) - ``yields-table-numerical-precision-ratio``: number of digits after the decimal point for ratios (default: 2) """ eraMode, eras = eras if not eras: # from config if not specified eras = list(config["eras"].keys()) # helper: print one bamboo.plots.CutFlowReport.Entry def printEntry(entry, printFun=logger.info, recursive=True, genEvents=None): if entry.nominal is not None: effMsg = "" if entry.parent: sumPass = entry.nominal.GetBinContent(1) sumTotal = (entry.parent.nominal.GetBinContent(1) if entry.parent.nominal is not None else 0.) if sumTotal != 0.: effMsg = f", Eff={sumPass/sumTotal:.2%}" if genEvents: effMsg += f", TotalEff={sumPass/genEvents:.2%}" printFun( f"Selection {entry.name}: N={entry.nominal.GetEntries()}, " f"SumW={entry.nominal.GetBinContent(1)}{effMsg}") if recursive: for c in entry.children: printEntry(c, printFun=printFun, recursive=recursive, genEvents=genEvents) # retrieve results files, get generated events for each sample from .root import gbl resultsFiles = dict() generated_events = dict() for smp, smpCfg in config["samples"].items(): if "era" not in smpCfg or smpCfg["era"] in eras: resF = gbl.TFile.Open(os.path.join(resultsdir, f"{smp}.root")) resultsFiles[smp] = resF genEvts = None if "generated-events" in smpCfg: if isinstance(smpCfg["generated-events"], str): genEvts = readCounters(resF)[smpCfg["generated-events"]] else: genEvts = smpCfg["generated-events"] generated_events[smp] = genEvts has_plotit = None try: import plotit.plotit # noqa: F401 has_plotit = True except ImportError: has_plotit = False from bamboo.plots import EquidistantBinning as EqB class YieldPlot: def __init__(self, name): self.name = name self.plotopts = dict() self.axisTitles = ("Yield",) self.binnings = [EqB(1, 0., 1.)] for report in reportList: smpReports = {smp: report.readFromResults(resF) for smp, resF in resultsFiles.items()} # debug print for smp, smpRep in smpReports.items(): if smpRep.printInLog: logger.info(f"Cutflow report {report.name} for sample {smp}") for root in smpRep.rootEntries(): printEntry(root, genEvents=generated_events[smp]) # save yields.tex (if needed) if any(len(cb) > 1 or tt != cb[0] for tt, cb in report.titles.items()): if not has_plotit: logger.error(f"Could not load plotit python library, no TeX yields tables for {report.name}") else: yield_plots = [YieldPlot(f"{report.name}_{eName}") for tEntries in report.titles.values() for eName in tEntries] out_eras = [] if len(eras) > 1 and eraMode in ("all", "combined"): nParts = [report.name] if suffix: nParts.append(suffix) 
out_eras.append(("{}.tex".format("_".join(nParts)), eras)) if len(eras) == 1 or eraMode in ("split", "all"): for era in eras: nParts = [report.name] if suffix: nParts.append(suffix) nParts.append(era) out_eras.append(("{}.tex".format("_".join(nParts)), [era])) for outName, iEras in out_eras: pConfig, samples, plots, _, _ = loadPlotIt( config, yield_plots, eras=iEras, workdir=workdir, resultsdir=resultsdir, readCounters=readCounters) tabBlock = _makeYieldsTexTable( report, samples, {p.name[len(report.name) + 1:]: p for p in plots}, stretch=pConfig.yields_table_stretch, orientation=pConfig.yields_table_align, align=pConfig.yields_table_text_align, yieldPrecision=pConfig.yields_table_numerical_precision_yields, ratioPrecision=pConfig.yields_table_numerical_precision_ratio) if tabBlock: with open(os.path.join(workdir, outName), "w") as ytf: ytf.write("\n".join((_yieldsTexPreface, tabBlock))) logger.info( "Yields table for era(s) {} was written to {}".format( ",".join(iEras), os.path.join(workdir, outName))) else: logger.warning(f"No samples for era(s) {','.join(iEras)}, so no yields.tex")
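
# Example plotIt section controlling the LaTeX yields table written above
# (a minimal sketch as YAML in a comment; only keys documented in
# printCutFlowReports are used, the values are illustrative):
#
#     plotIt:
#       yields-table-stretch: 1.15
#       yields-table-align: v
#       yields-table-text-align: c
#       yields-table-numerical-precision-yields: 1
#       yields-table-numerical-precision-ratio: 2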
def plotIt_files(samplesDict, resultsdir=".", eras=None, readCounters=lambda f: -1., vetoAttributes=None): files = dict() for smpName, smpCfg in samplesDict.items(): if smpCfg.get("era") in eras: resultsName = f"{smpName}.root" smpOpts = (dict(smpCfg) if vetoAttributes is None else {k: v for k, v in smpCfg.items() if k not in vetoAttributes}) isMC = (smpCfg.get("group") != "data") if "type" not in smpOpts: smpOpts["type"] = ("mc" if isMC else "data") if isMC: if "cross-section" not in smpCfg: logger.warning(f"Sample {smpName} is of type MC, but no cross-section specified") smpOpts["cross-section"] = smpCfg.get("cross-section", 1.) from .root import gbl resultsFile = gbl.TFile.Open(os.path.join(resultsdir, resultsName)) if "generated-events" not in smpCfg: logger.error( f"No key 'generated-events' found for MC sample {smpName}, " "normalization will be wrong") elif isinstance(smpCfg["generated-events"], str): counters = readCounters(resultsFile) if smpCfg["generated-events"] not in counters: raise RuntimeError( f"Counter with name \"{smpCfg['generated-events']}\" not found " f"for sample {smpName}") smpOpts["generated-events"] = counters[smpCfg["generated-events"]] else: smpOpts["generated-events"] = smpCfg["generated-events"] files[resultsName] = smpOpts return files plotit_plotdefaults = { "x-axis": lambda p: f"{p.axisTitles[0]}", "x-axis-range": lambda p: [p.binnings[0].minimum, p.binnings[0].maximum] } def plotIt_plots(plotList, plotDefaults=None): plotit_plots = dict() for plot in plotList: plotOpts = dict(plotit_plotdefaults) if plotDefaults is not None: plotOpts.update(plotDefaults) plotOpts.update(plot.plotopts) plotOpts = {k: (v(plot) if callable(v) else v) for k, v in plotOpts.items()} plotit_plots[plot.name] = plotOpts return plotit_plots def plotIt_config(config, root=".", eras=None): plotitCfg = copy.deepcopy(config.get("plotIt", {"configuration": {}})) plotitCfg["configuration"].update({ "root": root, "eras": eras, "luminosity": {era: config["eras"][era]["luminosity"] for era in eras} }) return plotitCfg def savePlotItConfig(cfgName, plotitCfg, filesCfg, plotsCfg): fullCfg = copy.deepcopy(plotitCfg) fullCfg["files"] = filesCfg fullCfg["plots"] = plotsCfg with open(cfgName, "w") as plotitFile: yaml.dump(fullCfg, plotitFile) def _plotIt_configFilesAndPlots( config, plotList, eras=None, workdir=".", resultsdir=".", readCounters=lambda f: -1., vetoFileAttributes=None, plotDefaults=None): # helper method, to avoid overlap between writePlotIt and loadPlotIt # base: copy from plotIt block, add root and translate eras/lumi plotitCfg = plotIt_config(config, root=os.path.relpath(resultsdir, workdir), eras=eras) # samples -> files: read sum of weights from results filesCfg = plotIt_files( config["samples"], resultsdir=resultsdir, eras=eras, readCounters=readCounters, vetoAttributes=vetoFileAttributes) # plots: add default style options plotDefaults_cmb = plotitCfg.get("plotdefaults", {}) if plotDefaults: plotDefaults_cmb.update(plotDefaults) plotsCfg = plotIt_plots(plotList, plotDefaults=plotDefaults_cmb) return plotitCfg, filesCfg, plotsCfg
[docs] def writePlotIt(
        config, plotList, outName, eras=None, workdir=".", resultsdir=".",
        readCounters=lambda f: -1., vetoFileAttributes=None, plotDefaults=None):
    """ Combine creation and saving of a plotIt config file

    for convenience inside a :py:class:`~bamboo.analysismodules.HistogramsModule`,
    the individual parts are also available in :py:mod:`bamboo.analysisutils`.

    :param config: parsed analysis configuration. Only the ``configuration`` (if present)
        and ``eras`` sections (to get the luminosities) are read.
    :param plotList: list of plots to convert
        (``name`` and ``plotopts``, combined with the default style)
    :param outName: output YAML config file name
    :param eras: valid era list
    :param workdir: output directory
    :param resultsdir: directory with output ROOT files with histograms
    :param readCounters: method to read the sum of event weights from an output file
    :param vetoFileAttributes: list of per-sample keys that should be ignored
        (those specific to the bamboo part, e.g. job splitting and DAS paths)
    :param plotDefaults: plot defaults to add (added to those from
        ``config["plotIt"]["plotdefaults"]``, with higher precedence if present in both)
    """
    if eras is None:
        eras = list(config["eras"].keys())
    plotitCfg, filesCfg, plotsCfg = _plotIt_configFilesAndPlots(
        config, plotList, eras=eras, workdir=workdir, resultsdir=resultsdir,
        readCounters=readCounters, vetoFileAttributes=vetoFileAttributes,
        plotDefaults=plotDefaults)
    # write
    savePlotItConfig(outName, plotitCfg, filesCfg, plotsCfg)
[docs] def loadPlotIt(
        config, plotList, eras=None, workdir=".", resultsdir=".",
        readCounters=lambda f: -1., vetoFileAttributes=None, plotDefaults=None):
    """ Load the plotit configuration with the plotIt python library

    The plotIt YAML file writing and parsing is skipped in this case
    (to write the file, the :py:func:`~bamboo.analysisutils.writePlotIt` method
    should be used, with the same arguments).

    :param config: parsed analysis configuration. Only the ``configuration`` (if present)
        and ``eras`` sections (to get the luminosities) are read.
    :param plotList: list of plots to convert
        (``name`` and ``plotopts``, combined with the default style)
    :param eras: list of eras to consider (``None`` for all that are in the config)
    :param workdir: output directory
    :param resultsdir: directory with output ROOT files with histograms
    :param readCounters: method to read the sum of event weights from an output file
    :param vetoFileAttributes: list of per-sample keys that should be ignored
        (those specific to the bamboo part, e.g. job splitting and DAS paths)
    :param plotDefaults: plot defaults to add (added to those from
        ``config["plotIt"]["plotdefaults"]``, with higher precedence if present in both)
    """
    if eras is None:
        eras = list(config["eras"].keys())
    try:
        from plotit.config import (loadConfiguration, loadFiles, loadGroups,
                                   loadPlots, loadSystematics, loadLegend)
        from plotit.plotit import resolveFiles, samplesFromFilesAndGroups
    except ImportError as ex:
        raise RuntimeError(f"Could not load plotit python library ({ex!r})")
    # generate dictionaries
    plotitCfg, filesCfg, plotsCfg = _plotIt_configFilesAndPlots(
        config, plotList, eras=eras, workdir=workdir, resultsdir=resultsdir,
        readCounters=readCounters, vetoFileAttributes=vetoFileAttributes,
        plotDefaults=plotDefaults)
    # parse in (py)plotit format
    configuration = loadConfiguration(plotitCfg["configuration"])
    cFiles = loadFiles(filesCfg)
    cGroups = loadGroups(plotitCfg.get("groups"), files=cFiles)
    plots = loadPlots(plotsCfg)
    systematics = loadSystematics(plotitCfg.get("systematics"), configuration=configuration)
    legend = loadLegend(plotitCfg.get("legend"))
    # resolve, select, group, and sort the files -> samples
    files = resolveFiles(cFiles, configuration, systematics=systematics, histodir=workdir)
    samples = samplesFromFilesAndGroups(files, cGroups, eras=eras)
    return configuration, samples, plots, systematics, legend
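
# Example usage (a minimal sketch, e.g. in the postProcess step of a
# HistogramsModule; the era name and variable names are illustrative):
#
#     pConfig, samples, plots, systematics, legend = loadPlotIt(
#         config, self.plotList, eras=["2018"], workdir=workdir,
#         resultsdir=resultsdir, readCounters=self.readCounters)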
[docs] def runPlotIt(
        cfgName, workdir=".", plotsdir="plots", plotIt="plotIt",
        eras=("all", None), verbose=False):
    """ Run plotIt

    :param cfgName: plotIt YAML config file name
    :param workdir: working directory
        (also the starting point for finding the histograms files, ``-i`` option)
    :param plotsdir: name of the plots directory inside workdir (``plots``, by default)
    :param plotIt: path of the ``plotIt`` executable
    :param eras: ``(mode, eras)``, mode being one of ``"split"``, ``"combined"``,
        or ``"all"`` (both of the former), and eras a list of era names,
        or ``None`` for all
    :param verbose: print the plotIt command being run
    """
    eraMode, eras = eras
    out_extraOpts = []
    if len(eras) > 1 and eraMode in ("all", "combined"):
        out_extraOpts.append((os.path.join(workdir, plotsdir), []))
    if len(eras) == 1 or eraMode in ("split", "all"):
        for era in eras:
            out_extraOpts.append((os.path.join(workdir, f"{plotsdir}_{era}"), ["-e", era]))
    for plotsdir, extraOpts in out_extraOpts:
        if os.path.exists(plotsdir):
            logger.warning(f"Directory '{plotsdir}' already exists, previous plots will be overwritten")
        else:
            os.makedirs(plotsdir)
        try:
            plotItLog = os.path.join(plotsdir, "out.log")
            plotItArgs = [plotIt, "-i", workdir, "-o", plotsdir, "-y"] + extraOpts + [cfgName]
            if verbose:
                logger.debug("Running command `{}`, with logfile {}".format(
                    " ".join(plotItArgs), plotItLog))
            with open(plotItLog, "w") as logFile:
                subprocess.check_call(plotItArgs, stdout=logFile)
            logger.info(f"plotIt output is available in {plotsdir}")
        except subprocess.CalledProcessError as ex:
            logger.error(
                "Command '{}' failed with exit code {}\n{}".format(
                    " ".join(ex.cmd), ex.returncode, ex.output))
def _getJESUncertaintySources( sources, jecDBCache, jec, jetType, regroupTag="", uncertaintiesFallbackJetType=None, session=None): if sources == "Merged": try: plf = jecDBCache.getPayload( jec, "UncertaintySources", jetType, prefix=f"Regrouped{regroupTag}_", session=session) except ValueError as ex: if uncertaintiesFallbackJetType is not None: plf = jecDBCache.getPayload( jec, "UncertaintySources", uncertaintiesFallbackJetType, prefix=f"Regrouped{regroupTag}_", session=session) logger.info(f"{ex!s}, falling back to {uncertaintiesFallbackJetType} uncertainties ({plf})") else: raise ex else: plf = jecDBCache.getPayload(jec, "UncertaintySources", jetType, session=session) if sources in ("All", "Merged"): readSources = [] with open(plf) as jesUncF: for ln in jesUncF: ln = ln.strip() if ln.startswith("[") and ln.endswith("]"): readSources.append(ln[1:-1]) logger.debug(f"Found {len(readSources):d} JES uncertainty sources in {plf}: {', '.join(readSources)}") sources = readSources return plf, sources def _filterCalcSystematics(variProxy, calcHandle, enableSystematics=None, default=True, name=""): if enableSystematics is None: if default: def enableSystematics(x): return True else: def enableSystematics(x): return False elif (not callable(enableSystematics)) and hasattr(enableSystematics, "__contains__"): enableSystematics = enableSystematics.__contains__ all_enabled = set() all_avail = set() for attN, opWithSyst in variProxy.brMapMap[variProxy.withSystName].items(): # variations provided by this calculator calc_vars = variProxy.availableVariations(attN, calcHandle) # variations registered with this collection that are provided by others... other_vars = set(opWithSyst.variations).difference(calc_vars) # ...and keep those enable = set(other_vars) all_avail.update(opWithSyst.variations) for vari in calc_vars: if vari != "nominal": all_avail.add(vari) vari_c = vari if vari.endswith("up"): vari_c = vari[:-2] elif vari.endswith("down"): vari_c = vari[:-4] if enableSystematics(vari) or enableSystematics(vari_c): enable.add(vari) if enable or opWithSyst.variations: opWithSyst.variations = tuple(enable) all_enabled.update(enable) if all_enabled: logger.debug("Enabled systematic variations for {}: {}".format( name, " ".join(all_enabled))) if len(all_enabled) != len(all_avail): logger.debug("Disabled systematic variations for {}: {}".format( name, " ".join(all_avail.difference(all_enabled)))) # jet mass scale # W-tagging PUPPI softdrop JMS values: https://twiki.cern.ch/twiki/bin/view/CMS/JetWtagging # 2016 values fatjet_jmsValues = { "2016": [1.00, 1.0094, 0.9906], # nominal, up, down "2017": [0.982, 0.986, 0.978], # Use 2017 values for 2018 until 2018 are released "2018": [0.982, 0.986, 0.978], } # jet mass resolution: https://twiki.cern.ch/twiki/bin/view/CMS/JetWtagging fatjet_jmrValues = { # nominal, up, down "2016": [1.0, 1.2, 0.8], "2017": [1.09, 1.14, 1.04], # Use 2017 values for 2018 until 2018 are released "2018": [1.09, 1.14, 1.04], } # JMS&JMR SD corr in tau21DDT region: https://twiki.cern.ch/twiki/bin/viewauth/CMS/JetWtagging#tau21DDT_0_43 fatjet_gmsValues_tau21DDT = { "2016": [1.014, 1.021, 1.007], "2017": [0.983, 0.99, 0.976], "2018": [1.000, 1.010, 0.990] # tau21DDT < 0.43 WP } fatjet_gmrValues_tau21DDT = { "2016": [1.086, 1.176, 0.996], "2017": [1.080, 1.161, 0.999], "2018": [1.124, 1.208, 1.040] } # PUPPI scale and resolutions fatjet_puppi_msd_params = { "gen": "1.0062610283313527+(-1.061605139842829*((x*0.07999000770091785)^-1.2045376937033758))", "reco_cen": (1.0930197734452352, 
-0.00015006788774298745, 3.4486611503791434e-07, -2.681003093847412e-10, 8.674402290776817e-14, -1.0011358570698617e-17), "reco_fwd": (1.2721151537214315, -0.0005716403627542301, 8.372894123074334e-07, -5.204332049858346e-10, 1.4537520981877012e-13, -1.5038869243803616e-17), "resol_cen": (1.092735080341856, 4.142622682579229e-05, -1.3736805733597026e-07, 1.2295818250625584e-10, -4.197075395161288e-14, 4.923792745086088e-18), "resol_fwd": (1.1649278339374347, -0.00012678902807057208, 1.0594037344842974e-07, 6.072087019460118e-12, -1.992427482862693e-14, 3.644006510237158e-18) }
[docs] def configureJets( variProxy, jetType, jec=None, jecLevels="default", smear=None, useGenMatch=True, genMatchDR=0.2, genMatchDPt=3., jesUncertaintySources=None, regroupTag="", uncertaintiesFallbackJetType=None, splitJER=False, addHEM2018Issue=False, enableSystematics=None, subjets=None, mcYearForFatJets=None, isTau21DDT=False, jms=None, jmr=None, gms=None, gmr=None, cachedir=None, mayWriteCache=False, isMC=False, backend=None, uName=""): """ Reapply JEC, set up jet smearing, or prepare JER/JES uncertainties collections :param variProxy: jet variations proxy, e.g. ``tree._Jet`` :param jetType: jet type, e.g. AK4PFchs :param smear: tag of resolution (and scalefactors) to use for smearing (no smearing is done if unspecified) :param jec: tag of the new JEC to apply, or for the JES uncertainties (pass an empty list to jecLevels to produce only the latter without reapplying the JEC) :param jecLevels: list of JEC levels to apply (if left out the recommendations are used: L1FastJet, L2Relative, L3Absolute, and also L2L3Residual for data) :param jesUncertaintySources: list of jet energy scale uncertainty sources (see the `JECUncertaintySources twiki <https://twiki.cern.ch/twiki/bin/viewauth/CMS/JECUncertaintySources>`_), ``"All"``, or ``"Merged"``, for all regrouped uncertainties (in which case ``regroupTag`` can be specified) :param regroupTag: version of the regrouped uncertainties to use :param uncertaintiesFallbackJetType: jet type from which to use the (regrouped) JES uncertainties if those for jetType are not found (e.g. AK4PFchs, see `JME HN <https://hypernews.cern.ch/HyperNews/CMS/get/jes/988/1.html>`_) :param enableSystematics: filter systematics variations to enable (collection of names or callable that takes the variation name; default: all that are available for MC, none for data) :param useGenMatch: use matching to generator-level jets for resolution smearing :param genMatchDR: DeltaR for generator-level jet matching (half the cone size is recommended, default is 0.2) :param genMatchDPt: maximal relative PT difference (in units of the resolution) between reco and gen jet :param splitJER: vary the JER uncertainty independently in six kinematic bins (see the `JER uncertainty twiki <https://twiki.cern.ch/twiki/bin/view/CMS/JetResolution#Run2_JER_uncertainty_correlation>`_) :param addHEM2018Issue: add a JES uncertainty for the HEM issue in 2018 (see `this hypernews post <https://hypernews.cern.ch/HyperNews/CMS/get/JetMET/2000.html>`_) :param subjets: subjets proxy (``tree.SubJet``) :param mcYearForFatJets: data-taking year for fat jet parameters ((softdrop) mass scale and resolution, these should not be passed for data). They can also be passed explicitly, see the following parameters. If none are passed, no jet mass scale corrections are applied. :param isTau21DDT: if used in combinaton with mcYearForFatJets, will use different values for the softdrop mass. Warning: differently from nanoAOD-Tools, these will be propagated to the JEC uncertainties, and this combination of settings has not been validated. Please check carefully if you need to use this. 
:param jms: jet mass scale correction (nominal, up, down), for fat jets :param jmr: jet mass resolution (nominal, up, down), for fat jets :param gms: jet groomed mass scale correction (nominal, up, down), for fat jets, same as ``jms`` by default :param gmr: jet groomed mass resolution (nominal, up, down), for fat jets, same as ``jmr`` by default :param cachedir: alternative root directory to use for the txt files cache, instead of ``$XDG_CACHE_HOME/bamboo`` (usually ``~/.cache/bamboo``) :param mayWriteCache: flag to indicate if this task is allowed to write to the cache status file (set to False for worker tasks to avoid corruption due to concurrent writes) :param isMC: MC or not :param backend: backend pointer (returned from :py:meth:`~bamboo.analysismodules.HistogramsModule.prepareTree`) :param uName: [deprecated, ignored] unique name for the correction calculator (sample name is a safe choice) """ if uName != "": warnings.warn("Passing uName to configureJets" "is not necessary anymore (and ignored), please update", DeprecationWarning, stacklevel=1) from . import treefunctions as op # first to load default headers/libs, if still needed from .treefunctions import _to, _tp # treeoperations and treeproxies from itertools import repeat isFat = jetType.startswith("AK8") aJet = variProxy.orig[0] if isFat and subjets is None: evt = variProxy._parent subjets = evt.SubJet args = [getattr(aJet, comp).op.arg for comp in ("pt", "eta", "phi", "mass", "rawFactor", "area")] if isFat: args.append(aJet.msoftdrop.op.arg) args += [getattr(aJet, f"subJet{iSJ:d}").idx.op.arg for iSJ in range(1, 3)] args += [getattr(subjets[0], comp).op.arg for comp in ("pt", "eta", "phi", "mass")] args.append(aJet.jetId.op.arg) evt = variProxy._parent if hasattr(evt, "fixedGridRhoFastjetAll"): args.append(_to.GetColumn("Float_t", "fixedGridRhoFastjetAll")) elif hasattr(evt, "Rho_fixedGridRhoFastjetAll"): args.append(_to.GetColumn("Float_t", "Rho_fixedGridRhoFastjetAll")) else: raise RuntimeError("Cannot find fixedGridRhoFastjetAll or Rho_fixedGridRhoFastjetAll") if isMC: if not isFat: args.append(aJet.partonFlavour.op.arg) evt = variProxy._parent args.append((evt.run << 20) + (evt.luminosityBlock << 10) + evt.event + 1 + op.static_cast("unsigned", op.switch(op.rng_len(variProxy.orig) != 0, variProxy.orig[0].eta / .01, op.c_float(0.)))) aGJet = evt.GenJet[0] if isFat: aGJet = evt.GenJetAK8[0] args += [getattr(aGJet, comp).op.arg for comp in ("pt", "eta", "phi", "mass")] if isFat: aGSJet = evt.SubGenJetAK8[0] args += [getattr(aGSJet, comp).op.arg for comp in ("pt", "eta", "phi", "mass")] else: if not isFat: args.append(_to.ExtVar("ROOT::VecOps::RVec<int>", "ROOT::VecOps::RVec<int>{}")) args.append(_tp.makeConst(0, "unsigned")) # no seed args += list(repeat(_to.ExtVar( "ROOT::VecOps::RVec<float>", "ROOT::VecOps::RVec<float>{}"), (8 if isFat else 4))) # fill configuration from CMSJMECalculators import config as calcConfigs config = (calcConfigs.JetVariations() if not isFat else calcConfigs.FatJetVariations()) if smear is not None or jec is not None: from CMSJMECalculators.jetdatabasecache import JetDatabaseCache, sessionWithResponseChecks with sessionWithResponseChecks() as session: if smear is not None: jrDBCache = JetDatabaseCache( "JRDatabase", repository="cms-jet/JRDatabase", cachedir=cachedir, mayWrite=mayWriteCache, session=session) config.ptResolution = jrDBCache.getPayload(smear, "PtResolution", jetType, session=session) config.ptResolutionSF = jrDBCache.getPayload(smear, "SF", jetType, session=session) config.splitJER = 
splitJER config.useGenMatch = useGenMatch config.genMatchDR = genMatchDR config.genMatchDPt = genMatchDPt if isFat: if jmr is not None or mcYearForFatJets is not None: if jmr is None: jmr = fatjet_jmrValues[mcYearForFatJets] config.jmr = jmr if gmr is None: if isTau21DDT: gmr = fatjet_gmrValues_tau21DDT[mcYearForFatJets] else: gmr = jmr config.gmr = gmr elif gmr is not None: # groomed only, explicitly passed config.gmr = gmr if jec is not None: if jecLevels == "default": # "L3Absolute" left out because it is dummy according to # https://twiki.cern.ch/twiki/bin/view/CMS/IntroToJEC#Mandatory_Jet_Energy_Corrections if jec.endswith("_DATA"): jecLevels = ["L1FastJet", "L2Relative", "L2L3Residual"] elif jec.endswith("_MC"): # "L2L3Residual" could be added, but it is dummy for MC according to # https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookJetEnergyCorrections#JetCorApplication # noqa: B950 jecLevels = ["L1FastJet", "L2Relative"] else: raise ValueError( f"JEC tag {jec} does not end with '_DATA' or '_MC', " "so the levels cannot be guessed. " "Please specify the JEC levels explicitly") jecDBCache = JetDatabaseCache( "JECDatabase", repository="cms-jet/JECDatabase", cachedir=cachedir, mayWrite=mayWriteCache, session=session) if jecLevels: config.jec = [ jecDBCache.getPayload(jec, jLev, jetType, session=session) for jLev in jecLevels] config.addHEM2018Issue = addHEM2018Issue if jesUncertaintySources: plf, jesUncertaintySources = _getJESUncertaintySources( jesUncertaintySources, jecDBCache, jec, jetType, uncertaintiesFallbackJetType=uncertaintiesFallbackJetType, regroupTag=regroupTag, session=session) config.jesUncertainties = [(src, plf) for src in jesUncertaintySources] if jec is None: if jecLevels and jecLevels != "default": logger.error("JEC levels specified, but no JEC tag; no correction will be done") if jesUncertaintySources: logger.error("JES uncertainty specified, but no JEC tag; none will be evaluated") if isFat: if jms is not None or mcYearForFatJets is not None: if jms is not None: if not hasattr(jms, "__iter__"): jms = (jms, 1., 1.) else: jms = fatjet_jmsValues[mcYearForFatJets] config.jms = jms if gms is not None: if not hasattr(gms, "__iter__"): gms = (gms, 1., 1.) else: if isTau21DDT: gms = fatjet_gmsValues_tau21DDT[mcYearForFatJets] else: gms = jms config.gms = gms elif gms is not None: # groomed only, explicitly passed config.gms = gms # PUPPI config.puppiGenFormula = fatjet_puppi_msd_params["gen"] config.puppi_reco_cen = fatjet_puppi_msd_params["reco_cen"] config.puppi_reco_for = fatjet_puppi_msd_params["reco_fwd"] config.puppi_resol_cen = fatjet_puppi_msd_params["resol_cen"] config.puppi_resol_for = fatjet_puppi_msd_params["resol_fwd"] # define calculator and initialize from .root import loadJMESystematicsCalculators, gbl loadJMESystematicsCalculators(backend=backend) jetcalcName = backend.symbol(f"const auto <<name>> = {config.cppConstruct};") calcHandle = getattr(gbl, jetcalcName) variProxy.addCalculator(op.extVar(config.calcClass, jetcalcName), calcHandle=calcHandle, args=args) _filterCalcSystematics(variProxy, calcHandle, enableSystematics=enableSystematics, default=isMC, name="Jet")
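
# Example usage (a minimal sketch; the JEC and JER tags below are hypothetical
# placeholders, to be replaced by the tags recommended for the dataset in use, and
# isNotWorker/be/sample are assumed to come from the analysis module):
#
#     configureJets(tree._Jet, "AK4PFchs",
#                   jec="Summer19UL18_V5_MC",
#                   smear="Summer19UL18_JRV2_MC",
#                   jesUncertaintySources="Merged",
#                   mayWriteCache=isNotWorker,
#                   isMC=self.isMC(sample), backend=be)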
[docs] def configureType1MET( variProxy, jec=None, smear=None, isT1Smear=False, useGenMatch=True, genMatchDR=0.2, genMatchDPt=3., jesUncertaintySources=None, regroupTag="", splitJER=False, addHEM2018Issue=False, enableSystematics=None, cachedir=None, mayWriteCache=False, isMC=False, backend=None, uName=""): """ Reapply JEC, set up jet smearing, or prepare JER/JES uncertainties collections :param variProxy: MET variations proxy, e.g. ``tree._MET`` :param smear: tag of resolution (and scalefactors) to use for smearing (no smearing is done if unspecified) :param isT1Smear: T1Smear (smeared as nominal, all variations with respect to that) if True, otherwise T1 (JES variations with respect to the unsmeared MET, jerup and jerdown variations are nominally smeared) :param jec: tag of the new JEC to apply, or for the JES uncertainties :param jesUncertaintySources: list of jet energy scale uncertainty sources (see the `JECUncertaintySources twiki <https://twiki.cern.ch/twiki/bin/viewauth/CMS/JECUncertaintySources>`_), ``"All"``, or ``"Merged"``, for all regrouped uncertainties (in which case ``regroupTag`` can be specified) :param regroupTag: version of the regrouped uncertainties to use :param enableSystematics: filter systematics variations to enable (collection of names or callable that takes the variation name; default: all that are available for MC, none for data) :param useGenMatch: use matching to generator-level jets for resolution smearing :param genMatchDR: DeltaR for generator-level jet matching (half the cone size is recommended, default is 0.2) :param genMatchDPt: maximal relative PT difference (in units of the resolution) between reco and gen jet :param splitJER: vary the JER uncertainty independently in six kinematic bins (see the `JER uncertainty twiki <https://twiki.cern.ch/twiki/bin/view/CMS/JetResolution#Run2_JER_uncertainty_correlation>`_) :param addHEM2018Issue: add a JES uncertainty for the HEM issue in 2018 (see `this hypernews post <https://hypernews.cern.ch/HyperNews/CMS/get/JetMET/2000.html>`_) :param cachedir: alternative root directory to use for the txt files cache, instead of ``$XDG_CACHE_HOME/bamboo`` (usually ``~/.cache/bamboo``) :param mayWriteCache: flag to indicate if this task is allowed to write to the cache status file (set to False for worker tasks to avoid corruption due to concurrent writes) :param be: backend pointer :param uName: [deprecated, ignored] unique name for the correction calculator (sample name is a safe choice) :param isMC: MC or not """ if uName != "": warnings.warn("Passing uName to configureType1MET" "is not necessary anymore (and ignored), please update", DeprecationWarning, stacklevel=1) isFixEE2017 = variProxy.orig.pt.op.name.startswith("METFixEE2017_") isPuppi = ("Puppi" in variProxy.orig.pt.op.name) from . 
import treefunctions as op # first to load default headers/libs, if still needed from .treefunctions import _to, _tp # treeoperations and treeproxies from itertools import repeat evt = variProxy._parent jets = evt._Jet.orig if hasattr(evt, "_Jet") else evt.Jet[0] args = [getattr(jets[0], comp).op.arg for comp in ( "pt", "eta", "phi", "mass", "rawFactor", "area", "muonSubtrFactor", "neEmEF", "chEmEF")] if addHEM2018Issue: if isFixEE2017: raise RuntimeError( "You can either use the EE 2017 fix, or the 2018 HEM issue variation, " "but not both at a time, for the MET") args.append(jets[0].jetId.op.arg) else: if not isFixEE2017: args.append(_to.ExtVar("ROOT::VecOps::RVec<int>", "ROOT::VecOps::RVec<int>{}")) if hasattr(evt, "fixedGridRhoFastjetAll"): args.append(_to.GetColumn("Float_t", "fixedGridRhoFastjetAll")) elif hasattr(evt, "Rho_fixedGridRhoFastjetAll"): args.append(_to.GetColumn("Float_t", "Rho_fixedGridRhoFastjetAll")) else: raise RuntimeError("Cannot find fixedGridRhoFastjetAll or Rho_fixedGridRhoFastjetAll") evt = variProxy._parent if isMC: args.append(jets[0].partonFlavour.op.arg) args.append((evt.run << 20) + (evt.luminosityBlock << 10) + evt.event + 1 + op.static_cast( "unsigned", op.switch(op.rng_len(jets) != 0, jets[0].eta / .01, op.c_float(0.)))) aGJet = evt.GenJet[0] args += [getattr(aGJet, comp).op.arg for comp in ("pt", "eta", "phi", "mass")] else: args.append(_to.ExtVar("ROOT::VecOps::RVec<int>", "ROOT::VecOps::RVec<int>{}")) args.append(_tp.makeConst(0, "unsigned")) # no seed args += list(repeat(_to.ExtVar("ROOT::VecOps::RVec<float>", "ROOT::VecOps::RVec<float>{}"), 4)) if isPuppi: args += [evt.RawPuppiMET.phi, evt.RawPuppiMET.pt] else: args += [evt.RawMET.phi, evt.RawMET.pt] args += [variProxy.orig.MetUnclustEnUpDeltaX, variProxy.orig.MetUnclustEnUpDeltaY] aT1Jet = evt.CorrT1METJet[0] args += [getattr(aT1Jet, comp).op.arg for comp in ( "rawPt", "eta", "phi", "area", "muonSubtrFactor")] args += [getattr(aT1Jet, comp).op.arg if hasattr(aT1Jet, comp) else _to.ExtVar("ROOT::VecOps::RVec<float>", "ROOT::VecOps::RVec<float>{}") for comp in ("neEmEF", "chEmEF")] if isFixEE2017: args += [evt.MET.phi, evt.MET.pt, variProxy.orig.phi, variProxy.orig.pt] # fill configuration from CMSJMECalculators import config as calcConfigs config = (calcConfigs.METVariations() if not isFixEE2017 else calcConfigs.FixEE2017METVariations()) jetType = "AK4PFchs" if smear is not None or jec is not None: from CMSJMECalculators.jetdatabasecache import JetDatabaseCache, sessionWithResponseChecks with sessionWithResponseChecks() as session: if smear is not None: jrDBCache = JetDatabaseCache( "JRDatabase", repository="cms-jet/JRDatabase", cachedir=cachedir, mayWrite=mayWriteCache, session=session) config.ptResolution = jrDBCache.getPayload(smear, "PtResolution", jetType, session=session) config.ptResolutionSF = jrDBCache.getPayload(smear, "SF", jetType, session=session) config.splitJER = splitJER config.useGenMatch = useGenMatch config.genMatchDR = genMatchDR config.genMatchDPt = genMatchDPt config.isT1SmearedMET = isT1Smear if jec is not None: jecLevels_L1 = ["L1FastJet"] # "L3Absolute" left out because it is dummy according to # https://twiki.cern.ch/twiki/bin/view/CMS/IntroToJEC#Mandatory_Jet_Energy_Corrections if jec.endswith("_DATA"): jecLevels = jecLevels_L1 + ["L2Relative", "L2L3Residual"] elif jec.endswith("_MC"): # "L2L3Residual" could be added, but it is dummy for MC according to # https://twiki.cern.ch/twiki/bin/view/CMSPublic/WorkBookJetEnergyCorrections#JetCorApplication # noqa: B950 jecLevels = 
jecLevels_L1 + ["L2Relative"] else: raise ValueError( f"JEC tag {jec} does not end with '_DATA' or '_MC', " "so the levels cannot be guessed") jecDBCache = JetDatabaseCache( "JECDatabase", repository="cms-jet/JECDatabase", cachedir=cachedir, mayWrite=mayWriteCache, session=session) if jecLevels: config.jec = [ jecDBCache.getPayload(jec, jLev, jetType, session=session) for jLev in jecLevels] if jecLevels_L1: config.l1Jec = [ jecDBCache.getPayload(jec, jLev, jetType, session=session) for jLev in jecLevels_L1] config.addHEM2018Issue = addHEM2018Issue if jesUncertaintySources: plf, jesUncertaintySources = _getJESUncertaintySources( jesUncertaintySources, jecDBCache, jec, jetType, regroupTag=regroupTag, session=session) config.jesUncertainties = [(src, plf) for src in jesUncertaintySources] if jec is None and jesUncertaintySources: logger.error("JES uncertainty specified, but no JEC tag; none will be evaluated") # define calculator and initialize from .root import loadJMESystematicsCalculators, gbl loadJMESystematicsCalculators(backend=backend) metcalcName = backend.symbol(f"const auto <<name>> = {config.cppConstruct};") calcHandle = getattr(gbl, metcalcName) variProxy.addCalculator(op.extVar(config.calcClass, metcalcName), calcHandle=calcHandle, args=args) _filterCalcSystematics(variProxy, calcHandle, enableSystematics=enableSystematics, default=isMC, name="MET")
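
# Example usage (a minimal sketch; the tags are hypothetical placeholders and
# should be consistent with those used for the jets above):
#
#     configureType1MET(tree._MET,
#                       jec="Summer19UL18_V5_MC",
#                       smear="Summer19UL18_JRV2_MC",
#                       jesUncertaintySources="Merged",
#                       mayWriteCache=isNotWorker,
#                       isMC=self.isMC(sample), backend=be)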
[docs] def splitVariation(variProxy, variation, regions, nomName="nom"): """ Split a systematic variation between (kinematic) regions (to decorrelate the nuisance parameter) :param variProxy: jet variations proxy, e.g. ``tree._Jet`` :param variation: name of the variation that should be split (e.g. "jer") :param regions: map of region names and selections (for non-collection objects: boolean expression, for collection objects: a callable that returns a boolean for an item from the collection) :param nomName: name of the nominal variation ("nom" for postprocessed, "nominal" for calculator) :Example: >>> splitVariation(tree._Jet, "jer", {"forward" : lambda j : j.eta > 0., >>> "backward" : lambda j : j.eta < 0.}) """ if not (hasattr(variProxy, "brMapMap") and f"{variation}up" in variProxy.brMapMap and f"{variation}down" in variProxy.brMapMap): raise RuntimeError( f"Could not find up and down variation for {variation} " f"(available: {list(variProxy.brMapMap.keys())!s})") from itertools import chain from functools import partial from . import treefunctions as op from .treeoperations import adaptArg from .treeproxies import ListBase nomProxy = variProxy[nomName] for direction in ("up", "down"): origVarName = f"{variation}{direction}" origBrMap = variProxy.brMapMap[origVarName] for regNm, regSel in regions.items(): if isinstance(nomProxy, ListBase): # collection regBrMap = {attN: adaptArg(op.map(nomProxy, partial( lambda osel, oatt, attn, obj: op.switch( osel(obj), oatt.result[obj._idx], getattr(obj, attn)), regSel, origAtt, attN) )) for attN, origAtt in origBrMap.items()} else: # non-collection regBrMap = { attN: adaptArg(op.switch(regSel, origAtt, getattr(nomProxy, attN))) for attN, origAtt in origBrMap.items()} variProxy.brMapMap[f"{variation}{regNm}{direction}"] = regBrMap # adjust varMap and enabled variations for with-syst ops # (as in CalcVariationsBase.addCalculator and configure* above) origVarNames = tuple(f"{variation}{direction}" for direction in ("up", "down")) newVarNames = tuple(f"{variation}{regNm}{direction}" for regNm in regions.keys() for direction in ("up", "down")) for attN, opWithSyst in variProxy.brMapMap[getattr(variProxy, "withSystName", "nomWithSyst")].items(): if opWithSyst._cache: raise RuntimeError( "Expression has already been used, changing now would lead to undefined behaviour") for nvN in newVarNames: if attN in variProxy.brMapMap[nvN]: opWithSyst.varMap[nvN] = variProxy.brMapMap[nvN][attN] opWithSyst.variations = tuple(chain( (varnm for varnm in opWithSyst.variations if varnm not in origVarNames), newVarNames))
[docs] def forceDefine(arg, selection, includeSub=True):
    """ Force the definition of an expression as a column at a selection stage

    Use only for really computation-intensive operations that need to be precalculated

    :param arg: expression to define as a column
    :param selection: :py:class:`~bamboo.plots.Selection` for which the expression should be defined
    :param includeSub: also precalculate for data-driven background 'shadow' selections
        (:py:class:`bamboo.plots.SelectionWithSub` 'sub'-selections)
    """
    if arg is None:
        raise RuntimeError(
            "Trying to define None. If a correction calculator product was passed: "
            "has the calculator been added and configured?")
    from .treeoperations import adaptArg
    op = adaptArg(arg)
    if not op.canDefine:
        raise RuntimeError(f"Cannot define {op!r}")
    selection._fbe.define(op, selection)
    from .plots import SelectionWithSub
    if includeSub and isinstance(selection, SelectionWithSub):
        for subSel in selection.sub.values():
            if subSel is not None:
                selection._fbe.define(op, subSel)
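
# Example usage (a minimal sketch; it assumes the varied-jet calculator product is
# exposed as ``t._Jet.calcProd``, which is an assumption about the proxy and not
# defined in this module):
#
#     forceDefine(t._Jet.calcProd, noSel)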
[docs] def addPrintout(selection, funName, *args):
    ''' Call a method with debugging printout, as part of the RDataFrame graph

    This method is only meant to work with the default backend,
    since it works by inserting a ``Filter`` node that lets all events pass.

    :param selection: selection for which to add the printout.
        The function call will be added to the RDataFrame graph in its current state,
        so if a plot causes a problem this method should be called before defining it.
    :param funName: name of a C++ method to call. This method should always return ``true``,
        and can take any number of arguments.
    :param args: arguments to pass to the function

    The following example would print the entry and event number
    for each event that passes some selection.

    :Example:

    >>> from bamboo.root import gbl
    >>> gbl.gInterpreter.Declare("""
    ... bool bamboo_printEntry(long entry, long event) {
    ...   std::cout << "Processing entry #" << entry << ": event " << event << std::endl;
    ...   return true;
    ... }""")
    >>> addPrintout(sel, "bamboo_printEntry", op.extVar("ULong_t", "rdfentry_"), t.event)
    '''
    from . import treefunctions as op
    from .treeoperations import boolType
    be = selection._fbe
    if selection.name not in be.selDFs:
        raise RuntimeError("This trick will only work with a dynamically constructed RDataFrame")
    nd = be.selDFs[selection.name].fdnd
    callPrint = op.extMethod(funName, returnType=boolType)(*args)
    filterStr = nd(callPrint.op)
    logger.debug(f"Adding printout with {filterStr}")
    nd.df = be.df_filter(nd.df, filterStr, callPrint.op, nd)
[docs] def makePileupWeight(
        puWeights, numTrueInteractions, systName=None, nameHint=None,
        sel=None, defineOnFirstUse=True):
    """ Construct a pileup weight for MC, based on the weights in a JSON file

    :param puWeights: path of the JSON file with weights (binned in NumTrueInteractions)
        for cp3-llbb JSON, or tuple of JSON path and correction name (correctionlib JSON)
    :param numTrueInteractions: expression to get the number of true interactions
        (Poissonian expectation value for an event)
    :param systName: name of the associated systematic nuisance parameter
    :param sel: a selection in the current graph (only used to retrieve a pointer to the backend)
    """
    from . import treefunctions as op
    if isinstance(puWeights, tuple):
        from .scalefactors import get_correction
        expr = get_correction(
            puWeights[0], puWeights[1],
            params={"NumTrueInteractions": numTrueInteractions},
            systParam="weights", systNomName="nominal",
            systVariations=("up", "down"),
            systName=systName,  # will enable or disable systematic variations
            sel=sel, defineOnFirstUse=defineOnFirstUse
        )(None)
    else:
        logger.warning("Using makePileupWeight() with 'llbb' JSONs is deprecated! "
                       "Use correctionlib JSONs and get_correction() instead")
        paramVType = "Parameters::value_type::value_type"
        puArgs = op.construct(
            "Parameters", (op.initList(
                f"std::initializer_list<{paramVType}>", paramVType,
                (op.initList(paramVType, "float", (op.extVar(
                    "int", "BinningVariable::NumTrueInteractions"), numTrueInteractions)),)),))
        puWFun = op.define(
            "ILeptonScaleFactor",
            f'const ScaleFactor <<name>>{{"{puWeights}"}};', nameHint=nameHint)
        expr = puWFun.get(puArgs, op.extVar("int", "Nominal"))
        if systName:
            expr = op.systematic(
                expr, name=systName,
                up=puWFun.get(puArgs, op.extVar("int", "Up")),
                down=puWFun.get(puArgs, op.extVar("int", "Down")))
        if defineOnFirstUse:
            expr = op.defineOnFirstUse(expr)
    return expr
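
# Example usage with a correctionlib JSON (a minimal sketch; the file path and
# correction name are hypothetical placeholders):
#
#     puWeight = makePileupWeight(
#         ("POG/LUM/2018_UL/puWeights.json.gz", "Collisions18_UltraLegacy_goldenJSON"),
#         t.Pileup_nTrueInt, systName="pileup", sel=noSel)
#     mcSel = noSel.refine("mcWeight", weight=[t.genWeight, puWeight])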
[docs] def makeMultiPrimaryDatasetTriggerSelection(sampleName, datasetsAndTriggers):
    """ Construct a selection that prevents processing multiple times (from different primary datasets)

    If an event passes triggers for different primary datasets, it will be taken from
    the first of those (i.e. the selection will be 'passes one of the triggers that
    select it for this primary dataset, and not for any of those that come before in
    the input dictionary').

    :param sampleName: sample name
    :param datasetsAndTriggers: a dictionary ``{primary-dataset, set-of-triggers}``, where
        the key is either a callable that takes a sample name and returns true in case
        it originates from the corresponding primary datasets, or a string that is
        the first part of the sample name in that case. The value (second item) can be
        a single expression (e.g. a trigger flag, or an OR of them), or a list of those
        (in which case an OR-expression is constructed from them).
    :returns: an expression to filter the events in the sample with given name

    :Example:

    >>> if not self.isMC(sample):
    >>>     trigSel = noSel.refine("trigAndPrimaryDataset",
    >>>         cut=makeMultiPrimaryDatasetTriggerSelection(sample, {
    >>>             "DoubleMuon" : [t.HLT.Mu17_TrkIsoVVL_Mu8_TrkIsoVVL,
    >>>                             t.HLT.Mu17_TrkIsoVVL_TkMu8_TrkIsoVVL],
    >>>             "DoubleEG"   : t.HLT.Ele23_Ele12_CaloIdL_TrackIdL_IsoVL_DZ,
    >>>             "MuonEG"     : [t.HLT.Mu23_TrkIsoVVL_Ele12_CaloIdL_TrackIdL_IsoVL,
    >>>                             t.HLT.Mu8_TrkIsoVVL_Ele23_CaloIdL_TrackIdL_IsoVL]
    >>>             }))
    """
    # python3.6+ dictionaries keep insertion order
    # (implementation detail in cpython 3.6, part of the language spec since 3.7)
    # see https://docs.python.org/3/library/stdtypes.html#mapping-types-dict
    from . import treefunctions as op
    inDSAndTrigSel = [
        ((sampleName.startswith(dsArg) if isinstance(dsArg, str) else dsArg(sampleName)),
         (op.OR(*trigArg) if hasattr(trigArg, "__iter__") else trigArg))
        for dsArg, trigArg in (
            datasetsAndTriggers.items() if isinstance(datasetsAndTriggers, dict)
            else datasetsAndTriggers)]
    sels_not = []
    trigSel = None
    for inThisDataset, dsTrigSel in inDSAndTrigSel:
        if not inThisDataset:
            sels_not.append(dsTrigSel)
        else:
            trigSel = dsTrigSel
            break
    if trigSel is None:
        raise RuntimeError(f"Sample name {sampleName} matched none of the primary datasets")
    if len(sels_not) == 0:  # first
        return trigSel
    else:
        return op.AND(op.NOT(op.OR(*sels_not)), trigSel)
[docs] def configureRochesterCorrection(variProxy, paramsFile, isMC=False, backend=None, uName=""):
    """ Apply the Rochester correction for muons

    :param variProxy: muon variations proxy, e.g. ``tree._Muon`` for NanoAOD
    :param paramsFile: path of the text file with correction parameters
    :param isMC: MC or not
    :param backend: backend pointer
        (returned from :py:meth:`~bamboo.analysismodules.HistogramsModule.prepareTree`)
    :param uName: [deprecated, ignored] unique name for the correction calculator
        (sample name is a safe choice)
    """
    if uName != "":
        warnings.warn("Passing uName to configureRochesterCorrection "
                      "is not necessary anymore (and ignored), please update",
                      DeprecationWarning, stacklevel=1)
    from bamboo.treefunctions import _to, _tp
    aMu = variProxy.orig[0]
    args = [getattr(aMu, comp).op.arg for comp in (
        "pt", "eta", "phi", "mass", "charge", "nTrackerLayers")]
    if isMC:
        args += [aMu.genPart._idx.arg, variProxy._parent.GenPart[0].pt.op.arg]
        evt = variProxy._parent
        args.append((evt.run << 20) + (evt.luminosityBlock << 10) + evt.event + 169)
    else:
        args += [
            _to.ExtVar("ROOT::VecOps::RVec<Int_t>", "ROOT::VecOps::RVec<Int_t>{}"),
            _to.ExtVar("ROOT::VecOps::RVec<float>", "ROOT::VecOps::RVec<float>{}"),
            _tp.makeConst(0, "unsigned")]  # no seed
    # load necessary library and header(s)
    from . import treefunctions as op  # first to load default headers/libs, if still needed
    from .root import loadRochesterCorrectionCalculator, gbl
    loadRochesterCorrectionCalculator(backend=backend)
    if not os.path.exists(paramsFile):
        raise ValueError(f"File {paramsFile} not found")
    # define calculator and initialize
    roccorName = backend.symbol(
        f'RochesterCorrectionCalculator <<name>>{{"{paramsFile}"}};')
    calcHandle = getattr(gbl, roccorName)
    calcProd = op.extVar("RochesterCorrectionCalculator", roccorName)
    variProxy.addCalculator(calcProd, calcHandle, args=args)
    assert variProxy.calcWithProd[calcHandle]
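
# Example usage (a minimal sketch; the parameters file name is a hypothetical
# placeholder for a Rochester-correction text file):
#
#     configureRochesterCorrection(tree._Muon, "RoccoR2018UL.txt",
#                                  isMC=self.isMC(sample), backend=be)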
[docs] def configureTauESCorrection(variProxy, paramsFile, tauIdAlgo, backend=None):
    """ Apply the energy correction for taus

    :param variProxy: tau variations proxy, e.g. ``tree._Tau`` for NanoAOD
    :param paramsFile: path of the json file with correction parameters
    :param tauIdAlgo: name of the algorithm for the tau identification, e.g. "DeepTau2017v2p1"
    :param backend: backend pointer
        (returned from :py:meth:`~bamboo.analysismodules.HistogramsModule.prepareTree`)
    """
    from bamboo.treefunctions import _tp
    aTau = variProxy.orig[0]
    args = [getattr(aTau, comp).op.arg for comp in (
        "pt", "mass", "eta", "decayMode", "genPartFlav")]
    args.append(_tp.makeConst(tauIdAlgo, "std::string"))
    # load necessary library and header(s)
    from . import treefunctions as op  # first to load default headers/libs, if still needed
    from .root import loadTauESCorrectionCalculator, gbl
    loadTauESCorrectionCalculator(backend=backend)
    if not os.path.exists(paramsFile):
        raise ValueError(f"File {paramsFile} not found")
    # define calculator and initialize
    tauESName = backend.symbol(
        f'TauESCorrectionCalculator <<name>>{{"{paramsFile}"}};')
    variProxy._initCalc(op.extVar("TauESCorrectionCalculator", tauESName),
                        calcHandle=getattr(gbl, tauESName), args=args)
    variProxy._initFromCalc()
    assert variProxy.calcProd
[docs] def configureSVfitCalculator(pathToSVfit="", backend=None):
    """ Configure SVfit

    :param pathToSVfit: path to your SVfit installation
    :param backend: backend pointer
        (returned from :py:meth:`~bamboo.analysismodules.HistogramsModule.prepareTree`)
    """
    # load necessary library and header(s)
    from .root import loadSVfitCalculator
    loadSVfitCalculator(backend=backend, pathToSVfit=pathToSVfit)