Source code for bamboo.plots

"""
The :py:mod:`bamboo.plots` module provides high-level classes to represent
and manipulate selections and plots.
"""
# Public names of this module: this tuple controls what ``from bamboo.plots import *`` exports.
__all__ = (
    "Plot", "EquidistantBinning", "VariableBinning", "Selection", "Product", "DerivedPlot",
    "SummedPlot", "CutFlowReport", "SelectionWithSub", "SelectionWithDataDriven",
    "LateSplittingSelection", "CategorizedSelection", "FactoryBackend", "Skim"
)

import logging
from collections import defaultdict
from itertools import chain

from . import treefunctions as op
from . import treeoperations as top
from .treeproxies import adaptArg

logger = logging.getLogger(__name__)


class FactoryBackend:
    """
    Interface for factory backend (to separate Plots classes from ROOT::RDataFrame part)

    Every method here is a no-op placeholder that concrete backends are expected to
    override. The signatures accept the keyword arguments that the classes in this
    module pass when they call into the backend (``autoSyst`` for plots, ``key`` for
    result retrieval and cut flow reports), so that the interface can stand in for
    a real backend without raising ``TypeError``.
    """

    def __init__(self):
        pass

    def addSelection(self, selection):
        """ Register a selection (called from the ``Selection`` constructor) """
        pass

    def addPlot(self, plot, autoSyst=True):
        """
        Register a plot

        :param plot: the plot to register
        :param autoSyst: forwarded by ``Selection.registerPlot`` from the plot constructor
        """
        pass

    def addDerived(self, product):
        """ Register a derived product """
        pass

    def addSkim(self, skim):  # TODO move towards addProduct
        """ Register a skim """
        pass

    def addCutFlowReport(self, report, selections=None, autoSyst=True, key=None):
        """
        Register a cut flow report

        :param report: the report to register
        :param selections: selections to count for (optional)
        :param autoSyst: automatically add systematic variations
        :param key: key to store the counters under (optional; passed by
            ``CutFlowReport._register`` for sub-selections)
        """
        pass

    def define(self, op, selection):
        """ explicitly define column for expression """
        pass

    def buildGraph(self, plotList):
        """ Called after adding all products, but before retrieving the results """
        pass

    def getResults(self, plot, key=None):
        """
        Retrieve the results for a product

        :param plot: product to retrieve the results for
        :param key: key under which the results are stored (optional; passed by
            ``DerivedPlot.collectDependencyResults``)
        """
        pass

    def setNThreads(self, nThreads):
        pass

    def addDependency(self, **kwargs):
        pass

    @classmethod
    def create(cls, tree, nThreads=None):
        """ Factory method, should return a pair of the backend and root selection """
        return (None, None)
class Product:
    """ Interface for output products (plots, counters etc.) """

    def __init__(self, name, key=None):
        """
        :param name: name of the product
        :param key: key for the product (optional, the name is used if not given)
        """
        self.name = name
        self.key = name if key is None else key

    def produceResults(self, bareResults, fbe, key=None):
        """
        Main interface method, called by the backend

        :param bareResults: iterable of histograms for this plot produced by the backend
        :param fbe: reference to the backend
        :param key: key under which the backend stores the results (if any)

        :returns: an iterable with ROOT objects to save to the output file
        """
        # interface only: implementations return the objects to store
        return None
class EquidistantBinning:
    """ Equidistant binning """
    __slots__ = ("__weakref__", "N", "mn", "mx")

    def __init__(self, N, mn, mx):
        """
        :param N: number of bins
        :param mn: minimum axis value
        :param mx: maximum axis value
        """
        self.N, self.mn, self.mx = N, mn, mx

    @property
    def minimum(self):
        """ Lower edge of the axis (alias of ``mn``) """
        return self.mn

    @property
    def maximum(self):
        """ Upper edge of the axis (alias of ``mx``) """
        return self.mx

    def __repr__(self):
        clsName = self.__class__.__name__
        return f"{clsName}({self.N}, {self.mn:f}, {self.mx:f})"
class VariableBinning:
    """ Variable-sized binning """
    __slots__ = ("__weakref__", "binEdges")

    def __init__(self, binEdges):
        """
        :param binEdges: iterable with the edges. There will be ``len(binEdges)-1`` bins

        :raises ValueError: if the edges are not strictly increasing
        """
        binEdges = list(binEdges)  # materialise, the argument may be any iterable
        if any(hi <= lo for lo, hi in zip(binEdges, binEdges[1:])):
            raise ValueError(f"Variable bin edges should be strictly increasing: {binEdges}")
        self.binEdges = binEdges

    @property
    def N(self):
        """ Number of bins (one less than the number of edges) """
        return len(self.binEdges) - 1

    @property
    def minimum(self):
        """ Lowest bin edge """
        return self.binEdges[0]

    @property
    def maximum(self):
        """ Highest bin edge """
        return self.binEdges[-1]

    def __repr__(self):
        # the join expression must be inside braces to be interpolated
        edges = ", ".join(str(xi) for xi in self.binEdges)
        return f"{self.__class__.__name__}([{edges}])"
class Plot(Product):
    """
    A :py:class:`~bamboo.plots.Plot` object contains all information needed
    to produce a histogram: the variable(s) to plot, binnings and options
    (axis titles, optionally some style information), and a reference to
    a :py:class:`~bamboo.plots.Selection` (which holds all cuts and weights to apply for the plot).

    .. note::

        All :py:class:`~bamboo.plots.Plot` (and :py:class:`~bamboo.plots.Selection`)
        instances need to have a unique name. This name is used to construct output filenames,
        and internally to define DataFrame columns with readable names.
        The constructor will raise an exception if an existing name is used.
    """

    def __init__(
            self, name, variables, selection, binnings, weight=None, title="",
            axisTitles=tuple(), axisBinLabels=tuple(), plotopts=None, autoSyst=True, key=None):
        """
        Generic constructor. Please use the static :py:meth:`~bamboo.plots.Plot.make1D`,
        :py:meth:`~bamboo.plots.Plot.make2D` and :py:meth:`~bamboo.plots.Plot.make3D` methods,
        which provide a more convenient interface to construct histograms
        (filling in some defaults requires knowing the dimensionality).
        """
        nVars, nBins = len(variables), len(binnings)
        if nVars != nBins:
            raise ValueError(
                f"Unequal number of variables ({len(variables):d}) and binnings ({len(binnings):d})")
        super().__init__(name, key=key)
        self.variables = variables
        self.selection = selection
        # plot-specific weight factors, on top of those of the selection
        self._weights = [
            adaptArg(wgt, typeHint=top.floatType)
            for wgt in Selection._optionalToIterable(weight)
        ]
        self.binnings = binnings
        self.title = title
        self.axisTitles = axisTitles
        self.axisBinLabels = axisBinLabels
        self.plotopts = plotopts if plotopts else dict()
        # register with backend
        selection.registerPlot(self, autoSyst=autoSyst)

    def clone(
            self, name=None, variables=None, selection=None, binnings=None, weight=None,
            title=None, axisTitles=None, axisBinLabels=None, plotopts=None,
            autoSyst=True, key=None):
        """ Helper method: create a copy with optional re-setting of attributes """
        def pick(override, current):
            # take the passed value when given, otherwise keep this plot's attribute
            return current if override is None else override

        return Plot(
            pick(name, self.name),
            pick(variables, self.variables),
            pick(selection, self.selection),
            pick(binnings, self.binnings),
            weight=pick(weight, self._weights),
            title=pick(title, self.title),
            axisTitles=pick(axisTitles, self.axisTitles),
            axisBinLabels=pick(axisBinLabels, self.axisBinLabels),
            plotopts=pick(plotopts, self.plotopts),
            autoSyst=autoSyst,
            key=key  # default is name
        )

    def produceResults(self, bareResults, fbe, key=None):
        """
        Trivial implementation of :py:meth:`~bamboo.plots.Product.produceResults`

        Subclasses can e.g. calculate additional systematic variation histograms
        from the existing ones

        :param bareResults: list of nominal and systematic variation histograms
            for this :py:class:`~bamboo.plots.Plot`
        :param fbe: reference to the backend
        :param key: key under which the backend stores the results (if any)

        :returns: ``bareResults``
        """
        return bareResults

    @property
    def cut(self):
        """ Combined cut expression of the selection """
        return self.selection.cut

    @property
    def weights(self):
        """ Weight factors of the selection, followed by those of the plot """
        return self.selection.weights + self._weights

    @property
    def longTitle(self):
        """ Plot title and axis titles, joined with ``;`` """
        return ";".join(chain([self.title], self.axisTitles))

    def __repr__(self):
        positional = f"{self.name!r}, {self.variables!r}, {self.selection!r}, {self.binnings!r}, "
        keywords = f"title={self.title!r}, axisTitles={self.axisTitles!r}"
        return "Plot(" + positional + keywords + ")"

    @classmethod
    def make1D(plotCls, name, variable, selection, binning, **kwargs):
        """
        Construct a 1-dimensional histogram plot

        :param name: unique plot name
        :param variable: x-axis variable expression
        :param selection: the :py:class:`~bamboo.plots.Selection` with cuts and weights to apply
        :param binning: x-axis binning
        :param weight: per-entry weight (optional, multiplied with the selection weight)
        :param title: plot title
        :param xTitle: x-axis title (optional, taken from plot title by default)
        :param xBinLabels: x-axis bin labels (optional)
        :param plotopts: dictionary of options to pass directly to plotIt (optional)
        :param autoSyst: automatically add systematic variations
            (True by default - set to False to turn off)

        :returns: the new :py:class:`~bamboo.plots.Plot` instance with a 1-dimensional histogram

        :Example:

        >>> hasTwoEl = noSel.refine(cut=(op.rng_len(t.Electron) >= 2))
        >>> mElElPlot = Plot.make1D(
        >>>     "mElEl", op.invariant_mass(t.Electron[0].p4, t.Electron[1].p4), hasTwoEl,
        >>>     EquidistantBinning(80, 50., 130.), title="Invariant mass of the leading-PT electrons")
        """
        # the title is consumed here: it only serves as the default x-axis title
        title = kwargs.pop("title", "")
        xTitle = kwargs.pop("xTitle", title)
        xBinLabels = kwargs.pop("xBinLabels", None)
        kwargs["axisTitles"] = (xTitle,)
        kwargs["axisBinLabels"] = (xBinLabels,)
        return plotCls(name, (adaptArg(variable),), selection, (binning,), **kwargs)

    @classmethod
    def make2D(plotCls, name, variables, selection, binnings, **kwargs):
        """
        Construct a 2-dimensional histogram plot

        :param name: unique plot name
        :param variables: x- and y-axis variable expression (iterable, e.g. tuple or list)
        :param selection: the :py:class:`~bamboo.plots.Selection` with cuts and weights to apply
        :param binnings: x- and y-axis binnings (iterable, e.g. tuple or list)
        :param weight: per-entry weight (optional, multiplied with the selection weight)
        :param title: plot title
        :param xTitle: x-axis title (optional, empty by default)
        :param yTitle: y-axis title (optional, empty by default)
        :param xBinLabels: x-axis bin labels (optional)
        :param yBinLabels: y-axis bin labels (optional)
        :param plotopts: dictionary of options to pass directly to plotIt (optional)
        :param autoSyst: automatically add systematic variations
            (True by default - set to False to turn off)

        :returns: the new :py:class:`~bamboo.plots.Plot` instance with a 2-dimensional histogram
        """
        xTitle = kwargs.pop("xTitle", "")
        yTitle = kwargs.pop("yTitle", "")
        kwargs["axisTitles"] = (xTitle, yTitle)
        xBinLabels = kwargs.pop("xBinLabels", None)
        yBinLabels = kwargs.pop("yBinLabels", None)
        kwargs["axisBinLabels"] = (xBinLabels, yBinLabels)
        adapted = tuple(adaptArg(v) for v in variables)
        return plotCls(name, adapted, selection, binnings, **kwargs)

    @classmethod
    def make3D(plotCls, name, variables, selection, binnings, **kwargs):
        """
        Construct a 3-dimensional histogram

        :param name: unique plot name
        :param variables: x-, y- and z-axis variable expression (iterable, e.g. tuple or list)
        :param selection: the :py:class:`~bamboo.plots.Selection` with cuts and weights to apply
        :param binnings: x-, y-, and z-axis binnings (iterable, e.g. tuple or list)
        :param weight: per-entry weight (optional, multiplied with the selection weight)
        :param title: plot title
        :param xTitle: x-axis title (optional, empty by default)
        :param yTitle: y-axis title (optional, empty by default)
        :param zTitle: z-axis title (optional, empty by default)
        :param xBinLabels: x-axis bin labels (optional)
        :param yBinLabels: y-axis bin labels (optional)
        :param zBinLabels: z-axis bin labels (optional)
        :param plotopts: dictionary of options to pass directly to plotIt (optional)
        :param autoSyst: automatically add systematic variations
            (True by default - set to False to turn off)

        :returns: the new :py:class:`~bamboo.plots.Plot` instance with a 3-dimensional histogram
        """
        xTitle = kwargs.pop("xTitle", "")
        yTitle = kwargs.pop("yTitle", "")
        zTitle = kwargs.pop("zTitle", "")
        kwargs["axisTitles"] = (xTitle, yTitle, zTitle)
        xBinLabels = kwargs.pop("xBinLabels", None)
        yBinLabels = kwargs.pop("yBinLabels", None)
        zBinLabels = kwargs.pop("zBinLabels", None)
        kwargs["axisBinLabels"] = (xBinLabels, yBinLabels, zBinLabels)
        adapted = tuple(adaptArg(v) for v in variables)
        return plotCls(name, adapted, selection, binnings, **kwargs)
class Selection:
    """
    A :py:class:`~bamboo.plots.Selection` object groups a set of selection criteria
    (cuts) and weight factors that belong to a specific stage of the selection and analysis.
    Selections should be constructed by calling the :py:meth:`~bamboo.plots.Selection.refine`
    method on a 'root' selection (which may include overall selections and weights, e.g.
    a lumi mask for data and pileup reweighting for MC).

    .. note::

        All :py:class:`~bamboo.plots.Selection` (and :py:class:`~bamboo.plots.Plot`)
        instances need to have a unique name. This name is used internally to define
        DataFrame columns with readable names.
        The constructor will raise an exception if an existing name is used.
    """

    def __init__(self, parent, name, cuts=None, weights=None, autoSyst=True):
        """
        Constructor. Prefer using :py:meth:`~bamboo.plots.Selection.refine` instead
        (except for the 'root' selection)

        :param parent: backend or parent selection
        :param name: (unique) name of the selection
        :param cuts: iterable of selection criterion expressions (optional)
        :param weights: iterable of weight factors (optional)
        :param autoSyst: collect systematic variations from the cuts and weights
            (only effective if also enabled for the parent selection)
        """
        self.name = name
        self.parent = None
        self._cuts = [
            adaptArg(cut, typeHint=top.boolType)
            for cut in Selection._optionalToIterable(cuts)
        ]
        self._weights = [
            adaptArg(wgt, typeHint=top.floatType)
            for wgt in Selection._optionalToIterable(weights)
        ]
        self._cSysts = {}
        self._wSysts = {}
        # register with backend
        if isinstance(parent, Selection):
            # systematics can be switched off, but not back on, further down the tree
            self.autoSyst = parent.autoSyst and autoSyst
            self.parent = parent
            self._fbe = parent._fbe
        else:
            self.autoSyst = autoSyst
            assert isinstance(parent, FactoryBackend)
            self._fbe = parent
        if self.autoSyst:
            # { varName : { expr : [ nodes to change ] } }
            self._cSysts = top.collectSystVars(self._cuts)
            self._wSysts = top.collectSystVars(self._weights)
        self._fbe.addSelection(self)

    def registerPlot(self, plot, **kwargs):
        self._fbe.addPlot(plot, **kwargs)

    def registerSkim(self, skim, **kwargs):
        self._fbe.addSkim(skim, **kwargs)

    def registerDerived(self, product, **kwargs):
        self._fbe.addDerived(product, **kwargs)

    def registerCutFlowReport(self, product, selections, **kwargs):
        self._fbe.addCutFlowReport(product, selections, **kwargs)

    # helper: convert None, single item, or iterable arg to iterable
    @staticmethod
    def _optionalToIterable(arg):
        if arg is None:
            return []
        # tuple-like expressions are iterable, but count as a single item here
        isSingle = (
            isinstance(arg, top.TupleOp)
            or isinstance(arg, top.TupleBaseProxy)
            or not hasattr(arg, "__iter__"))
        return [arg] if isSingle else arg

    @property
    def cuts(self):
        """ All cuts: those of the parent selection(s), followed by the ones added here """
        if not self.parent:
            return self._cuts
        return self.parent.cuts + self._cuts

    @property
    def weights(self):
        """ All weights: those of the parent selection(s), followed by the ones added here """
        if not self.parent:
            return self._weights
        return self.parent.weights + self._weights

    @property
    def weightSystematics(self):
        """ Names of the systematic variations of the weights (new set) """
        if not self.parent:
            return set(self._wSysts)
        combined = self.parent.weightSystematics  # new set
        combined.update(self._wSysts)
        return combined

    @property
    def cutSystematics(self):
        """ Names of the systematic variations of the cuts (new set) """
        if not self.parent:
            return set(self._cSysts)
        combined = self.parent.cutSystematics  # new set
        combined.update(self._cSysts)
        return combined

    @property
    def systematics(self):
        """ Names of all systematic variations, of weights and cuts (new set) """
        combined = self.weightSystematics  # new set
        combined.update(self.cutSystematics)
        return combined

    # for debugging/monitoring: full cut and weight expression ## TODO review
    @property
    def cut(self):
        return Selection._makeExprAnd(self.cuts)

    @property
    def weight(self):
        return Selection._makeExprProduct(self.weights)

    def __repr__(self):  # TODO maybe change to refer to parent
        return f"{self.__class__.__name__}({self.name!r}, {self.cut!r}, {self.weight!r})"

    def __eq__(self, other):  # FIXME do we even still need this?
        # element-wise comparison of the full cut lists first, then the weight lists
        if len(self.cuts) != len(other.cuts):
            return False
        if not all(sc == oc for sc, oc in zip(self.cuts, other.cuts)):
            return False
        if len(self.weights) != len(other.weights):
            return False
        return all(sw == ow for sw, ow in zip(self.weights, other.weights))

    def refine(self, name, cut=None, weight=None, autoSyst=True):
        """
        Create a new selection by adding cuts and/or weight factors

        :param name: unique name of the new selection
        :param cut: expression (or list of expressions) with additional selection criteria
            (combined using logical AND)
        :param weight: expression (or list of expressions) with additional weight factors
        :param autoSyst: automatically add systematic variations (True by default - set to False
            to turn off; note that this would also turn off automatic systematic variations
            for any selections and plots that derive from the one created by this method)

        :returns: the new :py:class:`~bamboo.plots.Selection`
        """
        # same class as this selection, so subclasses refine into subclasses
        return self.__class__(self, name, cuts=cut, weights=weight, autoSyst=autoSyst)

    @staticmethod
    def _makeExprAnd(listOfReqs):
        # op.AND for expressions (helper for histfactory etc.)
        nReqs = len(listOfReqs)
        if nReqs == 1:
            return listOfReqs[0]
        if nReqs > 1:
            return adaptArg(op.AND(*listOfReqs))
        return adaptArg("true", typeHint=top.boolType)

    @staticmethod
    def _makeExprProduct(listOfFactors):
        # op.product for expressions (helper for histfactory etc.)
        nFactors = len(listOfFactors)
        if nFactors == 1:
            return listOfFactors[0]
        if nFactors > 1:
            return adaptArg(op.product(*listOfFactors))
        return adaptArg(1., typeHint=top.floatType)
class DerivedPlot(Product):
    """
    Base class for a plot with results based on other plots' results

    The :py:attr:`~bamboo.plots.DerivedPlot.dependencies` attribute that lists
    the :py:class:`~bamboo.plots.Plot`-like objects this one depends on (which
    may be used e.g. to order operations).
    The other necessary properties (binnings, titles, labels, etc.) are taken
    from the keyword arguments to the constructor, or the first dependency.
    The :py:meth:`~bamboo.plots.DerivedPlot.produceResults` method,
    which is called by the backend to retrieve the derived results,
    should be overridden with the desired calculation.

    Typical use cases are summed histograms, background subtraction, etc.
    (the results are combined for different subjobs with hadd, so derived
    quantities that require the full statistics should be calculated from
    the postprocessing step; alternative or additional systematic variations
    calculated from the existing ones can be added by subclassing
    :py:class:`~bamboo.plots.Plot`).
    """

    def __init__(self, name, dependencies, **kwargs):
        """
        :param name: name of the derived plot (should not contain ``__``)
        :param dependencies: the plots this one is calculated from
        :param kwargs: optional overrides (``binnings``, ``axisTitles``, ``axisBinLabels``,
            ``plotopts``, or per-axis ``xTitle``, ``xBinLabels`` etc.);
            the defaults are taken from the first dependency
        """
        super().__init__(name)
        if "__" in name:
            raise RuntimeError(
                "No '__' should be present in the name of a derived plot: "
                "it is reserved for separating the name from systematic variations")
        self.dependencies = dependencies
        first = dependencies[0]
        self.binnings = kwargs.get("binnings", first.binnings)
        # one axis letter per binning
        axes = "xyzuvw"[:len(self.variables)]
        perAxisTitles = tuple(
            kwargs.get(f"{ax}Title", first.axisTitles[i]) for i, ax in enumerate(axes))
        self.axisTitles = kwargs.get("axisTitles", perAxisTitles)
        perAxisLabels = tuple(
            kwargs.get(f"{ax}BinLabels", first.axisBinLabels[i]) for i, ax in enumerate(axes))
        self.axisBinLabels = kwargs.get("axisBinLabels", perAxisLabels)
        self.plotopts = kwargs.get("plotopts", first.plotopts)
        # register with backend
        first.selection.registerDerived(self)

    @property
    def variables(self):
        """ Placeholder: one ``None`` per binning """
        return [None] * len(list(self.binnings))

    def produceResults(self, bareResults, fbe, key=None):
        """
        Main interface method, called by the backend

        :param bareResults: iterable of histograms for this plot produced by the backend (none)
        :param fbe: reference to the backend, can be used to retrieve the histograms
            for the dependencies, e.g. with
            :py:meth:`~bamboo.plots.DerivedPlot.collectDependencyResults`
        :param key: key under which the backend stores the results (if any)

        :returns: an iterable with ROOT objects to save to the output file
        """
        return []

    def collectDependencyResults(self, fbe, key=None):
        """
        helper method: collect all results of the dependencies

        :returns: ``[ (nominalResult, {"variation" : variationResult}) ]``
        """
        collected = []
        # a two-element key is (name, suffix): look up the same suffix for each dependency
        hasSuffix = key is not None and len(key) == 2
        for dep in self.dependencies:
            depKey = (dep.name, key[1]) if hasSuffix else None
            depResults = fbe.getResults(dep, key=depKey)
            if not depResults:
                continue
            nominal = None
            variations = {}
            for res in depResults:
                resName = res.GetName()
                if "__" in resName:
                    variations[resName.split("__")[1]] = res
                else:
                    assert nominal is None
                    nominal = res
            collected.append((nominal, variations))
        return collected
class SummedPlot(DerivedPlot):
    """ A :py:class:`~bamboo.plots.DerivedPlot` implementation that sums histograms """

    def __init__(self, name, termPlots, **kwargs):
        """
        :param name: name of the summed plot
        :param termPlots: the plots to add up
        """
        super().__init__(name, termPlots, **kwargs)

    def produceResults(self, bareResults, fbe, key=None):
        """
        Sum the nominal histograms of the terms, and each of the variations

        :returns: list with the nominal sum, followed by one sum per systematic variation
            (empty if no results were found for the terms)
        """
        from .root import gbl
        res_dep = self.collectDependencyResults(fbe, key=key)
        if not res_dep:
            return []

        def getVal(prod):
            # DistRDF actions return DistRDF.ActionProxy objects, not RResultPtr's
            if hasattr(prod, "GetValue"):
                return prod.GetValue()
            return prod.GetPtr()

        def asHist(obj):
            # unwrap the result holder, unless it is a histogram already
            return obj if isinstance(obj, gbl.TH1) else getVal(obj)

        firstNom, firstVars = res_dep[0]
        otherTerms = res_dep[1:]
        # list all variations (some may not be there for all)
        allVars = set()
        for _, resVar in res_dep:
            allVars.update(resVar.keys())
        # sum nominal
        hNom = firstNom.Clone(self.name)
        for termNom, _ in otherTerms:
            hNom.Add(asHist(termNom))
        results = [hNom]
        # sum variations (using nominal if not present for some)
        for vn in allVars:
            hVar = firstVars.get(vn, firstNom).Clone("__".join((self.name, vn)))
            for termNom, termVars in otherTerms:
                hVar.Add(asHist(termVars.get(vn, termNom)))
            results.append(hVar)
        return results
class CutFlowReport(Product):
    """
    Collect and print yields at different selection stages, and cut efficiencies

    The simplest way to use this, just to get an overview of the number of events
    passing each selection stage in the log file, is by adding a
    ``CutFlowReport("yields", selections=<list of selections>, recursive=True, printInLog=True)``
    to the list of plots.
    ``recursive=True`` will add all parent selections recursively,
    so only the final selection categories need to be passed to the
    ``selections`` keyword argument.

    It is also possible to output a LaTeX yields table, and specify exactly
    which selections and row or column headers are used.
    Then the :py:class:`~bamboo.plots.CutFlowReport` should be constructed like this:

    .. code-block:: python

       yields = CutFlowReport("yields")
       plots.append(yields)
       yields.add(<selection1-or-list-of-selections1>, title=title1)
       yields.add(<selection2-or-list-of-selections2>, title=title2)
       ...

    Each ``yields.add`` call will then add one entry in the yields table,
    with the yield the one of the corresponding selection, or the sum over
    the list (e.g. different categories that should be taken together);
    the other dimension are the samples (or sample groups).
    The sample (group) titles and formatting of the table can be
    customised in the same way as in plotIt, see
    :py:func:`~bamboo.analysisutils.printCutFlowReports`
    for a detailed description of the different options.
    """
    class Entry:
        # counters for one selection
        def __init__(self, name, nominal=None, systVars=None, parent=None, children=None):
            self.name = name
            self.nominal = nominal
            self.systVars = systVars or dict()
            self.parent = parent
            self.children = list(children) if children is not None else []

        def _load(self, tmpF):
            # replace stored object names by the objects read from the file
            if isinstance(self.nominal, str):
                self.nominal = tmpF.Get(self.nominal)
                # iterate over (name, value) pairs: iterating the dict itself only gives keys
                self.systVars = {svNm: tmpF.Get(svV) for svNm, svV in self.systVars.items()}
            return self

        def setParent(self, parent):
            # link both ways, without duplicating the child entry
            self.parent = parent
            if self not in parent.children:
                parent.children.append(self)

    def __init__(
            self, name, selections=None, recursive=False, titles=None,
            autoSyst=False, cfres=None, printInLog=False):
        """
        Constructor. ``name`` is mandatory, all other are optional; for full control
        the :py:meth:`~bamboo.plots.CutFlowReport.add` should be used to add entries.

        Using the constructor with a list of :py:class:`~bamboo.plots.Selection`
        instances passed to the ``selections`` keyword argument, and
        ``recursive=True, printInLog=True`` is the easiest way to get debugging printout
        of the numbers of passing events.
        """
        super().__init__(name)
        self.recursive = recursive
        if selections is None:
            self.selections = []
        else:
            self.selections = list(selections) if hasattr(selections, "__iter__") else [selections]
        self.titles = defaultdict(list)
        if titles is not None:
            self.titles = titles
        elif self.selections:
            self.titles.update({sel.name: sel.name for sel in self.selections})
        self.autoSyst = autoSyst
        self.cfres = cfres if cfres is not None else defaultdict(list)
        # only register with the backend if the results were not passed in
        if self.selections and cfres is None:
            self._register()
        self.printInLog = printInLog

    def _register(self, selections=None):
        # register counters for the selections (by default: all of this report) with the backend
        if selections is None:
            selections = self.selections
        aSelection = selections[0]
        selections = {sel.name: sel for sel in selections}
        if self.recursive:
            for sel in list(selections.values()):
                isel = sel.parent
                # walk up until the root, or an ancestor that is included already
                while isel is not None and isel.name not in selections:
                    selections[isel.name] = isel
                    isel = isel.parent
        aSelection.registerCutFlowReport(self, selections, autoSyst=self.autoSyst)
        sels_per_sub = defaultdict(dict)
        for selName, sel in selections.items():
            if isinstance(sel, SelectionWithSub):
                sel.initSub()
                for suffix, subSel in sel.sub.items():
                    if subSel is not None:
                        sels_per_sub[suffix][selName] = subSel
        for suffix, subSels in sels_per_sub.items():
            logger.debug(f"Registering counters for {suffix} part of {', '.join(subSels.keys())}")
            aSelection.registerCutFlowReport(
                self, subSels, key=(self.name, suffix), autoSyst=self.autoSyst)

    def add(self, selections, title=None):
        """ Add an entry to the yields table, with a title (optional) """
        if not hasattr(selections, "__iter__"):
            selections = [selections]
        # skip the selections that were added before
        selections = [sel for sel in selections if sel not in self.selections]
        if title is not None:
            self.titles[title] += [sel.name for sel in selections]
        else:
            self.titles.update({sel.name: sel.name for sel in selections})
        if selections:
            self.selections += selections
            self._register(selections)

    def produceResults(self, bareResults, fbe, key=None):
        """
        Collect the counters to store, for the registered entries and their parents

        :returns: list with the nominal counters, followed by the systematic variations
        """
        entries = list()
        for iEn in self.cfres[key if key else self.name]:  # self.cfres was set by addCutFlowReport
            while iEn is not None and iEn not in entries:
                entries.append(iEn)
                iEn = iEn.parent
        return ([res.nominal.product for res in entries]
                + [v.product for res in entries for v in res.systVars.values()])

    def rootEntries(self):
        # helper: traverse reports tree up
        def travUp(entry):
            yield entry
            yield from travUp(entry.parent)
        # the generator is lazy: next() stops at the first entry without parent
        return {next(en for en in travUp(res) if en.parent is None)
                for lres in self.cfres.values() for res in lres}

    def readFromResults(self, resultsFile):
        """
        Reconstruct the :py:class:`~bamboo.plots.CutFlowReport`,
        reading counters from a results file
        """
        cfres = []
        entries = {}  # by selection name
        for sel in self.selections:
            if sel.name not in entries:
                entries[sel.name] = CutFlowReport.Entry(sel.name)
                if self.recursive:
                    isel = sel.parent
                    entry_d = entries[sel.name]
                    while isel is not None:
                        if isel.name in entries:
                            # the rest of the chain exists already: attach and stop
                            entry_p = entries[isel.name]
                            entry_d.setParent(entry_p)
                            break
                        entry_p = CutFlowReport.Entry(isel.name)
                        entries[isel.name] = entry_p
                        entry_d.setParent(entry_p)
                        entry_d = entry_p
                        isel = isel.parent
            cfres.append(entries[sel.name])
        # retrieve nominal
        for selName, entry in entries.items():
            kyNm = f"{self.name}_{selName}"
            obj = resultsFile.Get(kyNm)
            if obj:
                entry.nominal = obj
        # and systematic variations
        prefix = f"{self.name}_"
        for ky in resultsFile.GetListOfKeys():
            if ky.GetName().startswith(prefix):
                selName = ky.GetName().split("__")[0][len(prefix):]
                if selName in entries:
                    entry = entries[selName]
                    cnt = ky.GetName().count("__")
                    if cnt == 1:
                        varNm = ky.GetName().split("__")[1]
                        if varNm in entry.systVars:
                            logger.warning(f"{self.name}: counter for variation {varNm} "
                                           f"already present for selection {selName}")
                        entry.systVars[varNm] = ky.ReadObj()
                    elif cnt > 1:
                        logger.warning(f"Key {ky.GetName()!r} contains '__' more than once, "
                                       "this will break assumptions")
        return CutFlowReport(
            self.name, self.selections, titles=self.titles, recursive=self.recursive,
            autoSyst=self.autoSyst, cfres={self.name: cfres}, printInLog=self.printInLog)
class SelectionWithSub(Selection):
    """
    A common base class for :py:class:`~bamboo.plots.Selection` subclasses
    with related/alternative/sub-:py:class:`~bamboo.plots.Selection` instances attached

    A dictionary of additional selections is kept in the ``sub`` attribute
    (could be ``None`` to disable).
    """

    def __init__(self, parent, name, cuts=None, weights=None, autoSyst=True, sub=None):
        """
        Same arguments as for :py:class:`~bamboo.plots.Selection`, with in addition:

        :param sub: dictionary of attached selections, by suffix (optional, empty by default)
        """
        super().__init__(parent, name, cuts=cuts, weights=weights, autoSyst=autoSyst)
        self.sub = dict() if sub is None else sub

    def initSub(self):
        """
        Initialize related selections
        (no-op by default, subclasses can request to call this to enable some functionality)
        """
        pass

    @staticmethod
    def getSubsForPlot(p, requireActive=False, silent=False):
        """ Helper method: gather the sub-selections for which a plot is produced """
        # first find the selections the product is based on
        if isinstance(p, Plot):
            baseSels = [p.selection]
        elif isinstance(p, DerivedPlot):
            baseSels = [dp.selection for dp in p.dependencies]
        elif isinstance(p, CutFlowReport):
            baseSels = list(p.selections)
        else:
            baseSels = []
            if not silent:
                logger.warning(
                    f"Unsupported product type for data-driven: {type(p).__name__}, "
                    "additional products will not be stored")
        # then collect the suffixes (only those with a selection attached, if requested)
        subs = set()
        for sel in baseSels:
            if not isinstance(sel, SelectionWithSub):
                continue
            for suff, subSel in sel.sub.items():
                if subSel is not None or not requireActive:
                    subs.add(suff)
        return subs

    def refine(self, name, cut=None, weight=None, autoSyst=True):
        """
        Create a new selection by adding cuts and/or weight factors,
        and do the same for each of the attached selections
        (their names are the new name with the suffix appended)
        """
        main = super().refine(name, cut=cut, weight=weight, autoSyst=autoSyst)
        refinedSubs = {}
        for suff, parent in self.sub.items():
            if parent is None:
                # disabled entries stay disabled
                refinedSubs[suff] = None
            else:
                refinedSubs[suff] = parent.refine(
                    "".join((name, suff)), cut=cut, weight=weight, autoSyst=autoSyst)
        main.sub = refinedSubs
        return main
class SelectionWithDataDriven(SelectionWithSub):
    """
    A main :py:class:`~bamboo.plots.Selection` with the corresponding "shadow"
    :py:class:`~bamboo.plots.Selection` instances for evaluating data-driven backgrounds
    (alternative cuts and/or weights)
    """

    @staticmethod
    def create(
            parent, name, ddSuffix, cut=None, weight=None, autoSyst=True,
            ddCut=None, ddWeight=None, ddAutoSyst=True, enable=True):
        """
        Create a selection with a data-driven shadow selection

        Drop-in replacement for a :py:meth:`bamboo.plots.Selection.refine` call:
        the main selection is made from the parent with ``cut`` and ``weight``,
        the shadow selection is made from the parent with ``ddCut`` and ``ddWeight``.
        With ``enable=False`` no shadow selection is made (this may help to avoid
        duplication in the calling code).
        """
        ddName = "".join((name, ddSuffix))
        ddSel = None
        if not isinstance(parent, SelectionWithSub):
            # create from regular Selection
            main = SelectionWithDataDriven(parent, name, cuts=cut, weights=weight, autoSyst=autoSyst)
            if enable:
                ddSel = parent.refine(ddName, cut=ddCut, weight=ddWeight, autoSyst=ddAutoSyst)
        else:
            nothingToAdd = cut is None and weight is None and autoSyst == parent.autoSyst
            if nothingToAdd:
                # the parent can be used as the main selection directly
                main = parent
            else:
                main = parent.refine(name, cut=cut, weight=weight, autoSyst=autoSyst)
            if enable:
                # base class refine: only the parent itself, not its attached selections
                ddSel = Selection.refine(
                    parent, ddName, cut=ddCut, weight=ddWeight, autoSyst=ddAutoSyst)
        if ddSel is not None:
            logger.debug(f"Adding the data-driven counterpart of {name} for the {ddSuffix} contribution")
            main.sub[ddSuffix] = ddSel
        return main

    def registerPlot(self, plot, **kwargs):
        # register the plot for the main selection, and a clone for each attached selection
        super().registerPlot(plot, **kwargs)
        for ddSuffix, ddSel in self.sub.items():
            if ddSel is None:
                continue
            # will register and go out of scope
            # (the module has all necessary information to retrieve and process the results;
            # everything by reference, so cheap)
            plot.clone(selection=ddSel, key=(plot.name, ddSuffix), **kwargs)

    # NOTE registerDerived not overridden, since none of the current backends need it
class CategorizedSelection:
    """
    Helper class to represent a group of similar selections on different categories

    The interface is similar, but not identical to that of :py:class:`~bamboo.plots.Selection`
    (constructing :py:class:`~bamboo.plots.Plot` objects is done through the
    :py:meth:`~bamboo.plots.CategorizedSelection.makePlots` method, which takes
    additional arguments).
    Each category selection can have a candidate, typically the object or group
    of objects that differs between the categories.
    The axis variables can then either be expressions, or callables that will
    be passed this per-category object.

    :Example:

    >>> muonSel = noSel.refine("hasMuon", cut=(
    ...     op.rng_len(muons) > 0, op.OR(op.rng_len(electrons) == 0,
    ...     muons[0].pt > electrons[0].pt)))
    >>> electronSel = noSel.refine("hasElectron", cut=(
    ...     op.rng_len(electrons) > 0, op.OR(op.rng_len(muons) == 0,
    ...     electrons[0].pt > muons[0].pt)))
    >>> oneLeptonSel = CategorizedSelection(categories={
    ...     "Mu" : (muonSel, muons[0]),
    ...     "El" : (electronSel, electrons[0])
    ...     })
    >>> oneLep2JSel = oneLeptonSel.refine("hasLep2J", cut=(op.rng_len(jets) >= 2))
    >>> plots += oneLep2JSel.makePlots("J1PT", jets[0].pt, EqB(50, 0., 150.))
    >>> plots += oneLep2JSel.makePlots("LJ1Mass",
    ...     (lambda l : op.invariant_mass(jets[0].p4, l.p4)), EqB(50, 0., 200.))
    """

    def __init__(self, parent=None, categories=None, name=None):
        """
        Construct a group of related selections

        :param name: name (optional)
        :param parent: parent CategorizedSelection (optional)
        :param categories: dictionary of a :py:class:`~bamboo.plots.Selection`
            and candidate (any python object) per category (key is category name),
            see the :py:meth:`~CategorizedSelection.addCategory` method below.
            If not given, the selection starts without categories.
        """
        self.name = name
        # Without this fallback the default (None) would make addCategory,
        # refine and makePlots fail; a dictionary passed by the caller is kept as is
        self.categories = categories if categories is not None else {}
        self.parent = parent

    def addCategory(self, catName, selection, candidate=None):
        """
        Add a category

        :param catName: category name
        :param selection: :py:class:`~bamboo.plots.Selection` for this category
        :param candidate: any python object with event-level quantities
            specific to this category

        :raises ValueError: if a category with this name is already present
        """
        if catName in self.categories:
            raise ValueError(f"A category with name {catName} is already present")
        self.categories[catName] = (selection, candidate)

    def refine(self, name, cut=None, weight=None, autoSyst=True):
        """
        Equivalent of :py:meth:`~bamboo.plots.Selection.refine`,
        but for all categories at a time

        :param name: common part of the name for the new category selections
            (individual names will be ``"{name}_{category}"``)
        :param cut: cut(s) to add. If callable, the category's candidate will be passed
        :param weight: weight(s) to add. If callable, the category's candidate will be passed
        :param autoSyst: automatically add systematic variations
            (True by default - set to False to turn off; note that this would also
            turn off automatic systematic variations for any selections and plots
            that derive from the one created by this method)

        :returns: the new :py:class:`CategorizedSelection`
        """
        # Normalise to lists once: they are traversed for every category,
        # so a one-shot iterable must not be consumed by the first one
        if cut is not None:
            cut = list(cut) if hasattr(cut, "__iter__") else [cut]
        if weight is not None:
            weight = list(weight) if hasattr(weight, "__iter__") else [weight]
        newCatsAndCands = {}
        for catName, (catSel, catCand) in self.categories.items():
            catCut = cut
            if cut is not None:
                # callables are evaluated with this category's candidate
                catCut = [(ict(catCand) if callable(ict) else ict) for ict in cut]
            catWeight = weight
            if weight is not None:
                catWeight = [(iwt(catCand) if callable(iwt) else iwt) for iwt in weight]
            newCatsAndCands[catName] = (
                catSel.refine(
                    f"{name}_{catName}", cut=catCut, weight=catWeight, autoSyst=autoSyst),
                catCand)
        return CategorizedSelection(name=name, parent=self, categories=newCatsAndCands)

    def makePlots(
            self, name, axisVariables, binnings, construct=None,
            savePerCategory=True, saveCombined=True, combinedPlotType=SummedPlot, **kwargs):
        """
        Make a plot for all categories, and/or a combined one

        :param name: plot name (per-category plot names will be ``"{name}_{category}"``)
        :param axisVariables: one or more axis variables. If callable,
            the category's candidate will be passed
        :param binnings: as many binnings as variables
        :param construct: plot factory method, by default the ``make{N}D`` method
            of :py:class:`~bamboo.plots.Plot` (with N the number of axis variables)
        :param savePerCategory: save the individual plots (enabled by default)
        :param saveCombined: save the combined plot (enabled by default)
        :param combinedPlotType: combined plot type,
            :py:class:`~bamboo.plots.SummedPlot` by default
        :param kwargs: additional keyword arguments, passed to ``construct``

        :returns: a list of plots
        """
        if (not savePerCategory) and (not saveCombined):
            return []  # no need to make plots then :-)
        # Normalise to a list once: it is measured with len() and traversed
        # for every category, so a one-shot iterable would not work
        if hasattr(axisVariables, "__iter__"):
            axisVariables = list(axisVariables)
        else:
            axisVariables = [axisVariables]
        if construct is None:  # default: use `Plot.makeND`
            construct = getattr(Plot, f"make{len(axisVariables):d}D")
        catPlots = []
        for catName, (catSel, catCand) in self.categories.items():
            variables = [(iVar(catCand) if callable(iVar) else iVar) for iVar in axisVariables]
            if len(variables) == 1:
                # a single variable is passed bare, not as a one-element list
                variables = variables[0]
            catPlots.append(construct(f"{name}_{catName}", variables, catSel, binnings, **kwargs))
        if not saveCombined:
            return catPlots
        return catPlots + [combinedPlotType(name, catPlots)]
class LateSplittingSelection(SelectionWithSub):
    """
    A drop-in replacement for :py:class:`~bamboo.plots.Selection` to efficiently split a sample

    The concept is quite similar to :py:class:`~bamboo.plots.SelectionWithDataDriven`,
    but with very different performance trade-offs: the former creates two parallel
    branches of the RDF graph, each for their own set of events (with a typically
    small performance overhead due to duplication), whereas this is for cases where
    all events should be processed identically until they are filled into histograms
    (e.g. separating subprocesses based on MC truth).
    It is worth defining columns with these categories early on, such that
    the splitting does not need to do it many times for different selections
    and categories.
    """

    def __init__(self, parent, name, cuts=None, weights=None, autoSyst=True, keepInclusive=None):
        """
        Constructor; the split cuts are taken over from a parent of this same class

        :param parent: parent selection
        :param name: selection name
        :param cuts: cut(s) to add
        :param weights: weight(s) to add
        :param autoSyst: passed on to the base class constructor
        :param keepInclusive: also produce the plots without splitting
            (if ``None``: taken from a parent of this same class, otherwise True)
        """
        super().__init__(parent, name, cuts=cuts, weights=weights, autoSyst=autoSyst)
        inheritsSplitting = isinstance(parent, LateSplittingSelection)
        # the dictionary is shared with the parent, not copied
        self.splitCuts = parent.splitCuts if inheritsSplitting else {}
        if keepInclusive is None:
            keepInclusive = parent.keepInclusive if inheritsSplitting else True
        self.keepInclusive = keepInclusive

    @staticmethod
    def create(parent, name, splitCuts=None, keepInclusive=True, cut=None, weight=None, autoSyst=True):
        """
        Create a selection that will lazily split into categories

        :param parent: parent selection (may not be a ``SelectionWithSub``)
        :param name: name of the new selection (after applying the cut and weight,
            as in :py:meth:`bamboo.plots.Selection.refine`)
        :param splitCuts: dictionary of regions, the values should be the cuts
            that define the region
        :param keepInclusive: also produce the plots without splitting
        :param cut: common selection
        :param weight: common weight
        :param autoSyst: automatically propagate systematic uncertainties

        :raises RuntimeError: if the parent is a ``SelectionWithSub``
        """
        if isinstance(parent, SelectionWithSub):
            raise RuntimeError(
                "LateSplittingSelection must be constructed before any SelectionWithSub, "
                "and extending is not supported")
        newSel = LateSplittingSelection(
            parent, name, cuts=cut, weights=weight, autoSyst=autoSyst,
            keepInclusive=keepInclusive)
        if splitCuts is None:
            splitCuts = {}
        newSel.splitCuts = splitCuts
        return newSel

    def initSub(self):
        """
        Initialize related selections, should be called before registering
        non-plot products (anything not going through registerPlot)
        """
        if not self.splitCuts:
            return
        missing = {
            suff: cut for suff, cut in self.splitCuts.items() if suff not in self.sub}
        if not missing:
            return
        # construct all new per-category selections first, then add them in one go
        created = {}
        for suff, cut in missing.items():
            created[suff] = Selection(self, f"{self.name}{suff}", cuts=cut)
        self.sub.update(created)

    def registerPlot(self, plot, **kwargs):
        """ Register the plot (if ``keepInclusive``), and a clone for every category """
        # always still produce the original one (it helps with having all variables defined)
        if self.keepInclusive:
            super().registerPlot(plot, **kwargs)
        if not self.splitCuts:
            return
        # here is the trick: per-category selections are only created once we attach a plot
        self.initSub()
        for suff, catSel in self.sub.items():
            if catSel is None:
                continue
            plot.clone(selection=catSel, key=(plot.name, suff), **kwargs)
class Skim(Product):
    """ Save selected branches for events that pass the selection to a skimmed tree """

    # marker object: pass as ``keepOriginal`` to keep all branches from the input tree
    KeepAll = object()

    class KeepRegex:
        """ Holder for a pattern of branch names to keep """
        def __init__(self, pattern):
            self.pattern = pattern

    def __init__(
            self, name, branches, selection, keepOriginal=None,
            maxSelected=-1, treeName=None, key=None):
        """
        Skim constructor

        :param name: name of the skim (also default name of the TTree)
        :param branches: dictionary of branches to keep (name and definition
            for new branches, or name and ``None`` for specific branches from
            the input tree)
        :param selection: :py:class:`~bamboo.plots.Selection` of events to save
        :param keepOriginal: list of branch names to keep,
            :py:obj:`bamboo.plots.Skim.KeepRegex` instances with patterns
            of branch names to keep, or :py:obj:`bamboo.plots.Skim.KeepAll`
            to keep all branches from the input tree
        :param maxSelected: maximal number of events to keep (default: no limit)
        :param treeName: name of the TTree (default: the name of the skim)
        :param key: key for the product (passed to the
            :py:class:`~bamboo.plots.Product` constructor)

        :Example:

        >>> plots.append(Skim("dimuSkim", {
        ...     "run": None,  # copy from input
        ...     "luminosityBlock": None,
        ...     "event": None,
        ...     "dimu_m": op.invariant_mass(muons[0].p4, muons[1].p4),
        ...     "mu1_pt": muons[0].pt,
        ...     "mu2_pt": muons[1].pt,
        ...     }, twoMuSel,
        ...     keepOriginal=[
        ...         Skim.KeepRegex("PV_.*"),
        ...         "nOtherPV",
        ...         Skim.KeepRegex("OtherPV_.*")
        ...     ]))
        """
        super().__init__(name, key=key)
        # split the request in newly defined branches and branches copied from the input
        newBranches = {}
        copiedBranches = []
        for brName, brDef in branches.items():
            if brDef is None:
                copiedBranches.append(brName)
            else:
                newBranches[brName] = brDef
        self.definedBranches = newBranches
        self.originalBranches = copiedBranches
        if keepOriginal:
            from collections.abc import Iterable
            # a single name is a string, which should not be iterated over
            isCollection = (
                isinstance(keepOriginal, Iterable) and not isinstance(keepOriginal, str))
            if isCollection:
                self.originalBranches.extend(keepOriginal)
            else:
                self.originalBranches.append(keepOriginal)
        self.selection = selection
        self.maxSelected = maxSelected
        self.treeName = treeName if treeName else name
        selection.registerSkim(self)

    def produceResults(self, bareResults, fbe, key=None):
        """ Pass the results from the backend through unchanged """
        return bareResults