Source code for bamboo.plots

"""
The :py:mod:`bamboo.plots` module provides high-level classes to represent
and manipulate selections and plots.
"""
__all__ = (
    "Plot", "EquidistantBinning", "VariableBinning", "Selection", "Product", "DerivedPlot",
    "SummedPlot", "CutFlowReport", "SelectionWithSub", "SelectionWithDataDriven",
    "SelectionWithMultipleDataDriven", "LateSplittingSelection", "CategorizedSelection",
    "FactoryBackend", "Skim"
)

import logging
from collections import defaultdict
from itertools import chain

from . import treefunctions as op
from . import treeoperations as top
from .treeproxies import adaptArg

logger = logging.getLogger(__name__)



[docs]
class FactoryBackend:
    """ Interface for factory backend (to separate Plots classes from ROOT::RDataFrame part) """

[docs]
    def __init__(self):
        pass


    def addSelection(self, selection):
        pass

    def addPlot(self, plot):
        pass

    def addDerived(self, product):
        pass

    def addSkim(self, skim):  # TODO move towards addProduct
        pass

    def addCutFlowReport(self, report, selections=None, autoSyst=True):
        pass


[docs]
    def define(self, op, selection):
        """ explicitly define column for expression """
        pass



[docs]
    def buildGraph(self, plotList):
        """ Called after adding all products, but before retrieving the results """
        pass


    def getResults(self, plot):
        pass

    def setNThreads(self, nThreads):
        pass

    def addDependency(self, **kwargs):
        pass


[docs]
    @classmethod
    def create(cls, tree, nThreads=None):
        """ Factory method, should return a pair of the backend and root selection """
        return (None, None)





[docs]
class Product:
    """ Interface for output products (plots, counters etc.) """

[docs]
    def __init__(self, name, key=None):
        self.name = name
        self.key = key if key is not None else name



[docs]
    def produceResults(self, bareResults, fbe, key=None):
        """
        Main interface method, called by the backend

        :param bareResults: iterable of histograms for this plot produced by the backend
        :param fbe: reference to the backend
        :param key: key under which the backend stores the results (if any)

        :returns: an iterable with ROOT objects to save to the output file
        """
        pass





[docs]
class EquidistantBinning:
    """ Equidistant binning """
    __slots__ = ("__weakref__", "N", "mn", "mx")


[docs]
    def __init__(self, N, mn, mx):
        """
        :param N: number of bins
        :param mn: minimum axis value
        :param mx: maximum axis value
        """
        self.N = N
        self.mn = mn
        self.mx = mx


    @property
    def minimum(self):
        return self.mn

    @property
    def maximum(self):
        return self.mx

    def __repr__(self):
        return f"{self.__class__.__name__}({self.N}, {self.mn:f}, {self.mx:f})"




[docs]
class VariableBinning:
    """ Variable-sized binning """
    __slots__ = ("__weakref__", "binEdges")


[docs]
    def __init__(self, binEdges):
        """
        :param binEdges: iterable with the edges. There will be ``len(binEges)-1`` bins
        """
        binEdges = list(binEdges)
        if any(binEdges[i + 1] <= binEdges[i] for i in range(len(binEdges) - 1)):
            raise ValueError(f"Variable bin edges should be strictly increasing: {binEdges}")
        self.binEdges = binEdges


    @property
    def N(self):
        return len(self.binEdges) - 1

    @property
    def minimum(self):
        return self.binEdges[0]

    @property
    def maximum(self):
        return self.binEdges[-1]

    def __repr__(self):
        return f"{self.__class__.__name__}([', '.join(str(xi) for xi in self.binEdges)])"




[docs]
class Plot(Product):
    """ A :py:class:`~bamboo.plots.Plot` object contains all information needed
    to produce a histogram: the variable(s) to plot, binnings and options
    (axis titles, optionally some style information), and a reference to
    a :py:class:`~bamboo.plots.Selection` (which holds all cuts and weights to apply for the plot).

    .. note::

        All :py:class:`~bamboo.plots.Plot` (and :py:class:`~bamboo.plots.Selection`) instances
        need to have a unique name. This name is used to construct output filenames, and internally
        to define DataFrame columns with readable names.
        The constructor will raise an exception if an existing name is used.
    """

[docs]
    def __init__(
            self, name, variables, selection, binnings, weight=None, title="", axisTitles=tuple(),
            axisBinLabels=tuple(), plotopts=None, autoSyst=True, key=None):
        """ Generic constructor. Please use the static :py:meth:`~bamboo.plots.Plot.make1D`,
        :py:meth:`~bamboo.plots.Plot.make2D` and :py:meth:`~bamboo.plots.Plot.make3D` methods,
        which provide a more convenient interface to construct histograms
        (filling in some defaults requires knowing the dimensionality).
        """
        if len(variables) != len(binnings):
            raise ValueError(
                f"Unequal number of variables ({len(variables):d}) and binnings ({len(binnings):d})")
        super().__init__(name, key=key)
        self.variables = variables
        self.selection = selection
        self._weights = [adaptArg(wgt, typeHint=top.floatType)
                         for wgt in Selection._optionalToIterable(weight)]
        self.binnings = binnings
        self.title = title
        self.axisTitles = axisTitles
        self.axisBinLabels = axisBinLabels
        self.plotopts = plotopts if plotopts else dict()
        # register with backend
        selection.registerPlot(self, autoSyst=autoSyst)



[docs]
    def clone(
            self, name=None, variables=None, selection=None, binnings=None, weight=None, title=None,
            axisTitles=None, axisBinLabels=None, plotopts=None, autoSyst=True, key=None):
        """ Helper method: create a copy with optional re-setting of attributes """
        return Plot((name if name is not None else self.name),
                    (variables if variables is not None else self.variables),
                    (selection if selection is not None else self.selection),
                    (binnings if binnings is not None else self.binnings),
                    weight=(weight if weight is not None else self._weights),
                    title=(title if title is not None else self.title),
                    axisTitles=(axisTitles if axisTitles is not None else self.axisTitles),
                    axisBinLabels=(axisBinLabels if axisBinLabels is not None else self.axisBinLabels),
                    plotopts=(plotopts if plotopts is not None else self.plotopts),
                    autoSyst=autoSyst,
                    key=key  # default is name
                    )



[docs]
    def produceResults(self, bareResults, fbe, key=None):
        """
        Trivial implementation of :py:meth:`~bamboo.plots.Product.produceResults`

        Subclasses can e.g. calculate additional systematic variation histograms from the existing ones

        :param bareResults: list of nominal and systematic variation histograms
            for this :py:class:`~bamboo.plots.Plot`
        :param fbe: reference to the backend
        :param key: key under which the backend stores the results (if any)

        :returns: ``bareResults``
        """
        return bareResults


    @property
    def cut(self):
        return self.selection.cut

    @property
    def weights(self):
        return self.selection.weights + self._weights

    @property
    def longTitle(self):
        return ";".join(chain([self.title], self.axisTitles))

    def __repr__(self):
        return (
            f"Plot({self.name!r}, {self.variables!r}, {self.selection!r}, {self.binnings!r}, "
            f"title={self.title!r}, axisTitles={self.axisTitles!r})")


[docs]
    @classmethod
    def make1D(plotCls, name, variable, selection, binning, **kwargs):
        """ Construct a 1-dimensional histogram plot

        :param name: unique plot name
        :param variable: x-axis variable expression
        :param selection: the :py:class:`~bamboo.plots.Selection` with cuts and weights to apply
        :param binning: x-axis binning
        :param weight: per-entry weight (optional, multiplied with the selection weight)
        :param title: plot title
        :param xTitle: x-axis title (optional, taken from plot title by default)
        :param xBinLabels: x-axis bin labels (optional)
        :param plotopts: dictionary of options to pass directly to plotIt (optional)
        :param autoSyst: automatically add systematic variations (True by default - set to False to turn off)

        :returns: the new :py:class:`~bamboo.plots.Plot` instance with a 1-dimensional histogram

        :Example:

        >>> hasTwoEl = noSel.refine(cut=(op.rng_len(t.Electron) >= 2))
        >>> mElElPlot = Plot.make1D(
        >>>     "mElEl", op.invariant_mass(t.Electron[0].p4, t.Electron[1].p4), hasTwoEl,
        >>>     EquidistantBinning(80, 50., 130.), title="Invariant mass of the leading-PT electrons")
        """
        title = kwargs.pop("title", "")
        kwargs["axisTitles"] = (kwargs.pop("xTitle", title),)
        kwargs["axisBinLabels"] = (kwargs.pop("xBinLabels", None),)
        return plotCls(name, (adaptArg(variable),), selection, (binning,), **kwargs)



[docs]
    @classmethod
    def make2D(plotCls, name, variables, selection, binnings, **kwargs):
        """ Construct a 2-dimensional histogram plot

        :param name: unique plot name
        :param variables: x- and y-axis variable expression (iterable, e.g. tuple or list)
        :param selection: the :py:class:`~bamboo.plots.Selection` with cuts and weights to apply
        :param binnings: x- and y-axis binnings (iterable, e.g. tuple or list)
        :param weight: per-entry weight (optional, multiplied with the selection weight)
        :param title: plot title
        :param xTitle: x-axis title (optional, empty by default)
        :param yTitle: y-axis title (optional, empty by default)
        :param xBinLabels: x-axis bin labels (optional)
        :param yBinLabels: y-axis bin labels (optional)
        :param plotopts: dictionary of options to pass directly to plotIt (optional)
        :param autoSyst: automatically add systematic variations (True by default - set to False to turn off)

        :returns: the new :py:class:`~bamboo.plots.Plot` instance with a 2-dimensional histogram
        """
        kwargs["axisTitles"] = (kwargs.pop("xTitle", ""), kwargs.pop("yTitle", ""))
        kwargs["axisBinLabels"] = (kwargs.pop("xBinLabels", None), kwargs.pop("yBinLabels", None))
        return plotCls(name, tuple(adaptArg(v) for v in variables), selection, binnings, **kwargs)



[docs]
    @classmethod
    def make3D(plotCls, name, variables, selection, binnings, **kwargs):
        """ Construct a 3-dimensional histogram

        :param name: unique plot name
        :param variables: x-, y- and z-axis variable expression (iterable, e.g. tuple or list)
        :param selection: the :py:class:`~bamboo.plots.Selection` with cuts and weights to apply
        :param binnings: x-, y-, and z-axis binnings (iterable, e.g. tuple or list)
        :param weight: per-entry weight (optional, multiplied with the selection weight)
        :param title: plot title
        :param xTitle: x-axis title (optional, empty by default)
        :param yTitle: y-axis title (optional, empty by default)
        :param zTitle: z-axis title (optional, empty by default)
        :param xBinLabels: x-axis bin labels (optional)
        :param yBinLabels: y-axis bin labels (optional)
        :param zBinLabels: z-axis bin labels (optional)
        :param plotopts: dictionary of options to pass directly to plotIt (optional)
        :param autoSyst: automatically add systematic variations (True by default - set to False to turn off)

        :returns: the new :py:class:`~bamboo.plots.Plot` instance with a 3-dimensional histogram
        """
        kwargs["axisTitles"] = (
            kwargs.pop("xTitle", ""),
            kwargs.pop("yTitle", ""),
            kwargs.pop("zTitle", "")
        )
        kwargs["axisBinLabels"] = (
            kwargs.pop("xBinLabels", None),
            kwargs.pop("yBinLabels", None),
            kwargs.pop("zBinLabels", None)
        )
        return plotCls(name, tuple(adaptArg(v) for v in variables), selection, binnings, **kwargs)





[docs]
class Selection:
    """ A :py:class:`~bamboo.plots.Selection` object groups a set of selection criteria
    (cuts) and weight factors that belong to a specific stage of the selection and analysis.
    Selections should be constructed by calling the :py:meth:`~bamboo.plots.Selection.refine`
    method on a 'root' selection (which may include overall selections and weights, e.g.
    a lumi mask for data and pileup reweighting for MC).

    .. note::

        All :py:class:`~bamboo.plots.Selection` (and :py:class:`~bamboo.plots.Plot`) instances
        need to have a unique name. This name is used internally to define DataFrame columns
        with readable names.
        The constructor will raise an exception if an existing name is used.
    """

[docs]
    def __init__(self, parent, name, cuts=None, weights=None, autoSyst=True):
        """
        Constructor. Prefer using :py:meth:`~bamboo.plots.Selection.refine` instead
            (except for the 'root' selection)

        :param parent: backend or parent selection
        :param name: (unique) name of the selection
        :param cuts: iterable of selection criterion expressions (optional)
        :param weights: iterable of weight factors (optional)
        """
        self.name = name
        self.parent = None
        self._cuts = [adaptArg(cut, typeHint=top.boolType)
                      for cut in Selection._optionalToIterable(cuts)]
        self._weights = [adaptArg(wgt, typeHint=top.floatType)
                         for wgt in Selection._optionalToIterable(weights)]
        self._cSysts = {}
        self._wSysts = {}
        # register with backend
        if isinstance(parent, Selection):
            self.autoSyst = parent.autoSyst and autoSyst
            self.parent = parent
            self._fbe = parent._fbe
        else:
            self.autoSyst = autoSyst
            assert isinstance(parent, FactoryBackend)
            self._fbe = parent
        if self.autoSyst:
            # { varName : { expr : [ nodes to change ] } }
            self._cSysts = top.collectSystVars(self._cuts)
            self._wSysts = top.collectSystVars(self._weights)
        self._fbe.addSelection(self)


    def registerPlot(self, plot, **kwargs):
        self._fbe.addPlot(plot, **kwargs)

    def registerSkim(self, skim, **kwargs):
        self._fbe.addSkim(skim, **kwargs)

    def registerDerived(self, product, **kwargs):
        self._fbe.addDerived(product, **kwargs)

    def registerCutFlowReport(self, product, selections, **kwargs):
        self._fbe.addCutFlowReport(product, selections, **kwargs)

    # helper: convert None, single item, or iterable arg to iterable
    @staticmethod
    def _optionalToIterable(arg):
        if arg is None:
            return []
        else:
            if (isinstance(arg, top.TupleOp) or isinstance(arg, top.TupleBaseProxy)
                    or not hasattr(arg, "__iter__")):
                return [arg]
            else:
                return arg

    @property
    def cuts(self):
        if self.parent:
            return self.parent.cuts + self._cuts
        else:
            return self._cuts

    @property
    def weights(self):
        if self.parent:
            return self.parent.weights + self._weights
        else:
            return self._weights

    @property
    def weightSystematics(self):
        if self.parent:
            ret = self.parent.weightSystematics  # new set
            ret.update(self._wSysts)
            return ret
        else:
            return set(self._wSysts)

    @property
    def cutSystematics(self):
        if self.parent:
            ret = self.parent.cutSystematics  # new set
            ret.update(self._cSysts)
            return ret
        else:
            return set(self._cSysts)

    @property
    def systematics(self):
        ret = self.weightSystematics  # new set
        ret.update(self.cutSystematics)
        return ret
    # for debugging/monitoring: full cut and weight expression ## TODO review
    @property
    def cut(self):
        return Selection._makeExprAnd(self.cuts)

    @property
    def weight(self):
        return Selection._makeExprProduct(self.weights)

    def __repr__(self):  # TODO maybe change to refer to parent
        return f"{self.__class__.__name__}({self.name!r}, {self.cut!r}, {self.weight!r})"

    def __eq__(self, other):
        #  FIXME do we even still need this?
        return (
            (len(self.cuts) == len(other.cuts))
            and all(sc == oc for sc, oc in zip(self.cuts, other.cuts))
            and (len(self.weights) == len(other.weights))
            and all(sw == ow for sw, ow in zip(self.weights, other.weights)))


[docs]
    def refine(self, name, cut=None, weight=None, autoSyst=True):
        """ Create a new selection by adding cuts and/or weight factors

        :param name: unique name of the new selection
        :param cut: expression (or list of expressions) with additional selection criteria
            (combined using logical AND)
        :param weight: expression (or list of expressions) with additional weight factors
        :param autoSyst: automatically add systematic variations
            (True by default - set to False to turn off; note that this would also turn off
            automatic systematic variations for any selections and plots
            that derive from the one created by this method)

        :returns: the new :py:class:`~bamboo.plots.Selection`
        """
        return self.__class__(self, name, cuts=cut, weights=weight, autoSyst=autoSyst)


    @staticmethod
    def _makeExprAnd(listOfReqs):
        # op.AND for expressions (helper for histfactory etc.)
        if len(listOfReqs) > 1:
            return adaptArg(op.AND(*listOfReqs))
        elif len(listOfReqs) == 1:
            return listOfReqs[0]
        else:
            return adaptArg("true", typeHint=top.boolType)

    @staticmethod
    def _makeExprProduct(listOfFactors):
        # op.product for expressions (helper for histfactory etc.)
        if len(listOfFactors) > 1:
            return adaptArg(op.product(*listOfFactors))
        elif len(listOfFactors) == 1:
            return listOfFactors[0]
        else:
            return adaptArg(1., typeHint=top.floatType)




[docs]
class DerivedPlot(Product):
    """
    Base class for a plot with results based on other plots' results

    The :py:attr:`~bamboo.plots.DerivedPlot.dependencies` attribute that lists
    the :py:class:`~bamboo.plots.Plot`-like objects this one depends on (which
    may be used e.g. to order operations).
    The other necessary properties (binnings, titles, labels, etc.) are taken
    from the keyword arguments to the constructor, or the first dependency.
    The :py:meth:`~bamboo.plots.DerivedPlot.produceResults` method,
    which is called by the backend to retrieve the derived results,
    should be overridden with the desired calculation.

    Typical use cases are summed histograms, background subtraction, etc.
    (the results are combined for different subjobs with hadd, so derived
    quantities that require the full statistics should be calculated from
    the postprocessing step; alternative or additional systematic variations
    calculated from the existing ones can be added by subclassing
    :py:class:`~bamboo.plots.Plot`).
    """

[docs]
    def __init__(self, name, dependencies, **kwargs):
        super().__init__(name)
        if "__" in name:
            raise RuntimeError(
                "No '__' should be present in the name of a derived plot: "
                "it is reserved for separating the name from systematic variations")
        self.dependencies = dependencies
        self.binnings = kwargs.get("binnings", dependencies[0].binnings)
        self.axisTitles = kwargs.get(
            "axisTitles", tuple(
                kwargs.get(f"{ax}Title", dependencies[0].axisTitles[i])
                for i, ax in enumerate("xyzuvw"[:len(self.variables)])))
        self.axisBinLabels = kwargs.get(
            "axisBinLabels", tuple(
                kwargs.get(f"{ax}BinLabels", dependencies[0].axisBinLabels[i])
                for i, ax in enumerate("xyzuvw"[:len(self.variables)])))
        self.plotopts = kwargs.get("plotopts", dependencies[0].plotopts)
        # register with backend
        dependencies[0].selection.registerDerived(self)


    @property
    def variables(self):
        return [None for x in self.binnings]


[docs]
    def produceResults(self, bareResults, fbe, key=None):
        """
        Main interface method, called by the backend

        :param bareResults: iterable of histograms for this plot produced by the backend (none)
        :param fbe: reference to the backend, can be used to retrieve the histograms for the dependencies,
            e.g. with :py:meth:`~bamboo.plots.DerivedPlot.collectDependencyResults`
        :param key: key under which the backend stores the results (if any)

        :returns: an iterable with ROOT objects to save to the output file
        """
        return []



[docs]
    def collectDependencyResults(self, fbe, key=None):
        """ helper method: collect all results of the dependencies

        :returns: ``[ (nominalResult, {"variation" : variationResult}) ]``
        """
        res_dep = []
        for dep in self.dependencies:
            resNom = None
            resPerVar = {}
            depResults = fbe.getResults(dep, key=(
                (dep.name, key[1]) if key is not None and len(key) == 2 else None))
            if depResults:
                for res in depResults:
                    if "__" not in res.GetName():
                        assert resNom is None
                        resNom = res
                    else:
                        resVar = res.GetName().split("__")[1]
                        resPerVar[resVar] = res
                res_dep.append((resNom, resPerVar))
        return res_dep





[docs]
class SummedPlot(DerivedPlot):
    """ A :py:class:`~bamboo.plots.DerivedPlot` implementation that sums histograms """

[docs]
    def __init__(self, name, termPlots, **kwargs):
        super().__init__(name, termPlots, **kwargs)



[docs]
    def produceResults(self, bareResults, fbe, key=None):
        from .root import gbl
        res_dep = self.collectDependencyResults(fbe, key=key)
        if not res_dep:
            return []

        def getVal(prod):
            # DistRDF actions return DistRDF.ActionProxy objects, not RResultPtr's
            if hasattr(prod, "GetValue"):
                return prod.GetValue()
            return prod.GetPtr()

        # list all variations (some may not be there for all)
        allVars = set()
        for _, resVar in res_dep:
            allVars.update(resVar.keys())
        # sum nominal
        hNom = res_dep[0][0].Clone(self.name)
        for ihn, _ in res_dep[1:]:
            if not isinstance(ihn, gbl.TH1):
                ihn = getVal(ihn)
            hNom.Add(ihn)
        results = [hNom]
        # sum variations (using nominal if not present for some)
        for vn in allVars:
            hVar = res_dep[0][1].get(vn, res_dep[0][0]).Clone("__".join((self.name, vn)))
            for ihn, ihv in res_dep[1:]:
                hvi = ihv.get(vn, ihn)
                if not isinstance(hvi, gbl.TH1):
                    hvi = getVal(hvi)
                hVar.Add(hvi)
            results.append(hVar)
        return results





[docs]
class CutFlowReport(Product):
    """
    Collect and print yields at different selection stages, and cut efficiencies

    The simplest way to use this, just to get an overview of the number of events
    passing each selection stage in the log file, is by adding a
    ``CutFlowReport("yields", selections=<list of selections>, recursive=True, printInLog=True)``
    to the list of plots.
    ``recursive=True`` will add all parent selections recursively,
    so only the final selection categories need to be passed to the ``selections``
    keyword argument.

    It is also possible to output a LaTeX yields table, and specify exactly which
    selections and row or column headers are used.
    Then the :py:class:`~bamboo.plots.CutFlowReport` should be constructed like this:

    .. code-block:: python

       yields = CutFlowReport("yields")
       plots.append(yields)
       yields.add(<selection1-or-list-of-selections1>, title=title1)
       yields.add(<selection2-or-list-of-selections2>, title=title2)
       ...

    Each ``yields.add`` call will then add one entry in the yields table,
    with the yield the one of the corresponding selection, or the sum over
    the list (e.g. different categories that should be taken together);
    the other dimension are the samples (or sample groups).
    The sample (group) titles and formatting of the table can be
    customised in the same way as in plotIt, see
    :py:func:`~bamboo.analysisutils.printCutFlowReports`
    for a detailed description of the different options.
    """
    class Entry:  # counters for one selection
        def __init__(self, name, nominal=None, systVars=None, parent=None, children=None):
            self.name = name
            self.nominal = nominal
            self.systVars = systVars or dict()
            self.parent = parent
            self.children = list(children) if children is not None else []

        def _load(self, tmpF):
            if isinstance(self.nominal, str):
                self.nominal = tmpF.Get(self.nominal)
                self.systVars = {svNm: tmpF.Get(svV) for svNm, svV in self.systVars}
            return self

        def setParent(self, parent):
            self.parent = parent
            if self not in parent.children:
                parent.children.append(self)


[docs]
    def __init__(
            self, name, selections=None, recursive=False, titles=None, autoSyst=False,
            cfres=None, printInLog=False):
        """
        Constructor. ``name`` is mandatory, all other are optional; for full control
        the :py:meth:`~bamboo.plots.CutFlowReport.add` should be used to add entries.

        Using the constructor with a list of :py:class:`~bamboo.plots.Selection`
        instances passed to the ``selections`` keyword argument, and ``recursive=True, printInLog=True``
        is the easiest way to get debugging printout of the numbers of passing events.
        """
        super().__init__(name)
        self.recursive = recursive
        if selections is None:
            self.selections = []
        else:
            self.selections = list(selections) if hasattr(selections, "__iter__") else [selections]
        self.titles = defaultdict(list)
        if titles is not None:
            self.titles = titles
        elif self.selections:
            self.titles.update({sel.name: sel.name for sel in self.selections})
        self.autoSyst = autoSyst
        self.cfres = cfres if cfres is not None else defaultdict(list)
        if self.selections and cfres is None:
            self._register()
        self.printInLog = printInLog


    def _register(self, selections=None):
        if selections is None:
            selections = self.selections
        aSelection = selections[0]
        selections = {sel.name: sel for sel in selections}
        if self.recursive:
            for sel in list(selections.values()):
                isel = sel.parent
                while isel is not None and isel.name not in selections:
                    selections[isel.name] = isel
        aSelection.registerCutFlowReport(self, selections, autoSyst=self.autoSyst)
        sels_per_sub = defaultdict(dict)
        for selName, sel in selections.items():
            if isinstance(sel, SelectionWithSub):
                sel.initSub()
                for suffix, subSel in sel.sub.items():
                    if subSel is not None:
                        sels_per_sub[suffix][selName] = subSel
        for suffix, subSels in sels_per_sub.items():
            logger.debug(f"Registering counters for {suffix} part of {', '.join(subSels.keys())}")
            aSelection.registerCutFlowReport(
                self, subSels, key=(self.name, suffix), autoSyst=self.autoSyst)


[docs]
    def add(self, selections, title=None):
        """ Add an entry to the yields table, with a title (optional) """
        if not hasattr(selections, "__iter__"):
            selections = [selections]
        selections = [sel for sel in selections if sel not in self.selections]
        if title is not None:
            self.titles[title] += [sel.name for sel in selections]
        else:
            self.titles.update({sel.name: sel.name for sel in selections})
        if selections:
            self.selections += selections
            self._register(selections)



[docs]
    def produceResults(self, bareResults, fbe, key=None):
        entries = list()
        for iEn in self.cfres[key if key else self.name]:  # self.cfres was set by addCutFlowReport
            while iEn is not None and iEn not in entries:
                entries.append(iEn)
                iEn = iEn.parent
        return ([res.nominal.product for res in entries]
                + [v.product for res in entries for v in res.systVars.values()])


    def rootEntries(self):
        # helper: traverse reports tree up
        def travUp(entry):
            yield entry
            yield from travUp(entry.parent)
        return {next(en for en in travUp(res) if en.parent is None)
                for lres in self.cfres.values() for res in lres}


[docs]
    def readFromResults(self, resultsFile):
        """ Reconstruct the :py:class:`~bamboo.plots.CutFlowReport`, reading counters from a results file """
        cfres = []
        entries = {}  # by selection name
        for sel in self.selections:
            if sel.name not in entries:
                entries[sel.name] = CutFlowReport.Entry(sel.name)
                if self.recursive:
                    isel = sel.parent
                    entry_d = entries[sel.name]
                    while isel is not None:
                        if isel.name in entries:
                            entry_p = entries[isel.name]
                            entry_d.setParent(entry_p)
                            break
                        entry_p = CutFlowReport.Entry(isel.name)
                        entries[isel.name] = entry_p
                        entry_d.setParent(entry_p)
                        entry_d = entry_p
                        isel = isel.parent
            cfres.append(entries[sel.name])
        # retrieve nominal
        for selName, entry in entries.items():
            kyNm = f"{self.name}_{selName}"
            obj = resultsFile.Get(kyNm)
            if obj:
                entry.nominal = obj
        # and systematic variations
        prefix = f"{self.name}_"
        for ky in resultsFile.GetListOfKeys():
            if ky.GetName().startswith(prefix):
                selName = ky.GetName().split("__")[0][len(prefix):]
                if selName in entries:
                    entry = entries[selName]
                    cnt = ky.GetName().count("__")
                    if cnt == 1:
                        varNm = ky.GetName().split("__")[1]
                        if varNm in entry.systVars:
                            logger.warning(f"{self.name}: counter for variation {varNm} "
                                           f"already present for selection {selName}")
                        entry.systVars[varNm] = ky.ReadObj()
                    elif cnt > 1:
                        logger.warning("Key {ky.GetName()!r} contains '__' more than once, "
                                       "this will break assumptions")
        return CutFlowReport(
            self.name, self.selections, titles=self.titles, recursive=self.recursive,
            autoSyst=self.autoSyst, cfres={self.name: cfres}, printInLog=self.printInLog)





[docs]
class SelectionWithSub(Selection):
    """
    A common base class for :py:class:`~bamboo.plots.Selection` subclasses
        with related/alternative/sub-:py:class:`~bamboo.plots.Selection` instances attached

    A dictionary of additional selections is kept in the ``sub`` attribute (could be ``None`` to disable).
    """

[docs]
    def __init__(self, parent, name, cuts=None, weights=None, autoSyst=True, sub=None):
        super().__init__(parent, name, cuts=cuts, weights=weights, autoSyst=autoSyst)
        self.sub = sub if sub is not None else dict()



[docs]
    def initSub(self):
        """
        Initialize related selections
            (no-op by default, subclasses can request to call this to enable some functionality)
        """
        pass



[docs]
    @staticmethod
    def getSubsForPlot(p, requireActive=False, silent=False):
        """ Helper method: gather the sub-selections for which a plot is produced """
        subs = set()
        if isinstance(p, Plot):
            if isinstance(p.selection, SelectionWithSub):
                for suff, subSel in p.selection.sub.items():
                    if (not requireActive) or (subSel is not None):
                        subs.add(suff)
        elif isinstance(p, DerivedPlot):
            for dp in p.dependencies:
                if isinstance(dp.selection, SelectionWithSub):
                    for suff, subSel in dp.selection.sub.items():
                        if (not requireActive) or (subSel is not None):
                            subs.add(suff)
        elif isinstance(p, CutFlowReport):
            for sel in p.selections:
                if isinstance(sel, SelectionWithSub):
                    for suff, subSel in sel.sub.items():
                        if (not requireActive) or (subSel is not None):
                            subs.add(suff)
        elif not silent:
            logger.warning(
                f"Unsupported product type for data-driven: {type(p).__name__}, "
                "additional products will not be stored")
        return subs



[docs]
    def refine(self, name, cut=None, weight=None, autoSyst=True):
        main = super().refine(name, cut=cut, weight=weight, autoSyst=autoSyst)
        main.sub = {suff: (
            parent.refine("".join((name, suff)), cut=cut, weight=weight, autoSyst=autoSyst)
            if parent is not None else None) for suff, parent in self.sub.items()}
        return main





[docs]
class SelectionWithDataDriven(SelectionWithSub):
    """
    A main :py:class:`~bamboo.plots.Selection` with the corresponding "shadow"
        :py:class:`~bamboo.plots.Selection` instances for evaluating data-driven backgrounds
        (alternative cuts and/or weights)
    """

[docs]
    @staticmethod
    def create(
            parent, name, ddSuffix, cut=None, weight=None, autoSyst=True,
            ddCut=None, ddWeight=None, ddAutoSyst=True, enable=True):
        """
        Create a selection with a data-driven shadow selection

        Drop-in replacement for a :py:meth:`bamboo.plots.Selection.refine` call:
        the main selection is made from the parent with ``cut`` and ``weight``,
        the shadow selection is made from the parent with ``ddCut`` and ``ddWeight``.
        With ``enable=False`` no shadow selection is made (this may help to avoid
        duplication in the calling code).
        """
        ddName = "".join((name, ddSuffix))
        ddSel = None
        if isinstance(parent, SelectionWithSub):
            if cut is None and weight is None and autoSyst == parent.autoSyst:
                main = parent
            else:
                main = parent.refine(name, cut=cut, weight=weight, autoSyst=autoSyst)
            if enable:
                ddSel = Selection.refine(
                    parent, ddName, cut=ddCut, weight=ddWeight, autoSyst=ddAutoSyst)
        else:  # create from regular Selection
            main = SelectionWithDataDriven(parent, name, cuts=cut, weights=weight, autoSyst=autoSyst)
            if enable:
                ddSel = parent.refine(ddName, cut=ddCut, weight=ddWeight, autoSyst=ddAutoSyst)
        if ddSel is not None:
            logger.debug(f"Adding the data-driven counterpart of {name} for the {ddSuffix} contribution")
        main.sub[ddSuffix] = ddSel
        return main


    def registerPlot(self, plot, **kwargs):
        super().registerPlot(plot, **kwargs)
        for ddSuffix, ddSel in self.sub.items():
            if ddSel is not None:
                # will register and go out of scope
                # (the module has all necessary information to retrieve and process the results;
                # everything by reference, so cheap)
                plot.clone(selection=ddSel, key=(plot.name, ddSuffix), **kwargs)

    # NOTE registerDerived not overridden, since none of the current backends need it



[docs]
class SelectionWithMultipleDataDriven(SelectionWithSub):
    """
    A main :py:class:`~bamboo.plots.Selection` with the corresponding "shadow"
        :py:class:`~bamboo.plots.Selection` instances for evaluating data-driven backgrounds
        (alternative cuts and/or weights)
    """

[docs]
    @staticmethod
    def create(
            parent, name, ddSuffixes, cut=None, weight=None, autoSyst=True,
            ddCuts=None, ddWeights=None, ddAutoSyst=True, enables=True):
        """
        Create a selection with a data-driven shadow selections

        Drop-in replacement for a :py:meth:`bamboo.plots.Selection.refine` call:
        the main selection is made from the parent with ``cut`` and ``weight``,
        the shadow selections are made from the parent with ``ddCuts`` and ``ddWeights``.
        With ``enables=[False]`` no shadow selections are made (this may help to avoid
        duplication in the calling code).

        This class is derived from a :py:meth:`bamboo.plots.SelectionWithDataDriven` class
        and it works in a similar manner. In contrast to the :py:meth:`bamboo.plots.SelectionWithDataDriven`
        class, this class can be used for the selection with multiple data-driven backgrounds
        at the same time. The ``ddCuts``, ``ddWeights``, ``ddSuffixes`` and ``enables``
        need to be lists of ddCuts, ddWeights, ddSuffixes and enables, respectively.
        """

        if len(ddCuts) == len(ddWeights) and len(ddCuts) == len(ddSuffixes) and len(ddCuts) == len(enables):
            pass
        else:
            raise ValueError(
                f"Different number of ddSuffixes({len(ddSuffixes)}), ddCuts({len(ddCuts)}),"
                f" ddWeights({len(ddWeights)}) and enables({len(enables)}).")

        ddNames = []
        for ddSuffix in ddSuffixes:
            ddName = "".join((name, ddSuffix))
            ddNames.append(ddName)
        ddSels = None

        if isinstance(parent, SelectionWithSub):
            if cut is None and weight is None and autoSyst == parent.autoSyst:
                main = parent
            else:
                main = parent.refine(name, cut=cut, weight=weight, autoSyst=autoSyst)
            if True in enables:
                ddSels = []
                for (ddName, ddCut, ddWeight, enable) in zip(ddNames, ddCuts, ddWeights, enables):
                    if enable:
                        ddSel = Selection.refine(
                            parent, ddName, cut=ddCut, weight=ddWeight, autoSyst=ddAutoSyst)
                        ddSels.append(ddSel)
                    else:
                        ddSel = None
                        ddSels.append(ddSel)
        else:  # create from regular Selection
            main = SelectionWithMultipleDataDriven(parent, name, cuts=cut, weights=weight, autoSyst=autoSyst)
            if True in enables:
                ddSels = []
                for (ddName, ddCut, ddWeight, enable) in zip(ddNames, ddCuts, ddWeights, enables):
                    if enable:
                        ddSel = Selection.refine(
                            parent, ddName, cut=ddCut, weight=ddWeight, autoSyst=ddAutoSyst)
                        ddSels.append(ddSel)
                    else:
                        ddSel = None
                        ddSels.append(ddSel)

        if ddSels is not None:
            for (ddSuffix, enable) in zip(ddSuffixes, enables):
                if enable:
                    logger.debug(
                        f"Adding the data-driven counterpart of {name}"
                        f" for the {ddSuffix} contribution")

        for index, ddSuffix in enumerate(ddSuffixes):
            if ddSels is None:
                main.sub[ddSuffix] = None
            else:
                main.sub[ddSuffix] = ddSels[index]

        return main


    def registerPlot(self, plot, **kwargs):
        super().registerPlot(plot, **kwargs)
        for ddSuffix, ddSel in self.sub.items():
            if ddSel is not None:
                # will register and go out of scope
                # (the module has all necessary information to retrieve and process the results;
                # everything by reference, so cheap)
                plot.clone(selection=ddSel, key=(plot.name, ddSuffix), **kwargs)

    # NOTE registerDerived not overridden, since none of the current backends need it



[docs]
class CategorizedSelection:
    """
    Helper class to represent a group of similar selections on different categories

    The interface is similar, but not identical to that of :py:class:`~bamboo.plots.Selection`
    (constructing :py:class:`~bamboo.plots.Plot` objects is done through the
    :py:meth:`~bamboo.plots.CategorizedSelection.makePlots` method,
    which takes additional arguments).
    Each category selection can have a candidate, typically the object
    or group of object that differs between the categories.
    The axis variables can then either be expressions, or callables
    that will be passed this per-category object.

    :Example:

    >>> muonSel = noSel.refine("hasMuon", cut=(
    >>>     op.rng_len(muons) > 0, op.OR(op.rng_len(electrons) == 0,
    >>>     muons[0].pt > electrons[0].pt)))
    >>> electronSel = noSel.refine("hasElectron", cut=(
    >>>     op.rng_len(electrons) > 0, op.OR(op.rng_len(muons) == 0,
    >>>     electrons[0].pt > muons[0].pt)))
    >>> oneLeptonSel = CategorizedSelection(categories={
    ...     "Mu" : (muonSel, muons[0]),
    ...     "El" : (electronSel, electrons[0])
    ...     })
    >>> oneLep2JSel = onLeptonSel.refine("hasLep2J", cut=(op.rng_len(jets) >= 2))
    >>> plots += oneLep2JSel.makePlots("J1PT", jets[0].pt, EqB(50, 0., 150.))
    >>> plots += oneLep2JSel.makePlots("LJ1Mass",
    ...     (lambda l : op.invariant_mass(jets[0].p4, l.p4)), EqB(50, 0., 200.))
    """

[docs]
    def __init__(self, parent=None, categories=None, name=None):
        """
        Construct a group of related selections

        :param name: name (optional)
        :param parent: parent CategorizedSelection (optional)
        :param categories: dictionary of a :py:class:`~bamboo.plots.Selection` and candidate
            (any python object) per category (key is category name),
            see the :py:meth:`~CategorizedSelection.addCategory` method below
        """
        self.name = name
        self.categories = categories
        self.parent = parent



[docs]
    def addCategory(self, catName, selection, candidate=None):
        """
        Add a category

        :param catName: category name
        :param selection: :py:class:`~bamboo.plots.Selection` for this category
        :param candidate: any python object with event-level quantities specific to this category
        """
        if catName in self.categories:
            raise ValueError(f"A category with name {catName} is already present")
        self.categories[catName] = (selection, candidate)



[docs]
    def refine(self, name, cut=None, weight=None, autoSyst=True):
        """
        Equivalent of :py:meth:`~bamboo.plots.Selection.refine`, but for all categories at a time

        :param name: common part of the name for the new category selections
            (individual names will be ``"{name}_{category}``)
        :param cut: cut(s) to add. If callable, the category's candidate will be passed
        :param weight: weight(s) to add. If callable, the category's candidate will be passed
        :param autoSyst: automatically add systematic variations
            (True by default - set to False to turn off; note that this would also turn off
            automatic systematic variations for any selections and plots that derive
            from the one created by this method)

        :returns: the new :py:class:`CategorizedSelection`
        """
        if cut is not None and not hasattr(cut, "__iter__"):
            cut = [cut]
        if weight is not None and not hasattr(weight, "__iter__"):
            weight = [weight]
        newCatsAndCands = {}
        for catName, (catSel, catCand) in self.categories.items():
            catCut = cut
            if cut is not None:
                catCut = [(ict if not callable(ict) else ict(catCand)) for ict in cut]
            catWeight = weight
            if weight is not None:
                catWeight = [(iwt if not callable(iwt) else iwt(catCand)) for iwt in weight]
            newCatsAndCands[catName] = (catSel.refine(
                f"{name}_{catName}", cut=catCut, weight=catWeight, autoSyst=autoSyst), catCand)
        return CategorizedSelection(name=name, parent=self, categories=newCatsAndCands)



[docs]
    def makePlots(
            self, name, axisVariables, binnings, construct=None, savePerCategory=True,
            saveCombined=True, combinedPlotType=SummedPlot, **kwargs):
        """
        Make a plot for all categories, and/or a combined one

        :param name: plot name (per-category plot names will be ``"{name}_{category}"``)
        :param axisVariables: one or more axis variables
        :param binnings: as many binnings as variables
        :param construct: plot factory method, by default the ``make{N}D`` method of
            :py:class:`~bamboo.plots.Plot` (with N the number of axis variables)
        :param savePerCategory: save the individual plots (enabled by default)
        :param saveCombine: save the combined plot (enabled by default)
        :param combinedPlotType: combined plot type, :py:class:`~bamboo.plots.SummedPlot` by default

        :returns: a list of plots
        """
        if (not savePerCategory) and (not saveCombined):
            return []  # no need to make plots then :-)
        if not hasattr(axisVariables, "__iter__"):
            axisVariables = [axisVariables]
        if construct is None:  # default: use `Plot.makeND`
            construct = getattr(Plot, f"make{len(axisVariables):d}D")
        catPlots = []
        for catName, (catSel, catCand) in self.categories.items():
            variables = [(iVar if not callable(iVar) else iVar(catCand)) for iVar in axisVariables]
            if len(variables) == 1:
                variables = variables[0]
            catPlots.append(construct(f"{name}_{catName}", variables, catSel, binnings, **kwargs))
        if not saveCombined:
            return catPlots
        else:
            return catPlots + [combinedPlotType(name, catPlots)]





[docs]
class LateSplittingSelection(SelectionWithSub):
    """
    A drop-in replacement for :py:class:`~bamboo.plots.Selection` to efficiently split a sample

    The concept is quite similar to :py:class:`~bamboo.plots.SelectionWithDataDriven`,
    but with very different performance trade-offs: the former creates two parallel branches of
    the RDF graph, each for their own set of events (with a typically small performance
    overhead due to dupliation), whereas this is for cases where all events should be processed
    identically until they are filled into histograms (e.g. separating subprocesses based on
    MC truth). It is worth defining columns with these categories early on, such that the splitting
    does not need to do it many times for different selections and categories.
    """

[docs]
    def __init__(self, parent, name, cuts=None, weights=None, autoSyst=True, keepInclusive=None):
        super().__init__(parent, name, cuts=cuts, weights=weights, autoSyst=autoSyst)
        self.splitCuts = {}
        if isinstance(parent, LateSplittingSelection):
            self.splitCuts = parent.splitCuts
        if keepInclusive is not None:
            self.keepInclusive = keepInclusive
        elif isinstance(parent, LateSplittingSelection):
            self.keepInclusive = parent.keepInclusive
        else:
            self.keepInclusive = True



[docs]
    @staticmethod
    def create(parent, name, splitCuts=None, keepInclusive=True, cut=None, weight=None, autoSyst=True):
        """
        Create a selection that will lazily split into categories

        :param name: name of the new selection (after applying the cut and weight,
            as in :py:meth:`bamboo.plots.Selection.refine`)
        :param splitCuts: dictionary of regions, the values should be the cuts that define the region
        :param keepInclusive: also produce the plots without splitting
        :param cut: common selection
        :param weight: common weight
        :param autoSyst: automatically propagate systematic uncertainties
        """
        if isinstance(parent, SelectionWithSub):
            raise RuntimeError(
                "LateSplittingSelection must be constructed before any SelectionWithSub, "
                "and extending is not supported")
        lsSel = LateSplittingSelection(
            parent, name, cuts=cut, weights=weight, autoSyst=autoSyst, keepInclusive=keepInclusive)
        lsSel.splitCuts = splitCuts if splitCuts is not None else {}
        return lsSel



[docs]
    def initSub(self):
        """
        Initialize related selections, should be called before registering non-plot products
            (anything not going through registerPlot)
        """
        if self.splitCuts and any(suff not in self.sub for suff in self.splitCuts):
            self.sub.update({
                suff: Selection(self, f"{self.name}{suff}", cuts=cut)
                for suff, cut in self.splitCuts.items()
                if suff not in self.sub
            })


    def registerPlot(self, plot, **kwargs):
        # always still produce the original one (it helps with having all variables defined)
        if self.keepInclusive:
            super().registerPlot(plot, **kwargs)
        if self.splitCuts:
            # here is the trick: per-category selections are only created once we attach a plot
            self.initSub()
            for suff, sSel in self.sub.items():
                if sSel is not None:
                    plot.clone(selection=sSel, key=(plot.name, suff), **kwargs)




[docs]
class Skim(Product):
    """
    Save selected branches for events that pass the selection to a skimmed tree
    """
    KeepAll = object()

    class KeepRegex:
        def __init__(self, pattern):
            self.pattern = pattern


[docs]
    def __init__(
            self, name, branches, selection, keepOriginal=None,
            maxSelected=-1, compressionLevel=1, compressionAlgo=None, treeName=None, key=None):
        """
        Skim constructor

        :param name: name of the skim (also default name of the TTree)
        :param branches: dictionary of branches to keep (name and definition for new branches,
            or name and ``None`` for specific branches from the input tree)
        :param selection: :py:class:`~bamboo.plots.Selection` of events to save
        :param keepOriginal: list of branch names to keep, :py:obj:`bamboo.plots.Skim.KeepRegex`
            instances with patterns of branch names to keep, or :py:obj:`bamboo.plots.Skim.KeepAll`
            to keep all branches from the input tree
        :param maxSelected: maximal number of events to keep (default: no limit)
        :param compressionLevel: compression level of output file (default: 1)
        :param compressionAlgo: compression algorithm of output file (default: ZLIB)

        :Example:

        >>> plots.append(Skim("dimuSkim", {
        >>>     "run": None,  # copy from input
        >>>     "luminosityBlock": None,
        >>>     "event": None,
        >>>     "dimu_m": op.invariant_mass(muons[0].p4, muons[1].p4),
        >>>     "mu1_pt": muons[0].pt,
        >>>     "mu2_pt": muons[1].pt,
        >>>     }, twoMuSel,
        >>>     keepOriginal=[
        >>>         Skim.KeepRegex("PV_.*"),
        >>>         "nOtherPV",
        >>>         Skim.KeepRegex("OtherPV_.*")
        >>>         ])
        """
        super().__init__(name, key=key)
        self.definedBranches = {k: v for k, v in branches.items() if v is not None}
        self.originalBranches = [k for k, v in branches.items() if v is None]
        if keepOriginal:
            from collections.abc import Iterable
            if isinstance(keepOriginal, Iterable) and not isinstance(keepOriginal, str):
                self.originalBranches.extend(keepOriginal)
            else:
                self.originalBranches.append(keepOriginal)
        self.selection = selection
        self.maxSelected = maxSelected
        self.compressionLevel = compressionLevel
        self.compressionAlgo = compressionAlgo
        self.treeName = treeName or name
        selection.registerSkim(self)



[docs]
    def produceResults(self, bareResults, fbe, key=None):
        return bareResults