Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions roofit/batchcompute/res/RooBatchCompute.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,12 @@ class Config {
void setCudaStream(CudaInterface::CudaStream *cudaStream) { _cudaStream = cudaStream; }
CudaInterface::CudaStream *cudaStream() const { return _cudaStream; }

/// Returns the take-log flag of this configuration. Evaluation code that
/// honours the flag writes the logarithm of its result to the output.
bool takeLog() const
{
   return _takeLog;
}
/// Sets the take-log flag that is reported by takeLog().
void setTakeLog(bool takeLog)
{
   _takeLog = takeLog;
}

private:
CudaInterface::CudaStream *_cudaStream = nullptr;
bool _takeLog = false;
};

enum class Architecture {
Expand Down Expand Up @@ -90,6 +94,7 @@ enum Computer {
Gamma,
GaussModelExpBasis,
Gaussian,
LogGaussian,
Identity,
Johnson,
Landau,
Expand Down
13 changes: 13 additions & 0 deletions roofit/batchcompute/src/ComputeFunctions.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,18 @@ __rooglobal__ void computeGaussian(Batches &batches)
}
}

/// Batch kernel for the logarithm of an un-normalized Gaussian:
/// output[i] = -0.5 * (x[i] - mean[i])^2 / sigma[i]^2,
/// with x, mean and sigma read from batches.args[0], [1] and [2].
__rooglobal__ void computeLogGaussian(Batches &batches)
{
   auto xVals = batches.args[0];
   auto meanVals = batches.args[1];
   auto sigmaVals = batches.args[2];
   for (size_t idx = BEGIN; idx < batches.nEvents; idx += STEP) {
      // -1 / (2 sigma^2), the factor multiplying the squared distance.
      const double negHalfInvVar = -0.5 / (sigmaVals[idx] * sigmaVals[idx]);
      const double delta = xVals[idx] - meanVals[idx];
      batches.output[idx] = delta * delta * negHalfInvVar;
   }
}

__rooglobal__ void computeIdentity(Batches &batches)
{
for (size_t i = BEGIN; i < batches.nEvents; i += STEP) {
Expand Down Expand Up @@ -938,6 +950,7 @@ std::vector<void (*)(Batches &)> getFunctions()
computeGamma,
computeGaussModelExpBasis,
computeGaussian,
computeLogGaussian,
computeIdentity,
computeJohnson,
computeLandau,
Expand Down
1 change: 1 addition & 0 deletions roofit/roofit/inc/RooGaussian.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ class RooGaussian : public RooAbsPdf {

double evaluate() const override;
void doEval(RooFit::EvalContext &) const override;
bool canOptimizeLogarithm() const override { return true; }
inline bool canComputeBatchWithCuda() const override { return true; }

private:
Expand Down
20 changes: 15 additions & 5 deletions roofit/roofit/src/RooGaussian.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,6 @@ Plain Gaussian p.d.f

#include <RooFit/Detail/MathFuncs.h>

#include <vector>


////////////////////////////////////////////////////////////////////////////////

Expand Down Expand Up @@ -61,10 +59,22 @@ double RooGaussian::evaluate() const

////////////////////////////////////////////////////////////////////////////////
/// Compute multiple values of Gaussian distribution.
void RooGaussian::doEval(RooFit::EvalContext & ctx) const
void RooGaussian::doEval(RooFit::EvalContext &ctx) const
{
RooBatchCompute::compute(ctx.config(this), RooBatchCompute::Gaussian, ctx.output(),
{ctx.at(x), ctx.at(mean), ctx.at(sigma)});
if (ctx.config(this).takeLog()) {
auto output = ctx.output();
if (output.size() == 1) {
// If the output size is just one, which is common for constraints,
// calling into RooBatchCompute is not worth its overhead.
output[0] = RooFit::Detail::MathFuncs::logGaussian(ctx.at(x)[0], ctx.at(mean)[0], ctx.at(sigma)[0]);
} else {
RooBatchCompute::compute(ctx.config(this), RooBatchCompute::LogGaussian, output,
{ctx.at(x), ctx.at(mean), ctx.at(sigma)});
}
return;
}
RooBatchCompute::compute(ctx.config(this), RooBatchCompute::Gaussian, ctx.output(),
{ctx.at(x), ctx.at(mean), ctx.at(sigma)});
}

////////////////////////////////////////////////////////////////////////////////
Expand Down
4 changes: 3 additions & 1 deletion roofit/roofitcore/inc/RooAbsArg.h
Original file line number Diff line number Diff line change
Expand Up @@ -510,8 +510,10 @@ class RooAbsArg : public TNamed, public RooPrintable {
return false;
};

virtual bool canComputeBatchWithCuda() const { return false; }
/// Information to expose to the RooFit::Evaluator for optimized evaluation:
virtual bool canOptimizeLogarithm() const { return false; }
virtual bool isReducerNode() const { return false; }
virtual bool canComputeBatchWithCuda() const { return false; }

virtual void applyWeightSquared(bool flag);

Expand Down
12 changes: 9 additions & 3 deletions roofit/roofitcore/inc/RooFit/Detail/MathFuncs.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,18 @@ double bernstein(double x, double xmin, double xmax, DoubleArray coefs, int nCoe
return result;
}

/// @brief Function to evaluate an un-normalized RooGaussian.
inline double gaussian(double x, double mean, double sigma)
/// Evaluate the logarithm of an un-normalized Gaussian.
inline double logGaussian(double x, double mean, double sigma)
{
const double arg = x - mean;
const double sig = sigma;
return std::exp(-0.5 * arg * arg / (sig * sig));
return -0.5 * arg * arg / (sig * sig);
}

/// @brief Evaluate an un-normalized Gaussian by exponentiating logGaussian().
inline double gaussian(double x, double mean, double sigma)
{
   const double logValue = logGaussian(x, mean, sigma);
   return std::exp(logValue);
}

template <typename DoubleArray>
Expand Down
7 changes: 6 additions & 1 deletion roofit/roofitcore/inc/RooFit/Detail/RooNLLVarNew.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,11 @@ class RooNLLVarNew : public RooAbsReal {

void enableBinOffsetting(bool on = true) { _doBinOffset = on; }

void setSimCount(int simCount) { _simCount = simCount; }
/// Store the number of simultaneous pdfs together with its logarithm, so
/// that the logarithm is computed once here instead of at each later use.
void setSimCount(int simCount)
{
   _logSimCount = std::log(static_cast<double>(simCount));
   _simCount = simCount;
}

RooAbsPdf const &pdf() const { return *_pdf; }
RooAbsReal const &weightVar() const { return *_weightVar; }
Expand All @@ -78,6 +82,7 @@ class RooNLLVarNew : public RooAbsReal {
bool _doOffset = false;
bool _doBinOffset = false;
int _simCount = 1;
double _logSimCount = 0.;
std::string _prefix;
std::vector<double> _binw;
mutable ROOT::Math::KahanSum<double> _offset{0.}; ///<! Offset as KahanSum to avoid loss of precision
Expand Down
1 change: 1 addition & 0 deletions roofit/roofitcore/inc/RooFit/Detail/RooNormalizedPdf.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ class RooNormalizedPdf : public RooAbsPdf {
return _pdf->createExpectedEventsFunc(&_normSet);
}

bool canOptimizeLogarithm() const override { return true; }
bool canComputeBatchWithCuda() const override { return true; }

RooAbsPdf const &pdf() const { return *_pdf; }
Expand Down
3 changes: 3 additions & 0 deletions roofit/roofitcore/inc/RooRealIntegral.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ class RooRealIntegral : public RooAbsReal {
double evaluate() const override ;
bool isValidReal(double value, bool printError=false) const override ;

void doEval(RooFit::EvalContext &) const override;
bool canOptimizeLogarithm() const override { return true; }

bool redirectServersHook(const RooAbsCollection& newServerList,
bool mustReplaceAll, bool nameChange, bool isRecursive) override ;

Expand Down
8 changes: 7 additions & 1 deletion roofit/roofitcore/src/RooConstraintSum.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ arguments.


#include "RooConstraintSum.h"
#include "RooBatchCompute.h"
#include "RooAbsData.h"
#include "RooAbsReal.h"
#include "RooAbsPdf.h"
Expand Down Expand Up @@ -78,12 +79,17 @@ double RooConstraintSum::evaluate() const
return sum;
}

/// Evaluate with the vectorizing CPU backend.
void RooConstraintSum::doEval(RooFit::EvalContext &ctx) const
{
double sum(0);

for (const auto comp : _set1) {
sum -= std::log(ctx.at(comp)[0]);
// We only need to take the logarithm if the server didn't do it already:
if (!ctx.config(comp).takeLog())
sum -= std::log(ctx.at(comp)[0]);
else
sum -= ctx.at(comp)[0];
}

ctx.output()[0] = sum;
Expand Down
52 changes: 51 additions & 1 deletion roofit/roofitcore/src/RooFit/Evaluator.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,12 @@ RooAbsPdf::fitTo() is called and gets destroyed when the fitting ends.
#include <RooAbsCategory.h>
#include <RooAbsData.h>
#include <RooAbsReal.h>
#include <RooRealVar.h>
#include <RooBatchCompute.h>
#include <RooConstraintSum.h>
#include <RooFit/Detail/RooNormalizedPdf.h>
#include <RooMsgService.h>
#include <RooNameReg.h>
#include <RooRealVar.h>
#include <RooSimultaneous.h>

#include <RooBatchCompute.h>
Expand Down Expand Up @@ -220,6 +222,54 @@ Evaluator::Evaluator(const RooAbsReal &absReal, bool useGPU)

syncDataTokens();

// This is an optimization for Gaussian constraint terms, of which there are
// many in big fits. To avoid taking the exponential and then the
// logarithm for the constraint term, we request that all normalized pdfs
// that have only the RooConstraintSum as a value client evaluate
// their logarithms, which RooConstraintSum::doEval() then assumes.
// Note that we don't need to consider the GPU evaluation path here, because
// scalar terms like the constraints are always evaluated on the CPU.
for (NodeInfo &info : _nodes) {
if (!dynamic_cast<RooConstraintSum const *>(info.absArg)) {
continue;
}
for (RooAbsArg *server : info.absArg->servers()) {
if (!server->canOptimizeLogarithm()) {
continue;
}
// If the pdf has clients other than the RooConstraintSum, we can't do
// the log optimization.
if (server->clients().size() != 1) {
continue;
}
RooBatchCompute::Config cfg;
cfg.setTakeLog(true);
if (auto normPdf = dynamic_cast<RooFit::Detail::RooNormalizedPdf const *>(server)) {
// If this is a normalized pdf, we also request the log of the pdf
// value and normalization integral.
// Disable the optimization if the computation graph doesn't have
// the expected structure, for safety. The unnormalized pdf has two
// clients: the integral, and the RooNormalizedPdf.
int clientsLogOptimized = 0;
if (normPdf->pdf().canOptimizeLogarithm() && normPdf->pdf().clients().size() == 2) {
_evalContextCPU.setConfig(&normPdf->pdf(), cfg);
clientsLogOptimized++;
}
if (normPdf->normIntegral().canOptimizeLogarithm() && normPdf->normIntegral().clients().size() == 1) {
_evalContextCPU.setConfig(&normPdf->normIntegral(), cfg);
clientsLogOptimized++;
}
// If the servers can't be optimized, no gain in optimizing the
// normalized pdf.
if (clientsLogOptimized == 2) {
_evalContextCPU.setConfig(server, cfg);
}
} else {
_evalContextCPU.setConfig(server, cfg);
}
}
}

if (_useGPU) {
// create events and streams for every node
for (auto &info : _nodes) {
Expand Down
3 changes: 2 additions & 1 deletion roofit/roofitcore/src/RooNLLVarNew.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ RooNLLVarNew::RooNLLVarNew(const RooNLLVarNew &other, const char *name)
_binnedL{other._binnedL},
_doOffset{other._doOffset},
_simCount{other._simCount},
_logSimCount{other._logSimCount},
_prefix{other._prefix},
_binw{other._binw}
{
Expand Down Expand Up @@ -320,7 +321,7 @@ void RooNLLVarNew::finalizeResult(RooFit::EvalContext &ctx, ROOT::Math::KahanSum
// number of simultaneous PDFs: -sum(log(p/n)) = -sum(log(p)) + N*log(n)
// If we do bin-by-bin offsetting, we don't do this because it cancels out
if (!_doBinOffset && _simCount > 1) {
result += weightSum * std::log(static_cast<double>(_simCount));
result += weightSum * _logSimCount;
}

// Check if value offset flag is set.
Expand Down
10 changes: 10 additions & 0 deletions roofit/roofitcore/src/RooNormalizedPdf.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,16 @@ void RooNormalizedPdf::doEval(RooFit::EvalContext &ctx) const
auto nums = ctx.at(_pdf);
auto integralSpan = ctx.at(_normIntegral);

if (ctx.config(this).takeLog()) {
auto output = ctx.output();
for (std::size_t i = 0; i < output.size(); ++i) {
double num = nums.size() > 1 ? nums[i] : nums[0];
double den = integralSpan.size() > 1 ? integralSpan[i] : integralSpan[0];
output[i] = num - den;
}
return;
}

// We use the extraArgs as output parameter to count evaluation errors.
std::array<double, 3> extraArgs{0.0, 0.0, 0.0};

Expand Down
12 changes: 12 additions & 0 deletions roofit/roofitcore/src/RooRealIntegral.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ integration is performed in the various implementations of the RooAbsIntegrator
#include <RooSuperCategory.h>
#include <RooTrace.h>
#include <RooFitImplHelpers.h>
#include <RooBatchCompute.h>

#include <iostream>
#include <memory>
Expand Down Expand Up @@ -1111,6 +1112,17 @@ Int_t RooRealIntegral::getCacheAllNumeric()
return _cacheAllNDim;
}

/// Evaluate through RooAbsReal::doEval() and, if the configuration of this
/// node has the take-log flag set, replace every output element by its
/// natural logarithm.
void RooRealIntegral::doEval(RooFit::EvalContext &ctx) const
{
   RooAbsReal::doEval(ctx);

   if (!ctx.config(this).takeLog()) {
      return;
   }

   // Transform the output buffer in place.
   for (auto &value : ctx.output()) {
      value = std::log(value);
   }
}

std::unique_ptr<RooAbsArg>
RooRealIntegral::compileForNormSet(RooArgSet const &normSet, RooFit::Detail::CompileContext &ctx) const
{
Expand Down
Loading