Skip to content

Address interpolation overhead for 2D and ND data#84

Merged
Radonirinaunimi merged 7 commits into
masterfrom
speedup
Feb 11, 2026
Merged

Address interpolation overhead for 2D and ND data#84
Radonirinaunimi merged 7 commits into
masterfrom
speedup

Conversation

@Radonirinaunimi
Copy link
Copy Markdown
Member

@Radonirinaunimi Radonirinaunimi commented Feb 11, 2026

The following PR directly addresses #83.

It does so by removing unnecessary heap allocations and modifying the PID lookup with a flat array. For multiple PIDs computations, it pre-computes and interleaves the coefficients of the interpolation so that they can be re-used multiple times.

Using the following benchmark for a single-point interpolation:

#include <LHAPDF/PDF.h>
#include <neopdf_capi.h>
#include <chrono>
#include <cmath>
#include <cstdlib>
#include <iomanip>
#include <iostream>

int main() {
    LHAPDF::setVerbosity(0);

    std::string pdfname = "NNPDF40_nnlo_as_01180";
    NeoPDFWrapper* neo_pdf = neopdf_pdf_load(pdfname.c_str(), 0);
    auto lha_pdf = std::unique_ptr<LHAPDF::PDF>(LHAPDF::mkPDF(pdfname, 0));

    const int pid = 21;
    const double x = 1e-3;
    const double q2 = 4.0;
    const int N = 10000000;

    // Warm-up both libraries
    for (int w = 0; w < 1000; ++w) {
        volatile double r1 = lha_pdf->xfxQ2(pid, x, q2);
        volatile double r2 = neopdf_pdf_xfxq2(neo_pdf, pid, x, q2);
        (void)r1; (void)r2;
    }

    // Benchmark LHAPDF
    volatile double sink = 0.0;
    auto t0 = std::chrono::high_resolution_clock::now();
    for (int i = 0; i < N; ++i) {
        sink = lha_pdf->xfxQ2(pid, x, q2);
    }
    auto t1 = std::chrono::high_resolution_clock::now();

    // Benchmark NeoPDF
    auto t2 = std::chrono::high_resolution_clock::now();
    for (int i = 0; i < N; ++i) {
        sink = neopdf_pdf_xfxq2(neo_pdf, pid, x, q2);
    }
    auto t3 = std::chrono::high_resolution_clock::now();

    double lhapdf_ns = std::chrono::duration<double, std::nano>(t1 - t0).count() / N;
    double neopdf_ns = std::chrono::duration<double, std::nano>(t3 - t2).count() / N;

    std::cout << std::fixed << std::setprecision(1);
    std::cout << "=== Single-point xfxQ2 benchmark ===\n";
    std::cout << "PDF:        " << pdfname << "\n";
    std::cout << "pid=" << pid
              << "  x=" << std::scientific << std::setprecision(1) << x
              << "  Q2=" << q2 << "\n";
    std::cout << "Repeats:    " << N << "\n\n";
    std::cout << std::fixed << std::setprecision(1);
    std::cout << "LHAPDF:     " << lhapdf_ns << " ns/call\n";
    std::cout << "NeoPDF:     " << neopdf_ns << " ns/call\n";
    std::cout << "Ratio:      " << neopdf_ns / lhapdf_ns << "x\n";

    (void)sink;
    neopdf_pdf_free(neo_pdf);
    return EXIT_SUCCESS;
}

yields the following results:

=== Single-point xfxQ2 benchmark ===
PDF:        NNPDF40_nnlo_as_01180
pid=21  x=1.0e-03  Q2=4.0e+00
Repeats:    10000000

LHAPDF:     33.7 ns/call
NeoPDF:     27.7 ns/call
Ratio:      0.8x

While using the following multiple-PIDs interpolation:

#include <LHAPDF/PDF.h>
#include <neopdf_capi.h>
#include <chrono>
#include <cmath>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <vector>

int main() {
    LHAPDF::setVerbosity(0);

    std::string pdfname = "NNPDF40_nnlo_as_01180";

    // LHAPDF
    auto lha_pdf = std::unique_ptr<LHAPDF::PDF>(LHAPDF::mkPDF(pdfname, 0));

    // NeoPDF (C compatibility layer)
    initpdfsetbyname(pdfname.c_str());
    initpdf(0);

    const double x = 1e-3;
    const double q = 2.0;       // evolvepdf takes Q, not Q2
    const double q2 = q * q;
    const int N = 10000000;

    // Warm-up both libraries
    std::vector<double> lha_xfs(13);
    double neo_xfs[14];
    for (int w = 0; w < 1000; ++w) {
        lha_pdf->xfxQ2(x, q2, lha_xfs);
        evolvepdf(x, q, neo_xfs);
    }

    // Benchmark LHAPDF: xfxQ2(x, Q2, vector) — all flavors at once
    volatile double sink = 0.0;
    auto t0 = std::chrono::high_resolution_clock::now();
    for (int i = 0; i < N; ++i) {
        lha_pdf->xfxQ2(x, q2, lha_xfs);
        sink = lha_xfs[6]; // gluon
    }
    auto t1 = std::chrono::high_resolution_clock::now();

    // Benchmark NeoPDF: evolvepdf(x, Q, xfxs) — all flavors at once
    auto t2 = std::chrono::high_resolution_clock::now();
    for (int i = 0; i < N; ++i) {
        evolvepdf(x, q, neo_xfs);
        sink = neo_xfs[6]; // gluon
    }
    auto t3 = std::chrono::high_resolution_clock::now();

    double lhapdf_ns = std::chrono::duration<double, std::nano>(t1 - t0).count() / N;
    double neopdf_ns = std::chrono::duration<double, std::nano>(t3 - t2).count() / N;

    std::cout << std::fixed << std::setprecision(1);
    std::cout << "=== All-flavor evolvepdf benchmark ===\n";
    std::cout << "PDF:        " << pdfname << "\n";
    std::cout << "x=" << std::scientific << std::setprecision(1) << x
              << "  Q=" << q << "  Q2=" << q2 << "\n";
    std::cout << "Repeats:    " << N << "\n\n";
    std::cout << std::fixed << std::setprecision(1);
    std::cout << "LHAPDF:     " << lhapdf_ns << " ns/call  (xfxQ2 -> vector)\n";
    std::cout << "NeoPDF:     " << neopdf_ns << " ns/call  (evolvepdf)\n";
    std::cout << "Ratio:      " << neopdf_ns / lhapdf_ns << "x\n";

    (void)sink;
    return EXIT_SUCCESS;
}

yields the following benchmark results:

=== All-flavor evolvepdf benchmark ===
PDF:        NNPDF40_nnlo_as_01180
x=1.0e-03  Q=2.0e+00  Q2=4.0e+00
Repeats:    10000000

LHAPDF:     101.7 ns/call  (xfxQ2 -> vector)
NeoPDF:     91.4 ns/call  (evolvepdf)
Ratio:      0.9x

@Radonirinaunimi Radonirinaunimi changed the title Address interpolation overhead for 2D data Address interpolation overhead for 2D and ND data Feb 11, 2026
@Radonirinaunimi Radonirinaunimi merged commit 66b0c9a into master Feb 11, 2026
14 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant