Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 34 additions & 6 deletions cpp/src/mip_heuristics/diversity/diversity_manager.cu
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "diversity_manager.cuh"

#include <mip_heuristics/mip_constants.hpp>
#include <mip_heuristics/presolve/third_party_presolve.hpp>

#include <mip_heuristics/presolve/conflict_graph/clique_table.cuh>
#include <mip_heuristics/presolve/probing_cache.cuh>
Expand Down Expand Up @@ -182,9 +183,33 @@ void diversity_manager_t<i_t, f_t>::add_user_given_solutions(
std::vector<solution_t<i_t, f_t>>& initial_sol_vector)
{
raft::common::nvtx::range fun_scope("add_user_given_solutions");
for (const auto& init_sol : context.settings.initial_solutions) {
const bool has_papilo = problem_ptr->has_papilo_presolve_data();
const i_t papilo_orig_n = problem_ptr->get_papilo_original_num_variables();
for (size_t sol_idx = 0; sol_idx < context.settings.initial_solutions.size(); ++sol_idx) {
const auto& init_sol = context.settings.initial_solutions[sol_idx];
solution_t<i_t, f_t> sol(*problem_ptr);
rmm::device_uvector<f_t> init_sol_assignment(*init_sol, sol.handle_ptr->get_stream());

if (has_papilo) {
if ((i_t)init_sol_assignment.size() != papilo_orig_n) {
CUOPT_LOG_ERROR(
"Error cannot add the provided initial solution! Initial solution %zu has %zu vars, "
"expected %d; skipping",
sol_idx,
init_sol_assignment.size(),
papilo_orig_n);
continue;
}
std::vector<f_t> h_original = host_copy(init_sol_assignment, sol.handle_ptr->get_stream());
std::vector<f_t> h_crushed;
problem_ptr->presolve_data.papilo_presolve_ptr->crush_primal_solution(h_original, h_crushed);
init_sol_assignment = cuopt::device_copy(h_crushed, sol.handle_ptr->get_stream());
Comment thread
coderabbitai[bot] marked this conversation as resolved.
CUOPT_LOG_DEBUG("Crushed initial solution %d through Papilo (%d -> %d vars)",
sol_idx,
papilo_orig_n,
h_crushed.size());
}

if (problem_ptr->pre_process_assignment(init_sol_assignment)) {
relaxed_lp_settings_t lp_settings;
lp_settings.time_limit = std::min(60., timer.remaining_time() / 2);
Expand All @@ -202,10 +227,10 @@ void diversity_manager_t<i_t, f_t>::add_user_given_solutions(
sol.handle_ptr->get_stream());
bool is_feasible = sol.compute_feasibility();
cuopt_func_call(sol.test_variable_bounds(true));
CUOPT_LOG_INFO("Adding initial solution success! feas %d objective %f excess %f",
is_feasible,
sol.get_user_objective(),
sol.get_total_excess());
CUOPT_LOG_DEBUG("Adding initial solution success! feas %d objective %f excess %f",
is_feasible,
sol.get_user_objective(),
sol.get_total_excess());
population.run_solution_callbacks(sol);
initial_sol_vector.emplace_back(std::move(sol));
} else {
Expand Down Expand Up @@ -418,6 +443,9 @@ solution_t<i_t, f_t> diversity_manager_t<i_t, f_t>::run_solver()
CUOPT_LOG_INFO("GPU heuristics disabled via CUOPT_DISABLE_GPU_HEURISTICS=1");
population.initialize_population();
population.allocate_solutions();
add_user_given_solutions(initial_sol_vector);
population.add_solutions_from_vec(std::move(initial_sol_vector));
if (check_b_b_preemption()) { return population.best_feasible(); }

while (!check_b_b_preemption()) {
std::this_thread::sleep_for(std::chrono::milliseconds(100));
Expand Down Expand Up @@ -448,8 +476,8 @@ solution_t<i_t, f_t> diversity_manager_t<i_t, f_t>::run_solver()
"The problem must not be ii");
population.initialize_population();
population.allocate_solutions();
if (check_b_b_preemption()) { return population.best_feasible(); }
add_user_given_solutions(initial_sol_vector);
if (check_b_b_preemption()) { return population.best_feasible(); }
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Early preemption can still drop user warm-starts in the main path.

At Line 480, check_b_b_preemption() can return before population.add_solutions_from_vec(std::move(initial_sol_vector)) (Line 616), so user-provided initial solutions may never reach population on early exit.

Suggested minimal fix
   add_user_given_solutions(initial_sol_vector);
-  if (check_b_b_preemption()) { return population.best_feasible(); }
+  if (check_b_b_preemption()) {
+    population.add_solutions_from_vec(std::move(initial_sol_vector));
+    return population.best_feasible();
+  }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
if (check_b_b_preemption()) { return population.best_feasible(); }
add_user_given_solutions(initial_sol_vector);
if (check_b_b_preemption()) {
population.add_solutions_from_vec(std::move(initial_sol_vector));
return population.best_feasible();
}
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@cpp/src/mip_heuristics/diversity/diversity_manager.cu` at line 480, The early
return when check_b_b_preemption() is true can skip adding user warm-starts;
ensure initial_sol_vector is always moved into population before any preemption
return by calling
population.add_solutions_from_vec(std::move(initial_sol_vector)) prior to the
check_b_b_preemption() short-circuit, or alternatively defer the check and
replace the early return with a conditional that first adds the solutions then
returns population.best_feasible(); update the logic around
check_b_b_preemption(),
population.add_solutions_from_vec(std::move(initial_sol_vector)), and the
population.best_feasible() return to guarantee user-provided initial solutions
are added before exiting.

// Run CPUFJ early to find quick initial solutions
ls_cpufj_raii_guard_t ls_cpufj_raii_guard(ls); // RAII to stop cpufj threads on solve stop
ls.start_cpufj_scratch_threads(population);
Expand Down
249 changes: 245 additions & 4 deletions cpp/src/mip_heuristics/presolve/third_party_presolve.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,10 @@ papilo::Problem<f_t> build_papilo_problem(const optimization_problem_t<i_t, f_t>
}
}

for (size_t i = 0; i < h_var_types.size(); ++i) {
builder.setColIntegral(i, h_var_types[i] == var_t::INTEGER);
if (category == problem_category_t::MIP) {
for (size_t i = 0; i < h_var_types.size(); ++i) {
builder.setColIntegral(i, h_var_types[i] == var_t::INTEGER);
}
}

if (!h_constr_lb.empty() && !h_constr_ub.empty()) {
Expand Down Expand Up @@ -562,8 +564,6 @@ void set_presolve_methods(papilo::Presolve<f_t>& presolver,
presolver.addPresolveMethod(uptr(new papilo::SimpleProbing<f_t>()));
presolver.addPresolveMethod(uptr(new papilo::ParallelRowDetection<f_t>()));
presolver.addPresolveMethod(uptr(new papilo::ParallelColDetection<f_t>()));

presolver.addPresolveMethod(uptr(new papilo::SingletonStuffing<f_t>()));
presolver.addPresolveMethod(uptr(new papilo::DualFix<f_t>()));
presolver.addPresolveMethod(uptr(new papilo::SimplifyInequalities<f_t>()));
presolver.addPresolveMethod(uptr(new papilo::CliqueMerging<f_t>()));
Expand All @@ -574,6 +574,11 @@ void set_presolve_methods(papilo::Presolve<f_t>& presolver,
presolver.addPresolveMethod(uptr(new papilo::Probing<f_t>()));

if (!dual_postsolve) {
// SingletonStuffing causes dual crushing failures on:
// tr12-30, ns1208400, gmu-35-50, dws008-01, neos-1445765,
// neos-5107597-kakapo, rocI-4-11, traininstance2, traininstance6,
// radiationm18-12-05, rococoB10-011000, b1c1s1
presolver.addPresolveMethod(uptr(new papilo::SingletonStuffing<f_t>()));
presolver.addPresolveMethod(uptr(new papilo::DualInfer<f_t>()));
presolver.addPresolveMethod(uptr(new papilo::SimpleSubstitution<f_t>()));
presolver.addPresolveMethod(uptr(new papilo::Sparsify<f_t>()));
Expand Down Expand Up @@ -876,6 +881,242 @@ void third_party_presolve_t<i_t, f_t>::uncrush_primal_solution(
full_primal = std::move(full_sol.primal);
}

template <typename i_t, typename f_t>
void third_party_presolve_t<i_t, f_t>::crush_primal_solution(
  const std::vector<f_t>& original_primal, std::vector<f_t>& reduced_primal) const
{
  cuopt_expects(presolver_ == cuopt::linear_programming::presolver_t::Papilo,
                error_type_t::RuntimeError,
                "Primal crushing is only supported for PaPILO presolve");
  cuopt_assert(papilo_post_solve_storage_ != nullptr, "No postsolve storage available");

  // Primal-only crush: delegate to the primal+dual routine with empty dual
  // inputs. Passing an empty y disables dual crushing (and therefore
  // reduced-cost crushing), so the matrix CSR arguments are never read and
  // can safely be empty as well. The dual outputs are discarded.
  std::vector<f_t> discarded_y;
  std::vector<f_t> discarded_z;
  std::vector<f_t> no_matrix_values;
  std::vector<i_t> no_matrix_indices;
  std::vector<i_t> no_matrix_offsets;
  crush_primal_dual_solution(original_primal,
                             {},
                             reduced_primal,
                             discarded_y,
                             {},
                             discarded_z,
                             no_matrix_values,
                             no_matrix_indices,
                             no_matrix_offsets);
}

/**
* Crush an original-space primal+dual solution into the presolved (reduced) space.
*
* This is the forward counterpart of Papilo's Postsolve::undo(). It replays
* each presolve reduction in forward order to transform variable/dual values,
* then projects onto the surviving columns/rows via origcol/origrow_mapping.
*
* Only two reductions actually transform survivor coordinates:
* kParallelCol — merges x[col1] into x[col2]
* kRowBoundChangeForcedByRow — conditionally transfers y[deleted_row] → y[kept_row]
*/
template <typename i_t, typename f_t>
void third_party_presolve_t<i_t, f_t>::crush_primal_dual_solution(
  const std::vector<f_t>& x_original,
  const std::vector<f_t>& y_original,
  std::vector<f_t>& x_reduced,
  std::vector<f_t>& y_reduced,
  const std::vector<f_t>& z_original,
  std::vector<f_t>& z_reduced,
  const std::vector<f_t>& A_values,
  const std::vector<i_t>& A_indices,
  const std::vector<i_t>& A_offsets) const
{
  cuopt_expects(presolver_ == cuopt::linear_programming::presolver_t::Papilo,
                error_type_t::RuntimeError,
                "Crushing is only supported for PaPILO presolve");
  cuopt_assert(papilo_post_solve_storage_ != nullptr, "No postsolve storage available");

  const auto& storage = *papilo_post_solve_storage_;
  const auto& types   = storage.types;
  const auto& indices = storage.indices;
  const auto& values  = storage.values;
  const auto& start   = storage.start;
  const auto& num     = storage.num;

  cuopt_assert((int)x_original.size() == (int)storage.nColsOriginal, "");

  // Dual crushing is opt-in: an empty y means primal-only.
  const bool crush_dual = !y_original.empty();
  if (crush_dual) { cuopt_assert((int)y_original.size() == (int)storage.nRowsOriginal, ""); }

  // Reduced-cost crushing additionally requires duals (z = c - A^T y).
  const bool crush_rc = !z_original.empty() && crush_dual;
  if (crush_rc) { cuopt_assert((int)z_original.size() == (int)storage.nColsOriginal, ""); }

  // Work on mutable copies in the original space; project at the end.
  std::vector<f_t> x(x_original.begin(), x_original.end());
  std::vector<f_t> y(y_original.begin(), y_original.end());
  std::vector<f_t> z(z_original.begin(), z_original.end());

  // Track current coefficient values for entries modified by kCoefficientChange,
  // so repeated changes to the same (row, col) are handled correctly.
  std::map<std::pair<int, int>, f_t> coeff_current;

  // Look up the current value of a_{row,col}: use coeff_current if it was
  // modified by a prior kCoefficientChange, otherwise fall back to the
  // original CSR.
  auto get_coeff = [&](int row, int col) -> f_t {
    auto it = coeff_current.find({row, col});
    if (it != coeff_current.end()) return it->second;
    for (i_t p = A_offsets[row]; p < A_offsets[row + 1]; ++p) {
      if (A_indices[p] == col) return A_values[p];
    }
    return 0;
  };

  // Replay each presolve reduction in forward order (the reverse of
  // Postsolve::undo()).
  for (int i = 0; i < (int)types.size(); ++i) {
    int first = start[i];

    switch (types[i]) {
      case ReductionType::kParallelCol: {
        // Storage layout: [orig_col1, flags1, orig_col2, flags2, -1]
        //                 [col1lb,    col1ub, col2lb,    col2ub, col2scale]
        // col1 is merged into col2: x[col2] absorbs scale * x[col1].
        int col1         = indices[first];
        int col2         = indices[first + 2];
        const f_t& scale = values[first + 4];
        x[col2] += scale * x[col1];
        // The eliminated column's reduced cost must be folded into the
        // survivor too; otherwise the final projection silently drops it.
        if (crush_rc) { z[col2] += scale * z[col1]; }
        break;
      }

      case ReductionType::kRowBoundChangeForcedByRow: {
        if (!crush_dual) break;
        cuopt_assert(i >= 1 && types[i - 1] == ReductionType::kReasonForRowBoundChangeForcedByRow,
                     "kRowBoundChangeForcedByRow must be preceded by its reason record");

        bool is_lhs = indices[first] == 1;
        int row     = (int)values[first];

        int reason_first = start[i - 1];
        int deleted_row  = indices[reason_first + 1];
        f_t factor       = values[reason_first];
        cuopt_assert(factor != 0, "parallel row factor must be nonzero");

        // Forward rule: if the deleted row carried dual signal that the
        // reverse would have attributed to the kept row, transfer it back.
        f_t candidate = y[deleted_row] / factor;
        bool sign_ok  = is_lhs ? num.isGT(candidate, (f_t)0) : num.isLT(candidate, (f_t)0);

        if (sign_ok) {
          f_t y_old = y[row];
          y[row]    = candidate;
          // Maintain z = c - A^T y: propagate the y change into reduced costs
          if (crush_rc) {
            f_t delta_y = candidate - y_old;
            for (i_t p = A_offsets[row]; p < A_offsets[row + 1]; ++p) {
              f_t a = get_coeff(row, A_indices[p]);
              z[A_indices[p]] -= delta_y * a;
            }
          }
        }
        break;
      }

      case ReductionType::kCoefficientChange: {
        if (!crush_rc) break;
        // a_{row,col} changed from a_old to a_new: adjust z to keep
        // z = c - A^T y consistent with the new coefficient.
        int row   = indices[first];
        int col   = indices[first + 1];
        f_t a_new = values[first];
        f_t a_old = get_coeff(row, col);
        coeff_current[{row, col}] = a_new;
        z[col] += (a_old - a_new) * y[row];
        break;
      }

      case ReductionType::kSubstitutedColWithDual: {
        // Singleton substitution: column j is expressed via equality row k as
        //   x_j = (rhs_k - Σ_{l≠j} a_kl·x_l) / a_kj
        // This changes the objective for every column l in row k:
        //   c_red[l] = c_orig[l] - (c_j / a_kj) · a_kl
        // Adjust z accordingly: Δz[l] = -(a_kl / a_kj)·z[j] - a_kl·y[k]
        if (!crush_rc) break;
        int row_k      = indices[first];  // equality row (original space)
        int row_length = (int)values[first];
        // Row coefficients start at first+3
        int row_coef_start = first + 3;
        // Substituted column index is after the row coefficients
        int col_j = indices[row_coef_start + row_length];

        // Find a_kj (coefficient of col j in row k)
        f_t a_kj = 0;
        for (int p = 0; p < row_length; ++p) {
          if (indices[row_coef_start + p] == col_j) {
            a_kj = values[row_coef_start + p];
            break;
          }
        }
        // a_kj is a divisor below: use the tolerance machinery rather than an
        // exact-zero test to avoid division by a near-zero pivot.
        if (num.isZero(a_kj)) break;  // shouldn't happen

        f_t z_j = z[col_j];
        f_t y_k = y[row_k];

        // Adjust z for each surviving column l in the equality row (l ≠ j)
        for (int p = 0; p < row_length; ++p) {
          int col_l = indices[row_coef_start + p];
          if (col_l == col_j) continue;
          f_t a_kl = values[row_coef_start + p];
          z[col_l] -= (a_kl / a_kj) * z_j + a_kl * y_k;
        }
        break;
      }

      case ReductionType::kFixedCol:                            // Handled via projection
      case ReductionType::kSubstitutedCol:                      // Col is dropped
      case ReductionType::kFixedInfCol:                         // Col is dropped
      case ReductionType::kVarBoundChange:                      // Noop
      case ReductionType::kRedundantRow:                        // Noop
      case ReductionType::kRowBoundChange:                      // Noop
      case ReductionType::kReasonForRowBoundChangeForcedByRow:  // Metadata for above
      case ReductionType::kSaveRow:                             // Metadata
      case ReductionType::kReducedBoundsCost:                   // Noop
      case ReductionType::kColumnDualValue:                     // Column reduced-cost only
      case ReductionType::kRowDualValue:                        // Handled via projection
        break;
        // no default: case to let the compiler yell at us if a new reduction is later introduced
    }
  }

  const auto& col_map = storage.origcol_mapping;
  const auto& row_map = storage.origrow_mapping;

  // Cancel contributions from removed rows. The original-space z was
  // computed as z = c - A^T y over ALL rows. The reduced-space stationarity
  // only involves surviving rows, so we must add back the terms from removed
  // rows: z[j] += y[i] * a_{i,j} for every removed row i with y[i] != 0.
  if (crush_rc) {
    std::vector<bool> row_survives((int)storage.nRowsOriginal, false);
    for (size_t k = 0; k < row_map.size(); ++k) {
      row_survives[row_map[k]] = true;
    }
    for (int i = 0; i < (int)storage.nRowsOriginal; ++i) {
      // Use the tolerance check, not y[i] == 0: approximate PDLP/dual-simplex
      // duals leave near-zero multipliers on removed rows, and an exact test
      // would inject that noise into z_reduced.
      if (row_survives[i] || num.isZero(y[i])) continue;
      for (i_t p = A_offsets[i]; p < A_offsets[i + 1]; ++p) {
        z[A_indices[p]] += y[i] * get_coeff(i, A_indices[p]);
      }
    }
  }

  // Project onto the surviving columns/rows of the presolved problem.
  x_reduced.resize(col_map.size());
  for (size_t k = 0; k < col_map.size(); ++k) {
    x_reduced[k] = x[col_map[k]];
  }

  if (crush_dual) {
    y_reduced.resize(row_map.size());
    for (size_t k = 0; k < row_map.size(); ++k) {
      y_reduced[k] = y[row_map[k]];
    }
  }

  if (crush_rc) {
    z_reduced.resize(col_map.size());
    for (size_t k = 0; k < col_map.size(); ++k) {
      z_reduced[k] = z[col_map[k]];
    }
  }
}

template <typename i_t, typename f_t>
third_party_presolve_t<i_t, f_t>::~third_party_presolve_t()
{
Expand Down
13 changes: 13 additions & 0 deletions cpp/src/mip_heuristics/presolve/third_party_presolve.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,19 @@ class third_party_presolve_t {

void uncrush_primal_solution(const std::vector<f_t>& reduced_primal,
std::vector<f_t>& full_primal) const;

void crush_primal_solution(const std::vector<f_t>& original_primal,
std::vector<f_t>& reduced_primal) const;

void crush_primal_dual_solution(const std::vector<f_t>& x_original,
const std::vector<f_t>& y_original,
std::vector<f_t>& x_reduced,
std::vector<f_t>& y_reduced,
const std::vector<f_t>& z_original,
std::vector<f_t>& z_reduced,
const std::vector<f_t>& A_values,
const std::vector<i_t>& A_indices,
const std::vector<i_t>& A_offsets) const;
const std::vector<i_t>& get_reduced_to_original_map() const { return reduced_to_original_map_; }
const std::vector<i_t>& get_original_to_reduced_map() const { return original_to_reduced_map_; }

Expand Down
Loading
Loading