diff --git a/RAPIDS_BRANCH b/RAPIDS_BRANCH index d5ea6ced53..ba2906d066 100644 --- a/RAPIDS_BRANCH +++ b/RAPIDS_BRANCH @@ -1 +1 @@ -release/26.04 +main diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp index d3f59144cc..611e4419c2 100644 --- a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp @@ -264,6 +264,8 @@ class pdlp_solver_settings_t { bool inside_mip{false}; // For concurrent termination std::atomic* concurrent_halt{nullptr}; + /** If true, solver does not set concurrent_halt; caller sets it after crossover. */ + bool halt_set_by_caller{false}; static constexpr f_t minimal_absolute_tolerance = 1.0e-12; pdlp_hyper_params::pdlp_hyper_params_t hyper_params; // Holds the information of new variable lower and upper bounds for each climber in the format: diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp index 45a47e7401..9bd5796a89 100644 --- a/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp +++ b/cpp/include/cuopt/linear_programming/pdlp/solver_solution.hpp @@ -235,6 +235,7 @@ class optimization_problem_solution_t : public base_solution_t { * @return rmm::device_uvector The device memory container for the reduced cost. */ rmm::device_uvector& get_reduced_cost(); + const rmm::device_uvector& get_reduced_cost() const; /** * @brief Get termination reason diff --git a/cpp/src/branch_and_bound/branch_and_bound.cpp b/cpp/src/branch_and_bound/branch_and_bound.cpp index ba2b63983e..a5c3948ec9 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.cpp +++ b/cpp/src/branch_and_bound/branch_and_bound.cpp @@ -9,6 +9,8 @@ #include #include +#include + #include #include @@ -29,13 +31,18 @@ #include #include +#include #include +#include #include #include #include +#include #include #include #include +#include +#include #include #include #include @@ -243,6 +250,8 @@ branch_and_bound_t::branch_and_bound_t( const user_problem_t& user_problem, const simplex_solver_settings_t& solver_settings, f_t start_time, + cuopt::linear_programming::detail::problem_t* mip_problem_ptr, + i_t pdlp_root_num_gpus, std::shared_ptr> clique_table) : original_problem_(user_problem), settings_(solver_settings), @@ -251,9 +260,10 @@ branch_and_bound_t::branch_and_bound_t( Arow_(1, 1, 0), incumbent_(1), root_relax_soln_(1, 1), - root_crossover_soln_(1, 1), pc_(1), - solver_status_(mip_status_t::UNSET) + solver_status_(mip_status_t::UNSET), + mip_problem_ptr_(mip_problem_ptr), + pdlp_root_num_gpus_(pdlp_root_num_gpus) { exploration_stats_.start_time = start_time; #ifdef PRINT_CONSTRAINT_MATRIX @@ -1802,6 +1812,84 @@ void branch_and_bound_t::single_threaded_solve() } } +template +void branch_and_bound_t::run_concurrent_pdlp_and_barrier_with_crossover( + const simplex_solver_settings_t& lp_settings, + crossover_status_t& crossover_status_out, + lp_solution_t& winner_crossover_soln_out, + std::vector& winner_crossover_vstatus_out, + f_t& winner_root_objective_out, + std::string& winner_solver_name_out, + std::atomic& winner, + std::mutex* first_solver_mutex, + bool* first_solver_callback_done, + std::thread& pdlp_thread_out, + std::thread& barrier_thread_out) +{ + // PDLP+crossover and Barrier+crossover each in a thread. winner: 0=none, 1=dual, 2=PDLP, + // 3=Barrier. + struct concurrent_shared_state_t { + std::mutex first_result_mutex; + }; + auto shared = std::make_shared(); + + auto do_crush_crossover = [this, + &lp_settings, + &crossover_status_out, + &winner_crossover_soln_out, + &winner_crossover_vstatus_out, + &winner_root_objective_out, + &winner_solver_name_out, + &winner, + first_solver_mutex, + first_solver_callback_done, + shared](const root_relaxation_first_solution_t& result, + const char* solver_name, + int winner_id) { + return cuopt::linear_programming::detail::run_crush_crossover_and_maybe_win( + result, + original_problem_, + original_lp_, + new_slacks_, + settings_, + exploration_stats_.start_time, + get_root_concurrent_halt(), + [this]() { set_root_concurrent_halt(1); }, + lp_settings.on_first_lp_solution_available, + first_solver_mutex, + first_solver_callback_done, + &shared->first_result_mutex, + &winner, + winner_id, + &crossover_status_out, + &winner_crossover_soln_out, + &winner_crossover_vstatus_out, + &winner_root_objective_out, + solver_name, + &winner_solver_name_out); + }; + + pdlp_thread_out = std::thread([this, &lp_settings, do_crush_crossover]() { + auto result = cuopt::linear_programming::detail::run_solver_for_root_lp( + mip_problem_ptr_, + lp_settings.time_limit, + get_root_concurrent_halt(), + pdlp_root_num_gpus_, + cuopt::linear_programming::method_t::PDLP); + (void)do_crush_crossover(result, "PDLP", 2); + }); + + barrier_thread_out = std::thread([this, &lp_settings, do_crush_crossover]() { + auto result = cuopt::linear_programming::detail::run_solver_for_root_lp( + mip_problem_ptr_, + lp_settings.time_limit, + get_root_concurrent_halt(), + pdlp_root_num_gpus_, + cuopt::linear_programming::method_t::Barrier); + (void)do_crush_crossover(result, "Barrier", 3); + }); +} + template lp_status_t branch_and_bound_t::solve_root_relaxation( simplex_solver_settings_t const& lp_settings, @@ -1817,89 +1905,157 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( i_t iter = 0; std::string solver_name = ""; - // Root node path - lp_status_t root_status; - std::future root_status_future; - root_status_future = std::async(std::launch::async, - &solve_linear_program_with_advanced_basis, - std::ref(original_lp_), - exploration_stats_.start_time, - std::ref(lp_settings), - std::ref(root_relax_soln), - std::ref(basis_update), - std::ref(basic_list), - std::ref(nonbasic_list), - std::ref(root_vstatus), - std::ref(edge_norms), - nullptr); - // Wait for the root relaxation solution to be sent by the diversity manager or dual simplex - // to finish - while (!root_crossover_solution_set_.load(std::memory_order_acquire) && - *get_root_concurrent_halt() == 0) { - std::this_thread::sleep_for(std::chrono::milliseconds(1)); - continue; - } - - if (root_crossover_solution_set_.load(std::memory_order_acquire)) { - // Crush the root relaxation solution on converted user problem - std::vector crushed_root_x; - crush_primal_solution( - original_problem_, original_lp_, root_crossover_soln_.x, new_slacks_, crushed_root_x); - std::vector crushed_root_y; - std::vector crushed_root_z; - - f_t dual_res_inf = crush_dual_solution(original_problem_, - original_lp_, - new_slacks_, - root_crossover_soln_.y, - root_crossover_soln_.z, - crushed_root_y, - crushed_root_z); - - root_crossover_soln_.x = crushed_root_x; - root_crossover_soln_.y = crushed_root_y; - root_crossover_soln_.z = crushed_root_z; - - // Call crossover on the crushed solution - auto root_crossover_settings = settings_; - root_crossover_settings.log.log = false; - root_crossover_settings.concurrent_halt = get_root_concurrent_halt(); - crossover_status_t crossover_status = crossover(original_lp_, - root_crossover_settings, - root_crossover_soln_, - exploration_stats_.start_time, - root_crossover_soln_, - crossover_vstatus_); - - // Check if crossover was stopped by dual simplex + // Dual simplex runs on the main thread when concurrent; otherwise it runs alone on main. + auto dual_simplex_settings = std::make_shared>(lp_settings); + dual_simplex_settings->inside_mip = 1; + + lp_status_t root_status = lp_status_t::UNSET; + lp_status_t root_result_status = + lp_status_t::UNSET; // dual simplex result; set when dual returns, read in else branch + + bool use_pdlp_path = false; + bool dual_simplex_finished_first = false; + + crossover_status_t crossover_status = crossover_status_t::NUMERICAL_ISSUES; + lp_solution_t winner_crossover_soln(original_lp_.num_rows, original_lp_.num_cols); + std::vector winner_crossover_vstatus; + f_t winner_root_objective = 0; + std::string root_winner_solver_name; + + std::thread pdlp_thread; + std::thread barrier_thread; + std::thread dual_simplex_thread; + std::atomic winner{0}; // 0=none, 1=dual, 2=PDLP, 3=Barrier + + if (enable_concurrent_lp_root_solve_ && mip_problem_ptr_ != nullptr) { + // All three run in threads; main only starts them and joins. First to finish with OPTIMAL sets + // winner and halt. + std::mutex first_solver_mutex; + bool first_solver_callback_done = false; + run_concurrent_pdlp_and_barrier_with_crossover(lp_settings, + crossover_status, + winner_crossover_soln, + winner_crossover_vstatus, + winner_root_objective, + root_winner_solver_name, + winner, + &first_solver_mutex, + &first_solver_callback_done, + pdlp_thread, + barrier_thread); + + // Dual simplex does not call on_first_lp_solution: diversity manager prefers optimal first; + // only PDLP/Barrier feed first solution when they have one. + dual_simplex_thread = std::thread([this, + dual_simplex_settings, + &root_relax_soln, + &basis_update, + &basic_list, + &nonbasic_list, + &root_vstatus, + &edge_norms, + &root_result_status, + &winner]() { + lp_status_t status = + solve_linear_program_with_advanced_basis(original_lp_, + exploration_stats_.start_time, + *dual_simplex_settings, + root_relax_soln, + basis_update, + basic_list, + nonbasic_list, + root_vstatus, + edge_norms, + nullptr); + root_result_status = status; + int expected = 0; + if (status == lp_status_t::OPTIMAL && + winner.compare_exchange_strong(expected, 1, std::memory_order_acq_rel)) { + set_root_concurrent_halt(1); + } + }); + + struct join_threads_guard_t { + std::thread* a = nullptr; + std::thread* b = nullptr; + std::thread* c = nullptr; + ~join_threads_guard_t() + { + if (a && a->joinable()) { a->join(); } + if (b && b->joinable()) { b->join(); } + if (c && c->joinable()) { c->join(); } + } + } join_guard; + join_guard.a = &pdlp_thread; + join_guard.b = &barrier_thread; + join_guard.c = &dual_simplex_thread; + + pdlp_thread.join(); + barrier_thread.join(); + dual_simplex_thread.join(); + join_guard.a = nullptr; + join_guard.b = nullptr; + join_guard.c = nullptr; + + // Winner may have set concurrent_halt==1 to stop peer solvers. All threads are joined; reset + // the flag for the rest of B&B (subsequent LP solves, etc.). + set_root_concurrent_halt(0); + + const int w = winner.load(std::memory_order_acquire); + use_pdlp_path = (w == 2 || w == 3); + if (w == 1) { dual_simplex_finished_first = true; } + } else { + // Non-concurrent: run dual simplex on main only. + root_status = solve_linear_program_with_advanced_basis(original_lp_, + exploration_stats_.start_time, + *dual_simplex_settings, + root_relax_soln, + basis_update, + basic_list, + nonbasic_list, + root_vstatus, + edge_norms, + nullptr); + root_result_status = root_status; + if (lp_settings.on_first_lp_solution_available) { + root_relaxation_first_solution_t ds_result; + ds_result.primal = root_relax_soln.x; + ds_result.dual = root_relax_soln.y; + ds_result.reduced_costs = root_relax_soln.z; + ds_result.objective = root_relax_soln.objective; + ds_result.user_objective = root_relax_soln.user_objective; + ds_result.iterations = root_relax_soln.iterations; + lp_settings.on_first_lp_solution_available(ds_result); + } + } + + if (use_pdlp_path) { + root_objective_ = winner_root_objective; + auto root_crossover_settings = settings_; + root_crossover_settings.log.log = false; + // Single-threaded CPU post-processing (refactor_basis, edge norms); concurrent halt must not + // apply. + root_crossover_settings.concurrent_halt = nullptr; if (crossover_status == crossover_status_t::OPTIMAL) { - set_root_concurrent_halt(1); // Stop dual simplex - root_status = root_status_future.get(); // Wait for dual simplex to finish - set_root_concurrent_halt(0); // Clear the concurrent halt flag - // Override the root relaxation solution with the crossover solution - root_relax_soln = root_crossover_soln_; - root_vstatus = crossover_vstatus_; + // Use winner's crossover solution; no wait. + root_relax_soln = winner_crossover_soln; + root_vstatus = winner_crossover_vstatus; root_status = lp_status_t::OPTIMAL; basic_list.clear(); nonbasic_list.reserve(original_lp_.num_cols - original_lp_.num_rows); nonbasic_list.clear(); // Get the basic list and nonbasic list from the vstatus for (i_t j = 0; j < original_lp_.num_cols; j++) { - if (crossover_vstatus_[j] == variable_status_t::BASIC) { + if (winner_crossover_vstatus[j] == variable_status_t::BASIC) { basic_list.push_back(j); } else { nonbasic_list.push_back(j); } } if (basic_list.size() != original_lp_.num_rows) { - settings_.log.printf( - "basic_list size %d != m %d\n", basic_list.size(), original_lp_.num_rows); assert(basic_list.size() == original_lp_.num_rows); } if (nonbasic_list.size() != original_lp_.num_cols - original_lp_.num_rows) { - settings_.log.printf("nonbasic_list size %d != n - m %d\n", - nonbasic_list.size(), - original_lp_.num_cols - original_lp_.num_rows); assert(nonbasic_list.size() == original_lp_.num_cols - original_lp_.num_rows); } // Populate the basis_update from the crossover vstatus @@ -1910,9 +2066,8 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( exploration_stats_.start_time, basic_list, nonbasic_list, - crossover_vstatus_); + winner_crossover_vstatus); if (refactor_status != 0) { - settings_.log.printf("Failed to refactor basis. %d deficient columns.\n", refactor_status); assert(refactor_status == 0); root_status = lp_status_t::NUMERICAL_ISSUES; } @@ -1920,19 +2075,32 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( // Set the edge norms to a default value edge_norms.resize(original_lp_.num_cols, -1.0); set_uninitialized_steepest_edge_norms(original_lp_, basic_list, edge_norms); - user_objective = root_crossover_soln_.user_objective; - iter = root_crossover_soln_.iterations; - solver_name = "Barrier/PDLP and Crossover"; + user_objective = winner_crossover_soln.user_objective; + iter = winner_crossover_soln.iterations; + solver_name = root_winner_solver_name + " and Crossover"; } else { - root_status = root_status_future.get(); - user_objective = root_relax_soln_.user_objective; - iter = root_relax_soln_.iterations; - solver_name = "Dual Simplex"; + // Crossover winner path but crossover was not OPTIMAL. Map crossover outcome to lp_status_t. + switch (crossover_status) { + case crossover_status_t::TIME_LIMIT: root_status = lp_status_t::TIME_LIMIT; break; + case crossover_status_t::NUMERICAL_ISSUES: + root_status = lp_status_t::NUMERICAL_ISSUES; + break; + case crossover_status_t::CONCURRENT_LIMIT: root_status = lp_status_t::TIME_LIMIT; break; + case crossover_status_t::PRIMAL_FEASIBLE: + case crossover_status_t::DUAL_FEASIBLE: root_status = lp_status_t::NUMERICAL_ISSUES; break; + default: root_status = lp_status_t::NUMERICAL_ISSUES; break; + } + user_objective = winner_crossover_soln.user_objective; + iter = winner_crossover_soln.iterations; + solver_name = root_winner_solver_name + " and Crossover"; } } else { - root_status = root_status_future.get(); - user_objective = root_relax_soln_.user_objective; - iter = root_relax_soln_.iterations; + // Use dual simplex result (root_result_status was set when dual simplex returned). + root_status = root_result_status; + (void)dual_simplex_finished_first; // used only to select path + // Diversity manager was already notified by whoever was first (dual simplex, PDLP, or Barrier). + user_objective = root_relax_soln.user_objective; + iter = root_relax_soln.iterations; solver_name = "Dual Simplex"; } @@ -1949,8 +2117,8 @@ lp_status_t branch_and_bound_t::solve_root_relaxation( } settings_.log.printf("\n"); - is_root_solution_set = true; + set_root_concurrent_halt(0); return root_status; } @@ -3799,4 +3967,10 @@ template class branch_and_bound_t; #endif +#ifdef MIP_INSTANTIATION_FLOAT + +template class branch_and_bound_t; + +#endif + } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/branch_and_bound/branch_and_bound.hpp b/cpp/src/branch_and_bound/branch_and_bound.hpp index 98674b7f9e..b4c46ac8e9 100644 --- a/cpp/src/branch_and_bound/branch_and_bound.hpp +++ b/cpp/src/branch_and_bound/branch_and_bound.hpp @@ -17,6 +17,7 @@ #include +#include #include #include #include @@ -33,15 +34,21 @@ #include #include +#include +#include + #include #include #include #include namespace cuopt::linear_programming::detail { +template +class problem_t; + template struct clique_table_t; -} +} // namespace cuopt::linear_programming::detail namespace cuopt::linear_programming::dual_simplex { @@ -74,34 +81,20 @@ struct deterministic_diving_policy_t; template class branch_and_bound_t { public: - branch_and_bound_t(const user_problem_t& user_problem, - const simplex_solver_settings_t& solver_settings, - f_t start_time, - std::shared_ptr> clique_table = nullptr); + /** Host \p user_problem must be fully populated by the caller. When \p mip_problem_ptr is + * non-null (GPU MIP / concurrent root), the caller must sync from device first, e.g. + * recompute_objective_integrality(), set objective_is_integral, get_host_user_problem(). */ + branch_and_bound_t( + const user_problem_t& user_problem, + const simplex_solver_settings_t& solver_settings, + f_t start_time, + cuopt::linear_programming::detail::problem_t* mip_problem_ptr = nullptr, + i_t pdlp_root_num_gpus = 1, + std::shared_ptr> clique_table = nullptr); // Set an initial guess based on the user_problem. This should be called before solve. void set_initial_guess(const std::vector& user_guess) { guess_ = user_guess; } - // Set the root solution found by PDLP - void set_root_relaxation_solution(const std::vector& primal, - const std::vector& dual, - const std::vector& reduced_costs, - f_t objective, - f_t user_objective, - i_t iterations) - { - if (!is_root_solution_set) { - root_crossover_soln_.x = primal; - root_crossover_soln_.y = dual; - root_crossover_soln_.z = reduced_costs; - root_objective_ = objective; - root_crossover_soln_.objective = objective; - root_crossover_soln_.user_objective = user_objective; - root_crossover_soln_.iterations = iterations; - root_crossover_solution_set_.store(true, std::memory_order_release); - } - } - // Set a solution based on the user problem during the course of the solve void set_new_solution(const std::vector& solution); @@ -122,6 +115,7 @@ class branch_and_bound_t { std::vector& repaired_solution) const; f_t get_lower_bound(); + i_t get_num_cols() const { return original_problem_.num_cols; } bool enable_concurrent_lp_root_solve() const { return enable_concurrent_lp_root_solve_; } std::atomic* get_root_concurrent_halt() { return &root_concurrent_halt_; } void set_root_concurrent_halt(int value) { root_concurrent_halt_ = value; } @@ -133,6 +127,21 @@ class branch_and_bound_t { std::vector& nonbasic_list, std::vector& edge_norms); + /** Starts PDLP+crossover and Barrier+crossover in two threads. winner is 0=none, 1=dual, 2=PDLP, + * 3=Barrier; first OPTIMAL sets it. first_solver_* for diversity manager callback. */ + void run_concurrent_pdlp_and_barrier_with_crossover( + const simplex_solver_settings_t& lp_settings, + crossover_status_t& crossover_status_out, + lp_solution_t& winner_crossover_soln_out, + std::vector& winner_crossover_vstatus_out, + f_t& winner_root_objective_out, + std::string& winner_solver_name_out, + std::atomic& winner, + std::mutex* first_solver_mutex, + bool* first_solver_callback_done, + std::thread& pdlp_thread_out, + std::thread& barrier_thread_out); + i_t find_reduced_cost_fixings(f_t upper_bound, std::vector& lower_bounds, std::vector& upper_bounds); @@ -146,7 +155,7 @@ class branch_and_bound_t { producer_sync_t& get_producer_sync() { return producer_sync_; } private: - const user_problem_t& original_problem_; + user_problem_t original_problem_; const simplex_solver_settings_t settings_; std::shared_ptr> clique_table_; std::future>> clique_table_future_; @@ -194,17 +203,16 @@ class branch_and_bound_t { // Variables for the root node in the search tree. std::vector root_vstatus_; - std::vector crossover_vstatus_; f_t root_objective_; lp_solution_t root_relax_soln_; - lp_solution_t root_crossover_soln_; std::vector edge_norms_; std::atomic root_crossover_solution_set_{false}; omp_atomic_t root_lp_current_lower_bound_; omp_atomic_t solving_root_relaxation_{false}; bool enable_concurrent_lp_root_solve_{false}; std::atomic root_concurrent_halt_{0}; - bool is_root_solution_set{false}; + cuopt::linear_programming::detail::problem_t* mip_problem_ptr_{nullptr}; + i_t pdlp_root_num_gpus_{1}; // Pseudocosts pseudo_costs_t pc_; diff --git a/cpp/src/dual_simplex/crossover.cpp b/cpp/src/dual_simplex/crossover.cpp index 988c9c50ad..14624a4f4c 100644 --- a/cpp/src/dual_simplex/crossover.cpp +++ b/cpp/src/dual_simplex/crossover.cpp @@ -331,6 +331,7 @@ void compute_dual_solution_from_basis(const lp_problem_t& lp, template i_t dual_push(const lp_problem_t& lp, + const csr_matrix_t& Arow, const simplex_solver_settings_t& settings, f_t start_time, lp_solution_t& solution, @@ -387,6 +388,9 @@ i_t dual_push(const lp_problem_t& lp, std::vector& y = solution.y; const std::vector& x = solution.x; i_t num_pushes = 0; + std::vector delta_zN(n - m); + std::vector delta_expanded; // workspace for sparse path (delta_y is sparse enough) + std::vector delta_y_dense; // workspace for dense path (delta_y is not sparse enough) while (superbasic_list.size() > 0) { const i_t s = superbasic_list.back(); const i_t basic_leaving_index = superbasic_list_index.back(); @@ -401,11 +405,9 @@ i_t dual_push(const lp_problem_t& lp, es_sparse.x[0] = -delta_zs; // B^T delta_y = -delta_zs*es - std::vector delta_y(m); sparse_vector_t delta_y_sparse(m, 1); sparse_vector_t UTsol_sparse(m, 1); ft.b_transpose_solve(es_sparse, delta_y_sparse, UTsol_sparse); - delta_y_sparse.scatter(delta_y); // We solved B^T delta_y = -delta_zs*es, but for the update we need // U^T*etilde = es. @@ -416,16 +418,38 @@ i_t dual_push(const lp_problem_t& lp, } // delta_zN = -N^T delta_y - std::vector delta_zN(n - m); - for (i_t k = 0; k < n - m; ++k) { - const i_t j = nonbasic_list[k]; - const i_t col_start = lp.A.col_start[j]; - const i_t col_end = lp.A.col_start[j + 1]; - f_t dot = 0.0; - for (i_t p = col_start; p < col_end; ++p) { - dot += lp.A.x[p] * delta_y[lp.A.i[p]]; + // Choose sparse vs dense method by delta_y sparsity (match dual simplex: sparse if <= 30% nnz) + std::fill(delta_zN.begin(), delta_zN.end(), 0.); + const bool use_sparse = (delta_y_sparse.i.size() * 1.0 / m) <= 0.3; + + if (use_sparse) { + delta_expanded.resize(n); + std::fill(delta_expanded.begin(), delta_expanded.end(), 0.); + for (i_t nnz_idx = 0; nnz_idx < static_cast(delta_y_sparse.i.size()); ++nnz_idx) { + const i_t row = delta_y_sparse.i[nnz_idx]; + const f_t val = delta_y_sparse.x[nnz_idx]; + const i_t row_start = Arow.row_start[row]; + const i_t row_end = Arow.row_start[row + 1]; + for (i_t p = row_start; p < row_end; ++p) { + const i_t col = Arow.j[p]; + delta_expanded[col] += Arow.x[p] * val; + } + } + for (i_t k = 0; k < n - m; ++k) { + delta_zN[k] = -delta_expanded[nonbasic_list[k]]; + } + } else { + delta_y_sparse.to_dense(delta_y_dense); + for (i_t k = 0; k < n - m; ++k) { + const i_t j = nonbasic_list[k]; + f_t dot = 0.0; + const i_t c_start = lp.A.col_start[j]; + const i_t c_end = lp.A.col_start[j + 1]; + for (i_t p = c_start; p < c_end; ++p) { + dot += lp.A.x[p] * delta_y_dense[lp.A.i[p]]; + } + delta_zN[k] = -dot; } - delta_zN[k] = -dot; } i_t entering_index = -1; @@ -435,8 +459,10 @@ i_t dual_push(const lp_problem_t& lp, assert(step_length >= -1e-6); // y <- y + step_length * delta_y - for (i_t i = 0; i < m; ++i) { - y[i] += step_length * delta_y[i]; + // Optimized: Only update non-zero elements from sparse representation + for (i_t nnz_idx = 0; nnz_idx < delta_y_sparse.i.size(); ++nnz_idx) { + const i_t i = delta_y_sparse.i[nnz_idx]; + y[i] += step_length * delta_y_sparse.x[nnz_idx]; } // z <- z + step_length * delta z @@ -725,7 +751,6 @@ i_t primal_push(const lp_problem_t& lp, { const i_t m = lp.num_rows; const i_t n = lp.num_cols; - settings.log.debug("Primal push: superbasic %ld\n", superbasic_list.size()); std::vector& x = solution.x; @@ -1002,6 +1027,7 @@ i_t primal_push(const lp_problem_t& lp, } solution.x = x_compare; solution.iterations += num_pushes; + return 0; } @@ -1190,6 +1216,9 @@ crossover_status_t crossover(const lp_problem_t& lp, f_t crossover_start = tic(); f_t work_estimate = 0; + csr_matrix_t Arow(m, n, 1); + lp.A.to_compressed_row(Arow); + settings.log.printf("\n"); settings.log.printf("Starting crossover\n"); @@ -1331,8 +1360,16 @@ crossover_status_t crossover(const lp_problem_t& lp, basis_update_mpf_t ft(L, U, p, settings.refactor_frequency); verify_basis(m, n, vstatus); compare_vstatus_with_lists(m, n, basic_list, nonbasic_list, vstatus); - i_t dual_push_status = dual_push( - lp, settings, start_time, solution, ft, basic_list, nonbasic_list, superbasic_list, vstatus); + i_t dual_push_status = dual_push(lp, + Arow, + settings, + start_time, + solution, + ft, + basic_list, + nonbasic_list, + superbasic_list, + vstatus); if (dual_push_status < 0) { return return_to_status(dual_push_status); } settings.log.debug("basic list size %ld m %d\n", basic_list.size(), m); settings.log.debug("nonbasic list size %ld n - m %d\n", nonbasic_list.size(), n - m); diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index 43429ba2de..9434f4661a 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -3597,10 +3597,6 @@ dual::status_t dual_phase2_with_advanced_basis(i_t phase, 100.0 * dense_delta_z / (sparse_delta_z + dense_delta_z)); ft.print_stats(); } - if (settings.inside_mip && settings.concurrent_halt != nullptr) { - settings.log.debug("Setting concurrent halt in Dual Simplex Phase 2\n"); - *settings.concurrent_halt = 1; - } } return status; } diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index b9ee419517..e8af7ba514 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -7,6 +7,8 @@ #include +#include + #include #include #include @@ -1571,4 +1573,62 @@ template void uncrush_solution(const presolve_info_t& #endif +#if CUOPT_INSTANTIATE_FLOAT + +template void convert_user_problem( + const user_problem_t& user_problem, + const simplex_solver_settings_t& settings, + lp_problem_t& problem, + std::vector& new_slacks, + dualize_info_t& dualize_info); + +template void convert_user_lp_with_guess( + const user_problem_t& user_problem, + const lp_solution_t& initial_solution, + const std::vector& initial_slack, + lp_problem_t& lp, + lp_solution_t& converted_solution); + +template int presolve(const lp_problem_t& original, + const simplex_solver_settings_t& settings, + lp_problem_t& presolved, + presolve_info_t& presolve_info); + +template void crush_primal_solution(const user_problem_t& user_problem, + const lp_problem_t& problem, + const std::vector& user_solution, + const std::vector& new_slacks, + std::vector& solution); + +template float crush_dual_solution(const user_problem_t& user_problem, + const lp_problem_t& problem, + const std::vector& new_slacks, + const std::vector& user_y, + const std::vector& user_z, + std::vector& y, + std::vector& z); + +template void uncrush_primal_solution(const user_problem_t& user_problem, + const lp_problem_t& problem, + const std::vector& solution, + std::vector& user_solution); + +template void uncrush_dual_solution(const user_problem_t& user_problem, + const lp_problem_t& problem, + const std::vector& y, + const std::vector& z, + std::vector& user_y, + std::vector& user_z); + +template void uncrush_solution(const presolve_info_t& presolve_info, + const simplex_solver_settings_t& settings, + const std::vector& crushed_x, + const std::vector& crushed_y, + const std::vector& crushed_z, + std::vector& uncrushed_x, + std::vector& uncrushed_y, + std::vector& uncrushed_z); + +#endif + } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/right_looking_lu.cpp b/cpp/src/dual_simplex/right_looking_lu.cpp index 657ebc4762..37202000f8 100644 --- a/cpp/src/dual_simplex/right_looking_lu.cpp +++ b/cpp/src/dual_simplex/right_looking_lu.cpp @@ -30,7 +30,7 @@ struct element_t { f_t x; // coefficient value i_t next_in_column; // index of the next element in the column: kNone if there is no next element i_t next_in_row; // index of the next element in the row: kNone if there is no next element -}; +}; // 24 bytes constexpr int kNone = -1; template @@ -79,6 +79,29 @@ i_t initialize_degree_data(const csc_matrix_t& A, return Bnz; } +// Fill col_pos and row_pos so that column j has col_pos[j] = its index in col_count[Cdegree[j]], +// and row i has row_pos[i] = its index in row_count[Rdegree[i]]. Enables O(1) degree-bucket +// removal. +template +void initialize_bucket_positions(const std::vector>& col_count, + const std::vector>& row_count, + i_t col_max_degree, + i_t row_max_degree, + std::vector& col_pos, + std::vector& row_pos) +{ + for (i_t d = 0; d <= col_max_degree; ++d) { + for (i_t pos = 0; pos < static_cast(col_count[d].size()); ++pos) { + col_pos[col_count[d][pos]] = pos; + } + } + for (i_t d = 0; d <= row_max_degree; ++d) { + for (i_t pos = 0; pos < static_cast(row_count[d].size()); ++pos) { + row_pos[row_count[d][pos]] = pos; + } + } +} + template i_t load_elements(const csc_matrix_t& A, const std::vector& column_list, @@ -86,11 +109,11 @@ i_t load_elements(const csc_matrix_t& A, std::vector>& elements, std::vector& first_in_row, std::vector& first_in_col, + std::vector& last_in_row, f_t& work_estimate) { const i_t m = A.m; const i_t n = column_list.size(); - std::vector last_element_in_row(m, kNone); work_estimate += m; i_t nz = 0; @@ -106,14 +129,14 @@ i_t load_elements(const csc_matrix_t& A, elements[nz].next_in_column = kNone; if (p > col_start) { elements[nz - 1].next_in_column = nz; } elements[nz].next_in_row = kNone; // set the current next in row to None (since we don't know - // if there will be more entries in this row) - if (last_element_in_row[i] != kNone) { + // if there will be more entries in this row yet)) + if (last_in_row[i] != kNone) { // If we have seen an entry in this row before, set the last entry we've seen in this row to // point to the current entry - elements[last_element_in_row[i]].next_in_row = nz; + elements[last_in_row[i]].next_in_row = nz; } // The current entry becomes the last element seen in the row - last_element_in_row[i] = nz; + last_in_row[i] = nz; if (p == col_start) { first_in_col[k] = nz; } if (first_in_row[i] == kNone) { first_in_row[i] = nz; } nz++; @@ -316,10 +339,11 @@ void update_Cdegree_and_col_count(i_t pivot_i, const std::vector& first_in_row, std::vector& Cdegree, std::vector>& col_count, + std::vector& col_pos, std::vector>& elements, f_t& work_estimate) { - // Update Cdegree and col_count + // Update Cdegree and col_count (O(1) removal using position array) i_t loop_count = 0; for (i_t p = first_in_row[pivot_i]; p != kNone; p = elements[p].next_in_row) { element_t* entry = &elements[p]; @@ -327,20 +351,20 @@ void update_Cdegree_and_col_count(i_t pivot_i, assert(entry->i == pivot_i); i_t cdeg = Cdegree[j]; assert(cdeg >= 0); - for (typename std::vector::iterator it = col_count[cdeg].begin(); - it != col_count[cdeg].end(); - it++) { - if (*it == j) { - // Remove col j from col_count[cdeg] - std::swap(*it, col_count[cdeg].back()); - col_count[cdeg].pop_back(); - work_estimate += (it - col_count[cdeg].begin()); - break; - } + // O(1) swap-with-last removal + { + i_t pos = col_pos[j]; + i_t other = col_count[cdeg].back(); + col_count[cdeg][pos] = other; + col_pos[other] = pos; + col_count[cdeg].pop_back(); } cdeg = --Cdegree[j]; assert(cdeg >= 0); - if (j != pivot_j && cdeg >= 0) { col_count[cdeg].push_back(j); } + if (j != pivot_j && cdeg >= 0) { + col_pos[j] = col_count[cdeg].size(); + col_count[cdeg].push_back(j); + } loop_count++; } work_estimate += 7 * loop_count; @@ -353,30 +377,31 @@ void update_Rdegree_and_row_count(i_t pivot_i, const std::vector& first_in_col, std::vector& Rdegree, std::vector>& row_count, + std::vector& row_pos, std::vector>& elements, f_t& work_estimate) { - // Update Rdegree and row_count + // Update Rdegree and row_count (O(1) removal using position array) i_t loop_count = 0; for (i_t p = first_in_col[pivot_j]; p != kNone; p = elements[p].next_in_column) { element_t* entry = &elements[p]; const i_t i = entry->i; i_t rdeg = Rdegree[i]; assert(rdeg >= 0); - for (typename std::vector::iterator it = row_count[rdeg].begin(); - it != row_count[rdeg].end(); - it++) { - if (*it == i) { - // Remove row i from row_count[rdeg] - std::swap(*it, row_count[rdeg].back()); - row_count[rdeg].pop_back(); - work_estimate += (it - row_count[rdeg].begin()); - break; - } + // O(1) swap-with-last removal + { + i_t pos = row_pos[i]; + i_t other = row_count[rdeg].back(); + row_count[rdeg][pos] = other; + row_pos[other] = pos; + row_count[rdeg].pop_back(); } rdeg = --Rdegree[i]; assert(rdeg >= 0); - if (i != pivot_i && rdeg >= 0) { row_count[rdeg].push_back(i); } + if (i != pivot_i && rdeg >= 0) { + row_pos[i] = row_count[rdeg].size(); + row_count[rdeg].push_back(i); + } loop_count++; } work_estimate += 7 * loop_count; @@ -400,18 +425,17 @@ void schur_complement(i_t pivot_i, std::vector& Cdegree, std::vector>& row_count, std::vector>& col_count, + std::vector& last_in_row, + std::vector& col_pos, + std::vector& row_pos, std::vector>& elements, f_t& work_estimate) { + // row_last_workspace: temp copy of last_in_row for this pivot step, updated when adding fill + // last_in_row: persistent tail pointer per row for (i_t p1 = first_in_col[pivot_j]; p1 != kNone; p1 = elements[p1].next_in_column) { - element_t* e = &elements[p1]; - const i_t i = e->i; - i_t row_last = kNone; - for (i_t p3 = first_in_row[i]; p3 != kNone; p3 = elements[p3].next_in_row) { - row_last = p3; - } - work_estimate += 2 * Rdegree[i]; - row_last_workspace[i] = row_last; + const i_t i = elements[p1].i; + row_last_workspace[i] = last_in_row[i]; } work_estimate += 4 * Cdegree[pivot_j]; @@ -478,35 +502,29 @@ void schur_complement(i_t pivot_i, first_in_row[i] = fill_p; } row_last_workspace[i] = fill_p; - i_t rdeg = Rdegree[i]; // Rdgree must increase - for (typename std::vector::iterator it = row_count[rdeg].begin(); - it != row_count[rdeg].end(); - it++) { - if (*it == i) { - // Remove row i from row_count[rdeg] - std::swap(*it, row_count[rdeg].back()); - row_count[rdeg].pop_back(); - work_estimate += 2 * (it - row_count[rdeg].begin()); - break; - } + last_in_row[i] = fill_p; // maintain last_in_row persistent state + // Row degree update: O(1) removal using row_pos + { + i_t rdeg = Rdegree[i]; + i_t pos = row_pos[i]; + i_t other = row_count[rdeg].back(); + row_count[rdeg][pos] = other; + row_pos[other] = pos; + row_count[rdeg].pop_back(); + row_pos[i] = row_count[rdeg + 1].size(); + row_count[++Rdegree[i]].push_back(i); } - rdeg = ++Rdegree[i]; // Increase rdeg - row_count[rdeg].push_back(i); // Add row i to row_count[rdeg] - - i_t cdeg = Cdegree[j]; // Cdegree must increase - for (typename std::vector::iterator it = col_count[cdeg].begin(); - it != col_count[cdeg].end(); - it++) { - if (*it == j) { - // Remove col j from col_count[cdeg] - std::swap(*it, col_count[cdeg].back()); - col_count[cdeg].pop_back(); - work_estimate += 2 * (it - col_count[cdeg].begin()); - break; - } + // Col degree update: O(1) removal using col_pos + { + i_t cdeg = Cdegree[j]; + i_t pos = col_pos[j]; + i_t other = col_count[cdeg].back(); + col_count[cdeg][pos] = other; + col_pos[other] = pos; + col_count[cdeg].pop_back(); + col_pos[j] = col_count[cdeg + 1].size(); + col_count[++Cdegree[j]].push_back(j); } - cdeg = ++Cdegree[j]; // Increase Cdegree - col_count[cdeg].push_back(j); // Add column j to col_count[cdeg] } } work_estimate += 10 * Cdegree[pivot_j]; @@ -532,7 +550,6 @@ void remove_pivot_row(i_t pivot_i, f_t& work_estimate) { // Remove the pivot row - i_t row_loop_count = 0; for (i_t p0 = first_in_row[pivot_i]; p0 != kNone; p0 = elements[p0].next_in_row) { element_t* e = &elements[p0]; @@ -574,6 +591,7 @@ void remove_pivot_col(i_t pivot_i, std::vector& first_in_col, std::vector& first_in_row, std::vector& max_in_row, + std::vector& last_in_row, std::vector>& elements, f_t& work_estimate) { @@ -582,7 +600,10 @@ void remove_pivot_col(i_t pivot_i, for (i_t p1 = first_in_col[pivot_j]; p1 != kNone; p1 = elements[p1].next_in_column) { element_t* e = &elements[p1]; const i_t i = e->i; - i_t last = kNone; + // Need both: last = previous-in-row (for link update when removing); last_surviving = new row + // tail (for last_in_row[i]). They differ when the pivot is the last element in the row. + i_t last = kNone; + i_t last_surviving = kNone; #ifdef THRESHOLD_ROOK_PIVOTING f_t max_in_row_i = 0.0; #endif @@ -598,16 +619,17 @@ void remove_pivot_col(i_t pivot_i, entry->i = -1; entry->j = -1; entry->x = std::numeric_limits::quiet_NaN(); - } + } else { + last_surviving = p; #ifdef THRESHOLD_ROOK_PIVOTING - else { const f_t abs_entryx = std::abs(entry->x); if (abs_entryx > max_in_row_i) { max_in_row_i = abs_entryx; } - } #endif + } last = p; row_loop_count++; } + last_in_row[i] = last_surviving; work_estimate += 3 * row_loop_count; #ifdef THRESHOLD_ROOK_PIVOTING max_in_row[i] = max_in_row_i; @@ -656,11 +678,19 @@ i_t right_looking_lu(const csc_matrix_t& A, const i_t Bnz = initialize_degree_data(A, column_list, Cdegree, Rdegree, col_count, row_count, work_estimate); + + // Position arrays for O(1) degree-bucket removal (col_count and row_count each have n+1 buckets) + std::vector col_pos(n); // if Cdegree[j] = nz, then j is in col_count[nz][col_pos[j]] + std::vector row_pos(n); // if Rdegree[i] = nz, then i is in row_count[nz][row_pos[i]] + initialize_bucket_positions(col_count, row_count, n, n, col_pos, row_pos); + std::vector> elements(Bnz); std::vector first_in_row(n, kNone); std::vector first_in_col(n, kNone); + std::vector last_in_row(n, kNone); work_estimate += 2 * n + Bnz; - load_elements(A, column_list, Bnz, elements, first_in_row, first_in_col, work_estimate); + load_elements( + A, column_list, Bnz, elements, first_in_row, first_in_col, last_in_row, work_estimate); std::vector column_j_workspace(n, kNone); std::vector row_last_workspace(n); @@ -777,9 +807,9 @@ i_t right_looking_lu(const csc_matrix_t& A, // Update Cdegree and col_count update_Cdegree_and_col_count( - pivot_i, pivot_j, first_in_row, Cdegree, col_count, elements, work_estimate); + pivot_i, pivot_j, first_in_row, Cdegree, col_count, col_pos, elements, work_estimate); update_Rdegree_and_row_count( - pivot_i, pivot_j, first_in_col, Rdegree, row_count, elements, work_estimate); + pivot_i, pivot_j, first_in_col, Rdegree, row_count, row_pos, elements, work_estimate); // A22 <- A22 - l u^T schur_complement(pivot_i, @@ -798,14 +828,23 @@ i_t right_looking_lu(const csc_matrix_t& A, Cdegree, row_count, col_count, + last_in_row, + col_pos, + row_pos, elements, work_estimate); // Remove the pivot row remove_pivot_row( pivot_i, pivot_j, first_in_col, first_in_row, max_in_column, elements, work_estimate); - remove_pivot_col( - pivot_i, pivot_j, first_in_col, first_in_row, max_in_row, elements, work_estimate); + remove_pivot_col(pivot_i, + pivot_j, + first_in_col, + first_in_row, + max_in_row, + last_in_row, + elements, + work_estimate); // Set pivot entry to sentinel value pivot_entry->i = -1; @@ -1030,10 +1069,18 @@ i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, const i_t Bnz = initialize_degree_data(A, column_list, Cdegree, Rdegree, col_count, row_count, work_estimate); + + // Position arrays for O(1) degree-bucket removal (col_count has m+1 buckets, row_count n+1) + std::vector col_pos(n); // if Cdegree[j] = nz, then j is in col_count[nz][col_pos[j]] + std::vector row_pos(m); // if Rdegree[i] = nz, then i is in row_count[nz][row_pos[i]] + initialize_bucket_positions(col_count, row_count, m, n, col_pos, row_pos); + std::vector> elements(Bnz); std::vector first_in_row(m, kNone); std::vector first_in_col(n, kNone); - load_elements(A, column_list, Bnz, elements, first_in_row, first_in_col, work_estimate); + std::vector last_in_row(m, kNone); + load_elements( + A, column_list, Bnz, elements, first_in_row, first_in_col, last_in_row, work_estimate); std::vector column_j_workspace(m, kNone); std::vector row_last_workspace(m); @@ -1100,9 +1147,9 @@ i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, // Update Cdegree and col_count update_Cdegree_and_col_count( - pivot_i, pivot_j, first_in_row, Cdegree, col_count, elements, work_estimate); + pivot_i, pivot_j, first_in_row, Cdegree, col_count, col_pos, elements, work_estimate); update_Rdegree_and_row_count( - pivot_i, pivot_j, first_in_col, Rdegree, row_count, elements, work_estimate); + pivot_i, pivot_j, first_in_col, Rdegree, row_count, row_pos, elements, work_estimate); // A22 <- A22 - l u^T schur_complement(pivot_i, @@ -1121,14 +1168,23 @@ i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, Cdegree, row_count, col_count, + last_in_row, + col_pos, + row_pos, elements, work_estimate); // Remove the pivot row remove_pivot_row( pivot_i, pivot_j, first_in_col, first_in_row, max_in_column, elements, work_estimate); - remove_pivot_col( - pivot_i, pivot_j, first_in_col, first_in_row, max_in_row, elements, work_estimate); + remove_pivot_col(pivot_i, + pivot_j, + first_in_col, + first_in_row, + max_in_row, + last_in_row, + elements, + work_estimate); // Set pivot entry to sentinel value pivot_entry->i = -1; diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index eadd93040c..702699980f 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -205,6 +205,9 @@ struct simplex_solver_settings_t { std::function&, f_t)> node_processed_callback; std::function heuristic_preemption_callback; std::function&, std::vector&, f_t)> set_simplex_solution_callback; + // Called by B&B when first LP solution is available (PDLP/Barrier or dual simplex). + std::function const&)> + on_first_lp_solution_available; std::function dual_simplex_objective_callback; // Called with current dual obj mutable logger_t log; std::atomic* concurrent_halt; // if nullptr ignored, if !nullptr, 0 if solver should diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp index d300d6011c..d5525891b6 100644 --- a/cpp/src/dual_simplex/solve.cpp +++ b/cpp/src/dual_simplex/solve.cpp @@ -706,7 +706,7 @@ i_t solve(const user_problem_t& problem, { i_t status; if (is_mip(problem) && !settings.relaxation) { - branch_and_bound_t branch_and_bound(problem, settings, tic()); + branch_and_bound_t branch_and_bound(problem, settings, tic()); mip_solution_t mip_solution(problem.num_cols); mip_status_t mip_status = branch_and_bound.solve(mip_solution); if (mip_status == mip_status_t::OPTIMAL) { @@ -745,7 +745,7 @@ i_t solve_mip_with_guess(const user_problem_t& problem, { i_t status; if (is_mip(problem)) { - branch_and_bound_t branch_and_bound(problem, settings, tic()); + branch_and_bound_t branch_and_bound(problem, settings, tic()); branch_and_bound.set_initial_guess(guess); mip_status_t mip_status = branch_and_bound.solve(solution); if (mip_status == mip_status_t::OPTIMAL) { diff --git a/cpp/src/dual_simplex/types.hpp b/cpp/src/dual_simplex/types.hpp index ea46a1f67e..6660a86f0a 100644 --- a/cpp/src/dual_simplex/types.hpp +++ b/cpp/src/dual_simplex/types.hpp @@ -9,6 +9,7 @@ #include #include +#include namespace cuopt::linear_programming::dual_simplex { @@ -19,6 +20,18 @@ using float64_t = double; constexpr float64_t inf = std::numeric_limits::infinity(); +// First LP solution from either PDLP/Barrier or dual simplex; used to notify diversity manager +// without B&B depending on PDLP types. +template +struct root_relaxation_first_solution_t { + std::vector primal; + std::vector dual; + std::vector reduced_costs; + f_t objective{0}; + f_t user_objective{0}; + i_t iterations{0}; +}; + // We return this constant to signal that a concurrent halt has occurred #define CONCURRENT_HALT_RETURN -2 // We return this constant to signal that a time limit has occurred diff --git a/cpp/src/mip_heuristics/CMakeLists.txt b/cpp/src/mip_heuristics/CMakeLists.txt index a200d4265b..202ed94bd5 100644 --- a/cpp/src/mip_heuristics/CMakeLists.txt +++ b/cpp/src/mip_heuristics/CMakeLists.txt @@ -41,7 +41,8 @@ set(MIP_NON_LP_FILES ${CMAKE_CURRENT_SOURCE_DIR}/presolve/conflict_graph/clique_table.cu ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/feasibility_jump.cu ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/feasibility_jump_kernels.cu - ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/fj_cpu.cu) + ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/fj_cpu.cu + ${CMAKE_CURRENT_SOURCE_DIR}/root_lp.cu) # Choose which files to include based on build mode if(BUILD_LP_ONLY) diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cu b/cpp/src/mip_heuristics/diversity/diversity_manager.cu index 0ded8337d8..c659e9788b 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cu +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cu @@ -417,7 +417,16 @@ solution_t diversity_manager_t::run_solver() bool bb_thread_solution_exists = simplex_solution_exists.load(); if (bb_thread_solution_exists) { ls.lp_optimal_exists = true; + } else if (branch_and_bound_ptr != nullptr && + branch_and_bound_ptr->enable_concurrent_lp_root_solve()) { + // B&B drives root relaxation; wait for first solution (PDLP/Barrier or dual simplex) + first_solution_ready_.store(false, std::memory_order_release); + std::unique_lock lock(first_solution_mutex_); + first_solution_cv_.wait(lock, [this]() { return first_solution_ready_.load(); }); + lock.unlock(); + clamp_within_var_bounds(lp_optimal_solution, problem_ptr, problem_ptr->handle_ptr); } else if (!fj_only_run) { + // Heuristics-only or non-concurrent: diversity manager runs LP solve convert_greater_to_less(*problem_ptr); f_t tolerance_divisor = @@ -489,38 +498,6 @@ solution_t diversity_manager_t::run_solver() // to bring variables within the bounds } - // Send PDLP relaxed solution to branch and bound - if (problem_ptr->set_root_relaxation_solution_callback != nullptr) { - auto& d_primal_solution = lp_result.get_primal_solution(); - auto& d_dual_solution = lp_result.get_dual_solution(); - auto& d_reduced_costs = lp_result.get_reduced_cost(); - - std::vector host_primal(d_primal_solution.size()); - std::vector host_dual(d_dual_solution.size()); - std::vector host_reduced_costs(d_reduced_costs.size()); - raft::copy(host_primal.data(), - d_primal_solution.data(), - d_primal_solution.size(), - problem_ptr->handle_ptr->get_stream()); - raft::copy(host_dual.data(), - d_dual_solution.data(), - d_dual_solution.size(), - problem_ptr->handle_ptr->get_stream()); - raft::copy(host_reduced_costs.data(), - d_reduced_costs.data(), - d_reduced_costs.size(), - problem_ptr->handle_ptr->get_stream()); - problem_ptr->handle_ptr->sync_stream(); - - // PDLP returns user-space objective (it applies objective_scaling_factor internally) - auto user_obj = lp_result.get_objective_value(); - auto solver_obj = problem_ptr->get_solver_obj_from_user_obj(user_obj); - auto iterations = lp_result.get_additional_termination_information().number_of_steps_taken; - // Set for the B&B (param4 expects solver space, param5 expects user space) - problem_ptr->set_root_relaxation_solution_callback( - host_primal, host_dual, host_reduced_costs, solver_obj, user_obj, iterations); - } - // in case the pdlp returned var boudns that are out of bounds clamp_within_var_bounds(lp_optimal_solution, problem_ptr, problem_ptr->handle_ptr); } @@ -859,6 +836,35 @@ std::pair, bool> diversity_manager_t::recombine( return std::make_pair(solution_t(a), false); } +template +void diversity_manager_t::on_first_lp_solution( + cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t const& result) +{ + { + std::lock_guard lock(relaxed_solution_mutex); + cuopt_assert(result.primal.size() == lp_optimal_solution.size(), + "First LP solution primal size mismatch"); + cuopt_assert(result.dual.size() == lp_dual_optimal_solution.size(), + "First LP solution dual size mismatch"); + raft::copy(lp_optimal_solution.data(), + result.primal.data(), + result.primal.size(), + problem_ptr->handle_ptr->get_stream()); + raft::copy(lp_dual_optimal_solution.data(), + result.dual.data(), + result.dual.size(), + problem_ptr->handle_ptr->get_stream()); + problem_ptr->handle_ptr->sync_stream(); + ls.lp_optimal_exists = true; + set_new_user_bound(result.user_objective); + } + { + std::lock_guard lock(first_solution_mutex_); + first_solution_ready_.store(true, std::memory_order_release); + first_solution_cv_.notify_all(); + } +} + template void diversity_manager_t::set_simplex_solution(const std::vector& solution, const std::vector& dual_solution, diff --git a/cpp/src/mip_heuristics/diversity/diversity_manager.cuh b/cpp/src/mip_heuristics/diversity/diversity_manager.cuh index 4f86192db8..fed937a88b 100644 --- a/cpp/src/mip_heuristics/diversity/diversity_manager.cuh +++ b/cpp/src/mip_heuristics/diversity/diversity_manager.cuh @@ -21,12 +21,16 @@ #include #include +#include + #include #include #include #include #include +#include +#include #include namespace cuopt::linear_programming::detail { @@ -71,6 +75,12 @@ class diversity_manager_t { void set_simplex_solution(const std::vector& solution, const std::vector& dual_solution, f_t objective); + + // Called by B&B when first LP solution is available (PDLP/Barrier or dual simplex). + void on_first_lp_solution( + cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t const& + result); + mip_solver_context_t& context; dual_simplex::branch_and_bound_t* branch_and_bound_ptr; problem_t* problem_ptr; @@ -98,6 +108,11 @@ class diversity_manager_t { // atomic for signalling pdlp to stop std::atomic global_concurrent_halt{0}; + // First solution from B&B: wait for B&B to call on_first_lp_solution when run_bb and concurrent + std::mutex first_solution_mutex_; + std::condition_variable first_solution_cv_; + std::atomic first_solution_ready_{false}; + rins_t rins; bool run_only_ls_recombiner{false}; diff --git a/cpp/src/mip_heuristics/diversity/lns/rins.cu b/cpp/src/mip_heuristics/diversity/lns/rins.cu index d7d7601014..fcee9ecc89 100644 --- a/cpp/src/mip_heuristics/diversity/lns/rins.cu +++ b/cpp/src/mip_heuristics/diversity/lns/rins.cu @@ -248,12 +248,9 @@ void rins_t::run_rins() // run sub-mip namespace dual_simplex = cuopt::linear_programming::dual_simplex; - dual_simplex::user_problem_t branch_and_bound_problem(&rins_handle); dual_simplex::simplex_solver_settings_t branch_and_bound_settings; dual_simplex::mip_solution_t branch_and_bound_solution(1); dual_simplex::mip_status_t branch_and_bound_status = dual_simplex::mip_status_t::UNSET; - fixed_problem.get_host_user_problem(branch_and_bound_problem); - branch_and_bound_solution.resize(branch_and_bound_problem.num_cols); // Fill in the settings for branch and bound branch_and_bound_settings.time_limit = time_limit; // branch_and_bound_settings.node_limit = 5000 + node_count / 100; // try harder as time goes @@ -274,8 +271,13 @@ void rins_t::run_rins() f_t objective) { rins_solution_queue.push_back(solution); }; + dual_simplex::user_problem_t bb_user_problem(fixed_problem.handle_ptr); + fixed_problem.recompute_objective_integrality(); + bb_user_problem.objective_is_integral = fixed_problem.is_objective_integral(); + fixed_problem.get_host_user_problem(bb_user_problem); dual_simplex::branch_and_bound_t branch_and_bound( - branch_and_bound_problem, branch_and_bound_settings, dual_simplex::tic()); + bb_user_problem, branch_and_bound_settings, dual_simplex::tic(), &fixed_problem, 1); + branch_and_bound_solution.resize(branch_and_bound.get_num_cols()); branch_and_bound.set_initial_guess(cuopt::host_copy(fixed_assignment, rins_handle.get_stream())); branch_and_bound_status = branch_and_bound.solve(branch_and_bound_solution); diff --git a/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh b/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh index a867141d0a..e55355ecba 100644 --- a/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh +++ b/cpp/src/mip_heuristics/diversity/recombiners/sub_mip.cuh @@ -95,10 +95,7 @@ class sub_mip_recombiner_t : public recombiner_t { if (run_sub_mip) { // run sub-mip namespace dual_simplex = cuopt::linear_programming::dual_simplex; - dual_simplex::user_problem_t branch_and_bound_problem(offspring.handle_ptr); dual_simplex::simplex_solver_settings_t branch_and_bound_settings; - fixed_problem.get_host_user_problem(branch_and_bound_problem); - branch_and_bound_solution.resize(branch_and_bound_problem.num_cols); // Fill in the settings for branch and bound branch_and_bound_settings.time_limit = sub_mip_recombiner_config_t::sub_mip_time_limit; branch_and_bound_settings.print_presolve_stats = false; @@ -117,8 +114,13 @@ class sub_mip_recombiner_t : public recombiner_t { // disable B&B logs, so that it is not interfering with the main B&B thread branch_and_bound_settings.log.log = false; + dual_simplex::user_problem_t bb_user_problem(fixed_problem.handle_ptr); + fixed_problem.recompute_objective_integrality(); + bb_user_problem.objective_is_integral = fixed_problem.is_objective_integral(); + fixed_problem.get_host_user_problem(bb_user_problem); dual_simplex::branch_and_bound_t branch_and_bound( - branch_and_bound_problem, branch_and_bound_settings, dual_simplex::tic()); + bb_user_problem, branch_and_bound_settings, dual_simplex::tic(), &fixed_problem, 1); + branch_and_bound_solution.resize(branch_and_bound.get_num_cols()); branch_and_bound_status = branch_and_bound.solve(branch_and_bound_solution); if (solution_vector.size() > 0) { cuopt_assert(fixed_assignment.size() == branch_and_bound_solution.x.size(), diff --git a/cpp/src/mip_heuristics/problem/problem.cu b/cpp/src/mip_heuristics/problem/problem.cu index 90d80f5948..32aeef695a 100644 --- a/cpp/src/mip_heuristics/problem/problem.cu +++ b/cpp/src/mip_heuristics/problem/problem.cu @@ -149,8 +149,7 @@ problem_t::problem_t( Q_values(problem_.get_quadratic_objective_values()) { op_problem_cstr_body(problem_); - branch_and_bound_callback = nullptr; - set_root_relaxation_solution_callback = nullptr; + branch_and_bound_callback = nullptr; } template @@ -162,7 +161,6 @@ problem_t::problem_t(const problem_t& problem_) integer_fixed_problem(problem_.integer_fixed_problem), integer_fixed_variable_map(problem_.integer_fixed_variable_map, handle_ptr->get_stream()), branch_and_bound_callback(nullptr), - set_root_relaxation_solution_callback(nullptr), n_variables(problem_.n_variables), n_constraints(problem_.n_constraints), n_binary_vars(problem_.n_binary_vars), @@ -219,7 +217,6 @@ problem_t::problem_t(const problem_t& problem_, integer_fixed_problem(problem_.integer_fixed_problem), integer_fixed_variable_map(problem_.integer_fixed_variable_map, handle_ptr->get_stream()), branch_and_bound_callback(nullptr), - set_root_relaxation_solution_callback(nullptr), n_variables(problem_.n_variables), n_constraints(problem_.n_constraints), n_binary_vars(problem_.n_binary_vars), diff --git a/cpp/src/mip_heuristics/problem/problem.cuh b/cpp/src/mip_heuristics/problem/problem.cuh index 9771bab568..130c97526e 100644 --- a/cpp/src/mip_heuristics/problem/problem.cuh +++ b/cpp/src/mip_heuristics/problem/problem.cuh @@ -242,9 +242,6 @@ class problem_t { rmm::device_uvector integer_fixed_variable_map; std::function&)> branch_and_bound_callback; - std::function&, const std::vector&, const std::vector&, f_t, f_t, i_t)> - set_root_relaxation_solution_callback; typename mip_solver_settings_t::tolerances_t tolerances{}; i_t n_variables{0}; diff --git a/cpp/src/mip_heuristics/root_lp.cu b/cpp/src/mip_heuristics/root_lp.cu new file mode 100644 index 0000000000..b181db43cd --- /dev/null +++ b/cpp/src/mip_heuristics/root_lp.cu @@ -0,0 +1,219 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#include +#include +#include "root_lp.cuh" + +#include +#include + +#include +#include +#include +#include +#include + +namespace cuopt::linear_programming::detail { + +namespace { +template +cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t +copy_lp_result_to_root_solution(problem_t* problem, + const optimization_problem_solution_t& lp_result) +{ + cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t result; + auto stream = problem->handle_ptr->get_stream(); + result.primal.resize(lp_result.get_primal_solution().size()); + result.dual.resize(lp_result.get_dual_solution().size()); + result.reduced_costs.resize(lp_result.get_reduced_cost().size()); + raft::copy( + result.primal.data(), lp_result.get_primal_solution().data(), result.primal.size(), stream); + raft::copy(result.dual.data(), lp_result.get_dual_solution().data(), result.dual.size(), stream); + raft::copy(result.reduced_costs.data(), + lp_result.get_reduced_cost().data(), + result.reduced_costs.size(), + stream); + problem->handle_ptr->sync_stream(); + result.objective = problem->get_solver_obj_from_user_obj(lp_result.get_objective_value()); + result.user_objective = lp_result.get_objective_value(); + result.iterations = lp_result.get_additional_termination_information().number_of_steps_taken; + return result; +} + +} // namespace + +template +cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t +run_solver_for_root_lp(problem_t* problem, + f_t time_limit, + std::atomic* concurrent_halt, + i_t num_gpus, + method_t method) +{ + convert_greater_to_less(*problem); + f_t tolerance_divisor = + problem->tolerances.absolute_tolerance / + (problem->tolerances.relative_tolerance > 0 ? problem->tolerances.relative_tolerance : 1); + pdlp_solver_settings_t pdlp_settings{}; + pdlp_settings.tolerances.relative_primal_tolerance = + problem->tolerances.absolute_tolerance / tolerance_divisor; + pdlp_settings.tolerances.relative_dual_tolerance = + problem->tolerances.absolute_tolerance / tolerance_divisor; + pdlp_settings.time_limit = time_limit; + pdlp_settings.first_primal_feasible = false; + pdlp_settings.concurrent_halt = concurrent_halt; + pdlp_settings.halt_set_by_caller = true; // B&B sets halt only after crossover + pdlp_settings.method = method; + pdlp_settings.inside_mip = true; + pdlp_settings.num_gpus = num_gpus; + pdlp_settings.presolver = presolver_t::None; + pdlp_settings.crossover = false; // B&B does crush + crossover for both paths + if (method == method_t::PDLP) { pdlp_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable2; } + + timer_t lp_timer(time_limit); + auto lp_result = solve_lp_with_method(*problem, pdlp_settings, lp_timer); + return copy_lp_result_to_root_solution(problem, lp_result); +} + +template +cuopt::linear_programming::dual_simplex::crossover_status_t run_crush_crossover_and_maybe_win( + const cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t& result, + const cuopt::linear_programming::dual_simplex::user_problem_t& original_problem, + const cuopt::linear_programming::dual_simplex::lp_problem_t& original_lp, + const std::vector& new_slacks, + const cuopt::linear_programming::dual_simplex::simplex_solver_settings_t& + crossover_settings, + f_t start_time, + std::atomic* concurrent_halt, + std::function set_halter, + std::function&)> + on_first_lp_solution, + std::mutex* first_solver_mutex, + bool* first_solver_callback_done, + std::mutex* first_result_mutex, + std::atomic* winner, + int winner_id, + cuopt::linear_programming::dual_simplex::crossover_status_t* first_crossover_status_out, + cuopt::linear_programming::dual_simplex::lp_solution_t* winner_crossover_soln, + std::vector* winner_crossover_vstatus, + f_t* winner_root_objective, + const char* this_solver_name, + std::string* winner_solver_name_out) +{ + using namespace cuopt::linear_programming::dual_simplex; + if (on_first_lp_solution) { + std::lock_guard lock(*first_solver_mutex); + if (!*first_solver_callback_done) { + *first_solver_callback_done = true; + on_first_lp_solution(result); + } + } + lp_solution_t soln(original_lp.num_rows, original_lp.num_cols); + soln.x = result.primal; + soln.y = result.dual; + soln.z = result.reduced_costs; + soln.objective = result.objective; + soln.user_objective = result.user_objective; + soln.iterations = result.iterations; + std::vector crushed_x; + crush_primal_solution(original_problem, original_lp, soln.x, new_slacks, crushed_x); + std::vector crushed_y; + std::vector crushed_z; + (void)crush_dual_solution( + original_problem, original_lp, new_slacks, soln.y, soln.z, crushed_y, crushed_z); + soln.x = std::move(crushed_x); + soln.y = std::move(crushed_y); + soln.z = std::move(crushed_z); + lp_solution_t crossover_out(original_lp.num_rows, original_lp.num_cols); + std::vector vstatus_out(original_lp.num_cols); + auto root_crossover_settings = crossover_settings; + root_crossover_settings.inside_mip = + 1; // root LP crossover; dual_phase2 uses this to set concurrent_halt + root_crossover_settings.log.log = false; + root_crossover_settings.concurrent_halt = concurrent_halt; + crossover_status_t status = + crossover(original_lp, root_crossover_settings, soln, start_time, crossover_out, vstatus_out); + { + std::lock_guard lock(*first_result_mutex); + int expected = 0; + if (status == crossover_status_t::OPTIMAL && + winner->compare_exchange_strong(expected, winner_id, std::memory_order_acq_rel)) { + *first_crossover_status_out = status; + if (winner_solver_name_out) { *winner_solver_name_out = this_solver_name; } + winner_crossover_soln->x = std::move(crossover_out.x); + winner_crossover_soln->y = std::move(crossover_out.y); + winner_crossover_soln->z = std::move(crossover_out.z); + winner_crossover_soln->objective = result.objective; + winner_crossover_soln->user_objective = result.user_objective; + winner_crossover_soln->iterations = result.iterations; + *winner_root_objective = result.objective; + *winner_crossover_vstatus = std::move(vstatus_out); + set_halter(); + } else { + if (winner->load(std::memory_order_acquire) != 0) { status = *first_crossover_status_out; } + } + } + return status; +} + +template cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t +run_solver_for_root_lp( + problem_t*, double, std::atomic*, int, method_t); +template cuopt::linear_programming::dual_simplex::crossover_status_t +run_crush_crossover_and_maybe_win( + const cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t&, + const cuopt::linear_programming::dual_simplex::user_problem_t&, + const cuopt::linear_programming::dual_simplex::lp_problem_t&, + const std::vector&, + const cuopt::linear_programming::dual_simplex::simplex_solver_settings_t&, + double, + std::atomic*, + std::function, + std::function&)>, + std::mutex*, + bool*, + std::mutex*, + std::atomic*, + int, + cuopt::linear_programming::dual_simplex::crossover_status_t*, + cuopt::linear_programming::dual_simplex::lp_solution_t*, + std::vector*, + double*, + const char*, + std::string*); + +#ifdef MIP_INSTANTIATION_FLOAT +template cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t +run_solver_for_root_lp(problem_t*, float, std::atomic*, int, method_t); +template cuopt::linear_programming::dual_simplex::crossover_status_t +run_crush_crossover_and_maybe_win( + const cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t&, + const cuopt::linear_programming::dual_simplex::user_problem_t&, + const cuopt::linear_programming::dual_simplex::lp_problem_t&, + const std::vector&, + const cuopt::linear_programming::dual_simplex::simplex_solver_settings_t&, + float, + std::atomic*, + std::function, + std::function&)>, + std::mutex*, + bool*, + std::mutex*, + std::atomic*, + int, + cuopt::linear_programming::dual_simplex::crossover_status_t*, + cuopt::linear_programming::dual_simplex::lp_solution_t*, + std::vector*, + float*, + const char*, + std::string*); +#endif +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/root_lp.cuh b/cpp/src/mip_heuristics/root_lp.cuh new file mode 100644 index 0000000000..2f87884fe9 --- /dev/null +++ b/cpp/src/mip_heuristics/root_lp.cuh @@ -0,0 +1,66 @@ +/* clang-format off */ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + */ +/* clang-format on */ + +#pragma once + +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace cuopt::linear_programming::detail { + +template +class problem_t; + +/** Run PDLP or Barrier for root LP. Uses concurrent_halt to stop; does not set it. Crossover done + * by caller. */ +template +cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t +run_solver_for_root_lp(problem_t* problem, + f_t time_limit, + std::atomic* concurrent_halt, + i_t num_gpus, + method_t method); + +/** + * Run crush + crossover on a root LP solution and optionally store as winner (first to finish). + * Used by B&B when running PDLP and Barrier concurrently; both paths call this after their solver + * returns. + */ +template +cuopt::linear_programming::dual_simplex::crossover_status_t run_crush_crossover_and_maybe_win( + const cuopt::linear_programming::dual_simplex::root_relaxation_first_solution_t& result, + const cuopt::linear_programming::dual_simplex::user_problem_t& original_problem, + const cuopt::linear_programming::dual_simplex::lp_problem_t& original_lp, + const std::vector& new_slacks, + const cuopt::linear_programming::dual_simplex::simplex_solver_settings_t& + crossover_settings, + f_t start_time, + std::atomic* concurrent_halt, + std::function set_halter, + std::function&)> + on_first_lp_solution, + std::mutex* first_solver_mutex, + bool* first_solver_callback_done, + std::mutex* first_result_mutex, + std::atomic* winner, + int winner_id, + cuopt::linear_programming::dual_simplex::crossover_status_t* first_crossover_status_out, + cuopt::linear_programming::dual_simplex::lp_solution_t* winner_crossover_soln, + std::vector* winner_crossover_vstatus, + f_t* winner_root_objective, + const char* this_solver_name, + std::string* winner_solver_name_out); + +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip_heuristics/solver.cu b/cpp/src/mip_heuristics/solver.cu index f25c093afb..e426365841 100644 --- a/cpp/src/mip_heuristics/solver.cu +++ b/cpp/src/mip_heuristics/solver.cu @@ -180,13 +180,11 @@ solution_t mip_solver_t::run_solver() namespace dual_simplex = cuopt::linear_programming::dual_simplex; std::future branch_and_bound_status_future; - dual_simplex::user_problem_t branch_and_bound_problem(context.problem_ptr->handle_ptr); context.problem_ptr->recompute_objective_integrality(); if (context.problem_ptr->is_objective_integral()) { CUOPT_LOG_INFO("Objective function is integral, scale %g", context.problem_ptr->presolve_data.objective_scaling_factor); } - branch_and_bound_problem.objective_is_integral = context.problem_ptr->is_objective_integral(); dual_simplex::simplex_solver_settings_t branch_and_bound_settings; std::unique_ptr> branch_and_bound; branch_and_bound_solution_helper_t solution_helper(&dm, branch_and_bound_settings); @@ -194,11 +192,6 @@ solution_t mip_solver_t::run_solver() bool run_bb = !context.settings.heuristics_only; if (run_bb) { - // Convert the presolved problem to dual_simplex::user_problem_t - op_problem_.get_host_user_problem(branch_and_bound_problem); - // Resize the solution now that we know the number of columns/variables - branch_and_bound_solution.resize(branch_and_bound_problem.num_cols); - // Fill in the settings for branch and bound branch_and_bound_settings.time_limit = timer_.get_time_limit(); branch_and_bound_settings.node_limit = context.settings.node_limit; @@ -262,20 +255,30 @@ solution_t mip_solver_t::run_solver() &solution_helper, std::placeholders::_1, std::placeholders::_2); + + branch_and_bound_settings.on_first_lp_solution_available = + [&dm](dual_simplex::root_relaxation_first_solution_t const& result) { + dm.on_first_lp_solution(result); + }; } - // Create the branch and bound object - branch_and_bound = std::make_unique>( - branch_and_bound_problem, - branch_and_bound_settings, - timer_.get_tic_start(), - context.problem_ptr->clique_table); + dual_simplex::user_problem_t bb_user_problem(context.problem_ptr->handle_ptr); + context.problem_ptr->recompute_objective_integrality(); + bb_user_problem.objective_is_integral = context.problem_ptr->is_objective_integral(); + context.problem_ptr->get_host_user_problem(bb_user_problem); + branch_and_bound = + std::make_unique>(bb_user_problem, + branch_and_bound_settings, + timer_.get_tic_start(), + context.problem_ptr, + context.settings.num_gpus, + context.problem_ptr->clique_table); + branch_and_bound_solution.resize(branch_and_bound->get_num_cols()); context.branch_and_bound_ptr = branch_and_bound.get(); auto* stats_ptr = &context.stats; branch_and_bound->set_user_bound_callback( [stats_ptr](f_t user_bound) { stats_ptr->set_solution_bound(user_bound); }); - // Set the primal heuristics -> branch and bound callback if (context.settings.determinism_mode == CUOPT_MODE_OPPORTUNISTIC) { branch_and_bound->set_concurrent_lp_root_solve(true); @@ -295,16 +298,6 @@ solution_t mip_solver_t::run_solver() context.work_unit_scheduler_.register_context(branch_and_bound->get_work_unit_context()); // context.work_unit_scheduler_.verbose = true; - context.problem_ptr->set_root_relaxation_solution_callback = - std::bind(&dual_simplex::branch_and_bound_t::set_root_relaxation_solution, - branch_and_bound.get(), - std::placeholders::_1, - std::placeholders::_2, - std::placeholders::_3, - std::placeholders::_4, - std::placeholders::_5, - std::placeholders::_6); - if (timer_.check_time_limit()) { CUOPT_LOG_INFO("Time limit reached during B&B setup"); solution_t sol(*context.problem_ptr); diff --git a/cpp/src/pdlp/pdlp.cu b/cpp/src/pdlp/pdlp.cu index 82e79098a7..9424240a08 100644 --- a/cpp/src/pdlp/pdlp.cu +++ b/cpp/src/pdlp/pdlp.cu @@ -499,9 +499,9 @@ std::optional> pdlp_solver_t pdlp_termination_status_t::IterationLimit)); } - // Check for concurrent limit - if (settings_.method == method_t::Concurrent && settings_.concurrent_halt != nullptr && - *settings_.concurrent_halt == 1) { + // Check for concurrent limit (whenever caller provides a halt flag, e.g. B&B racing PDLP vs + // Barrier) + if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) { #ifdef PDLP_VERBOSE_MODE RAFT_CUDA_TRY(cudaDeviceSynchronize()); std::cout << "Concurrent Limit reached, returning current solution" << std::endl; diff --git a/cpp/src/pdlp/solve.cu b/cpp/src/pdlp/solve.cu index 2fc9ec08d5..3c3ce1e0eb 100644 --- a/cpp/src/pdlp/solve.cu +++ b/cpp/src/pdlp/solve.cu @@ -463,9 +463,10 @@ run_barrier(dual_simplex::user_problem_t& user_problem, CUOPT_LOG_CONDITIONAL_INFO( !settings.inside_mip, "Barrier finished in %.2f seconds", timer.elapsed_time()); - if (settings.concurrent_halt != nullptr && (status == dual_simplex::lp_status_t::OPTIMAL || - status == dual_simplex::lp_status_t::UNBOUNDED || - status == dual_simplex::lp_status_t::INFEASIBLE)) { + if (!settings.halt_set_by_caller && settings.concurrent_halt != nullptr && + (status == dual_simplex::lp_status_t::OPTIMAL || + status == dual_simplex::lp_status_t::UNBOUNDED || + status == dual_simplex::lp_status_t::INFEASIBLE)) { // We finished. Tell PDLP to stop if it is still running. *settings.concurrent_halt = 1; } @@ -535,9 +536,10 @@ run_dual_simplex(dual_simplex::user_problem_t& user_problem, CUOPT_LOG_CONDITIONAL_INFO( !settings.inside_mip, "Dual simplex finished in %.2f seconds", timer.elapsed_time()); - if (settings.concurrent_halt != nullptr && (status == dual_simplex::lp_status_t::OPTIMAL || - status == dual_simplex::lp_status_t::UNBOUNDED || - status == dual_simplex::lp_status_t::INFEASIBLE)) { + if (!settings.halt_set_by_caller && settings.concurrent_halt != nullptr && + (status == dual_simplex::lp_status_t::OPTIMAL || + status == dual_simplex::lp_status_t::UNBOUNDED || + status == dual_simplex::lp_status_t::INFEASIBLE)) { // We finished. Tell PDLP to stop if it is still running. *settings.concurrent_halt = 1; } @@ -828,7 +830,7 @@ optimization_problem_solution_t run_pdlp(detail::problem_t& CUOPT_LOG_CONDITIONAL_INFO( !settings.inside_mip, "Crossover status %s", sol.get_termination_status_string().c_str()); } - if (settings.method == method_t::Concurrent && settings.concurrent_halt != nullptr && + if (!settings.halt_set_by_caller && settings.method == method_t::Concurrent && settings.concurrent_halt != nullptr && crossover_info == 0 && sol.get_termination_status() == pdlp_termination_status_t::Optimal) { // We finished. Tell dual simplex to stop if it is still running. CUOPT_LOG_CONDITIONAL_INFO(!settings.inside_mip, "PDLP finished. Telling others to stop"); diff --git a/cpp/src/pdlp/solver_solution.cu b/cpp/src/pdlp/solver_solution.cu index 10e6a80593..87ad02e020 100644 --- a/cpp/src/pdlp/solver_solution.cu +++ b/cpp/src/pdlp/solver_solution.cu @@ -372,6 +372,12 @@ rmm::device_uvector& optimization_problem_solution_t::get_reduced return reduced_cost_; } +template +const rmm::device_uvector& optimization_problem_solution_t::get_reduced_cost() const +{ + return reduced_cost_; +} + template pdlp_termination_status_t optimization_problem_solution_t::get_termination_status( i_t id) const diff --git a/docs/cuopt/source/cuopt-python/routing/routing-example.ipynb b/docs/cuopt/source/cuopt-python/routing/routing-example.ipynb index 9df5e2c0c7..9cfc05f9bb 100644 --- a/docs/cuopt/source/cuopt-python/routing/routing-example.ipynb +++ b/docs/cuopt/source/cuopt-python/routing/routing-example.ipynb @@ -147,7 +147,7 @@ "metadata": {}, "source": [ "#### Compressed Sparse Row (CSR) representation of above weighted waypoint graph.\n", - "For details on the CSR encoding of the above graph see the [cost_matrix_and_waypoint_graph_creation.ipynb](https://github.com/NVIDIA/cuopt-examples/blob/release/26.04/intra-factory_transport/cost_matrix_and_waypoint_graph_creation.ipynb) notebook." + "For details on the CSR encoding of the above graph see the [cost_matrix_and_waypoint_graph_creation.ipynb](https://github.com/NVIDIA/cuopt-examples/blob/main/intra-factory_transport/cost_matrix_and_waypoint_graph_creation.ipynb) notebook." ] }, { diff --git a/docs/cuopt/source/faq.rst b/docs/cuopt/source/faq.rst index 1985052531..0c3a0e219f 100644 --- a/docs/cuopt/source/faq.rst +++ b/docs/cuopt/source/faq.rst @@ -283,7 +283,7 @@ Routing FAQ So in either case, task locations are actually integer indices into another structure. - If you have (lat, long) values, then you can generate a cost matrix using a map API. cuOpt does not directly connect to a third-party map engine, but that can be done outside of cuOpt as shown `here `__. + If you have (lat, long) values, then you can generate a cost matrix using a map API. cuOpt does not directly connect to a third-party map engine, but that can be done outside of cuOpt as shown `here `__. .. dropdown:: Is it possible to define constraints such as refrigerated vehicles required for certain orders?