From 00c58dfd3ccba18deb8d5c7907b2e50cb075a3c5 Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Tue, 10 Mar 2026 11:42:26 -0700 Subject: [PATCH 1/6] Optimize right-looking LU factorization with O(1) degree-bucket ops Replace linear degree-bucket search with O(1) swap-with-last removal using col_pos/row_pos position arrays, and eliminate O(row_degree) pre-traversal in schur_complement via a persistent last_in_row[] array --- cpp/src/dual_simplex/right_looking_lu.cpp | 210 +++++++++++++--------- 1 file changed, 126 insertions(+), 84 deletions(-) diff --git a/cpp/src/dual_simplex/right_looking_lu.cpp b/cpp/src/dual_simplex/right_looking_lu.cpp index 657ebc4762..53bfcf8ac5 100644 --- a/cpp/src/dual_simplex/right_looking_lu.cpp +++ b/cpp/src/dual_simplex/right_looking_lu.cpp @@ -30,7 +30,7 @@ struct element_t { f_t x; // coefficient value i_t next_in_column; // index of the next element in the column: kNone if there is no next element i_t next_in_row; // index of the next element in the row: kNone if there is no next element -}; +}; // 24 bytes constexpr int kNone = -1; template @@ -86,11 +86,11 @@ i_t load_elements(const csc_matrix_t& A, std::vector>& elements, std::vector& first_in_row, std::vector& first_in_col, + std::vector& last_in_row, f_t& work_estimate) { const i_t m = A.m; const i_t n = column_list.size(); - std::vector last_element_in_row(m, kNone); work_estimate += m; i_t nz = 0; @@ -105,15 +105,9 @@ i_t load_elements(const csc_matrix_t& A, elements[nz].x = A.x[p]; elements[nz].next_in_column = kNone; if (p > col_start) { elements[nz - 1].next_in_column = nz; } - elements[nz].next_in_row = kNone; // set the current next in row to None (since we don't know - // if there will be more entries in this row) - if (last_element_in_row[i] != kNone) { - // If we have seen an entry in this row before, set the last entry we've seen in this row to - // point to the current entry - elements[last_element_in_row[i]].next_in_row = nz; - } - // The current entry becomes the last element seen in the row - last_element_in_row[i] = nz; + elements[nz].next_in_row = kNone; + if (last_in_row[i] != kNone) { elements[last_in_row[i]].next_in_row = nz; } + last_in_row[i] = nz; if (p == col_start) { first_in_col[k] = nz; } if (first_in_row[i] == kNone) { first_in_row[i] = nz; } nz++; @@ -316,10 +310,11 @@ void update_Cdegree_and_col_count(i_t pivot_i, const std::vector& first_in_row, std::vector& Cdegree, std::vector>& col_count, + std::vector& col_pos, std::vector>& elements, f_t& work_estimate) { - // Update Cdegree and col_count + // Update Cdegree and col_count (O(1) removal using position array) i_t loop_count = 0; for (i_t p = first_in_row[pivot_i]; p != kNone; p = elements[p].next_in_row) { element_t* entry = &elements[p]; @@ -327,20 +322,20 @@ void update_Cdegree_and_col_count(i_t pivot_i, assert(entry->i == pivot_i); i_t cdeg = Cdegree[j]; assert(cdeg >= 0); - for (typename std::vector::iterator it = col_count[cdeg].begin(); - it != col_count[cdeg].end(); - it++) { - if (*it == j) { - // Remove col j from col_count[cdeg] - std::swap(*it, col_count[cdeg].back()); - col_count[cdeg].pop_back(); - work_estimate += (it - col_count[cdeg].begin()); - break; - } + // O(1) swap-with-last removal + { + i_t pos = col_pos[j]; + i_t other = col_count[cdeg].back(); + col_count[cdeg][pos] = other; + col_pos[other] = pos; + col_count[cdeg].pop_back(); } cdeg = --Cdegree[j]; assert(cdeg >= 0); - if (j != pivot_j && cdeg >= 0) { col_count[cdeg].push_back(j); } + if (j != pivot_j && cdeg >= 0) { + col_pos[j] = col_count[cdeg].size(); + col_count[cdeg].push_back(j); + } loop_count++; } work_estimate += 7 * loop_count; @@ -353,30 +348,31 @@ void update_Rdegree_and_row_count(i_t pivot_i, const std::vector& first_in_col, std::vector& Rdegree, std::vector>& row_count, + std::vector& row_pos, std::vector>& elements, f_t& work_estimate) { - // Update Rdegree and row_count + // Update Rdegree and row_count (O(1) removal using position array) i_t loop_count = 0; for (i_t p = first_in_col[pivot_j]; p != kNone; p = elements[p].next_in_column) { element_t* entry = &elements[p]; const i_t i = entry->i; i_t rdeg = Rdegree[i]; assert(rdeg >= 0); - for (typename std::vector::iterator it = row_count[rdeg].begin(); - it != row_count[rdeg].end(); - it++) { - if (*it == i) { - // Remove row i from row_count[rdeg] - std::swap(*it, row_count[rdeg].back()); - row_count[rdeg].pop_back(); - work_estimate += (it - row_count[rdeg].begin()); - break; - } + // O(1) swap-with-last removal + { + i_t pos = row_pos[i]; + i_t other = row_count[rdeg].back(); + row_count[rdeg][pos] = other; + row_pos[other] = pos; + row_count[rdeg].pop_back(); } rdeg = --Rdegree[i]; assert(rdeg >= 0); - if (i != pivot_i && rdeg >= 0) { row_count[rdeg].push_back(i); } + if (i != pivot_i && rdeg >= 0) { + row_pos[i] = row_count[rdeg].size(); + row_count[rdeg].push_back(i); + } loop_count++; } work_estimate += 7 * loop_count; @@ -400,18 +396,15 @@ void schur_complement(i_t pivot_i, std::vector& Cdegree, std::vector>& row_count, std::vector>& col_count, + std::vector& last_in_row, + std::vector& col_pos, + std::vector& row_pos, std::vector>& elements, f_t& work_estimate) { + // Initialize row_last_workspace from last_in_row (O(1) per row, no full row traversal) for (i_t p1 = first_in_col[pivot_j]; p1 != kNone; p1 = elements[p1].next_in_column) { - element_t* e = &elements[p1]; - const i_t i = e->i; - i_t row_last = kNone; - for (i_t p3 = first_in_row[i]; p3 != kNone; p3 = elements[p3].next_in_row) { - row_last = p3; - } - work_estimate += 2 * Rdegree[i]; - row_last_workspace[i] = row_last; + row_last_workspace[elements[p1].i] = last_in_row[elements[p1].i]; } work_estimate += 4 * Cdegree[pivot_j]; @@ -478,35 +471,29 @@ void schur_complement(i_t pivot_i, first_in_row[i] = fill_p; } row_last_workspace[i] = fill_p; - i_t rdeg = Rdegree[i]; // Rdgree must increase - for (typename std::vector::iterator it = row_count[rdeg].begin(); - it != row_count[rdeg].end(); - it++) { - if (*it == i) { - // Remove row i from row_count[rdeg] - std::swap(*it, row_count[rdeg].back()); - row_count[rdeg].pop_back(); - work_estimate += 2 * (it - row_count[rdeg].begin()); - break; - } + last_in_row[i] = fill_p; // maintain last_in_row persistent state + // Row degree update: O(1) removal using row_pos + { + i_t rdeg = Rdegree[i]; + i_t pos = row_pos[i]; + i_t other = row_count[rdeg].back(); + row_count[rdeg][pos] = other; + row_pos[other] = pos; + row_count[rdeg].pop_back(); + row_pos[i] = row_count[rdeg + 1].size(); + row_count[++Rdegree[i]].push_back(i); } - rdeg = ++Rdegree[i]; // Increase rdeg - row_count[rdeg].push_back(i); // Add row i to row_count[rdeg] - - i_t cdeg = Cdegree[j]; // Cdegree must increase - for (typename std::vector::iterator it = col_count[cdeg].begin(); - it != col_count[cdeg].end(); - it++) { - if (*it == j) { - // Remove col j from col_count[cdeg] - std::swap(*it, col_count[cdeg].back()); - col_count[cdeg].pop_back(); - work_estimate += 2 * (it - col_count[cdeg].begin()); - break; - } + // Col degree update: O(1) removal using col_pos + { + i_t cdeg = Cdegree[j]; + i_t pos = col_pos[j]; + i_t other = col_count[cdeg].back(); + col_count[cdeg][pos] = other; + col_pos[other] = pos; + col_count[cdeg].pop_back(); + col_pos[j] = col_count[cdeg + 1].size(); + col_count[++Cdegree[j]].push_back(j); } - cdeg = ++Cdegree[j]; // Increase Cdegree - col_count[cdeg].push_back(j); // Add column j to col_count[cdeg] } } work_estimate += 10 * Cdegree[pivot_j]; @@ -532,7 +519,6 @@ void remove_pivot_row(i_t pivot_i, f_t& work_estimate) { // Remove the pivot row - i_t row_loop_count = 0; for (i_t p0 = first_in_row[pivot_i]; p0 != kNone; p0 = elements[p0].next_in_row) { element_t* e = &elements[p0]; @@ -574,6 +560,7 @@ void remove_pivot_col(i_t pivot_i, std::vector& first_in_col, std::vector& first_in_row, std::vector& max_in_row, + std::vector& last_in_row, std::vector>& elements, f_t& work_estimate) { @@ -583,6 +570,7 @@ void remove_pivot_col(i_t pivot_i, element_t* e = &elements[p1]; const i_t i = e->i; i_t last = kNone; + i_t last_surviving = kNone; #ifdef THRESHOLD_ROOK_PIVOTING f_t max_in_row_i = 0.0; #endif @@ -598,16 +586,17 @@ void remove_pivot_col(i_t pivot_i, entry->i = -1; entry->j = -1; entry->x = std::numeric_limits::quiet_NaN(); - } + } else { + last_surviving = p; #ifdef THRESHOLD_ROOK_PIVOTING - else { const f_t abs_entryx = std::abs(entry->x); if (abs_entryx > max_in_row_i) { max_in_row_i = abs_entryx; } - } #endif + } last = p; row_loop_count++; } + last_in_row[i] = last_surviving; work_estimate += 3 * row_loop_count; #ifdef THRESHOLD_ROOK_PIVOTING max_in_row[i] = max_in_row_i; @@ -656,11 +645,28 @@ i_t right_looking_lu(const csc_matrix_t& A, const i_t Bnz = initialize_degree_data(A, column_list, Cdegree, Rdegree, col_count, row_count, work_estimate); + + // Position arrays for O(1) degree-bucket removal + std::vector col_pos(n); + for (i_t d = 0; d <= n; ++d) { + for (i_t pos = 0; pos < static_cast(col_count[d].size()); ++pos) { + col_pos[col_count[d][pos]] = pos; + } + } + std::vector row_pos(n); + for (i_t d = 0; d <= n; ++d) { + for (i_t pos = 0; pos < static_cast(row_count[d].size()); ++pos) { + row_pos[row_count[d][pos]] = pos; + } + } + std::vector> elements(Bnz); std::vector first_in_row(n, kNone); std::vector first_in_col(n, kNone); + std::vector last_in_row(n, kNone); work_estimate += 2 * n + Bnz; - load_elements(A, column_list, Bnz, elements, first_in_row, first_in_col, work_estimate); + load_elements( + A, column_list, Bnz, elements, first_in_row, first_in_col, last_in_row, work_estimate); std::vector column_j_workspace(n, kNone); std::vector row_last_workspace(n); @@ -777,9 +783,9 @@ i_t right_looking_lu(const csc_matrix_t& A, // Update Cdegree and col_count update_Cdegree_and_col_count( - pivot_i, pivot_j, first_in_row, Cdegree, col_count, elements, work_estimate); + pivot_i, pivot_j, first_in_row, Cdegree, col_count, col_pos, elements, work_estimate); update_Rdegree_and_row_count( - pivot_i, pivot_j, first_in_col, Rdegree, row_count, elements, work_estimate); + pivot_i, pivot_j, first_in_col, Rdegree, row_count, row_pos, elements, work_estimate); // A22 <- A22 - l u^T schur_complement(pivot_i, @@ -798,14 +804,23 @@ i_t right_looking_lu(const csc_matrix_t& A, Cdegree, row_count, col_count, + last_in_row, + col_pos, + row_pos, elements, work_estimate); // Remove the pivot row remove_pivot_row( pivot_i, pivot_j, first_in_col, first_in_row, max_in_column, elements, work_estimate); - remove_pivot_col( - pivot_i, pivot_j, first_in_col, first_in_row, max_in_row, elements, work_estimate); + remove_pivot_col(pivot_i, + pivot_j, + first_in_col, + first_in_row, + max_in_row, + last_in_row, + elements, + work_estimate); // Set pivot entry to sentinel value pivot_entry->i = -1; @@ -1030,10 +1045,28 @@ i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, const i_t Bnz = initialize_degree_data(A, column_list, Cdegree, Rdegree, col_count, row_count, work_estimate); + + // Position arrays for O(1) degree-bucket removal + // col_count has m+1 buckets, row_count has n+1 buckets + std::vector col_pos(n); + for (i_t d = 0; d <= m; ++d) { + for (i_t pos = 0; pos < static_cast(col_count[d].size()); ++pos) { + col_pos[col_count[d][pos]] = pos; + } + } + std::vector row_pos(m); + for (i_t d = 0; d <= n; ++d) { + for (i_t pos = 0; pos < static_cast(row_count[d].size()); ++pos) { + row_pos[row_count[d][pos]] = pos; + } + } + std::vector> elements(Bnz); std::vector first_in_row(m, kNone); std::vector first_in_col(n, kNone); - load_elements(A, column_list, Bnz, elements, first_in_row, first_in_col, work_estimate); + std::vector last_in_row(m, kNone); + load_elements( + A, column_list, Bnz, elements, first_in_row, first_in_col, last_in_row, work_estimate); std::vector column_j_workspace(m, kNone); std::vector row_last_workspace(m); @@ -1100,9 +1133,9 @@ i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, // Update Cdegree and col_count update_Cdegree_and_col_count( - pivot_i, pivot_j, first_in_row, Cdegree, col_count, elements, work_estimate); + pivot_i, pivot_j, first_in_row, Cdegree, col_count, col_pos, elements, work_estimate); update_Rdegree_and_row_count( - pivot_i, pivot_j, first_in_col, Rdegree, row_count, elements, work_estimate); + pivot_i, pivot_j, first_in_col, Rdegree, row_count, row_pos, elements, work_estimate); // A22 <- A22 - l u^T schur_complement(pivot_i, @@ -1121,14 +1154,23 @@ i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, Cdegree, row_count, col_count, + last_in_row, + col_pos, + row_pos, elements, work_estimate); // Remove the pivot row remove_pivot_row( pivot_i, pivot_j, first_in_col, first_in_row, max_in_column, elements, work_estimate); - remove_pivot_col( - pivot_i, pivot_j, first_in_col, first_in_row, max_in_row, elements, work_estimate); + remove_pivot_col(pivot_i, + pivot_j, + first_in_col, + first_in_row, + max_in_row, + last_in_row, + elements, + work_estimate); // Set pivot entry to sentinel value pivot_entry->i = -1; From 5f377a2c6d5c48040622c82e715105825f7d7163 Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Mon, 2 Mar 2026 10:55:23 -0800 Subject: [PATCH 2/6] Take advantage of hyper sparsity in dual push --- cpp/src/dual_simplex/crossover.cpp | 40 +++++++++++++++++++----------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/cpp/src/dual_simplex/crossover.cpp b/cpp/src/dual_simplex/crossover.cpp index 988c9c50ad..16f503e893 100644 --- a/cpp/src/dual_simplex/crossover.cpp +++ b/cpp/src/dual_simplex/crossover.cpp @@ -331,6 +331,7 @@ void compute_dual_solution_from_basis(const lp_problem_t& lp, template i_t dual_push(const lp_problem_t& lp, + const csr_matrix_t& Arow, const simplex_solver_settings_t& settings, f_t start_time, lp_solution_t& solution, @@ -401,11 +402,9 @@ i_t dual_push(const lp_problem_t& lp, es_sparse.x[0] = -delta_zs; // B^T delta_y = -delta_zs*es - std::vector delta_y(m); sparse_vector_t delta_y_sparse(m, 1); sparse_vector_t UTsol_sparse(m, 1); ft.b_transpose_solve(es_sparse, delta_y_sparse, UTsol_sparse); - delta_y_sparse.scatter(delta_y); // We solved B^T delta_y = -delta_zs*es, but for the update we need // U^T*etilde = es. @@ -417,15 +416,23 @@ i_t dual_push(const lp_problem_t& lp, // delta_zN = -N^T delta_y std::vector delta_zN(n - m); - for (i_t k = 0; k < n - m; ++k) { - const i_t j = nonbasic_list[k]; - const i_t col_start = lp.A.col_start[j]; - const i_t col_end = lp.A.col_start[j + 1]; - f_t dot = 0.0; - for (i_t p = col_start; p < col_end; ++p) { - dot += lp.A.x[p] * delta_y[lp.A.i[p]]; + std::vector delta_expanded(n, 0.); + + // Iterate directly over sparse delta_y instead of checking zeros + for (i_t nnz_idx = 0; nnz_idx < delta_y_sparse.i.size(); ++nnz_idx) { + const i_t row = delta_y_sparse.i[nnz_idx]; + const f_t val = delta_y_sparse.x[nnz_idx]; + + // Accumulate contributions from this row to all columns + const i_t row_start = Arow.row_start[row]; + const i_t row_end = Arow.row_start[row + 1]; + for (i_t p = row_start; p < row_end; ++p) { + const i_t col = Arow.j[p]; + delta_expanded[col] += Arow.x[p] * val; } - delta_zN[k] = -dot; + } + for (i_t k = 0; k < n - m; ++k) { + delta_zN[k] = -delta_expanded[nonbasic_list[k]]; } i_t entering_index = -1; @@ -435,8 +442,10 @@ i_t dual_push(const lp_problem_t& lp, assert(step_length >= -1e-6); // y <- y + step_length * delta_y - for (i_t i = 0; i < m; ++i) { - y[i] += step_length * delta_y[i]; + // Optimized: Only update non-zero elements from sparse representation + for (i_t nnz_idx = 0; nnz_idx < delta_y_sparse.i.size(); ++nnz_idx) { + const i_t i = delta_y_sparse.i[nnz_idx]; + y[i] += step_length * delta_y_sparse.x[nnz_idx]; } // z <- z + step_length * delta z @@ -725,7 +734,6 @@ i_t primal_push(const lp_problem_t& lp, { const i_t m = lp.num_rows; const i_t n = lp.num_cols; - settings.log.debug("Primal push: superbasic %ld\n", superbasic_list.size()); std::vector& x = solution.x; @@ -1002,6 +1010,7 @@ i_t primal_push(const lp_problem_t& lp, } solution.x = x_compare; solution.iterations += num_pushes; + return 0; } @@ -1190,6 +1199,9 @@ crossover_status_t crossover(const lp_problem_t& lp, f_t crossover_start = tic(); f_t work_estimate = 0; + csr_matrix_t Arow(m, n, 1); + lp.A.to_compressed_row(Arow); + settings.log.printf("\n"); settings.log.printf("Starting crossover\n"); @@ -1332,7 +1344,7 @@ crossover_status_t crossover(const lp_problem_t& lp, verify_basis(m, n, vstatus); compare_vstatus_with_lists(m, n, basic_list, nonbasic_list, vstatus); i_t dual_push_status = dual_push( - lp, settings, start_time, solution, ft, basic_list, nonbasic_list, superbasic_list, vstatus); + lp, Arow, settings, start_time, solution, ft, basic_list, nonbasic_list, superbasic_list, vstatus); if (dual_push_status < 0) { return return_to_status(dual_push_status); } settings.log.debug("basic list size %ld m %d\n", basic_list.size(), m); settings.log.debug("nonbasic list size %ld n - m %d\n", nonbasic_list.size(), n - m); From 3ab9ff74155d210aae8f1f4fb9f9acccdecf8b16 Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Mon, 16 Mar 2026 07:40:15 -0700 Subject: [PATCH 3/6] crossover: hoist delta_zN and delta_expanded out of dual push loop Allocate buffers once before the superbasic loop and reset with std::fill each iteration to avoid repeated O(n) allocations (PR #948 review). Made-with: Cursor --- cpp/src/dual_simplex/crossover.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/src/dual_simplex/crossover.cpp b/cpp/src/dual_simplex/crossover.cpp index 16f503e893..832f6891f6 100644 --- a/cpp/src/dual_simplex/crossover.cpp +++ b/cpp/src/dual_simplex/crossover.cpp @@ -388,6 +388,8 @@ i_t dual_push(const lp_problem_t& lp, std::vector& y = solution.y; const std::vector& x = solution.x; i_t num_pushes = 0; + std::vector delta_zN(n - m); + std::vector delta_expanded(n); while (superbasic_list.size() > 0) { const i_t s = superbasic_list.back(); const i_t basic_leaving_index = superbasic_list_index.back(); @@ -415,9 +417,9 @@ i_t dual_push(const lp_problem_t& lp, } // delta_zN = -N^T delta_y - std::vector delta_zN(n - m); - std::vector delta_expanded(n, 0.); - + std::fill(delta_expanded.begin(), delta_expanded.end(), 0.); + std::fill(delta_zN.begin(), delta_zN.end(), 0.); + // Iterate directly over sparse delta_y instead of checking zeros for (i_t nnz_idx = 0; nnz_idx < delta_y_sparse.i.size(); ++nnz_idx) { const i_t row = delta_y_sparse.i[nnz_idx]; From fbbdd356da7df7535bd55fb86dcde0383bb50afe Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Mon, 16 Mar 2026 09:24:59 -0700 Subject: [PATCH 4/6] Added review comments --- cpp/src/dual_simplex/right_looking_lu.cpp | 33 ++++++++++++++--------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/cpp/src/dual_simplex/right_looking_lu.cpp b/cpp/src/dual_simplex/right_looking_lu.cpp index 53bfcf8ac5..4800d644c1 100644 --- a/cpp/src/dual_simplex/right_looking_lu.cpp +++ b/cpp/src/dual_simplex/right_looking_lu.cpp @@ -105,8 +105,14 @@ i_t load_elements(const csc_matrix_t& A, elements[nz].x = A.x[p]; elements[nz].next_in_column = kNone; if (p > col_start) { elements[nz - 1].next_in_column = nz; } - elements[nz].next_in_row = kNone; - if (last_in_row[i] != kNone) { elements[last_in_row[i]].next_in_row = nz; } + elements[nz].next_in_row = kNone; // set the current next in row to None (since we don't know + // if there will be more entries in this row yet)) + if (last_in_row[i] != kNone) { + // If we have seen an entry in this row before, set the last entry we've seen in this row to + // point to the current entry + elements[last_in_row[i]].next_in_row = nz; + } + // The current entry becomes the last element seen in the row last_in_row[i] = nz; if (p == col_start) { first_in_col[k] = nz; } if (first_in_row[i] == kNone) { first_in_row[i] = nz; } @@ -402,9 +408,11 @@ void schur_complement(i_t pivot_i, std::vector>& elements, f_t& work_estimate) { - // Initialize row_last_workspace from last_in_row (O(1) per row, no full row traversal) + // row_last_workspace: temp copy of last_in_row for this pivot step, updated when adding fill + // last_in_row: persistent tail pointer per row for (i_t p1 = first_in_col[pivot_j]; p1 != kNone; p1 = elements[p1].next_in_column) { - row_last_workspace[elements[p1].i] = last_in_row[elements[p1].i]; + const i_t i = elements[p1].i; + row_last_workspace[i] = last_in_row[i]; } work_estimate += 4 * Cdegree[pivot_j]; @@ -569,8 +577,10 @@ void remove_pivot_col(i_t pivot_i, for (i_t p1 = first_in_col[pivot_j]; p1 != kNone; p1 = elements[p1].next_in_column) { element_t* e = &elements[p1]; const i_t i = e->i; - i_t last = kNone; - i_t last_surviving = kNone; + // Need both: last = previous-in-row (for link update when removing); last_surviving = new row + // tail (for last_in_row[i]). They differ when the pivot is the last element in the row. + i_t last = kNone; + i_t last_surviving = kNone; #ifdef THRESHOLD_ROOK_PIVOTING f_t max_in_row_i = 0.0; #endif @@ -647,13 +657,13 @@ i_t right_looking_lu(const csc_matrix_t& A, initialize_degree_data(A, column_list, Cdegree, Rdegree, col_count, row_count, work_estimate); // Position arrays for O(1) degree-bucket removal - std::vector col_pos(n); + std::vector col_pos(n); // if Cdegree[j] = nz, then j is in col_count[nz][col_pos[j]] for (i_t d = 0; d <= n; ++d) { for (i_t pos = 0; pos < static_cast(col_count[d].size()); ++pos) { col_pos[col_count[d][pos]] = pos; } } - std::vector row_pos(n); + std::vector row_pos(n); // if Rdegree[i] = nz, then i is in row_count[nz][row_pos[i]] for (i_t d = 0; d <= n; ++d) { for (i_t pos = 0; pos < static_cast(row_count[d].size()); ++pos) { row_pos[row_count[d][pos]] = pos; @@ -1046,15 +1056,14 @@ i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, const i_t Bnz = initialize_degree_data(A, column_list, Cdegree, Rdegree, col_count, row_count, work_estimate); - // Position arrays for O(1) degree-bucket removal - // col_count has m+1 buckets, row_count has n+1 buckets - std::vector col_pos(n); + // Position arrays for O(1) degree-bucket removal (col_count has m+1 buckets, row_count n+1) + std::vector col_pos(n); // if Cdegree[j] = nz, then j is in col_count[nz][col_pos[j]] for (i_t d = 0; d <= m; ++d) { for (i_t pos = 0; pos < static_cast(col_count[d].size()); ++pos) { col_pos[col_count[d][pos]] = pos; } } - std::vector row_pos(m); + std::vector row_pos(m); // if Rdegree[i] = nz, then i is in row_count[nz][row_pos[i]] for (i_t d = 0; d <= n; ++d) { for (i_t pos = 0; pos < static_cast(row_count[d].size()); ++pos) { row_pos[row_count[d][pos]] = pos; From bdea7a2460bb17e4b2f4b2bc3775d9a7325b68c1 Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Mon, 16 Mar 2026 09:32:17 -0700 Subject: [PATCH 5/6] Remove code duplication --- cpp/src/dual_simplex/right_looking_lu.cpp | 47 +++++++++++++---------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/cpp/src/dual_simplex/right_looking_lu.cpp b/cpp/src/dual_simplex/right_looking_lu.cpp index 4800d644c1..37202000f8 100644 --- a/cpp/src/dual_simplex/right_looking_lu.cpp +++ b/cpp/src/dual_simplex/right_looking_lu.cpp @@ -79,6 +79,29 @@ i_t initialize_degree_data(const csc_matrix_t& A, return Bnz; } +// Fill col_pos and row_pos so that column j has col_pos[j] = its index in col_count[Cdegree[j]], +// and row i has row_pos[i] = its index in row_count[Rdegree[i]]. Enables O(1) degree-bucket +// removal. +template +void initialize_bucket_positions(const std::vector>& col_count, + const std::vector>& row_count, + i_t col_max_degree, + i_t row_max_degree, + std::vector& col_pos, + std::vector& row_pos) +{ + for (i_t d = 0; d <= col_max_degree; ++d) { + for (i_t pos = 0; pos < static_cast(col_count[d].size()); ++pos) { + col_pos[col_count[d][pos]] = pos; + } + } + for (i_t d = 0; d <= row_max_degree; ++d) { + for (i_t pos = 0; pos < static_cast(row_count[d].size()); ++pos) { + row_pos[row_count[d][pos]] = pos; + } + } +} + template i_t load_elements(const csc_matrix_t& A, const std::vector& column_list, @@ -656,19 +679,10 @@ i_t right_looking_lu(const csc_matrix_t& A, const i_t Bnz = initialize_degree_data(A, column_list, Cdegree, Rdegree, col_count, row_count, work_estimate); - // Position arrays for O(1) degree-bucket removal + // Position arrays for O(1) degree-bucket removal (col_count and row_count each have n+1 buckets) std::vector col_pos(n); // if Cdegree[j] = nz, then j is in col_count[nz][col_pos[j]] - for (i_t d = 0; d <= n; ++d) { - for (i_t pos = 0; pos < static_cast(col_count[d].size()); ++pos) { - col_pos[col_count[d][pos]] = pos; - } - } std::vector row_pos(n); // if Rdegree[i] = nz, then i is in row_count[nz][row_pos[i]] - for (i_t d = 0; d <= n; ++d) { - for (i_t pos = 0; pos < static_cast(row_count[d].size()); ++pos) { - row_pos[row_count[d][pos]] = pos; - } - } + initialize_bucket_positions(col_count, row_count, n, n, col_pos, row_pos); std::vector> elements(Bnz); std::vector first_in_row(n, kNone); @@ -1058,17 +1072,8 @@ i_t right_looking_lu_row_permutation_only(const csc_matrix_t& A, // Position arrays for O(1) degree-bucket removal (col_count has m+1 buckets, row_count n+1) std::vector col_pos(n); // if Cdegree[j] = nz, then j is in col_count[nz][col_pos[j]] - for (i_t d = 0; d <= m; ++d) { - for (i_t pos = 0; pos < static_cast(col_count[d].size()); ++pos) { - col_pos[col_count[d][pos]] = pos; - } - } std::vector row_pos(m); // if Rdegree[i] = nz, then i is in row_count[nz][row_pos[i]] - for (i_t d = 0; d <= n; ++d) { - for (i_t pos = 0; pos < static_cast(row_count[d].size()); ++pos) { - row_pos[row_count[d][pos]] = pos; - } - } + initialize_bucket_positions(col_count, row_count, m, n, col_pos, row_pos); std::vector> elements(Bnz); std::vector first_in_row(m, kNone); From 3e2352b59f8501b64cb9529a174d05702a8583dc Mon Sep 17 00:00:00 2001 From: Rajesh Gandham Date: Mon, 16 Mar 2026 10:37:23 -0700 Subject: [PATCH 6/6] keep the dense vector path alive --- cpp/src/dual_simplex/crossover.cpp | 61 ++++++++++++++++++++---------- 1 file changed, 42 insertions(+), 19 deletions(-) diff --git a/cpp/src/dual_simplex/crossover.cpp b/cpp/src/dual_simplex/crossover.cpp index 832f6891f6..14624a4f4c 100644 --- a/cpp/src/dual_simplex/crossover.cpp +++ b/cpp/src/dual_simplex/crossover.cpp @@ -389,7 +389,8 @@ i_t dual_push(const lp_problem_t& lp, const std::vector& x = solution.x; i_t num_pushes = 0; std::vector delta_zN(n - m); - std::vector delta_expanded(n); + std::vector delta_expanded; // workspace for sparse path (delta_y is sparse enough) + std::vector delta_y_dense; // workspace for dense path (delta_y is not sparse enough) while (superbasic_list.size() > 0) { const i_t s = superbasic_list.back(); const i_t basic_leaving_index = superbasic_list_index.back(); @@ -417,24 +418,38 @@ i_t dual_push(const lp_problem_t& lp, } // delta_zN = -N^T delta_y - std::fill(delta_expanded.begin(), delta_expanded.end(), 0.); + // Choose sparse vs dense method by delta_y sparsity (match dual simplex: sparse if <= 30% nnz) std::fill(delta_zN.begin(), delta_zN.end(), 0.); - - // Iterate directly over sparse delta_y instead of checking zeros - for (i_t nnz_idx = 0; nnz_idx < delta_y_sparse.i.size(); ++nnz_idx) { - const i_t row = delta_y_sparse.i[nnz_idx]; - const f_t val = delta_y_sparse.x[nnz_idx]; - - // Accumulate contributions from this row to all columns - const i_t row_start = Arow.row_start[row]; - const i_t row_end = Arow.row_start[row + 1]; - for (i_t p = row_start; p < row_end; ++p) { - const i_t col = Arow.j[p]; - delta_expanded[col] += Arow.x[p] * val; + const bool use_sparse = (delta_y_sparse.i.size() * 1.0 / m) <= 0.3; + + if (use_sparse) { + delta_expanded.resize(n); + std::fill(delta_expanded.begin(), delta_expanded.end(), 0.); + for (i_t nnz_idx = 0; nnz_idx < static_cast(delta_y_sparse.i.size()); ++nnz_idx) { + const i_t row = delta_y_sparse.i[nnz_idx]; + const f_t val = delta_y_sparse.x[nnz_idx]; + const i_t row_start = Arow.row_start[row]; + const i_t row_end = Arow.row_start[row + 1]; + for (i_t p = row_start; p < row_end; ++p) { + const i_t col = Arow.j[p]; + delta_expanded[col] += Arow.x[p] * val; + } + } + for (i_t k = 0; k < n - m; ++k) { + delta_zN[k] = -delta_expanded[nonbasic_list[k]]; + } + } else { + delta_y_sparse.to_dense(delta_y_dense); + for (i_t k = 0; k < n - m; ++k) { + const i_t j = nonbasic_list[k]; + f_t dot = 0.0; + const i_t c_start = lp.A.col_start[j]; + const i_t c_end = lp.A.col_start[j + 1]; + for (i_t p = c_start; p < c_end; ++p) { + dot += lp.A.x[p] * delta_y_dense[lp.A.i[p]]; + } + delta_zN[k] = -dot; } - } - for (i_t k = 0; k < n - m; ++k) { - delta_zN[k] = -delta_expanded[nonbasic_list[k]]; } i_t entering_index = -1; @@ -1345,8 +1360,16 @@ crossover_status_t crossover(const lp_problem_t& lp, basis_update_mpf_t ft(L, U, p, settings.refactor_frequency); verify_basis(m, n, vstatus); compare_vstatus_with_lists(m, n, basic_list, nonbasic_list, vstatus); - i_t dual_push_status = dual_push( - lp, Arow, settings, start_time, solution, ft, basic_list, nonbasic_list, superbasic_list, vstatus); + i_t dual_push_status = dual_push(lp, + Arow, + settings, + start_time, + solution, + ft, + basic_list, + nonbasic_list, + superbasic_list, + vstatus); if (dual_push_status < 0) { return return_to_status(dual_push_status); } settings.log.debug("basic list size %ld m %d\n", basic_list.size(), m); settings.log.debug("nonbasic list size %ld n - m %d\n", nonbasic_list.size(), n - m);