Skip to content

Commit

Permalink
chore: Optimise grand product computation round based on active ranges (
Browse files Browse the repository at this point in the history
#10460)

Skip computation of numerator and denominator of z_perm at indexes that
are not part of the active ranges, and refine the threshold in
commit_structured for `z_perm`. New benchmarks:
**Now: 5.6 s difference**
We still see a difference between committing to z_perm between an
ambient trace of 2^19 and 2^20, caused by the fact that the active
ranges complement are larger (i.e. the ranges in the trace blocks where
z_perm is constant) because the blocks themselves are larger. We 
make sure to at least avoid computing and committing to z_perm after the
final active wire index.
  • Loading branch information
maramihali authored Dec 9, 2024
1 parent f5de7d1 commit 7fa8f84
Show file tree
Hide file tree
Showing 13 changed files with 79 additions and 44 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ template <class Curve> class CommitmentKey {
*/
Commitment commit(PolynomialSpan<const Fr> polynomial)
{
PROFILE_THIS();
PROFILE_THIS_NAME("commit");
// We must have a power-of-2 SRS points *after* subtracting by start_index.
size_t dyadic_poly_size = numeric::round_up_power_2(polynomial.size());
// Because pippenger prefers a power-of-2 size, we must choose a starting index for the points so that we don't
Expand Down Expand Up @@ -133,7 +133,7 @@ template <class Curve> class CommitmentKey {
*/
Commitment commit_sparse(PolynomialSpan<const Fr> polynomial)
{
PROFILE_THIS();
PROFILE_THIS_NAME("commit_sparse");
const size_t poly_size = polynomial.size();
ASSERT(polynomial.end_index() <= srs->get_monomial_size());

Expand Down Expand Up @@ -204,21 +204,24 @@ template <class Curve> class CommitmentKey {
* @return Commitment
*/
Commitment commit_structured(PolynomialSpan<const Fr> polynomial,
const std::vector<std::pair<size_t, size_t>>& active_ranges)
const std::vector<std::pair<size_t, size_t>>& active_ranges,
size_t final_active_wire_idx = 0)
{
BB_OP_COUNT_TIME();
PROFILE_THIS_NAME("commit_structured");
ASSERT(polynomial.end_index() <= srs->get_monomial_size());

// Percentage of nonzero coefficients beyond which we resort to the conventional commit method
constexpr size_t NONZERO_THRESHOLD = 75;

// Compute the number of non-zero coefficients in the polynomial
size_t total_num_scalars = 0;
for (const auto& range : active_ranges) {
total_num_scalars += range.second - range.first;
for (const auto& [first, second] : active_ranges) {
total_num_scalars += second - first;
}

// Compute "active" percentage of polynomial; resort to standard commit if appropriate
size_t percentage_nonzero = total_num_scalars * 100 / polynomial.size();
size_t polynomial_size = final_active_wire_idx != 0 ? final_active_wire_idx : polynomial.size();
size_t percentage_nonzero = total_num_scalars * 100 / polynomial_size;
if (percentage_nonzero > NONZERO_THRESHOLD) {
return commit(polynomial);
}
Expand Down Expand Up @@ -259,9 +262,10 @@ template <class Curve> class CommitmentKey {
* @return Commitment
*/
Commitment commit_structured_with_nonzero_complement(PolynomialSpan<const Fr> polynomial,
const std::vector<std::pair<size_t, size_t>>& active_ranges)
const std::vector<std::pair<size_t, size_t>>& active_ranges,
size_t final_active_wire_idx = 0)
{
BB_OP_COUNT_TIME();
PROFILE_THIS_NAME("commit_structured_with_nonzero_complement");
ASSERT(polynomial.end_index() <= srs->get_monomial_size());

using BatchedAddition = BatchedAffineAddition<Curve>;
Expand All @@ -273,20 +277,21 @@ template <class Curve> class CommitmentKey {
// Note: the range from the end of the last active range to the end of the polynomial is excluded from the
// complement since the polynomial is assumed to be zero there.
std::vector<std::pair<size_t, size_t>> active_ranges_complement;
// Also compute total number of scalars in the constant regions
size_t total_num_complement_scalars = 0;
for (size_t i = 0; i < active_ranges.size() - 1; ++i) {
const size_t start = active_ranges[i].second;
const size_t end = active_ranges[i + 1].first;
active_ranges_complement.emplace_back(start, end);
}

// Compute the total number of scalars in the constant regions
size_t total_num_complement_scalars = 0;
for (const auto& range : active_ranges_complement) {
total_num_complement_scalars += range.second - range.first;
if (end > start) {
active_ranges_complement.emplace_back(start, end);
total_num_complement_scalars += end - start;
}
}

size_t polynomial_size = final_active_wire_idx != 0 ? final_active_wire_idx : polynomial.size();
// Compute percentage of polynomial comprised of constant blocks; resort to standard commit if appropriate
size_t percentage_constant = total_num_complement_scalars * 100 / polynomial.size();
size_t percentage_constant = total_num_complement_scalars * 100 / polynomial_size;

if (percentage_constant < CONSTANT_THRESHOLD) {
return commit(polynomial);
}
Expand All @@ -299,12 +304,11 @@ template <class Curve> class CommitmentKey {
// TODO(https://github.com/AztecProtocol/barretenberg/issues/1131): Peak memory usage could be improved by
// performing this copy and the subsequent summation as a precomputation prior to constructing the point table.
std::vector<G1> points;
points.reserve(2 * total_num_complement_scalars);
for (const auto& range : active_ranges_complement) {
const size_t start = 2 * range.first;
const size_t end = 2 * range.second;
for (size_t i = start; i < end; i += 2) {
points.emplace_back(point_table[i]);

points.reserve(total_num_complement_scalars);
for (const auto& [start, end] : active_ranges_complement) {
for (size_t i = start; i < end; i++) {
points.emplace_back(point_table[2 * i]);
}
}

Expand All @@ -313,17 +317,16 @@ template <class Curve> class CommitmentKey {
std::vector<Fr> unique_scalars;
std::vector<size_t> sequence_counts;
for (const auto& range : active_ranges_complement) {
if (range.second - range.first > 0) { // only ranges with nonzero length
unique_scalars.emplace_back(polynomial.span[range.first]);
sequence_counts.emplace_back(range.second - range.first);
}
unique_scalars.emplace_back(polynomial.span[range.first]);
sequence_counts.emplace_back(range.second - range.first);
}

// Reduce each sequence to a single point
auto reduced_points = BatchedAddition::add_in_place(points, sequence_counts);

// Compute the full commitment as the sum of the "active" region commitment and the constant region contribution
Commitment result = commit_structured(polynomial, active_ranges);
Commitment result = commit_structured(polynomial, active_ranges, final_active_wire_idx);

for (auto [scalar, point] : zip_view(unique_scalars, reduced_points)) {
result = result + point * scalar;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ template <typename Curve>
std::vector<typename BatchedAffineAddition<Curve>::G1> BatchedAffineAddition<Curve>::add_in_place(
const std::span<G1>& points, const std::vector<size_t>& sequence_counts)
{
PROFILE_THIS_NAME("BatchedAffineAddition::add_in_place");
// Instantiate scratch space for point addition denominators and their calculation
std::vector<Fq> scratch_space_vector(points.size());
std::span<Fq> scratch_space(scratch_space_vector);
Expand Down
3 changes: 2 additions & 1 deletion barretenberg/cpp/src/barretenberg/ecc/fields/field_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -404,9 +404,10 @@ template <class T> void field<T>::batch_invert(field* coeffs, const size_t n) no
batch_invert(std::span{ coeffs, n });
}

// TODO(https://github.com/AztecProtocol/barretenberg/issues/1166)
template <class T> void field<T>::batch_invert(std::span<field> coeffs) noexcept
{
BB_OP_COUNT_TRACK_NAME("fr::batch_invert");
PROFILE_THIS_NAME("fr::batch_invert");
const size_t n = coeffs.size();

auto temporaries_ptr = std::static_pointer_cast<field[]>(get_mem_slab(n * sizeof(field)));
Expand Down
2 changes: 1 addition & 1 deletion barretenberg/cpp/src/barretenberg/flavor/flavor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ template <typename FF, typename CommitmentKey_> class ProvingKey_ {
// folded element by element.
std::vector<FF> public_inputs;

// Ranges of the form [start, end) over which the execution trace is "active"
// Ranges of the form [start, end) where witnesses have non-zero values (hence the execution trace is "active")
std::vector<std::pair<size_t, size_t>> active_block_ranges;

ProvingKey_() = default;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ struct ExecutionTraceUsageTracker {
MegaTraceFixedBlockSizes fixed_sizes; // fixed size of each block prescribed by structuring
// Store active ranges based on the most current accumulator and those based on all but the most recently
// accumulated circuit. The former is needed for the combiner calculation and the latter for the perturbator.
// The ranges cover all areas in the trace where relations have nontrivial values.
std::vector<Range> active_ranges;
std::vector<Range> previous_active_ranges;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,11 @@ namespace bb {
template <typename Flavor, typename GrandProdRelation>
void compute_grand_product(typename Flavor::ProverPolynomials& full_polynomials,
bb::RelationParameters<typename Flavor::FF>& relation_parameters,
size_t size_override = 0)
size_t size_override = 0,
std::vector<std::pair<size_t, size_t>> active_block_ranges = {})
{
PROFILE_THIS_NAME("compute_grand_product");

using FF = typename Flavor::FF;
using Polynomial = typename Flavor::Polynomial;
using Accumulator = std::tuple_element_t<0, typename GrandProdRelation::SumcheckArrayOfValuesOverSubrelations>;
Expand All @@ -84,22 +87,34 @@ void compute_grand_product(typename Flavor::ProverPolynomials& full_polynomials,
Polynomial numerator{ domain_size, domain_size };
Polynomial denominator{ domain_size, domain_size };

auto check_is_active = [&](size_t idx) {
if (active_block_ranges.empty()) {
return true;
}
return std::any_of(active_block_ranges.begin(), active_block_ranges.end(), [idx](const auto& range) {
return idx >= range.first && idx < range.second;
});
};

// Step (1)
// Populate `numerator` and `denominator` with the algebra described by Relation
FF gamma_fourth = relation_parameters.gamma.pow(4);
parallel_for(num_threads, [&](size_t thread_idx) {
typename Flavor::AllValues evaluations;
// TODO(https://github.com/AztecProtocol/barretenberg/issues/940): construction of evaluations is equivalent to
// calling get_row which creates full copies. avoid?
typename Flavor::AllValues row;
const size_t start = idx_bounds[thread_idx].first;
const size_t end = idx_bounds[thread_idx].second;
for (size_t i = start; i < end; ++i) {
for (auto [eval, full_poly] : zip_view(evaluations.get_all(), full_polynomials.get_all())) {
eval = full_poly.size() > i ? full_poly[i] : 0;
if (check_is_active(i)) {
// TODO(https://github.com/AztecProtocol/barretenberg/issues/940):consider avoiding get_row if possible.
row = full_polynomials.get_row(i);
numerator.at(i) =
GrandProdRelation::template compute_grand_product_numerator<Accumulator>(row, relation_parameters);
denominator.at(i) = GrandProdRelation::template compute_grand_product_denominator<Accumulator>(
row, relation_parameters);
} else {
numerator.at(i) = gamma_fourth;
denominator.at(i) = gamma_fourth;
}
numerator.at(i) = GrandProdRelation::template compute_grand_product_numerator<Accumulator>(
evaluations, relation_parameters);
denominator.at(i) = GrandProdRelation::template compute_grand_product_denominator<Accumulator>(
evaluations, relation_parameters);
}
});

Expand Down Expand Up @@ -163,6 +178,7 @@ void compute_grand_product(typename Flavor::ProverPolynomials& full_polynomials,
auto& grand_product_polynomial = GrandProdRelation::get_grand_product_polynomial(full_polynomials);
// We have a 'virtual' 0 at the start (as this is a to-be-shifted polynomial)
ASSERT(grand_product_polynomial.start_index() == 1);

parallel_for(num_threads, [&](size_t thread_idx) {
const size_t start = idx_bounds[thread_idx].first;
const size_t end = idx_bounds[thread_idx].second;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ template <typename FF_> class DatabusLookupRelationImpl {
auto& relation_parameters,
const size_t circuit_size)
{
PROFILE_THIS_NAME("Databus::compute_logderivative_inverse");
auto& inverse_polynomial = BusData<bus_idx, Polynomials>::inverses(polynomials);

size_t min_iterations_per_thread = 1 << 6; // min number of iterations for which we'll spin up a unique thread
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ template <typename FF_> class LogDerivLookupRelationImpl {
auto& relation_parameters,
const size_t circuit_size)
{
PROFILE_THIS_NAME("Lookup::compute_logderivative_inverse");
auto& inverse_polynomial = get_inverse_polynomial(polynomials);

size_t min_iterations_per_thread = 1 << 6; // min number of iterations for which we'll spin up a unique thread
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,8 @@ class MegaFlavor {
*/
void compute_logderivative_inverses(const RelationParameters<FF>& relation_parameters)
{
PROFILE_THIS_NAME("compute_logderivative_inverses");

// Compute inverses for conventional lookups
LogDerivLookupRelation<FF>::compute_logderivative_inverse(
this->polynomials, relation_parameters, this->circuit_size);
Expand Down Expand Up @@ -525,7 +527,7 @@ class MegaFlavor {

// Compute permutation grand product polynomial
compute_grand_product<MegaFlavor, UltraPermutationRelation<FF>>(
this->polynomials, relation_parameters, size_override);
this->polynomials, relation_parameters, size_override, this->active_block_ranges);
}

uint64_t estimate_memory()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ class UltraFlavor {
[[nodiscard]] size_t get_polynomial_size() const { return q_c.size(); }
[[nodiscard]] AllValues get_row(const size_t row_idx) const
{
PROFILE_THIS();
PROFILE_THIS_NAME("UltraFlavor::get_row");
AllValues result;
for (auto [result_field, polynomial] : zip_view(result.get_all(), get_all())) {
result_field = polynomial[row_idx];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ typename TraceToPolynomials<Flavor>::TraceData TraceToPolynomials<Flavor>::const
// otherwise, the next block starts immediately following the previous one
offset += block.get_fixed_size(is_structured);
}

return trace_data;
}

Expand Down
5 changes: 4 additions & 1 deletion barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ template <IsUltraFlavor Flavor> void OinkProver<Flavor>::execute_grand_product_c
{
PROFILE_THIS_NAME("OinkProver::execute_grand_product_computation_round");
// Compute the permutation grand product polynomial

proving_key->proving_key.compute_grand_product_polynomial(proving_key->relation_parameters,
proving_key->final_active_wire_idx + 1);

Expand All @@ -243,7 +244,9 @@ template <IsUltraFlavor Flavor> void OinkProver<Flavor>::execute_grand_product_c
if (proving_key->get_is_structured()) {
witness_commitments.z_perm =
proving_key->proving_key.commitment_key->commit_structured_with_nonzero_complement(
proving_key->proving_key.polynomials.z_perm, proving_key->proving_key.active_block_ranges);
proving_key->proving_key.polynomials.z_perm,
proving_key->proving_key.active_block_ranges,
proving_key->final_active_wire_idx + 1);
} else {
witness_commitments.z_perm =
proving_key->proving_key.commitment_key->commit(proving_key->proving_key.polynomials.z_perm);
Expand Down
7 changes: 6 additions & 1 deletion barretenberg/cpp/src/barretenberg/ultra_honk/oink_prover.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
// clang-format on
#include <utility>

#include "barretenberg/plonk_honk_shared/execution_trace/execution_trace_usage_tracker.hpp"
#include "barretenberg/ultra_honk/decider_proving_key.hpp"

namespace bb {
Expand All @@ -40,16 +41,20 @@ template <IsUltraFlavor Flavor> class OinkProver {
std::shared_ptr<DeciderPK> proving_key;
std::shared_ptr<Transcript> transcript;
std::string domain_separator;
ExecutionTraceUsageTracker trace_usage_tracker;

typename Flavor::WitnessCommitments witness_commitments;
typename Flavor::CommitmentLabels commitment_labels;
using RelationSeparator = typename Flavor::RelationSeparator;

OinkProver(std::shared_ptr<DeciderPK> proving_key,
const std::shared_ptr<typename Flavor::Transcript>& transcript = std::make_shared<Transcript>(),
std::string domain_separator = "")
std::string domain_separator = "",
const ExecutionTraceUsageTracker& trace_usage_tracker = ExecutionTraceUsageTracker{})
: proving_key(proving_key)
, transcript(transcript)
, domain_separator(std::move(domain_separator))
, trace_usage_tracker(trace_usage_tracker)
{}

void prove();
Expand Down

0 comments on commit 7fa8f84

Please sign in to comment.