From 0c399f1b808d57aef3d7fe1491b9441bd6a6f10e Mon Sep 17 00:00:00 2001 From: George Kadianakis Date: Tue, 10 May 2022 17:20:48 +0300 Subject: [PATCH 1/7] Fix missing variables/funcs in validate_blob_transaction_wrapper() There is no `tx.message.blob_commitments` anymore, or `kzg_to_commitment()` --- EIPS/eip-4844.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/EIPS/eip-4844.md b/EIPS/eip-4844.md index 705349916f2652..6015f584b6ab48 100644 --- a/EIPS/eip-4844.md +++ b/EIPS/eip-4844.md @@ -328,13 +328,13 @@ We do network-level validation of `BlobTransactionNetworkWrapper` objects as fol ```python def validate_blob_transaction_wrapper(wrapper: BlobTransactionNetworkWrapper): versioned_hashes = wrapper.tx.message.blob_versioned_hashes - kzgs = wrapper.blob_kzgs + commitments = wrapper.blob_kzgs blobs = wrapper.blobs - assert len(versioned_hashes) == len(kzgs) == len(blobs) - for versioned_hash, kzg, blob in zip(versioned_hashes, kzgs, blobs): + assert len(versioned_hashes) == len(commitments) == len(blobs) + for versioned_hash, commitment, blob in zip(versioned_hashes, commitments, blobs): # note: assert blob is not malformatted - assert kzg == blob_to_kzg(blob) - assert versioned_hash == kzg_to_versioned_hash(kzg) + assert commitment == blob_to_kzg(blob) + assert versioned_hash == kzg_to_versioned_hash(commitment) ``` ## Rationale From e3f2bd4d3ed2267317e36493a21410c56943e7f7 Mon Sep 17 00:00:00 2001 From: George Kadianakis Date: Tue, 10 May 2022 17:30:53 +0300 Subject: [PATCH 2/7] Introduce KZGProof as its own type instead of using KZGCommitment --- EIPS/eip-4844.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/EIPS/eip-4844.md b/EIPS/eip-4844.md index 6015f584b6ab48..ca409a2e579e58 100644 --- a/EIPS/eip-4844.md +++ b/EIPS/eip-4844.md @@ -71,6 +71,7 @@ Compared to full data sharding, this EIP has a reduced cap on the number of thes | `Blob` | `Vector[BLSFieldElement, FIELD_ELEMENTS_PER_BLOB]` | | | `VersionedHash` 
| `Bytes32` | | | `KZGCommitment` | `Bytes48` | Same as BLS standard "is valid pubkey" check but also allows `0x00..00` for point-at-infinity | +| `KZGProof` | Bytes48 | Same as for `KZGCommitment` | ### Helpers @@ -101,7 +102,7 @@ Verifies a KZG evaluation proof: def verify_kzg_proof(polynomial_kzg: KZGCommitment, x: BLSFieldElement, y: BLSFieldElement, - quotient_kzg: KZGCommitment): + quotient_kzg: KZGProof): # Verify: P - y = Q * (X - x) X_minus_x = bls.add(KZG_SETUP_G2[1], bls.multiply(bls.G2, BLS_MODULUS - x)) P_minus_y = bls.add(polynomial_kzg, bls.multiply(bls.G1, BLS_MODULUS - y)) From b66c8aa29f14dfc6ebcb174fce667b7c8d9768af Mon Sep 17 00:00:00 2001 From: George Kadianakis Date: Tue, 10 May 2022 17:27:57 +0300 Subject: [PATCH 3/7] Introduce high-level logic of new efficient transaction validation To validate a 4844 transaction in the mempool, the verifier checks that each provided KZG commitment matches the polynomial represented by the corresponding blob data. | d_1 | d_2 | d_3 | ... | d_4096 | -> commitment Before this patch, to do this validation, we reconstructed the commitment from the blob data (d_i above), and checked it against the provided commitment. This was expensive because computing a commitment from blob data (even using Lagrange basis) involves N scalar multiplications, where N is the number of field elements per blob. Initial benchmarking showed that this was about 40ms for N=4096 which was deemed too expensive. For more details see: https://hackmd.io/@protolambda/eip-4844-implementer-notes#Optimizations https://github.com/protolambda/go-ethereum/pull/4 In this patch, we speed this up by providing a KZG proof for each commitment. The verifier can check that proof to ensure that the KZG commitment matches the polynomial represented by the corresponding blob data. | d_1 | d_2 | d_3 | ... | d_4096 | -> commitment, proof To do so, we evaluate the blob data polynomial at a random point `x` to get a value `y`. 
We then use the KZG proof to ensure that the committed polynomial (i.e. the commitment) also evaluates to `y` at `x`. If the check passes, it means that the KZG commitment matches the polynomial represented by the blob data. This is significantly faster since evaluating the blob data polynomial at a random point using the Barycentric formula can be done efficiently with only field operations (see https://hackmd.io/@vbuterin/barycentric_evaluation). Then, verifying a KZG proof takes two pairing operations (which take about 0.6ms each). This brings the total verification cost to about 2 ms per blob. With some additional optimizations (using linear combination tricks such as the ones linked above) we can batch all the blobs together into a single efficient verification, and hence verify the entire transaction in 2.5 ms. The same techniques can be used to efficiently verify blocks on the consensus side. --- EIPS/eip-4844.md | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/EIPS/eip-4844.md b/EIPS/eip-4844.md index ca409a2e579e58..688219d7a77179 100644 --- a/EIPS/eip-4844.md +++ b/EIPS/eip-4844.md @@ -320,6 +320,8 @@ class BlobTransactionNetworkWrapper(Container): tx: SignedBlobTransaction # KZGCommitment = Bytes48 blob_kzgs: List[KZGCommitment, MAX_TX_WRAP_KZG_COMMITMENTS] + # KZGProofs = Bytes48 + blob_proofs: List[KZGProof, MAX_TX_WRAP_KZG_COMMITMENTS] # BLSFieldElement = uint256 blobs: List[Vector[BLSFieldElement, FIELD_ELEMENTS_PER_BLOB], LIMIT_BLOBS_PER_TX] ``` @@ -331,10 +333,19 @@ def validate_blob_transaction_wrapper(wrapper: BlobTransactionNetworkWrapper): versioned_hashes = wrapper.tx.message.blob_versioned_hashes commitments = wrapper.blob_kzgs blobs = wrapper.blobs - assert len(versioned_hashes) == len(commitments) == len(blobs) - for versioned_hash, commitment, blob in zip(versioned_hashes, commitments, blobs): + proofs = wrapper.blob_proofs + + assert len(versioned_hashes) == len(commitments) == len(blobs) == len(proofs) + for 
versioned_hash, commitment, blob, proof in zip(versioned_hashes, commitments, blobs, proofs): # note: assert blob is not malformatted - assert commitment == blob_to_kzg(blob) + + # Get `x` point using Fiat-Shamir by hashing both commitment and blob + x = hash_to_bls_field([commitment, blob]) + # Evaluate blob polynomial at `x` + y = evaluate_polynomial_in_evaluation_form(blob, x) + # Check that blob polynomial matches the `commitment` by verifying provided proof + assert verify_kzg_proof(commitment, x, y, proof) + # Finally check that the `versioned_hash` matches the `commitment` assert versioned_hash == kzg_to_versioned_hash(commitment) ``` From ab7eef0f877329a23b113a60af7cce66c9f10f29 Mon Sep 17 00:00:00 2001 From: George Kadianakis Date: Tue, 10 May 2022 18:16:14 +0300 Subject: [PATCH 4/7] Introduce polynomial helper functions for transaction validation --- EIPS/eip-4844.md | 52 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/EIPS/eip-4844.md b/EIPS/eip-4844.md index 688219d7a77179..1d3f93047134f3 100644 --- a/EIPS/eip-4844.md +++ b/EIPS/eip-4844.md @@ -46,6 +46,7 @@ Compared to full data sharding, this EIP has a reduced cap on the number of thes | `BLS_MODULUS` | `52435875175126190479447740508185965837690552500527637822603658699938581184513` | | `KZG_SETUP_G2` | `Vector[G2Point, FIELD_ELEMENTS_PER_BLOB]`, contents TBD | | `KZG_SETUP_LAGRANGE` | `Vector[KZGCommitment, FIELD_ELEMENTS_PER_BLOB]`, contents TBD | +| `ROOTS_OF_UNITY` | `Vector[BLSFieldElement, FIELD_ELEMENTS_PER_BLOB]` | | `BLOB_COMMITMENT_VERSION_KZG` | `Bytes1(0x01)` | | `POINT_EVALUATION_PRECOMPILE_ADDRESS` | `Bytes20(0x14)` | | `POINT_EVALUATION_PRECOMPILE_GAS` | `50000` | @@ -102,7 +103,7 @@ Verifies a KZG evaluation proof: def verify_kzg_proof(polynomial_kzg: KZGCommitment, x: BLSFieldElement, y: BLSFieldElement, - quotient_kzg: KZGProof): + quotient_kzg: KZGProof) -> bool: # Verify: P - y = Q * (X - x) X_minus_x = 
bls.add(KZG_SETUP_G2[1], bls.multiply(bls.G2, BLS_MODULUS - x)) P_minus_y = bls.add(polynomial_kzg, bls.multiply(bls.G1, BLS_MODULUS - y)) @@ -112,6 +113,48 @@ def verify_kzg_proof(polynomial_kzg: KZGCommitment, ]) ``` +Efficiently evaluates a polynomial in evaluation form using the barycentric formula + +```python +def bls_modular_inverse(x: BLSFieldElement) -> BLSFieldElement: + """ + Compute the modular inverse of x using the eGCD algorithm + i.e. return y such that x * y % BLS_MODULUS == 1 and return 0 for x == 0 + """ + if x == 0: + return 0 + + lm, hm = 1, 0 + low, high = x % BLS_MODULUS, BLS_MODULUS + while low > 1: + r = high // low + nm, new = hm - lm * r, high - low * r + lm, low, hm, high = nm, new, lm, low + return lm % BLS_MODULUS + + +def div(x, y): + """Divide two field elements: `x` by `y`""" + return x * inv(y) % MODULUS + + +def evaluate_polynomial_in_evaluation_form(poly: List[BLSFieldElement], x: BLSFieldElement) -> BLSFieldElement: + """ + Evaluate a polynomial (in evaluation form) at an arbitrary point `x` + Uses the barycentric formula: + f(x) = (1 - x**WIDTH) / WIDTH * sum_(i=0)^WIDTH (f(DOMAIN[i]) * DOMAIN[i]) / (x - DOMAIN[i]) + """ + width = len(poly) + assert width == FIELD_ELEMENTS_PER_BLOB + inverse_width = bls_modular_inverse(width) + + for i in range(width): + r += div(poly[i] * ROOTS_OF_UNITY[i], (x - ROOTS_OF_UNITY[i]) ) + r = r * (pow(x, width, BLS_MODULUS) - 1) * inverse_width % BLS_MODULUS + + return r +``` + Approximates `2 ** (numerator / denominator)`, with the simplest possible approximation that is continuous and has a continuous derivative: ```python @@ -329,6 +372,13 @@ class BlobTransactionNetworkWrapper(Container): We do network-level validation of `BlobTransactionNetworkWrapper` objects as follows: ```python +def hash_to_bls_field(x: Container) -> BLSFieldElement: + """ + This function is used to generate Fiat-Shamir challenges. The output is not uniform over the BLS field. 
+ """ + return int.from_bytes(hash_tree_root(x), "little") % BLS_MODULUS + + def validate_blob_transaction_wrapper(wrapper: BlobTransactionNetworkWrapper): versioned_hashes = wrapper.tx.message.blob_versioned_hashes commitments = wrapper.blob_kzgs From 603ab2ae1315ebc5ac7bf632752f3f18d17bf41b Mon Sep 17 00:00:00 2001 From: George Kadianakis Date: Mon, 16 May 2022 16:55:48 +0300 Subject: [PATCH 5/7] Implement high-level logic of aggregated proof verification --- EIPS/eip-4844.md | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/EIPS/eip-4844.md b/EIPS/eip-4844.md index 1d3f93047134f3..719b81048c76e1 100644 --- a/EIPS/eip-4844.md +++ b/EIPS/eip-4844.md @@ -363,10 +363,10 @@ class BlobTransactionNetworkWrapper(Container): tx: SignedBlobTransaction # KZGCommitment = Bytes48 blob_kzgs: List[KZGCommitment, MAX_TX_WRAP_KZG_COMMITMENTS] - # KZGProofs = Bytes48 - blob_proofs: List[KZGProof, MAX_TX_WRAP_KZG_COMMITMENTS] # BLSFieldElement = uint256 blobs: List[Vector[BLSFieldElement, FIELD_ELEMENTS_PER_BLOB], LIMIT_BLOBS_PER_TX] + # KZGProof = Bytes48 + kzg_aggregated_proof: KZGProof ``` We do network-level validation of `BlobTransactionNetworkWrapper` objects as follows: @@ -379,23 +379,35 @@ def hash_to_bls_field(x: Container) -> BLSFieldElement: return int.from_bytes(hash_tree_root(x), "little") % BLS_MODULUS + def validate_blob_transaction_wrapper(wrapper: BlobTransactionNetworkWrapper): versioned_hashes = wrapper.tx.message.blob_versioned_hashes commitments = wrapper.blob_kzgs blobs = wrapper.blobs - proofs = wrapper.blob_proofs - - assert len(versioned_hashes) == len(commitments) == len(blobs) == len(proofs) - for versioned_hash, commitment, blob, proof in zip(versioned_hashes, commitments, blobs, proofs): - # note: assert blob is not malformatted - - # Get `x` point using Fiat-Shamir by hashing both commitment and blob - x = hash_to_bls_field([commitment, blob]) - # Evaluate blob polynomial at `x` - y = 
evaluate_polynomial_in_evaluation_form(blob, x) - # Check that blob polynomial matches the `commitment` by verifying provided proof - assert verify_kzg_proof(commitment, x, y, proof) - # Finally check that the `versioned_hash` matches the `commitment` + # note: assert blobs are not malformatted + + assert len(versioned_hashes) == len(commitments) == len(blobs) + number_of_blobs = len(blobs) + + # Generate random linear combination challenges + r = hash_to_bls_field([blobs, commitments]) + r_powers = compute_powers(r, number_of_blobs) + + # Compute commitment to aggregated polynomial + aggregated_poly_commitment = lincomb(commitments, r_powers) + + # Create aggregated polynomial in evaluation form + aggregated_poly = vector_lincomb(blobs, r_powers) + + # Generate challenge `x` and evaluate the aggregated polynomial at `x` + x = hash_to_bls_field([aggregated_poly, aggregated_poly_commitment]) + y = evaluate_polynomial_in_evaluation_form(aggregated_poly, x) + + # Verify aggregated proof + assert verify_kzg_proof(aggregated_poly_commitment, x, y, wrapper.kzg_aggregated_proof) + + # Now that all commitments have been verified, check that versioned_hashes matches the commitments + for versioned_hash, commitment in zip(versioned_hashes, commitments): assert versioned_hash == kzg_to_versioned_hash(commitment) ``` From 7d3b44980398702ebb85602d611da6382167adfb Mon Sep 17 00:00:00 2001 From: George Kadianakis Date: Tue, 17 May 2022 15:55:52 +0300 Subject: [PATCH 6/7] Add helper functions for aggregated proof verification Also abstract `lincomb()` out of the `blob_to_kzg()` function to be used in the verification. 
--- EIPS/eip-4844.md | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/EIPS/eip-4844.md b/EIPS/eip-4844.md index 719b81048c76e1..ca4934b1420efb 100644 --- a/EIPS/eip-4844.md +++ b/EIPS/eip-4844.md @@ -79,15 +79,17 @@ Compared to full data sharding, this EIP has a reduced cap on the number of thes Converts a blob to its corresponding KZG point: ```python +def lincomb(points: List[KZGCommitment], scalars: List[BLSFieldElement]) -> KZGCommitment: + """ + BLS multiscalar multiplication. This function can be optimized using Pippenger's algorithm and variants. + """ + r = bls.Z1 + for x, a in zip(points, scalars): + r = bls.add(r, bls.multiply(x, a)) + return r + def blob_to_kzg(blob: Blob) -> KZGCommitment: - computed_kzg = bls.Z1 - for value, point_kzg in zip(blob, KZG_SETUP_LAGRANGE): - assert value < BLS_MODULUS - computed_kzg = bls.add( - computed_kzg, - bls.multiply(point_kzg, value) - ) - return computed_kzg + return lincomb(blob, KZG_SETUP_LAGRANGE) ``` Converts a KZG point into a versioned hash: @@ -379,6 +381,24 @@ def hash_to_bls_field(x: Container) -> BLSFieldElement: return int.from_bytes(hash_tree_root(x), "little") % BLS_MODULUS +def compute_powers(x: BLSFieldElement, n: uint64) -> List[BLSFieldElement]: + current_power = 1 + powers = [] + for _ in range(n): + powers.append(BLSFieldElement(current_power)) + current_power = current_power * int(x) % BLS_MODULUS + return powers + +def vector_lincomb(vectors: List[List[BLSFieldElement]], scalars: List[BLSFieldElement]) -> List[BLSFieldElement]: + """ + Given a list of vectors, compute the linear combination of each column with `scalars`, and return the resulting + vector. 
+ """ + r = [0]*len(vectors[0]) + for v, a in zip(vectors, scalars): + for i, x in enumerate(v): + r[i] = (r[i] + a * x) % BLS_MODULUS + return [BLSFieldElement(x) for x in r] def validate_blob_transaction_wrapper(wrapper: BlobTransactionNetworkWrapper): versioned_hashes = wrapper.tx.message.blob_versioned_hashes From ea9ae70e0292484095b137b9336c30381538935b Mon Sep 17 00:00:00 2001 From: George Kadianakis Date: Thu, 23 Jun 2022 13:22:34 +0300 Subject: [PATCH 7/7] Fixes after review on the consensus PR --- EIPS/eip-4844.md | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/EIPS/eip-4844.md b/EIPS/eip-4844.md index ca4934b1420efb..794d316efc3da6 100644 --- a/EIPS/eip-4844.md +++ b/EIPS/eip-4844.md @@ -72,7 +72,7 @@ Compared to full data sharding, this EIP has a reduced cap on the number of thes | `Blob` | `Vector[BLSFieldElement, FIELD_ELEMENTS_PER_BLOB]` | | | `VersionedHash` | `Bytes32` | | | `KZGCommitment` | `Bytes48` | Same as BLS standard "is valid pubkey" check but also allows `0x00..00` for point-at-infinity | -| `KZGProof` | Bytes48 | Same as for `KZGCommitment` | +| `KZGProof` | `Bytes48` | Same as for `KZGCommitment` | ### Helpers @@ -89,7 +89,7 @@ def lincomb(points: List[KZGCommitment], scalars: List[BLSFieldElement]) -> KZGC return r def blob_to_kzg(blob: Blob) -> KZGCommitment: - return lincomb(blob, KZG_SETUP_LAGRANGE) + return lincomb(KZG_SETUP_LAGRANGE, blob) ``` Converts a KZG point into a versioned hash: @@ -120,24 +120,15 @@ Efficiently evaluates a polynomial in evaluation form using the barycentric form ```python def bls_modular_inverse(x: BLSFieldElement) -> BLSFieldElement: """ - Compute the modular inverse of x using the eGCD algorithm + Compute the modular inverse of x i.e. 
return y such that x * y % BLS_MODULUS == 1 and return 0 for x == 0 """ - if x == 0: - return 0 - - lm, hm = 1, 0 - low, high = x % BLS_MODULUS, BLS_MODULUS - while low > 1: - r = high // low - nm, new = hm - lm * r, high - low * r - lm, low, hm, high = nm, new, lm, low - return lm % BLS_MODULUS + return pow(x, -1, BLS_MODULUS) if x != 0 else 0 def div(x, y): """Divide two field elements: `x` by `y`""" - return x * inv(y) % MODULUS + return x * bls_modular_inverse(y) % BLS_MODULUS def evaluate_polynomial_in_evaluation_form(poly: List[BLSFieldElement], x: BLSFieldElement) -> BLSFieldElement: