// Copyright 2022 Risc0, Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "risc0/zkp/prove/prove.h" #include "risc0/core/log.h" #include "risc0/core/rng.h" #include "risc0/zkp/core/constants.h" #include "risc0/zkp/core/poly.h" #include "risc0/zkp/core/rou.h" #include "risc0/zkp/prove/fri.h" #include "risc0/zkp/prove/poly_group.h" #include "risc0/zkp/prove/write_iop.h" namespace risc0 { static AccelSlice makeCoeffs(const std::vector& vec, size_t count) { // Copy into accel buffer auto ret = AccelSlice::copy(vec); // Do interpolate batchInterpolateNTT(ret, count); // Convert f(x) -> f(3x), which effective multiplies cofficent c_i by 3^i. #ifndef CIRCUIT_DEBUG zkShiftAccel(ret, count); #endif return ret; } // NOLINTNEXTLINE(readability-function-size) std::vector prove(ProveCircuit& circuit) { // Get taps TapSetRef tapSet = circuit.getTaps(); // Setup output IOP WriteIOP iop; // Do main execution + get size circuit.execute(iop); uint32_t po2 = circuit.getPo2(); REQUIRE(po2 <= kMaxCyclesPo2); size_t size = size_t(1) << po2; // Get sizes size_t codeSize = tapSet.groupSize(RegisterGroup::CODE); size_t dataSize = tapSet.groupSize(RegisterGroup::DATA); size_t accumSize = tapSet.groupSize(RegisterGroup::ACCUM); size_t comboCount = tapSet.combosSize(); // Make code + data PolyGroups + commit them PolyGroup codeGroup(makeCoeffs(circuit.getCode(), codeSize), codeSize, size); PolyGroup dataGroup(makeCoeffs(circuit.getData(), dataSize), dataSize, size); codeGroup.getMerkle().commit(iop); dataGroup.getMerkle().commit(iop); LOG(1, "codeGroup: " << codeGroup.getMerkle().getRoot()); LOG(1, "dataGroup: " << dataGroup.getMerkle().getRoot()); circuit.accumulate(iop); // Make the accum group + commit LOG(1, "size = " << size << ", accumSize = " << accumSize); LOG(1, "getAccum.size() = " << circuit.getAccum().size()); PolyGroup accumGroup(makeCoeffs(circuit.getAccum(), accumSize), accumSize, size); accumGroup.getMerkle().commit(iop); LOG(1, "accumGroup: " << accumGroup.getMerkle().getRoot()); // Set the poly mix value Fp4 polyMix = Fp4::random(iop); // Now generate the check polynomial size_t domain = size * kInvRate; auto checkPoly = AccelSlice::allocate(4 * domain); circuit.evalCheck(checkPoly, codeGroup.getEvaluated(), dataGroup.getEvaluated(), accumGroup.getEvaluated(), polyMix); #ifdef CIRCUIT_DEBUG Fp4 badZ; // = Fp4(1); { AccelReadLock lock(checkPoly); for (size_t i = 0; i < 4 * domain; i += 4) { if (lock[i] != 0) { LOG(1, "ERROR AT i = " << i << " value = " << lock[i]); badZ = Fp4(pow(kRouFwd[po2], i / 4)); break; } } } #endif // Convert to coefficients. Some tricky bizness here with the fact that checkPoly is really // an Fp4 polynomial. Nicely for us, since all the roots of unity (which are the only thing that // and values get multiplied by) are in Fp, Fp4 values act like simple vectors of Fp for the // purposes of interpolate/evaluate. batchInterpolateNTT(checkPoly, 4); // The next step is to convert the degree 4*n check polynomial into 4 degreen n polynomials // so that f(x) = g0(x^4) + g1(x^4) x + g2(x^4) x^2 + g3(x^4) x^3. To do this, we normally // would grab all the coeffients of f(x) = sum_i c_i x^i where i % 4 == 0 and put them into // a new polynomial g0(x) = sum_i d0_i*x^i, where d0_i = c_(i*4). // // Amazingingly, since the coefficients are bit reversed, the coefficients of g0 are all // aleady next to each other and in bit-reversed for for g0, as are the coeffients of g1, etc. // So really, we can just reinterpret 4 polys of invRate*size to 16 polys of size, without // actually doing anything. // Make the PolyGroup + add it to the IOP; PolyGroup checkGroup(checkPoly, kCheckSize, size); checkGroup.getMerkle().commit(iop); LOG(1, "checkGroup: " << checkGroup.getMerkle().getRoot()); // Now pick a value for Z Fp4 Z = Fp4::random(iop); #ifdef CIRCUIT_DEBUG if (badZ != Fp4(0)) { Z = badZ; } iop.write(&Z, 1); #endif LOG(1, "Z = " << Z); // Get rev rou for size Fp backOne = kRouRev[po2]; std::vector allXs; // Do evaluations of all of the various polynomials at the appropriate points // Compute evalU std::vector evalU; auto evalGroup = [&](RegisterGroup id, PolyGroup& pg) { AccelConstSlice coeffs = pg.getCoeffs(); std::vector which; std::vector xs; for (auto tap : tapSet.groupTaps(id)) { which.push_back(tap.offset()); xs.push_back(pow(backOne, tap.back()) * Z); allXs.push_back(xs.back()); } auto whichAccel = AccelSlice::copy(which); auto xsAccel = AccelSlice::copy(xs); auto outAccel = AccelSlice::allocate(which.size()); batchEvaluateAny(coeffs, pg.getCount(), whichAccel, xsAccel, outAccel); { AccelReadLock out(outAccel); std::copy(out.data(), out.data() + out.size(), std::back_inserter(evalU)); } }; // Now, we evaluate each group at the approriate points (relative to Z). // From here on out, we always process groups in accum, code, data order, // since this is the order used by the codegen system (alphabetical). // Sometimes it's a requirement for matching generated code, but even when // it's not we keep the order for consistency. evalGroup(RegisterGroup::ACCUM, accumGroup); evalGroup(RegisterGroup::CODE, codeGroup); evalGroup(RegisterGroup::DATA, dataGroup); // Now, convert the values to coefficients via interpolation size_t curPos = 0; std::vector coeffU(evalU.size()); for (auto reg : tapSet.regs()) { polyInterpolate( coeffU.data() + curPos, allXs.data() + curPos, evalU.data() + curPos, reg.size()); curPos += reg.size(); } Fp4 Z4 = pow(Z, 4); // Add in the coeffs of the check polynomials. { std::vector which; for (size_t i = 0; i < kCheckSize; i++) { which.push_back(i); } std::vector xs(kCheckSize, Z4); auto out = AccelSlice::allocate(kCheckSize); batchEvaluateAny(checkGroup.getCoeffs(), kCheckSize, AccelSlice::copy(which), AccelSlice::copy(xs), out); AccelReadLock outLock(out); std::copy(outLock.data(), outLock.data() + out.size(), std::back_inserter(coeffU)); } LOG(1, "Size of U = " << coeffU.size()); iop.write(coeffU.data(), coeffU.size()); auto hashU = shaHash(reinterpret_cast(coeffU.data()), coeffU.size() * 4, 1, false); iop.commit(hashU); // Set the mix mix value Fp4 mix = Fp4::random(iop); LOG(1, "Mix = " << mix); // Do the coefficent mixing // Begin by making a zeroed output buffer auto combos = AccelSlice::copy(std::vector(size * (comboCount + 1))); Fp4 curMix(1); auto mixGroup = [&](RegisterGroup id, PolyGroup& pg) { std::vector which; for (auto reg : tapSet.groupRegs(id)) { which.push_back(reg.comboID()); } auto whichAccel = AccelSlice::copy(which); auto curMixAccel = AccelSlice::copy(&curMix, 1); auto mixAccel = AccelSlice::copy(&mix, 1); size_t gsize = tapSet.groupSize(id); mixPolyCoeffsAccel(combos, curMixAccel, mixAccel, pg.getCoeffs(), whichAccel, gsize, size); curMix *= pow(mix, gsize); }; mixGroup(RegisterGroup::ACCUM, accumGroup); mixGroup(RegisterGroup::CODE, codeGroup); mixGroup(RegisterGroup::DATA, dataGroup); { std::vector which(kCheckSize, comboCount); auto whichAccel = AccelSlice::copy(which); auto curMixAccel = AccelSlice::copy(&curMix, 1); auto mixAccel = AccelSlice::copy(&mix, 1); mixPolyCoeffsAccel( combos, curMixAccel, mixAccel, checkGroup.getCoeffs(), whichAccel, kCheckSize, size); } // Load the near final coefficients back to the CPU auto comboCpu = std::make_unique>(combos); // Subtract the U coeffs from the combos curPos = 0; Fp4 cur = Fp4(1); for (auto reg : tapSet.regs()) { for (size_t i = 0; i < reg.size(); i++) { (*comboCpu)[size * reg.comboID() + i] -= cur * coeffU[curPos + i]; } cur *= mix; curPos += reg.size(); } // Subtract the final 'check' coefficents for (size_t i = 0; i < kCheckSize; i++) { (*comboCpu)[size * comboCount] -= cur * coeffU[curPos++]; cur *= mix; } // Divide each element by (x - Z * back1^back) for each back for (size_t combo = 0; combo < tapSet.combosSize(); combo++) { for (size_t back : tapSet.getCombo(combo)) { REQUIRE(polyDivide(comboCpu->data() + combo * size, size, Z * pow(backOne, back)) == Fp4(0)); } } // Divide check polys by Z4 REQUIRE(polyDivide(comboCpu->data() + comboCount * size, size, Z4) == Fp4(0)); comboCpu.reset(); // Free lock, move memory back to accelerator // Sum the combos up into one final polynomial + make then make it into 4 Fp polys // Additionally, it needs to be bit reversed to make everyone happy auto finalPolyCoeffs = AccelSlice::allocate(size * 4); eltwiseSumFp4Accel(finalPolyCoeffs, combos); // Finally do the FRI protocol to prove the degree of the polynomial batchBitReverse(finalPolyCoeffs, 4); LOG(1, "FRI-proof, size = " << finalPolyCoeffs.size() / 4); friProve(iop, finalPolyCoeffs, [&](WriteIOP& iop, size_t idx) { accumGroup.getMerkle().prove(iop, idx); codeGroup.getMerkle().prove(iop, idx); dataGroup.getMerkle().prove(iop, idx); checkGroup.getMerkle().prove(iop, idx); }); // Return final proof std::vector ret = iop.getProof(); LOG(1, "Proof size = " << ret.size()); return ret; } } // namespace risc0