// SPDX-License-Identifier: GPL-2.0-or-later
/* mpihelp-mul.c  -  MPI helper functions
 * Copyright (C) 1994, 1996, 1998, 1999,
 *               2000 Free Software Foundation, Inc.
 *
 * This file is part of GnuPG.
 *
 * Note: This code is heavily based on the GNU MP Library.
 *	 Actually it's the same code with only minor changes in the
 *	 way the data is stored; this is to support the abstraction
 *	 of an optional secure memory allocation which may be used
 *	 to avoid revealing of sensitive data due to paging etc.
 *	 The GNU MP Library itself is published under the LGPL;
 *	 however I decided to publish this code under the plain GPL.
 */

#include <linux/string.h>
#include "mpi-internal.h"
#include "longlong.h"

#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace)		\
	do {							\
		if ((size) < KARATSUBA_THRESHOLD)		\
			mul_n_basecase(prodp, up, vp, size);	\
		else						\
			mul_n(prodp, up, vp, size, tspace);	\
	} while (0)

#define MPN_SQR_N_RECURSE(prodp, up, size, tspace)		\
	do {							\
		if ((size) < KARATSUBA_THRESHOLD)		\
			mpih_sqr_n_basecase(prodp, up, size);	\
		else						\
			mpih_sqr_n(prodp, up, size, tspace);	\
	} while (0)

/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
 * both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
 * always stored.  Return the most significant limb.
 *
 * Argument constraints:
 * 1. PRODP != UP and PRODP != VP, i.e. the destination
 *    must be distinct from the multiplier and the multiplicand.
 *
 * Handle simple cases with traditional multiplication.
 *
 * This is the most critical code of multiplication.  All multiplies rely
 * on this, both small and huge.  Small ones arrive here immediately.  Huge
 * ones arrive here as this is the base case for Karatsuba's recursive
 * algorithm below.
 */
static mpi_limb_t
mul_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
{
	mpi_size_t i;
	mpi_limb_t cy;
	mpi_limb_t v_limb;

	/* Multiply by the first limb of V separately, as the result can be
	 * stored in (not added to) PROD.  We also avoid a loop for zeroing. */
	v_limb = vp[0];
	if (v_limb <= 1) {
		if (v_limb == 1)
			MPN_COPY(prodp, up, size);
		else
			MPN_ZERO(prodp, size);
		cy = 0;
	} else
		cy = mpihelp_mul_1(prodp, up, size, v_limb);

	prodp[size] = cy;
	prodp++;

	/* For each iteration of the outer loop, multiply U by one limb of
	 * V, and add the product to PROD. */
	for (i = 1; i < size; i++) {
		v_limb = vp[i];
		if (v_limb <= 1) {
			cy = 0;
			if (v_limb == 1)
				cy = mpihelp_add_n(prodp, prodp, up, size);
		} else
			cy = mpihelp_addmul_1(prodp, up, size, v_limb);

		prodp[size] = cy;
		prodp++;
	}

	return cy;
}
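
/*
 * Worked sketch of the basecase above (illustration only): with
 * B = 2**BITS_PER_MP_LIMB and size = 2, the code computes
 *
 *	prod  = u * v[0]	(mpihelp_mul_1, stored directly)
 *	prod += (u * v[1]) * B	(mpihelp_addmul_1; prodp++ supplies the
 *				 shift by one limb)
 *
 * i.e. the classical O(size**2) schoolbook method, consuming one limb
 * of V per iteration of the outer loop.
 */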

static void
mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp,
      mpi_size_t size, mpi_ptr_t tspace)
{
	if (size & 1) {
		/* The size is odd, and the code below doesn't handle that.
		 * Multiply the least significant (size - 1) limbs with a
		 * recursive call, and handle the most significant limbs of
		 * U and V separately.
		 * A slightly faster way to do this would be to make the
		 * Karatsuba code below behave as if the size were even, and
		 * let it check for odd size in the end, i.e., in essence
		 * move this code to the end.  Doing so would save us a
		 * recursive call, and potentially make the stack grow a
		 * lot less.
		 */
		mpi_size_t esize = size - 1;	/* even size */
		mpi_limb_t cy_limb;

		MPN_MUL_N_RECURSE(prodp, up, vp, esize, tspace);
		cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, vp[esize]);
		prodp[esize + esize] = cy_limb;
		cy_limb = mpihelp_addmul_1(prodp + esize, vp, size, up[esize]);
		prodp[esize + size] = cy_limb;
	} else {
		/* Anatolij Alekseevich Karatsuba's divide-and-conquer
		 * algorithm.
		 *
		 * Split U in two pieces, U1 and U0, such that
		 * U = U0 + U1*(B**n),
		 * and V in V1 and V0, such that
		 * V = V0 + V1*(B**n).
		 *
		 * UV is then computed recursively using the identity
		 *
		 *	UV = (B**(2n) + B**n)*U1*V1
		 *	     + B**n*(U1-U0)*(V0-V1)
		 *	     + (B**n + 1)*U0*V0
		 *
		 * where B = 2**BITS_PER_MP_LIMB.
		 */
		mpi_size_t hsize = size >> 1;
		mpi_limb_t cy;
		int negflg;

		/* Product H.	   ________________  ________________
		 *		  |_____U1 x V1____||____U0 x V0_____|
		 * Put result in upper part of PROD and pass low part of
		 * TSPACE as new TSPACE.
		 */
		MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize,
				  tspace);

		/* Product M.	   ________________
		 *		  |_(U1-U0)(V0-V1)_|
		 */
		if (mpihelp_cmp(up + hsize, up, hsize) >= 0) {
			mpihelp_sub_n(prodp, up + hsize, up, hsize);
			negflg = 0;
		} else {
			mpihelp_sub_n(prodp, up, up + hsize, hsize);
			negflg = 1;
		}
		if (mpihelp_cmp(vp + hsize, vp, hsize) >= 0) {
			mpihelp_sub_n(prodp + hsize, vp + hsize, vp, hsize);
			negflg ^= 1;
		} else {
			mpihelp_sub_n(prodp + hsize, vp, vp + hsize, hsize);
			/* No change of NEGFLG. */
		}
		/* Read temporary operands from low part of PROD.
		 * Put result in low part of TSPACE using upper part of
		 * TSPACE as new TSPACE.
		 */
		MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize,
				  tspace + size);

		/* Add/copy product H. */
		MPN_COPY(prodp + hsize, prodp + size, hsize);
		cy = mpihelp_add_n(prodp + size, prodp + size,
				   prodp + size + hsize, hsize);

		/* Add product M (if NEGFLG, M is a negative number). */
		if (negflg)
			cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize,
					    tspace, size);
		else
			cy += mpihelp_add_n(prodp + hsize, prodp + hsize,
					    tspace, size);

		/* Product L.	   ________________  ________________
		 *		  |________________||____U0 x V0_____|
		 * Read operands from the low parts of U and V.
		 * Put result in low part of TSPACE using upper part of
		 * TSPACE as new TSPACE.
		 */
		MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size);

		/* Add/copy product L (twice). */
		cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
		if (cy)
			mpihelp_add_1(prodp + hsize + size,
				      prodp + hsize + size, hsize, cy);

		MPN_COPY(prodp, tspace, hsize);
		cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize,
				   hsize);
		if (cy)
			mpihelp_add_1(prodp + size, prodp + size, size, 1);
	}
}
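
/*
 * Worked example of the identity above (illustration only, with B = 10
 * and n = 1 instead of B = 2**BITS_PER_MP_LIMB): for U = 47 (U1 = 4,
 * U0 = 7) and V = 81 (V1 = 8, V0 = 1),
 *
 *	H = U1*V1 = 32,  L = U0*V0 = 7,  M = (U1-U0)*(V0-V1) = (-3)*(-7) = 21
 *	UV = (100 + 10)*32 + 10*21 + (10 + 1)*7 = 3520 + 210 + 77 = 3807
 *
 * which matches 47 * 81 = 3807.  Three half-size multiplications (H, M, L)
 * replace the four of the schoolbook method, which is what makes the
 * recursion run in O(size**1.585) rather than O(size**2) limb operations.
 */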

void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size)
{
	mpi_size_t i;
	mpi_limb_t cy_limb;
	mpi_limb_t v_limb;

	/* Multiply by the first limb of U separately, as the result can be
	 * stored in (not added to) PROD.  We also avoid a loop for zeroing. */
	v_limb = up[0];
	if (v_limb <= 1) {
		if (v_limb == 1)
			MPN_COPY(prodp, up, size);
		else
			MPN_ZERO(prodp, size);
		cy_limb = 0;
	} else
		cy_limb = mpihelp_mul_1(prodp, up, size, v_limb);

	prodp[size] = cy_limb;
	prodp++;

	/* For each iteration of the outer loop, multiply U by one of its
	 * own limbs, and add the product to PROD. */
	for (i = 1; i < size; i++) {
		v_limb = up[i];
		if (v_limb <= 1) {
			cy_limb = 0;
			if (v_limb == 1)
				cy_limb = mpihelp_add_n(prodp, prodp, up, size);
		} else
			cy_limb = mpihelp_addmul_1(prodp, up, size, v_limb);

		prodp[size] = cy_limb;
		prodp++;
	}
}
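
/*
 * Squaring note (illustration only): mpih_sqr_n below is the Karatsuba
 * recursion specialized for U == V.  The middle product becomes
 *
 *	M = (U1-U0)*(U0-U1) = -(U1-U0)**2 <= 0
 *
 * so no sign flag is needed; M is always subtracted.  E.g. with B = 10
 * and U = 47: H = 16, L = 49, M = -(4-7)**2 = -9, and
 * (100 + 10)*16 + 10*(-9) + (10 + 1)*49 = 1760 - 90 + 539 = 2209 = 47**2.
 */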

void
mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace)
{
	if (size & 1) {
		/* The size is odd, and the code below doesn't handle that.
		 * Multiply the least significant (size - 1) limbs with a
		 * recursive call, and handle the most significant limb
		 * separately.
		 * A slightly faster way to do this would be to make the
		 * Karatsuba code below behave as if the size were even, and
		 * let it check for odd size in the end, i.e., in essence
		 * move this code to the end.  Doing so would save us a
		 * recursive call, and potentially make the stack grow a
		 * lot less.
		 */
		mpi_size_t esize = size - 1;	/* even size */
		mpi_limb_t cy_limb;

		MPN_SQR_N_RECURSE(prodp, up, esize, tspace);
		cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, up[esize]);
		prodp[esize + esize] = cy_limb;
		cy_limb = mpihelp_addmul_1(prodp + esize, up, size, up[esize]);
		prodp[esize + size] = cy_limb;
	} else {
		mpi_size_t hsize = size >> 1;
		mpi_limb_t cy;

		/* Product H.	   ________________  ________________
		 *		  |_____U1 x U1____||____U0 x U0_____|
		 * Put result in upper part of PROD and pass low part of
		 * TSPACE as new TSPACE.
		 */
		MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace);

		/* Product M.	   ________________
		 *		  |_(U1-U0)(U0-U1)_|
		 */
		if (mpihelp_cmp(up + hsize, up, hsize) >= 0)
			mpihelp_sub_n(prodp, up + hsize, up, hsize);
		else
			mpihelp_sub_n(prodp, up, up + hsize, hsize);

		/* Read temporary operands from low part of PROD.
		 * Put result in low part of TSPACE using upper part of
		 * TSPACE as new TSPACE. */
		MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size);

		/* Add/copy product H. */
		MPN_COPY(prodp + hsize, prodp + size, hsize);
		cy = mpihelp_add_n(prodp + size, prodp + size,
				   prodp + size + hsize, hsize);

		/* Add product M, which here is -(U1-U0)**2 and thus never
		 * positive: subtract unconditionally. */
		cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, size);

		/* Product L.	   ________________  ________________
		 *		  |________________||____U0 x U0_____|
		 * Read operands from the low part of U.
		 * Put result in low part of TSPACE using upper part of
		 * TSPACE as new TSPACE. */
		MPN_SQR_N_RECURSE(tspace, up, hsize, tspace + size);

		/* Add/copy product L (twice). */
		cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
		if (cy)
			mpihelp_add_1(prodp + hsize + size,
				      prodp + hsize + size, hsize, cy);

		MPN_COPY(prodp, tspace, hsize);
		cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize,
				   hsize);
		if (cy)
			mpihelp_add_1(prodp + size, prodp + size, size, 1);
	}
}

int
mpihelp_mul_karatsuba_case(mpi_ptr_t prodp,
			   mpi_ptr_t up, mpi_size_t usize,
			   mpi_ptr_t vp, mpi_size_t vsize,
			   struct karatsuba_ctx *ctx)
{
	mpi_limb_t cy;

	if (!ctx->tspace || ctx->tspace_size < vsize) {
		if (ctx->tspace)
			mpi_free_limb_space(ctx->tspace);
		ctx->tspace = mpi_alloc_limb_space(2 * vsize);
		if (!ctx->tspace)
			return -ENOMEM;
		ctx->tspace_size = vsize;
	}

	MPN_MUL_N_RECURSE(prodp, up, vp, vsize, ctx->tspace);

	prodp += vsize;
	up += vsize;
	usize -= vsize;
	if (usize >= vsize) {
		if (!ctx->tp || ctx->tp_size < vsize) {
			if (ctx->tp)
				mpi_free_limb_space(ctx->tp);
			ctx->tp = mpi_alloc_limb_space(2 * vsize);
			if (!ctx->tp) {
				if (ctx->tspace)
					mpi_free_limb_space(ctx->tspace);
				ctx->tspace = NULL;
				return -ENOMEM;
			}
			ctx->tp_size = vsize;
		}

		do {
			MPN_MUL_N_RECURSE(ctx->tp, up, vp, vsize, ctx->tspace);
			cy = mpihelp_add_n(prodp, prodp, ctx->tp, vsize);
			mpihelp_add_1(prodp + vsize, ctx->tp + vsize, vsize,
				      cy);
			prodp += vsize;
			up += vsize;
			usize -= vsize;
		} while (usize >= vsize);
	}

	if (usize) {
		if (usize < KARATSUBA_THRESHOLD) {
			mpi_limb_t tmp;

			if (mpihelp_mul(ctx->tspace, vp, vsize, up, usize,
					&tmp) < 0)
				return -ENOMEM;
		} else {
			if (!ctx->next) {
				ctx->next = kzalloc(sizeof(*ctx), GFP_KERNEL);
				if (!ctx->next)
					return -ENOMEM;
			}
			if (mpihelp_mul_karatsuba_case(ctx->tspace,
						       vp, vsize,
						       up, usize,
						       ctx->next) < 0)
				return -ENOMEM;
		}

		cy = mpihelp_add_n(prodp, prodp, ctx->tspace, vsize);
		mpihelp_add_1(prodp + vsize, ctx->tspace + vsize, usize, cy);
	}

	return 0;
}
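
/*
 * Example of the slicing above (illustration only): with usize = 7 and
 * vsize = 3, mpihelp_mul_karatsuba_case() multiplies the low 3 limbs of
 * U by V with MPN_MUL_N_RECURSE, then the next 3-limb slice (accumulated
 * one slice higher via mpihelp_add_n()/mpihelp_add_1()), and finally the
 * 1-limb remainder through mpihelp_mul().  With B = 2**BITS_PER_MP_LIMB:
 *
 *	U*V = (U0 + U1*B**3 + U2*B**6) * V
 *	    = U0*V + (U1*V)*B**3 + (U2*V)*B**6
 */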

void mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx)
{
	struct karatsuba_ctx *ctx2;

	if (ctx->tp)
		mpi_free_limb_space(ctx->tp);
	if (ctx->tspace)
		mpi_free_limb_space(ctx->tspace);
	for (ctx = ctx->next; ctx; ctx = ctx2) {
		ctx2 = ctx->next;
		if (ctx->tp)
			mpi_free_limb_space(ctx->tp);
		if (ctx->tspace)
			mpi_free_limb_space(ctx->tspace);
		kfree(ctx);
	}
}

/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
 * and v (pointed to by VP, with VSIZE limbs), and store the result at
 * PRODP.  USIZE + VSIZE limbs are always stored, even when the most
 * significant limb of the result is zero.  Return the most significant
 * limb of the result.
 *
 * NOTE: The space pointed to by PRODP is overwritten before we are
 * finished with U and V, so overlap is an error.
 *
 * Argument constraints:
 * 1. USIZE >= VSIZE.
 * 2. PRODP != UP and PRODP != VP, i.e. the destination
 *    must be distinct from the multiplier and the multiplicand.
 */
int
mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
	    mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result)
{
	mpi_ptr_t prod_endp = prodp + usize + vsize - 1;
	mpi_limb_t cy;
	struct karatsuba_ctx ctx;

	if (vsize < KARATSUBA_THRESHOLD) {
		mpi_size_t i;
		mpi_limb_t v_limb;

		if (!vsize) {
			*_result = 0;
			return 0;
		}

		/* Multiply by the first limb of V separately, as the result
		 * can be stored in (not added to) PROD.  We also avoid a
		 * loop for zeroing. */
		v_limb = vp[0];
		if (v_limb <= 1) {
			if (v_limb == 1)
				MPN_COPY(prodp, up, usize);
			else
				MPN_ZERO(prodp, usize);
			cy = 0;
		} else
			cy = mpihelp_mul_1(prodp, up, usize, v_limb);

		prodp[usize] = cy;
		prodp++;

		/* For each iteration of the outer loop, multiply U by one
		 * limb of V, and add the product to PROD. */
		for (i = 1; i < vsize; i++) {
			v_limb = vp[i];
			if (v_limb <= 1) {
				cy = 0;
				if (v_limb == 1)
					cy = mpihelp_add_n(prodp, prodp, up,
							   usize);
			} else
				cy = mpihelp_addmul_1(prodp, up, usize, v_limb);

			prodp[usize] = cy;
			prodp++;
		}

		*_result = cy;
		return 0;
	}

	memset(&ctx, 0, sizeof(ctx));
	if (mpihelp_mul_karatsuba_case(prodp, up, usize, vp, vsize, &ctx) < 0)
		return -ENOMEM;
	mpihelp_release_karatsuba_ctx(&ctx);
	*_result = *prod_endp;
	return 0;
}
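
/*
 * Usage sketch (illustration only, not called anywhere): multiplying two
 * limb vectors with usize >= vsize into a freshly allocated product area,
 * which must not overlap either input.
 *
 *	mpi_limb_t msl;
 *	mpi_ptr_t prod = mpi_alloc_limb_space(usize + vsize);
 *
 *	if (!prod)
 *		return -ENOMEM;
 *	if (mpihelp_mul(prod, up, usize, vp, vsize, &msl) < 0) {
 *		mpi_free_limb_space(prod);
 *		return -ENOMEM;
 *	}
 *	(prod now holds all usize + vsize limbs of u * v, and msl holds
 *	the most significant of them.)
 */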