bn_mp_karatsuba_mul.cGo to the documentation of this file.00001 #include <tommath.h> 00002 #ifdef BN_MP_KARATSUBA_MUL_C 00003 /* LibTomMath, multiple-precision integer library -- Tom St Denis 00004 * 00005 * LibTomMath is a library that provides multiple-precision 00006 * integer arithmetic as well as number theoretic functionality. 00007 * 00008 * The library was designed directly after the MPI library by 00009 * Michael Fromberger but has been written from scratch with 00010 * additional optimizations in place. 00011 * 00012 * The library is free for all purposes without any express 00013 * guarantee it works. 00014 * 00015 * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com 00016 */ 00017 00018 /* c = |a| * |b| using Karatsuba Multiplication using 00019 * three half size multiplications 00020 * 00021 * Let B represent the radix [e.g. 2**DIGIT_BIT] and 00022 * let n represent half of the number of digits in 00023 * the min(a,b) 00024 * 00025 * a = a1 * B**n + a0 00026 * b = b1 * B**n + b0 00027 * 00028 * Then, a * b => 00029 a1b1 * B**2n + ((a1 + a0)(b1 + b0) - (a0b0 + a1b1)) * B + a0b0 00030 * 00031 * Note that a1b1 and a0b0 are used twice and only need to be 00032 * computed once. So in total three half size (half # of 00033 * digit) multiplications are performed, a0b0, a1b1 and 00034 * (a1+b1)(a0+b0) 00035 * 00036 * Note that a multiplication of half the digits requires 00037 * 1/4th the number of single precision multiplications so in 00038 * total after one call 25% of the single precision multiplications 00039 * are saved. Note also that the call to mp_mul can end up back 00040 * in this function if the a0, a1, b0, or b1 are above the threshold. 00041 * This is known as divide-and-conquer and leads to the famous 00042 * O(N**lg(3)) or O(N**1.584) work which is asymptopically lower than 00043 * the standard O(N**2) that the baseline/comba methods use. 
00044 * Generally though the overhead of this method doesn't pay off 00045 * until a certain size (N ~ 80) is reached. 00046 */ 00047 int mp_karatsuba_mul (mp_int * a, mp_int * b, mp_int * c) 00048 { 00049 mp_int x0, x1, y0, y1, t1, x0y0, x1y1; 00050 int B, err; 00051 00052 /* default the return code to an error */ 00053 err = MP_MEM; 00054 00055 /* min # of digits */ 00056 B = MIN (a->used, b->used); 00057 00058 /* now divide in two */ 00059 B = B >> 1; 00060 00061 /* init copy all the temps */ 00062 if (mp_init_size (&x0, B) != MP_OKAY) 00063 goto ERR; 00064 if (mp_init_size (&x1, a->used - B) != MP_OKAY) 00065 goto X0; 00066 if (mp_init_size (&y0, B) != MP_OKAY) 00067 goto X1; 00068 if (mp_init_size (&y1, b->used - B) != MP_OKAY) 00069 goto Y0; 00070 00071 /* init temps */ 00072 if (mp_init_size (&t1, B * 2) != MP_OKAY) 00073 goto Y1; 00074 if (mp_init_size (&x0y0, B * 2) != MP_OKAY) 00075 goto T1; 00076 if (mp_init_size (&x1y1, B * 2) != MP_OKAY) 00077 goto X0Y0; 00078 00079 /* now shift the digits */ 00080 x0.used = y0.used = B; 00081 x1.used = a->used - B; 00082 y1.used = b->used - B; 00083 00084 { 00085 register int x; 00086 register mp_digit *tmpa, *tmpb, *tmpx, *tmpy; 00087 00088 /* we copy the digits directly instead of using higher level functions 00089 * since we also need to shift the digits 00090 */ 00091 tmpa = a->dp; 00092 tmpb = b->dp; 00093 00094 tmpx = x0.dp; 00095 tmpy = y0.dp; 00096 for (x = 0; x < B; x++) { 00097 *tmpx++ = *tmpa++; 00098 *tmpy++ = *tmpb++; 00099 } 00100 00101 tmpx = x1.dp; 00102 for (x = B; x < a->used; x++) { 00103 *tmpx++ = *tmpa++; 00104 } 00105 00106 tmpy = y1.dp; 00107 for (x = B; x < b->used; x++) { 00108 *tmpy++ = *tmpb++; 00109 } 00110 } 00111 00112 /* only need to clamp the lower words since by definition the 00113 * upper words x1/y1 must have a known number of digits 00114 */ 00115 mp_clamp (&x0); 00116 mp_clamp (&y0); 00117 00118 /* now calc the products x0y0 and x1y1 */ 00119 /* after this x0 is no longer 
required, free temp [x0==t2]! */ 00120 if (mp_mul (&x0, &y0, &x0y0) != MP_OKAY) 00121 goto X1Y1; /* x0y0 = x0*y0 */ 00122 if (mp_mul (&x1, &y1, &x1y1) != MP_OKAY) 00123 goto X1Y1; /* x1y1 = x1*y1 */ 00124 00125 /* now calc x1+x0 and y1+y0 */ 00126 if (s_mp_add (&x1, &x0, &t1) != MP_OKAY) 00127 goto X1Y1; /* t1 = x1 - x0 */ 00128 if (s_mp_add (&y1, &y0, &x0) != MP_OKAY) 00129 goto X1Y1; /* t2 = y1 - y0 */ 00130 if (mp_mul (&t1, &x0, &t1) != MP_OKAY) 00131 goto X1Y1; /* t1 = (x1 + x0) * (y1 + y0) */ 00132 00133 /* add x0y0 */ 00134 if (mp_add (&x0y0, &x1y1, &x0) != MP_OKAY) 00135 goto X1Y1; /* t2 = x0y0 + x1y1 */ 00136 if (s_mp_sub (&t1, &x0, &t1) != MP_OKAY) 00137 goto X1Y1; /* t1 = (x1+x0)*(y1+y0) - (x1y1 + x0y0) */ 00138 00139 /* shift by B */ 00140 if (mp_lshd (&t1, B) != MP_OKAY) 00141 goto X1Y1; /* t1 = (x0y0 + x1y1 - (x1-x0)*(y1-y0))<<B */ 00142 if (mp_lshd (&x1y1, B * 2) != MP_OKAY) 00143 goto X1Y1; /* x1y1 = x1y1 << 2*B */ 00144 00145 if (mp_add (&x0y0, &t1, &t1) != MP_OKAY) 00146 goto X1Y1; /* t1 = x0y0 + t1 */ 00147 if (mp_add (&t1, &x1y1, c) != MP_OKAY) 00148 goto X1Y1; /* t1 = x0y0 + t1 + x1y1 */ 00149 00150 /* Algorithm succeeded set the return code to MP_OKAY */ 00151 err = MP_OKAY; 00152 00153 X1Y1:mp_clear (&x1y1); 00154 X0Y0:mp_clear (&x0y0); 00155 T1:mp_clear (&t1); 00156 Y1:mp_clear (&y1); 00157 Y0:mp_clear (&y0); 00158 X1:mp_clear (&x1); 00159 X0:mp_clear (&x0); 00160 ERR: 00161 return err; 00162 } 00163 #endif 00164 00165 /* $Source: /cvsroot/tcl/libtommath/bn_mp_karatsuba_mul.c,v $ */ 00166 /* $Revision: 1.1.1.3 $ */ 00167 /* $Date: 2006/12/01 00:08:11 $ */
/* Generated on Wed Mar 12 12:18:24 2008 by Doxygen 1.5.1 */