/home/ntakagi/work/tcl8.5.1/libtommath/bn_mp_toom_mul.c Source File

00001 #include <tommath.h>
00002 #ifdef BN_MP_TOOM_MUL_C
00003 /* LibTomMath, multiple-precision integer library -- Tom St Denis
00004  *
00005  * LibTomMath is a library that provides multiple-precision
00006  * integer arithmetic as well as number theoretic functionality.
00007  *
00008  * The library was designed directly after the MPI library by
00009  * Michael Fromberger but has been written from scratch with
00010  * additional optimizations in place.
00011  *
00012  * The library is free for all purposes without any express
00013  * guarantee it works.
00014  *
00015  * Tom St Denis, tomstdenis@gmail.com, http://math.libtomcrypt.com
00016  */
00017 
00018 /* multiplication using the Toom-Cook 3-way algorithm 
00019  *
00020  * Much more complicated than Karatsuba but has a lower 
00021  * asymptotic running time of O(N**1.464).  This algorithm is 
00022  * only particularly useful on VERY large inputs 
00023  * (we're talking 1000s of digits here...).
00024 */
00025 int mp_toom_mul(mp_int *a, mp_int *b, mp_int *c)
00026 {
00027     mp_int w0, w1, w2, w3, w4, tmp1, tmp2, a0, a1, a2, b0, b1, b2;
00028     int res, B;
00029         
00030     /* init temps */
00031     if ((res = mp_init_multi(&w0, &w1, &w2, &w3, &w4, 
00032                              &a0, &a1, &a2, &b0, &b1, 
00033                              &b2, &tmp1, &tmp2, NULL)) != MP_OKAY) {
00034        return res;
00035     }
00036     
00037     /* B */
00038     B = MIN(a->used, b->used) / 3;
00039     
00040     /* a = a2 * B**2 + a1 * B + a0 */
00041     if ((res = mp_mod_2d(a, DIGIT_BIT * B, &a0)) != MP_OKAY) {
00042        goto ERR;
00043     }
00044 
00045     if ((res = mp_copy(a, &a1)) != MP_OKAY) {
00046        goto ERR;
00047     }
00048     mp_rshd(&a1, B);
00049     mp_mod_2d(&a1, DIGIT_BIT * B, &a1);
00050 
00051     if ((res = mp_copy(a, &a2)) != MP_OKAY) {
00052        goto ERR;
00053     }
00054     mp_rshd(&a2, B*2);
00055     
00056     /* b = b2 * B**2 + b1 * B + b0 */
00057     if ((res = mp_mod_2d(b, DIGIT_BIT * B, &b0)) != MP_OKAY) {
00058        goto ERR;
00059     }
00060 
00061     if ((res = mp_copy(b, &b1)) != MP_OKAY) {
00062        goto ERR;
00063     }
00064     mp_rshd(&b1, B);
00065     mp_mod_2d(&b1, DIGIT_BIT * B, &b1);
00066 
00067     if ((res = mp_copy(b, &b2)) != MP_OKAY) {
00068        goto ERR;
00069     }
00070     mp_rshd(&b2, B*2);
00071     
00072     /* w0 = a0*b0 */
00073     if ((res = mp_mul(&a0, &b0, &w0)) != MP_OKAY) {
00074        goto ERR;
00075     }
00076     
00077     /* w4 = a2 * b2 */
00078     if ((res = mp_mul(&a2, &b2, &w4)) != MP_OKAY) {
00079        goto ERR;
00080     }
00081     
00082     /* w1 = (a2 + 2(a1 + 2a0))(b2 + 2(b1 + 2b0)) */
00083     if ((res = mp_mul_2(&a0, &tmp1)) != MP_OKAY) {
00084        goto ERR;
00085     }
00086     if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
00087        goto ERR;
00088     }
00089     if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
00090        goto ERR;
00091     }
00092     if ((res = mp_add(&tmp1, &a2, &tmp1)) != MP_OKAY) {
00093        goto ERR;
00094     }
00095     
00096     if ((res = mp_mul_2(&b0, &tmp2)) != MP_OKAY) {
00097        goto ERR;
00098     }
00099     if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {
00100        goto ERR;
00101     }
00102     if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) {
00103        goto ERR;
00104     }
00105     if ((res = mp_add(&tmp2, &b2, &tmp2)) != MP_OKAY) {
00106        goto ERR;
00107     }
00108     
00109     if ((res = mp_mul(&tmp1, &tmp2, &w1)) != MP_OKAY) {
00110        goto ERR;
00111     }
00112     
00113     /* w3 = (a0 + 2(a1 + 2a2))(b0 + 2(b1 + 2b2)) */
00114     if ((res = mp_mul_2(&a2, &tmp1)) != MP_OKAY) {
00115        goto ERR;
00116     }
00117     if ((res = mp_add(&tmp1, &a1, &tmp1)) != MP_OKAY) {
00118        goto ERR;
00119     }
00120     if ((res = mp_mul_2(&tmp1, &tmp1)) != MP_OKAY) {
00121        goto ERR;
00122     }
00123     if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
00124        goto ERR;
00125     }
00126     
00127     if ((res = mp_mul_2(&b2, &tmp2)) != MP_OKAY) {
00128        goto ERR;
00129     }
00130     if ((res = mp_add(&tmp2, &b1, &tmp2)) != MP_OKAY) {
00131        goto ERR;
00132     }
00133     if ((res = mp_mul_2(&tmp2, &tmp2)) != MP_OKAY) {
00134        goto ERR;
00135     }
00136     if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {
00137        goto ERR;
00138     }
00139     
00140     if ((res = mp_mul(&tmp1, &tmp2, &w3)) != MP_OKAY) {
00141        goto ERR;
00142     }
00143     
00144 
00145     /* w2 = (a2 + a1 + a0)(b2 + b1 + b0) */
00146     if ((res = mp_add(&a2, &a1, &tmp1)) != MP_OKAY) {
00147        goto ERR;
00148     }
00149     if ((res = mp_add(&tmp1, &a0, &tmp1)) != MP_OKAY) {
00150        goto ERR;
00151     }
00152     if ((res = mp_add(&b2, &b1, &tmp2)) != MP_OKAY) {
00153        goto ERR;
00154     }
00155     if ((res = mp_add(&tmp2, &b0, &tmp2)) != MP_OKAY) {
00156        goto ERR;
00157     }
00158     if ((res = mp_mul(&tmp1, &tmp2, &w2)) != MP_OKAY) {
00159        goto ERR;
00160     }
00161     
00162     /* now solve the matrix 
00163     
00164        0  0  0  0  1
00165        1  2  4  8  16
00166        1  1  1  1  1
00167        16 8  4  2  1
00168        1  0  0  0  0
00169        
00170        using 12 subtractions, 4 shifts, 
00171               2 small divisions and 1 small multiplication 
00172      */
00173      
00174      /* r1 - r4 */
00175      if ((res = mp_sub(&w1, &w4, &w1)) != MP_OKAY) {
00176         goto ERR;
00177      }
00178      /* r3 - r0 */
00179      if ((res = mp_sub(&w3, &w0, &w3)) != MP_OKAY) {
00180         goto ERR;
00181      }
00182      /* r1/2 */
00183      if ((res = mp_div_2(&w1, &w1)) != MP_OKAY) {
00184         goto ERR;
00185      }
00186      /* r3/2 */
00187      if ((res = mp_div_2(&w3, &w3)) != MP_OKAY) {
00188         goto ERR;
00189      }
00190      /* r2 - r0 - r4 */
00191      if ((res = mp_sub(&w2, &w0, &w2)) != MP_OKAY) {
00192         goto ERR;
00193      }
00194      if ((res = mp_sub(&w2, &w4, &w2)) != MP_OKAY) {
00195         goto ERR;
00196      }
00197      /* r1 - r2 */
00198      if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
00199         goto ERR;
00200      }
00201      /* r3 - r2 */
00202      if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
00203         goto ERR;
00204      }
00205      /* r1 - 8r0 */
00206      if ((res = mp_mul_2d(&w0, 3, &tmp1)) != MP_OKAY) {
00207         goto ERR;
00208      }
00209      if ((res = mp_sub(&w1, &tmp1, &w1)) != MP_OKAY) {
00210         goto ERR;
00211      }
00212      /* r3 - 8r4 */
00213      if ((res = mp_mul_2d(&w4, 3, &tmp1)) != MP_OKAY) {
00214         goto ERR;
00215      }
00216      if ((res = mp_sub(&w3, &tmp1, &w3)) != MP_OKAY) {
00217         goto ERR;
00218      }
00219      /* 3r2 - r1 - r3 */
00220      if ((res = mp_mul_d(&w2, 3, &w2)) != MP_OKAY) {
00221         goto ERR;
00222      }
00223      if ((res = mp_sub(&w2, &w1, &w2)) != MP_OKAY) {
00224         goto ERR;
00225      }
00226      if ((res = mp_sub(&w2, &w3, &w2)) != MP_OKAY) {
00227         goto ERR;
00228      }
00229      /* r1 - r2 */
00230      if ((res = mp_sub(&w1, &w2, &w1)) != MP_OKAY) {
00231         goto ERR;
00232      }
00233      /* r3 - r2 */
00234      if ((res = mp_sub(&w3, &w2, &w3)) != MP_OKAY) {
00235         goto ERR;
00236      }
00237      /* r1/3 */
00238      if ((res = mp_div_3(&w1, &w1, NULL)) != MP_OKAY) {
00239         goto ERR;
00240      }
00241      /* r3/3 */
00242      if ((res = mp_div_3(&w3, &w3, NULL)) != MP_OKAY) {
00243         goto ERR;
00244      }
00245      
00246      /* at this point shift W[n] by B*n */
00247      if ((res = mp_lshd(&w1, 1*B)) != MP_OKAY) {
00248         goto ERR;
00249      }
00250      if ((res = mp_lshd(&w2, 2*B)) != MP_OKAY) {
00251         goto ERR;
00252      }
00253      if ((res = mp_lshd(&w3, 3*B)) != MP_OKAY) {
00254         goto ERR;
00255      }
00256      if ((res = mp_lshd(&w4, 4*B)) != MP_OKAY) {
00257         goto ERR;
00258      }     
00259      
00260      if ((res = mp_add(&w0, &w1, c)) != MP_OKAY) {
00261         goto ERR;
00262      }
00263      if ((res = mp_add(&w2, &w3, &tmp1)) != MP_OKAY) {
00264         goto ERR;
00265      }
00266      if ((res = mp_add(&w4, &tmp1, &tmp1)) != MP_OKAY) {
00267         goto ERR;
00268      }
00269      if ((res = mp_add(&tmp1, c, c)) != MP_OKAY) {
00270         goto ERR;
00271      }     
00272      
00273 ERR:
00274      mp_clear_multi(&w0, &w1, &w2, &w3, &w4, 
00275                     &a0, &a1, &a2, &b0, &b1, 
00276                     &b2, &tmp1, &tmp2, NULL);
00277      return res;
00278 }     
00279      
00280 #endif
00281 
00282 /* $Source: /cvsroot/tcl/libtommath/bn_mp_toom_mul.c,v $ */
00283 /* $Revision: 1.1.1.4 $ */
00284 /* $Date: 2006/12/01 00:08:11 $ */