mul.c

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2005 Josef Cejka
00003  * All rights reserved.
00004  *
00005  * Redistribution and use in source and binary forms, with or without
00006  * modification, are permitted provided that the following conditions
00007  * are met:
00008  *
00009  * - Redistributions of source code must retain the above copyright
00010  *   notice, this list of conditions and the following disclaimer.
00011  * - Redistributions in binary form must reproduce the above copyright
00012  *   notice, this list of conditions and the following disclaimer in the
00013  *   documentation and/or other materials provided with the distribution.
00014  * - The name of the author may not be used to endorse or promote products
00015  *   derived from this software without specific prior written permission.
00016  *
00017  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
00018  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
00019  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
00020  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
00021  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
00022  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00023  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00024  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00025  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
00026  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00027  */
00028 
00035 #include<sftypes.h>
00036 #include<mul.h>
00037 #include<comparison.h>
00038 #include<common.h>
00039 
00043 float32 mulFloat32(float32 a, float32 b)
00044 {
00045         float32 result;
00046         uint64_t frac1, frac2;
00047         int32_t exp;
00048 
00049         result.parts.sign = a.parts.sign ^ b.parts.sign;
00050         
00051         if (isFloat32NaN(a) || isFloat32NaN(b) ) {
00052                 /* TODO: fix SigNaNs */
00053                 if (isFloat32SigNaN(a)) {
00054                         result.parts.fraction = a.parts.fraction;
00055                         result.parts.exp = a.parts.exp;
00056                         return result;
00057                 };
00058                 if (isFloat32SigNaN(b)) { /* TODO: fix SigNaN */
00059                         result.parts.fraction = b.parts.fraction;
00060                         result.parts.exp = b.parts.exp;
00061                         return result;
00062                 };
00063                 /* set NaN as result */
00064                 result.binary = FLOAT32_NAN;
00065                 return result;
00066         };
00067                 
00068         if (isFloat32Infinity(a)) { 
00069                 if (isFloat32Zero(b)) {
00070                         /* FIXME: zero * infinity */
00071                         result.binary = FLOAT32_NAN;
00072                         return result;
00073                 }
00074                 result.parts.fraction = a.parts.fraction;
00075                 result.parts.exp = a.parts.exp;
00076                 return result;
00077         }
00078 
00079         if (isFloat32Infinity(b)) { 
00080                 if (isFloat32Zero(a)) {
00081                         /* FIXME: zero * infinity */
00082                         result.binary = FLOAT32_NAN;
00083                         return result;
00084                 }
00085                 result.parts.fraction = b.parts.fraction;
00086                 result.parts.exp = b.parts.exp;
00087                 return result;
00088         }
00089 
00090         /* exp is signed so we can easy detect underflow */
00091         exp = a.parts.exp + b.parts.exp;
00092         exp -= FLOAT32_BIAS;
00093         
00094         if (exp >= FLOAT32_MAX_EXPONENT) {
00095                 /* FIXME: overflow */
00096                 /* set infinity as result */
00097                 result.binary = FLOAT32_INF;
00098                 result.parts.sign = a.parts.sign ^ b.parts.sign;
00099                 return result;
00100         };
00101         
00102         if (exp < 0) { 
00103                 /* FIXME: underflow */
00104                 /* return signed zero */
00105                 result.parts.fraction = 0x0;
00106                 result.parts.exp = 0x0;
00107                 return result;
00108         };
00109         
00110         frac1 = a.parts.fraction;
00111         if (a.parts.exp > 0) {
00112                 frac1 |= FLOAT32_HIDDEN_BIT_MASK;
00113         } else {
00114                 ++exp;
00115         };
00116         
00117         frac2 = b.parts.fraction;
00118 
00119         if (b.parts.exp > 0) {
00120                 frac2 |= FLOAT32_HIDDEN_BIT_MASK;
00121         } else {
00122                 ++exp;
00123         };
00124 
00125         frac1 <<= 1; /* one bit space for rounding */
00126 
00127         frac1 = frac1 * frac2;
00128 /* round and return */
00129         
00130         while ((exp < FLOAT32_MAX_EXPONENT) && (frac1 >= ( 1 << (FLOAT32_FRACTION_SIZE + 2)))) { 
00131                 /* 23 bits of fraction + one more for hidden bit (all shifted 1 bit left)*/
00132                 ++exp;
00133                 frac1 >>= 1;
00134         };
00135 
00136         /* rounding */
00137         /* ++frac1; FIXME: not works - without it is ok */
00138         frac1 >>= 1; /* shift off rounding space */
00139         
00140         if ((exp < FLOAT32_MAX_EXPONENT) && (frac1 >= (1 << (FLOAT32_FRACTION_SIZE + 1)))) {
00141                 ++exp;
00142                 frac1 >>= 1;
00143         };
00144 
00145         if (exp >= FLOAT32_MAX_EXPONENT ) {     
00146                 /* TODO: fix overflow */
00147                 /* return infinity*/
00148                 result.parts.exp = FLOAT32_MAX_EXPONENT;
00149                 result.parts.fraction = 0x0;
00150                 return result;
00151         }
00152         
00153         exp -= FLOAT32_FRACTION_SIZE;
00154 
00155         if (exp <= FLOAT32_FRACTION_SIZE) { 
00156                 /* denormalized number */
00157                 frac1 >>= 1; /* denormalize */
00158                 while ((frac1 > 0) && (exp < 0)) {
00159                         frac1 >>= 1;
00160                         ++exp;
00161                 };
00162                 if (frac1 == 0) {
00163                         /* FIXME : underflow */
00164                 result.parts.exp = 0;
00165                 result.parts.fraction = 0;
00166                 return result;
00167                 };
00168         };
00169         result.parts.exp = exp; 
00170         result.parts.fraction = frac1 & ( (1 << FLOAT32_FRACTION_SIZE) - 1);
00171         
00172         return result;  
00173         
00174 }
00175 
00179 float64 mulFloat64(float64 a, float64 b)
00180 {
00181         float64 result;
00182         uint64_t frac1, frac2;
00183         int32_t exp;
00184 
00185         result.parts.sign = a.parts.sign ^ b.parts.sign;
00186         
00187         if (isFloat64NaN(a) || isFloat64NaN(b) ) {
00188                 /* TODO: fix SigNaNs */
00189                 if (isFloat64SigNaN(a)) {
00190                         result.parts.fraction = a.parts.fraction;
00191                         result.parts.exp = a.parts.exp;
00192                         return result;
00193                 };
00194                 if (isFloat64SigNaN(b)) { /* TODO: fix SigNaN */
00195                         result.parts.fraction = b.parts.fraction;
00196                         result.parts.exp = b.parts.exp;
00197                         return result;
00198                 };
00199                 /* set NaN as result */
00200                 result.binary = FLOAT64_NAN;
00201                 return result;
00202         };
00203                 
00204         if (isFloat64Infinity(a)) { 
00205                 if (isFloat64Zero(b)) {
00206                         /* FIXME: zero * infinity */
00207                         result.binary = FLOAT64_NAN;
00208                         return result;
00209                 }
00210                 result.parts.fraction = a.parts.fraction;
00211                 result.parts.exp = a.parts.exp;
00212                 return result;
00213         }
00214 
00215         if (isFloat64Infinity(b)) { 
00216                 if (isFloat64Zero(a)) {
00217                         /* FIXME: zero * infinity */
00218                         result.binary = FLOAT64_NAN;
00219                         return result;
00220                 }
00221                 result.parts.fraction = b.parts.fraction;
00222                 result.parts.exp = b.parts.exp;
00223                 return result;
00224         }
00225 
00226         /* exp is signed so we can easy detect underflow */
00227         exp = a.parts.exp + b.parts.exp - FLOAT64_BIAS;
00228         
00229         frac1 = a.parts.fraction;
00230 
00231         if (a.parts.exp > 0) {
00232                 frac1 |= FLOAT64_HIDDEN_BIT_MASK;
00233         } else {
00234                 ++exp;
00235         };
00236         
00237         frac2 = b.parts.fraction;
00238 
00239         if (b.parts.exp > 0) {
00240                 frac2 |= FLOAT64_HIDDEN_BIT_MASK;
00241         } else {
00242                 ++exp;
00243         };
00244 
00245         frac1 <<= (64 - FLOAT64_FRACTION_SIZE - 1);
00246         frac2 <<= (64 - FLOAT64_FRACTION_SIZE - 2);
00247 
00248         mul64integers(frac1, frac2, &frac1, &frac2);
00249 
00250         frac2 |= (frac1 != 0);
00251         if (frac2 & (0x1ll << 62)) {
00252                 frac2 <<= 1;
00253                 exp--;
00254         }
00255 
00256         result = finishFloat64(exp, frac2, result.parts.sign);
00257         return result;
00258 }
00259 
00266 void mul64integers(uint64_t a,uint64_t b, uint64_t *lo, uint64_t *hi)
00267 {
00268         uint64_t low, high, middle1, middle2;
00269         uint32_t alow, blow;
00270 
00271         alow = a & 0xFFFFFFFF;
00272         blow = b & 0xFFFFFFFF;
00273         
00274         a >>= 32;
00275         b >>= 32;
00276         
00277         low = ((uint64_t)alow) * blow;
00278         middle1 = a * blow;
00279         middle2 = alow * b;
00280         high = a * b;
00281 
00282         middle1 += middle2;
00283         high += (((uint64_t)(middle1 < middle2)) << 32) + (middle1 >> 32);
00284         middle1 <<= 32;
00285         low += middle1;
00286         high += (low < middle1);
00287         *lo = low;
00288         *hi = high;
00289         
00290         return;
00291 }
00292 

Generated on Sun Jun 18 18:00:18 2006 for HelenOS Userspace (ia64) by  doxygen 1.4.6