Ignore:
File:
1 edited

Legend:

Unmodified
Added
Removed
  • uspace/lib/softfloat/generic/common.c

    rc67aff2 r750636a  
    11/*
    22 * Copyright (c) 2005 Josef Cejka
    3  * Copyright (c) 2011 Petr Koupy
    43 * All rights reserved.
    54 *
     
    3130 * @{
    3231 */
    33 /** @file Common helper operations.
     32/** @file
    3433 */
    3534
     
    3736#include <common.h>
    3837
    39 /* Table for fast leading zeroes counting. */
     38/* Table for fast leading zeroes counting */
    4039char zeroTable[256] = {
    4140        8, 7, 7, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, \
     
    5756};
    5857
    59 /**
    60  * Take fraction shifted by 10 bits to the left, round it, normalize it
    61  * and detect exceptions
    62  *
    63  * @param cexp Exponent with bias.
    64  * @param cfrac Fraction shifted 10 bits to the left with added hidden bit.
    65  * @param sign Resulting sign.
    66  * @return Finished double-precision float.
     58
     59
     60/** Take fraction shifted by 10 bits to left, round it, normalize it and detect exceptions
     61 * @param cexp exponent with bias
     62 * @param cfrac fraction shifted 10 places left with added hidden bit
     63 * @param sign
     64 * @return valied float64
    6765 */
    6866float64 finishFloat64(int32_t cexp, uint64_t cfrac, char sign)
     
    7371
    7472        /* find first nonzero digit and shift result and detect possibly underflow */
    75         while ((cexp > 0) && (cfrac) &&
    76             (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))))) {
     73        while ((cexp > 0) && (cfrac) && (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1 ) )))) {
    7774                cexp--;
    7875                cfrac <<= 1;
    79                 /* TODO: fix underflow */
    80         }
    81        
    82         if ((cexp < 0) || (cexp == 0 &&
    83             (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))))) {
     76                        /* TODO: fix underflow */
     77        };
     78       
     79        if ((cexp < 0) || ( cexp == 0 && (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))))) {
    8480                /* FIXME: underflow */
    8581                result.parts.exp = 0;
     
    9793               
    9894                if (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))) {
    99                         result.parts.fraction =
    100                             ((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK));
     95                       
     96                        result.parts.fraction = ((cfrac >>(64 - FLOAT64_FRACTION_SIZE - 2) ) & (~FLOAT64_HIDDEN_BIT_MASK));
    10197                        return result;
    10298                }       
     
    107103        ++cexp;
    108104
    109         if (cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))) {
     105        if (cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1 ))) {
    110106                ++cexp;
    111107                cfrac >>= 1;
     
    113109
    114110        /* check overflow */
    115         if (cexp >= FLOAT64_MAX_EXPONENT) {
     111        if (cexp >= FLOAT64_MAX_EXPONENT ) {
    116112                /* FIXME: overflow, return infinity */
    117113                result.parts.exp = FLOAT64_MAX_EXPONENT;
     
    120116        }
    121117
    122         result.parts.exp = (uint32_t) cexp;
    123        
    124         result.parts.fraction =
    125             ((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK));
     118        result.parts.exp = (uint32_t)cexp;
     119       
     120        result.parts.fraction = ((cfrac >>(64 - FLOAT64_FRACTION_SIZE - 2 ) ) & (~FLOAT64_HIDDEN_BIT_MASK));
    126121       
    127122        return result; 
    128123}
    129124
    130 /**
    131  * Take fraction, round it, normalize it and detect exceptions
    132  *
    133  * @param cexp Exponent with bias.
    134  * @param cfrac_hi High part of the fraction shifted 14 bits to the left
    135  *     with added hidden bit.
    136  * @param cfrac_lo Low part of the fraction shifted 14 bits to the left
    137  *     with added hidden bit.
    138  * @param sign Resulting sign.
    139  * @param shift_out Bits right-shifted out from fraction by the caller.
    140  * @return Finished quadruple-precision float.
    141  */
    142 float128 finishFloat128(int32_t cexp, uint64_t cfrac_hi, uint64_t cfrac_lo,
    143     char sign, uint64_t shift_out)
    144 {
    145         float128 result;
    146         uint64_t tmp_hi, tmp_lo;
    147 
    148         result.parts.sign = sign;
    149 
    150         /* find first nonzero digit and shift result and detect possibly underflow */
    151         lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
    152             1, &tmp_hi, &tmp_lo);
    153         and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
    154         while ((cexp > 0) && (lt128(0x0ll, 0x0ll, cfrac_hi, cfrac_lo)) &&
    155             (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo))) {
    156                 cexp--;
    157                 lshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
    158                 /* TODO: fix underflow */
    159 
    160                 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
    161                     1, &tmp_hi, &tmp_lo);
    162                 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
    163         }
    164 
    165         lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
    166             1, &tmp_hi, &tmp_lo);
    167         and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
    168         if ((cexp < 0) || (cexp == 0 &&
    169             (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)))) {
    170                 /* FIXME: underflow */
    171                 result.parts.exp = 0;
    172                 if ((cexp + FLOAT128_FRACTION_SIZE + 1) < 0) { /* +1 is place for rounding */
    173                         result.parts.frac_hi = 0x0ll;
    174                         result.parts.frac_lo = 0x0ll;
    175                         return result;
    176                 }
    177 
    178                 while (cexp < 0) {
    179                         cexp++;
    180                         rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
    181                 }
    182 
    183                 if (shift_out & (0x1ull < 64)) {
    184                         add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo);
    185                 }
    186 
    187                 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
    188                     1, &tmp_hi, &tmp_lo);
    189                 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
    190                 if (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
    191                         not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
    192                             &tmp_hi, &tmp_lo);
    193                         and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
    194                         result.parts.frac_hi = tmp_hi;
    195                         result.parts.frac_lo = tmp_lo;
    196                         return result;
    197                 }
    198         } else {
    199                 if (shift_out & (0x1ull < 64)) {
    200                         add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo);
    201                 }
    202         }
    203 
    204         ++cexp;
    205 
    206         lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
    207             1, &tmp_hi, &tmp_lo);
    208         and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
    209         if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
    210                 ++cexp;
    211                 rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
    212         }
    213 
    214         /* check overflow */
    215         if (cexp >= FLOAT128_MAX_EXPONENT) {
    216                 /* FIXME: overflow, return infinity */
    217                 result.parts.exp = FLOAT128_MAX_EXPONENT;
    218                 result.parts.frac_hi = 0x0ll;
    219                 result.parts.frac_lo = 0x0ll;
    220                 return result;
    221         }
    222 
    223         result.parts.exp = (uint32_t) cexp;
    224 
    225         not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
    226             &tmp_hi, &tmp_lo);
    227         and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
    228         result.parts.frac_hi = tmp_hi;
    229         result.parts.frac_lo = tmp_lo;
    230 
    231         return result; 
    232 }
    233 
    234 /**
    235  * Counts leading zeroes in byte.
    236  *
    237  * @param i Byte for which to count leading zeroes.
    238  * @return Number of detected leading zeroes.
     125/** Counts leading zeroes in 64bit unsigned integer
     126 * @param i
     127 */
     128int countZeroes64(uint64_t i)
     129{
     130        int j;
     131        for (j =0; j < 64; j += 8) {
     132                if ( i & (0xFFll << (56 - j))) {
     133                        return (j + countZeroes8(i >> (56 - j)));
     134                }
     135        }
     136
     137        return 64;
     138}
     139
     140/** Counts leading zeroes in 32bit unsigned integer
     141 * @param i
     142 */
     143int countZeroes32(uint32_t i)
     144{
     145        int j;
     146        for (j =0; j < 32; j += 8) {
     147                if ( i & (0xFF << (24 - j))) {
     148                        return (j + countZeroes8(i >> (24 - j)));
     149                }
     150        }
     151
     152        return 32;
     153}
     154
     155/** Counts leading zeroes in byte
     156 * @param i
    239157 */
    240158int countZeroes8(uint8_t i)
     
    243161}
    244162
    245 /**
    246  * Counts leading zeroes in 32bit unsigned integer.
    247  *
    248  * @param i Integer for which to count leading zeroes.
    249  * @return Number of detected leading zeroes.
    250  */
    251 int countZeroes32(uint32_t i)
    252 {
    253         int j;
    254         for (j = 0; j < 32; j += 8) {
    255                 if (i & (0xFF << (24 - j))) {
    256                         return (j + countZeroes8(i >> (24 - j)));
    257                 }
    258         }
    259 
    260         return 32;
    261 }
    262 
    263 /**
    264  * Counts leading zeroes in 64bit unsigned integer.
    265  *
    266  * @param i Integer for which to count leading zeroes.
    267  * @return Number of detected leading zeroes.
    268  */
    269 int countZeroes64(uint64_t i)
    270 {
    271         int j;
    272         for (j = 0; j < 64; j += 8) {
    273                 if (i & (0xFFll << (56 - j))) {
    274                         return (j + countZeroes8(i >> (56 - j)));
    275                 }
    276         }
    277 
    278         return 64;
    279 }
    280 
    281 /**
    282  * Round and normalize number expressed by exponent and fraction with
    283  * first bit (equal to hidden bit) at 30th bit.
    284  *
    285  * @param exp Exponent part.
    286  * @param fraction Fraction with hidden bit shifted to 30th bit.
     163/** Round and normalize number expressed by exponent and fraction with first bit (equal to hidden bit) at 30. bit
     164 * @param exp exponent
     165 * @param fraction part with hidden bit shifted to 30. bit
    287166 */
    288167void roundFloat32(int32_t *exp, uint32_t *fraction)
    289168{
    290169        /* rounding - if first bit after fraction is set then round up */
    291         (*fraction) += (0x1 << (32 - FLOAT32_FRACTION_SIZE - 3));
    292        
    293         if ((*fraction) &
    294             (FLOAT32_HIDDEN_BIT_MASK << (32 - FLOAT32_FRACTION_SIZE - 1))) {
     170        (*fraction) += (0x1 << 6);
     171       
     172        if ((*fraction) & (FLOAT32_HIDDEN_BIT_MASK << 8)) {
    295173                /* rounding overflow */
    296174                ++(*exp);
    297175                (*fraction) >>= 1;
    298         }
    299        
    300         if (((*exp) >= FLOAT32_MAX_EXPONENT) || ((*exp) < 0)) {
     176        };
     177       
     178        if (((*exp) >= FLOAT32_MAX_EXPONENT ) || ((*exp) < 0)) {
    301179                /* overflow - set infinity as result */
    302180                (*exp) = FLOAT32_MAX_EXPONENT;
    303181                (*fraction) = 0;
    304         }
    305 }
    306 
    307 /**
    308  * Round and normalize number expressed by exponent and fraction with
    309  * first bit (equal to hidden bit) at 62nd bit.
    310  *
    311  * @param exp Exponent part.
    312  * @param fraction Fraction with hidden bit shifted to 62nd bit.
     182                return;
     183        }
     184
     185        return;
     186}
     187
     188/** Round and normalize number expressed by exponent and fraction with first bit (equal to hidden bit) at 62. bit
     189 * @param exp exponent
     190 * @param fraction part with hidden bit shifted to 62. bit
    313191 */
    314192void roundFloat64(int32_t *exp, uint64_t *fraction)
    315193{
    316194        /* rounding - if first bit after fraction is set then round up */
    317         (*fraction) += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));
    318        
    319         if ((*fraction) &
    320             (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 3))) {
     195        (*fraction) += (0x1 << 9);
     196       
     197        if ((*fraction) & (FLOAT64_HIDDEN_BIT_MASK << 11)) {
    321198                /* rounding overflow */
    322199                ++(*exp);
    323200                (*fraction) >>= 1;
    324         }
    325        
    326         if (((*exp) >= FLOAT64_MAX_EXPONENT) || ((*exp) < 0)) {
     201        };
     202       
     203        if (((*exp) >= FLOAT64_MAX_EXPONENT ) || ((*exp) < 0)) {
    327204                /* overflow - set infinity as result */
    328205                (*exp) = FLOAT64_MAX_EXPONENT;
    329206                (*fraction) = 0;
    330         }
    331 }
    332 
    333 /**
    334  * Round and normalize number expressed by exponent and fraction with
    335  * first bit (equal to hidden bit) at 126th bit.
    336  *
    337  * @param exp Exponent part.
    338  * @param frac_hi High part of fraction part with hidden bit shifted to 126th bit.
    339  * @param frac_lo Low part of fraction part with hidden bit shifted to 126th bit.
    340  */
    341 void roundFloat128(int32_t *exp, uint64_t *frac_hi, uint64_t *frac_lo)
    342 {
    343         uint64_t tmp_hi, tmp_lo;
    344 
    345         /* rounding - if first bit after fraction is set then round up */
    346         lshift128(0x0ll, 0x1ll, (128 - FLOAT128_FRACTION_SIZE - 3), &tmp_hi, &tmp_lo);
    347         add128(*frac_hi, *frac_lo, tmp_hi, tmp_lo, frac_hi, frac_lo);
    348 
    349         lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
    350             (128 - FLOAT128_FRACTION_SIZE - 3), &tmp_hi, &tmp_lo);
    351         and128(*frac_hi, *frac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
    352         if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
    353                 /* rounding overflow */
    354                 ++(*exp);
    355                 rshift128(*frac_hi, *frac_lo, 1, frac_hi, frac_lo);
    356         }
    357 
    358         if (((*exp) >= FLOAT128_MAX_EXPONENT) || ((*exp) < 0)) {
    359                 /* overflow - set infinity as result */
    360                 (*exp) = FLOAT128_MAX_EXPONENT;
    361                 (*frac_hi) = 0;
    362                 (*frac_lo) = 0;
    363         }
    364 }
    365 
    366 /**
    367  * Logical shift left on the 128-bit operand.
    368  *
    369  * @param a_hi High part of the input operand.
    370  * @param a_lo Low part of the input operand.
    371  * @param shift Number of bits by witch to shift.
    372  * @param r_hi Address to store high part of the result.
    373  * @param r_lo Address to store low part of the result.
    374  */
    375 void lshift128(
    376     uint64_t a_hi, uint64_t a_lo, int shift,
    377     uint64_t *r_hi, uint64_t *r_lo)
    378 {
    379         if (shift <= 0) {
    380                 /* do nothing */
    381         } else if (shift >= 128) {
    382                 a_hi = 0;
    383                 a_lo = 0;
    384         } else if (shift >= 64) {
    385                 a_hi = a_lo << (shift - 64);
    386                 a_lo = 0;
    387         } else {
    388                 a_hi <<= shift;
    389                 a_hi |= a_lo >> (64 - shift);
    390                 a_lo <<= shift;
    391         }
    392 
    393         *r_hi = a_hi;
    394         *r_lo = a_lo;
    395 }
    396 
    397 /**
    398  * Logical shift right on the 128-bit operand.
    399  *
    400  * @param a_hi High part of the input operand.
    401  * @param a_lo Low part of the input operand.
    402  * @param shift Number of bits by witch to shift.
    403  * @param r_hi Address to store high part of the result.
    404  * @param r_lo Address to store low part of the result.
    405  */
    406 void rshift128(
    407     uint64_t a_hi, uint64_t a_lo, int shift,
    408     uint64_t *r_hi, uint64_t *r_lo)
    409 {
    410         if (shift <= 0) {
    411                 /* do nothing */
    412         } else  if (shift >= 128) {
    413                 a_hi = 0;
    414                 a_lo = 0;
    415         } else if (shift >= 64) {
    416                 a_lo = a_hi >> (shift - 64);
    417                 a_hi = 0;
    418         } else {
    419                 a_lo >>= shift;
    420                 a_lo |= a_hi << (64 - shift);
    421                 a_hi >>= shift;
    422         }
    423 
    424         *r_hi = a_hi;
    425         *r_lo = a_lo;
    426 }
    427 
    428 /**
    429  * Bitwise AND on 128-bit operands.
    430  *
    431  * @param a_hi High part of the first input operand.
    432  * @param a_lo Low part of the first input operand.
    433  * @param b_hi High part of the second input operand.
    434  * @param b_lo Low part of the second input operand.
    435  * @param r_hi Address to store high part of the result.
    436  * @param r_lo Address to store low part of the result.
    437  */
    438 void and128(
    439     uint64_t a_hi, uint64_t a_lo,
    440     uint64_t b_hi, uint64_t b_lo,
    441     uint64_t *r_hi, uint64_t *r_lo)
    442 {
    443         *r_hi = a_hi & b_hi;
    444         *r_lo = a_lo & b_lo;
    445 }
    446 
    447 /**
    448  * Bitwise inclusive OR on 128-bit operands.
    449  *
    450  * @param a_hi High part of the first input operand.
    451  * @param a_lo Low part of the first input operand.
    452  * @param b_hi High part of the second input operand.
    453  * @param b_lo Low part of the second input operand.
    454  * @param r_hi Address to store high part of the result.
    455  * @param r_lo Address to store low part of the result.
    456  */
    457 void or128(
    458     uint64_t a_hi, uint64_t a_lo,
    459     uint64_t b_hi, uint64_t b_lo,
    460     uint64_t *r_hi, uint64_t *r_lo)
    461 {
    462         *r_hi = a_hi | b_hi;
    463         *r_lo = a_lo | b_lo;
    464 }
    465 
    466 /**
    467  * Bitwise exclusive OR on 128-bit operands.
    468  *
    469  * @param a_hi High part of the first input operand.
    470  * @param a_lo Low part of the first input operand.
    471  * @param b_hi High part of the second input operand.
    472  * @param b_lo Low part of the second input operand.
    473  * @param r_hi Address to store high part of the result.
    474  * @param r_lo Address to store low part of the result.
    475  */
    476 void xor128(
    477     uint64_t a_hi, uint64_t a_lo,
    478     uint64_t b_hi, uint64_t b_lo,
    479     uint64_t *r_hi, uint64_t *r_lo)
    480 {
    481         *r_hi = a_hi ^ b_hi;
    482         *r_lo = a_lo ^ b_lo;
    483 }
    484 
    485 /**
    486  * Bitwise NOT on the 128-bit operand.
    487  *
    488  * @param a_hi High part of the input operand.
    489  * @param a_lo Low part of the input operand.
    490  * @param r_hi Address to store high part of the result.
    491  * @param r_lo Address to store low part of the result.
    492  */
    493 void not128(
    494     uint64_t a_hi, uint64_t a_lo,
    495         uint64_t *r_hi, uint64_t *r_lo)
    496 {
    497         *r_hi = ~a_hi;
    498         *r_lo = ~a_lo;
    499 }
    500 
    501 /**
    502  * Equality comparison of 128-bit operands.
    503  *
    504  * @param a_hi High part of the first input operand.
    505  * @param a_lo Low part of the first input operand.
    506  * @param b_hi High part of the second input operand.
    507  * @param b_lo Low part of the second input operand.
    508  * @return 1 if operands are equal, 0 otherwise.
    509  */
    510 int eq128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
    511 {
    512         return (a_hi == b_hi) && (a_lo == b_lo);
    513 }
    514 
    515 /**
    516  * Lower-or-equal comparison of 128-bit operands.
    517  *
    518  * @param a_hi High part of the first input operand.
    519  * @param a_lo Low part of the first input operand.
    520  * @param b_hi High part of the second input operand.
    521  * @param b_lo Low part of the second input operand.
    522  * @return 1 if a is lower or equal to b, 0 otherwise.
    523  */
    524 int le128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
    525 {
    526         return (a_hi < b_hi) || ((a_hi == b_hi) && (a_lo <= b_lo));
    527 }
    528 
    529 /**
    530  * Lower-than comparison of 128-bit operands.
    531  *
    532  * @param a_hi High part of the first input operand.
    533  * @param a_lo Low part of the first input operand.
    534  * @param b_hi High part of the second input operand.
    535  * @param b_lo Low part of the second input operand.
    536  * @return 1 if a is lower than b, 0 otherwise.
    537  */
    538 int lt128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
    539 {
    540         return (a_hi < b_hi) || ((a_hi == b_hi) && (a_lo < b_lo));
    541 }
    542 
    543 /**
    544  * Addition of two 128-bit unsigned integers.
    545  *
    546  * @param a_hi High part of the first input operand.
    547  * @param a_lo Low part of the first input operand.
    548  * @param b_hi High part of the second input operand.
    549  * @param b_lo Low part of the second input operand.
    550  * @param r_hi Address to store high part of the result.
    551  * @param r_lo Address to store low part of the result.
    552  */
    553 void add128(uint64_t a_hi, uint64_t a_lo,
    554     uint64_t b_hi, uint64_t b_lo,
    555     uint64_t *r_hi, uint64_t *r_lo)
    556 {
    557         uint64_t low = a_lo + b_lo;
    558         *r_lo = low;
    559         /* detect overflow to add a carry */
    560         *r_hi = a_hi + b_hi + (low < a_lo);
    561 }
    562 
    563 /**
    564  * Substraction of two 128-bit unsigned integers.
    565  *
    566  * @param a_hi High part of the first input operand.
    567  * @param a_lo Low part of the first input operand.
    568  * @param b_hi High part of the second input operand.
    569  * @param b_lo Low part of the second input operand.
    570  * @param r_hi Address to store high part of the result.
    571  * @param r_lo Address to store low part of the result.
    572  */
    573 void sub128(uint64_t a_hi, uint64_t a_lo,
    574     uint64_t b_hi, uint64_t b_lo,
    575     uint64_t *r_hi, uint64_t *r_lo)
    576 {
    577         *r_lo = a_lo - b_lo;
    578         /* detect underflow to substract a carry */
    579         *r_hi = a_hi - b_hi - (a_lo < b_lo);
    580 }
    581 
    582 /**
    583  * Multiplication of two 64-bit unsigned integers.
    584  *
    585  * @param a First input operand.
    586  * @param b Second input operand.
    587  * @param r_hi Address to store high part of the result.
    588  * @param r_lo Address to store low part of the result.
    589  */
    590 void mul64(uint64_t a, uint64_t b, uint64_t *r_hi, uint64_t *r_lo)
    591 {
    592         uint64_t low, high, middle1, middle2;
    593         uint32_t alow, blow;
    594 
    595         alow = a & 0xFFFFFFFF;
    596         blow = b & 0xFFFFFFFF;
    597 
    598         a >>= 32;
    599         b >>= 32;
    600 
    601         low = ((uint64_t) alow) * blow;
    602         middle1 = a * blow;
    603         middle2 = alow * b;
    604         high = a * b;
    605 
    606         middle1 += middle2;
    607         high += (((uint64_t) (middle1 < middle2)) << 32) + (middle1 >> 32);
    608         middle1 <<= 32;
    609         low += middle1;
    610         high += (low < middle1);
    611         *r_lo = low;
    612         *r_hi = high;
    613 }
    614 
    615 /**
    616  * Multiplication of two 128-bit unsigned integers.
    617  *
    618  * @param a_hi High part of the first input operand.
    619  * @param a_lo Low part of the first input operand.
    620  * @param b_hi High part of the second input operand.
    621  * @param b_lo Low part of the second input operand.
    622  * @param r_hihi Address to store first (highest) quarter of the result.
    623  * @param r_hilo Address to store second quarter of the result.
    624  * @param r_lohi Address to store third quarter of the result.
    625  * @param r_lolo Address to store fourth (lowest) quarter of the result.
    626  */
    627 void mul128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo,
    628     uint64_t *r_hihi, uint64_t *r_hilo, uint64_t *r_lohi, uint64_t *r_lolo)
    629 {
    630         uint64_t hihi, hilo, lohi, lolo;
    631         uint64_t tmp1, tmp2;
    632 
    633         mul64(a_lo, b_lo, &lohi, &lolo);
    634         mul64(a_lo, b_hi, &hilo, &tmp2);
    635         add128(hilo, tmp2, 0x0ll, lohi, &hilo, &lohi);
    636         mul64(a_hi, b_hi, &hihi, &tmp1);
    637         add128(hihi, tmp1, 0x0ll, hilo, &hihi, &hilo);
    638         mul64(a_hi, b_lo, &tmp1, &tmp2);
    639         add128(tmp1, tmp2, 0x0ll, lohi, &tmp1, &lohi);
    640         add128(hihi, hilo, 0x0ll, tmp1, &hihi, &hilo);
    641 
    642         *r_hihi = hihi;
    643         *r_hilo = hilo;
    644         *r_lohi = lohi;
    645         *r_lolo = lolo;
    646 }
    647 
    648 /**
    649  * Estimate the quotient of 128-bit unsigned divident and 64-bit unsigned
    650  * divisor.
    651  *
    652  * @param a_hi High part of the divident.
    653  * @param a_lo Low part of the divident.
    654  * @param b Divisor.
    655  * @return Quotient approximation.
    656  */
    657 uint64_t div128est(uint64_t a_hi, uint64_t a_lo, uint64_t b)
    658 {
    659         uint64_t b_hi, b_lo;
    660         uint64_t rem_hi, rem_lo;
    661         uint64_t tmp_hi, tmp_lo;
    662         uint64_t result;
    663 
    664         if (b <= a_hi) {
    665                 return 0xFFFFFFFFFFFFFFFFull;
    666         }
    667 
    668         b_hi = b >> 32;
    669         result = ((b_hi << 32) <= a_hi) ? (0xFFFFFFFFull << 32) : (a_hi / b_hi) << 32;
    670         mul64(b, result, &tmp_hi, &tmp_lo);
    671         sub128(a_hi, a_lo, tmp_hi, tmp_lo, &rem_hi, &rem_lo);
    672        
    673         while ((int64_t) rem_hi < 0) {
    674                 result -= 0x1ll << 32;
    675                 b_lo = b << 32;
    676                 add128(rem_hi, rem_lo, b_hi, b_lo, &rem_hi, &rem_lo);
    677         }
    678 
    679         rem_hi = (rem_hi << 32) | (rem_lo >> 32);
    680         if ((b_hi << 32) <= rem_hi) {
    681                 result |= 0xFFFFFFFF;
    682         } else {
    683                 result |= rem_hi / b_hi;
    684         }
    685 
    686         return result;
     207                return;
     208        }
     209
     210        return;
    687211}
    688212
Note: See TracChangeset for help on using the changeset viewer.