Timestamp:
2011-08-06T07:04:50Z
Author:
Petr Koupy <petr.koupy@…>
Branches:
lfn, master, serial, ticket/834-toolchain-update, topic/msim-upgrade, topic/simplify-dev-export
Children:
d3e241a, e0e922d
Parents:
9a6034a
Message:

Quadruple-precision softfloat, coding style improvements. Details below…

Highlights:

  • completed double-precision support
  • added quadruple-precision support
  • added SPARC quadruple-precision wrappers
  • added doxygen comments
  • corrected and unified coding style

Current state of the softfloat library:

Support for single, double and quadruple precision is now almost complete (apart from power, square root, complex multiplication and complex division) and provides the same set of features, i.e. the support for all three precisions is now aligned. To extend the softfloat library consistently, quadruple precision was added in the same spirit as the existing single and double precision written by Josef Cejka in 2006 - that is, with relaxed standard compliance in corner cases, while the mission-critical code sections are heavily inspired by the widely used softfloat library written by John R. Hauser (although I personally think it would be more appropriate for HelenOS to use something less optimized, shorter and more readable).
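
For orientation, the quadruple-precision type targeted here is the IEEE 754 binary128 format: 1 sign bit, a 15-bit biased exponent and a 112-bit fraction, which the library carries as two 64-bit halves (the frac_hi/frac_lo fields visible in the diff below). The following is only an illustrative sketch of such a layout - the names and the exact bitfield packing are assumptions, not a copy of the HelenOS headers:

#include <stdint.h>

/*
 * Illustrative sketch of a binary128 layout (not taken from the HelenOS
 * sources): 1-bit sign, 15-bit biased exponent (bias 16383) and a 112-bit
 * fraction split into a 48-bit high part and a 64-bit low part.
 * Bitfield order and packing are compiler- and endianness-dependent.
 */
typedef union {
        struct {
                uint64_t frac_lo;      /* lower 64 bits of the fraction */
                uint64_t frac_hi : 48; /* upper 48 bits of the fraction */
                uint64_t exp : 15;     /* biased exponent */
                uint64_t sign : 1;     /* sign bit */
        } parts;
        struct {
                uint64_t lo;
                uint64_t hi;
        } bin;
} float128_sketch;

/*
 * The implicit (hidden) bit sits just above the 112-bit fraction, i.e. at
 * bit 48 of the high word; presumably this is what the
 * FLOAT128_HIDDEN_BIT_MASK_HI/_LO constants used in the diff encode.
 */
#define SKETCH_HIDDEN_BIT_MASK_HI 0x0001000000000000ull
#define SKETCH_HIDDEN_BIT_MASK_LO 0x0000000000000000ull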

Most of the quadruple-precision code is simply the double-precision code adapted to work on 128-bit variables. That means that if there is a TODO, FIXME or some defect in the single or double-precision code, it is most likely also present in the quadruple-precision code. Please note that the quadruple-precision functions are currently untested - that is a challenging task in itself, especially when the ports that use them are either unfinished (mips64) or poorly supported by simulators (sparc64). To test the whole softfloat library, one would probably have to either write a very non-trivial native tester, or take an existing one (e.g. TestFloat by J. R. Hauser) and port it to HelenOS (or rip the softfloat library out of HelenOS and test it on a host system). At the time of writing, the only code dependent on the quadruple-precision functions (on mips64 and sparc64) is the libposix strtold() function (and its callers, most notably the scanf backend).
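
To make the host-system testing idea above concrete, here is a rough sketch (not part of this changeset) of a brute-force comparison tester. It assumes the softfloat sources build on the host, that an addFloat128() entry point exists analogously to the double-precision API (an assumption - only the helpers in common.c appear in this diff), and that the host long double is IEEE binary128 (e.g. on sparc64) so that bit patterns can be compared directly:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sftypes.h"    /* float128 type (header name assumed) */
#include "add.h"        /* addFloat128() (assumed to exist) */

/*
 * Reinterpret the host long double as the softfloat representation and back.
 * Valid only if both are 128-bit IEEE binary128 with the same endianness.
 */
static float128 to_sf(long double x)
{
        float128 f;
        memcpy(&f, &x, sizeof f);
        return f;
}

static long double from_sf(float128 f)
{
        long double x;
        memcpy(&x, &f, sizeof x);
        return x;
}

int main(void)
{
        srand(42);
        for (int i = 0; i < 1000000; i++) {
                long double a = ((long double) rand() - RAND_MAX / 2) / 997.0L;
                long double b = ((long double) rand() - RAND_MAX / 2) / 131.0L;

                long double expected = a + b;
                long double actual = from_sf(addFloat128(to_sf(a), to_sf(b)));

                /*
                 * A bit-exact match is expected for ordinary values;
                 * mismatches in corner cases would point at the relaxed
                 * compliance mentioned above.
                 */
                if (memcmp(&expected, &actual, sizeof expected) != 0)
                        printf("mismatch: %Lg + %Lg\n", a, b);
        }
        return 0;
}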

File:
1 edited

Legend:

  • '-' removed line
  • '+' added line
  • ' ' unmodified line
  • uspace/lib/softfloat/generic/common.c

--- uspace/lib/softfloat/generic/common.c       (r9a6034a)
+++ uspace/lib/softfloat/generic/common.c       (rc67aff2)

 /*
  * Copyright (c) 2005 Josef Cejka
+ * Copyright (c) 2011 Petr Koupy
  * All rights reserved.
  *
…
  * @{
  */
-/** @file
+/** @file Common helper operations.
  */
 
…
 #include <common.h>
 
-/* Table for fast leading zeroes counting */
+/* Table for fast leading zeroes counting. */
 char zeroTable[256] = {
         8, 7, 7, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, \
…
 };
 
-
-
-/** Take fraction shifted by 10 bits to left, round it, normalize it and detect exceptions
- * @param cexp exponent with bias
- * @param cfrac fraction shifted 10 places left with added hidden bit
- * @param sign
- * @return valied float64
+/**
+ * Take fraction shifted by 10 bits to the left, round it, normalize it
+ * and detect exceptions
+ *
+ * @param cexp Exponent with bias.
+ * @param cfrac Fraction shifted 10 bits to the left with added hidden bit.
+ * @param sign Resulting sign.
+ * @return Finished double-precision float.
  */
 float64 finishFloat64(int32_t cexp, uint64_t cfrac, char sign)
…
 
         /* find first nonzero digit and shift result and detect possibly underflow */
-        while ((cexp > 0) && (cfrac) && (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1 ) )))) {
+        while ((cexp > 0) && (cfrac) &&
+            (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))))) {
                 cexp--;
                 cfrac <<= 1;
-                        /* TODO: fix underflow */
-        };
-
-        if ((cexp < 0) || ( cexp == 0 && (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))))) {
+                /* TODO: fix underflow */
+        }
+
+        if ((cexp < 0) || (cexp == 0 &&
+            (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))))) {
                 /* FIXME: underflow */
                 result.parts.exp = 0;
…
 
                 if (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))) {
-
-                        result.parts.fraction = ((cfrac >>(64 - FLOAT64_FRACTION_SIZE - 2) ) & (~FLOAT64_HIDDEN_BIT_MASK));
+                        result.parts.fraction =
+                            ((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK));
                         return result;
                 }
…
         ++cexp;
 
-        if (cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1 ))) {
+        if (cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))) {
                 ++cexp;
                 cfrac >>= 1;
…
 
         /* check overflow */
-        if (cexp >= FLOAT64_MAX_EXPONENT ) {
+        if (cexp >= FLOAT64_MAX_EXPONENT) {
                 /* FIXME: overflow, return infinity */
                 result.parts.exp = FLOAT64_MAX_EXPONENT;
…
         }
 
-        result.parts.exp = (uint32_t)cexp;
-
-        result.parts.fraction = ((cfrac >>(64 - FLOAT64_FRACTION_SIZE - 2 ) ) & (~FLOAT64_HIDDEN_BIT_MASK));
+        result.parts.exp = (uint32_t) cexp;
+
+        result.parts.fraction =
+            ((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK));
 
         return result;
 }
 
-/** Counts leading zeroes in 64bit unsigned integer
- * @param i
+/**
+ * Take fraction, round it, normalize it and detect exceptions
+ *
+ * @param cexp Exponent with bias.
+ * @param cfrac_hi High part of the fraction shifted 14 bits to the left
+ *     with added hidden bit.
+ * @param cfrac_lo Low part of the fraction shifted 14 bits to the left
+ *     with added hidden bit.
+ * @param sign Resulting sign.
+ * @param shift_out Bits right-shifted out from fraction by the caller.
+ * @return Finished quadruple-precision float.
+ */
+float128 finishFloat128(int32_t cexp, uint64_t cfrac_hi, uint64_t cfrac_lo,
+    char sign, uint64_t shift_out)
+{
+        float128 result;
+        uint64_t tmp_hi, tmp_lo;
+
+        result.parts.sign = sign;
+
+        /* find first nonzero digit and shift result and detect possibly underflow */
+        lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
+            1, &tmp_hi, &tmp_lo);
+        and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
+        while ((cexp > 0) && (lt128(0x0ll, 0x0ll, cfrac_hi, cfrac_lo)) &&
+            (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo))) {
+                cexp--;
+                lshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
+                /* TODO: fix underflow */
+
+                lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
+                    1, &tmp_hi, &tmp_lo);
+                and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
+        }
+
+        lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
+            1, &tmp_hi, &tmp_lo);
+        and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
+        if ((cexp < 0) || (cexp == 0 &&
+            (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)))) {
+                /* FIXME: underflow */
+                result.parts.exp = 0;
+                if ((cexp + FLOAT128_FRACTION_SIZE + 1) < 0) { /* +1 is place for rounding */
+                        result.parts.frac_hi = 0x0ll;
+                        result.parts.frac_lo = 0x0ll;
+                        return result;
+                }
+
+                while (cexp < 0) {
+                        cexp++;
+                        rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
+                }
+
+                if (shift_out & (0x1ull < 64)) {
+                        add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo);
+                }
+
+                lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
+                    1, &tmp_hi, &tmp_lo);
+                and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
+                if (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
+                        not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
+                            &tmp_hi, &tmp_lo);
+                        and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
+                        result.parts.frac_hi = tmp_hi;
+                        result.parts.frac_lo = tmp_lo;
+                        return result;
+                }
+        } else {
+                if (shift_out & (0x1ull < 64)) {
+                        add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo);
+                }
+        }
+
+        ++cexp;
+
+        lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
+            1, &tmp_hi, &tmp_lo);
+        and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
+        if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
+                ++cexp;
+                rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
+        }
+
+        /* check overflow */
+        if (cexp >= FLOAT128_MAX_EXPONENT) {
+                /* FIXME: overflow, return infinity */
+                result.parts.exp = FLOAT128_MAX_EXPONENT;
+                result.parts.frac_hi = 0x0ll;
+                result.parts.frac_lo = 0x0ll;
+                return result;
+        }
+
+        result.parts.exp = (uint32_t) cexp;
+
+        not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
+            &tmp_hi, &tmp_lo);
+        and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
+        result.parts.frac_hi = tmp_hi;
+        result.parts.frac_lo = tmp_lo;
+
+        return result;
+}
+
+/**
+ * Counts leading zeroes in byte.
+ *
+ * @param i Byte for which to count leading zeroes.
+ * @return Number of detected leading zeroes.
+ */
+int countZeroes8(uint8_t i)
+{
+        return zeroTable[i];
+}
+
+/**
+ * Counts leading zeroes in 32bit unsigned integer.
+ *
+ * @param i Integer for which to count leading zeroes.
+ * @return Number of detected leading zeroes.
+ */
+int countZeroes32(uint32_t i)
+{
+        int j;
+        for (j = 0; j < 32; j += 8) {
+                if (i & (0xFF << (24 - j))) {
+                        return (j + countZeroes8(i >> (24 - j)));
+                }
+        }
+
+        return 32;
+}
+
+/**
+ * Counts leading zeroes in 64bit unsigned integer.
+ *
+ * @param i Integer for which to count leading zeroes.
+ * @return Number of detected leading zeroes.
  */
 int countZeroes64(uint64_t i)
 {
         int j;
-        for (j =0; j < 64; j += 8) {
-                if ( i & (0xFFll << (56 - j))) {
+        for (j = 0; j < 64; j += 8) {
+                if (i & (0xFFll << (56 - j))) {
                         return (j + countZeroes8(i >> (56 - j)));
                 }
…
 }
 
-/** Counts leading zeroes in 32bit unsigned integer
- * @param i
- */
-int countZeroes32(uint32_t i)
-{
-        int j;
-        for (j =0; j < 32; j += 8) {
-                if ( i & (0xFF << (24 - j))) {
-                        return (j + countZeroes8(i >> (24 - j)));
-                }
-        }
-
-        return 32;
-}
-
-/** Counts leading zeroes in byte
- * @param i
- */
-int countZeroes8(uint8_t i)
-{
-        return zeroTable[i];
-}
-
-/** Round and normalize number expressed by exponent and fraction with first bit (equal to hidden bit) at 30. bit
- * @param exp exponent
- * @param fraction part with hidden bit shifted to 30. bit
+/**
+ * Round and normalize number expressed by exponent and fraction with
+ * first bit (equal to hidden bit) at 30th bit.
+ *
+ * @param exp Exponent part.
+ * @param fraction Fraction with hidden bit shifted to 30th bit.
  */
 void roundFloat32(int32_t *exp, uint32_t *fraction)
 {
         /* rounding - if first bit after fraction is set then round up */
-        (*fraction) += (0x1 << 6);
-
-        if ((*fraction) & (FLOAT32_HIDDEN_BIT_MASK << 8)) {
+        (*fraction) += (0x1 << (32 - FLOAT32_FRACTION_SIZE - 3));
+
+        if ((*fraction) &
+            (FLOAT32_HIDDEN_BIT_MASK << (32 - FLOAT32_FRACTION_SIZE - 1))) {
                 /* rounding overflow */
                 ++(*exp);
                 (*fraction) >>= 1;
-        };
-
-        if (((*exp) >= FLOAT32_MAX_EXPONENT ) || ((*exp) < 0)) {
+        }
+
+        if (((*exp) >= FLOAT32_MAX_EXPONENT) || ((*exp) < 0)) {
                 /* overflow - set infinity as result */
                 (*exp) = FLOAT32_MAX_EXPONENT;
                 (*fraction) = 0;
-                return;
-        }
-
-        return;
-}
-
-/** Round and normalize number expressed by exponent and fraction with first bit (equal to hidden bit) at 62. bit
- * @param exp exponent
- * @param fraction part with hidden bit shifted to 62. bit
+        }
+}
+
+/**
+ * Round and normalize number expressed by exponent and fraction with
+ * first bit (equal to hidden bit) at 62nd bit.
+ *
+ * @param exp Exponent part.
+ * @param fraction Fraction with hidden bit shifted to 62nd bit.
  */
 void roundFloat64(int32_t *exp, uint64_t *fraction)
 {
         /* rounding - if first bit after fraction is set then round up */
-        (*fraction) += (0x1 << 9);
-
-        if ((*fraction) & (FLOAT64_HIDDEN_BIT_MASK << 11)) {
+        (*fraction) += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));
+
+        if ((*fraction) &
+            (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 3))) {
                 /* rounding overflow */
                 ++(*exp);
                 (*fraction) >>= 1;
-        };
-
-        if (((*exp) >= FLOAT64_MAX_EXPONENT ) || ((*exp) < 0)) {
+        }
+
+        if (((*exp) >= FLOAT64_MAX_EXPONENT) || ((*exp) < 0)) {
                 /* overflow - set infinity as result */
                 (*exp) = FLOAT64_MAX_EXPONENT;
                 (*fraction) = 0;
-                return;
-        }
-
-        return;
+        }
+}
+
+/**
+ * Round and normalize number expressed by exponent and fraction with
+ * first bit (equal to hidden bit) at 126th bit.
+ *
+ * @param exp Exponent part.
+ * @param frac_hi High part of fraction part with hidden bit shifted to 126th bit.
+ * @param frac_lo Low part of fraction part with hidden bit shifted to 126th bit.
+ */
+void roundFloat128(int32_t *exp, uint64_t *frac_hi, uint64_t *frac_lo)
+{
+        uint64_t tmp_hi, tmp_lo;
+
+        /* rounding - if first bit after fraction is set then round up */
+        lshift128(0x0ll, 0x1ll, (128 - FLOAT128_FRACTION_SIZE - 3), &tmp_hi, &tmp_lo);
+        add128(*frac_hi, *frac_lo, tmp_hi, tmp_lo, frac_hi, frac_lo);
+
+        lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
+            (128 - FLOAT128_FRACTION_SIZE - 3), &tmp_hi, &tmp_lo);
+        and128(*frac_hi, *frac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
+        if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
+                /* rounding overflow */
+                ++(*exp);
+                rshift128(*frac_hi, *frac_lo, 1, frac_hi, frac_lo);
+        }
+
+        if (((*exp) >= FLOAT128_MAX_EXPONENT) || ((*exp) < 0)) {
+                /* overflow - set infinity as result */
+                (*exp) = FLOAT128_MAX_EXPONENT;
+                (*frac_hi) = 0;
+                (*frac_lo) = 0;
+        }
+}
+
+/**
+ * Logical shift left on the 128-bit operand.
+ *
+ * @param a_hi High part of the input operand.
+ * @param a_lo Low part of the input operand.
+ * @param shift Number of bits by witch to shift.
+ * @param r_hi Address to store high part of the result.
+ * @param r_lo Address to store low part of the result.
+ */
+void lshift128(
+    uint64_t a_hi, uint64_t a_lo, int shift,
+    uint64_t *r_hi, uint64_t *r_lo)
+{
+        if (shift <= 0) {
+                /* do nothing */
+        } else if (shift >= 128) {
+                a_hi = 0;
+                a_lo = 0;
+        } else if (shift >= 64) {
+                a_hi = a_lo << (shift - 64);
+                a_lo = 0;
+        } else {
+                a_hi <<= shift;
+                a_hi |= a_lo >> (64 - shift);
+                a_lo <<= shift;
+        }
+
+        *r_hi = a_hi;
+        *r_lo = a_lo;
+}
+
+/**
+ * Logical shift right on the 128-bit operand.
+ *
+ * @param a_hi High part of the input operand.
+ * @param a_lo Low part of the input operand.
+ * @param shift Number of bits by witch to shift.
+ * @param r_hi Address to store high part of the result.
+ * @param r_lo Address to store low part of the result.
+ */
+void rshift128(
+    uint64_t a_hi, uint64_t a_lo, int shift,
+    uint64_t *r_hi, uint64_t *r_lo)
+{
+        if (shift <= 0) {
+                /* do nothing */
+        } else  if (shift >= 128) {
+                a_hi = 0;
+                a_lo = 0;
+        } else if (shift >= 64) {
+                a_lo = a_hi >> (shift - 64);
+                a_hi = 0;
+        } else {
+                a_lo >>= shift;
+                a_lo |= a_hi << (64 - shift);
+                a_hi >>= shift;
+        }
+
+        *r_hi = a_hi;
+        *r_lo = a_lo;
+}
+
+/**
+ * Bitwise AND on 128-bit operands.
+ *
+ * @param a_hi High part of the first input operand.
+ * @param a_lo Low part of the first input operand.
+ * @param b_hi High part of the second input operand.
+ * @param b_lo Low part of the second input operand.
+ * @param r_hi Address to store high part of the result.
+ * @param r_lo Address to store low part of the result.
+ */
+void and128(
+    uint64_t a_hi, uint64_t a_lo,
+    uint64_t b_hi, uint64_t b_lo,
+    uint64_t *r_hi, uint64_t *r_lo)
+{
+        *r_hi = a_hi & b_hi;
+        *r_lo = a_lo & b_lo;
+}
+
+/**
+ * Bitwise inclusive OR on 128-bit operands.
+ *
+ * @param a_hi High part of the first input operand.
+ * @param a_lo Low part of the first input operand.
+ * @param b_hi High part of the second input operand.
+ * @param b_lo Low part of the second input operand.
+ * @param r_hi Address to store high part of the result.
+ * @param r_lo Address to store low part of the result.
+ */
+void or128(
+    uint64_t a_hi, uint64_t a_lo,
+    uint64_t b_hi, uint64_t b_lo,
+    uint64_t *r_hi, uint64_t *r_lo)
+{
+        *r_hi = a_hi | b_hi;
+        *r_lo = a_lo | b_lo;
+}
+
+/**
+ * Bitwise exclusive OR on 128-bit operands.
+ *
+ * @param a_hi High part of the first input operand.
+ * @param a_lo Low part of the first input operand.
+ * @param b_hi High part of the second input operand.
+ * @param b_lo Low part of the second input operand.
+ * @param r_hi Address to store high part of the result.
+ * @param r_lo Address to store low part of the result.
+ */
+void xor128(
+    uint64_t a_hi, uint64_t a_lo,
+    uint64_t b_hi, uint64_t b_lo,
+    uint64_t *r_hi, uint64_t *r_lo)
+{
+        *r_hi = a_hi ^ b_hi;
+        *r_lo = a_lo ^ b_lo;
+}
+
+/**
+ * Bitwise NOT on the 128-bit operand.
+ *
+ * @param a_hi High part of the input operand.
+ * @param a_lo Low part of the input operand.
+ * @param r_hi Address to store high part of the result.
+ * @param r_lo Address to store low part of the result.
+ */
+void not128(
+    uint64_t a_hi, uint64_t a_lo,
+        uint64_t *r_hi, uint64_t *r_lo)
+{
+        *r_hi = ~a_hi;
+        *r_lo = ~a_lo;
+}
+
+/**
+ * Equality comparison of 128-bit operands.
+ *
+ * @param a_hi High part of the first input operand.
+ * @param a_lo Low part of the first input operand.
+ * @param b_hi High part of the second input operand.
+ * @param b_lo Low part of the second input operand.
+ * @return 1 if operands are equal, 0 otherwise.
+ */
+int eq128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
+{
+        return (a_hi == b_hi) && (a_lo == b_lo);
+}
+
+/**
+ * Lower-or-equal comparison of 128-bit operands.
+ *
+ * @param a_hi High part of the first input operand.
+ * @param a_lo Low part of the first input operand.
+ * @param b_hi High part of the second input operand.
+ * @param b_lo Low part of the second input operand.
+ * @return 1 if a is lower or equal to b, 0 otherwise.
+ */
+int le128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
+{
+        return (a_hi < b_hi) || ((a_hi == b_hi) && (a_lo <= b_lo));
+}
+
+/**
+ * Lower-than comparison of 128-bit operands.
+ *
+ * @param a_hi High part of the first input operand.
+ * @param a_lo Low part of the first input operand.
+ * @param b_hi High part of the second input operand.
+ * @param b_lo Low part of the second input operand.
+ * @return 1 if a is lower than b, 0 otherwise.
+ */
+int lt128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
+{
+        return (a_hi < b_hi) || ((a_hi == b_hi) && (a_lo < b_lo));
+}
+
+/**
+ * Addition of two 128-bit unsigned integers.
+ *
+ * @param a_hi High part of the first input operand.
+ * @param a_lo Low part of the first input operand.
+ * @param b_hi High part of the second input operand.
+ * @param b_lo Low part of the second input operand.
+ * @param r_hi Address to store high part of the result.
+ * @param r_lo Address to store low part of the result.
+ */
+void add128(uint64_t a_hi, uint64_t a_lo,
+    uint64_t b_hi, uint64_t b_lo,
+    uint64_t *r_hi, uint64_t *r_lo)
+{
+        uint64_t low = a_lo + b_lo;
+        *r_lo = low;
+        /* detect overflow to add a carry */
+        *r_hi = a_hi + b_hi + (low < a_lo);
+}
+
+/**
+ * Substraction of two 128-bit unsigned integers.
+ *
+ * @param a_hi High part of the first input operand.
+ * @param a_lo Low part of the first input operand.
+ * @param b_hi High part of the second input operand.
+ * @param b_lo Low part of the second input operand.
+ * @param r_hi Address to store high part of the result.
+ * @param r_lo Address to store low part of the result.
+ */
+void sub128(uint64_t a_hi, uint64_t a_lo,
+    uint64_t b_hi, uint64_t b_lo,
+    uint64_t *r_hi, uint64_t *r_lo)
+{
+        *r_lo = a_lo - b_lo;
+        /* detect underflow to substract a carry */
+        *r_hi = a_hi - b_hi - (a_lo < b_lo);
+}
+
+/**
+ * Multiplication of two 64-bit unsigned integers.
+ *
+ * @param a First input operand.
+ * @param b Second input operand.
+ * @param r_hi Address to store high part of the result.
+ * @param r_lo Address to store low part of the result.
+ */
+void mul64(uint64_t a, uint64_t b, uint64_t *r_hi, uint64_t *r_lo)
+{
+        uint64_t low, high, middle1, middle2;
+        uint32_t alow, blow;
+
+        alow = a & 0xFFFFFFFF;
+        blow = b & 0xFFFFFFFF;
+
+        a >>= 32;
+        b >>= 32;
+
+        low = ((uint64_t) alow) * blow;
+        middle1 = a * blow;
+        middle2 = alow * b;
+        high = a * b;
+
+        middle1 += middle2;
+        high += (((uint64_t) (middle1 < middle2)) << 32) + (middle1 >> 32);
+        middle1 <<= 32;
+        low += middle1;
+        high += (low < middle1);
+        *r_lo = low;
+        *r_hi = high;
+}
+
+/**
+ * Multiplication of two 128-bit unsigned integers.
+ *
+ * @param a_hi High part of the first input operand.
+ * @param a_lo Low part of the first input operand.
+ * @param b_hi High part of the second input operand.
+ * @param b_lo Low part of the second input operand.
+ * @param r_hihi Address to store first (highest) quarter of the result.
+ * @param r_hilo Address to store second quarter of the result.
+ * @param r_lohi Address to store third quarter of the result.
+ * @param r_lolo Address to store fourth (lowest) quarter of the result.
+ */
+void mul128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo,
+    uint64_t *r_hihi, uint64_t *r_hilo, uint64_t *r_lohi, uint64_t *r_lolo)
+{
+        uint64_t hihi, hilo, lohi, lolo;
+        uint64_t tmp1, tmp2;
+
+        mul64(a_lo, b_lo, &lohi, &lolo);
+        mul64(a_lo, b_hi, &hilo, &tmp2);
+        add128(hilo, tmp2, 0x0ll, lohi, &hilo, &lohi);
+        mul64(a_hi, b_hi, &hihi, &tmp1);
+        add128(hihi, tmp1, 0x0ll, hilo, &hihi, &hilo);
+        mul64(a_hi, b_lo, &tmp1, &tmp2);
+        add128(tmp1, tmp2, 0x0ll, lohi, &tmp1, &lohi);
+        add128(hihi, hilo, 0x0ll, tmp1, &hihi, &hilo);
+
+        *r_hihi = hihi;
+        *r_hilo = hilo;
+        *r_lohi = lohi;
+        *r_lolo = lolo;
+}
+
+/**
+ * Estimate the quotient of 128-bit unsigned divident and 64-bit unsigned
+ * divisor.
+ *
+ * @param a_hi High part of the divident.
+ * @param a_lo Low part of the divident.
+ * @param b Divisor.
+ * @return Quotient approximation.
+ */
+uint64_t div128est(uint64_t a_hi, uint64_t a_lo, uint64_t b)
+{
+        uint64_t b_hi, b_lo;
+        uint64_t rem_hi, rem_lo;
+        uint64_t tmp_hi, tmp_lo;
+        uint64_t result;
+
+        if (b <= a_hi) {
+                return 0xFFFFFFFFFFFFFFFFull;
+        }
+
+        b_hi = b >> 32;
+        result = ((b_hi << 32) <= a_hi) ? (0xFFFFFFFFull << 32) : (a_hi / b_hi) << 32;
+        mul64(b, result, &tmp_hi, &tmp_lo);
+        sub128(a_hi, a_lo, tmp_hi, tmp_lo, &rem_hi, &rem_lo);
+
+        while ((int64_t) rem_hi < 0) {
+                result -= 0x1ll << 32;
+                b_lo = b << 32;
+                add128(rem_hi, rem_lo, b_hi, b_lo, &rem_hi, &rem_lo);
+        }
+
+        rem_hi = (rem_hi << 32) | (rem_lo >> 32);
+        if ((b_hi << 32) <= rem_hi) {
+                result |= 0xFFFFFFFF;
+        } else {
+                result |= rem_hi / b_hi;
+        }
+
+        return result;
 }
 
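
As a small illustration of the 128-bit helper semantics introduced by this changeset, the following standalone sketch (not part of the commit) re-states the add128() carry detection and the mul64() 32-bit partial-product scheme from the diff and cross-checks them against a compiler-provided reference. It assumes a host compiler with the unsigned __int128 extension (e.g. GCC on a 64-bit machine):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Same logic as add128() above: the carry is detected by (low < a_lo). */
static void add128_ref(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
        uint64_t low = a_lo + b_lo;
        *r_lo = low;
        *r_hi = a_hi + b_hi + (low < a_lo);
}

/* Same logic as mul64() above: four 32-bit partial products with carries. */
static void mul64_ref(uint64_t a, uint64_t b, uint64_t *r_hi, uint64_t *r_lo)
{
        uint64_t low, high, middle1, middle2;
        uint32_t alow = a & 0xFFFFFFFF, blow = b & 0xFFFFFFFF;

        a >>= 32;
        b >>= 32;

        low = ((uint64_t) alow) * blow;
        middle1 = a * blow;
        middle2 = alow * b;
        high = a * b;

        middle1 += middle2;
        high += (((uint64_t) (middle1 < middle2)) << 32) + (middle1 >> 32);
        middle1 <<= 32;
        low += middle1;
        high += (low < middle1);
        *r_lo = low;
        *r_hi = high;
}

int main(void)
{
        uint64_t hi, lo;

        /* A carry out of the low word must propagate into the high word. */
        add128_ref(0x0ull, 0xFFFFFFFFFFFFFFFFull, 0x0ull, 0x1ull, &hi, &lo);
        assert(hi == 1 && lo == 0);

        /* Cross-check mul64 against the compiler's 128-bit multiplication. */
        uint64_t a = 0xDEADBEEFCAFEBABEull, b = 0x0123456789ABCDEFull;
        mul64_ref(a, b, &hi, &lo);
        unsigned __int128 ref = (unsigned __int128) a * b;
        assert(hi == (uint64_t) (ref >> 64) && lo == (uint64_t) ref);

        printf("add128/mul64 sketches agree with the reference\n");
        return 0;
}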