/* * Copyright (c) 2005 Josef Cejka * Copyright (c) 2011 Petr Koupy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * - Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * - Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * - The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /** @addtogroup softfloat * @{ */ /** @file Substraction functions. */ #include "sub.h" #include "comparison.h" #include "common.h" #include "add.h" /** Subtract two single-precision floats with the same sign. * * @param a First input operand. * @param b Second input operand. * * @return Result of substraction. * */ float32 sub_float32(float32 a, float32 b) { int expdiff; uint32_t exp1, exp2, frac1, frac2; float32 result; result.bin = 0; expdiff = a.parts.exp - b.parts.exp; if ((expdiff < 0 ) || ((expdiff == 0) && (a.parts.fraction < b.parts.fraction))) { if (is_float32_nan(b)) { if (is_float32_signan(b)) { // TODO: fix SigNaN } return b; } if (b.parts.exp == FLOAT32_MAX_EXPONENT) { /* num -(+-inf) = -+inf */ b.parts.sign = !b.parts.sign; return b; } result.parts.sign = !a.parts.sign; frac1 = b.parts.fraction; exp1 = b.parts.exp; frac2 = a.parts.fraction; exp2 = a.parts.exp; expdiff *= -1; } else { if (is_float32_nan(a)) { if ((is_float32_signan(a)) || (is_float32_signan(b))) { // TODO: fix SigNaN } return a; } if (a.parts.exp == FLOAT32_MAX_EXPONENT) { if (b.parts.exp == FLOAT32_MAX_EXPONENT) { /* inf - inf => nan */ // TODO: fix exception result.bin = FLOAT32_NAN; return result; } return a; } result.parts.sign = a.parts.sign; frac1 = a.parts.fraction; exp1 = a.parts.exp; frac2 = b.parts.fraction; exp2 = b.parts.exp; } if (exp1 == 0) { /* both are denormalized */ result.parts.fraction = frac1 - frac2; if (result.parts.fraction > frac1) { // TODO: underflow exception return result; } result.parts.exp = 0; return result; } /* add hidden bit */ frac1 |= FLOAT32_HIDDEN_BIT_MASK; if (exp2 == 0) { /* denormalized */ --expdiff; } else { /* normalized */ frac2 |= FLOAT32_HIDDEN_BIT_MASK; } /* create some space for rounding */ frac1 <<= 6; frac2 <<= 6; if (expdiff > FLOAT32_FRACTION_SIZE + 1) goto done; frac1 = frac1 - (frac2 >> expdiff); done: /* TODO: find first nonzero digit and shift result and detect possibly underflow */ while ((exp1 > 0) && (!(frac1 & (FLOAT32_HIDDEN_BIT_MASK << 6 )))) { --exp1; frac1 <<= 1; /* TODO: fix underflow - frac1 == 0 does not necessary means underflow... */ } /* rounding - if first bit after fraction is set then round up */ frac1 += 0x20; if (frac1 & (FLOAT32_HIDDEN_BIT_MASK << 7)) { ++exp1; frac1 >>= 1; } /* Clear hidden bit and shift */ result.parts.fraction = ((frac1 >> 6) & (~FLOAT32_HIDDEN_BIT_MASK)); result.parts.exp = exp1; return result; } /** Subtract two double-precision floats with the same sign. * * @param a First input operand. * @param b Second input operand. * * @return Result of substraction. * */ float64 sub_float64(float64 a, float64 b) { int expdiff; uint32_t exp1, exp2; uint64_t frac1, frac2; float64 result; result.bin = 0; expdiff = a.parts.exp - b.parts.exp; if ((expdiff < 0 ) || ((expdiff == 0) && (a.parts.fraction < b.parts.fraction))) { if (is_float64_nan(b)) { if (is_float64_signan(b)) { // TODO: fix SigNaN } return b; } if (b.parts.exp == FLOAT64_MAX_EXPONENT) { /* num -(+-inf) = -+inf */ b.parts.sign = !b.parts.sign; return b; } result.parts.sign = !a.parts.sign; frac1 = b.parts.fraction; exp1 = b.parts.exp; frac2 = a.parts.fraction; exp2 = a.parts.exp; expdiff *= -1; } else { if (is_float64_nan(a)) { if (is_float64_signan(a) || is_float64_signan(b)) { // TODO: fix SigNaN } return a; } if (a.parts.exp == FLOAT64_MAX_EXPONENT) { if (b.parts.exp == FLOAT64_MAX_EXPONENT) { /* inf - inf => nan */ // TODO: fix exception result.bin = FLOAT64_NAN; return result; } return a; } result.parts.sign = a.parts.sign; frac1 = a.parts.fraction; exp1 = a.parts.exp; frac2 = b.parts.fraction; exp2 = b.parts.exp; } if (exp1 == 0) { /* both are denormalized */ result.parts.fraction = frac1 - frac2; if (result.parts.fraction > frac1) { // TODO: underflow exception return result; } result.parts.exp = 0; return result; } /* add hidden bit */ frac1 |= FLOAT64_HIDDEN_BIT_MASK; if (exp2 == 0) { /* denormalized */ --expdiff; } else { /* normalized */ frac2 |= FLOAT64_HIDDEN_BIT_MASK; } /* create some space for rounding */ frac1 <<= 6; frac2 <<= 6; if (expdiff > FLOAT64_FRACTION_SIZE + 1) goto done; frac1 = frac1 - (frac2 >> expdiff); done: /* TODO: find first nonzero digit and shift result and detect possibly underflow */ while ((exp1 > 0) && (!(frac1 & (FLOAT64_HIDDEN_BIT_MASK << 6 )))) { --exp1; frac1 <<= 1; /* TODO: fix underflow - frac1 == 0 does not necessary means underflow... */ } /* rounding - if first bit after fraction is set then round up */ frac1 += 0x20; if (frac1 & (FLOAT64_HIDDEN_BIT_MASK << 7)) { ++exp1; frac1 >>= 1; } /* Clear hidden bit and shift */ result.parts.fraction = ((frac1 >> 6) & (~FLOAT64_HIDDEN_BIT_MASK)); result.parts.exp = exp1; return result; } /** Subtract two quadruple-precision floats with the same sign. * * @param a First input operand. * @param b Second input operand. * * @return Result of substraction. * */ float128 sub_float128(float128 a, float128 b) { int expdiff; uint32_t exp1, exp2; uint64_t frac1_hi, frac1_lo, frac2_hi, frac2_lo, tmp_hi, tmp_lo; float128 result; result.bin.hi = 0; result.bin.lo = 0; expdiff = a.parts.exp - b.parts.exp; if ((expdiff < 0 ) || ((expdiff == 0) && lt128(a.parts.frac_hi, a.parts.frac_lo, b.parts.frac_hi, b.parts.frac_lo))) { if (is_float128_nan(b)) { if (is_float128_signan(b)) { // TODO: fix SigNaN } return b; } if (b.parts.exp == FLOAT128_MAX_EXPONENT) { /* num -(+-inf) = -+inf */ b.parts.sign = !b.parts.sign; return b; } result.parts.sign = !a.parts.sign; frac1_hi = b.parts.frac_hi; frac1_lo = b.parts.frac_lo; exp1 = b.parts.exp; frac2_hi = a.parts.frac_hi; frac2_lo = a.parts.frac_lo; exp2 = a.parts.exp; expdiff *= -1; } else { if (is_float128_nan(a)) { if (is_float128_signan(a) || is_float128_signan(b)) { // TODO: fix SigNaN } return a; } if (a.parts.exp == FLOAT128_MAX_EXPONENT) { if (b.parts.exp == FLOAT128_MAX_EXPONENT) { /* inf - inf => nan */ // TODO: fix exception result.bin.hi = FLOAT128_NAN_HI; result.bin.lo = FLOAT128_NAN_LO; return result; } return a; } result.parts.sign = a.parts.sign; frac1_hi = a.parts.frac_hi; frac1_lo = a.parts.frac_lo; exp1 = a.parts.exp; frac2_hi = b.parts.frac_hi; frac2_lo = b.parts.frac_lo; exp2 = b.parts.exp; } if (exp1 == 0) { /* both are denormalized */ sub128(frac1_hi, frac1_lo, frac2_hi, frac2_lo, &tmp_hi, &tmp_lo); result.parts.frac_hi = tmp_hi; result.parts.frac_lo = tmp_lo; if (lt128(frac1_hi, frac1_lo, result.parts.frac_hi, result.parts.frac_lo)) { // TODO: underflow exception return result; } result.parts.exp = 0; return result; } /* add hidden bit */ or128(frac1_hi, frac1_lo, FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, &frac1_hi, &frac1_lo); if (exp2 == 0) { /* denormalized */ --expdiff; } else { /* normalized */ or128(frac2_hi, frac2_lo, FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, &frac2_hi, &frac2_lo); } /* create some space for rounding */ lshift128(frac1_hi, frac1_lo, 6, &frac1_hi, &frac1_lo); lshift128(frac2_hi, frac2_lo, 6, &frac2_hi, &frac2_lo); if (expdiff > FLOAT128_FRACTION_SIZE + 1) goto done; rshift128(frac2_hi, frac2_lo, expdiff, &tmp_hi, &tmp_lo); sub128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &frac1_hi, &frac1_lo); done: /* TODO: find first nonzero digit and shift result and detect possibly underflow */ lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 6, &tmp_hi, &tmp_lo); and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); while ((exp1 > 0) && (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo))) { --exp1; lshift128(frac1_hi, frac1_lo, 1, &frac1_hi, &frac1_lo); /* TODO: fix underflow - frac1 == 0 does not necessary means underflow... */ lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 6, &tmp_hi, &tmp_lo); and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); } /* rounding - if first bit after fraction is set then round up */ add128(frac1_hi, frac1_lo, 0x0ll, 0x20ll, &frac1_hi, &frac1_lo); lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 7, &tmp_hi, &tmp_lo); and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) { ++exp1; rshift128(frac1_hi, frac1_lo, 1, &frac1_hi, &frac1_lo); } /* Clear hidden bit and shift */ rshift128(frac1_hi, frac1_lo, 6, &frac1_hi, &frac1_lo); not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, &tmp_hi, &tmp_lo); and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); result.parts.frac_hi = tmp_hi; result.parts.frac_lo = tmp_lo; result.parts.exp = exp1; return result; } #ifdef float32_t float32_t __subsf3(float32_t a, float32_t b) { float32_u ua; ua.val = a; float32_u ub; ub.val = b; float32_u res; if (ua.data.parts.sign != ub.data.parts.sign) { ub.data.parts.sign = !ub.data.parts.sign; res.data = add_float32(ua.data, ub.data); } else res.data = sub_float32(ua.data, ub.data); return res.val; } float32_t __aeabi_fsub(float32_t a, float32_t b) { float32_u ua; ua.val = a; float32_u ub; ub.val = b; float32_u res; if (ua.data.parts.sign != ub.data.parts.sign) { ub.data.parts.sign = !ub.data.parts.sign; res.data = add_float32(ua.data, ub.data); } else res.data = sub_float32(ua.data, ub.data); return res.val; } #endif #ifdef float64_t float64_t __subdf3(float64_t a, float64_t b) { float64_u ua; ua.val = a; float64_u ub; ub.val = b; float64_u res; if (ua.data.parts.sign != ub.data.parts.sign) { ub.data.parts.sign = !ub.data.parts.sign; res.data = add_float64(ua.data, ub.data); } else res.data = sub_float64(ua.data, ub.data); return res.val; } float64_t __aeabi_dsub(float64_t a, float64_t b) { float64_u ua; ua.val = a; float64_u ub; ub.val = b; float64_u res; if (ua.data.parts.sign != ub.data.parts.sign) { ub.data.parts.sign = !ub.data.parts.sign; res.data = add_float64(ua.data, ub.data); } else res.data = sub_float64(ua.data, ub.data); return res.val; } #endif #ifdef float128_t float128_t __subtf3(float128_t a, float128_t b) { float128_u ua; ua.val = a; float128_u ub; ub.val = b; float128_u res; if (ua.data.parts.sign != ub.data.parts.sign) { ub.data.parts.sign = !ub.data.parts.sign; res.data = add_float128(ua.data, ub.data); } else res.data = sub_float128(ua.data, ub.data); return res.val; } void _Qp_sub(float128_t *c, float128_t *a, float128_t *b) { *c = __subtf3(*a, *b); } #endif /** @} */