/*
 * Copyright (c) 2005 Josef Cejka
 * Copyright (c) 2011 Petr Koupy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/** @addtogroup softfloat
 * @{
 */
/** @file Division functions.
 */

#include <sftypes.h>
#include <add.h>
#include <div.h>
#include <comparison.h>
#include <mul.h>
#include <common.h>

/**
 * Divide two single-precision floats.
 * 
 * @param a Nominator.
 * @param b Denominator.
 * @return Result of division.
 */
float32 divFloat32(float32 a, float32 b) 
{
	float32 result;
	int32_t aexp, bexp, cexp;
	uint64_t afrac, bfrac, cfrac;
	
	result.parts.sign = a.parts.sign ^ b.parts.sign;
	
	if (isFloat32NaN(a)) {
		if (isFloat32SigNaN(a)) {
			/*FIXME: SigNaN*/
		}
		/*NaN*/
		return a;
	}
	
	if (isFloat32NaN(b)) {
		if (isFloat32SigNaN(b)) {
			/*FIXME: SigNaN*/
		}
		/*NaN*/
		return b;
	}
	
	if (isFloat32Infinity(a)) {
		if (isFloat32Infinity(b)) {
			/*FIXME: inf / inf */
			result.binary = FLOAT32_NAN;
			return result;
		}
		/* inf / num */
		result.parts.exp = a.parts.exp;
		result.parts.fraction = a.parts.fraction;
		return result;
	}

	if (isFloat32Infinity(b)) {
		if (isFloat32Zero(a)) {
			/* FIXME 0 / inf */
			result.parts.exp = 0;
			result.parts.fraction = 0;
			return result;
		}
		/* FIXME: num / inf*/
		result.parts.exp = 0;
		result.parts.fraction = 0;
		return result;
	}
	
	if (isFloat32Zero(b)) {
		if (isFloat32Zero(a)) {
			/*FIXME: 0 / 0*/
			result.binary = FLOAT32_NAN;
			return result;
		}
		/* FIXME: division by zero */
		result.parts.exp = 0;
		result.parts.fraction = 0;
		return result;
	}
	
	afrac = a.parts.fraction;
	aexp = a.parts.exp;
	bfrac = b.parts.fraction;
	bexp = b.parts.exp;
	
	/* denormalized numbers */
	if (aexp == 0) {
		if (afrac == 0) {
			result.parts.exp = 0;
			result.parts.fraction = 0;
			return result;
		}

		/* normalize it*/
		afrac <<= 1;
		/* afrac is nonzero => it must stop */	
		while (!(afrac & FLOAT32_HIDDEN_BIT_MASK)) {
			afrac <<= 1;
			aexp--;
		}
	}

	if (bexp == 0) {
		bfrac <<= 1;
		/* bfrac is nonzero => it must stop */	
		while (!(bfrac & FLOAT32_HIDDEN_BIT_MASK)) {
			bfrac <<= 1;
			bexp--;
		}
	}

	afrac =	(afrac | FLOAT32_HIDDEN_BIT_MASK) << (32 - FLOAT32_FRACTION_SIZE - 1);
	bfrac =	(bfrac | FLOAT32_HIDDEN_BIT_MASK) << (32 - FLOAT32_FRACTION_SIZE);

	if (bfrac <= (afrac << 1)) {
		afrac >>= 1;
		aexp++;
	}
	
	cexp = aexp - bexp + FLOAT32_BIAS - 2;
	
	cfrac = (afrac << 32) / bfrac;
	if ((cfrac & 0x3F) == 0) { 
		cfrac |= (bfrac * cfrac != afrac << 32);
	}
	
	/* pack and round */
	
	/* find first nonzero digit and shift result and detect possibly underflow */
	while ((cexp > 0) && (cfrac) && (!(cfrac & (FLOAT32_HIDDEN_BIT_MASK << 7)))) {
		cexp--;
		cfrac <<= 1;
		/* TODO: fix underflow */
	}
	
	cfrac += (0x1 << 6); /* FIXME: 7 is not sure*/
	
	if (cfrac & (FLOAT32_HIDDEN_BIT_MASK << 7)) {
		++cexp;
		cfrac >>= 1;
	}	

	/* check overflow */
	if (cexp >= FLOAT32_MAX_EXPONENT) {
		/* FIXME: overflow, return infinity */
		result.parts.exp = FLOAT32_MAX_EXPONENT;
		result.parts.fraction = 0;
		return result;
	}

	if (cexp < 0) {
		/* FIXME: underflow */
		result.parts.exp = 0;
		if ((cexp + FLOAT32_FRACTION_SIZE) < 0) {
			result.parts.fraction = 0;
			return result;
		}
		cfrac >>= 1;
		while (cexp < 0) {
			cexp++;
			cfrac >>= 1;
		}	
	} else {
		result.parts.exp = (uint32_t) cexp;
	}
	
	result.parts.fraction = ((cfrac >> 6) & (~FLOAT32_HIDDEN_BIT_MASK)); 
	
	return result;	
}

/**
 * Divide two double-precision floats.
 *
 * @param a Nominator.
 * @param b Denominator.
 * @return Result of division.
 */
float64 divFloat64(float64 a, float64 b) 
{
	float64 result;
	int64_t aexp, bexp, cexp;
	uint64_t afrac, bfrac, cfrac; 
	uint64_t remlo, remhi;
	uint64_t tmplo, tmphi;
	
	result.parts.sign = a.parts.sign ^ b.parts.sign;
	
	if (isFloat64NaN(a)) {
		if (isFloat64SigNaN(b)) {
			/*FIXME: SigNaN*/
			return b;
		}
		
		if (isFloat64SigNaN(a)) {
			/*FIXME: SigNaN*/
		}
		/*NaN*/
		return a;
	}
	
	if (isFloat64NaN(b)) {
		if (isFloat64SigNaN(b)) {
			/*FIXME: SigNaN*/
		}
		/*NaN*/
		return b;
	}
	
	if (isFloat64Infinity(a)) {
		if (isFloat64Infinity(b) || isFloat64Zero(b)) {
			/*FIXME: inf / inf */
			result.binary = FLOAT64_NAN;
			return result;
		}
		/* inf / num */
		result.parts.exp = a.parts.exp;
		result.parts.fraction = a.parts.fraction;
		return result;
	}

	if (isFloat64Infinity(b)) {
		if (isFloat64Zero(a)) {
			/* FIXME 0 / inf */
			result.parts.exp = 0;
			result.parts.fraction = 0;
			return result;
		}
		/* FIXME: num / inf*/
		result.parts.exp = 0;
		result.parts.fraction = 0;
		return result;
	}
	
	if (isFloat64Zero(b)) {
		if (isFloat64Zero(a)) {
			/*FIXME: 0 / 0*/
			result.binary = FLOAT64_NAN;
			return result;
		}
		/* FIXME: division by zero */
		result.parts.exp = 0;
		result.parts.fraction = 0;
		return result;
	}

	afrac = a.parts.fraction;
	aexp = a.parts.exp;
	bfrac = b.parts.fraction;
	bexp = b.parts.exp;
	
	/* denormalized numbers */
	if (aexp == 0) {
		if (afrac == 0) {
			result.parts.exp = 0;
			result.parts.fraction = 0;
			return result;
		}

		/* normalize it*/
		aexp++;
		/* afrac is nonzero => it must stop */	
		while (!(afrac & FLOAT64_HIDDEN_BIT_MASK)) {
			afrac <<= 1;
			aexp--;
		}
	}

	if (bexp == 0) {
		bexp++;
		/* bfrac is nonzero => it must stop */	
		while (!(bfrac & FLOAT64_HIDDEN_BIT_MASK)) {
			bfrac <<= 1;
			bexp--;
		}
	}

	afrac =	(afrac | FLOAT64_HIDDEN_BIT_MASK) << (64 - FLOAT64_FRACTION_SIZE - 2);
	bfrac =	(bfrac | FLOAT64_HIDDEN_BIT_MASK) << (64 - FLOAT64_FRACTION_SIZE - 1);

	if (bfrac <= (afrac << 1)) {
		afrac >>= 1;
		aexp++;
	}
	
	cexp = aexp - bexp + FLOAT64_BIAS - 2; 
	
	cfrac = div128est(afrac, 0x0ll, bfrac);
	
	if ((cfrac & 0x1FF) <= 2) {
		mul64(bfrac, cfrac, &tmphi, &tmplo);
		sub128(afrac, 0x0ll, tmphi, tmplo, &remhi, &remlo);
		
		while ((int64_t) remhi < 0) {
			cfrac--;
			add128(remhi, remlo, 0x0ll, bfrac, &remhi, &remlo);
		}
		cfrac |= (remlo != 0);
	}
	
	/* round and shift */
	result = finishFloat64(cexp, cfrac, result.parts.sign);
	return result;
}

/**
 * Divide two quadruple-precision floats.
 *
 * @param a Nominator.
 * @param b Denominator.
 * @return Result of division.
 */
float128 divFloat128(float128 a, float128 b)
{
	float128 result;
	int64_t aexp, bexp, cexp;
	uint64_t afrac_hi, afrac_lo, bfrac_hi, bfrac_lo, cfrac_hi, cfrac_lo;
	uint64_t shift_out;
	uint64_t rem_hihi, rem_hilo, rem_lohi, rem_lolo;
	uint64_t tmp_hihi, tmp_hilo, tmp_lohi, tmp_lolo;

	result.parts.sign = a.parts.sign ^ b.parts.sign;

	if (isFloat128NaN(a)) {
		if (isFloat128SigNaN(b)) {
			/*FIXME: SigNaN*/
			return b;
		}

		if (isFloat128SigNaN(a)) {
			/*FIXME: SigNaN*/
		}
		/*NaN*/
		return a;
	}

	if (isFloat128NaN(b)) {
		if (isFloat128SigNaN(b)) {
			/*FIXME: SigNaN*/
		}
		/*NaN*/
		return b;
	}

	if (isFloat128Infinity(a)) {
		if (isFloat128Infinity(b) || isFloat128Zero(b)) {
			/*FIXME: inf / inf */
			result.binary.hi = FLOAT128_NAN_HI;
			result.binary.lo = FLOAT128_NAN_LO;
			return result;
		}
		/* inf / num */
		result.parts.exp = a.parts.exp;
		result.parts.frac_hi = a.parts.frac_hi;
		result.parts.frac_lo = a.parts.frac_lo;
		return result;
	}

	if (isFloat128Infinity(b)) {
		if (isFloat128Zero(a)) {
			/* FIXME 0 / inf */
			result.parts.exp = 0;
			result.parts.frac_hi = 0;
			result.parts.frac_lo = 0;
			return result;
		}
		/* FIXME: num / inf*/
		result.parts.exp = 0;
		result.parts.frac_hi = 0;
		result.parts.frac_lo = 0;
		return result;
	}

	if (isFloat128Zero(b)) {
		if (isFloat128Zero(a)) {
			/*FIXME: 0 / 0*/
			result.binary.hi = FLOAT128_NAN_HI;
			result.binary.lo = FLOAT128_NAN_LO;
			return result;
		}
		/* FIXME: division by zero */
		result.parts.exp = 0;
		result.parts.frac_hi = 0;
		result.parts.frac_lo = 0;
		return result;
	}

	afrac_hi = a.parts.frac_hi;
	afrac_lo = a.parts.frac_lo;
	aexp = a.parts.exp;
	bfrac_hi = b.parts.frac_hi;
	bfrac_lo = b.parts.frac_lo;
	bexp = b.parts.exp;

	/* denormalized numbers */
	if (aexp == 0) {
		if (eq128(afrac_hi, afrac_lo, 0x0ll, 0x0ll)) {
			result.parts.exp = 0;
			result.parts.frac_hi = 0;
			result.parts.frac_lo = 0;
			return result;
		}

		/* normalize it*/
		aexp++;
		/* afrac is nonzero => it must stop */
		and128(afrac_hi, afrac_lo,
		    FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
		    &tmp_hihi, &tmp_lolo);
		while (!lt128(0x0ll, 0x0ll, tmp_hihi, tmp_lolo)) {
			lshift128(afrac_hi, afrac_lo, 1, &afrac_hi, &afrac_lo);
			aexp--;
		}
	}

	if (bexp == 0) {
		bexp++;
		/* bfrac is nonzero => it must stop */
		and128(bfrac_hi, bfrac_lo,
		    FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
		    &tmp_hihi, &tmp_lolo);
		while (!lt128(0x0ll, 0x0ll, tmp_hihi, tmp_lolo)) {
			lshift128(bfrac_hi, bfrac_lo, 1, &bfrac_hi, &bfrac_lo);
			bexp--;
		}
	}

	or128(afrac_hi, afrac_lo,
	    FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
	    &afrac_hi, &afrac_lo);
	lshift128(afrac_hi, afrac_lo,
	    (128 - FLOAT128_FRACTION_SIZE - 1), &afrac_hi, &afrac_lo);
	or128(bfrac_hi, bfrac_lo,
	    FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
	    &bfrac_hi, &bfrac_lo);
	lshift128(bfrac_hi, bfrac_lo,
	    (128 - FLOAT128_FRACTION_SIZE - 1), &bfrac_hi, &bfrac_lo);

	if (le128(bfrac_hi, bfrac_lo, afrac_hi, afrac_lo)) {
		rshift128(afrac_hi, afrac_lo, 1, &afrac_hi, &afrac_lo);
		aexp++;
	}

	cexp = aexp - bexp + FLOAT128_BIAS - 2;

	cfrac_hi = div128est(afrac_hi, afrac_lo, bfrac_hi);

	mul128(bfrac_hi, bfrac_lo, 0x0ll, cfrac_hi,
	    &tmp_lolo /* dummy */, &tmp_hihi, &tmp_hilo, &tmp_lohi);

	/* sub192(afrac_hi, afrac_lo, 0, 
	 *     tmp_hihi, tmp_hilo, tmp_lohi
	 *     &rem_hihi, &rem_hilo, &rem_lohi); */
	sub128(afrac_hi, afrac_lo, tmp_hihi, tmp_hilo, &rem_hihi, &rem_hilo);
	if (tmp_lohi > 0) {
		sub128(rem_hihi, rem_hilo, 0x0ll, 0x1ll, &rem_hihi, &rem_hilo);
	}
	rem_lohi = -tmp_lohi;

	while ((int64_t) rem_hihi < 0) {
		--cfrac_hi;
		/* add192(rem_hihi, rem_hilo, rem_lohi, 
		 *     0, bfrac_hi, bfrac_lo,
		 *     &rem_hihi, &rem_hilo, &rem_lohi); */
		add128(rem_hilo, rem_lohi, bfrac_hi, bfrac_lo, &rem_hilo, &rem_lohi);
		if (lt128(rem_hilo, rem_lohi, bfrac_hi, bfrac_lo)) {
			++rem_hihi;
		}
	}

	cfrac_lo = div128est(rem_hilo, rem_lohi, bfrac_lo);

	if ((cfrac_lo & 0x3FFF) <= 4) {
		mul128(bfrac_hi, bfrac_lo, 0x0ll, cfrac_lo,
	        &tmp_hihi /* dummy */, &tmp_hilo, &tmp_lohi, &tmp_lolo);

		/* sub192(rem_hilo, rem_lohi, 0,
		 *     tmp_hilo, tmp_lohi, tmp_lolo,
		 *     &rem_hilo, &rem_lohi, &rem_lolo); */
		sub128(rem_hilo, rem_lohi, tmp_hilo, tmp_lohi, &rem_hilo, &rem_lohi);
		if (tmp_lolo > 0) {
			sub128(rem_hilo, rem_lohi, 0x0ll, 0x1ll, &rem_hilo, &rem_lohi);
		}
		rem_lolo = -tmp_lolo;

		while ((int64_t) rem_hilo < 0) {
			--cfrac_lo;
			/* add192(rem_hilo, rem_lohi, rem_lolo,
			 *     0, bfrac_hi, bfrac_lo,
			 *     &rem_hilo, &rem_lohi, &rem_lolo); */
			add128(rem_lohi, rem_lolo, bfrac_hi, bfrac_lo, &rem_lohi, &rem_lolo);
			if (lt128(rem_lohi, rem_lolo, bfrac_hi, bfrac_lo)) {
				++rem_hilo;
			}
		}

		cfrac_lo |= ((rem_hilo | rem_lohi | rem_lolo) != 0 );
	}

	shift_out = cfrac_lo << (64 - (128 - FLOAT128_FRACTION_SIZE - 1));
	rshift128(cfrac_hi, cfrac_lo, (128 - FLOAT128_FRACTION_SIZE - 1),
	    &cfrac_hi, &cfrac_lo);

	result = finishFloat128(cexp, cfrac_hi, cfrac_lo, result.parts.sign, shift_out);
	return result;
}

/** @}
 */
