Context Navigation

source: mainline/uspace/lib/softfloat/common.c@ 257feec

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since 257feec was 7462674, checked in by Jakub Jermar <jakub@…>, 13 years ago
Fix the zeroTable once again. (Thanks Jiri Zarevucky for noticing this.)
Property mode set to `100644`
File size: 19.6 KB

Rev	Line
[e979fea]	1	/*
[df4ed85]	2	* Copyright (c) 2005 Josef Cejka
[c67aff2]	3	* Copyright (c) 2011 Petr Koupy
[e979fea]	4	* All rights reserved.
	5	*
	6	* Redistribution and use in source and binary forms, with or without
	7	* modification, are permitted provided that the following conditions
	8	* are met:
	9	*
	10	* - Redistributions of source code must retain the above copyright
	11	* notice, this list of conditions and the following disclaimer.
	12	* - Redistributions in binary form must reproduce the above copyright
	13	* notice, this list of conditions and the following disclaimer in the
	14	* documentation and/or other materials provided with the distribution.
	15	* - The name of the author may not be used to endorse or promote products
	16	* derived from this software without specific prior written permission.
	17	*
	18	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	19	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	20	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	21	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
	22	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	23	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	24	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	25	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	26	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
	27	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	28	*/
	29
[750636a]	30	/** @addtogroup softfloat
[846848a6]	31	* @{
	32	*/
[c67aff2]	33	/** @file Common helper operations.
[846848a6]	34	*/
	35
[2416085]	36	#include "sftypes.h"
	37	#include "common.h"
[e979fea]	38
[c67aff2]	39	/* Table for fast leading zeroes counting. */
[1d83419]	40	char zeroTable[256] = {
[7462674]	41	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, \
[1d83419]	42	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, \
	43	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, \
	44	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, \
	45	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
	46	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
	47	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
	48	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
	49	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
	50	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
	51	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
	52	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
	53	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
	54	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
	55	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
	56	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
	57	};
	58
[2416085]	59	/**
[c67aff2]	60	* Take fraction shifted by 10 bits to the left, round it, normalize it
	61	* and detect exceptions
	62	*
	63	* @param cexp Exponent with bias.
	64	* @param cfrac Fraction shifted 10 bits to the left with added hidden bit.
	65	* @param sign Resulting sign.
	66	* @return Finished double-precision float.
[e979fea]	67	*/
[88d5c1e]	68	float64 finish_float64(int32_t cexp, uint64_t cfrac, char sign)
[e979fea]	69	{
	70	float64 result;
	71
	72	result.parts.sign = sign;
	73
	74	/* find first nonzero digit and shift result and detect possibly underflow */
[c67aff2]	75	while ((cexp > 0) && (cfrac) &&
	76	(!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))))) {
[2416085]	77	cexp--;
[e979fea]	78	cfrac <<= 1;
[c67aff2]	79	/* TODO: fix underflow */
	80	}
[e979fea]	81
[c67aff2]	82	if ((cexp < 0) \|\| (cexp == 0 &&
	83	(!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))))) {
[d3ca210]	84	/* FIXME: underflow */
	85	result.parts.exp = 0;
[f1f95f2]	86	if ((cexp + FLOAT64_FRACTION_SIZE + 1) < 0) { /* +1 is place for rounding */
[d3ca210]	87	result.parts.fraction = 0;
	88	return result;
	89	}
[f1f95f2]	90
[d3ca210]	91	while (cexp < 0) {
	92	cexp++;
	93	cfrac >>= 1;
	94	}
[f1f95f2]	95
	96	cfrac += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));
	97
[1d83419]	98	if (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))) {
[c67aff2]	99	result.parts.fraction =
	100	((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK));
[f1f95f2]	101	return result;
	102	}
	103	} else {
	104	cfrac += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));
[d3ca210]	105	}
	106
	107	++cexp;
[e979fea]	108
[c67aff2]	109	if (cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))) {
[e979fea]	110	++cexp;
	111	cfrac >>= 1;
[2416085]	112	}
[e979fea]	113
	114	/* check overflow */
[c67aff2]	115	if (cexp >= FLOAT64_MAX_EXPONENT) {
[e979fea]	116	/* FIXME: overflow, return infinity */
	117	result.parts.exp = FLOAT64_MAX_EXPONENT;
	118	result.parts.fraction = 0;
	119	return result;
	120	}
	121
[c67aff2]	122	result.parts.exp = (uint32_t) cexp;
[e979fea]	123
[c67aff2]	124	result.parts.fraction =
	125	((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK));
[e979fea]	126
	127	return result;
	128	}
[d3ca210]	129
[c67aff2]	130	/**
	131	* Take fraction, round it, normalize it and detect exceptions
	132	*
	133	* @param cexp Exponent with bias.
	134	* @param cfrac_hi High part of the fraction shifted 14 bits to the left
	135	* with added hidden bit.
	136	* @param cfrac_lo Low part of the fraction shifted 14 bits to the left
	137	* with added hidden bit.
	138	* @param sign Resulting sign.
	139	* @param shift_out Bits right-shifted out from fraction by the caller.
	140	* @return Finished quadruple-precision float.
[1d83419]	141	*/
[88d5c1e]	142	float128 finish_float128(int32_t cexp, uint64_t cfrac_hi, uint64_t cfrac_lo,
[c67aff2]	143	char sign, uint64_t shift_out)
[1d83419]	144	{
[c67aff2]	145	float128 result;
	146	uint64_t tmp_hi, tmp_lo;
	147
	148	result.parts.sign = sign;
	149
	150	/* find first nonzero digit and shift result and detect possibly underflow */
	151	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
	152	1, &tmp_hi, &tmp_lo);
	153	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
	154	while ((cexp > 0) && (lt128(0x0ll, 0x0ll, cfrac_hi, cfrac_lo)) &&
	155	(!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo))) {
	156	cexp--;
	157	lshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
	158	/* TODO: fix underflow */
	159
	160	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
	161	1, &tmp_hi, &tmp_lo);
	162	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
	163	}
	164
	165	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
	166	1, &tmp_hi, &tmp_lo);
	167	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
	168	if ((cexp < 0) \|\| (cexp == 0 &&
	169	(!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)))) {
	170	/* FIXME: underflow */
	171	result.parts.exp = 0;
	172	if ((cexp + FLOAT128_FRACTION_SIZE + 1) < 0) { /* +1 is place for rounding */
	173	result.parts.frac_hi = 0x0ll;
	174	result.parts.frac_lo = 0x0ll;
	175	return result;
	176	}
	177
	178	while (cexp < 0) {
	179	cexp++;
	180	rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
	181	}
	182
	183	if (shift_out & (0x1ull < 64)) {
	184	add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo);
	185	}
	186
	187	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
	188	1, &tmp_hi, &tmp_lo);
	189	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
	190	if (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
	191	not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
	192	&tmp_hi, &tmp_lo);
	193	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
	194	result.parts.frac_hi = tmp_hi;
	195	result.parts.frac_lo = tmp_lo;
	196	return result;
	197	}
	198	} else {
	199	if (shift_out & (0x1ull < 64)) {
	200	add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo);
[1d83419]	201	}
	202	}
	203
[c67aff2]	204	++cexp;
	205
	206	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
	207	1, &tmp_hi, &tmp_lo);
	208	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
	209	if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
	210	++cexp;
	211	rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
	212	}
	213
	214	/* check overflow */
	215	if (cexp >= FLOAT128_MAX_EXPONENT) {
	216	/* FIXME: overflow, return infinity */
	217	result.parts.exp = FLOAT128_MAX_EXPONENT;
	218	result.parts.frac_hi = 0x0ll;
	219	result.parts.frac_lo = 0x0ll;
	220	return result;
	221	}
	222
	223	result.parts.exp = (uint32_t) cexp;
	224
	225	not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
	226	&tmp_hi, &tmp_lo);
	227	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
	228	result.parts.frac_hi = tmp_hi;
	229	result.parts.frac_lo = tmp_lo;
	230
	231	return result;
[1d83419]	232	}
	233
[c67aff2]	234	/**
	235	* Counts leading zeroes in byte.
	236	*
	237	* @param i Byte for which to count leading zeroes.
	238	* @return Number of detected leading zeroes.
	239	*/
[88d5c1e]	240	int count_zeroes8(uint8_t i)
[c67aff2]	241	{
	242	return zeroTable[i];
	243	}
	244
	245	/**
	246	* Counts leading zeroes in 32bit unsigned integer.
	247	*
	248	* @param i Integer for which to count leading zeroes.
	249	* @return Number of detected leading zeroes.
[1d83419]	250	*/
[88d5c1e]	251	int count_zeroes32(uint32_t i)
[1d83419]	252	{
	253	int j;
[c67aff2]	254	for (j = 0; j < 32; j += 8) {
	255	if (i & (0xFF << (24 - j))) {
[88d5c1e]	256	return (j + count_zeroes8(i >> (24 - j)));
[1d83419]	257	}
	258	}
	259
	260	return 32;
	261	}
	262
[c67aff2]	263	/**
	264	* Counts leading zeroes in 64bit unsigned integer.
	265	*
	266	* @param i Integer for which to count leading zeroes.
	267	* @return Number of detected leading zeroes.
[1d83419]	268	*/
[88d5c1e]	269	int count_zeroes64(uint64_t i)
[1d83419]	270	{
[c67aff2]	271	int j;
	272	for (j = 0; j < 64; j += 8) {
	273	if (i & (0xFFll << (56 - j))) {
[88d5c1e]	274	return (j + count_zeroes8(i >> (56 - j)));
[c67aff2]	275	}
	276	}
	277
	278	return 64;
[1d83419]	279	}
	280
[c67aff2]	281	/**
	282	* Round and normalize number expressed by exponent and fraction with
	283	* first bit (equal to hidden bit) at 30th bit.
	284	*
	285	* @param exp Exponent part.
	286	* @param fraction Fraction with hidden bit shifted to 30th bit.
[1d83419]	287	*/
[88d5c1e]	288	void round_float32(int32_t exp, uint32_t fraction)
[1d83419]	289	{
	290	/* rounding - if first bit after fraction is set then round up */
[c67aff2]	291	(*fraction) += (0x1 << (32 - FLOAT32_FRACTION_SIZE - 3));
[1d83419]	292
[c67aff2]	293	if ((*fraction) &
	294	(FLOAT32_HIDDEN_BIT_MASK << (32 - FLOAT32_FRACTION_SIZE - 1))) {
[1d83419]	295	/* rounding overflow */
	296	++(*exp);
	297	(*fraction) >>= 1;
[c67aff2]	298	}
[1d83419]	299
[c67aff2]	300	if (((exp) >= FLOAT32_MAX_EXPONENT) \|\| ((exp) < 0)) {
[1d83419]	301	/* overflow - set infinity as result */
	302	(*exp) = FLOAT32_MAX_EXPONENT;
	303	(*fraction) = 0;
	304	}
	305	}
	306
[c67aff2]	307	/**
	308	* Round and normalize number expressed by exponent and fraction with
[94b696c]	309	* first bit (equal to hidden bit) at bit 62.
[c67aff2]	310	*
	311	* @param exp Exponent part.
[94b696c]	312	* @param fraction Fraction with hidden bit shifted to bit 62.
[1d83419]	313	*/
[88d5c1e]	314	void round_float64(int32_t exp, uint64_t fraction)
[1d83419]	315	{
[94b696c]	316	/*
	317	* Rounding - if first bit after fraction is set then round up.
	318	*/
	319
	320	/*
	321	* Add 1 to the least significant bit of the fraction respecting the
	322	* current shift to bit 62 and see if there will be a carry to bit 63.
	323	*/
[c67aff2]	324	(*fraction) += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));
[1d83419]	325
[94b696c]	326	/* See if there was a carry to bit 63. */
[c67aff2]	327	if ((*fraction) &
[94b696c]	328	(FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))) {
[1d83419]	329	/* rounding overflow */
	330	++(*exp);
	331	(*fraction) >>= 1;
[c67aff2]	332	}
[1d83419]	333
[c67aff2]	334	if (((exp) >= FLOAT64_MAX_EXPONENT) \|\| ((exp) < 0)) {
[1d83419]	335	/* overflow - set infinity as result */
	336	(*exp) = FLOAT64_MAX_EXPONENT;
	337	(*fraction) = 0;
[c67aff2]	338	}
	339	}
	340
	341	/**
	342	* Round and normalize number expressed by exponent and fraction with
	343	* first bit (equal to hidden bit) at 126th bit.
	344	*
	345	* @param exp Exponent part.
	346	* @param frac_hi High part of fraction part with hidden bit shifted to 126th bit.
	347	* @param frac_lo Low part of fraction part with hidden bit shifted to 126th bit.
	348	*/
[88d5c1e]	349	void round_float128(int32_t exp, uint64_t frac_hi, uint64_t *frac_lo)
[c67aff2]	350	{
	351	uint64_t tmp_hi, tmp_lo;
	352
	353	/* rounding - if first bit after fraction is set then round up */
	354	lshift128(0x0ll, 0x1ll, (128 - FLOAT128_FRACTION_SIZE - 3), &tmp_hi, &tmp_lo);
	355	add128(frac_hi, frac_lo, tmp_hi, tmp_lo, frac_hi, frac_lo);
	356
	357	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
	358	(128 - FLOAT128_FRACTION_SIZE - 3), &tmp_hi, &tmp_lo);
	359	and128(frac_hi, frac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
	360	if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
	361	/* rounding overflow */
	362	++(*exp);
	363	rshift128(frac_hi, frac_lo, 1, frac_hi, frac_lo);
	364	}
	365
	366	if (((exp) >= FLOAT128_MAX_EXPONENT) \|\| ((exp) < 0)) {
	367	/* overflow - set infinity as result */
	368	(*exp) = FLOAT128_MAX_EXPONENT;
	369	(*frac_hi) = 0;
	370	(*frac_lo) = 0;
	371	}
	372	}
	373
	374	/**
	375	* Logical shift left on the 128-bit operand.
	376	*
	377	* @param a_hi High part of the input operand.
	378	* @param a_lo Low part of the input operand.
	379	* @param shift Number of bits by witch to shift.
	380	* @param r_hi Address to store high part of the result.
	381	* @param r_lo Address to store low part of the result.
	382	*/
	383	void lshift128(
	384	uint64_t a_hi, uint64_t a_lo, int shift,
	385	uint64_t r_hi, uint64_t r_lo)
	386	{
	387	if (shift <= 0) {
	388	/* do nothing */
	389	} else if (shift >= 128) {
	390	a_hi = 0;
	391	a_lo = 0;
	392	} else if (shift >= 64) {
	393	a_hi = a_lo << (shift - 64);
	394	a_lo = 0;
	395	} else {
	396	a_hi <<= shift;
	397	a_hi \|= a_lo >> (64 - shift);
	398	a_lo <<= shift;
	399	}
	400
	401	*r_hi = a_hi;
	402	*r_lo = a_lo;
	403	}
	404
	405	/**
	406	* Logical shift right on the 128-bit operand.
	407	*
	408	* @param a_hi High part of the input operand.
	409	* @param a_lo Low part of the input operand.
	410	* @param shift Number of bits by witch to shift.
	411	* @param r_hi Address to store high part of the result.
	412	* @param r_lo Address to store low part of the result.
	413	*/
	414	void rshift128(
	415	uint64_t a_hi, uint64_t a_lo, int shift,
	416	uint64_t r_hi, uint64_t r_lo)
	417	{
	418	if (shift <= 0) {
	419	/* do nothing */
	420	} else if (shift >= 128) {
	421	a_hi = 0;
	422	a_lo = 0;
	423	} else if (shift >= 64) {
	424	a_lo = a_hi >> (shift - 64);
	425	a_hi = 0;
	426	} else {
	427	a_lo >>= shift;
	428	a_lo \|= a_hi << (64 - shift);
	429	a_hi >>= shift;
	430	}
	431
	432	*r_hi = a_hi;
	433	*r_lo = a_lo;
	434	}
	435
	436	/**
	437	* Bitwise AND on 128-bit operands.
	438	*
	439	* @param a_hi High part of the first input operand.
	440	* @param a_lo Low part of the first input operand.
	441	* @param b_hi High part of the second input operand.
	442	* @param b_lo Low part of the second input operand.
	443	* @param r_hi Address to store high part of the result.
	444	* @param r_lo Address to store low part of the result.
	445	*/
	446	void and128(
	447	uint64_t a_hi, uint64_t a_lo,
	448	uint64_t b_hi, uint64_t b_lo,
	449	uint64_t r_hi, uint64_t r_lo)
	450	{
	451	*r_hi = a_hi & b_hi;
	452	*r_lo = a_lo & b_lo;
	453	}
	454
	455	/**
	456	* Bitwise inclusive OR on 128-bit operands.
	457	*
	458	* @param a_hi High part of the first input operand.
	459	* @param a_lo Low part of the first input operand.
	460	* @param b_hi High part of the second input operand.
	461	* @param b_lo Low part of the second input operand.
	462	* @param r_hi Address to store high part of the result.
	463	* @param r_lo Address to store low part of the result.
	464	*/
	465	void or128(
	466	uint64_t a_hi, uint64_t a_lo,
	467	uint64_t b_hi, uint64_t b_lo,
	468	uint64_t r_hi, uint64_t r_lo)
	469	{
	470	*r_hi = a_hi \| b_hi;
	471	*r_lo = a_lo \| b_lo;
	472	}
	473
	474	/**
	475	* Bitwise exclusive OR on 128-bit operands.
	476	*
	477	* @param a_hi High part of the first input operand.
	478	* @param a_lo Low part of the first input operand.
	479	* @param b_hi High part of the second input operand.
	480	* @param b_lo Low part of the second input operand.
	481	* @param r_hi Address to store high part of the result.
	482	* @param r_lo Address to store low part of the result.
	483	*/
	484	void xor128(
	485	uint64_t a_hi, uint64_t a_lo,
	486	uint64_t b_hi, uint64_t b_lo,
	487	uint64_t r_hi, uint64_t r_lo)
	488	{
	489	*r_hi = a_hi ^ b_hi;
	490	*r_lo = a_lo ^ b_lo;
	491	}
	492
	493	/**
	494	* Bitwise NOT on the 128-bit operand.
	495	*
	496	* @param a_hi High part of the input operand.
	497	* @param a_lo Low part of the input operand.
	498	* @param r_hi Address to store high part of the result.
	499	* @param r_lo Address to store low part of the result.
	500	*/
	501	void not128(
	502	uint64_t a_hi, uint64_t a_lo,
	503	uint64_t r_hi, uint64_t r_lo)
	504	{
	505	*r_hi = ~a_hi;
	506	*r_lo = ~a_lo;
	507	}
	508
	509	/**
	510	* Equality comparison of 128-bit operands.
	511	*
	512	* @param a_hi High part of the first input operand.
	513	* @param a_lo Low part of the first input operand.
	514	* @param b_hi High part of the second input operand.
	515	* @param b_lo Low part of the second input operand.
	516	* @return 1 if operands are equal, 0 otherwise.
	517	*/
	518	int eq128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
	519	{
	520	return (a_hi == b_hi) && (a_lo == b_lo);
	521	}
	522
	523	/**
	524	* Lower-or-equal comparison of 128-bit operands.
	525	*
	526	* @param a_hi High part of the first input operand.
	527	* @param a_lo Low part of the first input operand.
	528	* @param b_hi High part of the second input operand.
	529	* @param b_lo Low part of the second input operand.
	530	* @return 1 if a is lower or equal to b, 0 otherwise.
	531	*/
	532	int le128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
	533	{
	534	return (a_hi < b_hi) \|\| ((a_hi == b_hi) && (a_lo <= b_lo));
	535	}
	536
	537	/**
	538	* Lower-than comparison of 128-bit operands.
	539	*
	540	* @param a_hi High part of the first input operand.
	541	* @param a_lo Low part of the first input operand.
	542	* @param b_hi High part of the second input operand.
	543	* @param b_lo Low part of the second input operand.
	544	* @return 1 if a is lower than b, 0 otherwise.
	545	*/
	546	int lt128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
	547	{
	548	return (a_hi < b_hi) \|\| ((a_hi == b_hi) && (a_lo < b_lo));
	549	}
	550
	551	/**
	552	* Addition of two 128-bit unsigned integers.
	553	*
	554	* @param a_hi High part of the first input operand.
	555	* @param a_lo Low part of the first input operand.
	556	* @param b_hi High part of the second input operand.
	557	* @param b_lo Low part of the second input operand.
	558	* @param r_hi Address to store high part of the result.
	559	* @param r_lo Address to store low part of the result.
	560	*/
	561	void add128(uint64_t a_hi, uint64_t a_lo,
	562	uint64_t b_hi, uint64_t b_lo,
	563	uint64_t r_hi, uint64_t r_lo)
	564	{
	565	uint64_t low = a_lo + b_lo;
	566	*r_lo = low;
	567	/* detect overflow to add a carry */
	568	*r_hi = a_hi + b_hi + (low < a_lo);
	569	}
	570
	571	/**
	572	* Substraction of two 128-bit unsigned integers.
	573	*
	574	* @param a_hi High part of the first input operand.
	575	* @param a_lo Low part of the first input operand.
	576	* @param b_hi High part of the second input operand.
	577	* @param b_lo Low part of the second input operand.
	578	* @param r_hi Address to store high part of the result.
	579	* @param r_lo Address to store low part of the result.
	580	*/
	581	void sub128(uint64_t a_hi, uint64_t a_lo,
	582	uint64_t b_hi, uint64_t b_lo,
	583	uint64_t r_hi, uint64_t r_lo)
	584	{
	585	*r_lo = a_lo - b_lo;
	586	/* detect underflow to substract a carry */
	587	*r_hi = a_hi - b_hi - (a_lo < b_lo);
	588	}
	589
	590	/**
	591	* Multiplication of two 64-bit unsigned integers.
	592	*
	593	* @param a First input operand.
	594	* @param b Second input operand.
	595	* @param r_hi Address to store high part of the result.
	596	* @param r_lo Address to store low part of the result.
	597	*/
	598	void mul64(uint64_t a, uint64_t b, uint64_t r_hi, uint64_t r_lo)
	599	{
	600	uint64_t low, high, middle1, middle2;
	601	uint32_t alow, blow;
	602
	603	alow = a & 0xFFFFFFFF;
	604	blow = b & 0xFFFFFFFF;
	605
	606	a >>= 32;
	607	b >>= 32;
	608
	609	low = ((uint64_t) alow) * blow;
	610	middle1 = a * blow;
	611	middle2 = alow * b;
	612	high = a * b;
	613
	614	middle1 += middle2;
	615	high += (((uint64_t) (middle1 < middle2)) << 32) + (middle1 >> 32);
	616	middle1 <<= 32;
	617	low += middle1;
	618	high += (low < middle1);
	619	*r_lo = low;
	620	*r_hi = high;
	621	}
	622
	623	/**
	624	* Multiplication of two 128-bit unsigned integers.
	625	*
	626	* @param a_hi High part of the first input operand.
	627	* @param a_lo Low part of the first input operand.
	628	* @param b_hi High part of the second input operand.
	629	* @param b_lo Low part of the second input operand.
	630	* @param r_hihi Address to store first (highest) quarter of the result.
	631	* @param r_hilo Address to store second quarter of the result.
	632	* @param r_lohi Address to store third quarter of the result.
	633	* @param r_lolo Address to store fourth (lowest) quarter of the result.
	634	*/
	635	void mul128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo,
	636	uint64_t r_hihi, uint64_t r_hilo, uint64_t r_lohi, uint64_t r_lolo)
	637	{
	638	uint64_t hihi, hilo, lohi, lolo;
	639	uint64_t tmp1, tmp2;
	640
	641	mul64(a_lo, b_lo, &lohi, &lolo);
	642	mul64(a_lo, b_hi, &hilo, &tmp2);
	643	add128(hilo, tmp2, 0x0ll, lohi, &hilo, &lohi);
	644	mul64(a_hi, b_hi, &hihi, &tmp1);
	645	add128(hihi, tmp1, 0x0ll, hilo, &hihi, &hilo);
	646	mul64(a_hi, b_lo, &tmp1, &tmp2);
	647	add128(tmp1, tmp2, 0x0ll, lohi, &tmp1, &lohi);
	648	add128(hihi, hilo, 0x0ll, tmp1, &hihi, &hilo);
	649
	650	*r_hihi = hihi;
	651	*r_hilo = hilo;
	652	*r_lohi = lohi;
	653	*r_lolo = lolo;
	654	}
	655
	656	/**
	657	* Estimate the quotient of 128-bit unsigned divident and 64-bit unsigned
	658	* divisor.
	659	*
	660	* @param a_hi High part of the divident.
	661	* @param a_lo Low part of the divident.
	662	* @param b Divisor.
	663	* @return Quotient approximation.
	664	*/
	665	uint64_t div128est(uint64_t a_hi, uint64_t a_lo, uint64_t b)
	666	{
	667	uint64_t b_hi, b_lo;
	668	uint64_t rem_hi, rem_lo;
	669	uint64_t tmp_hi, tmp_lo;
	670	uint64_t result;
	671
	672	if (b <= a_hi) {
	673	return 0xFFFFFFFFFFFFFFFFull;
	674	}
	675
	676	b_hi = b >> 32;
	677	result = ((b_hi << 32) <= a_hi) ? (0xFFFFFFFFull << 32) : (a_hi / b_hi) << 32;
	678	mul64(b, result, &tmp_hi, &tmp_lo);
	679	sub128(a_hi, a_lo, tmp_hi, tmp_lo, &rem_hi, &rem_lo);
	680
	681	while ((int64_t) rem_hi < 0) {
	682	result -= 0x1ll << 32;
	683	b_lo = b << 32;
	684	add128(rem_hi, rem_lo, b_hi, b_lo, &rem_hi, &rem_lo);
	685	}
	686
	687	rem_hi = (rem_hi << 32) \| (rem_lo >> 32);
	688	if ((b_hi << 32) <= rem_hi) {
	689	result \|= 0xFFFFFFFF;
	690	} else {
	691	result \|= rem_hi / b_hi;
[1d83419]	692	}
	693
[c67aff2]	694	return result;
[1d83419]	695	}
	696
[231a60a]	697	/** @}
[846848a6]	698	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: