Context Navigation

source: mainline/uspace/lib/softfloat/common.c@ 94b696c

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since 94b696c was 94b696c, checked in by Jakub Jermar <jakub@…>, 13 years ago
Fix detection of rounding overflow for 64-bit floats.
Property mode set to `100644`
File size: 19.6 KB

Line
1	/*
2	* Copyright (c) 2005 Josef Cejka
3	* Copyright (c) 2011 Petr Koupy
4	* All rights reserved.
5	*
6	* Redistribution and use in source and binary forms, with or without
7	* modification, are permitted provided that the following conditions
8	* are met:
9	*
10	* - Redistributions of source code must retain the above copyright
11	* notice, this list of conditions and the following disclaimer.
12	* - Redistributions in binary form must reproduce the above copyright
13	* notice, this list of conditions and the following disclaimer in the
14	* documentation and/or other materials provided with the distribution.
15	* - The name of the author may not be used to endorse or promote products
16	* derived from this software without specific prior written permission.
17	*
18	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28	*/
29
30	/** @addtogroup softfloat
31	* @{
32	*/
33	/** @file Common helper operations.
34	*/
35
36	#include "sftypes.h"
37	#include "common.h"
38
39	/* Table for fast leading zeroes counting. */
40	char zeroTable[256] = {
41	8, 7, 6, 6, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, \
42	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, \
43	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, \
44	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, \
45	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
46	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
47	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
48	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
49	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
50	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
51	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
52	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
53	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
54	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
55	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
56	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
57	};
58
59	/**
60	* Take fraction shifted by 10 bits to the left, round it, normalize it
61	* and detect exceptions
62	*
63	* @param cexp Exponent with bias.
64	* @param cfrac Fraction shifted 10 bits to the left with added hidden bit.
65	* @param sign Resulting sign.
66	* @return Finished double-precision float.
67	*/
68	float64 finish_float64(int32_t cexp, uint64_t cfrac, char sign)
69	{
70	float64 result;
71
72	result.parts.sign = sign;
73
74	/* find first nonzero digit and shift result and detect possibly underflow */
75	while ((cexp > 0) && (cfrac) &&
76	(!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))))) {
77	cexp--;
78	cfrac <<= 1;
79	/* TODO: fix underflow */
80	}
81
82	if ((cexp < 0) \|\| (cexp == 0 &&
83	(!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))))) {
84	/* FIXME: underflow */
85	result.parts.exp = 0;
86	if ((cexp + FLOAT64_FRACTION_SIZE + 1) < 0) { /* +1 is place for rounding */
87	result.parts.fraction = 0;
88	return result;
89	}
90
91	while (cexp < 0) {
92	cexp++;
93	cfrac >>= 1;
94	}
95
96	cfrac += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));
97
98	if (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))) {
99	result.parts.fraction =
100	((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK));
101	return result;
102	}
103	} else {
104	cfrac += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));
105	}
106
107	++cexp;
108
109	if (cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))) {
110	++cexp;
111	cfrac >>= 1;
112	}
113
114	/* check overflow */
115	if (cexp >= FLOAT64_MAX_EXPONENT) {
116	/* FIXME: overflow, return infinity */
117	result.parts.exp = FLOAT64_MAX_EXPONENT;
118	result.parts.fraction = 0;
119	return result;
120	}
121
122	result.parts.exp = (uint32_t) cexp;
123
124	result.parts.fraction =
125	((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK));
126
127	return result;
128	}
129
130	/**
131	* Take fraction, round it, normalize it and detect exceptions
132	*
133	* @param cexp Exponent with bias.
134	* @param cfrac_hi High part of the fraction shifted 14 bits to the left
135	* with added hidden bit.
136	* @param cfrac_lo Low part of the fraction shifted 14 bits to the left
137	* with added hidden bit.
138	* @param sign Resulting sign.
139	* @param shift_out Bits right-shifted out from fraction by the caller.
140	* @return Finished quadruple-precision float.
141	*/
142	float128 finish_float128(int32_t cexp, uint64_t cfrac_hi, uint64_t cfrac_lo,
143	char sign, uint64_t shift_out)
144	{
145	float128 result;
146	uint64_t tmp_hi, tmp_lo;
147
148	result.parts.sign = sign;
149
150	/* find first nonzero digit and shift result and detect possibly underflow */
151	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
152	1, &tmp_hi, &tmp_lo);
153	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
154	while ((cexp > 0) && (lt128(0x0ll, 0x0ll, cfrac_hi, cfrac_lo)) &&
155	(!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo))) {
156	cexp--;
157	lshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
158	/* TODO: fix underflow */
159
160	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
161	1, &tmp_hi, &tmp_lo);
162	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
163	}
164
165	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
166	1, &tmp_hi, &tmp_lo);
167	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
168	if ((cexp < 0) \|\| (cexp == 0 &&
169	(!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)))) {
170	/* FIXME: underflow */
171	result.parts.exp = 0;
172	if ((cexp + FLOAT128_FRACTION_SIZE + 1) < 0) { /* +1 is place for rounding */
173	result.parts.frac_hi = 0x0ll;
174	result.parts.frac_lo = 0x0ll;
175	return result;
176	}
177
178	while (cexp < 0) {
179	cexp++;
180	rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
181	}
182
183	if (shift_out & (0x1ull < 64)) {
184	add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo);
185	}
186
187	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
188	1, &tmp_hi, &tmp_lo);
189	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
190	if (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
191	not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
192	&tmp_hi, &tmp_lo);
193	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
194	result.parts.frac_hi = tmp_hi;
195	result.parts.frac_lo = tmp_lo;
196	return result;
197	}
198	} else {
199	if (shift_out & (0x1ull < 64)) {
200	add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo);
201	}
202	}
203
204	++cexp;
205
206	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
207	1, &tmp_hi, &tmp_lo);
208	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
209	if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
210	++cexp;
211	rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
212	}
213
214	/* check overflow */
215	if (cexp >= FLOAT128_MAX_EXPONENT) {
216	/* FIXME: overflow, return infinity */
217	result.parts.exp = FLOAT128_MAX_EXPONENT;
218	result.parts.frac_hi = 0x0ll;
219	result.parts.frac_lo = 0x0ll;
220	return result;
221	}
222
223	result.parts.exp = (uint32_t) cexp;
224
225	not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
226	&tmp_hi, &tmp_lo);
227	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
228	result.parts.frac_hi = tmp_hi;
229	result.parts.frac_lo = tmp_lo;
230
231	return result;
232	}
233
234	/**
235	* Counts leading zeroes in byte.
236	*
237	* @param i Byte for which to count leading zeroes.
238	* @return Number of detected leading zeroes.
239	*/
240	int count_zeroes8(uint8_t i)
241	{
242	return zeroTable[i];
243	}
244
245	/**
246	* Counts leading zeroes in 32bit unsigned integer.
247	*
248	* @param i Integer for which to count leading zeroes.
249	* @return Number of detected leading zeroes.
250	*/
251	int count_zeroes32(uint32_t i)
252	{
253	int j;
254	for (j = 0; j < 32; j += 8) {
255	if (i & (0xFF << (24 - j))) {
256	return (j + count_zeroes8(i >> (24 - j)));
257	}
258	}
259
260	return 32;
261	}
262
263	/**
264	* Counts leading zeroes in 64bit unsigned integer.
265	*
266	* @param i Integer for which to count leading zeroes.
267	* @return Number of detected leading zeroes.
268	*/
269	int count_zeroes64(uint64_t i)
270	{
271	int j;
272	for (j = 0; j < 64; j += 8) {
273	if (i & (0xFFll << (56 - j))) {
274	return (j + count_zeroes8(i >> (56 - j)));
275	}
276	}
277
278	return 64;
279	}
280
281	/**
282	* Round and normalize number expressed by exponent and fraction with
283	* first bit (equal to hidden bit) at 30th bit.
284	*
285	* @param exp Exponent part.
286	* @param fraction Fraction with hidden bit shifted to 30th bit.
287	*/
288	void round_float32(int32_t exp, uint32_t fraction)
289	{
290	/* rounding - if first bit after fraction is set then round up */
291	(*fraction) += (0x1 << (32 - FLOAT32_FRACTION_SIZE - 3));
292
293	if ((*fraction) &
294	(FLOAT32_HIDDEN_BIT_MASK << (32 - FLOAT32_FRACTION_SIZE - 1))) {
295	/* rounding overflow */
296	++(*exp);
297	(*fraction) >>= 1;
298	}
299
300	if (((exp) >= FLOAT32_MAX_EXPONENT) \|\| ((exp) < 0)) {
301	/* overflow - set infinity as result */
302	(*exp) = FLOAT32_MAX_EXPONENT;
303	(*fraction) = 0;
304	}
305	}
306
307	/**
308	* Round and normalize number expressed by exponent and fraction with
309	* first bit (equal to hidden bit) at bit 62.
310	*
311	* @param exp Exponent part.
312	* @param fraction Fraction with hidden bit shifted to bit 62.
313	*/
314	void round_float64(int32_t exp, uint64_t fraction)
315	{
316	/*
317	* Rounding - if first bit after fraction is set then round up.
318	*/
319
320	/*
321	* Add 1 to the least significant bit of the fraction respecting the
322	* current shift to bit 62 and see if there will be a carry to bit 63.
323	*/
324	(*fraction) += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));
325
326	/* See if there was a carry to bit 63. */
327	if ((*fraction) &
328	(FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))) {
329	/* rounding overflow */
330	++(*exp);
331	(*fraction) >>= 1;
332	}
333
334	if (((exp) >= FLOAT64_MAX_EXPONENT) \|\| ((exp) < 0)) {
335	/* overflow - set infinity as result */
336	(*exp) = FLOAT64_MAX_EXPONENT;
337	(*fraction) = 0;
338	}
339	}
340
341	/**
342	* Round and normalize number expressed by exponent and fraction with
343	* first bit (equal to hidden bit) at 126th bit.
344	*
345	* @param exp Exponent part.
346	* @param frac_hi High part of fraction part with hidden bit shifted to 126th bit.
347	* @param frac_lo Low part of fraction part with hidden bit shifted to 126th bit.
348	*/
349	void round_float128(int32_t exp, uint64_t frac_hi, uint64_t *frac_lo)
350	{
351	uint64_t tmp_hi, tmp_lo;
352
353	/* rounding - if first bit after fraction is set then round up */
354	lshift128(0x0ll, 0x1ll, (128 - FLOAT128_FRACTION_SIZE - 3), &tmp_hi, &tmp_lo);
355	add128(frac_hi, frac_lo, tmp_hi, tmp_lo, frac_hi, frac_lo);
356
357	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
358	(128 - FLOAT128_FRACTION_SIZE - 3), &tmp_hi, &tmp_lo);
359	and128(frac_hi, frac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
360	if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
361	/* rounding overflow */
362	++(*exp);
363	rshift128(frac_hi, frac_lo, 1, frac_hi, frac_lo);
364	}
365
366	if (((exp) >= FLOAT128_MAX_EXPONENT) \|\| ((exp) < 0)) {
367	/* overflow - set infinity as result */
368	(*exp) = FLOAT128_MAX_EXPONENT;
369	(*frac_hi) = 0;
370	(*frac_lo) = 0;
371	}
372	}
373
374	/**
375	* Logical shift left on the 128-bit operand.
376	*
377	* @param a_hi High part of the input operand.
378	* @param a_lo Low part of the input operand.
379	* @param shift Number of bits by witch to shift.
380	* @param r_hi Address to store high part of the result.
381	* @param r_lo Address to store low part of the result.
382	*/
383	void lshift128(
384	uint64_t a_hi, uint64_t a_lo, int shift,
385	uint64_t r_hi, uint64_t r_lo)
386	{
387	if (shift <= 0) {
388	/* do nothing */
389	} else if (shift >= 128) {
390	a_hi = 0;
391	a_lo = 0;
392	} else if (shift >= 64) {
393	a_hi = a_lo << (shift - 64);
394	a_lo = 0;
395	} else {
396	a_hi <<= shift;
397	a_hi \|= a_lo >> (64 - shift);
398	a_lo <<= shift;
399	}
400
401	*r_hi = a_hi;
402	*r_lo = a_lo;
403	}
404
405	/**
406	* Logical shift right on the 128-bit operand.
407	*
408	* @param a_hi High part of the input operand.
409	* @param a_lo Low part of the input operand.
410	* @param shift Number of bits by witch to shift.
411	* @param r_hi Address to store high part of the result.
412	* @param r_lo Address to store low part of the result.
413	*/
414	void rshift128(
415	uint64_t a_hi, uint64_t a_lo, int shift,
416	uint64_t r_hi, uint64_t r_lo)
417	{
418	if (shift <= 0) {
419	/* do nothing */
420	} else if (shift >= 128) {
421	a_hi = 0;
422	a_lo = 0;
423	} else if (shift >= 64) {
424	a_lo = a_hi >> (shift - 64);
425	a_hi = 0;
426	} else {
427	a_lo >>= shift;
428	a_lo \|= a_hi << (64 - shift);
429	a_hi >>= shift;
430	}
431
432	*r_hi = a_hi;
433	*r_lo = a_lo;
434	}
435
436	/**
437	* Bitwise AND on 128-bit operands.
438	*
439	* @param a_hi High part of the first input operand.
440	* @param a_lo Low part of the first input operand.
441	* @param b_hi High part of the second input operand.
442	* @param b_lo Low part of the second input operand.
443	* @param r_hi Address to store high part of the result.
444	* @param r_lo Address to store low part of the result.
445	*/
446	void and128(
447	uint64_t a_hi, uint64_t a_lo,
448	uint64_t b_hi, uint64_t b_lo,
449	uint64_t r_hi, uint64_t r_lo)
450	{
451	*r_hi = a_hi & b_hi;
452	*r_lo = a_lo & b_lo;
453	}
454
455	/**
456	* Bitwise inclusive OR on 128-bit operands.
457	*
458	* @param a_hi High part of the first input operand.
459	* @param a_lo Low part of the first input operand.
460	* @param b_hi High part of the second input operand.
461	* @param b_lo Low part of the second input operand.
462	* @param r_hi Address to store high part of the result.
463	* @param r_lo Address to store low part of the result.
464	*/
465	void or128(
466	uint64_t a_hi, uint64_t a_lo,
467	uint64_t b_hi, uint64_t b_lo,
468	uint64_t r_hi, uint64_t r_lo)
469	{
470	*r_hi = a_hi \| b_hi;
471	*r_lo = a_lo \| b_lo;
472	}
473
474	/**
475	* Bitwise exclusive OR on 128-bit operands.
476	*
477	* @param a_hi High part of the first input operand.
478	* @param a_lo Low part of the first input operand.
479	* @param b_hi High part of the second input operand.
480	* @param b_lo Low part of the second input operand.
481	* @param r_hi Address to store high part of the result.
482	* @param r_lo Address to store low part of the result.
483	*/
484	void xor128(
485	uint64_t a_hi, uint64_t a_lo,
486	uint64_t b_hi, uint64_t b_lo,
487	uint64_t r_hi, uint64_t r_lo)
488	{
489	*r_hi = a_hi ^ b_hi;
490	*r_lo = a_lo ^ b_lo;
491	}
492
493	/**
494	* Bitwise NOT on the 128-bit operand.
495	*
496	* @param a_hi High part of the input operand.
497	* @param a_lo Low part of the input operand.
498	* @param r_hi Address to store high part of the result.
499	* @param r_lo Address to store low part of the result.
500	*/
501	void not128(
502	uint64_t a_hi, uint64_t a_lo,
503	uint64_t r_hi, uint64_t r_lo)
504	{
505	*r_hi = ~a_hi;
506	*r_lo = ~a_lo;
507	}
508
509	/**
510	* Equality comparison of 128-bit operands.
511	*
512	* @param a_hi High part of the first input operand.
513	* @param a_lo Low part of the first input operand.
514	* @param b_hi High part of the second input operand.
515	* @param b_lo Low part of the second input operand.
516	* @return 1 if operands are equal, 0 otherwise.
517	*/
518	int eq128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
519	{
520	return (a_hi == b_hi) && (a_lo == b_lo);
521	}
522
523	/**
524	* Lower-or-equal comparison of 128-bit operands.
525	*
526	* @param a_hi High part of the first input operand.
527	* @param a_lo Low part of the first input operand.
528	* @param b_hi High part of the second input operand.
529	* @param b_lo Low part of the second input operand.
530	* @return 1 if a is lower or equal to b, 0 otherwise.
531	*/
532	int le128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
533	{
534	return (a_hi < b_hi) \|\| ((a_hi == b_hi) && (a_lo <= b_lo));
535	}
536
537	/**
538	* Lower-than comparison of 128-bit operands.
539	*
540	* @param a_hi High part of the first input operand.
541	* @param a_lo Low part of the first input operand.
542	* @param b_hi High part of the second input operand.
543	* @param b_lo Low part of the second input operand.
544	* @return 1 if a is lower than b, 0 otherwise.
545	*/
546	int lt128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
547	{
548	return (a_hi < b_hi) \|\| ((a_hi == b_hi) && (a_lo < b_lo));
549	}
550
551	/**
552	* Addition of two 128-bit unsigned integers.
553	*
554	* @param a_hi High part of the first input operand.
555	* @param a_lo Low part of the first input operand.
556	* @param b_hi High part of the second input operand.
557	* @param b_lo Low part of the second input operand.
558	* @param r_hi Address to store high part of the result.
559	* @param r_lo Address to store low part of the result.
560	*/
561	void add128(uint64_t a_hi, uint64_t a_lo,
562	uint64_t b_hi, uint64_t b_lo,
563	uint64_t r_hi, uint64_t r_lo)
564	{
565	uint64_t low = a_lo + b_lo;
566	*r_lo = low;
567	/* detect overflow to add a carry */
568	*r_hi = a_hi + b_hi + (low < a_lo);
569	}
570
571	/**
572	* Substraction of two 128-bit unsigned integers.
573	*
574	* @param a_hi High part of the first input operand.
575	* @param a_lo Low part of the first input operand.
576	* @param b_hi High part of the second input operand.
577	* @param b_lo Low part of the second input operand.
578	* @param r_hi Address to store high part of the result.
579	* @param r_lo Address to store low part of the result.
580	*/
581	void sub128(uint64_t a_hi, uint64_t a_lo,
582	uint64_t b_hi, uint64_t b_lo,
583	uint64_t r_hi, uint64_t r_lo)
584	{
585	*r_lo = a_lo - b_lo;
586	/* detect underflow to substract a carry */
587	*r_hi = a_hi - b_hi - (a_lo < b_lo);
588	}
589
590	/**
591	* Multiplication of two 64-bit unsigned integers.
592	*
593	* @param a First input operand.
594	* @param b Second input operand.
595	* @param r_hi Address to store high part of the result.
596	* @param r_lo Address to store low part of the result.
597	*/
598	void mul64(uint64_t a, uint64_t b, uint64_t r_hi, uint64_t r_lo)
599	{
600	uint64_t low, high, middle1, middle2;
601	uint32_t alow, blow;
602
603	alow = a & 0xFFFFFFFF;
604	blow = b & 0xFFFFFFFF;
605
606	a >>= 32;
607	b >>= 32;
608
609	low = ((uint64_t) alow) * blow;
610	middle1 = a * blow;
611	middle2 = alow * b;
612	high = a * b;
613
614	middle1 += middle2;
615	high += (((uint64_t) (middle1 < middle2)) << 32) + (middle1 >> 32);
616	middle1 <<= 32;
617	low += middle1;
618	high += (low < middle1);
619	*r_lo = low;
620	*r_hi = high;
621	}
622
623	/**
624	* Multiplication of two 128-bit unsigned integers.
625	*
626	* @param a_hi High part of the first input operand.
627	* @param a_lo Low part of the first input operand.
628	* @param b_hi High part of the second input operand.
629	* @param b_lo Low part of the second input operand.
630	* @param r_hihi Address to store first (highest) quarter of the result.
631	* @param r_hilo Address to store second quarter of the result.
632	* @param r_lohi Address to store third quarter of the result.
633	* @param r_lolo Address to store fourth (lowest) quarter of the result.
634	*/
635	void mul128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo,
636	uint64_t r_hihi, uint64_t r_hilo, uint64_t r_lohi, uint64_t r_lolo)
637	{
638	uint64_t hihi, hilo, lohi, lolo;
639	uint64_t tmp1, tmp2;
640
641	mul64(a_lo, b_lo, &lohi, &lolo);
642	mul64(a_lo, b_hi, &hilo, &tmp2);
643	add128(hilo, tmp2, 0x0ll, lohi, &hilo, &lohi);
644	mul64(a_hi, b_hi, &hihi, &tmp1);
645	add128(hihi, tmp1, 0x0ll, hilo, &hihi, &hilo);
646	mul64(a_hi, b_lo, &tmp1, &tmp2);
647	add128(tmp1, tmp2, 0x0ll, lohi, &tmp1, &lohi);
648	add128(hihi, hilo, 0x0ll, tmp1, &hihi, &hilo);
649
650	*r_hihi = hihi;
651	*r_hilo = hilo;
652	*r_lohi = lohi;
653	*r_lolo = lolo;
654	}
655
656	/**
657	* Estimate the quotient of 128-bit unsigned divident and 64-bit unsigned
658	* divisor.
659	*
660	* @param a_hi High part of the divident.
661	* @param a_lo Low part of the divident.
662	* @param b Divisor.
663	* @return Quotient approximation.
664	*/
665	uint64_t div128est(uint64_t a_hi, uint64_t a_lo, uint64_t b)
666	{
667	uint64_t b_hi, b_lo;
668	uint64_t rem_hi, rem_lo;
669	uint64_t tmp_hi, tmp_lo;
670	uint64_t result;
671
672	if (b <= a_hi) {
673	return 0xFFFFFFFFFFFFFFFFull;
674	}
675
676	b_hi = b >> 32;
677	result = ((b_hi << 32) <= a_hi) ? (0xFFFFFFFFull << 32) : (a_hi / b_hi) << 32;
678	mul64(b, result, &tmp_hi, &tmp_lo);
679	sub128(a_hi, a_lo, tmp_hi, tmp_lo, &rem_hi, &rem_lo);
680
681	while ((int64_t) rem_hi < 0) {
682	result -= 0x1ll << 32;
683	b_lo = b << 32;
684	add128(rem_hi, rem_lo, b_hi, b_lo, &rem_hi, &rem_lo);
685	}
686
687	rem_hi = (rem_hi << 32) \| (rem_lo >> 32);
688	if ((b_hi << 32) <= rem_hi) {
689	result \|= 0xFFFFFFFF;
690	} else {
691	result \|= rem_hi / b_hi;
692	}
693
694	return result;
695	}
696
697	/** @}
698	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: