Context Navigation

source: mainline/uspace/lib/softfloat/common.c@ 22fb7ab

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since 22fb7ab was c0c38c7c, checked in by Martin Decky <martin@…>, 10 years ago
software floating point overhaul use proper type mapping fix cosine calculation
Property mode set to `100644`
File size: 19.6 KB

Line
1	/*
2	* Copyright (c) 2005 Josef Cejka
3	* Copyright (c) 2011 Petr Koupy
4	* All rights reserved.
5	*
6	* Redistribution and use in source and binary forms, with or without
7	* modification, are permitted provided that the following conditions
8	* are met:
9	*
10	* - Redistributions of source code must retain the above copyright
11	* notice, this list of conditions and the following disclaimer.
12	* - Redistributions in binary form must reproduce the above copyright
13	* notice, this list of conditions and the following disclaimer in the
14	* documentation and/or other materials provided with the distribution.
15	* - The name of the author may not be used to endorse or promote products
16	* derived from this software without specific prior written permission.
17	*
18	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28	*/
29
30	/** @addtogroup softfloat
31	* @{
32	*/
33	/** @file Common helper operations.
34	*/
35
36	#include "common.h"
37
38	/* Table for fast leading zeroes counting. */
39	char zeroTable[256] = {
40	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, \
41	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, \
42	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, \
43	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, \
44	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
45	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
46	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
47	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
48	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
49	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
50	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
51	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
52	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
53	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
54	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
55	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
56	};
57
58	/**
59	* Take fraction shifted by 10 bits to the left, round it, normalize it
60	* and detect exceptions
61	*
62	* @param cexp Exponent with bias.
63	* @param cfrac Fraction shifted 10 bits to the left with added hidden bit.
64	* @param sign Resulting sign.
65	* @return Finished double-precision float.
66	*/
67	float64 finish_float64(int32_t cexp, uint64_t cfrac, char sign)
68	{
69	float64 result;
70
71	result.parts.sign = sign;
72
73	/* find first nonzero digit and shift result and detect possibly underflow */
74	while ((cexp > 0) && (cfrac) &&
75	(!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))))) {
76	cexp--;
77	cfrac <<= 1;
78	/* TODO: fix underflow */
79	}
80
81	if ((cexp < 0) \|\| (cexp == 0 &&
82	(!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))))) {
83	/* FIXME: underflow */
84	result.parts.exp = 0;
85	if ((cexp + FLOAT64_FRACTION_SIZE + 1) < 0) { /* +1 is place for rounding */
86	result.parts.fraction = 0;
87	return result;
88	}
89
90	while (cexp < 0) {
91	cexp++;
92	cfrac >>= 1;
93	}
94
95	cfrac += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));
96
97	if (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))) {
98	result.parts.fraction =
99	((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK));
100	return result;
101	}
102	} else {
103	cfrac += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));
104	}
105
106	++cexp;
107
108	if (cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))) {
109	++cexp;
110	cfrac >>= 1;
111	}
112
113	/* check overflow */
114	if (cexp >= FLOAT64_MAX_EXPONENT) {
115	/* FIXME: overflow, return infinity */
116	result.parts.exp = FLOAT64_MAX_EXPONENT;
117	result.parts.fraction = 0;
118	return result;
119	}
120
121	result.parts.exp = (uint32_t) cexp;
122
123	result.parts.fraction =
124	((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK));
125
126	return result;
127	}
128
129	/**
130	* Take fraction, round it, normalize it and detect exceptions
131	*
132	* @param cexp Exponent with bias.
133	* @param cfrac_hi High part of the fraction shifted 14 bits to the left
134	* with added hidden bit.
135	* @param cfrac_lo Low part of the fraction shifted 14 bits to the left
136	* with added hidden bit.
137	* @param sign Resulting sign.
138	* @param shift_out Bits right-shifted out from fraction by the caller.
139	* @return Finished quadruple-precision float.
140	*/
141	float128 finish_float128(int32_t cexp, uint64_t cfrac_hi, uint64_t cfrac_lo,
142	char sign, uint64_t shift_out)
143	{
144	float128 result;
145	uint64_t tmp_hi, tmp_lo;
146
147	result.parts.sign = sign;
148
149	/* find first nonzero digit and shift result and detect possibly underflow */
150	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
151	1, &tmp_hi, &tmp_lo);
152	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
153	while ((cexp > 0) && (lt128(0x0ll, 0x0ll, cfrac_hi, cfrac_lo)) &&
154	(!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo))) {
155	cexp--;
156	lshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
157	/* TODO: fix underflow */
158
159	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
160	1, &tmp_hi, &tmp_lo);
161	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
162	}
163
164	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
165	1, &tmp_hi, &tmp_lo);
166	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
167	if ((cexp < 0) \|\| (cexp == 0 &&
168	(!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)))) {
169	/* FIXME: underflow */
170	result.parts.exp = 0;
171	if ((cexp + FLOAT128_FRACTION_SIZE + 1) < 0) { /* +1 is place for rounding */
172	result.parts.frac_hi = 0x0ll;
173	result.parts.frac_lo = 0x0ll;
174	return result;
175	}
176
177	while (cexp < 0) {
178	cexp++;
179	rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
180	}
181
182	if (shift_out & (0x1ull < 64)) {
183	add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo);
184	}
185
186	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
187	1, &tmp_hi, &tmp_lo);
188	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
189	if (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
190	not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
191	&tmp_hi, &tmp_lo);
192	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
193	result.parts.frac_hi = tmp_hi;
194	result.parts.frac_lo = tmp_lo;
195	return result;
196	}
197	} else {
198	if (shift_out & (0x1ull < 64)) {
199	add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo);
200	}
201	}
202
203	++cexp;
204
205	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
206	1, &tmp_hi, &tmp_lo);
207	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
208	if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
209	++cexp;
210	rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
211	}
212
213	/* check overflow */
214	if (cexp >= FLOAT128_MAX_EXPONENT) {
215	/* FIXME: overflow, return infinity */
216	result.parts.exp = FLOAT128_MAX_EXPONENT;
217	result.parts.frac_hi = 0x0ll;
218	result.parts.frac_lo = 0x0ll;
219	return result;
220	}
221
222	result.parts.exp = (uint32_t) cexp;
223
224	not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
225	&tmp_hi, &tmp_lo);
226	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
227	result.parts.frac_hi = tmp_hi;
228	result.parts.frac_lo = tmp_lo;
229
230	return result;
231	}
232
233	/**
234	* Counts leading zeroes in byte.
235	*
236	* @param i Byte for which to count leading zeroes.
237	* @return Number of detected leading zeroes.
238	*/
239	int count_zeroes8(uint8_t i)
240	{
241	return zeroTable[i];
242	}
243
244	/**
245	* Counts leading zeroes in 32bit unsigned integer.
246	*
247	* @param i Integer for which to count leading zeroes.
248	* @return Number of detected leading zeroes.
249	*/
250	int count_zeroes32(uint32_t i)
251	{
252	int j;
253	for (j = 0; j < 32; j += 8) {
254	if (i & (0xFF << (24 - j))) {
255	return (j + count_zeroes8(i >> (24 - j)));
256	}
257	}
258
259	return 32;
260	}
261
262	/**
263	* Counts leading zeroes in 64bit unsigned integer.
264	*
265	* @param i Integer for which to count leading zeroes.
266	* @return Number of detected leading zeroes.
267	*/
268	int count_zeroes64(uint64_t i)
269	{
270	int j;
271	for (j = 0; j < 64; j += 8) {
272	if (i & (0xFFll << (56 - j))) {
273	return (j + count_zeroes8(i >> (56 - j)));
274	}
275	}
276
277	return 64;
278	}
279
280	/**
281	* Round and normalize number expressed by exponent and fraction with
282	* first bit (equal to hidden bit) at 30th bit.
283	*
284	* @param exp Exponent part.
285	* @param fraction Fraction with hidden bit shifted to 30th bit.
286	*/
287	void round_float32(int32_t exp, uint32_t fraction)
288	{
289	/* rounding - if first bit after fraction is set then round up */
290	(*fraction) += (0x1 << (32 - FLOAT32_FRACTION_SIZE - 3));
291
292	if ((*fraction) &
293	(FLOAT32_HIDDEN_BIT_MASK << (32 - FLOAT32_FRACTION_SIZE - 1))) {
294	/* rounding overflow */
295	++(*exp);
296	(*fraction) >>= 1;
297	}
298
299	if (((exp) >= FLOAT32_MAX_EXPONENT) \|\| ((exp) < 0)) {
300	/* overflow - set infinity as result */
301	(*exp) = FLOAT32_MAX_EXPONENT;
302	(*fraction) = 0;
303	}
304	}
305
306	/**
307	* Round and normalize number expressed by exponent and fraction with
308	* first bit (equal to hidden bit) at bit 62.
309	*
310	* @param exp Exponent part.
311	* @param fraction Fraction with hidden bit shifted to bit 62.
312	*/
313	void round_float64(int32_t exp, uint64_t fraction)
314	{
315	/*
316	* Rounding - if first bit after fraction is set then round up.
317	*/
318
319	/*
320	* Add 1 to the least significant bit of the fraction respecting the
321	* current shift to bit 62 and see if there will be a carry to bit 63.
322	*/
323	(*fraction) += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));
324
325	/* See if there was a carry to bit 63. */
326	if ((*fraction) &
327	(FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))) {
328	/* rounding overflow */
329	++(*exp);
330	(*fraction) >>= 1;
331	}
332
333	if (((exp) >= FLOAT64_MAX_EXPONENT) \|\| ((exp) < 0)) {
334	/* overflow - set infinity as result */
335	(*exp) = FLOAT64_MAX_EXPONENT;
336	(*fraction) = 0;
337	}
338	}
339
340	/**
341	* Round and normalize number expressed by exponent and fraction with
342	* first bit (equal to hidden bit) at 126th bit.
343	*
344	* @param exp Exponent part.
345	* @param frac_hi High part of fraction part with hidden bit shifted to 126th bit.
346	* @param frac_lo Low part of fraction part with hidden bit shifted to 126th bit.
347	*/
348	void round_float128(int32_t exp, uint64_t frac_hi, uint64_t *frac_lo)
349	{
350	uint64_t tmp_hi, tmp_lo;
351
352	/* rounding - if first bit after fraction is set then round up */
353	lshift128(0x0ll, 0x1ll, (128 - FLOAT128_FRACTION_SIZE - 3), &tmp_hi, &tmp_lo);
354	add128(frac_hi, frac_lo, tmp_hi, tmp_lo, frac_hi, frac_lo);
355
356	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
357	(128 - FLOAT128_FRACTION_SIZE - 3), &tmp_hi, &tmp_lo);
358	and128(frac_hi, frac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
359	if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
360	/* rounding overflow */
361	++(*exp);
362	rshift128(frac_hi, frac_lo, 1, frac_hi, frac_lo);
363	}
364
365	if (((exp) >= FLOAT128_MAX_EXPONENT) \|\| ((exp) < 0)) {
366	/* overflow - set infinity as result */
367	(*exp) = FLOAT128_MAX_EXPONENT;
368	(*frac_hi) = 0;
369	(*frac_lo) = 0;
370	}
371	}
372
373	/**
374	* Logical shift left on the 128-bit operand.
375	*
376	* @param a_hi High part of the input operand.
377	* @param a_lo Low part of the input operand.
378	* @param shift Number of bits by witch to shift.
379	* @param r_hi Address to store high part of the result.
380	* @param r_lo Address to store low part of the result.
381	*/
382	void lshift128(
383	uint64_t a_hi, uint64_t a_lo, int shift,
384	uint64_t r_hi, uint64_t r_lo)
385	{
386	if (shift <= 0) {
387	/* do nothing */
388	} else if (shift >= 128) {
389	a_hi = 0;
390	a_lo = 0;
391	} else if (shift >= 64) {
392	a_hi = a_lo << (shift - 64);
393	a_lo = 0;
394	} else {
395	a_hi <<= shift;
396	a_hi \|= a_lo >> (64 - shift);
397	a_lo <<= shift;
398	}
399
400	*r_hi = a_hi;
401	*r_lo = a_lo;
402	}
403
404	/**
405	* Logical shift right on the 128-bit operand.
406	*
407	* @param a_hi High part of the input operand.
408	* @param a_lo Low part of the input operand.
409	* @param shift Number of bits by witch to shift.
410	* @param r_hi Address to store high part of the result.
411	* @param r_lo Address to store low part of the result.
412	*/
413	void rshift128(
414	uint64_t a_hi, uint64_t a_lo, int shift,
415	uint64_t r_hi, uint64_t r_lo)
416	{
417	if (shift <= 0) {
418	/* do nothing */
419	} else if (shift >= 128) {
420	a_hi = 0;
421	a_lo = 0;
422	} else if (shift >= 64) {
423	a_lo = a_hi >> (shift - 64);
424	a_hi = 0;
425	} else {
426	a_lo >>= shift;
427	a_lo \|= a_hi << (64 - shift);
428	a_hi >>= shift;
429	}
430
431	*r_hi = a_hi;
432	*r_lo = a_lo;
433	}
434
435	/**
436	* Bitwise AND on 128-bit operands.
437	*
438	* @param a_hi High part of the first input operand.
439	* @param a_lo Low part of the first input operand.
440	* @param b_hi High part of the second input operand.
441	* @param b_lo Low part of the second input operand.
442	* @param r_hi Address to store high part of the result.
443	* @param r_lo Address to store low part of the result.
444	*/
445	void and128(
446	uint64_t a_hi, uint64_t a_lo,
447	uint64_t b_hi, uint64_t b_lo,
448	uint64_t r_hi, uint64_t r_lo)
449	{
450	*r_hi = a_hi & b_hi;
451	*r_lo = a_lo & b_lo;
452	}
453
454	/**
455	* Bitwise inclusive OR on 128-bit operands.
456	*
457	* @param a_hi High part of the first input operand.
458	* @param a_lo Low part of the first input operand.
459	* @param b_hi High part of the second input operand.
460	* @param b_lo Low part of the second input operand.
461	* @param r_hi Address to store high part of the result.
462	* @param r_lo Address to store low part of the result.
463	*/
464	void or128(
465	uint64_t a_hi, uint64_t a_lo,
466	uint64_t b_hi, uint64_t b_lo,
467	uint64_t r_hi, uint64_t r_lo)
468	{
469	*r_hi = a_hi \| b_hi;
470	*r_lo = a_lo \| b_lo;
471	}
472
473	/**
474	* Bitwise exclusive OR on 128-bit operands.
475	*
476	* @param a_hi High part of the first input operand.
477	* @param a_lo Low part of the first input operand.
478	* @param b_hi High part of the second input operand.
479	* @param b_lo Low part of the second input operand.
480	* @param r_hi Address to store high part of the result.
481	* @param r_lo Address to store low part of the result.
482	*/
483	void xor128(
484	uint64_t a_hi, uint64_t a_lo,
485	uint64_t b_hi, uint64_t b_lo,
486	uint64_t r_hi, uint64_t r_lo)
487	{
488	*r_hi = a_hi ^ b_hi;
489	*r_lo = a_lo ^ b_lo;
490	}
491
492	/**
493	* Bitwise NOT on the 128-bit operand.
494	*
495	* @param a_hi High part of the input operand.
496	* @param a_lo Low part of the input operand.
497	* @param r_hi Address to store high part of the result.
498	* @param r_lo Address to store low part of the result.
499	*/
500	void not128(
501	uint64_t a_hi, uint64_t a_lo,
502	uint64_t r_hi, uint64_t r_lo)
503	{
504	*r_hi = ~a_hi;
505	*r_lo = ~a_lo;
506	}
507
508	/**
509	* Equality comparison of 128-bit operands.
510	*
511	* @param a_hi High part of the first input operand.
512	* @param a_lo Low part of the first input operand.
513	* @param b_hi High part of the second input operand.
514	* @param b_lo Low part of the second input operand.
515	* @return 1 if operands are equal, 0 otherwise.
516	*/
517	int eq128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
518	{
519	return (a_hi == b_hi) && (a_lo == b_lo);
520	}
521
522	/**
523	* Lower-or-equal comparison of 128-bit operands.
524	*
525	* @param a_hi High part of the first input operand.
526	* @param a_lo Low part of the first input operand.
527	* @param b_hi High part of the second input operand.
528	* @param b_lo Low part of the second input operand.
529	* @return 1 if a is lower or equal to b, 0 otherwise.
530	*/
531	int le128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
532	{
533	return (a_hi < b_hi) \|\| ((a_hi == b_hi) && (a_lo <= b_lo));
534	}
535
536	/**
537	* Lower-than comparison of 128-bit operands.
538	*
539	* @param a_hi High part of the first input operand.
540	* @param a_lo Low part of the first input operand.
541	* @param b_hi High part of the second input operand.
542	* @param b_lo Low part of the second input operand.
543	* @return 1 if a is lower than b, 0 otherwise.
544	*/
545	int lt128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
546	{
547	return (a_hi < b_hi) \|\| ((a_hi == b_hi) && (a_lo < b_lo));
548	}
549
550	/**
551	* Addition of two 128-bit unsigned integers.
552	*
553	* @param a_hi High part of the first input operand.
554	* @param a_lo Low part of the first input operand.
555	* @param b_hi High part of the second input operand.
556	* @param b_lo Low part of the second input operand.
557	* @param r_hi Address to store high part of the result.
558	* @param r_lo Address to store low part of the result.
559	*/
560	void add128(uint64_t a_hi, uint64_t a_lo,
561	uint64_t b_hi, uint64_t b_lo,
562	uint64_t r_hi, uint64_t r_lo)
563	{
564	uint64_t low = a_lo + b_lo;
565	*r_lo = low;
566	/* detect overflow to add a carry */
567	*r_hi = a_hi + b_hi + (low < a_lo);
568	}
569
570	/**
571	* Substraction of two 128-bit unsigned integers.
572	*
573	* @param a_hi High part of the first input operand.
574	* @param a_lo Low part of the first input operand.
575	* @param b_hi High part of the second input operand.
576	* @param b_lo Low part of the second input operand.
577	* @param r_hi Address to store high part of the result.
578	* @param r_lo Address to store low part of the result.
579	*/
580	void sub128(uint64_t a_hi, uint64_t a_lo,
581	uint64_t b_hi, uint64_t b_lo,
582	uint64_t r_hi, uint64_t r_lo)
583	{
584	*r_lo = a_lo - b_lo;
585	/* detect underflow to substract a carry */
586	*r_hi = a_hi - b_hi - (a_lo < b_lo);
587	}
588
589	/**
590	* Multiplication of two 64-bit unsigned integers.
591	*
592	* @param a First input operand.
593	* @param b Second input operand.
594	* @param r_hi Address to store high part of the result.
595	* @param r_lo Address to store low part of the result.
596	*/
597	void mul64(uint64_t a, uint64_t b, uint64_t r_hi, uint64_t r_lo)
598	{
599	uint64_t low, high, middle1, middle2;
600	uint32_t alow, blow;
601
602	alow = a & 0xFFFFFFFF;
603	blow = b & 0xFFFFFFFF;
604
605	a >>= 32;
606	b >>= 32;
607
608	low = ((uint64_t) alow) * blow;
609	middle1 = a * blow;
610	middle2 = alow * b;
611	high = a * b;
612
613	middle1 += middle2;
614	high += (((uint64_t) (middle1 < middle2)) << 32) + (middle1 >> 32);
615	middle1 <<= 32;
616	low += middle1;
617	high += (low < middle1);
618	*r_lo = low;
619	*r_hi = high;
620	}
621
622	/**
623	* Multiplication of two 128-bit unsigned integers.
624	*
625	* @param a_hi High part of the first input operand.
626	* @param a_lo Low part of the first input operand.
627	* @param b_hi High part of the second input operand.
628	* @param b_lo Low part of the second input operand.
629	* @param r_hihi Address to store first (highest) quarter of the result.
630	* @param r_hilo Address to store second quarter of the result.
631	* @param r_lohi Address to store third quarter of the result.
632	* @param r_lolo Address to store fourth (lowest) quarter of the result.
633	*/
634	void mul128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo,
635	uint64_t r_hihi, uint64_t r_hilo, uint64_t r_lohi, uint64_t r_lolo)
636	{
637	uint64_t hihi, hilo, lohi, lolo;
638	uint64_t tmp1, tmp2;
639
640	mul64(a_lo, b_lo, &lohi, &lolo);
641	mul64(a_lo, b_hi, &hilo, &tmp2);
642	add128(hilo, tmp2, 0x0ll, lohi, &hilo, &lohi);
643	mul64(a_hi, b_hi, &hihi, &tmp1);
644	add128(hihi, tmp1, 0x0ll, hilo, &hihi, &hilo);
645	mul64(a_hi, b_lo, &tmp1, &tmp2);
646	add128(tmp1, tmp2, 0x0ll, lohi, &tmp1, &lohi);
647	add128(hihi, hilo, 0x0ll, tmp1, &hihi, &hilo);
648
649	*r_hihi = hihi;
650	*r_hilo = hilo;
651	*r_lohi = lohi;
652	*r_lolo = lolo;
653	}
654
655	/**
656	* Estimate the quotient of 128-bit unsigned divident and 64-bit unsigned
657	* divisor.
658	*
659	* @param a_hi High part of the divident.
660	* @param a_lo Low part of the divident.
661	* @param b Divisor.
662	* @return Quotient approximation.
663	*/
664	uint64_t div128est(uint64_t a_hi, uint64_t a_lo, uint64_t b)
665	{
666	uint64_t b_hi, b_lo;
667	uint64_t rem_hi, rem_lo;
668	uint64_t tmp_hi, tmp_lo;
669	uint64_t result;
670
671	if (b <= a_hi) {
672	return 0xFFFFFFFFFFFFFFFFull;
673	}
674
675	b_hi = b >> 32;
676	result = ((b_hi << 32) <= a_hi) ? (0xFFFFFFFFull << 32) : (a_hi / b_hi) << 32;
677	mul64(b, result, &tmp_hi, &tmp_lo);
678	sub128(a_hi, a_lo, tmp_hi, tmp_lo, &rem_hi, &rem_lo);
679
680	while ((int64_t) rem_hi < 0) {
681	result -= 0x1ll << 32;
682	b_lo = b << 32;
683	add128(rem_hi, rem_lo, b_hi, b_lo, &rem_hi, &rem_lo);
684	}
685
686	rem_hi = (rem_hi << 32) \| (rem_lo >> 32);
687	if ((b_hi << 32) <= rem_hi) {
688	result \|= 0xFFFFFFFF;
689	} else {
690	result \|= rem_hi / b_hi;
691	}
692
693	return result;
694	}
695
696	/** @}
697	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: