source: mainline/uspace/lib/softfloat/generic/common.c@ c67aff2

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since c67aff2 was c67aff2, checked in by Petr Koupy <petr.koupy@…>, 14 years ago

Quadruple-precision softfloat, coding style improvements. Details below…

Highlights:

  • completed double-precision support
  • added quadruple-precision support
  • added SPARC quadruple-precision wrappers
  • added doxygen comments
  • corrected and unified coding style

Current state of the softfloat library:

Support for single, double and quadruple precision is currently almost complete (apart from power, square root, complex multiplication and complex division) and provides the same set of features (i.e. the support for all three precisions is now aligned). In order to extend softfloat library consistently, addition of quadruple precision was done in the same spirit as already existing single and double precision written by Josef Cejka in 2006 - that is relaxed standard-compliance for corner cases while mission-critical code sections heavily inspired by the widely used softfloat library written by John R. Hauser (although I personally think it would be more appropriate for HelenOS to use something less optimized, shorter and more readable).

Most of the quadruple-precision code is just an adapted double-precision code to work on 128-bit variables. That means if there is TODO, FIXME or some defect in single or double-precision code, it is most likely also in the quadruple-precision code. Please note that quadruple-precision functions are currently not tested - that is a challenging task in itself, especially when the ports that use them are either not finished (mips64) or badly supported by simulators (sparc64). To test the whole softfloat library, one would probably have to either write a very non-trivial native tester, or use some existing one (e.g. TestFloat from J. R. Hauser) and port it to HelenOS (or rip the softfloat library out of HelenOS and test it on a host system). At the time of writing this, the code dependent on quadruple-precision functions (on mips64 and sparc64) is just a libposix strtold() function (and its callers, most notably the scanf backend).

  • Property mode set to 100644
File size: 19.4 KB
Line 
1/*
2 * Copyright (c) 2005 Josef Cejka
3 * Copyright (c) 2011 Petr Koupy
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30/** @addtogroup softfloat
31 * @{
32 */
33/** @file Common helper operations.
34 */
35
36#include <sftypes.h>
37#include <common.h>
38
/*
 * Lookup table for fast leading-zero counting:
 * zeroTable[i] is the number of leading zero bits in the byte value i.
 *
 * Layout: zeroTable[0] = 8; then 1 entry of 7, 2 entries of 6,
 * 4 entries of 5, 8 of 4, 16 of 3, 32 of 2, 64 of 1 and 128 of 0,
 * i.e. zeroTable[i] = 8 - floor(log2(i)) - 1 for i > 0.
 */
char zeroTable[256] = {
	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
58
/**
 * Take fraction shifted by 10 bits to the left, round it, normalize it
 * and detect exceptions.
 *
 * The incoming cfrac is expected to carry the 52-bit fraction plus the
 * hidden bit, pre-shifted 10 bits to the left; the bit immediately below
 * the fraction is used as the round bit.
 *
 * NOTE(review): the normalization and round-overflow tests use
 * FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1), i.e. one
 * position above where a "shifted by 10" hidden bit would sit — the
 * unconditional ++cexp below appears to compensate; verify against the
 * callers' cfrac layout before touching any of the shift constants.
 *
 * @param cexp Exponent with bias.
 * @param cfrac Fraction shifted 10 bits to the left with added hidden bit.
 * @param sign Resulting sign.
 * @return Finished double-precision float.
 */
float64 finishFloat64(int32_t cexp, uint64_t cfrac, char sign)
{
	float64 result;

	result.parts.sign = sign;

	/* find first nonzero digit and shift result and detect possibly underflow */
	while ((cexp > 0) && (cfrac) &&
	    (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))))) {
		cexp--;
		cfrac <<= 1;
		/* TODO: fix underflow */
	}

	/* exponent dropped to or below zero: result is zero or denormalized */
	if ((cexp < 0) || (cexp == 0 &&
	    (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))))) {
		/* FIXME: underflow */
		result.parts.exp = 0;
		if ((cexp + FLOAT64_FRACTION_SIZE + 1) < 0) { /* +1 is place for rounding */
			/* too small even for a denormal - flush to zero */
			result.parts.fraction = 0;
			return result;
		}

		/* shift fraction right until the exponent reaches zero */
		while (cexp < 0) {
			cexp++;
			cfrac >>= 1;
		}

		/* round to nearest: add one at the bit below the fraction */
		cfrac += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));

		/* if rounding did not carry into the hidden-bit position,
		 * the result stays denormalized (exp == 0) */
		if (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))) {
			result.parts.fraction =
			    ((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK));
			return result;
		}
		/* else: rounding normalized the number - fall through */
	} else {
		/* normal case: round to nearest */
		cfrac += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));
	}

	++cexp;

	/* rounding carried past the tested bit - renormalize */
	if (cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))) {
		++cexp;
		cfrac >>= 1;
	}

	/* check overflow */
	if (cexp >= FLOAT64_MAX_EXPONENT) {
		/* FIXME: overflow, return infinity */
		result.parts.exp = FLOAT64_MAX_EXPONENT;
		result.parts.fraction = 0;
		return result;
	}

	result.parts.exp = (uint32_t) cexp;

	/* drop the hidden bit and store the fraction field */
	result.parts.fraction =
	    ((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK));

	return result;
}
129
130/**
131 * Take fraction, round it, normalize it and detect exceptions
132 *
133 * @param cexp Exponent with bias.
134 * @param cfrac_hi High part of the fraction shifted 14 bits to the left
135 * with added hidden bit.
136 * @param cfrac_lo Low part of the fraction shifted 14 bits to the left
137 * with added hidden bit.
138 * @param sign Resulting sign.
139 * @param shift_out Bits right-shifted out from fraction by the caller.
140 * @return Finished quadruple-precision float.
141 */
142float128 finishFloat128(int32_t cexp, uint64_t cfrac_hi, uint64_t cfrac_lo,
143 char sign, uint64_t shift_out)
144{
145 float128 result;
146 uint64_t tmp_hi, tmp_lo;
147
148 result.parts.sign = sign;
149
150 /* find first nonzero digit and shift result and detect possibly underflow */
151 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
152 1, &tmp_hi, &tmp_lo);
153 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
154 while ((cexp > 0) && (lt128(0x0ll, 0x0ll, cfrac_hi, cfrac_lo)) &&
155 (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo))) {
156 cexp--;
157 lshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
158 /* TODO: fix underflow */
159
160 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
161 1, &tmp_hi, &tmp_lo);
162 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
163 }
164
165 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
166 1, &tmp_hi, &tmp_lo);
167 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
168 if ((cexp < 0) || (cexp == 0 &&
169 (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)))) {
170 /* FIXME: underflow */
171 result.parts.exp = 0;
172 if ((cexp + FLOAT128_FRACTION_SIZE + 1) < 0) { /* +1 is place for rounding */
173 result.parts.frac_hi = 0x0ll;
174 result.parts.frac_lo = 0x0ll;
175 return result;
176 }
177
178 while (cexp < 0) {
179 cexp++;
180 rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
181 }
182
183 if (shift_out & (0x1ull < 64)) {
184 add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo);
185 }
186
187 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
188 1, &tmp_hi, &tmp_lo);
189 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
190 if (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
191 not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
192 &tmp_hi, &tmp_lo);
193 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
194 result.parts.frac_hi = tmp_hi;
195 result.parts.frac_lo = tmp_lo;
196 return result;
197 }
198 } else {
199 if (shift_out & (0x1ull < 64)) {
200 add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo);
201 }
202 }
203
204 ++cexp;
205
206 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
207 1, &tmp_hi, &tmp_lo);
208 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
209 if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
210 ++cexp;
211 rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
212 }
213
214 /* check overflow */
215 if (cexp >= FLOAT128_MAX_EXPONENT) {
216 /* FIXME: overflow, return infinity */
217 result.parts.exp = FLOAT128_MAX_EXPONENT;
218 result.parts.frac_hi = 0x0ll;
219 result.parts.frac_lo = 0x0ll;
220 return result;
221 }
222
223 result.parts.exp = (uint32_t) cexp;
224
225 not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
226 &tmp_hi, &tmp_lo);
227 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
228 result.parts.frac_hi = tmp_hi;
229 result.parts.frac_lo = tmp_lo;
230
231 return result;
232}
233
/**
 * Counts leading zeroes in byte.
 *
 * @param i Byte for which to count leading zeroes.
 * @return Number of detected leading zeroes (0 to 8).
 */
int countZeroes8(uint8_t i)
{
	/* O(1) lookup: zeroTable[i] holds the clz of the byte value i. */
	return zeroTable[i];
}
244
/**
 * Counts leading zeroes in 32bit unsigned integer.
 *
 * Scans byte by byte from the most significant end, delegating the
 * final byte to the countZeroes8() lookup table.
 *
 * @param i Integer for which to count leading zeroes.
 * @return Number of detected leading zeroes (0 to 32).
 */
int countZeroes32(uint32_t i)
{
	int j;
	for (j = 0; j < 32; j += 8) {
		/*
		 * 0xFFu keeps the shift unsigned: the original 0xFF << 24
		 * overflows a signed int, which is undefined behavior.
		 */
		if (i & (0xFFu << (24 - j))) {
			return (j + countZeroes8((uint8_t) (i >> (24 - j))));
		}
	}

	return 32;
}
262
/**
 * Counts leading zeroes in 64bit unsigned integer.
 *
 * Scans byte by byte from the most significant end, delegating the
 * final byte to the countZeroes8() lookup table.
 *
 * @param i Integer for which to count leading zeroes.
 * @return Number of detected leading zeroes (0 to 64).
 */
int countZeroes64(uint64_t i)
{
	int j;
	for (j = 0; j < 64; j += 8) {
		/*
		 * 0xFFull keeps the shift unsigned: the original 0xFFll << 56
		 * overflows a signed long long, which is undefined behavior.
		 */
		if (i & (0xFFull << (56 - j))) {
			return (j + countZeroes8((uint8_t) (i >> (56 - j))));
		}
	}

	return 64;
}
280
281/**
282 * Round and normalize number expressed by exponent and fraction with
283 * first bit (equal to hidden bit) at 30th bit.
284 *
285 * @param exp Exponent part.
286 * @param fraction Fraction with hidden bit shifted to 30th bit.
287 */
288void roundFloat32(int32_t *exp, uint32_t *fraction)
289{
290 /* rounding - if first bit after fraction is set then round up */
291 (*fraction) += (0x1 << (32 - FLOAT32_FRACTION_SIZE - 3));
292
293 if ((*fraction) &
294 (FLOAT32_HIDDEN_BIT_MASK << (32 - FLOAT32_FRACTION_SIZE - 1))) {
295 /* rounding overflow */
296 ++(*exp);
297 (*fraction) >>= 1;
298 }
299
300 if (((*exp) >= FLOAT32_MAX_EXPONENT) || ((*exp) < 0)) {
301 /* overflow - set infinity as result */
302 (*exp) = FLOAT32_MAX_EXPONENT;
303 (*fraction) = 0;
304 }
305}
306
307/**
308 * Round and normalize number expressed by exponent and fraction with
309 * first bit (equal to hidden bit) at 62nd bit.
310 *
311 * @param exp Exponent part.
312 * @param fraction Fraction with hidden bit shifted to 62nd bit.
313 */
314void roundFloat64(int32_t *exp, uint64_t *fraction)
315{
316 /* rounding - if first bit after fraction is set then round up */
317 (*fraction) += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));
318
319 if ((*fraction) &
320 (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 3))) {
321 /* rounding overflow */
322 ++(*exp);
323 (*fraction) >>= 1;
324 }
325
326 if (((*exp) >= FLOAT64_MAX_EXPONENT) || ((*exp) < 0)) {
327 /* overflow - set infinity as result */
328 (*exp) = FLOAT64_MAX_EXPONENT;
329 (*fraction) = 0;
330 }
331}
332
333/**
334 * Round and normalize number expressed by exponent and fraction with
335 * first bit (equal to hidden bit) at 126th bit.
336 *
337 * @param exp Exponent part.
338 * @param frac_hi High part of fraction part with hidden bit shifted to 126th bit.
339 * @param frac_lo Low part of fraction part with hidden bit shifted to 126th bit.
340 */
341void roundFloat128(int32_t *exp, uint64_t *frac_hi, uint64_t *frac_lo)
342{
343 uint64_t tmp_hi, tmp_lo;
344
345 /* rounding - if first bit after fraction is set then round up */
346 lshift128(0x0ll, 0x1ll, (128 - FLOAT128_FRACTION_SIZE - 3), &tmp_hi, &tmp_lo);
347 add128(*frac_hi, *frac_lo, tmp_hi, tmp_lo, frac_hi, frac_lo);
348
349 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
350 (128 - FLOAT128_FRACTION_SIZE - 3), &tmp_hi, &tmp_lo);
351 and128(*frac_hi, *frac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
352 if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
353 /* rounding overflow */
354 ++(*exp);
355 rshift128(*frac_hi, *frac_lo, 1, frac_hi, frac_lo);
356 }
357
358 if (((*exp) >= FLOAT128_MAX_EXPONENT) || ((*exp) < 0)) {
359 /* overflow - set infinity as result */
360 (*exp) = FLOAT128_MAX_EXPONENT;
361 (*frac_hi) = 0;
362 (*frac_lo) = 0;
363 }
364}
365
/**
 * Logical shift left on the 128-bit operand.
 *
 * Shifts of zero or less leave the operand unchanged; shifts of 128 or
 * more yield zero.
 *
 * @param a_hi High part of the input operand.
 * @param a_lo Low part of the input operand.
 * @param shift Number of bits by which to shift.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void lshift128(
    uint64_t a_hi, uint64_t a_lo, int shift,
    uint64_t *r_hi, uint64_t *r_lo)
{
	uint64_t hi = a_hi;
	uint64_t lo = a_lo;

	if (shift >= 128) {
		hi = 0;
		lo = 0;
	} else if (shift >= 64) {
		/* low word moves entirely into the high word */
		hi = lo << (shift - 64);
		lo = 0;
	} else if (shift > 0) {
		/* carry the top bits of the low word into the high word */
		hi = (hi << shift) | (lo >> (64 - shift));
		lo <<= shift;
	}

	*r_hi = hi;
	*r_lo = lo;
}
396
/**
 * Logical shift right on the 128-bit operand.
 *
 * Shifts of zero or less leave the operand unchanged; shifts of 128 or
 * more yield zero.
 *
 * @param a_hi High part of the input operand.
 * @param a_lo Low part of the input operand.
 * @param shift Number of bits by which to shift.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void rshift128(
    uint64_t a_hi, uint64_t a_lo, int shift,
    uint64_t *r_hi, uint64_t *r_lo)
{
	uint64_t hi = a_hi;
	uint64_t lo = a_lo;

	if (shift >= 128) {
		hi = 0;
		lo = 0;
	} else if (shift >= 64) {
		/* high word moves entirely into the low word */
		lo = hi >> (shift - 64);
		hi = 0;
	} else if (shift > 0) {
		/* carry the bottom bits of the high word into the low word */
		lo = (lo >> shift) | (hi << (64 - shift));
		hi >>= shift;
	}

	*r_hi = hi;
	*r_lo = lo;
}
427
/**
 * Bitwise AND on 128-bit operands.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void and128(
    uint64_t a_hi, uint64_t a_lo,
    uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
	/* the two halves are independent - AND them separately */
	uint64_t hi = a_hi & b_hi;
	uint64_t lo = a_lo & b_lo;

	*r_hi = hi;
	*r_lo = lo;
}
446
/**
 * Bitwise inclusive OR on 128-bit operands.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void or128(
    uint64_t a_hi, uint64_t a_lo,
    uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
	/* the two halves are independent - OR them separately */
	uint64_t hi = a_hi | b_hi;
	uint64_t lo = a_lo | b_lo;

	*r_hi = hi;
	*r_lo = lo;
}
465
/**
 * Bitwise exclusive OR on 128-bit operands.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void xor128(
    uint64_t a_hi, uint64_t a_lo,
    uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
	/* the two halves are independent - XOR them separately */
	uint64_t hi = a_hi ^ b_hi;
	uint64_t lo = a_lo ^ b_lo;

	*r_hi = hi;
	*r_lo = lo;
}
484
/**
 * Bitwise NOT on the 128-bit operand.
 *
 * @param a_hi High part of the input operand.
 * @param a_lo Low part of the input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void not128(
    uint64_t a_hi, uint64_t a_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
	/* complement each half independently */
	uint64_t hi = ~a_hi;
	uint64_t lo = ~a_lo;

	*r_hi = hi;
	*r_lo = lo;
}
500
/**
 * Equality comparison of 128-bit operands.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @return 1 if operands are equal, 0 otherwise.
 */
int eq128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
{
	/* equal iff both halves match */
	if (a_hi != b_hi)
		return 0;

	return a_lo == b_lo;
}
514
/**
 * Lower-or-equal comparison of 128-bit operands.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @return 1 if a is lower or equal to b, 0 otherwise.
 */
int le128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
{
	/* the high halves decide unless they are equal */
	if (a_hi != b_hi)
		return a_hi < b_hi;

	return a_lo <= b_lo;
}
528
/**
 * Lower-than comparison of 128-bit operands.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @return 1 if a is lower than b, 0 otherwise.
 */
int lt128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
{
	/* the high halves decide unless they are equal */
	if (a_hi != b_hi)
		return a_hi < b_hi;

	return a_lo < b_lo;
}
542
/**
 * Addition of two 128-bit unsigned integers.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void add128(uint64_t a_hi, uint64_t a_lo,
    uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
	uint64_t sum_lo = a_lo + b_lo;
	/* unsigned wrap-around in the low word signals a carry */
	uint64_t carry = (sum_lo < a_lo) ? 1 : 0;

	*r_lo = sum_lo;
	*r_hi = a_hi + b_hi + carry;
}
562
/**
 * Subtraction of two 128-bit unsigned integers.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void sub128(uint64_t a_hi, uint64_t a_lo,
    uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
	/* a borrow is needed whenever the low subtrahend is larger */
	uint64_t borrow = (a_lo < b_lo) ? 1 : 0;

	*r_lo = a_lo - b_lo;
	*r_hi = a_hi - b_hi - borrow;
}
581
/**
 * Multiplication of two 64-bit unsigned integers producing a full
 * 128-bit product (schoolbook method on 32-bit halves).
 *
 * @param a First input operand.
 * @param b Second input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void mul64(uint64_t a, uint64_t b, uint64_t *r_hi, uint64_t *r_lo)
{
	uint32_t a_lo = a & 0xFFFFFFFF;
	uint32_t b_lo = b & 0xFFFFFFFF;
	uint64_t a_hi = a >> 32;
	uint64_t b_hi = b >> 32;

	/* four 32x32 -> 64 partial products */
	uint64_t lo = ((uint64_t) a_lo) * b_lo;
	uint64_t cross1 = a_hi * b_lo;
	uint64_t cross2 = a_lo * b_hi;
	uint64_t hi = a_hi * b_hi;

	/* combine the cross terms, tracking the carry explicitly */
	cross1 += cross2;
	hi += (((uint64_t) (cross1 < cross2)) << 32) + (cross1 >> 32);
	cross1 <<= 32;
	lo += cross1;
	hi += (lo < cross1);

	*r_lo = lo;
	*r_hi = hi;
}
614
/**
 * Multiplication of two 128-bit unsigned integers producing a 256-bit
 * product in four 64-bit quarters.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @param r_hihi Address to store first (highest) quarter of the result.
 * @param r_hilo Address to store second quarter of the result.
 * @param r_lohi Address to store third quarter of the result.
 * @param r_lolo Address to store fourth (lowest) quarter of the result.
 */
void mul128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hihi, uint64_t *r_hilo, uint64_t *r_lohi, uint64_t *r_lolo)
{
	/* q3..q0 are the result quarters, highest to lowest */
	uint64_t q3, q2, q1, q0;
	uint64_t part_hi, part_lo;

	/* a_lo * b_lo fills the two lowest quarters */
	mul64(a_lo, b_lo, &q1, &q0);
	/* fold in a_lo * b_hi, carrying into q2 */
	mul64(a_lo, b_hi, &q2, &part_lo);
	add128(q2, part_lo, 0x0ll, q1, &q2, &q1);
	/* fold in a_hi * b_hi, carrying into q3 */
	mul64(a_hi, b_hi, &q3, &part_hi);
	add128(q3, part_hi, 0x0ll, q2, &q3, &q2);
	/* fold in a_hi * b_lo, then merge its carry chain */
	mul64(a_hi, b_lo, &part_hi, &part_lo);
	add128(part_hi, part_lo, 0x0ll, q1, &part_hi, &q1);
	add128(q3, q2, 0x0ll, part_hi, &q3, &q2);

	*r_hihi = q3;
	*r_hilo = q2;
	*r_lohi = q1;
	*r_lolo = q0;
}
647
/**
 * Estimate the quotient of 128-bit unsigned dividend and 64-bit unsigned
 * divisor.
 *
 * Classic two-step Newton-like estimate (cf. J. R. Hauser's SoftFloat
 * estimateDiv128To64): guess the high 32 quotient bits from the divisor's
 * high half, correct the remainder, then estimate the low 32 bits.
 * The result may be slightly above the true quotient; callers are
 * expected to tolerate that.
 *
 * @param a_hi High part of the dividend.
 * @param a_lo Low part of the dividend.
 * @param b Divisor.
 * @return Quotient approximation.
 */
uint64_t div128est(uint64_t a_hi, uint64_t a_lo, uint64_t b)
{
	uint64_t b_hi, b_lo;
	uint64_t rem_hi, rem_lo;
	uint64_t tmp_hi, tmp_lo;
	uint64_t result;

	/* quotient would not fit into 64 bits - saturate */
	if (b <= a_hi) {
		return 0xFFFFFFFFFFFFFFFFull;
	}

	/* first estimate: high 32 bits of the quotient from b's high half */
	b_hi = b >> 32;
	result = ((b_hi << 32) <= a_hi) ? (0xFFFFFFFFull << 32) : (a_hi / b_hi) << 32;
	mul64(b, result, &tmp_hi, &tmp_lo);
	sub128(a_hi, a_lo, tmp_hi, tmp_lo, &rem_hi, &rem_lo);

	/* the estimate may overshoot - walk it back while the remainder
	 * is negative (detected via the sign bit of rem_hi) */
	while ((int64_t) rem_hi < 0) {
		result -= 0x1ll << 32;
		/* (b_hi, b_lo) is b shifted left by 32 as a 128-bit value */
		b_lo = b << 32;
		add128(rem_hi, rem_lo, b_hi, b_lo, &rem_hi, &rem_lo);
	}

	/* second estimate: low 32 bits from the corrected remainder */
	rem_hi = (rem_hi << 32) | (rem_lo >> 32);
	if ((b_hi << 32) <= rem_hi) {
		/* low half saturates */
		result |= 0xFFFFFFFF;
	} else {
		result |= rem_hi / b_hi;
	}

	return result;
}
688
689/** @}
690 */
Note: See TracBrowser for help on using the repository browser.