source: mainline/uspace/lib/softfloat/generic/common.c @ 41455a22

Last change on this file since 41455a22 was 88d5c1e, checked in by Martin Decky <martin@…>, 13 years ago

softfloat redesign

  • avoid hardwired type sizes; actual sizes are determined at compile time
  • add basic support for x87 extended-precision data types (stored as a 96-bit long double)
  • a lot of coding style changes (removal of CamelCase, etc.)
/*
 * Copyright (c) 2005 Josef Cejka
 * Copyright (c) 2011 Petr Koupy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/** @addtogroup softfloat
 * @{
 */
/** @file Common helper operations.
 */

#include <sftypes.h>
#include <common.h>

/* Table for fast leading zeroes counting. */
char zeroTable[256] = {
	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, \
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, \
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, \
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, \
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, \
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

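/*
 * Example: zeroTable[0x1f] == 3, because 0x1f == 0b00011111 has three
 * leading zero bits in an 8-bit value. The table trades 256 bytes of
 * data for a constant-time lookup in count_zeroes8() below.
 */
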
/**
 * Take a fraction shifted 10 bits to the left, round it, normalize it
 * and detect exceptions.
 *
 * @param cexp Exponent with bias.
 * @param cfrac Fraction shifted 10 bits to the left with added hidden bit.
 * @param sign Resulting sign.
 * @return Finished double-precision float.
 */
float64 finish_float64(int32_t cexp, uint64_t cfrac, char sign)
{
	float64 result;

	result.parts.sign = sign;

	/* find the first nonzero digit, shift the result and detect a possible underflow */
	while ((cexp > 0) && (cfrac) &&
	    (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))))) {
		cexp--;
		cfrac <<= 1;
		/* TODO: fix underflow */
	}

	if ((cexp < 0) || (cexp == 0 &&
	    (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))))) {
		/* FIXME: underflow */
		result.parts.exp = 0;
		if ((cexp + FLOAT64_FRACTION_SIZE + 1) < 0) { /* +1 is place for rounding */
			result.parts.fraction = 0;
			return result;
		}

		while (cexp < 0) {
			cexp++;
			cfrac >>= 1;
		}

		cfrac += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));

		if (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))) {
			result.parts.fraction =
			    ((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK));
			return result;
		}
	} else {
		cfrac += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));
	}

	++cexp;

	if (cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))) {
		++cexp;
		cfrac >>= 1;
	}

	/* check overflow */
	if (cexp >= FLOAT64_MAX_EXPONENT) {
		/* FIXME: overflow, return infinity */
		result.parts.exp = FLOAT64_MAX_EXPONENT;
		result.parts.fraction = 0;
		return result;
	}

	result.parts.exp = (uint32_t) cexp;

	result.parts.fraction =
	    ((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK));

	return result;
}

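/*
 * A note on the shift amounts above (FLOAT64_FRACTION_SIZE == 52): with
 * the fraction shifted 10 bits to the left, the hidden bit sits at
 * bit 62, so FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)
 * tests bit 63 (a carry past the hidden bit), 0x1 << 9 is the rounding
 * increment for the first bit below the fraction, and the final right
 * shift by 10 moves the fraction back into its field.
 */
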
/**
 * Take a fraction, round it, normalize it and detect exceptions.
 *
 * @param cexp Exponent with bias.
 * @param cfrac_hi High part of the fraction shifted 14 bits to the left
 *     with added hidden bit.
 * @param cfrac_lo Low part of the fraction shifted 14 bits to the left
 *     with added hidden bit.
 * @param sign Resulting sign.
 * @param shift_out Bits right-shifted out from the fraction by the caller.
 * @return Finished quadruple-precision float.
 */
float128 finish_float128(int32_t cexp, uint64_t cfrac_hi, uint64_t cfrac_lo,
    char sign, uint64_t shift_out)
{
	float128 result;
	uint64_t tmp_hi, tmp_lo;

	result.parts.sign = sign;

	/* find the first nonzero digit, shift the result and detect a possible underflow */
	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
	    1, &tmp_hi, &tmp_lo);
	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
	while ((cexp > 0) && (lt128(0x0ll, 0x0ll, cfrac_hi, cfrac_lo)) &&
	    (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo))) {
		cexp--;
		lshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
		/* TODO: fix underflow */

		lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
		    1, &tmp_hi, &tmp_lo);
		and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
	}

	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
	    1, &tmp_hi, &tmp_lo);
	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
	if ((cexp < 0) || (cexp == 0 &&
	    (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)))) {
		/* FIXME: underflow */
		result.parts.exp = 0;
		if ((cexp + FLOAT128_FRACTION_SIZE + 1) < 0) { /* +1 is place for rounding */
			result.parts.frac_hi = 0x0ll;
			result.parts.frac_lo = 0x0ll;
			return result;
		}

		while (cexp < 0) {
			cexp++;
			rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
		}
		/* round up if the most significant bit shifted out was set */
		if (shift_out & (0x1ull << 63)) {
			add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo);
		}

		lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
		    1, &tmp_hi, &tmp_lo);
		and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
		if (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
			not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
			    &tmp_hi, &tmp_lo);
			and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
			result.parts.frac_hi = tmp_hi;
			result.parts.frac_lo = tmp_lo;
			return result;
		}
	} else {
		/* round up if the most significant bit shifted out was set */
		if (shift_out & (0x1ull << 63)) {
			add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo);
		}
	}

	++cexp;

	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
	    1, &tmp_hi, &tmp_lo);
	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
	if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
		++cexp;
		rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
	}

	/* check overflow */
	if (cexp >= FLOAT128_MAX_EXPONENT) {
		/* FIXME: overflow, return infinity */
		result.parts.exp = FLOAT128_MAX_EXPONENT;
		result.parts.frac_hi = 0x0ll;
		result.parts.frac_lo = 0x0ll;
		return result;
	}

	result.parts.exp = (uint32_t) cexp;

	not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
	    &tmp_hi, &tmp_lo);
	and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
	result.parts.frac_hi = tmp_hi;
	result.parts.frac_lo = tmp_lo;

	return result;
}

/**
 * Counts leading zeroes in a byte.
 *
 * @param i Byte for which to count leading zeroes.
 * @return Number of detected leading zeroes.
 */
int count_zeroes8(uint8_t i)
{
	return zeroTable[i];
}

/**
 * Counts leading zeroes in a 32-bit unsigned integer.
 *
 * @param i Integer for which to count leading zeroes.
 * @return Number of detected leading zeroes.
 */
int count_zeroes32(uint32_t i)
{
	int j;
	for (j = 0; j < 32; j += 8) {
		if (i & (0xFFu << (24 - j))) {
			return (j + count_zeroes8(i >> (24 - j)));
		}
	}

	return 32;
}

/**
 * Counts leading zeroes in a 64-bit unsigned integer.
 *
 * @param i Integer for which to count leading zeroes.
 * @return Number of detected leading zeroes.
 */
int count_zeroes64(uint64_t i)
{
	int j;
	for (j = 0; j < 64; j += 8) {
		if (i & (0xFFull << (56 - j))) {
			return (j + count_zeroes8(i >> (56 - j)));
		}
	}

	return 64;
}

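/*
 * Usage sketch (illustrative only, not lifted from a specific caller):
 * to normalize a non-zero 64-bit fraction so that its most significant
 * set bit lands in the hidden-bit slot at bit 62, one would shift by
 * the leading-zero count minus one:
 *
 *	int shift = count_zeroes64(cfrac) - 1;
 *	if (shift > 0)
 *		cfrac <<= shift;
 */
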
/**
 * Round and normalize a number expressed by an exponent and a fraction
 * with the hidden bit at bit 30.
 *
 * @param exp Exponent part.
 * @param fraction Fraction with hidden bit shifted to bit 30.
 */
void round_float32(int32_t *exp, uint32_t *fraction)
{
	/* rounding - if the first bit after the fraction is set then round up */
	(*fraction) += (0x1 << (32 - FLOAT32_FRACTION_SIZE - 3));

	if ((*fraction) &
	    (FLOAT32_HIDDEN_BIT_MASK << (32 - FLOAT32_FRACTION_SIZE - 1))) {
		/* rounding overflow */
		++(*exp);
		(*fraction) >>= 1;
	}

	if (((*exp) >= FLOAT32_MAX_EXPONENT) || ((*exp) < 0)) {
		/* overflow - set infinity as result */
		(*exp) = FLOAT32_MAX_EXPONENT;
		(*fraction) = 0;
	}
}

/**
 * Round and normalize a number expressed by an exponent and a fraction
 * with the hidden bit at bit 62.
 *
 * @param exp Exponent part.
 * @param fraction Fraction with hidden bit shifted to bit 62.
 */
void round_float64(int32_t *exp, uint64_t *fraction)
{
	/* rounding - if the first bit after the fraction is set then round up */
	(*fraction) += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));

	if ((*fraction) &
	    (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))) {
		/* rounding overflow */
		++(*exp);
		(*fraction) >>= 1;
	}

	if (((*exp) >= FLOAT64_MAX_EXPONENT) || ((*exp) < 0)) {
		/* overflow - set infinity as result */
		(*exp) = FLOAT64_MAX_EXPONENT;
		(*fraction) = 0;
	}
}

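/*
 * Worked example for round_float32 (FLOAT32_FRACTION_SIZE == 23): the
 * hidden bit sits at bit 30 and the fraction occupies bits 29..7, so
 * 0x1 << 6 adds one to the first dropped bit; if the carry propagates
 * all the way to bit 31 (the hidden-bit mask shifted by 8), the
 * fraction has overflowed and the exponent is bumped. round_float64
 * and round_float128 follow the same pattern at bits 62 and 126.
 */
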
/**
 * Round and normalize a number expressed by an exponent and a fraction
 * with the hidden bit at bit 126.
 *
 * @param exp Exponent part.
 * @param frac_hi High part of the fraction with hidden bit shifted to bit 126.
 * @param frac_lo Low part of the fraction with hidden bit shifted to bit 126.
 */
void round_float128(int32_t *exp, uint64_t *frac_hi, uint64_t *frac_lo)
{
	uint64_t tmp_hi, tmp_lo;

	/* rounding - if the first bit after the fraction is set then round up */
	lshift128(0x0ll, 0x1ll, (128 - FLOAT128_FRACTION_SIZE - 3), &tmp_hi, &tmp_lo);
	add128(*frac_hi, *frac_lo, tmp_hi, tmp_lo, frac_hi, frac_lo);

	lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
	    (128 - FLOAT128_FRACTION_SIZE - 1), &tmp_hi, &tmp_lo);
	and128(*frac_hi, *frac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
	if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
		/* rounding overflow */
		++(*exp);
		rshift128(*frac_hi, *frac_lo, 1, frac_hi, frac_lo);
	}

	if (((*exp) >= FLOAT128_MAX_EXPONENT) || ((*exp) < 0)) {
		/* overflow - set infinity as result */
		(*exp) = FLOAT128_MAX_EXPONENT;
		(*frac_hi) = 0;
		(*frac_lo) = 0;
	}
}

/**
 * Logical shift left on the 128-bit operand.
 *
 * @param a_hi High part of the input operand.
 * @param a_lo Low part of the input operand.
 * @param shift Number of bits by which to shift.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void lshift128(
    uint64_t a_hi, uint64_t a_lo, int shift,
    uint64_t *r_hi, uint64_t *r_lo)
{
	if (shift <= 0) {
		/* do nothing */
	} else if (shift >= 128) {
		a_hi = 0;
		a_lo = 0;
	} else if (shift >= 64) {
		a_hi = a_lo << (shift - 64);
		a_lo = 0;
	} else {
		a_hi <<= shift;
		a_hi |= a_lo >> (64 - shift);
		a_lo <<= shift;
	}

	*r_hi = a_hi;
	*r_lo = a_lo;
}

/**
 * Logical shift right on the 128-bit operand.
 *
 * @param a_hi High part of the input operand.
 * @param a_lo Low part of the input operand.
 * @param shift Number of bits by which to shift.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void rshift128(
    uint64_t a_hi, uint64_t a_lo, int shift,
    uint64_t *r_hi, uint64_t *r_lo)
{
	if (shift <= 0) {
		/* do nothing */
	} else if (shift >= 128) {
		a_hi = 0;
		a_lo = 0;
	} else if (shift >= 64) {
		a_lo = a_hi >> (shift - 64);
		a_hi = 0;
	} else {
		a_lo >>= shift;
		a_lo |= a_hi << (64 - shift);
		a_hi >>= shift;
	}

	*r_hi = a_hi;
	*r_lo = a_lo;
}

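/*
 * The case split above is not just cosmetic: shifting a uint64_t by 64
 * or more bits is undefined behavior in C, so whole-word moves are
 * handled separately. For example:
 *
 *	lshift128(0x0ll, 0x1ll, 64, &hi, &lo);	// hi == 0x1, lo == 0x0
 */
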
/**
 * Bitwise AND on 128-bit operands.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void and128(
    uint64_t a_hi, uint64_t a_lo,
    uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
	*r_hi = a_hi & b_hi;
	*r_lo = a_lo & b_lo;
}

/**
 * Bitwise inclusive OR on 128-bit operands.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void or128(
    uint64_t a_hi, uint64_t a_lo,
    uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
	*r_hi = a_hi | b_hi;
	*r_lo = a_lo | b_lo;
}

/**
 * Bitwise exclusive OR on 128-bit operands.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void xor128(
    uint64_t a_hi, uint64_t a_lo,
    uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
	*r_hi = a_hi ^ b_hi;
	*r_lo = a_lo ^ b_lo;
}

/**
 * Bitwise NOT on the 128-bit operand.
 *
 * @param a_hi High part of the input operand.
 * @param a_lo Low part of the input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void not128(
    uint64_t a_hi, uint64_t a_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
	*r_hi = ~a_hi;
	*r_lo = ~a_lo;
}

/**
 * Equality comparison of 128-bit operands.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @return 1 if operands are equal, 0 otherwise.
 */
int eq128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
{
	return (a_hi == b_hi) && (a_lo == b_lo);
}

/**
 * Lower-or-equal comparison of 128-bit operands.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @return 1 if a is lower than or equal to b, 0 otherwise.
 */
int le128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
{
	return (a_hi < b_hi) || ((a_hi == b_hi) && (a_lo <= b_lo));
}

/**
 * Lower-than comparison of 128-bit operands.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @return 1 if a is lower than b, 0 otherwise.
 */
int lt128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
{
	return (a_hi < b_hi) || ((a_hi == b_hi) && (a_lo < b_lo));
}

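/*
 * A recurring idiom elsewhere in this file is lt128(0x0ll, 0x0ll, hi, lo),
 * which simply tests whether the 128-bit value (hi:lo) is non-zero.
 */
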
/**
 * Addition of two 128-bit unsigned integers.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void add128(uint64_t a_hi, uint64_t a_lo,
    uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
	uint64_t low = a_lo + b_lo;
	*r_lo = low;
	/* detect overflow of the low words to add a carry */
	*r_hi = a_hi + b_hi + (low < a_lo);
}

/**
 * Subtraction of two 128-bit unsigned integers.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void sub128(uint64_t a_hi, uint64_t a_lo,
    uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
	*r_lo = a_lo - b_lo;
	/* detect underflow of the low words to subtract a borrow */
	*r_hi = a_hi - b_hi - (a_lo < b_lo);
}

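/*
 * Both helpers rely on unsigned wrap-around: the low-word addition
 * overflowed iff the truncated sum is smaller than either addend, and
 * the low-word subtraction borrowed iff a_lo < b_lo. For example:
 *
 *	add128(0x0ll, 0xFFFFFFFFFFFFFFFFull, 0x0ll, 0x1ll, &hi, &lo);
 *	// hi == 0x1, lo == 0x0
 *	sub128(0x1ll, 0x0ll, 0x0ll, 0x1ll, &hi, &lo);
 *	// hi == 0x0, lo == 0xFFFFFFFFFFFFFFFFull
 */
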
/**
 * Multiplication of two 64-bit unsigned integers.
 *
 * @param a First input operand.
 * @param b Second input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void mul64(uint64_t a, uint64_t b, uint64_t *r_hi, uint64_t *r_lo)
{
	uint64_t low, high, middle1, middle2;
	uint32_t alow, blow;

	alow = a & 0xFFFFFFFF;
	blow = b & 0xFFFFFFFF;

	a >>= 32;
	b >>= 32;

	low = ((uint64_t) alow) * blow;
	middle1 = a * blow;
	middle2 = alow * b;
	high = a * b;

	middle1 += middle2;
	high += (((uint64_t) (middle1 < middle2)) << 32) + (middle1 >> 32);
	middle1 <<= 32;
	low += middle1;
	high += (low < middle1);
	*r_lo = low;
	*r_hi = high;
}

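/*
 * The schoolbook identity behind mul64(): with a = ah * 2^32 + al and
 * b = bh * 2^32 + bl,
 *
 *	a * b = ah*bh * 2^64 + (ah*bl + al*bh) * 2^32 + al*bl
 *
 * The two middle products are summed first, and any carry out of that
 * sum (and out of the final low-word addition) is folded into the
 * high word.
 */
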
/**
 * Multiplication of two 128-bit unsigned integers.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @param r_hihi Address to store first (highest) quarter of the result.
 * @param r_hilo Address to store second quarter of the result.
 * @param r_lohi Address to store third quarter of the result.
 * @param r_lolo Address to store fourth (lowest) quarter of the result.
 */
void mul128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hihi, uint64_t *r_hilo, uint64_t *r_lohi, uint64_t *r_lolo)
{
	uint64_t hihi, hilo, lohi, lolo;
	uint64_t tmp1, tmp2;

	mul64(a_lo, b_lo, &lohi, &lolo);
	mul64(a_lo, b_hi, &hilo, &tmp2);
	add128(hilo, tmp2, 0x0ll, lohi, &hilo, &lohi);
	mul64(a_hi, b_hi, &hihi, &tmp1);
	add128(hihi, tmp1, 0x0ll, hilo, &hihi, &hilo);
	mul64(a_hi, b_lo, &tmp1, &tmp2);
	add128(tmp1, tmp2, 0x0ll, lohi, &tmp1, &lohi);
	add128(hihi, hilo, 0x0ll, tmp1, &hihi, &hilo);

	*r_hihi = hihi;
	*r_hilo = hilo;
	*r_lohi = lohi;
	*r_lolo = lolo;
}

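/*
 * The 256-bit product is assembled from four 64x64-bit partial
 * products: a_lo*b_lo fills the low 128 bits, a_hi*b_hi the high
 * 128 bits, and the cross terms a_lo*b_hi and a_hi*b_lo are each
 * weighted by 2^64; the add128() calls propagate the carries between
 * adjacent quarters of the result.
 */
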
/**
 * Estimate the quotient of a 128-bit unsigned dividend and a 64-bit
 * unsigned divisor.
 *
 * @param a_hi High part of the dividend.
 * @param a_lo Low part of the dividend.
 * @param b Divisor.
 * @return Quotient approximation.
 */
uint64_t div128est(uint64_t a_hi, uint64_t a_lo, uint64_t b)
{
	uint64_t b_hi, b_lo;
	uint64_t rem_hi, rem_lo;
	uint64_t tmp_hi, tmp_lo;
	uint64_t result;

	if (b <= a_hi) {
		return 0xFFFFFFFFFFFFFFFFull;
	}

	b_hi = b >> 32;
	result = ((b_hi << 32) <= a_hi) ? (0xFFFFFFFFull << 32) : (a_hi / b_hi) << 32;
	mul64(b, result, &tmp_hi, &tmp_lo);
	sub128(a_hi, a_lo, tmp_hi, tmp_lo, &rem_hi, &rem_lo);

	while ((int64_t) rem_hi < 0) {
		result -= 0x1ll << 32;
		b_lo = b << 32;
		add128(rem_hi, rem_lo, b_hi, b_lo, &rem_hi, &rem_lo);
	}

	rem_hi = (rem_hi << 32) | (rem_lo >> 32);
	if ((b_hi << 32) <= rem_hi) {
		result |= 0xFFFFFFFF;
	} else {
		result |= rem_hi / b_hi;
	}

	return result;
}

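/*
 * The quotient is built 32 bits at a time from the high halves of the
 * operands: the upper half is corrected downward while the partial
 * remainder stays negative, but the lower half is left uncorrected, so
 * the returned value may still sit slightly above the true quotient;
 * callers are expected to tolerate (or fix up) that overestimate.
 */
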
/** @}
 */