Context Navigation

div.c@ 8bcd727

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since 8bcd727 was c67aff2, checked in by Petr Koupy <petr.koupy@…>, 14 years ago

Quadruple-precision softfloat, coding style improvements. Details below…

Highlights:

completed double-precision support
added quadruple-precision support
added SPARC quadruple-precision wrappers
added doxygen comments
corrected and unified coding style

Current state of the softfloat library:

Support for single, double and quadruple precision is currently almost complete (apart from power, square root, complex multiplication and complex division) and provides the same set of features (i.e. the support for all three precisions is now aligned). In order to extend softfloat library consistently, addition of quadruple precision was done in the same spirit as already existing single and double precision written by Josef Cejka in 2006 - that is relaxed standard-compliance for corner cases while mission-critical code sections heavily inspired by the widely used softfloat library written by John R. Hauser (although I personally think it would be more appropriate for HelenOS to use something less optimized, shorter and more readable).

Most of the quadruple-precision code is just an adapted double-precision code to work on 128-bit variables. That means if there is TODO, FIXME or some defect in single or double-precision code, it is most likely also in the quadruple-precision code. Please note that quadruple-precision functions are currently not tested - it is challenging task for itself, especially when the ports that use them are either not finished (mips64) or badly supported by simulators (sparc64). To test whole softfloat library, one would probably have to either write very non-trivial native tester, or use some existing one (e.g. TestFloat from J. R. Hauser) and port it to HelenOS (or rip the softfloat library out of HelenOS and test it on a host system). At the time of writing this, the code dependent on quadruple-precision functions (on mips64 and sparc64) is just a libposix strtold() function (and its callers, most notably scanf backend).

Property mode set to 100644

File size: 12.4 KB

Line
1	/*
2	* Copyright (c) 2005 Josef Cejka
3	* Copyright (c) 2011 Petr Koupy
4	* All rights reserved.
5	*
6	* Redistribution and use in source and binary forms, with or without
7	* modification, are permitted provided that the following conditions
8	* are met:
9	*
10	* - Redistributions of source code must retain the above copyright
11	* notice, this list of conditions and the following disclaimer.
12	* - Redistributions in binary form must reproduce the above copyright
13	* notice, this list of conditions and the following disclaimer in the
14	* documentation and/or other materials provided with the distribution.
15	* - The name of the author may not be used to endorse or promote products
16	* derived from this software without specific prior written permission.
17	*
18	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28	*/
29
30	/** @addtogroup softfloat
31	* @{
32	*/
33	/** @file Division functions.
34	*/
35
36	#include <sftypes.h>
37	#include <add.h>
38	#include <div.h>
39	#include <comparison.h>
40	#include <mul.h>
41	#include <common.h>
42
43	/**
44	* Divide two single-precision floats.
45	*
46	* @param a Nominator.
47	* @param b Denominator.
48	* @return Result of division.
49	*/
50	float32 divFloat32(float32 a, float32 b)
51	{
52	float32 result;
53	int32_t aexp, bexp, cexp;
54	uint64_t afrac, bfrac, cfrac;
55
56	result.parts.sign = a.parts.sign ^ b.parts.sign;
57
58	if (isFloat32NaN(a)) {
59	if (isFloat32SigNaN(a)) {
60	/FIXME: SigNaN/
61	}
62	/NaN/
63	return a;
64	}
65
66	if (isFloat32NaN(b)) {
67	if (isFloat32SigNaN(b)) {
68	/FIXME: SigNaN/
69	}
70	/NaN/
71	return b;
72	}
73
74	if (isFloat32Infinity(a)) {
75	if (isFloat32Infinity(b)) {
76	/FIXME: inf / inf /
77	result.binary = FLOAT32_NAN;
78	return result;
79	}
80	/* inf / num */
81	result.parts.exp = a.parts.exp;
82	result.parts.fraction = a.parts.fraction;
83	return result;
84	}
85
86	if (isFloat32Infinity(b)) {
87	if (isFloat32Zero(a)) {
88	/* FIXME 0 / inf */
89	result.parts.exp = 0;
90	result.parts.fraction = 0;
91	return result;
92	}
93	/* FIXME: num / inf*/
94	result.parts.exp = 0;
95	result.parts.fraction = 0;
96	return result;
97	}
98
99	if (isFloat32Zero(b)) {
100	if (isFloat32Zero(a)) {
101	/FIXME: 0 / 0/
102	result.binary = FLOAT32_NAN;
103	return result;
104	}
105	/* FIXME: division by zero */
106	result.parts.exp = 0;
107	result.parts.fraction = 0;
108	return result;
109	}
110
111	afrac = a.parts.fraction;
112	aexp = a.parts.exp;
113	bfrac = b.parts.fraction;
114	bexp = b.parts.exp;
115
116	/* denormalized numbers */
117	if (aexp == 0) {
118	if (afrac == 0) {
119	result.parts.exp = 0;
120	result.parts.fraction = 0;
121	return result;
122	}
123
124	/* normalize it*/
125	afrac <<= 1;
126	/* afrac is nonzero => it must stop */
127	while (!(afrac & FLOAT32_HIDDEN_BIT_MASK)) {
128	afrac <<= 1;
129	aexp--;
130	}
131	}
132
133	if (bexp == 0) {
134	bfrac <<= 1;
135	/* bfrac is nonzero => it must stop */
136	while (!(bfrac & FLOAT32_HIDDEN_BIT_MASK)) {
137	bfrac <<= 1;
138	bexp--;
139	}
140	}
141
142	afrac = (afrac \| FLOAT32_HIDDEN_BIT_MASK) << (32 - FLOAT32_FRACTION_SIZE - 1);
143	bfrac = (bfrac \| FLOAT32_HIDDEN_BIT_MASK) << (32 - FLOAT32_FRACTION_SIZE);
144
145	if (bfrac <= (afrac << 1)) {
146	afrac >>= 1;
147	aexp++;
148	}
149
150	cexp = aexp - bexp + FLOAT32_BIAS - 2;
151
152	cfrac = (afrac << 32) / bfrac;
153	if ((cfrac & 0x3F) == 0) {
154	cfrac \|= (bfrac * cfrac != afrac << 32);
155	}
156
157	/* pack and round */
158
159	/* find first nonzero digit and shift result and detect possibly underflow */
160	while ((cexp > 0) && (cfrac) && (!(cfrac & (FLOAT32_HIDDEN_BIT_MASK << 7)))) {
161	cexp--;
162	cfrac <<= 1;
163	/* TODO: fix underflow */
164	}
165
166	cfrac += (0x1 << 6); /* FIXME: 7 is not sure*/
167
168	if (cfrac & (FLOAT32_HIDDEN_BIT_MASK << 7)) {
169	++cexp;
170	cfrac >>= 1;
171	}
172
173	/* check overflow */
174	if (cexp >= FLOAT32_MAX_EXPONENT) {
175	/* FIXME: overflow, return infinity */
176	result.parts.exp = FLOAT32_MAX_EXPONENT;
177	result.parts.fraction = 0;
178	return result;
179	}
180
181	if (cexp < 0) {
182	/* FIXME: underflow */
183	result.parts.exp = 0;
184	if ((cexp + FLOAT32_FRACTION_SIZE) < 0) {
185	result.parts.fraction = 0;
186	return result;
187	}
188	cfrac >>= 1;
189	while (cexp < 0) {
190	cexp++;
191	cfrac >>= 1;
192	}
193	} else {
194	result.parts.exp = (uint32_t) cexp;
195	}
196
197	result.parts.fraction = ((cfrac >> 6) & (~FLOAT32_HIDDEN_BIT_MASK));
198
199	return result;
200	}
201
202	/**
203	* Divide two double-precision floats.
204	*
205	* @param a Nominator.
206	* @param b Denominator.
207	* @return Result of division.
208	*/
209	float64 divFloat64(float64 a, float64 b)
210	{
211	float64 result;
212	int64_t aexp, bexp, cexp;
213	uint64_t afrac, bfrac, cfrac;
214	uint64_t remlo, remhi;
215	uint64_t tmplo, tmphi;
216
217	result.parts.sign = a.parts.sign ^ b.parts.sign;
218
219	if (isFloat64NaN(a)) {
220	if (isFloat64SigNaN(b)) {
221	/FIXME: SigNaN/
222	return b;
223	}
224
225	if (isFloat64SigNaN(a)) {
226	/FIXME: SigNaN/
227	}
228	/NaN/
229	return a;
230	}
231
232	if (isFloat64NaN(b)) {
233	if (isFloat64SigNaN(b)) {
234	/FIXME: SigNaN/
235	}
236	/NaN/
237	return b;
238	}
239
240	if (isFloat64Infinity(a)) {
241	if (isFloat64Infinity(b) \|\| isFloat64Zero(b)) {
242	/FIXME: inf / inf /
243	result.binary = FLOAT64_NAN;
244	return result;
245	}
246	/* inf / num */
247	result.parts.exp = a.parts.exp;
248	result.parts.fraction = a.parts.fraction;
249	return result;
250	}
251
252	if (isFloat64Infinity(b)) {
253	if (isFloat64Zero(a)) {
254	/* FIXME 0 / inf */
255	result.parts.exp = 0;
256	result.parts.fraction = 0;
257	return result;
258	}
259	/* FIXME: num / inf*/
260	result.parts.exp = 0;
261	result.parts.fraction = 0;
262	return result;
263	}
264
265	if (isFloat64Zero(b)) {
266	if (isFloat64Zero(a)) {
267	/FIXME: 0 / 0/
268	result.binary = FLOAT64_NAN;
269	return result;
270	}
271	/* FIXME: division by zero */
272	result.parts.exp = 0;
273	result.parts.fraction = 0;
274	return result;
275	}
276
277	afrac = a.parts.fraction;
278	aexp = a.parts.exp;
279	bfrac = b.parts.fraction;
280	bexp = b.parts.exp;
281
282	/* denormalized numbers */
283	if (aexp == 0) {
284	if (afrac == 0) {
285	result.parts.exp = 0;
286	result.parts.fraction = 0;
287	return result;
288	}
289
290	/* normalize it*/
291	aexp++;
292	/* afrac is nonzero => it must stop */
293	while (!(afrac & FLOAT64_HIDDEN_BIT_MASK)) {
294	afrac <<= 1;
295	aexp--;
296	}
297	}
298
299	if (bexp == 0) {
300	bexp++;
301	/* bfrac is nonzero => it must stop */
302	while (!(bfrac & FLOAT64_HIDDEN_BIT_MASK)) {
303	bfrac <<= 1;
304	bexp--;
305	}
306	}
307
308	afrac = (afrac \| FLOAT64_HIDDEN_BIT_MASK) << (64 - FLOAT64_FRACTION_SIZE - 2);
309	bfrac = (bfrac \| FLOAT64_HIDDEN_BIT_MASK) << (64 - FLOAT64_FRACTION_SIZE - 1);
310
311	if (bfrac <= (afrac << 1)) {
312	afrac >>= 1;
313	aexp++;
314	}
315
316	cexp = aexp - bexp + FLOAT64_BIAS - 2;
317
318	cfrac = div128est(afrac, 0x0ll, bfrac);
319
320	if ((cfrac & 0x1FF) <= 2) {
321	mul64(bfrac, cfrac, &tmphi, &tmplo);
322	sub128(afrac, 0x0ll, tmphi, tmplo, &remhi, &remlo);
323
324	while ((int64_t) remhi < 0) {
325	cfrac--;
326	add128(remhi, remlo, 0x0ll, bfrac, &remhi, &remlo);
327	}
328	cfrac \|= (remlo != 0);
329	}
330
331	/* round and shift */
332	result = finishFloat64(cexp, cfrac, result.parts.sign);
333	return result;
334	}
335
336	/**
337	* Divide two quadruple-precision floats.
338	*
339	* @param a Nominator.
340	* @param b Denominator.
341	* @return Result of division.
342	*/
343	float128 divFloat128(float128 a, float128 b)
344	{
345	float128 result;
346	int64_t aexp, bexp, cexp;
347	uint64_t afrac_hi, afrac_lo, bfrac_hi, bfrac_lo, cfrac_hi, cfrac_lo;
348	uint64_t shift_out;
349	uint64_t rem_hihi, rem_hilo, rem_lohi, rem_lolo;
350	uint64_t tmp_hihi, tmp_hilo, tmp_lohi, tmp_lolo;
351
352	result.parts.sign = a.parts.sign ^ b.parts.sign;
353
354	if (isFloat128NaN(a)) {
355	if (isFloat128SigNaN(b)) {
356	/FIXME: SigNaN/
357	return b;
358	}
359
360	if (isFloat128SigNaN(a)) {
361	/FIXME: SigNaN/
362	}
363	/NaN/
364	return a;
365	}
366
367	if (isFloat128NaN(b)) {
368	if (isFloat128SigNaN(b)) {
369	/FIXME: SigNaN/
370	}
371	/NaN/
372	return b;
373	}
374
375	if (isFloat128Infinity(a)) {
376	if (isFloat128Infinity(b) \|\| isFloat128Zero(b)) {
377	/FIXME: inf / inf /
378	result.binary.hi = FLOAT128_NAN_HI;
379	result.binary.lo = FLOAT128_NAN_LO;
380	return result;
381	}
382	/* inf / num */
383	result.parts.exp = a.parts.exp;
384	result.parts.frac_hi = a.parts.frac_hi;
385	result.parts.frac_lo = a.parts.frac_lo;
386	return result;
387	}
388
389	if (isFloat128Infinity(b)) {
390	if (isFloat128Zero(a)) {
391	/* FIXME 0 / inf */
392	result.parts.exp = 0;
393	result.parts.frac_hi = 0;
394	result.parts.frac_lo = 0;
395	return result;
396	}
397	/* FIXME: num / inf*/
398	result.parts.exp = 0;
399	result.parts.frac_hi = 0;
400	result.parts.frac_lo = 0;
401	return result;
402	}
403
404	if (isFloat128Zero(b)) {
405	if (isFloat128Zero(a)) {
406	/FIXME: 0 / 0/
407	result.binary.hi = FLOAT128_NAN_HI;
408	result.binary.lo = FLOAT128_NAN_LO;
409	return result;
410	}
411	/* FIXME: division by zero */
412	result.parts.exp = 0;
413	result.parts.frac_hi = 0;
414	result.parts.frac_lo = 0;
415	return result;
416	}
417
418	afrac_hi = a.parts.frac_hi;
419	afrac_lo = a.parts.frac_lo;
420	aexp = a.parts.exp;
421	bfrac_hi = b.parts.frac_hi;
422	bfrac_lo = b.parts.frac_lo;
423	bexp = b.parts.exp;
424
425	/* denormalized numbers */
426	if (aexp == 0) {
427	if (eq128(afrac_hi, afrac_lo, 0x0ll, 0x0ll)) {
428	result.parts.exp = 0;
429	result.parts.frac_hi = 0;
430	result.parts.frac_lo = 0;
431	return result;
432	}
433
434	/* normalize it*/
435	aexp++;
436	/* afrac is nonzero => it must stop */
437	and128(afrac_hi, afrac_lo,
438	FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
439	&tmp_hihi, &tmp_lolo);
440	while (!lt128(0x0ll, 0x0ll, tmp_hihi, tmp_lolo)) {
441	lshift128(afrac_hi, afrac_lo, 1, &afrac_hi, &afrac_lo);
442	aexp--;
443	}
444	}
445
446	if (bexp == 0) {
447	bexp++;
448	/* bfrac is nonzero => it must stop */
449	and128(bfrac_hi, bfrac_lo,
450	FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
451	&tmp_hihi, &tmp_lolo);
452	while (!lt128(0x0ll, 0x0ll, tmp_hihi, tmp_lolo)) {
453	lshift128(bfrac_hi, bfrac_lo, 1, &bfrac_hi, &bfrac_lo);
454	bexp--;
455	}
456	}
457
458	or128(afrac_hi, afrac_lo,
459	FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
460	&afrac_hi, &afrac_lo);
461	lshift128(afrac_hi, afrac_lo,
462	(128 - FLOAT128_FRACTION_SIZE - 1), &afrac_hi, &afrac_lo);
463	or128(bfrac_hi, bfrac_lo,
464	FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
465	&bfrac_hi, &bfrac_lo);
466	lshift128(bfrac_hi, bfrac_lo,
467	(128 - FLOAT128_FRACTION_SIZE - 1), &bfrac_hi, &bfrac_lo);
468
469	if (le128(bfrac_hi, bfrac_lo, afrac_hi, afrac_lo)) {
470	rshift128(afrac_hi, afrac_lo, 1, &afrac_hi, &afrac_lo);
471	aexp++;
472	}
473
474	cexp = aexp - bexp + FLOAT128_BIAS - 2;
475
476	cfrac_hi = div128est(afrac_hi, afrac_lo, bfrac_hi);
477
478	mul128(bfrac_hi, bfrac_lo, 0x0ll, cfrac_hi,
479	&tmp_lolo /* dummy */, &tmp_hihi, &tmp_hilo, &tmp_lohi);
480
481	/* sub192(afrac_hi, afrac_lo, 0,
482	* tmp_hihi, tmp_hilo, tmp_lohi
483	* &rem_hihi, &rem_hilo, &rem_lohi); */
484	sub128(afrac_hi, afrac_lo, tmp_hihi, tmp_hilo, &rem_hihi, &rem_hilo);
485	if (tmp_lohi > 0) {
486	sub128(rem_hihi, rem_hilo, 0x0ll, 0x1ll, &rem_hihi, &rem_hilo);
487	}
488	rem_lohi = -tmp_lohi;
489
490	while ((int64_t) rem_hihi < 0) {
491	--cfrac_hi;
492	/* add192(rem_hihi, rem_hilo, rem_lohi,
493	* 0, bfrac_hi, bfrac_lo,
494	* &rem_hihi, &rem_hilo, &rem_lohi); */
495	add128(rem_hilo, rem_lohi, bfrac_hi, bfrac_lo, &rem_hilo, &rem_lohi);
496	if (lt128(rem_hilo, rem_lohi, bfrac_hi, bfrac_lo)) {
497	++rem_hihi;
498	}
499	}
500
501	cfrac_lo = div128est(rem_hilo, rem_lohi, bfrac_lo);
502
503	if ((cfrac_lo & 0x3FFF) <= 4) {
504	mul128(bfrac_hi, bfrac_lo, 0x0ll, cfrac_lo,
505	&tmp_hihi /* dummy */, &tmp_hilo, &tmp_lohi, &tmp_lolo);
506
507	/* sub192(rem_hilo, rem_lohi, 0,
508	* tmp_hilo, tmp_lohi, tmp_lolo,
509	* &rem_hilo, &rem_lohi, &rem_lolo); */
510	sub128(rem_hilo, rem_lohi, tmp_hilo, tmp_lohi, &rem_hilo, &rem_lohi);
511	if (tmp_lolo > 0) {
512	sub128(rem_hilo, rem_lohi, 0x0ll, 0x1ll, &rem_hilo, &rem_lohi);
513	}
514	rem_lolo = -tmp_lolo;
515
516	while ((int64_t) rem_hilo < 0) {
517	--cfrac_lo;
518	/* add192(rem_hilo, rem_lohi, rem_lolo,
519	* 0, bfrac_hi, bfrac_lo,
520	* &rem_hilo, &rem_lohi, &rem_lolo); */
521	add128(rem_lohi, rem_lolo, bfrac_hi, bfrac_lo, &rem_lohi, &rem_lolo);
522	if (lt128(rem_lohi, rem_lolo, bfrac_hi, bfrac_lo)) {
523	++rem_hilo;
524	}
525	}
526
527	cfrac_lo \|= ((rem_hilo \| rem_lohi \| rem_lolo) != 0 );
528	}
529
530	shift_out = cfrac_lo << (64 - (128 - FLOAT128_FRACTION_SIZE - 1));
531	rshift128(cfrac_hi, cfrac_lo, (128 - FLOAT128_FRACTION_SIZE - 1),
532	&cfrac_hi, &cfrac_lo);
533
534	result = finishFloat128(cexp, cfrac_hi, cfrac_lo, result.parts.sign, shift_out);
535	return result;
536	}
537
538	/** @}
539	*/

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: mainline/uspace/lib/softfloat/generic/div.c@ 8bcd727

Download in other formats: