Context Navigation

mul.c@ c67aff2

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since c67aff2 was c67aff2, checked in by Petr Koupy <petr.koupy@…>, 14 years ago

Quadruple-precision softfloat, coding style improvements. Details below…

Highlights:

completed double-precision support
added quadruple-precision support
added SPARC quadruple-precision wrappers
added doxygen comments
corrected and unified coding style

Current state of the softfloat library:

Support for single, double and quadruple precision is currently almost complete (apart from power, square root, complex multiplication and complex division) and provides the same set of features (i.e. the support for all three precisions is now aligned). In order to extend softfloat library consistently, addition of quadruple precision was done in the same spirit as already existing single and double precision written by Josef Cejka in 2006 - that is relaxed standard-compliance for corner cases while mission-critical code sections heavily inspired by the widely used softfloat library written by John R. Hauser (although I personally think it would be more appropriate for HelenOS to use something less optimized, shorter and more readable).

Most of the quadruple-precision code is just an adapted double-precision code to work on 128-bit variables. That means if there is TODO, FIXME or some defect in single or double-precision code, it is most likely also in the quadruple-precision code. Please note that quadruple-precision functions are currently not tested - it is challenging task for itself, especially when the ports that use them are either not finished (mips64) or badly supported by simulators (sparc64). To test whole softfloat library, one would probably have to either write very non-trivial native tester, or use some existing one (e.g. TestFloat from J. R. Hauser) and port it to HelenOS (or rip the softfloat library out of HelenOS and test it on a host system). At the time of writing this, the code dependent on quadruple-precision functions (on mips64 and sparc64) is just a libposix strtold() function (and its callers, most notably scanf backend).

Property mode set to 100644

File size: 9.1 KB

Line
1	/*
2	* Copyright (c) 2005 Josef Cejka
3	* Copyright (c) 2011 Petr Koupy
4	* All rights reserved.
5	*
6	* Redistribution and use in source and binary forms, with or without
7	* modification, are permitted provided that the following conditions
8	* are met:
9	*
10	* - Redistributions of source code must retain the above copyright
11	* notice, this list of conditions and the following disclaimer.
12	* - Redistributions in binary form must reproduce the above copyright
13	* notice, this list of conditions and the following disclaimer in the
14	* documentation and/or other materials provided with the distribution.
15	* - The name of the author may not be used to endorse or promote products
16	* derived from this software without specific prior written permission.
17	*
18	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28	*/
29
30	/** @addtogroup softfloat
31	* @{
32	*/
33	/** @file Multiplication functions.
34	*/
35
36	#include <sftypes.h>
37	#include <mul.h>
38	#include <comparison.h>
39	#include <common.h>
40
41	/**
42	* Multiply two single-precision floats.
43	*
44	* @param a First input operand.
45	* @param b Second input operand.
46	* @return Result of multiplication.
47	*/
48	float32 mulFloat32(float32 a, float32 b)
49	{
50	float32 result;
51	uint64_t frac1, frac2;
52	int32_t exp;
53
54	result.parts.sign = a.parts.sign ^ b.parts.sign;
55
56	if (isFloat32NaN(a) \|\| isFloat32NaN(b)) {
57	/* TODO: fix SigNaNs */
58	if (isFloat32SigNaN(a)) {
59	result.parts.fraction = a.parts.fraction;
60	result.parts.exp = a.parts.exp;
61	return result;
62	}
63	if (isFloat32SigNaN(b)) { /* TODO: fix SigNaN */
64	result.parts.fraction = b.parts.fraction;
65	result.parts.exp = b.parts.exp;
66	return result;
67	}
68	/* set NaN as result */
69	result.binary = FLOAT32_NAN;
70	return result;
71	}
72
73	if (isFloat32Infinity(a)) {
74	if (isFloat32Zero(b)) {
75	/* FIXME: zero * infinity */
76	result.binary = FLOAT32_NAN;
77	return result;
78	}
79	result.parts.fraction = a.parts.fraction;
80	result.parts.exp = a.parts.exp;
81	return result;
82	}
83
84	if (isFloat32Infinity(b)) {
85	if (isFloat32Zero(a)) {
86	/* FIXME: zero * infinity */
87	result.binary = FLOAT32_NAN;
88	return result;
89	}
90	result.parts.fraction = b.parts.fraction;
91	result.parts.exp = b.parts.exp;
92	return result;
93	}
94
95	/* exp is signed so we can easy detect underflow */
96	exp = a.parts.exp + b.parts.exp;
97	exp -= FLOAT32_BIAS;
98
99	if (exp >= FLOAT32_MAX_EXPONENT) {
100	/* FIXME: overflow */
101	/* set infinity as result */
102	result.binary = FLOAT32_INF;
103	result.parts.sign = a.parts.sign ^ b.parts.sign;
104	return result;
105	}
106
107	if (exp < 0) {
108	/* FIXME: underflow */
109	/* return signed zero */
110	result.parts.fraction = 0x0;
111	result.parts.exp = 0x0;
112	return result;
113	}
114
115	frac1 = a.parts.fraction;
116	if (a.parts.exp > 0) {
117	frac1 \|= FLOAT32_HIDDEN_BIT_MASK;
118	} else {
119	++exp;
120	}
121
122	frac2 = b.parts.fraction;
123
124	if (b.parts.exp > 0) {
125	frac2 \|= FLOAT32_HIDDEN_BIT_MASK;
126	} else {
127	++exp;
128	}
129
130	frac1 <<= 1; /* one bit space for rounding */
131
132	frac1 = frac1 * frac2;
133
134	/* round and return */
135	while ((exp < FLOAT32_MAX_EXPONENT) && (frac1 >= (1 << (FLOAT32_FRACTION_SIZE + 2)))) {
136	/* 23 bits of fraction + one more for hidden bit (all shifted 1 bit left) */
137	++exp;
138	frac1 >>= 1;
139	}
140
141	/* rounding */
142	/* ++frac1; FIXME: not works - without it is ok */
143	frac1 >>= 1; /* shift off rounding space */
144
145	if ((exp < FLOAT32_MAX_EXPONENT) && (frac1 >= (1 << (FLOAT32_FRACTION_SIZE + 1)))) {
146	++exp;
147	frac1 >>= 1;
148	}
149
150	if (exp >= FLOAT32_MAX_EXPONENT) {
151	/* TODO: fix overflow */
152	/* return infinity*/
153	result.parts.exp = FLOAT32_MAX_EXPONENT;
154	result.parts.fraction = 0x0;
155	return result;
156	}
157
158	exp -= FLOAT32_FRACTION_SIZE;
159
160	if (exp <= FLOAT32_FRACTION_SIZE) {
161	/* denormalized number */
162	frac1 >>= 1; /* denormalize */
163	while ((frac1 > 0) && (exp < 0)) {
164	frac1 >>= 1;
165	++exp;
166	}
167	if (frac1 == 0) {
168	/* FIXME : underflow */
169	result.parts.exp = 0;
170	result.parts.fraction = 0;
171	return result;
172	}
173	}
174	result.parts.exp = exp;
175	result.parts.fraction = frac1 & ((1 << FLOAT32_FRACTION_SIZE) - 1);
176
177	return result;
178	}
179
180	/**
181	* Multiply two double-precision floats.
182	*
183	* @param a First input operand.
184	* @param b Second input operand.
185	* @return Result of multiplication.
186	*/
187	float64 mulFloat64(float64 a, float64 b)
188	{
189	float64 result;
190	uint64_t frac1, frac2;
191	int32_t exp;
192
193	result.parts.sign = a.parts.sign ^ b.parts.sign;
194
195	if (isFloat64NaN(a) \|\| isFloat64NaN(b)) {
196	/* TODO: fix SigNaNs */
197	if (isFloat64SigNaN(a)) {
198	result.parts.fraction = a.parts.fraction;
199	result.parts.exp = a.parts.exp;
200	return result;
201	}
202	if (isFloat64SigNaN(b)) { /* TODO: fix SigNaN */
203	result.parts.fraction = b.parts.fraction;
204	result.parts.exp = b.parts.exp;
205	return result;
206	}
207	/* set NaN as result */
208	result.binary = FLOAT64_NAN;
209	return result;
210	}
211
212	if (isFloat64Infinity(a)) {
213	if (isFloat64Zero(b)) {
214	/* FIXME: zero * infinity */
215	result.binary = FLOAT64_NAN;
216	return result;
217	}
218	result.parts.fraction = a.parts.fraction;
219	result.parts.exp = a.parts.exp;
220	return result;
221	}
222
223	if (isFloat64Infinity(b)) {
224	if (isFloat64Zero(a)) {
225	/* FIXME: zero * infinity */
226	result.binary = FLOAT64_NAN;
227	return result;
228	}
229	result.parts.fraction = b.parts.fraction;
230	result.parts.exp = b.parts.exp;
231	return result;
232	}
233
234	/* exp is signed so we can easy detect underflow */
235	exp = a.parts.exp + b.parts.exp - FLOAT64_BIAS;
236
237	frac1 = a.parts.fraction;
238
239	if (a.parts.exp > 0) {
240	frac1 \|= FLOAT64_HIDDEN_BIT_MASK;
241	} else {
242	++exp;
243	}
244
245	frac2 = b.parts.fraction;
246
247	if (b.parts.exp > 0) {
248	frac2 \|= FLOAT64_HIDDEN_BIT_MASK;
249	} else {
250	++exp;
251	}
252
253	frac1 <<= (64 - FLOAT64_FRACTION_SIZE - 1);
254	frac2 <<= (64 - FLOAT64_FRACTION_SIZE - 2);
255
256	mul64(frac1, frac2, &frac1, &frac2);
257
258	frac1 \|= (frac2 != 0);
259	if (frac1 & (0x1ll << 62)) {
260	frac1 <<= 1;
261	exp--;
262	}
263
264	result = finishFloat64(exp, frac1, result.parts.sign);
265	return result;
266	}
267
268	/**
269	* Multiply two quadruple-precision floats.
270	*
271	* @param a First input operand.
272	* @param b Second input operand.
273	* @return Result of multiplication.
274	*/
275	float128 mulFloat128(float128 a, float128 b)
276	{
277	float128 result;
278	uint64_t frac1_hi, frac1_lo, frac2_hi, frac2_lo, tmp_hi, tmp_lo;
279	int32_t exp;
280
281	result.parts.sign = a.parts.sign ^ b.parts.sign;
282
283	if (isFloat128NaN(a) \|\| isFloat128NaN(b)) {
284	/* TODO: fix SigNaNs */
285	if (isFloat128SigNaN(a)) {
286	result.parts.frac_hi = a.parts.frac_hi;
287	result.parts.frac_lo = a.parts.frac_lo;
288	result.parts.exp = a.parts.exp;
289	return result;
290	}
291	if (isFloat128SigNaN(b)) { /* TODO: fix SigNaN */
292	result.parts.frac_hi = b.parts.frac_hi;
293	result.parts.frac_lo = b.parts.frac_lo;
294	result.parts.exp = b.parts.exp;
295	return result;
296	}
297	/* set NaN as result */
298	result.binary.hi = FLOAT128_NAN_HI;
299	result.binary.lo = FLOAT128_NAN_LO;
300	return result;
301	}
302
303	if (isFloat128Infinity(a)) {
304	if (isFloat128Zero(b)) {
305	/* FIXME: zero * infinity */
306	result.binary.hi = FLOAT128_NAN_HI;
307	result.binary.lo = FLOAT128_NAN_LO;
308	return result;
309	}
310	result.parts.frac_hi = a.parts.frac_hi;
311	result.parts.frac_lo = a.parts.frac_lo;
312	result.parts.exp = a.parts.exp;
313	return result;
314	}
315
316	if (isFloat128Infinity(b)) {
317	if (isFloat128Zero(a)) {
318	/* FIXME: zero * infinity */
319	result.binary.hi = FLOAT128_NAN_HI;
320	result.binary.lo = FLOAT128_NAN_LO;
321	return result;
322	}
323	result.parts.frac_hi = b.parts.frac_hi;
324	result.parts.frac_lo = b.parts.frac_lo;
325	result.parts.exp = b.parts.exp;
326	return result;
327	}
328
329	/* exp is signed so we can easy detect underflow */
330	exp = a.parts.exp + b.parts.exp - FLOAT128_BIAS - 1;
331
332	frac1_hi = a.parts.frac_hi;
333	frac1_lo = a.parts.frac_lo;
334
335	if (a.parts.exp > 0) {
336	or128(frac1_hi, frac1_lo,
337	FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
338	&frac1_hi, &frac1_lo);
339	} else {
340	++exp;
341	}
342
343	frac2_hi = b.parts.frac_hi;
344	frac2_lo = b.parts.frac_lo;
345
346	if (b.parts.exp > 0) {
347	or128(frac2_hi, frac2_lo,
348	FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
349	&frac2_hi, &frac2_lo);
350	} else {
351	++exp;
352	}
353
354	lshift128(frac2_hi, frac2_lo,
355	128 - FLOAT128_FRACTION_SIZE, &frac2_hi, &frac2_lo);
356
357	tmp_hi = frac1_hi;
358	tmp_lo = frac1_lo;
359	mul128(frac1_hi, frac1_lo, frac2_hi, frac2_lo,
360	&frac1_hi, &frac1_lo, &frac2_hi, &frac2_lo);
361	add128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &frac1_hi, &frac1_lo);
362	frac2_hi \|= (frac2_lo != 0x0ll);
363
364	if ((FLOAT128_HIDDEN_BIT_MASK_HI << 1) <= frac1_hi) {
365	frac2_hi >>= 1;
366	if (frac1_lo & 0x1ll) {
367	frac2_hi \|= (0x1ull < 64);
368	}
369	rshift128(frac1_hi, frac1_lo, 1, &frac1_hi, &frac1_lo);
370	++exp;
371	}
372
373	result = finishFloat128(exp, frac1_hi, frac1_lo, result.parts.sign, frac2_hi);
374	return result;
375	}
376
377	/** @}
378	*/

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: mainline/uspace/lib/softfloat/generic/mul.c@ c67aff2

Download in other formats: