Context Navigation

conversion.c@ 8bcd727

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since 8bcd727 was c67aff2, checked in by Petr Koupy <petr.koupy@…>, 14 years ago

Quadruple-precision softfloat, coding style improvements. Details below…

Highlights:

completed double-precision support
added quadruple-precision support
added SPARC quadruple-precision wrappers
added doxygen comments
corrected and unified coding style

Current state of the softfloat library:

Support for single, double and quadruple precision is currently almost complete (apart from power, square root, complex multiplication and complex division) and provides the same set of features (i.e. the support for all three precisions is now aligned). In order to extend the softfloat library consistently, quadruple precision was added in the same spirit as the already existing single- and double-precision code written by Josef Cejka in 2006 - that is, relaxed standard compliance for corner cases, with mission-critical code sections heavily inspired by the widely used softfloat library written by John R. Hauser (although I personally think it would be more appropriate for HelenOS to use something less optimized, shorter and more readable).

Most of the quadruple-precision code is just double-precision code adapted to work on 128-bit variables. That means that if there is a TODO, a FIXME or some defect in the single- or double-precision code, it is most likely also present in the quadruple-precision code. Please note that the quadruple-precision functions are currently not tested - testing them is a challenging task in itself, especially when the ports that use them are either not finished (mips64) or badly supported by simulators (sparc64). To test the whole softfloat library, one would probably have to either write a very non-trivial native tester, or use an existing one (e.g. TestFloat from J. R. Hauser) and port it to HelenOS (or rip the softfloat library out of HelenOS and test it on a host system). At the time of writing this, the only code dependent on the quadruple-precision functions (on mips64 and sparc64) is the libposix strtold() function (and its callers, most notably the scanf backend).

Property mode set to 100644

File size: 21.8 KB

Line
1	/*
2	* Copyright (c) 2005 Josef Cejka
3	* Copyright (c) 2011 Petr Koupy
4	* All rights reserved.
5	*
6	* Redistribution and use in source and binary forms, with or without
7	* modification, are permitted provided that the following conditions
8	* are met:
9	*
10	* - Redistributions of source code must retain the above copyright
11	* notice, this list of conditions and the following disclaimer.
12	* - Redistributions in binary form must reproduce the above copyright
13	* notice, this list of conditions and the following disclaimer in the
14	* documentation and/or other materials provided with the distribution.
15	* - The name of the author may not be used to endorse or promote products
16	* derived from this software without specific prior written permission.
17	*
18	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28	*/
29
30	/** @addtogroup softfloat
31	* @{
32	*/
33	/** @file Conversion of precision and conversion between integers and floats.
34	*/
35
36	#include <sftypes.h>
37	#include <conversion.h>
38	#include <comparison.h>
39	#include <common.h>
40
41	float64 convertFloat32ToFloat64(float32 a)
42	{
43	float64 result;
44	uint64_t frac;
45
46	result.parts.sign = a.parts.sign;
47	result.parts.fraction = a.parts.fraction;
48	result.parts.fraction <<= (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE);
49
50	if ((isFloat32Infinity(a)) \|\| (isFloat32NaN(a))) {
51	result.parts.exp = FLOAT64_MAX_EXPONENT;
52	/* TODO; check if its correct for SigNaNs*/
53	return result;
54	}
55
56	result.parts.exp = a.parts.exp + ((int) FLOAT64_BIAS - FLOAT32_BIAS);
57	if (a.parts.exp == 0) {
58	/* normalize denormalized numbers */
59
60	if (result.parts.fraction == 0) { /* fix zero */
61	result.parts.exp = 0;
62	return result;
63	}
64
65	frac = result.parts.fraction;
66
67	while (!(frac & FLOAT64_HIDDEN_BIT_MASK)) {
68	frac <<= 1;
69	--result.parts.exp;
70	}
71
72	++result.parts.exp;
73	result.parts.fraction = frac;
74	}
75
76	return result;
77	}
78
79	float128 convertFloat32ToFloat128(float32 a)
80	{
81	float128 result;
82	uint64_t frac_hi, frac_lo;
83	uint64_t tmp_hi, tmp_lo;
84
85	result.parts.sign = a.parts.sign;
86	result.parts.frac_hi = 0;
87	result.parts.frac_lo = a.parts.fraction;
88	lshift128(result.parts.frac_hi, result.parts.frac_lo,
89	(FLOAT128_FRACTION_SIZE - FLOAT32_FRACTION_SIZE),
90	&frac_hi, &frac_lo);
91	result.parts.frac_hi = frac_hi;
92	result.parts.frac_lo = frac_lo;
93
94	if ((isFloat32Infinity(a)) \|\| (isFloat32NaN(a))) {
95	result.parts.exp = FLOAT128_MAX_EXPONENT;
96	/* TODO; check if its correct for SigNaNs*/
97	return result;
98	}
99
100	result.parts.exp = a.parts.exp + ((int) FLOAT128_BIAS - FLOAT32_BIAS);
101	if (a.parts.exp == 0) {
102	/* normalize denormalized numbers */
103
104	if (eq128(result.parts.frac_hi,
105	result.parts.frac_lo, 0x0ll, 0x0ll)) { /* fix zero */
106	result.parts.exp = 0;
107	return result;
108	}
109
110	frac_hi = result.parts.frac_hi;
111	frac_lo = result.parts.frac_lo;
112
113	and128(frac_hi, frac_lo,
114	FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
115	&tmp_hi, &tmp_lo);
116	while (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
117	lshift128(frac_hi, frac_lo, 1, &frac_hi, &frac_lo);
118	--result.parts.exp;
119	}
120
121	++result.parts.exp;
122	result.parts.frac_hi = frac_hi;
123	result.parts.frac_lo = frac_lo;
124	}
125
126	return result;
127	}
128
129	float128 convertFloat64ToFloat128(float64 a)
130	{
131	float128 result;
132	uint64_t frac_hi, frac_lo;
133	uint64_t tmp_hi, tmp_lo;
134
135	result.parts.sign = a.parts.sign;
136	result.parts.frac_hi = 0;
137	result.parts.frac_lo = a.parts.fraction;
138	lshift128(result.parts.frac_hi, result.parts.frac_lo,
139	(FLOAT128_FRACTION_SIZE - FLOAT64_FRACTION_SIZE),
140	&frac_hi, &frac_lo);
141	result.parts.frac_hi = frac_hi;
142	result.parts.frac_lo = frac_lo;
143
144	if ((isFloat64Infinity(a)) \|\| (isFloat64NaN(a))) {
145	result.parts.exp = FLOAT128_MAX_EXPONENT;
146	/* TODO; check if its correct for SigNaNs*/
147	return result;
148	}
149
150	result.parts.exp = a.parts.exp + ((int) FLOAT128_BIAS - FLOAT64_BIAS);
151	if (a.parts.exp == 0) {
152	/* normalize denormalized numbers */
153
154	if (eq128(result.parts.frac_hi,
155	result.parts.frac_lo, 0x0ll, 0x0ll)) { /* fix zero */
156	result.parts.exp = 0;
157	return result;
158	}
159
160	frac_hi = result.parts.frac_hi;
161	frac_lo = result.parts.frac_lo;
162
163	and128(frac_hi, frac_lo,
164	FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
165	&tmp_hi, &tmp_lo);
166	while (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
167	lshift128(frac_hi, frac_lo, 1, &frac_hi, &frac_lo);
168	--result.parts.exp;
169	}
170
171	++result.parts.exp;
172	result.parts.frac_hi = frac_hi;
173	result.parts.frac_lo = frac_lo;
174	}
175
176	return result;
177	}
178
179	float32 convertFloat64ToFloat32(float64 a)
180	{
181	float32 result;
182	int32_t exp;
183	uint64_t frac;
184
185	result.parts.sign = a.parts.sign;
186
187	if (isFloat64NaN(a)) {
188	result.parts.exp = FLOAT32_MAX_EXPONENT;
189
190	if (isFloat64SigNaN(a)) {
191	/* set first bit of fraction nonzero */
192	result.parts.fraction = FLOAT32_HIDDEN_BIT_MASK >> 1;
193	return result;
194	}
195
196	/* fraction nonzero but its first bit is zero */
197	result.parts.fraction = 0x1;
198	return result;
199	}
200
201	if (isFloat64Infinity(a)) {
202	result.parts.fraction = 0;
203	result.parts.exp = FLOAT32_MAX_EXPONENT;
204	return result;
205	}
206
207	exp = (int) a.parts.exp - FLOAT64_BIAS + FLOAT32_BIAS;
208
209	if (exp >= FLOAT32_MAX_EXPONENT) {
210	/* FIXME: overflow */
211	result.parts.fraction = 0;
212	result.parts.exp = FLOAT32_MAX_EXPONENT;
213	return result;
214	} else if (exp <= 0) {
215	/* underflow or denormalized */
216
217	result.parts.exp = 0;
218
219	exp *= -1;
220	if (exp > FLOAT32_FRACTION_SIZE) {
221	/* FIXME: underflow */
222	result.parts.fraction = 0;
223	return result;
224	}
225
226	/* denormalized */
227
228	frac = a.parts.fraction;
229	frac \|= FLOAT64_HIDDEN_BIT_MASK; /* denormalize and set hidden bit */
230
231	frac >>= (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE + 1);
232
233	while (exp > 0) {
234	--exp;
235	frac >>= 1;
236	}
237	result.parts.fraction = frac;
238
239	return result;
240	}
241
242	result.parts.exp = exp;
243	result.parts.fraction =
244	a.parts.fraction >> (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE);
245	return result;
246	}
247
248	float32 convertFloat128ToFloat32(float128 a)
249	{
250	float32 result;
251	int32_t exp;
252	uint64_t frac_hi, frac_lo;
253
254	result.parts.sign = a.parts.sign;
255
256	if (isFloat128NaN(a)) {
257	result.parts.exp = FLOAT32_MAX_EXPONENT;
258
259	if (isFloat128SigNaN(a)) {
260	/* set first bit of fraction nonzero */
261	result.parts.fraction = FLOAT32_HIDDEN_BIT_MASK >> 1;
262	return result;
263	}
264
265	/* fraction nonzero but its first bit is zero */
266	result.parts.fraction = 0x1;
267	return result;
268	}
269
270	if (isFloat128Infinity(a)) {
271	result.parts.fraction = 0;
272	result.parts.exp = FLOAT32_MAX_EXPONENT;
273	return result;
274	}
275
276	exp = (int) a.parts.exp - FLOAT128_BIAS + FLOAT32_BIAS;
277
278	if (exp >= FLOAT32_MAX_EXPONENT) {
279	/* FIXME: overflow */
280	result.parts.fraction = 0;
281	result.parts.exp = FLOAT32_MAX_EXPONENT;
282	return result;
283	} else if (exp <= 0) {
284	/* underflow or denormalized */
285
286	result.parts.exp = 0;
287
288	exp *= -1;
289	if (exp > FLOAT32_FRACTION_SIZE) {
290	/* FIXME: underflow */
291	result.parts.fraction = 0;
292	return result;
293	}
294
295	/* denormalized */
296
297	frac_hi = a.parts.frac_hi;
298	frac_lo = a.parts.frac_lo;
299
300	/* denormalize and set hidden bit */
301	frac_hi \|= FLOAT128_HIDDEN_BIT_MASK_HI;
302
303	rshift128(frac_hi, frac_lo,
304	(FLOAT128_FRACTION_SIZE - FLOAT32_FRACTION_SIZE + 1),
305	&frac_hi, &frac_lo);
306
307	while (exp > 0) {
308	--exp;
309	rshift128(frac_hi, frac_lo, 1, &frac_hi, &frac_lo);
310	}
311	result.parts.fraction = frac_lo;
312
313	return result;
314	}
315
316	result.parts.exp = exp;
317	frac_hi = a.parts.frac_hi;
318	frac_lo = a.parts.frac_lo;
319	rshift128(frac_hi, frac_lo,
320	(FLOAT128_FRACTION_SIZE - FLOAT32_FRACTION_SIZE + 1),
321	&frac_hi, &frac_lo);
322	result.parts.fraction = frac_lo;
323	return result;
324	}
325
326	float64 convertFloat128ToFloat64(float128 a)
327	{
328	float64 result;
329	int32_t exp;
330	uint64_t frac_hi, frac_lo;
331
332	result.parts.sign = a.parts.sign;
333
334	if (isFloat128NaN(a)) {
335	result.parts.exp = FLOAT64_MAX_EXPONENT;
336
337	if (isFloat128SigNaN(a)) {
338	/* set first bit of fraction nonzero */
339	result.parts.fraction = FLOAT64_HIDDEN_BIT_MASK >> 1;
340	return result;
341	}
342
343	/* fraction nonzero but its first bit is zero */
344	result.parts.fraction = 0x1;
345	return result;
346	}
347
348	if (isFloat128Infinity(a)) {
349	result.parts.fraction = 0;
350	result.parts.exp = FLOAT64_MAX_EXPONENT;
351	return result;
352	}
353
354	exp = (int) a.parts.exp - FLOAT128_BIAS + FLOAT64_BIAS;
355
356	if (exp >= FLOAT64_MAX_EXPONENT) {
357	/* FIXME: overflow */
358	result.parts.fraction = 0;
359	result.parts.exp = FLOAT64_MAX_EXPONENT;
360	return result;
361	} else if (exp <= 0) {
362	/* underflow or denormalized */
363
364	result.parts.exp = 0;
365
366	exp *= -1;
367	if (exp > FLOAT64_FRACTION_SIZE) {
368	/* FIXME: underflow */
369	result.parts.fraction = 0;
370	return result;
371	}
372
373	/* denormalized */
374
375	frac_hi = a.parts.frac_hi;
376	frac_lo = a.parts.frac_lo;
377
378	/* denormalize and set hidden bit */
379	frac_hi \|= FLOAT128_HIDDEN_BIT_MASK_HI;
380
381	rshift128(frac_hi, frac_lo,
382	(FLOAT128_FRACTION_SIZE - FLOAT64_FRACTION_SIZE + 1),
383	&frac_hi, &frac_lo);
384
385	while (exp > 0) {
386	--exp;
387	rshift128(frac_hi, frac_lo, 1, &frac_hi, &frac_lo);
388	}
389	result.parts.fraction = frac_lo;
390
391	return result;
392	}
393
394	result.parts.exp = exp;
395	frac_hi = a.parts.frac_hi;
396	frac_lo = a.parts.frac_lo;
397	rshift128(frac_hi, frac_lo,
398	(FLOAT128_FRACTION_SIZE - FLOAT64_FRACTION_SIZE + 1),
399	&frac_hi, &frac_lo);
400	result.parts.fraction = frac_lo;
401	return result;
402	}
403
404
405	/**
406	* Helping procedure for converting float32 to uint32.
407	*
408	* @param a Floating point number in normalized form
409	* (NaNs or Inf are not checked).
410	* @return Converted unsigned integer.
411	*/
412	static uint32_t _float32_to_uint32_helper(float32 a)
413	{
414	uint32_t frac;
415
416	if (a.parts.exp < FLOAT32_BIAS) {
417	/* TODO: rounding */
418	return 0;
419	}
420
421	frac = a.parts.fraction;
422
423	frac \|= FLOAT32_HIDDEN_BIT_MASK;
424	/* shift fraction to left so hidden bit will be the most significant bit */
425	frac <<= 32 - FLOAT32_FRACTION_SIZE - 1;
426
427	frac >>= 32 - (a.parts.exp - FLOAT32_BIAS) - 1;
428	if ((a.parts.sign == 1) && (frac != 0)) {
429	frac = ~frac;
430	++frac;
431	}
432
433	return frac;
434	}
435
436	/*
437	* FIXME: Im not sure what to return if overflow/underflow happens
438	* - now its the biggest or the smallest int
439	*/
440	uint32_t float32_to_uint32(float32 a)
441	{
442	if (isFloat32NaN(a))
443	return UINT32_MAX;
444
445	if (isFloat32Infinity(a) \|\| (a.parts.exp >= (32 + FLOAT32_BIAS))) {
446	if (a.parts.sign)
447	return UINT32_MIN;
448
449	return UINT32_MAX;
450	}
451
452	return _float32_to_uint32_helper(a);
453	}
454
455	/*
456	* FIXME: Im not sure what to return if overflow/underflow happens
457	* - now its the biggest or the smallest int
458	*/
459	int32_t float32_to_int32(float32 a)
460	{
461	if (isFloat32NaN(a))
462	return INT32_MAX;
463
464	if (isFloat32Infinity(a) \|\| (a.parts.exp >= (32 + FLOAT32_BIAS))) {
465	if (a.parts.sign)
466	return INT32_MIN;
467
468	return INT32_MAX;
469	}
470
471	return _float32_to_uint32_helper(a);
472	}
473
474
475	/**
476	* Helping procedure for converting float32 to uint64.
477	*
478	* @param a Floating point number in normalized form
479	* (NaNs or Inf are not checked).
480	* @return Converted unsigned integer.
481	*/
482	static uint64_t _float32_to_uint64_helper(float32 a)
483	{
484	uint64_t frac;
485
486	if (a.parts.exp < FLOAT32_BIAS) {
487	/TODO: rounding/
488	return 0;
489	}
490
491	frac = a.parts.fraction;
492
493	frac \|= FLOAT32_HIDDEN_BIT_MASK;
494	/* shift fraction to left so hidden bit will be the most significant bit */
495	frac <<= 64 - FLOAT32_FRACTION_SIZE - 1;
496
497	frac >>= 64 - (a.parts.exp - FLOAT32_BIAS) - 1;
498	if ((a.parts.sign == 1) && (frac != 0)) {
499	frac = ~frac;
500	++frac;
501	}
502
503	return frac;
504	}
505
506	/*
507	* FIXME: Im not sure what to return if overflow/underflow happens
508	* - now its the biggest or the smallest int
509	*/
510	uint64_t float32_to_uint64(float32 a)
511	{
512	if (isFloat32NaN(a))
513	return UINT64_MAX;
514
515
516	if (isFloat32Infinity(a) \|\| (a.parts.exp >= (64 + FLOAT32_BIAS))) {
517	if (a.parts.sign)
518	return UINT64_MIN;
519
520	return UINT64_MAX;
521	}
522
523	return _float32_to_uint64_helper(a);
524	}
525
526	/*
527	* FIXME: Im not sure what to return if overflow/underflow happens
528	* - now its the biggest or the smallest int
529	*/
530	int64_t float32_to_int64(float32 a)
531	{
532	if (isFloat32NaN(a))
533	return INT64_MAX;
534
535	if (isFloat32Infinity(a) \|\| (a.parts.exp >= (64 + FLOAT32_BIAS))) {
536	if (a.parts.sign)
537	return INT64_MIN;
538
539	return INT64_MAX;
540	}
541
542	return _float32_to_uint64_helper(a);
543	}
544
545
546	/**
547	* Helping procedure for converting float64 to uint64.
548	*
549	* @param a Floating point number in normalized form
550	* (NaNs or Inf are not checked).
551	* @return Converted unsigned integer.
552	*/
553	static uint64_t _float64_to_uint64_helper(float64 a)
554	{
555	uint64_t frac;
556
557	if (a.parts.exp < FLOAT64_BIAS) {
558	/TODO: rounding/
559	return 0;
560	}
561
562	frac = a.parts.fraction;
563
564	frac \|= FLOAT64_HIDDEN_BIT_MASK;
565	/* shift fraction to left so hidden bit will be the most significant bit */
566	frac <<= 64 - FLOAT64_FRACTION_SIZE - 1;
567
568	frac >>= 64 - (a.parts.exp - FLOAT64_BIAS) - 1;
569	if ((a.parts.sign == 1) && (frac != 0)) {
570	frac = ~frac;
571	++frac;
572	}
573
574	return frac;
575	}
576
577	/*
578	* FIXME: Im not sure what to return if overflow/underflow happens
579	* - now its the biggest or the smallest int
580	*/
581	uint32_t float64_to_uint32(float64 a)
582	{
583	if (isFloat64NaN(a))
584	return UINT32_MAX;
585
586	if (isFloat64Infinity(a) \|\| (a.parts.exp >= (32 + FLOAT64_BIAS))) {
587	if (a.parts.sign)
588	return UINT32_MIN;
589
590	return UINT32_MAX;
591	}
592
593	return (uint32_t) _float64_to_uint64_helper(a);
594	}
595
596	/*
597	* FIXME: Im not sure what to return if overflow/underflow happens
598	* - now its the biggest or the smallest int
599	*/
600	int32_t float64_to_int32(float64 a)
601	{
602	if (isFloat64NaN(a))
603	return INT32_MAX;
604
605	if (isFloat64Infinity(a) \|\| (a.parts.exp >= (32 + FLOAT64_BIAS))) {
606	if (a.parts.sign)
607	return INT32_MIN;
608
609	return INT32_MAX;
610	}
611
612	return (int32_t) _float64_to_uint64_helper(a);
613	}
614
615
616	/*
617	* FIXME: Im not sure what to return if overflow/underflow happens
618	* - now its the biggest or the smallest int
619	*/
620	uint64_t float64_to_uint64(float64 a)
621	{
622	if (isFloat64NaN(a))
623	return UINT64_MAX;
624
625	if (isFloat64Infinity(a) \|\| (a.parts.exp >= (64 + FLOAT64_BIAS))) {
626	if (a.parts.sign)
627	return UINT64_MIN;
628
629	return UINT64_MAX;
630	}
631
632	return _float64_to_uint64_helper(a);
633	}
634
635	/*
636	* FIXME: Im not sure what to return if overflow/underflow happens
637	* - now its the biggest or the smallest int
638	*/
639	int64_t float64_to_int64(float64 a)
640	{
641	if (isFloat64NaN(a))
642	return INT64_MAX;
643
644	if (isFloat64Infinity(a) \|\| (a.parts.exp >= (64 + FLOAT64_BIAS))) {
645	if (a.parts.sign)
646	return INT64_MIN;
647
648	return INT64_MAX;
649	}
650
651	return _float64_to_uint64_helper(a);
652	}
653
654
655	/**
656	* Helping procedure for converting float128 to uint64.
657	*
658	* @param a Floating point number in normalized form
659	* (NaNs or Inf are not checked).
660	* @return Converted unsigned integer.
661	*/
662	static uint64_t _float128_to_uint64_helper(float128 a)
663	{
664	uint64_t frac_hi, frac_lo;
665
666	if (a.parts.exp < FLOAT128_BIAS) {
667	/TODO: rounding/
668	return 0;
669	}
670
671	frac_hi = a.parts.frac_hi;
672	frac_lo = a.parts.frac_lo;
673
674	frac_hi \|= FLOAT128_HIDDEN_BIT_MASK_HI;
675	/* shift fraction to left so hidden bit will be the most significant bit */
676	lshift128(frac_hi, frac_lo,
677	(128 - FLOAT128_FRACTION_SIZE - 1), &frac_hi, &frac_lo);
678
679	rshift128(frac_hi, frac_lo,
680	(128 - (a.parts.exp - FLOAT128_BIAS) - 1), &frac_hi, &frac_lo);
681	if ((a.parts.sign == 1) && !eq128(frac_hi, frac_lo, 0x0ll, 0x0ll)) {
682	not128(frac_hi, frac_lo, &frac_hi, &frac_lo);
683	add128(frac_hi, frac_lo, 0x0ll, 0x1ll, &frac_hi, &frac_lo);
684	}
685
686	return frac_lo;
687	}
688
689	/*
690	* FIXME: Im not sure what to return if overflow/underflow happens
691	* - now its the biggest or the smallest int
692	*/
693	uint32_t float128_to_uint32(float128 a)
694	{
695	if (isFloat128NaN(a))
696	return UINT32_MAX;
697
698	if (isFloat128Infinity(a) \|\| (a.parts.exp >= (32 + FLOAT128_BIAS))) {
699	if (a.parts.sign)
700	return UINT32_MIN;
701
702	return UINT32_MAX;
703	}
704
705	return (uint32_t) _float128_to_uint64_helper(a);
706	}
707
708	/*
709	* FIXME: Im not sure what to return if overflow/underflow happens
710	* - now its the biggest or the smallest int
711	*/
712	int32_t float128_to_int32(float128 a)
713	{
714	if (isFloat128NaN(a))
715	return INT32_MAX;
716
717	if (isFloat128Infinity(a) \|\| (a.parts.exp >= (32 + FLOAT128_BIAS))) {
718	if (a.parts.sign)
719	return INT32_MIN;
720
721	return INT32_MAX;
722	}
723
724	return (int32_t) _float128_to_uint64_helper(a);
725	}
726
727
728	/*
729	* FIXME: Im not sure what to return if overflow/underflow happens
730	* - now its the biggest or the smallest int
731	*/
732	uint64_t float128_to_uint64(float128 a)
733	{
734	if (isFloat128NaN(a))
735	return UINT64_MAX;
736
737	if (isFloat128Infinity(a) \|\| (a.parts.exp >= (64 + FLOAT128_BIAS))) {
738	if (a.parts.sign)
739	return UINT64_MIN;
740
741	return UINT64_MAX;
742	}
743
744	return _float128_to_uint64_helper(a);
745	}
746
747	/*
748	* FIXME: Im not sure what to return if overflow/underflow happens
749	* - now its the biggest or the smallest int
750	*/
751	int64_t float128_to_int64(float128 a)
752	{
753	if (isFloat128NaN(a))
754	return INT64_MAX;
755
756	if (isFloat128Infinity(a) \|\| (a.parts.exp >= (64 + FLOAT128_BIAS))) {
757	if (a.parts.sign)
758	return INT64_MIN;
759
760	return INT64_MAX;
761	}
762
763	return _float128_to_uint64_helper(a);
764	}
765
766
767	float32 uint32_to_float32(uint32_t i)
768	{
769	int counter;
770	int32_t exp;
771	float32 result;
772
773	result.parts.sign = 0;
774	result.parts.fraction = 0;
775
776	counter = countZeroes32(i);
777
778	exp = FLOAT32_BIAS + 32 - counter - 1;
779
780	if (counter == 32) {
781	result.binary = 0;
782	return result;
783	}
784
785	if (counter > 0) {
786	i <<= counter - 1;
787	} else {
788	i >>= 1;
789	}
790
791	roundFloat32(&exp, &i);
792
793	result.parts.fraction = i >> (32 - FLOAT32_FRACTION_SIZE - 2);
794	result.parts.exp = exp;
795
796	return result;
797	}
798
799	float32 int32_to_float32(int32_t i)
800	{
801	float32 result;
802
803	if (i < 0) {
804	result = uint32_to_float32((uint32_t) (-i));
805	} else {
806	result = uint32_to_float32((uint32_t) i);
807	}
808
809	result.parts.sign = i < 0;
810
811	return result;
812	}
813
814
815	float32 uint64_to_float32(uint64_t i)
816	{
817	int counter;
818	int32_t exp;
819	uint32_t j;
820	float32 result;
821
822	result.parts.sign = 0;
823	result.parts.fraction = 0;
824
825	counter = countZeroes64(i);
826
827	exp = FLOAT32_BIAS + 64 - counter - 1;
828
829	if (counter == 64) {
830	result.binary = 0;
831	return result;
832	}
833
834	/* Shift all to the first 31 bits (31st will be hidden 1) */
835	if (counter > 33) {
836	i <<= counter - 1 - 32;
837	} else {
838	i >>= 1 + 32 - counter;
839	}
840
841	j = (uint32_t) i;
842	roundFloat32(&exp, &j);
843
844	result.parts.fraction = j >> (32 - FLOAT32_FRACTION_SIZE - 2);
845	result.parts.exp = exp;
846	return result;
847	}
848
849	float32 int64_to_float32(int64_t i)
850	{
851	float32 result;
852
853	if (i < 0) {
854	result = uint64_to_float32((uint64_t) (-i));
855	} else {
856	result = uint64_to_float32((uint64_t) i);
857	}
858
859	result.parts.sign = i < 0;
860
861	return result;
862	}
863
864	float64 uint32_to_float64(uint32_t i)
865	{
866	int counter;
867	int32_t exp;
868	float64 result;
869	uint64_t frac;
870
871	result.parts.sign = 0;
872	result.parts.fraction = 0;
873
874	counter = countZeroes32(i);
875
876	exp = FLOAT64_BIAS + 32 - counter - 1;
877
878	if (counter == 32) {
879	result.binary = 0;
880	return result;
881	}
882
883	frac = i;
884	frac <<= counter + 32 - 1;
885
886	roundFloat64(&exp, &frac);
887
888	result.parts.fraction = frac >> (64 - FLOAT64_FRACTION_SIZE - 2);
889	result.parts.exp = exp;
890
891	return result;
892	}
893
894	float64 int32_to_float64(int32_t i)
895	{
896	float64 result;
897
898	if (i < 0) {
899	result = uint32_to_float64((uint32_t) (-i));
900	} else {
901	result = uint32_to_float64((uint32_t) i);
902	}
903
904	result.parts.sign = i < 0;
905
906	return result;
907	}
908
909
910	float64 uint64_to_float64(uint64_t i)
911	{
912	int counter;
913	int32_t exp;
914	float64 result;
915
916	result.parts.sign = 0;
917	result.parts.fraction = 0;
918
919	counter = countZeroes64(i);
920
921	exp = FLOAT64_BIAS + 64 - counter - 1;
922
923	if (counter == 64) {
924	result.binary = 0;
925	return result;
926	}
927
928	if (counter > 0) {
929	i <<= counter - 1;
930	} else {
931	i >>= 1;
932	}
933
934	roundFloat64(&exp, &i);
935
936	result.parts.fraction = i >> (64 - FLOAT64_FRACTION_SIZE - 2);
937	result.parts.exp = exp;
938	return result;
939	}
940
941	float64 int64_to_float64(int64_t i)
942	{
943	float64 result;
944
945	if (i < 0) {
946	result = uint64_to_float64((uint64_t) (-i));
947	} else {
948	result = uint64_to_float64((uint64_t) i);
949	}
950
951	result.parts.sign = i < 0;
952
953	return result;
954	}
955
956
957	float128 uint32_to_float128(uint32_t i)
958	{
959	int counter;
960	int32_t exp;
961	float128 result;
962	uint64_t frac_hi, frac_lo;
963
964	result.parts.sign = 0;
965	result.parts.frac_hi = 0;
966	result.parts.frac_lo = 0;
967
968	counter = countZeroes32(i);
969
970	exp = FLOAT128_BIAS + 32 - counter - 1;
971
972	if (counter == 32) {
973	result.binary.hi = 0;
974	result.binary.lo = 0;
975	return result;
976	}
977
978	frac_hi = 0;
979	frac_lo = i;
980	lshift128(frac_hi, frac_lo, (counter + 96 - 1), &frac_hi, &frac_lo);
981
982	roundFloat128(&exp, &frac_hi, &frac_lo);
983
984	rshift128(frac_hi, frac_lo,
985	(128 - FLOAT128_FRACTION_SIZE - 2), &frac_hi, &frac_lo);
986	result.parts.frac_hi = frac_hi;
987	result.parts.frac_lo = frac_lo;
988	result.parts.exp = exp;
989
990	return result;
991	}
992
993	float128 int32_to_float128(int32_t i)
994	{
995	float128 result;
996
997	if (i < 0) {
998	result = uint32_to_float128((uint32_t) (-i));
999	} else {
1000	result = uint32_to_float128((uint32_t) i);
1001	}
1002
1003	result.parts.sign = i < 0;
1004
1005	return result;
1006	}
1007
1008
1009	float128 uint64_to_float128(uint64_t i)
1010	{
1011	int counter;
1012	int32_t exp;
1013	float128 result;
1014	uint64_t frac_hi, frac_lo;
1015
1016	result.parts.sign = 0;
1017	result.parts.frac_hi = 0;
1018	result.parts.frac_lo = 0;
1019
1020	counter = countZeroes64(i);
1021
1022	exp = FLOAT128_BIAS + 64 - counter - 1;
1023
1024	if (counter == 64) {
1025	result.binary.hi = 0;
1026	result.binary.lo = 0;
1027	return result;
1028	}
1029
1030	frac_hi = 0;
1031	frac_lo = i;
1032	lshift128(frac_hi, frac_lo, (counter + 64 - 1), &frac_hi, &frac_lo);
1033
1034	roundFloat128(&exp, &frac_hi, &frac_lo);
1035
1036	rshift128(frac_hi, frac_lo,
1037	(128 - FLOAT128_FRACTION_SIZE - 2), &frac_hi, &frac_lo);
1038	result.parts.frac_hi = frac_hi;
1039	result.parts.frac_lo = frac_lo;
1040	result.parts.exp = exp;
1041
1042	return result;
1043	}
1044
1045	float128 int64_to_float128(int64_t i)
1046	{
1047	float128 result;
1048
1049	if (i < 0) {
1050	result = uint64_to_float128((uint64_t) (-i));
1051	} else {
1052	result = uint64_to_float128((uint64_t) i);
1053	}
1054
1055	result.parts.sign = i < 0;
1056
1057	return result;
1058	}
1059
1060	/** @}
1061	*/

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: mainline/uspace/lib/softfloat/generic/conversion.c@ 8bcd727

Download in other formats: