Context Navigation

conversion.c@ eea3e39

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since eea3e39 was c67aff2, checked in by Petr Koupy <petr.koupy@…>, 14 years ago

Quadruple-precision softfloat, coding style improvements. Details below…

Highlights:

completed double-precision support
added quadruple-precision support
added SPARC quadruple-precision wrappers
added doxygen comments
corrected and unified coding style

Current state of the softfloat library:

Support for single, double and quadruple precision is currently almost complete (apart from power, square root, complex multiplication and complex division) and provides the same set of features (i.e. the support for all three precisions is now aligned). In order to extend the softfloat library consistently, the addition of quadruple precision was done in the same spirit as the already existing single and double precision written by Josef Cejka in 2006 - that is, relaxed standard-compliance for corner cases, while mission-critical code sections are heavily inspired by the widely used softfloat library written by John R. Hauser (although I personally think it would be more appropriate for HelenOS to use something less optimized, shorter and more readable).

Most of the quadruple-precision code is just double-precision code adapted to work on 128-bit variables. That means if there is a TODO, a FIXME or some defect in the single or double-precision code, it is most likely also in the quadruple-precision code. Please note that the quadruple-precision functions are currently not tested - that is a challenging task in itself, especially when the ports that use them are either not finished (mips64) or badly supported by simulators (sparc64). To test the whole softfloat library, one would probably have to either write a very non-trivial native tester, or use an existing one (e.g. TestFloat from J. R. Hauser) and port it to HelenOS (or rip the softfloat library out of HelenOS and test it on a host system). At the time of writing this, the code dependent on quadruple-precision functions (on mips64 and sparc64) is just the libposix strtold() function (and its callers, most notably the scanf backend).

Property mode set to 100644

File size: 21.8 KB

Rev	Line
[b5440cf]	1	/*
[df4ed85]	2	* Copyright (c) 2005 Josef Cejka
[c67aff2]	3	* Copyright (c) 2011 Petr Koupy
[b5440cf]	4	* All rights reserved.
	5	*
	6	* Redistribution and use in source and binary forms, with or without
	7	* modification, are permitted provided that the following conditions
	8	* are met:
	9	*
	10	* - Redistributions of source code must retain the above copyright
	11	* notice, this list of conditions and the following disclaimer.
	12	* - Redistributions in binary form must reproduce the above copyright
	13	* notice, this list of conditions and the following disclaimer in the
	14	* documentation and/or other materials provided with the distribution.
	15	* - The name of the author may not be used to endorse or promote products
	16	* derived from this software without specific prior written permission.
	17	*
	18	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	19	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	20	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	21	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
	22	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	23	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	24	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	25	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	26	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
	27	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	28	*/
	29
[9539be6]	30	/** @addtogroup softfloat
[846848a6]	31	* @{
	32	*/
[c67aff2]	33	/** @file Conversion of precision and conversion between integers and floats.
[846848a6]	34	*/
	35
[c67aff2]	36	#include <sftypes.h>
	37	#include <conversion.h>
	38	#include <comparison.h>
	39	#include <common.h>
[feef1cd]	40
	41	float64 convertFloat32ToFloat64(float32 a)
	42	{
	43	float64 result;
[aa59fa0]	44	uint64_t frac;
[feef1cd]	45
	46	result.parts.sign = a.parts.sign;
[1266543]	47	result.parts.fraction = a.parts.fraction;
[9539be6]	48	result.parts.fraction <<= (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE);
[feef1cd]	49
[9539be6]	50	if ((isFloat32Infinity(a)) \|\| (isFloat32NaN(a))) {
[c67aff2]	51	result.parts.exp = FLOAT64_MAX_EXPONENT;
[feef1cd]	52	/* TODO; check if its correct for SigNaNs*/
	53	return result;
[c67aff2]	54	}
[feef1cd]	55
[9539be6]	56	result.parts.exp = a.parts.exp + ((int) FLOAT64_BIAS - FLOAT32_BIAS);
[feef1cd]	57	if (a.parts.exp == 0) {
	58	/* normalize denormalized numbers */
	59
[c67aff2]	60	if (result.parts.fraction == 0) { /* fix zero */
	61	result.parts.exp = 0;
[feef1cd]	62	return result;
	63	}
	64
[1266543]	65	frac = result.parts.fraction;
[feef1cd]	66
[c67aff2]	67	while (!(frac & FLOAT64_HIDDEN_BIT_MASK)) {
[1266543]	68	frac <<= 1;
[feef1cd]	69	--result.parts.exp;
[c67aff2]	70	}
[56a39dde]	71
	72	++result.parts.exp;
[1266543]	73	result.parts.fraction = frac;
[c67aff2]	74	}
[feef1cd]	75
	76	return result;
[c67aff2]	77	}
	78
	79	float128 convertFloat32ToFloat128(float32 a)
	80	{
	81	float128 result;
	82	uint64_t frac_hi, frac_lo;
	83	uint64_t tmp_hi, tmp_lo;
	84
	85	result.parts.sign = a.parts.sign;
	86	result.parts.frac_hi = 0;
	87	result.parts.frac_lo = a.parts.fraction;
	88	lshift128(result.parts.frac_hi, result.parts.frac_lo,
	89	(FLOAT128_FRACTION_SIZE - FLOAT32_FRACTION_SIZE),
	90	&frac_hi, &frac_lo);
	91	result.parts.frac_hi = frac_hi;
	92	result.parts.frac_lo = frac_lo;
	93
	94	if ((isFloat32Infinity(a)) \|\| (isFloat32NaN(a))) {
	95	result.parts.exp = FLOAT128_MAX_EXPONENT;
	96	/* TODO; check if its correct for SigNaNs*/
	97	return result;
	98	}
	99
	100	result.parts.exp = a.parts.exp + ((int) FLOAT128_BIAS - FLOAT32_BIAS);
	101	if (a.parts.exp == 0) {
	102	/* normalize denormalized numbers */
	103
	104	if (eq128(result.parts.frac_hi,
	105	result.parts.frac_lo, 0x0ll, 0x0ll)) { /* fix zero */
	106	result.parts.exp = 0;
	107	return result;
	108	}
	109
	110	frac_hi = result.parts.frac_hi;
	111	frac_lo = result.parts.frac_lo;
	112
	113	and128(frac_hi, frac_lo,
	114	FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
	115	&tmp_hi, &tmp_lo);
	116	while (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
	117	lshift128(frac_hi, frac_lo, 1, &frac_hi, &frac_lo);
	118	--result.parts.exp;
	119	}
	120
	121	++result.parts.exp;
	122	result.parts.frac_hi = frac_hi;
	123	result.parts.frac_lo = frac_lo;
	124	}
	125
	126	return result;
	127	}
	128
	129	float128 convertFloat64ToFloat128(float64 a)
	130	{
	131	float128 result;
	132	uint64_t frac_hi, frac_lo;
	133	uint64_t tmp_hi, tmp_lo;
	134
	135	result.parts.sign = a.parts.sign;
	136	result.parts.frac_hi = 0;
	137	result.parts.frac_lo = a.parts.fraction;
	138	lshift128(result.parts.frac_hi, result.parts.frac_lo,
	139	(FLOAT128_FRACTION_SIZE - FLOAT64_FRACTION_SIZE),
	140	&frac_hi, &frac_lo);
	141	result.parts.frac_hi = frac_hi;
	142	result.parts.frac_lo = frac_lo;
	143
	144	if ((isFloat64Infinity(a)) \|\| (isFloat64NaN(a))) {
	145	result.parts.exp = FLOAT128_MAX_EXPONENT;
	146	/* TODO; check if its correct for SigNaNs*/
	147	return result;
	148	}
	149
	150	result.parts.exp = a.parts.exp + ((int) FLOAT128_BIAS - FLOAT64_BIAS);
	151	if (a.parts.exp == 0) {
	152	/* normalize denormalized numbers */
	153
	154	if (eq128(result.parts.frac_hi,
	155	result.parts.frac_lo, 0x0ll, 0x0ll)) { /* fix zero */
	156	result.parts.exp = 0;
	157	return result;
	158	}
	159
	160	frac_hi = result.parts.frac_hi;
	161	frac_lo = result.parts.frac_lo;
	162
	163	and128(frac_hi, frac_lo,
	164	FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
	165	&tmp_hi, &tmp_lo);
	166	while (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
	167	lshift128(frac_hi, frac_lo, 1, &frac_hi, &frac_lo);
	168	--result.parts.exp;
	169	}
	170
	171	++result.parts.exp;
	172	result.parts.frac_hi = frac_hi;
	173	result.parts.frac_lo = frac_lo;
	174	}
	175
	176	return result;
[afffa1e]	177	}
[feef1cd]	178
	179	float32 convertFloat64ToFloat32(float64 a)
	180	{
	181	float32 result;
[aa59fa0]	182	int32_t exp;
	183	uint64_t frac;
[feef1cd]	184
	185	result.parts.sign = a.parts.sign;
	186
	187	if (isFloat64NaN(a)) {
[c67aff2]	188	result.parts.exp = FLOAT32_MAX_EXPONENT;
[feef1cd]	189
	190	if (isFloat64SigNaN(a)) {
[c67aff2]	191	/* set first bit of fraction nonzero */
	192	result.parts.fraction = FLOAT32_HIDDEN_BIT_MASK >> 1;
[feef1cd]	193	return result;
	194	}
[c67aff2]	195
	196	/* fraction nonzero but its first bit is zero */
	197	result.parts.fraction = 0x1;
[feef1cd]	198	return result;
[c67aff2]	199	}
[feef1cd]	200
	201	if (isFloat64Infinity(a)) {
[1266543]	202	result.parts.fraction = 0;
[c67aff2]	203	result.parts.exp = FLOAT32_MAX_EXPONENT;
[feef1cd]	204	return result;
[c67aff2]	205	}
[feef1cd]	206
[c67aff2]	207	exp = (int) a.parts.exp - FLOAT64_BIAS + FLOAT32_BIAS;
[feef1cd]	208
[c67aff2]	209	if (exp >= FLOAT32_MAX_EXPONENT) {
	210	/* FIXME: overflow */
[1266543]	211	result.parts.fraction = 0;
[c67aff2]	212	result.parts.exp = FLOAT32_MAX_EXPONENT;
[feef1cd]	213	return result;
[c67aff2]	214	} else if (exp <= 0) {
[feef1cd]	215	/* underflow or denormalized */
	216
	217	result.parts.exp = 0;
	218
	219	exp *= -1;
[c67aff2]	220	if (exp > FLOAT32_FRACTION_SIZE) {
[feef1cd]	221	/* FIXME: underflow */
[1266543]	222	result.parts.fraction = 0;
[feef1cd]	223	return result;
[c67aff2]	224	}
[feef1cd]	225
	226	/* denormalized */
	227
[1266543]	228	frac = a.parts.fraction;
[c67aff2]	229	frac \|= FLOAT64_HIDDEN_BIT_MASK; /* denormalize and set hidden bit */
[feef1cd]	230
[1266543]	231	frac >>= (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE + 1);
[56a39dde]	232
[feef1cd]	233	while (exp > 0) {
	234	--exp;
[1266543]	235	frac >>= 1;
[c67aff2]	236	}
[1266543]	237	result.parts.fraction = frac;
[feef1cd]	238
	239	return result;
[c67aff2]	240	}
	241
	242	result.parts.exp = exp;
	243	result.parts.fraction =
	244	a.parts.fraction >> (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE);
	245	return result;
	246	}
	247
	248	float32 convertFloat128ToFloat32(float128 a)
	249	{
	250	float32 result;
	251	int32_t exp;
	252	uint64_t frac_hi, frac_lo;
	253
	254	result.parts.sign = a.parts.sign;
	255
	256	if (isFloat128NaN(a)) {
	257	result.parts.exp = FLOAT32_MAX_EXPONENT;
	258
	259	if (isFloat128SigNaN(a)) {
	260	/* set first bit of fraction nonzero */
	261	result.parts.fraction = FLOAT32_HIDDEN_BIT_MASK >> 1;
	262	return result;
	263	}
	264
	265	/* fraction nonzero but its first bit is zero */
	266	result.parts.fraction = 0x1;
	267	return result;
	268	}
	269
	270	if (isFloat128Infinity(a)) {
	271	result.parts.fraction = 0;
	272	result.parts.exp = FLOAT32_MAX_EXPONENT;
	273	return result;
	274	}
	275
	276	exp = (int) a.parts.exp - FLOAT128_BIAS + FLOAT32_BIAS;
	277
	278	if (exp >= FLOAT32_MAX_EXPONENT) {
	279	/* FIXME: overflow */
	280	result.parts.fraction = 0;
	281	result.parts.exp = FLOAT32_MAX_EXPONENT;
	282	return result;
	283	} else if (exp <= 0) {
	284	/* underflow or denormalized */
	285
	286	result.parts.exp = 0;
	287
	288	exp *= -1;
	289	if (exp > FLOAT32_FRACTION_SIZE) {
	290	/* FIXME: underflow */
	291	result.parts.fraction = 0;
	292	return result;
	293	}
	294
	295	/* denormalized */
	296
	297	frac_hi = a.parts.frac_hi;
	298	frac_lo = a.parts.frac_lo;
	299
	300	/* denormalize and set hidden bit */
	301	frac_hi \|= FLOAT128_HIDDEN_BIT_MASK_HI;
	302
	303	rshift128(frac_hi, frac_lo,
	304	(FLOAT128_FRACTION_SIZE - FLOAT32_FRACTION_SIZE + 1),
	305	&frac_hi, &frac_lo);
	306
	307	while (exp > 0) {
	308	--exp;
	309	rshift128(frac_hi, frac_lo, 1, &frac_hi, &frac_lo);
	310	}
	311	result.parts.fraction = frac_lo;
	312
	313	return result;
	314	}
[feef1cd]	315
	316	result.parts.exp = exp;
[c67aff2]	317	frac_hi = a.parts.frac_hi;
	318	frac_lo = a.parts.frac_lo;
	319	rshift128(frac_hi, frac_lo,
	320	(FLOAT128_FRACTION_SIZE - FLOAT32_FRACTION_SIZE + 1),
	321	&frac_hi, &frac_lo);
	322	result.parts.fraction = frac_lo;
[feef1cd]	323	return result;
[afffa1e]	324	}
	325
[c67aff2]	326	float64 convertFloat128ToFloat64(float128 a)
	327	{
	328	float64 result;
	329	int32_t exp;
	330	uint64_t frac_hi, frac_lo;
	331
	332	result.parts.sign = a.parts.sign;
	333
	334	if (isFloat128NaN(a)) {
	335	result.parts.exp = FLOAT64_MAX_EXPONENT;
[afffa1e]	336
[c67aff2]	337	if (isFloat128SigNaN(a)) {
	338	/* set first bit of fraction nonzero */
	339	result.parts.fraction = FLOAT64_HIDDEN_BIT_MASK >> 1;
	340	return result;
	341	}
	342
	343	/* fraction nonzero but its first bit is zero */
	344	result.parts.fraction = 0x1;
	345	return result;
	346	}
	347
	348	if (isFloat128Infinity(a)) {
	349	result.parts.fraction = 0;
	350	result.parts.exp = FLOAT64_MAX_EXPONENT;
	351	return result;
	352	}
	353
	354	exp = (int) a.parts.exp - FLOAT128_BIAS + FLOAT64_BIAS;
	355
	356	if (exp >= FLOAT64_MAX_EXPONENT) {
	357	/* FIXME: overflow */
	358	result.parts.fraction = 0;
	359	result.parts.exp = FLOAT64_MAX_EXPONENT;
	360	return result;
	361	} else if (exp <= 0) {
	362	/* underflow or denormalized */
	363
	364	result.parts.exp = 0;
	365
	366	exp *= -1;
	367	if (exp > FLOAT64_FRACTION_SIZE) {
	368	/* FIXME: underflow */
	369	result.parts.fraction = 0;
	370	return result;
	371	}
	372
	373	/* denormalized */
	374
	375	frac_hi = a.parts.frac_hi;
	376	frac_lo = a.parts.frac_lo;
	377
	378	/* denormalize and set hidden bit */
	379	frac_hi \|= FLOAT128_HIDDEN_BIT_MASK_HI;
	380
	381	rshift128(frac_hi, frac_lo,
	382	(FLOAT128_FRACTION_SIZE - FLOAT64_FRACTION_SIZE + 1),
	383	&frac_hi, &frac_lo);
	384
	385	while (exp > 0) {
	386	--exp;
	387	rshift128(frac_hi, frac_lo, 1, &frac_hi, &frac_lo);
	388	}
	389	result.parts.fraction = frac_lo;
	390
	391	return result;
	392	}
	393
	394	result.parts.exp = exp;
	395	frac_hi = a.parts.frac_hi;
	396	frac_lo = a.parts.frac_lo;
	397	rshift128(frac_hi, frac_lo,
	398	(FLOAT128_FRACTION_SIZE - FLOAT64_FRACTION_SIZE + 1),
	399	&frac_hi, &frac_lo);
	400	result.parts.fraction = frac_lo;
	401	return result;
	402	}
	403
	404
	405	/**
	406	* Helping procedure for converting float32 to uint32.
	407	*
	408	* @param a Floating point number in normalized form
	409	* (NaNs or Inf are not checked).
	410	* @return Converted unsigned integer.
[afffa1e]	411	*/
[aa59fa0]	412	static uint32_t _float32_to_uint32_helper(float32 a)
[afffa1e]	413	{
[aa59fa0]	414	uint32_t frac;
[afffa1e]	415
	416	if (a.parts.exp < FLOAT32_BIAS) {
[c67aff2]	417	/* TODO: rounding */
[afffa1e]	418	return 0;
	419	}
	420
	421	frac = a.parts.fraction;
	422
	423	frac \|= FLOAT32_HIDDEN_BIT_MASK;
	424	/* shift fraction to left so hidden bit will be the most significant bit */
	425	frac <<= 32 - FLOAT32_FRACTION_SIZE - 1;
	426
	427	frac >>= 32 - (a.parts.exp - FLOAT32_BIAS) - 1;
	428	if ((a.parts.sign == 1) && (frac != 0)) {
	429	frac = ~frac;
	430	++frac;
	431	}
	432
	433	return frac;
	434	}
	435
[c67aff2]	436	/*
[afffa1e]	437	* FIXME: Im not sure what to return if overflow/underflow happens
	438	* - now its the biggest or the smallest int
	439	*/
[aa59fa0]	440	uint32_t float32_to_uint32(float32 a)
[afffa1e]	441	{
[9539be6]	442	if (isFloat32NaN(a))
	443	return UINT32_MAX;
[afffa1e]	444
[9539be6]	445	if (isFloat32Infinity(a) \|\| (a.parts.exp >= (32 + FLOAT32_BIAS))) {
	446	if (a.parts.sign)
	447	return UINT32_MIN;
	448
	449	return UINT32_MAX;
[afffa1e]	450	}
	451
[9539be6]	452	return _float32_to_uint32_helper(a);
[afffa1e]	453	}
	454
[c67aff2]	455	/*
[afffa1e]	456	* FIXME: Im not sure what to return if overflow/underflow happens
	457	* - now its the biggest or the smallest int
	458	*/
[aa59fa0]	459	int32_t float32_to_int32(float32 a)
[afffa1e]	460	{
[9539be6]	461	if (isFloat32NaN(a))
	462	return INT32_MAX;
[afffa1e]	463
[9539be6]	464	if (isFloat32Infinity(a) \|\| (a.parts.exp >= (32 + FLOAT32_BIAS))) {
	465	if (a.parts.sign)
	466	return INT32_MIN;
	467
	468	return INT32_MAX;
[afffa1e]	469	}
[9539be6]	470
[afffa1e]	471	return _float32_to_uint32_helper(a);
[9539be6]	472	}
[afffa1e]	473
	474
[c67aff2]	475	/**
	476	* Helping procedure for converting float32 to uint64.
	477	*
	478	* @param a Floating point number in normalized form
	479	* (NaNs or Inf are not checked).
	480	* @return Converted unsigned integer.
[a82695c]	481	*/
[c67aff2]	482	static uint64_t _float32_to_uint64_helper(float32 a)
[a82695c]	483	{
[aa59fa0]	484	uint64_t frac;
[c67aff2]	485
	486	if (a.parts.exp < FLOAT32_BIAS) {
[a82695c]	487	/TODO: rounding/
	488	return 0;
	489	}
[c67aff2]	490
[a82695c]	491	frac = a.parts.fraction;
[c67aff2]	492
	493	frac \|= FLOAT32_HIDDEN_BIT_MASK;
[a82695c]	494	/* shift fraction to left so hidden bit will be the most significant bit */
[c67aff2]	495	frac <<= 64 - FLOAT32_FRACTION_SIZE - 1;
[a82695c]	496
[c67aff2]	497	frac >>= 64 - (a.parts.exp - FLOAT32_BIAS) - 1;
[a82695c]	498	if ((a.parts.sign == 1) && (frac != 0)) {
	499	frac = ~frac;
	500	++frac;
	501	}
[c67aff2]	502
[a82695c]	503	return frac;
	504	}
	505
[c67aff2]	506	/*
	507	* FIXME: Im not sure what to return if overflow/underflow happens
[a82695c]	508	* - now its the biggest or the smallest int
[c67aff2]	509	*/
	510	uint64_t float32_to_uint64(float32 a)
[a82695c]	511	{
[c67aff2]	512	if (isFloat32NaN(a))
[9539be6]	513	return UINT64_MAX;
[c67aff2]	514
	515
	516	if (isFloat32Infinity(a) \|\| (a.parts.exp >= (64 + FLOAT32_BIAS))) {
[9539be6]	517	if (a.parts.sign)
	518	return UINT64_MIN;
[c67aff2]	519
[9539be6]	520	return UINT64_MAX;
[a82695c]	521	}
[c67aff2]	522
	523	return _float32_to_uint64_helper(a);
[a82695c]	524	}
	525
[c67aff2]	526	/*
	527	* FIXME: Im not sure what to return if overflow/underflow happens
[a82695c]	528	* - now its the biggest or the smallest int
[c67aff2]	529	*/
	530	int64_t float32_to_int64(float32 a)
[a82695c]	531	{
[c67aff2]	532	if (isFloat32NaN(a))
[9539be6]	533	return INT64_MAX;
[c67aff2]	534
	535	if (isFloat32Infinity(a) \|\| (a.parts.exp >= (64 + FLOAT32_BIAS))) {
[9539be6]	536	if (a.parts.sign)
	537	return INT64_MIN;
[c67aff2]	538
[9539be6]	539	return INT64_MAX;
[a82695c]	540	}
	541
[c67aff2]	542	return _float32_to_uint64_helper(a);
	543	}
[a82695c]	544
	545
[c67aff2]	546	/**
	547	* Helping procedure for converting float64 to uint64.
	548	*
	549	* @param a Floating point number in normalized form
	550	* (NaNs or Inf are not checked).
	551	* @return Converted unsigned integer.
[a82695c]	552	*/
[c67aff2]	553	static uint64_t _float64_to_uint64_helper(float64 a)
[a82695c]	554	{
[aa59fa0]	555	uint64_t frac;
[c67aff2]	556
	557	if (a.parts.exp < FLOAT64_BIAS) {
[a82695c]	558	/TODO: rounding/
	559	return 0;
	560	}
[c67aff2]	561
[a82695c]	562	frac = a.parts.fraction;
[c67aff2]	563
	564	frac \|= FLOAT64_HIDDEN_BIT_MASK;
[a82695c]	565	/* shift fraction to left so hidden bit will be the most significant bit */
[c67aff2]	566	frac <<= 64 - FLOAT64_FRACTION_SIZE - 1;
[a82695c]	567
[c67aff2]	568	frac >>= 64 - (a.parts.exp - FLOAT64_BIAS) - 1;
[a82695c]	569	if ((a.parts.sign == 1) && (frac != 0)) {
	570	frac = ~frac;
	571	++frac;
	572	}
[c67aff2]	573
[a82695c]	574	return frac;
	575	}
	576
[c67aff2]	577	/*
	578	* FIXME: Im not sure what to return if overflow/underflow happens
	579	* - now its the biggest or the smallest int
	580	*/
	581	uint32_t float64_to_uint32(float64 a)
	582	{
	583	if (isFloat64NaN(a))
	584	return UINT32_MAX;
	585
	586	if (isFloat64Infinity(a) \|\| (a.parts.exp >= (32 + FLOAT64_BIAS))) {
	587	if (a.parts.sign)
	588	return UINT32_MIN;
	589
	590	return UINT32_MAX;
	591	}
	592
	593	return (uint32_t) _float64_to_uint64_helper(a);
	594	}
	595
	596	/*
	597	* FIXME: Im not sure what to return if overflow/underflow happens
	598	* - now its the biggest or the smallest int
	599	*/
	600	int32_t float64_to_int32(float64 a)
	601	{
	602	if (isFloat64NaN(a))
	603	return INT32_MAX;
	604
	605	if (isFloat64Infinity(a) \|\| (a.parts.exp >= (32 + FLOAT64_BIAS))) {
	606	if (a.parts.sign)
	607	return INT32_MIN;
	608
	609	return INT32_MAX;
	610	}
	611
	612	return (int32_t) _float64_to_uint64_helper(a);
	613	}
	614
	615
	616	/*
[a82695c]	617	* FIXME: Im not sure what to return if overflow/underflow happens
	618	* - now its the biggest or the smallest int
	619	*/
[c67aff2]	620	uint64_t float64_to_uint64(float64 a)
[a82695c]	621	{
[c67aff2]	622	if (isFloat64NaN(a))
[9539be6]	623	return UINT64_MAX;
[a82695c]	624
[c67aff2]	625	if (isFloat64Infinity(a) \|\| (a.parts.exp >= (64 + FLOAT64_BIAS))) {
[9539be6]	626	if (a.parts.sign)
	627	return UINT64_MIN;
	628
	629	return UINT64_MAX;
[a82695c]	630	}
	631
[c67aff2]	632	return _float64_to_uint64_helper(a);
[a82695c]	633	}
	634
[c67aff2]	635	/*
[a82695c]	636	* FIXME: Im not sure what to return if overflow/underflow happens
	637	* - now its the biggest or the smallest int
	638	*/
[c67aff2]	639	int64_t float64_to_int64(float64 a)
[a82695c]	640	{
[c67aff2]	641	if (isFloat64NaN(a))
[9539be6]	642	return INT64_MAX;
[a82695c]	643
[c67aff2]	644	if (isFloat64Infinity(a) \|\| (a.parts.exp >= (64 + FLOAT64_BIAS))) {
[9539be6]	645	if (a.parts.sign)
	646	return INT64_MIN;
	647
	648	return INT64_MAX;
[a82695c]	649	}
[9539be6]	650
[c67aff2]	651	return _float64_to_uint64_helper(a);
[9539be6]	652	}
[a82695c]	653
	654
[c67aff2]	655	/**
	656	* Helping procedure for converting float128 to uint64.
	657	*
	658	* @param a Floating point number in normalized form
	659	* (NaNs or Inf are not checked).
	660	* @return Converted unsigned integer.
	661	*/
	662	static uint64_t _float128_to_uint64_helper(float128 a)
	663	{
	664	uint64_t frac_hi, frac_lo;
	665
	666	if (a.parts.exp < FLOAT128_BIAS) {
	667	/TODO: rounding/
	668	return 0;
	669	}
	670
	671	frac_hi = a.parts.frac_hi;
	672	frac_lo = a.parts.frac_lo;
	673
	674	frac_hi \|= FLOAT128_HIDDEN_BIT_MASK_HI;
	675	/* shift fraction to left so hidden bit will be the most significant bit */
	676	lshift128(frac_hi, frac_lo,
	677	(128 - FLOAT128_FRACTION_SIZE - 1), &frac_hi, &frac_lo);
	678
	679	rshift128(frac_hi, frac_lo,
	680	(128 - (a.parts.exp - FLOAT128_BIAS) - 1), &frac_hi, &frac_lo);
	681	if ((a.parts.sign == 1) && !eq128(frac_hi, frac_lo, 0x0ll, 0x0ll)) {
	682	not128(frac_hi, frac_lo, &frac_hi, &frac_lo);
	683	add128(frac_hi, frac_lo, 0x0ll, 0x1ll, &frac_hi, &frac_lo);
	684	}
	685
	686	return frac_lo;
	687	}
	688
	689	/*
	690	* FIXME: Im not sure what to return if overflow/underflow happens
[a82695c]	691	* - now its the biggest or the smallest int
[c67aff2]	692	*/
	693	uint32_t float128_to_uint32(float128 a)
[a82695c]	694	{
[c67aff2]	695	if (isFloat128NaN(a))
[9539be6]	696	return UINT32_MAX;
[c67aff2]	697
	698	if (isFloat128Infinity(a) \|\| (a.parts.exp >= (32 + FLOAT128_BIAS))) {
[9539be6]	699	if (a.parts.sign)
	700	return UINT32_MIN;
[c67aff2]	701
[9539be6]	702	return UINT32_MAX;
[a82695c]	703	}
[c67aff2]	704
	705	return (uint32_t) _float128_to_uint64_helper(a);
[a82695c]	706	}
	707
[c67aff2]	708	/*
	709	* FIXME: Im not sure what to return if overflow/underflow happens
[a82695c]	710	* - now its the biggest or the smallest int
[c67aff2]	711	*/
	712	int32_t float128_to_int32(float128 a)
[a82695c]	713	{
[c67aff2]	714	if (isFloat128NaN(a))
[9539be6]	715	return INT32_MAX;
[c67aff2]	716
	717	if (isFloat128Infinity(a) \|\| (a.parts.exp >= (32 + FLOAT128_BIAS))) {
[9539be6]	718	if (a.parts.sign)
	719	return INT32_MIN;
[c67aff2]	720
[9539be6]	721	return INT32_MAX;
[a82695c]	722	}
[c67aff2]	723
	724	return (int32_t) _float128_to_uint64_helper(a);
[9539be6]	725	}
[a82695c]	726
[c67aff2]	727
	728	/*
	729	* FIXME: Im not sure what to return if overflow/underflow happens
	730	* - now its the biggest or the smallest int
	731	*/
	732	uint64_t float128_to_uint64(float128 a)
	733	{
	734	if (isFloat128NaN(a))
	735	return UINT64_MAX;
	736
	737	if (isFloat128Infinity(a) \|\| (a.parts.exp >= (64 + FLOAT128_BIAS))) {
	738	if (a.parts.sign)
	739	return UINT64_MIN;
	740
	741	return UINT64_MAX;
	742	}
	743
	744	return _float128_to_uint64_helper(a);
	745	}
	746
	747	/*
	748	* FIXME: Im not sure what to return if overflow/underflow happens
	749	* - now its the biggest or the smallest int
[1d83419]	750	*/
[c67aff2]	751	int64_t float128_to_int64(float128 a)
	752	{
	753	if (isFloat128NaN(a))
	754	return INT64_MAX;
	755
	756	if (isFloat128Infinity(a) \|\| (a.parts.exp >= (64 + FLOAT128_BIAS))) {
	757	if (a.parts.sign)
	758	return INT64_MIN;
	759
	760	return INT64_MAX;
	761	}
	762
	763	return _float128_to_uint64_helper(a);
	764	}
	765
	766
[aa59fa0]	767	float32 uint32_to_float32(uint32_t i)
[1d83419]	768	{
	769	int counter;
[aa59fa0]	770	int32_t exp;
[1d83419]	771	float32 result;
	772
	773	result.parts.sign = 0;
	774	result.parts.fraction = 0;
	775
	776	counter = countZeroes32(i);
	777
	778	exp = FLOAT32_BIAS + 32 - counter - 1;
	779
	780	if (counter == 32) {
	781	result.binary = 0;
	782	return result;
	783	}
	784
	785	if (counter > 0) {
	786	i <<= counter - 1;
	787	} else {
	788	i >>= 1;
	789	}
	790
	791	roundFloat32(&exp, &i);
	792
[c67aff2]	793	result.parts.fraction = i >> (32 - FLOAT32_FRACTION_SIZE - 2);
[1d83419]	794	result.parts.exp = exp;
	795
	796	return result;
	797	}
	798
[aa59fa0]	799	float32 int32_to_float32(int32_t i)
[1d83419]	800	{
	801	float32 result;
	802
	803	if (i < 0) {
[c67aff2]	804	result = uint32_to_float32((uint32_t) (-i));
[1d83419]	805	} else {
[c67aff2]	806	result = uint32_to_float32((uint32_t) i);
[1d83419]	807	}
	808
	809	result.parts.sign = i < 0;
	810
	811	return result;
	812	}
	813
[feef1cd]	814
[aa59fa0]	815	float32 uint64_to_float32(uint64_t i)
[1d83419]	816	{
[ba5870d]	817	int counter;
[aa59fa0]	818	int32_t exp;
[e591928]	819	uint32_t j;
[ba5870d]	820	float32 result;
	821
	822	result.parts.sign = 0;
	823	result.parts.fraction = 0;
	824
	825	counter = countZeroes64(i);
	826
	827	exp = FLOAT32_BIAS + 64 - counter - 1;
	828
	829	if (counter == 64) {
	830	result.binary = 0;
	831	return result;
	832	}
	833
[c67aff2]	834	/* Shift all to the first 31 bits (31st will be hidden 1) */
[ba5870d]	835	if (counter > 33) {
	836	i <<= counter - 1 - 32;
	837	} else {
	838	i >>= 1 + 32 - counter;
	839	}
[aa59fa0]	840
[c67aff2]	841	j = (uint32_t) i;
[aa59fa0]	842	roundFloat32(&exp, &j);
[ba5870d]	843
[c67aff2]	844	result.parts.fraction = j >> (32 - FLOAT32_FRACTION_SIZE - 2);
[ba5870d]	845	result.parts.exp = exp;
	846	return result;
[1d83419]	847	}
	848
[aa59fa0]	849	float32 int64_to_float32(int64_t i)
[1d83419]	850	{
	851	float32 result;
	852
	853	if (i < 0) {
[c67aff2]	854	result = uint64_to_float32((uint64_t) (-i));
[1d83419]	855	} else {
[c67aff2]	856	result = uint64_to_float32((uint64_t) i);
[1d83419]	857	}
	858
	859	result.parts.sign = i < 0;
	860
	861	return result;
	862	}
[f37d769]	863
[aa59fa0]	864	float64 uint32_to_float64(uint32_t i)
[f37d769]	865	{
	866	int counter;
[aa59fa0]	867	int32_t exp;
[f37d769]	868	float64 result;
[aa59fa0]	869	uint64_t frac;
[f37d769]	870
	871	result.parts.sign = 0;
	872	result.parts.fraction = 0;
	873
	874	counter = countZeroes32(i);
	875
	876	exp = FLOAT64_BIAS + 32 - counter - 1;
	877
	878	if (counter == 32) {
	879	result.binary = 0;
	880	return result;
	881	}
	882
	883	frac = i;
	884	frac <<= counter + 32 - 1;
	885
	886	roundFloat64(&exp, &frac);
	887
[c67aff2]	888	result.parts.fraction = frac >> (64 - FLOAT64_FRACTION_SIZE - 2);
[f37d769]	889	result.parts.exp = exp;
	890
	891	return result;
	892	}
	893
[aa59fa0]	894	float64 int32_to_float64(int32_t i)
[f37d769]	895	{
	896	float64 result;
	897
	898	if (i < 0) {
[c67aff2]	899	result = uint32_to_float64((uint32_t) (-i));
[f37d769]	900	} else {
[c67aff2]	901	result = uint32_to_float64((uint32_t) i);
[f37d769]	902	}
	903
	904	result.parts.sign = i < 0;
	905
	906	return result;
	907	}
	908
	909
[aa59fa0]	910	float64 uint64_to_float64(uint64_t i)
[f37d769]	911	{
	912	int counter;
[aa59fa0]	913	int32_t exp;
[f37d769]	914	float64 result;
	915
	916	result.parts.sign = 0;
	917	result.parts.fraction = 0;
	918
	919	counter = countZeroes64(i);
	920
	921	exp = FLOAT64_BIAS + 64 - counter - 1;
	922
	923	if (counter == 64) {
	924	result.binary = 0;
	925	return result;
	926	}
	927
	928	if (counter > 0) {
	929	i <<= counter - 1;
	930	} else {
	931	i >>= 1;
	932	}
	933
	934	roundFloat64(&exp, &i);
	935
[c67aff2]	936	result.parts.fraction = i >> (64 - FLOAT64_FRACTION_SIZE - 2);
[f37d769]	937	result.parts.exp = exp;
	938	return result;
	939	}
	940
[aa59fa0]	941	float64 int64_to_float64(int64_t i)
[f37d769]	942	{
	943	float64 result;
	944
	945	if (i < 0) {
[c67aff2]	946	result = uint64_to_float64((uint64_t) (-i));
[f37d769]	947	} else {
[c67aff2]	948	result = uint64_to_float64((uint64_t) i);
[f37d769]	949	}
	950
	951	result.parts.sign = i < 0;
	952
	953	return result;
	954	}
	955
[c67aff2]	956
	957	float128 uint32_to_float128(uint32_t i)
	958	{
	959	int counter;
	960	int32_t exp;
	961	float128 result;
	962	uint64_t frac_hi, frac_lo;
	963
	964	result.parts.sign = 0;
	965	result.parts.frac_hi = 0;
	966	result.parts.frac_lo = 0;
	967
	968	counter = countZeroes32(i);
	969
	970	exp = FLOAT128_BIAS + 32 - counter - 1;
	971
	972	if (counter == 32) {
	973	result.binary.hi = 0;
	974	result.binary.lo = 0;
	975	return result;
	976	}
	977
	978	frac_hi = 0;
	979	frac_lo = i;
	980	lshift128(frac_hi, frac_lo, (counter + 96 - 1), &frac_hi, &frac_lo);
	981
	982	roundFloat128(&exp, &frac_hi, &frac_lo);
	983
	984	rshift128(frac_hi, frac_lo,
	985	(128 - FLOAT128_FRACTION_SIZE - 2), &frac_hi, &frac_lo);
	986	result.parts.frac_hi = frac_hi;
	987	result.parts.frac_lo = frac_lo;
	988	result.parts.exp = exp;
	989
	990	return result;
	991	}
	992
	993	float128 int32_to_float128(int32_t i)
	994	{
	995	float128 result;
	996
	997	if (i < 0) {
	998	result = uint32_to_float128((uint32_t) (-i));
	999	} else {
	1000	result = uint32_to_float128((uint32_t) i);
	1001	}
	1002
	1003	result.parts.sign = i < 0;
	1004
	1005	return result;
	1006	}
	1007
	1008
	1009	float128 uint64_to_float128(uint64_t i)
	1010	{
	1011	int counter;
	1012	int32_t exp;
	1013	float128 result;
	1014	uint64_t frac_hi, frac_lo;
	1015
	1016	result.parts.sign = 0;
	1017	result.parts.frac_hi = 0;
	1018	result.parts.frac_lo = 0;
	1019
	1020	counter = countZeroes64(i);
	1021
	1022	exp = FLOAT128_BIAS + 64 - counter - 1;
	1023
	1024	if (counter == 64) {
	1025	result.binary.hi = 0;
	1026	result.binary.lo = 0;
	1027	return result;
	1028	}
	1029
	1030	frac_hi = 0;
	1031	frac_lo = i;
	1032	lshift128(frac_hi, frac_lo, (counter + 64 - 1), &frac_hi, &frac_lo);
	1033
	1034	roundFloat128(&exp, &frac_hi, &frac_lo);
	1035
	1036	rshift128(frac_hi, frac_lo,
	1037	(128 - FLOAT128_FRACTION_SIZE - 2), &frac_hi, &frac_lo);
	1038	result.parts.frac_hi = frac_hi;
	1039	result.parts.frac_lo = frac_lo;
	1040	result.parts.exp = exp;
	1041
	1042	return result;
	1043	}
	1044
	1045	float128 int64_to_float128(int64_t i)
	1046	{
	1047	float128 result;
	1048
	1049	if (i < 0) {
	1050	result = uint64_to_float128((uint64_t) (-i));
	1051	} else {
	1052	result = uint64_to_float128((uint64_t) i);
	1053	}
	1054
	1055	result.parts.sign = i < 0;
	1056
	1057	return result;
	1058	}
	1059
[231a60a]	1060	/** @}
[846848a6]	1061	*/

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: mainline/uspace/lib/softfloat/generic/conversion.c@ eea3e39

Download in other formats: