source: mainline/uspace/lib/softfloat/generic/common.c@ c67aff2

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since c67aff2 was c67aff2, checked in by Petr Koupy <petr.koupy@…>, 14 years ago

Quadruple-precision softfloat, coding style improvements. Details below…

Highlights:

  • completed double-precision support
  • added quadruple-precision support
  • added SPARC quadruple-precision wrappers
  • added doxygen comments
  • corrected and unified coding style

Current state of the softfloat library:

Support for single, double and quadruple precision is currently almost complete (apart from power, square root, complex multiplication and complex division) and provides the same set of features (i.e. the support for all three precisions is now aligned). In order to extend softfloat library consistently, addition of quadruple precision was done in the same spirit as already existing single and double precision written by Josef Cejka in 2006 - that is relaxed standard-compliance for corner cases while mission-critical code sections heavily inspired by the widely used softfloat library written by John R. Hauser (although I personally think it would be more appropriate for HelenOS to use something less optimized, shorter and more readable).

Most of the quadruple-precision code is just an adapted double-precision code to work on 128-bit variables. That means if there is TODO, FIXME or some defect in single or double-precision code, it is most likely also in the quadruple-precision code. Please note that quadruple-precision functions are currently not tested - that is a challenging task in itself, especially when the ports that use them are either not finished (mips64) or badly supported by simulators (sparc64). To test the whole softfloat library, one would probably have to either write a very non-trivial native tester, or use some existing one (e.g. TestFloat from J. R. Hauser) and port it to HelenOS (or rip the softfloat library out of HelenOS and test it on a host system). At the time of writing this, the code dependent on quadruple-precision functions (on mips64 and sparc64) is just a libposix strtold() function (and its callers, most notably the scanf backend).

  • Property mode set to 100644
File size: 19.4 KB
Line 
1/*
2 * Copyright (c) 2005 Josef Cejka
3 * Copyright (c) 2011 Petr Koupy
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30/** @addtogroup softfloat
31 * @{
32 */
33/** @file Common helper operations.
34 */
35
36#include <sftypes.h>
37#include <common.h>
38
/*
 * Lookup table for fast leading-zero counting:
 * zeroTable[i] is the number of leading zero bits in the byte value i.
 *
 * Layout: zeroTable[0] = 8; then 1 entry of 7, 2 entries of 6,
 * 4 entries of 5, 8 of 4, 16 of 3, 32 of 2, 64 of 1 and 128 of 0,
 * i.e. zeroTable[i] = 8 - floor(log2(i)) - 1 for i > 0.
 */
char zeroTable[256] = {
	8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
58
/**
 * Take fraction shifted by 10 bits to the left, round it, normalize it
 * and detect exceptions.
 *
 * The incoming cfrac is expected to carry the 52-bit fraction plus the
 * hidden bit, pre-shifted 10 bits to the left; the bit immediately below
 * the fraction is used as the round bit.
 *
 * NOTE(review): the normalization and round-overflow tests use
 * FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1), i.e. one
 * position above where a "shifted by 10" hidden bit would sit — the
 * unconditional ++cexp below appears to compensate; verify against the
 * callers' cfrac layout before touching any of the shift constants.
 *
 * @param cexp Exponent with bias.
 * @param cfrac Fraction shifted 10 bits to the left with added hidden bit.
 * @param sign Resulting sign.
 * @return Finished double-precision float.
 */
float64 finishFloat64(int32_t cexp, uint64_t cfrac, char sign)
{
	float64 result;

	result.parts.sign = sign;

	/* find first nonzero digit and shift result and detect possibly underflow */
	while ((cexp > 0) && (cfrac) &&
	    (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))))) {
		cexp--;
		cfrac <<= 1;
		/* TODO: fix underflow */
	}

	/* exponent dropped to or below zero: result is zero or denormalized */
	if ((cexp < 0) || (cexp == 0 &&
	    (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))))) {
		/* FIXME: underflow */
		result.parts.exp = 0;
		if ((cexp + FLOAT64_FRACTION_SIZE + 1) < 0) { /* +1 is place for rounding */
			/* too small even for a denormal - flush to zero */
			result.parts.fraction = 0;
			return result;
		}

		/* shift fraction right until the exponent reaches zero */
		while (cexp < 0) {
			cexp++;
			cfrac >>= 1;
		}

		/* round to nearest: add one at the bit below the fraction */
		cfrac += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));

		/* if rounding did not carry into the hidden-bit position,
		 * the result stays denormalized (exp == 0) */
		if (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))) {
			result.parts.fraction =
			    ((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK));
			return result;
		}
		/* else: rounding normalized the number - fall through */
	} else {
		/* normal case: round to nearest */
		cfrac += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));
	}

	++cexp;

	/* rounding carried past the tested bit - renormalize */
	if (cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))) {
		++cexp;
		cfrac >>= 1;
	}

	/* check overflow */
	if (cexp >= FLOAT64_MAX_EXPONENT) {
		/* FIXME: overflow, return infinity */
		result.parts.exp = FLOAT64_MAX_EXPONENT;
		result.parts.fraction = 0;
		return result;
	}

	result.parts.exp = (uint32_t) cexp;

	/* drop the hidden bit and store the fraction field */
	result.parts.fraction =
	    ((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK));

	return result;
}
129
130/**
131 * Take fraction, round it, normalize it and detect exceptions
132 *
133 * @param cexp Exponent with bias.
134 * @param cfrac_hi High part of the fraction shifted 14 bits to the left
135 * with added hidden bit.
136 * @param cfrac_lo Low part of the fraction shifted 14 bits to the left
137 * with added hidden bit.
138 * @param sign Resulting sign.
139 * @param shift_out Bits right-shifted out from fraction by the caller.
140 * @return Finished quadruple-precision float.
141 */
142float128 finishFloat128(int32_t cexp, uint64_t cfrac_hi, uint64_t cfrac_lo,
143 char sign, uint64_t shift_out)
144{
145 float128 result;
146 uint64_t tmp_hi, tmp_lo;
147
148 result.parts.sign = sign;
149
150 /* find first nonzero digit and shift result and detect possibly underflow */
151 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
152 1, &tmp_hi, &tmp_lo);
153 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
154 while ((cexp > 0) && (lt128(0x0ll, 0x0ll, cfrac_hi, cfrac_lo)) &&
155 (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo))) {
156 cexp--;
157 lshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
158 /* TODO: fix underflow */
159
160 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
161 1, &tmp_hi, &tmp_lo);
162 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
163 }
164
165 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
166 1, &tmp_hi, &tmp_lo);
167 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
168 if ((cexp < 0) || (cexp == 0 &&
169 (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)))) {
170 /* FIXME: underflow */
171 result.parts.exp = 0;
172 if ((cexp + FLOAT128_FRACTION_SIZE + 1) < 0) { /* +1 is place for rounding */
173 result.parts.frac_hi = 0x0ll;
174 result.parts.frac_lo = 0x0ll;
175 return result;
176 }
177
178 while (cexp < 0) {
179 cexp++;
180 rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
181 }
182
183 if (shift_out & (0x1ull < 64)) {
184 add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo);
185 }
186
187 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
188 1, &tmp_hi, &tmp_lo);
189 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
190 if (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
191 not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
192 &tmp_hi, &tmp_lo);
193 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
194 result.parts.frac_hi = tmp_hi;
195 result.parts.frac_lo = tmp_lo;
196 return result;
197 }
198 } else {
199 if (shift_out & (0x1ull < 64)) {
200 add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo);
201 }
202 }
203
204 ++cexp;
205
206 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
207 1, &tmp_hi, &tmp_lo);
208 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
209 if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
210 ++cexp;
211 rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
212 }
213
214 /* check overflow */
215 if (cexp >= FLOAT128_MAX_EXPONENT) {
216 /* FIXME: overflow, return infinity */
217 result.parts.exp = FLOAT128_MAX_EXPONENT;
218 result.parts.frac_hi = 0x0ll;
219 result.parts.frac_lo = 0x0ll;
220 return result;
221 }
222
223 result.parts.exp = (uint32_t) cexp;
224
225 not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
226 &tmp_hi, &tmp_lo);
227 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
228 result.parts.frac_hi = tmp_hi;
229 result.parts.frac_lo = tmp_lo;
230
231 return result;
232}
233
/**
 * Counts leading zeroes in byte.
 *
 * @param i Byte for which to count leading zeroes.
 * @return Number of detected leading zeroes (0 to 8).
 */
int countZeroes8(uint8_t i)
{
	/* O(1) lookup: zeroTable[i] holds the clz of the byte value i. */
	return zeroTable[i];
}
244
/**
 * Counts leading zeroes in 32bit unsigned integer.
 *
 * Scans byte by byte from the most significant end, delegating the
 * final byte to the countZeroes8() lookup table.
 *
 * @param i Integer for which to count leading zeroes.
 * @return Number of detected leading zeroes (0 to 32).
 */
int countZeroes32(uint32_t i)
{
	int j;
	for (j = 0; j < 32; j += 8) {
		/*
		 * 0xFFu keeps the shift unsigned: the original 0xFF << 24
		 * overflows a signed int, which is undefined behavior.
		 */
		if (i & (0xFFu << (24 - j))) {
			return (j + countZeroes8((uint8_t) (i >> (24 - j))));
		}
	}

	return 32;
}
262
/**
 * Counts leading zeroes in 64bit unsigned integer.
 *
 * Scans byte by byte from the most significant end, delegating the
 * final byte to the countZeroes8() lookup table.
 *
 * @param i Integer for which to count leading zeroes.
 * @return Number of detected leading zeroes (0 to 64).
 */
int countZeroes64(uint64_t i)
{
	int j;
	for (j = 0; j < 64; j += 8) {
		/*
		 * 0xFFull keeps the shift unsigned: the original 0xFFll << 56
		 * overflows a signed long long, which is undefined behavior.
		 */
		if (i & (0xFFull << (56 - j))) {
			return (j + countZeroes8((uint8_t) (i >> (56 - j))));
		}
	}

	return 64;
}
280
281/**
282 * Round and normalize number expressed by exponent and fraction with
283 * first bit (equal to hidden bit) at 30th bit.
284 *
285 * @param exp Exponent part.
286 * @param fraction Fraction with hidden bit shifted to 30th bit.
287 */
288void roundFloat32(int32_t *exp, uint32_t *fraction)
289{
290 /* rounding - if first bit after fraction is set then round up */
291 (*fraction) += (0x1 << (32 - FLOAT32_FRACTION_SIZE - 3));
292
293 if ((*fraction) &
294 (FLOAT32_HIDDEN_BIT_MASK << (32 - FLOAT32_FRACTION_SIZE - 1))) {
295 /* rounding overflow */
296 ++(*exp);
297 (*fraction) >>= 1;
298 }
299
300 if (((*exp) >= FLOAT32_MAX_EXPONENT) || ((*exp) < 0)) {
301 /* overflow - set infinity as result */
302 (*exp) = FLOAT32_MAX_EXPONENT;
303 (*fraction) = 0;
304 }
305}
306
307/**
308 * Round and normalize number expressed by exponent and fraction with
309 * first bit (equal to hidden bit) at 62nd bit.
310 *
311 * @param exp Exponent part.
312 * @param fraction Fraction with hidden bit shifted to 62nd bit.
313 */
314void roundFloat64(int32_t *exp, uint64_t *fraction)
315{
316 /* rounding - if first bit after fraction is set then round up */
317 (*fraction) += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));
318
319 if ((*fraction) &
320 (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 3))) {
321 /* rounding overflow */
322 ++(*exp);
323 (*fraction) >>= 1;
324 }
325
326 if (((*exp) >= FLOAT64_MAX_EXPONENT) || ((*exp) < 0)) {
327 /* overflow - set infinity as result */
328 (*exp) = FLOAT64_MAX_EXPONENT;
329 (*fraction) = 0;
330 }
331}
332
333/**
334 * Round and normalize number expressed by exponent and fraction with
335 * first bit (equal to hidden bit) at 126th bit.
336 *
337 * @param exp Exponent part.
338 * @param frac_hi High part of fraction part with hidden bit shifted to 126th bit.
339 * @param frac_lo Low part of fraction part with hidden bit shifted to 126th bit.
340 */
341void roundFloat128(int32_t *exp, uint64_t *frac_hi, uint64_t *frac_lo)
342{
343 uint64_t tmp_hi, tmp_lo;
344
345 /* rounding - if first bit after fraction is set then round up */
346 lshift128(0x0ll, 0x1ll, (128 - FLOAT128_FRACTION_SIZE - 3), &tmp_hi, &tmp_lo);
347 add128(*frac_hi, *frac_lo, tmp_hi, tmp_lo, frac_hi, frac_lo);
348
349 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
350 (128 - FLOAT128_FRACTION_SIZE - 3), &tmp_hi, &tmp_lo);
351 and128(*frac_hi, *frac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
352 if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
353 /* rounding overflow */
354 ++(*exp);
355 rshift128(*frac_hi, *frac_lo, 1, frac_hi, frac_lo);
356 }
357
358 if (((*exp) >= FLOAT128_MAX_EXPONENT) || ((*exp) < 0)) {
359 /* overflow - set infinity as result */
360 (*exp) = FLOAT128_MAX_EXPONENT;
361 (*frac_hi) = 0;
362 (*frac_lo) = 0;
363 }
364}
365
/**
 * Logical shift left on the 128-bit operand.
 *
 * Shifts of zero or less leave the operand unchanged; shifts of 128 or
 * more yield zero.
 *
 * @param a_hi High part of the input operand.
 * @param a_lo Low part of the input operand.
 * @param shift Number of bits by which to shift.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void lshift128(
    uint64_t a_hi, uint64_t a_lo, int shift,
    uint64_t *r_hi, uint64_t *r_lo)
{
	uint64_t hi = a_hi;
	uint64_t lo = a_lo;

	if (shift >= 128) {
		hi = 0;
		lo = 0;
	} else if (shift >= 64) {
		/* low word moves entirely into the high word */
		hi = lo << (shift - 64);
		lo = 0;
	} else if (shift > 0) {
		/* carry the top bits of the low word into the high word */
		hi = (hi << shift) | (lo >> (64 - shift));
		lo <<= shift;
	}

	*r_hi = hi;
	*r_lo = lo;
}
396
/**
 * Logical shift right on the 128-bit operand.
 *
 * Shifts of zero or less leave the operand unchanged; shifts of 128 or
 * more yield zero.
 *
 * @param a_hi High part of the input operand.
 * @param a_lo Low part of the input operand.
 * @param shift Number of bits by which to shift.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void rshift128(
    uint64_t a_hi, uint64_t a_lo, int shift,
    uint64_t *r_hi, uint64_t *r_lo)
{
	uint64_t hi = a_hi;
	uint64_t lo = a_lo;

	if (shift >= 128) {
		hi = 0;
		lo = 0;
	} else if (shift >= 64) {
		/* high word moves entirely into the low word */
		lo = hi >> (shift - 64);
		hi = 0;
	} else if (shift > 0) {
		/* carry the bottom bits of the high word into the low word */
		lo = (lo >> shift) | (hi << (64 - shift));
		hi >>= shift;
	}

	*r_hi = hi;
	*r_lo = lo;
}
427
/**
 * Bitwise AND on 128-bit operands.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void and128(
    uint64_t a_hi, uint64_t a_lo,
    uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
	/* the two halves are independent - AND them separately */
	uint64_t hi = a_hi & b_hi;
	uint64_t lo = a_lo & b_lo;

	*r_hi = hi;
	*r_lo = lo;
}
446
/**
 * Bitwise inclusive OR on 128-bit operands.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void or128(
    uint64_t a_hi, uint64_t a_lo,
    uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
	/* the two halves are independent - OR them separately */
	uint64_t hi = a_hi | b_hi;
	uint64_t lo = a_lo | b_lo;

	*r_hi = hi;
	*r_lo = lo;
}
465
/**
 * Bitwise exclusive OR on 128-bit operands.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void xor128(
    uint64_t a_hi, uint64_t a_lo,
    uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
	/* the two halves are independent - XOR them separately */
	uint64_t hi = a_hi ^ b_hi;
	uint64_t lo = a_lo ^ b_lo;

	*r_hi = hi;
	*r_lo = lo;
}
484
/**
 * Bitwise NOT on the 128-bit operand.
 *
 * @param a_hi High part of the input operand.
 * @param a_lo Low part of the input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void not128(
    uint64_t a_hi, uint64_t a_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
	/* complement each half independently */
	uint64_t hi = ~a_hi;
	uint64_t lo = ~a_lo;

	*r_hi = hi;
	*r_lo = lo;
}
500
/**
 * Equality comparison of 128-bit operands.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @return 1 if operands are equal, 0 otherwise.
 */
int eq128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
{
	/* equal iff both halves match */
	if (a_hi != b_hi)
		return 0;

	return a_lo == b_lo;
}
514
/**
 * Lower-or-equal comparison of 128-bit operands.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @return 1 if a is lower or equal to b, 0 otherwise.
 */
int le128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
{
	/* the high halves decide unless they are equal */
	if (a_hi != b_hi)
		return a_hi < b_hi;

	return a_lo <= b_lo;
}
528
/**
 * Lower-than comparison of 128-bit operands.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @return 1 if a is lower than b, 0 otherwise.
 */
int lt128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
{
	/* the high halves decide unless they are equal */
	if (a_hi != b_hi)
		return a_hi < b_hi;

	return a_lo < b_lo;
}
542
/**
 * Addition of two 128-bit unsigned integers.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void add128(uint64_t a_hi, uint64_t a_lo,
    uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
	uint64_t sum_lo = a_lo + b_lo;
	/* unsigned wrap-around in the low word signals a carry */
	uint64_t carry = (sum_lo < a_lo) ? 1 : 0;

	*r_lo = sum_lo;
	*r_hi = a_hi + b_hi + carry;
}
562
/**
 * Subtraction of two 128-bit unsigned integers.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void sub128(uint64_t a_hi, uint64_t a_lo,
    uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
	/* a borrow is needed whenever the low subtrahend is larger */
	uint64_t borrow = (a_lo < b_lo) ? 1 : 0;

	*r_lo = a_lo - b_lo;
	*r_hi = a_hi - b_hi - borrow;
}
581
/**
 * Multiplication of two 64-bit unsigned integers producing a full
 * 128-bit product (schoolbook method on 32-bit halves).
 *
 * @param a First input operand.
 * @param b Second input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void mul64(uint64_t a, uint64_t b, uint64_t *r_hi, uint64_t *r_lo)
{
	uint32_t a_lo = a & 0xFFFFFFFF;
	uint32_t b_lo = b & 0xFFFFFFFF;
	uint64_t a_hi = a >> 32;
	uint64_t b_hi = b >> 32;

	/* four 32x32 -> 64 partial products */
	uint64_t lo = ((uint64_t) a_lo) * b_lo;
	uint64_t cross1 = a_hi * b_lo;
	uint64_t cross2 = a_lo * b_hi;
	uint64_t hi = a_hi * b_hi;

	/* combine the cross terms, tracking the carry explicitly */
	cross1 += cross2;
	hi += (((uint64_t) (cross1 < cross2)) << 32) + (cross1 >> 32);
	cross1 <<= 32;
	lo += cross1;
	hi += (lo < cross1);

	*r_lo = lo;
	*r_hi = hi;
}
614
/**
 * Multiplication of two 128-bit unsigned integers producing a 256-bit
 * product in four 64-bit quarters.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @param r_hihi Address to store first (highest) quarter of the result.
 * @param r_hilo Address to store second quarter of the result.
 * @param r_lohi Address to store third quarter of the result.
 * @param r_lolo Address to store fourth (lowest) quarter of the result.
 */
void mul128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hihi, uint64_t *r_hilo, uint64_t *r_lohi, uint64_t *r_lolo)
{
	/* q3..q0 are the result quarters, highest to lowest */
	uint64_t q3, q2, q1, q0;
	uint64_t part_hi, part_lo;

	/* a_lo * b_lo fills the two lowest quarters */
	mul64(a_lo, b_lo, &q1, &q0);
	/* fold in a_lo * b_hi, carrying into q2 */
	mul64(a_lo, b_hi, &q2, &part_lo);
	add128(q2, part_lo, 0x0ll, q1, &q2, &q1);
	/* fold in a_hi * b_hi, carrying into q3 */
	mul64(a_hi, b_hi, &q3, &part_hi);
	add128(q3, part_hi, 0x0ll, q2, &q3, &q2);
	/* fold in a_hi * b_lo, then merge its carry chain */
	mul64(a_hi, b_lo, &part_hi, &part_lo);
	add128(part_hi, part_lo, 0x0ll, q1, &part_hi, &q1);
	add128(q3, q2, 0x0ll, part_hi, &q3, &q2);

	*r_hihi = q3;
	*r_hilo = q2;
	*r_lohi = q1;
	*r_lolo = q0;
}
647
/**
 * Estimate the quotient of 128-bit unsigned dividend and 64-bit unsigned
 * divisor.
 *
 * Classic two-step Newton-like estimate (cf. J. R. Hauser's SoftFloat
 * estimateDiv128To64): guess the high 32 quotient bits from the divisor's
 * high half, correct the remainder, then estimate the low 32 bits.
 * The result may be slightly above the true quotient; callers are
 * expected to tolerate that.
 *
 * @param a_hi High part of the dividend.
 * @param a_lo Low part of the dividend.
 * @param b Divisor.
 * @return Quotient approximation.
 */
uint64_t div128est(uint64_t a_hi, uint64_t a_lo, uint64_t b)
{
	uint64_t b_hi, b_lo;
	uint64_t rem_hi, rem_lo;
	uint64_t tmp_hi, tmp_lo;
	uint64_t result;

	/* quotient would not fit into 64 bits - saturate */
	if (b <= a_hi) {
		return 0xFFFFFFFFFFFFFFFFull;
	}

	/* first estimate: high 32 bits of the quotient from b's high half */
	b_hi = b >> 32;
	result = ((b_hi << 32) <= a_hi) ? (0xFFFFFFFFull << 32) : (a_hi / b_hi) << 32;
	mul64(b, result, &tmp_hi, &tmp_lo);
	sub128(a_hi, a_lo, tmp_hi, tmp_lo, &rem_hi, &rem_lo);

	/* the estimate may overshoot - walk it back while the remainder
	 * is negative (detected via the sign bit of rem_hi) */
	while ((int64_t) rem_hi < 0) {
		result -= 0x1ll << 32;
		/* (b_hi, b_lo) is b shifted left by 32 as a 128-bit value */
		b_lo = b << 32;
		add128(rem_hi, rem_lo, b_hi, b_lo, &rem_hi, &rem_lo);
	}

	/* second estimate: low 32 bits from the corrected remainder */
	rem_hi = (rem_hi << 32) | (rem_lo >> 32);
	if ((b_hi << 32) <= rem_hi) {
		/* low half saturates */
		result |= 0xFFFFFFFF;
	} else {
		result |= rem_hi / b_hi;
	}

	return result;
}
688
689/** @}
690 */
Note: See TracBrowser for help on using the repository browser.