source: mainline/kernel/generic/src/lib/str.c@ 08e103d4

Last change on this file since 08e103d4 was 08e103d4, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 6 years ago

Use clearer naming for string length functions

This and the following commit change the names of functions, as well as
their documentation, to use unambiguous terms "bytes" and "code points"
instead of ambiguous terms "size", "length", and "characters".

  • Property mode set to 100644
File size: 24.4 KB
Line 
1/*
2 * Copyright (c) 2001-2004 Jakub Jermar
3 * Copyright (c) 2005 Martin Decky
4 * Copyright (c) 2008 Jiri Svoboda
5 * Copyright (c) 2011 Martin Sucha
6 * Copyright (c) 2011 Oleg Romanenko
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * - Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * - Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * - The name of the author may not be used to endorse or promote products
19 * derived from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33/** @addtogroup kernel_generic
34 * @{
35 */
36
37/**
38 * @file
39 * @brief String functions.
40 *
41 * Strings and characters use the Universal Character Set (UCS). Standard
42 * strings, referred to simply as strings, are encoded in UTF-8. Wide strings
43 * (encoded in UTF-32) are supported to a limited degree. A single character
44 * is represented as wchar_t.@n
45 *
46 * Overview of the terminology:@n
47 *
48 * Term Meaning
49 * -------------------- ----------------------------------------------------
50 * byte 8 bits stored in uint8_t (unsigned 8 bit integer)
51 *
52 * character UTF-32 encoded Unicode character, stored in wchar_t
53 * (signed 32 bit integer), code points 0 .. 1114111
54 * are valid
55 *
56 * ASCII character 7 bit encoded ASCII character, stored in char
57 * (usually signed 8 bit integer), code points 0 .. 127
58 * are valid
59 *
60 * string UTF-8 encoded NULL-terminated Unicode string, char *
61 *
62 * wide string UTF-32 encoded NULL-terminated Unicode string,
63 * wchar_t *
64 *
65 * [wide] string size number of BYTES in a [wide] string (excluding
66 * the NULL-terminator), size_t
67 *
68 * [wide] string length number of CHARACTERS in a [wide] string (excluding
69 * the NULL-terminator), size_t
70 *
71 * [wide] string width number of display cells on a monospace display taken
72 * by a [wide] string, size_t
73 *
74 *
75 * Overview of string metrics:@n
76 *
77 * Metric Abbrev. Type Meaning
78 * ------ ------ ------ -------------------------------------------------
79 * size n size_t number of BYTES in a string (excluding the
80 * NULL-terminator)
81 *
82 * length l size_t number of CHARACTERS in a string (excluding the
83 * null terminator)
84 *
85 * width w size_t number of display cells on a monospace display
86 * taken by a string
87 *
88 *
89 * Function naming prefixes:@n
90 *
91 * chr_ operate on characters
92 * ascii_ operate on ASCII characters
93 * str_ operate on strings
94 * wstr_ operate on wide strings
95 *
96 * [w]str_[n|l|w] operate on a prefix limited by size, length
97 * or width
98 *
99 *
100 * A specific character inside a [wide] string can be referred to by:@n
101 *
102 * pointer (char *, wchar_t *)
103 * byte offset (size_t)
104 * character index (size_t)
105 *
106 */
107
108#include <str.h>
109
110#include <assert.h>
111#include <errno.h>
112#include <stdbool.h>
113#include <stddef.h>
114#include <stdint.h>
115#include <stdlib.h>
116
117#include <align.h>
118#include <macros.h>
119
/**
 * Evaluate @a cond only if wchar_t is a signed type.
 *
 * When the compiler reports an unsigned wchar_t the condition is replaced
 * by a constant true, so that "ch >= 0" style checks do not trigger
 * always-true comparison warnings.
 */
#ifdef __WCHAR_UNSIGNED__
#define WCHAR_SIGNED_CHECK(cond)  (true)
#else
#define WCHAR_SIGNED_CHECK(cond)  (cond)
#endif

/** Bit mask consisting of the lowest @a n bits (out of 8), 0 <= n <= 8 */
#define LO_MASK_8(n)  ((uint8_t) ((1u << (n)) - 1))

/**
 * Bit mask consisting of the lowest @a n bits (out of 32), 0 <= n <= 31.
 *
 * The shift is done on an unsigned operand so that n == 31 does not
 * shift into the sign bit of an int.
 */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1))

/**
 * Bit mask consisting of the highest @a n bits (out of 8).
 *
 * The result is an int produced by integer promotion; only its low
 * 8 bits are meaningful.
 */
#define HI_MASK_8(n)  (~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
138
139/** Decode a single character from a string.
140 *
141 * Decode a single character from a string of size @a size. Decoding starts
142 * at @a offset and this offset is moved to the beginning of the next
143 * character. In case of decoding error, offset generally advances at least
144 * by one. However, offset is never moved beyond size.
145 *
146 * @param str String (not necessarily NULL-terminated).
147 * @param offset Byte offset in string where to start decoding.
148 * @param size Size of the string (in bytes).
149 *
150 * @return Value of decoded character, U_SPECIAL on decoding error or
151 * NULL if attempt to decode beyond @a size.
152 *
153 */
154wchar_t str_decode(const char *str, size_t *offset, size_t size)
155{
156 if (*offset + 1 > size)
157 return 0;
158
159 /* First byte read from string */
160 uint8_t b0 = (uint8_t) str[(*offset)++];
161
162 /* Determine code length */
163
164 unsigned int b0_bits; /* Data bits in first byte */
165 unsigned int cbytes; /* Number of continuation bytes */
166
167 if ((b0 & 0x80) == 0) {
168 /* 0xxxxxxx (Plain ASCII) */
169 b0_bits = 7;
170 cbytes = 0;
171 } else if ((b0 & 0xe0) == 0xc0) {
172 /* 110xxxxx 10xxxxxx */
173 b0_bits = 5;
174 cbytes = 1;
175 } else if ((b0 & 0xf0) == 0xe0) {
176 /* 1110xxxx 10xxxxxx 10xxxxxx */
177 b0_bits = 4;
178 cbytes = 2;
179 } else if ((b0 & 0xf8) == 0xf0) {
180 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
181 b0_bits = 3;
182 cbytes = 3;
183 } else {
184 /* 10xxxxxx -- unexpected continuation byte */
185 return U_SPECIAL;
186 }
187
188 if (*offset + cbytes > size)
189 return U_SPECIAL;
190
191 wchar_t ch = b0 & LO_MASK_8(b0_bits);
192
193 /* Decode continuation bytes */
194 while (cbytes > 0) {
195 uint8_t b = (uint8_t) str[(*offset)++];
196
197 /* Must be 10xxxxxx */
198 if ((b & 0xc0) != 0x80)
199 return U_SPECIAL;
200
201 /* Shift data bits to ch */
202 ch = (ch << CONT_BITS) | (wchar_t) (b & LO_MASK_8(CONT_BITS));
203 cbytes--;
204 }
205
206 return ch;
207}
208
209/** Encode a single character to string representation.
210 *
211 * Encode a single character to string representation (i.e. UTF-8) and store
212 * it into a buffer at @a offset. Encoding starts at @a offset and this offset
213 * is moved to the position where the next character can be written to.
214 *
215 * @param ch Input character.
216 * @param str Output buffer.
217 * @param offset Byte offset where to start writing.
218 * @param size Size of the output buffer (in bytes).
219 *
220 * @return EOK if the character was encoded successfully, EOVERFLOW if there
221 * was not enough space in the output buffer or EINVAL if the character
222 * code was invalid.
223 */
224errno_t chr_encode(const wchar_t ch, char *str, size_t *offset, size_t size)
225{
226 if (*offset >= size)
227 return EOVERFLOW;
228
229 if (!chr_check(ch))
230 return EINVAL;
231
232 /*
233 * Unsigned version of ch (bit operations should only be done
234 * on unsigned types).
235 */
236 uint32_t cc = (uint32_t) ch;
237
238 /* Determine how many continuation bytes are needed */
239
240 unsigned int b0_bits; /* Data bits in first byte */
241 unsigned int cbytes; /* Number of continuation bytes */
242
243 if ((cc & ~LO_MASK_32(7)) == 0) {
244 b0_bits = 7;
245 cbytes = 0;
246 } else if ((cc & ~LO_MASK_32(11)) == 0) {
247 b0_bits = 5;
248 cbytes = 1;
249 } else if ((cc & ~LO_MASK_32(16)) == 0) {
250 b0_bits = 4;
251 cbytes = 2;
252 } else if ((cc & ~LO_MASK_32(21)) == 0) {
253 b0_bits = 3;
254 cbytes = 3;
255 } else {
256 /* Codes longer than 21 bits are not supported */
257 return EINVAL;
258 }
259
260 /* Check for available space in buffer */
261 if (*offset + cbytes >= size)
262 return EOVERFLOW;
263
264 /* Encode continuation bytes */
265 unsigned int i;
266 for (i = cbytes; i > 0; i--) {
267 str[*offset + i] = 0x80 | (cc & LO_MASK_32(CONT_BITS));
268 cc = cc >> CONT_BITS;
269 }
270
271 /* Encode first byte */
272 str[*offset] = (cc & LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1);
273
274 /* Advance offset */
275 *offset += cbytes + 1;
276
277 return EOK;
278}
279
/** Get the size of a string in bytes.
 *
 * Count the bytes occupied by the string @a str, not including the
 * null terminator.
 *
 * @param str Null-terminated string to measure.
 *
 * @return Number of bytes used by the string.
 *
 */
size_t str_bytes(const char *str)
{
	const char *end = str;

	/* Walk to the terminator; the distance travelled is the size. */
	while (*end != 0)
		end++;

	return (size_t) (end - str);
}
299
/** Get the size of a wide string in bytes.
 *
 * Count the bytes occupied by the wide string @a str, not including the
 * null terminator.
 *
 * @param str Null-terminated wide string to measure.
 *
 * @return Number of bytes used by the wide string.
 *
 */
size_t wstr_bytes(const wchar_t *str)
{
	size_t chars = wstr_code_points(str);

	return chars * sizeof(wchar_t);
}
314
315/** Get size of string with length limit.
316 *
317 * Get the number of bytes which are used by up to @a max_len first
318 * characters in the string @a str. If @a max_len is greater than
319 * the length of @a str, the entire string is measured (excluding the
320 * NULL-terminator).
321 *
322 * @param str String to consider.
323 * @param max_len Maximum number of characters to measure.
324 *
325 * @return Number of bytes used by the characters.
326 *
327 */
328size_t str_lbytes(const char *str, size_t max_len)
329{
330 size_t len = 0;
331 size_t offset = 0;
332
333 while (len < max_len) {
334 if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
335 break;
336
337 len++;
338 }
339
340 return offset;
341}
342
/** Get size of wide string with length limit.
 *
 * Get the number of bytes which are used by up to @a max_len first
 * wide characters in the wide string @a str. If @a max_len is greater than
 * the length of @a str, the entire wide string is measured (excluding the
 * null terminator).
 *
 * The characters are counted directly rather than by converting
 * @a max_len to a byte limit, because max_len * sizeof(wchar_t) can wrap
 * around for large values of @a max_len.
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lbytes(const wchar_t *str, size_t max_len)
{
	size_t count = 0;

	while ((count < max_len) && (str[count] != 0))
		count++;

	return count * sizeof(wchar_t);
}
360
361/** Get number of characters in a string.
362 *
363 * @param str NULL-terminated string.
364 *
365 * @return Number of characters in string.
366 *
367 */
368size_t str_code_points(const char *str)
369{
370 size_t len = 0;
371 size_t offset = 0;
372
373 while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
374 len++;
375
376 return len;
377}
378
/** Count the characters in a wide string.
 *
 * @param wstr Null-terminated wide string.
 *
 * @return Number of characters in @a wstr, not counting the terminator.
 *
 */
size_t wstr_code_points(const wchar_t *wstr)
{
	size_t idx = 0;

	while (wstr[idx] != 0)
		idx++;

	return idx;
}
395
/** Count the characters in a string with a size limit.
 *
 * Counting stops at the first null character or when @a size bytes have
 * been consumed, whichever comes first.
 *
 * @param str  String to examine.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters counted.
 *
 */
size_t str_ncode_points(const char *str, size_t size)
{
	size_t pos = 0;
	size_t count = 0;

	for (;;) {
		if (str_decode(str, &pos, size) == 0)
			break;

		count++;
	}

	return count;
}
414
/** Count the characters in a wide string with a size limit.
 *
 * Only whole wide characters that fit into the first @a size bytes are
 * considered; counting also stops at the first null wide character.
 *
 * @param str  Wide string to examine.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of wide characters counted.
 *
 */
size_t wstr_ncode_points(const wchar_t *str, size_t size)
{
	/* Number of complete wide characters that fit into size bytes. */
	size_t max_chars = size / sizeof(wchar_t);
	size_t count = 0;

	while ((count < max_chars) && (str[count] != 0))
		count++;

	return count;
}
436
/** Check whether a character is plain ASCII.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0 .. 127.
 *
 */
bool ascii_check(wchar_t ch)
{
	return WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 127);
}
449
/** Check whether a character is valid.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the code point range 0 .. 1114111.
 *
 */
bool chr_check(wchar_t ch)
{
	return WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 1114111);
}
462
463/** Compare two NULL terminated strings.
464 *
465 * Do a char-by-char comparison of two NULL-terminated strings.
466 * The strings are considered equal iff their length is equal
467 * and both strings consist of the same sequence of characters.
468 *
469 * A string S1 is less than another string S2 if it has a character with
470 * lower value at the first character position where the strings differ.
471 * If the strings differ in length, the shorter one is treated as if
472 * padded by characters with a value of zero.
473 *
474 * @param s1 First string to compare.
475 * @param s2 Second string to compare.
476 *
477 * @return 0 if the strings are equal, -1 if the first is less than the second,
478 * 1 if the second is less than the first.
479 *
480 */
481int str_cmp(const char *s1, const char *s2)
482{
483 wchar_t c1 = 0;
484 wchar_t c2 = 0;
485
486 size_t off1 = 0;
487 size_t off2 = 0;
488
489 while (true) {
490 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
491 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
492
493 if (c1 < c2)
494 return -1;
495
496 if (c1 > c2)
497 return 1;
498
499 if (c1 == 0 || c2 == 0)
500 break;
501 }
502
503 return 0;
504}
505
506/** Compare two NULL terminated strings with length limit.
507 *
508 * Do a char-by-char comparison of two NULL-terminated strings.
509 * The strings are considered equal iff
510 * min(str_code_points(s1), max_len) == min(str_code_points(s2), max_len)
511 * and both strings consist of the same sequence of characters,
512 * up to max_len characters.
513 *
514 * A string S1 is less than another string S2 if it has a character with
515 * lower value at the first character position where the strings differ.
516 * If the strings differ in length, the shorter one is treated as if
517 * padded by characters with a value of zero. Only the first max_len
518 * characters are considered.
519 *
520 * @param s1 First string to compare.
521 * @param s2 Second string to compare.
522 * @param max_len Maximum number of characters to consider.
523 *
524 * @return 0 if the strings are equal, -1 if the first is less than the second,
525 * 1 if the second is less than the first.
526 *
527 */
528int str_lcmp(const char *s1, const char *s2, size_t max_len)
529{
530 wchar_t c1 = 0;
531 wchar_t c2 = 0;
532
533 size_t off1 = 0;
534 size_t off2 = 0;
535
536 size_t len = 0;
537
538 while (true) {
539 if (len >= max_len)
540 break;
541
542 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
543 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
544
545 if (c1 < c2)
546 return -1;
547
548 if (c1 > c2)
549 return 1;
550
551 if (c1 == 0 || c2 == 0)
552 break;
553
554 ++len;
555 }
556
557 return 0;
558
559}
560
561/** Copy string.
562 *
563 * Copy source string @a src to destination buffer @a dest.
564 * No more than @a size bytes are written. If the size of the output buffer
565 * is at least one byte, the output string will always be well-formed, i.e.
566 * null-terminated and containing only complete characters.
567 *
568 * @param dest Destination buffer.
569 * @param count Size of the destination buffer (must be > 0).
570 * @param src Source string.
571 *
572 */
573void str_cpy(char *dest, size_t size, const char *src)
574{
575 /* There must be space for a null terminator in the buffer. */
576 assert(size > 0);
577 assert(src != NULL);
578
579 size_t src_off = 0;
580 size_t dest_off = 0;
581
582 wchar_t ch;
583 while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
584 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
585 break;
586 }
587
588 dest[dest_off] = '\0';
589}
590
591/** Copy size-limited substring.
592 *
593 * Copy prefix of string @a src of max. size @a size to destination buffer
594 * @a dest. No more than @a size bytes are written. The output string will
595 * always be well-formed, i.e. null-terminated and containing only complete
596 * characters.
597 *
598 * No more than @a n bytes are read from the input string, so it does not
599 * have to be null-terminated.
600 *
601 * @param dest Destination buffer.
602 * @param count Size of the destination buffer (must be > 0).
603 * @param src Source string.
604 * @param n Maximum number of bytes to read from @a src.
605 *
606 */
607void str_ncpy(char *dest, size_t size, const char *src, size_t n)
608{
609 /* There must be space for a null terminator in the buffer. */
610 assert(size > 0);
611
612 size_t src_off = 0;
613 size_t dest_off = 0;
614
615 wchar_t ch;
616 while ((ch = str_decode(src, &src_off, n)) != 0) {
617 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
618 break;
619 }
620
621 dest[dest_off] = '\0';
622}
623
624/** Convert wide string to string.
625 *
626 * Convert wide string @a src to string. The output is written to the buffer
627 * specified by @a dest and @a size. @a size must be non-zero and the string
628 * written will always be well-formed.
629 *
630 * @param dest Destination buffer.
631 * @param size Size of the destination buffer.
632 * @param src Source wide string.
633 */
634void wstr_to_str(char *dest, size_t size, const wchar_t *src)
635{
636 wchar_t ch;
637 size_t src_idx;
638 size_t dest_off;
639
640 /* There must be space for a null terminator in the buffer. */
641 assert(size > 0);
642
643 src_idx = 0;
644 dest_off = 0;
645
646 while ((ch = src[src_idx++]) != 0) {
647 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
648 break;
649 }
650
651 dest[dest_off] = '\0';
652}
653
654/** Find first occurence of character in string.
655 *
656 * @param str String to search.
657 * @param ch Character to look for.
658 *
659 * @return Pointer to character in @a str or NULL if not found.
660 */
661char *str_chr(const char *str, wchar_t ch)
662{
663 wchar_t acc;
664 size_t off = 0;
665 size_t last = 0;
666
667 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
668 if (acc == ch)
669 return (char *) (str + last);
670 last = off;
671 }
672
673 return NULL;
674}
675
/** Insert a wide character into a wide string.
 *
 * Insert the wide character @a ch into the wide string @a str at index
 * @a pos. The characters from that position onward, including the null
 * terminator, are shifted up by one.
 *
 * NOTE(review): only @a pos is checked against @a max_pos; the buffer is
 * presumably expected to have room for the string grown by one character
 * plus the terminator. Confirm against callers.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t count = wstr_code_points(str);

	if (pos > count)
		return false;

	if (pos >= max_pos)
		return false;

	/* Move str[pos..count] (terminator included) one slot up, back to front. */
	for (size_t dst = count + 1; dst > pos; dst--)
		str[dst] = str[dst - 1];

	str[pos] = ch;

	return true;
}
705
/** Remove a wide character from a wide string.
 *
 * Remove the wide character at index @a pos from the wide string @a str.
 * The characters after that position, including the null terminator, are
 * shifted down by one.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	size_t count = wstr_code_points(str);

	if (pos >= count)
		return false;

	/* Pull str[pos + 1..count] (terminator included) one slot down. */
	for (size_t dst = pos; dst < count; dst++)
		str[dst] = str[dst + 1];

	return true;
}
731
/** Duplicate a string.
 *
 * Allocate a new buffer with malloc() and copy the characters of the
 * source string into it using str_cpy(). The duplicate is always
 * null-terminated, but because it is re-encoded character by character it
 * can differ from the source string on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string, or NULL if malloc() fails.
 *
 */
char *str_dup(const char *src)
{
	/* Room for every byte of the source plus the terminator. */
	size_t bufsize = str_bytes(src) + 1;

	char *copy = malloc(bufsize);
	if (copy == NULL)
		return NULL;

	str_cpy(copy, bufsize, src);

	return copy;
}
758
/** Duplicate a string with a size limit.
 *
 * Allocate a new buffer with malloc() and copy up to @a n bytes of the
 * source string into it using str_ncpy(). No more than @a n + 1 bytes are
 * allocated; if the source string occupies fewer than @a n bytes, less is
 * allocated.
 *
 * The duplicate is always null-terminated, but because it is re-encoded
 * character by character it can differ from the source string on the
 * byte level.
 *
 * @param src Source string (null-terminated; its full size is measured).
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string, or NULL if malloc() fails.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	size_t avail = str_bytes(src);
	size_t payload = (avail > n) ? n : avail;

	/* One extra byte for the terminator. */
	char *copy = malloc(payload + 1);
	if (copy == NULL)
		return NULL;

	str_ncpy(copy, payload + 1, src, payload);

	return copy;
}
792
793/** Convert string to uint64_t (internal variant).
794 *
795 * @param nptr Pointer to string.
796 * @param endptr Pointer to the first invalid character is stored here.
797 * @param base Zero or number between 2 and 36 inclusive.
798 * @param neg Indication of unary minus is stored here.
799 * @apram result Result of the conversion.
800 *
801 * @return EOK if conversion was successful.
802 *
803 */
804static errno_t str_uint(const char *nptr, char **endptr, unsigned int base,
805 bool *neg, uint64_t *result)
806{
807 assert(endptr != NULL);
808 assert(neg != NULL);
809 assert(result != NULL);
810
811 *neg = false;
812 const char *str = nptr;
813
814 /* Ignore leading whitespace */
815 while (isspace(*str))
816 str++;
817
818 if (*str == '-') {
819 *neg = true;
820 str++;
821 } else if (*str == '+')
822 str++;
823
824 if (base == 0) {
825 /* Decode base if not specified */
826 base = 10;
827
828 if (*str == '0') {
829 base = 8;
830 str++;
831
832 switch (*str) {
833 case 'b':
834 case 'B':
835 base = 2;
836 str++;
837 break;
838 case 'o':
839 case 'O':
840 base = 8;
841 str++;
842 break;
843 case 'd':
844 case 'D':
845 case 't':
846 case 'T':
847 base = 10;
848 str++;
849 break;
850 case 'x':
851 case 'X':
852 base = 16;
853 str++;
854 break;
855 default:
856 str--;
857 }
858 }
859 } else {
860 /* Check base range */
861 if ((base < 2) || (base > 36)) {
862 *endptr = (char *) str;
863 return EINVAL;
864 }
865 }
866
867 *result = 0;
868 const char *startstr = str;
869
870 while (*str != 0) {
871 unsigned int digit;
872
873 if ((*str >= 'a') && (*str <= 'z'))
874 digit = *str - 'a' + 10;
875 else if ((*str >= 'A') && (*str <= 'Z'))
876 digit = *str - 'A' + 10;
877 else if ((*str >= '0') && (*str <= '9'))
878 digit = *str - '0';
879 else
880 break;
881
882 if (digit >= base)
883 break;
884
885 uint64_t prev = *result;
886 *result = (*result) * base + digit;
887
888 if (*result < prev) {
889 /* Overflow */
890 *endptr = (char *) str;
891 return EOVERFLOW;
892 }
893
894 str++;
895 }
896
897 if (str == startstr) {
898 /*
899 * No digits were decoded => first invalid character is
900 * the first character of the string.
901 */
902 str = nptr;
903 }
904
905 *endptr = (char *) str;
906
907 if (str == nptr)
908 return EINVAL;
909
910 return EOK;
911}
912
913/** Convert string to uint64_t.
914 *
915 * @param nptr Pointer to string.
916 * @param endptr If not NULL, pointer to the first invalid character
917 * is stored here.
918 * @param base Zero or number between 2 and 36 inclusive.
919 * @param strict Do not allow any trailing characters.
920 * @param result Result of the conversion.
921 *
922 * @return EOK if conversion was successful.
923 *
924 */
925errno_t str_uint64_t(const char *nptr, char **endptr, unsigned int base,
926 bool strict, uint64_t *result)
927{
928 assert(result != NULL);
929
930 bool neg;
931 char *lendptr;
932 errno_t ret = str_uint(nptr, &lendptr, base, &neg, result);
933
934 if (endptr != NULL)
935 *endptr = (char *) lendptr;
936
937 if (ret != EOK)
938 return ret;
939
940 /* Do not allow negative values */
941 if (neg)
942 return EINVAL;
943
944 /*
945 * Check whether we are at the end of
946 * the string in strict mode
947 */
948 if ((strict) && (*lendptr != 0))
949 return EINVAL;
950
951 return EOK;
952}
953
/** Scale a value to a decimal (SI) order of magnitude.
 *
 * A prefix is applied only once the value exceeds 1000 units of that
 * prefix (10 units for exa, the largest one used), so small multiples are
 * shown with more significant digits in the next lower unit.
 *
 * @param val    Value to scale.
 * @param rv     The scaled value (integer division) is stored here.
 * @param suffix The matching prefix character is stored here: 'E' (10^18),
 *               'P' (10^15), 'T' (10^12), 'G' (10^9), 'M' (10^6),
 *               'k' (10^3) or ' ' if the value is not scaled.
 *
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/* Divisor 10^18 is exa. */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* Divisor 10^15 is peta. */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
979
/** Scale a byte count to a binary (IEC) order of magnitude.
 *
 * A unit is applied only once the value exceeds 1024 of that unit, so
 * small multiples are shown with more significant digits in the next
 * lower unit.
 *
 * @param val    Value to scale, in bytes.
 * @param rv     The scaled value (integer division) is stored here.
 * @param suffix A pointer to a constant unit string is stored here: "PiB"
 *               (2^50), "TiB" (2^40), "GiB" (2^30), "MiB" (2^20),
 *               "KiB" (2^10) or the byte unit if the value is not scaled.
 * @param fixed  If true, the byte unit is padded with spaces to the
 *               three-character width of the other units.
 *
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* Divisor 2^50 is pebi. */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		if (fixed)
			*suffix = "B  ";
		else
			*suffix = "B";
	}
}
1006
1007/** @}
1008 */
Note: See TracBrowser for help on using the repository browser.