source: mainline/common/str.c@ f94a11f

Last change on this file since f94a11f was 0600976, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 3 months ago

Reject invalid non-shortest UTF-8 forms and fix some other issues in str

  • Property mode set to 100644
File size: 40.9 KB
Line 
1/*
2 * Copyright (c) 2001-2004 Jakub Jermar
3 * Copyright (c) 2005 Martin Decky
4 * Copyright (c) 2008 Jiri Svoboda
5 * Copyright (c) 2011 Martin Sucha
6 * Copyright (c) 2011 Oleg Romanenko
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * - Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * - Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * - The name of the author may not be used to endorse or promote products
19 * derived from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33/** @addtogroup libc
34 * @{
35 */
36
37/**
38 * @file
39 * @brief String functions.
40 *
41 * Strings and characters use the Universal Character Set (UCS). The standard
42 * strings, called just strings are encoded in UTF-8. Wide strings (encoded
43 * in UTF-32) are supported to a limited degree. A single character is
44 * represented as char32_t.@n
45 *
46 * Overview of the terminology:@n
47 *
48 * Term Meaning
49 * -------------------- ----------------------------------------------------
50 * byte 8 bits stored in uint8_t (unsigned 8 bit integer)
51 *
52 * character UTF-32 encoded Unicode character, stored in char32_t
53 * (unsigned 32 bit integer), code points 0 .. 1114111
54 * are valid
55 *
56 * Note that Unicode characters do not match
57 * one-to-one with displayed characters or glyphs on
58 * screen. For that level of precision, look up
59 * Grapheme Clusters.
60 *
61 * ASCII character 7 bit encoded ASCII character, stored in char
62 * (usually signed 8 bit integer), code points 0 .. 127
63 * are valid
64 *
65 * string UTF-8 encoded NULL-terminated Unicode string, char *
66 *
67 * wide string UTF-32 encoded NULL-terminated Unicode string,
68 * char32_t *
69 *
70 * [wide] string size number of BYTES in a [wide] string (excluding
71 * the NULL-terminator), size_t
72 *
73 * [wide] string length number of CHARACTERS in a [wide] string (excluding
74 * the NULL-terminator), size_t
75 *
76 * [wide] string width number of display cells on a monospace display taken
77 * by a [wide] string, size_t
78 *
79 * This is virtually impossible to determine exactly for
80 * all strings without knowing specifics of the display
81 * device, due to various factors affecting text output.
82 * If you have the option to query the terminal for
83 * position change caused by outputting the string,
84 * it is preferable to determine width that way.
85 *
86 *
87 * Overview of string metrics:@n
88 *
89 * Metric Abbrev. Type Meaning
90 * ------ ------ ------ -------------------------------------------------
91 * size n size_t number of BYTES in a string (excluding the
92 * NULL-terminator)
93 *
94 * length l size_t number of CHARACTERS in a string (excluding the
95 * null terminator)
96 *
97 * width w size_t number of display cells on a monospace display
98 * taken by a string
99 *
100 *
101 * Function naming prefixes:@n
102 *
103 * chr_ operate on characters
104 * ascii_ operate on ASCII characters
105 * str_ operate on strings
106 * wstr_ operate on wide strings
107 *
108 * [w]str_[n|l|w] operate on a prefix limited by size, length
109 * or width
110 *
111 *
112 * A specific character inside a [wide] string can be referred to by:@n
113 *
114 * pointer (char *, char32_t *)
115 * byte offset (size_t)
116 * character index (size_t)
117 *
118 */
119
120#include <str.h>
121
122#include <align.h>
123#include <assert.h>
124#include <ctype.h>
125#include <errno.h>
126#include <macros.h>
127#include <mem.h>
128#include <stdbool.h>
129#include <stddef.h>
130#include <stdint.h>
131#include <stdlib.h>
132#include <uchar.h>
133
/** Byte mask consisting of lowest @a n bits (out of 8), for 0 <= n <= 8 */
#define LO_MASK_8(n)  ((uint8_t) ((1 << (n)) - 1))

/**
 * Mask consisting of lowest @a n bits (out of 32), for 0 <= n <= 31.
 *
 * The shift is performed on an unsigned 32-bit operand so that n == 31
 * does not overflow a signed int.
 */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1))

/**
 * Byte mask consisting of highest @a n bits (out of 8).
 *
 * Note that the complement is computed after integer promotion, so the
 * expression has bits set above the low byte as well; it is meant to be
 * stored into (and thereby truncated to) a single byte.
 */
#define HI_MASK_8(n)  (~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
145
/** Tell whether byte @a b is a single-byte (7-bit) UTF-8 code unit. */
static inline bool _is_ascii(uint8_t b)
{
	/* The top bit is clear exactly for values 0x00 .. 0x7f. */
	return (b & 0x80) == 0;
}
150
/** Tell whether byte @a b has the UTF-8 continuation form 10xxxxxx. */
static inline bool _is_continuation_byte(uint8_t b)
{
	/* Only the two most significant bits matter; they must be "10". */
	return (b >> 6) == 0x2;
}
155
/** Number of continuation bytes in the shortest UTF-8 encoding of @a c.
 *
 * @param c Character code.
 *
 * @return 0 for codes of up to 7 bits, 1 for up to 11 bits, 2 for up to
 *         16 bits, 3 for up to 21 bits and -1 for anything longer.
 */
static inline int _char_continuation_bytes(char32_t c)
{
	if (c < 0x80)
		return 0;

	if (c < 0x800)
		return 1;

	if (c < 0x10000)
		return 2;

	if (c < 0x200000)
		return 3;

	/* Codes longer than 21 bits are not supported */
	return -1;
}
173
/** Number of continuation bytes announced by the leading byte @a b.
 *
 * @param b First byte of a UTF-8 sequence.
 *
 * @return 0 .. 3 for a valid leading byte, -1 if @a b is a continuation
 *         byte or any other byte that cannot start a sequence.
 */
static inline int _continuation_bytes(uint8_t b)
{
	/* 0xxxxxxx */
	if (b < 0x80)
		return 0;

	/* 10xxxxxx -- a continuation byte cannot start a sequence */
	if (b < 0xc0)
		return -1;

	/* 110xxxxx 10xxxxxx */
	if (b < 0xe0)
		return 1;

	/* 1110xxxx 10xxxxxx 10xxxxxx */
	if (b < 0xf0)
		return 2;

	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
	if (b < 0xf8)
		return 3;

	/* 11111xxx */
	return -1;
}
194
195/** Decode a single character from a string.
196 *
197 * Decode a single character from a string of size @a size. Decoding starts
198 * at @a offset and this offset is moved to the beginning of the next
199 * character. In case of decoding error, offset generally advances at least
200 * by one. However, offset is never moved beyond size.
201 *
202 * @param str String (not necessarily NULL-terminated).
203 * @param offset Byte offset in string where to start decoding.
204 * @param size Size of the string (in bytes).
205 *
206 * @return Value of decoded character, U_SPECIAL on decoding error or
207 * NULL if attempt to decode beyond @a size.
208 *
209 */
210char32_t str_decode(const char *str, size_t *offset, size_t size)
211{
212 if (*offset >= size)
213 return 0;
214
215 /* First byte read from string */
216 uint8_t b0 = (uint8_t) str[(*offset)++];
217
218 /* Fast exit for the most common case. */
219 if (_is_ascii(b0))
220 return b0;
221
222 /* 10xxxxxx -- unexpected continuation byte */
223 if (_is_continuation_byte(b0))
224 return U_SPECIAL;
225
226 /* Determine code length */
227
228 int cbytes = _continuation_bytes(b0);
229 int b0_bits = 6 - cbytes; /* Data bits in first byte */
230
231 if (cbytes < 0 || *offset + cbytes > size)
232 return U_SPECIAL;
233
234 char32_t ch = b0 & LO_MASK_8(b0_bits);
235
236 /* Decode continuation bytes */
237 for (int i = 0; i < cbytes; i++) {
238 uint8_t b = (uint8_t) str[*offset];
239
240 if (!_is_continuation_byte(b))
241 return U_SPECIAL;
242
243 (*offset)++;
244
245 /* Shift data bits to ch */
246 ch = (ch << CONT_BITS) | (char32_t) (b & LO_MASK_8(CONT_BITS));
247 }
248
249 /*
250 * Reject non-shortest form encodings.
251 * See https://www.unicode.org/versions/corrigendum1.html
252 */
253 if (cbytes != _char_continuation_bytes(ch))
254 return U_SPECIAL;
255
256 return ch;
257}
258
259/** Decode a single character from a string to the left.
260 *
261 * Decode a single character from a string of size @a size. Decoding starts
262 * at @a offset and this offset is moved to the beginning of the previous
263 * character. In case of decoding error, offset generally decreases at least
264 * by one. However, offset is never moved before 0.
265 *
266 * @param str String (not necessarily NULL-terminated).
267 * @param offset Byte offset in string where to start decoding.
268 * @param size Size of the string (in bytes).
269 *
270 * @return Value of decoded character, U_SPECIAL on decoding error or
271 * NULL if attempt to decode beyond @a start of str.
272 *
273 */
274char32_t str_decode_reverse(const char *str, size_t *offset, size_t size)
275{
276 if (*offset == 0)
277 return 0;
278
279 int cbytes = 0;
280 /* Continue while continuation bytes found */
281 while (*offset > 0 && cbytes < 4) {
282 uint8_t b = (uint8_t) str[--(*offset)];
283
284 if (_is_continuation_byte(b)) {
285 cbytes++;
286 continue;
287 }
288
289 /* Invalid byte. */
290 if (cbytes != _continuation_bytes(b))
291 return U_SPECIAL;
292
293 /* Start byte */
294 size_t start_offset = *offset;
295 return str_decode(str, &start_offset, size);
296 }
297
298 /* Too many continuation bytes */
299 return U_SPECIAL;
300}
301
302/** Encode a single character to string representation.
303 *
304 * Encode a single character to string representation (i.e. UTF-8) and store
305 * it into a buffer at @a offset. Encoding starts at @a offset and this offset
306 * is moved to the position where the next character can be written to.
307 *
308 * @param ch Input character.
309 * @param str Output buffer.
310 * @param offset Byte offset where to start writing.
311 * @param size Size of the output buffer (in bytes).
312 *
313 * @return EOK if the character was encoded successfully, EOVERFLOW if there
314 * was not enough space in the output buffer or EINVAL if the character
315 * code was invalid.
316 */
317errno_t chr_encode(char32_t ch, char *str, size_t *offset, size_t size)
318{
319 if (*offset >= size)
320 return EOVERFLOW;
321
322 /* Fast exit for the most common case. */
323 if (ch < 0x80) {
324 str[(*offset)++] = (char) ch;
325 return EOK;
326 }
327
328 /* Codes longer than 21 bits are not supported */
329 if (!chr_check(ch))
330 return EINVAL;
331
332 /* Determine how many continuation bytes are needed */
333
334 unsigned int cbytes = _char_continuation_bytes(ch);
335 unsigned int b0_bits = 6 - cbytes; /* Data bits in first byte */
336
337 /* Check for available space in buffer */
338 if (*offset + cbytes >= size)
339 return EOVERFLOW;
340
341 /* Encode continuation bytes */
342 unsigned int i;
343 for (i = cbytes; i > 0; i--) {
344 str[*offset + i] = 0x80 | (ch & LO_MASK_32(CONT_BITS));
345 ch >>= CONT_BITS;
346 }
347
348 /* Encode first byte */
349 str[*offset] = (ch & LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1);
350
351 /* Advance offset */
352 *offset += cbytes + 1;
353
354 return EOK;
355}
356
357/* Convert in place any bytes that don't form a valid character into U_SPECIAL. */
358static void _sanitize_string(char *str, size_t n)
359{
360 uint8_t *b = (uint8_t *) str;
361
362 for (; *b && n > 0; b++, n--) {
363 int cont = _continuation_bytes(b[0]);
364 if (__builtin_expect(cont, 0) == 0)
365 continue;
366
367 if (cont < 0 || n <= (size_t) cont) {
368 b[0] = U_SPECIAL;
369 continue;
370 }
371
372 /* Check continuation bytes. */
373 for (int i = 1; i <= cont; i++) {
374 if (!_is_continuation_byte(b[i])) {
375 b[0] = U_SPECIAL;
376 continue;
377 }
378 }
379
380 /*
381 * Check for non-shortest form encoding.
382 * See https://www.unicode.org/versions/corrigendum1.html
383 */
384
385 switch (cont) {
386 case 1:
387 /* 0b110!!!!x 0b10xxxxxx */
388 if (!(b[0] & 0b00011110))
389 b[0] = U_SPECIAL;
390
391 continue;
392 case 2:
393 /* 0b1110!!!! 0b10!xxxxx 0b10xxxxxx */
394 if (!(b[0] & 0b00001111) && !(b[1] & 0b00100000))
395 b[0] = U_SPECIAL;
396
397 continue;
398 case 3:
399 /* 0b11110!!! 0b10!!xxxx 0b10xxxxxx 0b10xxxxxx */
400 if (!(b[0] & 0b00000111) && !(b[1] & 0b00110000))
401 b[0] = U_SPECIAL;
402
403 continue;
404 }
405 }
406}
407
/** Count the bytes of @a str up to, but not including, the terminator. */
static size_t _str_size(const char *str)
{
	const char *end = str;

	while (*end != 0)
		end++;

	return (size_t) (end - str);
}
417
/** Get size of string.
 *
 * Get the number of bytes which are used by the string @a str (excluding the
 * NULL-terminator).
 *
 * @param str String to consider.
 *
 * @return Number of bytes used by the string
 *
 */
size_t str_size(const char *str)
{
	size_t bytes = _str_size(str);

	return bytes;
}
432
/** Get size of wide string.
 *
 * Get the number of bytes which are used by the wide string @a str
 * (excluding the NULL-terminator).
 *
 * @param str Wide string to consider.
 *
 * @return Number of bytes used by the wide string
 *
 */
size_t wstr_size(const char32_t *str)
{
	size_t chars = wstr_length(str);

	return chars * sizeof(char32_t);
}
447
448/** Get size of string with length limit.
449 *
450 * Get the number of bytes which are used by up to @a max_len first
451 * characters in the string @a str. If @a max_len is greater than
452 * the length of @a str, the entire string is measured (excluding the
453 * NULL-terminator).
454 *
455 * @param str String to consider.
456 * @param max_len Maximum number of characters to measure.
457 *
458 * @return Number of bytes used by the characters.
459 *
460 */
461size_t str_lsize(const char *str, size_t max_len)
462{
463 size_t len = 0;
464 size_t offset = 0;
465
466 while (len < max_len) {
467 if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
468 break;
469
470 len++;
471 }
472
473 return offset;
474}
475
/** Count the bytes of @a str before the terminator, at most @a max_size.
 *
 * The limit is tested before each byte is read, so no byte at or beyond
 * @a max_size is ever accessed and @a str need not be terminated.
 */
static size_t _str_nsize(const char *str, size_t max_size)
{
	size_t size = 0;

	while (size < max_size && str[size] != 0)
		size++;

	return size;
}
485
/** Get size of string with size limit.
 *
 * Get the number of bytes which are used by the string @a str
 * (excluding the NULL-terminator), but no more than @a max_size bytes.
 *
 * @param str      String to consider.
 * @param max_size Maximum number of bytes to measure.
 *
 * @return Number of bytes used by the string
 *
 */
size_t str_nsize(const char *str, size_t max_size)
{
	size_t bytes = _str_nsize(str, max_size);

	return bytes;
}
501
/** Get size of wide string with size limit.
 *
 * Get the number of bytes which are used by the wide string @a str
 * (excluding the NULL-terminator), but no more than @a max_size bytes.
 *
 * @param str      Wide string to consider.
 * @param max_size Maximum number of bytes to measure.
 *
 * @return Number of bytes used by the wide string
 *
 */
size_t wstr_nsize(const char32_t *str, size_t max_size)
{
	size_t chars = wstr_nlength(str, max_size);

	return chars * sizeof(char32_t);
}
517
/** Get size of wide string with length limit.
 *
 * Get the number of bytes which are used by up to @a max_len first
 * wide characters in the wide string @a str. If @a max_len is greater than
 * the length of @a str, the entire wide string is measured (excluding the
 * NULL-terminator).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const char32_t *str, size_t max_len)
{
	size_t len = 0;

	/*
	 * Count characters directly instead of converting max_len to a byte
	 * limit first: max_len * sizeof(char32_t) can wrap around for large
	 * values of max_len and yield a limit that is too small.
	 */
	while (len < max_len && str[len] != 0)
		len++;

	return len * sizeof(char32_t);
}
535
536/** Get number of characters in a string.
537 *
538 * @param str NULL-terminated string.
539 *
540 * @return Number of characters in string.
541 *
542 */
543size_t str_length(const char *str)
544{
545 size_t len = 0;
546 size_t offset = 0;
547
548 while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
549 len++;
550
551 return len;
552}
553
/** Get number of characters in a wide string.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return Number of characters in @a wstr.
 *
 */
size_t wstr_length(const char32_t *wstr)
{
	const char32_t *end = wstr;

	while (*end != 0)
		end++;

	return (size_t) (end - wstr);
}
570
/** Get number of characters in a string with size limit.
 *
 * Characters are counted by decoding the string until a zero character is
 * returned, which also happens once @a size bytes have been consumed.
 *
 * @param str  NULL-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in string.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t pos = 0;
	size_t count = 0;

	for (;;) {
		if (str_decode(str, &pos, size) == 0)
			break;

		count++;
	}

	return count;
}
589
/** Get number of characters in a wide string with size limit.
 *
 * Only whole characters are considered: if @a size is not a multiple of
 * the character size, the trailing partial character is ignored.
 *
 * @param str  NULL-terminated wide string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in the wide string.
 *
 */
size_t wstr_nlength(const char32_t *str, size_t size)
{
	/* Number of complete characters that fit into size bytes */
	size_t max_chars = size / sizeof(char32_t);
	size_t len = 0;

	while (len < max_chars && str[len] != 0)
		len++;

	return len;
}
611
/** Get character display width on a character cell display.
 *
 * The current implementation reports one cell for every character.
 *
 * @param ch Character
 * @return Width of character in cells.
 */
size_t chr_width(char32_t ch)
{
	(void) ch;

	return 1;
}
621
622/** Get string display width on a character cell display.
623 *
624 * @param str String
625 * @return Width of string in cells.
626 */
627size_t str_width(const char *str)
628{
629 size_t width = 0;
630 size_t offset = 0;
631 char32_t ch;
632
633 while ((ch = str_decode(str, &offset, STR_NO_LIMIT)) != 0)
634 width += chr_width(ch);
635
636 return width;
637}
638
/** Check whether character is plain ASCII.
 *
 * @param ch Character to check.
 *
 * @return True if character is plain ASCII, i.e. not greater than 127.
 *
 */
bool ascii_check(char32_t ch)
{
	return ch < 0x80;
}
651
/** Check whether character is valid
 *
 * @param ch Character to check.
 *
 * @return True if character is not greater than 1114111 (0x10FFFF),
 *         the highest Unicode code point.
 *
 */
bool chr_check(char32_t ch)
{
	return ch <= 0x10FFFF;
}
664
/** Compare two NULL terminated strings.
 *
 * Do a char-by-char comparison of two NULL-terminated strings.
 * The strings are considered equal iff their length is equal
 * and both strings consist of the same sequence of characters.
 *
 * A string S1 is less than another string S2 if it has a character with
 * lower value at the first character position where the strings differ.
 * If the strings differ in length, the shorter one is treated as if
 * padded by characters with a value of zero.
 *
 * @param s1 First string to compare.
 * @param s2 Second string to compare.
 *
 * @return 0 if the strings are equal, -1 if the first is less than the second,
 *         1 if the second is less than the first.
 *
 */
int str_cmp(const char *s1, const char *s2)
{
	/*
	 * UTF-8 has the nice property that lexicographic ordering on bytes is
	 * the same as the lexicographic ordering of the character sequences.
	 *
	 * That only holds for bytes compared as unsigned values. Plain char
	 * may be signed, which would order every non-ASCII byte below ASCII.
	 */
	const unsigned char *a = (const unsigned char *) s1;
	const unsigned char *b = (const unsigned char *) s2;

	while (*a == *b && *a != 0) {
		a++;
		b++;
	}

	if (*a == *b)
		return 0;

	return (*a < *b) ? -1 : 1;
}
699
700/** Compare two NULL terminated strings with length limit.
701 *
702 * Do a char-by-char comparison of two NULL-terminated strings.
703 * The strings are considered equal iff
704 * min(str_length(s1), max_len) == min(str_length(s2), max_len)
705 * and both strings consist of the same sequence of characters,
706 * up to max_len characters.
707 *
708 * A string S1 is less than another string S2 if it has a character with
709 * lower value at the first character position where the strings differ.
710 * If the strings differ in length, the shorter one is treated as if
711 * padded by characters with a value of zero. Only the first max_len
712 * characters are considered.
713 *
714 * @param s1 First string to compare.
715 * @param s2 Second string to compare.
716 * @param max_len Maximum number of characters to consider.
717 *
718 * @return 0 if the strings are equal, -1 if the first is less than the second,
719 * 1 if the second is less than the first.
720 *
721 */
722int str_lcmp(const char *s1, const char *s2, size_t max_len)
723{
724 char32_t c1 = 0;
725 char32_t c2 = 0;
726
727 size_t off1 = 0;
728 size_t off2 = 0;
729
730 size_t len = 0;
731
732 while (true) {
733 if (len >= max_len)
734 break;
735
736 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
737 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
738
739 if (c1 < c2)
740 return -1;
741
742 if (c1 > c2)
743 return 1;
744
745 if (c1 == 0 || c2 == 0)
746 break;
747
748 ++len;
749 }
750
751 return 0;
752
753}
754
755/** Compare two NULL terminated strings in case-insensitive manner.
756 *
757 * Do a char-by-char comparison of two NULL-terminated strings.
758 * The strings are considered equal iff their length is equal
759 * and both strings consist of the same sequence of characters
760 * when converted to lower case.
761 *
762 * A string S1 is less than another string S2 if it has a character with
763 * lower value at the first character position where the strings differ.
764 * If the strings differ in length, the shorter one is treated as if
765 * padded by characters with a value of zero.
766 *
767 * @param s1 First string to compare.
768 * @param s2 Second string to compare.
769 *
770 * @return 0 if the strings are equal, -1 if the first is less than the second,
771 * 1 if the second is less than the first.
772 *
773 */
774int str_casecmp(const char *s1, const char *s2)
775{
776 // FIXME: doesn't work for non-ASCII caseful characters
777
778 char32_t c1 = 0;
779 char32_t c2 = 0;
780
781 size_t off1 = 0;
782 size_t off2 = 0;
783
784 while (true) {
785 c1 = tolower(str_decode(s1, &off1, STR_NO_LIMIT));
786 c2 = tolower(str_decode(s2, &off2, STR_NO_LIMIT));
787
788 if (c1 < c2)
789 return -1;
790
791 if (c1 > c2)
792 return 1;
793
794 if (c1 == 0 || c2 == 0)
795 break;
796 }
797
798 return 0;
799}
800
801/** Compare two NULL terminated strings with length limit in case-insensitive
802 * manner.
803 *
804 * Do a char-by-char comparison of two NULL-terminated strings.
805 * The strings are considered equal iff
806 * min(str_length(s1), max_len) == min(str_length(s2), max_len)
807 * and both strings consist of the same sequence of characters,
808 * up to max_len characters.
809 *
810 * A string S1 is less than another string S2 if it has a character with
811 * lower value at the first character position where the strings differ.
812 * If the strings differ in length, the shorter one is treated as if
813 * padded by characters with a value of zero. Only the first max_len
814 * characters are considered.
815 *
816 * @param s1 First string to compare.
817 * @param s2 Second string to compare.
818 * @param max_len Maximum number of characters to consider.
819 *
820 * @return 0 if the strings are equal, -1 if the first is less than the second,
821 * 1 if the second is less than the first.
822 *
823 */
824int str_lcasecmp(const char *s1, const char *s2, size_t max_len)
825{
826 // FIXME: doesn't work for non-ASCII caseful characters
827
828 char32_t c1 = 0;
829 char32_t c2 = 0;
830
831 size_t off1 = 0;
832 size_t off2 = 0;
833
834 size_t len = 0;
835
836 while (true) {
837 if (len >= max_len)
838 break;
839
840 c1 = tolower(str_decode(s1, &off1, STR_NO_LIMIT));
841 c2 = tolower(str_decode(s2, &off2, STR_NO_LIMIT));
842
843 if (c1 < c2)
844 return -1;
845
846 if (c1 > c2)
847 return 1;
848
849 if (c1 == 0 || c2 == 0)
850 break;
851
852 ++len;
853 }
854
855 return 0;
856
857}
858
/** Byte-wise test whether @a p is a prefix of @a s. */
static bool _test_prefix(const char *s, const char *p)
{
	for (size_t i = 0; ; i++) {
		/* Whole prefix matched. */
		if (p[i] == 0)
			return true;

		/* Mismatch, which also covers s ending before p. */
		if (s[i] != p[i])
			return false;
	}
}
868
/** Test whether p is a prefix of s.
 *
 * Compare two NULL-terminated strings byte by byte and determine whether
 * every byte of @a p matches the corresponding byte at the start of @a s.
 *
 * @param s The string in which to look
 * @param p The string to check if it is a prefix of s
 *
 * @return true iff p is prefix of s else false
 *
 */
bool str_test_prefix(const char *s, const char *p)
{
	bool is_prefix = _test_prefix(s, p);

	return is_prefix;
}
884
885/** Get a string suffix.
886 *
887 * Return a string suffix defined by the prefix length.
888 *
889 * @param s The string to get the suffix from.
890 * @param prefix_length Number of prefix characters to ignore.
891 *
892 * @return String suffix.
893 *
894 */
895const char *str_suffix(const char *s, size_t prefix_length)
896{
897 size_t off = 0;
898 size_t i = 0;
899
900 while (true) {
901 str_decode(s, &off, STR_NO_LIMIT);
902 i++;
903
904 if (i >= prefix_length)
905 break;
906 }
907
908 return s + off;
909}
910
/** Copy string as a sequence of bytes, including the terminator. */
static void _str_cpy(char *dest, const char *src)
{
	char c;

	do {
		c = *src++;
		*dest++ = c;
	} while (c != 0);
}
919
920/** Copy string as a sequence of bytes. */
921static void _str_cpyn(char *dest, size_t size, const char *src)
922{
923 assert(dest && src && size);
924
925 if (!dest || !src || !size)
926 return;
927
928 if (size == STR_NO_LIMIT)
929 return _str_cpy(dest, src);
930
931 char *dest_top = dest + size - 1;
932 assert(size == 1 || dest < dest_top);
933
934 while (*src && dest < dest_top)
935 *(dest++) = *(src++);
936
937 *dest = 0;
938}
939
940/** Copy string.
941 *
942 * Copy source string @a src to destination buffer @a dest.
943 * No more than @a size bytes are written. If the size of the output buffer
944 * is at least one byte, the output string will always be well-formed, i.e.
945 * null-terminated and containing only complete characters.
946 *
947 * @param dest Destination buffer.
948 * @param count Size of the destination buffer (must be > 0).
949 * @param src Source string.
950 *
951 */
952void str_cpy(char *dest, size_t size, const char *src)
953{
954 /* There must be space for a null terminator in the buffer. */
955 assert(size > 0);
956 assert(src != NULL);
957 assert(dest != NULL);
958 assert(size == STR_NO_LIMIT || dest + size > dest);
959
960 /* Copy data. */
961 _str_cpyn(dest, size, src);
962
963 /* In-place translate invalid bytes to U_SPECIAL. */
964 _sanitize_string(dest, size);
965}
966
/** Copy size-limited substring.
 *
 * Copy prefix of string @a src of max. size @a n to destination buffer
 * @a dest. No more than @a size bytes are written. The output string will
 * always be well-formed, i.e. null-terminated and containing only complete
 * characters.
 *
 * No more than @a n bytes are read from the input string, so it does not
 * have to be null-terminated.
 *
 * @param dest Destination buffer.
 * @param size Size of the destination buffer (must be > 0).
 * @param src  Source string.
 * @param n    Maximum number of bytes to read from @a src.
 *
 */
void str_ncpy(char *dest, size_t size, const char *src, size_t n)
{
	/* There must be space for a null terminator in the buffer. */
	assert(size > 0);
	assert(src != NULL);
	assert(dest != NULL);

	/*
	 * The copy may use n bytes plus the terminator, but never more than
	 * the buffer holds. Computing n + 1 only when n < size - 1 avoids
	 * the wrap-around to zero for the largest possible n.
	 */
	size_t limit = (size > 0 && n < size - 1) ? n + 1 : size;

	/* Copy data. */
	_str_cpyn(dest, limit, src);

	/* In-place translate invalid bytes to U_SPECIAL. */
	_sanitize_string(dest, size);
}
995
996/** Append one string to another.
997 *
998 * Append source string @a src to string in destination buffer @a dest.
999 * Size of the destination buffer is @a dest. If the size of the output buffer
1000 * is at least one byte, the output string will always be well-formed, i.e.
1001 * null-terminated and containing only complete characters.
1002 *
1003 * @param dest Destination buffer.
1004 * @param count Size of the destination buffer.
1005 * @param src Source string.
1006 */
1007void str_append(char *dest, size_t size, const char *src)
1008{
1009 assert(src != NULL);
1010 assert(dest != NULL);
1011 assert(size > 0);
1012 assert(size == STR_NO_LIMIT || dest + size > dest);
1013
1014 size_t dstr_size = _str_nsize(dest, size);
1015 if (dstr_size < size) {
1016 _str_cpyn(dest + dstr_size, size - dstr_size, src);
1017 _sanitize_string(dest + dstr_size, size - dstr_size);
1018 }
1019}
1020
1021/** Convert space-padded ASCII to string.
1022 *
1023 * Common legacy text encoding in hardware is 7-bit ASCII fitted into
1024 * a fixed-width byte buffer (bit 7 always zero), right-padded with spaces
1025 * (ASCII 0x20). Convert space-padded ascii to string representation.
1026 *
1027 * If the text does not fit into the destination buffer, the function converts
1028 * as many characters as possible and returns EOVERFLOW.
1029 *
1030 * If the text contains non-ASCII bytes (with bit 7 set), the whole string is
1031 * converted anyway and invalid characters are replaced with question marks
1032 * (U_SPECIAL) and the function returns EIO.
1033 *
1034 * Regardless of return value upon return @a dest will always be well-formed.
1035 *
1036 * @param dest Destination buffer
1037 * @param size Size of destination buffer
1038 * @param src Space-padded ASCII.
1039 * @param n Size of the source buffer in bytes.
1040 *
1041 * @return EOK on success, EOVERFLOW if the text does not fit
1042 * destination buffer, EIO if the text contains
1043 * non-ASCII bytes.
1044 */
1045errno_t spascii_to_str(char *dest, size_t size, const uint8_t *src, size_t n)
1046{
1047 size_t len = 0;
1048
1049 /* Determine the length of the source string. */
1050 for (size_t i = 0; i < n; i++) {
1051 if (src[i] == 0)
1052 break;
1053
1054 if (src[i] != ' ')
1055 len = i + 1;
1056 }
1057
1058 errno_t result = EOK;
1059 size_t out_len = min(len, size - 1);
1060
1061 /* Copy characters */
1062 for (size_t i = 0; i < out_len; i++) {
1063 dest[i] = src[i];
1064
1065 if (dest[i] < 0) {
1066 dest[i] = U_SPECIAL;
1067 result = EIO;
1068 }
1069 }
1070
1071 dest[out_len] = 0;
1072
1073 if (out_len < len)
1074 return EOVERFLOW;
1075
1076 return result;
1077}
1078
1079/** Convert wide string to string.
1080 *
1081 * Convert wide string @a src to string. The output is written to the buffer
1082 * specified by @a dest and @a size. @a size must be non-zero and the string
1083 * written will always be well-formed.
1084 *
1085 * @param dest Destination buffer.
1086 * @param size Size of the destination buffer.
1087 * @param src Source wide string.
1088 */
1089void wstr_to_str(char *dest, size_t size, const char32_t *src)
1090{
1091 char32_t ch;
1092 size_t src_idx;
1093 size_t dest_off;
1094
1095 /* There must be space for a null terminator in the buffer. */
1096 assert(size > 0);
1097
1098 src_idx = 0;
1099 dest_off = 0;
1100
1101 while ((ch = src[src_idx++]) != 0) {
1102 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
1103 break;
1104 }
1105
1106 dest[dest_off] = '\0';
1107}
1108
1109/** Convert UTF16 string to string.
1110 *
1111 * Convert utf16 string @a src to string. The output is written to the buffer
1112 * specified by @a dest and @a size. @a size must be non-zero and the string
1113 * written will always be well-formed. Surrogate pairs also supported.
1114 *
1115 * @param dest Destination buffer.
1116 * @param size Size of the destination buffer.
1117 * @param src Source utf16 string.
1118 *
1119 * @return EOK, if success, an error code otherwise.
1120 */
1121errno_t utf16_to_str(char *dest, size_t size, const uint16_t *src)
1122{
1123 size_t idx = 0, dest_off = 0;
1124 char32_t ch;
1125 errno_t rc = EOK;
1126
1127 /* There must be space for a null terminator in the buffer. */
1128 assert(size > 0);
1129
1130 while (src[idx]) {
1131 if ((src[idx] & 0xfc00) == 0xd800) {
1132 if (src[idx + 1] && (src[idx + 1] & 0xfc00) == 0xdc00) {
1133 ch = 0x10000;
1134 ch += (src[idx] & 0x03FF) << 10;
1135 ch += (src[idx + 1] & 0x03FF);
1136 idx += 2;
1137 } else
1138 break;
1139 } else {
1140 ch = src[idx];
1141 idx++;
1142 }
1143 rc = chr_encode(ch, dest, &dest_off, size - 1);
1144 if (rc != EOK)
1145 break;
1146 }
1147 dest[dest_off] = '\0';
1148 return rc;
1149}
1150
1151/** Convert string to UTF16 string.
1152 *
1153 * Convert string @a src to utf16 string. The output is written to the buffer
1154 * specified by @a dest and @a dlen. @a dlen must be non-zero and the string
1155 * written will always be well-formed. Surrogate pairs also supported.
1156 *
1157 * @param dest Destination buffer.
1158 * @param dlen Number of utf16 characters that fit in the destination buffer.
1159 * @param src Source string.
1160 *
1161 * @return EOK, if success, an error code otherwise.
1162 */
1163errno_t str_to_utf16(uint16_t *dest, size_t dlen, const char *src)
1164{
1165 errno_t rc = EOK;
1166 size_t offset = 0;
1167 size_t idx = 0;
1168 char32_t c;
1169
1170 assert(dlen > 0);
1171
1172 while ((c = str_decode(src, &offset, STR_NO_LIMIT)) != 0) {
1173 if (c > 0x10000) {
1174 if (idx + 2 >= dlen - 1) {
1175 rc = EOVERFLOW;
1176 break;
1177 }
1178 c = (c - 0x10000);
1179 dest[idx] = 0xD800 | (c >> 10);
1180 dest[idx + 1] = 0xDC00 | (c & 0x3FF);
1181 idx++;
1182 } else {
1183 dest[idx] = c;
1184 }
1185
1186 idx++;
1187 if (idx >= dlen - 1) {
1188 rc = EOVERFLOW;
1189 break;
1190 }
1191 }
1192
1193 dest[idx] = '\0';
1194 return rc;
1195}
1196
/** Count the code units of a UTF-16 string.
 *
 * Returns how many 16-bit words precede the terminating zero word of
 * @a ustr. The terminator itself is not counted.
 *
 * @param ustr	UTF-16 string to measure.
 *
 * @return Number of 16-bit words used by the string.
 *
 */
size_t utf16_wsize(const uint16_t *ustr)
{
	const uint16_t *end = ustr;

	/* Walk up to the terminating zero word. */
	while (*end != 0)
		end++;

	return (size_t) (end - ustr);
}
1216
1217/** Convert wide string to new string.
1218 *
1219 * Convert wide string @a src to string. Space for the new string is allocated
1220 * on the heap.
1221 *
1222 * @param src Source wide string.
1223 * @return New string.
1224 */
1225char *wstr_to_astr(const char32_t *src)
1226{
1227 char dbuf[STR_BOUNDS(1)];
1228 char *str;
1229 char32_t ch;
1230
1231 size_t src_idx;
1232 size_t dest_off;
1233 size_t dest_size;
1234
1235 /* Compute size of encoded string. */
1236
1237 src_idx = 0;
1238 dest_size = 0;
1239
1240 while ((ch = src[src_idx++]) != 0) {
1241 dest_off = 0;
1242 if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
1243 break;
1244 dest_size += dest_off;
1245 }
1246
1247 str = malloc(dest_size + 1);
1248 if (str == NULL)
1249 return NULL;
1250
1251 /* Encode string. */
1252
1253 src_idx = 0;
1254 dest_off = 0;
1255
1256 while ((ch = src[src_idx++]) != 0) {
1257 if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
1258 break;
1259 }
1260
1261 str[dest_size] = '\0';
1262 return str;
1263}
1264
1265/** Convert string to wide string.
1266 *
1267 * Convert string @a src to wide string. The output is written to the
1268 * buffer specified by @a dest and @a dlen. @a dlen must be non-zero
1269 * and the wide string written will always be null-terminated.
1270 *
1271 * @param dest Destination buffer.
1272 * @param dlen Length of destination buffer (number of wchars).
1273 * @param src Source string.
1274 */
1275void str_to_wstr(char32_t *dest, size_t dlen, const char *src)
1276{
1277 size_t offset;
1278 size_t di;
1279 char32_t c;
1280
1281 assert(dlen > 0);
1282
1283 offset = 0;
1284 di = 0;
1285
1286 do {
1287 if (di >= dlen - 1)
1288 break;
1289
1290 c = str_decode(src, &offset, STR_NO_LIMIT);
1291 dest[di++] = c;
1292 } while (c != '\0');
1293
1294 dest[dlen - 1] = '\0';
1295}
1296
/** Convert string to a newly allocated wide string.
 *
 * Allocates room for str_length(@a str) characters plus a terminator
 * and fills it using str_to_wstr().
 *
 * @param str	Source string.
 *
 * @return New null-terminated wide string obtained from calloc(), or
 *         NULL if the allocation failed.
 */
char32_t *str_to_awstr(const char *str)
{
	/* One extra element for the null terminator. */
	size_t count = str_length(str) + 1;

	char32_t *wstr = calloc(count, sizeof(char32_t));
	if (wstr != NULL)
		str_to_wstr(wstr, count, str);

	return wstr;
}
1315
/** Find the first byte equal to @a c in a null-terminated string.
 *
 * The terminator takes part in the search: for @a c == 0 a pointer to
 * the terminator is returned.
 *
 * @return Pointer to the first matching byte or NULL if there is none.
 */
static char *_strchr(const char *str, char c)
{
	for (; *str != c; str++) {
		/* Reached the end without seeing c. */
		if (*str == 0)
			return NULL;
	}

	return (char *) str;
}
1323
1324/** Find first occurence of character in string.
1325 *
1326 * @param str String to search.
1327 * @param ch Character to look for.
1328 *
1329 * @return Pointer to character in @a str or NULL if not found.
1330 */
1331char *str_chr(const char *str, char32_t ch)
1332{
1333 /* Fast path for an ASCII character. */
1334 if (ascii_check(ch))
1335 return _strchr(str, ch);
1336
1337 /* Convert character to UTF-8. */
1338 char utf8[STR_BOUNDS(1) + 1];
1339 size_t offset = 0;
1340
1341 if (chr_encode(ch, utf8, &offset, sizeof(utf8)) != EOK || offset == 0)
1342 return NULL;
1343
1344 utf8[offset] = '\0';
1345
1346 /* Find the first byte, then check if all of them are correct. */
1347 while (*str != 0) {
1348 str = _strchr(str, utf8[0]);
1349 if (!str)
1350 return NULL;
1351
1352 if (_test_prefix(str, utf8))
1353 return (char *) str;
1354
1355 str++;
1356 }
1357
1358 return NULL;
1359}
1360
/** Find first occurrence of substring in string.
 *
 * Every byte position of the haystack at which the needle could still
 * fit is tested with _test_prefix().
 *
 * @param hs	Haystack (string)
 * @param n	Needle (substring to look for)
 *
 * @return Pointer to character in @a hs or @c NULL if not found.
 */
char *str_str(const char *hs, const char *n)
{
	const size_t needle_size = _str_size(n);
	size_t remaining = _str_size(hs);

	/* Stop once the rest of the haystack is shorter than the needle. */
	for (const char *pos = hs; remaining >= needle_size; pos++, remaining--) {
		if (_test_prefix(pos, n))
			return (char *) pos;
	}

	return NULL;
}
1383
/** Remove all trailing bytes equal to @a c from a string, in place.
 *
 * The string is cut right after its last byte that differs from @a c.
 * A string that is empty or consists solely of @a c becomes empty; no
 * byte beyond the original terminator is ever written.
 *
 * @param str	String to trim.
 * @param c	Byte to remove from the end.
 */
static void _str_rtrim(char *str, char c)
{
	/* Position where the terminator goes: one past the last kept byte. */
	char *cut = str;
	char *p;

	for (p = str; *p != 0; p++) {
		if (*p != c)
			cut = p + 1;
	}

	/* Truncate string. */
	*cut = 0;
}
1398
1399/** Removes specified trailing characters from a string.
1400 *
1401 * @param str String to remove from.
1402 * @param ch Character to remove.
1403 */
1404void str_rtrim(char *str, char32_t ch)
1405{
1406 /* Fast path for the ASCII case. */
1407 if (ascii_check(ch)) {
1408 _str_rtrim(str, ch);
1409 return;
1410 }
1411
1412 size_t off = 0;
1413 size_t pos = 0;
1414 char32_t c;
1415 bool update_last_chunk = true;
1416 char *last_chunk = NULL;
1417
1418 while ((c = str_decode(str, &off, STR_NO_LIMIT))) {
1419 if (c != ch) {
1420 update_last_chunk = true;
1421 last_chunk = NULL;
1422 } else if (update_last_chunk) {
1423 update_last_chunk = false;
1424 last_chunk = (str + pos);
1425 }
1426 pos = off;
1427 }
1428
1429 if (last_chunk)
1430 *last_chunk = '\0';
1431}
1432
/** Remove all leading bytes equal to @a c from a string, in place.
 *
 * The remainder of the string is moved to the front with _str_cpy()
 * when at least one byte was skipped.
 *
 * @param str	String to trim.
 * @param c	Byte to remove from the beginning.
 */
static void _str_ltrim(char *str, char c)
{
	char *p = str;

	/*
	 * The explicit terminator test keeps the scan inside the string
	 * even when c is 0 (otherwise the terminator itself would match).
	 */
	while (*p != 0 && *p == c)
		p++;

	if (p != str)
		_str_cpy(str, p);
}
1443
1444/** Removes specified leading characters from a string.
1445 *
1446 * @param str String to remove from.
1447 * @param ch Character to remove.
1448 */
1449void str_ltrim(char *str, char32_t ch)
1450{
1451 /* Fast path for the ASCII case. */
1452 if (ascii_check(ch)) {
1453 _str_ltrim(str, ch);
1454 return;
1455 }
1456
1457 char32_t acc;
1458 size_t off = 0;
1459 size_t pos = 0;
1460 size_t str_sz = str_size(str);
1461
1462 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
1463 if (acc != ch)
1464 break;
1465 else
1466 pos = off;
1467 }
1468
1469 if (pos > 0) {
1470 memmove(str, &str[pos], str_sz - pos);
1471 pos = str_sz - pos;
1472 str[pos] = '\0';
1473 }
1474}
1475
/** Find the last byte equal to @a c in a null-terminated string.
 *
 * The terminator is not part of the search, so @a c == 0 never matches.
 *
 * @return Pointer to the last matching byte or NULL if there is none.
 */
static char *_str_rchr(const char *str, char c)
{
	const char *found = NULL;

	for (const char *p = str; *p != 0; p++) {
		if (*p == c)
			found = p;
	}

	return (char *) found;
}
1489
1490/** Find last occurence of character in string.
1491 *
1492 * @param str String to search.
1493 * @param ch Character to look for.
1494 *
1495 * @return Pointer to character in @a str or NULL if not found.
1496 */
1497char *str_rchr(const char *str, char32_t ch)
1498{
1499 if (ascii_check(ch))
1500 return _str_rchr(str, ch);
1501
1502 char32_t acc;
1503 size_t off = 0;
1504 size_t last = 0;
1505 const char *res = NULL;
1506
1507 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
1508 if (acc == ch)
1509 res = (str + last);
1510 last = off;
1511 }
1512
1513 return (char *) res;
1514}
1515
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position
 * @a pos. The characters from that position on, including the
 * terminator, are shifted up by one element.
 *
 * Only the insertion position is compared against @a max_pos; the
 * current length is not. The caller has to make sure the buffer can
 * hold one more element than the string currently occupies (the
 * terminator ends up at index length + 1).
 *
 * @param str		String to insert to.
 * @param ch		Character to insert to.
 * @param pos		Character index where to insert.
 * @param max_pos	Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(char32_t *str, char32_t ch, size_t pos, size_t max_pos)
{
	const size_t len = wstr_length(str);

	if (pos > len || pos >= max_pos)
		return false;

	/*
	 * Open a gap at pos, moving from the terminator downwards so
	 * that no element is overwritten before it has been copied.
	 */
	for (size_t dst = len + 1; dst > pos; dst--)
		str[dst] = str[dst - 1];

	str[pos] = ch;

	return true;
}
1545
/** Remove a wide character from a wide string.
 *
 * Remove a wide character from a wide string at position
 * @a pos. The characters after the position, including the
 * terminator, are shifted down by one element.
 *
 * @param str	String to remove from.
 * @param pos	Character index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(char32_t *str, size_t pos)
{
	const size_t len = wstr_length(str);

	if (pos >= len)
		return false;

	/* Close the gap; the final step copies the terminator. */
	for (char32_t *slot = str + pos; slot < str + len; slot++)
		slot[0] = slot[1];

	return true;
}
1571
/** Duplicate string.
 *
 * Allocate a new buffer with malloc() and copy the source string,
 * including its null terminator, into it. The copy is then run through
 * _sanitize_string(), which is expected to leave a well-formed UTF-8
 * string; the duplicate can therefore differ from the source string on
 * the byte level.
 *
 * @param src	Source string.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	/* Size in bytes including the null terminator. */
	const size_t nbytes = _str_size(src) + 1;

	char *copy = malloc(nbytes);
	if (copy == NULL)
		return NULL;

	memcpy(copy, src, nbytes);
	_sanitize_string(copy, nbytes);
	return copy;
}
1599
/** Duplicate string with size limit.
 *
 * Allocate a new buffer with malloc() and copy the leading part of the
 * source string into it. The number of bytes copied is whatever
 * _str_nsize() reports for @a src and @a n (presumably the string size
 * capped at @a n); one additional byte is allocated for the terminator.
 *
 * The copy is run through _sanitize_string(), which is expected to
 * leave a well-formed UTF-8 string; the duplicate can therefore differ
 * from the source string on the byte level. It is always
 * null-terminated.
 *
 * @param src	Source string.
 * @param n	Maximum number of bytes to duplicate.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	const size_t nbytes = _str_nsize(src, n);

	char *copy = malloc(nbytes + 1);
	if (copy == NULL)
		return NULL;

	memcpy(copy, src, nbytes);
	_sanitize_string(copy, nbytes);

	/* The copied range does not include a terminator; add one. */
	copy[nbytes] = 0;
	return copy;
}
1633
/** Split string by delimiters.
 *
 * Skips any delimiter characters at the start of @a s, then scans up to
 * the next delimiter (or the end of the string). The delimiter that ends
 * the token is overwritten with a null terminator, so @a s is modified.
 *
 * @param s	String to be tokenized. NULL is accepted and yields NULL.
 * @param delim	String with the delimiters.
 * @param next	Variable which will receive the pointer to the
 *		continuation of the string following the first
 *		occurrence of any of the delimiter characters (or the
 *		end of the string if no delimiter follows the token).
 *		It is written even when no token is found.
 *		May be NULL.
 * @return	Pointer to the prefix of @a s before the first
 *		delimiter character. NULL if no such prefix
 *		exists.
 */
char *str_tok(char *s, const char *delim, char **next)
{
	if (s == NULL)
		return NULL;

	const size_t limit = str_size(s);
	size_t probe = 0;
	size_t token_off = 0;
	char32_t ch;

	/* Skip over leading delimiters. */
	for (;;) {
		ch = str_decode(s, &probe, limit);
		if (ch == 0 || str_chr(delim, ch) == NULL)
			break;
		token_off = probe;
	}

	/* Rescan from the token start and skip over token characters. */
	size_t end_off = token_off;

	probe = token_off;
	for (;;) {
		ch = str_decode(s, &probe, limit);
		if (ch == 0 || str_chr(delim, ch) != NULL)
			break;
		end_off = probe;
	}

	/*
	 * If a delimiter ended the scan, probe already points past it;
	 * otherwise the string ended and the continuation is its terminator.
	 */
	if (next != NULL)
		*next = (ch != 0) ? &s[probe] : &s[end_off];

	if (end_off == token_off)
		return NULL; /* No more tokens. */

	/* Overwrite delimiter with NULL terminator. */
	s[end_off] = '\0';
	return &s[token_off];
}
1680
/** Scale a value down and pick the matching decimal (SI) prefix.
 *
 * The value is divided by a power of 1000 and the corresponding prefix
 * letter is stored in @a suffix. A prefix is only applied once the value
 * exceeds 1000 units of that prefix (10 units for the largest one), so
 * the scaled number keeps several significant digits. Values up to
 * 1000000 are returned unscaled with a space as the suffix.
 *
 * @param val		Value to scale.
 * @param rv		Receives the scaled value (integer division).
 * @param suffix	Receives the prefix letter: ' ', 'k', 'M', 'G',
 *			'T', 'P' or 'E'.
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/* Units of 10^18: exa. */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* Units of 10^15: peta. */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		/* Units of 10^12: tera. */
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		/* Units of 10^9: giga. */
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		/* Units of 10^6: mega. */
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		/* Units of 10^3: kilo. */
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
1706
/** Scale a byte count down and pick the matching binary (IEC) unit.
 *
 * The value is divided by a power of 1024 and a pointer to the
 * corresponding unit string is stored in @a suffix. A unit is only
 * applied once the value exceeds 1024 of that unit, so the scaled number
 * keeps several significant digits. Values up to 1048576 are returned
 * unscaled.
 *
 * @param val		Value (number of bytes) to scale.
 * @param rv		Receives the scaled value (integer division).
 * @param suffix	Receives a pointer to a string literal: "B",
 *			"KiB", "MiB", "GiB", "TiB" or "PiB".
 * @param fixed		Only affects unscaled values: if true, the
 *			space-padded variant of the "B" suffix is used.
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* Above 2^60: units of 2^50, i.e. pebibytes. */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		/* Above 2^50: units of 2^40. */
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		/* Above 2^40: units of 2^30. */
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		/* Above 2^30: units of 2^20. */
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		/* Above 2^20: units of 2^10. */
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		if (fixed)
			*suffix = "B ";
		else
			*suffix = "B";
	}
}
1733
1734/** @}
1735 */
Note: See TracBrowser for help on using the repository browser.