source: mainline/uspace/lib/c/generic/str.c@ 08e103d4

Last change on this file since 08e103d4 was 08e103d4, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 6 years ago

Use clearer naming for string length functions

This and the following commit change the names of functions, as well as
their documentation, to use unambiguous terms "bytes" and "code points"
instead of ambiguous terms "size", "length", and "characters".

  • Property mode set to 100644
File size: 45.0 KB
Line 
1/*
2 * Copyright (c) 2001-2004 Jakub Jermar
3 * Copyright (c) 2005 Martin Decky
4 * Copyright (c) 2008 Jiri Svoboda
5 * Copyright (c) 2011 Martin Sucha
6 * Copyright (c) 2011 Oleg Romanenko
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * - Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * - Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * - The name of the author may not be used to endorse or promote products
19 * derived from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33/** @addtogroup libc
34 * @{
35 */
36
37/**
38 * @file
39 * @brief String functions.
40 *
41 * Strings and characters use the Universal Character Set (UCS). Standard
42 * strings (called simply "strings") are encoded in UTF-8. Wide strings
43 * (encoded in UTF-32) are supported to a limited degree. A single character
44 * is represented as wchar_t.@n
45 *
46 * Overview of the terminology:@n
47 *
48 * Term Meaning
49 * -------------------- ----------------------------------------------------
50 * byte 8 bits stored in uint8_t (unsigned 8 bit integer)
51 *
52 * character UTF-32 encoded Unicode character, stored in wchar_t
53 * (signed 32 bit integer), code points 0 .. 1114111
54 * are valid
55 *
56 * ASCII character 7 bit encoded ASCII character, stored in char
57 * (usually signed 8 bit integer), code points 0 .. 127
58 * are valid
59 *
60 * string UTF-8 encoded NULL-terminated Unicode string, char *
61 *
62 * wide string UTF-32 encoded NULL-terminated Unicode string,
63 * wchar_t *
64 *
65 * [wide] string size number of BYTES in a [wide] string (excluding
66 * the NULL-terminator), size_t
67 *
68 * [wide] string length number of CHARACTERS in a [wide] string (excluding
69 * the NULL-terminator), size_t
70 *
71 * [wide] string width number of display cells on a monospace display taken
72 * by a [wide] string, size_t
73 *
74 *
75 * Overview of string metrics:@n
76 *
77 * Metric Abbrev. Type Meaning
78 * ------ ------ ------ -------------------------------------------------
79 * size n size_t number of BYTES in a string (excluding the
80 * NULL-terminator)
81 *
82 * length l size_t number of CHARACTERS in a string (excluding the
83 * null terminator)
84 *
85 * width w size_t number of display cells on a monospace display
86 * taken by a string
87 *
88 *
89 * Function naming prefixes:@n
90 *
91 * chr_ operate on characters
92 * ascii_ operate on ASCII characters
93 * str_ operate on strings
94 * wstr_ operate on wide strings
95 *
96 * [w]str_[n|l|w] operate on a prefix limited by size, length
97 * or width
98 *
99 *
100 * A specific character inside a [wide] string can be referred to by:@n
101 *
102 * pointer (char *, wchar_t *)
103 * byte offset (size_t)
104 * character index (size_t)
105 *
106 */
107
108#include <str.h>
109
110#include <assert.h>
111#include <ctype.h>
112#include <errno.h>
113#include <stdbool.h>
114#include <stddef.h>
115#include <stdint.h>
116#include <stdlib.h>
117
118#include <align.h>
119#include <mem.h>
120
/** Evaluate @a cond only if wchar_t is a signed type.
 *
 * When the compiler defines __WCHAR_UNSIGNED__ the condition is replaced
 * by the constant true (e.g. a "ch >= 0" test is then trivially satisfied).
 */
#ifdef __WCHAR_UNSIGNED__
#define WCHAR_SIGNED_CHECK(cond)  (true)
#else
#define WCHAR_SIGNED_CHECK(cond)  (cond)
#endif

/** Byte mask consisting of lowest @a n bits (out of 8), 0 <= n <= 8 */
#define LO_MASK_8(n)  ((uint8_t) ((1U << (n)) - 1))

/** Mask consisting of lowest @a n bits (out of 32), 0 <= n <= 31
 *
 * The shift is done on an unsigned 32-bit operand so that no signed
 * overflow can occur.
 */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1))

/** Byte mask consisting of highest @a n bits (out of 8), 0 <= n <= 8
 *
 * The result is cast back to uint8_t because the ~ operator works on the
 * promoted int and would otherwise produce a negative value.
 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
139
140/** Decode a single character from a string.
141 *
142 * Decode a single character from a string of size @a size. Decoding starts
143 * at @a offset and this offset is moved to the beginning of the next
144 * character. In case of decoding error, offset generally advances at least
145 * by one. However, offset is never moved beyond size.
146 *
147 * @param str String (not necessarily NULL-terminated).
148 * @param offset Byte offset in string where to start decoding.
149 * @param size Size of the string (in bytes).
150 *
151 * @return Value of decoded character, U_SPECIAL on decoding error or
152 * NULL if attempt to decode beyond @a size.
153 *
154 */
155wchar_t str_decode(const char *str, size_t *offset, size_t size)
156{
157 if (*offset + 1 > size)
158 return 0;
159
160 /* First byte read from string */
161 uint8_t b0 = (uint8_t) str[(*offset)++];
162
163 /* Determine code length */
164
165 unsigned int b0_bits; /* Data bits in first byte */
166 unsigned int cbytes; /* Number of continuation bytes */
167
168 if ((b0 & 0x80) == 0) {
169 /* 0xxxxxxx (Plain ASCII) */
170 b0_bits = 7;
171 cbytes = 0;
172 } else if ((b0 & 0xe0) == 0xc0) {
173 /* 110xxxxx 10xxxxxx */
174 b0_bits = 5;
175 cbytes = 1;
176 } else if ((b0 & 0xf0) == 0xe0) {
177 /* 1110xxxx 10xxxxxx 10xxxxxx */
178 b0_bits = 4;
179 cbytes = 2;
180 } else if ((b0 & 0xf8) == 0xf0) {
181 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
182 b0_bits = 3;
183 cbytes = 3;
184 } else {
185 /* 10xxxxxx -- unexpected continuation byte */
186 return U_SPECIAL;
187 }
188
189 if (*offset + cbytes > size)
190 return U_SPECIAL;
191
192 wchar_t ch = b0 & LO_MASK_8(b0_bits);
193
194 /* Decode continuation bytes */
195 while (cbytes > 0) {
196 uint8_t b = (uint8_t) str[(*offset)++];
197
198 /* Must be 10xxxxxx */
199 if ((b & 0xc0) != 0x80)
200 return U_SPECIAL;
201
202 /* Shift data bits to ch */
203 ch = (ch << CONT_BITS) | (wchar_t) (b & LO_MASK_8(CONT_BITS));
204 cbytes--;
205 }
206
207 return ch;
208}
209
210/** Decode a single character from a string to the left.
211 *
212 * Decode a single character from a string of size @a size. Decoding starts
213 * at @a offset and this offset is moved to the beginning of the previous
214 * character. In case of decoding error, offset generally decreases at least
215 * by one. However, offset is never moved before 0.
216 *
217 * @param str String (not necessarily NULL-terminated).
218 * @param offset Byte offset in string where to start decoding.
219 * @param size Size of the string (in bytes).
220 *
221 * @return Value of decoded character, U_SPECIAL on decoding error or
222 * NULL if attempt to decode beyond @a start of str.
223 *
224 */
225wchar_t str_decode_reverse(const char *str, size_t *offset, size_t size)
226{
227 if (*offset == 0)
228 return 0;
229
230 size_t processed = 0;
231 /* Continue while continuation bytes found */
232 while (*offset > 0 && processed < 4) {
233 uint8_t b = (uint8_t) str[--(*offset)];
234
235 if (processed == 0 && (b & 0x80) == 0) {
236 /* 0xxxxxxx (Plain ASCII) */
237 return b & 0x7f;
238 } else if ((b & 0xe0) == 0xc0 || (b & 0xf0) == 0xe0 ||
239 (b & 0xf8) == 0xf0) {
240 /* Start byte */
241 size_t start_offset = *offset;
242 return str_decode(str, &start_offset, size);
243 } else if ((b & 0xc0) != 0x80) {
244 /* Not a continuation byte */
245 return U_SPECIAL;
246 }
247 processed++;
248 }
249 /* Too many continuation bytes */
250 return U_SPECIAL;
251}
252
253/** Encode a single character to string representation.
254 *
255 * Encode a single character to string representation (i.e. UTF-8) and store
256 * it into a buffer at @a offset. Encoding starts at @a offset and this offset
257 * is moved to the position where the next character can be written to.
258 *
259 * @param ch Input character.
260 * @param str Output buffer.
261 * @param offset Byte offset where to start writing.
262 * @param size Size of the output buffer (in bytes).
263 *
264 * @return EOK if the character was encoded successfully, EOVERFLOW if there
265 * was not enough space in the output buffer or EINVAL if the character
266 * code was invalid.
267 */
268errno_t chr_encode(const wchar_t ch, char *str, size_t *offset, size_t size)
269{
270 if (*offset >= size)
271 return EOVERFLOW;
272
273 if (!chr_check(ch))
274 return EINVAL;
275
276 /*
277 * Unsigned version of ch (bit operations should only be done
278 * on unsigned types).
279 */
280 uint32_t cc = (uint32_t) ch;
281
282 /* Determine how many continuation bytes are needed */
283
284 unsigned int b0_bits; /* Data bits in first byte */
285 unsigned int cbytes; /* Number of continuation bytes */
286
287 if ((cc & ~LO_MASK_32(7)) == 0) {
288 b0_bits = 7;
289 cbytes = 0;
290 } else if ((cc & ~LO_MASK_32(11)) == 0) {
291 b0_bits = 5;
292 cbytes = 1;
293 } else if ((cc & ~LO_MASK_32(16)) == 0) {
294 b0_bits = 4;
295 cbytes = 2;
296 } else if ((cc & ~LO_MASK_32(21)) == 0) {
297 b0_bits = 3;
298 cbytes = 3;
299 } else {
300 /* Codes longer than 21 bits are not supported */
301 return EINVAL;
302 }
303
304 /* Check for available space in buffer */
305 if (*offset + cbytes >= size)
306 return EOVERFLOW;
307
308 /* Encode continuation bytes */
309 unsigned int i;
310 for (i = cbytes; i > 0; i--) {
311 str[*offset + i] = 0x80 | (cc & LO_MASK_32(CONT_BITS));
312 cc = cc >> CONT_BITS;
313 }
314
315 /* Encode first byte */
316 str[*offset] = (cc & LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1);
317
318 /* Advance offset */
319 *offset += cbytes + 1;
320
321 return EOK;
322}
323
/** Get size of string in bytes.
 *
 * Count the bytes occupied by the string @a str, not including the
 * NULL-terminator.
 *
 * @param str NULL-terminated string to measure.
 *
 * @return Number of bytes used by the string.
 *
 */
size_t str_bytes(const char *str)
{
	const char *end = str;

	/* Walk to the terminator; the distance travelled is the size */
	while (*end != 0)
		end++;

	return (size_t) (end - str);
}
343
/** Get size of wide string in bytes.
 *
 * Count the bytes occupied by the wide string @a str, not including the
 * NULL-terminator. This is the number of wide characters reported by
 * wstr_code_points() multiplied by sizeof(wchar_t).
 *
 * @param str NULL-terminated wide string to measure.
 *
 * @return Number of bytes used by the wide string.
 *
 */
size_t wstr_bytes(const wchar_t *str)
{
	const size_t code_points = wstr_code_points(str);

	return code_points * sizeof(wchar_t);
}
358
359/** Get size of string with length limit.
360 *
361 * Get the number of bytes which are used by up to @a max_len first
362 * characters in the string @a str. If @a max_len is greater than
363 * the length of @a str, the entire string is measured (excluding the
364 * NULL-terminator).
365 *
366 * @param str String to consider.
367 * @param max_len Maximum number of characters to measure.
368 *
369 * @return Number of bytes used by the characters.
370 *
371 */
372size_t str_lbytes(const char *str, size_t max_len)
373{
374 size_t len = 0;
375 size_t offset = 0;
376
377 while (len < max_len) {
378 if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
379 break;
380
381 len++;
382 }
383
384 return offset;
385}
386
/** Get size of string with size limit.
 *
 * Get the number of bytes which are used by the string @a str
 * (excluding the NULL-terminator), but no more than @a max_size bytes.
 *
 * At most @a max_size bytes of @a str are read, so the string does not
 * have to be NULL-terminated if it is at least @a max_size bytes long.
 *
 * @param str      String to consider.
 * @param max_size Maximum number of bytes to measure.
 *
 * @return Number of bytes used by the string
 *
 */
size_t str_nbytes(const char *str, size_t max_size)
{
	size_t size = 0;

	/* Check the limit first so that str[max_size] is never read */
	while ((size < max_size) && (str[size] != 0))
		size++;

	return size;
}
407
/** Get size of wide string with size limit.
 *
 * Get the number of bytes which are used by the wide string @a str
 * (excluding the NULL-terminator), but no more than @a max_size bytes.
 * The counting itself is delegated to wstr_ncode_points().
 *
 * @param str      Wide string to consider.
 * @param max_size Maximum number of bytes to measure.
 *
 * @return Number of bytes used by the wide string.
 *
 */
size_t wstr_nbytes(const wchar_t *str, size_t max_size)
{
	const size_t code_points = wstr_ncode_points(str, max_size);

	return code_points * sizeof(wchar_t);
}
423
/** Get size of wide string with length limit.
 *
 * Get the number of bytes which are used by up to @a max_len first
 * wide characters in the wide string @a str. If @a max_len is greater than
 * the length of @a str, the entire wide string is measured (excluding the
 * NULL-terminator).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lbytes(const wchar_t *str, size_t max_len)
{
	size_t len = 0;

	/*
	 * Count characters directly instead of converting max_len to a byte
	 * limit, which could wrap around for large values of max_len.
	 */
	while ((len < max_len) && (str[len] != 0))
		len++;

	return len * sizeof(wchar_t);
}
441
442/** Get number of characters in a string.
443 *
444 * @param str NULL-terminated string.
445 *
446 * @return Number of characters in string.
447 *
448 */
449size_t str_code_points(const char *str)
450{
451 size_t len = 0;
452 size_t offset = 0;
453
454 while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
455 len++;
456
457 return len;
458}
459
/** Get number of characters in a wide string.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return Number of wide characters in @a wstr preceding the terminator.
 *
 */
size_t wstr_code_points(const wchar_t *wstr)
{
	size_t count = 0;

	for (const wchar_t *cur = wstr; *cur != 0; cur++)
		count++;

	return count;
}
476
/** Get number of characters in a string with size limit.
 *
 * Decodes the string with str_decode(), never looking beyond the first
 * @a size bytes, and counts the decoded values until one decodes as zero
 * (terminator found or size limit reached).
 *
 * @param str  NULL-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in string.
 *
 */
size_t str_ncode_points(const char *str, size_t size)
{
	size_t pos = 0;
	size_t count = 0;

	for (;;) {
		if (str_decode(str, &pos, size) == 0)
			return count;

		count++;
	}
}
495
/** Get number of characters in a wide string with size limit.
 *
 * The byte limit is first aligned down to a multiple of sizeof(wchar_t)
 * using ALIGN_DOWN(); a trailing partial wide character is therefore never
 * inspected.
 *
 * @param str  NULL-terminated wide string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of wide characters in @a str within the limit.
 *
 */
size_t wstr_ncode_points(const wchar_t *str, size_t size)
{
	const size_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
	size_t count = 0;

	for (size_t used = 0; used < limit; used += sizeof(wchar_t)) {
		if (str[count] == 0)
			break;

		count++;
	}

	return count;
}
517
/** Get character display width on a character cell display.
 *
 * The current implementation reports one cell for every character,
 * regardless of its value.
 *
 * @param ch Character (not inspected).
 * @return Width of character in cells (always 1).
 */
size_t chr_width(wchar_t ch)
{
	(void) ch;

	return 1;
}
527
528/** Get string display width on a character cell display.
529 *
530 * @param str String
531 * @return Width of string in cells.
532 */
533size_t str_width(const char *str)
534{
535 size_t width = 0;
536 size_t offset = 0;
537 wchar_t ch;
538
539 while ((ch = str_decode(str, &offset, STR_NO_LIMIT)) != 0)
540 width += chr_width(ch);
541
542 return width;
543}
544
/** Check whether character is plain ASCII.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0 .. 127.
 *
 */
bool ascii_check(wchar_t ch)
{
	/* The lower bound only needs testing when wchar_t is signed */
	return WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 127);
}
557
/** Check whether character is valid.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the Unicode code point range
 *         0 .. 0x10ffff (1114111).
 *
 */
bool chr_check(wchar_t ch)
{
	/* The lower bound only needs testing when wchar_t is signed */
	return WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 0x10ffff);
}
570
571/** Compare two NULL terminated strings.
572 *
573 * Do a char-by-char comparison of two NULL-terminated strings.
574 * The strings are considered equal iff their length is equal
575 * and both strings consist of the same sequence of characters.
576 *
577 * A string S1 is less than another string S2 if it has a character with
578 * lower value at the first character position where the strings differ.
579 * If the strings differ in length, the shorter one is treated as if
580 * padded by characters with a value of zero.
581 *
582 * @param s1 First string to compare.
583 * @param s2 Second string to compare.
584 *
585 * @return 0 if the strings are equal, -1 if the first is less than the second,
586 * 1 if the second is less than the first.
587 *
588 */
589int str_cmp(const char *s1, const char *s2)
590{
591 wchar_t c1 = 0;
592 wchar_t c2 = 0;
593
594 size_t off1 = 0;
595 size_t off2 = 0;
596
597 while (true) {
598 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
599 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
600
601 if (c1 < c2)
602 return -1;
603
604 if (c1 > c2)
605 return 1;
606
607 if (c1 == 0 || c2 == 0)
608 break;
609 }
610
611 return 0;
612}
613
614/** Compare two NULL terminated strings with length limit.
615 *
616 * Do a char-by-char comparison of two NULL-terminated strings.
617 * The strings are considered equal iff
618 * min(str_code_points(s1), max_len) == min(str_code_points(s2), max_len)
619 * and both strings consist of the same sequence of characters,
620 * up to max_len characters.
621 *
622 * A string S1 is less than another string S2 if it has a character with
623 * lower value at the first character position where the strings differ.
624 * If the strings differ in length, the shorter one is treated as if
625 * padded by characters with a value of zero. Only the first max_len
626 * characters are considered.
627 *
628 * @param s1 First string to compare.
629 * @param s2 Second string to compare.
630 * @param max_len Maximum number of characters to consider.
631 *
632 * @return 0 if the strings are equal, -1 if the first is less than the second,
633 * 1 if the second is less than the first.
634 *
635 */
636int str_lcmp(const char *s1, const char *s2, size_t max_len)
637{
638 wchar_t c1 = 0;
639 wchar_t c2 = 0;
640
641 size_t off1 = 0;
642 size_t off2 = 0;
643
644 size_t len = 0;
645
646 while (true) {
647 if (len >= max_len)
648 break;
649
650 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
651 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
652
653 if (c1 < c2)
654 return -1;
655
656 if (c1 > c2)
657 return 1;
658
659 if (c1 == 0 || c2 == 0)
660 break;
661
662 ++len;
663 }
664
665 return 0;
666
667}
668
669/** Compare two NULL terminated strings in case-insensitive manner.
670 *
671 * Do a char-by-char comparison of two NULL-terminated strings.
672 * The strings are considered equal iff their length is equal
673 * and both strings consist of the same sequence of characters
674 * when converted to lower case.
675 *
676 * A string S1 is less than another string S2 if it has a character with
677 * lower value at the first character position where the strings differ.
678 * If the strings differ in length, the shorter one is treated as if
679 * padded by characters with a value of zero.
680 *
681 * @param s1 First string to compare.
682 * @param s2 Second string to compare.
683 *
684 * @return 0 if the strings are equal, -1 if the first is less than the second,
685 * 1 if the second is less than the first.
686 *
687 */
688int str_casecmp(const char *s1, const char *s2)
689{
690 wchar_t c1 = 0;
691 wchar_t c2 = 0;
692
693 size_t off1 = 0;
694 size_t off2 = 0;
695
696 while (true) {
697 c1 = tolower(str_decode(s1, &off1, STR_NO_LIMIT));
698 c2 = tolower(str_decode(s2, &off2, STR_NO_LIMIT));
699
700 if (c1 < c2)
701 return -1;
702
703 if (c1 > c2)
704 return 1;
705
706 if (c1 == 0 || c2 == 0)
707 break;
708 }
709
710 return 0;
711}
712
713/** Compare two NULL terminated strings with length limit in case-insensitive
714 * manner.
715 *
716 * Do a char-by-char comparison of two NULL-terminated strings.
717 * The strings are considered equal iff
718 * min(str_code_points(s1), max_len) == min(str_code_points(s2), max_len)
719 * and both strings consist of the same sequence of characters,
720 * up to max_len characters.
721 *
722 * A string S1 is less than another string S2 if it has a character with
723 * lower value at the first character position where the strings differ.
724 * If the strings differ in length, the shorter one is treated as if
725 * padded by characters with a value of zero. Only the first max_len
726 * characters are considered.
727 *
728 * @param s1 First string to compare.
729 * @param s2 Second string to compare.
730 * @param max_len Maximum number of characters to consider.
731 *
732 * @return 0 if the strings are equal, -1 if the first is less than the second,
733 * 1 if the second is less than the first.
734 *
735 */
736int str_lcasecmp(const char *s1, const char *s2, size_t max_len)
737{
738 wchar_t c1 = 0;
739 wchar_t c2 = 0;
740
741 size_t off1 = 0;
742 size_t off2 = 0;
743
744 size_t len = 0;
745
746 while (true) {
747 if (len >= max_len)
748 break;
749
750 c1 = tolower(str_decode(s1, &off1, STR_NO_LIMIT));
751 c2 = tolower(str_decode(s2, &off2, STR_NO_LIMIT));
752
753 if (c1 < c2)
754 return -1;
755
756 if (c1 > c2)
757 return 1;
758
759 if (c1 == 0 || c2 == 0)
760 break;
761
762 ++len;
763 }
764
765 return 0;
766
767}
768
769/** Test whether p is a prefix of s.
770 *
771 * Do a char-by-char comparison of two NULL-terminated strings
772 * and determine if p is a prefix of s.
773 *
774 * @param s The string in which to look
775 * @param p The string to check if it is a prefix of s
776 *
777 * @return true iff p is prefix of s else false
778 *
779 */
780bool str_test_prefix(const char *s, const char *p)
781{
782 wchar_t c1 = 0;
783 wchar_t c2 = 0;
784
785 size_t off1 = 0;
786 size_t off2 = 0;
787
788 while (true) {
789 c1 = str_decode(s, &off1, STR_NO_LIMIT);
790 c2 = str_decode(p, &off2, STR_NO_LIMIT);
791
792 if (c2 == 0)
793 return true;
794
795 if (c1 != c2)
796 return false;
797
798 if (c1 == 0)
799 break;
800 }
801
802 return false;
803}
804
805/** Copy string.
806 *
807 * Copy source string @a src to destination buffer @a dest.
808 * No more than @a size bytes are written. If the size of the output buffer
809 * is at least one byte, the output string will always be well-formed, i.e.
810 * null-terminated and containing only complete characters.
811 *
812 * @param dest Destination buffer.
813 * @param count Size of the destination buffer (must be > 0).
814 * @param src Source string.
815 *
816 */
817void str_cpy(char *dest, size_t size, const char *src)
818{
819 /* There must be space for a null terminator in the buffer. */
820 assert(size > 0);
821 assert(src != NULL);
822
823 size_t src_off = 0;
824 size_t dest_off = 0;
825
826 wchar_t ch;
827 while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
828 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
829 break;
830 }
831
832 dest[dest_off] = '\0';
833}
834
835/** Copy size-limited substring.
836 *
837 * Copy prefix of string @a src of max. size @a size to destination buffer
838 * @a dest. No more than @a size bytes are written. The output string will
839 * always be well-formed, i.e. null-terminated and containing only complete
840 * characters.
841 *
842 * No more than @a n bytes are read from the input string, so it does not
843 * have to be null-terminated.
844 *
845 * @param dest Destination buffer.
846 * @param count Size of the destination buffer (must be > 0).
847 * @param src Source string.
848 * @param n Maximum number of bytes to read from @a src.
849 *
850 */
851void str_ncpy(char *dest, size_t size, const char *src, size_t n)
852{
853 /* There must be space for a null terminator in the buffer. */
854 assert(size > 0);
855
856 size_t src_off = 0;
857 size_t dest_off = 0;
858
859 wchar_t ch;
860 while ((ch = str_decode(src, &src_off, n)) != 0) {
861 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
862 break;
863 }
864
865 dest[dest_off] = '\0';
866}
867
/** Append one string to another.
 *
 * Append source string @a src to the string already present in the
 * destination buffer @a dest. The total size of the destination buffer is
 * @a size. The copying is done by str_cpy() into the space that follows
 * the existing contents, so the result stays null-terminated and contains
 * only complete characters.
 *
 * If the existing string already occupies @a size bytes or more, nothing
 * is done.
 *
 * @param dest Destination buffer holding a NULL-terminated string.
 * @param size Size of the destination buffer in bytes.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	const size_t used = str_bytes(dest);

	if (used < size)
		str_cpy(dest + used, size - used, src);
}
889
890/** Convert space-padded ASCII to string.
891 *
892 * Common legacy text encoding in hardware is 7-bit ASCII fitted into
893 * a fixed-width byte buffer (bit 7 always zero), right-padded with spaces
894 * (ASCII 0x20). Convert space-padded ascii to string representation.
895 *
896 * If the text does not fit into the destination buffer, the function converts
897 * as many characters as possible and returns EOVERFLOW.
898 *
899 * If the text contains non-ASCII bytes (with bit 7 set), the whole string is
900 * converted anyway and invalid characters are replaced with question marks
901 * (U_SPECIAL) and the function returns EIO.
902 *
903 * Regardless of return value upon return @a dest will always be well-formed.
904 *
905 * @param dest Destination buffer
906 * @param size Size of destination buffer
907 * @param src Space-padded ASCII.
908 * @param n Size of the source buffer in bytes.
909 *
910 * @return EOK on success, EOVERFLOW if the text does not fit
911 * destination buffer, EIO if the text contains
912 * non-ASCII bytes.
913 */
914errno_t spascii_to_str(char *dest, size_t size, const uint8_t *src, size_t n)
915{
916 size_t sidx;
917 size_t didx;
918 size_t dlast;
919 uint8_t byte;
920 errno_t rc;
921 errno_t result;
922
923 /* There must be space for a null terminator in the buffer. */
924 assert(size > 0);
925 result = EOK;
926
927 didx = 0;
928 dlast = 0;
929 for (sidx = 0; sidx < n; ++sidx) {
930 byte = src[sidx];
931 if (!ascii_check(byte)) {
932 byte = U_SPECIAL;
933 result = EIO;
934 }
935
936 rc = chr_encode(byte, dest, &didx, size - 1);
937 if (rc != EOK) {
938 assert(rc == EOVERFLOW);
939 dest[didx] = '\0';
940 return rc;
941 }
942
943 /* Remember dest index after last non-empty character */
944 if (byte != 0x20)
945 dlast = didx;
946 }
947
948 /* Terminate string after last non-empty character */
949 dest[dlast] = '\0';
950 return result;
951}
952
953/** Convert wide string to string.
954 *
955 * Convert wide string @a src to string. The output is written to the buffer
956 * specified by @a dest and @a size. @a size must be non-zero and the string
957 * written will always be well-formed.
958 *
959 * @param dest Destination buffer.
960 * @param size Size of the destination buffer.
961 * @param src Source wide string.
962 */
963void wstr_to_str(char *dest, size_t size, const wchar_t *src)
964{
965 wchar_t ch;
966 size_t src_idx;
967 size_t dest_off;
968
969 /* There must be space for a null terminator in the buffer. */
970 assert(size > 0);
971
972 src_idx = 0;
973 dest_off = 0;
974
975 while ((ch = src[src_idx++]) != 0) {
976 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
977 break;
978 }
979
980 dest[dest_off] = '\0';
981}
982
983/** Convert UTF16 string to string.
984 *
985 * Convert utf16 string @a src to string. The output is written to the buffer
986 * specified by @a dest and @a size. @a size must be non-zero and the string
987 * written will always be well-formed. Surrogate pairs also supported.
988 *
989 * @param dest Destination buffer.
990 * @param size Size of the destination buffer.
991 * @param src Source utf16 string.
992 *
993 * @return EOK, if success, an error code otherwise.
994 */
995errno_t utf16_to_str(char *dest, size_t size, const uint16_t *src)
996{
997 size_t idx = 0, dest_off = 0;
998 wchar_t ch;
999 errno_t rc = EOK;
1000
1001 /* There must be space for a null terminator in the buffer. */
1002 assert(size > 0);
1003
1004 while (src[idx]) {
1005 if ((src[idx] & 0xfc00) == 0xd800) {
1006 if (src[idx + 1] && (src[idx + 1] & 0xfc00) == 0xdc00) {
1007 ch = 0x10000;
1008 ch += (src[idx] & 0x03FF) << 10;
1009 ch += (src[idx + 1] & 0x03FF);
1010 idx += 2;
1011 } else
1012 break;
1013 } else {
1014 ch = src[idx];
1015 idx++;
1016 }
1017 rc = chr_encode(ch, dest, &dest_off, size - 1);
1018 if (rc != EOK)
1019 break;
1020 }
1021 dest[dest_off] = '\0';
1022 return rc;
1023}
1024
1025/** Convert string to UTF16 string.
1026 *
1027 * Convert string @a src to utf16 string. The output is written to the buffer
1028 * specified by @a dest and @a dlen. @a dlen must be non-zero and the string
1029 * written will always be well-formed. Surrogate pairs also supported.
1030 *
1031 * @param dest Destination buffer.
1032 * @param dlen Number of utf16 characters that fit in the destination buffer.
1033 * @param src Source string.
1034 *
1035 * @return EOK, if success, an error code otherwise.
1036 */
1037errno_t str_to_utf16(uint16_t *dest, size_t dlen, const char *src)
1038{
1039 errno_t rc = EOK;
1040 size_t offset = 0;
1041 size_t idx = 0;
1042 wchar_t c;
1043
1044 assert(dlen > 0);
1045
1046 while ((c = str_decode(src, &offset, STR_NO_LIMIT)) != 0) {
1047 if (c > 0x10000) {
1048 if (idx + 2 >= dlen - 1) {
1049 rc = EOVERFLOW;
1050 break;
1051 }
1052 c = (c - 0x10000);
1053 dest[idx] = 0xD800 | (c >> 10);
1054 dest[idx + 1] = 0xDC00 | (c & 0x3FF);
1055 idx++;
1056 } else {
1057 dest[idx] = c;
1058 }
1059
1060 idx++;
1061 if (idx >= dlen - 1) {
1062 rc = EOVERFLOW;
1063 break;
1064 }
1065 }
1066
1067 dest[idx] = '\0';
1068 return rc;
1069}
1070
/** Get size of UTF-16 string.
 *
 * Count the 16-bit words occupied by the UTF-16 string @a ustr, not
 * including the zero terminator.
 *
 * @param ustr Zero-terminated UTF-16 string to measure.
 *
 * @return Number of words used by the UTF-16 string
 *
 */
size_t utf16_wsize(const uint16_t *ustr)
{
	size_t words = 0;

	while (ustr[words] != 0)
		words++;

	return words;
}
1090
1091/** Convert wide string to new string.
1092 *
1093 * Convert wide string @a src to string. Space for the new string is allocated
1094 * on the heap.
1095 *
1096 * @param src Source wide string.
1097 * @return New string.
1098 */
1099char *wstr_to_astr(const wchar_t *src)
1100{
1101 char dbuf[STR_BOUNDS(1)];
1102 char *str;
1103 wchar_t ch;
1104
1105 size_t src_idx;
1106 size_t dest_off;
1107 size_t dest_size;
1108
1109 /* Compute size of encoded string. */
1110
1111 src_idx = 0;
1112 dest_size = 0;
1113
1114 while ((ch = src[src_idx++]) != 0) {
1115 dest_off = 0;
1116 if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
1117 break;
1118 dest_size += dest_off;
1119 }
1120
1121 str = malloc(dest_size + 1);
1122 if (str == NULL)
1123 return NULL;
1124
1125 /* Encode string. */
1126
1127 src_idx = 0;
1128 dest_off = 0;
1129
1130 while ((ch = src[src_idx++]) != 0) {
1131 if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
1132 break;
1133 }
1134
1135 str[dest_size] = '\0';
1136 return str;
1137}
1138
1139/** Convert string to wide string.
1140 *
1141 * Convert string @a src to wide string. The output is written to the
1142 * buffer specified by @a dest and @a dlen. @a dlen must be non-zero
1143 * and the wide string written will always be null-terminated.
1144 *
1145 * @param dest Destination buffer.
1146 * @param dlen Length of destination buffer (number of wchars).
1147 * @param src Source string.
1148 */
1149void str_to_wstr(wchar_t *dest, size_t dlen, const char *src)
1150{
1151 size_t offset;
1152 size_t di;
1153 wchar_t c;
1154
1155 assert(dlen > 0);
1156
1157 offset = 0;
1158 di = 0;
1159
1160 do {
1161 if (di >= dlen - 1)
1162 break;
1163
1164 c = str_decode(src, &offset, STR_NO_LIMIT);
1165 dest[di++] = c;
1166 } while (c != '\0');
1167
1168 dest[dlen - 1] = '\0';
1169}
1170
/** Convert string to a newly allocated wide string.
 *
 * Allocates room for str_code_points(@a str) wide characters plus the
 * terminator and fills it using str_to_wstr().
 *
 * @param str Source string.
 *
 * @return New null-terminated wide string allocated with calloc(),
 *         or NULL if the allocation failed.
 */
wchar_t *str_to_awstr(const char *str)
{
	size_t slots = str_code_points(str) + 1;
	wchar_t *wide = calloc(slots, sizeof(wchar_t));

	if (wide != NULL)
		str_to_wstr(wide, slots, str);

	return wide;
}
1189
1190/** Find first occurence of character in string.
1191 *
1192 * @param str String to search.
1193 * @param ch Character to look for.
1194 *
1195 * @return Pointer to character in @a str or NULL if not found.
1196 */
1197char *str_chr(const char *str, wchar_t ch)
1198{
1199 wchar_t acc;
1200 size_t off = 0;
1201 size_t last = 0;
1202
1203 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
1204 if (acc == ch)
1205 return (char *) (str + last);
1206 last = off;
1207 }
1208
1209 return NULL;
1210}
1211
1212/** Find first occurence of substring in string.
1213 *
1214 * @param hs Haystack (string)
1215 * @param n Needle (substring to look for)
1216 *
1217 * @return Pointer to character in @a hs or @c NULL if not found.
1218 */
1219char *str_str(const char *hs, const char *n)
1220{
1221 size_t off = 0;
1222
1223 if (str_lcmp(hs, n, str_code_points(n)) == 0)
1224 return (char *)hs;
1225
1226 while (str_decode(hs, &off, STR_NO_LIMIT) != 0) {
1227 if (str_lcmp(hs + off, n, str_code_points(n)) == 0)
1228 return (char *)(hs + off);
1229 }
1230
1231 return NULL;
1232}
1233
1234/** Removes specified trailing characters from a string.
1235 *
1236 * @param str String to remove from.
1237 * @param ch Character to remove.
1238 */
1239void str_rtrim(char *str, wchar_t ch)
1240{
1241 size_t off = 0;
1242 size_t pos = 0;
1243 wchar_t c;
1244 bool update_last_chunk = true;
1245 char *last_chunk = NULL;
1246
1247 while ((c = str_decode(str, &off, STR_NO_LIMIT))) {
1248 if (c != ch) {
1249 update_last_chunk = true;
1250 last_chunk = NULL;
1251 } else if (update_last_chunk) {
1252 update_last_chunk = false;
1253 last_chunk = (str + pos);
1254 }
1255 pos = off;
1256 }
1257
1258 if (last_chunk)
1259 *last_chunk = '\0';
1260}
1261
1262/** Removes specified leading characters from a string.
1263 *
1264 * @param str String to remove from.
1265 * @param ch Character to remove.
1266 */
1267void str_ltrim(char *str, wchar_t ch)
1268{
1269 wchar_t acc;
1270 size_t off = 0;
1271 size_t pos = 0;
1272 size_t str_sz = str_bytes(str);
1273
1274 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
1275 if (acc != ch)
1276 break;
1277 else
1278 pos = off;
1279 }
1280
1281 if (pos > 0) {
1282 memmove(str, &str[pos], str_sz - pos);
1283 pos = str_sz - pos;
1284 str[pos] = '\0';
1285 }
1286}
1287
1288/** Find last occurence of character in string.
1289 *
1290 * @param str String to search.
1291 * @param ch Character to look for.
1292 *
1293 * @return Pointer to character in @a str or NULL if not found.
1294 */
1295char *str_rchr(const char *str, wchar_t ch)
1296{
1297 wchar_t acc;
1298 size_t off = 0;
1299 size_t last = 0;
1300 const char *res = NULL;
1301
1302 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
1303 if (acc == ch)
1304 res = (str + last);
1305 last = off;
1306 }
1307
1308 return (char *) res;
1309}
1310
/** Insert a wide character into a wide string.
 *
 * Shifts every element from position @a pos up to and including the
 * terminator one slot towards the end and stores @a ch at @a pos.
 *
 * NOTE(review): the bounds check only compares @a pos with @a max_pos,
 * while the shift writes index wstr_code_points(str) + 1. Whether callers
 * guarantee that much room is not visible here -- confirm.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	const size_t len = wstr_code_points(str);

	if (pos > len)
		return false;

	if (pos + 1 > max_pos)
		return false;

	/* Move the tail (terminator included) one slot up, back to front. */
	size_t idx;
	for (idx = len + 1; idx > pos; idx--)
		str[idx] = str[idx - 1];

	str[pos] = ch;
	return true;
}
1340
/** Remove a wide character from a wide string.
 *
 * Overwrites the element at @a pos by shifting every following element,
 * the terminator included, one slot towards the beginning.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	const size_t len = wstr_code_points(str);

	if (pos >= len)
		return false;

	size_t idx;
	for (idx = pos; idx < len; idx++)
		str[idx] = str[idx + 1];

	return true;
}
1366
/** Duplicate string.
 *
 * Allocates str_bytes(@a src) + 1 bytes with malloc() and fills the new
 * buffer from @a src using str_cpy().
 *
 * @param src Source string.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	size_t bytes = str_bytes(src) + 1;
	char *copy = malloc(bytes);

	if (copy != NULL)
		str_cpy(copy, bytes, src);

	return copy;
}
1393
/** Duplicate string with size limit.
 *
 * Allocates a buffer with malloc() and copies at most @a n bytes of
 * @a src into it using str_ncpy(). The buffer holds the smaller of
 * str_bytes(@a src) and @a n bytes, plus one byte for the terminator.
 *
 * @param src Source string.
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	size_t src_bytes = str_bytes(src);
	size_t copy_bytes = (src_bytes > n) ? n : src_bytes;

	char *copy = malloc(copy_bytes + 1);
	if (copy == NULL)
		return NULL;

	str_ncpy(copy, copy_bytes + 1, src, copy_bytes);
	return copy;
}
1427
/** Split string by delimiters.
 *
 * Skips any leading delimiter characters, then collects characters up to
 * the next delimiter or the end of the string. The delimiter that ends
 * the token is overwritten with a null terminator.
 *
 * @param s     String to be tokenized (modified in place). If NULL,
 *              NULL is returned and @a next is left untouched.
 * @param delim String with the delimiters.
 * @param next  Variable which will receive the pointer to the
 *              continuation of the string: just past the delimiter that
 *              ended the token, or the end of the string if no delimiter
 *              followed. It is written even when no token is found.
 *              May be NULL.
 * @return      Pointer to the token, or NULL if @a s contains no more
 *              tokens.
 */
char *str_tok(char *s, const char *delim, char **next)
{
	if (s == NULL)
		return NULL;

	const size_t limit = str_bytes(s);
	size_t probe = 0;
	size_t mark = 0;
	wchar_t ch;

	/* Skip over leading delimiters. */
	for (;;) {
		ch = str_decode(s, &probe, limit);
		if ((ch == 0) || (str_chr(delim, ch) == NULL))
			break;
		mark = probe;
	}
	char *token = s + mark;

	/* Skip over token characters. */
	probe = mark;
	for (;;) {
		ch = str_decode(s, &probe, limit);
		if ((ch == 0) || (str_chr(delim, ch) != NULL))
			break;
		mark = probe;
	}
	char *stop = s + mark;

	/* ch is non-zero only if a delimiter ended the token. */
	if (next != NULL)
		*next = (ch != 0) ? (s + probe) : stop;

	if (token == stop)
		return NULL;  /* No more tokens. */

	/* Overwrite delimiter with NULL terminator. */
	*stop = '\0';
	return token;
}
1474
1475/** Convert string to uint64_t (internal variant).
1476 *
1477 * @param nptr Pointer to string.
1478 * @param endptr Pointer to the first invalid character is stored here.
1479 * @param base Zero or number between 2 and 36 inclusive.
1480 * @param neg Indication of unary minus is stored here.
1481 * @apram result Result of the conversion.
1482 *
1483 * @return EOK if conversion was successful.
1484 *
1485 */
1486static errno_t str_uint(const char *nptr, char **endptr, unsigned int base,
1487 bool *neg, uint64_t *result)
1488{
1489 assert(endptr != NULL);
1490 assert(neg != NULL);
1491 assert(result != NULL);
1492
1493 *neg = false;
1494 const char *str = nptr;
1495
1496 /* Ignore leading whitespace */
1497 while (isspace(*str))
1498 str++;
1499
1500 if (*str == '-') {
1501 *neg = true;
1502 str++;
1503 } else if (*str == '+')
1504 str++;
1505
1506 if (base == 0) {
1507 /* Decode base if not specified */
1508 base = 10;
1509
1510 if (*str == '0') {
1511 base = 8;
1512 str++;
1513
1514 switch (*str) {
1515 case 'b':
1516 case 'B':
1517 base = 2;
1518 str++;
1519 break;
1520 case 'o':
1521 case 'O':
1522 base = 8;
1523 str++;
1524 break;
1525 case 'd':
1526 case 'D':
1527 case 't':
1528 case 'T':
1529 base = 10;
1530 str++;
1531 break;
1532 case 'x':
1533 case 'X':
1534 base = 16;
1535 str++;
1536 break;
1537 default:
1538 str--;
1539 }
1540 }
1541 } else {
1542 /* Check base range */
1543 if ((base < 2) || (base > 36)) {
1544 *endptr = (char *) str;
1545 return EINVAL;
1546 }
1547 }
1548
1549 *result = 0;
1550 const char *startstr = str;
1551
1552 while (*str != 0) {
1553 unsigned int digit;
1554
1555 if ((*str >= 'a') && (*str <= 'z'))
1556 digit = *str - 'a' + 10;
1557 else if ((*str >= 'A') && (*str <= 'Z'))
1558 digit = *str - 'A' + 10;
1559 else if ((*str >= '0') && (*str <= '9'))
1560 digit = *str - '0';
1561 else
1562 break;
1563
1564 if (digit >= base)
1565 break;
1566
1567 uint64_t prev = *result;
1568 *result = (*result) * base + digit;
1569
1570 if (*result < prev) {
1571 /* Overflow */
1572 *endptr = (char *) str;
1573 return EOVERFLOW;
1574 }
1575
1576 str++;
1577 }
1578
1579 if (str == startstr) {
1580 /*
1581 * No digits were decoded => first invalid character is
1582 * the first character of the string.
1583 */
1584 str = nptr;
1585 }
1586
1587 *endptr = (char *) str;
1588
1589 if (str == nptr)
1590 return EINVAL;
1591
1592 return EOK;
1593}
1594
1595/** Convert string to uint8_t.
1596 *
1597 * @param nptr Pointer to string.
1598 * @param endptr If not NULL, pointer to the first invalid character
1599 * is stored here.
1600 * @param base Zero or number between 2 and 36 inclusive.
1601 * @param strict Do not allow any trailing characters.
1602 * @param result Result of the conversion.
1603 *
1604 * @return EOK if conversion was successful.
1605 *
1606 */
1607errno_t str_uint8_t(const char *nptr, const char **endptr, unsigned int base,
1608 bool strict, uint8_t *result)
1609{
1610 assert(result != NULL);
1611
1612 bool neg;
1613 char *lendptr;
1614 uint64_t res;
1615 errno_t ret = str_uint(nptr, &lendptr, base, &neg, &res);
1616
1617 if (endptr != NULL)
1618 *endptr = (char *) lendptr;
1619
1620 if (ret != EOK)
1621 return ret;
1622
1623 /* Do not allow negative values */
1624 if (neg)
1625 return EINVAL;
1626
1627 /*
1628 * Check whether we are at the end of
1629 * the string in strict mode
1630 */
1631 if ((strict) && (*lendptr != 0))
1632 return EINVAL;
1633
1634 /* Check for overflow */
1635 uint8_t _res = (uint8_t) res;
1636 if (_res != res)
1637 return EOVERFLOW;
1638
1639 *result = _res;
1640
1641 return EOK;
1642}
1643
1644/** Convert string to uint16_t.
1645 *
1646 * @param nptr Pointer to string.
1647 * @param endptr If not NULL, pointer to the first invalid character
1648 * is stored here.
1649 * @param base Zero or number between 2 and 36 inclusive.
1650 * @param strict Do not allow any trailing characters.
1651 * @param result Result of the conversion.
1652 *
1653 * @return EOK if conversion was successful.
1654 *
1655 */
1656errno_t str_uint16_t(const char *nptr, const char **endptr, unsigned int base,
1657 bool strict, uint16_t *result)
1658{
1659 assert(result != NULL);
1660
1661 bool neg;
1662 char *lendptr;
1663 uint64_t res;
1664 errno_t ret = str_uint(nptr, &lendptr, base, &neg, &res);
1665
1666 if (endptr != NULL)
1667 *endptr = (char *) lendptr;
1668
1669 if (ret != EOK)
1670 return ret;
1671
1672 /* Do not allow negative values */
1673 if (neg)
1674 return EINVAL;
1675
1676 /*
1677 * Check whether we are at the end of
1678 * the string in strict mode
1679 */
1680 if ((strict) && (*lendptr != 0))
1681 return EINVAL;
1682
1683 /* Check for overflow */
1684 uint16_t _res = (uint16_t) res;
1685 if (_res != res)
1686 return EOVERFLOW;
1687
1688 *result = _res;
1689
1690 return EOK;
1691}
1692
1693/** Convert string to uint32_t.
1694 *
1695 * @param nptr Pointer to string.
1696 * @param endptr If not NULL, pointer to the first invalid character
1697 * is stored here.
1698 * @param base Zero or number between 2 and 36 inclusive.
1699 * @param strict Do not allow any trailing characters.
1700 * @param result Result of the conversion.
1701 *
1702 * @return EOK if conversion was successful.
1703 *
1704 */
1705errno_t str_uint32_t(const char *nptr, const char **endptr, unsigned int base,
1706 bool strict, uint32_t *result)
1707{
1708 assert(result != NULL);
1709
1710 bool neg;
1711 char *lendptr;
1712 uint64_t res;
1713 errno_t ret = str_uint(nptr, &lendptr, base, &neg, &res);
1714
1715 if (endptr != NULL)
1716 *endptr = (char *) lendptr;
1717
1718 if (ret != EOK)
1719 return ret;
1720
1721 /* Do not allow negative values */
1722 if (neg)
1723 return EINVAL;
1724
1725 /*
1726 * Check whether we are at the end of
1727 * the string in strict mode
1728 */
1729 if ((strict) && (*lendptr != 0))
1730 return EINVAL;
1731
1732 /* Check for overflow */
1733 uint32_t _res = (uint32_t) res;
1734 if (_res != res)
1735 return EOVERFLOW;
1736
1737 *result = _res;
1738
1739 return EOK;
1740}
1741
1742/** Convert string to uint64_t.
1743 *
1744 * @param nptr Pointer to string.
1745 * @param endptr If not NULL, pointer to the first invalid character
1746 * is stored here.
1747 * @param base Zero or number between 2 and 36 inclusive.
1748 * @param strict Do not allow any trailing characters.
1749 * @param result Result of the conversion.
1750 *
1751 * @return EOK if conversion was successful.
1752 *
1753 */
1754errno_t str_uint64_t(const char *nptr, const char **endptr, unsigned int base,
1755 bool strict, uint64_t *result)
1756{
1757 assert(result != NULL);
1758
1759 bool neg;
1760 char *lendptr;
1761 errno_t ret = str_uint(nptr, &lendptr, base, &neg, result);
1762
1763 if (endptr != NULL)
1764 *endptr = (char *) lendptr;
1765
1766 if (ret != EOK)
1767 return ret;
1768
1769 /* Do not allow negative values */
1770 if (neg)
1771 return EINVAL;
1772
1773 /*
1774 * Check whether we are at the end of
1775 * the string in strict mode
1776 */
1777 if ((strict) && (*lendptr != 0))
1778 return EINVAL;
1779
1780 return EOK;
1781}
1782
1783/** Convert string to int64_t.
1784 *
1785 * @param nptr Pointer to string.
1786 * @param endptr If not NULL, pointer to the first invalid character
1787 * is stored here.
1788 * @param base Zero or number between 2 and 36 inclusive.
1789 * @param strict Do not allow any trailing characters.
1790 * @param result Result of the conversion.
1791 *
1792 * @return EOK if conversion was successful.
1793 *
1794 */
1795int str_int64_t(const char *nptr, const char **endptr, unsigned int base,
1796 bool strict, int64_t *result)
1797{
1798 assert(result != NULL);
1799
1800 bool neg;
1801 char *lendptr;
1802 uint64_t unsigned_result;
1803 int ret = str_uint(nptr, &lendptr, base, &neg, &unsigned_result);
1804
1805 if (endptr != NULL)
1806 *endptr = (char *) lendptr;
1807
1808 if (ret != EOK)
1809 return ret;
1810
1811 /* Do not allow negative values */
1812 if (neg) {
1813 if (unsigned_result == UINT64_MAX)
1814 return EINVAL;
1815
1816 *result = -(int64_t) unsigned_result;
1817 } else
1818 *result = unsigned_result;
1819
1820 /*
1821 * Check whether we are at the end of
1822 * the string in strict mode
1823 */
1824 if ((strict) && (*lendptr != 0))
1825 return EINVAL;
1826
1827 return EOK;
1828}
1829
1830/** Convert string to size_t.
1831 *
1832 * @param nptr Pointer to string.
1833 * @param endptr If not NULL, pointer to the first invalid character
1834 * is stored here.
1835 * @param base Zero or number between 2 and 36 inclusive.
1836 * @param strict Do not allow any trailing characters.
1837 * @param result Result of the conversion.
1838 *
1839 * @return EOK if conversion was successful.
1840 *
1841 */
1842errno_t str_size_t(const char *nptr, const char **endptr, unsigned int base,
1843 bool strict, size_t *result)
1844{
1845 assert(result != NULL);
1846
1847 bool neg;
1848 char *lendptr;
1849 uint64_t res;
1850 errno_t ret = str_uint(nptr, &lendptr, base, &neg, &res);
1851
1852 if (endptr != NULL)
1853 *endptr = (char *) lendptr;
1854
1855 if (ret != EOK)
1856 return ret;
1857
1858 /* Do not allow negative values */
1859 if (neg)
1860 return EINVAL;
1861
1862 /*
1863 * Check whether we are at the end of
1864 * the string in strict mode
1865 */
1866 if ((strict) && (*lendptr != 0))
1867 return EINVAL;
1868
1869 /* Check for overflow */
1870 size_t _res = (size_t) res;
1871 if (_res != res)
1872 return EOVERFLOW;
1873
1874 *result = _res;
1875
1876 return EOK;
1877}
1878
/** Scale a value down and pick the matching decimal (SI) prefix.
 *
 * A value is scaled by a power of 1000 once it exceeds ten times the next
 * larger power, so the scaled value keeps at least five digits. Values up
 * to and including 1000000 are returned unscaled with a blank suffix.
 *
 * @param val    Value to scale.
 * @param rv     The scaled value (integer division) is stored here.
 * @param suffix The prefix letter matching the divisor is stored here:
 *               'k' (10^3), 'M' (10^6), 'G' (10^9), 'T' (10^12),
 *               'P' (10^15), 'E' (10^18), or ' ' if unscaled.
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/* Divided by 10^18, i.e. expressed in exa units. */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* Divided by 10^15, i.e. expressed in peta units. */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
1904
/** Scale a byte count down and pick the matching binary (IEC) unit.
 *
 * A value is scaled by a power of 1024 once it exceeds 1024 times that
 * power. Values up to and including 1048576 are returned unscaled, in
 * bytes.
 *
 * @param val    Value (number of bytes) to scale.
 * @param rv     The scaled value (integer division) is stored here.
 * @param suffix A pointer to a string literal naming the unit is stored
 *               here: "KiB" (2^10), "MiB" (2^20), "GiB" (2^30),
 *               "TiB" (2^40), "PiB" (2^50), or the byte unit.
 * @param fixed  If true, the byte unit is padded with spaces to the same
 *               three-character width as the other units.
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* Divided by 2^50, i.e. expressed in pebibytes. */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		if (fixed)
			*suffix = "B  ";
		else
			*suffix = "B";
	}
}
1931
1932/** @}
1933 */
Note: See TracBrowser for help on using the repository browser.