source: mainline/uspace/lib/c/generic/str.c@ cc74cb5

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since cc74cb5 was 09ab0a9a, checked in by Jiri Svoboda <jiri@…>, 7 years ago

Fix vertical spacing with new Ccheck revision.

  • Property mode set to 100644
File size: 42.5 KB
RevLine 
[936351c1]1/*
[df4ed85]2 * Copyright (c) 2005 Martin Decky
[576845ec]3 * Copyright (c) 2008 Jiri Svoboda
[22cf42d9]4 * Copyright (c) 2011 Martin Sucha
[c4bbca8]5 * Copyright (c) 2011 Oleg Romanenko
[936351c1]6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * - Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * - Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * - The name of the author may not be used to endorse or promote products
18 * derived from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31
[a46da63]32/** @addtogroup libc
[b2951e2]33 * @{
34 */
35/** @file
36 */
37
[19f857a]38#include <str.h>
[582a0b8]39#include <stddef.h>
[9539be6]40#include <stdint.h>
[38d150e]41#include <stdlib.h>
42#include <assert.h>
[e64c4b2]43#include <ctype.h>
[171f9a1]44#include <errno.h>
[f2b8cdc]45#include <align.h>
[095003a8]46#include <mem.h>
[16bfcd3]47#include <limits.h>
[171f9a1]48
/**
 * Evaluate @a cond only when wchar_t is a signed type.
 *
 * When the compiler defines __WCHAR_UNSIGNED__ the condition is replaced
 * by a constant true (a "non-negative" test would be trivially true).
 */
#ifdef __WCHAR_UNSIGNED__
#define WCHAR_SIGNED_CHECK(cond)  (true)
#else
#define WCHAR_SIGNED_CHECK(cond)  (cond)
#endif

/** Byte mask consisting of lowest @n bits (out of 8) */
#define LO_MASK_8(n)  ((uint8_t) ((1u << (n)) - 1))

/** Mask consisting of lowest @n bits (out of 32) */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1))

/**
 * Byte mask consisting of highest @n bits (out of 8)
 *
 * The result is cast back to uint8_t: the operand of ~ is promoted to int,
 * so without the cast the macro would evaluate to a negative int with all
 * upper bits set rather than to a byte-sized mask.
 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
67
68/** Decode a single character from a string.
69 *
70 * Decode a single character from a string of size @a size. Decoding starts
71 * at @a offset and this offset is moved to the beginning of the next
72 * character. In case of decoding error, offset generally advances at least
73 * by one. However, offset is never moved beyond size.
74 *
75 * @param str String (not necessarily NULL-terminated).
76 * @param offset Byte offset in string where to start decoding.
77 * @param size Size of the string (in bytes).
78 *
79 * @return Value of decoded character, U_SPECIAL on decoding error or
80 * NULL if attempt to decode beyond @a size.
81 *
82 */
83wchar_t str_decode(const char *str, size_t *offset, size_t size)
84{
85 if (*offset + 1 > size)
86 return 0;
[a35b458]87
[171f9a1]88 /* First byte read from string */
89 uint8_t b0 = (uint8_t) str[(*offset)++];
[a35b458]90
[171f9a1]91 /* Determine code length */
[a35b458]92
[171f9a1]93 unsigned int b0_bits; /* Data bits in first byte */
94 unsigned int cbytes; /* Number of continuation bytes */
[a35b458]95
[171f9a1]96 if ((b0 & 0x80) == 0) {
97 /* 0xxxxxxx (Plain ASCII) */
98 b0_bits = 7;
99 cbytes = 0;
100 } else if ((b0 & 0xe0) == 0xc0) {
101 /* 110xxxxx 10xxxxxx */
102 b0_bits = 5;
103 cbytes = 1;
104 } else if ((b0 & 0xf0) == 0xe0) {
105 /* 1110xxxx 10xxxxxx 10xxxxxx */
106 b0_bits = 4;
107 cbytes = 2;
108 } else if ((b0 & 0xf8) == 0xf0) {
109 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
110 b0_bits = 3;
111 cbytes = 3;
112 } else {
113 /* 10xxxxxx -- unexpected continuation byte */
114 return U_SPECIAL;
115 }
[a35b458]116
[171f9a1]117 if (*offset + cbytes > size)
118 return U_SPECIAL;
[a35b458]119
[171f9a1]120 wchar_t ch = b0 & LO_MASK_8(b0_bits);
[a35b458]121
[171f9a1]122 /* Decode continuation bytes */
123 while (cbytes > 0) {
124 uint8_t b = (uint8_t) str[(*offset)++];
[a35b458]125
[171f9a1]126 /* Must be 10xxxxxx */
127 if ((b & 0xc0) != 0x80)
128 return U_SPECIAL;
[a35b458]129
[171f9a1]130 /* Shift data bits to ch */
131 ch = (ch << CONT_BITS) | (wchar_t) (b & LO_MASK_8(CONT_BITS));
132 cbytes--;
133 }
[a35b458]134
[171f9a1]135 return ch;
136}
137
[568693b]138/** Decode a single character from a string to the left.
139 *
140 * Decode a single character from a string of size @a size. Decoding starts
141 * at @a offset and this offset is moved to the beginning of the previous
142 * character. In case of decoding error, offset generally decreases at least
143 * by one. However, offset is never moved before 0.
144 *
145 * @param str String (not necessarily NULL-terminated).
146 * @param offset Byte offset in string where to start decoding.
147 * @param size Size of the string (in bytes).
148 *
149 * @return Value of decoded character, U_SPECIAL on decoding error or
150 * NULL if attempt to decode beyond @a start of str.
151 *
152 */
153wchar_t str_decode_reverse(const char *str, size_t *offset, size_t size)
154{
155 if (*offset == 0)
156 return 0;
[a35b458]157
[568693b]158 size_t processed = 0;
159 /* Continue while continuation bytes found */
160 while (*offset > 0 && processed < 4) {
161 uint8_t b = (uint8_t) str[--(*offset)];
[a35b458]162
[568693b]163 if (processed == 0 && (b & 0x80) == 0) {
164 /* 0xxxxxxx (Plain ASCII) */
165 return b & 0x7f;
[1433ecda]166 } else if ((b & 0xe0) == 0xc0 || (b & 0xf0) == 0xe0 ||
[568693b]167 (b & 0xf8) == 0xf0) {
168 /* Start byte */
169 size_t start_offset = *offset;
170 return str_decode(str, &start_offset, size);
[1433ecda]171 } else if ((b & 0xc0) != 0x80) {
[568693b]172 /* Not a continuation byte */
173 return U_SPECIAL;
174 }
175 processed++;
176 }
177 /* Too many continuation bytes */
178 return U_SPECIAL;
179}
180
[171f9a1]181/** Encode a single character to string representation.
182 *
183 * Encode a single character to string representation (i.e. UTF-8) and store
184 * it into a buffer at @a offset. Encoding starts at @a offset and this offset
185 * is moved to the position where the next character can be written to.
186 *
187 * @param ch Input character.
188 * @param str Output buffer.
189 * @param offset Byte offset where to start writing.
190 * @param size Size of the output buffer (in bytes).
191 *
192 * @return EOK if the character was encoded successfully, EOVERFLOW if there
[d4a3ee5]193 * was not enough space in the output buffer or EINVAL if the character
194 * code was invalid.
[171f9a1]195 */
[b7fd2a0]196errno_t chr_encode(const wchar_t ch, char *str, size_t *offset, size_t size)
[171f9a1]197{
198 if (*offset >= size)
199 return EOVERFLOW;
[a35b458]200
[171f9a1]201 if (!chr_check(ch))
202 return EINVAL;
[a35b458]203
[7c3fb9b]204 /*
205 * Unsigned version of ch (bit operations should only be done
206 * on unsigned types).
207 */
[171f9a1]208 uint32_t cc = (uint32_t) ch;
[a35b458]209
[171f9a1]210 /* Determine how many continuation bytes are needed */
[a35b458]211
[171f9a1]212 unsigned int b0_bits; /* Data bits in first byte */
213 unsigned int cbytes; /* Number of continuation bytes */
[a35b458]214
[171f9a1]215 if ((cc & ~LO_MASK_32(7)) == 0) {
216 b0_bits = 7;
217 cbytes = 0;
218 } else if ((cc & ~LO_MASK_32(11)) == 0) {
219 b0_bits = 5;
220 cbytes = 1;
221 } else if ((cc & ~LO_MASK_32(16)) == 0) {
222 b0_bits = 4;
223 cbytes = 2;
224 } else if ((cc & ~LO_MASK_32(21)) == 0) {
225 b0_bits = 3;
226 cbytes = 3;
227 } else {
228 /* Codes longer than 21 bits are not supported */
229 return EINVAL;
230 }
[a35b458]231
[171f9a1]232 /* Check for available space in buffer */
233 if (*offset + cbytes >= size)
234 return EOVERFLOW;
[a35b458]235
[171f9a1]236 /* Encode continuation bytes */
237 unsigned int i;
238 for (i = cbytes; i > 0; i--) {
239 str[*offset + i] = 0x80 | (cc & LO_MASK_32(CONT_BITS));
240 cc = cc >> CONT_BITS;
241 }
[a35b458]242
[171f9a1]243 /* Encode first byte */
244 str[*offset] = (cc & LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1);
[a35b458]245
[171f9a1]246 /* Advance offset */
247 *offset += cbytes + 1;
[a35b458]248
[171f9a1]249 return EOK;
250}
251
/** Get size of string.
 *
 * Count the bytes of @a str that precede its null terminator.
 *
 * @param str String to consider.
 *
 * @return Number of bytes used by the string (terminator excluded).
 *
 */
size_t str_size(const char *str)
{
	const char *end = str;

	/* Advance to the terminator; the distance travelled is the size */
	while (*end != '\0')
		end++;

	return (size_t) (end - str);
}
271
/** Get size of wide string.
 *
 * Compute the number of bytes occupied by the wide characters of @a str
 * (the null terminator is not counted).
 *
 * @param str Wide string to consider.
 *
 * @return Number of bytes used by the wide string.
 *
 */
size_t wstr_size(const wchar_t *str)
{
	size_t chars = wstr_length(str);

	return chars * sizeof(wchar_t);
}
286
287/** Get size of string with length limit.
288 *
289 * Get the number of bytes which are used by up to @a max_len first
290 * characters in the string @a str. If @a max_len is greater than
291 * the length of @a str, the entire string is measured (excluding the
292 * NULL-terminator).
293 *
294 * @param str String to consider.
295 * @param max_len Maximum number of characters to measure.
296 *
297 * @return Number of bytes used by the characters.
298 *
299 */
[d4a3ee5]300size_t str_lsize(const char *str, size_t max_len)
[f2b8cdc]301{
[d4a3ee5]302 size_t len = 0;
[f2b8cdc]303 size_t offset = 0;
[a35b458]304
[f2b8cdc]305 while (len < max_len) {
306 if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
307 break;
[a35b458]308
[f2b8cdc]309 len++;
310 }
[a35b458]311
[f2b8cdc]312 return offset;
313}
314
/** Get size of string with size limit.
 *
 * Get the number of bytes which are used by the string @a str
 * (excluding the null terminator), but no more than @a max_size bytes.
 * At most @a max_size bytes of @a str are read, so the string does not
 * have to be null-terminated within that range.
 *
 * @param str      String to consider.
 * @param max_size Maximum number of bytes to measure.
 *
 * @return Number of bytes used by the string.
 *
 */
size_t str_nsize(const char *str, size_t max_size)
{
	size_t size = 0;

	/* Test the limit first so that str[max_size] is never read */
	while ((size < max_size) && (str[size] != '\0'))
		size++;

	return size;
}
335
/** Get size of wide string with size limit.
 *
 * Get the number of bytes which are used by the wide string @a str
 * (excluding the null terminator), but no more than @a max_size bytes.
 *
 * @param str      Wide string to consider.
 * @param max_size Maximum number of bytes to measure.
 *
 * @return Number of bytes used by the wide string.
 *
 */
size_t wstr_nsize(const wchar_t *str, size_t max_size)
{
	size_t chars = wstr_nlength(str, max_size);

	return chars * sizeof(wchar_t);
}
351
/** Get size of wide string with length limit.
 *
 * Get the number of bytes which are used by up to @a max_len first
 * wide characters in the wide string @a str. If @a max_len is greater than
 * the length of @a str, the entire wide string is measured (excluding the
 * null terminator).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	size_t len = 0;

	/*
	 * Count characters directly instead of converting max_len to a byte
	 * limit first: that multiplication wraps around for large max_len
	 * and would cut the measurement short.
	 */
	while ((len < max_len) && (str[len] != 0))
		len++;

	return len * sizeof(wchar_t);
}
369
370/** Get number of characters in a string.
371 *
372 * @param str NULL-terminated string.
373 *
374 * @return Number of characters in string.
375 *
376 */
[d4a3ee5]377size_t str_length(const char *str)
[f2b8cdc]378{
[d4a3ee5]379 size_t len = 0;
[f2b8cdc]380 size_t offset = 0;
[a35b458]381
[f2b8cdc]382 while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
383 len++;
[a35b458]384
[f2b8cdc]385 return len;
386}
387
/** Get number of characters in a wide string.
 *
 * @param wstr Null-terminated wide string.
 *
 * @return Number of wide characters in @a wstr preceding the terminator.
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
	const wchar_t *end = wstr;

	/* Find the terminator; the element distance is the length */
	while (*end != 0)
		end++;

	return (size_t) (end - wstr);
}
404
/** Get number of characters in a string with size limit.
 *
 * Decode @a str character by character until either the terminator or
 * the byte limit @a size stops the decoder.
 *
 * @param str  Null-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in string.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t pos = 0;
	size_t count;

	for (count = 0; str_decode(str, &pos, size) != 0; count++)
		;

	return count;
}
423
/** Get number of characters in a wide string with size limit.
 *
 * Only whole wide characters that fit into the first @a size bytes are
 * considered; a trailing partial character is ignored.
 *
 * @param str  Null-terminated wide string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of wide characters in string.
 *
 */
size_t wstr_nlength(const wchar_t *str, size_t size)
{
	/* Number of complete wide characters that fit into size bytes */
	size_t max_chars = size / sizeof(wchar_t);
	size_t count = 0;

	while ((count < max_chars) && (str[count] != 0))
		count++;

	return count;
}
445
/** Get character display width on a character cell display.
 *
 * The current implementation reports one cell for every character.
 *
 * @param ch Character
 * @return Width of character in cells.
 */
size_t chr_width(wchar_t ch)
{
	/* The character value does not influence the result */
	(void) ch;

	return 1;
}
455
456/** Get string display width on a character cell display.
457 *
458 * @param str String
459 * @return Width of string in cells.
460 */
461size_t str_width(const char *str)
462{
463 size_t width = 0;
464 size_t offset = 0;
465 wchar_t ch;
[a35b458]466
[be2a38ad]467 while ((ch = str_decode(str, &offset, STR_NO_LIMIT)) != 0)
468 width += chr_width(ch);
[a35b458]469
[be2a38ad]470 return width;
471}
472
/** Check whether character is plain ASCII.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0 to 127.
 *
 */
bool ascii_check(wchar_t ch)
{
	/* The lower bound is only tested when wchar_t is signed */
	return WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 127);
}
485
/** Check whether character is valid
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0 to 0x10ffff (1114111).
 *
 */
bool chr_check(wchar_t ch)
{
	/* The lower bound is only tested when wchar_t is signed */
	return WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 0x10ffff);
}
[936351c1]498
[f2b8cdc]499/** Compare two NULL terminated strings.
500 *
501 * Do a char-by-char comparison of two NULL-terminated strings.
[4efeab5]502 * The strings are considered equal iff their length is equal
503 * and both strings consist of the same sequence of characters.
504 *
[1772e6d]505 * A string S1 is less than another string S2 if it has a character with
506 * lower value at the first character position where the strings differ.
507 * If the strings differ in length, the shorter one is treated as if
508 * padded by characters with a value of zero.
[f2b8cdc]509 *
510 * @param s1 First string to compare.
511 * @param s2 Second string to compare.
512 *
[1772e6d]513 * @return 0 if the strings are equal, -1 if the first is less than the second,
514 * 1 if the second is less than the first.
[f2b8cdc]515 *
516 */
517int str_cmp(const char *s1, const char *s2)
518{
519 wchar_t c1 = 0;
520 wchar_t c2 = 0;
[8227d63]521
[f2b8cdc]522 size_t off1 = 0;
523 size_t off2 = 0;
524
525 while (true) {
526 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
527 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
528
529 if (c1 < c2)
530 return -1;
[8227d63]531
[f2b8cdc]532 if (c1 > c2)
533 return 1;
534
535 if (c1 == 0 || c2 == 0)
[8227d63]536 break;
[f2b8cdc]537 }
538
539 return 0;
540}
541
542/** Compare two NULL terminated strings with length limit.
543 *
544 * Do a char-by-char comparison of two NULL-terminated strings.
[4efeab5]545 * The strings are considered equal iff
546 * min(str_length(s1), max_len) == min(str_length(s2), max_len)
547 * and both strings consist of the same sequence of characters,
548 * up to max_len characters.
549 *
[1772e6d]550 * A string S1 is less than another string S2 if it has a character with
551 * lower value at the first character position where the strings differ.
552 * If the strings differ in length, the shorter one is treated as if
553 * padded by characters with a value of zero. Only the first max_len
554 * characters are considered.
[f2b8cdc]555 *
556 * @param s1 First string to compare.
557 * @param s2 Second string to compare.
558 * @param max_len Maximum number of characters to consider.
559 *
[1772e6d]560 * @return 0 if the strings are equal, -1 if the first is less than the second,
561 * 1 if the second is less than the first.
[f2b8cdc]562 *
563 */
[d4a3ee5]564int str_lcmp(const char *s1, const char *s2, size_t max_len)
[f2b8cdc]565{
566 wchar_t c1 = 0;
567 wchar_t c2 = 0;
[8227d63]568
[f2b8cdc]569 size_t off1 = 0;
570 size_t off2 = 0;
[8227d63]571
[d4a3ee5]572 size_t len = 0;
[f2b8cdc]573
574 while (true) {
575 if (len >= max_len)
576 break;
577
578 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
579 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
580
[8227d63]581 if (c1 < c2)
582 return -1;
583
584 if (c1 > c2)
585 return 1;
586
587 if (c1 == 0 || c2 == 0)
588 break;
589
590 ++len;
591 }
592
593 return 0;
594
595}
596
597/** Compare two NULL terminated strings in case-insensitive manner.
598 *
599 * Do a char-by-char comparison of two NULL-terminated strings.
600 * The strings are considered equal iff their length is equal
601 * and both strings consist of the same sequence of characters
602 * when converted to lower case.
603 *
604 * A string S1 is less than another string S2 if it has a character with
605 * lower value at the first character position where the strings differ.
606 * If the strings differ in length, the shorter one is treated as if
607 * padded by characters with a value of zero.
608 *
609 * @param s1 First string to compare.
610 * @param s2 Second string to compare.
611 *
612 * @return 0 if the strings are equal, -1 if the first is less than the second,
613 * 1 if the second is less than the first.
614 *
615 */
616int str_casecmp(const char *s1, const char *s2)
617{
618 wchar_t c1 = 0;
619 wchar_t c2 = 0;
620
621 size_t off1 = 0;
622 size_t off2 = 0;
623
624 while (true) {
625 c1 = tolower(str_decode(s1, &off1, STR_NO_LIMIT));
626 c2 = tolower(str_decode(s2, &off2, STR_NO_LIMIT));
627
628 if (c1 < c2)
629 return -1;
630
631 if (c1 > c2)
632 return 1;
633
634 if (c1 == 0 || c2 == 0)
635 break;
636 }
637
638 return 0;
639}
640
641/** Compare two NULL terminated strings with length limit in case-insensitive
642 * manner.
643 *
644 * Do a char-by-char comparison of two NULL-terminated strings.
645 * The strings are considered equal iff
646 * min(str_length(s1), max_len) == min(str_length(s2), max_len)
647 * and both strings consist of the same sequence of characters,
648 * up to max_len characters.
649 *
650 * A string S1 is less than another string S2 if it has a character with
651 * lower value at the first character position where the strings differ.
652 * If the strings differ in length, the shorter one is treated as if
653 * padded by characters with a value of zero. Only the first max_len
654 * characters are considered.
655 *
656 * @param s1 First string to compare.
657 * @param s2 Second string to compare.
658 * @param max_len Maximum number of characters to consider.
659 *
660 * @return 0 if the strings are equal, -1 if the first is less than the second,
661 * 1 if the second is less than the first.
662 *
663 */
664int str_lcasecmp(const char *s1, const char *s2, size_t max_len)
665{
666 wchar_t c1 = 0;
667 wchar_t c2 = 0;
[a35b458]668
[8227d63]669 size_t off1 = 0;
670 size_t off2 = 0;
[a35b458]671
[8227d63]672 size_t len = 0;
673
674 while (true) {
675 if (len >= max_len)
676 break;
677
678 c1 = tolower(str_decode(s1, &off1, STR_NO_LIMIT));
679 c2 = tolower(str_decode(s2, &off2, STR_NO_LIMIT));
680
[f2b8cdc]681 if (c1 < c2)
682 return -1;
683
684 if (c1 > c2)
685 return 1;
686
687 if (c1 == 0 || c2 == 0)
688 break;
689
[1b20da0]690 ++len;
[f2b8cdc]691 }
692
693 return 0;
694
695}
696
[dce39b4]697/** Test whether p is a prefix of s.
698 *
699 * Do a char-by-char comparison of two NULL-terminated strings
700 * and determine if p is a prefix of s.
701 *
702 * @param s The string in which to look
703 * @param p The string to check if it is a prefix of s
704 *
705 * @return true iff p is prefix of s else false
706 *
707 */
708bool str_test_prefix(const char *s, const char *p)
709{
710 wchar_t c1 = 0;
711 wchar_t c2 = 0;
[a35b458]712
[dce39b4]713 size_t off1 = 0;
714 size_t off2 = 0;
715
716 while (true) {
717 c1 = str_decode(s, &off1, STR_NO_LIMIT);
718 c2 = str_decode(p, &off2, STR_NO_LIMIT);
[a35b458]719
[dce39b4]720 if (c2 == 0)
721 return true;
722
723 if (c1 != c2)
724 return false;
[a35b458]725
[dce39b4]726 if (c1 == 0)
727 break;
728 }
729
730 return false;
731}
732
[6eb2e96]733/** Copy string.
[f2b8cdc]734 *
[6eb2e96]735 * Copy source string @a src to destination buffer @a dest.
736 * No more than @a size bytes are written. If the size of the output buffer
737 * is at least one byte, the output string will always be well-formed, i.e.
738 * null-terminated and containing only complete characters.
[f2b8cdc]739 *
[abf09311]740 * @param dest Destination buffer.
[6700ee2]741 * @param count Size of the destination buffer (must be > 0).
[6eb2e96]742 * @param src Source string.
[8e893ae]743 *
[f2b8cdc]744 */
[6eb2e96]745void str_cpy(char *dest, size_t size, const char *src)
[f2b8cdc]746{
[6700ee2]747 /* There must be space for a null terminator in the buffer. */
748 assert(size > 0);
[a35b458]749
[abf09311]750 size_t src_off = 0;
751 size_t dest_off = 0;
[a35b458]752
[abf09311]753 wchar_t ch;
[6eb2e96]754 while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
755 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
756 break;
757 }
[a35b458]758
[6eb2e96]759 dest[dest_off] = '\0';
760}
761
762/** Copy size-limited substring.
763 *
[6700ee2]764 * Copy prefix of string @a src of max. size @a size to destination buffer
765 * @a dest. No more than @a size bytes are written. The output string will
766 * always be well-formed, i.e. null-terminated and containing only complete
767 * characters.
[6eb2e96]768 *
769 * No more than @a n bytes are read from the input string, so it does not
770 * have to be null-terminated.
771 *
[abf09311]772 * @param dest Destination buffer.
[6700ee2]773 * @param count Size of the destination buffer (must be > 0).
[6eb2e96]774 * @param src Source string.
[abf09311]775 * @param n Maximum number of bytes to read from @a src.
[8e893ae]776 *
[6eb2e96]777 */
778void str_ncpy(char *dest, size_t size, const char *src, size_t n)
779{
[6700ee2]780 /* There must be space for a null terminator in the buffer. */
781 assert(size > 0);
[a35b458]782
[abf09311]783 size_t src_off = 0;
784 size_t dest_off = 0;
[a35b458]785
[abf09311]786 wchar_t ch;
[6eb2e96]787 while ((ch = str_decode(src, &src_off, n)) != 0) {
788 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
[f2b8cdc]789 break;
790 }
[a35b458]791
[6eb2e96]792 dest[dest_off] = '\0';
[f2b8cdc]793}
794
/** Append one string to another.
 *
 * Append source string @a src to the string in destination buffer @a dest.
 * Size of the destination buffer is @a size. If the string already in
 * @a dest occupies @a size bytes or more, nothing is done. Otherwise the
 * result is well-formed, i.e. null-terminated and containing only complete
 * characters.
 *
 * @param dest Destination buffer.
 * @param size Size of the destination buffer.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	size_t used = str_size(dest);

	/* Copy into the free tail of the buffer, if there is any */
	if (used < size)
		str_cpy(dest + used, size - used, src);
}
816
[dcb74c0a]817/** Convert space-padded ASCII to string.
818 *
819 * Common legacy text encoding in hardware is 7-bit ASCII fitted into
[c3d19ac]820 * a fixed-width byte buffer (bit 7 always zero), right-padded with spaces
[dcb74c0a]821 * (ASCII 0x20). Convert space-padded ascii to string representation.
822 *
823 * If the text does not fit into the destination buffer, the function converts
824 * as many characters as possible and returns EOVERFLOW.
825 *
826 * If the text contains non-ASCII bytes (with bit 7 set), the whole string is
827 * converted anyway and invalid characters are replaced with question marks
828 * (U_SPECIAL) and the function returns EIO.
829 *
830 * Regardless of return value upon return @a dest will always be well-formed.
831 *
832 * @param dest Destination buffer
833 * @param size Size of destination buffer
834 * @param src Space-padded ASCII.
835 * @param n Size of the source buffer in bytes.
836 *
837 * @return EOK on success, EOVERFLOW if the text does not fit
838 * destination buffer, EIO if the text contains
839 * non-ASCII bytes.
840 */
[b7fd2a0]841errno_t spascii_to_str(char *dest, size_t size, const uint8_t *src, size_t n)
[dcb74c0a]842{
843 size_t sidx;
844 size_t didx;
845 size_t dlast;
846 uint8_t byte;
[b7fd2a0]847 errno_t rc;
848 errno_t result;
[dcb74c0a]849
850 /* There must be space for a null terminator in the buffer. */
851 assert(size > 0);
852 result = EOK;
853
854 didx = 0;
855 dlast = 0;
856 for (sidx = 0; sidx < n; ++sidx) {
857 byte = src[sidx];
858 if (!ascii_check(byte)) {
859 byte = U_SPECIAL;
860 result = EIO;
861 }
862
863 rc = chr_encode(byte, dest, &didx, size - 1);
864 if (rc != EOK) {
865 assert(rc == EOVERFLOW);
866 dest[didx] = '\0';
867 return rc;
868 }
869
870 /* Remember dest index after last non-empty character */
871 if (byte != 0x20)
872 dlast = didx;
873 }
874
875 /* Terminate string after last non-empty character */
876 dest[dlast] = '\0';
877 return result;
878}
879
[0f06dbc]880/** Convert wide string to string.
[f2b8cdc]881 *
[0f06dbc]882 * Convert wide string @a src to string. The output is written to the buffer
883 * specified by @a dest and @a size. @a size must be non-zero and the string
884 * written will always be well-formed.
[f2b8cdc]885 *
[0f06dbc]886 * @param dest Destination buffer.
887 * @param size Size of the destination buffer.
888 * @param src Source wide string.
[f2b8cdc]889 */
[81e9cb3]890void wstr_to_str(char *dest, size_t size, const wchar_t *src)
[f2b8cdc]891{
892 wchar_t ch;
[0f06dbc]893 size_t src_idx;
894 size_t dest_off;
895
896 /* There must be space for a null terminator in the buffer. */
897 assert(size > 0);
[a35b458]898
[0f06dbc]899 src_idx = 0;
900 dest_off = 0;
901
[f2b8cdc]902 while ((ch = src[src_idx++]) != 0) {
[81e9cb3]903 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
[f2b8cdc]904 break;
905 }
[0f06dbc]906
907 dest[dest_off] = '\0';
[f2b8cdc]908}
909
[82374b2]910/** Convert UTF16 string to string.
911 *
912 * Convert utf16 string @a src to string. The output is written to the buffer
913 * specified by @a dest and @a size. @a size must be non-zero and the string
914 * written will always be well-formed. Surrogate pairs also supported.
915 *
916 * @param dest Destination buffer.
917 * @param size Size of the destination buffer.
918 * @param src Source utf16 string.
919 *
[cde999a]920 * @return EOK, if success, an error code otherwise.
[82374b2]921 */
[b7fd2a0]922errno_t utf16_to_str(char *dest, size_t size, const uint16_t *src)
[82374b2]923{
[abb7491c]924 size_t idx = 0, dest_off = 0;
[82374b2]925 wchar_t ch;
[b7fd2a0]926 errno_t rc = EOK;
[82374b2]927
928 /* There must be space for a null terminator in the buffer. */
929 assert(size > 0);
930
931 while (src[idx]) {
932 if ((src[idx] & 0xfc00) == 0xd800) {
[abb7491c]933 if (src[idx + 1] && (src[idx + 1] & 0xfc00) == 0xdc00) {
[82374b2]934 ch = 0x10000;
935 ch += (src[idx] & 0x03FF) << 10;
[abb7491c]936 ch += (src[idx + 1] & 0x03FF);
[82374b2]937 idx += 2;
[1433ecda]938 } else
[82374b2]939 break;
940 } else {
941 ch = src[idx];
942 idx++;
943 }
[abb7491c]944 rc = chr_encode(ch, dest, &dest_off, size - 1);
[82374b2]945 if (rc != EOK)
946 break;
947 }
948 dest[dest_off] = '\0';
949 return rc;
950}
951
[b06414f]952/** Convert string to UTF16 string.
953 *
954 * Convert string @a src to utf16 string. The output is written to the buffer
955 * specified by @a dest and @a dlen. @a dlen must be non-zero and the string
956 * written will always be well-formed. Surrogate pairs also supported.
957 *
958 * @param dest Destination buffer.
959 * @param dlen Number of utf16 characters that fit in the destination buffer.
960 * @param src Source string.
961 *
[cde999a]962 * @return EOK, if success, an error code otherwise.
[b06414f]963 */
[b7fd2a0]964errno_t str_to_utf16(uint16_t *dest, size_t dlen, const char *src)
[fc97128]965{
[b7fd2a0]966 errno_t rc = EOK;
[abb7491c]967 size_t offset = 0;
968 size_t idx = 0;
[fc97128]969 wchar_t c;
970
[b06414f]971 assert(dlen > 0);
[a35b458]972
[fc97128]973 while ((c = str_decode(src, &offset, STR_NO_LIMIT)) != 0) {
974 if (c > 0x10000) {
[b06414f]975 if (idx + 2 >= dlen - 1) {
[abb7491c]976 rc = EOVERFLOW;
[fc97128]977 break;
978 }
979 c = (c - 0x10000);
980 dest[idx] = 0xD800 | (c >> 10);
[abb7491c]981 dest[idx + 1] = 0xDC00 | (c & 0x3FF);
[fc97128]982 idx++;
983 } else {
[1433ecda]984 dest[idx] = c;
[fc97128]985 }
986
987 idx++;
[b06414f]988 if (idx >= dlen - 1) {
[abb7491c]989 rc = EOVERFLOW;
[fc97128]990 break;
991 }
992 }
993
994 dest[idx] = '\0';
995 return rc;
[f2b8cdc]996}
997
/** Get size of UTF-16 string.
 *
 * Count the 16-bit words of the UTF-16 string @a ustr that precede its
 * null terminator.
 *
 * @param ustr UTF-16 string to consider.
 *
 * @return Number of words used by the UTF-16 string
 *
 */
size_t utf16_wsize(const uint16_t *ustr)
{
	const uint16_t *end = ustr;

	/* Find the terminator; the element distance is the word count */
	while (*end != 0)
		end++;

	return (size_t) (end - ustr);
}
1017
[b67c7d64]1018/** Convert wide string to new string.
1019 *
1020 * Convert wide string @a src to string. Space for the new string is allocated
1021 * on the heap.
1022 *
1023 * @param src Source wide string.
1024 * @return New string.
1025 */
1026char *wstr_to_astr(const wchar_t *src)
1027{
1028 char dbuf[STR_BOUNDS(1)];
1029 char *str;
1030 wchar_t ch;
1031
1032 size_t src_idx;
1033 size_t dest_off;
1034 size_t dest_size;
1035
1036 /* Compute size of encoded string. */
1037
1038 src_idx = 0;
1039 dest_size = 0;
1040
1041 while ((ch = src[src_idx++]) != 0) {
1042 dest_off = 0;
1043 if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
1044 break;
1045 dest_size += dest_off;
1046 }
1047
1048 str = malloc(dest_size + 1);
1049 if (str == NULL)
1050 return NULL;
1051
1052 /* Encode string. */
1053
1054 src_idx = 0;
1055 dest_off = 0;
1056
1057 while ((ch = src[src_idx++]) != 0) {
1058 if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
1059 break;
1060 }
1061
1062 str[dest_size] = '\0';
1063 return str;
1064}
1065
[da2bd08]1066/** Convert string to wide string.
1067 *
1068 * Convert string @a src to wide string. The output is written to the
[0f06dbc]1069 * buffer specified by @a dest and @a dlen. @a dlen must be non-zero
1070 * and the wide string written will always be null-terminated.
[da2bd08]1071 *
1072 * @param dest Destination buffer.
1073 * @param dlen Length of destination buffer (number of wchars).
1074 * @param src Source string.
1075 */
[81e9cb3]1076void str_to_wstr(wchar_t *dest, size_t dlen, const char *src)
[da2bd08]1077{
1078 size_t offset;
1079 size_t di;
1080 wchar_t c;
1081
1082 assert(dlen > 0);
1083
1084 offset = 0;
1085 di = 0;
1086
1087 do {
[81e9cb3]1088 if (di >= dlen - 1)
[da2bd08]1089 break;
1090
1091 c = str_decode(src, &offset, STR_NO_LIMIT);
1092 dest[di++] = c;
1093 } while (c != '\0');
1094
1095 dest[dlen - 1] = '\0';
1096}
1097
/** Convert string to a newly allocated wide string.
 *
 * Allocate a zero-filled buffer with one slot per character of @a str
 * plus one for the terminator, and fill it using str_to_wstr().
 *
 * @param str Source string.
 *
 * @return New null-terminated wide string or NULL if the allocation
 *         failed.
 */
wchar_t *str_to_awstr(const char *str)
{
	size_t slots = str_length(str) + 1;
	wchar_t *wide = calloc(slots, sizeof(wchar_t));

	if (wide != NULL)
		str_to_wstr(wide, slots, str);

	return wide;
}
1116
[f2b8cdc]1117/** Find first occurence of character in string.
1118 *
1119 * @param str String to search.
1120 * @param ch Character to look for.
1121 *
1122 * @return Pointer to character in @a str or NULL if not found.
1123 */
[dd2cfa7]1124char *str_chr(const char *str, wchar_t ch)
[f2b8cdc]1125{
1126 wchar_t acc;
1127 size_t off = 0;
[f2d2c7ba]1128 size_t last = 0;
[a35b458]1129
[f2b8cdc]1130 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
1131 if (acc == ch)
[dd2cfa7]1132 return (char *) (str + last);
[f2d2c7ba]1133 last = off;
[f2b8cdc]1134 }
[a35b458]1135
[f2b8cdc]1136 return NULL;
1137}
1138
[da680b4b]1139/** Find first occurence of substring in string.
1140 *
1141 * @param hs Haystack (string)
1142 * @param n Needle (substring to look for)
1143 *
1144 * @return Pointer to character in @a hs or @c NULL if not found.
1145 */
1146char *str_str(const char *hs, const char *n)
1147{
1148 size_t off = 0;
1149
1150 if (str_lcmp(hs, n, str_length(n)) == 0)
1151 return (char *)hs;
1152
1153 while (str_decode(hs, &off, STR_NO_LIMIT) != 0) {
1154 if (str_lcmp(hs + off, n, str_length(n)) == 0)
1155 return (char *)(hs + off);
1156 }
1157
1158 return NULL;
1159}
1160
[1737bfb]1161/** Removes specified trailing characters from a string.
1162 *
1163 * @param str String to remove from.
1164 * @param ch Character to remove.
1165 */
1166void str_rtrim(char *str, wchar_t ch)
1167{
1168 size_t off = 0;
1169 size_t pos = 0;
1170 wchar_t c;
1171 bool update_last_chunk = true;
1172 char *last_chunk = NULL;
1173
1174 while ((c = str_decode(str, &off, STR_NO_LIMIT))) {
1175 if (c != ch) {
1176 update_last_chunk = true;
1177 last_chunk = NULL;
1178 } else if (update_last_chunk) {
1179 update_last_chunk = false;
1180 last_chunk = (str + pos);
1181 }
1182 pos = off;
1183 }
1184
1185 if (last_chunk)
1186 *last_chunk = '\0';
1187}
1188
1189/** Removes specified leading characters from a string.
1190 *
1191 * @param str String to remove from.
1192 * @param ch Character to remove.
1193 */
1194void str_ltrim(char *str, wchar_t ch)
1195{
1196 wchar_t acc;
1197 size_t off = 0;
1198 size_t pos = 0;
1199 size_t str_sz = str_size(str);
1200
1201 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
1202 if (acc != ch)
1203 break;
1204 else
1205 pos = off;
1206 }
1207
1208 if (pos > 0) {
1209 memmove(str, &str[pos], str_sz - pos);
1210 pos = str_sz - pos;
[a18a8b9]1211 str[pos] = '\0';
[1737bfb]1212 }
1213}
1214
[7afb4a5]1215/** Find last occurence of character in string.
1216 *
1217 * @param str String to search.
1218 * @param ch Character to look for.
1219 *
1220 * @return Pointer to character in @a str or NULL if not found.
1221 */
[dd2cfa7]1222char *str_rchr(const char *str, wchar_t ch)
[7afb4a5]1223{
1224 wchar_t acc;
1225 size_t off = 0;
[f2d2c7ba]1226 size_t last = 0;
[d4a3ee5]1227 const char *res = NULL;
[a35b458]1228
[7afb4a5]1229 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
1230 if (acc == ch)
[f2d2c7ba]1231 res = (str + last);
1232 last = off;
[7afb4a5]1233 }
[a35b458]1234
[dd2cfa7]1235 return (char *) res;
[7afb4a5]1236}
1237
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position @a pos.
 * The characters from that position on, including the terminator,
 * are shifted one slot towards the end.
 *
 * NOTE(review): only @a pos is compared with @a max_pos; the current
 * length is not. Whether the buffer has room for the shifted terminator
 * appears to be the caller's responsibility -- confirm.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	if (pos > len)
		return false;

	if (pos + 1 > max_pos)
		return false;

	/* Shift the tail, terminator included, one slot to the right. */
	for (size_t dst = len + 1; dst > pos; dst--)
		str[dst] = str[dst - 1];

	str[pos] = ch;
	return true;
}
1267
/** Remove a wide character from a wide string.
 *
 * Remove the wide character at position @a pos. The characters after
 * that position, including the terminator, are shifted one slot towards
 * the beginning.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	size_t len = wstr_length(str);

	if (pos >= len)
		return false;

	/* Pull the tail, terminator included, one slot to the left. */
	for (size_t dst = pos; dst < len; dst++)
		str[dst] = str[dst + 1];

	return true;
}
1293
/** Duplicate string.
 *
 * Allocate a new buffer with malloc() and copy the source string into
 * it with str_cpy(). The allocation can fail, in which case NULL is
 * returned.
 *
 * The duplicate string is always a well-formed null-terminated UTF-8
 * string, but it can differ from the source string on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	/* One extra byte for the null terminator. */
	size_t size = str_size(src) + 1;
	char *dest = malloc(size);
	if (dest == NULL)
		return NULL;

	str_cpy(dest, size, src);
	return dest;
}
1320
/** Duplicate string with size limit.
 *
 * Allocate a new buffer with malloc() and copy up to @a n bytes from the
 * source string into it with str_ncpy(). No more than @a n + 1 bytes are
 * allocated; if the source string occupies fewer than @a n bytes, only
 * its size plus one byte is allocated. The allocation can fail, in which
 * case NULL is returned.
 *
 * The duplicate string is always a well-formed null-terminated UTF-8
 * string, but it can differ from the source string on the byte level.
 *
 * @param src Source string.
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	/* Never copy more than the source actually holds. */
	size_t size = str_size(src);
	if (size > n)
		size = n;

	char *dest = malloc(size + 1);
	if (dest == NULL)
		return NULL;

	str_ncpy(dest, size + 1, src, size);
	return dest;
}
1354
/** Split string by delimiters.
 *
 * Skip any leading delimiter characters, then collect characters up to
 * the next delimiter or the end of the string. The delimiter that ends
 * the token is overwritten with a null terminator, so @a s is modified.
 *
 * @param s     String to be tokenized. If NULL, NULL is returned.
 * @param delim String with the delimiters.
 * @param next  Variable which will receive the pointer to the
 *              continuation of the string: the position following the
 *              delimiter that ended the token, or the end of the string
 *              if no delimiter followed. May be NULL.
 *
 * @return Pointer to the token found in @a s, or NULL if no token
 *         remains.
 */
char *str_tok(char *s, const char *delim, char **next)
{
	if (s == NULL)
		return NULL;

	size_t size = str_size(s);
	size_t probe = 0;
	size_t tok_begin = 0;
	wchar_t ch;

	/* Advance past every leading delimiter. */
	for (;;) {
		ch = str_decode(s, &probe, size);
		if ((ch == 0) || (str_chr(delim, ch) == NULL))
			break;

		tok_begin = probe;
	}

	/* Decode again from the token start up to a delimiter or the end. */
	size_t tok_end = tok_begin;
	probe = tok_begin;

	for (;;) {
		ch = str_decode(s, &probe, size);
		if ((ch == 0) || (str_chr(delim, ch) != NULL))
			break;

		tok_end = probe;
	}

	if (next != NULL) {
		if (ch != 0) {
			/* Continue right after the delimiter just decoded. */
			*next = &s[probe];
		} else {
			/* Nothing follows the token. */
			*next = &s[tok_end];
		}
	}

	if (tok_end == tok_begin)
		return NULL;  /* No more tokens. */

	/* Overwrite delimiter with NULL terminator. */
	s[tok_end] = '\0';
	return &s[tok_begin];
}
1401
[d47279b]1402/** Convert string to uint64_t (internal variant).
1403 *
1404 * @param nptr Pointer to string.
1405 * @param endptr Pointer to the first invalid character is stored here.
1406 * @param base Zero or number between 2 and 36 inclusive.
1407 * @param neg Indication of unary minus is stored here.
1408 * @apram result Result of the conversion.
1409 *
1410 * @return EOK if conversion was successful.
1411 *
1412 */
[b7fd2a0]1413static errno_t str_uint(const char *nptr, char **endptr, unsigned int base,
[d47279b]1414 bool *neg, uint64_t *result)
1415{
1416 assert(endptr != NULL);
1417 assert(neg != NULL);
1418 assert(result != NULL);
[a35b458]1419
[d47279b]1420 *neg = false;
1421 const char *str = nptr;
[a35b458]1422
[d47279b]1423 /* Ignore leading whitespace */
1424 while (isspace(*str))
1425 str++;
[a35b458]1426
[d47279b]1427 if (*str == '-') {
1428 *neg = true;
1429 str++;
1430 } else if (*str == '+')
1431 str++;
[a35b458]1432
[d47279b]1433 if (base == 0) {
1434 /* Decode base if not specified */
1435 base = 10;
[a35b458]1436
[d47279b]1437 if (*str == '0') {
1438 base = 8;
1439 str++;
[a35b458]1440
[d47279b]1441 switch (*str) {
1442 case 'b':
1443 case 'B':
1444 base = 2;
1445 str++;
1446 break;
1447 case 'o':
1448 case 'O':
1449 base = 8;
1450 str++;
1451 break;
1452 case 'd':
1453 case 'D':
1454 case 't':
1455 case 'T':
1456 base = 10;
1457 str++;
1458 break;
1459 case 'x':
1460 case 'X':
1461 base = 16;
1462 str++;
1463 break;
1464 default:
1465 str--;
1466 }
1467 }
1468 } else {
1469 /* Check base range */
1470 if ((base < 2) || (base > 36)) {
1471 *endptr = (char *) str;
1472 return EINVAL;
1473 }
1474 }
[a35b458]1475
[d47279b]1476 *result = 0;
1477 const char *startstr = str;
[a35b458]1478
[d47279b]1479 while (*str != 0) {
1480 unsigned int digit;
[a35b458]1481
[d47279b]1482 if ((*str >= 'a') && (*str <= 'z'))
1483 digit = *str - 'a' + 10;
1484 else if ((*str >= 'A') && (*str <= 'Z'))
1485 digit = *str - 'A' + 10;
1486 else if ((*str >= '0') && (*str <= '9'))
1487 digit = *str - '0';
1488 else
1489 break;
[a35b458]1490
[d47279b]1491 if (digit >= base)
1492 break;
[a35b458]1493
[d47279b]1494 uint64_t prev = *result;
1495 *result = (*result) * base + digit;
[a35b458]1496
[d47279b]1497 if (*result < prev) {
1498 /* Overflow */
1499 *endptr = (char *) str;
1500 return EOVERFLOW;
1501 }
[a35b458]1502
[d47279b]1503 str++;
1504 }
[a35b458]1505
[d47279b]1506 if (str == startstr) {
1507 /*
1508 * No digits were decoded => first invalid character is
1509 * the first character of the string.
1510 */
1511 str = nptr;
1512 }
[a35b458]1513
[d47279b]1514 *endptr = (char *) str;
[a35b458]1515
[d47279b]1516 if (str == nptr)
1517 return EINVAL;
[a35b458]1518
[d47279b]1519 return EOK;
1520}
1521
[d7f6248]1522/** Convert string to uint8_t.
1523 *
1524 * @param nptr Pointer to string.
1525 * @param endptr If not NULL, pointer to the first invalid character
1526 * is stored here.
1527 * @param base Zero or number between 2 and 36 inclusive.
1528 * @param strict Do not allow any trailing characters.
1529 * @param result Result of the conversion.
1530 *
1531 * @return EOK if conversion was successful.
1532 *
1533 */
[b7fd2a0]1534errno_t str_uint8_t(const char *nptr, const char **endptr, unsigned int base,
[d7f6248]1535 bool strict, uint8_t *result)
1536{
1537 assert(result != NULL);
[a35b458]1538
[d7f6248]1539 bool neg;
1540 char *lendptr;
1541 uint64_t res;
[b7fd2a0]1542 errno_t ret = str_uint(nptr, &lendptr, base, &neg, &res);
[a35b458]1543
[d7f6248]1544 if (endptr != NULL)
1545 *endptr = (char *) lendptr;
[a35b458]1546
[d7f6248]1547 if (ret != EOK)
1548 return ret;
[a35b458]1549
[d7f6248]1550 /* Do not allow negative values */
1551 if (neg)
1552 return EINVAL;
[a35b458]1553
[7c3fb9b]1554 /*
1555 * Check whether we are at the end of
1556 * the string in strict mode
1557 */
[d7f6248]1558 if ((strict) && (*lendptr != 0))
1559 return EINVAL;
[a35b458]1560
[d7f6248]1561 /* Check for overflow */
1562 uint8_t _res = (uint8_t) res;
1563 if (_res != res)
1564 return EOVERFLOW;
[a35b458]1565
[d7f6248]1566 *result = _res;
[a35b458]1567
[d7f6248]1568 return EOK;
1569}
1570
1571/** Convert string to uint16_t.
1572 *
1573 * @param nptr Pointer to string.
1574 * @param endptr If not NULL, pointer to the first invalid character
1575 * is stored here.
1576 * @param base Zero or number between 2 and 36 inclusive.
1577 * @param strict Do not allow any trailing characters.
1578 * @param result Result of the conversion.
1579 *
1580 * @return EOK if conversion was successful.
1581 *
1582 */
[b7fd2a0]1583errno_t str_uint16_t(const char *nptr, const char **endptr, unsigned int base,
[d7f6248]1584 bool strict, uint16_t *result)
1585{
1586 assert(result != NULL);
[a35b458]1587
[d7f6248]1588 bool neg;
1589 char *lendptr;
1590 uint64_t res;
[b7fd2a0]1591 errno_t ret = str_uint(nptr, &lendptr, base, &neg, &res);
[a35b458]1592
[d7f6248]1593 if (endptr != NULL)
1594 *endptr = (char *) lendptr;
[a35b458]1595
[d7f6248]1596 if (ret != EOK)
1597 return ret;
[a35b458]1598
[d7f6248]1599 /* Do not allow negative values */
1600 if (neg)
1601 return EINVAL;
[a35b458]1602
[7c3fb9b]1603 /*
1604 * Check whether we are at the end of
1605 * the string in strict mode
1606 */
[d7f6248]1607 if ((strict) && (*lendptr != 0))
1608 return EINVAL;
[a35b458]1609
[d7f6248]1610 /* Check for overflow */
1611 uint16_t _res = (uint16_t) res;
1612 if (_res != res)
1613 return EOVERFLOW;
[a35b458]1614
[d7f6248]1615 *result = _res;
[a35b458]1616
[d7f6248]1617 return EOK;
1618}
1619
1620/** Convert string to uint32_t.
1621 *
1622 * @param nptr Pointer to string.
1623 * @param endptr If not NULL, pointer to the first invalid character
1624 * is stored here.
1625 * @param base Zero or number between 2 and 36 inclusive.
1626 * @param strict Do not allow any trailing characters.
1627 * @param result Result of the conversion.
1628 *
1629 * @return EOK if conversion was successful.
1630 *
1631 */
[b7fd2a0]1632errno_t str_uint32_t(const char *nptr, const char **endptr, unsigned int base,
[d7f6248]1633 bool strict, uint32_t *result)
1634{
1635 assert(result != NULL);
[a35b458]1636
[d7f6248]1637 bool neg;
1638 char *lendptr;
1639 uint64_t res;
[b7fd2a0]1640 errno_t ret = str_uint(nptr, &lendptr, base, &neg, &res);
[a35b458]1641
[d7f6248]1642 if (endptr != NULL)
1643 *endptr = (char *) lendptr;
[a35b458]1644
[d7f6248]1645 if (ret != EOK)
1646 return ret;
[a35b458]1647
[d7f6248]1648 /* Do not allow negative values */
1649 if (neg)
1650 return EINVAL;
[a35b458]1651
[7c3fb9b]1652 /*
1653 * Check whether we are at the end of
1654 * the string in strict mode
1655 */
[d7f6248]1656 if ((strict) && (*lendptr != 0))
1657 return EINVAL;
[a35b458]1658
[d7f6248]1659 /* Check for overflow */
1660 uint32_t _res = (uint32_t) res;
1661 if (_res != res)
1662 return EOVERFLOW;
[a35b458]1663
[d7f6248]1664 *result = _res;
[a35b458]1665
[d7f6248]1666 return EOK;
1667}
1668
[d47279b]1669/** Convert string to uint64_t.
1670 *
1671 * @param nptr Pointer to string.
1672 * @param endptr If not NULL, pointer to the first invalid character
1673 * is stored here.
1674 * @param base Zero or number between 2 and 36 inclusive.
1675 * @param strict Do not allow any trailing characters.
1676 * @param result Result of the conversion.
1677 *
1678 * @return EOK if conversion was successful.
1679 *
1680 */
[b7fd2a0]1681errno_t str_uint64_t(const char *nptr, const char **endptr, unsigned int base,
[d47279b]1682 bool strict, uint64_t *result)
1683{
1684 assert(result != NULL);
[a35b458]1685
[d47279b]1686 bool neg;
1687 char *lendptr;
[b7fd2a0]1688 errno_t ret = str_uint(nptr, &lendptr, base, &neg, result);
[a35b458]1689
[d47279b]1690 if (endptr != NULL)
1691 *endptr = (char *) lendptr;
[a35b458]1692
[d47279b]1693 if (ret != EOK)
1694 return ret;
[a35b458]1695
[d47279b]1696 /* Do not allow negative values */
1697 if (neg)
1698 return EINVAL;
[a35b458]1699
[7c3fb9b]1700 /*
1701 * Check whether we are at the end of
1702 * the string in strict mode
1703 */
[d47279b]1704 if ((strict) && (*lendptr != 0))
1705 return EINVAL;
[a35b458]1706
[d47279b]1707 return EOK;
1708}
1709
[af8bda0]1710/** Convert string to int64_t.
1711 *
1712 * @param nptr Pointer to string.
1713 * @param endptr If not NULL, pointer to the first invalid character
1714 * is stored here.
1715 * @param base Zero or number between 2 and 36 inclusive.
1716 * @param strict Do not allow any trailing characters.
1717 * @param result Result of the conversion.
1718 *
1719 * @return EOK if conversion was successful.
1720 *
1721 */
1722int str_int64_t(const char *nptr, const char **endptr, unsigned int base,
1723 bool strict, int64_t *result)
1724{
1725 assert(result != NULL);
1726
1727 bool neg;
1728 char *lendptr;
1729 uint64_t unsigned_result;
1730 int ret = str_uint(nptr, &lendptr, base, &neg, &unsigned_result);
1731
1732 if (endptr != NULL)
1733 *endptr = (char *) lendptr;
1734
1735 if (ret != EOK)
1736 return ret;
1737
1738 /* Do not allow negative values */
1739 if (neg) {
1740 if (unsigned_result == UINT64_MAX)
1741 return EINVAL;
1742
[be0f5e4]1743 *result = -(int64_t) unsigned_result;
[af8bda0]1744 } else
1745 *result = unsigned_result;
1746
[1583793]1747 /*
1748 * Check whether we are at the end of
1749 * the string in strict mode
1750 */
[af8bda0]1751 if ((strict) && (*lendptr != 0))
1752 return EINVAL;
1753
1754 return EOK;
1755}
1756
[d47279b]1757/** Convert string to size_t.
1758 *
1759 * @param nptr Pointer to string.
1760 * @param endptr If not NULL, pointer to the first invalid character
1761 * is stored here.
1762 * @param base Zero or number between 2 and 36 inclusive.
1763 * @param strict Do not allow any trailing characters.
1764 * @param result Result of the conversion.
1765 *
1766 * @return EOK if conversion was successful.
1767 *
1768 */
[b7fd2a0]1769errno_t str_size_t(const char *nptr, const char **endptr, unsigned int base,
[d47279b]1770 bool strict, size_t *result)
1771{
1772 assert(result != NULL);
[a35b458]1773
[d47279b]1774 bool neg;
1775 char *lendptr;
1776 uint64_t res;
[b7fd2a0]1777 errno_t ret = str_uint(nptr, &lendptr, base, &neg, &res);
[a35b458]1778
[d47279b]1779 if (endptr != NULL)
1780 *endptr = (char *) lendptr;
[a35b458]1781
[d47279b]1782 if (ret != EOK)
1783 return ret;
[a35b458]1784
[d47279b]1785 /* Do not allow negative values */
1786 if (neg)
1787 return EINVAL;
[a35b458]1788
[7c3fb9b]1789 /*
1790 * Check whether we are at the end of
1791 * the string in strict mode
1792 */
[d47279b]1793 if ((strict) && (*lendptr != 0))
1794 return EINVAL;
[a35b458]1795
[d47279b]1796 /* Check for overflow */
1797 size_t _res = (size_t) res;
1798 if (_res != res)
1799 return EOVERFLOW;
[a35b458]1800
[d47279b]1801 *result = _res;
[a35b458]1802
[d47279b]1803 return EOK;
1804}
1805
/** Scale a value to a decimal order of magnitude.
 *
 * Divide @a val by a power of 1000 chosen from its magnitude and report
 * the matching SI prefix. A unit is used only once the value exceeds
 * ten times that unit, so the scaled value stays above 10.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix SI prefix of the chosen unit ('k', 'M', 'G', 'T', 'P'
 *               or 'E') is stored here, or a space if the value was
 *               not scaled.
 *
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/* Units of 10^18: exa. */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* Units of 10^15: peta. */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		/* Units of 10^12: tera. */
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		/* Units of 10^9: giga. */
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		/* Units of 10^6: mega. */
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		/* Units of 10^3: kilo. */
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
1831
/** Scale a value to a binary order of magnitude.
 *
 * Divide @a val by a power of 1024 chosen from its magnitude and report
 * the matching IEC unit. A unit is used only once the value exceeds
 * 1024 times that unit, so the scaled value stays above 1024.
 *
 * @param val    Value to scale (in bytes).
 * @param rv     Scaled value is stored here.
 * @param suffix Pointer to a constant string naming the unit ("KiB",
 *               "MiB", "GiB", "TiB", "PiB" or bytes) is stored here.
 * @param fixed  If true, the byte suffix is padded with spaces to the
 *               same width as the other suffixes.
 *
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* Units of 2^50: pebibytes. */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		/* Units of 2^40: tebibytes. */
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		/* Units of 2^30: gibibytes. */
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		/* Units of 2^20: mebibytes. */
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		/* Units of 2^10: kibibytes. */
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		if (fixed)
			*suffix = "B  ";
		else
			*suffix = "B";
	}
}
1858
[a46da63]1859/** @}
[b2951e2]1860 */
Note: See TracBrowser for help on using the repository browser.