source: mainline/uspace/lib/c/generic/str.c@ 933cadf

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 933cadf was 933cadf, checked in by Martin Decky <martin@…>, 14 years ago

use binary suffixes in printouts where appropriate

  • Property mode set to 100644
File size: 28.8 KB
Line 
1/*
2 * Copyright (c) 2005 Martin Decky
3 * Copyright (c) 2008 Jiri Svoboda
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30/** @addtogroup libc
31 * @{
32 */
33/** @file
34 */
35
36#include <str.h>
37#include <stdlib.h>
38#include <assert.h>
39#include <stdint.h>
40#include <ctype.h>
41#include <malloc.h>
42#include <errno.h>
43#include <align.h>
44#include <mem.h>
45#include <str.h>
46
/*
 * Bit-mask helpers used by the UTF-8 encoder/decoder below.
 *
 * The shifts are done on an unsigned operand so that no signed-shift
 * overflow can occur, and every 8-bit mask is truncated back to uint8_t
 * so that it never turns into a negative int after integer promotion.
 */

/** Byte mask consisting of lowest @a n bits (out of 8), 0 <= n <= 8 */
#define LO_MASK_8(n)  ((uint8_t) ((1U << (n)) - 1))

/** Mask consisting of lowest @a n bits (out of 32), 0 <= n <= 31 */
#define LO_MASK_32(n)  ((uint32_t) ((1U << (n)) - 1))

/** Byte mask consisting of highest @a n bits (out of 8), 0 <= n <= 8 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
58
59/** Decode a single character from a string.
60 *
61 * Decode a single character from a string of size @a size. Decoding starts
62 * at @a offset and this offset is moved to the beginning of the next
63 * character. In case of decoding error, offset generally advances at least
64 * by one. However, offset is never moved beyond size.
65 *
66 * @param str String (not necessarily NULL-terminated).
67 * @param offset Byte offset in string where to start decoding.
68 * @param size Size of the string (in bytes).
69 *
70 * @return Value of decoded character, U_SPECIAL on decoding error or
71 * NULL if attempt to decode beyond @a size.
72 *
73 */
74wchar_t str_decode(const char *str, size_t *offset, size_t size)
75{
76 if (*offset + 1 > size)
77 return 0;
78
79 /* First byte read from string */
80 uint8_t b0 = (uint8_t) str[(*offset)++];
81
82 /* Determine code length */
83
84 unsigned int b0_bits; /* Data bits in first byte */
85 unsigned int cbytes; /* Number of continuation bytes */
86
87 if ((b0 & 0x80) == 0) {
88 /* 0xxxxxxx (Plain ASCII) */
89 b0_bits = 7;
90 cbytes = 0;
91 } else if ((b0 & 0xe0) == 0xc0) {
92 /* 110xxxxx 10xxxxxx */
93 b0_bits = 5;
94 cbytes = 1;
95 } else if ((b0 & 0xf0) == 0xe0) {
96 /* 1110xxxx 10xxxxxx 10xxxxxx */
97 b0_bits = 4;
98 cbytes = 2;
99 } else if ((b0 & 0xf8) == 0xf0) {
100 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
101 b0_bits = 3;
102 cbytes = 3;
103 } else {
104 /* 10xxxxxx -- unexpected continuation byte */
105 return U_SPECIAL;
106 }
107
108 if (*offset + cbytes > size)
109 return U_SPECIAL;
110
111 wchar_t ch = b0 & LO_MASK_8(b0_bits);
112
113 /* Decode continuation bytes */
114 while (cbytes > 0) {
115 uint8_t b = (uint8_t) str[(*offset)++];
116
117 /* Must be 10xxxxxx */
118 if ((b & 0xc0) != 0x80)
119 return U_SPECIAL;
120
121 /* Shift data bits to ch */
122 ch = (ch << CONT_BITS) | (wchar_t) (b & LO_MASK_8(CONT_BITS));
123 cbytes--;
124 }
125
126 return ch;
127}
128
129/** Encode a single character to string representation.
130 *
131 * Encode a single character to string representation (i.e. UTF-8) and store
132 * it into a buffer at @a offset. Encoding starts at @a offset and this offset
133 * is moved to the position where the next character can be written to.
134 *
135 * @param ch Input character.
136 * @param str Output buffer.
137 * @param offset Byte offset where to start writing.
138 * @param size Size of the output buffer (in bytes).
139 *
140 * @return EOK if the character was encoded successfully, EOVERFLOW if there
141 * was not enough space in the output buffer or EINVAL if the character
142 * code was invalid.
143 */
144int chr_encode(const wchar_t ch, char *str, size_t *offset, size_t size)
145{
146 if (*offset >= size)
147 return EOVERFLOW;
148
149 if (!chr_check(ch))
150 return EINVAL;
151
152 /* Unsigned version of ch (bit operations should only be done
153 on unsigned types). */
154 uint32_t cc = (uint32_t) ch;
155
156 /* Determine how many continuation bytes are needed */
157
158 unsigned int b0_bits; /* Data bits in first byte */
159 unsigned int cbytes; /* Number of continuation bytes */
160
161 if ((cc & ~LO_MASK_32(7)) == 0) {
162 b0_bits = 7;
163 cbytes = 0;
164 } else if ((cc & ~LO_MASK_32(11)) == 0) {
165 b0_bits = 5;
166 cbytes = 1;
167 } else if ((cc & ~LO_MASK_32(16)) == 0) {
168 b0_bits = 4;
169 cbytes = 2;
170 } else if ((cc & ~LO_MASK_32(21)) == 0) {
171 b0_bits = 3;
172 cbytes = 3;
173 } else {
174 /* Codes longer than 21 bits are not supported */
175 return EINVAL;
176 }
177
178 /* Check for available space in buffer */
179 if (*offset + cbytes >= size)
180 return EOVERFLOW;
181
182 /* Encode continuation bytes */
183 unsigned int i;
184 for (i = cbytes; i > 0; i--) {
185 str[*offset + i] = 0x80 | (cc & LO_MASK_32(CONT_BITS));
186 cc = cc >> CONT_BITS;
187 }
188
189 /* Encode first byte */
190 str[*offset] = (cc & LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1);
191
192 /* Advance offset */
193 *offset += cbytes + 1;
194
195 return EOK;
196}
197
/** Get size of string.
 *
 * Count the bytes occupied by the string @a str, not counting the
 * terminating NUL byte.
 *
 * @param str NUL-terminated string to measure.
 *
 * @return Number of bytes in the string.
 *
 */
size_t str_size(const char *str)
{
	const char *end = str;

	/* Walk up to the terminator and measure the distance travelled. */
	while (*end != 0)
		end++;

	return (size_t) (end - str);
}
217
/** Get size of wide string.
 *
 * Compute how many bytes the wide characters of @a str occupy, not
 * counting the terminating zero character.
 *
 * @param str Zero-terminated wide string to measure.
 *
 * @return Number of bytes used by the wide string.
 *
 */
size_t wstr_size(const wchar_t *str)
{
	size_t chars = wstr_length(str);

	return chars * sizeof(wchar_t);
}
232
233/** Get size of string with length limit.
234 *
235 * Get the number of bytes which are used by up to @a max_len first
236 * characters in the string @a str. If @a max_len is greater than
237 * the length of @a str, the entire string is measured (excluding the
238 * NULL-terminator).
239 *
240 * @param str String to consider.
241 * @param max_len Maximum number of characters to measure.
242 *
243 * @return Number of bytes used by the characters.
244 *
245 */
246size_t str_lsize(const char *str, size_t max_len)
247{
248 size_t len = 0;
249 size_t offset = 0;
250
251 while (len < max_len) {
252 if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
253 break;
254
255 len++;
256 }
257
258 return offset;
259}
260
/** Get size of wide string with length limit.
 *
 * Compute the number of bytes taken by at most @a max_len leading wide
 * characters of @a str. When the wide string is shorter than
 * @a max_len, all of it is measured (without the terminating zero
 * character).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	/* wstr_nlength() takes its limit in bytes, not in characters. */
	size_t byte_limit = max_len * sizeof(wchar_t);
	size_t chars = wstr_nlength(str, byte_limit);

	return chars * sizeof(wchar_t);
}
278
279/** Get number of characters in a string.
280 *
281 * @param str NULL-terminated string.
282 *
283 * @return Number of characters in string.
284 *
285 */
286size_t str_length(const char *str)
287{
288 size_t len = 0;
289 size_t offset = 0;
290
291 while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
292 len++;
293
294 return len;
295}
296
/** Get number of characters in a wide string.
 *
 * @param wstr Zero-terminated wide string.
 *
 * @return Number of wide characters preceding the terminator.
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
	const wchar_t *cur = wstr;

	/* Advance to the terminator; the distance is the length. */
	while (*cur != 0)
		cur++;

	return (size_t) (cur - wstr);
}
313
/** Get number of characters in a string with size limit.
 *
 * Characters are counted by decoding with str_decode(), which returns
 * zero both at the NUL terminator and once @a size bytes were consumed.
 *
 * @param str  NUL-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in the string.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t pos = 0;
	size_t count = 0;

	for (;;) {
		if (str_decode(str, &pos, size) == 0)
			break;

		count++;
	}

	return count;
}
332
/** Get number of characters in a wide string with size limit.
 *
 * Only whole wide characters that fit into the first @a size bytes are
 * examined; a trailing partial character is ignored.
 *
 * @param str  Zero-terminated wide string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of wide characters in the string.
 *
 */
size_t wstr_nlength(const wchar_t *str, size_t size)
{
	/* Number of complete wide characters inside the byte limit. */
	size_t max_chars = ALIGN_DOWN(size, sizeof(wchar_t)) / sizeof(wchar_t);
	size_t count = 0;

	while ((count < max_chars) && (str[count] != 0))
		count++;

	return count;
}
354
/** Check whether character is plain ASCII.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0 to 127 inclusive.
 *
 */
bool ascii_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 0x7f);
}
367
/** Check whether character is valid.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0 to 0x10ffff (1114111)
 *         inclusive.
 *
 */
bool chr_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 0x10ffff);
}
380
381/** Compare two NULL terminated strings.
382 *
383 * Do a char-by-char comparison of two NULL-terminated strings.
384 * The strings are considered equal iff they consist of the same
385 * characters on the minimum of their lengths.
386 *
387 * @param s1 First string to compare.
388 * @param s2 Second string to compare.
389 *
390 * @return 0 if the strings are equal, -1 if first is smaller,
391 * 1 if second smaller.
392 *
393 */
394int str_cmp(const char *s1, const char *s2)
395{
396 wchar_t c1 = 0;
397 wchar_t c2 = 0;
398
399 size_t off1 = 0;
400 size_t off2 = 0;
401
402 while (true) {
403 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
404 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
405
406 if (c1 < c2)
407 return -1;
408
409 if (c1 > c2)
410 return 1;
411
412 if (c1 == 0 || c2 == 0)
413 break;
414 }
415
416 return 0;
417}
418
419/** Compare two NULL terminated strings with length limit.
420 *
421 * Do a char-by-char comparison of two NULL-terminated strings.
422 * The strings are considered equal iff they consist of the same
423 * characters on the minimum of their lengths and the length limit.
424 *
425 * @param s1 First string to compare.
426 * @param s2 Second string to compare.
427 * @param max_len Maximum number of characters to consider.
428 *
429 * @return 0 if the strings are equal, -1 if first is smaller,
430 * 1 if second smaller.
431 *
432 */
433int str_lcmp(const char *s1, const char *s2, size_t max_len)
434{
435 wchar_t c1 = 0;
436 wchar_t c2 = 0;
437
438 size_t off1 = 0;
439 size_t off2 = 0;
440
441 size_t len = 0;
442
443 while (true) {
444 if (len >= max_len)
445 break;
446
447 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
448 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
449
450 if (c1 < c2)
451 return -1;
452
453 if (c1 > c2)
454 return 1;
455
456 if (c1 == 0 || c2 == 0)
457 break;
458
459 ++len;
460 }
461
462 return 0;
463
464}
465
466/** Copy string.
467 *
468 * Copy source string @a src to destination buffer @a dest.
469 * No more than @a size bytes are written. If the size of the output buffer
470 * is at least one byte, the output string will always be well-formed, i.e.
471 * null-terminated and containing only complete characters.
472 *
473 * @param dest Destination buffer.
474 * @param count Size of the destination buffer (must be > 0).
475 * @param src Source string.
476 */
477void str_cpy(char *dest, size_t size, const char *src)
478{
479 /* There must be space for a null terminator in the buffer. */
480 assert(size > 0);
481
482 size_t src_off = 0;
483 size_t dest_off = 0;
484
485 wchar_t ch;
486 while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
487 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
488 break;
489 }
490
491 dest[dest_off] = '\0';
492}
493
494/** Copy size-limited substring.
495 *
496 * Copy prefix of string @a src of max. size @a size to destination buffer
497 * @a dest. No more than @a size bytes are written. The output string will
498 * always be well-formed, i.e. null-terminated and containing only complete
499 * characters.
500 *
501 * No more than @a n bytes are read from the input string, so it does not
502 * have to be null-terminated.
503 *
504 * @param dest Destination buffer.
505 * @param count Size of the destination buffer (must be > 0).
506 * @param src Source string.
507 * @param n Maximum number of bytes to read from @a src.
508 */
509void str_ncpy(char *dest, size_t size, const char *src, size_t n)
510{
511 /* There must be space for a null terminator in the buffer. */
512 assert(size > 0);
513
514 size_t src_off = 0;
515 size_t dest_off = 0;
516
517 wchar_t ch;
518 while ((ch = str_decode(src, &src_off, n)) != 0) {
519 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
520 break;
521 }
522
523 dest[dest_off] = '\0';
524}
525
/** Append one string to another.
 *
 * Append source string @a src to the string in destination buffer
 * @a dest. Size of the destination buffer is @a size. As long as the
 * string already in @a dest leaves at least one free byte, the output
 * string will be well-formed, i.e. NUL-terminated and containing only
 * complete characters. If the existing string already occupies @a size
 * bytes or more, nothing is appended and @a dest is left untouched.
 *
 * @param dest Destination buffer holding a NUL-terminated string.
 * @param size Size of the destination buffer.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	size_t dstr_size = str_size(dest);

	/*
	 * No room left at all. Without this check the subtraction below
	 * would yield zero (tripping the assertion in str_cpy()) or wrap
	 * around to a huge bogus buffer size.
	 */
	if (dstr_size >= size)
		return;

	str_cpy(dest + dstr_size, size - dstr_size, src);
}
544
545/** Convert wide string to string.
546 *
547 * Convert wide string @a src to string. The output is written to the buffer
548 * specified by @a dest and @a size. @a size must be non-zero and the string
549 * written will always be well-formed.
550 *
551 * @param dest Destination buffer.
552 * @param size Size of the destination buffer.
553 * @param src Source wide string.
554 */
555void wstr_to_str(char *dest, size_t size, const wchar_t *src)
556{
557 wchar_t ch;
558 size_t src_idx;
559 size_t dest_off;
560
561 /* There must be space for a null terminator in the buffer. */
562 assert(size > 0);
563
564 src_idx = 0;
565 dest_off = 0;
566
567 while ((ch = src[src_idx++]) != 0) {
568 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
569 break;
570 }
571
572 dest[dest_off] = '\0';
573}
574
575/** Convert wide string to new string.
576 *
577 * Convert wide string @a src to string. Space for the new string is allocated
578 * on the heap.
579 *
580 * @param src Source wide string.
581 * @return New string.
582 */
583char *wstr_to_astr(const wchar_t *src)
584{
585 char dbuf[STR_BOUNDS(1)];
586 char *str;
587 wchar_t ch;
588
589 size_t src_idx;
590 size_t dest_off;
591 size_t dest_size;
592
593 /* Compute size of encoded string. */
594
595 src_idx = 0;
596 dest_size = 0;
597
598 while ((ch = src[src_idx++]) != 0) {
599 dest_off = 0;
600 if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
601 break;
602 dest_size += dest_off;
603 }
604
605 str = malloc(dest_size + 1);
606 if (str == NULL)
607 return NULL;
608
609 /* Encode string. */
610
611 src_idx = 0;
612 dest_off = 0;
613
614 while ((ch = src[src_idx++]) != 0) {
615 if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
616 break;
617 }
618
619 str[dest_size] = '\0';
620 return str;
621}
622
623
624/** Convert string to wide string.
625 *
626 * Convert string @a src to wide string. The output is written to the
627 * buffer specified by @a dest and @a dlen. @a dlen must be non-zero
628 * and the wide string written will always be null-terminated.
629 *
630 * @param dest Destination buffer.
631 * @param dlen Length of destination buffer (number of wchars).
632 * @param src Source string.
633 */
634void str_to_wstr(wchar_t *dest, size_t dlen, const char *src)
635{
636 size_t offset;
637 size_t di;
638 wchar_t c;
639
640 assert(dlen > 0);
641
642 offset = 0;
643 di = 0;
644
645 do {
646 if (di >= dlen - 1)
647 break;
648
649 c = str_decode(src, &offset, STR_NO_LIMIT);
650 dest[di++] = c;
651 } while (c != '\0');
652
653 dest[dlen - 1] = '\0';
654}
655
656/** Find first occurence of character in string.
657 *
658 * @param str String to search.
659 * @param ch Character to look for.
660 *
661 * @return Pointer to character in @a str or NULL if not found.
662 */
663char *str_chr(const char *str, wchar_t ch)
664{
665 wchar_t acc;
666 size_t off = 0;
667 size_t last = 0;
668
669 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
670 if (acc == ch)
671 return (char *) (str + last);
672 last = off;
673 }
674
675 return NULL;
676}
677
678/** Find last occurence of character in string.
679 *
680 * @param str String to search.
681 * @param ch Character to look for.
682 *
683 * @return Pointer to character in @a str or NULL if not found.
684 */
685char *str_rchr(const char *str, wchar_t ch)
686{
687 wchar_t acc;
688 size_t off = 0;
689 size_t last = 0;
690 const char *res = NULL;
691
692 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
693 if (acc == ch)
694 res = (str + last);
695 last = off;
696 }
697
698 return (char *) res;
699}
700
/** Insert a wide character into a wide string.
 *
 * Insert the wide character @a ch at character index @a pos. Everything
 * from @a pos onwards, the terminator included, is moved one position
 * towards the end to make room.
 *
 * NOTE(review): only @a pos is checked against @a max_pos; the current
 * string length is not. The caller presumably guarantees that the
 * buffer can hold one more character -- confirm against callers.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	if (pos > len)
		return false;

	if (pos + 1 > max_pos)
		return false;

	/*
	 * Shift the tail (terminator first) one slot up, walking from the
	 * end so that no element is overwritten before it is moved.
	 */
	size_t dst;
	for (dst = len + 1; dst > pos; dst--)
		str[dst] = str[dst - 1];

	str[pos] = ch;

	return true;
}
730
/** Remove a wide character from a wide string.
 *
 * Remove the wide character at character index @a pos. Everything
 * after it, the terminator included, is moved one position towards the
 * beginning.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	size_t len = wstr_length(str);

	if (pos >= len)
		return false;

	/* Pull each following element (up to the terminator) one slot down. */
	size_t dst;
	for (dst = pos; dst < len; dst++)
		str[dst] = str[dst + 1];

	return true;
}
756
/** Compare two strings byte-wise, ignoring case.
 *
 * Both strings are walked in parallel for as long as neither has ended
 * and the lower-cased bytes agree.
 *
 * @param a First string.
 * @param b Second string.
 *
 * @return Difference of the lower-cased bytes at the first position
 *         where the strings differ or one of them ends; zero if the
 *         strings are equal ignoring case.
 */
int stricmp(const char *a, const char *b)
{
	while ((*a != 0) && (*b != 0) && (tolower(*a) == tolower(*b))) {
		a++;
		b++;
	}

	return (tolower(*a) - tolower(*b));
}
766
/** Convert string to a number.
 *
 * Core of strtol and strtoul functions. Leading whitespace is skipped,
 * an optional sign is consumed, an optional `0x' prefix is accepted for
 * base 16 (or base 0) and then as many digits valid in the given base
 * as possible are converted.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character. When no digits were converted or the
 *               base is invalid, @a nptr is stored.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @param sgn    It's set to 1 if minus found, otherwise left untouched.
 *
 * @return Magnitude of the converted number, ULONG_MAX if it does not
 *         fit into unsigned long, 0 if nothing was converted.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
	const char *str = nptr;

	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*sgn = 1;
		str++;
	} else if (*str == '+') {
		str++;
	}

	if (base != 0) {
		if ((base < 2) || (base > 36)) {
			/* FIXME: set errno to EINVAL */
			if (endptr != NULL)
				*endptr = (char *) nptr;
			return 0;
		}

		if ((base == 16) && (str[0] == '0') &&
		    ((str[1] == 'x') || (str[1] == 'X')))
			str += 2;
	} else {
		/* Auto-detect: 0x.. is hexadecimal, 0.. is octal, else decimal */
		base = 10;

		if (*str == '0') {
			base = 8;
			if ((str[1] == 'X') || (str[1] == 'x')) {
				base = 16;
				str += 2;
			}
		}
	}

	const char *digits = str;
	unsigned long result = 0;
	int overflow = 0;

	for (; *str != '\0'; str++) {
		unsigned char c = (unsigned char) *str;
		unsigned int digit;

		if ((c >= '0') && (c <= '9'))
			digit = c - '0';
		else if ((c >= 'a') && (c <= 'z'))
			digit = c - 'a' + 10;
		else if ((c >= 'A') && (c <= 'Z'))
			digit = c - 'A' + 10;
		else
			break;

		/* Valid digits are 0 .. base - 1; a digit equal to base is not. */
		if (digit >= (unsigned int) base)
			break;

		/*
		 * After an overflow keep consuming digits so that endptr
		 * ends up behind the whole number, but stop accumulating.
		 */
		if (overflow)
			continue;

		if (result > (ULONG_MAX - digit) / (unsigned long) base) {
			/* FIXME: errno = ERANGE */
			overflow = 1;
			continue;
		}

		result = result * (unsigned long) base + digit;
	}

	if (str == digits) {
		/*
		 * No number was found => first invalid character is the first
		 * character of the string.
		 */
		/* FIXME: set errno to EINVAL */
		str = nptr;
		result = 0;
	}

	if (endptr != NULL)
		*endptr = (char *) str;

	if (overflow)
		return ULONG_MAX;

	return result;
}
859
/** Convert initial part of string to long int according to given base.
 *
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 *
 * @return Result of conversion; LONG_MAX or LONG_MIN if the value is
 *         out of range of long int.
 */
long int strtol(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = _strtoul(nptr, endptr, base, &sgn);

	if (number > LONG_MAX) {
		if ((sgn) && (number == (unsigned long) (LONG_MAX) + 1)) {
			/*
			 * Magnitude of LONG_MIN: in range, no error. Return the
			 * constant instead of converting an out-of-range
			 * unsigned value to long.
			 */
			/* FIXME: set 0 to errno */
			return LONG_MIN;
		}

		/* FIXME: set ERANGE to errno */
		return (sgn ? LONG_MIN : LONG_MAX);
	}

	/* The magnitude fits, so negate in the signed domain. */
	long int value = (long int) number;

	return (sgn ? -value : value);
}
891
/** Duplicate string.
 *
 * Allocate a buffer with malloc() large enough for the source string
 * and its terminator and copy the string into it using str_cpy(). The
 * duplicate is always NUL-terminated; because the copy is re-encoded,
 * it can differ from the source string on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	size_t bytes = str_size(src) + 1;
	char *copy = malloc(bytes);

	if (copy != NULL)
		str_cpy(copy, bytes, src);

	return copy;
}
918
/** Duplicate string with size limit.
 *
 * Allocate a new string and copy up to @a n bytes from the source
 * string into it. No more than @a n + 1 bytes are allocated, but if
 * the size occupied by the source string is smaller than @a n + 1,
 * less is allocated.
 *
 * At most @a n bytes of @a src are examined, so the source does not
 * have to be NUL-terminated as long as it is at least @a n bytes long.
 *
 * The duplicate string is always NUL-terminated; because the copy is
 * re-encoded by str_ncpy(), it can differ from the source string on
 * the byte level.
 *
 * @param src Source string.
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	/*
	 * Bounded scan for the terminator: never look beyond the first
	 * n bytes, no matter how long (or unterminated) the source is.
	 */
	size_t size = 0;
	while ((size < n) && (src[size] != '\0'))
		size++;

	char *dest = malloc(size + 1);
	if (dest == NULL)
		return NULL;

	str_ncpy(dest, size + 1, src, size);
	return dest;
}
952
953
/** Convert initial part of string to unsigned long according to given base.
 *
 * Leading whitespace is skipped and an optional sign (`+' or `-') is
 * accepted. With base 0 or 16 a `0x' prefix selects hexadecimal; with
 * base 0 a leading zero selects octal and anything else is decimal.
 * When a minus sign was present, the converted value is negated in
 * unsigned arithmetic.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 *
 * @return Result of conversion.
 */
unsigned long strtoul(const char *nptr, char **endptr, int base)
{
	char minus = 0;
	unsigned long magnitude = _strtoul(nptr, endptr, base, &minus);

	if (minus)
		return -magnitude;

	return magnitude;
}
976
/** Split string into tokens.
 *
 * Thin wrapper around strtok_r() which keeps the continuation pointer
 * in a function-local static variable shared by all callers.
 *
 * @param s     String to tokenize, or NULL to continue with the string
 *              from the previous call.
 * @param delim Set of delimiter characters.
 *
 * @return Next token, or NULL if there are no more tokens.
 */
char *strtok(char *s, const char *delim)
{
	static char *saved;

	return strtok_r(s, delim, &saved);
}
983
/** Split string into tokens, keeping state in a caller-supplied pointer.
 *
 * Leading delimiters are skipped, the token is then taken up to the
 * next delimiter (or the end of the string), and that delimiter is
 * overwritten with a NUL byte. The position from which the next call
 * should continue is stored in @a next.
 *
 * @param s     String to tokenize, or NULL to continue from @a *next.
 * @param delim Set of delimiter characters.
 * @param next  Where the continuation position is kept between calls.
 *
 * @return Pointer to the token, or NULL if there are no more tokens.
 */
char *strtok_r(char *s, const char *delim, char **next)
{
	char *cur = (s != NULL) ? s : *next;

	/* Skip over leading delimiters. */
	while ((*cur != '\0') && (str_chr(delim, *cur) != NULL))
		cur++;

	char *token = cur;

	/* Skip over token characters. */
	while ((*cur != '\0') && (str_chr(delim, *cur) == NULL))
		cur++;

	/* Continue behind the delimiter, or stay at the terminator. */
	*next = (*cur != '\0') ? (cur + 1) : cur;

	if (token == cur) {
		/* No more tokens. */
		return NULL;
	}

	/* Overwrite delimiter with NUL terminator. */
	*cur = '\0';
	return token;
}
1008
1009/** Convert string to uint64_t (internal variant).
1010 *
1011 * @param nptr Pointer to string.
1012 * @param endptr Pointer to the first invalid character is stored here.
1013 * @param base Zero or number between 2 and 36 inclusive.
1014 * @param neg Indication of unary minus is stored here.
1015 * @apram result Result of the conversion.
1016 *
1017 * @return EOK if conversion was successful.
1018 *
1019 */
1020static int str_uint(const char *nptr, char **endptr, unsigned int base,
1021 bool *neg, uint64_t *result)
1022{
1023 assert(endptr != NULL);
1024 assert(neg != NULL);
1025 assert(result != NULL);
1026
1027 *neg = false;
1028 const char *str = nptr;
1029
1030 /* Ignore leading whitespace */
1031 while (isspace(*str))
1032 str++;
1033
1034 if (*str == '-') {
1035 *neg = true;
1036 str++;
1037 } else if (*str == '+')
1038 str++;
1039
1040 if (base == 0) {
1041 /* Decode base if not specified */
1042 base = 10;
1043
1044 if (*str == '0') {
1045 base = 8;
1046 str++;
1047
1048 switch (*str) {
1049 case 'b':
1050 case 'B':
1051 base = 2;
1052 str++;
1053 break;
1054 case 'o':
1055 case 'O':
1056 base = 8;
1057 str++;
1058 break;
1059 case 'd':
1060 case 'D':
1061 case 't':
1062 case 'T':
1063 base = 10;
1064 str++;
1065 break;
1066 case 'x':
1067 case 'X':
1068 base = 16;
1069 str++;
1070 break;
1071 default:
1072 str--;
1073 }
1074 }
1075 } else {
1076 /* Check base range */
1077 if ((base < 2) || (base > 36)) {
1078 *endptr = (char *) str;
1079 return EINVAL;
1080 }
1081 }
1082
1083 *result = 0;
1084 const char *startstr = str;
1085
1086 while (*str != 0) {
1087 unsigned int digit;
1088
1089 if ((*str >= 'a') && (*str <= 'z'))
1090 digit = *str - 'a' + 10;
1091 else if ((*str >= 'A') && (*str <= 'Z'))
1092 digit = *str - 'A' + 10;
1093 else if ((*str >= '0') && (*str <= '9'))
1094 digit = *str - '0';
1095 else
1096 break;
1097
1098 if (digit >= base)
1099 break;
1100
1101 uint64_t prev = *result;
1102 *result = (*result) * base + digit;
1103
1104 if (*result < prev) {
1105 /* Overflow */
1106 *endptr = (char *) str;
1107 return EOVERFLOW;
1108 }
1109
1110 str++;
1111 }
1112
1113 if (str == startstr) {
1114 /*
1115 * No digits were decoded => first invalid character is
1116 * the first character of the string.
1117 */
1118 str = nptr;
1119 }
1120
1121 *endptr = (char *) str;
1122
1123 if (str == nptr)
1124 return EINVAL;
1125
1126 return EOK;
1127}
1128
1129/** Convert string to uint64_t.
1130 *
1131 * @param nptr Pointer to string.
1132 * @param endptr If not NULL, pointer to the first invalid character
1133 * is stored here.
1134 * @param base Zero or number between 2 and 36 inclusive.
1135 * @param strict Do not allow any trailing characters.
1136 * @param result Result of the conversion.
1137 *
1138 * @return EOK if conversion was successful.
1139 *
1140 */
1141int str_uint64(const char *nptr, char **endptr, unsigned int base,
1142 bool strict, uint64_t *result)
1143{
1144 assert(result != NULL);
1145
1146 bool neg;
1147 char *lendptr;
1148 int ret = str_uint(nptr, &lendptr, base, &neg, result);
1149
1150 if (endptr != NULL)
1151 *endptr = (char *) lendptr;
1152
1153 if (ret != EOK)
1154 return ret;
1155
1156 /* Do not allow negative values */
1157 if (neg)
1158 return EINVAL;
1159
1160 /* Check whether we are at the end of
1161 the string in strict mode */
1162 if ((strict) && (*lendptr != 0))
1163 return EINVAL;
1164
1165 return EOK;
1166}
1167
1168/** Convert string to size_t.
1169 *
1170 * @param nptr Pointer to string.
1171 * @param endptr If not NULL, pointer to the first invalid character
1172 * is stored here.
1173 * @param base Zero or number between 2 and 36 inclusive.
1174 * @param strict Do not allow any trailing characters.
1175 * @param result Result of the conversion.
1176 *
1177 * @return EOK if conversion was successful.
1178 *
1179 */
1180int str_size_t(const char *nptr, char **endptr, unsigned int base,
1181 bool strict, size_t *result)
1182{
1183 assert(result != NULL);
1184
1185 bool neg;
1186 char *lendptr;
1187 uint64_t res;
1188 int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1189
1190 if (endptr != NULL)
1191 *endptr = (char *) lendptr;
1192
1193 if (ret != EOK)
1194 return ret;
1195
1196 /* Do not allow negative values */
1197 if (neg)
1198 return EINVAL;
1199
1200 /* Check whether we are at the end of
1201 the string in strict mode */
1202 if ((strict) && (*lendptr != 0))
1203 return EINVAL;
1204
1205 /* Check for overflow */
1206 size_t _res = (size_t) res;
1207 if (_res != res)
1208 return EOVERFLOW;
1209
1210 *result = _res;
1211
1212 return EOK;
1213}
1214
/** Scale a value to a decimal (SI) order of magnitude.
 *
 * A value is scaled down by a power of 1000 only once it exceeds one
 * thousand times that power, so the scaled number keeps at least four
 * significant digits. The suffix is the SI prefix letter that matches
 * the divisor actually applied: 'k' (10^3), 'M' (10^6), 'G' (10^9),
 * 'T' (10^12), 'P' (10^15), 'E' (10^18), or a space if the value is
 * not scaled at all.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Prefix letter (or a space) is stored here.
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
1240
/** Scale a byte count to a binary order of magnitude.
 *
 * A value is scaled down by a power of 1024 only once it exceeds 1024
 * times that power. The suffix names the unit that matches the divisor
 * actually applied: "KiB" (2^10), "MiB" (2^20), "GiB" (2^30),
 * "TiB" (2^40) or "PiB" (2^50). Values that are not scaled get "B", or
 * a space-padded variant of it when @a fixed is set.
 *
 * @param val    Value to scale (in bytes).
 * @param rv     Scaled value is stored here.
 * @param suffix Pointer to a constant unit string is stored here.
 * @param fixed  Pad the plain byte suffix with a trailing space.
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* More than 2^60: report in units of 2^50 */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		/* More than 2^50: report in units of 2^40 */
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		/* More than 2^40: report in units of 2^30 */
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		/* More than 2^30: report in units of 2^20 */
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		/* More than 2^20: report in units of 2^10 */
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		/*
		 * NOTE(review): the padded form is presumably meant to line up
		 * with the three-character unit suffixes -- confirm the
		 * intended padding width.
		 */
		if (fixed)
			*suffix = "B ";
		else
			*suffix = "B";
	}
}
1267
1268/** @}
1269 */
Note: See TracBrowser for help on using the repository browser.