source: mainline/uspace/lib/c/generic/str.c@ 61e29a4d

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 61e29a4d was 61e29a4d, checked in by Oleg Romanenko <romanenko.oleg@…>, 14 years ago

Modifications in str.c

  1. Add function wstr_is_ascii
  2. Add return value (error code) to functions: wstr_to_str and str_to_wstr
  • Property mode set to 100644
File size: 29.2 KB
Line 
1/*
2 * Copyright (c) 2005 Martin Decky
3 * Copyright (c) 2008 Jiri Svoboda
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30/** @addtogroup libc
31 * @{
32 */
33/** @file
34 */
35
36#include <str.h>
37#include <stdlib.h>
38#include <assert.h>
39#include <stdint.h>
40#include <ctype.h>
41#include <malloc.h>
42#include <errno.h>
43#include <align.h>
44#include <mem.h>
45#include <str.h>
46
/*
 * Bit-mask helpers for the UTF-8 encoder/decoder. All shifts are done on
 * unsigned operands and every 8-bit mask is explicitly narrowed to uint8_t
 * so that integer promotion cannot turn it into a negative int.
 */

/** Byte mask consisting of lowest @n bits (out of 8), 0 <= n <= 8 */
#define LO_MASK_8(n)  ((uint8_t) ((1U << (n)) - 1))

/** Mask consisting of lowest @n bits (out of 32), 0 <= n <= 31 */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1))

/** Byte mask consisting of highest @n bits (out of 8), 0 <= n <= 8 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
58
59/** Decode a single character from a string.
60 *
61 * Decode a single character from a string of size @a size. Decoding starts
62 * at @a offset and this offset is moved to the beginning of the next
63 * character. In case of decoding error, offset generally advances at least
64 * by one. However, offset is never moved beyond size.
65 *
66 * @param str String (not necessarily NULL-terminated).
67 * @param offset Byte offset in string where to start decoding.
68 * @param size Size of the string (in bytes).
69 *
70 * @return Value of decoded character, U_SPECIAL on decoding error or
71 * NULL if attempt to decode beyond @a size.
72 *
73 */
74wchar_t str_decode(const char *str, size_t *offset, size_t size)
75{
76 if (*offset + 1 > size)
77 return 0;
78
79 /* First byte read from string */
80 uint8_t b0 = (uint8_t) str[(*offset)++];
81
82 /* Determine code length */
83
84 unsigned int b0_bits; /* Data bits in first byte */
85 unsigned int cbytes; /* Number of continuation bytes */
86
87 if ((b0 & 0x80) == 0) {
88 /* 0xxxxxxx (Plain ASCII) */
89 b0_bits = 7;
90 cbytes = 0;
91 } else if ((b0 & 0xe0) == 0xc0) {
92 /* 110xxxxx 10xxxxxx */
93 b0_bits = 5;
94 cbytes = 1;
95 } else if ((b0 & 0xf0) == 0xe0) {
96 /* 1110xxxx 10xxxxxx 10xxxxxx */
97 b0_bits = 4;
98 cbytes = 2;
99 } else if ((b0 & 0xf8) == 0xf0) {
100 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
101 b0_bits = 3;
102 cbytes = 3;
103 } else {
104 /* 10xxxxxx -- unexpected continuation byte */
105 return U_SPECIAL;
106 }
107
108 if (*offset + cbytes > size)
109 return U_SPECIAL;
110
111 wchar_t ch = b0 & LO_MASK_8(b0_bits);
112
113 /* Decode continuation bytes */
114 while (cbytes > 0) {
115 uint8_t b = (uint8_t) str[(*offset)++];
116
117 /* Must be 10xxxxxx */
118 if ((b & 0xc0) != 0x80)
119 return U_SPECIAL;
120
121 /* Shift data bits to ch */
122 ch = (ch << CONT_BITS) | (wchar_t) (b & LO_MASK_8(CONT_BITS));
123 cbytes--;
124 }
125
126 return ch;
127}
128
129/** Encode a single character to string representation.
130 *
131 * Encode a single character to string representation (i.e. UTF-8) and store
132 * it into a buffer at @a offset. Encoding starts at @a offset and this offset
133 * is moved to the position where the next character can be written to.
134 *
135 * @param ch Input character.
136 * @param str Output buffer.
137 * @param offset Byte offset where to start writing.
138 * @param size Size of the output buffer (in bytes).
139 *
140 * @return EOK if the character was encoded successfully, EOVERFLOW if there
141 * was not enough space in the output buffer or EINVAL if the character
142 * code was invalid.
143 */
144int chr_encode(const wchar_t ch, char *str, size_t *offset, size_t size)
145{
146 if (*offset >= size)
147 return EOVERFLOW;
148
149 if (!chr_check(ch))
150 return EINVAL;
151
152 /* Unsigned version of ch (bit operations should only be done
153 on unsigned types). */
154 uint32_t cc = (uint32_t) ch;
155
156 /* Determine how many continuation bytes are needed */
157
158 unsigned int b0_bits; /* Data bits in first byte */
159 unsigned int cbytes; /* Number of continuation bytes */
160
161 if ((cc & ~LO_MASK_32(7)) == 0) {
162 b0_bits = 7;
163 cbytes = 0;
164 } else if ((cc & ~LO_MASK_32(11)) == 0) {
165 b0_bits = 5;
166 cbytes = 1;
167 } else if ((cc & ~LO_MASK_32(16)) == 0) {
168 b0_bits = 4;
169 cbytes = 2;
170 } else if ((cc & ~LO_MASK_32(21)) == 0) {
171 b0_bits = 3;
172 cbytes = 3;
173 } else {
174 /* Codes longer than 21 bits are not supported */
175 return EINVAL;
176 }
177
178 /* Check for available space in buffer */
179 if (*offset + cbytes >= size)
180 return EOVERFLOW;
181
182 /* Encode continuation bytes */
183 unsigned int i;
184 for (i = cbytes; i > 0; i--) {
185 str[*offset + i] = 0x80 | (cc & LO_MASK_32(CONT_BITS));
186 cc = cc >> CONT_BITS;
187 }
188
189 /* Encode first byte */
190 str[*offset] = (cc & LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1);
191
192 /* Advance offset */
193 *offset += cbytes + 1;
194
195 return EOK;
196}
197
/** Get size of string.
 *
 * Count the bytes occupied by the string @a str, not counting the
 * terminating NUL byte.
 *
 * @param str NUL-terminated string to measure.
 *
 * @return Number of bytes used by the string.
 *
 */
size_t str_size(const char *str)
{
	const char *end = str;

	/* Walk to the terminator; the distance travelled is the size. */
	while (*end != 0)
		end++;

	return (size_t) (end - str);
}
217
/** Get size of wide string.
 *
 * Compute the number of bytes occupied by the wide string @a str, not
 * counting the terminating null wide character.
 *
 * @param str Null-terminated wide string to measure.
 *
 * @return Number of bytes used by the wide string.
 *
 */
size_t wstr_size(const wchar_t *str)
{
	size_t nchars = wstr_length(str);

	return nchars * sizeof(wchar_t);
}
232
233/** Get size of string with length limit.
234 *
235 * Get the number of bytes which are used by up to @a max_len first
236 * characters in the string @a str. If @a max_len is greater than
237 * the length of @a str, the entire string is measured (excluding the
238 * NULL-terminator).
239 *
240 * @param str String to consider.
241 * @param max_len Maximum number of characters to measure.
242 *
243 * @return Number of bytes used by the characters.
244 *
245 */
246size_t str_lsize(const char *str, size_t max_len)
247{
248 size_t len = 0;
249 size_t offset = 0;
250
251 while (len < max_len) {
252 if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
253 break;
254
255 len++;
256 }
257
258 return offset;
259}
260
/** Get size of wide string with length limit.
 *
 * Compute the number of bytes occupied by at most @a max_len leading
 * wide characters of @a str. When the wide string is shorter than
 * @a max_len, the whole string is measured (without its terminator).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	/* wstr_nlength() takes its limit in bytes, not in characters. */
	size_t max_bytes = max_len * sizeof(wchar_t);
	size_t nchars = wstr_nlength(str, max_bytes);

	return nchars * sizeof(wchar_t);
}
278
279/** Get number of characters in a string.
280 *
281 * @param str NULL-terminated string.
282 *
283 * @return Number of characters in string.
284 *
285 */
286size_t str_length(const char *str)
287{
288 size_t len = 0;
289 size_t offset = 0;
290
291 while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
292 len++;
293
294 return len;
295}
296
/** Get number of characters in a wide string.
 *
 * @param wstr Null-terminated wide string.
 *
 * @return Number of wide characters in @a wstr, not counting the
 *         terminator.
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
	const wchar_t *cur = wstr;

	/* Advance to the terminator; the element distance is the length. */
	while (*cur != 0)
		cur++;

	return (size_t) (cur - wstr);
}
313
/** Get number of characters in a string with size limit.
 *
 * Characters are counted by decoding with str_decode() until it yields
 * 0, which happens at the NUL terminator or once @a size bytes have
 * been consumed.
 *
 * @param str  String to measure.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in string.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t pos = 0;
	size_t count;

	for (count = 0; str_decode(str, &pos, size) != 0; count++)
		;

	return count;
}
332
333/** Get number of characters in a string with size limit.
334 *
335 * @param str NULL-terminated string.
336 * @param size Maximum number of bytes to consider.
337 *
338 * @return Number of characters in string.
339 *
340 */
341size_t wstr_nlength(const wchar_t *str, size_t size)
342{
343 size_t len = 0;
344 size_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
345 size_t offset = 0;
346
347 while ((offset < limit) && (*str++ != 0)) {
348 len++;
349 offset += sizeof(wchar_t);
350 }
351
352 return len;
353}
354
/** Check whether character is plain ASCII.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0..127, false otherwise.
 *
 */
bool ascii_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 127);
}
367
/** Check whether wide string is plain ASCII.
 *
 * @param wstr Null-terminated wide string to test.
 *
 * @return True if every character of @a wstr passes ascii_check()
 *         (an empty string qualifies), false otherwise.
 *
 */
bool wstr_is_ascii(const wchar_t *wstr)
{
	for (; *wstr != 0; wstr++) {
		if (!ascii_check(*wstr))
			return false;
	}

	return true;
}
379
/** Check whether character is valid.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the Unicode code point range
 *         0..0x10ffff (1114111), false otherwise.
 *
 */
bool chr_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 0x10ffff);
}
392
393/** Compare two NULL terminated strings.
394 *
395 * Do a char-by-char comparison of two NULL-terminated strings.
396 * The strings are considered equal iff they consist of the same
397 * characters on the minimum of their lengths.
398 *
399 * @param s1 First string to compare.
400 * @param s2 Second string to compare.
401 *
402 * @return 0 if the strings are equal, -1 if first is smaller,
403 * 1 if second smaller.
404 *
405 */
406int str_cmp(const char *s1, const char *s2)
407{
408 wchar_t c1 = 0;
409 wchar_t c2 = 0;
410
411 size_t off1 = 0;
412 size_t off2 = 0;
413
414 while (true) {
415 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
416 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
417
418 if (c1 < c2)
419 return -1;
420
421 if (c1 > c2)
422 return 1;
423
424 if (c1 == 0 || c2 == 0)
425 break;
426 }
427
428 return 0;
429}
430
431/** Compare two NULL terminated strings with length limit.
432 *
433 * Do a char-by-char comparison of two NULL-terminated strings.
434 * The strings are considered equal iff they consist of the same
435 * characters on the minimum of their lengths and the length limit.
436 *
437 * @param s1 First string to compare.
438 * @param s2 Second string to compare.
439 * @param max_len Maximum number of characters to consider.
440 *
441 * @return 0 if the strings are equal, -1 if first is smaller,
442 * 1 if second smaller.
443 *
444 */
445int str_lcmp(const char *s1, const char *s2, size_t max_len)
446{
447 wchar_t c1 = 0;
448 wchar_t c2 = 0;
449
450 size_t off1 = 0;
451 size_t off2 = 0;
452
453 size_t len = 0;
454
455 while (true) {
456 if (len >= max_len)
457 break;
458
459 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
460 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
461
462 if (c1 < c2)
463 return -1;
464
465 if (c1 > c2)
466 return 1;
467
468 if (c1 == 0 || c2 == 0)
469 break;
470
471 ++len;
472 }
473
474 return 0;
475
476}
477
478/** Copy string.
479 *
480 * Copy source string @a src to destination buffer @a dest.
481 * No more than @a size bytes are written. If the size of the output buffer
482 * is at least one byte, the output string will always be well-formed, i.e.
483 * null-terminated and containing only complete characters.
484 *
485 * @param dest Destination buffer.
486 * @param count Size of the destination buffer (must be > 0).
487 * @param src Source string.
488 */
489void str_cpy(char *dest, size_t size, const char *src)
490{
491 /* There must be space for a null terminator in the buffer. */
492 assert(size > 0);
493
494 size_t src_off = 0;
495 size_t dest_off = 0;
496
497 wchar_t ch;
498 while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
499 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
500 break;
501 }
502
503 dest[dest_off] = '\0';
504}
505
506/** Copy size-limited substring.
507 *
508 * Copy prefix of string @a src of max. size @a size to destination buffer
509 * @a dest. No more than @a size bytes are written. The output string will
510 * always be well-formed, i.e. null-terminated and containing only complete
511 * characters.
512 *
513 * No more than @a n bytes are read from the input string, so it does not
514 * have to be null-terminated.
515 *
516 * @param dest Destination buffer.
517 * @param count Size of the destination buffer (must be > 0).
518 * @param src Source string.
519 * @param n Maximum number of bytes to read from @a src.
520 */
521void str_ncpy(char *dest, size_t size, const char *src, size_t n)
522{
523 /* There must be space for a null terminator in the buffer. */
524 assert(size > 0);
525
526 size_t src_off = 0;
527 size_t dest_off = 0;
528
529 wchar_t ch;
530 while ((ch = str_decode(src, &src_off, n)) != 0) {
531 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
532 break;
533 }
534
535 dest[dest_off] = '\0';
536}
537
/** Append one string to another.
 *
 * Append source string @a src to the string in destination buffer
 * @a dest. Size of the destination buffer is @a size. The appended part
 * is written via str_cpy(), so the result is NUL-terminated and contains
 * only complete characters. If the string already in @a dest occupies
 * @a size bytes or more (i.e. there is no room even for a terminator),
 * the buffer is left untouched.
 *
 * @param dest Destination buffer holding a NUL-terminated string.
 * @param size Size of the destination buffer.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	size_t dstr_size;

	dstr_size = str_size(dest);

	/*
	 * Without this guard size - dstr_size would be zero (tripping the
	 * assertion in str_cpy()) or wrap around to a huge value.
	 */
	if (dstr_size >= size)
		return;

	str_cpy(dest + dstr_size, size - dstr_size, src);
}
556
557/** Convert wide string to string.
558 *
559 * Convert wide string @a src to string. The output is written to the buffer
560 * specified by @a dest and @a size. @a size must be non-zero and the string
561 * written will always be well-formed.
562 *
563 * @param dest Destination buffer.
564 * @param size Size of the destination buffer.
565 * @param src Source wide string.
566 *
567 * @return EOK, if success, negative otherwise.
568 */
569int wstr_to_str(char *dest, size_t size, const wchar_t *src)
570{
571 int rc;
572 wchar_t ch;
573 size_t src_idx;
574 size_t dest_off;
575
576 /* There must be space for a null terminator in the buffer. */
577 assert(size > 0);
578
579 src_idx = 0;
580 dest_off = 0;
581
582 while ((ch = src[src_idx++]) != 0) {
583 rc = chr_encode(ch, dest, &dest_off, size - 1);
584 if (rc != EOK)
585 break;
586 }
587
588 dest[dest_off] = '\0';
589 return rc;
590}
591
592/** Convert wide string to new string.
593 *
594 * Convert wide string @a src to string. Space for the new string is allocated
595 * on the heap.
596 *
597 * @param src Source wide string.
598 * @return New string.
599 */
600char *wstr_to_astr(const wchar_t *src)
601{
602 char dbuf[STR_BOUNDS(1)];
603 char *str;
604 wchar_t ch;
605
606 size_t src_idx;
607 size_t dest_off;
608 size_t dest_size;
609
610 /* Compute size of encoded string. */
611
612 src_idx = 0;
613 dest_size = 0;
614
615 while ((ch = src[src_idx++]) != 0) {
616 dest_off = 0;
617 if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
618 break;
619 dest_size += dest_off;
620 }
621
622 str = malloc(dest_size + 1);
623 if (str == NULL)
624 return NULL;
625
626 /* Encode string. */
627
628 src_idx = 0;
629 dest_off = 0;
630
631 while ((ch = src[src_idx++]) != 0) {
632 if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
633 break;
634 }
635
636 str[dest_size] = '\0';
637 return str;
638}
639
640
641/** Convert string to wide string.
642 *
643 * Convert string @a src to wide string. The output is written to the
644 * buffer specified by @a dest and @a dlen. @a dlen must be non-zero
645 * and the wide string written will always be null-terminated.
646 *
647 * @param dest Destination buffer.
648 * @param dlen Length of destination buffer (number of wchars).
649 * @param src Source string.
650 *
651 * @return EOK, if success, negative otherwise.
652 */
653int str_to_wstr(wchar_t *dest, size_t dlen, const char *src)
654{
655 int rc=EOK;
656 size_t offset;
657 size_t di;
658 wchar_t c;
659
660 assert(dlen > 0);
661
662 offset = 0;
663 di = 0;
664
665 do {
666 if (di >= dlen - 1) {
667 rc = EOVERFLOW;
668 break;
669 }
670
671 c = str_decode(src, &offset, STR_NO_LIMIT);
672 dest[di++] = c;
673 } while (c != '\0');
674
675 dest[dlen - 1] = '\0';
676 return rc;
677}
678
679/** Find first occurence of character in string.
680 *
681 * @param str String to search.
682 * @param ch Character to look for.
683 *
684 * @return Pointer to character in @a str or NULL if not found.
685 */
686char *str_chr(const char *str, wchar_t ch)
687{
688 wchar_t acc;
689 size_t off = 0;
690 size_t last = 0;
691
692 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
693 if (acc == ch)
694 return (char *) (str + last);
695 last = off;
696 }
697
698 return NULL;
699}
700
701/** Find last occurence of character in string.
702 *
703 * @param str String to search.
704 * @param ch Character to look for.
705 *
706 * @return Pointer to character in @a str or NULL if not found.
707 */
708char *str_rchr(const char *str, wchar_t ch)
709{
710 wchar_t acc;
711 size_t off = 0;
712 size_t last = 0;
713 const char *res = NULL;
714
715 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
716 if (acc == ch)
717 res = (str + last);
718 last = off;
719 }
720
721 return (char *) res;
722}
723
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position @a pos. The
 * characters from that position on (including the terminator) are
 * shifted by one towards the end.
 *
 * Only @a pos is checked against @a max_pos; the caller has to make
 * sure the buffer can hold one more character than the current string.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	if (pos > len)
		return false;

	if (pos + 1 > max_pos)
		return false;

	/* Move the tail, terminator first, one slot up. */
	for (size_t dst = len + 1; dst > pos; dst--)
		str[dst] = str[dst - 1];

	str[pos] = ch;

	return true;
}
753
/** Remove a wide character from a wide string.
 *
 * Remove the wide character at position @a pos. The characters after
 * that position (including the terminator) are shifted by one towards
 * the beginning.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	size_t len = wstr_length(str);

	if (pos >= len)
		return false;

	/* Pull the tail down by one; the last copy moves the terminator. */
	for (size_t dst = pos; dst < len; dst++)
		str[dst] = str[dst + 1];

	return true;
}
779
/** Compare two NUL-terminated strings ignoring case.
 *
 * The strings are compared byte by byte after folding each byte with
 * tolower(). Bytes are converted to unsigned char first, as required by
 * the <ctype.h> interface.
 *
 * @param a First string to compare.
 * @param b Second string to compare.
 *
 * @return Zero if the strings are equal, a negative value if @a a sorts
 *         before @a b, a positive value otherwise.
 */
int stricmp(const char *a, const char *b)
{
	size_t i = 0;

	for (;;) {
		int ca = tolower((unsigned char) a[i]);
		int cb = tolower((unsigned char) b[i]);

		/* Stop at the first difference or at the common terminator. */
		if ((ca != cb) || (ca == 0))
			return ca - cb;

		i++;
	}
}
789
/** Convert string to a number.
 * Core of strtol and strtoul functions.
 *
 * Leading whitespace and an optional sign are skipped. With base 0 the
 * base is derived from the prefix (`0x' for 16, `0' for 8, otherwise
 * 10); with base 16 an optional `0x' prefix is skipped. Digits are then
 * accumulated until the first character that is not a digit of the
 * given base.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character (@a nptr itself if nothing was
 *               converted or the base is invalid).
 * @param base   Zero or number between 2 and 36 inclusive.
 * @param sgn    It's set to 1 if minus found; left untouched otherwise.
 * @return       Result of conversion, 0 if nothing was converted,
 *               ULONG_MAX if the value does not fit.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
	unsigned long result = 0;
	unsigned long a, b;
	bool overflow = false;
	const char *str = nptr;
	const char *tmpptr;

	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*sgn = 1;
		++str;
	} else if (*str == '+')
		++str;

	if (base) {
		if ((base < 2) || (base > 36)) {
			/* FIXME: set errno to EINVAL */
			if (endptr)
				*endptr = (char *) nptr;
			return 0;
		}
		if ((base == 16) && (*str == '0') && ((str[1] == 'x') ||
		    (str[1] == 'X'))) {
			str += 2;
		}
	} else {
		base = 10;

		if (*str == '0') {
			base = 8;
			if ((str[1] == 'X') || (str[1] == 'x')) {
				base = 16;
				str += 2;
			}
		}
	}

	tmpptr = str;

	while (*str) {
		unsigned char c = (unsigned char) *str;
		unsigned int digit;

		if ((c >= '0') && (c <= '9'))
			digit = c - '0';
		else if ((c >= 'a') && (c <= 'z'))
			digit = c - 'a' + 10;
		else if ((c >= 'A') && (c <= 'Z'))
			digit = c - 'A' + 10;
		else
			break;

		/* Valid digits of base N are 0 .. N - 1. */
		if (digit >= (unsigned int) base)
			break;

		if (!overflow) {
			/*
			 * Multiply in two halves so that an overflow of the
			 * unsigned long can be detected before it happens.
			 */
			a = (result & 0xff) * base + digit;
			b = (result >> 8) * base + (a >> 8);

			if (b > (ULONG_MAX >> 8)) {
				/* FIXME: errno = ERANGE */
				overflow = true;
			} else {
				result = (b << 8) + (a & 0xff);
			}
		}

		/* Keep consuming digits so that endptr ends up past them. */
		++str;
	}

	if (str == tmpptr) {
		/*
		 * No number was found => first invalid character is the first
		 * character of the string.
		 */
		/* FIXME: set errno to EINVAL */
		str = nptr;
		result = 0;
	}

	if (endptr)
		*endptr = (char *) str;

	if (nptr == str) {
		/* FIXME: errno = EINVAL */
		return 0;
	}

	return overflow ? ULONG_MAX : result;
}
882
/** Convert initial part of string to long int according to given base.
 *
 * The number may be preceded by any amount of whitespace and an
 * optional sign (`+' or `-'). With base 0 or 16 a `0x' prefix selects
 * hexadecimal notation. With base 0 a leading zero selects octal
 * notation; otherwise base 0 means decimal.
 *
 * Magnitudes that do not fit into long int are clamped to LONG_MIN or
 * LONG_MAX depending on the sign.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return       Result of conversion.
 */
long int strtol(const char *nptr, char **endptr, int base)
{
	char negative = 0;
	unsigned long magnitude = _strtoul(nptr, endptr, base, &negative);

	if (magnitude <= LONG_MAX)
		return (negative ? -magnitude : magnitude);

	/* Exactly -(LONG_MAX + 1): passed through without clamping. */
	if ((negative) && (magnitude == (unsigned long) (LONG_MAX) + 1)) {
		/* FIXME: set 0 to errno */
		return magnitude;
	}

	/* FIXME: set ERANGE to errno */
	return (negative ? LONG_MIN : LONG_MAX);
}
914
/** Duplicate string.
 *
 * Allocate a new buffer with malloc() and copy the source string into
 * it using str_cpy(). Because str_cpy() decodes and re-encodes the
 * characters, the duplicate is a NUL-terminated string that may differ
 * from the source on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	/* Room for every byte of the source plus the terminator. */
	size_t bufsize = str_size(src) + 1;

	char *copy = (char *) malloc(bufsize);
	if (copy != NULL)
		str_cpy(copy, bufsize, src);

	return copy;
}
941
/** Duplicate string with size limit.
 *
 * Allocate a new buffer with malloc() and copy up to @a n bytes of the
 * source string into it using str_ncpy(). At most @a n + 1 bytes are
 * allocated; if the source string occupies fewer than @a n bytes, the
 * allocation shrinks accordingly.
 *
 * Because str_ncpy() decodes and re-encodes the characters, the
 * duplicate is a NUL-terminated string that may differ from the source
 * on the byte level.
 *
 * @param src Source string.
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	size_t nbytes = str_size(src);
	if (nbytes > n)
		nbytes = n;

	char *copy = (char *) malloc(nbytes + 1);
	if (copy != NULL)
		str_ncpy(copy, nbytes + 1, src, nbytes);

	return copy;
}
975
976
/** Convert initial part of string to unsigned long according to given base.
 *
 * The number may be preceded by any amount of whitespace and an
 * optional sign (`+' or `-'). With base 0 or 16 a `0x' prefix selects
 * hexadecimal notation. With base 0 a leading zero selects octal
 * notation; otherwise base 0 means decimal. If a minus sign was
 * present, the result is negated in unsigned arithmetic.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return       Result of conversion.
 */
unsigned long strtoul(const char *nptr, char **endptr, int base)
{
	char negative = 0;
	unsigned long magnitude = _strtoul(nptr, endptr, base, &negative);

	if (negative)
		return -magnitude;

	return magnitude;
}
999
/** Split string into tokens (variant with hidden state).
 *
 * Thin wrapper around strtok_r() that keeps the continuation pointer in
 * a function-local static variable, so the state is shared by all
 * callers.
 *
 * @param s     String to tokenize, or NULL to continue with the string
 *              from the previous call.
 * @param delim String of delimiter characters.
 *
 * @return Next token or NULL if there are no more tokens.
 */
char *strtok(char *s, const char *delim)
{
	static char *saved;

	return strtok_r(s, delim, &saved);
}
1006
/** Split string into tokens.
 *
 * Skips leading delimiters, then collects bytes up to the next
 * delimiter or the end of the string. The delimiter that ends the token
 * is overwritten with a NUL byte, so the input string is modified.
 *
 * @param s     String to tokenize, or NULL to continue from @a *next.
 * @param delim String of delimiter characters.
 * @param next  Continuation pointer; updated on every call.
 *
 * @return Next token or NULL if there are no more tokens.
 */
char *strtok_r(char *s, const char *delim, char **next)
{
	char *cur = (s != NULL) ? s : *next;

	/* Skip over leading delimiters. */
	while ((*cur != '\0') && (str_chr(delim, *cur) != NULL))
		cur++;

	char *token = cur;

	/* Skip over token characters. */
	while ((*cur != '\0') && (str_chr(delim, *cur) == NULL))
		cur++;

	/* Resume after the delimiter, or stay on the terminator. */
	*next = (*cur != '\0') ? cur + 1 : cur;

	if (token == cur) {
		/* No more tokens. */
		return NULL;
	}

	/* Overwrite delimiter with NULL terminator. */
	*cur = '\0';
	return token;
}
1031
1032/** Convert string to uint64_t (internal variant).
1033 *
1034 * @param nptr Pointer to string.
1035 * @param endptr Pointer to the first invalid character is stored here.
1036 * @param base Zero or number between 2 and 36 inclusive.
1037 * @param neg Indication of unary minus is stored here.
1038 * @apram result Result of the conversion.
1039 *
1040 * @return EOK if conversion was successful.
1041 *
1042 */
1043static int str_uint(const char *nptr, char **endptr, unsigned int base,
1044 bool *neg, uint64_t *result)
1045{
1046 assert(endptr != NULL);
1047 assert(neg != NULL);
1048 assert(result != NULL);
1049
1050 *neg = false;
1051 const char *str = nptr;
1052
1053 /* Ignore leading whitespace */
1054 while (isspace(*str))
1055 str++;
1056
1057 if (*str == '-') {
1058 *neg = true;
1059 str++;
1060 } else if (*str == '+')
1061 str++;
1062
1063 if (base == 0) {
1064 /* Decode base if not specified */
1065 base = 10;
1066
1067 if (*str == '0') {
1068 base = 8;
1069 str++;
1070
1071 switch (*str) {
1072 case 'b':
1073 case 'B':
1074 base = 2;
1075 str++;
1076 break;
1077 case 'o':
1078 case 'O':
1079 base = 8;
1080 str++;
1081 break;
1082 case 'd':
1083 case 'D':
1084 case 't':
1085 case 'T':
1086 base = 10;
1087 str++;
1088 break;
1089 case 'x':
1090 case 'X':
1091 base = 16;
1092 str++;
1093 break;
1094 default:
1095 str--;
1096 }
1097 }
1098 } else {
1099 /* Check base range */
1100 if ((base < 2) || (base > 36)) {
1101 *endptr = (char *) str;
1102 return EINVAL;
1103 }
1104 }
1105
1106 *result = 0;
1107 const char *startstr = str;
1108
1109 while (*str != 0) {
1110 unsigned int digit;
1111
1112 if ((*str >= 'a') && (*str <= 'z'))
1113 digit = *str - 'a' + 10;
1114 else if ((*str >= 'A') && (*str <= 'Z'))
1115 digit = *str - 'A' + 10;
1116 else if ((*str >= '0') && (*str <= '9'))
1117 digit = *str - '0';
1118 else
1119 break;
1120
1121 if (digit >= base)
1122 break;
1123
1124 uint64_t prev = *result;
1125 *result = (*result) * base + digit;
1126
1127 if (*result < prev) {
1128 /* Overflow */
1129 *endptr = (char *) str;
1130 return EOVERFLOW;
1131 }
1132
1133 str++;
1134 }
1135
1136 if (str == startstr) {
1137 /*
1138 * No digits were decoded => first invalid character is
1139 * the first character of the string.
1140 */
1141 str = nptr;
1142 }
1143
1144 *endptr = (char *) str;
1145
1146 if (str == nptr)
1147 return EINVAL;
1148
1149 return EOK;
1150}
1151
1152/** Convert string to uint64_t.
1153 *
1154 * @param nptr Pointer to string.
1155 * @param endptr If not NULL, pointer to the first invalid character
1156 * is stored here.
1157 * @param base Zero or number between 2 and 36 inclusive.
1158 * @param strict Do not allow any trailing characters.
1159 * @param result Result of the conversion.
1160 *
1161 * @return EOK if conversion was successful.
1162 *
1163 */
1164int str_uint64(const char *nptr, char **endptr, unsigned int base,
1165 bool strict, uint64_t *result)
1166{
1167 assert(result != NULL);
1168
1169 bool neg;
1170 char *lendptr;
1171 int ret = str_uint(nptr, &lendptr, base, &neg, result);
1172
1173 if (endptr != NULL)
1174 *endptr = (char *) lendptr;
1175
1176 if (ret != EOK)
1177 return ret;
1178
1179 /* Do not allow negative values */
1180 if (neg)
1181 return EINVAL;
1182
1183 /* Check whether we are at the end of
1184 the string in strict mode */
1185 if ((strict) && (*lendptr != 0))
1186 return EINVAL;
1187
1188 return EOK;
1189}
1190
1191/** Convert string to size_t.
1192 *
1193 * @param nptr Pointer to string.
1194 * @param endptr If not NULL, pointer to the first invalid character
1195 * is stored here.
1196 * @param base Zero or number between 2 and 36 inclusive.
1197 * @param strict Do not allow any trailing characters.
1198 * @param result Result of the conversion.
1199 *
1200 * @return EOK if conversion was successful.
1201 *
1202 */
1203int str_size_t(const char *nptr, char **endptr, unsigned int base,
1204 bool strict, size_t *result)
1205{
1206 assert(result != NULL);
1207
1208 bool neg;
1209 char *lendptr;
1210 uint64_t res;
1211 int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1212
1213 if (endptr != NULL)
1214 *endptr = (char *) lendptr;
1215
1216 if (ret != EOK)
1217 return ret;
1218
1219 /* Do not allow negative values */
1220 if (neg)
1221 return EINVAL;
1222
1223 /* Check whether we are at the end of
1224 the string in strict mode */
1225 if ((strict) && (*lendptr != 0))
1226 return EINVAL;
1227
1228 /* Check for overflow */
1229 size_t _res = (size_t) res;
1230 if (_res != res)
1231 return EOVERFLOW;
1232
1233 *result = _res;
1234
1235 return EOK;
1236}
1237
/** Scale a value down and pick the matching decimal (SI) suffix.
 *
 * A tier is selected only when @a val is strictly greater than one
 * thousand times the tier's divisor, so the scaled value always keeps
 * at least four significant digits.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Suffix character is stored here: 'E' (10^18),
 *               'P' (10^15), 'T' (10^12), 'G' (10^9), 'M' (10^6),
 *               'k' (10^3) or ' ' when no scaling was applied.
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/* Divisor is 10^18, i.e. exa (not zetta, which is 10^21) */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* Divisor is 10^15, i.e. peta */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
1263
/** Scale a byte count down and pick the matching binary (IEC) suffix.
 *
 * A tier is selected only when @a val is strictly greater than 1024
 * times the tier's divisor, so the scaled value always keeps at least
 * four significant digits.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Pointer to a static suffix string is stored here:
 *               "PiB" (2^50), "TiB" (2^40), "GiB" (2^30), "MiB" (2^20),
 *               "KiB" (2^10) or plain bytes.
 * @param fixed  If true, the plain byte suffix is padded to three
 *               characters ("B  ") to match the width of the others.
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* Divisor is 2^50, i.e. pebi (exbi would be 2^60) */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		if (fixed)
			*suffix = "B  ";
		else
			*suffix = "B";
	}
}
1290
1291/** @}
1292 */
Note: See TracBrowser for help on using the repository browser.