source: mainline/uspace/lib/c/generic/str.c@ 2e839dda

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 2e839dda was 2e839dda, checked in by Oleg Romanenko <romanenko.oleg@…>, 14 years ago

New functions for string library:

  1. Lookup for character in wide string

wstr_chr
wstr_rchr

  1. Convert size_t to string: size_t_str
  2. Reverse string: str_reverse
  • Property mode set to 100644
File size: 30.5 KB
Line 
1/*
2 * Copyright (c) 2005 Martin Decky
3 * Copyright (c) 2008 Jiri Svoboda
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30/** @addtogroup libc
31 * @{
32 */
33/** @file
34 */
35
36#include <str.h>
37#include <stdlib.h>
38#include <assert.h>
39#include <stdint.h>
40#include <ctype.h>
41#include <malloc.h>
42#include <errno.h>
43#include <align.h>
44#include <mem.h>
45#include <str.h>
46
/** Byte mask consisting of lowest @n bits (out of 8) */
#define LO_MASK_8(n)  ((uint8_t) ((1 << (n)) - 1))

/**
 * Byte mask consisting of lowest @n bits (out of 32).
 *
 * The shifted constant is unsigned so that the shift never touches
 * the sign bit of a signed int.
 */
#define LO_MASK_32(n)  ((uint32_t) ((((uint32_t) 1) << (n)) - 1))

/**
 * Byte mask consisting of highest @n bits (out of 8).
 *
 * The complement is computed on a promoted int, therefore the result is
 * cast back to uint8_t; otherwise the mask would be a negative int with
 * all bits above bit 7 set.
 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
58
59/** Decode a single character from a string.
60 *
61 * Decode a single character from a string of size @a size. Decoding starts
62 * at @a offset and this offset is moved to the beginning of the next
63 * character. In case of decoding error, offset generally advances at least
64 * by one. However, offset is never moved beyond size.
65 *
66 * @param str String (not necessarily NULL-terminated).
67 * @param offset Byte offset in string where to start decoding.
68 * @param size Size of the string (in bytes).
69 *
70 * @return Value of decoded character, U_SPECIAL on decoding error or
71 * NULL if attempt to decode beyond @a size.
72 *
73 */
74wchar_t str_decode(const char *str, size_t *offset, size_t size)
75{
76 if (*offset + 1 > size)
77 return 0;
78
79 /* First byte read from string */
80 uint8_t b0 = (uint8_t) str[(*offset)++];
81
82 /* Determine code length */
83
84 unsigned int b0_bits; /* Data bits in first byte */
85 unsigned int cbytes; /* Number of continuation bytes */
86
87 if ((b0 & 0x80) == 0) {
88 /* 0xxxxxxx (Plain ASCII) */
89 b0_bits = 7;
90 cbytes = 0;
91 } else if ((b0 & 0xe0) == 0xc0) {
92 /* 110xxxxx 10xxxxxx */
93 b0_bits = 5;
94 cbytes = 1;
95 } else if ((b0 & 0xf0) == 0xe0) {
96 /* 1110xxxx 10xxxxxx 10xxxxxx */
97 b0_bits = 4;
98 cbytes = 2;
99 } else if ((b0 & 0xf8) == 0xf0) {
100 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
101 b0_bits = 3;
102 cbytes = 3;
103 } else {
104 /* 10xxxxxx -- unexpected continuation byte */
105 return U_SPECIAL;
106 }
107
108 if (*offset + cbytes > size)
109 return U_SPECIAL;
110
111 wchar_t ch = b0 & LO_MASK_8(b0_bits);
112
113 /* Decode continuation bytes */
114 while (cbytes > 0) {
115 uint8_t b = (uint8_t) str[(*offset)++];
116
117 /* Must be 10xxxxxx */
118 if ((b & 0xc0) != 0x80)
119 return U_SPECIAL;
120
121 /* Shift data bits to ch */
122 ch = (ch << CONT_BITS) | (wchar_t) (b & LO_MASK_8(CONT_BITS));
123 cbytes--;
124 }
125
126 return ch;
127}
128
129/** Encode a single character to string representation.
130 *
131 * Encode a single character to string representation (i.e. UTF-8) and store
132 * it into a buffer at @a offset. Encoding starts at @a offset and this offset
133 * is moved to the position where the next character can be written to.
134 *
135 * @param ch Input character.
136 * @param str Output buffer.
137 * @param offset Byte offset where to start writing.
138 * @param size Size of the output buffer (in bytes).
139 *
140 * @return EOK if the character was encoded successfully, EOVERFLOW if there
141 * was not enough space in the output buffer or EINVAL if the character
142 * code was invalid.
143 */
144int chr_encode(const wchar_t ch, char *str, size_t *offset, size_t size)
145{
146 if (*offset >= size)
147 return EOVERFLOW;
148
149 if (!chr_check(ch))
150 return EINVAL;
151
152 /* Unsigned version of ch (bit operations should only be done
153 on unsigned types). */
154 uint32_t cc = (uint32_t) ch;
155
156 /* Determine how many continuation bytes are needed */
157
158 unsigned int b0_bits; /* Data bits in first byte */
159 unsigned int cbytes; /* Number of continuation bytes */
160
161 if ((cc & ~LO_MASK_32(7)) == 0) {
162 b0_bits = 7;
163 cbytes = 0;
164 } else if ((cc & ~LO_MASK_32(11)) == 0) {
165 b0_bits = 5;
166 cbytes = 1;
167 } else if ((cc & ~LO_MASK_32(16)) == 0) {
168 b0_bits = 4;
169 cbytes = 2;
170 } else if ((cc & ~LO_MASK_32(21)) == 0) {
171 b0_bits = 3;
172 cbytes = 3;
173 } else {
174 /* Codes longer than 21 bits are not supported */
175 return EINVAL;
176 }
177
178 /* Check for available space in buffer */
179 if (*offset + cbytes >= size)
180 return EOVERFLOW;
181
182 /* Encode continuation bytes */
183 unsigned int i;
184 for (i = cbytes; i > 0; i--) {
185 str[*offset + i] = 0x80 | (cc & LO_MASK_32(CONT_BITS));
186 cc = cc >> CONT_BITS;
187 }
188
189 /* Encode first byte */
190 str[*offset] = (cc & LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1);
191
192 /* Advance offset */
193 *offset += cbytes + 1;
194
195 return EOK;
196}
197
/** Get size of string.
 *
 * Count the bytes occupied by the string @a str, not including the
 * terminating zero byte.
 *
 * @param str String to consider.
 *
 * @return Number of bytes used by the string.
 *
 */
size_t str_size(const char *str)
{
	const char *end = str;

	/* Walk to the terminator; the distance travelled is the size. */
	while (*end != 0)
		end++;

	return (size_t) (end - str);
}
217
/** Get size of wide string.
 *
 * Count the bytes occupied by the wide string @a str, not including
 * the terminating zero character.
 *
 * @param str Wide string to consider.
 *
 * @return Number of bytes used by the wide string.
 *
 */
size_t wstr_size(const wchar_t *str)
{
	size_t chars = wstr_length(str);

	return chars * sizeof(wchar_t);
}
232
233/** Get size of string with length limit.
234 *
235 * Get the number of bytes which are used by up to @a max_len first
236 * characters in the string @a str. If @a max_len is greater than
237 * the length of @a str, the entire string is measured (excluding the
238 * NULL-terminator).
239 *
240 * @param str String to consider.
241 * @param max_len Maximum number of characters to measure.
242 *
243 * @return Number of bytes used by the characters.
244 *
245 */
246size_t str_lsize(const char *str, size_t max_len)
247{
248 size_t len = 0;
249 size_t offset = 0;
250
251 while (len < max_len) {
252 if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
253 break;
254
255 len++;
256 }
257
258 return offset;
259}
260
/** Get size of wide string with length limit.
 *
 * Get the number of bytes which are used by up to @a max_len first
 * wide characters in the wide string @a str. If @a max_len is greater
 * than the length of @a str, the entire wide string is measured
 * (excluding the NULL-terminator).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	/* wstr_nlength() takes its limit in bytes, not in characters. */
	size_t byte_limit = max_len * sizeof(wchar_t);
	size_t chars = wstr_nlength(str, byte_limit);

	return chars * sizeof(wchar_t);
}
278
279/** Get number of characters in a string.
280 *
281 * @param str NULL-terminated string.
282 *
283 * @return Number of characters in string.
284 *
285 */
286size_t str_length(const char *str)
287{
288 size_t len = 0;
289 size_t offset = 0;
290
291 while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
292 len++;
293
294 return len;
295}
296
/** Get number of characters in a wide string.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return Number of characters in @a wstr (terminator not counted).
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
	const wchar_t *cur = wstr;

	/* Advance to the terminator; the distance is the length. */
	while (*cur != 0)
		cur++;

	return (size_t) (cur - wstr);
}
313
/** Get number of characters in a string with size limit.
 *
 * Counting stops when the decoder returns 0, which happens at the
 * terminator or when @a size bytes have been consumed.
 *
 * @param str  NULL-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in string.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t pos = 0;
	size_t count = 0;

	for (;;) {
		if (str_decode(str, &pos, size) == 0)
			break;

		count++;
	}

	return count;
}
332
/** Get number of characters in a wide string with size limit.
 *
 * @param str  NULL-terminated wide string.
 * @param size Maximum number of bytes to consider. It is rounded down
 *             to a multiple of sizeof(wchar_t).
 *
 * @return Number of characters in the wide string.
 *
 */
size_t wstr_nlength(const wchar_t *str, size_t size)
{
	size_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
	size_t count = 0;

	/* The byte budget is checked before each character is read. */
	for (size_t used = 0; used < limit; used += sizeof(wchar_t)) {
		if (str[count] == 0)
			break;

		count++;
	}

	return count;
}
354
/** Check whether character is plain ASCII.
 *
 * @param ch Character to check.
 *
 * @return True if @a ch lies in the range 0 to 127 (inclusive).
 *
 */
bool ascii_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 127);
}
367
/** Check whether wide string is plain ASCII.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return True if every character of @a wstr passes ascii_check()
 *         (an empty string qualifies).
 *
 */
bool wstr_is_ascii(const wchar_t *wstr)
{
	for (; *wstr != 0; wstr++) {
		if (!ascii_check(*wstr))
			return false;
	}

	return true;
}
379
/** Check whether character is valid.
 *
 * @param ch Character to check.
 *
 * @return True if @a ch lies in the range 0 to 0x10ffff (inclusive),
 *         i.e. within the Unicode code space.
 *
 */
bool chr_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 0x10ffff);
}
392
393/** Compare two NULL terminated strings.
394 *
395 * Do a char-by-char comparison of two NULL-terminated strings.
396 * The strings are considered equal iff they consist of the same
397 * characters on the minimum of their lengths.
398 *
399 * @param s1 First string to compare.
400 * @param s2 Second string to compare.
401 *
402 * @return 0 if the strings are equal, -1 if first is smaller,
403 * 1 if second smaller.
404 *
405 */
406int str_cmp(const char *s1, const char *s2)
407{
408 wchar_t c1 = 0;
409 wchar_t c2 = 0;
410
411 size_t off1 = 0;
412 size_t off2 = 0;
413
414 while (true) {
415 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
416 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
417
418 if (c1 < c2)
419 return -1;
420
421 if (c1 > c2)
422 return 1;
423
424 if (c1 == 0 || c2 == 0)
425 break;
426 }
427
428 return 0;
429}
430
431/** Compare two NULL terminated strings with length limit.
432 *
433 * Do a char-by-char comparison of two NULL-terminated strings.
434 * The strings are considered equal iff they consist of the same
435 * characters on the minimum of their lengths and the length limit.
436 *
437 * @param s1 First string to compare.
438 * @param s2 Second string to compare.
439 * @param max_len Maximum number of characters to consider.
440 *
441 * @return 0 if the strings are equal, -1 if first is smaller,
442 * 1 if second smaller.
443 *
444 */
445int str_lcmp(const char *s1, const char *s2, size_t max_len)
446{
447 wchar_t c1 = 0;
448 wchar_t c2 = 0;
449
450 size_t off1 = 0;
451 size_t off2 = 0;
452
453 size_t len = 0;
454
455 while (true) {
456 if (len >= max_len)
457 break;
458
459 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
460 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
461
462 if (c1 < c2)
463 return -1;
464
465 if (c1 > c2)
466 return 1;
467
468 if (c1 == 0 || c2 == 0)
469 break;
470
471 ++len;
472 }
473
474 return 0;
475
476}
477
478/** Copy string.
479 *
480 * Copy source string @a src to destination buffer @a dest.
481 * No more than @a size bytes are written. If the size of the output buffer
482 * is at least one byte, the output string will always be well-formed, i.e.
483 * null-terminated and containing only complete characters.
484 *
485 * @param dest Destination buffer.
486 * @param count Size of the destination buffer (must be > 0).
487 * @param src Source string.
488 */
489void str_cpy(char *dest, size_t size, const char *src)
490{
491 /* There must be space for a null terminator in the buffer. */
492 assert(size > 0);
493
494 size_t src_off = 0;
495 size_t dest_off = 0;
496
497 wchar_t ch;
498 while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
499 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
500 break;
501 }
502
503 dest[dest_off] = '\0';
504}
505
506/** Copy size-limited substring.
507 *
508 * Copy prefix of string @a src of max. size @a size to destination buffer
509 * @a dest. No more than @a size bytes are written. The output string will
510 * always be well-formed, i.e. null-terminated and containing only complete
511 * characters.
512 *
513 * No more than @a n bytes are read from the input string, so it does not
514 * have to be null-terminated.
515 *
516 * @param dest Destination buffer.
517 * @param count Size of the destination buffer (must be > 0).
518 * @param src Source string.
519 * @param n Maximum number of bytes to read from @a src.
520 */
521void str_ncpy(char *dest, size_t size, const char *src, size_t n)
522{
523 /* There must be space for a null terminator in the buffer. */
524 assert(size > 0);
525
526 size_t src_off = 0;
527 size_t dest_off = 0;
528
529 wchar_t ch;
530 while ((ch = str_decode(src, &src_off, n)) != 0) {
531 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
532 break;
533 }
534
535 dest[dest_off] = '\0';
536}
537
/** Append one string to another.
 *
 * Append source string @a src to the string in destination buffer
 * @a dest. Size of the destination buffer is @a size. As much of
 * @a src as fits is appended; the result is always null-terminated and
 * contains only complete characters.
 *
 * If the string already stored in @a dest leaves no room in the buffer
 * (its size is @a size or more), nothing is appended and @a dest is
 * left untouched.
 *
 * @param dest Destination buffer holding a null-terminated string.
 * @param size Size of the destination buffer.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	size_t dstr_size = str_size(dest);

	/*
	 * Without this guard "size - dstr_size" would be zero (tripping the
	 * assertion in str_cpy) or wrap around to a huge value (letting
	 * str_cpy write far beyond the buffer).
	 */
	if (dstr_size >= size)
		return;

	str_cpy(dest + dstr_size, size - dstr_size, src);
}
556
557/** Convert wide string to string.
558 *
559 * Convert wide string @a src to string. The output is written to the buffer
560 * specified by @a dest and @a size. @a size must be non-zero and the string
561 * written will always be well-formed.
562 *
563 * @param dest Destination buffer.
564 * @param size Size of the destination buffer.
565 * @param src Source wide string.
566 *
567 * @return EOK, if success, negative otherwise.
568 */
569int wstr_to_str(char *dest, size_t size, const wchar_t *src)
570{
571 int rc;
572 wchar_t ch;
573 size_t src_idx;
574 size_t dest_off;
575
576 /* There must be space for a null terminator in the buffer. */
577 assert(size > 0);
578
579 src_idx = 0;
580 dest_off = 0;
581
582 while ((ch = src[src_idx++]) != 0) {
583 rc = chr_encode(ch, dest, &dest_off, size - 1);
584 if (rc != EOK)
585 break;
586 }
587
588 dest[dest_off] = '\0';
589 return rc;
590}
591
592/** Convert wide string to new string.
593 *
594 * Convert wide string @a src to string. Space for the new string is allocated
595 * on the heap.
596 *
597 * @param src Source wide string.
598 * @return New string.
599 */
600char *wstr_to_astr(const wchar_t *src)
601{
602 char dbuf[STR_BOUNDS(1)];
603 char *str;
604 wchar_t ch;
605
606 size_t src_idx;
607 size_t dest_off;
608 size_t dest_size;
609
610 /* Compute size of encoded string. */
611
612 src_idx = 0;
613 dest_size = 0;
614
615 while ((ch = src[src_idx++]) != 0) {
616 dest_off = 0;
617 if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
618 break;
619 dest_size += dest_off;
620 }
621
622 str = malloc(dest_size + 1);
623 if (str == NULL)
624 return NULL;
625
626 /* Encode string. */
627
628 src_idx = 0;
629 dest_off = 0;
630
631 while ((ch = src[src_idx++]) != 0) {
632 if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
633 break;
634 }
635
636 str[dest_size] = '\0';
637 return str;
638}
639
640
641/** Convert string to wide string.
642 *
643 * Convert string @a src to wide string. The output is written to the
644 * buffer specified by @a dest and @a dlen. @a dlen must be non-zero
645 * and the wide string written will always be null-terminated.
646 *
647 * @param dest Destination buffer.
648 * @param dlen Length of destination buffer (number of wchars).
649 * @param src Source string.
650 *
651 * @return EOK, if success, negative otherwise.
652 */
653int str_to_wstr(wchar_t *dest, size_t dlen, const char *src)
654{
655 int rc=EOK;
656 size_t offset;
657 size_t di;
658 wchar_t c;
659
660 assert(dlen > 0);
661
662 offset = 0;
663 di = 0;
664
665 do {
666 if (di >= dlen - 1) {
667 rc = EOVERFLOW;
668 break;
669 }
670
671 c = str_decode(src, &offset, STR_NO_LIMIT);
672 dest[di++] = c;
673 } while (c != '\0');
674
675 dest[dlen - 1] = '\0';
676 return rc;
677}
678
679/** Find first occurence of character in string.
680 *
681 * @param str String to search.
682 * @param ch Character to look for.
683 *
684 * @return Pointer to character in @a str or NULL if not found.
685 */
686char *str_chr(const char *str, wchar_t ch)
687{
688 wchar_t acc;
689 size_t off = 0;
690 size_t last = 0;
691
692 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
693 if (acc == ch)
694 return (char *) (str + last);
695 last = off;
696 }
697
698 return NULL;
699}
700
701/** Find last occurence of character in string.
702 *
703 * @param str String to search.
704 * @param ch Character to look for.
705 *
706 * @return Pointer to character in @a str or NULL if not found.
707 */
708char *str_rchr(const char *str, wchar_t ch)
709{
710 wchar_t acc;
711 size_t off = 0;
712 size_t last = 0;
713 const char *res = NULL;
714
715 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
716 if (acc == ch)
717 res = (str + last);
718 last = off;
719 }
720
721 return (char *) res;
722}
723
/** Find first occurrence of character in wide string.
 *
 * The terminator itself is never matched, so searching for 0 yields
 * NULL.
 *
 * @param wstr String to search.
 * @param ch   Character to look for.
 *
 * @return Pointer to character in @a wstr or NULL if not found.
 */
wchar_t *wstr_chr(const wchar_t *wstr, wchar_t ch)
{
	for (const wchar_t *cur = wstr; *cur != 0; cur++) {
		if (*cur == ch)
			return (wchar_t *) cur;
	}

	return NULL;
}
740
/** Find last occurrence of character in wide string.
 *
 * The terminator itself is never matched, so searching for 0 yields
 * NULL.
 *
 * @param wstr String to search.
 * @param ch   Character to look for.
 *
 * @return Pointer to character in @a wstr or NULL if not found.
 */
wchar_t *wstr_rchr(const wchar_t *wstr, wchar_t ch)
{
	const wchar_t *found = NULL;

	for (const wchar_t *cur = wstr; *cur != 0; cur++) {
		if (*cur == ch)
			found = cur;
	}

	return (wchar_t *) found;
}
758
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position
 * @a pos. The characters after the position (including the
 * terminator) are shifted by one.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert.
 * @param pos     Character index where to insert.
 * @param max_pos Maximum number of characters the string may hold
 *                (the terminator is stored in addition to these).
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds or the string already holds @a max_pos
 *         characters.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	/*
	 * The string grows by one character, so it is its current length
	 * (not just the insert position) that must stay below the limit;
	 * otherwise the shifted tail would be written past the buffer.
	 * Since pos <= len, this also covers pos >= max_pos.
	 */
	if ((pos > len) || (len >= max_pos))
		return false;

	/* Shift the tail, terminator first, one slot to the right. */
	for (size_t i = len + 1; i > pos; i--)
		str[i] = str[i - 1];

	str[pos] = ch;

	return true;
}
788
/** Remove a wide character from a wide string.
 *
 * Remove the wide character at position @a pos. The characters after
 * the position (including the terminator) are shifted by one towards
 * the start.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	size_t len = wstr_length(str);

	if (pos >= len)
		return false;

	/* The last iteration copies the terminator from str[len]. */
	for (size_t i = pos; i < len; i++)
		str[i] = str[i + 1];

	return true;
}
814
/** Compare two strings byte by byte, ignoring case.
 *
 * Each byte is converted to unsigned char before it is passed to
 * tolower(), because passing a negative char value to the <ctype.h>
 * functions is undefined behavior. As a consequence bytes above 0x7f
 * order after all ASCII characters.
 *
 * @param a First string.
 * @param b Second string.
 *
 * @return Zero if the strings are equal ignoring case, a negative
 *         value if @a a orders before @a b, a positive value otherwise.
 */
int stricmp(const char *a, const char *b)
{
	size_t i = 0;

	for (;;) {
		int ca = tolower((unsigned char) a[i]);
		int cb = tolower((unsigned char) b[i]);

		/* Stop at the first difference or at the common end. */
		if ((ca != cb) || (ca == 0))
			return ca - cb;

		i++;
	}
}
824
/** Convert string to a number.
 * Core of strtol and strtoul functions.
 *
 * Leading whitespace and an optional sign are skipped. With base 0 the
 * base is derived from the prefix ("0x"/"0X" selects 16, a leading "0"
 * selects 8, anything else 10); with base 16 an optional "0x"/"0X"
 * prefix is skipped.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character. If no number was found or the base
 *               is invalid, @a nptr is stored.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @param sgn    It's set to 1 if minus found (left untouched otherwise).
 *
 * @return Result of conversion, 0 if no number was found or the base
 *         is invalid, ULONG_MAX if the number does not fit.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
	const char *str = nptr;

	/* <ctype.h> functions must not be given a negative char value. */
	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*sgn = 1;
		str++;
	} else if (*str == '+') {
		str++;
	}

	if (base != 0) {
		if ((base < 2) || (base > 36)) {
			/* FIXME: set errno to EINVAL */
			if (endptr)
				*endptr = (char *) nptr;
			return 0;
		}

		if ((base == 16) && (str[0] == '0') &&
		    ((str[1] == 'x') || (str[1] == 'X')))
			str += 2;
	} else {
		base = 10;

		if (*str == '0') {
			base = 8;
			if ((str[1] == 'x') || (str[1] == 'X')) {
				base = 16;
				str += 2;
			}
		}
	}

	const char *digits = str;
	unsigned long result = 0;
	bool overflow = false;

	for (; *str != '\0'; str++) {
		unsigned char c = (unsigned char) *str;
		unsigned int digit;

		if ((c >= '0') && (c <= '9'))
			digit = c - '0';
		else if ((c >= 'a') && (c <= 'z'))
			digit = c - 'a' + 10;
		else if ((c >= 'A') && (c <= 'Z'))
			digit = c - 'A' + 10;
		else
			break;

		/* A digit equal to the base is already out of range. */
		if (digit >= (unsigned int) base)
			break;

		/*
		 * After an overflow keep consuming digits so that endptr
		 * ends up behind the whole number.
		 */
		if (overflow)
			continue;

		if (result > (ULONG_MAX - digit) / (unsigned long) base) {
			/* FIXME: errno = ERANGE */
			overflow = true;
			continue;
		}

		result = result * (unsigned long) base + digit;
	}

	if (str == digits) {
		/*
		 * No number was found => first invalid character is the first
		 * character of the string.
		 */
		/* FIXME: set errno to EINVAL */
		str = nptr;
		result = 0;
	}

	if (endptr)
		*endptr = (char *) str;

	if (nptr == str) {
		/* FIXME: errno = EINVAL */
		return 0;
	}

	return overflow ? ULONG_MAX : result;
}
917
/** Convert initial part of string to long int according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 *
 * @return Result of conversion. Values that do not fit are clamped to
 *         LONG_MIN or LONG_MAX according to the sign.
 */
long int strtol(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = _strtoul(nptr, endptr, base, &sgn);

	if (number > LONG_MAX) {
		if ((sgn) && (number == (unsigned long) (LONG_MAX) + 1)) {
			/*
			 * The magnitude is exactly that of LONG_MIN. Return the
			 * constant rather than converting an out-of-range
			 * unsigned value, which is implementation-defined.
			 */
			/* FIXME: set 0 to errno */
			return LONG_MIN;
		}

		/* FIXME: set ERANGE to errno */
		return (sgn ? LONG_MIN : LONG_MAX);
	}

	/* number <= LONG_MAX here, so both conversions are exact. */
	return (sgn ? -(long) number : (long) number);
}
949
/** Duplicate string.
 *
 * Allocate a new buffer with malloc() and copy the source string into
 * it using str_cpy(). Because str_cpy() re-encodes the string, the
 * duplicate is always a well-formed null-terminated string, but it can
 * differ from the source string on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	/* One extra byte for the terminator. */
	size_t bytes = str_size(src) + 1;
	char *copy = (char *) malloc(bytes);

	if (copy != NULL)
		str_cpy(copy, bytes, src);

	return copy;
}
976
/** Duplicate string with size limit.
 *
 * Allocate a new buffer with malloc() and copy up to @a n bytes from
 * the source string into it using str_ncpy(). No more than @a n + 1
 * bytes are allocated; if the source string occupies fewer than @a n
 * bytes, only its size plus one byte is allocated.
 *
 * Because str_ncpy() re-encodes the string, the duplicate is always a
 * well-formed null-terminated string, but it can differ from the source
 * string on the byte level.
 *
 * @param src Source string.
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	size_t bytes = str_size(src);

	if (n < bytes)
		bytes = n;

	/* One extra byte for the terminator. */
	char *copy = (char *) malloc(bytes + 1);

	if (copy != NULL)
		str_ncpy(copy, bytes + 1, src, bytes);

	return copy;
}
1010
/** Reverse a range of bytes in place.
 *
 * Bytes are swapped pairwise from both ends towards the middle. Note
 * that this works on bytes, not on characters.
 *
 * @param begin Pointer to the first byte of the range.
 * @param end   Pointer to the last byte of the range (inclusive).
 */
void str_reverse(char *begin, char *end)
{
	while (end > begin) {
		char tmp = *end;

		*end = *begin;
		*begin = tmp;

		end--;
		begin++;
	}
}
1017
1018int size_t_str(size_t value, int base, char* str, size_t size)
1019{
1020 static char num[] = "0123456789abcdefghijklmnopqrstuvwxyz";
1021 char* wstr=str;
1022
1023 if (size == 0)
1024 return EINVAL;
1025 if (base<2 || base>35) {
1026 *str='\0';
1027 return EINVAL;
1028 }
1029
1030 do {
1031 *wstr++ = num[value % base];
1032 if (--size == 0)
1033 return EOVERFLOW;
1034 } while(value /= base);
1035 *wstr='\0';
1036
1037 // Reverse string
1038 str_reverse(str,wstr-1);
1039 return EOK;
1040}
1041
/** Convert initial part of string to unsigned long according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 *
 * @return Result of conversion; if a minus sign was present, the
 *         value is negated in unsigned arithmetic.
 */
unsigned long strtoul(const char *nptr, char **endptr, int base)
{
	char negative = 0;
	unsigned long magnitude = _strtoul(nptr, endptr, base, &negative);

	if (negative)
		return -magnitude;

	return magnitude;
}
1064
/** Split string into tokens (variant with internal state).
 *
 * Thin wrapper around strtok_r() which keeps the scan position in
 * a function-local static variable shared by all callers.
 *
 * @param s     String to tokenize, or NULL to continue with the
 *              previously passed string.
 * @param delim String of delimiter characters.
 *
 * @return Whatever strtok_r() returns: next token or NULL.
 */
char *strtok(char *s, const char *delim)
{
	static char *saved;
	char *token = strtok_r(s, delim, &saved);

	return token;
}
1071
/** Split string into tokens.
 *
 * Leading delimiters are skipped, then the token is extended up to the
 * next delimiter, which is overwritten with a null terminator. The
 * position where the next scan should start is stored to @a next.
 *
 * @param s     String to tokenize, or NULL to continue from @a *next.
 * @param delim String of delimiter characters.
 * @param next  Scan position carried between calls.
 *
 * @return Pointer to the token inside the string or NULL if there are
 *         no more tokens.
 */
char *strtok_r(char *s, const char *delim, char **next)
{
	char *cur = (s != NULL) ? s : *next;

	/* Skip over leading delimiters. */
	while ((*cur != '\0') && (str_chr(delim, *cur) != NULL))
		cur++;

	char *token = cur;

	/* Skip over token characters. */
	while ((*cur != '\0') && (str_chr(delim, *cur) == NULL))
		cur++;

	/* Resume behind the delimiter, or stay on the terminator. */
	*next = (*cur != '\0') ? cur + 1 : cur;

	if (token == cur) {
		/* No more tokens. */
		return NULL;
	}

	/* Overwrite delimiter with NULL terminator. */
	*cur = '\0';
	return token;
}
1096
1097/** Convert string to uint64_t (internal variant).
1098 *
1099 * @param nptr Pointer to string.
1100 * @param endptr Pointer to the first invalid character is stored here.
1101 * @param base Zero or number between 2 and 36 inclusive.
1102 * @param neg Indication of unary minus is stored here.
1103 * @apram result Result of the conversion.
1104 *
1105 * @return EOK if conversion was successful.
1106 *
1107 */
1108static int str_uint(const char *nptr, char **endptr, unsigned int base,
1109 bool *neg, uint64_t *result)
1110{
1111 assert(endptr != NULL);
1112 assert(neg != NULL);
1113 assert(result != NULL);
1114
1115 *neg = false;
1116 const char *str = nptr;
1117
1118 /* Ignore leading whitespace */
1119 while (isspace(*str))
1120 str++;
1121
1122 if (*str == '-') {
1123 *neg = true;
1124 str++;
1125 } else if (*str == '+')
1126 str++;
1127
1128 if (base == 0) {
1129 /* Decode base if not specified */
1130 base = 10;
1131
1132 if (*str == '0') {
1133 base = 8;
1134 str++;
1135
1136 switch (*str) {
1137 case 'b':
1138 case 'B':
1139 base = 2;
1140 str++;
1141 break;
1142 case 'o':
1143 case 'O':
1144 base = 8;
1145 str++;
1146 break;
1147 case 'd':
1148 case 'D':
1149 case 't':
1150 case 'T':
1151 base = 10;
1152 str++;
1153 break;
1154 case 'x':
1155 case 'X':
1156 base = 16;
1157 str++;
1158 break;
1159 default:
1160 str--;
1161 }
1162 }
1163 } else {
1164 /* Check base range */
1165 if ((base < 2) || (base > 36)) {
1166 *endptr = (char *) str;
1167 return EINVAL;
1168 }
1169 }
1170
1171 *result = 0;
1172 const char *startstr = str;
1173
1174 while (*str != 0) {
1175 unsigned int digit;
1176
1177 if ((*str >= 'a') && (*str <= 'z'))
1178 digit = *str - 'a' + 10;
1179 else if ((*str >= 'A') && (*str <= 'Z'))
1180 digit = *str - 'A' + 10;
1181 else if ((*str >= '0') && (*str <= '9'))
1182 digit = *str - '0';
1183 else
1184 break;
1185
1186 if (digit >= base)
1187 break;
1188
1189 uint64_t prev = *result;
1190 *result = (*result) * base + digit;
1191
1192 if (*result < prev) {
1193 /* Overflow */
1194 *endptr = (char *) str;
1195 return EOVERFLOW;
1196 }
1197
1198 str++;
1199 }
1200
1201 if (str == startstr) {
1202 /*
1203 * No digits were decoded => first invalid character is
1204 * the first character of the string.
1205 */
1206 str = nptr;
1207 }
1208
1209 *endptr = (char *) str;
1210
1211 if (str == nptr)
1212 return EINVAL;
1213
1214 return EOK;
1215}
1216
1217/** Convert string to uint64_t.
1218 *
1219 * @param nptr Pointer to string.
1220 * @param endptr If not NULL, pointer to the first invalid character
1221 * is stored here.
1222 * @param base Zero or number between 2 and 36 inclusive.
1223 * @param strict Do not allow any trailing characters.
1224 * @param result Result of the conversion.
1225 *
1226 * @return EOK if conversion was successful.
1227 *
1228 */
1229int str_uint64(const char *nptr, char **endptr, unsigned int base,
1230 bool strict, uint64_t *result)
1231{
1232 assert(result != NULL);
1233
1234 bool neg;
1235 char *lendptr;
1236 int ret = str_uint(nptr, &lendptr, base, &neg, result);
1237
1238 if (endptr != NULL)
1239 *endptr = (char *) lendptr;
1240
1241 if (ret != EOK)
1242 return ret;
1243
1244 /* Do not allow negative values */
1245 if (neg)
1246 return EINVAL;
1247
1248 /* Check whether we are at the end of
1249 the string in strict mode */
1250 if ((strict) && (*lendptr != 0))
1251 return EINVAL;
1252
1253 return EOK;
1254}
1255
1256/** Convert string to size_t.
1257 *
1258 * @param nptr Pointer to string.
1259 * @param endptr If not NULL, pointer to the first invalid character
1260 * is stored here.
1261 * @param base Zero or number between 2 and 36 inclusive.
1262 * @param strict Do not allow any trailing characters.
1263 * @param result Result of the conversion.
1264 *
1265 * @return EOK if conversion was successful.
1266 *
1267 */
1268int str_size_t(const char *nptr, char **endptr, unsigned int base,
1269 bool strict, size_t *result)
1270{
1271 assert(result != NULL);
1272
1273 bool neg;
1274 char *lendptr;
1275 uint64_t res;
1276 int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1277
1278 if (endptr != NULL)
1279 *endptr = (char *) lendptr;
1280
1281 if (ret != EOK)
1282 return ret;
1283
1284 /* Do not allow negative values */
1285 if (neg)
1286 return EINVAL;
1287
1288 /* Check whether we are at the end of
1289 the string in strict mode */
1290 if ((strict) && (*lendptr != 0))
1291 return EINVAL;
1292
1293 /* Check for overflow */
1294 size_t _res = (size_t) res;
1295 if (_res != res)
1296 return EOVERFLOW;
1297
1298 *result = _res;
1299
1300 return EOK;
1301}
1302
/** Scale a value for display with a decimal (SI) order suffix.
 *
 * The value is divided by the largest power of 1000 that still leaves
 * more than 1000 units (so up to four or five significant digits are
 * kept), and the SI prefix letter matching that divisor is reported.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Prefix letter ('k', 'M', 'G', 'T', 'P' or 'E') is
 *               stored here; a space if no scaling was applied.
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	/* Ordered from the largest threshold; the first match wins. */
	static const struct {
		uint64_t threshold;
		uint64_t divisor;
		char suffix;
	} orders[] = {
		{ UINT64_C(10000000000000000000), UINT64_C(1000000000000000000), 'E' },
		{ UINT64_C(1000000000000000000), UINT64_C(1000000000000000), 'P' },
		{ UINT64_C(1000000000000000), UINT64_C(1000000000000), 'T' },
		{ UINT64_C(1000000000000), UINT64_C(1000000000), 'G' },
		{ UINT64_C(1000000000), UINT64_C(1000000), 'M' },
		{ UINT64_C(1000000), UINT64_C(1000), 'k' }
	};

	for (size_t i = 0; i < sizeof(orders) / sizeof(orders[0]); i++) {
		if (val > orders[i].threshold) {
			*rv = val / orders[i].divisor;
			*suffix = orders[i].suffix;
			return;
		}
	}

	*rv = val;
	*suffix = ' ';
}
1328
/** Scale a byte count for display with a binary (IEC) order suffix.
 *
 * The value is divided by the largest power of 1024 that still leaves
 * more than 1024 units, and the IEC unit matching that divisor is
 * reported.
 *
 * @param val    Number of bytes.
 * @param rv     Scaled value is stored here.
 * @param suffix Pointer to a constant unit string ("KiB", "MiB",
 *               "GiB", "TiB", "PiB" or "B") is stored here.
 * @param fixed  If true, the plain byte unit is padded with spaces to
 *               the same width (three characters) as the other units.
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* Above 2^60: divide by 2^50, i.e. pebibytes. */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		/* Above 2^50: divide by 2^40 */
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		/* Above 2^40: divide by 2^30 */
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		/* Above 2^30: divide by 2^20 */
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		/* Above 2^20: divide by 2^10 */
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		*suffix = fixed ? "B  " : "B";
	}
}
1355
1356/** @}
1357 */
Note: See TracBrowser for help on using the repository browser.