source: mainline/uspace/lib/c/generic/str.c@ 58cbf8d5

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 58cbf8d5 was dcb74c0a, checked in by Jiri Svoboda <jiri@…>, 14 years ago

Add function to str.c to convert space-padded ASCII to standard string
representation. Use for decoding SCSI strings.

  • Property mode set to 100644
File size: 30.5 KB
Line 
1/*
2 * Copyright (c) 2005 Martin Decky
3 * Copyright (c) 2008 Jiri Svoboda
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30/** @addtogroup libc
31 * @{
32 */
33/** @file
34 */
35
36#include <str.h>
37#include <stdlib.h>
38#include <assert.h>
39#include <stdint.h>
40#include <ctype.h>
41#include <malloc.h>
42#include <errno.h>
43#include <align.h>
44#include <mem.h>
45#include <str.h>
46
/** Byte mask consisting of the lowest @a n bits (out of 8), 0 <= n <= 8 */
#define LO_MASK_8(n)  ((uint8_t) ((1U << (n)) - 1))

/** Mask consisting of the lowest @a n bits (out of 32), 0 <= n <= 31
 *
 * The shift is done on an unsigned 32-bit constant so that it never
 * overflows a signed int.
 */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1))

/** Byte mask consisting of the highest @a n bits (out of 8), 0 <= n <= 8
 *
 * The complement is evaluated on a promoted int, so the result is cast
 * back to uint8_t to keep it within a single byte.
 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
58
59/** Decode a single character from a string.
60 *
61 * Decode a single character from a string of size @a size. Decoding starts
62 * at @a offset and this offset is moved to the beginning of the next
63 * character. In case of decoding error, offset generally advances at least
64 * by one. However, offset is never moved beyond size.
65 *
66 * @param str String (not necessarily NULL-terminated).
67 * @param offset Byte offset in string where to start decoding.
68 * @param size Size of the string (in bytes).
69 *
70 * @return Value of decoded character, U_SPECIAL on decoding error or
71 * NULL if attempt to decode beyond @a size.
72 *
73 */
74wchar_t str_decode(const char *str, size_t *offset, size_t size)
75{
76 if (*offset + 1 > size)
77 return 0;
78
79 /* First byte read from string */
80 uint8_t b0 = (uint8_t) str[(*offset)++];
81
82 /* Determine code length */
83
84 unsigned int b0_bits; /* Data bits in first byte */
85 unsigned int cbytes; /* Number of continuation bytes */
86
87 if ((b0 & 0x80) == 0) {
88 /* 0xxxxxxx (Plain ASCII) */
89 b0_bits = 7;
90 cbytes = 0;
91 } else if ((b0 & 0xe0) == 0xc0) {
92 /* 110xxxxx 10xxxxxx */
93 b0_bits = 5;
94 cbytes = 1;
95 } else if ((b0 & 0xf0) == 0xe0) {
96 /* 1110xxxx 10xxxxxx 10xxxxxx */
97 b0_bits = 4;
98 cbytes = 2;
99 } else if ((b0 & 0xf8) == 0xf0) {
100 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
101 b0_bits = 3;
102 cbytes = 3;
103 } else {
104 /* 10xxxxxx -- unexpected continuation byte */
105 return U_SPECIAL;
106 }
107
108 if (*offset + cbytes > size)
109 return U_SPECIAL;
110
111 wchar_t ch = b0 & LO_MASK_8(b0_bits);
112
113 /* Decode continuation bytes */
114 while (cbytes > 0) {
115 uint8_t b = (uint8_t) str[(*offset)++];
116
117 /* Must be 10xxxxxx */
118 if ((b & 0xc0) != 0x80)
119 return U_SPECIAL;
120
121 /* Shift data bits to ch */
122 ch = (ch << CONT_BITS) | (wchar_t) (b & LO_MASK_8(CONT_BITS));
123 cbytes--;
124 }
125
126 return ch;
127}
128
129/** Encode a single character to string representation.
130 *
131 * Encode a single character to string representation (i.e. UTF-8) and store
132 * it into a buffer at @a offset. Encoding starts at @a offset and this offset
133 * is moved to the position where the next character can be written to.
134 *
135 * @param ch Input character.
136 * @param str Output buffer.
137 * @param offset Byte offset where to start writing.
138 * @param size Size of the output buffer (in bytes).
139 *
140 * @return EOK if the character was encoded successfully, EOVERFLOW if there
141 * was not enough space in the output buffer or EINVAL if the character
142 * code was invalid.
143 */
144int chr_encode(const wchar_t ch, char *str, size_t *offset, size_t size)
145{
146 if (*offset >= size)
147 return EOVERFLOW;
148
149 if (!chr_check(ch))
150 return EINVAL;
151
152 /* Unsigned version of ch (bit operations should only be done
153 on unsigned types). */
154 uint32_t cc = (uint32_t) ch;
155
156 /* Determine how many continuation bytes are needed */
157
158 unsigned int b0_bits; /* Data bits in first byte */
159 unsigned int cbytes; /* Number of continuation bytes */
160
161 if ((cc & ~LO_MASK_32(7)) == 0) {
162 b0_bits = 7;
163 cbytes = 0;
164 } else if ((cc & ~LO_MASK_32(11)) == 0) {
165 b0_bits = 5;
166 cbytes = 1;
167 } else if ((cc & ~LO_MASK_32(16)) == 0) {
168 b0_bits = 4;
169 cbytes = 2;
170 } else if ((cc & ~LO_MASK_32(21)) == 0) {
171 b0_bits = 3;
172 cbytes = 3;
173 } else {
174 /* Codes longer than 21 bits are not supported */
175 return EINVAL;
176 }
177
178 /* Check for available space in buffer */
179 if (*offset + cbytes >= size)
180 return EOVERFLOW;
181
182 /* Encode continuation bytes */
183 unsigned int i;
184 for (i = cbytes; i > 0; i--) {
185 str[*offset + i] = 0x80 | (cc & LO_MASK_32(CONT_BITS));
186 cc = cc >> CONT_BITS;
187 }
188
189 /* Encode first byte */
190 str[*offset] = (cc & LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1);
191
192 /* Advance offset */
193 *offset += cbytes + 1;
194
195 return EOK;
196}
197
/** Get size of string.
 *
 * Count the bytes occupied by the string @a str, not counting the
 * null terminator.
 *
 * @param str Null-terminated string.
 *
 * @return Number of bytes before the terminator.
 *
 */
size_t str_size(const char *str)
{
	const char *end = str;

	/* Walk to the terminator; the distance travelled is the size */
	while (*end != 0)
		end++;

	return (size_t) (end - str);
}
217
/** Get size of wide string.
 *
 * Compute the number of bytes occupied by the wide string @a str,
 * not counting the null terminator.
 *
 * @param str Null-terminated wide string.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_size(const wchar_t *str)
{
	size_t chars = wstr_length(str);

	return chars * sizeof(wchar_t);
}
232
233/** Get size of string with length limit.
234 *
235 * Get the number of bytes which are used by up to @a max_len first
236 * characters in the string @a str. If @a max_len is greater than
237 * the length of @a str, the entire string is measured (excluding the
238 * NULL-terminator).
239 *
240 * @param str String to consider.
241 * @param max_len Maximum number of characters to measure.
242 *
243 * @return Number of bytes used by the characters.
244 *
245 */
246size_t str_lsize(const char *str, size_t max_len)
247{
248 size_t len = 0;
249 size_t offset = 0;
250
251 while (len < max_len) {
252 if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
253 break;
254
255 len++;
256 }
257
258 return offset;
259}
260
/** Get size of wide string with length limit.
 *
 * Get the number of bytes which are used by up to @a max_len first
 * wide characters in the wide string @a str. If @a max_len is greater than
 * the length of @a str, the entire wide string is measured (excluding the
 * null terminator).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	size_t len = 0;

	/*
	 * Count characters directly. Converting max_len to a byte limit
	 * first (max_len * sizeof(wchar_t)) can wrap around for large
	 * limits and cut the measurement short.
	 */
	while ((len < max_len) && (str[len] != 0))
		len++;

	return len * sizeof(wchar_t);
}
278
279/** Get number of characters in a string.
280 *
281 * @param str NULL-terminated string.
282 *
283 * @return Number of characters in string.
284 *
285 */
286size_t str_length(const char *str)
287{
288 size_t len = 0;
289 size_t offset = 0;
290
291 while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
292 len++;
293
294 return len;
295}
296
/** Get number of characters in a wide string.
 *
 * @param wstr Null-terminated wide string.
 *
 * @return Number of wide characters before the terminator.
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
	const wchar_t *cur = wstr;

	/* Advance to the terminator; the distance is the length */
	while (*cur != 0)
		cur++;

	return (size_t) (cur - wstr);
}
313
/** Get number of characters in a string with size limit.
 *
 * Characters are counted by decoding with str_decode() until it
 * returns 0, which happens at the terminator or once @a size bytes
 * have been consumed.
 *
 * @param str  String to measure.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters counted.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t pos = 0;
	size_t count = 0;

	for (;;) {
		if (str_decode(str, &pos, size) == 0)
			break;

		count++;
	}

	return count;
}
332
/** Get number of characters in a wide string with size limit.
 *
 * @param str  Wide string to measure.
 * @param size Maximum number of bytes to consider. A trailing partial
 *             wide character is ignored.
 *
 * @return Number of wide characters before the terminator or the size
 *         limit, whichever comes first.
 *
 */
size_t wstr_nlength(const wchar_t *str, size_t size)
{
	/* Only whole wide characters inside the limit are examined */
	size_t max_chars = size / sizeof(wchar_t);
	size_t count = 0;

	while ((count < max_chars) && (str[count] != 0))
		count++;

	return count;
}
354
/** Check whether character is plain ASCII.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0 to 127 inclusive.
 *
 */
bool ascii_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 127);
}
367
/** Check whether character is valid.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0 to 0x10ffff (1114111)
 *         inclusive.
 *
 */
bool chr_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 1114111);
}
380
381/** Compare two NULL terminated strings.
382 *
383 * Do a char-by-char comparison of two NULL-terminated strings.
384 * The strings are considered equal iff they consist of the same
385 * characters on the minimum of their lengths.
386 *
387 * @param s1 First string to compare.
388 * @param s2 Second string to compare.
389 *
390 * @return 0 if the strings are equal, -1 if first is smaller,
391 * 1 if second smaller.
392 *
393 */
394int str_cmp(const char *s1, const char *s2)
395{
396 wchar_t c1 = 0;
397 wchar_t c2 = 0;
398
399 size_t off1 = 0;
400 size_t off2 = 0;
401
402 while (true) {
403 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
404 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
405
406 if (c1 < c2)
407 return -1;
408
409 if (c1 > c2)
410 return 1;
411
412 if (c1 == 0 || c2 == 0)
413 break;
414 }
415
416 return 0;
417}
418
419/** Compare two NULL terminated strings with length limit.
420 *
421 * Do a char-by-char comparison of two NULL-terminated strings.
422 * The strings are considered equal iff they consist of the same
423 * characters on the minimum of their lengths and the length limit.
424 *
425 * @param s1 First string to compare.
426 * @param s2 Second string to compare.
427 * @param max_len Maximum number of characters to consider.
428 *
429 * @return 0 if the strings are equal, -1 if first is smaller,
430 * 1 if second smaller.
431 *
432 */
433int str_lcmp(const char *s1, const char *s2, size_t max_len)
434{
435 wchar_t c1 = 0;
436 wchar_t c2 = 0;
437
438 size_t off1 = 0;
439 size_t off2 = 0;
440
441 size_t len = 0;
442
443 while (true) {
444 if (len >= max_len)
445 break;
446
447 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
448 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
449
450 if (c1 < c2)
451 return -1;
452
453 if (c1 > c2)
454 return 1;
455
456 if (c1 == 0 || c2 == 0)
457 break;
458
459 ++len;
460 }
461
462 return 0;
463
464}
465
466/** Copy string.
467 *
468 * Copy source string @a src to destination buffer @a dest.
469 * No more than @a size bytes are written. If the size of the output buffer
470 * is at least one byte, the output string will always be well-formed, i.e.
471 * null-terminated and containing only complete characters.
472 *
473 * @param dest Destination buffer.
474 * @param count Size of the destination buffer (must be > 0).
475 * @param src Source string.
476 */
477void str_cpy(char *dest, size_t size, const char *src)
478{
479 /* There must be space for a null terminator in the buffer. */
480 assert(size > 0);
481
482 size_t src_off = 0;
483 size_t dest_off = 0;
484
485 wchar_t ch;
486 while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
487 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
488 break;
489 }
490
491 dest[dest_off] = '\0';
492}
493
494/** Copy size-limited substring.
495 *
496 * Copy prefix of string @a src of max. size @a size to destination buffer
497 * @a dest. No more than @a size bytes are written. The output string will
498 * always be well-formed, i.e. null-terminated and containing only complete
499 * characters.
500 *
501 * No more than @a n bytes are read from the input string, so it does not
502 * have to be null-terminated.
503 *
504 * @param dest Destination buffer.
505 * @param count Size of the destination buffer (must be > 0).
506 * @param src Source string.
507 * @param n Maximum number of bytes to read from @a src.
508 */
509void str_ncpy(char *dest, size_t size, const char *src, size_t n)
510{
511 /* There must be space for a null terminator in the buffer. */
512 assert(size > 0);
513
514 size_t src_off = 0;
515 size_t dest_off = 0;
516
517 wchar_t ch;
518 while ((ch = str_decode(src, &src_off, n)) != 0) {
519 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
520 break;
521 }
522
523 dest[dest_off] = '\0';
524}
525
/** Append one string to another.
 *
 * Append the source string @a src to the string already stored in the
 * destination buffer @a dest, whose total size is @a size. The appended
 * part is written with str_cpy(), so the result is null-terminated and
 * holds only complete characters. Nothing is done when the existing
 * string already occupies @a size bytes or more.
 *
 * @param dest Destination buffer holding a null-terminated string.
 * @param size Size of the destination buffer.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	size_t used = str_size(dest);

	/* Copy into whatever room is left after the existing string */
	if (used < size)
		str_cpy(dest + used, size - used, src);
}
547
548/** Convert space-padded ASCII to string.
549 *
550 * Common legacy text encoding in hardware is 7-bit ASCII fitted into
551 * a fixed-with byte buffer (bit 7 always zero), right-padded with spaces
552 * (ASCII 0x20). Convert space-padded ascii to string representation.
553 *
554 * If the text does not fit into the destination buffer, the function converts
555 * as many characters as possible and returns EOVERFLOW.
556 *
557 * If the text contains non-ASCII bytes (with bit 7 set), the whole string is
558 * converted anyway and invalid characters are replaced with question marks
559 * (U_SPECIAL) and the function returns EIO.
560 *
561 * Regardless of return value upon return @a dest will always be well-formed.
562 *
563 * @param dest Destination buffer
564 * @param size Size of destination buffer
565 * @param src Space-padded ASCII.
566 * @param n Size of the source buffer in bytes.
567 *
568 * @return EOK on success, EOVERFLOW if the text does not fit
569 * destination buffer, EIO if the text contains
570 * non-ASCII bytes.
571 */
572int spascii_to_str(char *dest, size_t size, const uint8_t *src, size_t n)
573{
574 size_t sidx;
575 size_t didx;
576 size_t dlast;
577 uint8_t byte;
578 int rc;
579 int result;
580
581 /* There must be space for a null terminator in the buffer. */
582 assert(size > 0);
583 result = EOK;
584
585 didx = 0;
586 dlast = 0;
587 for (sidx = 0; sidx < n; ++sidx) {
588 byte = src[sidx];
589 if (!ascii_check(byte)) {
590 byte = U_SPECIAL;
591 result = EIO;
592 }
593
594 rc = chr_encode(byte, dest, &didx, size - 1);
595 if (rc != EOK) {
596 assert(rc == EOVERFLOW);
597 dest[didx] = '\0';
598 return rc;
599 }
600
601 /* Remember dest index after last non-empty character */
602 if (byte != 0x20)
603 dlast = didx;
604 }
605
606 /* Terminate string after last non-empty character */
607 dest[dlast] = '\0';
608 return result;
609}
610
611/** Convert wide string to string.
612 *
613 * Convert wide string @a src to string. The output is written to the buffer
614 * specified by @a dest and @a size. @a size must be non-zero and the string
615 * written will always be well-formed.
616 *
617 * @param dest Destination buffer.
618 * @param size Size of the destination buffer.
619 * @param src Source wide string.
620 */
621void wstr_to_str(char *dest, size_t size, const wchar_t *src)
622{
623 wchar_t ch;
624 size_t src_idx;
625 size_t dest_off;
626
627 /* There must be space for a null terminator in the buffer. */
628 assert(size > 0);
629
630 src_idx = 0;
631 dest_off = 0;
632
633 while ((ch = src[src_idx++]) != 0) {
634 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
635 break;
636 }
637
638 dest[dest_off] = '\0';
639}
640
641/** Convert wide string to new string.
642 *
643 * Convert wide string @a src to string. Space for the new string is allocated
644 * on the heap.
645 *
646 * @param src Source wide string.
647 * @return New string.
648 */
649char *wstr_to_astr(const wchar_t *src)
650{
651 char dbuf[STR_BOUNDS(1)];
652 char *str;
653 wchar_t ch;
654
655 size_t src_idx;
656 size_t dest_off;
657 size_t dest_size;
658
659 /* Compute size of encoded string. */
660
661 src_idx = 0;
662 dest_size = 0;
663
664 while ((ch = src[src_idx++]) != 0) {
665 dest_off = 0;
666 if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
667 break;
668 dest_size += dest_off;
669 }
670
671 str = malloc(dest_size + 1);
672 if (str == NULL)
673 return NULL;
674
675 /* Encode string. */
676
677 src_idx = 0;
678 dest_off = 0;
679
680 while ((ch = src[src_idx++]) != 0) {
681 if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
682 break;
683 }
684
685 str[dest_size] = '\0';
686 return str;
687}
688
689
690/** Convert string to wide string.
691 *
692 * Convert string @a src to wide string. The output is written to the
693 * buffer specified by @a dest and @a dlen. @a dlen must be non-zero
694 * and the wide string written will always be null-terminated.
695 *
696 * @param dest Destination buffer.
697 * @param dlen Length of destination buffer (number of wchars).
698 * @param src Source string.
699 */
700void str_to_wstr(wchar_t *dest, size_t dlen, const char *src)
701{
702 size_t offset;
703 size_t di;
704 wchar_t c;
705
706 assert(dlen > 0);
707
708 offset = 0;
709 di = 0;
710
711 do {
712 if (di >= dlen - 1)
713 break;
714
715 c = str_decode(src, &offset, STR_NO_LIMIT);
716 dest[di++] = c;
717 } while (c != '\0');
718
719 dest[dlen - 1] = '\0';
720}
721
722/** Find first occurence of character in string.
723 *
724 * @param str String to search.
725 * @param ch Character to look for.
726 *
727 * @return Pointer to character in @a str or NULL if not found.
728 */
729char *str_chr(const char *str, wchar_t ch)
730{
731 wchar_t acc;
732 size_t off = 0;
733 size_t last = 0;
734
735 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
736 if (acc == ch)
737 return (char *) (str + last);
738 last = off;
739 }
740
741 return NULL;
742}
743
744/** Find last occurence of character in string.
745 *
746 * @param str String to search.
747 * @param ch Character to look for.
748 *
749 * @return Pointer to character in @a str or NULL if not found.
750 */
751char *str_rchr(const char *str, wchar_t ch)
752{
753 wchar_t acc;
754 size_t off = 0;
755 size_t last = 0;
756 const char *res = NULL;
757
758 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
759 if (acc == ch)
760 res = (str + last);
761 last = off;
762 }
763
764 return (char *) res;
765}
766
/** Insert a wide character into a wide string.
 *
 * Insert the wide character @a ch at position @a pos. The characters
 * from that position on, including the terminator, are shifted one
 * element towards the end.
 *
 * NOTE(review): only @a pos is checked against @a max_pos, while the
 * terminator is moved to index length + 1. The buffer presumably has to
 * have room for that element - confirm against callers.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	if (pos > len)
		return false;

	if (pos + 1 > max_pos)
		return false;

	/* Shift the tail (terminator first) one element to the right */
	size_t idx = len + 1;
	while (idx > pos) {
		str[idx] = str[idx - 1];
		idx--;
	}

	str[pos] = ch;

	return true;
}
796
/** Remove a wide character from a wide string.
 *
 * Remove the wide character at position @a pos. The characters after
 * that position, including the terminator, are shifted one element
 * towards the beginning.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	size_t len = wstr_length(str);

	if (pos >= len)
		return false;

	/* Pull every following element (terminator last) one slot left */
	size_t idx = pos;
	while (idx < len) {
		str[idx] = str[idx + 1];
		idx++;
	}

	return true;
}
822
/** Compare two strings byte by byte, ignoring case.
 *
 * Each byte is converted to unsigned char before it is passed to
 * tolower(), so bytes with the top bit set are handled safely and are
 * ordered above all ASCII characters.
 *
 * @param a First null-terminated string.
 * @param b Second null-terminated string.
 *
 * @return Zero if the strings are equal ignoring case, a negative value
 *         if @a a orders before @a b, a positive value otherwise.
 */
int stricmp(const char *a, const char *b)
{
	size_t i = 0;

	for (;;) {
		int ca = tolower((unsigned char) a[i]);
		int cb = tolower((unsigned char) b[i]);

		/* Stop at the first difference or at the common terminator */
		if ((ca != cb) || (ca == 0))
			return ca - cb;

		i++;
	}
}
832
/** Convert string to a number.
 *
 * Core of strtol and strtoul functions. Leading whitespace and one
 * optional sign are skipped, then as many digits valid in @a base as
 * possible are converted.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character. If no digits were converted or the
 *               base is invalid, @a nptr is stored. On overflow the
 *               pointer to the digit that caused it is stored.
 * @param base   Zero or number between 2 and 36 inclusive. Zero selects
 *               base 16 for a 0x/0X prefix, base 8 for a leading 0 and
 *               base 10 otherwise.
 * @param sgn    It's set to 1 if minus found (not modified otherwise).
 *
 * @return Result of conversion, 0 if nothing was converted or the base
 *         is invalid, ULONG_MAX on overflow.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
	unsigned long result = 0;
	unsigned long a, b;
	const char *str = nptr;
	const char *tmpptr;

	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*sgn = 1;
		++str;
	} else if (*str == '+')
		++str;

	if (base) {
		if ((base < 2) || (base > 36)) {
			/* FIXME: set errno to EINVAL */
			if (endptr)
				*endptr = (char *) nptr;
			return 0;
		}
		if ((base == 16) && (*str == '0') && ((str[1] == 'x') ||
		    (str[1] == 'X'))) {
			str += 2;
		}
	} else {
		base = 10;

		if (*str == '0') {
			base = 8;
			if ((str[1] == 'X') || (str[1] == 'x')) {
				base = 16;
				str += 2;
			}
		}
	}

	tmpptr = str;

	while (*str) {
		unsigned char c = (unsigned char) *str;
		unsigned int digit;

		if ((c >= '0') && (c <= '9'))
			digit = c - '0';
		else if ((c >= 'a') && (c <= 'z'))
			digit = c - 'a' + 10;
		else if ((c >= 'A') && (c <= 'Z'))
			digit = c - 'A' + 10;
		else
			break;

		/* Valid digits are 0 .. base - 1 */
		if (digit >= (unsigned int) base)
			break;

		/*
		 * Multiply in two parts (low 8 bits and the rest) so that
		 * an overflow can be detected before it happens.
		 */
		a = (result & 0xff) * base + digit;
		b = (result >> 8) * base + (a >> 8);

		if (b > (ULONG_MAX >> 8)) {
			/* overflow */
			/* FIXME: errno = ERANGE*/
			if (endptr)
				*endptr = (char *) str;
			return ULONG_MAX;
		}

		result = (b << 8) + (a & 0xff);
		++str;
	}

	if (str == tmpptr) {
		/*
		 * No number was found => first invalid character is the first
		 * character of the string.
		 */
		/* FIXME: set errno to EINVAL */
		str = nptr;
		result = 0;
	}

	if (endptr)
		*endptr = (char *) str;

	return result;
}
925
/** Convert initial part of string to long int according to given base.
 *
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 *
 * @return Result of conversion, LONG_MIN or LONG_MAX if the value does
 *         not fit into long int.
 */
long int strtol(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = _strtoul(nptr, endptr, base, &sgn);

	if (number > LONG_MAX) {
		if ((sgn) && (number == (unsigned long) (LONG_MAX) + 1)) {
			/*
			 * The magnitude is exactly that of LONG_MIN, which
			 * is in range. Return it directly instead of
			 * converting an out-of-range unsigned value.
			 */
			/* FIXME: set 0 to errno */
			return LONG_MIN;
		}
		/* FIXME: set ERANGE to errno */
		return (sgn ? LONG_MIN : LONG_MAX);
	}

	/* number fits into long here, so negate after the conversion */
	return (sgn ? -(long) number : (long) number);
}
957
/** Duplicate string.
 *
 * Allocate a new buffer with malloc() and copy the source string into it
 * using str_cpy(). Because the copy is made character by character, the
 * duplicate is always null-terminated and holds only complete
 * characters, but it can differ from the source on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	/* Room for the string bytes plus the terminator */
	size_t bytes = str_size(src) + 1;
	char *copy = malloc(bytes);

	if (copy != NULL)
		str_cpy(copy, bytes, src);

	return copy;
}
984
/** Duplicate string with size limit.
 *
 * Allocate a new buffer with malloc() and copy up to @a n bytes from the
 * source string into it using str_ncpy(). No more than @a n + 1 bytes
 * are allocated; if the source string is shorter than @a n bytes, only
 * its size plus one is allocated.
 *
 * The size of @a src is measured with str_size() first, so @a src has
 * to be null-terminated. Because the copy is made character by
 * character, the duplicate is always null-terminated and holds only
 * complete characters, but it can differ from the source on the byte
 * level.
 *
 * @param src Source string.
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	size_t bytes = str_size(src);

	if (n < bytes)
		bytes = n;

	char *copy = malloc(bytes + 1);

	if (copy != NULL)
		str_ncpy(copy, bytes + 1, src, bytes);

	return copy;
}
1018
1019
/** Convert initial part of string to unsigned long according to given base.
 *
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * A leading minus sign negates the converted value in unsigned
 * arithmetic.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 *
 * @return Result of conversion.
 */
unsigned long strtoul(const char *nptr, char **endptr, int base)
{
	char minus = 0;
	unsigned long value = _strtoul(nptr, endptr, base, &minus);

	if (minus)
		return -value;

	return value;
}
1042
/** Split string into tokens, keeping the position in static storage.
 *
 * Thin wrapper around strtok_r() which stores the continuation pointer
 * in a function-local static variable shared by all callers.
 *
 * @param s     String to tokenize, or NULL to continue with the string
 *              from the previous call.
 * @param delim Set of delimiter characters.
 *
 * @return Next token or NULL if there are no more tokens.
 */
char *strtok(char *s, const char *delim)
{
	static char *saved;

	return strtok_r(s, delim, &saved);
}
1049
/** Split string into tokens.
 *
 * Skip leading delimiters, then scan up to the next delimiter or the end
 * of the string. The delimiter ending the token is overwritten with
 * a null terminator, so the input string is modified.
 *
 * @param s     String to tokenize, or NULL to continue from @a *next.
 * @param delim Set of delimiter characters.
 * @param next  Continuation pointer. Read when @a s is NULL and always
 *              updated to the position where the next call continues.
 *
 * @return Next token or NULL if there are no more tokens.
 */
char *strtok_r(char *s, const char *delim, char **next)
{
	char *cur = (s != NULL) ? s : *next;

	/* Skip over leading delimiters. */
	while ((*cur != '\0') && (str_chr(delim, *cur) != NULL))
		cur++;

	char *token = cur;

	/* Skip over token characters. */
	while ((*cur != '\0') && (str_chr(delim, *cur) == NULL))
		cur++;

	/* Continue after the delimiter, or stay on the terminator */
	*next = (*cur != '\0') ? cur + 1 : cur;

	if (token == cur) {
		/* No more tokens. */
		return NULL;
	}

	/* Overwrite delimiter with null terminator. */
	*cur = '\0';
	return token;
}
1074
1075/** Convert string to uint64_t (internal variant).
1076 *
1077 * @param nptr Pointer to string.
1078 * @param endptr Pointer to the first invalid character is stored here.
1079 * @param base Zero or number between 2 and 36 inclusive.
1080 * @param neg Indication of unary minus is stored here.
1081 * @apram result Result of the conversion.
1082 *
1083 * @return EOK if conversion was successful.
1084 *
1085 */
1086static int str_uint(const char *nptr, char **endptr, unsigned int base,
1087 bool *neg, uint64_t *result)
1088{
1089 assert(endptr != NULL);
1090 assert(neg != NULL);
1091 assert(result != NULL);
1092
1093 *neg = false;
1094 const char *str = nptr;
1095
1096 /* Ignore leading whitespace */
1097 while (isspace(*str))
1098 str++;
1099
1100 if (*str == '-') {
1101 *neg = true;
1102 str++;
1103 } else if (*str == '+')
1104 str++;
1105
1106 if (base == 0) {
1107 /* Decode base if not specified */
1108 base = 10;
1109
1110 if (*str == '0') {
1111 base = 8;
1112 str++;
1113
1114 switch (*str) {
1115 case 'b':
1116 case 'B':
1117 base = 2;
1118 str++;
1119 break;
1120 case 'o':
1121 case 'O':
1122 base = 8;
1123 str++;
1124 break;
1125 case 'd':
1126 case 'D':
1127 case 't':
1128 case 'T':
1129 base = 10;
1130 str++;
1131 break;
1132 case 'x':
1133 case 'X':
1134 base = 16;
1135 str++;
1136 break;
1137 default:
1138 str--;
1139 }
1140 }
1141 } else {
1142 /* Check base range */
1143 if ((base < 2) || (base > 36)) {
1144 *endptr = (char *) str;
1145 return EINVAL;
1146 }
1147 }
1148
1149 *result = 0;
1150 const char *startstr = str;
1151
1152 while (*str != 0) {
1153 unsigned int digit;
1154
1155 if ((*str >= 'a') && (*str <= 'z'))
1156 digit = *str - 'a' + 10;
1157 else if ((*str >= 'A') && (*str <= 'Z'))
1158 digit = *str - 'A' + 10;
1159 else if ((*str >= '0') && (*str <= '9'))
1160 digit = *str - '0';
1161 else
1162 break;
1163
1164 if (digit >= base)
1165 break;
1166
1167 uint64_t prev = *result;
1168 *result = (*result) * base + digit;
1169
1170 if (*result < prev) {
1171 /* Overflow */
1172 *endptr = (char *) str;
1173 return EOVERFLOW;
1174 }
1175
1176 str++;
1177 }
1178
1179 if (str == startstr) {
1180 /*
1181 * No digits were decoded => first invalid character is
1182 * the first character of the string.
1183 */
1184 str = nptr;
1185 }
1186
1187 *endptr = (char *) str;
1188
1189 if (str == nptr)
1190 return EINVAL;
1191
1192 return EOK;
1193}
1194
1195/** Convert string to uint64_t.
1196 *
1197 * @param nptr Pointer to string.
1198 * @param endptr If not NULL, pointer to the first invalid character
1199 * is stored here.
1200 * @param base Zero or number between 2 and 36 inclusive.
1201 * @param strict Do not allow any trailing characters.
1202 * @param result Result of the conversion.
1203 *
1204 * @return EOK if conversion was successful.
1205 *
1206 */
1207int str_uint64(const char *nptr, char **endptr, unsigned int base,
1208 bool strict, uint64_t *result)
1209{
1210 assert(result != NULL);
1211
1212 bool neg;
1213 char *lendptr;
1214 int ret = str_uint(nptr, &lendptr, base, &neg, result);
1215
1216 if (endptr != NULL)
1217 *endptr = (char *) lendptr;
1218
1219 if (ret != EOK)
1220 return ret;
1221
1222 /* Do not allow negative values */
1223 if (neg)
1224 return EINVAL;
1225
1226 /* Check whether we are at the end of
1227 the string in strict mode */
1228 if ((strict) && (*lendptr != 0))
1229 return EINVAL;
1230
1231 return EOK;
1232}
1233
1234/** Convert string to size_t.
1235 *
1236 * @param nptr Pointer to string.
1237 * @param endptr If not NULL, pointer to the first invalid character
1238 * is stored here.
1239 * @param base Zero or number between 2 and 36 inclusive.
1240 * @param strict Do not allow any trailing characters.
1241 * @param result Result of the conversion.
1242 *
1243 * @return EOK if conversion was successful.
1244 *
1245 */
1246int str_size_t(const char *nptr, char **endptr, unsigned int base,
1247 bool strict, size_t *result)
1248{
1249 assert(result != NULL);
1250
1251 bool neg;
1252 char *lendptr;
1253 uint64_t res;
1254 int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1255
1256 if (endptr != NULL)
1257 *endptr = (char *) lendptr;
1258
1259 if (ret != EOK)
1260 return ret;
1261
1262 /* Do not allow negative values */
1263 if (neg)
1264 return EINVAL;
1265
1266 /* Check whether we are at the end of
1267 the string in strict mode */
1268 if ((strict) && (*lendptr != 0))
1269 return EINVAL;
1270
1271 /* Check for overflow */
1272 size_t _res = (size_t) res;
1273 if (_res != res)
1274 return EOVERFLOW;
1275
1276 *result = _res;
1277
1278 return EOK;
1279}
1280
/** Scale a value to a decimal order of magnitude.
 *
 * Values above 10^6 are divided by the largest listed power of 1000
 * that leaves the result above 1000; the suffix names the unit of the
 * divided value (k = 10^3, M = 10^6, G = 10^9, T = 10^12, P = 10^15,
 * E = 10^18). Smaller values are returned unchanged with a space as
 * the suffix.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Unit suffix character is stored here.
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/* Divided by 10^18, i.e. exa */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* Divided by 10^15, i.e. peta */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
1306
/** Scale a byte count to a binary order of magnitude.
 *
 * Values above 2^20 are divided by the largest listed power of 1024
 * that leaves the result above 1024; the suffix names the unit of the
 * divided value (KiB = 2^10, MiB = 2^20, GiB = 2^30, TiB = 2^40,
 * PiB = 2^50). Smaller values are returned unchanged with a byte suffix.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Pointer to a constant unit suffix string is stored here.
 * @param fixed  Selects the padded form of the plain byte suffix.
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* Divided by 2^50, i.e. pebibytes */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		/*
		 * NOTE(review): the padded form is presumably meant to line
		 * up with the three-character suffixes - confirm the width.
		 */
		if (fixed)
			*suffix = "B ";
		else
			*suffix = "B";
	}
}
1333
1334/** @}
1335 */
Note: See TracBrowser for help on using the repository browser.