source: mainline/uspace/lib/c/generic/str.c@ b48d046

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since b48d046 was b48d046, checked in by Martin Decky <martin@…>, 14 years ago

cstyle
(no change in functionality)

  • Property mode set to 100644
File size: 30.9 KB
Line 
1/*
2 * Copyright (c) 2005 Martin Decky
3 * Copyright (c) 2008 Jiri Svoboda
4 * Copyright (c) 2011 Martin Sucha
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * - Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * - Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * - The name of the author may not be used to endorse or promote products
17 * derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31/** @addtogroup libc
32 * @{
33 */
34/** @file
35 */
36
37#include <str.h>
38#include <stdlib.h>
39#include <assert.h>
40#include <stdint.h>
41#include <ctype.h>
42#include <malloc.h>
43#include <errno.h>
44#include <align.h>
45#include <mem.h>
46#include <str.h>
47
/** Byte mask consisting of lowest @a n bits (out of 8); valid for n in 0..8 */
#define LO_MASK_8(n)  ((uint8_t) ((1U << (n)) - 1))

/** Mask consisting of lowest @a n bits (out of 32); valid for n in 0..31
 *
 * The shift is done on an unsigned 32-bit operand so that no signed
 * overflow can occur for large @a n.
 */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1))

/** Byte mask consisting of highest @a n bits (out of 8); valid for n in 0..8
 *
 * The result is cast back to a byte because the complement operator
 * promotes its operand to int.
 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
59
60/** Decode a single character from a string.
61 *
62 * Decode a single character from a string of size @a size. Decoding starts
63 * at @a offset and this offset is moved to the beginning of the next
64 * character. In case of decoding error, offset generally advances at least
65 * by one. However, offset is never moved beyond size.
66 *
67 * @param str String (not necessarily NULL-terminated).
68 * @param offset Byte offset in string where to start decoding.
69 * @param size Size of the string (in bytes).
70 *
71 * @return Value of decoded character, U_SPECIAL on decoding error or
72 * NULL if attempt to decode beyond @a size.
73 *
74 */
75wchar_t str_decode(const char *str, size_t *offset, size_t size)
76{
77 if (*offset + 1 > size)
78 return 0;
79
80 /* First byte read from string */
81 uint8_t b0 = (uint8_t) str[(*offset)++];
82
83 /* Determine code length */
84
85 unsigned int b0_bits; /* Data bits in first byte */
86 unsigned int cbytes; /* Number of continuation bytes */
87
88 if ((b0 & 0x80) == 0) {
89 /* 0xxxxxxx (Plain ASCII) */
90 b0_bits = 7;
91 cbytes = 0;
92 } else if ((b0 & 0xe0) == 0xc0) {
93 /* 110xxxxx 10xxxxxx */
94 b0_bits = 5;
95 cbytes = 1;
96 } else if ((b0 & 0xf0) == 0xe0) {
97 /* 1110xxxx 10xxxxxx 10xxxxxx */
98 b0_bits = 4;
99 cbytes = 2;
100 } else if ((b0 & 0xf8) == 0xf0) {
101 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
102 b0_bits = 3;
103 cbytes = 3;
104 } else {
105 /* 10xxxxxx -- unexpected continuation byte */
106 return U_SPECIAL;
107 }
108
109 if (*offset + cbytes > size)
110 return U_SPECIAL;
111
112 wchar_t ch = b0 & LO_MASK_8(b0_bits);
113
114 /* Decode continuation bytes */
115 while (cbytes > 0) {
116 uint8_t b = (uint8_t) str[(*offset)++];
117
118 /* Must be 10xxxxxx */
119 if ((b & 0xc0) != 0x80)
120 return U_SPECIAL;
121
122 /* Shift data bits to ch */
123 ch = (ch << CONT_BITS) | (wchar_t) (b & LO_MASK_8(CONT_BITS));
124 cbytes--;
125 }
126
127 return ch;
128}
129
130/** Encode a single character to string representation.
131 *
132 * Encode a single character to string representation (i.e. UTF-8) and store
133 * it into a buffer at @a offset. Encoding starts at @a offset and this offset
134 * is moved to the position where the next character can be written to.
135 *
136 * @param ch Input character.
137 * @param str Output buffer.
138 * @param offset Byte offset where to start writing.
139 * @param size Size of the output buffer (in bytes).
140 *
141 * @return EOK if the character was encoded successfully, EOVERFLOW if there
142 * was not enough space in the output buffer or EINVAL if the character
143 * code was invalid.
144 */
145int chr_encode(const wchar_t ch, char *str, size_t *offset, size_t size)
146{
147 if (*offset >= size)
148 return EOVERFLOW;
149
150 if (!chr_check(ch))
151 return EINVAL;
152
153 /* Unsigned version of ch (bit operations should only be done
154 on unsigned types). */
155 uint32_t cc = (uint32_t) ch;
156
157 /* Determine how many continuation bytes are needed */
158
159 unsigned int b0_bits; /* Data bits in first byte */
160 unsigned int cbytes; /* Number of continuation bytes */
161
162 if ((cc & ~LO_MASK_32(7)) == 0) {
163 b0_bits = 7;
164 cbytes = 0;
165 } else if ((cc & ~LO_MASK_32(11)) == 0) {
166 b0_bits = 5;
167 cbytes = 1;
168 } else if ((cc & ~LO_MASK_32(16)) == 0) {
169 b0_bits = 4;
170 cbytes = 2;
171 } else if ((cc & ~LO_MASK_32(21)) == 0) {
172 b0_bits = 3;
173 cbytes = 3;
174 } else {
175 /* Codes longer than 21 bits are not supported */
176 return EINVAL;
177 }
178
179 /* Check for available space in buffer */
180 if (*offset + cbytes >= size)
181 return EOVERFLOW;
182
183 /* Encode continuation bytes */
184 unsigned int i;
185 for (i = cbytes; i > 0; i--) {
186 str[*offset + i] = 0x80 | (cc & LO_MASK_32(CONT_BITS));
187 cc = cc >> CONT_BITS;
188 }
189
190 /* Encode first byte */
191 str[*offset] = (cc & LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1);
192
193 /* Advance offset */
194 *offset += cbytes + 1;
195
196 return EOK;
197}
198
/** Get size of string.
 *
 * Count the bytes occupied by the string @a str, not including the
 * terminating zero byte.
 *
 * @param str String to consider.
 *
 * @return Number of bytes used by the string
 *
 */
size_t str_size(const char *str)
{
	const char *end = str;

	/* Walk to the terminating zero byte */
	while (*end != '\0')
		end++;

	return (size_t) (end - str);
}
218
/** Get size of wide string.
 *
 * Compute the number of bytes occupied by the wide string @a str, not
 * including the terminating zero character.
 *
 * @param str Wide string to consider.
 *
 * @return Number of bytes used by the wide string
 *
 */
size_t wstr_size(const wchar_t *str)
{
	size_t chars = wstr_length(str);

	return chars * sizeof(wchar_t);
}
233
234/** Get size of string with length limit.
235 *
236 * Get the number of bytes which are used by up to @a max_len first
237 * characters in the string @a str. If @a max_len is greater than
238 * the length of @a str, the entire string is measured (excluding the
239 * NULL-terminator).
240 *
241 * @param str String to consider.
242 * @param max_len Maximum number of characters to measure.
243 *
244 * @return Number of bytes used by the characters.
245 *
246 */
247size_t str_lsize(const char *str, size_t max_len)
248{
249 size_t len = 0;
250 size_t offset = 0;
251
252 while (len < max_len) {
253 if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
254 break;
255
256 len++;
257 }
258
259 return offset;
260}
261
/** Get size of wide string with length limit.
 *
 * Get the number of bytes which are used by up to @a max_len first
 * wide characters in the wide string @a str. If the wide string is
 * shorter than that, the entire wide string is measured (excluding the
 * NULL-terminator).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	/* wstr_nlength() takes its limit in bytes */
	size_t byte_limit = max_len * sizeof(wchar_t);
	size_t chars = wstr_nlength(str, byte_limit);

	return chars * sizeof(wchar_t);
}
279
280/** Get number of characters in a string.
281 *
282 * @param str NULL-terminated string.
283 *
284 * @return Number of characters in string.
285 *
286 */
287size_t str_length(const char *str)
288{
289 size_t len = 0;
290 size_t offset = 0;
291
292 while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
293 len++;
294
295 return len;
296}
297
/** Get number of characters in a wide string.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return Number of characters in @a wstr (terminator not counted).
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
	const wchar_t *cur = wstr;

	/* Walk to the terminating zero character */
	while (*cur != 0)
		cur++;

	return (size_t) (cur - wstr);
}
314
/** Get number of characters in a string with size limit.
 *
 * Characters are counted by decoding the string with str_decode(), which
 * is given @a size as the size of the string, until it returns 0.
 *
 * @param str  NULL-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in string.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t pos = 0;
	size_t count;

	for (count = 0; str_decode(str, &pos, size) != 0; count++)
		;

	return count;
}
333
/** Get number of characters in a wide string with size limit.
 *
 * Only whole wide characters are examined: the byte limit passed to the
 * loop is ALIGN_DOWN(size, sizeof(wchar_t)).
 *
 * @param str  NULL-terminated wide string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in the wide string.
 *
 */
size_t wstr_nlength(const wchar_t *str, size_t size)
{
	size_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
	size_t count = 0;
	size_t used;

	for (used = 0; used < limit; used += sizeof(wchar_t)) {
		/* Stop at the terminator */
		if (str[count] == 0)
			break;

		count++;
	}

	return count;
}
355
/** Check whether character is plain ASCII.
 *
 * @param ch Character to check.
 *
 * @return True if character is in the range 0..127.
 *
 */
bool ascii_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 127);
}
368
/** Check whether character is valid
 *
 * @param ch Character to check.
 *
 * @return True if character lies in the range 0..0x10ffff
 *         (the Unicode code space).
 *
 */
bool chr_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 0x10ffff);
}
381
382/** Compare two NULL terminated strings.
383 *
384 * Do a char-by-char comparison of two NULL-terminated strings.
385 * The strings are considered equal iff they consist of the same
386 * characters on the minimum of their lengths.
387 *
388 * @param s1 First string to compare.
389 * @param s2 Second string to compare.
390 *
391 * @return 0 if the strings are equal, -1 if first is smaller,
392 * 1 if second smaller.
393 *
394 */
395int str_cmp(const char *s1, const char *s2)
396{
397 wchar_t c1 = 0;
398 wchar_t c2 = 0;
399
400 size_t off1 = 0;
401 size_t off2 = 0;
402
403 while (true) {
404 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
405 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
406
407 if (c1 < c2)
408 return -1;
409
410 if (c1 > c2)
411 return 1;
412
413 if (c1 == 0 || c2 == 0)
414 break;
415 }
416
417 return 0;
418}
419
420/** Compare two NULL terminated strings with length limit.
421 *
422 * Do a char-by-char comparison of two NULL-terminated strings.
423 * The strings are considered equal iff they consist of the same
424 * characters on the minimum of their lengths and the length limit.
425 *
426 * @param s1 First string to compare.
427 * @param s2 Second string to compare.
428 * @param max_len Maximum number of characters to consider.
429 *
430 * @return 0 if the strings are equal, -1 if first is smaller,
431 * 1 if second smaller.
432 *
433 */
434int str_lcmp(const char *s1, const char *s2, size_t max_len)
435{
436 wchar_t c1 = 0;
437 wchar_t c2 = 0;
438
439 size_t off1 = 0;
440 size_t off2 = 0;
441
442 size_t len = 0;
443
444 while (true) {
445 if (len >= max_len)
446 break;
447
448 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
449 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
450
451 if (c1 < c2)
452 return -1;
453
454 if (c1 > c2)
455 return 1;
456
457 if (c1 == 0 || c2 == 0)
458 break;
459
460 ++len;
461 }
462
463 return 0;
464
465}
466
467/** Copy string.
468 *
469 * Copy source string @a src to destination buffer @a dest.
470 * No more than @a size bytes are written. If the size of the output buffer
471 * is at least one byte, the output string will always be well-formed, i.e.
472 * null-terminated and containing only complete characters.
473 *
474 * @param dest Destination buffer.
475 * @param count Size of the destination buffer (must be > 0).
476 * @param src Source string.
477 */
478void str_cpy(char *dest, size_t size, const char *src)
479{
480 /* There must be space for a null terminator in the buffer. */
481 assert(size > 0);
482
483 size_t src_off = 0;
484 size_t dest_off = 0;
485
486 wchar_t ch;
487 while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
488 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
489 break;
490 }
491
492 dest[dest_off] = '\0';
493}
494
495/** Copy size-limited substring.
496 *
497 * Copy prefix of string @a src of max. size @a size to destination buffer
498 * @a dest. No more than @a size bytes are written. The output string will
499 * always be well-formed, i.e. null-terminated and containing only complete
500 * characters.
501 *
502 * No more than @a n bytes are read from the input string, so it does not
503 * have to be null-terminated.
504 *
505 * @param dest Destination buffer.
506 * @param count Size of the destination buffer (must be > 0).
507 * @param src Source string.
508 * @param n Maximum number of bytes to read from @a src.
509 */
510void str_ncpy(char *dest, size_t size, const char *src, size_t n)
511{
512 /* There must be space for a null terminator in the buffer. */
513 assert(size > 0);
514
515 size_t src_off = 0;
516 size_t dest_off = 0;
517
518 wchar_t ch;
519 while ((ch = str_decode(src, &src_off, n)) != 0) {
520 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
521 break;
522 }
523
524 dest[dest_off] = '\0';
525}
526
/** Append one string to another.
 *
 * Append source string @a src to string in destination buffer @a dest.
 * Size of the destination buffer is @a size. The appended part is written
 * by str_cpy() into the space left after the existing string. If the
 * existing string already occupies @a size bytes or more, nothing is
 * written.
 *
 * @param dest Destination buffer.
 * @param size Size of the destination buffer.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	size_t used = str_size(dest);

	/* Append only if at least one byte is left after the current string */
	if (used < size)
		str_cpy(dest + used, size - used, src);
}
548
549/** Convert space-padded ASCII to string.
550 *
551 * Common legacy text encoding in hardware is 7-bit ASCII fitted into
552 * a fixed-with byte buffer (bit 7 always zero), right-padded with spaces
553 * (ASCII 0x20). Convert space-padded ascii to string representation.
554 *
555 * If the text does not fit into the destination buffer, the function converts
556 * as many characters as possible and returns EOVERFLOW.
557 *
558 * If the text contains non-ASCII bytes (with bit 7 set), the whole string is
559 * converted anyway and invalid characters are replaced with question marks
560 * (U_SPECIAL) and the function returns EIO.
561 *
562 * Regardless of return value upon return @a dest will always be well-formed.
563 *
564 * @param dest Destination buffer
565 * @param size Size of destination buffer
566 * @param src Space-padded ASCII.
567 * @param n Size of the source buffer in bytes.
568 *
569 * @return EOK on success, EOVERFLOW if the text does not fit
570 * destination buffer, EIO if the text contains
571 * non-ASCII bytes.
572 */
573int spascii_to_str(char *dest, size_t size, const uint8_t *src, size_t n)
574{
575 size_t sidx;
576 size_t didx;
577 size_t dlast;
578 uint8_t byte;
579 int rc;
580 int result;
581
582 /* There must be space for a null terminator in the buffer. */
583 assert(size > 0);
584 result = EOK;
585
586 didx = 0;
587 dlast = 0;
588 for (sidx = 0; sidx < n; ++sidx) {
589 byte = src[sidx];
590 if (!ascii_check(byte)) {
591 byte = U_SPECIAL;
592 result = EIO;
593 }
594
595 rc = chr_encode(byte, dest, &didx, size - 1);
596 if (rc != EOK) {
597 assert(rc == EOVERFLOW);
598 dest[didx] = '\0';
599 return rc;
600 }
601
602 /* Remember dest index after last non-empty character */
603 if (byte != 0x20)
604 dlast = didx;
605 }
606
607 /* Terminate string after last non-empty character */
608 dest[dlast] = '\0';
609 return result;
610}
611
612/** Convert wide string to string.
613 *
614 * Convert wide string @a src to string. The output is written to the buffer
615 * specified by @a dest and @a size. @a size must be non-zero and the string
616 * written will always be well-formed.
617 *
618 * @param dest Destination buffer.
619 * @param size Size of the destination buffer.
620 * @param src Source wide string.
621 */
622void wstr_to_str(char *dest, size_t size, const wchar_t *src)
623{
624 wchar_t ch;
625 size_t src_idx;
626 size_t dest_off;
627
628 /* There must be space for a null terminator in the buffer. */
629 assert(size > 0);
630
631 src_idx = 0;
632 dest_off = 0;
633
634 while ((ch = src[src_idx++]) != 0) {
635 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
636 break;
637 }
638
639 dest[dest_off] = '\0';
640}
641
642/** Convert wide string to new string.
643 *
644 * Convert wide string @a src to string. Space for the new string is allocated
645 * on the heap.
646 *
647 * @param src Source wide string.
648 * @return New string.
649 */
650char *wstr_to_astr(const wchar_t *src)
651{
652 char dbuf[STR_BOUNDS(1)];
653 char *str;
654 wchar_t ch;
655
656 size_t src_idx;
657 size_t dest_off;
658 size_t dest_size;
659
660 /* Compute size of encoded string. */
661
662 src_idx = 0;
663 dest_size = 0;
664
665 while ((ch = src[src_idx++]) != 0) {
666 dest_off = 0;
667 if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
668 break;
669 dest_size += dest_off;
670 }
671
672 str = malloc(dest_size + 1);
673 if (str == NULL)
674 return NULL;
675
676 /* Encode string. */
677
678 src_idx = 0;
679 dest_off = 0;
680
681 while ((ch = src[src_idx++]) != 0) {
682 if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
683 break;
684 }
685
686 str[dest_size] = '\0';
687 return str;
688}
689
690
691/** Convert string to wide string.
692 *
693 * Convert string @a src to wide string. The output is written to the
694 * buffer specified by @a dest and @a dlen. @a dlen must be non-zero
695 * and the wide string written will always be null-terminated.
696 *
697 * @param dest Destination buffer.
698 * @param dlen Length of destination buffer (number of wchars).
699 * @param src Source string.
700 */
701void str_to_wstr(wchar_t *dest, size_t dlen, const char *src)
702{
703 size_t offset;
704 size_t di;
705 wchar_t c;
706
707 assert(dlen > 0);
708
709 offset = 0;
710 di = 0;
711
712 do {
713 if (di >= dlen - 1)
714 break;
715
716 c = str_decode(src, &offset, STR_NO_LIMIT);
717 dest[di++] = c;
718 } while (c != '\0');
719
720 dest[dlen - 1] = '\0';
721}
722
/** Convert string to wide string.
 *
 * Convert string @a str to wide string. A new wide NULL-terminated
 * string is allocated on the heap with calloc().
 *
 * @param str Source string.
 *
 * @return New wide string or NULL if the allocation failed.
 */
wchar_t *str_to_awstr(const char *str)
{
	/* One slot per character plus one for the terminator */
	size_t slots = str_length(str) + 1;

	wchar_t *wstr = calloc(slots, sizeof(wchar_t));
	if (wstr != NULL)
		str_to_wstr(wstr, slots, str);

	return wstr;
}
741
742/** Find first occurence of character in string.
743 *
744 * @param str String to search.
745 * @param ch Character to look for.
746 *
747 * @return Pointer to character in @a str or NULL if not found.
748 */
749char *str_chr(const char *str, wchar_t ch)
750{
751 wchar_t acc;
752 size_t off = 0;
753 size_t last = 0;
754
755 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
756 if (acc == ch)
757 return (char *) (str + last);
758 last = off;
759 }
760
761 return NULL;
762}
763
764/** Find last occurence of character in string.
765 *
766 * @param str String to search.
767 * @param ch Character to look for.
768 *
769 * @return Pointer to character in @a str or NULL if not found.
770 */
771char *str_rchr(const char *str, wchar_t ch)
772{
773 wchar_t acc;
774 size_t off = 0;
775 size_t last = 0;
776 const char *res = NULL;
777
778 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
779 if (acc == ch)
780 res = (str + last);
781 last = off;
782 }
783
784 return (char *) res;
785}
786
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position
 * @a pos. The characters from that position on (including the
 * terminator) are shifted by one towards the end.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert to.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds (beyond the end of the string or not
 *         below @a max_pos).
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	if (pos > len)
		return false;

	if (pos >= max_pos)
		return false;

	/* Shift the tail, terminator first, to open a gap at pos */
	size_t idx = len + 1;
	while (idx > pos) {
		str[idx] = str[idx - 1];
		idx--;
	}

	str[pos] = ch;

	return true;
}
816
/** Remove a wide character from a wide string.
 *
 * Remove a wide character from a wide string at position
 * @a pos. The characters after the position (including the terminator)
 * are shifted by one towards the beginning.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	size_t len = wstr_length(str);

	if (pos >= len)
		return false;

	/* Pull each following character (and the terminator) one slot down */
	size_t idx;
	for (idx = pos; idx < len; idx++)
		str[idx] = str[idx + 1];

	return true;
}
842
/** Compare two strings byte by byte, ignoring case.
 *
 * Both strings are compared after passing every byte through tolower().
 * Bytes are treated as unsigned char, so tolower() is never handed
 * a negative value.
 *
 * @param a First NULL-terminated string.
 * @param b Second NULL-terminated string.
 *
 * @return Zero if the strings are equal, a negative value if @a a orders
 *         before @a b and a positive value otherwise (the difference of
 *         the first pair of lowercased bytes that do not match).
 */
int stricmp(const char *a, const char *b)
{
	size_t i = 0;

	for (;;) {
		int ca = tolower((unsigned char) a[i]);
		int cb = tolower((unsigned char) b[i]);

		/* Stop at the first mismatch or at the common end */
		if ((ca != cb) || (ca == 0))
			return ca - cb;

		i++;
	}
}
852
/** Convert string to a number.
 * Core of strtol and strtoul functions.
 *
 * Leading whitespace and an optional sign are skipped. With base 0 the
 * base is derived from the prefix (`0x' hexadecimal, `0' octal, decimal
 * otherwise); with base 16 an optional `0x' prefix is skipped.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character. If no digits could be converted or
 *               the base is invalid, @a nptr is stored.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @param sgn    It's set to 1 if minus found (left untouched otherwise).
 * @return       Result of conversion, ULONG_MAX on overflow, 0 if no
 *               conversion could be performed.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
	unsigned long result = 0;
	int overflow = 0;
	const char *str = nptr;
	const char *tmpptr;

	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*sgn = 1;
		++str;
	} else if (*str == '+')
		++str;

	if (base) {
		if ((base < 2) || (base > 36)) {
			/* FIXME: set errno to EINVAL */
			if (endptr)
				*endptr = (char *) nptr;
			return 0;
		}
		if ((base == 16) && (*str == '0') && ((str[1] == 'x') ||
		    (str[1] == 'X'))) {
			str += 2;
		}
	} else {
		base = 10;

		if (*str == '0') {
			base = 8;
			if ((str[1] == 'X') || (str[1] == 'x')) {
				base = 16;
				str += 2;
			}
		}
	}

	tmpptr = str;

	while (*str) {
		unsigned char c = (unsigned char) *str;
		unsigned int digit;

		if ((c >= '0') && (c <= '9'))
			digit = c - '0';
		else if ((c >= 'a') && (c <= 'z'))
			digit = c - 'a' + 10;
		else if ((c >= 'A') && (c <= 'Z'))
			digit = c - 'A' + 10;
		else
			break;

		/* Valid digits are 0 .. base - 1 */
		if (digit >= (unsigned int) base)
			break;

		if (!overflow) {
			/*
			 * Multiply in two parts (low 8 bits and the rest)
			 * so that the overflow can be detected without
			 * a wider type.
			 */
			unsigned long a = (result & 0xff) * base + digit;
			unsigned long b = (result >> 8) * base + (a >> 8);

			if (b > (ULONG_MAX >> 8)) {
				/* FIXME: errno = ERANGE */
				overflow = 1;
			} else
				result = (b << 8) + (a & 0xff);
		}

		/* Keep consuming digits even after an overflow */
		++str;
	}

	if (str == tmpptr) {
		/*
		 * No number was found => first invalid character is the first
		 * character of the string.
		 */
		/* FIXME: set errno to EINVAL */
		str = nptr;
		result = 0;
	}

	if (endptr)
		*endptr = (char *) str;

	if (nptr == str) {
		/* FIXME: errno = EINVAL */
		return 0;
	}

	if (overflow)
		return ULONG_MAX;

	return result;
}
945
/** Convert initial part of string to long int according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * Values that do not fit into long int are clamped to LONG_MIN or LONG_MAX.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return       Result of conversion.
 */
long int strtol(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = _strtoul(nptr, endptr, base, &sgn);

	if (number > LONG_MAX) {
		if ((sgn) && (number == (unsigned long) (LONG_MAX) + 1)) {
			/*
			 * The magnitude LONG_MAX + 1 with a minus sign is
			 * the one out-of-range magnitude that is still
			 * representable (as LONG_MIN); this is not an error.
			 */
			/* FIXME: set 0 to errno */
			return LONG_MIN;
		}

		/* FIXME: set ERANGE to errno */
		return (sgn ? LONG_MIN : LONG_MAX);
	}

	/* number fits into long, so the negation is done on a signed value */
	return (sgn ? -(long) number : (long) number);
}
977
/** Duplicate string.
 *
 * Allocate a new string with malloc() and copy characters from the
 * source string into it using str_cpy().
 *
 * The duplicate string is always a well-formed null-terminated string,
 * but because str_cpy() decodes and re-encodes the characters it can
 * differ from the source string on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	/* Room for all bytes of the source plus the terminator */
	size_t bytes = str_size(src) + 1;

	char *copy = malloc(bytes);
	if (copy != NULL)
		str_cpy(copy, bytes, src);

	return copy;
}
1004
/** Duplicate string with size limit.
 *
 * Allocate a new string with malloc() and copy up to @a n bytes from the
 * source string into it using str_ncpy(). No more than @a n + 1 bytes
 * are allocated, but if the size occupied by the source string is
 * smaller than @a n, only that size plus one byte is allocated.
 *
 * The duplicate string is always a well-formed null-terminated string,
 * but because str_ncpy() decodes and re-encodes the characters it can
 * differ from the source string on the byte level.
 *
 * @param src Source string.
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	size_t src_size = str_size(src);
	size_t copy_size = (src_size > n) ? n : src_size;

	char *copy = malloc(copy_size + 1);
	if (copy != NULL)
		str_ncpy(copy, copy_size + 1, src, copy_size);

	return copy;
}
1038
1039
/** Convert initial part of string to unsigned long according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * If a minus sign was present, the converted value is negated in
 * unsigned arithmetic.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return       Result of conversion.
 */
unsigned long strtoul(const char *nptr, char **endptr, int base)
{
	char negative = 0;
	unsigned long magnitude = _strtoul(nptr, endptr, base, &negative);

	if (negative)
		return -magnitude;

	return magnitude;
}
1062
/** Split string into tokens (variant with internal state).
 *
 * Thin wrapper around strtok_r() which keeps the continuation pointer
 * in a function-local static variable shared by all callers.
 *
 * @param s     String to tokenize, or NULL to continue with the string
 *              from the previous call.
 * @param delim String of delimiter characters.
 *
 * @return Next token or NULL if there are no more tokens.
 */
char *strtok(char *s, const char *delim)
{
	/* Continuation point remembered between calls */
	static char *saved;

	char *token = strtok_r(s, delim, &saved);
	return token;
}
1069
/** Split string into tokens.
 *
 * Skip leading delimiters, then return the following run of
 * non-delimiter bytes as a token. The delimiter ending the token is
 * overwritten with a null terminator, i.e. the input string is modified.
 *
 * @param s     String to tokenize, or NULL to continue from @a *next.
 * @param delim String of delimiter characters.
 * @param next  Where the position to continue from is stored.
 *
 * @return Next token or NULL if there are no more tokens.
 */
char *strtok_r(char *s, const char *delim, char **next)
{
	char *cur = (s != NULL) ? s : *next;

	/* Skip over leading delimiters. */
	while ((*cur != '\0') && (str_chr(delim, *cur) != NULL))
		cur++;

	char *token = cur;

	/* Skip over token characters. */
	while ((*cur != '\0') && (str_chr(delim, *cur) == NULL))
		cur++;

	/* Continue after the delimiter, or stay on the terminator */
	*next = (*cur != '\0') ? cur + 1 : cur;

	if (token == cur) {
		/* No more tokens. */
		return NULL;
	}

	/* Overwrite delimiter with NULL terminator. */
	*cur = '\0';
	return token;
}
1094
1095/** Convert string to uint64_t (internal variant).
1096 *
1097 * @param nptr Pointer to string.
1098 * @param endptr Pointer to the first invalid character is stored here.
1099 * @param base Zero or number between 2 and 36 inclusive.
1100 * @param neg Indication of unary minus is stored here.
1101 * @apram result Result of the conversion.
1102 *
1103 * @return EOK if conversion was successful.
1104 *
1105 */
1106static int str_uint(const char *nptr, char **endptr, unsigned int base,
1107 bool *neg, uint64_t *result)
1108{
1109 assert(endptr != NULL);
1110 assert(neg != NULL);
1111 assert(result != NULL);
1112
1113 *neg = false;
1114 const char *str = nptr;
1115
1116 /* Ignore leading whitespace */
1117 while (isspace(*str))
1118 str++;
1119
1120 if (*str == '-') {
1121 *neg = true;
1122 str++;
1123 } else if (*str == '+')
1124 str++;
1125
1126 if (base == 0) {
1127 /* Decode base if not specified */
1128 base = 10;
1129
1130 if (*str == '0') {
1131 base = 8;
1132 str++;
1133
1134 switch (*str) {
1135 case 'b':
1136 case 'B':
1137 base = 2;
1138 str++;
1139 break;
1140 case 'o':
1141 case 'O':
1142 base = 8;
1143 str++;
1144 break;
1145 case 'd':
1146 case 'D':
1147 case 't':
1148 case 'T':
1149 base = 10;
1150 str++;
1151 break;
1152 case 'x':
1153 case 'X':
1154 base = 16;
1155 str++;
1156 break;
1157 default:
1158 str--;
1159 }
1160 }
1161 } else {
1162 /* Check base range */
1163 if ((base < 2) || (base > 36)) {
1164 *endptr = (char *) str;
1165 return EINVAL;
1166 }
1167 }
1168
1169 *result = 0;
1170 const char *startstr = str;
1171
1172 while (*str != 0) {
1173 unsigned int digit;
1174
1175 if ((*str >= 'a') && (*str <= 'z'))
1176 digit = *str - 'a' + 10;
1177 else if ((*str >= 'A') && (*str <= 'Z'))
1178 digit = *str - 'A' + 10;
1179 else if ((*str >= '0') && (*str <= '9'))
1180 digit = *str - '0';
1181 else
1182 break;
1183
1184 if (digit >= base)
1185 break;
1186
1187 uint64_t prev = *result;
1188 *result = (*result) * base + digit;
1189
1190 if (*result < prev) {
1191 /* Overflow */
1192 *endptr = (char *) str;
1193 return EOVERFLOW;
1194 }
1195
1196 str++;
1197 }
1198
1199 if (str == startstr) {
1200 /*
1201 * No digits were decoded => first invalid character is
1202 * the first character of the string.
1203 */
1204 str = nptr;
1205 }
1206
1207 *endptr = (char *) str;
1208
1209 if (str == nptr)
1210 return EINVAL;
1211
1212 return EOK;
1213}
1214
1215/** Convert string to uint64_t.
1216 *
1217 * @param nptr Pointer to string.
1218 * @param endptr If not NULL, pointer to the first invalid character
1219 * is stored here.
1220 * @param base Zero or number between 2 and 36 inclusive.
1221 * @param strict Do not allow any trailing characters.
1222 * @param result Result of the conversion.
1223 *
1224 * @return EOK if conversion was successful.
1225 *
1226 */
1227int str_uint64(const char *nptr, char **endptr, unsigned int base,
1228 bool strict, uint64_t *result)
1229{
1230 assert(result != NULL);
1231
1232 bool neg;
1233 char *lendptr;
1234 int ret = str_uint(nptr, &lendptr, base, &neg, result);
1235
1236 if (endptr != NULL)
1237 *endptr = (char *) lendptr;
1238
1239 if (ret != EOK)
1240 return ret;
1241
1242 /* Do not allow negative values */
1243 if (neg)
1244 return EINVAL;
1245
1246 /* Check whether we are at the end of
1247 the string in strict mode */
1248 if ((strict) && (*lendptr != 0))
1249 return EINVAL;
1250
1251 return EOK;
1252}
1253
1254/** Convert string to size_t.
1255 *
1256 * @param nptr Pointer to string.
1257 * @param endptr If not NULL, pointer to the first invalid character
1258 * is stored here.
1259 * @param base Zero or number between 2 and 36 inclusive.
1260 * @param strict Do not allow any trailing characters.
1261 * @param result Result of the conversion.
1262 *
1263 * @return EOK if conversion was successful.
1264 *
1265 */
1266int str_size_t(const char *nptr, char **endptr, unsigned int base,
1267 bool strict, size_t *result)
1268{
1269 assert(result != NULL);
1270
1271 bool neg;
1272 char *lendptr;
1273 uint64_t res;
1274 int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1275
1276 if (endptr != NULL)
1277 *endptr = (char *) lendptr;
1278
1279 if (ret != EOK)
1280 return ret;
1281
1282 /* Do not allow negative values */
1283 if (neg)
1284 return EINVAL;
1285
1286 /* Check whether we are at the end of
1287 the string in strict mode */
1288 if ((strict) && (*lendptr != 0))
1289 return EINVAL;
1290
1291 /* Check for overflow */
1292 size_t _res = (size_t) res;
1293 if (_res != res)
1294 return EOVERFLOW;
1295
1296 *result = _res;
1297
1298 return EOK;
1299}
1300
/** Scale a value down and pick the matching decimal (SI) suffix.
 *
 * A unit is used only when the value exceeds one thousand of that unit
 * (e.g. values above 10^6 are expressed in thousands with suffix 'k'),
 * so the scaled value keeps at least four significant digits. Values up
 * to 10^6 are returned unscaled with a space as the suffix.
 *
 * Suffixes: k = 10^3, M = 10^6, G = 10^9, T = 10^12, P = 10^15, E = 10^18.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Suffix character is stored here.
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	static const struct {
		uint64_t threshold;  /* Unit is used for values above this */
		uint64_t divisor;    /* Size of the unit */
		char suffix;
	} units[] = {
		{ UINT64_C(10000000000000000000), UINT64_C(1000000000000000000), 'E' },
		{ UINT64_C(1000000000000000000), UINT64_C(1000000000000000), 'P' },
		{ UINT64_C(1000000000000000), UINT64_C(1000000000000), 'T' },
		{ UINT64_C(1000000000000), UINT64_C(1000000000), 'G' },
		{ UINT64_C(1000000000), UINT64_C(1000000), 'M' },
		{ UINT64_C(1000000), UINT64_C(1000), 'k' }
	};

	size_t i;
	for (i = 0; i < sizeof(units) / sizeof(units[0]); i++) {
		if (val > units[i].threshold) {
			*rv = val / units[i].divisor;
			*suffix = units[i].suffix;
			return;
		}
	}

	/* Small values are reported as they are */
	*rv = val;
	*suffix = ' ';
}
1326
/** Scale a value down and pick the matching binary (IEC) suffix.
 *
 * A unit is used only when the value exceeds 1024 of that unit (e.g.
 * values above 2^20 are expressed in KiB), so the scaled value keeps at
 * least four significant digits. Values up to 2^20 are returned unscaled
 * as bytes.
 *
 * Suffixes: KiB = 2^10, MiB = 2^20, GiB = 2^30, TiB = 2^40, PiB = 2^50.
 *
 * @param val    Value (in bytes) to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Pointer to a constant suffix string is stored here.
 * @param fixed  Selects the space-padded variant of the plain byte suffix.
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	static const struct {
		uint64_t threshold;  /* Unit is used for values above this */
		uint64_t divisor;    /* Size of the unit */
		const char *suffix;
	} units[] = {
		{ UINT64_C(1152921504606846976), UINT64_C(1125899906842624), "PiB" },
		{ UINT64_C(1125899906842624), UINT64_C(1099511627776), "TiB" },
		{ UINT64_C(1099511627776), UINT64_C(1073741824), "GiB" },
		{ UINT64_C(1073741824), UINT64_C(1048576), "MiB" },
		{ UINT64_C(1048576), UINT64_C(1024), "KiB" }
	};

	size_t i;
	for (i = 0; i < sizeof(units) / sizeof(units[0]); i++) {
		if (val > units[i].threshold) {
			*rv = val / units[i].divisor;
			*suffix = units[i].suffix;
			return;
		}
	}

	/* Small values are reported in plain bytes */
	*rv = val;
	if (fixed)
		*suffix = "B ";
	else
		*suffix = "B";
}
1353
1354/** @}
1355 */
Note: See TracBrowser for help on using the repository browser.