source: mainline/uspace/lib/c/generic/str.c@ 972c60ce

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 972c60ce was 1737bfb, checked in by Jakub Jermar <jakub@…>, 14 years ago

cp improvements
(Thanks to Maurizio Lombardi)

  • Support for the -r flag (recursive directory copying) and the -f flag
    (force copying even if the destination file already exists) has been
    added.

  • Property mode set to 100644
File size: 36.6 KB
Line 
1/*
2 * Copyright (c) 2005 Martin Decky
3 * Copyright (c) 2008 Jiri Svoboda
4 * Copyright (c) 2011 Martin Sucha
5 * Copyright (c) 2011 Oleg Romanenko
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * - Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * - Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * - The name of the author may not be used to endorse or promote products
18 * derived from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31
32/** @addtogroup libc
33 * @{
34 */
35/** @file
36 */
37
38#include <str.h>
39#include <stdlib.h>
40#include <assert.h>
41#include <stdint.h>
42#include <ctype.h>
43#include <malloc.h>
44#include <errno.h>
45#include <align.h>
46#include <mem.h>
47#include <str.h>
48
/** Byte mask consisting of lowest @n bits (out of 8).
 *
 * The shift is done on an unsigned operand so that no signed overflow
 * can occur.
 */
#define LO_MASK_8(n)  ((uint8_t) ((1u << (n)) - 1u))

/** Mask consisting of lowest @n bits (out of 32), valid for 0 <= n <= 31.
 *
 * Uses an unsigned 32-bit one so that LO_MASK_32(31) is well defined.
 */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1u))

/** Byte mask consisting of highest @n bits (out of 8).
 *
 * The result has type int (bits above the lowest byte are set); users are
 * expected to truncate it to a byte.
 */
#define HI_MASK_8(n)  (~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
60
61/** Decode a single character from a string.
62 *
63 * Decode a single character from a string of size @a size. Decoding starts
64 * at @a offset and this offset is moved to the beginning of the next
65 * character. In case of decoding error, offset generally advances at least
66 * by one. However, offset is never moved beyond size.
67 *
68 * @param str String (not necessarily NULL-terminated).
69 * @param offset Byte offset in string where to start decoding.
70 * @param size Size of the string (in bytes).
71 *
72 * @return Value of decoded character, U_SPECIAL on decoding error or
73 * NULL if attempt to decode beyond @a size.
74 *
75 */
76wchar_t str_decode(const char *str, size_t *offset, size_t size)
77{
78 if (*offset + 1 > size)
79 return 0;
80
81 /* First byte read from string */
82 uint8_t b0 = (uint8_t) str[(*offset)++];
83
84 /* Determine code length */
85
86 unsigned int b0_bits; /* Data bits in first byte */
87 unsigned int cbytes; /* Number of continuation bytes */
88
89 if ((b0 & 0x80) == 0) {
90 /* 0xxxxxxx (Plain ASCII) */
91 b0_bits = 7;
92 cbytes = 0;
93 } else if ((b0 & 0xe0) == 0xc0) {
94 /* 110xxxxx 10xxxxxx */
95 b0_bits = 5;
96 cbytes = 1;
97 } else if ((b0 & 0xf0) == 0xe0) {
98 /* 1110xxxx 10xxxxxx 10xxxxxx */
99 b0_bits = 4;
100 cbytes = 2;
101 } else if ((b0 & 0xf8) == 0xf0) {
102 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
103 b0_bits = 3;
104 cbytes = 3;
105 } else {
106 /* 10xxxxxx -- unexpected continuation byte */
107 return U_SPECIAL;
108 }
109
110 if (*offset + cbytes > size)
111 return U_SPECIAL;
112
113 wchar_t ch = b0 & LO_MASK_8(b0_bits);
114
115 /* Decode continuation bytes */
116 while (cbytes > 0) {
117 uint8_t b = (uint8_t) str[(*offset)++];
118
119 /* Must be 10xxxxxx */
120 if ((b & 0xc0) != 0x80)
121 return U_SPECIAL;
122
123 /* Shift data bits to ch */
124 ch = (ch << CONT_BITS) | (wchar_t) (b & LO_MASK_8(CONT_BITS));
125 cbytes--;
126 }
127
128 return ch;
129}
130
131/** Encode a single character to string representation.
132 *
133 * Encode a single character to string representation (i.e. UTF-8) and store
134 * it into a buffer at @a offset. Encoding starts at @a offset and this offset
135 * is moved to the position where the next character can be written to.
136 *
137 * @param ch Input character.
138 * @param str Output buffer.
139 * @param offset Byte offset where to start writing.
140 * @param size Size of the output buffer (in bytes).
141 *
142 * @return EOK if the character was encoded successfully, EOVERFLOW if there
143 * was not enough space in the output buffer or EINVAL if the character
144 * code was invalid.
145 */
146int chr_encode(const wchar_t ch, char *str, size_t *offset, size_t size)
147{
148 if (*offset >= size)
149 return EOVERFLOW;
150
151 if (!chr_check(ch))
152 return EINVAL;
153
154 /* Unsigned version of ch (bit operations should only be done
155 on unsigned types). */
156 uint32_t cc = (uint32_t) ch;
157
158 /* Determine how many continuation bytes are needed */
159
160 unsigned int b0_bits; /* Data bits in first byte */
161 unsigned int cbytes; /* Number of continuation bytes */
162
163 if ((cc & ~LO_MASK_32(7)) == 0) {
164 b0_bits = 7;
165 cbytes = 0;
166 } else if ((cc & ~LO_MASK_32(11)) == 0) {
167 b0_bits = 5;
168 cbytes = 1;
169 } else if ((cc & ~LO_MASK_32(16)) == 0) {
170 b0_bits = 4;
171 cbytes = 2;
172 } else if ((cc & ~LO_MASK_32(21)) == 0) {
173 b0_bits = 3;
174 cbytes = 3;
175 } else {
176 /* Codes longer than 21 bits are not supported */
177 return EINVAL;
178 }
179
180 /* Check for available space in buffer */
181 if (*offset + cbytes >= size)
182 return EOVERFLOW;
183
184 /* Encode continuation bytes */
185 unsigned int i;
186 for (i = cbytes; i > 0; i--) {
187 str[*offset + i] = 0x80 | (cc & LO_MASK_32(CONT_BITS));
188 cc = cc >> CONT_BITS;
189 }
190
191 /* Encode first byte */
192 str[*offset] = (cc & LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1);
193
194 /* Advance offset */
195 *offset += cbytes + 1;
196
197 return EOK;
198}
199
/** Get size of string.
 *
 * Counts the bytes of @a str that precede the first zero byte.
 *
 * @param str String to consider.
 *
 * @return Number of bytes used by the string (terminator not included).
 *
 */
size_t str_size(const char *str)
{
	const char *end = str;

	while (*end != '\0')
		end++;

	return (size_t) (end - str);
}
219
/** Get size of wide string.
 *
 * Computes the number of bytes occupied by the characters of the wide
 * string @a str, not counting the terminating zero character.
 *
 * @param str Wide string to consider.
 *
 * @return Number of bytes used by the wide string
 *
 */
size_t wstr_size(const wchar_t *str)
{
	const size_t chars = wstr_length(str);

	return chars * sizeof(wchar_t);
}
234
235/** Get size of string with length limit.
236 *
237 * Get the number of bytes which are used by up to @a max_len first
238 * characters in the string @a str. If @a max_len is greater than
239 * the length of @a str, the entire string is measured (excluding the
240 * NULL-terminator).
241 *
242 * @param str String to consider.
243 * @param max_len Maximum number of characters to measure.
244 *
245 * @return Number of bytes used by the characters.
246 *
247 */
248size_t str_lsize(const char *str, size_t max_len)
249{
250 size_t len = 0;
251 size_t offset = 0;
252
253 while (len < max_len) {
254 if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
255 break;
256
257 len++;
258 }
259
260 return offset;
261}
262
/** Get size of wide string with length limit.
 *
 * Computes the number of bytes occupied by at most @a max_len leading
 * wide characters of @a str. The terminating zero character is never
 * counted.
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	const size_t byte_limit = max_len * sizeof(wchar_t);
	const size_t chars = wstr_nlength(str, byte_limit);

	return chars * sizeof(wchar_t);
}
280
281/** Get number of characters in a string.
282 *
283 * @param str NULL-terminated string.
284 *
285 * @return Number of characters in string.
286 *
287 */
288size_t str_length(const char *str)
289{
290 size_t len = 0;
291 size_t offset = 0;
292
293 while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
294 len++;
295
296 return len;
297}
298
/** Get number of characters in a wide string.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return Number of wide characters preceding the terminator.
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
	const wchar_t *cur = wstr;

	while (*cur != 0)
		cur++;

	return (size_t) (cur - wstr);
}
315
/** Get number of characters in a string with size limit.
 *
 * Decodes characters from @a str until either the terminator is found or
 * @a size bytes have been consumed.
 *
 * @param str  NULL-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in string.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t count = 0;
	size_t pos = 0;

	for (;;) {
		if (str_decode(str, &pos, size) == 0)
			break;

		count++;
	}

	return count;
}
334
/** Get number of characters in a wide string with size limit.
 *
 * Only whole wide characters that fit into the first @a size bytes are
 * examined; a trailing partial character is ignored.
 *
 * @param str  NULL-terminated wide string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of wide characters found before the terminator or the
 *         size limit, whichever comes first.
 *
 */
size_t wstr_nlength(const wchar_t *str, size_t size)
{
	/* Number of complete wide characters inside the byte limit. */
	const size_t max_chars =
	    ALIGN_DOWN(size, sizeof(wchar_t)) / sizeof(wchar_t);
	size_t count = 0;

	while ((count < max_chars) && (str[count] != 0))
		count++;

	return count;
}
356
/** Check whether character is plain ASCII.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0..127 (inclusive).
 *
 */
bool ascii_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 127);
}
369
/** Check whether character is valid
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0..1114111 (0x10ffff), inclusive.
 *
 */
bool chr_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 1114111);
}
382
383/** Compare two NULL terminated strings.
384 *
385 * Do a char-by-char comparison of two NULL-terminated strings.
386 * The strings are considered equal iff they consist of the same
387 * characters on the minimum of their lengths.
388 *
389 * @param s1 First string to compare.
390 * @param s2 Second string to compare.
391 *
392 * @return 0 if the strings are equal, -1 if first is smaller,
393 * 1 if second smaller.
394 *
395 */
396int str_cmp(const char *s1, const char *s2)
397{
398 wchar_t c1 = 0;
399 wchar_t c2 = 0;
400
401 size_t off1 = 0;
402 size_t off2 = 0;
403
404 while (true) {
405 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
406 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
407
408 if (c1 < c2)
409 return -1;
410
411 if (c1 > c2)
412 return 1;
413
414 if (c1 == 0 || c2 == 0)
415 break;
416 }
417
418 return 0;
419}
420
421/** Compare two NULL terminated strings with length limit.
422 *
423 * Do a char-by-char comparison of two NULL-terminated strings.
424 * The strings are considered equal iff they consist of the same
425 * characters on the minimum of their lengths and the length limit.
426 *
427 * @param s1 First string to compare.
428 * @param s2 Second string to compare.
429 * @param max_len Maximum number of characters to consider.
430 *
431 * @return 0 if the strings are equal, -1 if first is smaller,
432 * 1 if second smaller.
433 *
434 */
435int str_lcmp(const char *s1, const char *s2, size_t max_len)
436{
437 wchar_t c1 = 0;
438 wchar_t c2 = 0;
439
440 size_t off1 = 0;
441 size_t off2 = 0;
442
443 size_t len = 0;
444
445 while (true) {
446 if (len >= max_len)
447 break;
448
449 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
450 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
451
452 if (c1 < c2)
453 return -1;
454
455 if (c1 > c2)
456 return 1;
457
458 if (c1 == 0 || c2 == 0)
459 break;
460
461 ++len;
462 }
463
464 return 0;
465
466}
467
468/** Copy string.
469 *
470 * Copy source string @a src to destination buffer @a dest.
471 * No more than @a size bytes are written. If the size of the output buffer
472 * is at least one byte, the output string will always be well-formed, i.e.
473 * null-terminated and containing only complete characters.
474 *
475 * @param dest Destination buffer.
476 * @param count Size of the destination buffer (must be > 0).
477 * @param src Source string.
478 */
479void str_cpy(char *dest, size_t size, const char *src)
480{
481 /* There must be space for a null terminator in the buffer. */
482 assert(size > 0);
483
484 size_t src_off = 0;
485 size_t dest_off = 0;
486
487 wchar_t ch;
488 while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
489 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
490 break;
491 }
492
493 dest[dest_off] = '\0';
494}
495
496/** Copy size-limited substring.
497 *
498 * Copy prefix of string @a src of max. size @a size to destination buffer
499 * @a dest. No more than @a size bytes are written. The output string will
500 * always be well-formed, i.e. null-terminated and containing only complete
501 * characters.
502 *
503 * No more than @a n bytes are read from the input string, so it does not
504 * have to be null-terminated.
505 *
506 * @param dest Destination buffer.
507 * @param count Size of the destination buffer (must be > 0).
508 * @param src Source string.
509 * @param n Maximum number of bytes to read from @a src.
510 */
511void str_ncpy(char *dest, size_t size, const char *src, size_t n)
512{
513 /* There must be space for a null terminator in the buffer. */
514 assert(size > 0);
515
516 size_t src_off = 0;
517 size_t dest_off = 0;
518
519 wchar_t ch;
520 while ((ch = str_decode(src, &src_off, n)) != 0) {
521 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
522 break;
523 }
524
525 dest[dest_off] = '\0';
526}
527
/** Append one string to another.
 *
 * Appends @a src to the string already stored in @a dest. The total size
 * of the destination buffer is @a size. If the existing string already
 * occupies @a size bytes or more, nothing is done; otherwise the
 * remaining space is filled via str_cpy().
 *
 * @param dest Destination buffer.
 * @param size Size of the destination buffer.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	const size_t used = str_size(dest);

	if (used < size)
		str_cpy(dest + used, size - used, src);
}
549
550/** Convert space-padded ASCII to string.
551 *
552 * Common legacy text encoding in hardware is 7-bit ASCII fitted into
553 * a fixed-width byte buffer (bit 7 always zero), right-padded with spaces
554 * (ASCII 0x20). Convert space-padded ascii to string representation.
555 *
556 * If the text does not fit into the destination buffer, the function converts
557 * as many characters as possible and returns EOVERFLOW.
558 *
559 * If the text contains non-ASCII bytes (with bit 7 set), the whole string is
560 * converted anyway and invalid characters are replaced with question marks
561 * (U_SPECIAL) and the function returns EIO.
562 *
563 * Regardless of return value upon return @a dest will always be well-formed.
564 *
565 * @param dest Destination buffer
566 * @param size Size of destination buffer
567 * @param src Space-padded ASCII.
568 * @param n Size of the source buffer in bytes.
569 *
570 * @return EOK on success, EOVERFLOW if the text does not fit
571 * destination buffer, EIO if the text contains
572 * non-ASCII bytes.
573 */
574int spascii_to_str(char *dest, size_t size, const uint8_t *src, size_t n)
575{
576 size_t sidx;
577 size_t didx;
578 size_t dlast;
579 uint8_t byte;
580 int rc;
581 int result;
582
583 /* There must be space for a null terminator in the buffer. */
584 assert(size > 0);
585 result = EOK;
586
587 didx = 0;
588 dlast = 0;
589 for (sidx = 0; sidx < n; ++sidx) {
590 byte = src[sidx];
591 if (!ascii_check(byte)) {
592 byte = U_SPECIAL;
593 result = EIO;
594 }
595
596 rc = chr_encode(byte, dest, &didx, size - 1);
597 if (rc != EOK) {
598 assert(rc == EOVERFLOW);
599 dest[didx] = '\0';
600 return rc;
601 }
602
603 /* Remember dest index after last non-empty character */
604 if (byte != 0x20)
605 dlast = didx;
606 }
607
608 /* Terminate string after last non-empty character */
609 dest[dlast] = '\0';
610 return result;
611}
612
613/** Convert wide string to string.
614 *
615 * Convert wide string @a src to string. The output is written to the buffer
616 * specified by @a dest and @a size. @a size must be non-zero and the string
617 * written will always be well-formed.
618 *
619 * @param dest Destination buffer.
620 * @param size Size of the destination buffer.
621 * @param src Source wide string.
622 */
623void wstr_to_str(char *dest, size_t size, const wchar_t *src)
624{
625 wchar_t ch;
626 size_t src_idx;
627 size_t dest_off;
628
629 /* There must be space for a null terminator in the buffer. */
630 assert(size > 0);
631
632 src_idx = 0;
633 dest_off = 0;
634
635 while ((ch = src[src_idx++]) != 0) {
636 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
637 break;
638 }
639
640 dest[dest_off] = '\0';
641}
642
643/** Convert UTF16 string to string.
644 *
645 * Convert utf16 string @a src to string. The output is written to the buffer
646 * specified by @a dest and @a size. @a size must be non-zero and the string
647 * written will always be well-formed. Surrogate pairs also supported.
648 *
649 * @param dest Destination buffer.
650 * @param size Size of the destination buffer.
651 * @param src Source utf16 string.
652 *
653 * @return EOK, if success, negative otherwise.
654 */
655int utf16_to_str(char *dest, size_t size, const uint16_t *src)
656{
657 size_t idx = 0, dest_off = 0;
658 wchar_t ch;
659 int rc = EOK;
660
661 /* There must be space for a null terminator in the buffer. */
662 assert(size > 0);
663
664 while (src[idx]) {
665 if ((src[idx] & 0xfc00) == 0xd800) {
666 if (src[idx + 1] && (src[idx + 1] & 0xfc00) == 0xdc00) {
667 ch = 0x10000;
668 ch += (src[idx] & 0x03FF) << 10;
669 ch += (src[idx + 1] & 0x03FF);
670 idx += 2;
671 }
672 else
673 break;
674 } else {
675 ch = src[idx];
676 idx++;
677 }
678 rc = chr_encode(ch, dest, &dest_off, size - 1);
679 if (rc != EOK)
680 break;
681 }
682 dest[dest_off] = '\0';
683 return rc;
684}
685
686int str_to_utf16(uint16_t *dest, size_t size, const char *src)
687{
688 int rc = EOK;
689 size_t offset = 0;
690 size_t idx = 0;
691 wchar_t c;
692
693 assert(size > 0);
694
695 while ((c = str_decode(src, &offset, STR_NO_LIMIT)) != 0) {
696 if (c > 0x10000) {
697 if (idx + 2 >= size - 1) {
698 rc = EOVERFLOW;
699 break;
700 }
701 c = (c - 0x10000);
702 dest[idx] = 0xD800 | (c >> 10);
703 dest[idx + 1] = 0xDC00 | (c & 0x3FF);
704 idx++;
705 } else {
706 dest[idx] = c;
707 }
708
709 idx++;
710 if (idx >= size - 1) {
711 rc = EOVERFLOW;
712 break;
713 }
714 }
715
716 dest[idx] = '\0';
717 return rc;
718}
719
720
721/** Convert wide string to new string.
722 *
723 * Convert wide string @a src to string. Space for the new string is allocated
724 * on the heap.
725 *
726 * @param src Source wide string.
727 * @return New string.
728 */
729char *wstr_to_astr(const wchar_t *src)
730{
731 char dbuf[STR_BOUNDS(1)];
732 char *str;
733 wchar_t ch;
734
735 size_t src_idx;
736 size_t dest_off;
737 size_t dest_size;
738
739 /* Compute size of encoded string. */
740
741 src_idx = 0;
742 dest_size = 0;
743
744 while ((ch = src[src_idx++]) != 0) {
745 dest_off = 0;
746 if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
747 break;
748 dest_size += dest_off;
749 }
750
751 str = malloc(dest_size + 1);
752 if (str == NULL)
753 return NULL;
754
755 /* Encode string. */
756
757 src_idx = 0;
758 dest_off = 0;
759
760 while ((ch = src[src_idx++]) != 0) {
761 if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
762 break;
763 }
764
765 str[dest_size] = '\0';
766 return str;
767}
768
769
770/** Convert string to wide string.
771 *
772 * Convert string @a src to wide string. The output is written to the
773 * buffer specified by @a dest and @a dlen. @a dlen must be non-zero
774 * and the wide string written will always be null-terminated.
775 *
776 * @param dest Destination buffer.
777 * @param dlen Length of destination buffer (number of wchars).
778 * @param src Source string.
779 */
780void str_to_wstr(wchar_t *dest, size_t dlen, const char *src)
781{
782 size_t offset;
783 size_t di;
784 wchar_t c;
785
786 assert(dlen > 0);
787
788 offset = 0;
789 di = 0;
790
791 do {
792 if (di >= dlen - 1)
793 break;
794
795 c = str_decode(src, &offset, STR_NO_LIMIT);
796 dest[di++] = c;
797 } while (c != '\0');
798
799 dest[dlen - 1] = '\0';
800}
801
/** Convert string to newly allocated wide string.
 *
 * Allocates a zero-initialized wide-character buffer large enough for all
 * characters of @a str plus the terminator and converts @a str into it.
 *
 * @param str Source string.
 *
 * @return New wide NULL-terminated string, or NULL if the allocation
 *         failed.
 */
wchar_t *str_to_awstr(const char *str)
{
	const size_t slots = str_length(str) + 1;
	wchar_t *out = calloc(slots, sizeof(wchar_t));

	if (out != NULL)
		str_to_wstr(out, slots, str);

	return out;
}
820
821/** Find first occurence of character in string.
822 *
823 * @param str String to search.
824 * @param ch Character to look for.
825 *
826 * @return Pointer to character in @a str or NULL if not found.
827 */
828char *str_chr(const char *str, wchar_t ch)
829{
830 wchar_t acc;
831 size_t off = 0;
832 size_t last = 0;
833
834 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
835 if (acc == ch)
836 return (char *) (str + last);
837 last = off;
838 }
839
840 return NULL;
841}
842
843/** Removes specified trailing characters from a string.
844 *
845 * @param str String to remove from.
846 * @param ch Character to remove.
847 */
848void str_rtrim(char *str, wchar_t ch)
849{
850 size_t off = 0;
851 size_t pos = 0;
852 wchar_t c;
853 bool update_last_chunk = true;
854 char *last_chunk = NULL;
855
856 while ((c = str_decode(str, &off, STR_NO_LIMIT))) {
857 if (c != ch) {
858 update_last_chunk = true;
859 last_chunk = NULL;
860 } else if (update_last_chunk) {
861 update_last_chunk = false;
862 last_chunk = (str + pos);
863 }
864 pos = off;
865 }
866
867 if (last_chunk)
868 *last_chunk = '\0';
869}
870
871/** Removes specified leading characters from a string.
872 *
873 * @param str String to remove from.
874 * @param ch Character to remove.
875 */
876void str_ltrim(char *str, wchar_t ch)
877{
878 wchar_t acc;
879 size_t off = 0;
880 size_t pos = 0;
881 size_t str_sz = str_size(str);
882
883 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
884 if (acc != ch)
885 break;
886 else
887 pos = off;
888 }
889
890 if (pos > 0) {
891 memmove(str, &str[pos], str_sz - pos);
892 pos = str_sz - pos;
893 str[str_sz - pos] = '\0';
894 }
895}
896
897/** Find last occurence of character in string.
898 *
899 * @param str String to search.
900 * @param ch Character to look for.
901 *
902 * @return Pointer to character in @a str or NULL if not found.
903 */
904char *str_rchr(const char *str, wchar_t ch)
905{
906 wchar_t acc;
907 size_t off = 0;
908 size_t last = 0;
909 const char *res = NULL;
910
911 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
912 if (acc == ch)
913 res = (str + last);
914 last = off;
915 }
916
917 return (char *) res;
918}
919
/** Insert a wide character into a wide string.
 *
 * Inserts @a ch at index @a pos; the characters from @a pos up to and
 * including the terminator are shifted one position towards the end.
 *
 * NOTE(review): the bounds check compares @a pos (not the string length)
 * against @a max_pos, while the shift writes index length + 1. Presumably
 * callers guarantee the buffer has room for that -- TODO confirm.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert to.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	const size_t len = wstr_length(str);

	if (pos > len)
		return false;

	if (pos + 1 > max_pos)
		return false;

	/* Shift the tail (terminator included) up, starting from the end. */
	for (size_t dst = len + 1; dst > pos; dst--)
		str[dst] = str[dst - 1];

	str[pos] = ch;

	return true;
}
949
/** Remove a wide character from a wide string.
 *
 * Removes the character at index @a pos; all following characters,
 * including the terminator, are shifted one position towards the start.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	const size_t len = wstr_length(str);

	if (pos >= len)
		return false;

	/* Pull every following element (terminator included) one slot down. */
	for (wchar_t *cur = str + pos; cur < str + len; cur++)
		cur[0] = cur[1];

	return true;
}
975
/** Compare two strings byte by byte, ignoring letter case.
 *
 * Each byte is converted to unsigned char before being passed to
 * tolower(), so bytes with the top bit set are handled in a well-defined
 * way and order after all ASCII characters.
 *
 * @param a First string.
 * @param b Second string.
 *
 * @return Zero if the strings are equal ignoring case, a negative value if
 *         @a a orders before @a b, a positive value otherwise.
 */
int stricmp(const char *a, const char *b)
{
	size_t i = 0;

	for (;;) {
		const int ca = tolower((unsigned char) a[i]);
		const int cb = tolower((unsigned char) b[i]);

		/* Stop at the first difference or at the common terminator. */
		if ((ca != cb) || (ca == 0))
			return ca - cb;

		i++;
	}
}
985
/** Convert string to a number.
 * Core of strtol and strtoul functions.
 *
 * Skips leading whitespace, accepts an optional sign and an optional
 * `0x'/`0X' prefix (for base 0 or 16, and only when a hexadecimal digit
 * follows it), then converts as many digits valid in @a base as possible.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character, or @a nptr if no conversion could be
 *               performed.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @param sgn    It's set to 1 if minus found (left untouched otherwise).
 * @return       Result of conversion, ULONG_MAX if the value does not fit
 *               into unsigned long, 0 if no conversion could be performed.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
	const char *str = nptr;
	unsigned long result = 0;
	bool overflow = false;

	/* <ctype.h> functions require a value representable as unsigned char. */
	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*sgn = 1;
		str++;
	} else if (*str == '+') {
		str++;
	}

	if ((base < 0) || (base == 1) || (base > 36)) {
		/* FIXME: set errno to EINVAL */
		if (endptr)
			*endptr = (char *) nptr;
		return 0;
	}

	/*
	 * The prefix only counts when a hexadecimal digit follows; otherwise
	 * the leading "0" is an ordinary digit and parsing stops at the 'x'.
	 */
	const bool hex_prefix = (str[0] == '0') &&
	    ((str[1] == 'x') || (str[1] == 'X')) &&
	    isxdigit((unsigned char) str[2]);

	if (base == 0) {
		if (hex_prefix)
			base = 16;
		else if (*str == '0')
			base = 8;
		else
			base = 10;
	}

	if ((base == 16) && hex_prefix)
		str += 2;

	const char *digits = str;

	for (; *str != '\0'; str++) {
		const unsigned char c = (unsigned char) *str;
		unsigned int digit;

		if ((c >= '0') && (c <= '9'))
			digit = c - '0';
		else if ((c >= 'a') && (c <= 'z'))
			digit = c - 'a' + 10;
		else if ((c >= 'A') && (c <= 'Z'))
			digit = c - 'A' + 10;
		else
			break;

		/* Valid digits are 0 .. base - 1. */
		if (digit >= (unsigned int) base)
			break;

		/* After an overflow keep consuming digits for endptr only. */
		if (overflow)
			continue;

		if (result > (ULONG_MAX - digit) / (unsigned long) base) {
			overflow = true;
			continue;
		}

		result = result * (unsigned long) base + digit;
	}

	if (str == digits) {
		/*
		 * No number was found => first invalid character is the first
		 * character of the string.
		 */
		/* FIXME: set errno to EINVAL */
		str = nptr;
		result = 0;
	}

	if (endptr)
		*endptr = (char *) str;

	if (overflow) {
		/* FIXME: errno = ERANGE */
		return ULONG_MAX;
	}

	return result;
}
1078
/** Convert initial part of string to long int according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * Values that do not fit are clamped to LONG_MIN or LONG_MAX.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return       Result of conversion.
 */
long int strtol(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	const unsigned long number = _strtoul(nptr, endptr, base, &sgn);

	if (!sgn) {
		if (number > LONG_MAX) {
			/* FIXME: set ERANGE to errno */
			return LONG_MAX;
		}

		return (long) number;
	}

	if (number > LONG_MAX) {
		/*
		 * A magnitude of exactly LONG_MAX + 1 is LONG_MIN itself (not
		 * a range error); anything larger is clamped to LONG_MIN.
		 */
		/* FIXME: set ERANGE to errno unless number == LONG_MAX + 1 */
		return LONG_MIN;
	}

	/* Negate in the signed domain; number <= LONG_MAX so this is safe. */
	return -(long) number;
}
1110
/** Duplicate string.
 *
 * Allocates a buffer of str_size(@a src) + 1 bytes with malloc() and
 * copies the source string into it using str_cpy(). Because the copy is
 * made character by character, the duplicate is always null-terminated
 * but it can differ from the source string on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	const size_t bytes = str_size(src) + 1;
	char *copy = (char *) malloc(bytes);

	if (copy != NULL)
		str_cpy(copy, bytes, src);

	return copy;
}
1137
/** Duplicate string with size limit.
 *
 * Allocates a new buffer with malloc() and copies up to @a n bytes of the
 * source string into it using str_ncpy(). The buffer holds
 * min(str_size(@a src), @a n) + 1 bytes. Because the copy is made
 * character by character, the duplicate is always null-terminated but it
 * can differ from the source string on the byte level.
 *
 * @param src Source string.
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	size_t limit = str_size(src);

	if (n < limit)
		limit = n;

	char *copy = (char *) malloc(limit + 1);
	if (copy == NULL)
		return (char *) NULL;

	str_ncpy(copy, limit + 1, src, limit);
	return copy;
}
1171
/** Convert initial part of string to unsigned long according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * If a minus sign was present, the converted value is negated in unsigned
 * arithmetic.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return       Result of conversion.
 */
unsigned long strtoul(const char *nptr, char **endptr, int base)
{
	char negative = 0;
	const unsigned long magnitude = _strtoul(nptr, endptr, base, &negative);

	if (negative)
		return -magnitude;

	return magnitude;
}
1194
/** Split string into tokens (non-reentrant variant).
 *
 * Thin wrapper around strtok_r() that keeps the continuation pointer in a
 * function-local static variable shared by all callers.
 *
 * @param s     String to tokenize, or NULL to continue with the string
 *              from the previous call.
 * @param delim Set of delimiter characters.
 *
 * @return Next token or NULL if there are no more tokens.
 */
char *strtok(char *s, const char *delim)
{
	static char *saved;

	return strtok_r(s, delim, &saved);
}
1201
/** Split a string into tokens (reentrant variant).
 *
 * The token is terminated in place: the delimiter that follows it is
 * overwritten with a null character, so the input string is modified.
 *
 * @param s     String to tokenize, or NULL to continue from the position
 *              stored in @a next.
 * @param delim String containing the delimiter characters.
 * @param next  Storage for the continuation position. It is read when
 *              @a s is NULL and updated whenever a scan is performed.
 *
 * @return Pointer to the next token, or NULL if there are no more tokens
 *         or there is no string to continue with.
 *
 */
char *strtok_r(char *s, const char *delim, char **next)
{
	char *start, *end;

	if (s == NULL)
		s = *next;

	/*
	 * No string was supplied and no previous position is stored
	 * (e.g. continuation requested before the first real call).
	 * Report "no more tokens" instead of dereferencing NULL.
	 */
	if (s == NULL)
		return NULL;

	/* Skip over leading delimiters. */
	while (*s && (str_chr(delim, *s) != NULL))
		++s;
	start = s;

	/* Skip over token characters. */
	while (*s && (str_chr(delim, *s) == NULL))
		++s;
	end = s;

	/*
	 * Continue after the delimiter next time, or stay on the
	 * terminator if the end of the string was reached.
	 */
	*next = (*s ? s + 1 : s);

	if (start == end) {
		return NULL;  /* No more tokens. */
	}

	/* Overwrite delimiter with the null terminator. */
	*end = '\0';
	return start;
}
1226
/** Convert string to uint64_t (internal variant).
 *
 * Leading whitespace is skipped and an optional `+' or `-' sign is
 * consumed. If @a base is zero, the base is derived from the prefix:
 * `0b' binary, `0o' or a bare leading `0' octal, `0d' or `0t' decimal,
 * `0x' hexadecimal (letters in either case), anything else decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr Pointer to the first invalid character is stored here.
 *               If no digit was decoded, @a nptr is stored. On overflow
 *               the digit that would not fit is stored. If @a base is
 *               out of range, the position after the whitespace and
 *               the sign is stored.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @param neg    Indication of unary minus is stored here.
 * @param result Result of the conversion.
 *
 * @return EOK if conversion was successful.
 * @return EINVAL if @a base is out of range or no digit was decoded.
 * @return EOVERFLOW if the number does not fit into 64 bits.
 *
 */
static int str_uint(const char *nptr, char **endptr, unsigned int base,
    bool *neg, uint64_t *result)
{
	assert(endptr != NULL);
	assert(neg != NULL);
	assert(result != NULL);

	*neg = false;
	const char *str = nptr;

	/* Ignore leading whitespace */
	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*neg = true;
		str++;
	} else if (*str == '+')
		str++;

	if (base == 0) {
		/* Decode base if not specified */
		base = 10;

		if (*str == '0') {
			base = 8;
			str++;

			switch (*str) {
			case 'b':
			case 'B':
				base = 2;
				str++;
				break;
			case 'o':
			case 'O':
				base = 8;
				str++;
				break;
			case 'd':
			case 'D':
			case 't':
			case 'T':
				base = 10;
				str++;
				break;
			case 'x':
			case 'X':
				base = 16;
				str++;
				break;
			default:
				/*
				 * Plain leading zero: step back so that
				 * the zero itself is decoded as a digit.
				 */
				str--;
			}
		}
	} else {
		/* Check base range */
		if ((base < 2) || (base > 36)) {
			*endptr = (char *) str;
			return EINVAL;
		}
	}

	*result = 0;
	const char *startstr = str;
	const uint64_t limit = ~UINT64_C(0);

	while (*str != 0) {
		unsigned int digit;

		if ((*str >= 'a') && (*str <= 'z'))
			digit = *str - 'a' + 10;
		else if ((*str >= 'A') && (*str <= 'Z'))
			digit = *str - 'A' + 10;
		else if ((*str >= '0') && (*str <= '9'))
			digit = *str - '0';
		else
			break;

		if (digit >= base)
			break;

		/*
		 * Detect overflow before it happens. Comparing the wrapped
		 * product with the previous value is not sufficient, because
		 * the product can wrap around and still be larger.
		 */
		if (*result > (limit - digit) / base) {
			/* Overflow */
			*endptr = (char *) str;
			return EOVERFLOW;
		}

		*result = (*result) * base + digit;
		str++;
	}

	if (str == startstr) {
		/*
		 * No digits were decoded => first invalid character is
		 * the first character of the string.
		 */
		str = nptr;
	}

	*endptr = (char *) str;

	if (str == nptr)
		return EINVAL;

	return EOK;
}
1346
1347/** Convert string to uint8_t.
1348 *
1349 * @param nptr Pointer to string.
1350 * @param endptr If not NULL, pointer to the first invalid character
1351 * is stored here.
1352 * @param base Zero or number between 2 and 36 inclusive.
1353 * @param strict Do not allow any trailing characters.
1354 * @param result Result of the conversion.
1355 *
1356 * @return EOK if conversion was successful.
1357 *
1358 */
1359int str_uint8_t(const char *nptr, char **endptr, unsigned int base,
1360 bool strict, uint8_t *result)
1361{
1362 assert(result != NULL);
1363
1364 bool neg;
1365 char *lendptr;
1366 uint64_t res;
1367 int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1368
1369 if (endptr != NULL)
1370 *endptr = (char *) lendptr;
1371
1372 if (ret != EOK)
1373 return ret;
1374
1375 /* Do not allow negative values */
1376 if (neg)
1377 return EINVAL;
1378
1379 /* Check whether we are at the end of
1380 the string in strict mode */
1381 if ((strict) && (*lendptr != 0))
1382 return EINVAL;
1383
1384 /* Check for overflow */
1385 uint8_t _res = (uint8_t) res;
1386 if (_res != res)
1387 return EOVERFLOW;
1388
1389 *result = _res;
1390
1391 return EOK;
1392}
1393
1394/** Convert string to uint16_t.
1395 *
1396 * @param nptr Pointer to string.
1397 * @param endptr If not NULL, pointer to the first invalid character
1398 * is stored here.
1399 * @param base Zero or number between 2 and 36 inclusive.
1400 * @param strict Do not allow any trailing characters.
1401 * @param result Result of the conversion.
1402 *
1403 * @return EOK if conversion was successful.
1404 *
1405 */
1406int str_uint16_t(const char *nptr, char **endptr, unsigned int base,
1407 bool strict, uint16_t *result)
1408{
1409 assert(result != NULL);
1410
1411 bool neg;
1412 char *lendptr;
1413 uint64_t res;
1414 int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1415
1416 if (endptr != NULL)
1417 *endptr = (char *) lendptr;
1418
1419 if (ret != EOK)
1420 return ret;
1421
1422 /* Do not allow negative values */
1423 if (neg)
1424 return EINVAL;
1425
1426 /* Check whether we are at the end of
1427 the string in strict mode */
1428 if ((strict) && (*lendptr != 0))
1429 return EINVAL;
1430
1431 /* Check for overflow */
1432 uint16_t _res = (uint16_t) res;
1433 if (_res != res)
1434 return EOVERFLOW;
1435
1436 *result = _res;
1437
1438 return EOK;
1439}
1440
1441/** Convert string to uint32_t.
1442 *
1443 * @param nptr Pointer to string.
1444 * @param endptr If not NULL, pointer to the first invalid character
1445 * is stored here.
1446 * @param base Zero or number between 2 and 36 inclusive.
1447 * @param strict Do not allow any trailing characters.
1448 * @param result Result of the conversion.
1449 *
1450 * @return EOK if conversion was successful.
1451 *
1452 */
1453int str_uint32_t(const char *nptr, char **endptr, unsigned int base,
1454 bool strict, uint32_t *result)
1455{
1456 assert(result != NULL);
1457
1458 bool neg;
1459 char *lendptr;
1460 uint64_t res;
1461 int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1462
1463 if (endptr != NULL)
1464 *endptr = (char *) lendptr;
1465
1466 if (ret != EOK)
1467 return ret;
1468
1469 /* Do not allow negative values */
1470 if (neg)
1471 return EINVAL;
1472
1473 /* Check whether we are at the end of
1474 the string in strict mode */
1475 if ((strict) && (*lendptr != 0))
1476 return EINVAL;
1477
1478 /* Check for overflow */
1479 uint32_t _res = (uint32_t) res;
1480 if (_res != res)
1481 return EOVERFLOW;
1482
1483 *result = _res;
1484
1485 return EOK;
1486}
1487
1488/** Convert string to uint64_t.
1489 *
1490 * @param nptr Pointer to string.
1491 * @param endptr If not NULL, pointer to the first invalid character
1492 * is stored here.
1493 * @param base Zero or number between 2 and 36 inclusive.
1494 * @param strict Do not allow any trailing characters.
1495 * @param result Result of the conversion.
1496 *
1497 * @return EOK if conversion was successful.
1498 *
1499 */
1500int str_uint64(const char *nptr, char **endptr, unsigned int base,
1501 bool strict, uint64_t *result)
1502{
1503 assert(result != NULL);
1504
1505 bool neg;
1506 char *lendptr;
1507 int ret = str_uint(nptr, &lendptr, base, &neg, result);
1508
1509 if (endptr != NULL)
1510 *endptr = (char *) lendptr;
1511
1512 if (ret != EOK)
1513 return ret;
1514
1515 /* Do not allow negative values */
1516 if (neg)
1517 return EINVAL;
1518
1519 /* Check whether we are at the end of
1520 the string in strict mode */
1521 if ((strict) && (*lendptr != 0))
1522 return EINVAL;
1523
1524 return EOK;
1525}
1526
1527/** Convert string to size_t.
1528 *
1529 * @param nptr Pointer to string.
1530 * @param endptr If not NULL, pointer to the first invalid character
1531 * is stored here.
1532 * @param base Zero or number between 2 and 36 inclusive.
1533 * @param strict Do not allow any trailing characters.
1534 * @param result Result of the conversion.
1535 *
1536 * @return EOK if conversion was successful.
1537 *
1538 */
1539int str_size_t(const char *nptr, char **endptr, unsigned int base,
1540 bool strict, size_t *result)
1541{
1542 assert(result != NULL);
1543
1544 bool neg;
1545 char *lendptr;
1546 uint64_t res;
1547 int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1548
1549 if (endptr != NULL)
1550 *endptr = (char *) lendptr;
1551
1552 if (ret != EOK)
1553 return ret;
1554
1555 /* Do not allow negative values */
1556 if (neg)
1557 return EINVAL;
1558
1559 /* Check whether we are at the end of
1560 the string in strict mode */
1561 if ((strict) && (*lendptr != 0))
1562 return EINVAL;
1563
1564 /* Check for overflow */
1565 size_t _res = (size_t) res;
1566 if (_res != res)
1567 return EOVERFLOW;
1568
1569 *result = _res;
1570
1571 return EOK;
1572}
1573
/** Scale a value down and pick the matching decimal (SI) order suffix.
 *
 * A larger unit is selected only when @a val is strictly greater than
 * 1000 units of it, except for the largest unit, where the threshold is
 * 10 units. Values up to and including 1000000 are returned unscaled
 * with a space as the suffix. The division truncates.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Suffix character is stored here: 'E' (10^18),
 *               'P' (10^15), 'T' (10^12), 'G' (10^9), 'M' (10^6),
 *               'k' (10^3) or ' ' (unscaled).
 *
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/* Divisor is 10^18, i.e. exa (not zetta, which is 10^21). */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* Divisor is 10^15, i.e. peta. */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
1599
/** Scale a byte count down and pick the matching binary (IEC) suffix.
 *
 * A larger unit is selected only when @a val is strictly greater than
 * 1024 units of it. Values up to and including 1048576 are returned
 * unscaled. The division truncates.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Pointer to a constant suffix string is stored here:
 *               "PiB" (2^50), "TiB" (2^40), "GiB" (2^30), "MiB" (2^20),
 *               "KiB" (2^10) or the plain byte suffix.
 * @param fixed  Selects the padded variant of the plain byte suffix;
 *               it has no effect on the other suffixes.
 *
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* Divisor is 2^50, i.e. pebi (exbi would be 2^60). */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		/*
		 * NOTE(review): the other suffixes are three characters
		 * wide; confirm whether the padded form should be as wide.
		 */
		if (fixed)
			*suffix = "B ";
		else
			*suffix = "B";
	}
}
1626
1627/** @}
1628 */
Note: See TracBrowser for help on using the repository browser.