source: mainline/uspace/lib/c/generic/str.c@ 560d79f

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 560d79f was 560d79f, checked in by Martin Decky <martin@…>, 13 years ago

implement str_nsize, wstr_nsize

  • Property mode set to 100644
File size: 37.5 KB
Line 
1/*
2 * Copyright (c) 2005 Martin Decky
3 * Copyright (c) 2008 Jiri Svoboda
4 * Copyright (c) 2011 Martin Sucha
5 * Copyright (c) 2011 Oleg Romanenko
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * - Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * - Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * - The name of the author may not be used to endorse or promote products
18 * derived from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31
32/** @addtogroup libc
33 * @{
34 */
35/** @file
36 */
37
38#include <str.h>
39#include <stdlib.h>
40#include <assert.h>
41#include <stdint.h>
42#include <ctype.h>
43#include <malloc.h>
44#include <errno.h>
45#include <align.h>
46#include <mem.h>
47#include <str.h>
48
49/** Byte mask consisting of lowest @n bits (out of 8) */
50#define LO_MASK_8(n) ((uint8_t) ((1 << (n)) - 1))
51
52/** Byte mask consisting of lowest @n bits (out of 32) */
53#define LO_MASK_32(n) ((uint32_t) ((1 << (n)) - 1))
54
55/** Byte mask consisting of highest @n bits (out of 8) */
56#define HI_MASK_8(n) (~LO_MASK_8(8 - (n)))
57
58/** Number of data bits in a UTF-8 continuation byte */
59#define CONT_BITS 6
60
61/** Decode a single character from a string.
62 *
63 * Decode a single character from a string of size @a size. Decoding starts
64 * at @a offset and this offset is moved to the beginning of the next
65 * character. In case of decoding error, offset generally advances at least
66 * by one. However, offset is never moved beyond size.
67 *
68 * @param str String (not necessarily NULL-terminated).
69 * @param offset Byte offset in string where to start decoding.
70 * @param size Size of the string (in bytes).
71 *
72 * @return Value of decoded character, U_SPECIAL on decoding error or
73 * NULL if attempt to decode beyond @a size.
74 *
75 */
76wchar_t str_decode(const char *str, size_t *offset, size_t size)
77{
78 if (*offset + 1 > size)
79 return 0;
80
81 /* First byte read from string */
82 uint8_t b0 = (uint8_t) str[(*offset)++];
83
84 /* Determine code length */
85
86 unsigned int b0_bits; /* Data bits in first byte */
87 unsigned int cbytes; /* Number of continuation bytes */
88
89 if ((b0 & 0x80) == 0) {
90 /* 0xxxxxxx (Plain ASCII) */
91 b0_bits = 7;
92 cbytes = 0;
93 } else if ((b0 & 0xe0) == 0xc0) {
94 /* 110xxxxx 10xxxxxx */
95 b0_bits = 5;
96 cbytes = 1;
97 } else if ((b0 & 0xf0) == 0xe0) {
98 /* 1110xxxx 10xxxxxx 10xxxxxx */
99 b0_bits = 4;
100 cbytes = 2;
101 } else if ((b0 & 0xf8) == 0xf0) {
102 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
103 b0_bits = 3;
104 cbytes = 3;
105 } else {
106 /* 10xxxxxx -- unexpected continuation byte */
107 return U_SPECIAL;
108 }
109
110 if (*offset + cbytes > size)
111 return U_SPECIAL;
112
113 wchar_t ch = b0 & LO_MASK_8(b0_bits);
114
115 /* Decode continuation bytes */
116 while (cbytes > 0) {
117 uint8_t b = (uint8_t) str[(*offset)++];
118
119 /* Must be 10xxxxxx */
120 if ((b & 0xc0) != 0x80)
121 return U_SPECIAL;
122
123 /* Shift data bits to ch */
124 ch = (ch << CONT_BITS) | (wchar_t) (b & LO_MASK_8(CONT_BITS));
125 cbytes--;
126 }
127
128 return ch;
129}
130
131/** Encode a single character to string representation.
132 *
133 * Encode a single character to string representation (i.e. UTF-8) and store
134 * it into a buffer at @a offset. Encoding starts at @a offset and this offset
135 * is moved to the position where the next character can be written to.
136 *
137 * @param ch Input character.
138 * @param str Output buffer.
139 * @param offset Byte offset where to start writing.
140 * @param size Size of the output buffer (in bytes).
141 *
142 * @return EOK if the character was encoded successfully, EOVERFLOW if there
143 * was not enough space in the output buffer or EINVAL if the character
144 * code was invalid.
145 */
146int chr_encode(const wchar_t ch, char *str, size_t *offset, size_t size)
147{
148 if (*offset >= size)
149 return EOVERFLOW;
150
151 if (!chr_check(ch))
152 return EINVAL;
153
154 /* Unsigned version of ch (bit operations should only be done
155 on unsigned types). */
156 uint32_t cc = (uint32_t) ch;
157
158 /* Determine how many continuation bytes are needed */
159
160 unsigned int b0_bits; /* Data bits in first byte */
161 unsigned int cbytes; /* Number of continuation bytes */
162
163 if ((cc & ~LO_MASK_32(7)) == 0) {
164 b0_bits = 7;
165 cbytes = 0;
166 } else if ((cc & ~LO_MASK_32(11)) == 0) {
167 b0_bits = 5;
168 cbytes = 1;
169 } else if ((cc & ~LO_MASK_32(16)) == 0) {
170 b0_bits = 4;
171 cbytes = 2;
172 } else if ((cc & ~LO_MASK_32(21)) == 0) {
173 b0_bits = 3;
174 cbytes = 3;
175 } else {
176 /* Codes longer than 21 bits are not supported */
177 return EINVAL;
178 }
179
180 /* Check for available space in buffer */
181 if (*offset + cbytes >= size)
182 return EOVERFLOW;
183
184 /* Encode continuation bytes */
185 unsigned int i;
186 for (i = cbytes; i > 0; i--) {
187 str[*offset + i] = 0x80 | (cc & LO_MASK_32(CONT_BITS));
188 cc = cc >> CONT_BITS;
189 }
190
191 /* Encode first byte */
192 str[*offset] = (cc & LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1);
193
194 /* Advance offset */
195 *offset += cbytes + 1;
196
197 return EOK;
198}
199
/** Get size of string.
 *
 * Count the bytes occupied by the string @a str, not including the
 * null terminator.
 *
 * @param str Null-terminated string to measure.
 *
 * @return Number of bytes used by the string.
 *
 */
size_t str_size(const char *str)
{
	const char *end = str;

	/* Walk to the terminator; the distance walked is the size */
	while (*end != 0)
		end++;

	return (size_t) (end - str);
}
219
/** Get size of wide string.
 *
 * Count the bytes occupied by the wide string @a str, not including
 * the null terminator.
 *
 * @param str Null-terminated wide string to measure.
 *
 * @return Number of bytes used by the wide string.
 *
 */
size_t wstr_size(const wchar_t *str)
{
	size_t chars = wstr_length(str);

	return chars * sizeof(wchar_t);
}
234
235/** Get size of string with length limit.
236 *
237 * Get the number of bytes which are used by up to @a max_len first
238 * characters in the string @a str. If @a max_len is greater than
239 * the length of @a str, the entire string is measured (excluding the
240 * NULL-terminator).
241 *
242 * @param str String to consider.
243 * @param max_len Maximum number of characters to measure.
244 *
245 * @return Number of bytes used by the characters.
246 *
247 */
248size_t str_lsize(const char *str, size_t max_len)
249{
250 size_t len = 0;
251 size_t offset = 0;
252
253 while (len < max_len) {
254 if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
255 break;
256
257 len++;
258 }
259
260 return offset;
261}
262
/** Get size of string with size limit.
 *
 * Get the number of bytes which are used by the string @a str
 * (excluding the null terminator), but no more than @a max_size bytes.
 * At most @a max_size bytes of @a str are read, so the string does not
 * have to be null-terminated.
 *
 * @param str      String to consider.
 * @param max_size Maximum number of bytes to measure.
 *
 * @return Number of bytes used by the string.
 *
 */
size_t str_nsize(const char *str, size_t max_size)
{
	size_t size = 0;

	/* Test the bound first so that str[max_size] is never read */
	while ((size < max_size) && (str[size] != 0))
		size++;

	return size;
}
283
/** Get size of wide string with size limit.
 *
 * Get the number of bytes which are used by the wide string @a str
 * (excluding the null terminator), but no more than @a max_size bytes.
 *
 * @param str      Wide string to consider.
 * @param max_size Maximum number of bytes to measure.
 *
 * @return Number of bytes used by the wide string.
 *
 */
size_t wstr_nsize(const wchar_t *str, size_t max_size)
{
	size_t chars = wstr_nlength(str, max_size);

	return chars * sizeof(wchar_t);
}
299
/** Get size of wide string with length limit.
 *
 * Get the number of bytes which are used by up to @a max_len first
 * wide characters in the wide string @a str. If @a max_len is greater
 * than the length of @a str, the entire wide string is measured
 * (excluding the null terminator).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	/* Express the character limit as a byte limit for wstr_nlength() */
	size_t byte_limit = max_len * sizeof(wchar_t);
	size_t chars = wstr_nlength(str, byte_limit);

	return chars * sizeof(wchar_t);
}
317
318/** Get number of characters in a string.
319 *
320 * @param str NULL-terminated string.
321 *
322 * @return Number of characters in string.
323 *
324 */
325size_t str_length(const char *str)
326{
327 size_t len = 0;
328 size_t offset = 0;
329
330 while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
331 len++;
332
333 return len;
334}
335
/** Get number of characters in a wide string.
 *
 * @param wstr Null-terminated wide string.
 *
 * @return Number of wide characters preceding the terminator.
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
	const wchar_t *end = wstr;

	while (*end != 0)
		end++;

	return (size_t) (end - wstr);
}
352
/** Get number of characters in a string with size limit.
 *
 * Characters are counted by decoding the string with str_decode()
 * until it returns 0, which happens at a null byte or when @a size
 * bytes have been consumed.
 *
 * @param str  String to measure.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in the string.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t offset = 0;
	size_t count;

	for (count = 0; str_decode(str, &offset, size) != 0; count++)
		;

	return count;
}
371
/** Get number of characters in a wide string with size limit.
 *
 * Only whole wide characters that fit into the first @a size bytes
 * are examined.
 *
 * @param str  Wide string to measure.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of wide characters found before the terminator or
 *         before the size limit, whichever comes first.
 *
 */
size_t wstr_nlength(const wchar_t *str, size_t size)
{
	/* Ignore a trailing partial wide character */
	size_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
	size_t len = 0;
	size_t used;

	for (used = 0; used < limit; used += sizeof(wchar_t)) {
		if (str[len] == 0)
			break;

		len++;
	}

	return len;
}
393
/** Check whether character is plain ASCII.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0 to 127 inclusive.
 *
 */
bool ascii_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 127);
}
406
/** Check whether character is valid.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0 to 0x10ffff (1114111)
 *         inclusive.
 *
 */
bool chr_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 0x10ffff);
}
419
420/** Compare two NULL terminated strings.
421 *
422 * Do a char-by-char comparison of two NULL-terminated strings.
423 * The strings are considered equal iff they consist of the same
424 * characters on the minimum of their lengths.
425 *
426 * @param s1 First string to compare.
427 * @param s2 Second string to compare.
428 *
429 * @return 0 if the strings are equal, -1 if first is smaller,
430 * 1 if second smaller.
431 *
432 */
433int str_cmp(const char *s1, const char *s2)
434{
435 wchar_t c1 = 0;
436 wchar_t c2 = 0;
437
438 size_t off1 = 0;
439 size_t off2 = 0;
440
441 while (true) {
442 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
443 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
444
445 if (c1 < c2)
446 return -1;
447
448 if (c1 > c2)
449 return 1;
450
451 if (c1 == 0 || c2 == 0)
452 break;
453 }
454
455 return 0;
456}
457
458/** Compare two NULL terminated strings with length limit.
459 *
460 * Do a char-by-char comparison of two NULL-terminated strings.
461 * The strings are considered equal iff they consist of the same
462 * characters on the minimum of their lengths and the length limit.
463 *
464 * @param s1 First string to compare.
465 * @param s2 Second string to compare.
466 * @param max_len Maximum number of characters to consider.
467 *
468 * @return 0 if the strings are equal, -1 if first is smaller,
469 * 1 if second smaller.
470 *
471 */
472int str_lcmp(const char *s1, const char *s2, size_t max_len)
473{
474 wchar_t c1 = 0;
475 wchar_t c2 = 0;
476
477 size_t off1 = 0;
478 size_t off2 = 0;
479
480 size_t len = 0;
481
482 while (true) {
483 if (len >= max_len)
484 break;
485
486 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
487 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
488
489 if (c1 < c2)
490 return -1;
491
492 if (c1 > c2)
493 return 1;
494
495 if (c1 == 0 || c2 == 0)
496 break;
497
498 ++len;
499 }
500
501 return 0;
502
503}
504
505/** Copy string.
506 *
507 * Copy source string @a src to destination buffer @a dest.
508 * No more than @a size bytes are written. If the size of the output buffer
509 * is at least one byte, the output string will always be well-formed, i.e.
510 * null-terminated and containing only complete characters.
511 *
512 * @param dest Destination buffer.
513 * @param count Size of the destination buffer (must be > 0).
514 * @param src Source string.
515 */
516void str_cpy(char *dest, size_t size, const char *src)
517{
518 /* There must be space for a null terminator in the buffer. */
519 assert(size > 0);
520
521 size_t src_off = 0;
522 size_t dest_off = 0;
523
524 wchar_t ch;
525 while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
526 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
527 break;
528 }
529
530 dest[dest_off] = '\0';
531}
532
533/** Copy size-limited substring.
534 *
535 * Copy prefix of string @a src of max. size @a size to destination buffer
536 * @a dest. No more than @a size bytes are written. The output string will
537 * always be well-formed, i.e. null-terminated and containing only complete
538 * characters.
539 *
540 * No more than @a n bytes are read from the input string, so it does not
541 * have to be null-terminated.
542 *
543 * @param dest Destination buffer.
544 * @param count Size of the destination buffer (must be > 0).
545 * @param src Source string.
546 * @param n Maximum number of bytes to read from @a src.
547 */
548void str_ncpy(char *dest, size_t size, const char *src, size_t n)
549{
550 /* There must be space for a null terminator in the buffer. */
551 assert(size > 0);
552
553 size_t src_off = 0;
554 size_t dest_off = 0;
555
556 wchar_t ch;
557 while ((ch = str_decode(src, &src_off, n)) != 0) {
558 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
559 break;
560 }
561
562 dest[dest_off] = '\0';
563}
564
/** Append one string to another.
 *
 * Append source string @a src to the string in destination buffer
 * @a dest. The size of the destination buffer is @a size. If the
 * string already in @a dest occupies @a size bytes or more, nothing is
 * done. Otherwise the result is always null-terminated and contains
 * only complete characters.
 *
 * @param dest Destination buffer holding a null-terminated string.
 * @param size Size of the destination buffer.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	size_t used = str_size(dest);

	/* Copy into whatever space remains after the existing string */
	if (used < size)
		str_cpy(dest + used, size - used, src);
}
586
587/** Convert space-padded ASCII to string.
588 *
589 * Common legacy text encoding in hardware is 7-bit ASCII fitted into
590 * a fixed-width byte buffer (bit 7 always zero), right-padded with spaces
591 * (ASCII 0x20). Convert space-padded ascii to string representation.
592 *
593 * If the text does not fit into the destination buffer, the function converts
594 * as many characters as possible and returns EOVERFLOW.
595 *
596 * If the text contains non-ASCII bytes (with bit 7 set), the whole string is
597 * converted anyway and invalid characters are replaced with question marks
598 * (U_SPECIAL) and the function returns EIO.
599 *
600 * Regardless of return value upon return @a dest will always be well-formed.
601 *
602 * @param dest Destination buffer
603 * @param size Size of destination buffer
604 * @param src Space-padded ASCII.
605 * @param n Size of the source buffer in bytes.
606 *
607 * @return EOK on success, EOVERFLOW if the text does not fit
608 * destination buffer, EIO if the text contains
609 * non-ASCII bytes.
610 */
611int spascii_to_str(char *dest, size_t size, const uint8_t *src, size_t n)
612{
613 size_t sidx;
614 size_t didx;
615 size_t dlast;
616 uint8_t byte;
617 int rc;
618 int result;
619
620 /* There must be space for a null terminator in the buffer. */
621 assert(size > 0);
622 result = EOK;
623
624 didx = 0;
625 dlast = 0;
626 for (sidx = 0; sidx < n; ++sidx) {
627 byte = src[sidx];
628 if (!ascii_check(byte)) {
629 byte = U_SPECIAL;
630 result = EIO;
631 }
632
633 rc = chr_encode(byte, dest, &didx, size - 1);
634 if (rc != EOK) {
635 assert(rc == EOVERFLOW);
636 dest[didx] = '\0';
637 return rc;
638 }
639
640 /* Remember dest index after last non-empty character */
641 if (byte != 0x20)
642 dlast = didx;
643 }
644
645 /* Terminate string after last non-empty character */
646 dest[dlast] = '\0';
647 return result;
648}
649
650/** Convert wide string to string.
651 *
652 * Convert wide string @a src to string. The output is written to the buffer
653 * specified by @a dest and @a size. @a size must be non-zero and the string
654 * written will always be well-formed.
655 *
656 * @param dest Destination buffer.
657 * @param size Size of the destination buffer.
658 * @param src Source wide string.
659 */
660void wstr_to_str(char *dest, size_t size, const wchar_t *src)
661{
662 wchar_t ch;
663 size_t src_idx;
664 size_t dest_off;
665
666 /* There must be space for a null terminator in the buffer. */
667 assert(size > 0);
668
669 src_idx = 0;
670 dest_off = 0;
671
672 while ((ch = src[src_idx++]) != 0) {
673 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
674 break;
675 }
676
677 dest[dest_off] = '\0';
678}
679
680/** Convert UTF16 string to string.
681 *
682 * Convert utf16 string @a src to string. The output is written to the buffer
683 * specified by @a dest and @a size. @a size must be non-zero and the string
684 * written will always be well-formed. Surrogate pairs also supported.
685 *
686 * @param dest Destination buffer.
687 * @param size Size of the destination buffer.
688 * @param src Source utf16 string.
689 *
690 * @return EOK, if success, negative otherwise.
691 */
692int utf16_to_str(char *dest, size_t size, const uint16_t *src)
693{
694 size_t idx = 0, dest_off = 0;
695 wchar_t ch;
696 int rc = EOK;
697
698 /* There must be space for a null terminator in the buffer. */
699 assert(size > 0);
700
701 while (src[idx]) {
702 if ((src[idx] & 0xfc00) == 0xd800) {
703 if (src[idx + 1] && (src[idx + 1] & 0xfc00) == 0xdc00) {
704 ch = 0x10000;
705 ch += (src[idx] & 0x03FF) << 10;
706 ch += (src[idx + 1] & 0x03FF);
707 idx += 2;
708 }
709 else
710 break;
711 } else {
712 ch = src[idx];
713 idx++;
714 }
715 rc = chr_encode(ch, dest, &dest_off, size - 1);
716 if (rc != EOK)
717 break;
718 }
719 dest[dest_off] = '\0';
720 return rc;
721}
722
723int str_to_utf16(uint16_t *dest, size_t size, const char *src)
724{
725 int rc = EOK;
726 size_t offset = 0;
727 size_t idx = 0;
728 wchar_t c;
729
730 assert(size > 0);
731
732 while ((c = str_decode(src, &offset, STR_NO_LIMIT)) != 0) {
733 if (c > 0x10000) {
734 if (idx + 2 >= size - 1) {
735 rc = EOVERFLOW;
736 break;
737 }
738 c = (c - 0x10000);
739 dest[idx] = 0xD800 | (c >> 10);
740 dest[idx + 1] = 0xDC00 | (c & 0x3FF);
741 idx++;
742 } else {
743 dest[idx] = c;
744 }
745
746 idx++;
747 if (idx >= size - 1) {
748 rc = EOVERFLOW;
749 break;
750 }
751 }
752
753 dest[idx] = '\0';
754 return rc;
755}
756
757
758/** Convert wide string to new string.
759 *
760 * Convert wide string @a src to string. Space for the new string is allocated
761 * on the heap.
762 *
763 * @param src Source wide string.
764 * @return New string.
765 */
766char *wstr_to_astr(const wchar_t *src)
767{
768 char dbuf[STR_BOUNDS(1)];
769 char *str;
770 wchar_t ch;
771
772 size_t src_idx;
773 size_t dest_off;
774 size_t dest_size;
775
776 /* Compute size of encoded string. */
777
778 src_idx = 0;
779 dest_size = 0;
780
781 while ((ch = src[src_idx++]) != 0) {
782 dest_off = 0;
783 if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
784 break;
785 dest_size += dest_off;
786 }
787
788 str = malloc(dest_size + 1);
789 if (str == NULL)
790 return NULL;
791
792 /* Encode string. */
793
794 src_idx = 0;
795 dest_off = 0;
796
797 while ((ch = src[src_idx++]) != 0) {
798 if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
799 break;
800 }
801
802 str[dest_size] = '\0';
803 return str;
804}
805
806
807/** Convert string to wide string.
808 *
809 * Convert string @a src to wide string. The output is written to the
810 * buffer specified by @a dest and @a dlen. @a dlen must be non-zero
811 * and the wide string written will always be null-terminated.
812 *
813 * @param dest Destination buffer.
814 * @param dlen Length of destination buffer (number of wchars).
815 * @param src Source string.
816 */
817void str_to_wstr(wchar_t *dest, size_t dlen, const char *src)
818{
819 size_t offset;
820 size_t di;
821 wchar_t c;
822
823 assert(dlen > 0);
824
825 offset = 0;
826 di = 0;
827
828 do {
829 if (di >= dlen - 1)
830 break;
831
832 c = str_decode(src, &offset, STR_NO_LIMIT);
833 dest[di++] = c;
834 } while (c != '\0');
835
836 dest[dlen - 1] = '\0';
837}
838
/** Convert string to new wide string.
 *
 * Convert string @a str to wide string. The new null-terminated wide
 * string is allocated with calloc().
 *
 * @param str Source string.
 *
 * @return New wide string or NULL if the allocation failed.
 */
wchar_t *str_to_awstr(const char *str)
{
	/* One extra element for the terminator */
	size_t wlen = str_length(str) + 1;

	wchar_t *wstr = calloc(wlen, sizeof(wchar_t));
	if (wstr != NULL)
		str_to_wstr(wstr, wlen, str);

	return wstr;
}
857
858/** Find first occurence of character in string.
859 *
860 * @param str String to search.
861 * @param ch Character to look for.
862 *
863 * @return Pointer to character in @a str or NULL if not found.
864 */
865char *str_chr(const char *str, wchar_t ch)
866{
867 wchar_t acc;
868 size_t off = 0;
869 size_t last = 0;
870
871 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
872 if (acc == ch)
873 return (char *) (str + last);
874 last = off;
875 }
876
877 return NULL;
878}
879
880/** Removes specified trailing characters from a string.
881 *
882 * @param str String to remove from.
883 * @param ch Character to remove.
884 */
885void str_rtrim(char *str, wchar_t ch)
886{
887 size_t off = 0;
888 size_t pos = 0;
889 wchar_t c;
890 bool update_last_chunk = true;
891 char *last_chunk = NULL;
892
893 while ((c = str_decode(str, &off, STR_NO_LIMIT))) {
894 if (c != ch) {
895 update_last_chunk = true;
896 last_chunk = NULL;
897 } else if (update_last_chunk) {
898 update_last_chunk = false;
899 last_chunk = (str + pos);
900 }
901 pos = off;
902 }
903
904 if (last_chunk)
905 *last_chunk = '\0';
906}
907
908/** Removes specified leading characters from a string.
909 *
910 * @param str String to remove from.
911 * @param ch Character to remove.
912 */
913void str_ltrim(char *str, wchar_t ch)
914{
915 wchar_t acc;
916 size_t off = 0;
917 size_t pos = 0;
918 size_t str_sz = str_size(str);
919
920 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
921 if (acc != ch)
922 break;
923 else
924 pos = off;
925 }
926
927 if (pos > 0) {
928 memmove(str, &str[pos], str_sz - pos);
929 pos = str_sz - pos;
930 str[str_sz - pos] = '\0';
931 }
932}
933
934/** Find last occurence of character in string.
935 *
936 * @param str String to search.
937 * @param ch Character to look for.
938 *
939 * @return Pointer to character in @a str or NULL if not found.
940 */
941char *str_rchr(const char *str, wchar_t ch)
942{
943 wchar_t acc;
944 size_t off = 0;
945 size_t last = 0;
946 const char *res = NULL;
947
948 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
949 if (acc == ch)
950 res = (str + last);
951 last = off;
952 }
953
954 return (char *) res;
955}
956
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position @a pos. The
 * characters from that position on, including the terminator, are
 * shifted up by one.
 *
 * NOTE(review): the shift writes to str[len + 1], but only @a pos is
 * checked against @a max_pos. Presumably callers guarantee that the
 * buffer has room for one more character; confirm.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	if (pos > len)
		return false;

	if (pos + 1 > max_pos)
		return false;

	/* Shift the tail (terminator included), starting from the end */
	size_t dst;
	for (dst = len + 1; dst > pos; dst--)
		str[dst] = str[dst - 1];

	str[pos] = ch;

	return true;
}
986
/** Remove a wide character from a wide string.
 *
 * Remove the wide character at position @a pos. The characters after
 * that position, including the terminator, are shifted down by one.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	size_t len = wstr_length(str);

	if (pos >= len)
		return false;

	/* Pull each following character (and the terminator) one slot down */
	size_t dst;
	for (dst = pos; dst < len; dst++)
		str[dst] = str[dst + 1];

	return true;
}
1012
/** Compare two strings byte by byte, ignoring case.
 *
 * Both strings are walked in parallel while the tolower() values of
 * the current bytes are equal and neither string has ended.
 *
 * @param a First string to compare.
 * @param b Second string to compare.
 *
 * @return Difference of the tolower() values of the bytes at which the
 *         walk stopped (zero if the strings are equal ignoring case).
 */
int stricmp(const char *a, const char *b)
{
	while (*a && *b && (tolower(*a) == tolower(*b))) {
		a++;
		b++;
	}

	return (tolower(*a) - tolower(*b));
}
1022
/** Convert string to a number.
 *
 * Core of the strtol and strtoul functions. Leading whitespace and an
 * optional sign are skipped, then as many digits valid in the given
 * base as possible are consumed.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, the function stores here a pointer to the
 *               first character that is not part of the number, or
 *               @a nptr if no number was found or the base is invalid.
 * @param base   Zero or number between 2 and 36 inclusive. Zero selects
 *               base 16 for a "0x"/"0X" prefix, base 8 for a leading
 *               '0' and base 10 otherwise.
 * @param sgn    Set to 1 if a minus sign was found (left untouched
 *               otherwise).
 *
 * @return Result of conversion, ULONG_MAX if the value does not fit
 *         into unsigned long, 0 if no number was found or the base is
 *         invalid.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
	unsigned long result = 0;
	unsigned long a, b;
	const char *str = nptr;
	const char *tmpptr;
	int overflow = 0;

	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*sgn = 1;
		++str;
	} else if (*str == '+')
		++str;

	if (base) {
		if ((base < 2) || (base > 36)) {
			/* FIXME: set errno to EINVAL */
			if (endptr)
				*endptr = (char *) nptr;
			return 0;
		}
		if ((base == 16) && (*str == '0') && ((str[1] == 'x') ||
		    (str[1] == 'X'))) {
			str += 2;
		}
	} else {
		base = 10;

		if (*str == '0') {
			base = 8;
			if ((str[1] == 'X') || (str[1] == 'x')) {
				base = 16;
				str += 2;
			}
		}
	}

	tmpptr = str;

	while (*str) {
		unsigned char c = (unsigned char) *str;
		unsigned int digit;

		if ((c >= '0') && (c <= '9'))
			digit = c - '0';
		else if ((c >= 'a') && (c <= 'z'))
			digit = c - 'a' + 10;
		else if ((c >= 'A') && (c <= 'Z'))
			digit = c - 'A' + 10;
		else
			break;

		/* Valid digits are 0 .. base - 1 */
		if (digit >= (unsigned int) base)
			break;

		if (!overflow) {
			/*
			 * Compute result * base + digit in two parts (low
			 * byte and the rest) so that an overflow can be
			 * detected before it happens.
			 */
			a = (result & 0xff) * base + digit;
			b = (result >> 8) * base + (a >> 8);

			if (b > (ULONG_MAX >> 8)) {
				/* FIXME: errno = ERANGE */
				overflow = 1;
			} else {
				result = (b << 8) + (a & 0xff);
			}
		}

		/* Keep consuming digits even after overflow */
		++str;
	}

	if (str == tmpptr) {
		/*
		 * No number was found => first invalid character is the first
		 * character of the string.
		 */
		/* FIXME: set errno to EINVAL */
		str = nptr;
		result = 0;
	}

	if (endptr)
		*endptr = (char *) str;

	if (nptr == str) {
		/* FIXME: errno = EINVAL */
		return 0;
	}

	if (overflow)
		return ULONG_MAX;

	return result;
}
1115
/** Convert initial part of string to long int according to given base.
 *
 * The number may begin with an arbitrary number of whitespaces followed
 * by an optional sign (`+' or `-'). If the base is 0 or 16, the prefix
 * `0x' may be present and the number is then taken as hexadecimal. If
 * the base is 0 and the number begins with a zero, it is taken as octal
 * (as with base 8). Otherwise base 0 means decimal.
 *
 * Magnitudes that do not fit are clamped to LONG_MAX or, for negative
 * numbers, LONG_MIN.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 *
 * @return Result of conversion.
 */
long int strtol(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = _strtoul(nptr, endptr, base, &sgn);

	/* Common case: the magnitude fits into long */
	if (number <= LONG_MAX)
		return (sgn ? -number : number);

	/* The magnitude of LONG_MIN is representable only when negated */
	if ((sgn) && (number == (unsigned long) (LONG_MAX) + 1)) {
		/* FIXME: set 0 to errno */
		return number;
	}

	/* FIXME: set ERANGE to errno */
	return (sgn ? LONG_MIN : LONG_MAX);
}
1147
/** Duplicate string.
 *
 * Allocate a new string with malloc() and copy characters from the
 * source string into it using str_cpy(). The duplicate is therefore
 * always null-terminated and contains only complete characters, but
 * it can differ from the source string on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	/* Room for the string bytes plus the terminator */
	size_t bytes = str_size(src) + 1;

	char *copy = (char *) malloc(bytes);
	if (copy != NULL)
		str_cpy(copy, bytes, src);

	return copy;
}
1174
/** Duplicate string with size limit.
 *
 * Allocate a new string with malloc() and copy up to @a n bytes from
 * the source string into it. No more than @a n + 1 bytes are allocated,
 * but if the size occupied by the source string is smaller than @a n,
 * less is allocated.
 *
 * The source is measured with str_nsize(), so it is not scanned for a
 * terminator beyond the @a n byte limit.
 *
 * The duplicate is always null-terminated and contains only complete
 * characters, but it can differ from the source string on the byte
 * level.
 *
 * @param src Source string.
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	/* Size of the source, capped at n without scanning past the cap */
	size_t size = str_nsize(src, n);

	char *dest = malloc(size + 1);
	if (dest == NULL)
		return NULL;

	str_ncpy(dest, size + 1, src, size);
	return dest;
}
1208
/** Convert initial part of string to unsigned long according to given base.
 *
 * The number may begin with an arbitrary number of whitespaces followed
 * by an optional sign (`+' or `-'). If the base is 0 or 16, the prefix
 * `0x' may be present and the number is then taken as hexadecimal. If
 * the base is 0 and the number begins with a zero, it is taken as octal
 * (as with base 8). Otherwise base 0 means decimal.
 *
 * If a minus sign was present, the result is negated in unsigned
 * arithmetic.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 *
 * @return Result of conversion.
 */
unsigned long strtoul(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = _strtoul(nptr, endptr, base, &sgn);

	if (sgn)
		return -number;

	return number;
}
1231
/** Split a string into tokens, keeping the position in static storage.
 *
 * Thin wrapper around strtok_r(). The continuation pointer lives in a
 * function-local static variable, so it is shared by every caller of
 * this function.
 *
 * @param s     String to tokenize, or NULL to continue with the string
 *              from the previous call.
 * @param delim Set of delimiter characters.
 *
 * @return Whatever strtok_r() returns for the same arguments.
 */
char *strtok(char *s, const char *delim)
{
	static char *saved_pos;
	char *token = strtok_r(s, delim, &saved_pos);

	return token;
}
1238
/** Split a string into tokens using caller-provided state.
 *
 * Leading delimiters are skipped, then the token runs up to the next
 * delimiter or the end of the string. The delimiter that ends the token
 * is overwritten with a null terminator, so the input string is
 * modified in place.
 *
 * @param s     String to tokenize, or NULL to continue from the
 *              position stored in @a next.
 * @param delim Set of delimiter characters (looked up with str_chr()).
 * @param next  Continuation pointer; updated on every call that has
 *              a string to scan.
 *
 * @return Pointer to the next token, or NULL if there are no more
 *         tokens or there is no string to continue from.
 */
char *strtok_r(char *s, const char *delim, char **next)
{
	if (s == NULL)
		s = *next;

	/*
	 * Nothing to continue from, e.g. a continuation call made before
	 * any string was ever supplied. Bail out instead of dereferencing
	 * a NULL pointer.
	 */
	if (s == NULL)
		return NULL;

	/* Skip over leading delimiters. */
	while ((*s != '\0') && (str_chr(delim, *s) != NULL))
		s++;

	char *start = s;

	/* Skip over token characters. */
	while ((*s != '\0') && (str_chr(delim, *s) == NULL))
		s++;

	char *end = s;

	/*
	 * Resume just past the delimiter; if the string ended, stay on
	 * the terminator so that the next call finds no token.
	 */
	*next = (*s != '\0') ? s + 1 : s;

	if (start == end) {
		/* No more tokens. */
		return NULL;
	}

	/* Overwrite delimiter with null terminator. */
	*end = '\0';
	return start;
}
1263
1264/** Convert string to uint64_t (internal variant).
1265 *
1266 * @param nptr Pointer to string.
1267 * @param endptr Pointer to the first invalid character is stored here.
1268 * @param base Zero or number between 2 and 36 inclusive.
1269 * @param neg Indication of unary minus is stored here.
1270 * @apram result Result of the conversion.
1271 *
1272 * @return EOK if conversion was successful.
1273 *
1274 */
1275static int str_uint(const char *nptr, char **endptr, unsigned int base,
1276 bool *neg, uint64_t *result)
1277{
1278 assert(endptr != NULL);
1279 assert(neg != NULL);
1280 assert(result != NULL);
1281
1282 *neg = false;
1283 const char *str = nptr;
1284
1285 /* Ignore leading whitespace */
1286 while (isspace(*str))
1287 str++;
1288
1289 if (*str == '-') {
1290 *neg = true;
1291 str++;
1292 } else if (*str == '+')
1293 str++;
1294
1295 if (base == 0) {
1296 /* Decode base if not specified */
1297 base = 10;
1298
1299 if (*str == '0') {
1300 base = 8;
1301 str++;
1302
1303 switch (*str) {
1304 case 'b':
1305 case 'B':
1306 base = 2;
1307 str++;
1308 break;
1309 case 'o':
1310 case 'O':
1311 base = 8;
1312 str++;
1313 break;
1314 case 'd':
1315 case 'D':
1316 case 't':
1317 case 'T':
1318 base = 10;
1319 str++;
1320 break;
1321 case 'x':
1322 case 'X':
1323 base = 16;
1324 str++;
1325 break;
1326 default:
1327 str--;
1328 }
1329 }
1330 } else {
1331 /* Check base range */
1332 if ((base < 2) || (base > 36)) {
1333 *endptr = (char *) str;
1334 return EINVAL;
1335 }
1336 }
1337
1338 *result = 0;
1339 const char *startstr = str;
1340
1341 while (*str != 0) {
1342 unsigned int digit;
1343
1344 if ((*str >= 'a') && (*str <= 'z'))
1345 digit = *str - 'a' + 10;
1346 else if ((*str >= 'A') && (*str <= 'Z'))
1347 digit = *str - 'A' + 10;
1348 else if ((*str >= '0') && (*str <= '9'))
1349 digit = *str - '0';
1350 else
1351 break;
1352
1353 if (digit >= base)
1354 break;
1355
1356 uint64_t prev = *result;
1357 *result = (*result) * base + digit;
1358
1359 if (*result < prev) {
1360 /* Overflow */
1361 *endptr = (char *) str;
1362 return EOVERFLOW;
1363 }
1364
1365 str++;
1366 }
1367
1368 if (str == startstr) {
1369 /*
1370 * No digits were decoded => first invalid character is
1371 * the first character of the string.
1372 */
1373 str = nptr;
1374 }
1375
1376 *endptr = (char *) str;
1377
1378 if (str == nptr)
1379 return EINVAL;
1380
1381 return EOK;
1382}
1383
1384/** Convert string to uint8_t.
1385 *
1386 * @param nptr Pointer to string.
1387 * @param endptr If not NULL, pointer to the first invalid character
1388 * is stored here.
1389 * @param base Zero or number between 2 and 36 inclusive.
1390 * @param strict Do not allow any trailing characters.
1391 * @param result Result of the conversion.
1392 *
1393 * @return EOK if conversion was successful.
1394 *
1395 */
1396int str_uint8_t(const char *nptr, char **endptr, unsigned int base,
1397 bool strict, uint8_t *result)
1398{
1399 assert(result != NULL);
1400
1401 bool neg;
1402 char *lendptr;
1403 uint64_t res;
1404 int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1405
1406 if (endptr != NULL)
1407 *endptr = (char *) lendptr;
1408
1409 if (ret != EOK)
1410 return ret;
1411
1412 /* Do not allow negative values */
1413 if (neg)
1414 return EINVAL;
1415
1416 /* Check whether we are at the end of
1417 the string in strict mode */
1418 if ((strict) && (*lendptr != 0))
1419 return EINVAL;
1420
1421 /* Check for overflow */
1422 uint8_t _res = (uint8_t) res;
1423 if (_res != res)
1424 return EOVERFLOW;
1425
1426 *result = _res;
1427
1428 return EOK;
1429}
1430
1431/** Convert string to uint16_t.
1432 *
1433 * @param nptr Pointer to string.
1434 * @param endptr If not NULL, pointer to the first invalid character
1435 * is stored here.
1436 * @param base Zero or number between 2 and 36 inclusive.
1437 * @param strict Do not allow any trailing characters.
1438 * @param result Result of the conversion.
1439 *
1440 * @return EOK if conversion was successful.
1441 *
1442 */
1443int str_uint16_t(const char *nptr, char **endptr, unsigned int base,
1444 bool strict, uint16_t *result)
1445{
1446 assert(result != NULL);
1447
1448 bool neg;
1449 char *lendptr;
1450 uint64_t res;
1451 int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1452
1453 if (endptr != NULL)
1454 *endptr = (char *) lendptr;
1455
1456 if (ret != EOK)
1457 return ret;
1458
1459 /* Do not allow negative values */
1460 if (neg)
1461 return EINVAL;
1462
1463 /* Check whether we are at the end of
1464 the string in strict mode */
1465 if ((strict) && (*lendptr != 0))
1466 return EINVAL;
1467
1468 /* Check for overflow */
1469 uint16_t _res = (uint16_t) res;
1470 if (_res != res)
1471 return EOVERFLOW;
1472
1473 *result = _res;
1474
1475 return EOK;
1476}
1477
1478/** Convert string to uint32_t.
1479 *
1480 * @param nptr Pointer to string.
1481 * @param endptr If not NULL, pointer to the first invalid character
1482 * is stored here.
1483 * @param base Zero or number between 2 and 36 inclusive.
1484 * @param strict Do not allow any trailing characters.
1485 * @param result Result of the conversion.
1486 *
1487 * @return EOK if conversion was successful.
1488 *
1489 */
1490int str_uint32_t(const char *nptr, char **endptr, unsigned int base,
1491 bool strict, uint32_t *result)
1492{
1493 assert(result != NULL);
1494
1495 bool neg;
1496 char *lendptr;
1497 uint64_t res;
1498 int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1499
1500 if (endptr != NULL)
1501 *endptr = (char *) lendptr;
1502
1503 if (ret != EOK)
1504 return ret;
1505
1506 /* Do not allow negative values */
1507 if (neg)
1508 return EINVAL;
1509
1510 /* Check whether we are at the end of
1511 the string in strict mode */
1512 if ((strict) && (*lendptr != 0))
1513 return EINVAL;
1514
1515 /* Check for overflow */
1516 uint32_t _res = (uint32_t) res;
1517 if (_res != res)
1518 return EOVERFLOW;
1519
1520 *result = _res;
1521
1522 return EOK;
1523}
1524
1525/** Convert string to uint64_t.
1526 *
1527 * @param nptr Pointer to string.
1528 * @param endptr If not NULL, pointer to the first invalid character
1529 * is stored here.
1530 * @param base Zero or number between 2 and 36 inclusive.
1531 * @param strict Do not allow any trailing characters.
1532 * @param result Result of the conversion.
1533 *
1534 * @return EOK if conversion was successful.
1535 *
1536 */
1537int str_uint64(const char *nptr, char **endptr, unsigned int base,
1538 bool strict, uint64_t *result)
1539{
1540 assert(result != NULL);
1541
1542 bool neg;
1543 char *lendptr;
1544 int ret = str_uint(nptr, &lendptr, base, &neg, result);
1545
1546 if (endptr != NULL)
1547 *endptr = (char *) lendptr;
1548
1549 if (ret != EOK)
1550 return ret;
1551
1552 /* Do not allow negative values */
1553 if (neg)
1554 return EINVAL;
1555
1556 /* Check whether we are at the end of
1557 the string in strict mode */
1558 if ((strict) && (*lendptr != 0))
1559 return EINVAL;
1560
1561 return EOK;
1562}
1563
1564/** Convert string to size_t.
1565 *
1566 * @param nptr Pointer to string.
1567 * @param endptr If not NULL, pointer to the first invalid character
1568 * is stored here.
1569 * @param base Zero or number between 2 and 36 inclusive.
1570 * @param strict Do not allow any trailing characters.
1571 * @param result Result of the conversion.
1572 *
1573 * @return EOK if conversion was successful.
1574 *
1575 */
1576int str_size_t(const char *nptr, char **endptr, unsigned int base,
1577 bool strict, size_t *result)
1578{
1579 assert(result != NULL);
1580
1581 bool neg;
1582 char *lendptr;
1583 uint64_t res;
1584 int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1585
1586 if (endptr != NULL)
1587 *endptr = (char *) lendptr;
1588
1589 if (ret != EOK)
1590 return ret;
1591
1592 /* Do not allow negative values */
1593 if (neg)
1594 return EINVAL;
1595
1596 /* Check whether we are at the end of
1597 the string in strict mode */
1598 if ((strict) && (*lendptr != 0))
1599 return EINVAL;
1600
1601 /* Check for overflow */
1602 size_t _res = (size_t) res;
1603 if (_res != res)
1604 return EOVERFLOW;
1605
1606 *result = _res;
1607
1608 return EOK;
1609}
1610
/** Scale a value down and pick the matching decimal (SI) suffix.
 *
 * A value is scaled only when it is strictly greater than 1000 times
 * the divisor, so the scaled value keeps at least four digits. The one
 * exception is the topmost step (divisor 10^18), which applies above
 * 10^19 because 10^21 does not fit into uint64_t.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Suffix character is stored here: 'E' (10^18),
 *               'P' (10^15), 'T' (10^12), 'G' (10^9), 'M' (10^6),
 *               'k' (10^3) or ' ' when the value is not scaled.
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/* Divisor is 10^18, i.e. exa. */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* Divisor is 10^15, i.e. peta. */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
1636
/** Scale a byte count down and pick the matching binary (IEC) suffix.
 *
 * A value is scaled only when it is strictly greater than 1024 times
 * the divisor, so the scaled value is always above 1024.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Pointer to a string literal with the suffix is stored
 *               here: "PiB" (2^50), "TiB" (2^40), "GiB" (2^30),
 *               "MiB" (2^20), "KiB" (2^10) or a plain byte suffix when
 *               the value is not scaled.
 * @param fixed  Selects the padded variant of the plain byte suffix;
 *               it has no effect on the other suffixes.
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* Divisor is 2^50, i.e. pebi. */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		/*
		 * NOTE(review): the padded suffix is one character shorter
		 * than the three-character suffixes above; confirm whether
		 * it is meant to carry two trailing spaces.
		 */
		if (fixed)
			*suffix = "B ";
		else
			*suffix = "B";
	}
}
1663
1664/** @}
1665 */
Note: See TracBrowser for help on using the repository browser.