source: mainline/uspace/lib/c/generic/str.c@ ebddd71

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since ebddd71 was 375ab5e, checked in by Jakub Jermar <jakub@…>, 14 years ago

Merge from lp:~romanenko-oleg/helenos/fat.

  • Property mode set to 100644
File size: 34.2 KB
Line 
1/*
2 * Copyright (c) 2005 Martin Decky
3 * Copyright (c) 2008 Jiri Svoboda
4 * Copyright (c) 2011 Martin Sucha
5 * Copyright (c) 2011 Oleg Romanenko
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * - Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * - Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * - The name of the author may not be used to endorse or promote products
18 * derived from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31
32/** @addtogroup libc
33 * @{
34 */
35/** @file
36 */
37
38#include <str.h>
39#include <stdlib.h>
40#include <assert.h>
41#include <stdint.h>
42#include <ctype.h>
43#include <malloc.h>
44#include <errno.h>
45#include <align.h>
46#include <mem.h>
47#include <str.h>
48
/** Byte mask consisting of lowest @n bits (out of 8); valid for @n in 0..8 */
#define LO_MASK_8(n)  ((uint8_t) ((1U << (n)) - 1))

/** Mask consisting of lowest @n bits (out of 32); valid for @n in 0..31 */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1))

/**
 * Byte mask consisting of highest @n bits (out of 8).
 *
 * The result is cast back to uint8_t because ~ promotes its operand to int,
 * which would otherwise produce a negative value with all upper bits set.
 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
60
61/** Decode a single character from a string.
62 *
63 * Decode a single character from a string of size @a size. Decoding starts
64 * at @a offset and this offset is moved to the beginning of the next
65 * character. In case of decoding error, offset generally advances at least
66 * by one. However, offset is never moved beyond size.
67 *
68 * @param str String (not necessarily NULL-terminated).
69 * @param offset Byte offset in string where to start decoding.
70 * @param size Size of the string (in bytes).
71 *
72 * @return Value of decoded character, U_SPECIAL on decoding error or
73 * NULL if attempt to decode beyond @a size.
74 *
75 */
76wchar_t str_decode(const char *str, size_t *offset, size_t size)
77{
78 if (*offset + 1 > size)
79 return 0;
80
81 /* First byte read from string */
82 uint8_t b0 = (uint8_t) str[(*offset)++];
83
84 /* Determine code length */
85
86 unsigned int b0_bits; /* Data bits in first byte */
87 unsigned int cbytes; /* Number of continuation bytes */
88
89 if ((b0 & 0x80) == 0) {
90 /* 0xxxxxxx (Plain ASCII) */
91 b0_bits = 7;
92 cbytes = 0;
93 } else if ((b0 & 0xe0) == 0xc0) {
94 /* 110xxxxx 10xxxxxx */
95 b0_bits = 5;
96 cbytes = 1;
97 } else if ((b0 & 0xf0) == 0xe0) {
98 /* 1110xxxx 10xxxxxx 10xxxxxx */
99 b0_bits = 4;
100 cbytes = 2;
101 } else if ((b0 & 0xf8) == 0xf0) {
102 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
103 b0_bits = 3;
104 cbytes = 3;
105 } else {
106 /* 10xxxxxx -- unexpected continuation byte */
107 return U_SPECIAL;
108 }
109
110 if (*offset + cbytes > size)
111 return U_SPECIAL;
112
113 wchar_t ch = b0 & LO_MASK_8(b0_bits);
114
115 /* Decode continuation bytes */
116 while (cbytes > 0) {
117 uint8_t b = (uint8_t) str[(*offset)++];
118
119 /* Must be 10xxxxxx */
120 if ((b & 0xc0) != 0x80)
121 return U_SPECIAL;
122
123 /* Shift data bits to ch */
124 ch = (ch << CONT_BITS) | (wchar_t) (b & LO_MASK_8(CONT_BITS));
125 cbytes--;
126 }
127
128 return ch;
129}
130
131/** Encode a single character to string representation.
132 *
133 * Encode a single character to string representation (i.e. UTF-8) and store
134 * it into a buffer at @a offset. Encoding starts at @a offset and this offset
135 * is moved to the position where the next character can be written to.
136 *
137 * @param ch Input character.
138 * @param str Output buffer.
139 * @param offset Byte offset where to start writing.
140 * @param size Size of the output buffer (in bytes).
141 *
142 * @return EOK if the character was encoded successfully, EOVERFLOW if there
143 * was not enough space in the output buffer or EINVAL if the character
144 * code was invalid.
145 */
146int chr_encode(const wchar_t ch, char *str, size_t *offset, size_t size)
147{
148 if (*offset >= size)
149 return EOVERFLOW;
150
151 if (!chr_check(ch))
152 return EINVAL;
153
154 /* Unsigned version of ch (bit operations should only be done
155 on unsigned types). */
156 uint32_t cc = (uint32_t) ch;
157
158 /* Determine how many continuation bytes are needed */
159
160 unsigned int b0_bits; /* Data bits in first byte */
161 unsigned int cbytes; /* Number of continuation bytes */
162
163 if ((cc & ~LO_MASK_32(7)) == 0) {
164 b0_bits = 7;
165 cbytes = 0;
166 } else if ((cc & ~LO_MASK_32(11)) == 0) {
167 b0_bits = 5;
168 cbytes = 1;
169 } else if ((cc & ~LO_MASK_32(16)) == 0) {
170 b0_bits = 4;
171 cbytes = 2;
172 } else if ((cc & ~LO_MASK_32(21)) == 0) {
173 b0_bits = 3;
174 cbytes = 3;
175 } else {
176 /* Codes longer than 21 bits are not supported */
177 return EINVAL;
178 }
179
180 /* Check for available space in buffer */
181 if (*offset + cbytes >= size)
182 return EOVERFLOW;
183
184 /* Encode continuation bytes */
185 unsigned int i;
186 for (i = cbytes; i > 0; i--) {
187 str[*offset + i] = 0x80 | (cc & LO_MASK_32(CONT_BITS));
188 cc = cc >> CONT_BITS;
189 }
190
191 /* Encode first byte */
192 str[*offset] = (cc & LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1);
193
194 /* Advance offset */
195 *offset += cbytes + 1;
196
197 return EOK;
198}
199
/** Get size of string.
 *
 * Count the bytes occupied by the string @a str, not counting the
 * null terminator.
 *
 * @param str Null-terminated string to measure.
 *
 * @return Number of bytes used by the string.
 *
 */
size_t str_size(const char *str)
{
	const char *end = str;

	/* Walk to the terminator; the distance travelled is the size. */
	while (*end != '\0')
		end++;

	return (size_t) (end - str);
}
219
/** Get size of wide string.
 *
 * Compute the number of bytes occupied by the wide string @a str, not
 * counting the null terminator.
 *
 * @param str Null-terminated wide string to measure.
 *
 * @return Number of bytes used by the wide string.
 *
 */
size_t wstr_size(const wchar_t *str)
{
	size_t chars = wstr_length(str);

	return chars * sizeof(wchar_t);
}
234
235/** Get size of string with length limit.
236 *
237 * Get the number of bytes which are used by up to @a max_len first
238 * characters in the string @a str. If @a max_len is greater than
239 * the length of @a str, the entire string is measured (excluding the
240 * NULL-terminator).
241 *
242 * @param str String to consider.
243 * @param max_len Maximum number of characters to measure.
244 *
245 * @return Number of bytes used by the characters.
246 *
247 */
248size_t str_lsize(const char *str, size_t max_len)
249{
250 size_t len = 0;
251 size_t offset = 0;
252
253 while (len < max_len) {
254 if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
255 break;
256
257 len++;
258 }
259
260 return offset;
261}
262
/** Get size of wide string with length limit.
 *
 * Get the number of bytes which are used by up to @a max_len first
 * wide characters in the wide string @a str. If @a max_len is greater
 * than the length of @a str, the entire wide string is measured
 * (excluding the null terminator).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	/* wstr_nlength() takes its limit in bytes, not in characters. */
	size_t byte_limit = max_len * sizeof(wchar_t);
	size_t chars = wstr_nlength(str, byte_limit);

	return chars * sizeof(wchar_t);
}
280
281/** Get number of characters in a string.
282 *
283 * @param str NULL-terminated string.
284 *
285 * @return Number of characters in string.
286 *
287 */
288size_t str_length(const char *str)
289{
290 size_t len = 0;
291 size_t offset = 0;
292
293 while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
294 len++;
295
296 return len;
297}
298
/** Get number of characters in a wide string.
 *
 * @param wstr Null-terminated wide string.
 *
 * @return Number of wide characters before the terminator.
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
	const wchar_t *end = wstr;

	/* Walk to the terminator; the distance travelled is the length. */
	while (*end != 0)
		end++;

	return (size_t) (end - wstr);
}
315
/** Get number of characters in a string with size limit.
 *
 * Characters are counted by decoding the string with str_decode() until
 * it yields 0, which happens at the null terminator or once @a size bytes
 * have been consumed.
 *
 * @param str  Null-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in string.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t pos = 0;
	size_t count = 0;

	for (;;) {
		if (str_decode(str, &pos, size) == 0)
			return count;

		count++;
	}
}
334
/** Get number of characters in a wide string with size limit.
 *
 * @param str  Null-terminated wide string.
 * @param size Maximum number of bytes to consider. The limit is rounded
 *             down to a whole number of wide characters.
 *
 * @return Number of wide characters found before the terminator or
 *         before the size limit, whichever comes first.
 *
 */
size_t wstr_nlength(const wchar_t *str, size_t size)
{
	size_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
	size_t count = 0;

	for (size_t used = 0; used < limit; used += sizeof(wchar_t)) {
		if (str[count] == 0)
			break;

		count++;
	}

	return count;
}
356
/** Check whether character is plain ASCII.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0..127.
 *
 */
bool ascii_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 127);
}
369
/** Check whether wide string is plain ASCII.
 *
 * @param wstr Null-terminated wide string.
 *
 * @return True if every character before the terminator passes
 *         ascii_check() (an empty string qualifies).
 *
 */
bool wstr_is_ascii(const wchar_t *wstr)
{
	for (; *wstr != 0; wstr++) {
		if (!ascii_check(*wstr))
			return false;
	}

	return true;
}
381
/** Check whether character is valid.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the Unicode code point range
 *         0..0x10ffff.
 *
 */
bool chr_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 0x10ffff);
}
394
395/** Compare two NULL terminated strings.
396 *
397 * Do a char-by-char comparison of two NULL-terminated strings.
398 * The strings are considered equal iff they consist of the same
399 * characters on the minimum of their lengths.
400 *
401 * @param s1 First string to compare.
402 * @param s2 Second string to compare.
403 *
404 * @return 0 if the strings are equal, -1 if first is smaller,
405 * 1 if second smaller.
406 *
407 */
408int str_cmp(const char *s1, const char *s2)
409{
410 wchar_t c1 = 0;
411 wchar_t c2 = 0;
412
413 size_t off1 = 0;
414 size_t off2 = 0;
415
416 while (true) {
417 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
418 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
419
420 if (c1 < c2)
421 return -1;
422
423 if (c1 > c2)
424 return 1;
425
426 if (c1 == 0 || c2 == 0)
427 break;
428 }
429
430 return 0;
431}
432
433/** Compare two NULL terminated strings with length limit.
434 *
435 * Do a char-by-char comparison of two NULL-terminated strings.
436 * The strings are considered equal iff they consist of the same
437 * characters on the minimum of their lengths and the length limit.
438 *
439 * @param s1 First string to compare.
440 * @param s2 Second string to compare.
441 * @param max_len Maximum number of characters to consider.
442 *
443 * @return 0 if the strings are equal, -1 if first is smaller,
444 * 1 if second smaller.
445 *
446 */
447int str_lcmp(const char *s1, const char *s2, size_t max_len)
448{
449 wchar_t c1 = 0;
450 wchar_t c2 = 0;
451
452 size_t off1 = 0;
453 size_t off2 = 0;
454
455 size_t len = 0;
456
457 while (true) {
458 if (len >= max_len)
459 break;
460
461 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
462 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
463
464 if (c1 < c2)
465 return -1;
466
467 if (c1 > c2)
468 return 1;
469
470 if (c1 == 0 || c2 == 0)
471 break;
472
473 ++len;
474 }
475
476 return 0;
477
478}
479
480/** Copy string.
481 *
482 * Copy source string @a src to destination buffer @a dest.
483 * No more than @a size bytes are written. If the size of the output buffer
484 * is at least one byte, the output string will always be well-formed, i.e.
485 * null-terminated and containing only complete characters.
486 *
487 * @param dest Destination buffer.
488 * @param count Size of the destination buffer (must be > 0).
489 * @param src Source string.
490 */
491void str_cpy(char *dest, size_t size, const char *src)
492{
493 /* There must be space for a null terminator in the buffer. */
494 assert(size > 0);
495
496 size_t src_off = 0;
497 size_t dest_off = 0;
498
499 wchar_t ch;
500 while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
501 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
502 break;
503 }
504
505 dest[dest_off] = '\0';
506}
507
508/** Copy size-limited substring.
509 *
510 * Copy prefix of string @a src of max. size @a size to destination buffer
511 * @a dest. No more than @a size bytes are written. The output string will
512 * always be well-formed, i.e. null-terminated and containing only complete
513 * characters.
514 *
515 * No more than @a n bytes are read from the input string, so it does not
516 * have to be null-terminated.
517 *
518 * @param dest Destination buffer.
519 * @param count Size of the destination buffer (must be > 0).
520 * @param src Source string.
521 * @param n Maximum number of bytes to read from @a src.
522 */
523void str_ncpy(char *dest, size_t size, const char *src, size_t n)
524{
525 /* There must be space for a null terminator in the buffer. */
526 assert(size > 0);
527
528 size_t src_off = 0;
529 size_t dest_off = 0;
530
531 wchar_t ch;
532 while ((ch = str_decode(src, &src_off, n)) != 0) {
533 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
534 break;
535 }
536
537 dest[dest_off] = '\0';
538}
539
/** Append one string to another.
 *
 * Append source string @a src to the string already stored in destination
 * buffer @a dest. The total size of the destination buffer is @a size.
 * If the existing string already fills the buffer, nothing is done.
 * Otherwise the appended part is copied with str_cpy(), so the result is
 * null-terminated and contains only complete characters.
 *
 * @param dest Destination buffer holding a null-terminated string.
 * @param size Size of the destination buffer.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	size_t used = str_size(dest);

	if (used < size)
		str_cpy(dest + used, size - used, src);
}
561
562/** Convert space-padded ASCII to string.
563 *
564 * Common legacy text encoding in hardware is 7-bit ASCII fitted into
565 * a fixed-with byte buffer (bit 7 always zero), right-padded with spaces
566 * (ASCII 0x20). Convert space-padded ascii to string representation.
567 *
568 * If the text does not fit into the destination buffer, the function converts
569 * as many characters as possible and returns EOVERFLOW.
570 *
571 * If the text contains non-ASCII bytes (with bit 7 set), the whole string is
572 * converted anyway and invalid characters are replaced with question marks
573 * (U_SPECIAL) and the function returns EIO.
574 *
575 * Regardless of return value upon return @a dest will always be well-formed.
576 *
577 * @param dest Destination buffer
578 * @param size Size of destination buffer
579 * @param src Space-padded ASCII.
580 * @param n Size of the source buffer in bytes.
581 *
582 * @return EOK on success, EOVERFLOW if the text does not fit
583 * destination buffer, EIO if the text contains
584 * non-ASCII bytes.
585 */
586int spascii_to_str(char *dest, size_t size, const uint8_t *src, size_t n)
587{
588 size_t sidx;
589 size_t didx;
590 size_t dlast;
591 uint8_t byte;
592 int rc;
593 int result;
594
595 /* There must be space for a null terminator in the buffer. */
596 assert(size > 0);
597 result = EOK;
598
599 didx = 0;
600 dlast = 0;
601 for (sidx = 0; sidx < n; ++sidx) {
602 byte = src[sidx];
603 if (!ascii_check(byte)) {
604 byte = U_SPECIAL;
605 result = EIO;
606 }
607
608 rc = chr_encode(byte, dest, &didx, size - 1);
609 if (rc != EOK) {
610 assert(rc == EOVERFLOW);
611 dest[didx] = '\0';
612 return rc;
613 }
614
615 /* Remember dest index after last non-empty character */
616 if (byte != 0x20)
617 dlast = didx;
618 }
619
620 /* Terminate string after last non-empty character */
621 dest[dlast] = '\0';
622 return result;
623}
624
625/** Convert wide string to string.
626 *
627 * Convert wide string @a src to string. The output is written to the buffer
628 * specified by @a dest and @a size. @a size must be non-zero and the string
629 * written will always be well-formed.
630 *
631 * @param dest Destination buffer.
632 * @param size Size of the destination buffer.
633 * @param src Source wide string.
634 *
635 * @return EOK, if success, negative otherwise.
636 */
637int wstr_to_str(char *dest, size_t size, const wchar_t *src)
638{
639 int rc;
640 wchar_t ch;
641 size_t src_idx;
642 size_t dest_off;
643
644 /* There must be space for a null terminator in the buffer. */
645 assert(size > 0);
646
647 src_idx = 0;
648 dest_off = 0;
649
650 while ((ch = src[src_idx++]) != 0) {
651 rc = chr_encode(ch, dest, &dest_off, size - 1);
652 if (rc != EOK)
653 break;
654 }
655
656 dest[dest_off] = '\0';
657 return rc;
658}
659
660/** Convert UTF16 string to string.
661 *
662 * Convert utf16 string @a src to string. The output is written to the buffer
663 * specified by @a dest and @a size. @a size must be non-zero and the string
664 * written will always be well-formed. Surrogate pairs also supported.
665 *
666 * @param dest Destination buffer.
667 * @param size Size of the destination buffer.
668 * @param src Source utf16 string.
669 *
670 * @return EOK, if success, negative otherwise.
671 */
672int utf16_to_str(char *dest, size_t size, const uint16_t *src)
673{
674 size_t idx=0, dest_off=0;
675 wchar_t ch;
676 int rc = EOK;
677
678 /* There must be space for a null terminator in the buffer. */
679 assert(size > 0);
680
681 while (src[idx]) {
682 if ((src[idx] & 0xfc00) == 0xd800) {
683 if (src[idx+1] && (src[idx+1] & 0xfc00) == 0xdc00) {
684 ch = 0x10000;
685 ch += (src[idx] & 0x03FF) << 10;
686 ch += (src[idx+1] & 0x03FF);
687 idx += 2;
688 }
689 else
690 break;
691 } else {
692 ch = src[idx];
693 idx++;
694 }
695 rc = chr_encode(ch, dest, &dest_off, size-1);
696 if (rc != EOK)
697 break;
698 }
699 dest[dest_off] = '\0';
700 return rc;
701}
702
703int str_to_utf16(uint16_t *dest, size_t size, const char *src)
704{
705 int rc=EOK;
706 size_t offset=0;
707 size_t idx=0;
708 wchar_t c;
709
710 assert(size > 0);
711
712 while ((c = str_decode(src, &offset, STR_NO_LIMIT)) != 0) {
713 if (c > 0x10000) {
714 if (idx+2 >= size-1) {
715 rc=EOVERFLOW;
716 break;
717 }
718 c = (c - 0x10000);
719 dest[idx] = 0xD800 | (c >> 10);
720 dest[idx+1] = 0xDC00 | (c & 0x3FF);
721 idx++;
722 } else {
723 dest[idx] = c;
724 }
725
726 idx++;
727 if (idx >= size-1) {
728 rc=EOVERFLOW;
729 break;
730 }
731 }
732
733 dest[idx] = '\0';
734 return rc;
735}
736
737
738/** Convert wide string to new string.
739 *
740 * Convert wide string @a src to string. Space for the new string is allocated
741 * on the heap.
742 *
743 * @param src Source wide string.
744 * @return New string.
745 */
746char *wstr_to_astr(const wchar_t *src)
747{
748 char dbuf[STR_BOUNDS(1)];
749 char *str;
750 wchar_t ch;
751
752 size_t src_idx;
753 size_t dest_off;
754 size_t dest_size;
755
756 /* Compute size of encoded string. */
757
758 src_idx = 0;
759 dest_size = 0;
760
761 while ((ch = src[src_idx++]) != 0) {
762 dest_off = 0;
763 if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
764 break;
765 dest_size += dest_off;
766 }
767
768 str = malloc(dest_size + 1);
769 if (str == NULL)
770 return NULL;
771
772 /* Encode string. */
773
774 src_idx = 0;
775 dest_off = 0;
776
777 while ((ch = src[src_idx++]) != 0) {
778 if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
779 break;
780 }
781
782 str[dest_size] = '\0';
783 return str;
784}
785
786
787/** Convert string to wide string.
788 *
789 * Convert string @a src to wide string. The output is written to the
790 * buffer specified by @a dest and @a dlen. @a dlen must be non-zero
791 * and the wide string written will always be null-terminated.
792 *
793 * @param dest Destination buffer.
794 * @param dlen Length of destination buffer (number of wchars).
795 * @param src Source string.
796 *
797 * @return EOK, if success, negative otherwise.
798 */
799int str_to_wstr(wchar_t *dest, size_t dlen, const char *src)
800{
801 int rc=EOK;
802 size_t offset;
803 size_t di;
804 wchar_t c;
805
806 assert(dlen > 0);
807
808 offset = 0;
809 di = 0;
810
811 do {
812 if (di >= dlen - 1) {
813 rc = EOVERFLOW;
814 break;
815 }
816
817 c = str_decode(src, &offset, STR_NO_LIMIT);
818 dest[di++] = c;
819 } while (c != '\0');
820
821 dest[dlen - 1] = '\0';
822 return rc;
823}
824
/** Convert string to new wide string.
 *
 * Convert string @a str to wide string. A new null-terminated wide
 * string is allocated on the heap with calloc().
 *
 * @param str Source string.
 *
 * @return New wide string or NULL if the allocation failed.
 */
wchar_t *str_to_awstr(const char *str)
{
	size_t chars = str_length(str);
	wchar_t *wide = calloc(chars + 1, sizeof(wchar_t));

	if (wide != NULL)
		str_to_wstr(wide, chars + 1, str);

	return wide;
}
843
844/** Find first occurence of character in string.
845 *
846 * @param str String to search.
847 * @param ch Character to look for.
848 *
849 * @return Pointer to character in @a str or NULL if not found.
850 */
851char *str_chr(const char *str, wchar_t ch)
852{
853 wchar_t acc;
854 size_t off = 0;
855 size_t last = 0;
856
857 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
858 if (acc == ch)
859 return (char *) (str + last);
860 last = off;
861 }
862
863 return NULL;
864}
865
866/** Find last occurence of character in string.
867 *
868 * @param str String to search.
869 * @param ch Character to look for.
870 *
871 * @return Pointer to character in @a str or NULL if not found.
872 */
873char *str_rchr(const char *str, wchar_t ch)
874{
875 wchar_t acc;
876 size_t off = 0;
877 size_t last = 0;
878 const char *res = NULL;
879
880 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
881 if (acc == ch)
882 res = (str + last);
883 last = off;
884 }
885
886 return (char *) res;
887}
888
/** Find first occurrence of character in wide string.
 *
 * @param wstr Wide string to search.
 * @param ch   Character to look for.
 *
 * @return Pointer to character in @a wstr or NULL if not found. The
 *         terminator itself is never matched.
 */
wchar_t *wstr_chr(const wchar_t *wstr, wchar_t ch)
{
	for (; *wstr != 0; wstr++) {
		if (*wstr == ch)
			return (wchar_t *) wstr;
	}

	return NULL;
}
905
/** Find last occurrence of character in wide string.
 *
 * @param wstr Wide string to search.
 * @param ch   Character to look for.
 *
 * @return Pointer to character in @a wstr or NULL if not found. The
 *         terminator itself is never matched.
 */
wchar_t *wstr_rchr(const wchar_t *wstr, wchar_t ch)
{
	const wchar_t *hit = NULL;

	for (size_t i = 0; wstr[i] != 0; i++) {
		if (wstr[i] == ch)
			hit = &wstr[i];
	}

	return (wchar_t *) hit;
}
923
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position @a pos. The
 * characters from that position on (including the terminator) are
 * shifted by one towards the end.
 *
 * Only @a pos is checked against @a max_pos; the current length of the
 * string is not. The caller has to make sure the buffer can take one
 * more character.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	if (pos > len)
		return false;

	if (pos + 1 > max_pos)
		return false;

	/* Shift the tail from the terminator backwards so nothing is lost. */
	for (size_t k = len + 1; k > pos; k--)
		str[k] = str[k - 1];

	str[pos] = ch;

	return true;
}
953
/** Remove a wide character from a wide string.
 *
 * Remove the wide character at position @a pos. The characters after
 * that position (including the terminator) are shifted by one towards
 * the beginning.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	size_t len = wstr_length(str);

	if (pos >= len)
		return false;

	/* The last iteration moves the terminator into place. */
	for (size_t k = pos; k < len; k++)
		str[k] = str[k + 1];

	return true;
}
979
/** Compare two strings byte by byte, ignoring case.
 *
 * Bytes are compared after passing each of them through tolower().
 *
 * @param a First string.
 * @param b Second string.
 *
 * @return Difference of the lowercased bytes at the first position where
 *         the strings differ or where either of them ends; zero if the
 *         strings are equal.
 */
int stricmp(const char *a, const char *b)
{
	int i = 0;

	for (;;) {
		int diff = tolower(a[i]) - tolower(b[i]);

		if ((a[i] == '\0') || (b[i] == '\0') || (diff != 0))
			return diff;

		i++;
	}
}
989
/** Convert string to a number.
 * Core of strtol and strtoul functions.
 *
 * Leading whitespace is skipped, then an optional sign and an optional
 * base prefix are consumed, followed by as many digits valid in the given
 * base as possible.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character. If no conversion was performed
 *               (no digits or invalid base), @a nptr is stored.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @param sgn    Set to 1 if a minus sign was found, otherwise left
 *               untouched (the caller is expected to initialize it).
 * @return       Result of conversion, 0 if no conversion was performed,
 *               ULONG_MAX on overflow.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
	unsigned long result = 0;
	unsigned long a, b;
	const char *str = nptr;
	const char *tmpptr;

	/* The cast keeps bytes >= 0x80 from becoming negative arguments. */
	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*sgn = 1;
		++str;
	} else if (*str == '+')
		++str;

	if (base) {
		if ((base < 2) || (base > 36)) {
			/* FIXME: set errno to EINVAL */
			if (endptr)
				*endptr = (char *) nptr;
			return 0;
		}
		if ((base == 16) && (*str == '0') && ((str[1] == 'x') ||
		    (str[1] == 'X'))) {
			str += 2;
		}
	} else {
		base = 10;

		if (*str == '0') {
			base = 8;
			if ((str[1] == 'X') || (str[1] == 'x')) {
				base = 16;
				str += 2;
			}
		}
	}

	tmpptr = str;

	while (*str) {
		unsigned char c = (unsigned char) *str;
		unsigned int digit;

		if ((c >= '0') && (c <= '9'))
			digit = c - '0';
		else if ((c >= 'a') && (c <= 'z'))
			digit = c - 'a' + 10;
		else if ((c >= 'A') && (c <= 'Z'))
			digit = c - 'A' + 10;
		else
			break;

		/* Valid digits are 0 .. base - 1; the base itself is not one. */
		if (digit >= (unsigned int) base)
			break;

		/*
		 * Multiply in two parts (low 8 bits and the rest) so that
		 * overflow can be detected without a wider type.
		 */
		a = (result & 0xff) * base + digit;
		b = (result >> 8) * base + (a >> 8);

		if (b > (ULONG_MAX >> 8)) {
			/* overflow */
			/* FIXME: errno = ERANGE*/
			return ULONG_MAX;
		}

		result = (b << 8) + (a & 0xff);
		++str;
	}

	if (str == tmpptr) {
		/*
		 * No number was found => first invalid character is the first
		 * character of the string.
		 */
		/* FIXME: set errno to EINVAL */
		str = nptr;
		result = 0;
	}

	if (endptr)
		*endptr = (char *) str;

	if (nptr == str) {
		/*FIXME: errno = EINVAL*/
		return 0;
	}

	return result;
}
1082
/** Convert initial part of string to long int according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * Values that do not fit into long int are clamped to LONG_MIN or
 * LONG_MAX according to the sign.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return       Result of conversion.
 */
long int strtol(const char *nptr, char **endptr, int base)
{
	char negative = 0;
	unsigned long magnitude = _strtoul(nptr, endptr, base, &negative);

	if (magnitude <= LONG_MAX)
		return (negative ? -magnitude : magnitude);

	/* The magnitude of LONG_MIN is one more than LONG_MAX. */
	if ((negative) && (magnitude == (unsigned long) (LONG_MAX) + 1)) {
		/* FIXME: set 0 to errno */
		return magnitude;
	}

	/* FIXME: set ERANGE to errno */
	return (negative ? LONG_MIN : LONG_MAX);
}
1114
/** Duplicate string.
 *
 * Allocate a new string with malloc() and copy characters from the
 * source string into it using str_cpy().
 *
 * The duplicate string is always a null-terminated string, but because
 * it is re-encoded by str_cpy() it can differ from the source string on
 * the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	size_t bytes = str_size(src) + 1;
	char *copy = (char *) malloc(bytes);

	if (copy != NULL)
		str_cpy(copy, bytes, src);

	return copy;
}
1141
/** Duplicate string with size limit.
 *
 * Allocate a new string with malloc() and copy up to @a n bytes from the
 * source string into it using str_ncpy(). No more than @a n + 1 bytes
 * are allocated; if the source string is shorter than @a n bytes, only
 * its size plus one byte is allocated.
 *
 * The duplicate string is always a null-terminated string, but because
 * it is re-encoded by str_ncpy() it can differ from the source string on
 * the byte level.
 *
 * @param src Source string (null-terminated; its full size is measured).
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	size_t len = str_size(src);
	size_t take = (len > n) ? n : len;

	char *copy = (char *) malloc(take + 1);
	if (copy == NULL)
		return (char *) NULL;

	str_ncpy(copy, take + 1, src, take);
	return copy;
}
1175
/** Reverse a range of bytes in place.
 *
 * @param begin Pointer to the first byte of the range.
 * @param end   Pointer to the last byte of the range (inclusive).
 */
void str_reverse(char* begin, char* end)
{
	/* Swap the outermost pair and move both ends towards the middle. */
	for (; end > begin; begin++, end--) {
		char tmp = *end;

		*end = *begin;
		*begin = tmp;
	}
}
1182
1183int size_t_str(size_t value, int base, char* str, size_t size)
1184{
1185 static char num[] = "0123456789abcdefghijklmnopqrstuvwxyz";
1186 char* wstr=str;
1187
1188 if (size == 0)
1189 return EINVAL;
1190 if (base<2 || base>35) {
1191 *str='\0';
1192 return EINVAL;
1193 }
1194
1195 do {
1196 *wstr++ = num[value % base];
1197 if (--size == 0)
1198 return EOVERFLOW;
1199 } while(value /= base);
1200 *wstr='\0';
1201
1202 // Reverse string
1203 str_reverse(str,wstr-1);
1204 return EOK;
1205}
1206
/** Convert initial part of string to unsigned long according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * If a minus sign was present, the converted value is negated in unsigned
 * arithmetic.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return       Result of conversion.
 */
unsigned long strtoul(const char *nptr, char **endptr, int base)
{
	char negative = 0;
	unsigned long magnitude = _strtoul(nptr, endptr, base, &negative);

	if (negative)
		return -magnitude;

	return magnitude;
}
1229
/** Split a string into tokens.
 *
 * Wrapper around strtok_r() that keeps the scan position in a static
 * variable, so only one tokenization can be in progress at a time.
 *
 * @param s     String to tokenize, or NULL to continue with the
 *              previously passed string.
 * @param delim Set of delimiter characters.
 *
 * @return Next token, or NULL if there are no more tokens.
 *
 */
char *strtok(char *s, const char *delim)
{
	/* Scan position remembered between calls. */
	static char *saved_pos;

	return strtok_r(s, delim, &saved_pos);
}
1236
/** Split a string into tokens, with caller-provided state.
 *
 * The string is modified in place: the delimiter that ends a token is
 * overwritten with a null terminator.
 *
 * @param s     String to tokenize, or NULL to continue from the position
 *              stored in @a next.
 * @param delim Set of delimiter characters.
 * @param next  Where the scan position is kept between calls.
 *
 * @return Pointer to the next token, or NULL if there are no more tokens
 *         (or if there is no string to continue with).
 *
 */
char *strtok_r(char *s, const char *delim, char **next)
{
	if (s == NULL)
		s = *next;

	/*
	 * Continuation requested but no position was ever saved
	 * (e.g. the very first call passed NULL): nothing to tokenize.
	 */
	if (s == NULL)
		return NULL;

	/* Skip over leading delimiters. */
	while ((*s != '\0') && (str_chr(delim, *s) != NULL))
		s++;
	char *start = s;

	/* Skip over token characters. */
	while ((*s != '\0') && (str_chr(delim, *s) == NULL))
		s++;
	char *end = s;

	/*
	 * Resume after the delimiter, or stay on the terminator so that
	 * subsequent calls keep returning NULL.
	 */
	*next = (*s != '\0') ? s + 1 : s;

	if (start == end)
		return NULL;  /* No more tokens. */

	/* Overwrite the delimiter with a null terminator. */
	*end = '\0';
	return start;
}
1261
1262/** Convert string to uint64_t (internal variant).
1263 *
1264 * @param nptr Pointer to string.
1265 * @param endptr Pointer to the first invalid character is stored here.
1266 * @param base Zero or number between 2 and 36 inclusive.
1267 * @param neg Indication of unary minus is stored here.
1268 * @apram result Result of the conversion.
1269 *
1270 * @return EOK if conversion was successful.
1271 *
1272 */
1273static int str_uint(const char *nptr, char **endptr, unsigned int base,
1274 bool *neg, uint64_t *result)
1275{
1276 assert(endptr != NULL);
1277 assert(neg != NULL);
1278 assert(result != NULL);
1279
1280 *neg = false;
1281 const char *str = nptr;
1282
1283 /* Ignore leading whitespace */
1284 while (isspace(*str))
1285 str++;
1286
1287 if (*str == '-') {
1288 *neg = true;
1289 str++;
1290 } else if (*str == '+')
1291 str++;
1292
1293 if (base == 0) {
1294 /* Decode base if not specified */
1295 base = 10;
1296
1297 if (*str == '0') {
1298 base = 8;
1299 str++;
1300
1301 switch (*str) {
1302 case 'b':
1303 case 'B':
1304 base = 2;
1305 str++;
1306 break;
1307 case 'o':
1308 case 'O':
1309 base = 8;
1310 str++;
1311 break;
1312 case 'd':
1313 case 'D':
1314 case 't':
1315 case 'T':
1316 base = 10;
1317 str++;
1318 break;
1319 case 'x':
1320 case 'X':
1321 base = 16;
1322 str++;
1323 break;
1324 default:
1325 str--;
1326 }
1327 }
1328 } else {
1329 /* Check base range */
1330 if ((base < 2) || (base > 36)) {
1331 *endptr = (char *) str;
1332 return EINVAL;
1333 }
1334 }
1335
1336 *result = 0;
1337 const char *startstr = str;
1338
1339 while (*str != 0) {
1340 unsigned int digit;
1341
1342 if ((*str >= 'a') && (*str <= 'z'))
1343 digit = *str - 'a' + 10;
1344 else if ((*str >= 'A') && (*str <= 'Z'))
1345 digit = *str - 'A' + 10;
1346 else if ((*str >= '0') && (*str <= '9'))
1347 digit = *str - '0';
1348 else
1349 break;
1350
1351 if (digit >= base)
1352 break;
1353
1354 uint64_t prev = *result;
1355 *result = (*result) * base + digit;
1356
1357 if (*result < prev) {
1358 /* Overflow */
1359 *endptr = (char *) str;
1360 return EOVERFLOW;
1361 }
1362
1363 str++;
1364 }
1365
1366 if (str == startstr) {
1367 /*
1368 * No digits were decoded => first invalid character is
1369 * the first character of the string.
1370 */
1371 str = nptr;
1372 }
1373
1374 *endptr = (char *) str;
1375
1376 if (str == nptr)
1377 return EINVAL;
1378
1379 return EOK;
1380}
1381
1382/** Convert string to uint64_t.
1383 *
1384 * @param nptr Pointer to string.
1385 * @param endptr If not NULL, pointer to the first invalid character
1386 * is stored here.
1387 * @param base Zero or number between 2 and 36 inclusive.
1388 * @param strict Do not allow any trailing characters.
1389 * @param result Result of the conversion.
1390 *
1391 * @return EOK if conversion was successful.
1392 *
1393 */
1394int str_uint64(const char *nptr, char **endptr, unsigned int base,
1395 bool strict, uint64_t *result)
1396{
1397 assert(result != NULL);
1398
1399 bool neg;
1400 char *lendptr;
1401 int ret = str_uint(nptr, &lendptr, base, &neg, result);
1402
1403 if (endptr != NULL)
1404 *endptr = (char *) lendptr;
1405
1406 if (ret != EOK)
1407 return ret;
1408
1409 /* Do not allow negative values */
1410 if (neg)
1411 return EINVAL;
1412
1413 /* Check whether we are at the end of
1414 the string in strict mode */
1415 if ((strict) && (*lendptr != 0))
1416 return EINVAL;
1417
1418 return EOK;
1419}
1420
1421/** Convert string to size_t.
1422 *
1423 * @param nptr Pointer to string.
1424 * @param endptr If not NULL, pointer to the first invalid character
1425 * is stored here.
1426 * @param base Zero or number between 2 and 36 inclusive.
1427 * @param strict Do not allow any trailing characters.
1428 * @param result Result of the conversion.
1429 *
1430 * @return EOK if conversion was successful.
1431 *
1432 */
1433int str_size_t(const char *nptr, char **endptr, unsigned int base,
1434 bool strict, size_t *result)
1435{
1436 assert(result != NULL);
1437
1438 bool neg;
1439 char *lendptr;
1440 uint64_t res;
1441 int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1442
1443 if (endptr != NULL)
1444 *endptr = (char *) lendptr;
1445
1446 if (ret != EOK)
1447 return ret;
1448
1449 /* Do not allow negative values */
1450 if (neg)
1451 return EINVAL;
1452
1453 /* Check whether we are at the end of
1454 the string in strict mode */
1455 if ((strict) && (*lendptr != 0))
1456 return EINVAL;
1457
1458 /* Check for overflow */
1459 size_t _res = (size_t) res;
1460 if (_res != res)
1461 return EOVERFLOW;
1462
1463 *result = _res;
1464
1465 return EOK;
1466}
1467
/** Scale a value to a decimal order of magnitude.
 *
 * The value is divided by the largest power of 1000 such that the
 * scaled value still exceeds 1000 (10 for the largest order), and the
 * matching SI prefix letter is reported. Values up to 1000000 are
 * returned unscaled with a space as the suffix.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix SI prefix letter (`k', `M', `G', `T', `P', `E') or
 *               a space is stored here.
 *
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/* Units of 10^18 (exa) */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* Units of 10^15 (peta) */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		/* Units of 10^12 (tera) */
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		/* Units of 10^9 (giga) */
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		/* Units of 10^6 (mega) */
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		/* Units of 10^3 (kilo) */
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
1493
/** Scale a byte count to a binary order of magnitude.
 *
 * The value is divided by the largest power of 1024 such that the
 * scaled value still exceeds 1024, and the matching binary unit name is
 * reported. Values up to 1048576 are returned unscaled.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Pointer to a constant unit string (`KiB', `MiB', `GiB',
 *               `TiB', `PiB' or the byte unit) is stored here.
 * @param fixed  If true, the byte unit is returned with trailing
 *               padding (`B ') instead of the plain `B'.
 *
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* Above 2^60: units of 2^50 (pebibytes) */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		/* Above 2^50: units of 2^40 (tebibytes) */
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		/* Above 2^40: units of 2^30 (gibibytes) */
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		/* Above 2^30: units of 2^20 (mebibytes) */
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		/* Above 2^20: units of 2^10 (kibibytes) */
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		/*
		 * NOTE(review): the other suffixes are three characters
		 * wide; confirm whether the padded variant should match.
		 */
		if (fixed)
			*suffix = "B ";
		else
			*suffix = "B";
	}
}
1520
1521/** @}
1522 */
Note: See TracBrowser for help on using the repository browser.