source: mainline/uspace/lib/c/generic/str.c@ 22cf42d9

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 22cf42d9 was 22cf42d9, checked in by Martin Sucha <sucha14@…>, 14 years ago

Add formatting library and display help message wrapped

  • Property mode set to 100644
File size: 31.0 KB
RevLine 
[936351c1]1/*
[df4ed85]2 * Copyright (c) 2005 Martin Decky
[576845ec]3 * Copyright (c) 2008 Jiri Svoboda
[22cf42d9]4 * Copyright (c) 2011 Martin Sucha
[936351c1]5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * - Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * - Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * - The name of the author may not be used to endorse or promote products
17 * derived from this software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
[a46da63]31/** @addtogroup libc
[b2951e2]32 * @{
33 */
34/** @file
35 */
36
[19f857a]37#include <str.h>
[e64c4b2]38#include <stdlib.h>
[6700ee2]39#include <assert.h>
[9539be6]40#include <stdint.h>
[e64c4b2]41#include <ctype.h>
[566987b0]42#include <malloc.h>
[171f9a1]43#include <errno.h>
[f2b8cdc]44#include <align.h>
[095003a8]45#include <mem.h>
[19f857a]46#include <str.h>
[171f9a1]47
/** Mask selecting the lowest @a n bits of a byte (0 <= n <= 8).
 *
 * The shift is done on an unsigned operand so that no signed arithmetic
 * is involved.
 */
#define LO_MASK_8(n)  ((uint8_t) ((1U << (n)) - 1))

/** Mask selecting the lowest @a n bits of a 32-bit word (0 <= n < 32).
 *
 * The shifted operand is a uint32_t, so shifting into bit 31 is well
 * defined (a plain int operand would overflow there).
 */
#define LO_MASK_32(n)  ((uint32_t) ((((uint32_t) 1) << (n)) - 1))

/** Mask selecting the highest @a n bits of a byte (0 <= n <= 8).
 *
 * The complement is truncated back to 8 bits; without the cast the
 * integer promotion of the operand would leave all bits above the byte
 * set as well.
 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
59
60/** Decode a single character from a string.
61 *
62 * Decode a single character from a string of size @a size. Decoding starts
63 * at @a offset and this offset is moved to the beginning of the next
64 * character. In case of decoding error, offset generally advances at least
65 * by one. However, offset is never moved beyond size.
66 *
67 * @param str String (not necessarily NULL-terminated).
68 * @param offset Byte offset in string where to start decoding.
69 * @param size Size of the string (in bytes).
70 *
71 * @return Value of decoded character, U_SPECIAL on decoding error or
72 * NULL if attempt to decode beyond @a size.
73 *
74 */
75wchar_t str_decode(const char *str, size_t *offset, size_t size)
76{
77 if (*offset + 1 > size)
78 return 0;
79
80 /* First byte read from string */
81 uint8_t b0 = (uint8_t) str[(*offset)++];
82
83 /* Determine code length */
84
85 unsigned int b0_bits; /* Data bits in first byte */
86 unsigned int cbytes; /* Number of continuation bytes */
87
88 if ((b0 & 0x80) == 0) {
89 /* 0xxxxxxx (Plain ASCII) */
90 b0_bits = 7;
91 cbytes = 0;
92 } else if ((b0 & 0xe0) == 0xc0) {
93 /* 110xxxxx 10xxxxxx */
94 b0_bits = 5;
95 cbytes = 1;
96 } else if ((b0 & 0xf0) == 0xe0) {
97 /* 1110xxxx 10xxxxxx 10xxxxxx */
98 b0_bits = 4;
99 cbytes = 2;
100 } else if ((b0 & 0xf8) == 0xf0) {
101 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
102 b0_bits = 3;
103 cbytes = 3;
104 } else {
105 /* 10xxxxxx -- unexpected continuation byte */
106 return U_SPECIAL;
107 }
108
109 if (*offset + cbytes > size)
110 return U_SPECIAL;
111
112 wchar_t ch = b0 & LO_MASK_8(b0_bits);
113
114 /* Decode continuation bytes */
115 while (cbytes > 0) {
116 uint8_t b = (uint8_t) str[(*offset)++];
117
118 /* Must be 10xxxxxx */
119 if ((b & 0xc0) != 0x80)
120 return U_SPECIAL;
121
122 /* Shift data bits to ch */
123 ch = (ch << CONT_BITS) | (wchar_t) (b & LO_MASK_8(CONT_BITS));
124 cbytes--;
125 }
126
127 return ch;
128}
129
130/** Encode a single character to string representation.
131 *
132 * Encode a single character to string representation (i.e. UTF-8) and store
133 * it into a buffer at @a offset. Encoding starts at @a offset and this offset
134 * is moved to the position where the next character can be written to.
135 *
136 * @param ch Input character.
137 * @param str Output buffer.
138 * @param offset Byte offset where to start writing.
139 * @param size Size of the output buffer (in bytes).
140 *
141 * @return EOK if the character was encoded successfully, EOVERFLOW if there
[d4a3ee5]142 * was not enough space in the output buffer or EINVAL if the character
143 * code was invalid.
[171f9a1]144 */
145int chr_encode(const wchar_t ch, char *str, size_t *offset, size_t size)
146{
147 if (*offset >= size)
148 return EOVERFLOW;
149
150 if (!chr_check(ch))
151 return EINVAL;
152
153 /* Unsigned version of ch (bit operations should only be done
154 on unsigned types). */
155 uint32_t cc = (uint32_t) ch;
156
157 /* Determine how many continuation bytes are needed */
158
159 unsigned int b0_bits; /* Data bits in first byte */
160 unsigned int cbytes; /* Number of continuation bytes */
161
162 if ((cc & ~LO_MASK_32(7)) == 0) {
163 b0_bits = 7;
164 cbytes = 0;
165 } else if ((cc & ~LO_MASK_32(11)) == 0) {
166 b0_bits = 5;
167 cbytes = 1;
168 } else if ((cc & ~LO_MASK_32(16)) == 0) {
169 b0_bits = 4;
170 cbytes = 2;
171 } else if ((cc & ~LO_MASK_32(21)) == 0) {
172 b0_bits = 3;
173 cbytes = 3;
174 } else {
175 /* Codes longer than 21 bits are not supported */
176 return EINVAL;
177 }
178
179 /* Check for available space in buffer */
180 if (*offset + cbytes >= size)
181 return EOVERFLOW;
182
183 /* Encode continuation bytes */
184 unsigned int i;
185 for (i = cbytes; i > 0; i--) {
186 str[*offset + i] = 0x80 | (cc & LO_MASK_32(CONT_BITS));
187 cc = cc >> CONT_BITS;
188 }
189
190 /* Encode first byte */
191 str[*offset] = (cc & LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1);
192
193 /* Advance offset */
194 *offset += cbytes + 1;
195
196 return EOK;
197}
198
/** Get size of string.
 *
 * Count the bytes occupied by the string @a str, not counting the
 * null terminator.
 *
 * @param str Null-terminated string.
 *
 * @return Number of bytes preceding the null terminator.
 *
 */
size_t str_size(const char *str)
{
	const char *end = str;

	/* Walk to the terminator */
	while (*end != 0)
		end++;

	return (size_t) (end - str);
}
218
/** Get size of wide string.
 *
 * Compute the number of bytes occupied by the wide string @a str,
 * not counting the null terminator.
 *
 * @param str Null-terminated wide string.
 *
 * @return Length of the wide string multiplied by sizeof(wchar_t).
 *
 */
size_t wstr_size(const wchar_t *str)
{
	const size_t chars = wstr_length(str);

	return chars * sizeof(wchar_t);
}
233
234/** Get size of string with length limit.
235 *
236 * Get the number of bytes which are used by up to @a max_len first
237 * characters in the string @a str. If @a max_len is greater than
238 * the length of @a str, the entire string is measured (excluding the
239 * NULL-terminator).
240 *
241 * @param str String to consider.
242 * @param max_len Maximum number of characters to measure.
243 *
244 * @return Number of bytes used by the characters.
245 *
246 */
[d4a3ee5]247size_t str_lsize(const char *str, size_t max_len)
[f2b8cdc]248{
[d4a3ee5]249 size_t len = 0;
[f2b8cdc]250 size_t offset = 0;
251
252 while (len < max_len) {
253 if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
254 break;
255
256 len++;
257 }
258
259 return offset;
260}
261
/** Get size of wide string with length limit.
 *
 * Compute the number of bytes occupied by at most @a max_len leading
 * wide characters of @a str. If the wide string is shorter than
 * @a max_len, the whole wide string is measured (the null terminator
 * is not counted).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	/* wstr_nlength() takes its limit in bytes */
	const size_t byte_limit = max_len * sizeof(wchar_t);
	const size_t chars = wstr_nlength(str, byte_limit);

	return chars * sizeof(wchar_t);
}
279
280/** Get number of characters in a string.
281 *
282 * @param str NULL-terminated string.
283 *
284 * @return Number of characters in string.
285 *
286 */
[d4a3ee5]287size_t str_length(const char *str)
[f2b8cdc]288{
[d4a3ee5]289 size_t len = 0;
[f2b8cdc]290 size_t offset = 0;
291
292 while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
293 len++;
294
295 return len;
296}
297
/** Get number of characters in a wide string.
 *
 * @param wstr Null-terminated wide string.
 *
 * @return Number of wide characters preceding the null terminator.
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
	size_t len;

	for (len = 0; wstr[len] != 0; len++)
		;

	return len;
}
314
/** Get number of characters in a string with size limit.
 *
 * Characters are counted by decoding at most @a size bytes of @a str
 * with str_decode() until it returns 0.
 *
 * @param str  String to consider.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters decoded.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t count = 0;
	size_t pos = 0;

	for (;;) {
		if (str_decode(str, &pos, size) == 0)
			break;

		count++;
	}

	return count;
}
333
/** Get number of characters in a wide string with size limit.
 *
 * Only whole wide characters lying within the first @a size bytes
 * are examined; counting stops at the first null wide character.
 *
 * @param str  Wide string to consider.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of wide characters counted.
 *
 */
size_t wstr_nlength(const wchar_t *str, size_t size)
{
	/* Ignore a trailing partial wide character */
	const size_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
	size_t len = 0;
	size_t offset;

	for (offset = 0; offset < limit; offset += sizeof(wchar_t)) {
		if (str[len] == 0)
			break;

		len++;
	}

	return len;
}
355
/** Check whether character is plain ASCII.
 *
 * @param ch Character to check.
 *
 * @return True if @a ch lies in the range 0 to 127 inclusive.
 *
 */
bool ascii_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 0x7f);
}
368
/** Check whether character is valid
 *
 * @param ch Character to check.
 *
 * @return True if @a ch lies in the range 0 to 0x10ffff inclusive.
 *
 */
bool chr_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 0x10ffff);
}
[936351c1]381
[f2b8cdc]382/** Compare two NULL terminated strings.
383 *
384 * Do a char-by-char comparison of two NULL-terminated strings.
385 * The strings are considered equal iff they consist of the same
386 * characters on the minimum of their lengths.
387 *
388 * @param s1 First string to compare.
389 * @param s2 Second string to compare.
390 *
391 * @return 0 if the strings are equal, -1 if first is smaller,
392 * 1 if second smaller.
393 *
394 */
395int str_cmp(const char *s1, const char *s2)
396{
397 wchar_t c1 = 0;
398 wchar_t c2 = 0;
399
400 size_t off1 = 0;
401 size_t off2 = 0;
402
403 while (true) {
404 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
405 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
406
407 if (c1 < c2)
408 return -1;
409
410 if (c1 > c2)
411 return 1;
412
413 if (c1 == 0 || c2 == 0)
414 break;
415 }
416
417 return 0;
418}
419
420/** Compare two NULL terminated strings with length limit.
421 *
422 * Do a char-by-char comparison of two NULL-terminated strings.
423 * The strings are considered equal iff they consist of the same
424 * characters on the minimum of their lengths and the length limit.
425 *
426 * @param s1 First string to compare.
427 * @param s2 Second string to compare.
428 * @param max_len Maximum number of characters to consider.
429 *
430 * @return 0 if the strings are equal, -1 if first is smaller,
431 * 1 if second smaller.
432 *
433 */
[d4a3ee5]434int str_lcmp(const char *s1, const char *s2, size_t max_len)
[f2b8cdc]435{
436 wchar_t c1 = 0;
437 wchar_t c2 = 0;
438
439 size_t off1 = 0;
440 size_t off2 = 0;
441
[d4a3ee5]442 size_t len = 0;
[f2b8cdc]443
444 while (true) {
445 if (len >= max_len)
446 break;
447
448 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
449 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
450
451 if (c1 < c2)
452 return -1;
453
454 if (c1 > c2)
455 return 1;
456
457 if (c1 == 0 || c2 == 0)
458 break;
459
460 ++len;
461 }
462
463 return 0;
464
465}
466
[6eb2e96]467/** Copy string.
[f2b8cdc]468 *
[6eb2e96]469 * Copy source string @a src to destination buffer @a dest.
470 * No more than @a size bytes are written. If the size of the output buffer
471 * is at least one byte, the output string will always be well-formed, i.e.
472 * null-terminated and containing only complete characters.
[f2b8cdc]473 *
[abf09311]474 * @param dest Destination buffer.
[6700ee2]475 * @param count Size of the destination buffer (must be > 0).
[6eb2e96]476 * @param src Source string.
[f2b8cdc]477 */
[6eb2e96]478void str_cpy(char *dest, size_t size, const char *src)
[f2b8cdc]479{
[6700ee2]480 /* There must be space for a null terminator in the buffer. */
481 assert(size > 0);
[f2b8cdc]482
[abf09311]483 size_t src_off = 0;
484 size_t dest_off = 0;
485
486 wchar_t ch;
[6eb2e96]487 while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
488 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
489 break;
490 }
[abf09311]491
[6eb2e96]492 dest[dest_off] = '\0';
493}
494
495/** Copy size-limited substring.
496 *
[6700ee2]497 * Copy prefix of string @a src of max. size @a size to destination buffer
498 * @a dest. No more than @a size bytes are written. The output string will
499 * always be well-formed, i.e. null-terminated and containing only complete
500 * characters.
[6eb2e96]501 *
502 * No more than @a n bytes are read from the input string, so it does not
503 * have to be null-terminated.
504 *
[abf09311]505 * @param dest Destination buffer.
[6700ee2]506 * @param count Size of the destination buffer (must be > 0).
[6eb2e96]507 * @param src Source string.
[abf09311]508 * @param n Maximum number of bytes to read from @a src.
[6eb2e96]509 */
510void str_ncpy(char *dest, size_t size, const char *src, size_t n)
511{
[6700ee2]512 /* There must be space for a null terminator in the buffer. */
513 assert(size > 0);
[f2b8cdc]514
[abf09311]515 size_t src_off = 0;
516 size_t dest_off = 0;
517
518 wchar_t ch;
[6eb2e96]519 while ((ch = str_decode(src, &src_off, n)) != 0) {
520 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
[f2b8cdc]521 break;
522 }
[abf09311]523
[6eb2e96]524 dest[dest_off] = '\0';
[f2b8cdc]525}
526
/** Append one string to another.
 *
 * Append the string @a src to the string already stored in the buffer
 * @a dest, whose total size is @a size bytes. The appended part is
 * written by str_cpy(), so the result stays null-terminated and holds
 * only complete characters. If the existing string already occupies
 * @a size bytes or more, nothing is done.
 *
 * @param dest Destination buffer holding a null-terminated string.
 * @param size Size of the destination buffer.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	const size_t used = str_size(dest);

	/* Append only if at least the terminator's byte is still free */
	if (used < size)
		str_cpy(dest + used, size - used, src);
}
548
[dcb74c0a]549/** Convert space-padded ASCII to string.
550 *
551 * Common legacy text encoding in hardware is 7-bit ASCII fitted into
552 * a fixed-with byte buffer (bit 7 always zero), right-padded with spaces
553 * (ASCII 0x20). Convert space-padded ascii to string representation.
554 *
555 * If the text does not fit into the destination buffer, the function converts
556 * as many characters as possible and returns EOVERFLOW.
557 *
558 * If the text contains non-ASCII bytes (with bit 7 set), the whole string is
559 * converted anyway and invalid characters are replaced with question marks
560 * (U_SPECIAL) and the function returns EIO.
561 *
562 * Regardless of return value upon return @a dest will always be well-formed.
563 *
564 * @param dest Destination buffer
565 * @param size Size of destination buffer
566 * @param src Space-padded ASCII.
567 * @param n Size of the source buffer in bytes.
568 *
569 * @return EOK on success, EOVERFLOW if the text does not fit
570 * destination buffer, EIO if the text contains
571 * non-ASCII bytes.
572 */
573int spascii_to_str(char *dest, size_t size, const uint8_t *src, size_t n)
574{
575 size_t sidx;
576 size_t didx;
577 size_t dlast;
578 uint8_t byte;
579 int rc;
580 int result;
581
582 /* There must be space for a null terminator in the buffer. */
583 assert(size > 0);
584 result = EOK;
585
586 didx = 0;
587 dlast = 0;
588 for (sidx = 0; sidx < n; ++sidx) {
589 byte = src[sidx];
590 if (!ascii_check(byte)) {
591 byte = U_SPECIAL;
592 result = EIO;
593 }
594
595 rc = chr_encode(byte, dest, &didx, size - 1);
596 if (rc != EOK) {
597 assert(rc == EOVERFLOW);
598 dest[didx] = '\0';
599 return rc;
600 }
601
602 /* Remember dest index after last non-empty character */
603 if (byte != 0x20)
604 dlast = didx;
605 }
606
607 /* Terminate string after last non-empty character */
608 dest[dlast] = '\0';
609 return result;
610}
611
[0f06dbc]612/** Convert wide string to string.
[f2b8cdc]613 *
[0f06dbc]614 * Convert wide string @a src to string. The output is written to the buffer
615 * specified by @a dest and @a size. @a size must be non-zero and the string
616 * written will always be well-formed.
[f2b8cdc]617 *
[0f06dbc]618 * @param dest Destination buffer.
619 * @param size Size of the destination buffer.
620 * @param src Source wide string.
[f2b8cdc]621 */
[0f06dbc]622void wstr_to_str(char *dest, size_t size, const wchar_t *src)
[f2b8cdc]623{
624 wchar_t ch;
[0f06dbc]625 size_t src_idx;
626 size_t dest_off;
627
628 /* There must be space for a null terminator in the buffer. */
629 assert(size > 0);
[f2b8cdc]630
[0f06dbc]631 src_idx = 0;
632 dest_off = 0;
633
[f2b8cdc]634 while ((ch = src[src_idx++]) != 0) {
[0f06dbc]635 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
[f2b8cdc]636 break;
637 }
[0f06dbc]638
639 dest[dest_off] = '\0';
[f2b8cdc]640}
641
[b67c7d64]642/** Convert wide string to new string.
643 *
644 * Convert wide string @a src to string. Space for the new string is allocated
645 * on the heap.
646 *
647 * @param src Source wide string.
648 * @return New string.
649 */
650char *wstr_to_astr(const wchar_t *src)
651{
652 char dbuf[STR_BOUNDS(1)];
653 char *str;
654 wchar_t ch;
655
656 size_t src_idx;
657 size_t dest_off;
658 size_t dest_size;
659
660 /* Compute size of encoded string. */
661
662 src_idx = 0;
663 dest_size = 0;
664
665 while ((ch = src[src_idx++]) != 0) {
666 dest_off = 0;
667 if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
668 break;
669 dest_size += dest_off;
670 }
671
672 str = malloc(dest_size + 1);
673 if (str == NULL)
674 return NULL;
675
676 /* Encode string. */
677
678 src_idx = 0;
679 dest_off = 0;
680
681 while ((ch = src[src_idx++]) != 0) {
682 if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
683 break;
684 }
685
686 str[dest_size] = '\0';
687 return str;
688}
689
690
[da2bd08]691/** Convert string to wide string.
692 *
693 * Convert string @a src to wide string. The output is written to the
[0f06dbc]694 * buffer specified by @a dest and @a dlen. @a dlen must be non-zero
695 * and the wide string written will always be null-terminated.
[da2bd08]696 *
697 * @param dest Destination buffer.
698 * @param dlen Length of destination buffer (number of wchars).
699 * @param src Source string.
700 */
701void str_to_wstr(wchar_t *dest, size_t dlen, const char *src)
702{
703 size_t offset;
704 size_t di;
705 wchar_t c;
706
707 assert(dlen > 0);
708
709 offset = 0;
710 di = 0;
711
712 do {
713 if (di >= dlen - 1)
714 break;
715
716 c = str_decode(src, &offset, STR_NO_LIMIT);
717 dest[di++] = c;
718 } while (c != '\0');
719
720 dest[dlen - 1] = '\0';
721}
722
/** Convert string to newly allocated wide string.
 *
 * Decode the string @a str into a new null-terminated wide string
 * obtained from calloc().
 *
 * @param str Source string.
 *
 * @return Newly allocated wide string, or NULL if the allocation
 *         failed.
 */
wchar_t *str_to_awstr(const char *str)
{
	/* One extra element for the terminator */
	const size_t slots = str_length(str) + 1;

	wchar_t *wide = calloc(slots, sizeof(wchar_t));
	if (wide != NULL)
		str_to_wstr(wide, slots, str);

	return wide;
}
740
[f2b8cdc]741/** Find first occurence of character in string.
742 *
743 * @param str String to search.
744 * @param ch Character to look for.
745 *
746 * @return Pointer to character in @a str or NULL if not found.
747 */
[dd2cfa7]748char *str_chr(const char *str, wchar_t ch)
[f2b8cdc]749{
750 wchar_t acc;
751 size_t off = 0;
[f2d2c7ba]752 size_t last = 0;
[f2b8cdc]753
754 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
755 if (acc == ch)
[dd2cfa7]756 return (char *) (str + last);
[f2d2c7ba]757 last = off;
[f2b8cdc]758 }
759
760 return NULL;
761}
762
[7afb4a5]763/** Find last occurence of character in string.
764 *
765 * @param str String to search.
766 * @param ch Character to look for.
767 *
768 * @return Pointer to character in @a str or NULL if not found.
769 */
[dd2cfa7]770char *str_rchr(const char *str, wchar_t ch)
[7afb4a5]771{
772 wchar_t acc;
773 size_t off = 0;
[f2d2c7ba]774 size_t last = 0;
[d4a3ee5]775 const char *res = NULL;
[f2d2c7ba]776
[7afb4a5]777 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
778 if (acc == ch)
[f2d2c7ba]779 res = (str + last);
780 last = off;
[7afb4a5]781 }
[f2d2c7ba]782
[dd2cfa7]783 return (char *) res;
[7afb4a5]784}
785
/** Insert a wide character into a wide string.
 *
 * Insert the wide character @a ch at position @a pos of the wide
 * string @a str. The characters from that position on, including the
 * null terminator, are shifted one place towards the end.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds (@a pos beyond the end of the string or
 *         @a pos + 1 greater than @a max_pos).
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	const size_t len = wstr_length(str);

	if (pos > len)
		return false;

	if (pos + 1 > max_pos)
		return false;

	/* Shift the tail, terminator included, starting from the end */
	size_t dst = len + 1;
	while (dst > pos) {
		str[dst] = str[dst - 1];
		dst--;
	}

	str[pos] = ch;

	return true;
}
815
/** Remove a wide character from a wide string.
 *
 * Remove the wide character at position @a pos of the wide string
 * @a str. The characters following it, including the null terminator,
 * are shifted one place towards the beginning.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	const size_t len = wstr_length(str);

	if (pos >= len)
		return false;

	/* Pull the tail, terminator included, one place to the left */
	size_t dst;
	for (dst = pos; dst < len; dst++)
		str[dst] = str[dst + 1];

	return true;
}
841
/** Compare two strings ignoring case.
 *
 * The strings are compared byte by byte after passing each byte
 * through tolower().
 *
 * @param a First string to compare.
 * @param b Second string to compare.
 *
 * @return Difference of the lower-cased bytes at the first position
 *         where the strings differ or where one of them ends; zero
 *         if no such difference exists.
 */
int stricmp(const char *a, const char *b)
{
	/* Advance while both strings continue and agree */
	while ((*a != 0) && (*b != 0) && (tolower(*a) == tolower(*b))) {
		a++;
		b++;
	}

	return (tolower(*a) - tolower(*b));
}
851
/** Get the numeric value of a digit character.
 *
 * @param c Character to examine.
 *
 * @return 0-9 for decimal digits, 10-35 for letters (either case),
 *         -1 if @a c is neither.
 */
static int strtoul_digit_value(char c)
{
	if ((c >= '0') && (c <= '9'))
		return c - '0';

	if ((c >= 'a') && (c <= 'z'))
		return c - 'a' + 10;

	if ((c >= 'A') && (c <= 'Z'))
		return c - 'A' + 10;

	return -1;
}

/** Convert string to a number.
 * Core of strtol and strtoul functions.
 *
 * Leading whitespace and an optional sign are skipped, then as many
 * digits valid in the given base as possible are consumed.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character (or @a nptr if no digits were found).
 *               Nothing is stored if @a base is invalid.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @param sgn    It's set to 1 if minus found (left untouched otherwise).
 * @return       Result of conversion, ULONG_MAX if the value does not
 *               fit into unsigned long, 0 if no conversion was done.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
	const char *str = nptr;

	/* ctype functions require a value representable as unsigned char */
	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*sgn = 1;
		++str;
	} else if (*str == '+')
		++str;

	if ((base < 0) || (base == 1) || (base > 36)) {
		/* FIXME: set errno to EINVAL */
		return 0;
	}

	/*
	 * A "0x" prefix is skipped only if a hexadecimal digit follows.
	 * Otherwise the leading '0' is the whole number and conversion
	 * ends at the 'x'.
	 */
	if (((base == 0) || (base == 16)) && (str[0] == '0') &&
	    ((str[1] == 'x') || (str[1] == 'X'))) {
		int first = strtoul_digit_value(str[2]);

		if ((first >= 0) && (first < 16)) {
			base = 16;
			str += 2;
		}
	}

	if (base == 0)
		base = (*str == '0') ? 8 : 10;

	const char *digits = str;
	unsigned long result = 0;
	int overflow = 0;

	for (;; ++str) {
		int digit = strtoul_digit_value(*str);

		/* A valid digit is strictly smaller than the base */
		if ((digit < 0) || (digit >= base))
			break;

		/* After an overflow the remaining digits are only skipped */
		if (overflow)
			continue;

		/* Would result * base + digit exceed ULONG_MAX? */
		if (result > (ULONG_MAX - (unsigned long) digit) /
		    (unsigned long) base) {
			overflow = 1;
			continue;
		}

		result = result * (unsigned long) base + (unsigned long) digit;
	}

	if (str == digits) {
		/*
		 * No number was found => first invalid character is the first
		 * character of the string.
		 */
		/* FIXME: set errno to EINVAL */
		str = nptr;
		result = 0;
	}

	if (endptr)
		*endptr = (char *) str;

	if (overflow) {
		/* FIXME: errno = ERANGE */
		return ULONG_MAX;
	}

	return result;
}
944
/** Convert initial part of string to long int according to given base.
 *
 * The parsing itself (leading whitespace, optional sign, base prefix
 * and digits) is done by _strtoul(); this function applies the sign
 * and clamps the magnitude to the range of long int.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return       Result of conversion; LONG_MAX or LONG_MIN if the
 *               value is out of range.
 */
long int strtol(const char *nptr, char **endptr, int base)
{
	char negative = 0;
	unsigned long magnitude = _strtoul(nptr, endptr, base, &negative);

	/* Common case: the magnitude fits, only the sign is applied */
	if (magnitude <= LONG_MAX)
		return (negative ? -magnitude : magnitude);

	/* The magnitude LONG_MAX + 1 is acceptable for a negative number */
	if ((negative) && (magnitude == (unsigned long) (LONG_MAX) + 1)) {
		/* FIXME: set 0 to errno */
		return magnitude;
	}

	/* FIXME: set ERANGE to errno */
	return (negative ? LONG_MIN : LONG_MAX);
}
976
/** Duplicate string.
 *
 * Allocate a buffer with malloc() and copy the source string into it
 * using str_cpy(). Because str_cpy() decodes and re-encodes every
 * character, the duplicate is null-terminated but may differ from the
 * source on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	/* Room for the string and its terminator */
	const size_t bytes = str_size(src) + 1;

	char *copy = (char *) malloc(bytes);
	if (copy != NULL)
		str_cpy(copy, bytes, src);

	return copy;
}
1003
/** Duplicate string with size limit.
 *
 * Allocate a buffer with malloc() and copy up to @a n bytes of the
 * source string into it using str_ncpy(). No more than @a n + 1 bytes
 * are allocated; less if the source string is shorter.
 *
 * At most @a n bytes of @a src are examined, so the source does not
 * have to be null-terminated as long as @a n bytes are readable.
 *
 * Because str_ncpy() decodes and re-encodes every character, the
 * duplicate is null-terminated but may differ from the source on the
 * byte level.
 *
 * @param src Source string.
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	/*
	 * Measure the source without looking past the first n bytes;
	 * scanning for the terminator unconditionally would overrun
	 * a source that is not null-terminated.
	 */
	size_t size = 0;
	while ((size < n) && (src[size] != '\0'))
		size++;

	char *dest = (char *) malloc(size + 1);
	if (dest == NULL)
		return (char *) NULL;

	str_ncpy(dest, size + 1, src, size);
	return dest;
}
1037
[672a24d]1038
/** Convert initial part of string to unsigned long according to given base.
 *
 * The parsing itself (leading whitespace, optional sign, base prefix
 * and digits) is done by _strtoul(). If a minus sign was found, the
 * result is negated in unsigned arithmetic.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return       Result of conversion.
 */
unsigned long strtoul(const char *nptr, char **endptr, int base)
{
	char negative = 0;
	unsigned long magnitude = _strtoul(nptr, endptr, base, &negative);

	if (negative)
		return -magnitude;

	return magnitude;
}
[c594489]1061
/** Split string into tokens.
 *
 * Wrapper around strtok_r() which keeps the scanning position in
 * a static variable shared by all callers.
 *
 * @param s     String to tokenize, or NULL to continue with the string
 *              from the previous call.
 * @param delim String of delimiter characters.
 *
 * @return Next token, or NULL if there are no more tokens.
 */
char *strtok(char *s, const char *delim)
{
	static char *saved_pos;

	return strtok_r(s, delim, &saved_pos);
}
[69df837f]1068
/** Split string into tokens (reentrant variant).
 *
 * Skip leading delimiters, then return the following run of
 * non-delimiter characters as a token. The delimiter ending the token
 * is overwritten with a null terminator, so the string is modified.
 *
 * @param s     String to tokenize, or NULL to continue from the
 *              position stored in @a *next.
 * @param delim String of delimiter characters.
 * @param next  Storage for the position where the next call resumes.
 *
 * @return Next token, or NULL if there are no more tokens.
 */
char *strtok_r(char *s, const char *delim, char **next)
{
	char *cur = (s != NULL) ? s : *next;

	/* Skip over leading delimiters. */
	while ((*cur != 0) && (str_chr(delim, *cur) != NULL))
		++cur;

	char *token = cur;

	/* Skip over token characters. */
	while ((*cur != 0) && (str_chr(delim, *cur) == NULL))
		++cur;

	/* Resume after the delimiter, or stay at the end of the string. */
	*next = (*cur != 0) ? cur + 1 : cur;

	if (token == cur) {
		return NULL;  /* No more tokens. */
	}

	/* Overwrite delimiter with null terminator. */
	*cur = '\0';
	return token;
}
1093
/** Convert string to uint64_t (internal variant).
 *
 * Leading whitespace and an optional sign are skipped. If @a base is
 * zero, the base is derived from the prefix of the number: "0b" means
 * binary, "0o" or a bare leading "0" octal, "0d" or "0t" decimal,
 * "0x" hexadecimal (the letters may be upper case), anything else
 * decimal. Then as many digits valid in the base as possible are
 * consumed.
 *
 * @param nptr   Pointer to string.
 * @param endptr Pointer to the first invalid character is stored here.
 *               If no digits were decoded, @a nptr is stored. On
 *               overflow, pointer to the digit that caused it is stored.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @param neg    Indication of unary minus is stored here.
 * @param result Result of the conversion.
 *
 * @return EOK if conversion was successful, EINVAL if the base is
 *         invalid or no digits were decoded, EOVERFLOW if the number
 *         does not fit into uint64_t.
 *
 */
static int str_uint(const char *nptr, char **endptr, unsigned int base,
    bool *neg, uint64_t *result)
{
	assert(endptr != NULL);
	assert(neg != NULL);
	assert(result != NULL);

	*neg = false;
	const char *str = nptr;

	/* Ignore leading whitespace */
	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*neg = true;
		str++;
	} else if (*str == '+')
		str++;

	if (base == 0) {
		/* Decode base if not specified */
		base = 10;

		if (*str == '0') {
			base = 8;

			/*
			 * Only a recognized two-character prefix is skipped;
			 * a bare leading zero is left in place and decoded
			 * as an octal digit.
			 */
			switch (str[1]) {
			case 'b':
			case 'B':
				base = 2;
				str += 2;
				break;
			case 'o':
			case 'O':
				base = 8;
				str += 2;
				break;
			case 'd':
			case 'D':
			case 't':
			case 'T':
				base = 10;
				str += 2;
				break;
			case 'x':
			case 'X':
				base = 16;
				str += 2;
				break;
			default:
				break;
			}
		}
	} else {
		/* Check base range */
		if ((base < 2) || (base > 36)) {
			*endptr = (char *) str;
			return EINVAL;
		}
	}

	*result = 0;
	const char *startstr = str;

	while (*str != 0) {
		unsigned int digit;

		if ((*str >= 'a') && (*str <= 'z'))
			digit = *str - 'a' + 10;
		else if ((*str >= 'A') && (*str <= 'Z'))
			digit = *str - 'A' + 10;
		else if ((*str >= '0') && (*str <= '9'))
			digit = *str - '0';
		else
			break;

		if (digit >= base)
			break;

		/*
		 * Check for overflow before multiplying. Comparing the
		 * wrapped product with the previous value is not reliable,
		 * because the product can wrap around and still end up
		 * greater than the previous value.
		 */
		if (*result > (UINT64_MAX - digit) / base) {
			/* Overflow */
			*endptr = (char *) str;
			return EOVERFLOW;
		}

		*result = (*result) * base + digit;
		str++;
	}

	if (str == startstr) {
		/*
		 * No digits were decoded => first invalid character is
		 * the first character of the string.
		 */
		str = nptr;
	}

	*endptr = (char *) str;

	if (str == nptr)
		return EINVAL;

	return EOK;
}
1213
1214/** Convert string to uint64_t.
1215 *
1216 * @param nptr Pointer to string.
1217 * @param endptr If not NULL, pointer to the first invalid character
1218 * is stored here.
1219 * @param base Zero or number between 2 and 36 inclusive.
1220 * @param strict Do not allow any trailing characters.
1221 * @param result Result of the conversion.
1222 *
1223 * @return EOK if conversion was successful.
1224 *
1225 */
1226int str_uint64(const char *nptr, char **endptr, unsigned int base,
1227 bool strict, uint64_t *result)
1228{
1229 assert(result != NULL);
1230
1231 bool neg;
1232 char *lendptr;
1233 int ret = str_uint(nptr, &lendptr, base, &neg, result);
1234
1235 if (endptr != NULL)
1236 *endptr = (char *) lendptr;
1237
1238 if (ret != EOK)
1239 return ret;
1240
1241 /* Do not allow negative values */
1242 if (neg)
1243 return EINVAL;
1244
1245 /* Check whether we are at the end of
1246 the string in strict mode */
1247 if ((strict) && (*lendptr != 0))
1248 return EINVAL;
1249
1250 return EOK;
1251}
1252
1253/** Convert string to size_t.
1254 *
1255 * @param nptr Pointer to string.
1256 * @param endptr If not NULL, pointer to the first invalid character
1257 * is stored here.
1258 * @param base Zero or number between 2 and 36 inclusive.
1259 * @param strict Do not allow any trailing characters.
1260 * @param result Result of the conversion.
1261 *
1262 * @return EOK if conversion was successful.
1263 *
1264 */
1265int str_size_t(const char *nptr, char **endptr, unsigned int base,
1266 bool strict, size_t *result)
1267{
1268 assert(result != NULL);
1269
1270 bool neg;
1271 char *lendptr;
1272 uint64_t res;
1273 int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1274
1275 if (endptr != NULL)
1276 *endptr = (char *) lendptr;
1277
1278 if (ret != EOK)
1279 return ret;
1280
1281 /* Do not allow negative values */
1282 if (neg)
1283 return EINVAL;
1284
1285 /* Check whether we are at the end of
1286 the string in strict mode */
1287 if ((strict) && (*lendptr != 0))
1288 return EINVAL;
1289
1290 /* Check for overflow */
1291 size_t _res = (size_t) res;
1292 if (_res != res)
1293 return EOVERFLOW;
1294
1295 *result = _res;
1296
1297 return EOK;
1298}
1299
/** Scale a value down and pick the matching decimal (SI) order suffix.
 *
 * A value is scaled only once it exceeds 1000 units of a given order,
 * i.e. values up to 1000000 are returned unchanged, values above that
 * are expressed in thousands ('k'), and so on.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value (integer division) is stored here.
 * @param suffix Order suffix is stored here: 'k', 'M', 'G', 'T', 'P'
 *               or 'E', or a space if the value was not scaled.
 *
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	/* Ordered from the largest order; the first match wins. */
	static const struct {
		uint64_t threshold;  /* Scale only values above this. */
		uint64_t divisor;    /* One unit of the order. */
		char suffix;
	} orders[] = {
		{ UINT64_C(10000000000000000000), UINT64_C(1000000000000000000), 'E' },
		{ UINT64_C(1000000000000000000), UINT64_C(1000000000000000), 'P' },
		{ UINT64_C(1000000000000000), UINT64_C(1000000000000), 'T' },
		{ UINT64_C(1000000000000), UINT64_C(1000000000), 'G' },
		{ UINT64_C(1000000000), UINT64_C(1000000), 'M' },
		{ UINT64_C(1000000), UINT64_C(1000), 'k' }
	};
	const unsigned int count = sizeof(orders) / sizeof(orders[0]);

	for (unsigned int i = 0; i < count; i++) {
		if (val > orders[i].threshold) {
			*rv = val / orders[i].divisor;
			*suffix = orders[i].suffix;
			return;
		}
	}

	/* Small enough to be shown without scaling. */
	*rv = val;
	*suffix = ' ';
}
1325
/** Scale a byte count down and pick the matching binary order suffix.
 *
 * A value is scaled only once it exceeds 1024 units of a given order,
 * i.e. values up to 1048576 are returned unchanged, values above that
 * are expressed in KiB, and so on.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value (integer division) is stored here.
 * @param suffix Pointer to a string literal with the unit is stored
 *               here: "KiB", "MiB", "GiB", "TiB" or "PiB", or the
 *               plain byte unit if the value was not scaled.
 * @param fixed  Selects the space-padded variant of the plain byte
 *               unit instead of the bare "B".
 *
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	/* Ordered from the largest order; the first match wins. */
	static const struct {
		uint64_t threshold;  /* Scale only values above this. */
		uint64_t divisor;    /* One unit of the order. */
		const char *suffix;
	} orders[] = {
		{ UINT64_C(1152921504606846976), UINT64_C(1125899906842624), "PiB" },
		{ UINT64_C(1125899906842624), UINT64_C(1099511627776), "TiB" },
		{ UINT64_C(1099511627776), UINT64_C(1073741824), "GiB" },
		{ UINT64_C(1073741824), UINT64_C(1048576), "MiB" },
		{ UINT64_C(1048576), UINT64_C(1024), "KiB" }
	};
	const unsigned int count = sizeof(orders) / sizeof(orders[0]);

	for (unsigned int i = 0; i < count; i++) {
		if (val > orders[i].threshold) {
			*rv = val / orders[i].divisor;
			*suffix = orders[i].suffix;
			return;
		}
	}

	/* Small enough to be shown in plain bytes. */
	*rv = val;

	/*
	 * NOTE(review): the padded literal presumably exists to line up
	 * with the three-letter suffixes; it is kept exactly as found,
	 * confirm the intended padding width.
	 */
	if (fixed)
		*suffix = "B ";
	else
		*suffix = "B";
}
1352
[a46da63]1353/** @}
[b2951e2]1354 */
Note: See TracBrowser for help on using the repository browser.