source: mainline/kernel/generic/src/lib/str.c@ 314f4b59

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 314f4b59 was a35b458, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 7 years ago

style: Remove trailing whitespace on _all_ lines, including empty ones, for particular file types.

Command used: tools/srepl '\s\+$' '' -- *.c *.h *.py *.sh *.s *.S *.ag

Currently, whitespace on empty lines is very inconsistent.
There are two basic choices: Either remove the whitespace, or keep empty lines
indented to the level of surrounding code. The former is AFAICT more common,
and also much easier to do automatically.

Alternatively, we could write script for automatic indentation, and use that
instead. However, if such a script exists, it's possible to use the indented
style locally, by having the editor apply relevant conversions on load/save,
without affecting remote repository. IMO, it makes more sense to adopt
the simpler rule.

  • Property mode set to 100644
File size: 24.1 KB
RevLine 
[16da5f8e]1/*
2 * Copyright (c) 2001-2004 Jakub Jermar
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
[2f57690]29/** @addtogroup generic
[16da5f8e]30 * @{
31 */
32
33/**
34 * @file
[82bb9c1]35 * @brief String functions.
36 *
 * Strings and characters use the Universal Character Set (UCS). Standard
 * strings, referred to simply as strings, are encoded in UTF-8. Wide strings
 * (encoded in UTF-32) are supported to a limited degree. A single character
 * is represented as wchar_t.@n
[82bb9c1]41 *
[b888d5f]42 * Overview of the terminology:@n
[82bb9c1]43 *
[b888d5f]44 * Term Meaning
45 * -------------------- ----------------------------------------------------
46 * byte 8 bits stored in uint8_t (unsigned 8 bit integer)
[82bb9c1]47 *
[b888d5f]48 * character UTF-32 encoded Unicode character, stored in wchar_t
49 * (signed 32 bit integer), code points 0 .. 1114111
50 * are valid
[82bb9c1]51 *
[b888d5f]52 * ASCII character 7 bit encoded ASCII character, stored in char
53 * (usually signed 8 bit integer), code points 0 .. 127
54 * are valid
55 *
56 * string UTF-8 encoded NULL-terminated Unicode string, char *
57 *
58 * wide string UTF-32 encoded NULL-terminated Unicode string,
59 * wchar_t *
60 *
61 * [wide] string size number of BYTES in a [wide] string (excluding
62 * the NULL-terminator), size_t
63 *
64 * [wide] string length number of CHARACTERS in a [wide] string (excluding
[98000fb]65 * the NULL-terminator), size_t
[b888d5f]66 *
67 * [wide] string width number of display cells on a monospace display taken
[98000fb]68 * by a [wide] string, size_t
[b888d5f]69 *
70 *
71 * Overview of string metrics:@n
72 *
73 * Metric Abbrev. Type Meaning
74 * ------ ------ ------ -------------------------------------------------
75 * size n size_t number of BYTES in a string (excluding the
76 * NULL-terminator)
77 *
[98000fb]78 * length l size_t number of CHARACTERS in a string (excluding the
[b888d5f]79 * null terminator)
80 *
[98000fb]81 * width w size_t number of display cells on a monospace display
[b888d5f]82 * taken by a string
83 *
84 *
85 * Function naming prefixes:@n
86 *
87 * chr_ operate on characters
88 * ascii_ operate on ASCII characters
89 * str_ operate on strings
90 * wstr_ operate on wide strings
91 *
92 * [w]str_[n|l|w] operate on a prefix limited by size, length
93 * or width
94 *
95 *
96 * A specific character inside a [wide] string can be referred to by:@n
97 *
98 * pointer (char *, wchar_t *)
99 * byte offset (size_t)
[98000fb]100 * character index (size_t)
[82bb9c1]101 *
[16da5f8e]102 */
103
[19f857a]104#include <str.h>
[16da5f8e]105#include <print.h>
106#include <cpu.h>
107#include <arch/asm.h>
108#include <arch.h>
[d09f84e6]109#include <errno.h>
[b888d5f]110#include <align.h>
[63e27ef]111#include <assert.h>
[30a5470]112#include <macros.h>
[1066041]113#include <mm/slab.h>
[16da5f8e]114
/** Evaluate a condition only if wchar_t is a signed type.
 *
 * When the compiler defines __WCHAR_UNSIGNED__, the macro ignores @a cond
 * and expands to true (used for tests such as ch >= 0 below).
 */
#ifdef __WCHAR_UNSIGNED__
	#define WCHAR_SIGNED_CHECK(cond)  (true)
#else
	#define WCHAR_SIGNED_CHECK(cond)  (cond)
#endif

/** Byte mask consisting of lowest @a n bits (out of 8), 0 <= n <= 8 */
#define LO_MASK_8(n)  ((uint8_t) ((1U << (n)) - 1))

/** Mask consisting of lowest @a n bits (out of 32), 0 <= n < 32 */
#define LO_MASK_32(n)  ((uint32_t) ((((uint32_t) 1) << (n)) - 1))

/** Byte mask consisting of highest @a n bits (out of 8), 0 <= n <= 8
 *
 * The result is cast back to uint8_t, otherwise integer promotion of the
 * complemented operand would yield a negative int instead of a byte mask.
 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
[0dd1d444]133
[b888d5f]134/** Decode a single character from a string.
[21a639b7]135 *
[b888d5f]136 * Decode a single character from a string of size @a size. Decoding starts
[e1813cf]137 * at @a offset and this offset is moved to the beginning of the next
138 * character. In case of decoding error, offset generally advances at least
[b888d5f]139 * by one. However, offset is never moved beyond size.
[21a639b7]140 *
[b888d5f]141 * @param str String (not necessarily NULL-terminated).
142 * @param offset Byte offset in string where to start decoding.
143 * @param size Size of the string (in bytes).
144 *
[c8bf88d]145 * @return Value of decoded character, U_SPECIAL on decoding error or
[b888d5f]146 * NULL if attempt to decode beyond @a size.
[21a639b7]147 *
148 */
[b888d5f]149wchar_t str_decode(const char *str, size_t *offset, size_t size)
[21a639b7]150{
[b888d5f]151 if (*offset + 1 > size)
152 return 0;
[a35b458]153
[b888d5f]154 /* First byte read from string */
155 uint8_t b0 = (uint8_t) str[(*offset)++];
[a35b458]156
[b888d5f]157 /* Determine code length */
[a35b458]158
[b888d5f]159 unsigned int b0_bits; /* Data bits in first byte */
160 unsigned int cbytes; /* Number of continuation bytes */
[a35b458]161
[0dd1d444]162 if ((b0 & 0x80) == 0) {
163 /* 0xxxxxxx (Plain ASCII) */
164 b0_bits = 7;
165 cbytes = 0;
166 } else if ((b0 & 0xe0) == 0xc0) {
167 /* 110xxxxx 10xxxxxx */
168 b0_bits = 5;
169 cbytes = 1;
170 } else if ((b0 & 0xf0) == 0xe0) {
171 /* 1110xxxx 10xxxxxx 10xxxxxx */
172 b0_bits = 4;
173 cbytes = 2;
174 } else if ((b0 & 0xf8) == 0xf0) {
175 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
176 b0_bits = 3;
177 cbytes = 3;
178 } else {
[b888d5f]179 /* 10xxxxxx -- unexpected continuation byte */
[c8bf88d]180 return U_SPECIAL;
[74c8da2c]181 }
[a35b458]182
[b888d5f]183 if (*offset + cbytes > size)
[c8bf88d]184 return U_SPECIAL;
[a35b458]185
[b888d5f]186 wchar_t ch = b0 & LO_MASK_8(b0_bits);
[a35b458]187
[b888d5f]188 /* Decode continuation bytes */
[0dd1d444]189 while (cbytes > 0) {
[b888d5f]190 uint8_t b = (uint8_t) str[(*offset)++];
[a35b458]191
[b888d5f]192 /* Must be 10xxxxxx */
193 if ((b & 0xc0) != 0x80)
[c8bf88d]194 return U_SPECIAL;
[a35b458]195
[b888d5f]196 /* Shift data bits to ch */
[0dd1d444]197 ch = (ch << CONT_BITS) | (wchar_t) (b & LO_MASK_8(CONT_BITS));
[b888d5f]198 cbytes--;
[74c8da2c]199 }
[a35b458]200
[0dd1d444]201 return ch;
[74c8da2c]202}
203
[e1813cf]204/** Encode a single character to string representation.
[74c8da2c]205 *
[e1813cf]206 * Encode a single character to string representation (i.e. UTF-8) and store
207 * it into a buffer at @a offset. Encoding starts at @a offset and this offset
208 * is moved to the position where the next character can be written to.
[74c8da2c]209 *
[b888d5f]210 * @param ch Input character.
211 * @param str Output buffer.
212 * @param offset Byte offset where to start writing.
213 * @param size Size of the output buffer (in bytes).
[74c8da2c]214 *
[d09f84e6]215 * @return EOK if the character was encoded successfully, EOVERFLOW if there
[8e893ae]216 * was not enough space in the output buffer or EINVAL if the character
217 * code was invalid.
[74c8da2c]218 */
[b7fd2a0]219errno_t chr_encode(const wchar_t ch, char *str, size_t *offset, size_t size)
[74c8da2c]220{
[b888d5f]221 if (*offset >= size)
[d09f84e6]222 return EOVERFLOW;
[a35b458]223
[b888d5f]224 if (!chr_check(ch))
[d09f84e6]225 return EINVAL;
[a35b458]226
[b888d5f]227 /* Unsigned version of ch (bit operations should only be done
228 on unsigned types). */
229 uint32_t cc = (uint32_t) ch;
[a35b458]230
[b888d5f]231 /* Determine how many continuation bytes are needed */
[a35b458]232
[b888d5f]233 unsigned int b0_bits; /* Data bits in first byte */
234 unsigned int cbytes; /* Number of continuation bytes */
[a35b458]235
[32704cb]236 if ((cc & ~LO_MASK_32(7)) == 0) {
237 b0_bits = 7;
238 cbytes = 0;
239 } else if ((cc & ~LO_MASK_32(11)) == 0) {
240 b0_bits = 5;
241 cbytes = 1;
242 } else if ((cc & ~LO_MASK_32(16)) == 0) {
243 b0_bits = 4;
244 cbytes = 2;
245 } else if ((cc & ~LO_MASK_32(21)) == 0) {
246 b0_bits = 3;
247 cbytes = 3;
248 } else {
[b888d5f]249 /* Codes longer than 21 bits are not supported */
[d09f84e6]250 return EINVAL;
[74c8da2c]251 }
[a35b458]252
[b888d5f]253 /* Check for available space in buffer */
254 if (*offset + cbytes >= size)
[d09f84e6]255 return EOVERFLOW;
[a35b458]256
[b888d5f]257 /* Encode continuation bytes */
258 unsigned int i;
259 for (i = cbytes; i > 0; i--) {
[e1813cf]260 str[*offset + i] = 0x80 | (cc & LO_MASK_32(CONT_BITS));
[32704cb]261 cc = cc >> CONT_BITS;
[74c8da2c]262 }
[a35b458]263
[b888d5f]264 /* Encode first byte */
[e1813cf]265 str[*offset] = (cc & LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1);
[a35b458]266
[b888d5f]267 /* Advance offset */
268 *offset += cbytes + 1;
[a35b458]269
[d09f84e6]270 return EOK;
[74c8da2c]271}
272
/** Get size of string.
 *
 * Count the bytes occupied by the string @a str, not including the
 * terminating zero byte.
 *
 * @param str NULL-terminated string.
 *
 * @return Number of bytes used by the string.
 *
 */
size_t str_size(const char *str)
{
	const char *end = str;

	/* Walk to the terminating zero byte */
	while (*end != 0)
		end++;

	return (size_t) (end - str);
}
292
/** Get size of wide string.
 *
 * Compute the number of bytes occupied by the wide string @a str, not
 * including the terminating zero character.
 *
 * @param str NULL-terminated wide string.
 *
 * @return Number of bytes used by the wide string.
 *
 */
size_t wstr_size(const wchar_t *str)
{
	size_t nchars = wstr_length(str);

	return nchars * sizeof(wchar_t);
}
307
308/** Get size of string with length limit.
[74c8da2c]309 *
[f25b2819]310 * Get the number of bytes which are used by up to @a max_len first
311 * characters in the string @a str. If @a max_len is greater than
[b888d5f]312 * the length of @a str, the entire string is measured (excluding the
313 * NULL-terminator).
314 *
315 * @param str String to consider.
316 * @param max_len Maximum number of characters to measure.
[74c8da2c]317 *
[b888d5f]318 * @return Number of bytes used by the characters.
[74c8da2c]319 *
320 */
[98000fb]321size_t str_lsize(const char *str, size_t max_len)
[74c8da2c]322{
[98000fb]323 size_t len = 0;
[b888d5f]324 size_t offset = 0;
[a35b458]325
[b888d5f]326 while (len < max_len) {
327 if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
[b54d2f1]328 break;
[a35b458]329
[f25b2819]330 len++;
[21a639b7]331 }
[a35b458]332
[b888d5f]333 return offset;
[74c8da2c]334}
335
/** Get size of wide string with length limit.
 *
 * Get the number of bytes which are used by up to @a max_len first
 * wide characters in the wide string @a str. If @a max_len is greater than
 * the length of @a str, the entire wide string is measured (excluding the
 * NULL-terminator).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	size_t len = 0;

	/*
	 * Count characters directly. Converting max_len to a byte limit
	 * first could overflow size_t for large values of max_len.
	 */
	while ((len < max_len) && (str[len] != 0))
		len++;

	return len * sizeof(wchar_t);
}
353
[b888d5f]354/** Get number of characters in a string.
[82bb9c1]355 *
[b888d5f]356 * @param str NULL-terminated string.
[82bb9c1]357 *
[b888d5f]358 * @return Number of characters in string.
[82bb9c1]359 *
360 */
[98000fb]361size_t str_length(const char *str)
[82bb9c1]362{
[98000fb]363 size_t len = 0;
[b888d5f]364 size_t offset = 0;
[a35b458]365
[b888d5f]366 while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
367 len++;
[a35b458]368
[b888d5f]369 return len;
[82bb9c1]370}
371
/** Get number of characters in a wide string.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return Number of characters in @a wstr (excluding the terminator).
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
	size_t count = 0;

	/* Advance until the terminating zero character */
	while (wstr[count] != 0)
		count++;

	return count;
}
388
/** Get number of characters in a string with size limit.
 *
 * Counting stops at the terminating zero character or once @a size bytes
 * have been examined, whichever comes first.
 *
 * @param str  NULL-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in string.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t pos = 0;
	size_t count = 0;

	for (;;) {
		if (str_decode(str, &pos, size) == 0)
			return count;

		count++;
	}
}
407
/** Get number of characters in a wide string with size limit.
 *
 * Only whole wide characters that fit into the first @a size bytes are
 * considered; counting also stops at the terminating zero character.
 *
 * @param str  NULL-terminated wide string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in wide string.
 *
 */
size_t wstr_nlength(const wchar_t *str, size_t size)
{
	/* Number of complete wide characters within the byte limit */
	const size_t max_chars = size / sizeof(wchar_t);
	size_t count = 0;

	while ((count < max_chars) && (str[count] != 0))
		count++;

	return count;
}
429
/** Check whether character is plain ASCII.
 *
 * @param ch Character to check.
 *
 * @return True if the character code lies in the range 0 .. 127.
 *
 */
bool ascii_check(wchar_t ch)
{
	/* The lower bound is only tested when wchar_t is signed */
	return WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 127);
}
[f25b2819]442
/** Check whether character is valid.
 *
 * @param ch Character to check.
 *
 * @return True if the character code lies in the range 0 .. 1114111
 *         (0x10ffff).
 *
 */
bool chr_check(wchar_t ch)
{
	/* The lower bound is only tested when wchar_t is signed */
	return WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 1114111);
}
455
[b888d5f]456/** Compare two NULL terminated strings.
[16da5f8e]457 *
[b888d5f]458 * Do a char-by-char comparison of two NULL-terminated strings.
[4efeab5]459 * The strings are considered equal iff their length is equal
460 * and both strings consist of the same sequence of characters.
461 *
[1772e6d]462 * A string S1 is less than another string S2 if it has a character with
463 * lower value at the first character position where the strings differ.
464 * If the strings differ in length, the shorter one is treated as if
465 * padded by characters with a value of zero.
[16da5f8e]466 *
[b888d5f]467 * @param s1 First string to compare.
468 * @param s2 Second string to compare.
[16da5f8e]469 *
[1772e6d]470 * @return 0 if the strings are equal, -1 if the first is less than the second,
471 * 1 if the second is less than the first.
[16da5f8e]472 *
473 */
[b888d5f]474int str_cmp(const char *s1, const char *s2)
[16da5f8e]475{
[a7b1071]476 wchar_t c1 = 0;
477 wchar_t c2 = 0;
[a35b458]478
[b888d5f]479 size_t off1 = 0;
480 size_t off2 = 0;
[a7b1071]481
482 while (true) {
483 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
484 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
485
[b888d5f]486 if (c1 < c2)
[16da5f8e]487 return -1;
[a35b458]488
[b888d5f]489 if (c1 > c2)
[16da5f8e]490 return 1;
[a7b1071]491
492 if (c1 == 0 || c2 == 0)
[1b20da0]493 break;
[16da5f8e]494 }
[a7b1071]495
496 return 0;
[16da5f8e]497}
498
[b888d5f]499/** Compare two NULL terminated strings with length limit.
[16da5f8e]500 *
[b888d5f]501 * Do a char-by-char comparison of two NULL-terminated strings.
[4efeab5]502 * The strings are considered equal iff
503 * min(str_length(s1), max_len) == min(str_length(s2), max_len)
504 * and both strings consist of the same sequence of characters,
505 * up to max_len characters.
506 *
[1772e6d]507 * A string S1 is less than another string S2 if it has a character with
508 * lower value at the first character position where the strings differ.
509 * If the strings differ in length, the shorter one is treated as if
510 * padded by characters with a value of zero. Only the first max_len
511 * characters are considered.
[16da5f8e]512 *
[b888d5f]513 * @param s1 First string to compare.
514 * @param s2 Second string to compare.
515 * @param max_len Maximum number of characters to consider.
516 *
[1772e6d]517 * @return 0 if the strings are equal, -1 if the first is less than the second,
518 * 1 if the second is less than the first.
[16da5f8e]519 *
520 */
[98000fb]521int str_lcmp(const char *s1, const char *s2, size_t max_len)
[16da5f8e]522{
[b888d5f]523 wchar_t c1 = 0;
524 wchar_t c2 = 0;
[a35b458]525
[b888d5f]526 size_t off1 = 0;
527 size_t off2 = 0;
[a35b458]528
[98000fb]529 size_t len = 0;
[a7b1071]530
531 while (true) {
532 if (len >= max_len)
[b888d5f]533 break;
[a7b1071]534
535 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
536 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
537
[b888d5f]538 if (c1 < c2)
[16da5f8e]539 return -1;
[a7b1071]540
[b888d5f]541 if (c1 > c2)
[16da5f8e]542 return 1;
[a7b1071]543
544 if (c1 == 0 || c2 == 0)
545 break;
546
[1b20da0]547 ++len;
[16da5f8e]548 }
[a7b1071]549
550 return 0;
551
[16da5f8e]552}
553
[f4b1535]554/** Copy string.
[b888d5f]555 *
[f4b1535]556 * Copy source string @a src to destination buffer @a dest.
557 * No more than @a size bytes are written. If the size of the output buffer
558 * is at least one byte, the output string will always be well-formed, i.e.
559 * null-terminated and containing only complete characters.
[b888d5f]560 *
[abf09311]561 * @param dest Destination buffer.
[6700ee2]562 * @param count Size of the destination buffer (must be > 0).
[f4b1535]563 * @param src Source string.
[abf09311]564 *
[b888d5f]565 */
[f4b1535]566void str_cpy(char *dest, size_t size, const char *src)
[b888d5f]567{
[6700ee2]568 /* There must be space for a null terminator in the buffer. */
[63e27ef]569 assert(size > 0);
570 assert(src != NULL);
[a35b458]571
[abf09311]572 size_t src_off = 0;
573 size_t dest_off = 0;
[a35b458]574
[abf09311]575 wchar_t ch;
[f4b1535]576 while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
577 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
578 break;
579 }
[a35b458]580
[f4b1535]581 dest[dest_off] = '\0';
582}
583
584/** Copy size-limited substring.
585 *
[6700ee2]586 * Copy prefix of string @a src of max. size @a size to destination buffer
587 * @a dest. No more than @a size bytes are written. The output string will
588 * always be well-formed, i.e. null-terminated and containing only complete
589 * characters.
[f4b1535]590 *
591 * No more than @a n bytes are read from the input string, so it does not
592 * have to be null-terminated.
593 *
[abf09311]594 * @param dest Destination buffer.
[6700ee2]595 * @param count Size of the destination buffer (must be > 0).
[f4b1535]596 * @param src Source string.
[abf09311]597 * @param n Maximum number of bytes to read from @a src.
598 *
[f4b1535]599 */
600void str_ncpy(char *dest, size_t size, const char *src, size_t n)
601{
[6700ee2]602 /* There must be space for a null terminator in the buffer. */
[63e27ef]603 assert(size > 0);
[a35b458]604
[abf09311]605 size_t src_off = 0;
606 size_t dest_off = 0;
[a35b458]607
[abf09311]608 wchar_t ch;
[f4b1535]609 while ((ch = str_decode(src, &src_off, n)) != 0) {
610 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
[b888d5f]611 break;
612 }
[a35b458]613
[f4b1535]614 dest[dest_off] = '\0';
[b888d5f]615}
[16da5f8e]616
[abf09311]617/** Duplicate string.
618 *
619 * Allocate a new string and copy characters from the source
620 * string into it. The duplicate string is allocated via sleeping
621 * malloc(), thus this function can sleep in no memory conditions.
622 *
623 * The allocation cannot fail and the return value is always
624 * a valid pointer. The duplicate string is always a well-formed
625 * null-terminated UTF-8 string, but it can differ from the source
626 * string on the byte level.
627 *
628 * @param src Source string.
629 *
630 * @return Duplicate string.
631 *
632 */
633char *str_dup(const char *src)
634{
635 size_t size = str_size(src) + 1;
636 char *dest = malloc(size, 0);
[63e27ef]637 assert(dest);
[a35b458]638
[abf09311]639 str_cpy(dest, size, src);
640 return dest;
641}
642
643/** Duplicate string with size limit.
644 *
645 * Allocate a new string and copy up to @max_size bytes from the source
646 * string into it. The duplicate string is allocated via sleeping
647 * malloc(), thus this function can sleep in no memory conditions.
648 * No more than @max_size + 1 bytes is allocated, but if the size
649 * occupied by the source string is smaller than @max_size + 1,
650 * less is allocated.
651 *
652 * The allocation cannot fail and the return value is always
653 * a valid pointer. The duplicate string is always a well-formed
654 * null-terminated UTF-8 string, but it can differ from the source
655 * string on the byte level.
656 *
657 * @param src Source string.
658 * @param n Maximum number of bytes to duplicate.
659 *
660 * @return Duplicate string.
661 *
662 */
663char *str_ndup(const char *src, size_t n)
664{
665 size_t size = str_size(src);
666 if (size > n)
667 size = n;
[a35b458]668
[abf09311]669 char *dest = malloc(size + 1, 0);
[63e27ef]670 assert(dest);
[a35b458]671
[abf09311]672 str_ncpy(dest, size + 1, src, size);
673 return dest;
674}
675
[0f06dbc]676/** Convert wide string to string.
[b888d5f]677 *
[0f06dbc]678 * Convert wide string @a src to string. The output is written to the buffer
679 * specified by @a dest and @a size. @a size must be non-zero and the string
680 * written will always be well-formed.
[16da5f8e]681 *
[0f06dbc]682 * @param dest Destination buffer.
683 * @param size Size of the destination buffer.
684 * @param src Source wide string.
[16da5f8e]685 */
[0f06dbc]686void wstr_to_str(char *dest, size_t size, const wchar_t *src)
[16da5f8e]687{
[b888d5f]688 wchar_t ch;
[0f06dbc]689 size_t src_idx;
690 size_t dest_off;
691
692 /* There must be space for a null terminator in the buffer. */
[63e27ef]693 assert(size > 0);
[0f06dbc]694
695 src_idx = 0;
696 dest_off = 0;
[a35b458]697
[b888d5f]698 while ((ch = src[src_idx++]) != 0) {
[0f06dbc]699 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
[b888d5f]700 break;
[16da5f8e]701 }
[0f06dbc]702
703 dest[dest_off] = '\0';
[16da5f8e]704}
705
[20f1597]706/** Find first occurence of character in string.
707 *
[b888d5f]708 * @param str String to search.
709 * @param ch Character to look for.
710 *
711 * @return Pointer to character in @a str or NULL if not found.
[20f1597]712 *
713 */
[dd2cfa7]714char *str_chr(const char *str, wchar_t ch)
[20f1597]715{
[b888d5f]716 wchar_t acc;
717 size_t off = 0;
[f2d2c7ba]718 size_t last = 0;
[a35b458]719
[a7b1071]720 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
[b888d5f]721 if (acc == ch)
[dd2cfa7]722 return (char *) (str + last);
[f2d2c7ba]723 last = off;
[20f1597]724 }
[a35b458]725
[20f1597]726 return NULL;
727}
728
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position @a pos. The
 * characters from that position on, including the terminator, are shifted
 * by one towards the end.
 *
 * NOTE(review): only @a pos is checked against @a max_pos. The shift writes
 * up to index wstr_length(str) + 1, so the caller presumably guarantees
 * that the buffer is large enough for that -- confirm against callers.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	if (pos > len)
		return false;

	if (pos + 1 > max_pos)
		return false;

	/* Move the tail (terminator first) one slot towards the end */
	size_t dst = len + 1;
	while (dst > pos) {
		str[dst] = str[dst - 1];
		dst--;
	}

	str[pos] = ch;

	return true;
}
758
/** Remove a wide character from a wide string.
 *
 * Remove the wide character at position @a pos. The characters behind it,
 * including the terminator, are shifted by one towards the beginning.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	size_t len = wstr_length(str);

	if (pos >= len)
		return false;

	/* Pull the tail (up to and including the terminator) one slot down */
	size_t dst;
	for (dst = pos; dst < len; dst++)
		str[dst] = str[dst + 1];

	return true;
}
784
[30a5470]785/** Convert string to uint64_t (internal variant).
786 *
787 * @param nptr Pointer to string.
788 * @param endptr Pointer to the first invalid character is stored here.
789 * @param base Zero or number between 2 and 36 inclusive.
790 * @param neg Indication of unary minus is stored here.
791 * @apram result Result of the conversion.
792 *
793 * @return EOK if conversion was successful.
794 *
795 */
[b7fd2a0]796static errno_t str_uint(const char *nptr, char **endptr, unsigned int base,
[30a5470]797 bool *neg, uint64_t *result)
798{
[63e27ef]799 assert(endptr != NULL);
800 assert(neg != NULL);
801 assert(result != NULL);
[a35b458]802
[30a5470]803 *neg = false;
804 const char *str = nptr;
[a35b458]805
[30a5470]806 /* Ignore leading whitespace */
807 while (isspace(*str))
808 str++;
[a35b458]809
[30a5470]810 if (*str == '-') {
811 *neg = true;
812 str++;
813 } else if (*str == '+')
814 str++;
[a35b458]815
[30a5470]816 if (base == 0) {
817 /* Decode base if not specified */
818 base = 10;
[a35b458]819
[30a5470]820 if (*str == '0') {
821 base = 8;
822 str++;
[a35b458]823
[30a5470]824 switch (*str) {
825 case 'b':
826 case 'B':
827 base = 2;
828 str++;
829 break;
830 case 'o':
831 case 'O':
832 base = 8;
833 str++;
834 break;
835 case 'd':
836 case 'D':
837 case 't':
838 case 'T':
839 base = 10;
840 str++;
841 break;
842 case 'x':
843 case 'X':
844 base = 16;
845 str++;
846 break;
[4ce914d4]847 default:
848 str--;
[30a5470]849 }
850 }
851 } else {
852 /* Check base range */
853 if ((base < 2) || (base > 36)) {
854 *endptr = (char *) str;
855 return EINVAL;
856 }
857 }
[a35b458]858
[30a5470]859 *result = 0;
860 const char *startstr = str;
[a35b458]861
[30a5470]862 while (*str != 0) {
863 unsigned int digit;
[a35b458]864
[30a5470]865 if ((*str >= 'a') && (*str <= 'z'))
866 digit = *str - 'a' + 10;
867 else if ((*str >= 'A') && (*str <= 'Z'))
868 digit = *str - 'A' + 10;
869 else if ((*str >= '0') && (*str <= '9'))
870 digit = *str - '0';
871 else
872 break;
[a35b458]873
[30a5470]874 if (digit >= base)
875 break;
[a35b458]876
[30a5470]877 uint64_t prev = *result;
878 *result = (*result) * base + digit;
[a35b458]879
[30a5470]880 if (*result < prev) {
881 /* Overflow */
882 *endptr = (char *) str;
883 return EOVERFLOW;
884 }
[a35b458]885
[30a5470]886 str++;
887 }
[a35b458]888
[30a5470]889 if (str == startstr) {
890 /*
891 * No digits were decoded => first invalid character is
892 * the first character of the string.
893 */
894 str = nptr;
895 }
[a35b458]896
[30a5470]897 *endptr = (char *) str;
[a35b458]898
[30a5470]899 if (str == nptr)
900 return EINVAL;
[a35b458]901
[30a5470]902 return EOK;
903}
904
905/** Convert string to uint64_t.
906 *
907 * @param nptr Pointer to string.
908 * @param endptr If not NULL, pointer to the first invalid character
909 * is stored here.
910 * @param base Zero or number between 2 and 36 inclusive.
911 * @param strict Do not allow any trailing characters.
[4ce914d4]912 * @param result Result of the conversion.
[30a5470]913 *
914 * @return EOK if conversion was successful.
915 *
916 */
[b7fd2a0]917errno_t str_uint64_t(const char *nptr, char **endptr, unsigned int base,
[30a5470]918 bool strict, uint64_t *result)
919{
[63e27ef]920 assert(result != NULL);
[a35b458]921
[30a5470]922 bool neg;
923 char *lendptr;
[b7fd2a0]924 errno_t ret = str_uint(nptr, &lendptr, base, &neg, result);
[a35b458]925
[30a5470]926 if (endptr != NULL)
927 *endptr = (char *) lendptr;
[a35b458]928
[30a5470]929 if (ret != EOK)
930 return ret;
[a35b458]931
[30a5470]932 /* Do not allow negative values */
933 if (neg)
934 return EINVAL;
[a35b458]935
[30a5470]936 /* Check whether we are at the end of
937 the string in strict mode */
938 if ((strict) && (*lendptr != 0))
939 return EINVAL;
[a35b458]940
[30a5470]941 return EOK;
942}
943
/** Scale a value to a decimal order of magnitude.
 *
 * The value is divided by a power of 1000 chosen according to its
 * magnitude and the matching SI prefix letter is reported:
 * 'E' (10^18), 'P' (10^15), 'T' (10^12), 'G' (10^9), 'M' (10^6),
 * 'k' (10^3). Values up to 1000000 are returned unscaled with a space
 * as the suffix.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Prefix letter (or ' ') is stored here.
 *
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/* Multiples of 10^18 are exa, not zetta */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* Multiples of 10^15 are peta, not exa */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
969
/** Scale a byte count to a binary order of magnitude.
 *
 * The value is divided by a power of 1024 chosen according to its
 * magnitude and the matching unit string is reported:
 * "PiB" (2^50), "TiB" (2^40), "GiB" (2^30), "MiB" (2^20), "KiB" (2^10).
 * Values up to 1048576 are returned unscaled in bytes.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Pointer to a constant unit string is stored here.
 * @param fixed  If true, the plain byte unit is reported as "B  "
 *               (padded with spaces to the width of the other units),
 *               otherwise as "B".
 *
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* Multiples of 2^50 are pebibytes, not exbibytes */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		if (fixed)
			*suffix = "B  ";
		else
			*suffix = "B";
	}
}
996
[16da5f8e]997/** @}
998 */
Note: See TracBrowser for help on using the repository browser.