source: mainline/kernel/generic/src/lib/str.c@ 08e103d4

Last change on this file since 08e103d4 was 08e103d4, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 6 years ago

Use clearer naming for string length functions

This and the following commit change the names of functions, as well as
their documentation, to use unambiguous terms "bytes" and "code points"
instead of ambiguous terms "size", "length", and "characters".

  • Property mode set to 100644
File size: 24.4 KB
RevLine 
[16da5f8e]1/*
2 * Copyright (c) 2001-2004 Jakub Jermar
[d066259]3 * Copyright (c) 2005 Martin Decky
4 * Copyright (c) 2008 Jiri Svoboda
5 * Copyright (c) 2011 Martin Sucha
6 * Copyright (c) 2011 Oleg Romanenko
[16da5f8e]7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 *
13 * - Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * - Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * - The name of the author may not be used to endorse or promote products
19 * derived from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
[174156fd]33/** @addtogroup kernel_generic
[16da5f8e]34 * @{
35 */
36
37/**
38 * @file
[82bb9c1]39 * @brief String functions.
40 *
 * Strings and characters use the Universal Character Set (UCS). Standard
 * strings (called simply "strings") are encoded in UTF-8. Wide strings
 * (encoded in UTF-32) are supported to a limited degree. A single character
 * is represented as wchar_t.@n
[82bb9c1]45 *
[b888d5f]46 * Overview of the terminology:@n
[82bb9c1]47 *
[b888d5f]48 * Term Meaning
49 * -------------------- ----------------------------------------------------
50 * byte 8 bits stored in uint8_t (unsigned 8 bit integer)
[82bb9c1]51 *
[b888d5f]52 * character UTF-32 encoded Unicode character, stored in wchar_t
53 * (signed 32 bit integer), code points 0 .. 1114111
54 * are valid
[82bb9c1]55 *
[b888d5f]56 * ASCII character 7 bit encoded ASCII character, stored in char
57 * (usually signed 8 bit integer), code points 0 .. 127
58 * are valid
59 *
60 * string UTF-8 encoded NULL-terminated Unicode string, char *
61 *
62 * wide string UTF-32 encoded NULL-terminated Unicode string,
63 * wchar_t *
64 *
65 * [wide] string size number of BYTES in a [wide] string (excluding
66 * the NULL-terminator), size_t
67 *
68 * [wide] string length number of CHARACTERS in a [wide] string (excluding
[98000fb]69 * the NULL-terminator), size_t
[b888d5f]70 *
71 * [wide] string width number of display cells on a monospace display taken
[98000fb]72 * by a [wide] string, size_t
[b888d5f]73 *
74 *
75 * Overview of string metrics:@n
76 *
77 * Metric Abbrev. Type Meaning
78 * ------ ------ ------ -------------------------------------------------
79 * size n size_t number of BYTES in a string (excluding the
80 * NULL-terminator)
81 *
[98000fb]82 * length l size_t number of CHARACTERS in a string (excluding the
[b888d5f]83 * null terminator)
84 *
[98000fb]85 * width w size_t number of display cells on a monospace display
[b888d5f]86 * taken by a string
87 *
88 *
89 * Function naming prefixes:@n
90 *
91 * chr_ operate on characters
92 * ascii_ operate on ASCII characters
93 * str_ operate on strings
94 * wstr_ operate on wide strings
95 *
96 * [w]str_[n|l|w] operate on a prefix limited by size, length
97 * or width
98 *
99 *
100 * A specific character inside a [wide] string can be referred to by:@n
101 *
102 * pointer (char *, wchar_t *)
103 * byte offset (size_t)
[98000fb]104 * character index (size_t)
[82bb9c1]105 *
[16da5f8e]106 */
107
[19f857a]108#include <str.h>
[d066259]109
110#include <assert.h>
[d09f84e6]111#include <errno.h>
[d066259]112#include <stdbool.h>
113#include <stddef.h>
114#include <stdint.h>
115#include <stdlib.h>
116
[b888d5f]117#include <align.h>
[30a5470]118#include <macros.h>
[16da5f8e]119
/** Evaluate @a cond only if wchar_t is signed.
 *
 * When the compiler defines __WCHAR_UNSIGNED__ the check expands to a
 * constant true and @a cond is not evaluated at all.
 */
#ifdef __WCHAR_UNSIGNED__
#define WCHAR_SIGNED_CHECK(cond)  (true)
#else
#define WCHAR_SIGNED_CHECK(cond)  (cond)
#endif

/** Bit mask consisting of lowest @a n bits (out of 8), 0 <= n <= 8 */
#define LO_MASK_8(n)  ((uint8_t) ((1u << (n)) - 1))

/** Bit mask consisting of lowest @a n bits (out of 32), 0 <= n < 32 */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1))

/** Bit mask consisting of highest @a n bits (out of 8), 0 <= n <= 8
 *
 * The result is cast back to uint8_t because the operand of ~ is promoted
 * to int; without the cast the mask would be a negative int with all the
 * upper bits set.
 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
[0dd1d444]138
[b888d5f]139/** Decode a single character from a string.
[21a639b7]140 *
[b888d5f]141 * Decode a single character from a string of size @a size. Decoding starts
[e1813cf]142 * at @a offset and this offset is moved to the beginning of the next
143 * character. In case of decoding error, offset generally advances at least
[b888d5f]144 * by one. However, offset is never moved beyond size.
[21a639b7]145 *
[b888d5f]146 * @param str String (not necessarily NULL-terminated).
147 * @param offset Byte offset in string where to start decoding.
148 * @param size Size of the string (in bytes).
149 *
[c8bf88d]150 * @return Value of decoded character, U_SPECIAL on decoding error or
[b888d5f]151 * NULL if attempt to decode beyond @a size.
[21a639b7]152 *
153 */
[b888d5f]154wchar_t str_decode(const char *str, size_t *offset, size_t size)
[21a639b7]155{
[b888d5f]156 if (*offset + 1 > size)
157 return 0;
[a35b458]158
[b888d5f]159 /* First byte read from string */
160 uint8_t b0 = (uint8_t) str[(*offset)++];
[a35b458]161
[b888d5f]162 /* Determine code length */
[a35b458]163
[b888d5f]164 unsigned int b0_bits; /* Data bits in first byte */
165 unsigned int cbytes; /* Number of continuation bytes */
[a35b458]166
[0dd1d444]167 if ((b0 & 0x80) == 0) {
168 /* 0xxxxxxx (Plain ASCII) */
169 b0_bits = 7;
170 cbytes = 0;
171 } else if ((b0 & 0xe0) == 0xc0) {
172 /* 110xxxxx 10xxxxxx */
173 b0_bits = 5;
174 cbytes = 1;
175 } else if ((b0 & 0xf0) == 0xe0) {
176 /* 1110xxxx 10xxxxxx 10xxxxxx */
177 b0_bits = 4;
178 cbytes = 2;
179 } else if ((b0 & 0xf8) == 0xf0) {
180 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
181 b0_bits = 3;
182 cbytes = 3;
183 } else {
[b888d5f]184 /* 10xxxxxx -- unexpected continuation byte */
[c8bf88d]185 return U_SPECIAL;
[74c8da2c]186 }
[a35b458]187
[b888d5f]188 if (*offset + cbytes > size)
[c8bf88d]189 return U_SPECIAL;
[a35b458]190
[b888d5f]191 wchar_t ch = b0 & LO_MASK_8(b0_bits);
[a35b458]192
[b888d5f]193 /* Decode continuation bytes */
[0dd1d444]194 while (cbytes > 0) {
[b888d5f]195 uint8_t b = (uint8_t) str[(*offset)++];
[a35b458]196
[b888d5f]197 /* Must be 10xxxxxx */
198 if ((b & 0xc0) != 0x80)
[c8bf88d]199 return U_SPECIAL;
[a35b458]200
[b888d5f]201 /* Shift data bits to ch */
[0dd1d444]202 ch = (ch << CONT_BITS) | (wchar_t) (b & LO_MASK_8(CONT_BITS));
[b888d5f]203 cbytes--;
[74c8da2c]204 }
[a35b458]205
[0dd1d444]206 return ch;
[74c8da2c]207}
208
[e1813cf]209/** Encode a single character to string representation.
[74c8da2c]210 *
[e1813cf]211 * Encode a single character to string representation (i.e. UTF-8) and store
212 * it into a buffer at @a offset. Encoding starts at @a offset and this offset
213 * is moved to the position where the next character can be written to.
[74c8da2c]214 *
[b888d5f]215 * @param ch Input character.
216 * @param str Output buffer.
217 * @param offset Byte offset where to start writing.
218 * @param size Size of the output buffer (in bytes).
[74c8da2c]219 *
[d09f84e6]220 * @return EOK if the character was encoded successfully, EOVERFLOW if there
[8e893ae]221 * was not enough space in the output buffer or EINVAL if the character
222 * code was invalid.
[74c8da2c]223 */
[b7fd2a0]224errno_t chr_encode(const wchar_t ch, char *str, size_t *offset, size_t size)
[74c8da2c]225{
[b888d5f]226 if (*offset >= size)
[d09f84e6]227 return EOVERFLOW;
[a35b458]228
[b888d5f]229 if (!chr_check(ch))
[d09f84e6]230 return EINVAL;
[a35b458]231
[7c3fb9b]232 /*
233 * Unsigned version of ch (bit operations should only be done
234 * on unsigned types).
235 */
[b888d5f]236 uint32_t cc = (uint32_t) ch;
[a35b458]237
[b888d5f]238 /* Determine how many continuation bytes are needed */
[a35b458]239
[b888d5f]240 unsigned int b0_bits; /* Data bits in first byte */
241 unsigned int cbytes; /* Number of continuation bytes */
[a35b458]242
[32704cb]243 if ((cc & ~LO_MASK_32(7)) == 0) {
244 b0_bits = 7;
245 cbytes = 0;
246 } else if ((cc & ~LO_MASK_32(11)) == 0) {
247 b0_bits = 5;
248 cbytes = 1;
249 } else if ((cc & ~LO_MASK_32(16)) == 0) {
250 b0_bits = 4;
251 cbytes = 2;
252 } else if ((cc & ~LO_MASK_32(21)) == 0) {
253 b0_bits = 3;
254 cbytes = 3;
255 } else {
[b888d5f]256 /* Codes longer than 21 bits are not supported */
[d09f84e6]257 return EINVAL;
[74c8da2c]258 }
[a35b458]259
[b888d5f]260 /* Check for available space in buffer */
261 if (*offset + cbytes >= size)
[d09f84e6]262 return EOVERFLOW;
[a35b458]263
[b888d5f]264 /* Encode continuation bytes */
265 unsigned int i;
266 for (i = cbytes; i > 0; i--) {
[e1813cf]267 str[*offset + i] = 0x80 | (cc & LO_MASK_32(CONT_BITS));
[32704cb]268 cc = cc >> CONT_BITS;
[74c8da2c]269 }
[a35b458]270
[b888d5f]271 /* Encode first byte */
[e1813cf]272 str[*offset] = (cc & LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1);
[a35b458]273
[b888d5f]274 /* Advance offset */
275 *offset += cbytes + 1;
[a35b458]276
[d09f84e6]277 return EOK;
[74c8da2c]278}
279
/** Get size of string in bytes.
 *
 * Count the bytes that precede the NUL terminator of @a str.
 *
 * @param str NUL-terminated string to measure.
 *
 * @return Number of bytes used by the string (terminator excluded).
 *
 */
size_t str_bytes(const char *str)
{
	const char *end = str;

	while (*end != 0)
		end++;

	return (size_t) (end - str);
}
299
/** Get size of wide string in bytes.
 *
 * Multiply the number of wide characters in @a str (terminator excluded)
 * by the size of one wchar_t.
 *
 * @param str NUL-terminated wide string to measure.
 *
 * @return Number of bytes used by the wide string.
 *
 */
size_t wstr_bytes(const wchar_t *str)
{
	size_t count = wstr_code_points(str);

	return count * sizeof(wchar_t);
}
314
315/** Get size of string with length limit.
[74c8da2c]316 *
[f25b2819]317 * Get the number of bytes which are used by up to @a max_len first
318 * characters in the string @a str. If @a max_len is greater than
[b888d5f]319 * the length of @a str, the entire string is measured (excluding the
320 * NULL-terminator).
321 *
322 * @param str String to consider.
323 * @param max_len Maximum number of characters to measure.
[74c8da2c]324 *
[b888d5f]325 * @return Number of bytes used by the characters.
[74c8da2c]326 *
327 */
[08e103d4]328size_t str_lbytes(const char *str, size_t max_len)
[74c8da2c]329{
[98000fb]330 size_t len = 0;
[b888d5f]331 size_t offset = 0;
[a35b458]332
[b888d5f]333 while (len < max_len) {
334 if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
[b54d2f1]335 break;
[a35b458]336
[f25b2819]337 len++;
[21a639b7]338 }
[a35b458]339
[b888d5f]340 return offset;
[74c8da2c]341}
342
/** Get size of wide string with length limit.
 *
 * Get the number of bytes which are used by up to @a max_len first
 * wide characters in the wide string @a str. If @a max_len is greater than
 * the length of @a str, the entire wide string is measured (excluding the
 * NUL terminator).
 *
 * The characters are counted directly rather than by converting @a max_len
 * to a byte limit, because max_len * sizeof(wchar_t) can wrap around for
 * large limits and silently truncate the measurement.
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lbytes(const wchar_t *str, size_t max_len)
{
	size_t len = 0;

	while ((len < max_len) && (str[len] != 0))
		len++;

	return len * sizeof(wchar_t);
}
360
[b888d5f]361/** Get number of characters in a string.
[82bb9c1]362 *
[b888d5f]363 * @param str NULL-terminated string.
[82bb9c1]364 *
[b888d5f]365 * @return Number of characters in string.
[82bb9c1]366 *
367 */
[08e103d4]368size_t str_code_points(const char *str)
[82bb9c1]369{
[98000fb]370 size_t len = 0;
[b888d5f]371 size_t offset = 0;
[a35b458]372
[b888d5f]373 while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
374 len++;
[a35b458]375
[b888d5f]376 return len;
[82bb9c1]377}
378
/** Get number of characters in a wide string.
 *
 * @param wstr NUL-terminated wide string.
 *
 * @return Number of wide characters that precede the terminator
 *         in @a wstr.
 *
 */
size_t wstr_code_points(const wchar_t *wstr)
{
	const wchar_t *cur = wstr;

	while (*cur != 0)
		cur++;

	return (size_t) (cur - wstr);
}
395
/** Get number of characters in a string with size limit.
 *
 * Decode @a str character by character, looking at no more than @a size
 * bytes, until a zero character is decoded or the limit is reached.
 *
 * @param str  String to consider.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters decoded.
 *
 */
size_t str_ncode_points(const char *str, size_t size)
{
	size_t offset = 0;
	size_t count;

	for (count = 0; str_decode(str, &offset, size) != 0; count++)
		;

	return count;
}
414
/** Get number of characters in a wide string with size limit.
 *
 * The byte limit @a size is first aligned down to a multiple of
 * sizeof(wchar_t); only whole wide characters within that limit are
 * examined.
 *
 * @param str  Wide string to consider.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of wide characters found before the terminator or
 *         the limit, whichever comes first.
 *
 */
size_t wstr_ncode_points(const wchar_t *str, size_t size)
{
	size_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
	size_t count = 0;

	for (size_t used = 0; used < limit; used += sizeof(wchar_t)) {
		if (str[count] == 0)
			break;

		count++;
	}

	return count;
}
436
/** Check whether character is plain ASCII.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0 .. 127.
 *
 */
bool ascii_check(wchar_t ch)
{
	/* The lower bound is only tested when wchar_t is a signed type. */
	return WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 127);
}
[f25b2819]449
/** Check whether character is valid.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the code point range 0 .. 1114111.
 *
 */
bool chr_check(wchar_t ch)
{
	/* The lower bound is only tested when wchar_t is a signed type. */
	return WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 1114111);
}
462
[b888d5f]463/** Compare two NULL terminated strings.
[16da5f8e]464 *
[b888d5f]465 * Do a char-by-char comparison of two NULL-terminated strings.
[4efeab5]466 * The strings are considered equal iff their length is equal
467 * and both strings consist of the same sequence of characters.
468 *
[1772e6d]469 * A string S1 is less than another string S2 if it has a character with
470 * lower value at the first character position where the strings differ.
471 * If the strings differ in length, the shorter one is treated as if
472 * padded by characters with a value of zero.
[16da5f8e]473 *
[b888d5f]474 * @param s1 First string to compare.
475 * @param s2 Second string to compare.
[16da5f8e]476 *
[1772e6d]477 * @return 0 if the strings are equal, -1 if the first is less than the second,
478 * 1 if the second is less than the first.
[16da5f8e]479 *
480 */
[b888d5f]481int str_cmp(const char *s1, const char *s2)
[16da5f8e]482{
[a7b1071]483 wchar_t c1 = 0;
484 wchar_t c2 = 0;
[a35b458]485
[b888d5f]486 size_t off1 = 0;
487 size_t off2 = 0;
[a7b1071]488
489 while (true) {
490 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
491 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
492
[b888d5f]493 if (c1 < c2)
[16da5f8e]494 return -1;
[a35b458]495
[b888d5f]496 if (c1 > c2)
[16da5f8e]497 return 1;
[a7b1071]498
499 if (c1 == 0 || c2 == 0)
[1b20da0]500 break;
[16da5f8e]501 }
[a7b1071]502
503 return 0;
[16da5f8e]504}
505
[b888d5f]506/** Compare two NULL terminated strings with length limit.
[16da5f8e]507 *
[b888d5f]508 * Do a char-by-char comparison of two NULL-terminated strings.
[4efeab5]509 * The strings are considered equal iff
[08e103d4]510 * min(str_code_points(s1), max_len) == min(str_code_points(s2), max_len)
[4efeab5]511 * and both strings consist of the same sequence of characters,
512 * up to max_len characters.
513 *
[1772e6d]514 * A string S1 is less than another string S2 if it has a character with
515 * lower value at the first character position where the strings differ.
516 * If the strings differ in length, the shorter one is treated as if
517 * padded by characters with a value of zero. Only the first max_len
518 * characters are considered.
[16da5f8e]519 *
[b888d5f]520 * @param s1 First string to compare.
521 * @param s2 Second string to compare.
522 * @param max_len Maximum number of characters to consider.
523 *
[1772e6d]524 * @return 0 if the strings are equal, -1 if the first is less than the second,
525 * 1 if the second is less than the first.
[16da5f8e]526 *
527 */
[98000fb]528int str_lcmp(const char *s1, const char *s2, size_t max_len)
[16da5f8e]529{
[b888d5f]530 wchar_t c1 = 0;
531 wchar_t c2 = 0;
[a35b458]532
[b888d5f]533 size_t off1 = 0;
534 size_t off2 = 0;
[a35b458]535
[98000fb]536 size_t len = 0;
[a7b1071]537
538 while (true) {
539 if (len >= max_len)
[b888d5f]540 break;
[a7b1071]541
542 c1 = str_decode(s1, &off1, STR_NO_LIMIT);
543 c2 = str_decode(s2, &off2, STR_NO_LIMIT);
544
[b888d5f]545 if (c1 < c2)
[16da5f8e]546 return -1;
[a7b1071]547
[b888d5f]548 if (c1 > c2)
[16da5f8e]549 return 1;
[a7b1071]550
551 if (c1 == 0 || c2 == 0)
552 break;
553
[1b20da0]554 ++len;
[16da5f8e]555 }
[a7b1071]556
557 return 0;
558
[16da5f8e]559}
560
[f4b1535]561/** Copy string.
[b888d5f]562 *
[f4b1535]563 * Copy source string @a src to destination buffer @a dest.
564 * No more than @a size bytes are written. If the size of the output buffer
565 * is at least one byte, the output string will always be well-formed, i.e.
566 * null-terminated and containing only complete characters.
[b888d5f]567 *
[abf09311]568 * @param dest Destination buffer.
[6700ee2]569 * @param count Size of the destination buffer (must be > 0).
[f4b1535]570 * @param src Source string.
[abf09311]571 *
[b888d5f]572 */
[f4b1535]573void str_cpy(char *dest, size_t size, const char *src)
[b888d5f]574{
[6700ee2]575 /* There must be space for a null terminator in the buffer. */
[63e27ef]576 assert(size > 0);
577 assert(src != NULL);
[a35b458]578
[abf09311]579 size_t src_off = 0;
580 size_t dest_off = 0;
[a35b458]581
[abf09311]582 wchar_t ch;
[f4b1535]583 while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
584 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
585 break;
586 }
[a35b458]587
[f4b1535]588 dest[dest_off] = '\0';
589}
590
591/** Copy size-limited substring.
592 *
[6700ee2]593 * Copy prefix of string @a src of max. size @a size to destination buffer
594 * @a dest. No more than @a size bytes are written. The output string will
595 * always be well-formed, i.e. null-terminated and containing only complete
596 * characters.
[f4b1535]597 *
598 * No more than @a n bytes are read from the input string, so it does not
599 * have to be null-terminated.
600 *
[abf09311]601 * @param dest Destination buffer.
[6700ee2]602 * @param count Size of the destination buffer (must be > 0).
[f4b1535]603 * @param src Source string.
[abf09311]604 * @param n Maximum number of bytes to read from @a src.
605 *
[f4b1535]606 */
607void str_ncpy(char *dest, size_t size, const char *src, size_t n)
608{
[6700ee2]609 /* There must be space for a null terminator in the buffer. */
[63e27ef]610 assert(size > 0);
[a35b458]611
[abf09311]612 size_t src_off = 0;
613 size_t dest_off = 0;
[a35b458]614
[abf09311]615 wchar_t ch;
[f4b1535]616 while ((ch = str_decode(src, &src_off, n)) != 0) {
617 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
[b888d5f]618 break;
619 }
[a35b458]620
[f4b1535]621 dest[dest_off] = '\0';
[b888d5f]622}
[16da5f8e]623
[0f06dbc]624/** Convert wide string to string.
[b888d5f]625 *
[0f06dbc]626 * Convert wide string @a src to string. The output is written to the buffer
627 * specified by @a dest and @a size. @a size must be non-zero and the string
628 * written will always be well-formed.
[16da5f8e]629 *
[0f06dbc]630 * @param dest Destination buffer.
631 * @param size Size of the destination buffer.
632 * @param src Source wide string.
[16da5f8e]633 */
[0f06dbc]634void wstr_to_str(char *dest, size_t size, const wchar_t *src)
[16da5f8e]635{
[b888d5f]636 wchar_t ch;
[0f06dbc]637 size_t src_idx;
638 size_t dest_off;
639
640 /* There must be space for a null terminator in the buffer. */
[63e27ef]641 assert(size > 0);
[0f06dbc]642
643 src_idx = 0;
644 dest_off = 0;
[a35b458]645
[b888d5f]646 while ((ch = src[src_idx++]) != 0) {
[0f06dbc]647 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
[b888d5f]648 break;
[16da5f8e]649 }
[0f06dbc]650
651 dest[dest_off] = '\0';
[16da5f8e]652}
653
[20f1597]654/** Find first occurence of character in string.
655 *
[b888d5f]656 * @param str String to search.
657 * @param ch Character to look for.
658 *
659 * @return Pointer to character in @a str or NULL if not found.
[20f1597]660 */
[dd2cfa7]661char *str_chr(const char *str, wchar_t ch)
[20f1597]662{
[b888d5f]663 wchar_t acc;
664 size_t off = 0;
[f2d2c7ba]665 size_t last = 0;
[a35b458]666
[a7b1071]667 while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
[b888d5f]668 if (acc == ch)
[dd2cfa7]669 return (char *) (str + last);
[f2d2c7ba]670 last = off;
[20f1597]671 }
[a35b458]672
[20f1597]673 return NULL;
674}
675
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position
 * @a pos. The characters after the position (including the terminator)
 * are shifted by one.
 *
 * The insertion is refused when the string already holds @a max_pos or
 * more characters. Checking only @a pos against @a max_pos is not enough:
 * the shift always writes one element past the current terminator,
 * wherever the new character goes.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer. NOTE(review): presumably the
 *                capacity in characters; confirm against callers whether
 *                it includes the terminator.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds or the string would exceed @a max_pos
 *         characters.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_code_points(str);

	/* pos <= len, so len >= max_pos also covers pos + 1 > max_pos. */
	if ((pos > len) || (len >= max_pos))
		return false;

	/* Move str[pos .. len] (terminator included) one slot up. */
	for (size_t i = len + 1; i > pos; i--)
		str[i] = str[i - 1];

	str[pos] = ch;

	return true;
}
705
/** Remove a wide character from a wide string.
 *
 * Remove the wide character at position @a pos. The characters after the
 * position, including the terminator, are shifted down by one.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	size_t len = wstr_code_points(str);

	if (pos >= len)
		return false;

	/* The last iteration copies the terminator from str[len]. */
	for (size_t i = pos; i < len; i++)
		str[i] = str[i + 1];

	return true;
}
731
/** Duplicate string.
 *
 * Allocate a buffer with malloc() that is large enough for @a src and its
 * terminator, then copy the string into it with str_cpy(). Because the
 * copy re-encodes the characters, the duplicate is NUL-terminated but can
 * differ from the source string on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string, or NULL if malloc() returned NULL.
 *
 */
char *str_dup(const char *src)
{
	size_t bytes = str_bytes(src) + 1;
	char *copy = malloc(bytes);

	if (copy != NULL)
		str_cpy(copy, bytes, src);

	return copy;
}
758
/** Duplicate string with size limit.
 *
 * Allocate a new string with malloc() and copy up to @a n bytes from the
 * source string into it. No more than @a n + 1 bytes are allocated; if the
 * source string is shorter than @a n bytes, less is allocated.
 *
 * At most @a n bytes of @a src are examined, so the source does not have to
 * be NUL-terminated as long as it is at least @a n bytes long.
 *
 * Because the copy re-encodes the characters, the duplicate is
 * NUL-terminated but can differ from the source string on the byte level.
 *
 * @param src Source string.
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string, or NULL if malloc() returned NULL.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	/* Bounded scan: equals min(size of src, n) without reading past n. */
	size_t size = 0;
	while ((size < n) && (src[size] != '\0'))
		size++;

	char *dest = malloc(size + 1);
	if (!dest)
		return NULL;

	str_ncpy(dest, size + 1, src, size);
	return dest;
}
792
[30a5470]793/** Convert string to uint64_t (internal variant).
794 *
795 * @param nptr Pointer to string.
796 * @param endptr Pointer to the first invalid character is stored here.
797 * @param base Zero or number between 2 and 36 inclusive.
798 * @param neg Indication of unary minus is stored here.
799 * @apram result Result of the conversion.
800 *
801 * @return EOK if conversion was successful.
802 *
803 */
[b7fd2a0]804static errno_t str_uint(const char *nptr, char **endptr, unsigned int base,
[30a5470]805 bool *neg, uint64_t *result)
806{
[63e27ef]807 assert(endptr != NULL);
808 assert(neg != NULL);
809 assert(result != NULL);
[a35b458]810
[30a5470]811 *neg = false;
812 const char *str = nptr;
[a35b458]813
[30a5470]814 /* Ignore leading whitespace */
815 while (isspace(*str))
816 str++;
[a35b458]817
[30a5470]818 if (*str == '-') {
819 *neg = true;
820 str++;
821 } else if (*str == '+')
822 str++;
[a35b458]823
[30a5470]824 if (base == 0) {
825 /* Decode base if not specified */
826 base = 10;
[a35b458]827
[30a5470]828 if (*str == '0') {
829 base = 8;
830 str++;
[a35b458]831
[30a5470]832 switch (*str) {
833 case 'b':
834 case 'B':
835 base = 2;
836 str++;
837 break;
838 case 'o':
839 case 'O':
840 base = 8;
841 str++;
842 break;
843 case 'd':
844 case 'D':
845 case 't':
846 case 'T':
847 base = 10;
848 str++;
849 break;
850 case 'x':
851 case 'X':
852 base = 16;
853 str++;
854 break;
[4ce914d4]855 default:
856 str--;
[30a5470]857 }
858 }
859 } else {
860 /* Check base range */
861 if ((base < 2) || (base > 36)) {
862 *endptr = (char *) str;
863 return EINVAL;
864 }
865 }
[a35b458]866
[30a5470]867 *result = 0;
868 const char *startstr = str;
[a35b458]869
[30a5470]870 while (*str != 0) {
871 unsigned int digit;
[a35b458]872
[30a5470]873 if ((*str >= 'a') && (*str <= 'z'))
874 digit = *str - 'a' + 10;
875 else if ((*str >= 'A') && (*str <= 'Z'))
876 digit = *str - 'A' + 10;
877 else if ((*str >= '0') && (*str <= '9'))
878 digit = *str - '0';
879 else
880 break;
[a35b458]881
[30a5470]882 if (digit >= base)
883 break;
[a35b458]884
[30a5470]885 uint64_t prev = *result;
886 *result = (*result) * base + digit;
[a35b458]887
[30a5470]888 if (*result < prev) {
889 /* Overflow */
890 *endptr = (char *) str;
891 return EOVERFLOW;
892 }
[a35b458]893
[30a5470]894 str++;
895 }
[a35b458]896
[30a5470]897 if (str == startstr) {
898 /*
899 * No digits were decoded => first invalid character is
900 * the first character of the string.
901 */
902 str = nptr;
903 }
[a35b458]904
[30a5470]905 *endptr = (char *) str;
[a35b458]906
[30a5470]907 if (str == nptr)
908 return EINVAL;
[a35b458]909
[30a5470]910 return EOK;
911}
912
913/** Convert string to uint64_t.
914 *
915 * @param nptr Pointer to string.
916 * @param endptr If not NULL, pointer to the first invalid character
917 * is stored here.
918 * @param base Zero or number between 2 and 36 inclusive.
919 * @param strict Do not allow any trailing characters.
[4ce914d4]920 * @param result Result of the conversion.
[30a5470]921 *
922 * @return EOK if conversion was successful.
923 *
924 */
[b7fd2a0]925errno_t str_uint64_t(const char *nptr, char **endptr, unsigned int base,
[30a5470]926 bool strict, uint64_t *result)
927{
[63e27ef]928 assert(result != NULL);
[a35b458]929
[30a5470]930 bool neg;
931 char *lendptr;
[b7fd2a0]932 errno_t ret = str_uint(nptr, &lendptr, base, &neg, result);
[a35b458]933
[30a5470]934 if (endptr != NULL)
935 *endptr = (char *) lendptr;
[a35b458]936
[30a5470]937 if (ret != EOK)
938 return ret;
[a35b458]939
[30a5470]940 /* Do not allow negative values */
941 if (neg)
942 return EINVAL;
[a35b458]943
[7c3fb9b]944 /*
945 * Check whether we are at the end of
946 * the string in strict mode
947 */
[30a5470]948 if ((strict) && (*lendptr != 0))
949 return EINVAL;
[a35b458]950
[30a5470]951 return EOK;
952}
953
/** Scale a value to a decimal (SI) order of magnitude.
 *
 * The value is divided by the largest power of 1000 that still leaves more
 * than 1000 units (i.e. up to four significant digits), and the matching
 * SI prefix letter is reported. Values up to 1000000 are left unscaled.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix SI prefix is stored here: 'E' (10^18), 'P' (10^15),
 *               'T' (10^12), 'G' (10^9), 'M' (10^6), 'k' (10^3) or ' '
 *               when the value is not scaled.
 *
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/* 10^18 is exa */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* 10^15 is peta */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
979
/** Scale a byte count to a binary (IEC) order of magnitude.
 *
 * The value is divided by the largest power of 1024 that still leaves more
 * than 1024 units, and the matching unit string is reported. Values up to
 * 1048576 are left unscaled.
 *
 * @param val    Value (in bytes) to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Pointer to a static unit string is stored here: "PiB"
 *               (2^50), "TiB" (2^40), "GiB" (2^30), "MiB" (2^20),
 *               "KiB" (2^10) or the plain byte unit.
 * @param fixed  Selects the padded variant of the plain byte unit.
 *
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* The divisor is 2^50, so the result is in pebibytes. */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		/*
		 * NOTE(review): the padded unit is presumably meant to be as
		 * wide as the three-letter units; verify the padding was not
		 * collapsed when this listing was extracted.
		 */
		if (fixed)
			*suffix = "B ";
		else
			*suffix = "B";
	}
}
1006
[16da5f8e]1007/** @}
1008 */
Note: See TracBrowser for help on using the repository browser.