Context Navigation

str.c@ 08e103d4

Visit:

Last change on this file since 08e103d4 was 08e103d4, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 6 years ago

Use clearer naming for string length functions

This and the following commit change the names of functions, as well as
their documentation, to use unambiguous terms "bytes" and "code points"
instead of ambiguous terms "size", "length", and "characters".

Property mode set to 100644

File size: 24.4 KB

Rev	Line
[16da5f8e]	1	/*
	2	* Copyright (c) 2001-2004 Jakub Jermar
[d066259]	3	* Copyright (c) 2005 Martin Decky
	4	* Copyright (c) 2008 Jiri Svoboda
	5	* Copyright (c) 2011 Martin Sucha
	6	* Copyright (c) 2011 Oleg Romanenko
[16da5f8e]	7	* All rights reserved.
	8	*
	9	* Redistribution and use in source and binary forms, with or without
	10	* modification, are permitted provided that the following conditions
	11	* are met:
	12	*
	13	* - Redistributions of source code must retain the above copyright
	14	* notice, this list of conditions and the following disclaimer.
	15	* - Redistributions in binary form must reproduce the above copyright
	16	* notice, this list of conditions and the following disclaimer in the
	17	* documentation and/or other materials provided with the distribution.
	18	* - The name of the author may not be used to endorse or promote products
	19	* derived from this software without specific prior written permission.
	20	*
	21	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	22	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	23	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	24	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
	25	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	26	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	27	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	28	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	29	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
	30	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	31	*/
	32
[174156fd]	33	/** @addtogroup kernel_generic
[16da5f8e]	34	* @{
	35	*/
	36
	37	/**
	38	* @file
[82bb9c1]	39	* @brief String functions.
	40	*
	41	* Strings and characters use the Universal Character Set (UCS). The standard
	42	* strings, called just strings are encoded in UTF-8. Wide strings (encoded
	43	* in UTF-32) are supported to a limited degree. A single character is
[b888d5f]	44	* represented as wchar_t.@n
[82bb9c1]	45	*
[b888d5f]	46	* Overview of the terminology:@n
[82bb9c1]	47	*
[b888d5f]	48	* Term Meaning
	49	* -------------------- ----------------------------------------------------
	50	* byte 8 bits stored in uint8_t (unsigned 8 bit integer)
[82bb9c1]	51	*
[b888d5f]	52	* character UTF-32 encoded Unicode character, stored in wchar_t
	53	* (signed 32 bit integer), code points 0 .. 1114111
	54	* are valid
[82bb9c1]	55	*
[b888d5f]	56	* ASCII character 7 bit encoded ASCII character, stored in char
	57	* (usually signed 8 bit integer), code points 0 .. 127
	58	* are valid
	59	*
	60	* string UTF-8 encoded NULL-terminated Unicode string, char *
	61	*
	62	* wide string UTF-32 encoded NULL-terminated Unicode string,
	63	* wchar_t *
	64	*
	65	* [wide] string size number of BYTES in a [wide] string (excluding
	66	* the NULL-terminator), size_t
	67	*
	68	* [wide] string length number of CHARACTERS in a [wide] string (excluding
[98000fb]	69	* the NULL-terminator), size_t
[b888d5f]	70	*
	71	* [wide] string width number of display cells on a monospace display taken
[98000fb]	72	* by a [wide] string, size_t
[b888d5f]	73	*
	74	*
	75	* Overview of string metrics:@n
	76	*
	77	* Metric Abbrev. Type Meaning
	78	* ------ ------ ------ -------------------------------------------------
	79	* size n size_t number of BYTES in a string (excluding the
	80	* NULL-terminator)
	81	*
[98000fb]	82	* length l size_t number of CHARACTERS in a string (excluding the
[b888d5f]	83	* null terminator)
	84	*
[98000fb]	85	* width w size_t number of display cells on a monospace display
[b888d5f]	86	* taken by a string
	87	*
	88	*
	89	* Function naming prefixes:@n
	90	*
	91	* chr_ operate on characters
	92	* ascii_ operate on ASCII characters
	93	* str_ operate on strings
	94	* wstr_ operate on wide strings
	95	*
	96	* [w]str_[n|l|w] operate on a prefix limited by size, length
	97	* or width
	98	*
	99	*
	100	* A specific character inside a [wide] string can be referred to by:@n
	101	*
	102	* pointer (char *, wchar_t *)
	103	* byte offset (size_t)
[98000fb]	104	* character index (size_t)
[82bb9c1]	105	*
[16da5f8e]	106	*/
	107
[19f857a]	108	#include <str.h>
[d066259]	109
	110	#include <assert.h>
[d09f84e6]	111	#include <errno.h>
[d066259]	112	#include <stdbool.h>
	113	#include <stddef.h>
	114	#include <stdint.h>
	115	#include <stdlib.h>
	116
[b888d5f]	117	#include <align.h>
[30a5470]	118	#include <macros.h>
[16da5f8e]	119
/**
 * Wrap a sign check on a wchar_t value.
 *
 * When __WCHAR_UNSIGNED__ is defined the check is replaced by the constant
 * true (the condition is not evaluated); otherwise the condition is used
 * as written.
 */
#ifdef __WCHAR_UNSIGNED__
#define WCHAR_SIGNED_CHECK(cond)  (true)
#else
#define WCHAR_SIGNED_CHECK(cond)  (cond)
#endif

/**
 * Byte mask consisting of the lowest @a n bits (out of 8), 0 <= n <= 8.
 *
 * The shift is done on an unsigned operand so that no signed shift is
 * involved.
 */
#define LO_MASK_8(n)  ((uint8_t) ((1U << (n)) - 1))

/**
 * Mask consisting of the lowest @a n bits (out of 32), 0 <= n <= 31.
 *
 * The shift is done on a 32-bit unsigned operand; n == 32 is not supported.
 */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1))

/**
 * Byte mask consisting of the highest @a n bits (out of 8), 0 <= n <= 8.
 *
 * The result is cast back to uint8_t because the ~ operator works on the
 * promoted (int) value and would otherwise yield a negative number.
 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
[0dd1d444]	138
[b888d5f]	139	/** Decode a single character from a string.
[21a639b7]	140	*
[b888d5f]	141	* Decode a single character from a string of size @a size. Decoding starts
[e1813cf]	142	* at @a offset and this offset is moved to the beginning of the next
	143	* character. In case of decoding error, offset generally advances at least
[b888d5f]	144	* by one. However, offset is never moved beyond size.
[21a639b7]	145	*
[b888d5f]	146	* @param str String (not necessarily NULL-terminated).
	147	* @param offset Byte offset in string where to start decoding.
	148	* @param size Size of the string (in bytes).
	149	*
[c8bf88d]	150	* @return Value of decoded character, U_SPECIAL on decoding error or
[b888d5f]	151	* NULL if attempt to decode beyond @a size.
[21a639b7]	152	*
	153	*/
[b888d5f]	154	wchar_t str_decode(const char str, size_t offset, size_t size)
[21a639b7]	155	{
[b888d5f]	156	if (*offset + 1 > size)
	157	return 0;
[a35b458]	158
[b888d5f]	159	/* First byte read from string */
	160	uint8_t b0 = (uint8_t) str[(*offset)++];
[a35b458]	161
[b888d5f]	162	/* Determine code length */
[a35b458]	163
[b888d5f]	164	unsigned int b0_bits; /* Data bits in first byte */
	165	unsigned int cbytes; /* Number of continuation bytes */
[a35b458]	166
[0dd1d444]	167	if ((b0 & 0x80) == 0) {
	168	/* 0xxxxxxx (Plain ASCII) */
	169	b0_bits = 7;
	170	cbytes = 0;
	171	} else if ((b0 & 0xe0) == 0xc0) {
	172	/* 110xxxxx 10xxxxxx */
	173	b0_bits = 5;
	174	cbytes = 1;
	175	} else if ((b0 & 0xf0) == 0xe0) {
	176	/* 1110xxxx 10xxxxxx 10xxxxxx */
	177	b0_bits = 4;
	178	cbytes = 2;
	179	} else if ((b0 & 0xf8) == 0xf0) {
	180	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
	181	b0_bits = 3;
	182	cbytes = 3;
	183	} else {
[b888d5f]	184	/* 10xxxxxx -- unexpected continuation byte */
[c8bf88d]	185	return U_SPECIAL;
[74c8da2c]	186	}
[a35b458]	187
[b888d5f]	188	if (*offset + cbytes > size)
[c8bf88d]	189	return U_SPECIAL;
[a35b458]	190
[b888d5f]	191	wchar_t ch = b0 & LO_MASK_8(b0_bits);
[a35b458]	192
[b888d5f]	193	/* Decode continuation bytes */
[0dd1d444]	194	while (cbytes > 0) {
[b888d5f]	195	uint8_t b = (uint8_t) str[(*offset)++];
[a35b458]	196
[b888d5f]	197	/* Must be 10xxxxxx */
	198	if ((b & 0xc0) != 0x80)
[c8bf88d]	199	return U_SPECIAL;
[a35b458]	200
[b888d5f]	201	/* Shift data bits to ch */
[0dd1d444]	202	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
[b888d5f]	203	cbytes--;
[74c8da2c]	204	}
[a35b458]	205
[0dd1d444]	206	return ch;
[74c8da2c]	207	}
	208
[e1813cf]	209	/** Encode a single character to string representation.
[74c8da2c]	210	*
[e1813cf]	211	* Encode a single character to string representation (i.e. UTF-8) and store
	212	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
	213	* is moved to the position where the next character can be written to.
[74c8da2c]	214	*
[b888d5f]	215	* @param ch Input character.
	216	* @param str Output buffer.
	217	* @param offset Byte offset where to start writing.
	218	* @param size Size of the output buffer (in bytes).
[74c8da2c]	219	*
[d09f84e6]	220	* @return EOK if the character was encoded successfully, EOVERFLOW if there
[8e893ae]	221	* was not enough space in the output buffer or EINVAL if the character
	222	* code was invalid.
[74c8da2c]	223	*/
[b7fd2a0]	224	errno_t chr_encode(const wchar_t ch, char str, size_t offset, size_t size)
[74c8da2c]	225	{
[b888d5f]	226	if (*offset >= size)
[d09f84e6]	227	return EOVERFLOW;
[a35b458]	228
[b888d5f]	229	if (!chr_check(ch))
[d09f84e6]	230	return EINVAL;
[a35b458]	231
[7c3fb9b]	232	/*
	233	* Unsigned version of ch (bit operations should only be done
	234	* on unsigned types).
	235	*/
[b888d5f]	236	uint32_t cc = (uint32_t) ch;
[a35b458]	237
[b888d5f]	238	/* Determine how many continuation bytes are needed */
[a35b458]	239
[b888d5f]	240	unsigned int b0_bits; /* Data bits in first byte */
	241	unsigned int cbytes; /* Number of continuation bytes */
[a35b458]	242
[32704cb]	243	if ((cc & ~LO_MASK_32(7)) == 0) {
	244	b0_bits = 7;
	245	cbytes = 0;
	246	} else if ((cc & ~LO_MASK_32(11)) == 0) {
	247	b0_bits = 5;
	248	cbytes = 1;
	249	} else if ((cc & ~LO_MASK_32(16)) == 0) {
	250	b0_bits = 4;
	251	cbytes = 2;
	252	} else if ((cc & ~LO_MASK_32(21)) == 0) {
	253	b0_bits = 3;
	254	cbytes = 3;
	255	} else {
[b888d5f]	256	/* Codes longer than 21 bits are not supported */
[d09f84e6]	257	return EINVAL;
[74c8da2c]	258	}
[a35b458]	259
[b888d5f]	260	/* Check for available space in buffer */
	261	if (*offset + cbytes >= size)
[d09f84e6]	262	return EOVERFLOW;
[a35b458]	263
[b888d5f]	264	/* Encode continuation bytes */
	265	unsigned int i;
	266	for (i = cbytes; i > 0; i--) {
[e1813cf]	267	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
[32704cb]	268	cc = cc >> CONT_BITS;
[74c8da2c]	269	}
[a35b458]	270
[b888d5f]	271	/* Encode first byte */
[e1813cf]	272	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
[a35b458]	273
[b888d5f]	274	/* Advance offset */
	275	*offset += cbytes + 1;
[a35b458]	276
[d09f84e6]	277	return EOK;
[74c8da2c]	278	}
	279
/** Get size of string in bytes.
 *
 * Count the bytes that precede the terminating zero byte of @a str.
 *
 * @param str NULL-terminated string to measure.
 *
 * @return Number of bytes used by the string, terminator excluded.
 *
 */
size_t str_bytes(const char *str)
{
	const char *end = str;

	/* Advance to the terminating zero byte. */
	while (*end != 0)
		end++;

	return (size_t) (end - str);
}
	299
/** Get size of wide string in bytes.
 *
 * The result is the number of wide characters reported by
 * wstr_code_points() multiplied by sizeof(wchar_t); the terminator is
 * not included.
 *
 * @param str NULL-terminated wide string to measure.
 *
 * @return Number of bytes used by the wide string.
 *
 */
size_t wstr_bytes(const wchar_t *str)
{
	size_t count = wstr_code_points(str);

	return count * sizeof(wchar_t);
}
	314
	315	/** Get size of string with length limit.
[74c8da2c]	316	*
[f25b2819]	317	* Get the number of bytes which are used by up to @a max_len first
	318	* characters in the string @a str. If @a max_len is greater than
[b888d5f]	319	* the length of @a str, the entire string is measured (excluding the
	320	* NULL-terminator).
	321	*
	322	* @param str String to consider.
	323	* @param max_len Maximum number of characters to measure.
[74c8da2c]	324	*
[b888d5f]	325	* @return Number of bytes used by the characters.
[74c8da2c]	326	*
	327	*/
[08e103d4]	328	size_t str_lbytes(const char *str, size_t max_len)
[74c8da2c]	329	{
[98000fb]	330	size_t len = 0;
[b888d5f]	331	size_t offset = 0;
[a35b458]	332
[b888d5f]	333	while (len < max_len) {
	334	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
[b54d2f1]	335	break;
[a35b458]	336
[f25b2819]	337	len++;
[21a639b7]	338	}
[a35b458]	339
[b888d5f]	340	return offset;
[74c8da2c]	341	}
	342
/** Get size of wide string with length limit.
 *
 * Get the number of bytes which are used by up to @a max_len first
 * wide characters in the wide string @a str. If @a max_len is greater than
 * the length of @a str, the entire wide string is measured (excluding the
 * NULL-terminator).
 *
 * The characters are counted directly instead of converting @a max_len to
 * a byte limit first, so a very large @a max_len cannot wrap around and
 * shorten the limit.
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lbytes(const wchar_t *str, size_t max_len)
{
	size_t len = 0;

	while ((len < max_len) && (str[len] != 0))
		len++;

	return len * sizeof(wchar_t);
}
	360
[b888d5f]	361	/** Get number of characters in a string.
[82bb9c1]	362	*
[b888d5f]	363	* @param str NULL-terminated string.
[82bb9c1]	364	*
[b888d5f]	365	* @return Number of characters in string.
[82bb9c1]	366	*
	367	*/
[08e103d4]	368	size_t str_code_points(const char *str)
[82bb9c1]	369	{
[98000fb]	370	size_t len = 0;
[b888d5f]	371	size_t offset = 0;
[a35b458]	372
[b888d5f]	373	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
	374	len++;
[a35b458]	375
[b888d5f]	376	return len;
[82bb9c1]	377	}
	378
/** Get number of characters in a wide string.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return Number of wide characters in @a wstr preceding the terminator.
 *
 */
size_t wstr_code_points(const wchar_t *wstr)
{
	const wchar_t *end = wstr;

	/* Advance to the terminating zero character. */
	while (*end != 0)
		end++;

	return (size_t) (end - wstr);
}
	395
/** Get number of characters in a string with size limit.
 *
 * Characters are decoded with str_decode() limited to the first @a size
 * bytes; counting stops at the first result of 0.
 *
 * @param str  NULL-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in string.
 *
 */
size_t str_ncode_points(const char *str, size_t size)
{
	size_t pos = 0;
	size_t count;

	for (count = 0; str_decode(str, &pos, size) != 0; count++)
		;

	return count;
}
	414
/** Get number of characters in a wide string with size limit.
 *
 * The byte limit is first passed through ALIGN_DOWN() with
 * sizeof(wchar_t) (presumably rounding it down to a whole number of wide
 * characters -- confirm against align.h). Wide characters are then counted
 * until the limit or the terminator is reached.
 *
 * @param str  NULL-terminated wide string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of wide characters counted.
 *
 */
size_t wstr_ncode_points(const wchar_t *str, size_t size)
{
	const size_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
	size_t count = 0;

	for (size_t used = 0; used < limit; used += sizeof(wchar_t)) {
		if (str[count] == 0)
			break;

		count++;
	}

	return count;
}
	436
/** Check whether character is plain ASCII.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0 .. 127.
 *
 */
bool ascii_check(wchar_t ch)
{
	return WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 127);
}
[f25b2819]	449
/** Check whether character is valid.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0 .. 1114111 (0x10FFFF).
 *
 */
bool chr_check(wchar_t ch)
{
	/* 0x10FFFF == 1114111 */
	return WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 0x10FFFF);
}
	462
[b888d5f]	463	/** Compare two NULL terminated strings.
[16da5f8e]	464	*
[b888d5f]	465	* Do a char-by-char comparison of two NULL-terminated strings.
[4efeab5]	466	* The strings are considered equal iff their length is equal
	467	* and both strings consist of the same sequence of characters.
	468	*
[1772e6d]	469	* A string S1 is less than another string S2 if it has a character with
	470	* lower value at the first character position where the strings differ.
	471	* If the strings differ in length, the shorter one is treated as if
	472	* padded by characters with a value of zero.
[16da5f8e]	473	*
[b888d5f]	474	* @param s1 First string to compare.
	475	* @param s2 Second string to compare.
[16da5f8e]	476	*
[1772e6d]	477	* @return 0 if the strings are equal, -1 if the first is less than the second,
	478	* 1 if the second is less than the first.
[16da5f8e]	479	*
	480	*/
[b888d5f]	481	int str_cmp(const char s1, const char s2)
[16da5f8e]	482	{
[a7b1071]	483	wchar_t c1 = 0;
	484	wchar_t c2 = 0;
[a35b458]	485
[b888d5f]	486	size_t off1 = 0;
	487	size_t off2 = 0;
[a7b1071]	488
	489	while (true) {
	490	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
	491	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
	492
[b888d5f]	493	if (c1 < c2)
[16da5f8e]	494	return -1;
[a35b458]	495
[b888d5f]	496	if (c1 > c2)
[16da5f8e]	497	return 1;
[a7b1071]	498
	499	if (c1 == 0 \|\| c2 == 0)
[1b20da0]	500	break;
[16da5f8e]	501	}
[a7b1071]	502
	503	return 0;
[16da5f8e]	504	}
	505
[b888d5f]	506	/** Compare two NULL terminated strings with length limit.
[16da5f8e]	507	*
[b888d5f]	508	* Do a char-by-char comparison of two NULL-terminated strings.
[4efeab5]	509	* The strings are considered equal iff
[08e103d4]	510	* min(str_code_points(s1), max_len) == min(str_code_points(s2), max_len)
[4efeab5]	511	* and both strings consist of the same sequence of characters,
	512	* up to max_len characters.
	513	*
[1772e6d]	514	* A string S1 is less than another string S2 if it has a character with
	515	* lower value at the first character position where the strings differ.
	516	* If the strings differ in length, the shorter one is treated as if
	517	* padded by characters with a value of zero. Only the first max_len
	518	* characters are considered.
[16da5f8e]	519	*
[b888d5f]	520	* @param s1 First string to compare.
	521	* @param s2 Second string to compare.
	522	* @param max_len Maximum number of characters to consider.
	523	*
[1772e6d]	524	* @return 0 if the strings are equal, -1 if the first is less than the second,
	525	* 1 if the second is less than the first.
[16da5f8e]	526	*
	527	*/
[98000fb]	528	int str_lcmp(const char s1, const char s2, size_t max_len)
[16da5f8e]	529	{
[b888d5f]	530	wchar_t c1 = 0;
	531	wchar_t c2 = 0;
[a35b458]	532
[b888d5f]	533	size_t off1 = 0;
	534	size_t off2 = 0;
[a35b458]	535
[98000fb]	536	size_t len = 0;
[a7b1071]	537
	538	while (true) {
	539	if (len >= max_len)
[b888d5f]	540	break;
[a7b1071]	541
	542	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
	543	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
	544
[b888d5f]	545	if (c1 < c2)
[16da5f8e]	546	return -1;
[a7b1071]	547
[b888d5f]	548	if (c1 > c2)
[16da5f8e]	549	return 1;
[a7b1071]	550
	551	if (c1 == 0 \|\| c2 == 0)
	552	break;
	553
[1b20da0]	554	++len;
[16da5f8e]	555	}
[a7b1071]	556
	557	return 0;
	558
[16da5f8e]	559	}
	560
[f4b1535]	561	/** Copy string.
[b888d5f]	562	*
[f4b1535]	563	* Copy source string @a src to destination buffer @a dest.
	564	* No more than @a size bytes are written. If the size of the output buffer
	565	* is at least one byte, the output string will always be well-formed, i.e.
	566	* null-terminated and containing only complete characters.
[b888d5f]	567	*
[abf09311]	568	* @param dest Destination buffer.
[6700ee2]	569	* @param count Size of the destination buffer (must be > 0).
[f4b1535]	570	* @param src Source string.
[abf09311]	571	*
[b888d5f]	572	*/
[f4b1535]	573	void str_cpy(char dest, size_t size, const char src)
[b888d5f]	574	{
[6700ee2]	575	/* There must be space for a null terminator in the buffer. */
[63e27ef]	576	assert(size > 0);
	577	assert(src != NULL);
[a35b458]	578
[abf09311]	579	size_t src_off = 0;
	580	size_t dest_off = 0;
[a35b458]	581
[abf09311]	582	wchar_t ch;
[f4b1535]	583	while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
	584	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
	585	break;
	586	}
[a35b458]	587
[f4b1535]	588	dest[dest_off] = '\0';
	589	}
	590
	591	/** Copy size-limited substring.
	592	*
[6700ee2]	593	* Copy prefix of string @a src of max. size @a size to destination buffer
	594	* @a dest. No more than @a size bytes are written. The output string will
	595	* always be well-formed, i.e. null-terminated and containing only complete
	596	* characters.
[f4b1535]	597	*
	598	* No more than @a n bytes are read from the input string, so it does not
	599	* have to be null-terminated.
	600	*
[abf09311]	601	* @param dest Destination buffer.
[6700ee2]	602	* @param count Size of the destination buffer (must be > 0).
[f4b1535]	603	* @param src Source string.
[abf09311]	604	* @param n Maximum number of bytes to read from @a src.
	605	*
[f4b1535]	606	*/
	607	void str_ncpy(char dest, size_t size, const char src, size_t n)
	608	{
[6700ee2]	609	/* There must be space for a null terminator in the buffer. */
[63e27ef]	610	assert(size > 0);
[a35b458]	611
[abf09311]	612	size_t src_off = 0;
	613	size_t dest_off = 0;
[a35b458]	614
[abf09311]	615	wchar_t ch;
[f4b1535]	616	while ((ch = str_decode(src, &src_off, n)) != 0) {
	617	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
[b888d5f]	618	break;
	619	}
[a35b458]	620
[f4b1535]	621	dest[dest_off] = '\0';
[b888d5f]	622	}
[16da5f8e]	623
[0f06dbc]	624	/** Convert wide string to string.
[b888d5f]	625	*
[0f06dbc]	626	* Convert wide string @a src to string. The output is written to the buffer
	627	* specified by @a dest and @a size. @a size must be non-zero and the string
	628	* written will always be well-formed.
[16da5f8e]	629	*
[0f06dbc]	630	* @param dest Destination buffer.
	631	* @param size Size of the destination buffer.
	632	* @param src Source wide string.
[16da5f8e]	633	*/
[0f06dbc]	634	void wstr_to_str(char dest, size_t size, const wchar_t src)
[16da5f8e]	635	{
[b888d5f]	636	wchar_t ch;
[0f06dbc]	637	size_t src_idx;
	638	size_t dest_off;
	639
	640	/* There must be space for a null terminator in the buffer. */
[63e27ef]	641	assert(size > 0);
[0f06dbc]	642
	643	src_idx = 0;
	644	dest_off = 0;
[a35b458]	645
[b888d5f]	646	while ((ch = src[src_idx++]) != 0) {
[0f06dbc]	647	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
[b888d5f]	648	break;
[16da5f8e]	649	}
[0f06dbc]	650
	651	dest[dest_off] = '\0';
[16da5f8e]	652	}
	653
[20f1597]	654	/** Find first occurence of character in string.
	655	*
[b888d5f]	656	* @param str String to search.
	657	* @param ch Character to look for.
	658	*
	659	* @return Pointer to character in @a str or NULL if not found.
[20f1597]	660	*/
[dd2cfa7]	661	char str_chr(const char str, wchar_t ch)
[20f1597]	662	{
[b888d5f]	663	wchar_t acc;
	664	size_t off = 0;
[f2d2c7ba]	665	size_t last = 0;
[a35b458]	666
[a7b1071]	667	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
[b888d5f]	668	if (acc == ch)
[dd2cfa7]	669	return (char *) (str + last);
[f2d2c7ba]	670	last = off;
[20f1597]	671	}
[a35b458]	672
[20f1597]	673	return NULL;
	674	}
	675
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position
 * @a pos. The characters after the position (including the terminator)
 * are shifted by one.
 *
 * NOTE(review): only @a pos is checked against @a max_pos, while the shift
 * writes up to index length + 1. Whether the caller is expected to
 * guarantee that much room is not visible here -- confirm against callers.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert to.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_code_points(str);

	if ((pos > len) || (pos + 1 > max_pos))
		return false;

	/*
	 * Shift the tail from the terminator down to pos. The condition is
	 * written as i + 1 > pos so that the unsigned index also terminates
	 * the loop when pos is 0.
	 */
	for (size_t i = len; i + 1 > pos; i--)
		str[i + 1] = str[i];

	str[pos] = ch;

	return true;
}
	705
	706	/** Remove a wide character from a wide string.
	707	*
	708	* Remove a wide character from a wide string at position
	709	* @a pos. The characters after the position are shifted.
	710	*
	711	* @param str String to remove from.
	712	* @param pos Character index to remove.
	713	*
	714	* @return True if the removal was sucessful, false if the position
	715	* is out of bounds.
	716	*
	717	*/
[98000fb]	718	bool wstr_remove(wchar_t *str, size_t pos)
[b888d5f]	719	{
[08e103d4]	720	size_t len = wstr_code_points(str);
[a35b458]	721
[b888d5f]	722	if (pos >= len)
	723	return false;
[a35b458]	724
[98000fb]	725	size_t i;
[b888d5f]	726	for (i = pos + 1; i <= len; i++)
	727	str[i - 1] = str[i];
[a35b458]	728
[b888d5f]	729	return true;
	730	}
	731
/** Duplicate string.
 *
 * Allocate a new string with malloc() and copy characters from the source
 * string into it using str_cpy().
 *
 * The duplicate string is always a well-formed null-terminated UTF-8
 * string, but it can differ from the source string on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string, or NULL if malloc() returned NULL.
 *
 */
char *str_dup(const char *src)
{
	/* One extra byte for the terminator. */
	size_t size = str_bytes(src) + 1;

	char *dest = malloc(size);
	if (!dest)
		return NULL;

	str_cpy(dest, size, src);
	return dest;
}
	758
	759	/** Duplicate string with size limit.
	760	*
	761	* Allocate a new string and copy up to @max_size bytes from the source
	762	* string into it. The duplicate string is allocated via sleeping
	763	* malloc(), thus this function can sleep in no memory conditions.
	764	* No more than @max_size + 1 bytes is allocated, but if the size
	765	* occupied by the source string is smaller than @max_size + 1,
	766	* less is allocated.
	767	*
	768	* The allocation cannot fail and the return value is always
	769	* a valid pointer. The duplicate string is always a well-formed
	770	* null-terminated UTF-8 string, but it can differ from the source
	771	* string on the byte level.
	772	*
	773	* @param src Source string.
	774	* @param n Maximum number of bytes to duplicate.
	775	*
	776	* @return Duplicate string.
	777	*
	778	*/
	779	char str_ndup(const char src, size_t n)
	780	{
[08e103d4]	781	size_t size = str_bytes(src);
[d066259]	782	if (size > n)
	783	size = n;
	784
	785	char *dest = malloc(size + 1);
	786	if (!dest)
	787	return NULL;
	788
	789	str_ncpy(dest, size + 1, src, size);
	790	return dest;
	791	}
	792
[30a5470]	793	/** Convert string to uint64_t (internal variant).
	794	*
	795	* @param nptr Pointer to string.
	796	* @param endptr Pointer to the first invalid character is stored here.
	797	* @param base Zero or number between 2 and 36 inclusive.
	798	* @param neg Indication of unary minus is stored here.
	799	* @apram result Result of the conversion.
	800	*
	801	* @return EOK if conversion was successful.
	802	*
	803	*/
[b7fd2a0]	804	static errno_t str_uint(const char nptr, char *endptr, unsigned int base,
[30a5470]	805	bool neg, uint64_t result)
	806	{
[63e27ef]	807	assert(endptr != NULL);
	808	assert(neg != NULL);
	809	assert(result != NULL);
[a35b458]	810
[30a5470]	811	*neg = false;
	812	const char *str = nptr;
[a35b458]	813
[30a5470]	814	/* Ignore leading whitespace */
	815	while (isspace(*str))
	816	str++;
[a35b458]	817
[30a5470]	818	if (*str == '-') {
	819	*neg = true;
	820	str++;
	821	} else if (*str == '+')
	822	str++;
[a35b458]	823
[30a5470]	824	if (base == 0) {
	825	/* Decode base if not specified */
	826	base = 10;
[a35b458]	827
[30a5470]	828	if (*str == '0') {
	829	base = 8;
	830	str++;
[a35b458]	831
[30a5470]	832	switch (*str) {
	833	case 'b':
	834	case 'B':
	835	base = 2;
	836	str++;
	837	break;
	838	case 'o':
	839	case 'O':
	840	base = 8;
	841	str++;
	842	break;
	843	case 'd':
	844	case 'D':
	845	case 't':
	846	case 'T':
	847	base = 10;
	848	str++;
	849	break;
	850	case 'x':
	851	case 'X':
	852	base = 16;
	853	str++;
	854	break;
[4ce914d4]	855	default:
	856	str--;
[30a5470]	857	}
	858	}
	859	} else {
	860	/* Check base range */
	861	if ((base < 2) \|\| (base > 36)) {
	862	endptr = (char ) str;
	863	return EINVAL;
	864	}
	865	}
[a35b458]	866
[30a5470]	867	*result = 0;
	868	const char *startstr = str;
[a35b458]	869
[30a5470]	870	while (*str != 0) {
	871	unsigned int digit;
[a35b458]	872
[30a5470]	873	if ((str >= 'a') && (str <= 'z'))
	874	digit = *str - 'a' + 10;
	875	else if ((str >= 'A') && (str <= 'Z'))
	876	digit = *str - 'A' + 10;
	877	else if ((str >= '0') && (str <= '9'))
	878	digit = *str - '0';
	879	else
	880	break;
[a35b458]	881
[30a5470]	882	if (digit >= base)
	883	break;
[a35b458]	884
[30a5470]	885	uint64_t prev = *result;
	886	result = (result) * base + digit;
[a35b458]	887
[30a5470]	888	if (*result < prev) {
	889	/* Overflow */
	890	endptr = (char ) str;
	891	return EOVERFLOW;
	892	}
[a35b458]	893
[30a5470]	894	str++;
	895	}
[a35b458]	896
[30a5470]	897	if (str == startstr) {
	898	/*
	899	* No digits were decoded => first invalid character is
	900	* the first character of the string.
	901	*/
	902	str = nptr;
	903	}
[a35b458]	904
[30a5470]	905	endptr = (char ) str;
[a35b458]	906
[30a5470]	907	if (str == nptr)
	908	return EINVAL;
[a35b458]	909
[30a5470]	910	return EOK;
	911	}
	912
	913	/** Convert string to uint64_t.
	914	*
	915	* @param nptr Pointer to string.
	916	* @param endptr If not NULL, pointer to the first invalid character
	917	* is stored here.
	918	* @param base Zero or number between 2 and 36 inclusive.
	919	* @param strict Do not allow any trailing characters.
[4ce914d4]	920	* @param result Result of the conversion.
[30a5470]	921	*
	922	* @return EOK if conversion was successful.
	923	*
	924	*/
[b7fd2a0]	925	errno_t str_uint64_t(const char nptr, char *endptr, unsigned int base,
[30a5470]	926	bool strict, uint64_t *result)
	927	{
[63e27ef]	928	assert(result != NULL);
[a35b458]	929
[30a5470]	930	bool neg;
	931	char *lendptr;
[b7fd2a0]	932	errno_t ret = str_uint(nptr, &lendptr, base, &neg, result);
[a35b458]	933
[30a5470]	934	if (endptr != NULL)
	935	endptr = (char ) lendptr;
[a35b458]	936
[30a5470]	937	if (ret != EOK)
	938	return ret;
[a35b458]	939
[30a5470]	940	/* Do not allow negative values */
	941	if (neg)
	942	return EINVAL;
[a35b458]	943
[7c3fb9b]	944	/*
	945	* Check whether we are at the end of
	946	* the string in strict mode
	947	*/
[30a5470]	948	if ((strict) && (*lendptr != 0))
	949	return EINVAL;
[a35b458]	950
[30a5470]	951	return EOK;
	952	}
	953
/** Scale a value down and pick a decimal (SI) order suffix.
 *
 * A scale is chosen only when @a val exceeds one thousand times that
 * scale, so the scaled value keeps at least four digits. The suffix
 * matches the divisor: 'k' (10^3), 'M' (10^6), 'G' (10^9), 'T' (10^12),
 * 'P' (10^15), 'E' (10^18), or ' ' when the value is left unscaled.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value (integer division) is stored here.
 * @param suffix Suffix character is stored here.
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
	979
/** Scale a byte count down and pick a binary (IEC) order suffix.
 *
 * A scale is chosen only when @a val exceeds 1024 times that scale.
 * The suffix matches the divisor: "KiB" (2^10), "MiB" (2^20),
 * "GiB" (2^30), "TiB" (2^40), "PiB" (2^50), or plain bytes when the
 * value is left unscaled.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value (integer division) is stored here.
 * @param suffix Pointer to a constant suffix string is stored here.
 * @param fixed  If true, the plain-byte suffix is "B " with two trailing
 *               spaces so that it is as wide as the other suffixes;
 *               otherwise it is "B".
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		if (fixed)
			*suffix = "B  ";
		else
			*suffix = "B";
	}
}
	1006
[16da5f8e]	1007	/** @}
	1008	*/

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: mainline/kernel/generic/src/lib/str.c@ 08e103d4

Download in other formats: