Context Navigation

str.c@ 08e103d4

Visit:

Last change on this file since 08e103d4 was 08e103d4, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 6 years ago

Use clearer naming for string length functions

This and the following commit change the names of functions, as well as
their documentation, to use unambiguous terms "bytes" and "code points"
instead of ambiguous terms "size", "length", and "characters".

Property mode set to 100644

File size: 45.0 KB

Rev	Line
[936351c1]	1	/*
[d066259]	2	* Copyright (c) 2001-2004 Jakub Jermar
[df4ed85]	3	* Copyright (c) 2005 Martin Decky
[576845ec]	4	* Copyright (c) 2008 Jiri Svoboda
[22cf42d9]	5	* Copyright (c) 2011 Martin Sucha
[c4bbca8]	6	* Copyright (c) 2011 Oleg Romanenko
[936351c1]	7	* All rights reserved.
	8	*
	9	* Redistribution and use in source and binary forms, with or without
	10	* modification, are permitted provided that the following conditions
	11	* are met:
	12	*
	13	* - Redistributions of source code must retain the above copyright
	14	* notice, this list of conditions and the following disclaimer.
	15	* - Redistributions in binary form must reproduce the above copyright
	16	* notice, this list of conditions and the following disclaimer in the
	17	* documentation and/or other materials provided with the distribution.
	18	* - The name of the author may not be used to endorse or promote products
	19	* derived from this software without specific prior written permission.
	20	*
	21	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	22	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	23	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	24	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
	25	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	26	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	27	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	28	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	29	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
	30	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	31	*/
	32
[a46da63]	33	/** @addtogroup libc
[b2951e2]	34	* @{
	35	*/
[d066259]	36
	37	/**
	38	* @file
	39	* @brief String functions.
	40	*
	41	* Strings and characters use the Universal Character Set (UCS). The standard
	42	* strings, called just strings are encoded in UTF-8. Wide strings (encoded
	43	* in UTF-32) are supported to a limited degree. A single character is
	44	* represented as wchar_t.@n
	45	*
	46	* Overview of the terminology:@n
	47	*
	48	* Term Meaning
	49	* -------------------- ----------------------------------------------------
	50	* byte 8 bits stored in uint8_t (unsigned 8 bit integer)
	51	*
	52	* character UTF-32 encoded Unicode character, stored in wchar_t
	53	* (signed 32 bit integer), code points 0 .. 1114111
	54	* are valid
	55	*
	56	* ASCII character 7 bit encoded ASCII character, stored in char
	57	* (usually signed 8 bit integer), code points 0 .. 127
	58	* are valid
	59	*
	60	* string UTF-8 encoded NULL-terminated Unicode string, char *
	61	*
	62	* wide string UTF-32 encoded NULL-terminated Unicode string,
	63	* wchar_t *
	64	*
	65	* [wide] string size number of BYTES in a [wide] string (excluding
	66	* the NULL-terminator), size_t
	67	*
	68	* [wide] string length number of CHARACTERS in a [wide] string (excluding
	69	* the NULL-terminator), size_t
	70	*
	71	* [wide] string width number of display cells on a monospace display taken
	72	* by a [wide] string, size_t
	73	*
	74	*
	75	* Overview of string metrics:@n
	76	*
	77	* Metric Abbrev. Type Meaning
	78	* ------ ------ ------ -------------------------------------------------
	79	* size n size_t number of BYTES in a string (excluding the
	80	* NULL-terminator)
	81	*
	82	* length l size_t number of CHARACTERS in a string (excluding the
	83	* null terminator)
	84	*
	85	* width w size_t number of display cells on a monospace display
	86	* taken by a string
	87	*
	88	*
	89	* Function naming prefixes:@n
	90	*
	91	* chr_ operate on characters
	92	* ascii_ operate on ASCII characters
	93	* str_ operate on strings
	94	* wstr_ operate on wide strings
	95	*
	96	* [w]str_[n|l|w] operate on a prefix limited by size, length
	97	* or width
	98	*
	99	*
	100	* A specific character inside a [wide] string can be referred to by:@n
	101	*
	102	* pointer (char *, wchar_t *)
	103	* byte offset (size_t)
	104	* character index (size_t)
	105	*
[b2951e2]	106	*/
	107
[19f857a]	108	#include <str.h>
[d066259]	109
[38d150e]	110	#include <assert.h>
[e64c4b2]	111	#include <ctype.h>
[171f9a1]	112	#include <errno.h>
[d066259]	113	#include <stdbool.h>
	114	#include <stddef.h>
	115	#include <stdint.h>
	116	#include <stdlib.h>
	117
[f2b8cdc]	118	#include <align.h>
[095003a8]	119	#include <mem.h>
[171f9a1]	120
/** Evaluate @a cond only if wchar_t is signed.
 *
 * When the compiler defines __WCHAR_UNSIGNED__ the condition is replaced
 * by the constant true; otherwise the condition is used as written.
 */
#ifdef __WCHAR_UNSIGNED__
#define WCHAR_SIGNED_CHECK(cond)  (true)
#else
#define WCHAR_SIGNED_CHECK(cond)  (cond)
#endif

/** Byte mask consisting of lowest @a n bits (out of 8), 0 <= n <= 8 */
#define LO_MASK_8(n)  ((uint8_t) ((1U << (n)) - 1))

/** Mask consisting of lowest @a n bits (out of 32), 0 <= n <= 31 */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1))

/** Byte mask consisting of highest @a n bits (out of 8), 0 <= n <= 8
 *
 * The complement is cast back to uint8_t: the ~ operator promotes its
 * operand to int, so without the cast the result would be a negative int
 * with all upper bits set instead of an 8-bit mask.
 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
	139
	140	/** Decode a single character from a string.
	141	*
	142	* Decode a single character from a string of size @a size. Decoding starts
	143	* at @a offset and this offset is moved to the beginning of the next
	144	* character. In case of decoding error, offset generally advances at least
	145	* by one. However, offset is never moved beyond size.
	146	*
	147	* @param str String (not necessarily NULL-terminated).
	148	* @param offset Byte offset in string where to start decoding.
	149	* @param size Size of the string (in bytes).
	150	*
	151	* @return Value of decoded character, U_SPECIAL on decoding error or
	152	* NULL if attempt to decode beyond @a size.
	153	*
	154	*/
	155	wchar_t str_decode(const char str, size_t offset, size_t size)
	156	{
	157	if (*offset + 1 > size)
	158	return 0;
[a35b458]	159
[171f9a1]	160	/* First byte read from string */
	161	uint8_t b0 = (uint8_t) str[(*offset)++];
[a35b458]	162
[171f9a1]	163	/* Determine code length */
[a35b458]	164
[171f9a1]	165	unsigned int b0_bits; /* Data bits in first byte */
	166	unsigned int cbytes; /* Number of continuation bytes */
[a35b458]	167
[171f9a1]	168	if ((b0 & 0x80) == 0) {
	169	/* 0xxxxxxx (Plain ASCII) */
	170	b0_bits = 7;
	171	cbytes = 0;
	172	} else if ((b0 & 0xe0) == 0xc0) {
	173	/* 110xxxxx 10xxxxxx */
	174	b0_bits = 5;
	175	cbytes = 1;
	176	} else if ((b0 & 0xf0) == 0xe0) {
	177	/* 1110xxxx 10xxxxxx 10xxxxxx */
	178	b0_bits = 4;
	179	cbytes = 2;
	180	} else if ((b0 & 0xf8) == 0xf0) {
	181	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
	182	b0_bits = 3;
	183	cbytes = 3;
	184	} else {
	185	/* 10xxxxxx -- unexpected continuation byte */
	186	return U_SPECIAL;
	187	}
[a35b458]	188
[171f9a1]	189	if (*offset + cbytes > size)
	190	return U_SPECIAL;
[a35b458]	191
[171f9a1]	192	wchar_t ch = b0 & LO_MASK_8(b0_bits);
[a35b458]	193
[171f9a1]	194	/* Decode continuation bytes */
	195	while (cbytes > 0) {
	196	uint8_t b = (uint8_t) str[(*offset)++];
[a35b458]	197
[171f9a1]	198	/* Must be 10xxxxxx */
	199	if ((b & 0xc0) != 0x80)
	200	return U_SPECIAL;
[a35b458]	201
[171f9a1]	202	/* Shift data bits to ch */
	203	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
	204	cbytes--;
	205	}
[a35b458]	206
[171f9a1]	207	return ch;
	208	}
	209
[568693b]	210	/** Decode a single character from a string to the left.
	211	*
	212	* Decode a single character from a string of size @a size. Decoding starts
	213	* at @a offset and this offset is moved to the beginning of the previous
	214	* character. In case of decoding error, offset generally decreases at least
	215	* by one. However, offset is never moved before 0.
	216	*
	217	* @param str String (not necessarily NULL-terminated).
	218	* @param offset Byte offset in string where to start decoding.
	219	* @param size Size of the string (in bytes).
	220	*
	221	* @return Value of decoded character, U_SPECIAL on decoding error or
	222	* NULL if attempt to decode beyond @a start of str.
	223	*
	224	*/
	225	wchar_t str_decode_reverse(const char str, size_t offset, size_t size)
	226	{
	227	if (*offset == 0)
	228	return 0;
[a35b458]	229
[568693b]	230	size_t processed = 0;
	231	/* Continue while continuation bytes found */
	232	while (*offset > 0 && processed < 4) {
	233	uint8_t b = (uint8_t) str[--(*offset)];
[a35b458]	234
[568693b]	235	if (processed == 0 && (b & 0x80) == 0) {
	236	/* 0xxxxxxx (Plain ASCII) */
	237	return b & 0x7f;
[1433ecda]	238	} else if ((b & 0xe0) == 0xc0 \|\| (b & 0xf0) == 0xe0 \|\|
[568693b]	239	(b & 0xf8) == 0xf0) {
	240	/* Start byte */
	241	size_t start_offset = *offset;
	242	return str_decode(str, &start_offset, size);
[1433ecda]	243	} else if ((b & 0xc0) != 0x80) {
[568693b]	244	/* Not a continuation byte */
	245	return U_SPECIAL;
	246	}
	247	processed++;
	248	}
	249	/* Too many continuation bytes */
	250	return U_SPECIAL;
	251	}
	252
[171f9a1]	253	/** Encode a single character to string representation.
	254	*
	255	* Encode a single character to string representation (i.e. UTF-8) and store
	256	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
	257	* is moved to the position where the next character can be written to.
	258	*
	259	* @param ch Input character.
	260	* @param str Output buffer.
	261	* @param offset Byte offset where to start writing.
	262	* @param size Size of the output buffer (in bytes).
	263	*
	264	* @return EOK if the character was encoded successfully, EOVERFLOW if there
[d4a3ee5]	265	* was not enough space in the output buffer or EINVAL if the character
	266	* code was invalid.
[171f9a1]	267	*/
[b7fd2a0]	268	errno_t chr_encode(const wchar_t ch, char str, size_t offset, size_t size)
[171f9a1]	269	{
	270	if (*offset >= size)
	271	return EOVERFLOW;
[a35b458]	272
[171f9a1]	273	if (!chr_check(ch))
	274	return EINVAL;
[a35b458]	275
[7c3fb9b]	276	/*
	277	* Unsigned version of ch (bit operations should only be done
	278	* on unsigned types).
	279	*/
[171f9a1]	280	uint32_t cc = (uint32_t) ch;
[a35b458]	281
[171f9a1]	282	/* Determine how many continuation bytes are needed */
[a35b458]	283
[171f9a1]	284	unsigned int b0_bits; /* Data bits in first byte */
	285	unsigned int cbytes; /* Number of continuation bytes */
[a35b458]	286
[171f9a1]	287	if ((cc & ~LO_MASK_32(7)) == 0) {
	288	b0_bits = 7;
	289	cbytes = 0;
	290	} else if ((cc & ~LO_MASK_32(11)) == 0) {
	291	b0_bits = 5;
	292	cbytes = 1;
	293	} else if ((cc & ~LO_MASK_32(16)) == 0) {
	294	b0_bits = 4;
	295	cbytes = 2;
	296	} else if ((cc & ~LO_MASK_32(21)) == 0) {
	297	b0_bits = 3;
	298	cbytes = 3;
	299	} else {
	300	/* Codes longer than 21 bits are not supported */
	301	return EINVAL;
	302	}
[a35b458]	303
[171f9a1]	304	/* Check for available space in buffer */
	305	if (*offset + cbytes >= size)
	306	return EOVERFLOW;
[a35b458]	307
[171f9a1]	308	/* Encode continuation bytes */
	309	unsigned int i;
	310	for (i = cbytes; i > 0; i--) {
	311	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
	312	cc = cc >> CONT_BITS;
	313	}
[a35b458]	314
[171f9a1]	315	/* Encode first byte */
	316	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
[a35b458]	317
[171f9a1]	318	/* Advance offset */
	319	*offset += cbytes + 1;
[a35b458]	320
[171f9a1]	321	return EOK;
	322	}
	323
/** Get size of string.
 *
 * Count the bytes occupied by the string @a str, not including the
 * null terminator.
 *
 * @param str Null-terminated string to measure.
 *
 * @return Number of bytes used by the string.
 *
 */
size_t str_bytes(const char *str)
{
	const char *end = str;

	/* Walk to the terminator */
	while (*end != 0)
		end++;

	return (size_t) (end - str);
}
	343
	344	/** Get size of wide string.
	345	*
	346	* Get the number of bytes which are used by the wide string @a str (excluding the
	347	* NULL-terminator).
	348	*
	349	* @param str Wide string to consider.
	350	*
	351	* @return Number of bytes used by the wide string
	352	*
	353	*/
[08e103d4]	354	size_t wstr_bytes(const wchar_t *str)
[f2b8cdc]	355	{
[08e103d4]	356	return (wstr_code_points(str) * sizeof(wchar_t));
[f2b8cdc]	357	}
	358
	359	/** Get size of string with length limit.
	360	*
	361	* Get the number of bytes which are used by up to @a max_len first
	362	* characters in the string @a str. If @a max_len is greater than
	363	* the length of @a str, the entire string is measured (excluding the
	364	* NULL-terminator).
	365	*
	366	* @param str String to consider.
	367	* @param max_len Maximum number of characters to measure.
	368	*
	369	* @return Number of bytes used by the characters.
	370	*
	371	*/
[08e103d4]	372	size_t str_lbytes(const char *str, size_t max_len)
[f2b8cdc]	373	{
[d4a3ee5]	374	size_t len = 0;
[f2b8cdc]	375	size_t offset = 0;
[a35b458]	376
[f2b8cdc]	377	while (len < max_len) {
	378	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
	379	break;
[a35b458]	380
[f2b8cdc]	381	len++;
	382	}
[a35b458]	383
[f2b8cdc]	384	return offset;
	385	}
	386
/** Get size of string with size limit.
 *
 * Get the number of bytes which are used by the string @a str
 * (excluding the null terminator), but no more than @a max_size bytes.
 * At most @a max_size bytes of @a str are read, so the string does not
 * have to be null-terminated within that range.
 *
 * @param str      String to consider.
 * @param max_size Maximum number of bytes to measure.
 *
 * @return Number of bytes used by the string.
 *
 */
size_t str_nbytes(const char *str, size_t max_size)
{
	size_t size = 0;

	/* Test the limit first so that str[max_size] is never read */
	while ((size < max_size) && (str[size] != 0))
		size++;

	return size;
}
	407
	408	/** Get size of wide string with size limit.
	409	*
	410	* Get the number of bytes which are used by the wide string @a str
	411	* (excluding the NULL-terminator), but no more than @max_size bytes.
	412	*
	413	* @param str Wide string to consider.
	414	* @param max_size Maximum number of bytes to measure.
	415	*
	416	* @return Number of bytes used by the wide string
	417	*
	418	*/
[08e103d4]	419	size_t wstr_nbytes(const wchar_t *str, size_t max_size)
[560d79f]	420	{
[08e103d4]	421	return (wstr_ncode_points(str, max_size) * sizeof(wchar_t));
[560d79f]	422	}
	423
/** Get size of wide string with length limit.
 *
 * Get the number of bytes which are used by up to @a max_len first
 * wide characters in the wide string @a str. If @a max_len is greater than
 * the length of @a str, the entire wide string is measured (excluding the
 * null terminator).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lbytes(const wchar_t *str, size_t max_len)
{
	size_t len = 0;

	/*
	 * Count characters directly instead of converting max_len to a byte
	 * limit: max_len * sizeof(wchar_t) can wrap around for large limits.
	 */
	while ((len < max_len) && (str[len] != 0))
		len++;

	return len * sizeof(wchar_t);
}
	441
	442	/** Get number of characters in a string.
	443	*
	444	* @param str NULL-terminated string.
	445	*
	446	* @return Number of characters in string.
	447	*
	448	*/
[08e103d4]	449	size_t str_code_points(const char *str)
[f2b8cdc]	450	{
[d4a3ee5]	451	size_t len = 0;
[f2b8cdc]	452	size_t offset = 0;
[a35b458]	453
[f2b8cdc]	454	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
	455	len++;
[a35b458]	456
[f2b8cdc]	457	return len;
	458	}
	459
	460	/** Get number of characters in a wide string.
	461	*
	462	* @param str NULL-terminated wide string.
	463	*
	464	* @return Number of characters in @a str.
	465	*
	466	*/
[08e103d4]	467	size_t wstr_code_points(const wchar_t *wstr)
[f2b8cdc]	468	{
[d4a3ee5]	469	size_t len = 0;
[a35b458]	470
[f2b8cdc]	471	while (*wstr++ != 0)
	472	len++;
[a35b458]	473
[f2b8cdc]	474	return len;
	475	}
	476
	477	/** Get number of characters in a string with size limit.
	478	*
	479	* @param str NULL-terminated string.
	480	* @param size Maximum number of bytes to consider.
	481	*
	482	* @return Number of characters in string.
	483	*
	484	*/
[08e103d4]	485	size_t str_ncode_points(const char *str, size_t size)
[f2b8cdc]	486	{
[d4a3ee5]	487	size_t len = 0;
[f2b8cdc]	488	size_t offset = 0;
[a35b458]	489
[f2b8cdc]	490	while (str_decode(str, &offset, size) != 0)
	491	len++;
[a35b458]	492
[f2b8cdc]	493	return len;
	494	}
	495
	496	/** Get number of characters in a string with size limit.
	497	*
	498	* @param str NULL-terminated string.
	499	* @param size Maximum number of bytes to consider.
	500	*
	501	* @return Number of characters in string.
	502	*
	503	*/
[08e103d4]	504	size_t wstr_ncode_points(const wchar_t *str, size_t size)
[f2b8cdc]	505	{
[d4a3ee5]	506	size_t len = 0;
	507	size_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
	508	size_t offset = 0;
[a35b458]	509
[f2b8cdc]	510	while ((offset < limit) && (*str++ != 0)) {
	511	len++;
	512	offset += sizeof(wchar_t);
	513	}
[a35b458]	514
[f2b8cdc]	515	return len;
	516	}
	517
/** Get character display width on a character cell display.
 *
 * Every character is currently reported as one cell wide; the argument
 * is not examined.
 *
 * @param ch Character
 * @return Width of character in cells.
 */
size_t chr_width(wchar_t ch)
{
	(void) ch;

	return 1;
}
	527
	528	/** Get string display width on a character cell display.
	529	*
	530	* @param str String
	531	* @return Width of string in cells.
	532	*/
	533	size_t str_width(const char *str)
	534	{
	535	size_t width = 0;
	536	size_t offset = 0;
	537	wchar_t ch;
[a35b458]	538
[be2a38ad]	539	while ((ch = str_decode(str, &offset, STR_NO_LIMIT)) != 0)
	540	width += chr_width(ch);
[a35b458]	541
[be2a38ad]	542	return width;
	543	}
	544
/** Check whether character is plain ASCII.
 *
 * @param ch Character to test.
 *
 * @return True if character is in the range 0 .. 127.
 *
 */
bool ascii_check(wchar_t ch)
{
	return WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 127);
}
	557
/** Check whether character is valid
 *
 * @param ch Character to test.
 *
 * @return True if character is in the range 0 .. 1114111 (0x10ffff).
 *
 */
bool chr_check(wchar_t ch)
{
	return WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 1114111);
}
[936351c1]	570
[f2b8cdc]	571	/** Compare two NULL terminated strings.
	572	*
	573	* Do a char-by-char comparison of two NULL-terminated strings.
[4efeab5]	574	* The strings are considered equal iff their length is equal
	575	* and both strings consist of the same sequence of characters.
	576	*
[1772e6d]	577	* A string S1 is less than another string S2 if it has a character with
	578	* lower value at the first character position where the strings differ.
	579	* If the strings differ in length, the shorter one is treated as if
	580	* padded by characters with a value of zero.
[f2b8cdc]	581	*
	582	* @param s1 First string to compare.
	583	* @param s2 Second string to compare.
	584	*
[1772e6d]	585	* @return 0 if the strings are equal, -1 if the first is less than the second,
	586	* 1 if the second is less than the first.
[f2b8cdc]	587	*
	588	*/
	589	int str_cmp(const char s1, const char s2)
	590	{
	591	wchar_t c1 = 0;
	592	wchar_t c2 = 0;
[8227d63]	593
[f2b8cdc]	594	size_t off1 = 0;
	595	size_t off2 = 0;
	596
	597	while (true) {
	598	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
	599	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
	600
	601	if (c1 < c2)
	602	return -1;
[8227d63]	603
[f2b8cdc]	604	if (c1 > c2)
	605	return 1;
	606
	607	if (c1 == 0 \|\| c2 == 0)
[8227d63]	608	break;
[f2b8cdc]	609	}
	610
	611	return 0;
	612	}
	613
	614	/** Compare two NULL terminated strings with length limit.
	615	*
	616	* Do a char-by-char comparison of two NULL-terminated strings.
[4efeab5]	617	* The strings are considered equal iff
[08e103d4]	618	* min(str_code_points(s1), max_len) == min(str_code_points(s2), max_len)
[4efeab5]	619	* and both strings consist of the same sequence of characters,
	620	* up to max_len characters.
	621	*
[1772e6d]	622	* A string S1 is less than another string S2 if it has a character with
	623	* lower value at the first character position where the strings differ.
	624	* If the strings differ in length, the shorter one is treated as if
	625	* padded by characters with a value of zero. Only the first max_len
	626	* characters are considered.
[f2b8cdc]	627	*
	628	* @param s1 First string to compare.
	629	* @param s2 Second string to compare.
	630	* @param max_len Maximum number of characters to consider.
	631	*
[1772e6d]	632	* @return 0 if the strings are equal, -1 if the first is less than the second,
	633	* 1 if the second is less than the first.
[f2b8cdc]	634	*
	635	*/
[d4a3ee5]	636	int str_lcmp(const char s1, const char s2, size_t max_len)
[f2b8cdc]	637	{
	638	wchar_t c1 = 0;
	639	wchar_t c2 = 0;
[8227d63]	640
[f2b8cdc]	641	size_t off1 = 0;
	642	size_t off2 = 0;
[8227d63]	643
[d4a3ee5]	644	size_t len = 0;
[f2b8cdc]	645
	646	while (true) {
	647	if (len >= max_len)
	648	break;
	649
	650	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
	651	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
	652
[8227d63]	653	if (c1 < c2)
	654	return -1;
	655
	656	if (c1 > c2)
	657	return 1;
	658
	659	if (c1 == 0 \|\| c2 == 0)
	660	break;
	661
	662	++len;
	663	}
	664
	665	return 0;
	666
	667	}
	668
	669	/** Compare two NULL terminated strings in case-insensitive manner.
	670	*
	671	* Do a char-by-char comparison of two NULL-terminated strings.
	672	* The strings are considered equal iff their length is equal
	673	* and both strings consist of the same sequence of characters
	674	* when converted to lower case.
	675	*
	676	* A string S1 is less than another string S2 if it has a character with
	677	* lower value at the first character position where the strings differ.
	678	* If the strings differ in length, the shorter one is treated as if
	679	* padded by characters with a value of zero.
	680	*
	681	* @param s1 First string to compare.
	682	* @param s2 Second string to compare.
	683	*
	684	* @return 0 if the strings are equal, -1 if the first is less than the second,
	685	* 1 if the second is less than the first.
	686	*
	687	*/
	688	int str_casecmp(const char s1, const char s2)
	689	{
	690	wchar_t c1 = 0;
	691	wchar_t c2 = 0;
	692
	693	size_t off1 = 0;
	694	size_t off2 = 0;
	695
	696	while (true) {
	697	c1 = tolower(str_decode(s1, &off1, STR_NO_LIMIT));
	698	c2 = tolower(str_decode(s2, &off2, STR_NO_LIMIT));
	699
	700	if (c1 < c2)
	701	return -1;
	702
	703	if (c1 > c2)
	704	return 1;
	705
	706	if (c1 == 0 \|\| c2 == 0)
	707	break;
	708	}
	709
	710	return 0;
	711	}
	712
	713	/** Compare two NULL terminated strings with length limit in case-insensitive
	714	* manner.
	715	*
	716	* Do a char-by-char comparison of two NULL-terminated strings.
	717	* The strings are considered equal iff
[08e103d4]	718	* min(str_code_points(s1), max_len) == min(str_code_points(s2), max_len)
[8227d63]	719	* and both strings consist of the same sequence of characters,
	720	* up to max_len characters.
	721	*
	722	* A string S1 is less than another string S2 if it has a character with
	723	* lower value at the first character position where the strings differ.
	724	* If the strings differ in length, the shorter one is treated as if
	725	* padded by characters with a value of zero. Only the first max_len
	726	* characters are considered.
	727	*
	728	* @param s1 First string to compare.
	729	* @param s2 Second string to compare.
	730	* @param max_len Maximum number of characters to consider.
	731	*
	732	* @return 0 if the strings are equal, -1 if the first is less than the second,
	733	* 1 if the second is less than the first.
	734	*
	735	*/
	736	int str_lcasecmp(const char s1, const char s2, size_t max_len)
	737	{
	738	wchar_t c1 = 0;
	739	wchar_t c2 = 0;
[a35b458]	740
[8227d63]	741	size_t off1 = 0;
	742	size_t off2 = 0;
[a35b458]	743
[8227d63]	744	size_t len = 0;
	745
	746	while (true) {
	747	if (len >= max_len)
	748	break;
	749
	750	c1 = tolower(str_decode(s1, &off1, STR_NO_LIMIT));
	751	c2 = tolower(str_decode(s2, &off2, STR_NO_LIMIT));
	752
[f2b8cdc]	753	if (c1 < c2)
	754	return -1;
	755
	756	if (c1 > c2)
	757	return 1;
	758
	759	if (c1 == 0 \|\| c2 == 0)
	760	break;
	761
[1b20da0]	762	++len;
[f2b8cdc]	763	}
	764
	765	return 0;
	766
	767	}
	768
[dce39b4]	769	/** Test whether p is a prefix of s.
	770	*
	771	* Do a char-by-char comparison of two NULL-terminated strings
	772	* and determine if p is a prefix of s.
	773	*
	774	* @param s The string in which to look
	775	* @param p The string to check if it is a prefix of s
	776	*
	777	* @return true iff p is prefix of s else false
	778	*
	779	*/
	780	bool str_test_prefix(const char s, const char p)
	781	{
	782	wchar_t c1 = 0;
	783	wchar_t c2 = 0;
[a35b458]	784
[dce39b4]	785	size_t off1 = 0;
	786	size_t off2 = 0;
	787
	788	while (true) {
	789	c1 = str_decode(s, &off1, STR_NO_LIMIT);
	790	c2 = str_decode(p, &off2, STR_NO_LIMIT);
[a35b458]	791
[dce39b4]	792	if (c2 == 0)
	793	return true;
	794
	795	if (c1 != c2)
	796	return false;
[a35b458]	797
[dce39b4]	798	if (c1 == 0)
	799	break;
	800	}
	801
	802	return false;
	803	}
	804
[6eb2e96]	805	/** Copy string.
[f2b8cdc]	806	*
[6eb2e96]	807	* Copy source string @a src to destination buffer @a dest.
	808	* No more than @a size bytes are written. If the size of the output buffer
	809	* is at least one byte, the output string will always be well-formed, i.e.
	810	* null-terminated and containing only complete characters.
[f2b8cdc]	811	*
[abf09311]	812	* @param dest Destination buffer.
[6700ee2]	813	* @param count Size of the destination buffer (must be > 0).
[6eb2e96]	814	* @param src Source string.
[8e893ae]	815	*
[f2b8cdc]	816	*/
[6eb2e96]	817	void str_cpy(char dest, size_t size, const char src)
[f2b8cdc]	818	{
[6700ee2]	819	/* There must be space for a null terminator in the buffer. */
	820	assert(size > 0);
[d066259]	821	assert(src != NULL);
[a35b458]	822
[abf09311]	823	size_t src_off = 0;
	824	size_t dest_off = 0;
[a35b458]	825
[abf09311]	826	wchar_t ch;
[6eb2e96]	827	while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
	828	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
	829	break;
	830	}
[a35b458]	831
[6eb2e96]	832	dest[dest_off] = '\0';
	833	}
	834
	835	/** Copy size-limited substring.
	836	*
[6700ee2]	837	* Copy prefix of string @a src of max. size @a size to destination buffer
	838	* @a dest. No more than @a size bytes are written. The output string will
	839	* always be well-formed, i.e. null-terminated and containing only complete
	840	* characters.
[6eb2e96]	841	*
	842	* No more than @a n bytes are read from the input string, so it does not
	843	* have to be null-terminated.
	844	*
[abf09311]	845	* @param dest Destination buffer.
[6700ee2]	846	* @param count Size of the destination buffer (must be > 0).
[6eb2e96]	847	* @param src Source string.
[abf09311]	848	* @param n Maximum number of bytes to read from @a src.
[8e893ae]	849	*
[6eb2e96]	850	*/
	851	void str_ncpy(char dest, size_t size, const char src, size_t n)
	852	{
[6700ee2]	853	/* There must be space for a null terminator in the buffer. */
	854	assert(size > 0);
[a35b458]	855
[abf09311]	856	size_t src_off = 0;
	857	size_t dest_off = 0;
[a35b458]	858
[abf09311]	859	wchar_t ch;
[6eb2e96]	860	while ((ch = str_decode(src, &src_off, n)) != 0) {
	861	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
[f2b8cdc]	862	break;
	863	}
[a35b458]	864
[6eb2e96]	865	dest[dest_off] = '\0';
[f2b8cdc]	866	}
	867
/** Append one string to another.
 *
 * Append source string @a src to string in destination buffer @a dest.
 * Size of the destination buffer is @a size. If the string already in
 * @a dest occupies @a size or more bytes, nothing is appended. Otherwise
 * the output string will be well-formed, i.e. null-terminated and
 * containing only complete characters.
 *
 * @param dest Destination buffer holding a null-terminated string.
 * @param size Size of the destination buffer.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	size_t dstr_bytes;

	dstr_bytes = str_bytes(dest);
	if (dstr_bytes >= size)
		return;

	/* Copy into the free tail; str_cpy() keeps room for the terminator */
	str_cpy(dest + dstr_bytes, size - dstr_bytes, src);
}
	889
[dcb74c0a]	890	/** Convert space-padded ASCII to string.
	891	*
	892	* Common legacy text encoding in hardware is 7-bit ASCII fitted into
[c3d19ac]	893	* a fixed-width byte buffer (bit 7 always zero), right-padded with spaces
[dcb74c0a]	894	* (ASCII 0x20). Convert space-padded ascii to string representation.
	895	*
	896	* If the text does not fit into the destination buffer, the function converts
	897	* as many characters as possible and returns EOVERFLOW.
	898	*
	899	* If the text contains non-ASCII bytes (with bit 7 set), the whole string is
	900	* converted anyway and invalid characters are replaced with question marks
	901	* (U_SPECIAL) and the function returns EIO.
	902	*
	903	* Regardless of return value upon return @a dest will always be well-formed.
	904	*
	905	* @param dest Destination buffer
	906	* @param size Size of destination buffer
	907	* @param src Space-padded ASCII.
	908	* @param n Size of the source buffer in bytes.
	909	*
	910	* @return EOK on success, EOVERFLOW if the text does not fit
	911	* destination buffer, EIO if the text contains
	912	* non-ASCII bytes.
	913	*/
[b7fd2a0]	914	errno_t spascii_to_str(char dest, size_t size, const uint8_t src, size_t n)
[dcb74c0a]	915	{
	916	size_t sidx;
	917	size_t didx;
	918	size_t dlast;
	919	uint8_t byte;
[b7fd2a0]	920	errno_t rc;
	921	errno_t result;
[dcb74c0a]	922
	923	/* There must be space for a null terminator in the buffer. */
	924	assert(size > 0);
	925	result = EOK;
	926
	927	didx = 0;
	928	dlast = 0;
	929	for (sidx = 0; sidx < n; ++sidx) {
	930	byte = src[sidx];
	931	if (!ascii_check(byte)) {
	932	byte = U_SPECIAL;
	933	result = EIO;
	934	}
	935
	936	rc = chr_encode(byte, dest, &didx, size - 1);
	937	if (rc != EOK) {
	938	assert(rc == EOVERFLOW);
	939	dest[didx] = '\0';
	940	return rc;
	941	}
	942
	943	/* Remember dest index after last non-empty character */
	944	if (byte != 0x20)
	945	dlast = didx;
	946	}
	947
	948	/* Terminate string after last non-empty character */
	949	dest[dlast] = '\0';
	950	return result;
	951	}
	952
[0f06dbc]	953	/** Convert wide string to string.
[f2b8cdc]	954	*
[0f06dbc]	955	* Convert wide string @a src to string. The output is written to the buffer
	956	* specified by @a dest and @a size. @a size must be non-zero and the string
	957	* written will always be well-formed.
[f2b8cdc]	958	*
[0f06dbc]	959	* @param dest Destination buffer.
	960	* @param size Size of the destination buffer.
	961	* @param src Source wide string.
[f2b8cdc]	962	*/
[81e9cb3]	963	void wstr_to_str(char dest, size_t size, const wchar_t src)
[f2b8cdc]	964	{
	965	wchar_t ch;
[0f06dbc]	966	size_t src_idx;
	967	size_t dest_off;
	968
	969	/* There must be space for a null terminator in the buffer. */
	970	assert(size > 0);
[a35b458]	971
[0f06dbc]	972	src_idx = 0;
	973	dest_off = 0;
	974
[f2b8cdc]	975	while ((ch = src[src_idx++]) != 0) {
[81e9cb3]	976	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
[f2b8cdc]	977	break;
	978	}
[0f06dbc]	979
	980	dest[dest_off] = '\0';
[f2b8cdc]	981	}
	982
[82374b2]	983	/** Convert UTF16 string to string.
	984	*
	985	* Convert utf16 string @a src to string. The output is written to the buffer
	986	* specified by @a dest and @a size. @a size must be non-zero and the string
	987	* written will always be well-formed. Surrogate pairs also supported.
	988	*
	989	* @param dest Destination buffer.
	990	* @param size Size of the destination buffer.
	991	* @param src Source utf16 string.
	992	*
[cde999a]	993	* @return EOK, if success, an error code otherwise.
[82374b2]	994	*/
[b7fd2a0]	995	errno_t utf16_to_str(char dest, size_t size, const uint16_t src)
[82374b2]	996	{
[abb7491c]	997	size_t idx = 0, dest_off = 0;
[82374b2]	998	wchar_t ch;
[b7fd2a0]	999	errno_t rc = EOK;
[82374b2]	1000
	1001	/* There must be space for a null terminator in the buffer. */
	1002	assert(size > 0);
	1003
	1004	while (src[idx]) {
	1005	if ((src[idx] & 0xfc00) == 0xd800) {
[abb7491c]	1006	if (src[idx + 1] && (src[idx + 1] & 0xfc00) == 0xdc00) {
[82374b2]	1007	ch = 0x10000;
	1008	ch += (src[idx] & 0x03FF) << 10;
[abb7491c]	1009	ch += (src[idx + 1] & 0x03FF);
[82374b2]	1010	idx += 2;
[1433ecda]	1011	} else
[82374b2]	1012	break;
	1013	} else {
	1014	ch = src[idx];
	1015	idx++;
	1016	}
[abb7491c]	1017	rc = chr_encode(ch, dest, &dest_off, size - 1);
[82374b2]	1018	if (rc != EOK)
	1019	break;
	1020	}
	1021	dest[dest_off] = '\0';
	1022	return rc;
	1023	}
	1024
[b06414f]	1025	/** Convert string to UTF16 string.
	1026	*
	1027	* Convert string @a src to utf16 string. The output is written to the buffer
	1028	* specified by @a dest and @a dlen. @a dlen must be non-zero and the string
	1029	* written will always be well-formed. Surrogate pairs also supported.
	1030	*
	1031	* @param dest Destination buffer.
	1032	* @param dlen Number of utf16 characters that fit in the destination buffer.
	1033	* @param src Source string.
	1034	*
[cde999a]	1035	* @return EOK, if success, an error code otherwise.
[b06414f]	1036	*/
[b7fd2a0]	1037	errno_t str_to_utf16(uint16_t dest, size_t dlen, const char src)
[fc97128]	1038	{
[b7fd2a0]	1039	errno_t rc = EOK;
[abb7491c]	1040	size_t offset = 0;
	1041	size_t idx = 0;
[fc97128]	1042	wchar_t c;
	1043
[b06414f]	1044	assert(dlen > 0);
[a35b458]	1045
[fc97128]	1046	while ((c = str_decode(src, &offset, STR_NO_LIMIT)) != 0) {
	1047	if (c > 0x10000) {
[b06414f]	1048	if (idx + 2 >= dlen - 1) {
[abb7491c]	1049	rc = EOVERFLOW;
[fc97128]	1050	break;
	1051	}
	1052	c = (c - 0x10000);
	1053	dest[idx] = 0xD800 \| (c >> 10);
[abb7491c]	1054	dest[idx + 1] = 0xDC00 \| (c & 0x3FF);
[fc97128]	1055	idx++;
	1056	} else {
[1433ecda]	1057	dest[idx] = c;
[fc97128]	1058	}
	1059
	1060	idx++;
[b06414f]	1061	if (idx >= dlen - 1) {
[abb7491c]	1062	rc = EOVERFLOW;
[fc97128]	1063	break;
	1064	}
	1065	}
	1066
	1067	dest[idx] = '\0';
	1068	return rc;
[f2b8cdc]	1069	}
	1070
/** Get size of UTF-16 string.
 *
 * Count the 16-bit words of the UTF-16 string @a ustr that precede its
 * zero terminator. A surrogate pair counts as two words.
 *
 * @param ustr UTF-16 string to consider.
 *
 * @return Number of words used by the UTF-16 string, terminator excluded.
 *
 */
size_t utf16_wsize(const uint16_t *ustr)
{
	const uint16_t *end = ustr;

	/* Walk to the terminator; the distance travelled is the size. */
	while (*end != 0)
		end++;

	return (size_t) (end - ustr);
}
	1090
[b67c7d64]	1091	/** Convert wide string to new string.
	1092	*
	1093	* Convert wide string @a src to string. Space for the new string is allocated
	1094	* on the heap.
	1095	*
	1096	* @param src Source wide string.
	1097	* @return New string.
	1098	*/
	1099	char wstr_to_astr(const wchar_t src)
	1100	{
	1101	char dbuf[STR_BOUNDS(1)];
	1102	char *str;
	1103	wchar_t ch;
	1104
	1105	size_t src_idx;
	1106	size_t dest_off;
	1107	size_t dest_size;
	1108
	1109	/* Compute size of encoded string. */
	1110
	1111	src_idx = 0;
	1112	dest_size = 0;
	1113
	1114	while ((ch = src[src_idx++]) != 0) {
	1115	dest_off = 0;
	1116	if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
	1117	break;
	1118	dest_size += dest_off;
	1119	}
	1120
	1121	str = malloc(dest_size + 1);
	1122	if (str == NULL)
	1123	return NULL;
	1124
	1125	/* Encode string. */
	1126
	1127	src_idx = 0;
	1128	dest_off = 0;
	1129
	1130	while ((ch = src[src_idx++]) != 0) {
	1131	if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
	1132	break;
	1133	}
	1134
	1135	str[dest_size] = '\0';
	1136	return str;
	1137	}
	1138
[da2bd08]	1139	/** Convert string to wide string.
	1140	*
	1141	* Convert string @a src to wide string. The output is written to the
[0f06dbc]	1142	* buffer specified by @a dest and @a dlen. @a dlen must be non-zero
	1143	* and the wide string written will always be null-terminated.
[da2bd08]	1144	*
	1145	* @param dest Destination buffer.
	1146	* @param dlen Length of destination buffer (number of wchars).
	1147	* @param src Source string.
	1148	*/
[81e9cb3]	1149	void str_to_wstr(wchar_t dest, size_t dlen, const char src)
[da2bd08]	1150	{
	1151	size_t offset;
	1152	size_t di;
	1153	wchar_t c;
	1154
	1155	assert(dlen > 0);
	1156
	1157	offset = 0;
	1158	di = 0;
	1159
	1160	do {
[81e9cb3]	1161	if (di >= dlen - 1)
[da2bd08]	1162	break;
	1163
	1164	c = str_decode(src, &offset, STR_NO_LIMIT);
	1165	dest[di++] = c;
	1166	} while (c != '\0');
	1167
	1168	dest[dlen - 1] = '\0';
	1169	}
	1170
/** Convert string to newly allocated wide string.
 *
 * Allocate a zero-filled wide buffer with room for every code point of
 * @a str plus a terminator, then fill it using str_to_wstr().
 *
 * @param str Source string.
 *
 * @return New null-terminated wide string, or NULL if calloc() fails.
 */
wchar_t *str_to_awstr(const char *str)
{
	/* One slot per code point plus one for the terminator. */
	size_t slots = str_code_points(str) + 1;

	wchar_t *wstr = calloc(slots, sizeof(wchar_t));
	if (wstr != NULL)
		str_to_wstr(wstr, slots, str);

	return wstr;
}
	1189
[f2b8cdc]	1190	/** Find first occurence of character in string.
	1191	*
	1192	* @param str String to search.
	1193	* @param ch Character to look for.
	1194	*
	1195	* @return Pointer to character in @a str or NULL if not found.
	1196	*/
[dd2cfa7]	1197	char str_chr(const char str, wchar_t ch)
[f2b8cdc]	1198	{
	1199	wchar_t acc;
	1200	size_t off = 0;
[f2d2c7ba]	1201	size_t last = 0;
[a35b458]	1202
[f2b8cdc]	1203	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
	1204	if (acc == ch)
[dd2cfa7]	1205	return (char *) (str + last);
[f2d2c7ba]	1206	last = off;
[f2b8cdc]	1207	}
[a35b458]	1208
[f2b8cdc]	1209	return NULL;
	1210	}
	1211
[da680b4b]	1212	/** Find first occurence of substring in string.
	1213	*
	1214	* @param hs Haystack (string)
	1215	* @param n Needle (substring to look for)
	1216	*
	1217	* @return Pointer to character in @a hs or @c NULL if not found.
	1218	*/
	1219	char str_str(const char hs, const char *n)
	1220	{
	1221	size_t off = 0;
	1222
[08e103d4]	1223	if (str_lcmp(hs, n, str_code_points(n)) == 0)
[da680b4b]	1224	return (char *)hs;
	1225
	1226	while (str_decode(hs, &off, STR_NO_LIMIT) != 0) {
[08e103d4]	1227	if (str_lcmp(hs + off, n, str_code_points(n)) == 0)
[da680b4b]	1228	return (char *)(hs + off);
	1229	}
	1230
	1231	return NULL;
	1232	}
	1233
[1737bfb]	1234	/** Removes specified trailing characters from a string.
	1235	*
	1236	* @param str String to remove from.
	1237	* @param ch Character to remove.
	1238	*/
	1239	void str_rtrim(char *str, wchar_t ch)
	1240	{
	1241	size_t off = 0;
	1242	size_t pos = 0;
	1243	wchar_t c;
	1244	bool update_last_chunk = true;
	1245	char *last_chunk = NULL;
	1246
	1247	while ((c = str_decode(str, &off, STR_NO_LIMIT))) {
	1248	if (c != ch) {
	1249	update_last_chunk = true;
	1250	last_chunk = NULL;
	1251	} else if (update_last_chunk) {
	1252	update_last_chunk = false;
	1253	last_chunk = (str + pos);
	1254	}
	1255	pos = off;
	1256	}
	1257
	1258	if (last_chunk)
	1259	*last_chunk = '\0';
	1260	}
	1261
	1262	/** Removes specified leading characters from a string.
	1263	*
	1264	* @param str String to remove from.
	1265	* @param ch Character to remove.
	1266	*/
	1267	void str_ltrim(char *str, wchar_t ch)
	1268	{
	1269	wchar_t acc;
	1270	size_t off = 0;
	1271	size_t pos = 0;
[08e103d4]	1272	size_t str_sz = str_bytes(str);
[1737bfb]	1273
	1274	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
	1275	if (acc != ch)
	1276	break;
	1277	else
	1278	pos = off;
	1279	}
	1280
	1281	if (pos > 0) {
	1282	memmove(str, &str[pos], str_sz - pos);
	1283	pos = str_sz - pos;
[a18a8b9]	1284	str[pos] = '\0';
[1737bfb]	1285	}
	1286	}
	1287
[7afb4a5]	1288	/** Find last occurence of character in string.
	1289	*
	1290	* @param str String to search.
	1291	* @param ch Character to look for.
	1292	*
	1293	* @return Pointer to character in @a str or NULL if not found.
	1294	*/
[dd2cfa7]	1295	char str_rchr(const char str, wchar_t ch)
[7afb4a5]	1296	{
	1297	wchar_t acc;
	1298	size_t off = 0;
[f2d2c7ba]	1299	size_t last = 0;
[d4a3ee5]	1300	const char *res = NULL;
[a35b458]	1301
[7afb4a5]	1302	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
	1303	if (acc == ch)
[f2d2c7ba]	1304	res = (str + last);
	1305	last = off;
[7afb4a5]	1306	}
[a35b458]	1307
[dd2cfa7]	1308	return (char *) res;
[7afb4a5]	1309	}
	1310
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position
 * @a pos. The characters after the position, including the terminator,
 * are shifted one place to the right.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert.
 * @param pos     Character index where to insert.
 * @param max_pos Maximum number of characters the string may hold.
 *                NOTE(review): the terminator can end up at index
 *                @a max_pos, so the buffer is presumed to have room
 *                for @a max_pos + 1 elements; confirm against callers.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds or the string is already full.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_code_points(str);

	if (pos > len)
		return false;

	/*
	 * The string grows by one character, which moves the terminator to
	 * index len + 1. Refuse when that would go beyond max_pos. Since
	 * pos <= len, this also covers the case pos + 1 > max_pos.
	 */
	if (len >= max_pos)
		return false;

	/* Shift the tail right, starting with the terminator. */
	for (size_t i = len + 1; i > pos; i--)
		str[i] = str[i - 1];

	str[pos] = ch;

	return true;
}
	1340
	1341	/** Remove a wide character from a wide string.
	1342	*
	1343	* Remove a wide character from a wide string at position
	1344	* @a pos. The characters after the position are shifted.
	1345	*
	1346	* @param str String to remove from.
	1347	* @param pos Character index to remove.
	1348	*
	1349	* @return True if the removal was sucessful, false if the position
	1350	* is out of bounds.
	1351	*
	1352	*/
[d4a3ee5]	1353	bool wstr_remove(wchar_t *str, size_t pos)
[f2b8cdc]	1354	{
[08e103d4]	1355	size_t len = wstr_code_points(str);
[a35b458]	1356
[f2b8cdc]	1357	if (pos >= len)
	1358	return false;
[a35b458]	1359
[d4a3ee5]	1360	size_t i;
[f2b8cdc]	1361	for (i = pos + 1; i <= len; i++)
	1362	str[i - 1] = str[i];
[a35b458]	1363
[f2b8cdc]	1364	return true;
	1365	}
	1366
/** Duplicate string.
 *
 * Allocate a buffer with malloc() that is large enough for all bytes of
 * @a src plus a terminator and copy the string into it with str_cpy().
 *
 * @param src Source string.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	/* Payload bytes plus one for the terminator. */
	size_t size = str_bytes(src) + 1;

	char *dest = malloc(size);
	if (dest != NULL)
		str_cpy(dest, size, src);

	return dest;
}
	1393
/** Duplicate string with size limit.
 *
 * Allocate a buffer with malloc() and copy up to @a n bytes of @a src
 * into it with str_ncpy(). The buffer has room for the smaller of @a n
 * and the byte size of @a src, plus one byte for the terminator, so no
 * more than @a n + 1 bytes are ever allocated.
 *
 * @param src Source string.
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	size_t avail = str_bytes(src);
	size_t size = (avail > n) ? n : avail;

	char *dest = malloc(size + 1);
	if (dest != NULL)
		str_ncpy(dest, size + 1, src, size);

	return dest;
}
	1427
/** Split string by delimiters.
 *
 * Leading delimiter characters are skipped, then the token extends up to
 * the next delimiter or the end of the string. The delimiter that ends
 * the token is overwritten with a null terminator, so @a s is modified.
 *
 * @param s     String to be tokenized. If NULL, NULL is returned and
 *              @a next is left untouched.
 * @param delim String with the delimiters.
 * @param next  Variable which will receive the pointer to the
 *              continuation of the string following the first
 *              occurrence of any of the delimiter characters (or a
 *              pointer to the end of the string when no delimiter
 *              follows the token). May be NULL.
 * @return      Pointer to the prefix of @a s before the first
 *              delimiter character. NULL if no such prefix
 *              exists.
 */
char *str_tok(char *s, const char *delim, char **next)
{
	if (s == NULL)
		return NULL;

	size_t len = str_bytes(s);
	size_t probe = 0;
	size_t tok_begin = 0;
	wchar_t ch;

	/* Skip over leading delimiters. */
	for (;;) {
		ch = str_decode(s, &probe, len);
		if (ch == 0 || str_chr(delim, ch) == NULL)
			break;
		tok_begin = probe;
	}

	/* Skip over token characters. */
	size_t tok_end = tok_begin;
	probe = tok_begin;
	for (;;) {
		ch = str_decode(s, &probe, len);
		if (ch == 0 || str_chr(delim, ch) != NULL)
			break;
		tok_end = probe;
	}

	if (next != NULL) {
		/*
		 * If a delimiter ended the token, continue after it;
		 * otherwise the end of the string was reached.
		 */
		if (ch != 0)
			*next = &s[probe];
		else
			*next = &s[tok_end];
	}

	/* No more tokens. */
	if (tok_end == tok_begin)
		return NULL;

	/* Overwrite delimiter with NULL terminator. */
	s[tok_end] = '\0';
	return &s[tok_begin];
}
	1474
[d47279b]	1475	/** Convert string to uint64_t (internal variant).
	1476	*
	1477	* @param nptr Pointer to string.
	1478	* @param endptr Pointer to the first invalid character is stored here.
	1479	* @param base Zero or number between 2 and 36 inclusive.
	1480	* @param neg Indication of unary minus is stored here.
	1481	* @apram result Result of the conversion.
	1482	*
	1483	* @return EOK if conversion was successful.
	1484	*
	1485	*/
[b7fd2a0]	1486	static errno_t str_uint(const char nptr, char *endptr, unsigned int base,
[d47279b]	1487	bool neg, uint64_t result)
	1488	{
	1489	assert(endptr != NULL);
	1490	assert(neg != NULL);
	1491	assert(result != NULL);
[a35b458]	1492
[d47279b]	1493	*neg = false;
	1494	const char *str = nptr;
[a35b458]	1495
[d47279b]	1496	/* Ignore leading whitespace */
	1497	while (isspace(*str))
	1498	str++;
[a35b458]	1499
[d47279b]	1500	if (*str == '-') {
	1501	*neg = true;
	1502	str++;
	1503	} else if (*str == '+')
	1504	str++;
[a35b458]	1505
[d47279b]	1506	if (base == 0) {
	1507	/* Decode base if not specified */
	1508	base = 10;
[a35b458]	1509
[d47279b]	1510	if (*str == '0') {
	1511	base = 8;
	1512	str++;
[a35b458]	1513
[d47279b]	1514	switch (*str) {
	1515	case 'b':
	1516	case 'B':
	1517	base = 2;
	1518	str++;
	1519	break;
	1520	case 'o':
	1521	case 'O':
	1522	base = 8;
	1523	str++;
	1524	break;
	1525	case 'd':
	1526	case 'D':
	1527	case 't':
	1528	case 'T':
	1529	base = 10;
	1530	str++;
	1531	break;
	1532	case 'x':
	1533	case 'X':
	1534	base = 16;
	1535	str++;
	1536	break;
	1537	default:
	1538	str--;
	1539	}
	1540	}
	1541	} else {
	1542	/* Check base range */
	1543	if ((base < 2) \|\| (base > 36)) {
	1544	endptr = (char ) str;
	1545	return EINVAL;
	1546	}
	1547	}
[a35b458]	1548
[d47279b]	1549	*result = 0;
	1550	const char *startstr = str;
[a35b458]	1551
[d47279b]	1552	while (*str != 0) {
	1553	unsigned int digit;
[a35b458]	1554
[d47279b]	1555	if ((str >= 'a') && (str <= 'z'))
	1556	digit = *str - 'a' + 10;
	1557	else if ((str >= 'A') && (str <= 'Z'))
	1558	digit = *str - 'A' + 10;
	1559	else if ((str >= '0') && (str <= '9'))
	1560	digit = *str - '0';
	1561	else
	1562	break;
[a35b458]	1563
[d47279b]	1564	if (digit >= base)
	1565	break;
[a35b458]	1566
[d47279b]	1567	uint64_t prev = *result;
	1568	result = (result) * base + digit;
[a35b458]	1569
[d47279b]	1570	if (*result < prev) {
	1571	/* Overflow */
	1572	endptr = (char ) str;
	1573	return EOVERFLOW;
	1574	}
[a35b458]	1575
[d47279b]	1576	str++;
	1577	}
[a35b458]	1578
[d47279b]	1579	if (str == startstr) {
	1580	/*
	1581	* No digits were decoded => first invalid character is
	1582	* the first character of the string.
	1583	*/
	1584	str = nptr;
	1585	}
[a35b458]	1586
[d47279b]	1587	endptr = (char ) str;
[a35b458]	1588
[d47279b]	1589	if (str == nptr)
	1590	return EINVAL;
[a35b458]	1591
[d47279b]	1592	return EOK;
	1593	}
	1594
[d7f6248]	1595	/** Convert string to uint8_t.
	1596	*
	1597	* @param nptr Pointer to string.
	1598	* @param endptr If not NULL, pointer to the first invalid character
	1599	* is stored here.
	1600	* @param base Zero or number between 2 and 36 inclusive.
	1601	* @param strict Do not allow any trailing characters.
	1602	* @param result Result of the conversion.
	1603	*
	1604	* @return EOK if conversion was successful.
	1605	*
	1606	*/
[b7fd2a0]	1607	errno_t str_uint8_t(const char nptr, const char *endptr, unsigned int base,
[d7f6248]	1608	bool strict, uint8_t *result)
	1609	{
	1610	assert(result != NULL);
[a35b458]	1611
[d7f6248]	1612	bool neg;
	1613	char *lendptr;
	1614	uint64_t res;
[b7fd2a0]	1615	errno_t ret = str_uint(nptr, &lendptr, base, &neg, &res);
[a35b458]	1616
[d7f6248]	1617	if (endptr != NULL)
	1618	endptr = (char ) lendptr;
[a35b458]	1619
[d7f6248]	1620	if (ret != EOK)
	1621	return ret;
[a35b458]	1622
[d7f6248]	1623	/* Do not allow negative values */
	1624	if (neg)
	1625	return EINVAL;
[a35b458]	1626
[7c3fb9b]	1627	/*
	1628	* Check whether we are at the end of
	1629	* the string in strict mode
	1630	*/
[d7f6248]	1631	if ((strict) && (*lendptr != 0))
	1632	return EINVAL;
[a35b458]	1633
[d7f6248]	1634	/* Check for overflow */
	1635	uint8_t _res = (uint8_t) res;
	1636	if (_res != res)
	1637	return EOVERFLOW;
[a35b458]	1638
[d7f6248]	1639	*result = _res;
[a35b458]	1640
[d7f6248]	1641	return EOK;
	1642	}
	1643
	1644	/** Convert string to uint16_t.
	1645	*
	1646	* @param nptr Pointer to string.
	1647	* @param endptr If not NULL, pointer to the first invalid character
	1648	* is stored here.
	1649	* @param base Zero or number between 2 and 36 inclusive.
	1650	* @param strict Do not allow any trailing characters.
	1651	* @param result Result of the conversion.
	1652	*
	1653	* @return EOK if conversion was successful.
	1654	*
	1655	*/
[b7fd2a0]	1656	errno_t str_uint16_t(const char nptr, const char *endptr, unsigned int base,
[d7f6248]	1657	bool strict, uint16_t *result)
	1658	{
	1659	assert(result != NULL);
[a35b458]	1660
[d7f6248]	1661	bool neg;
	1662	char *lendptr;
	1663	uint64_t res;
[b7fd2a0]	1664	errno_t ret = str_uint(nptr, &lendptr, base, &neg, &res);
[a35b458]	1665
[d7f6248]	1666	if (endptr != NULL)
	1667	endptr = (char ) lendptr;
[a35b458]	1668
[d7f6248]	1669	if (ret != EOK)
	1670	return ret;
[a35b458]	1671
[d7f6248]	1672	/* Do not allow negative values */
	1673	if (neg)
	1674	return EINVAL;
[a35b458]	1675
[7c3fb9b]	1676	/*
	1677	* Check whether we are at the end of
	1678	* the string in strict mode
	1679	*/
[d7f6248]	1680	if ((strict) && (*lendptr != 0))
	1681	return EINVAL;
[a35b458]	1682
[d7f6248]	1683	/* Check for overflow */
	1684	uint16_t _res = (uint16_t) res;
	1685	if (_res != res)
	1686	return EOVERFLOW;
[a35b458]	1687
[d7f6248]	1688	*result = _res;
[a35b458]	1689
[d7f6248]	1690	return EOK;
	1691	}
	1692
	1693	/** Convert string to uint32_t.
	1694	*
	1695	* @param nptr Pointer to string.
	1696	* @param endptr If not NULL, pointer to the first invalid character
	1697	* is stored here.
	1698	* @param base Zero or number between 2 and 36 inclusive.
	1699	* @param strict Do not allow any trailing characters.
	1700	* @param result Result of the conversion.
	1701	*
	1702	* @return EOK if conversion was successful.
	1703	*
	1704	*/
[b7fd2a0]	1705	errno_t str_uint32_t(const char nptr, const char *endptr, unsigned int base,
[d7f6248]	1706	bool strict, uint32_t *result)
	1707	{
	1708	assert(result != NULL);
[a35b458]	1709
[d7f6248]	1710	bool neg;
	1711	char *lendptr;
	1712	uint64_t res;
[b7fd2a0]	1713	errno_t ret = str_uint(nptr, &lendptr, base, &neg, &res);
[a35b458]	1714
[d7f6248]	1715	if (endptr != NULL)
	1716	endptr = (char ) lendptr;
[a35b458]	1717
[d7f6248]	1718	if (ret != EOK)
	1719	return ret;
[a35b458]	1720
[d7f6248]	1721	/* Do not allow negative values */
	1722	if (neg)
	1723	return EINVAL;
[a35b458]	1724
[7c3fb9b]	1725	/*
	1726	* Check whether we are at the end of
	1727	* the string in strict mode
	1728	*/
[d7f6248]	1729	if ((strict) && (*lendptr != 0))
	1730	return EINVAL;
[a35b458]	1731
[d7f6248]	1732	/* Check for overflow */
	1733	uint32_t _res = (uint32_t) res;
	1734	if (_res != res)
	1735	return EOVERFLOW;
[a35b458]	1736
[d7f6248]	1737	*result = _res;
[a35b458]	1738
[d7f6248]	1739	return EOK;
	1740	}
	1741
[d47279b]	1742	/** Convert string to uint64_t.
	1743	*
	1744	* @param nptr Pointer to string.
	1745	* @param endptr If not NULL, pointer to the first invalid character
	1746	* is stored here.
	1747	* @param base Zero or number between 2 and 36 inclusive.
	1748	* @param strict Do not allow any trailing characters.
	1749	* @param result Result of the conversion.
	1750	*
	1751	* @return EOK if conversion was successful.
	1752	*
	1753	*/
[b7fd2a0]	1754	errno_t str_uint64_t(const char nptr, const char *endptr, unsigned int base,
[d47279b]	1755	bool strict, uint64_t *result)
	1756	{
	1757	assert(result != NULL);
[a35b458]	1758
[d47279b]	1759	bool neg;
	1760	char *lendptr;
[b7fd2a0]	1761	errno_t ret = str_uint(nptr, &lendptr, base, &neg, result);
[a35b458]	1762
[d47279b]	1763	if (endptr != NULL)
	1764	endptr = (char ) lendptr;
[a35b458]	1765
[d47279b]	1766	if (ret != EOK)
	1767	return ret;
[a35b458]	1768
[d47279b]	1769	/* Do not allow negative values */
	1770	if (neg)
	1771	return EINVAL;
[a35b458]	1772
[7c3fb9b]	1773	/*
	1774	* Check whether we are at the end of
	1775	* the string in strict mode
	1776	*/
[d47279b]	1777	if ((strict) && (*lendptr != 0))
	1778	return EINVAL;
[a35b458]	1779
[d47279b]	1780	return EOK;
	1781	}
	1782
[af8bda0]	1783	/** Convert string to int64_t.
	1784	*
	1785	* @param nptr Pointer to string.
	1786	* @param endptr If not NULL, pointer to the first invalid character
	1787	* is stored here.
	1788	* @param base Zero or number between 2 and 36 inclusive.
	1789	* @param strict Do not allow any trailing characters.
	1790	* @param result Result of the conversion.
	1791	*
	1792	* @return EOK if conversion was successful.
	1793	*
	1794	*/
	1795	int str_int64_t(const char nptr, const char *endptr, unsigned int base,
	1796	bool strict, int64_t *result)
	1797	{
	1798	assert(result != NULL);
	1799
	1800	bool neg;
	1801	char *lendptr;
	1802	uint64_t unsigned_result;
	1803	int ret = str_uint(nptr, &lendptr, base, &neg, &unsigned_result);
	1804
	1805	if (endptr != NULL)
	1806	endptr = (char ) lendptr;
	1807
	1808	if (ret != EOK)
	1809	return ret;
	1810
	1811	/* Do not allow negative values */
	1812	if (neg) {
	1813	if (unsigned_result == UINT64_MAX)
	1814	return EINVAL;
	1815
[be0f5e4]	1816	*result = -(int64_t) unsigned_result;
[af8bda0]	1817	} else
	1818	*result = unsigned_result;
	1819
[1583793]	1820	/*
	1821	* Check whether we are at the end of
	1822	* the string in strict mode
	1823	*/
[af8bda0]	1824	if ((strict) && (*lendptr != 0))
	1825	return EINVAL;
	1826
	1827	return EOK;
	1828	}
	1829
[d47279b]	1830	/** Convert string to size_t.
	1831	*
	1832	* @param nptr Pointer to string.
	1833	* @param endptr If not NULL, pointer to the first invalid character
	1834	* is stored here.
	1835	* @param base Zero or number between 2 and 36 inclusive.
	1836	* @param strict Do not allow any trailing characters.
	1837	* @param result Result of the conversion.
	1838	*
	1839	* @return EOK if conversion was successful.
	1840	*
	1841	*/
[b7fd2a0]	1842	errno_t str_size_t(const char nptr, const char *endptr, unsigned int base,
[d47279b]	1843	bool strict, size_t *result)
	1844	{
	1845	assert(result != NULL);
[a35b458]	1846
[d47279b]	1847	bool neg;
	1848	char *lendptr;
	1849	uint64_t res;
[b7fd2a0]	1850	errno_t ret = str_uint(nptr, &lendptr, base, &neg, &res);
[a35b458]	1851
[d47279b]	1852	if (endptr != NULL)
	1853	endptr = (char ) lendptr;
[a35b458]	1854
[d47279b]	1855	if (ret != EOK)
	1856	return ret;
[a35b458]	1857
[d47279b]	1858	/* Do not allow negative values */
	1859	if (neg)
	1860	return EINVAL;
[a35b458]	1861
[7c3fb9b]	1862	/*
	1863	* Check whether we are at the end of
	1864	* the string in strict mode
	1865	*/
[d47279b]	1866	if ((strict) && (*lendptr != 0))
	1867	return EINVAL;
[a35b458]	1868
[d47279b]	1869	/* Check for overflow */
	1870	size_t _res = (size_t) res;
	1871	if (_res != res)
	1872	return EOVERFLOW;
[a35b458]	1873
[d47279b]	1874	*result = _res;
[a35b458]	1875
[d47279b]	1876	return EOK;
	1877	}
	1878
/** Scale a value to a decimal (SI) order of magnitude.
 *
 * The value is divided by the largest power of 1000 that still leaves
 * more than three digits, and the matching SI prefix is reported.
 * Values up to 1000000 are returned unscaled with a space as suffix.
 *
 * @param val    Value to scale.
 * @param rv     Place to store the scaled value.
 * @param suffix Place to store the prefix character: 'E' (10^18),
 *               'P' (10^15), 'T' (10^12), 'G' (10^9), 'M' (10^6),
 *               'k' (10^3) or ' ' (unscaled).
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/* Divided by 10^18, i.e. exa. */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* Divided by 10^15, i.e. peta. */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
	1904
/** Scale a byte count to a binary (IEC) order of magnitude.
 *
 * The value is divided by the largest power of 1024 that still leaves
 * a result above 1024, and the matching IEC unit is reported. Values up
 * to 1048576 are returned unscaled.
 *
 * @param val    Value to scale.
 * @param rv     Place to store the scaled value.
 * @param suffix Place to store a pointer to the unit string: "PiB"
 *               (2^50), "TiB" (2^40), "GiB" (2^30), "MiB" (2^20),
 *               "KiB" (2^10) or bytes (unscaled). The strings are
 *               literals and must not be modified or freed.
 * @param fixed  If true, the byte unit is padded with spaces to the
 *               width of the other units.
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* Divided by 2^50, i.e. pebi. */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		if (fixed)
			*suffix = "B  ";
		else
			*suffix = "B";
	}
}
	1931
[a46da63]	1932	/** @}
[b2951e2]	1933	*/

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: mainline/uspace/lib/c/generic/str.c@ 08e103d4

Download in other formats: