Context Navigation

source: mainline/common/str.c@ 6d0e133

Visit:

Last change on this file since 6d0e133 was 6d0e133, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 3 months ago
Improve handling of invalid data in str_decode()
Property mode set to `100644`
File size: 39.8 KB

Rev	Line
[936351c1]	1	/*
[d066259]	2	* Copyright (c) 2001-2004 Jakub Jermar
[df4ed85]	3	* Copyright (c) 2005 Martin Decky
[576845ec]	4	* Copyright (c) 2008 Jiri Svoboda
[22cf42d9]	5	* Copyright (c) 2011 Martin Sucha
[c4bbca8]	6	* Copyright (c) 2011 Oleg Romanenko
[936351c1]	7	* All rights reserved.
	8	*
	9	* Redistribution and use in source and binary forms, with or without
	10	* modification, are permitted provided that the following conditions
	11	* are met:
	12	*
	13	* - Redistributions of source code must retain the above copyright
	14	* notice, this list of conditions and the following disclaimer.
	15	* - Redistributions in binary form must reproduce the above copyright
	16	* notice, this list of conditions and the following disclaimer in the
	17	* documentation and/or other materials provided with the distribution.
	18	* - The name of the author may not be used to endorse or promote products
	19	* derived from this software without specific prior written permission.
	20	*
	21	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	22	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	23	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	24	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
	25	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	26	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	27	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	28	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	29	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
	30	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	31	*/
	32
[a46da63]	33	/** @addtogroup libc
[b2951e2]	34	* @{
	35	*/
[d066259]	36
	37	/**
	38	* @file
	39	* @brief String functions.
	40	*
	41	* Strings and characters use the Universal Character Set (UCS). The standard
	42	* strings, called just strings are encoded in UTF-8. Wide strings (encoded
	43	* in UTF-32) are supported to a limited degree. A single character is
[28a5ebd]	44	* represented as char32_t.@n
[d066259]	45	*
	46	* Overview of the terminology:@n
	47	*
	48	* Term Meaning
	49	* -------------------- ----------------------------------------------------
	50	* byte 8 bits stored in uint8_t (unsigned 8 bit integer)
	51	*
[28a5ebd]	52	* character UTF-32 encoded Unicode character, stored in char32_t
	53	* (unsigned 32 bit integer), code points 0 .. 1114111
[d066259]	54	* are valid
	55	*
[28c39f3]	56	* Note that Unicode characters do not match
	57	* one-to-one with displayed characters or glyphs on
	58	* screen. For that level of precision, look up
	59	* Grapheme Clusters.
	60	*
[d066259]	61	* ASCII character 7 bit encoded ASCII character, stored in char
	62	* (usually signed 8 bit integer), code points 0 .. 127
	63	* are valid
	64	*
	65	* string UTF-8 encoded NULL-terminated Unicode string, char *
	66	*
	67	* wide string UTF-32 encoded NULL-terminated Unicode string,
[28a5ebd]	68	* char32_t *
[d066259]	69	*
	70	* [wide] string size number of BYTES in a [wide] string (excluding
	71	* the NULL-terminator), size_t
	72	*
	73	* [wide] string length number of CHARACTERS in a [wide] string (excluding
	74	* the NULL-terminator), size_t
	75	*
	76	* [wide] string width number of display cells on a monospace display taken
	77	* by a [wide] string, size_t
	78	*
[28c39f3]	79	* This is virtually impossible to determine exactly for
	80	* all strings without knowing specifics of the display
	81	* device, due to various factors affecting text output.
	82	* If you have the option to query the terminal for
	83	* position change caused by outputting the string,
	84	* it is preferable to determine width that way.
	85	*
[d066259]	86	*
	87	* Overview of string metrics:@n
	88	*
	89	* Metric Abbrev. Type Meaning
	90	* ------ ------ ------ -------------------------------------------------
	91	* size n size_t number of BYTES in a string (excluding the
	92	* NULL-terminator)
	93	*
	94	* length l size_t number of CHARACTERS in a string (excluding the
	95	* null terminator)
	96	*
	97	* width w size_t number of display cells on a monospace display
	98	* taken by a string
	99	*
	100	*
	101	* Function naming prefixes:@n
	102	*
	103	* chr_ operate on characters
	104	* ascii_ operate on ASCII characters
	105	* str_ operate on strings
	106	* wstr_ operate on wide strings
	107	*
	108	* [w]str_[n|l|w] operate on a prefix limited by size, length
	109	* or width
	110	*
	111	*
	112	* A specific character inside a [wide] string can be referred to by:@n
	113	*
[28a5ebd]	114	* pointer (char *, char32_t *)
[d066259]	115	* byte offset (size_t)
	116	* character index (size_t)
	117	*
[b2951e2]	118	*/
	119
[19f857a]	120	#include <str.h>
[d066259]	121
[28c39f3]	122	#include <align.h>
[38d150e]	123	#include <assert.h>
[e64c4b2]	124	#include <ctype.h>
[171f9a1]	125	#include <errno.h>
[28c39f3]	126	#include <macros.h>
	127	#include <mem.h>
[d066259]	128	#include <stdbool.h>
	129	#include <stddef.h>
	130	#include <stdint.h>
	131	#include <stdlib.h>
[28c39f3]	132	#include <uchar.h>
[171f9a1]	133
	134	/** Byte mask consisting of lowest @n bits (out of 8) */
	135	#define LO_MASK_8(n) ((uint8_t) ((1 << (n)) - 1))
	136
	137	/** Byte mask consisting of lowest @n bits (out of 32) */
	138	#define LO_MASK_32(n) ((uint32_t) ((1 << (n)) - 1))
	139
	140	/** Byte mask consisting of highest @n bits (out of 8) */
	141	#define HI_MASK_8(n) (~LO_MASK_8(8 - (n)))
	142
	143	/** Number of data bits in a UTF-8 continuation byte */
	144	#define CONT_BITS 6
	145
/** Tell whether byte @a b has its top bit clear, i.e. is a 7-bit ASCII code. */
static inline bool _is_ascii(uint8_t b)
{
	return (b & 0x80) == 0;
}
	150
	151	static inline bool _is_continuation_byte(uint8_t b)
	152	{
	153	return (b & 0xc0) == 0x80;
	154	}
	155
	156	static inline int _char_continuation_bytes(char32_t c)
	157	{
	158	if ((c & ~LO_MASK_32(11)) == 0)
	159	return 1;
	160
	161	if ((c & ~LO_MASK_32(16)) == 0)
	162	return 2;
	163
	164	if ((c & ~LO_MASK_32(21)) == 0)
	165	return 3;
	166
	167	/* Codes longer than 21 bits are not supported */
	168	return -1;
	169	}
	170
	171	static inline int _continuation_bytes(uint8_t b)
	172	{
	173	/* 0xxxxxxx */
	174	if (_is_ascii(b))
	175	return 0;
	176
	177	/* 110xxxxx 10xxxxxx */
	178	if ((b & 0xe0) == 0xc0)
	179	return 1;
	180
	181	/* 1110xxxx 10xxxxxx 10xxxxxx */
	182	if ((b & 0xf0) == 0xe0)
	183	return 2;
	184
	185	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
	186	if ((b & 0xf8) == 0xf0)
	187	return 3;
	188
	189	return -1;
	190	}
	191
[171f9a1]	192	/** Decode a single character from a string.
	193	*
	194	* Decode a single character from a string of size @a size. Decoding starts
	195	* at @a offset and this offset is moved to the beginning of the next
	196	* character. In case of decoding error, offset generally advances at least
	197	* by one. However, offset is never moved beyond size.
	198	*
	199	* @param str String (not necessarily NULL-terminated).
	200	* @param offset Byte offset in string where to start decoding.
	201	* @param size Size of the string (in bytes).
	202	*
	203	* @return Value of decoded character, U_SPECIAL on decoding error or
	204	* NULL if attempt to decode beyond @a size.
	205	*
	206	*/
[28a5ebd]	207	char32_t str_decode(const char str, size_t offset, size_t size)
[171f9a1]	208	{
	209	if (*offset + 1 > size)
	210	return 0;
[a35b458]	211
[171f9a1]	212	/* First byte read from string */
	213	uint8_t b0 = (uint8_t) str[(*offset)++];
[a35b458]	214
[28c39f3]	215	/* Fast exit for the most common case. */
	216	if (_is_ascii(b0))
	217	return b0;
[a35b458]	218
[28c39f3]	219	/* 10xxxxxx -- unexpected continuation byte */
	220	if (_is_continuation_byte(b0))
[171f9a1]	221	return U_SPECIAL;
[28c39f3]	222
	223	/* Determine code length */
	224
	225	unsigned int cbytes = _continuation_bytes(b0);
	226	unsigned int b0_bits = 6 - cbytes; /* Data bits in first byte */
[a35b458]	227
[6d0e133]	228	if (cbytes < 0 \|\| *offset + cbytes > size)
[171f9a1]	229	return U_SPECIAL;
[a35b458]	230
[28a5ebd]	231	char32_t ch = b0 & LO_MASK_8(b0_bits);
[a35b458]	232
[171f9a1]	233	/* Decode continuation bytes */
	234	while (cbytes > 0) {
[6d0e133]	235	uint8_t b = (uint8_t) str[*offset];
[a35b458]	236
[28c39f3]	237	if (!_is_continuation_byte(b))
[171f9a1]	238	return U_SPECIAL;
[a35b458]	239
[6d0e133]	240	(*offset)++;
	241
[171f9a1]	242	/* Shift data bits to ch */
[28a5ebd]	243	ch = (ch << CONT_BITS) \| (char32_t) (b & LO_MASK_8(CONT_BITS));
[171f9a1]	244	cbytes--;
	245	}
[a35b458]	246
[171f9a1]	247	return ch;
	248	}
	249
[568693b]	250	/** Decode a single character from a string to the left.
	251	*
	252	* Decode a single character from a string of size @a size. Decoding starts
	253	* at @a offset and this offset is moved to the beginning of the previous
	254	* character. In case of decoding error, offset generally decreases at least
	255	* by one. However, offset is never moved before 0.
	256	*
	257	* @param str String (not necessarily NULL-terminated).
	258	* @param offset Byte offset in string where to start decoding.
	259	* @param size Size of the string (in bytes).
	260	*
	261	* @return Value of decoded character, U_SPECIAL on decoding error or
	262	* NULL if attempt to decode beyond @a start of str.
	263	*
	264	*/
[28a5ebd]	265	char32_t str_decode_reverse(const char str, size_t offset, size_t size)
[568693b]	266	{
	267	if (*offset == 0)
	268	return 0;
[a35b458]	269
[28c39f3]	270	int cbytes = 0;
[568693b]	271	/* Continue while continuation bytes found */
[28c39f3]	272	while (*offset > 0 && cbytes < 4) {
[568693b]	273	uint8_t b = (uint8_t) str[--(*offset)];
[a35b458]	274
[28c39f3]	275	if (_is_continuation_byte(b)) {
	276	cbytes++;
	277	continue;
[568693b]	278	}
[28c39f3]	279
	280	/* Invalid byte. */
	281	if (cbytes != _continuation_bytes(b))
	282	return U_SPECIAL;
	283
	284	/* Start byte */
	285	size_t start_offset = *offset;
	286	return str_decode(str, &start_offset, size);
[568693b]	287	}
[28c39f3]	288
[568693b]	289	/* Too many continuation bytes */
	290	return U_SPECIAL;
	291	}
	292
[171f9a1]	293	/** Encode a single character to string representation.
	294	*
	295	* Encode a single character to string representation (i.e. UTF-8) and store
	296	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
	297	* is moved to the position where the next character can be written to.
	298	*
	299	* @param ch Input character.
	300	* @param str Output buffer.
	301	* @param offset Byte offset where to start writing.
	302	* @param size Size of the output buffer (in bytes).
	303	*
	304	* @return EOK if the character was encoded successfully, EOVERFLOW if there
[d4a3ee5]	305	* was not enough space in the output buffer or EINVAL if the character
	306	* code was invalid.
[171f9a1]	307	*/
[28c39f3]	308	errno_t chr_encode(char32_t ch, char str, size_t offset, size_t size)
[171f9a1]	309	{
	310	if (*offset >= size)
	311	return EOVERFLOW;
[a35b458]	312
[28c39f3]	313	/* Fast exit for the most common case. */
	314	if (ch < 0x80) {
	315	str[(*offset)++] = (char) ch;
	316	return EOK;
	317	}
	318
	319	/* Codes longer than 21 bits are not supported */
[171f9a1]	320	if (!chr_check(ch))
	321	return EINVAL;
[a35b458]	322
[171f9a1]	323	/* Determine how many continuation bytes are needed */
[a35b458]	324
[28c39f3]	325	unsigned int cbytes = _char_continuation_bytes(ch);
	326	unsigned int b0_bits = 6 - cbytes; /* Data bits in first byte */
[a35b458]	327
[171f9a1]	328	/* Check for available space in buffer */
	329	if (*offset + cbytes >= size)
	330	return EOVERFLOW;
[a35b458]	331
[171f9a1]	332	/* Encode continuation bytes */
	333	unsigned int i;
	334	for (i = cbytes; i > 0; i--) {
[28c39f3]	335	str[*offset + i] = 0x80 \| (ch & LO_MASK_32(CONT_BITS));
	336	ch >>= CONT_BITS;
[171f9a1]	337	}
[a35b458]	338
[171f9a1]	339	/* Encode first byte */
[28c39f3]	340	str[*offset] = (ch & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
[a35b458]	341
[171f9a1]	342	/* Advance offset */
	343	*offset += cbytes + 1;
[a35b458]	344
[171f9a1]	345	return EOK;
	346	}
	347
[28c39f3]	348	/* Convert in place any bytes that don't form a valid character into U_SPECIAL. */
	349	static void _repair_string(char *str, size_t n)
	350	{
	351	for (; *str && n > 0; str++, n--) {
	352	int cont = _continuation_bytes(*str);
	353	if (cont == 0)
	354	continue;
	355
	356	if (cont < 0 \|\| n <= (size_t) cont) {
	357	*str = U_SPECIAL;
	358	continue;
	359	}
	360
	361	for (int i = 1; i <= cont; i++) {
	362	if (!_is_continuation_byte(str[i])) {
	363	*str = U_SPECIAL;
	364	continue;
	365	}
	366	}
	367	}
	368	}
	369
	370	static size_t _str_size(const char *str)
	371	{
	372	size_t size = 0;
	373
	374	while (*str++ != 0)
	375	size++;
	376
	377	return size;
	378	}
	379
/** Get size of string.
 *
 * Report how many bytes the string @a str occupies, not counting the
 * null terminator.
 *
 * @param str String to consider.
 *
 * @return Number of bytes used by the string.
 *
 */
size_t str_size(const char *str)
{
	size_t bytes = _str_size(str);

	return bytes;
}
	394
	395	/** Get size of wide string.
	396	*
	397	* Get the number of bytes which are used by the wide string @a str (excluding the
	398	* NULL-terminator).
	399	*
	400	* @param str Wide string to consider.
	401	*
	402	* @return Number of bytes used by the wide string
	403	*
	404	*/
[28a5ebd]	405	size_t wstr_size(const char32_t *str)
[f2b8cdc]	406	{
[28a5ebd]	407	return (wstr_length(str) * sizeof(char32_t));
[f2b8cdc]	408	}
	409
	410	/** Get size of string with length limit.
	411	*
	412	* Get the number of bytes which are used by up to @a max_len first
	413	* characters in the string @a str. If @a max_len is greater than
	414	* the length of @a str, the entire string is measured (excluding the
	415	* NULL-terminator).
	416	*
	417	* @param str String to consider.
	418	* @param max_len Maximum number of characters to measure.
	419	*
	420	* @return Number of bytes used by the characters.
	421	*
	422	*/
[d4a3ee5]	423	size_t str_lsize(const char *str, size_t max_len)
[f2b8cdc]	424	{
[d4a3ee5]	425	size_t len = 0;
[f2b8cdc]	426	size_t offset = 0;
[a35b458]	427
[f2b8cdc]	428	while (len < max_len) {
	429	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
	430	break;
[a35b458]	431
[f2b8cdc]	432	len++;
	433	}
[a35b458]	434
[f2b8cdc]	435	return offset;
	436	}
	437
/**
 * Count the bytes preceding the null terminator of @a str, examining at
 * most @a max_size bytes.
 *
 * The limit is tested before each byte is read, so @a str does not have to
 * be null-terminated as long as @a max_size bytes are readable.
 */
static size_t _str_nsize(const char *str, size_t max_size)
{
	size_t size = 0;

	while (size < max_size && str[size] != 0)
		size++;

	return size;
}
	447
/** Get size of string with size limit.
 *
 * Report how many bytes the string @a str occupies (not counting the
 * null terminator), capped at @a max_size bytes.
 *
 * @param str      String to consider.
 * @param max_size Maximum number of bytes to measure.
 *
 * @return Number of bytes used by the string.
 *
 */
size_t str_nsize(const char *str, size_t max_size)
{
	size_t bytes = _str_nsize(str, max_size);

	return bytes;
}
	463
	464	/** Get size of wide string with size limit.
	465	*
	466	* Get the number of bytes which are used by the wide string @a str
	467	* (excluding the NULL-terminator), but no more than @max_size bytes.
	468	*
	469	* @param str Wide string to consider.
	470	* @param max_size Maximum number of bytes to measure.
	471	*
	472	* @return Number of bytes used by the wide string
	473	*
	474	*/
[28a5ebd]	475	size_t wstr_nsize(const char32_t *str, size_t max_size)
[560d79f]	476	{
[28a5ebd]	477	return (wstr_nlength(str, max_size) * sizeof(char32_t));
[560d79f]	478	}
	479
/** Get size of wide string with length limit.
 *
 * Report how many bytes are occupied by up to @a max_len leading wide
 * characters of @a str. If @a max_len exceeds the length of @a str, the
 * entire wide string is measured (not counting the null terminator).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const char32_t *str, size_t max_len)
{
	/* Express the character limit as a byte limit. */
	size_t byte_limit = max_len * sizeof(char32_t);
	size_t chars = wstr_nlength(str, byte_limit);

	return chars * sizeof(char32_t);
}
	497
	498	/** Get number of characters in a string.
	499	*
	500	* @param str NULL-terminated string.
	501	*
	502	* @return Number of characters in string.
	503	*
	504	*/
[d4a3ee5]	505	size_t str_length(const char *str)
[f2b8cdc]	506	{
[d4a3ee5]	507	size_t len = 0;
[f2b8cdc]	508	size_t offset = 0;
[a35b458]	509
[f2b8cdc]	510	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
	511	len++;
[a35b458]	512
[f2b8cdc]	513	return len;
	514	}
	515
	516	/** Get number of characters in a wide string.
	517	*
	518	* @param str NULL-terminated wide string.
	519	*
	520	* @return Number of characters in @a str.
	521	*
	522	*/
[28a5ebd]	523	size_t wstr_length(const char32_t *wstr)
[f2b8cdc]	524	{
[d4a3ee5]	525	size_t len = 0;
[a35b458]	526
[f2b8cdc]	527	while (*wstr++ != 0)
	528	len++;
[a35b458]	529
[f2b8cdc]	530	return len;
	531	}
	532
	533	/** Get number of characters in a string with size limit.
	534	*
	535	* @param str NULL-terminated string.
	536	* @param size Maximum number of bytes to consider.
	537	*
	538	* @return Number of characters in string.
	539	*
	540	*/
[d4a3ee5]	541	size_t str_nlength(const char *str, size_t size)
[f2b8cdc]	542	{
[d4a3ee5]	543	size_t len = 0;
[f2b8cdc]	544	size_t offset = 0;
[a35b458]	545
[f2b8cdc]	546	while (str_decode(str, &offset, size) != 0)
	547	len++;
[a35b458]	548
[f2b8cdc]	549	return len;
	550	}
	551
	552	/** Get number of characters in a string with size limit.
	553	*
	554	* @param str NULL-terminated string.
	555	* @param size Maximum number of bytes to consider.
	556	*
	557	* @return Number of characters in string.
	558	*
	559	*/
[28a5ebd]	560	size_t wstr_nlength(const char32_t *str, size_t size)
[f2b8cdc]	561	{
[d4a3ee5]	562	size_t len = 0;
[28a5ebd]	563	size_t limit = ALIGN_DOWN(size, sizeof(char32_t));
[d4a3ee5]	564	size_t offset = 0;
[a35b458]	565
[f2b8cdc]	566	while ((offset < limit) && (*str++ != 0)) {
	567	len++;
[28a5ebd]	568	offset += sizeof(char32_t);
[f2b8cdc]	569	}
[a35b458]	570
[f2b8cdc]	571	return len;
	572	}
	573
/** Get character display width on a character cell display.
 *
 * The current implementation reports one cell for every character.
 *
 * @param ch Character
 * @return Width of character in cells.
 */
size_t chr_width(char32_t ch)
{
	(void) ch;

	return 1;
}
	583
	584	/** Get string display width on a character cell display.
	585	*
	586	* @param str String
	587	* @return Width of string in cells.
	588	*/
	589	size_t str_width(const char *str)
	590	{
	591	size_t width = 0;
	592	size_t offset = 0;
[28a5ebd]	593	char32_t ch;
[a35b458]	594
[be2a38ad]	595	while ((ch = str_decode(str, &offset, STR_NO_LIMIT)) != 0)
	596	width += chr_width(ch);
[a35b458]	597
[be2a38ad]	598	return width;
	599	}
	600
/** Check whether character is plain ASCII.
 *
 * @param ch Character to check.
 *
 * @return True if the character code is in the range 0 .. 127.
 *
 */
bool ascii_check(char32_t ch)
{
	return ch < 0x80;
}
	613
/** Check whether character is valid
 *
 * @param ch Character to check.
 *
 * @return True if the character code is in the range 0 .. 0x10FFFF
 *         (1114111).
 *
 */
bool chr_check(char32_t ch)
{
	return ch <= 0x10FFFF;
}
[936351c1]	626
/** Compare two null-terminated strings.
 *
 * The strings are considered equal iff their length is equal
 * and both strings consist of the same sequence of characters.
 *
 * A string S1 is less than another string S2 if it has a character with
 * lower value at the first character position where the strings differ.
 * If the strings differ in length, the shorter one is treated as if
 * padded by characters with a value of zero.
 *
 * @param s1 First string to compare.
 * @param s2 Second string to compare.
 *
 * @return 0 if the strings are equal, -1 if the first is less than the second,
 *         1 if the second is less than the first.
 *
 */
int str_cmp(const char *s1, const char *s2)
{
	/*
	 * UTF-8 has the nice property that lexicographic ordering on bytes is
	 * the same as the lexicographic ordering of the character sequences.
	 */
	while (*s1 == *s2 && *s1 != 0) {
		s1++;
		s2++;
	}

	/*
	 * The property above only holds for unsigned bytes. Plain char may
	 * be signed, which would order every non-ASCII byte before ASCII.
	 */
	unsigned char b1 = (unsigned char) *s1;
	unsigned char b2 = (unsigned char) *s2;

	if (b1 == b2)
		return 0;

	return (b1 < b2) ? -1 : 1;
}
	661
	662	/** Compare two NULL terminated strings with length limit.
	663	*
	664	* Do a char-by-char comparison of two NULL-terminated strings.
[4efeab5]	665	* The strings are considered equal iff
	666	* min(str_length(s1), max_len) == min(str_length(s2), max_len)
	667	* and both strings consist of the same sequence of characters,
	668	* up to max_len characters.
	669	*
[1772e6d]	670	* A string S1 is less than another string S2 if it has a character with
	671	* lower value at the first character position where the strings differ.
	672	* If the strings differ in length, the shorter one is treated as if
	673	* padded by characters with a value of zero. Only the first max_len
	674	* characters are considered.
[f2b8cdc]	675	*
	676	* @param s1 First string to compare.
	677	* @param s2 Second string to compare.
	678	* @param max_len Maximum number of characters to consider.
	679	*
[1772e6d]	680	* @return 0 if the strings are equal, -1 if the first is less than the second,
	681	* 1 if the second is less than the first.
[f2b8cdc]	682	*
	683	*/
[d4a3ee5]	684	int str_lcmp(const char s1, const char s2, size_t max_len)
[f2b8cdc]	685	{
[28a5ebd]	686	char32_t c1 = 0;
	687	char32_t c2 = 0;
[8227d63]	688
[f2b8cdc]	689	size_t off1 = 0;
	690	size_t off2 = 0;
[8227d63]	691
[d4a3ee5]	692	size_t len = 0;
[f2b8cdc]	693
	694	while (true) {
	695	if (len >= max_len)
	696	break;
	697
	698	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
	699	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
	700
[8227d63]	701	if (c1 < c2)
	702	return -1;
	703
	704	if (c1 > c2)
	705	return 1;
	706
	707	if (c1 == 0 \|\| c2 == 0)
	708	break;
	709
	710	++len;
	711	}
	712
	713	return 0;
	714
	715	}
	716
	717	/** Compare two NULL terminated strings in case-insensitive manner.
	718	*
	719	* Do a char-by-char comparison of two NULL-terminated strings.
	720	* The strings are considered equal iff their length is equal
	721	* and both strings consist of the same sequence of characters
	722	* when converted to lower case.
	723	*
	724	* A string S1 is less than another string S2 if it has a character with
	725	* lower value at the first character position where the strings differ.
	726	* If the strings differ in length, the shorter one is treated as if
	727	* padded by characters with a value of zero.
	728	*
	729	* @param s1 First string to compare.
	730	* @param s2 Second string to compare.
	731	*
	732	* @return 0 if the strings are equal, -1 if the first is less than the second,
	733	* 1 if the second is less than the first.
	734	*
	735	*/
	736	int str_casecmp(const char s1, const char s2)
	737	{
[28c39f3]	738	// FIXME: doesn't work for non-ASCII caseful characters
	739
[28a5ebd]	740	char32_t c1 = 0;
	741	char32_t c2 = 0;
[8227d63]	742
	743	size_t off1 = 0;
	744	size_t off2 = 0;
	745
	746	while (true) {
	747	c1 = tolower(str_decode(s1, &off1, STR_NO_LIMIT));
	748	c2 = tolower(str_decode(s2, &off2, STR_NO_LIMIT));
	749
	750	if (c1 < c2)
	751	return -1;
	752
	753	if (c1 > c2)
	754	return 1;
	755
	756	if (c1 == 0 \|\| c2 == 0)
	757	break;
	758	}
	759
	760	return 0;
	761	}
	762
	763	/** Compare two NULL terminated strings with length limit in case-insensitive
	764	* manner.
	765	*
	766	* Do a char-by-char comparison of two NULL-terminated strings.
	767	* The strings are considered equal iff
	768	* min(str_length(s1), max_len) == min(str_length(s2), max_len)
	769	* and both strings consist of the same sequence of characters,
	770	* up to max_len characters.
	771	*
	772	* A string S1 is less than another string S2 if it has a character with
	773	* lower value at the first character position where the strings differ.
	774	* If the strings differ in length, the shorter one is treated as if
	775	* padded by characters with a value of zero. Only the first max_len
	776	* characters are considered.
	777	*
	778	* @param s1 First string to compare.
	779	* @param s2 Second string to compare.
	780	* @param max_len Maximum number of characters to consider.
	781	*
	782	* @return 0 if the strings are equal, -1 if the first is less than the second,
	783	* 1 if the second is less than the first.
	784	*
	785	*/
	786	int str_lcasecmp(const char s1, const char s2, size_t max_len)
	787	{
[28c39f3]	788	// FIXME: doesn't work for non-ASCII caseful characters
	789
[28a5ebd]	790	char32_t c1 = 0;
	791	char32_t c2 = 0;
[a35b458]	792
[8227d63]	793	size_t off1 = 0;
	794	size_t off2 = 0;
[a35b458]	795
[8227d63]	796	size_t len = 0;
	797
	798	while (true) {
	799	if (len >= max_len)
	800	break;
	801
	802	c1 = tolower(str_decode(s1, &off1, STR_NO_LIMIT));
	803	c2 = tolower(str_decode(s2, &off2, STR_NO_LIMIT));
	804
[f2b8cdc]	805	if (c1 < c2)
	806	return -1;
	807
	808	if (c1 > c2)
	809	return 1;
	810
	811	if (c1 == 0 \|\| c2 == 0)
	812	break;
	813
[1b20da0]	814	++len;
[f2b8cdc]	815	}
	816
	817	return 0;
	818
	819	}
	820
/**
 * Byte-wise test whether @a p is a prefix of @a s.
 *
 * Walks both strings while their bytes match; @a p is a prefix exactly
 * when it is exhausted at the point where the walk stops.
 */
static bool _test_prefix(const char *s, const char *p)
{
	while (*s == *p && *s != 0) {
		s++;
		p++;
	}

	return *p == 0;
}
	830
/** Test whether p is a prefix of s.
 *
 * Do a char-by-char comparison of two null-terminated strings
 * and determine if p is a prefix of s.
 *
 * @param s The string in which to look
 * @param p The string to check if it is a prefix of s
 *
 * @return true iff p is prefix of s else false
 *
 */
bool str_test_prefix(const char *s, const char *p)
{
	return _test_prefix(s, p);
}
	846
[086cab0]	847	/** Get a string suffix.
	848	*
	849	* Return a string suffix defined by the prefix length.
	850	*
	851	* @param s The string to get the suffix from.
	852	* @param prefix_length Number of prefix characters to ignore.
	853	*
	854	* @return String suffix.
	855	*
	856	*/
	857	const char str_suffix(const char s, size_t prefix_length)
	858	{
	859	size_t off = 0;
	860	size_t i = 0;
	861
	862	while (true) {
	863	str_decode(s, &off, STR_NO_LIMIT);
	864	i++;
	865
	866	if (i >= prefix_length)
	867	break;
	868	}
	869
	870	return s + off;
	871	}
	872
/**
 * Copy string as a sequence of bytes.
 *
 * Copies @a src including its null terminator. The destination must be
 * large enough; no bounds checking is done.
 */
static void _str_cpy(char *dest, const char *src)
{
	while (*src)
		*(dest++) = *(src++);

	*dest = 0;
}
	881
	882	/** Copy string as a sequence of bytes. */
	883	static void _str_cpyn(char dest, size_t size, const char src)
	884	{
	885	char *dest_top = dest + size - 1;
	886
	887	while (*src && dest < dest_top)
	888	(dest++) = (src++);
	889
	890	*dest = 0;
	891	}
	892
/** Copy string.
 *
 * Copy source string @a src to destination buffer @a dest.
 * No more than @a size bytes are written. The output string is always
 * null-terminated; bytes that do not form a complete character (including
 * a character cut off by truncation) are replaced by U_SPECIAL.
 *
 * @param dest Destination buffer.
 * @param size Size of the destination buffer (must be > 0).
 * @param src  Source string.
 *
 */
void str_cpy(char *dest, size_t size, const char *src)
{
	/* There must be space for a null terminator in the buffer. */
	assert(size > 0);
	assert(src != NULL);
	assert(dest != NULL);

	/* Copy data. */
	_str_cpyn(dest, size, src);

	/* In-place translate invalid bytes to U_SPECIAL. */
	_repair_string(dest, size);
}
	918
	919	/** Copy size-limited substring.
	920	*
[6700ee2]	921	* Copy prefix of string @a src of max. size @a size to destination buffer
	922	* @a dest. No more than @a size bytes are written. The output string will
	923	* always be well-formed, i.e. null-terminated and containing only complete
	924	* characters.
[6eb2e96]	925	*
	926	* No more than @a n bytes are read from the input string, so it does not
	927	* have to be null-terminated.
	928	*
[abf09311]	929	* @param dest Destination buffer.
[6700ee2]	930	* @param count Size of the destination buffer (must be > 0).
[6eb2e96]	931	* @param src Source string.
[abf09311]	932	* @param n Maximum number of bytes to read from @a src.
[8e893ae]	933	*
[6eb2e96]	934	*/
	935	void str_ncpy(char dest, size_t size, const char src, size_t n)
	936	{
[6700ee2]	937	/* There must be space for a null terminator in the buffer. */
	938	assert(size > 0);
[28c39f3]	939	assert(src != NULL);
[a35b458]	940
[28c39f3]	941	/* Copy data. */
	942	_str_cpyn(dest, min(size, n + 1), src);
[a35b458]	943
[28c39f3]	944	/* In-place translate invalid bytes to U_SPECIAL. */
	945	_repair_string(dest, size);
[f2b8cdc]	946	}
	947
/** Append one string to another.
 *
 * Append source string @a src to string in destination buffer @a dest.
 * Size of the destination buffer is @a size. The output string is always
 * null-terminated; appended bytes that do not form a complete character
 * are replaced by U_SPECIAL. If @a dest holds no terminator within its
 * first @a size bytes, the buffer is left untouched.
 *
 * @param dest Destination buffer.
 * @param size Size of the destination buffer.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	assert(src != NULL);
	assert(dest != NULL);
	assert(size > 0);

	size_t dstr_size = _str_nsize(dest, size);

	/*
	 * No terminator inside the buffer means there is no room to append
	 * anything, not even a terminator.
	 */
	if (dstr_size >= size)
		return;

	_str_cpyn(dest + dstr_size, size - dstr_size, src);
	_repair_string(dest + dstr_size, size - dstr_size);
}
	969
[dcb74c0a]	970	/** Convert space-padded ASCII to string.
	971	*
	972	* Common legacy text encoding in hardware is 7-bit ASCII fitted into
[c3d19ac]	973	* a fixed-width byte buffer (bit 7 always zero), right-padded with spaces
[dcb74c0a]	974	* (ASCII 0x20). Convert space-padded ascii to string representation.
	975	*
	976	* If the text does not fit into the destination buffer, the function converts
	977	* as many characters as possible and returns EOVERFLOW.
	978	*
	979	* If the text contains non-ASCII bytes (with bit 7 set), the whole string is
	980	* converted anyway and invalid characters are replaced with question marks
	981	* (U_SPECIAL) and the function returns EIO.
	982	*
	983	* Regardless of return value upon return @a dest will always be well-formed.
	984	*
	985	* @param dest Destination buffer
	986	* @param size Size of destination buffer
	987	* @param src Space-padded ASCII.
	988	* @param n Size of the source buffer in bytes.
	989	*
	990	* @return EOK on success, EOVERFLOW if the text does not fit
	991	* destination buffer, EIO if the text contains
	992	* non-ASCII bytes.
	993	*/
[b7fd2a0]	994	errno_t spascii_to_str(char dest, size_t size, const uint8_t src, size_t n)
[dcb74c0a]	995	{
[28c39f3]	996	size_t len = 0;
[dcb74c0a]	997
[28c39f3]	998	/* Determine the length of the source string. */
	999	for (size_t i = 0; i < n; i++) {
	1000	if (src[i] == 0)
	1001	break;
	1002
	1003	if (src[i] != ' ')
	1004	len = i + 1;
	1005	}
	1006
	1007	errno_t result = EOK;
	1008	size_t out_len = min(len, size - 1);
	1009
	1010	/* Copy characters */
	1011	for (size_t i = 0; i < out_len; i++) {
	1012	dest[i] = src[i];
	1013
	1014	if (dest[i] < 0) {
	1015	dest[i] = U_SPECIAL;
[dcb74c0a]	1016	result = EIO;
	1017	}
[28c39f3]	1018	}
[dcb74c0a]	1019
[28c39f3]	1020	dest[out_len] = 0;
[dcb74c0a]	1021
[28c39f3]	1022	if (out_len < len)
	1023	return EOVERFLOW;
[dcb74c0a]	1024
	1025	return result;
	1026	}
	1027
[0f06dbc]	1028	/** Convert wide string to string.
[f2b8cdc]	1029	*
[0f06dbc]	1030	* Convert wide string @a src to string. The output is written to the buffer
	1031	* specified by @a dest and @a size. @a size must be non-zero and the string
	1032	* written will always be well-formed.
[f2b8cdc]	1033	*
[0f06dbc]	1034	* @param dest Destination buffer.
	1035	* @param size Size of the destination buffer.
	1036	* @param src Source wide string.
[f2b8cdc]	1037	*/
[28a5ebd]	1038	void wstr_to_str(char dest, size_t size, const char32_t src)
[f2b8cdc]	1039	{
[28a5ebd]	1040	char32_t ch;
[0f06dbc]	1041	size_t src_idx;
	1042	size_t dest_off;
	1043
	1044	/* There must be space for a null terminator in the buffer. */
	1045	assert(size > 0);
[a35b458]	1046
[0f06dbc]	1047	src_idx = 0;
	1048	dest_off = 0;
	1049
[f2b8cdc]	1050	while ((ch = src[src_idx++]) != 0) {
[81e9cb3]	1051	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
[f2b8cdc]	1052	break;
	1053	}
[0f06dbc]	1054
	1055	dest[dest_off] = '\0';
[f2b8cdc]	1056	}
	1057
[82374b2]	1058	/** Convert UTF16 string to string.
	1059	*
	1060	* Convert utf16 string @a src to string. The output is written to the buffer
	1061	* specified by @a dest and @a size. @a size must be non-zero and the string
	1062	* written will always be well-formed. Surrogate pairs also supported.
	1063	*
	1064	* @param dest Destination buffer.
	1065	* @param size Size of the destination buffer.
	1066	* @param src Source utf16 string.
	1067	*
[cde999a]	1068	* @return EOK, if success, an error code otherwise.
[82374b2]	1069	*/
[b7fd2a0]	1070	errno_t utf16_to_str(char dest, size_t size, const uint16_t src)
[82374b2]	1071	{
[abb7491c]	1072	size_t idx = 0, dest_off = 0;
[28a5ebd]	1073	char32_t ch;
[b7fd2a0]	1074	errno_t rc = EOK;
[82374b2]	1075
	1076	/* There must be space for a null terminator in the buffer. */
	1077	assert(size > 0);
	1078
	1079	while (src[idx]) {
	1080	if ((src[idx] & 0xfc00) == 0xd800) {
[abb7491c]	1081	if (src[idx + 1] && (src[idx + 1] & 0xfc00) == 0xdc00) {
[82374b2]	1082	ch = 0x10000;
	1083	ch += (src[idx] & 0x03FF) << 10;
[abb7491c]	1084	ch += (src[idx + 1] & 0x03FF);
[82374b2]	1085	idx += 2;
[1433ecda]	1086	} else
[82374b2]	1087	break;
	1088	} else {
	1089	ch = src[idx];
	1090	idx++;
	1091	}
[abb7491c]	1092	rc = chr_encode(ch, dest, &dest_off, size - 1);
[82374b2]	1093	if (rc != EOK)
	1094	break;
	1095	}
	1096	dest[dest_off] = '\0';
	1097	return rc;
	1098	}
	1099
[b06414f]	1100	/** Convert string to UTF16 string.
	1101	*
	1102	* Convert string @a src to utf16 string. The output is written to the buffer
	1103	* specified by @a dest and @a dlen. @a dlen must be non-zero and the string
	1104	* written will always be well-formed. Surrogate pairs also supported.
	1105	*
	1106	* @param dest Destination buffer.
	1107	* @param dlen Number of utf16 characters that fit in the destination buffer.
	1108	* @param src Source string.
	1109	*
[cde999a]	1110	* @return EOK, if success, an error code otherwise.
[b06414f]	1111	*/
[b7fd2a0]	1112	errno_t str_to_utf16(uint16_t dest, size_t dlen, const char src)
[fc97128]	1113	{
[b7fd2a0]	1114	errno_t rc = EOK;
[abb7491c]	1115	size_t offset = 0;
	1116	size_t idx = 0;
[28a5ebd]	1117	char32_t c;
[fc97128]	1118
[b06414f]	1119	assert(dlen > 0);
[a35b458]	1120
[fc97128]	1121	while ((c = str_decode(src, &offset, STR_NO_LIMIT)) != 0) {
	1122	if (c > 0x10000) {
[b06414f]	1123	if (idx + 2 >= dlen - 1) {
[abb7491c]	1124	rc = EOVERFLOW;
[fc97128]	1125	break;
	1126	}
	1127	c = (c - 0x10000);
	1128	dest[idx] = 0xD800 \| (c >> 10);
[abb7491c]	1129	dest[idx + 1] = 0xDC00 \| (c & 0x3FF);
[fc97128]	1130	idx++;
	1131	} else {
[1433ecda]	1132	dest[idx] = c;
[fc97128]	1133	}
	1134
	1135	idx++;
[b06414f]	1136	if (idx >= dlen - 1) {
[abb7491c]	1137	rc = EOVERFLOW;
[fc97128]	1138	break;
	1139	}
	1140	}
	1141
	1142	dest[idx] = '\0';
	1143	return rc;
[f2b8cdc]	1144	}
	1145
/** Get size of UTF-16 string.
 *
 * Count the 16-bit words occupied by the UTF-16 string @a ustr,
 * not counting the terminating zero word.
 *
 * @param ustr UTF-16 string to consider.
 *
 * @return Number of words used by the UTF-16 string
 *
 */
size_t utf16_wsize(const uint16_t *ustr)
{
	const uint16_t *end = ustr;

	/* Advance to the terminating zero word. */
	while (*end != 0)
		end++;

	return (size_t) (end - ustr);
}
	1165
[b67c7d64]	1166	/** Convert wide string to new string.
	1167	*
	1168	* Convert wide string @a src to string. Space for the new string is allocated
	1169	* on the heap.
	1170	*
	1171	* @param src Source wide string.
	1172	* @return New string.
	1173	*/
[28a5ebd]	1174	char wstr_to_astr(const char32_t src)
[b67c7d64]	1175	{
	1176	char dbuf[STR_BOUNDS(1)];
	1177	char *str;
[28a5ebd]	1178	char32_t ch;
[b67c7d64]	1179
	1180	size_t src_idx;
	1181	size_t dest_off;
	1182	size_t dest_size;
	1183
	1184	/* Compute size of encoded string. */
	1185
	1186	src_idx = 0;
	1187	dest_size = 0;
	1188
	1189	while ((ch = src[src_idx++]) != 0) {
	1190	dest_off = 0;
	1191	if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
	1192	break;
	1193	dest_size += dest_off;
	1194	}
	1195
	1196	str = malloc(dest_size + 1);
	1197	if (str == NULL)
	1198	return NULL;
	1199
	1200	/* Encode string. */
	1201
	1202	src_idx = 0;
	1203	dest_off = 0;
	1204
	1205	while ((ch = src[src_idx++]) != 0) {
	1206	if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
	1207	break;
	1208	}
	1209
	1210	str[dest_size] = '\0';
	1211	return str;
	1212	}
	1213
[da2bd08]	1214	/** Convert string to wide string.
	1215	*
	1216	* Convert string @a src to wide string. The output is written to the
[0f06dbc]	1217	* buffer specified by @a dest and @a dlen. @a dlen must be non-zero
	1218	* and the wide string written will always be null-terminated.
[da2bd08]	1219	*
	1220	* @param dest Destination buffer.
	1221	* @param dlen Length of destination buffer (number of wchars).
	1222	* @param src Source string.
	1223	*/
[28a5ebd]	1224	void str_to_wstr(char32_t dest, size_t dlen, const char src)
[da2bd08]	1225	{
	1226	size_t offset;
	1227	size_t di;
[28a5ebd]	1228	char32_t c;
[da2bd08]	1229
	1230	assert(dlen > 0);
	1231
	1232	offset = 0;
	1233	di = 0;
	1234
	1235	do {
[81e9cb3]	1236	if (di >= dlen - 1)
[da2bd08]	1237	break;
	1238
	1239	c = str_decode(src, &offset, STR_NO_LIMIT);
	1240	dest[di++] = c;
	1241	} while (c != '\0');
	1242
	1243	dest[dlen - 1] = '\0';
	1244	}
	1245
/** Convert string to newly allocated wide string.
 *
 * Convert string @a str to wide string. A new wide null-terminated
 * string is allocated on the heap.
 *
 * @param str Source string.
 *
 * @return New wide string, or NULL if the allocation fails.
 */
char32_t *str_to_awstr(const char *str)
{
	size_t len = str_length(str);

	/* One extra element for the terminator. */
	char32_t *wstr = calloc(len + 1, sizeof(char32_t));
	if (wstr == NULL)
		return NULL;

	str_to_wstr(wstr, len + 1, str);
	return wstr;
}
	1264
/** Find the first byte equal to @a c in @a str.
 *
 * Returns a pointer to the first matching byte, or NULL if the terminator
 * is reached first. When @a c is zero, the terminator itself matches.
 */
static char *_strchr(const char *str, char c)
{
	while (*str != 0 && *str != c)
		str++;

	return (*str == c) ? (char *) str : NULL;
}
	1272
[f2b8cdc]	1273	/** Find first occurence of character in string.
	1274	*
	1275	* @param str String to search.
	1276	* @param ch Character to look for.
	1277	*
	1278	* @return Pointer to character in @a str or NULL if not found.
	1279	*/
[28a5ebd]	1280	char str_chr(const char str, char32_t ch)
[f2b8cdc]	1281	{
[28c39f3]	1282	/* Fast path for an ASCII character. */
	1283	if (ascii_check(ch))
	1284	return _strchr(str, ch);
[a35b458]	1285
[28c39f3]	1286	/* Convert character to UTF-8. */
	1287	char utf8[STR_BOUNDS(1) + 1];
	1288	size_t offset = 0;
	1289
	1290	if (chr_encode(ch, utf8, &offset, sizeof(utf8)) != EOK \|\| offset == 0)
	1291	return NULL;
	1292
	1293	utf8[offset] = '\0';
	1294
	1295	/* Find the first byte, then check if all of them are correct. */
	1296	while (*str != 0) {
	1297	str = _strchr(str, utf8[0]);
	1298	if (!str)
	1299	return NULL;
	1300
	1301	if (_test_prefix(str, utf8))
	1302	return (char *) str;
	1303
	1304	str++;
[f2b8cdc]	1305	}
[a35b458]	1306
[f2b8cdc]	1307	return NULL;
	1308	}
	1309
/** Find first occurrence of substring in string.
 *
 * @param hs Haystack (string)
 * @param n  Needle (substring to look for)
 *
 * @return Pointer to character in @a hs or @c NULL if not found.
 */
char *str_str(const char *hs, const char *n)
{
	size_t hsize = _str_size(hs);
	size_t nsize = _str_size(n);

	/* Stop once the remaining haystack is shorter than the needle. */
	while (hsize >= nsize) {
		if (_test_prefix(hs, n))
			return (char *) hs;

		hs++;
		hsize--;
	}

	return NULL;
}
	1332
/** Remove all trailing bytes equal to @a c from @a str, in place.
 *
 * A string consisting only of @a c becomes empty; an empty string is left
 * untouched (nothing is written past its terminator).
 */
static void _str_rtrim(char *str, char c)
{
	/* Position just past the last byte that must be kept. */
	char *end = str;

	while (*str) {
		if (*str != c)
			end = str + 1;

		str++;
	}

	/* Truncate string. */
	*end = 0;
}
	1347
[1737bfb]	1348	/** Removes specified trailing characters from a string.
	1349	*
	1350	* @param str String to remove from.
	1351	* @param ch Character to remove.
	1352	*/
[28a5ebd]	1353	void str_rtrim(char *str, char32_t ch)
[1737bfb]	1354	{
[28c39f3]	1355	/* Fast path for the ASCII case. */
	1356	if (ascii_check(ch)) {
	1357	_str_rtrim(str, ch);
	1358	return;
	1359	}
	1360
[1737bfb]	1361	size_t off = 0;
	1362	size_t pos = 0;
[28a5ebd]	1363	char32_t c;
[1737bfb]	1364	bool update_last_chunk = true;
	1365	char *last_chunk = NULL;
	1366
	1367	while ((c = str_decode(str, &off, STR_NO_LIMIT))) {
	1368	if (c != ch) {
	1369	update_last_chunk = true;
	1370	last_chunk = NULL;
	1371	} else if (update_last_chunk) {
	1372	update_last_chunk = false;
	1373	last_chunk = (str + pos);
	1374	}
	1375	pos = off;
	1376	}
	1377
	1378	if (last_chunk)
	1379	*last_chunk = '\0';
	1380	}
	1381
/** Remove all leading bytes equal to @a c from @a str, in place. */
static void _str_ltrim(char *str, char c)
{
	size_t skip = 0;

	/* Count the leading bytes to drop. */
	while (str[skip] == c)
		skip++;

	/* Shift the remainder to the front only if something was skipped. */
	if (skip != 0)
		_str_cpy(str, str + skip);
}
	1392
[1737bfb]	1393	/** Removes specified leading characters from a string.
	1394	*
	1395	* @param str String to remove from.
	1396	* @param ch Character to remove.
	1397	*/
[28a5ebd]	1398	void str_ltrim(char *str, char32_t ch)
[1737bfb]	1399	{
[28c39f3]	1400	/* Fast path for the ASCII case. */
	1401	if (ascii_check(ch)) {
	1402	_str_ltrim(str, ch);
	1403	return;
	1404	}
	1405
[28a5ebd]	1406	char32_t acc;
[1737bfb]	1407	size_t off = 0;
	1408	size_t pos = 0;
	1409	size_t str_sz = str_size(str);
	1410
	1411	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
	1412	if (acc != ch)
	1413	break;
	1414	else
	1415	pos = off;
	1416	}
	1417
	1418	if (pos > 0) {
	1419	memmove(str, &str[pos], str_sz - pos);
	1420	pos = str_sz - pos;
[a18a8b9]	1421	str[pos] = '\0';
[1737bfb]	1422	}
	1423	}
	1424
/** Find the last byte equal to @a c in @a str.
 *
 * Returns a pointer to the last matching byte before the terminator, or
 * NULL if there is none. The terminator itself is never matched.
 */
static char *_str_rchr(const char *str, char c)
{
	const char *last = NULL;

	while (*str) {
		if (*str == c)
			last = str;

		str++;
	}

	return (char *) last;
}
	1438
[7afb4a5]	1439	/** Find last occurence of character in string.
	1440	*
	1441	* @param str String to search.
	1442	* @param ch Character to look for.
	1443	*
	1444	* @return Pointer to character in @a str or NULL if not found.
	1445	*/
[28a5ebd]	1446	char str_rchr(const char str, char32_t ch)
[7afb4a5]	1447	{
[28c39f3]	1448	if (ascii_check(ch))
	1449	return _str_rchr(str, ch);
	1450
[28a5ebd]	1451	char32_t acc;
[7afb4a5]	1452	size_t off = 0;
[f2d2c7ba]	1453	size_t last = 0;
[d4a3ee5]	1454	const char *res = NULL;
[a35b458]	1455
[7afb4a5]	1456	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
	1457	if (acc == ch)
[f2d2c7ba]	1458	res = (str + last);
	1459	last = off;
[7afb4a5]	1460	}
[a35b458]	1461
[dd2cfa7]	1462	return (char *) res;
[7afb4a5]	1463	}
	1464
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position
 * @a pos. The characters after the position are shifted.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert to.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(char32_t *str, char32_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	/*
	 * NOTE(review): only pos is checked against max_pos; the string
	 * length after insertion is not -- confirm callers size the buffer
	 * accordingly.
	 */
	if ((pos > len) || (pos + 1 > max_pos))
		return false;

	/*
	 * Shift the tail (terminator included) one slot to the right.
	 * The "i + 1 > pos" form lets the loop end correctly when pos is 0
	 * and the unsigned index wraps around.
	 */
	size_t i;
	for (i = len; i + 1 > pos; i--)
		str[i + 1] = str[i];

	str[pos] = ch;

	return true;
}
	1494
	1495	/** Remove a wide character from a wide string.
	1496	*
	1497	* Remove a wide character from a wide string at position
	1498	* @a pos. The characters after the position are shifted.
	1499	*
	1500	* @param str String to remove from.
	1501	* @param pos Character index to remove.
	1502	*
	1503	* @return True if the removal was sucessful, false if the position
	1504	* is out of bounds.
	1505	*
	1506	*/
[28a5ebd]	1507	bool wstr_remove(char32_t *str, size_t pos)
[f2b8cdc]	1508	{
[d4a3ee5]	1509	size_t len = wstr_length(str);
[a35b458]	1510
[f2b8cdc]	1511	if (pos >= len)
	1512	return false;
[a35b458]	1513
[d4a3ee5]	1514	size_t i;
[f2b8cdc]	1515	for (i = pos + 1; i <= len; i++)
	1516	str[i - 1] = str[i];
[a35b458]	1517
[f2b8cdc]	1518	return true;
	1519	}
	1520
/** Duplicate string.
 *
 * Allocate a new string with malloc() and copy the source string
 * into it. The copy is passed through _repair_string(), so it can
 * differ from the source string on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string, or NULL if the allocation fails.
 *
 */
char *str_dup(const char *src)
{
	/* One extra byte for the terminator. */
	size_t size = _str_size(src) + 1;
	char *dest = malloc(size);
	if (!dest)
		return NULL;

	_str_cpy(dest, src);
	_repair_string(dest, size);
	return dest;
}
	1548
/** Duplicate string with size limit.
 *
 * Allocate a new string with malloc() and copy up to @a n bytes from the
 * source string into it. No more than @a n + 1 bytes is allocated, but if
 * the size occupied by the source string is smaller than @a n + 1,
 * less is allocated.
 *
 * The copy is passed through _repair_string(), so it can differ from the
 * source string on the byte level.
 *
 * @param src Source string.
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string, or NULL if the allocation fails.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	/* One extra byte for the terminator. */
	size_t size = _str_nsize(src, n) + 1;

	char *dest = malloc(size);
	if (!dest)
		return NULL;

	_str_cpyn(dest, size, src);
	_repair_string(dest, size);
	return dest;
}
	1581
/** Split string by delimiters.
 *
 * The input string is modified: the delimiter that ends the returned
 * token is overwritten with a null terminator.
 *
 * @param s     String to be tokenized. If NULL, NULL is returned.
 * @param delim String with the delimiters.
 * @param next  Variable which will receive the pointer to the
 *              continuation of the string following the first
 *              occurrence of any of the delimiter characters.
 *              May be NULL.
 * @return Pointer to the prefix of @a s before the first
 *         delimiter character. NULL if no such prefix
 *         exists.
 */
char *str_tok(char *s, const char *delim, char **next)
{
	char *start, *end;

	if (!s)
		return NULL;

	size_t len = str_size(s);
	size_t cur;
	size_t tmp;
	char32_t ch;

	/* Skip over leading delimiters. */
	tmp = 0;
	cur = 0;
	while ((ch = str_decode(s, &tmp, len)) && str_chr(delim, ch))
		cur = tmp;
	start = &s[cur];

	/* Skip over token characters. */
	tmp = cur;
	while ((ch = str_decode(s, &tmp, len)) && !str_chr(delim, ch))
		cur = tmp;
	end = &s[cur];

	/*
	 * If a delimiter ended the token, continue just past it (tmp);
	 * otherwise the string is exhausted and next points at its end.
	 */
	if (next)
		*next = (ch ? &s[tmp] : &s[cur]);

	if (start == end)
		return NULL;	/* No more tokens. */

	/* Overwrite delimiter with NULL terminator. */
	*end = '\0';
	return start;
}
	1628
/** Scale a value to a decimal order of magnitude for display.
 *
 * Values up to 10^6 are returned unchanged with a space as the suffix.
 * Larger values are divided by a power of 1000 and paired with the SI
 * prefix matching that divisor.
 *
 * @param val    Value to scale.
 * @param rv     Receives the scaled value.
 * @param suffix Receives the prefix character ('k', 'M', 'G', 'T', 'P',
 *               'E' or ' ').
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/* Divisor is 10^18 (exa); 10^21 does not fit in 64 bits. */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* Divisor is 10^15 (peta). */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
	1654
/** Scale a byte count to a binary order of magnitude for display.
 *
 * Values up to 2^20 are returned unchanged. Larger values are divided by
 * a power of 1024 and paired with the IEC unit matching that divisor.
 *
 * @param val    Value to scale.
 * @param rv     Receives the scaled value.
 * @param suffix Receives a pointer to a constant unit string.
 * @param fixed  If true, the unit for unscaled values is "B " rather
 *               than "B".
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* Divisor is 2^50 (pebi). */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		if (fixed)
			*suffix = "B ";
		else
			*suffix = "B";
	}
}
	1681
[a46da63]	1682	/** @}
[b2951e2]	1683	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: