Context Navigation

source: mainline/kernel/generic/src/lib/str.c@ 1d2f85e

Visit:

Last change on this file since 1d2f85e was 1d2f85e, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 6 years ago
Change documentation of <str.h> functions to use unambiguous terms
Property mode set to `100644`
File size: 24.3 KB

Rev	Line
[16da5f8e]	1	/*
	2	* Copyright (c) 2001-2004 Jakub Jermar
[d066259]	3	* Copyright (c) 2005 Martin Decky
	4	* Copyright (c) 2008 Jiri Svoboda
	5	* Copyright (c) 2011 Martin Sucha
	6	* Copyright (c) 2011 Oleg Romanenko
[16da5f8e]	7	* All rights reserved.
	8	*
	9	* Redistribution and use in source and binary forms, with or without
	10	* modification, are permitted provided that the following conditions
	11	* are met:
	12	*
	13	* - Redistributions of source code must retain the above copyright
	14	* notice, this list of conditions and the following disclaimer.
	15	* - Redistributions in binary form must reproduce the above copyright
	16	* notice, this list of conditions and the following disclaimer in the
	17	* documentation and/or other materials provided with the distribution.
	18	* - The name of the author may not be used to endorse or promote products
	19	* derived from this software without specific prior written permission.
	20	*
	21	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	22	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	23	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	24	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
	25	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	26	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	27	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	28	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	29	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
	30	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	31	*/
	32
[174156fd]	33	/** @addtogroup kernel_generic
[16da5f8e]	34	* @{
	35	*/
	36
	37	/**
	38	* @file
[82bb9c1]	39	* @brief String functions.
	40	*
 * Strings and characters use the Universal Character Set (UCS). The standard
 * strings, called just strings, are encoded in UTF-8. Wide strings (encoded
 * in UTF-32) are supported to a limited degree. A single code point is
 * represented as wchar_t.@n
[82bb9c1]	45	*
 * Overview of the terminology:@n
 *
 *  Term                  Meaning
 *  --------------------  ----------------------------------------------------
 *  byte                  8 bits stored in uint8_t (unsigned 8 bit integer)
 *
 *  character             UTF-32 encoded Unicode code point, stored in wchar_t
 *                        (signed 32 bit integer), code points 0 .. 1114111
 *                        are valid
 *
 *  ASCII character       7 bit encoded ASCII character, stored in char
 *                        (usually signed 8 bit integer), code points 0 .. 127
 *                        are valid
 *
 *  string                UTF-8 encoded NULL-terminated Unicode string, char *
 *
 *  wide string           UTF-32 encoded NULL-terminated Unicode string,
 *                        wchar_t *
 *
 *  [wide] string size    number of BYTES in a [wide] string (excluding
 *                        the NULL-terminator), size_t
 *
 *  [wide] string length  number of CODE POINTS in a [wide] string (excluding
 *                        the NULL-terminator), size_t
 *
 *  [wide] string width   number of display cells on a monospace display taken
 *                        by a [wide] string, size_t
 *
	74	*
 * Overview of string metrics:@n
 *
 *  Metric  Abbrev.  Type     Meaning
 *  ------  ------   ------   -------------------------------------------------
 *  size    n        size_t   number of BYTES in a string (excluding the
 *                            NULL-terminator)
 *
 *  length  l        size_t   number of CODE POINTS in a string (excluding the
 *                            NULL-terminator)
 *
 *  width   w        size_t   number of display cells on a monospace display
 *                            taken by a string
 *
	88	*
 * Function naming prefixes:@n
 *
 *  chr_                operate on code points
 *  ascii_              operate on ASCII characters
 *  str_                operate on strings
 *  wstr_               operate on wide strings
 *
 *  [w]str_[n|l|w]      operate on a prefix limited by size, length
 *                      or width
 *
	99	*
 * A specific character inside a [wide] string can be referred to by:@n
 *
 *  pointer (char *, wchar_t *)
 *  byte offset (size_t)
 *  code point index (size_t)
[82bb9c1]	105	*
[16da5f8e]	106	*/
	107
[19f857a]	108	#include <str.h>
[d066259]	109
	110	#include <assert.h>
[d09f84e6]	111	#include <errno.h>
[d066259]	112	#include <stdbool.h>
	113	#include <stddef.h>
	114	#include <stdint.h>
	115	#include <stdlib.h>
	116
[b888d5f]	117	#include <align.h>
[30a5470]	118	#include <macros.h>
[16da5f8e]	119
/**
 * Evaluate @a cond only if wchar_t is a signed type.
 *
 * When the compiler defines __WCHAR_UNSIGNED__ the condition is not
 * evaluated at all and the macro yields true. This allows lower-bound
 * tests such as (ch >= 0) to be written without triggering
 * "comparison is always true" diagnostics on unsigned wchar_t.
 */
#ifdef __WCHAR_UNSIGNED__
#define WCHAR_SIGNED_CHECK(cond)  (true)
#else
#define WCHAR_SIGNED_CHECK(cond)  (cond)
#endif

/** Byte mask consisting of lowest @a n bits (out of 8), 0 <= n <= 8 */
#define LO_MASK_8(n)  ((uint8_t) ((1U << (n)) - 1))

/**
 * Mask consisting of lowest @a n bits (out of 32), 0 <= n < 32.
 *
 * The shift is done on an unsigned 32-bit operand so that no signed
 * overflow can occur for large @a n.
 */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1))

/**
 * Byte mask consisting of highest @a n bits (out of 8), 0 <= n <= 8.
 *
 * The complement is cast back to uint8_t; without the cast the integer
 * promotion of the operand would produce a negative int.
 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
[0dd1d444]	138
[1d2f85e]	139	/** Decode a single code point from an UTF-8 encoded string.
[21a639b7]	140	*
[1d2f85e]	141	* Decode a single code point from a string of size @a size. Decoding starts
[e1813cf]	142	* at @a offset and this offset is moved to the beginning of the next
[1d2f85e]	143	* code point. In case of decoding error, offset generally advances at least
[b888d5f]	144	* by one. However, offset is never moved beyond size.
[21a639b7]	145	*
[b888d5f]	146	* @param str String (not necessarily NULL-terminated).
	147	* @param offset Byte offset in string where to start decoding.
	148	* @param size Size of the string (in bytes).
	149	*
[1d2f85e]	150	* @return Value of decoded code point, U_SPECIAL on decoding error or
[b888d5f]	151	* NULL if attempt to decode beyond @a size.
[21a639b7]	152	*
	153	*/
[b888d5f]	154	wchar_t str_decode(const char str, size_t offset, size_t size)
[21a639b7]	155	{
[b888d5f]	156	if (*offset + 1 > size)
	157	return 0;
[a35b458]	158
[b888d5f]	159	/* First byte read from string */
	160	uint8_t b0 = (uint8_t) str[(*offset)++];
[a35b458]	161
[b888d5f]	162	/* Determine code length */
[a35b458]	163
[b888d5f]	164	unsigned int b0_bits; /* Data bits in first byte */
	165	unsigned int cbytes; /* Number of continuation bytes */
[a35b458]	166
[0dd1d444]	167	if ((b0 & 0x80) == 0) {
	168	/* 0xxxxxxx (Plain ASCII) */
	169	b0_bits = 7;
	170	cbytes = 0;
	171	} else if ((b0 & 0xe0) == 0xc0) {
	172	/* 110xxxxx 10xxxxxx */
	173	b0_bits = 5;
	174	cbytes = 1;
	175	} else if ((b0 & 0xf0) == 0xe0) {
	176	/* 1110xxxx 10xxxxxx 10xxxxxx */
	177	b0_bits = 4;
	178	cbytes = 2;
	179	} else if ((b0 & 0xf8) == 0xf0) {
	180	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
	181	b0_bits = 3;
	182	cbytes = 3;
	183	} else {
[b888d5f]	184	/* 10xxxxxx -- unexpected continuation byte */
[c8bf88d]	185	return U_SPECIAL;
[74c8da2c]	186	}
[a35b458]	187
[b888d5f]	188	if (*offset + cbytes > size)
[c8bf88d]	189	return U_SPECIAL;
[a35b458]	190
[b888d5f]	191	wchar_t ch = b0 & LO_MASK_8(b0_bits);
[a35b458]	192
[b888d5f]	193	/* Decode continuation bytes */
[0dd1d444]	194	while (cbytes > 0) {
[b888d5f]	195	uint8_t b = (uint8_t) str[(*offset)++];
[a35b458]	196
[b888d5f]	197	/* Must be 10xxxxxx */
	198	if ((b & 0xc0) != 0x80)
[c8bf88d]	199	return U_SPECIAL;
[a35b458]	200
[b888d5f]	201	/* Shift data bits to ch */
[0dd1d444]	202	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
[b888d5f]	203	cbytes--;
[74c8da2c]	204	}
[a35b458]	205
[0dd1d444]	206	return ch;
[74c8da2c]	207	}
	208
[1d2f85e]	209	/** Encode a single code point to a UTF-8 string representation.
[74c8da2c]	210	*
[1d2f85e]	211	* Encode a single code point to a UTF-8 string representation and store
[e1813cf]	212	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
[1d2f85e]	213	* is moved to the position where the next code point can be written to.
[74c8da2c]	214	*
[1d2f85e]	215	* @param ch Input code point.
[b888d5f]	216	* @param str Output buffer.
	217	* @param offset Byte offset where to start writing.
	218	* @param size Size of the output buffer (in bytes).
[74c8da2c]	219	*
[1d2f85e]	220	* @return EOK if the code point was encoded successfully, EOVERFLOW if there
	221	* was not enough space in the output buffer or EINVAL if the code point
[8e893ae]	222	* code was invalid.
[74c8da2c]	223	*/
[b7fd2a0]	224	errno_t chr_encode(const wchar_t ch, char str, size_t offset, size_t size)
[74c8da2c]	225	{
[b888d5f]	226	if (*offset >= size)
[d09f84e6]	227	return EOVERFLOW;
[a35b458]	228
[b888d5f]	229	if (!chr_check(ch))
[d09f84e6]	230	return EINVAL;
[a35b458]	231
[7c3fb9b]	232	/*
	233	* Unsigned version of ch (bit operations should only be done
	234	* on unsigned types).
	235	*/
[b888d5f]	236	uint32_t cc = (uint32_t) ch;
[a35b458]	237
[b888d5f]	238	/* Determine how many continuation bytes are needed */
[a35b458]	239
[b888d5f]	240	unsigned int b0_bits; /* Data bits in first byte */
	241	unsigned int cbytes; /* Number of continuation bytes */
[a35b458]	242
[32704cb]	243	if ((cc & ~LO_MASK_32(7)) == 0) {
	244	b0_bits = 7;
	245	cbytes = 0;
	246	} else if ((cc & ~LO_MASK_32(11)) == 0) {
	247	b0_bits = 5;
	248	cbytes = 1;
	249	} else if ((cc & ~LO_MASK_32(16)) == 0) {
	250	b0_bits = 4;
	251	cbytes = 2;
	252	} else if ((cc & ~LO_MASK_32(21)) == 0) {
	253	b0_bits = 3;
	254	cbytes = 3;
	255	} else {
[b888d5f]	256	/* Codes longer than 21 bits are not supported */
[d09f84e6]	257	return EINVAL;
[74c8da2c]	258	}
[a35b458]	259
[b888d5f]	260	/* Check for available space in buffer */
	261	if (*offset + cbytes >= size)
[d09f84e6]	262	return EOVERFLOW;
[a35b458]	263
[b888d5f]	264	/* Encode continuation bytes */
	265	unsigned int i;
	266	for (i = cbytes; i > 0; i--) {
[e1813cf]	267	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
[32704cb]	268	cc = cc >> CONT_BITS;
[74c8da2c]	269	}
[a35b458]	270
[b888d5f]	271	/* Encode first byte */
[e1813cf]	272	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
[a35b458]	273
[b888d5f]	274	/* Advance offset */
	275	*offset += cbytes + 1;
[a35b458]	276
[d09f84e6]	277	return EOK;
[74c8da2c]	278	}
	279
/** Get size of string.
 *
 * Count the bytes occupied by the string @a str, not including the
 * NULL-terminator.
 *
 * @param str NULL-terminated string to measure.
 *
 * @return Number of bytes preceding the NULL-terminator.
 *
 */
size_t str_bytes(const char *str)
{
	const char *end = str;

	/* Walk to the terminator; the distance travelled is the size. */
	while (*end != 0)
		end++;

	return (size_t) (end - str);
}
	299
/** Get size of wide string.
 *
 * Compute the number of bytes occupied by the wide string @a str, not
 * including the NULL-terminator.
 *
 * @param str NULL-terminated wide string to measure.
 *
 * @return Number of code points in @a str multiplied by sizeof(wchar_t).
 *
 */
size_t wstr_bytes(const wchar_t *str)
{
	size_t count = wstr_code_points(str);

	return count * sizeof(wchar_t);
}
	314
[1d2f85e]	315	/** Get size of string with code point count limit.
[74c8da2c]	316	*
[f25b2819]	317	* Get the number of bytes which are used by up to @a max_len first
[1d2f85e]	318	* code points in the string @a str. If @a max_len is greater than
	319	* the number of code points in @a str, the entire string is measured
	320	* (excluding the NULL-terminator).
[b888d5f]	321	*
	322	* @param str String to consider.
[1d2f85e]	323	* @param max_len Maximum number of code points to measure.
[74c8da2c]	324	*
[1d2f85e]	325	* @return Number of bytes used by the code points.
[74c8da2c]	326	*
	327	*/
[08e103d4]	328	size_t str_lbytes(const char *str, size_t max_len)
[74c8da2c]	329	{
[98000fb]	330	size_t len = 0;
[b888d5f]	331	size_t offset = 0;
[a35b458]	332
[b888d5f]	333	while (len < max_len) {
	334	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
[b54d2f1]	335	break;
[a35b458]	336
[f25b2819]	337	len++;
[21a639b7]	338	}
[a35b458]	339
[b888d5f]	340	return offset;
[74c8da2c]	341	}
	342
/** Get size of wide string with length limit.
 *
 * Get the number of bytes which are used by up to @a max_len first
 * code points in the wide string @a str. If @a max_len is greater than
 * the length of @a str, the entire wide string is measured (excluding the
 * NULL-terminator).
 *
 * The code points are counted directly rather than by converting
 * @a max_len to a byte limit, because max_len * sizeof(wchar_t) can wrap
 * around for large values of @a max_len and truncate the measurement.
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of code points to measure.
 *
 * @return Number of bytes used by the code points.
 *
 */
size_t wstr_lbytes(const wchar_t *str, size_t max_len)
{
	size_t len = 0;

	while ((len < max_len) && (str[len] != 0))
		len++;

	return len * sizeof(wchar_t);
}
	360
[1d2f85e]	361	/** Get number of unicode code points in a UTF-8 encoded string.
[82bb9c1]	362	*
[1d2f85e]	363	* @param str NULL-terminated UTF-8 string.
[82bb9c1]	364	*
[1d2f85e]	365	* @return Number of code points in the string.
[82bb9c1]	366	*
	367	*/
[08e103d4]	368	size_t str_code_points(const char *str)
[82bb9c1]	369	{
[98000fb]	370	size_t len = 0;
[b888d5f]	371	size_t offset = 0;
[a35b458]	372
[b888d5f]	373	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
	374	len++;
[a35b458]	375
[b888d5f]	376	return len;
[82bb9c1]	377	}
	378
/** Get number of code points in a wide string.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return Number of code points in @a wstr (excluding the NULL-terminator).
 *
 */
size_t wstr_code_points(const wchar_t *wstr)
{
	const wchar_t *end = wstr;

	/* Every wchar_t element before the terminator is one code point. */
	while (*end != 0)
		end++;

	return (size_t) (end - wstr);
}
	395
/** Get number of code points in a string with size limit.
 *
 * The string is decoded with str_decode() limited to @a size bytes until
 * that function returns 0; every non-zero result counts as one code point.
 *
 * @param str  NULL-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of code points in string.
 *
 */
size_t str_ncode_points(const char *str, size_t size)
{
	size_t pos = 0;
	size_t count;

	for (count = 0; str_decode(str, &pos, size) != 0; count++)
		;

	return count;
}
	414
/** Get number of code points in a wide string with size limit.
 *
 * Counting stops at the NULL-terminator or when the byte limit, aligned
 * down with ALIGN_DOWN() to sizeof(wchar_t), is reached.
 *
 * @param str  NULL-terminated wide string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of code points in the wide string.
 *
 */
size_t wstr_ncode_points(const wchar_t *str, size_t size)
{
	size_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
	size_t count = 0;

	for (size_t used = 0; used < limit; used += sizeof(wchar_t)) {
		if (str[count] == 0)
			break;

		count++;
	}

	return count;
}
	436
/** Check whether code point is plain ASCII.
 *
 * @param ch Code point to test.
 *
 * @return True if @a ch lies in the range 0 .. 127.
 *
 */
bool ascii_check(wchar_t ch)
{
	return WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 127);
}
[f25b2819]	449
/** Check whether code point is valid.
 *
 * @param ch Code point to test.
 *
 * @return True if @a ch lies in the range 0 .. 1114111 (0x10ffff).
 *
 */
bool chr_check(wchar_t ch)
{
	return WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 1114111);
}
	462
[b888d5f]	463	/** Compare two NULL terminated strings.
[16da5f8e]	464	*
[b888d5f]	465	* Do a char-by-char comparison of two NULL-terminated strings.
[4efeab5]	466	* The strings are considered equal iff their length is equal
[1d2f85e]	467	* and both strings consist of the same sequence of code points.
[4efeab5]	468	*
[1d2f85e]	469	* A string S1 is less than another string S2 if it has a code point with
	470	* lower value at the first code point position where the strings differ.
[1772e6d]	471	* If the strings differ in length, the shorter one is treated as if
[1d2f85e]	472	* padded by code points with a value of zero.
[16da5f8e]	473	*
[b888d5f]	474	* @param s1 First string to compare.
	475	* @param s2 Second string to compare.
[16da5f8e]	476	*
[1772e6d]	477	* @return 0 if the strings are equal, -1 if the first is less than the second,
	478	* 1 if the second is less than the first.
[16da5f8e]	479	*
	480	*/
[b888d5f]	481	int str_cmp(const char s1, const char s2)
[16da5f8e]	482	{
[a7b1071]	483	wchar_t c1 = 0;
	484	wchar_t c2 = 0;
[a35b458]	485
[b888d5f]	486	size_t off1 = 0;
	487	size_t off2 = 0;
[a7b1071]	488
	489	while (true) {
	490	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
	491	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
	492
[b888d5f]	493	if (c1 < c2)
[16da5f8e]	494	return -1;
[a35b458]	495
[b888d5f]	496	if (c1 > c2)
[16da5f8e]	497	return 1;
[a7b1071]	498
	499	if (c1 == 0 \|\| c2 == 0)
[1b20da0]	500	break;
[16da5f8e]	501	}
[a7b1071]	502
	503	return 0;
[16da5f8e]	504	}
	505
[b888d5f]	506	/** Compare two NULL terminated strings with length limit.
[16da5f8e]	507	*
[b888d5f]	508	* Do a char-by-char comparison of two NULL-terminated strings.
[4efeab5]	509	* The strings are considered equal iff
[08e103d4]	510	* min(str_code_points(s1), max_len) == min(str_code_points(s2), max_len)
[1d2f85e]	511	* and both strings consist of the same sequence of code points,
	512	* up to max_len code points.
[4efeab5]	513	*
[1d2f85e]	514	* A string S1 is less than another string S2 if it has a code point with
	515	* lower value at the first code point position where the strings differ.
[1772e6d]	516	* If the strings differ in length, the shorter one is treated as if
[1d2f85e]	517	* padded by code points with a value of zero. Only the first max_len
	518	* code points are considered.
[16da5f8e]	519	*
[b888d5f]	520	* @param s1 First string to compare.
	521	* @param s2 Second string to compare.
[1d2f85e]	522	* @param max_len Maximum number of code points to consider.
[b888d5f]	523	*
[1772e6d]	524	* @return 0 if the strings are equal, -1 if the first is less than the second,
	525	* 1 if the second is less than the first.
[16da5f8e]	526	*
	527	*/
[98000fb]	528	int str_lcmp(const char s1, const char s2, size_t max_len)
[16da5f8e]	529	{
[b888d5f]	530	wchar_t c1 = 0;
	531	wchar_t c2 = 0;
[a35b458]	532
[b888d5f]	533	size_t off1 = 0;
	534	size_t off2 = 0;
[a35b458]	535
[98000fb]	536	size_t len = 0;
[a7b1071]	537
	538	while (true) {
	539	if (len >= max_len)
[b888d5f]	540	break;
[a7b1071]	541
	542	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
	543	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
	544
[b888d5f]	545	if (c1 < c2)
[16da5f8e]	546	return -1;
[a7b1071]	547
[b888d5f]	548	if (c1 > c2)
[16da5f8e]	549	return 1;
[a7b1071]	550
	551	if (c1 == 0 \|\| c2 == 0)
	552	break;
	553
[1b20da0]	554	++len;
[16da5f8e]	555	}
[a7b1071]	556
	557	return 0;
	558
[16da5f8e]	559	}
	560
[f4b1535]	561	/** Copy string.
[b888d5f]	562	*
[f4b1535]	563	* Copy source string @a src to destination buffer @a dest.
	564	* No more than @a size bytes are written. If the size of the output buffer
	565	* is at least one byte, the output string will always be well-formed, i.e.
[1d2f85e]	566	* null-terminated and containing only complete code points.
[b888d5f]	567	*
[abf09311]	568	* @param dest Destination buffer.
[6700ee2]	569	* @param count Size of the destination buffer (must be > 0).
[f4b1535]	570	* @param src Source string.
[abf09311]	571	*
[b888d5f]	572	*/
[f4b1535]	573	void str_cpy(char dest, size_t size, const char src)
[b888d5f]	574	{
[6700ee2]	575	/* There must be space for a null terminator in the buffer. */
[63e27ef]	576	assert(size > 0);
	577	assert(src != NULL);
[a35b458]	578
[abf09311]	579	size_t src_off = 0;
	580	size_t dest_off = 0;
[a35b458]	581
[abf09311]	582	wchar_t ch;
[f4b1535]	583	while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
	584	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
	585	break;
	586	}
[a35b458]	587
[f4b1535]	588	dest[dest_off] = '\0';
	589	}
	590
	591	/** Copy size-limited substring.
	592	*
[6700ee2]	593	* Copy prefix of string @a src of max. size @a size to destination buffer
	594	* @a dest. No more than @a size bytes are written. The output string will
	595	* always be well-formed, i.e. null-terminated and containing only complete
[1d2f85e]	596	* code points.
[f4b1535]	597	*
	598	* No more than @a n bytes are read from the input string, so it does not
	599	* have to be null-terminated.
	600	*
[abf09311]	601	* @param dest Destination buffer.
[6700ee2]	602	* @param count Size of the destination buffer (must be > 0).
[f4b1535]	603	* @param src Source string.
[abf09311]	604	* @param n Maximum number of bytes to read from @a src.
	605	*
[f4b1535]	606	*/
	607	void str_ncpy(char dest, size_t size, const char src, size_t n)
	608	{
[6700ee2]	609	/* There must be space for a null terminator in the buffer. */
[63e27ef]	610	assert(size > 0);
[a35b458]	611
[abf09311]	612	size_t src_off = 0;
	613	size_t dest_off = 0;
[a35b458]	614
[abf09311]	615	wchar_t ch;
[f4b1535]	616	while ((ch = str_decode(src, &src_off, n)) != 0) {
	617	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
[b888d5f]	618	break;
	619	}
[a35b458]	620
[f4b1535]	621	dest[dest_off] = '\0';
[b888d5f]	622	}
[16da5f8e]	623
[0f06dbc]	624	/** Convert wide string to string.
[b888d5f]	625	*
[0f06dbc]	626	* Convert wide string @a src to string. The output is written to the buffer
	627	* specified by @a dest and @a size. @a size must be non-zero and the string
	628	* written will always be well-formed.
[16da5f8e]	629	*
[0f06dbc]	630	* @param dest Destination buffer.
	631	* @param size Size of the destination buffer.
	632	* @param src Source wide string.
[16da5f8e]	633	*/
[0f06dbc]	634	void wstr_to_str(char dest, size_t size, const wchar_t src)
[16da5f8e]	635	{
[b888d5f]	636	wchar_t ch;
[0f06dbc]	637	size_t src_idx;
	638	size_t dest_off;
	639
	640	/* There must be space for a null terminator in the buffer. */
[63e27ef]	641	assert(size > 0);
[0f06dbc]	642
	643	src_idx = 0;
	644	dest_off = 0;
[a35b458]	645
[b888d5f]	646	while ((ch = src[src_idx++]) != 0) {
[0f06dbc]	647	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
[b888d5f]	648	break;
[16da5f8e]	649	}
[0f06dbc]	650
	651	dest[dest_off] = '\0';
[16da5f8e]	652	}
	653
[1d2f85e]	654	/** Find first occurence of code point in string.
[20f1597]	655	*
[b888d5f]	656	* @param str String to search.
[1d2f85e]	657	* @param ch code point to look for.
[b888d5f]	658	*
[1d2f85e]	659	* @return Pointer to code point in @a str or NULL if not found.
[20f1597]	660	*/
[dd2cfa7]	661	char str_chr(const char str, wchar_t ch)
[20f1597]	662	{
[b888d5f]	663	wchar_t acc;
	664	size_t off = 0;
[f2d2c7ba]	665	size_t last = 0;
[a35b458]	666
[a7b1071]	667	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
[b888d5f]	668	if (acc == ch)
[dd2cfa7]	669	return (char *) (str + last);
[f2d2c7ba]	670	last = off;
[20f1597]	671	}
[a35b458]	672
[20f1597]	673	return NULL;
	674	}
	675
/** Insert a code point into a wide string.
 *
 * Insert a code point into a wide string at position
 * @a pos. The code points after the position are shifted.
 *
 * NOTE(review): only @a pos is checked against @a max_pos, not the
 * resulting length; the shifted terminator is written at index len + 1.
 * Presumably callers guarantee the buffer has room for that -- confirm.
 *
 * @param str     String to insert to.
 * @param ch      Code point to insert.
 * @param pos     Code point index where to insert.
 * @param max_pos Number of code points that fit in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_code_points(str);

	if ((pos > len) || (pos + 1 > max_pos))
		return false;

	/*
	 * Shift the tail (including the terminator) one element to the
	 * right, starting from the end so that nothing is overwritten
	 * before it is moved.
	 */
	for (size_t i = len + 1; i > pos; i--)
		str[i] = str[i - 1];

	str[pos] = ch;

	return true;
}
	705
/** Remove a code point from a wide string.
 *
 * Remove the code point at index @a pos from the wide string. All
 * following code points, including the terminator, move one position
 * towards the beginning.
 *
 * @param str String to remove from.
 * @param pos Code point index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	size_t len = wstr_code_points(str);

	if (pos >= len)
		return false;

	/* The last iteration copies the terminator from index len. */
	for (size_t dst = pos; dst < len; dst++)
		str[dst] = str[dst + 1];

	return true;
}
	731
/** Duplicate string.
 *
 * Allocate a new string and copy the contents of the source string into it.
 * The duplicate string is allocated with malloc().
 *
 * If successful, the duplicate string is always a well-formed
 * null-terminated UTF-8 string, but it can differ from the source
 * string on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string, or NULL if allocation failed.
 *
 */
char *str_dup(const char *src)
{
	/* One extra byte for the null terminator */
	size_t size = str_bytes(src) + 1;

	char *dest = malloc(size);
	if (dest == NULL)
		return NULL;

	str_cpy(dest, size, src);
	return dest;
}
	756
	757	/** Duplicate string with size limit.
	758	*
	759	* Allocate a new string and copy up to @max_size bytes from the source
[1d2f85e]	760	* string into it. The duplicate string is allocated as if by malloc().
[d066259]	761	* No more than @max_size + 1 bytes is allocated, but if the size
	762	* occupied by the source string is smaller than @max_size + 1,
	763	* less is allocated.
	764	*
[1d2f85e]	765	* If successful, the duplicate string is always a well-formed
[d066259]	766	* null-terminated UTF-8 string, but it can differ from the source
	767	* string on the byte level.
	768	*
	769	* @param src Source string.
	770	* @param n Maximum number of bytes to duplicate.
	771	*
	772	* @return Duplicate string.
	773	*
	774	*/
	775	char str_ndup(const char src, size_t n)
	776	{
[08e103d4]	777	size_t size = str_bytes(src);
[d066259]	778	if (size > n)
	779	size = n;
	780
	781	char *dest = malloc(size + 1);
	782	if (!dest)
	783	return NULL;
	784
	785	str_ncpy(dest, size + 1, src, size);
	786	return dest;
	787	}
	788
[30a5470]	789	/** Convert string to uint64_t (internal variant).
	790	*
	791	* @param nptr Pointer to string.
	792	* @param endptr Pointer to the first invalid character is stored here.
	793	* @param base Zero or number between 2 and 36 inclusive.
	794	* @param neg Indication of unary minus is stored here.
	795	* @apram result Result of the conversion.
	796	*
	797	* @return EOK if conversion was successful.
	798	*
	799	*/
[b7fd2a0]	800	static errno_t str_uint(const char nptr, char *endptr, unsigned int base,
[30a5470]	801	bool neg, uint64_t result)
	802	{
[63e27ef]	803	assert(endptr != NULL);
	804	assert(neg != NULL);
	805	assert(result != NULL);
[a35b458]	806
[30a5470]	807	*neg = false;
	808	const char *str = nptr;
[a35b458]	809
[30a5470]	810	/* Ignore leading whitespace */
	811	while (isspace(*str))
	812	str++;
[a35b458]	813
[30a5470]	814	if (*str == '-') {
	815	*neg = true;
	816	str++;
	817	} else if (*str == '+')
	818	str++;
[a35b458]	819
[30a5470]	820	if (base == 0) {
	821	/* Decode base if not specified */
	822	base = 10;
[a35b458]	823
[30a5470]	824	if (*str == '0') {
	825	base = 8;
	826	str++;
[a35b458]	827
[30a5470]	828	switch (*str) {
	829	case 'b':
	830	case 'B':
	831	base = 2;
	832	str++;
	833	break;
	834	case 'o':
	835	case 'O':
	836	base = 8;
	837	str++;
	838	break;
	839	case 'd':
	840	case 'D':
	841	case 't':
	842	case 'T':
	843	base = 10;
	844	str++;
	845	break;
	846	case 'x':
	847	case 'X':
	848	base = 16;
	849	str++;
	850	break;
[4ce914d4]	851	default:
	852	str--;
[30a5470]	853	}
	854	}
	855	} else {
	856	/* Check base range */
	857	if ((base < 2) \|\| (base > 36)) {
	858	endptr = (char ) str;
	859	return EINVAL;
	860	}
	861	}
[a35b458]	862
[30a5470]	863	*result = 0;
	864	const char *startstr = str;
[a35b458]	865
[30a5470]	866	while (*str != 0) {
	867	unsigned int digit;
[a35b458]	868
[30a5470]	869	if ((str >= 'a') && (str <= 'z'))
	870	digit = *str - 'a' + 10;
	871	else if ((str >= 'A') && (str <= 'Z'))
	872	digit = *str - 'A' + 10;
	873	else if ((str >= '0') && (str <= '9'))
	874	digit = *str - '0';
	875	else
	876	break;
[a35b458]	877
[30a5470]	878	if (digit >= base)
	879	break;
[a35b458]	880
[30a5470]	881	uint64_t prev = *result;
	882	result = (result) * base + digit;
[a35b458]	883
[30a5470]	884	if (*result < prev) {
	885	/* Overflow */
	886	endptr = (char ) str;
	887	return EOVERFLOW;
	888	}
[a35b458]	889
[30a5470]	890	str++;
	891	}
[a35b458]	892
[30a5470]	893	if (str == startstr) {
	894	/*
	895	* No digits were decoded => first invalid character is
	896	* the first character of the string.
	897	*/
	898	str = nptr;
	899	}
[a35b458]	900
[30a5470]	901	endptr = (char ) str;
[a35b458]	902
[30a5470]	903	if (str == nptr)
	904	return EINVAL;
[a35b458]	905
[30a5470]	906	return EOK;
	907	}
	908
	909	/** Convert string to uint64_t.
	910	*
	911	* @param nptr Pointer to string.
	912	* @param endptr If not NULL, pointer to the first invalid character
	913	* is stored here.
	914	* @param base Zero or number between 2 and 36 inclusive.
	915	* @param strict Do not allow any trailing characters.
[4ce914d4]	916	* @param result Result of the conversion.
[30a5470]	917	*
	918	* @return EOK if conversion was successful.
	919	*
	920	*/
[b7fd2a0]	921	errno_t str_uint64_t(const char nptr, char *endptr, unsigned int base,
[30a5470]	922	bool strict, uint64_t *result)
	923	{
[63e27ef]	924	assert(result != NULL);
[a35b458]	925
[30a5470]	926	bool neg;
	927	char *lendptr;
[b7fd2a0]	928	errno_t ret = str_uint(nptr, &lendptr, base, &neg, result);
[a35b458]	929
[30a5470]	930	if (endptr != NULL)
	931	endptr = (char ) lendptr;
[a35b458]	932
[30a5470]	933	if (ret != EOK)
	934	return ret;
[a35b458]	935
[30a5470]	936	/* Do not allow negative values */
	937	if (neg)
	938	return EINVAL;
[a35b458]	939
[7c3fb9b]	940	/*
	941	* Check whether we are at the end of
	942	* the string in strict mode
	943	*/
[30a5470]	944	if ((strict) && (*lendptr != 0))
	945	return EINVAL;
[a35b458]	946
[30a5470]	947	return EOK;
	948	}
	949
/** Scale a value to a decimal (SI) order of magnitude.
 *
 * A unit is chosen so that the scaled value keeps at least four
 * significant digits: a unit of 10^k is used only when @a val
 * exceeds 10^(k + 3), except for the largest unit (10^18), which is
 * used once @a val exceeds 10^19.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value (@a val divided by the chosen unit) is
 *               stored here.
 * @param suffix SI prefix letter of the chosen unit is stored here:
 *               'E' (10^18), 'P' (10^15), 'T' (10^12), 'G' (10^9),
 *               'M' (10^6), 'k' (10^3) or ' ' when no scaling was done.
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
	975
/** Scale a byte count to a binary (IEC) order of magnitude.
 *
 * A unit of 2^k bytes is used only when @a val exceeds 2^(k + 10), so the
 * scaled value stays above 1024 whenever a unit larger than a byte is
 * chosen.
 *
 * @param val    Value (in bytes) to scale.
 * @param rv     Scaled value (@a val divided by the chosen unit) is
 *               stored here.
 * @param suffix Pointer to a string literal naming the chosen unit is
 *               stored here: "PiB" (2^50), "TiB" (2^40), "GiB" (2^30),
 *               "MiB" (2^20), "KiB" (2^10) or "B".
 * @param fixed  If true, the byte suffix is padded to three characters
 *               ("B  ") so that all suffixes have the same width.
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		if (fixed)
			*suffix = "B  ";
		else
			*suffix = "B";
	}
}
	1002
[16da5f8e]	1003	/** @}
	1004	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: