Context Navigation

source: mainline/kernel/generic/src/lib/str.c@ cc74cb5

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since cc74cb5 was 7c3fb9b, checked in by Jiri Svoboda <jiri@…>, 7 years ago
Fix block comment formatting (ccheck).
Property mode set to `100644`
File size: 24.1 KB

Rev	Line
[16da5f8e]	1	/*
	2	* Copyright (c) 2001-2004 Jakub Jermar
	3	* All rights reserved.
	4	*
	5	* Redistribution and use in source and binary forms, with or without
	6	* modification, are permitted provided that the following conditions
	7	* are met:
	8	*
	9	* - Redistributions of source code must retain the above copyright
	10	* notice, this list of conditions and the following disclaimer.
	11	* - Redistributions in binary form must reproduce the above copyright
	12	* notice, this list of conditions and the following disclaimer in the
	13	* documentation and/or other materials provided with the distribution.
	14	* - The name of the author may not be used to endorse or promote products
	15	* derived from this software without specific prior written permission.
	16	*
	17	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	18	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	19	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	20	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
	21	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	22	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	23	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	24	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	25	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
	26	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	27	*/
	28
[2f57690]	29	/** @addtogroup generic
[16da5f8e]	30	* @{
	31	*/
	32
	33	/**
	34	* @file
[82bb9c1]	35	* @brief String functions.
	36	*
	37	* Strings and characters use the Universal Character Set (UCS). The standard
	38	* strings, called just strings, are encoded in UTF-8. Wide strings (encoded
	39	* in UTF-32) are supported to a limited degree. A single character is
[b888d5f]	40	* represented as wchar_t.@n
[82bb9c1]	41	*
[b888d5f]	42	* Overview of the terminology:@n
[82bb9c1]	43	*
[b888d5f]	44	* Term Meaning
	45	* -------------------- ----------------------------------------------------
	46	* byte 8 bits stored in uint8_t (unsigned 8 bit integer)
[82bb9c1]	47	*
[b888d5f]	48	* character UTF-32 encoded Unicode character, stored in wchar_t
	49	* (signed 32 bit integer), code points 0 .. 1114111
	50	* are valid
[82bb9c1]	51	*
[b888d5f]	52	* ASCII character 7 bit encoded ASCII character, stored in char
	53	* (usually signed 8 bit integer), code points 0 .. 127
	54	* are valid
	55	*
	56	* string UTF-8 encoded NULL-terminated Unicode string, char *
	57	*
	58	* wide string UTF-32 encoded NULL-terminated Unicode string,
	59	* wchar_t *
	60	*
	61	* [wide] string size number of BYTES in a [wide] string (excluding
	62	* the NULL-terminator), size_t
	63	*
	64	* [wide] string length number of CHARACTERS in a [wide] string (excluding
[98000fb]	65	* the NULL-terminator), size_t
[b888d5f]	66	*
	67	* [wide] string width number of display cells on a monospace display taken
[98000fb]	68	* by a [wide] string, size_t
[b888d5f]	69	*
	70	*
	71	* Overview of string metrics:@n
	72	*
	73	* Metric Abbrev. Type Meaning
	74	* ------ ------ ------ -------------------------------------------------
	75	* size n size_t number of BYTES in a string (excluding the
	76	* NULL-terminator)
	77	*
[98000fb]	78	* length l size_t number of CHARACTERS in a string (excluding the
[b888d5f]	79	* null terminator)
	80	*
[98000fb]	81	* width w size_t number of display cells on a monospace display
[b888d5f]	82	* taken by a string
	83	*
	84	*
	85	* Function naming prefixes:@n
	86	*
	87	* chr_ operate on characters
	88	* ascii_ operate on ASCII characters
	89	* str_ operate on strings
	90	* wstr_ operate on wide strings
	91	*
	92	* [w]str_[n|l|w] operate on a prefix limited by size, length
	93	* or width
	94	*
	95	*
	96	* A specific character inside a [wide] string can be referred to by:@n
	97	*
	98	* pointer (char *, wchar_t *)
	99	* byte offset (size_t)
[98000fb]	100	* character index (size_t)
[82bb9c1]	101	*
[16da5f8e]	102	*/
	103
[19f857a]	104	#include <str.h>
[16da5f8e]	105	#include <print.h>
	106	#include <cpu.h>
	107	#include <arch/asm.h>
	108	#include <arch.h>
[d09f84e6]	109	#include <errno.h>
[b888d5f]	110	#include <align.h>
[63e27ef]	111	#include <assert.h>
[30a5470]	112	#include <macros.h>
[1066041]	113	#include <mm/slab.h>
[16da5f8e]	114
/**
 * Evaluate @a cond only if wchar_t is signed.
 *
 * When the compiler defines __WCHAR_UNSIGNED__ the condition is replaced
 * by the constant true and @a cond is not evaluated at all.
 */
#ifdef __WCHAR_UNSIGNED__
#define WCHAR_SIGNED_CHECK(cond)  (true)
#else
#define WCHAR_SIGNED_CHECK(cond)  (cond)
#endif

/**
 * Byte mask consisting of lowest @a n bits (out of 8).
 *
 * The shift is done on an unsigned operand so that no signed overflow
 * can occur.
 */
#define LO_MASK_8(n)  ((uint8_t) ((1U << (n)) - 1))

/**
 * Mask consisting of lowest @a n bits (out of 32), valid for 0 <= n < 32.
 *
 * The shift is done on a uint32_t operand; shifting a signed 1 by 31
 * would be undefined behavior.
 */
#define LO_MASK_32(n)  ((uint32_t) ((((uint32_t) 1) << (n)) - 1))

/**
 * Byte mask consisting of highest @a n bits (out of 8).
 *
 * The complement is computed after integer promotion, so only the low
 * eight bits of the result are meaningful.
 */
#define HI_MASK_8(n)  (~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
[0dd1d444]	133
[b888d5f]	134	/** Decode a single character from a string.
[21a639b7]	135	*
[b888d5f]	136	* Decode a single character from a string of size @a size. Decoding starts
[e1813cf]	137	* at @a offset and this offset is moved to the beginning of the next
	138	* character. In case of decoding error, offset generally advances at least
[b888d5f]	139	* by one. However, offset is never moved beyond size.
[21a639b7]	140	*
[b888d5f]	141	* @param str String (not necessarily NULL-terminated).
	142	* @param offset Byte offset in string where to start decoding.
	143	* @param size Size of the string (in bytes).
	144	*
[c8bf88d]	145	* @return Value of decoded character, U_SPECIAL on decoding error or
[b888d5f]	146	* NULL if attempt to decode beyond @a size.
[21a639b7]	147	*
	148	*/
[b888d5f]	149	wchar_t str_decode(const char str, size_t offset, size_t size)
[21a639b7]	150	{
[b888d5f]	151	if (*offset + 1 > size)
	152	return 0;
[a35b458]	153
[b888d5f]	154	/* First byte read from string */
	155	uint8_t b0 = (uint8_t) str[(*offset)++];
[a35b458]	156
[b888d5f]	157	/* Determine code length */
[a35b458]	158
[b888d5f]	159	unsigned int b0_bits; /* Data bits in first byte */
	160	unsigned int cbytes; /* Number of continuation bytes */
[a35b458]	161
[0dd1d444]	162	if ((b0 & 0x80) == 0) {
	163	/* 0xxxxxxx (Plain ASCII) */
	164	b0_bits = 7;
	165	cbytes = 0;
	166	} else if ((b0 & 0xe0) == 0xc0) {
	167	/* 110xxxxx 10xxxxxx */
	168	b0_bits = 5;
	169	cbytes = 1;
	170	} else if ((b0 & 0xf0) == 0xe0) {
	171	/* 1110xxxx 10xxxxxx 10xxxxxx */
	172	b0_bits = 4;
	173	cbytes = 2;
	174	} else if ((b0 & 0xf8) == 0xf0) {
	175	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
	176	b0_bits = 3;
	177	cbytes = 3;
	178	} else {
[b888d5f]	179	/* 10xxxxxx -- unexpected continuation byte */
[c8bf88d]	180	return U_SPECIAL;
[74c8da2c]	181	}
[a35b458]	182
[b888d5f]	183	if (*offset + cbytes > size)
[c8bf88d]	184	return U_SPECIAL;
[a35b458]	185
[b888d5f]	186	wchar_t ch = b0 & LO_MASK_8(b0_bits);
[a35b458]	187
[b888d5f]	188	/* Decode continuation bytes */
[0dd1d444]	189	while (cbytes > 0) {
[b888d5f]	190	uint8_t b = (uint8_t) str[(*offset)++];
[a35b458]	191
[b888d5f]	192	/* Must be 10xxxxxx */
	193	if ((b & 0xc0) != 0x80)
[c8bf88d]	194	return U_SPECIAL;
[a35b458]	195
[b888d5f]	196	/* Shift data bits to ch */
[0dd1d444]	197	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
[b888d5f]	198	cbytes--;
[74c8da2c]	199	}
[a35b458]	200
[0dd1d444]	201	return ch;
[74c8da2c]	202	}
	203
[e1813cf]	204	/** Encode a single character to string representation.
[74c8da2c]	205	*
[e1813cf]	206	* Encode a single character to string representation (i.e. UTF-8) and store
	207	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
	208	* is moved to the position where the next character can be written to.
[74c8da2c]	209	*
[b888d5f]	210	* @param ch Input character.
	211	* @param str Output buffer.
	212	* @param offset Byte offset where to start writing.
	213	* @param size Size of the output buffer (in bytes).
[74c8da2c]	214	*
[d09f84e6]	215	* @return EOK if the character was encoded successfully, EOVERFLOW if there
[8e893ae]	216	* was not enough space in the output buffer or EINVAL if the character
	217	* code was invalid.
[74c8da2c]	218	*/
[b7fd2a0]	219	errno_t chr_encode(const wchar_t ch, char str, size_t offset, size_t size)
[74c8da2c]	220	{
[b888d5f]	221	if (*offset >= size)
[d09f84e6]	222	return EOVERFLOW;
[a35b458]	223
[b888d5f]	224	if (!chr_check(ch))
[d09f84e6]	225	return EINVAL;
[a35b458]	226
[7c3fb9b]	227	/*
	228	* Unsigned version of ch (bit operations should only be done
	229	* on unsigned types).
	230	*/
[b888d5f]	231	uint32_t cc = (uint32_t) ch;
[a35b458]	232
[b888d5f]	233	/* Determine how many continuation bytes are needed */
[a35b458]	234
[b888d5f]	235	unsigned int b0_bits; /* Data bits in first byte */
	236	unsigned int cbytes; /* Number of continuation bytes */
[a35b458]	237
[32704cb]	238	if ((cc & ~LO_MASK_32(7)) == 0) {
	239	b0_bits = 7;
	240	cbytes = 0;
	241	} else if ((cc & ~LO_MASK_32(11)) == 0) {
	242	b0_bits = 5;
	243	cbytes = 1;
	244	} else if ((cc & ~LO_MASK_32(16)) == 0) {
	245	b0_bits = 4;
	246	cbytes = 2;
	247	} else if ((cc & ~LO_MASK_32(21)) == 0) {
	248	b0_bits = 3;
	249	cbytes = 3;
	250	} else {
[b888d5f]	251	/* Codes longer than 21 bits are not supported */
[d09f84e6]	252	return EINVAL;
[74c8da2c]	253	}
[a35b458]	254
[b888d5f]	255	/* Check for available space in buffer */
	256	if (*offset + cbytes >= size)
[d09f84e6]	257	return EOVERFLOW;
[a35b458]	258
[b888d5f]	259	/* Encode continuation bytes */
	260	unsigned int i;
	261	for (i = cbytes; i > 0; i--) {
[e1813cf]	262	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
[32704cb]	263	cc = cc >> CONT_BITS;
[74c8da2c]	264	}
[a35b458]	265
[b888d5f]	266	/* Encode first byte */
[e1813cf]	267	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
[a35b458]	268
[b888d5f]	269	/* Advance offset */
	270	*offset += cbytes + 1;
[a35b458]	271
[d09f84e6]	272	return EOK;
[74c8da2c]	273	}
	274
/** Get size of string.
 *
 * Count the bytes occupied by the string @a str, not counting the
 * terminating zero byte.
 *
 * @param str String to consider.
 *
 * @return Number of bytes used by the string.
 *
 */
size_t str_size(const char *str)
{
	const char *end = str;

	/* Walk to the terminating zero byte */
	while (*end != 0)
		end++;

	return (size_t) (end - str);
}
	294
/** Get size of wide string.
 *
 * Compute the number of bytes occupied by the wide string @a str, not
 * counting the terminating zero character.
 *
 * @param str Wide string to consider.
 *
 * @return Number of bytes used by the wide string.
 *
 */
size_t wstr_size(const wchar_t *str)
{
	size_t chars = wstr_length(str);

	return chars * sizeof(wchar_t);
}
	309
	310	/** Get size of string with length limit.
[74c8da2c]	311	*
[f25b2819]	312	* Get the number of bytes which are used by up to @a max_len first
	313	* characters in the string @a str. If @a max_len is greater than
[b888d5f]	314	* the length of @a str, the entire string is measured (excluding the
	315	* NULL-terminator).
	316	*
	317	* @param str String to consider.
	318	* @param max_len Maximum number of characters to measure.
[74c8da2c]	319	*
[b888d5f]	320	* @return Number of bytes used by the characters.
[74c8da2c]	321	*
	322	*/
[98000fb]	323	size_t str_lsize(const char *str, size_t max_len)
[74c8da2c]	324	{
[98000fb]	325	size_t len = 0;
[b888d5f]	326	size_t offset = 0;
[a35b458]	327
[b888d5f]	328	while (len < max_len) {
	329	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
[b54d2f1]	330	break;
[a35b458]	331
[f25b2819]	332	len++;
[21a639b7]	333	}
[a35b458]	334
[b888d5f]	335	return offset;
[74c8da2c]	336	}
	337
/** Get size of wide string with length limit.
 *
 * Compute the number of bytes occupied by at most @a max_len leading
 * wide characters of @a str. If the wide string is shorter than
 * @a max_len characters, the whole string is measured (excluding the
 * terminating zero character).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	/* Express the character limit as a byte limit */
	size_t max_size = max_len * sizeof(wchar_t);
	size_t chars = wstr_nlength(str, max_size);

	return chars * sizeof(wchar_t);
}
	355
[b888d5f]	356	/** Get number of characters in a string.
[82bb9c1]	357	*
[b888d5f]	358	* @param str NULL-terminated string.
[82bb9c1]	359	*
[b888d5f]	360	* @return Number of characters in string.
[82bb9c1]	361	*
	362	*/
[98000fb]	363	size_t str_length(const char *str)
[82bb9c1]	364	{
[98000fb]	365	size_t len = 0;
[b888d5f]	366	size_t offset = 0;
[a35b458]	367
[b888d5f]	368	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
	369	len++;
[a35b458]	370
[b888d5f]	371	return len;
[82bb9c1]	372	}
	373
/** Get number of characters in a wide string.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return Number of characters in @a wstr (terminator not counted).
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
	const wchar_t *end = wstr;

	/* Walk to the terminating zero character */
	while (*end != 0)
		end++;

	return (size_t) (end - wstr);
}
	390
/** Get number of characters in a string with size limit.
 *
 * Characters are decoded with str_decode() limited to @a size bytes;
 * counting stops as soon as it returns zero.
 *
 * @param str  NULL-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in string.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t pos = 0;
	size_t count = 0;

	for (;;) {
		if (str_decode(str, &pos, size) == 0)
			return count;

		count++;
	}
}
	409
/** Get number of characters in a wide string with size limit.
 *
 * The byte limit @a size is first passed through
 * ALIGN_DOWN(size, sizeof(wchar_t)); characters are then counted until
 * either that limit is reached or a zero character is found.
 *
 * @param str  NULL-terminated wide string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in the wide string.
 *
 */
size_t wstr_nlength(const wchar_t *str, size_t size)
{
	const size_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
	size_t count = 0;
	size_t used;

	for (used = 0; used < limit; used += sizeof(wchar_t)) {
		/* A character is read only while still below the limit */
		if (str[count] == 0)
			break;

		count++;
	}

	return count;
}
	431
/** Check whether character is plain ASCII.
 *
 * @param ch Character to check.
 *
 * @return True if @a ch lies in the range 0 .. 127.
 *
 */
bool ascii_check(wchar_t ch)
{
	/* The lower bound is only tested when wchar_t is signed */
	return WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 127);
}
[f25b2819]	444
/** Check whether character is valid.
 *
 * @param ch Character to check.
 *
 * @return True if @a ch lies in the range 0 .. 1114111 (0x10ffff).
 *
 */
bool chr_check(wchar_t ch)
{
	/* The lower bound is only tested when wchar_t is signed */
	return WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 1114111);
}
	457
[b888d5f]	458	/** Compare two NULL terminated strings.
[16da5f8e]	459	*
[b888d5f]	460	* Do a char-by-char comparison of two NULL-terminated strings.
[4efeab5]	461	* The strings are considered equal iff their length is equal
	462	* and both strings consist of the same sequence of characters.
	463	*
[1772e6d]	464	* A string S1 is less than another string S2 if it has a character with
	465	* lower value at the first character position where the strings differ.
	466	* If the strings differ in length, the shorter one is treated as if
	467	* padded by characters with a value of zero.
[16da5f8e]	468	*
[b888d5f]	469	* @param s1 First string to compare.
	470	* @param s2 Second string to compare.
[16da5f8e]	471	*
[1772e6d]	472	* @return 0 if the strings are equal, -1 if the first is less than the second,
	473	* 1 if the second is less than the first.
[16da5f8e]	474	*
	475	*/
[b888d5f]	476	int str_cmp(const char s1, const char s2)
[16da5f8e]	477	{
[a7b1071]	478	wchar_t c1 = 0;
	479	wchar_t c2 = 0;
[a35b458]	480
[b888d5f]	481	size_t off1 = 0;
	482	size_t off2 = 0;
[a7b1071]	483
	484	while (true) {
	485	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
	486	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
	487
[b888d5f]	488	if (c1 < c2)
[16da5f8e]	489	return -1;
[a35b458]	490
[b888d5f]	491	if (c1 > c2)
[16da5f8e]	492	return 1;
[a7b1071]	493
	494	if (c1 == 0 \|\| c2 == 0)
[1b20da0]	495	break;
[16da5f8e]	496	}
[a7b1071]	497
	498	return 0;
[16da5f8e]	499	}
	500
[b888d5f]	501	/** Compare two NULL terminated strings with length limit.
[16da5f8e]	502	*
[b888d5f]	503	* Do a char-by-char comparison of two NULL-terminated strings.
[4efeab5]	504	* The strings are considered equal iff
	505	* min(str_length(s1), max_len) == min(str_length(s2), max_len)
	506	* and both strings consist of the same sequence of characters,
	507	* up to max_len characters.
	508	*
[1772e6d]	509	* A string S1 is less than another string S2 if it has a character with
	510	* lower value at the first character position where the strings differ.
	511	* If the strings differ in length, the shorter one is treated as if
	512	* padded by characters with a value of zero. Only the first max_len
	513	* characters are considered.
[16da5f8e]	514	*
[b888d5f]	515	* @param s1 First string to compare.
	516	* @param s2 Second string to compare.
	517	* @param max_len Maximum number of characters to consider.
	518	*
[1772e6d]	519	* @return 0 if the strings are equal, -1 if the first is less than the second,
	520	* 1 if the second is less than the first.
[16da5f8e]	521	*
	522	*/
[98000fb]	523	int str_lcmp(const char s1, const char s2, size_t max_len)
[16da5f8e]	524	{
[b888d5f]	525	wchar_t c1 = 0;
	526	wchar_t c2 = 0;
[a35b458]	527
[b888d5f]	528	size_t off1 = 0;
	529	size_t off2 = 0;
[a35b458]	530
[98000fb]	531	size_t len = 0;
[a7b1071]	532
	533	while (true) {
	534	if (len >= max_len)
[b888d5f]	535	break;
[a7b1071]	536
	537	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
	538	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
	539
[b888d5f]	540	if (c1 < c2)
[16da5f8e]	541	return -1;
[a7b1071]	542
[b888d5f]	543	if (c1 > c2)
[16da5f8e]	544	return 1;
[a7b1071]	545
	546	if (c1 == 0 \|\| c2 == 0)
	547	break;
	548
[1b20da0]	549	++len;
[16da5f8e]	550	}
[a7b1071]	551
	552	return 0;
	553
[16da5f8e]	554	}
	555
[f4b1535]	556	/** Copy string.
[b888d5f]	557	*
[f4b1535]	558	* Copy source string @a src to destination buffer @a dest.
	559	* No more than @a size bytes are written. If the size of the output buffer
	560	* is at least one byte, the output string will always be well-formed, i.e.
	561	* null-terminated and containing only complete characters.
[b888d5f]	562	*
[abf09311]	563	* @param dest Destination buffer.
[6700ee2]	564	* @param count Size of the destination buffer (must be > 0).
[f4b1535]	565	* @param src Source string.
[abf09311]	566	*
[b888d5f]	567	*/
[f4b1535]	568	void str_cpy(char dest, size_t size, const char src)
[b888d5f]	569	{
[6700ee2]	570	/* There must be space for a null terminator in the buffer. */
[63e27ef]	571	assert(size > 0);
	572	assert(src != NULL);
[a35b458]	573
[abf09311]	574	size_t src_off = 0;
	575	size_t dest_off = 0;
[a35b458]	576
[abf09311]	577	wchar_t ch;
[f4b1535]	578	while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
	579	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
	580	break;
	581	}
[a35b458]	582
[f4b1535]	583	dest[dest_off] = '\0';
	584	}
	585
	586	/** Copy size-limited substring.
	587	*
[6700ee2]	588	* Copy prefix of string @a src of max. size @a size to destination buffer
	589	* @a dest. No more than @a size bytes are written. The output string will
	590	* always be well-formed, i.e. null-terminated and containing only complete
	591	* characters.
[f4b1535]	592	*
	593	* No more than @a n bytes are read from the input string, so it does not
	594	* have to be null-terminated.
	595	*
[abf09311]	596	* @param dest Destination buffer.
[6700ee2]	597	* @param count Size of the destination buffer (must be > 0).
[f4b1535]	598	* @param src Source string.
[abf09311]	599	* @param n Maximum number of bytes to read from @a src.
	600	*
[f4b1535]	601	*/
	602	void str_ncpy(char dest, size_t size, const char src, size_t n)
	603	{
[6700ee2]	604	/* There must be space for a null terminator in the buffer. */
[63e27ef]	605	assert(size > 0);
[a35b458]	606
[abf09311]	607	size_t src_off = 0;
	608	size_t dest_off = 0;
[a35b458]	609
[abf09311]	610	wchar_t ch;
[f4b1535]	611	while ((ch = str_decode(src, &src_off, n)) != 0) {
	612	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
[b888d5f]	613	break;
	614	}
[a35b458]	615
[f4b1535]	616	dest[dest_off] = '\0';
[b888d5f]	617	}
[16da5f8e]	618
/** Duplicate string.
 *
 * Allocate a new string and copy characters from the source
 * string into it. The duplicate string is allocated via nfmalloc(),
 * a sleeping allocation, thus this function can sleep in no memory
 * conditions.
 *
 * The allocation cannot fail and the return value is always
 * a valid pointer. The duplicate string is always a well-formed
 * null-terminated UTF-8 string, but it can differ from the source
 * string on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string.
 *
 */
char *str_dup(const char *src)
{
	/* One extra byte for the null terminator */
	size_t size = str_size(src) + 1;
	char *dest = nfmalloc(size);
	assert(dest);

	str_cpy(dest, size, src);
	return dest;
}
	644
	645	/** Duplicate string with size limit.
	646	*
	647	* Allocate a new string and copy up to @max_size bytes from the source
	648	* string into it. The duplicate string is allocated via sleeping
	649	* malloc(), thus this function can sleep in no memory conditions.
	650	* No more than @max_size + 1 bytes is allocated, but if the size
	651	* occupied by the source string is smaller than @max_size + 1,
	652	* less is allocated.
	653	*
	654	* The allocation cannot fail and the return value is always
	655	* a valid pointer. The duplicate string is always a well-formed
	656	* null-terminated UTF-8 string, but it can differ from the source
	657	* string on the byte level.
	658	*
	659	* @param src Source string.
	660	* @param n Maximum number of bytes to duplicate.
	661	*
	662	* @return Duplicate string.
	663	*
	664	*/
	665	char str_ndup(const char src, size_t n)
	666	{
	667	size_t size = str_size(src);
	668	if (size > n)
	669	size = n;
[a35b458]	670
[11b285d]	671	char *dest = nfmalloc(size + 1);
[63e27ef]	672	assert(dest);
[a35b458]	673
[abf09311]	674	str_ncpy(dest, size + 1, src, size);
	675	return dest;
	676	}
	677
[0f06dbc]	678	/** Convert wide string to string.
[b888d5f]	679	*
[0f06dbc]	680	* Convert wide string @a src to string. The output is written to the buffer
	681	* specified by @a dest and @a size. @a size must be non-zero and the string
	682	* written will always be well-formed.
[16da5f8e]	683	*
[0f06dbc]	684	* @param dest Destination buffer.
	685	* @param size Size of the destination buffer.
	686	* @param src Source wide string.
[16da5f8e]	687	*/
[0f06dbc]	688	void wstr_to_str(char dest, size_t size, const wchar_t src)
[16da5f8e]	689	{
[b888d5f]	690	wchar_t ch;
[0f06dbc]	691	size_t src_idx;
	692	size_t dest_off;
	693
	694	/* There must be space for a null terminator in the buffer. */
[63e27ef]	695	assert(size > 0);
[0f06dbc]	696
	697	src_idx = 0;
	698	dest_off = 0;
[a35b458]	699
[b888d5f]	700	while ((ch = src[src_idx++]) != 0) {
[0f06dbc]	701	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
[b888d5f]	702	break;
[16da5f8e]	703	}
[0f06dbc]	704
	705	dest[dest_off] = '\0';
[16da5f8e]	706	}
	707
[20f1597]	708	/** Find first occurence of character in string.
	709	*
[b888d5f]	710	* @param str String to search.
	711	* @param ch Character to look for.
	712	*
	713	* @return Pointer to character in @a str or NULL if not found.
[20f1597]	714	*
	715	*/
[dd2cfa7]	716	char str_chr(const char str, wchar_t ch)
[20f1597]	717	{
[b888d5f]	718	wchar_t acc;
	719	size_t off = 0;
[f2d2c7ba]	720	size_t last = 0;
[a35b458]	721
[a7b1071]	722	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
[b888d5f]	723	if (acc == ch)
[dd2cfa7]	724	return (char *) (str + last);
[f2d2c7ba]	725	last = off;
[20f1597]	726	}
[a35b458]	727
[20f1597]	728	return NULL;
	729	}
	730
/** Insert a wide character into a wide string.
 *
 * Place @a ch at character index @a pos of @a str. Every character from
 * @a pos onwards, including the terminating zero, is moved one position
 * towards the end.
 *
 * NOTE(review): the bounds check compares only @a pos against @a max_pos,
 * while the shift writes up to index length + 1 -- confirm with callers
 * that the buffer is large enough for that.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	/* Position must lie inside the string or right at its end */
	if (pos > len)
		return false;

	if (pos + 1 > max_pos)
		return false;

	/* Shift the tail (terminator first) one position to the right */
	size_t dst = len + 1;
	while (dst > pos) {
		str[dst] = str[dst - 1];
		dst--;
	}

	str[pos] = ch;

	return true;
}
	760
	761	/** Remove a wide character from a wide string.
	762	*
	763	* Remove a wide character from a wide string at position
	764	* @a pos. The characters after the position are shifted.
	765	*
	766	* @param str String to remove from.
	767	* @param pos Character index to remove.
	768	*
	769	* @return True if the removal was sucessful, false if the position
	770	* is out of bounds.
	771	*
	772	*/
[98000fb]	773	bool wstr_remove(wchar_t *str, size_t pos)
[b888d5f]	774	{
[98000fb]	775	size_t len = wstr_length(str);
[a35b458]	776
[b888d5f]	777	if (pos >= len)
	778	return false;
[a35b458]	779
[98000fb]	780	size_t i;
[b888d5f]	781	for (i = pos + 1; i <= len; i++)
	782	str[i - 1] = str[i];
[a35b458]	783
[b888d5f]	784	return true;
	785	}
	786
[30a5470]	787	/** Convert string to uint64_t (internal variant).
	788	*
	789	* @param nptr Pointer to string.
	790	* @param endptr Pointer to the first invalid character is stored here.
	791	* @param base Zero or number between 2 and 36 inclusive.
	792	* @param neg Indication of unary minus is stored here.
	793	* @apram result Result of the conversion.
	794	*
	795	* @return EOK if conversion was successful.
	796	*
	797	*/
[b7fd2a0]	798	static errno_t str_uint(const char nptr, char *endptr, unsigned int base,
[30a5470]	799	bool neg, uint64_t result)
	800	{
[63e27ef]	801	assert(endptr != NULL);
	802	assert(neg != NULL);
	803	assert(result != NULL);
[a35b458]	804
[30a5470]	805	*neg = false;
	806	const char *str = nptr;
[a35b458]	807
[30a5470]	808	/* Ignore leading whitespace */
	809	while (isspace(*str))
	810	str++;
[a35b458]	811
[30a5470]	812	if (*str == '-') {
	813	*neg = true;
	814	str++;
	815	} else if (*str == '+')
	816	str++;
[a35b458]	817
[30a5470]	818	if (base == 0) {
	819	/* Decode base if not specified */
	820	base = 10;
[a35b458]	821
[30a5470]	822	if (*str == '0') {
	823	base = 8;
	824	str++;
[a35b458]	825
[30a5470]	826	switch (*str) {
	827	case 'b':
	828	case 'B':
	829	base = 2;
	830	str++;
	831	break;
	832	case 'o':
	833	case 'O':
	834	base = 8;
	835	str++;
	836	break;
	837	case 'd':
	838	case 'D':
	839	case 't':
	840	case 'T':
	841	base = 10;
	842	str++;
	843	break;
	844	case 'x':
	845	case 'X':
	846	base = 16;
	847	str++;
	848	break;
[4ce914d4]	849	default:
	850	str--;
[30a5470]	851	}
	852	}
	853	} else {
	854	/* Check base range */
	855	if ((base < 2) \|\| (base > 36)) {
	856	endptr = (char ) str;
	857	return EINVAL;
	858	}
	859	}
[a35b458]	860
[30a5470]	861	*result = 0;
	862	const char *startstr = str;
[a35b458]	863
[30a5470]	864	while (*str != 0) {
	865	unsigned int digit;
[a35b458]	866
[30a5470]	867	if ((str >= 'a') && (str <= 'z'))
	868	digit = *str - 'a' + 10;
	869	else if ((str >= 'A') && (str <= 'Z'))
	870	digit = *str - 'A' + 10;
	871	else if ((str >= '0') && (str <= '9'))
	872	digit = *str - '0';
	873	else
	874	break;
[a35b458]	875
[30a5470]	876	if (digit >= base)
	877	break;
[a35b458]	878
[30a5470]	879	uint64_t prev = *result;
	880	result = (result) * base + digit;
[a35b458]	881
[30a5470]	882	if (*result < prev) {
	883	/* Overflow */
	884	endptr = (char ) str;
	885	return EOVERFLOW;
	886	}
[a35b458]	887
[30a5470]	888	str++;
	889	}
[a35b458]	890
[30a5470]	891	if (str == startstr) {
	892	/*
	893	* No digits were decoded => first invalid character is
	894	* the first character of the string.
	895	*/
	896	str = nptr;
	897	}
[a35b458]	898
[30a5470]	899	endptr = (char ) str;
[a35b458]	900
[30a5470]	901	if (str == nptr)
	902	return EINVAL;
[a35b458]	903
[30a5470]	904	return EOK;
	905	}
	906
	907	/** Convert string to uint64_t.
	908	*
	909	* @param nptr Pointer to string.
	910	* @param endptr If not NULL, pointer to the first invalid character
	911	* is stored here.
	912	* @param base Zero or number between 2 and 36 inclusive.
	913	* @param strict Do not allow any trailing characters.
[4ce914d4]	914	* @param result Result of the conversion.
[30a5470]	915	*
	916	* @return EOK if conversion was successful.
	917	*
	918	*/
[b7fd2a0]	919	errno_t str_uint64_t(const char nptr, char *endptr, unsigned int base,
[30a5470]	920	bool strict, uint64_t *result)
	921	{
[63e27ef]	922	assert(result != NULL);
[a35b458]	923
[30a5470]	924	bool neg;
	925	char *lendptr;
[b7fd2a0]	926	errno_t ret = str_uint(nptr, &lendptr, base, &neg, result);
[a35b458]	927
[30a5470]	928	if (endptr != NULL)
	929	endptr = (char ) lendptr;
[a35b458]	930
[30a5470]	931	if (ret != EOK)
	932	return ret;
[a35b458]	933
[30a5470]	934	/* Do not allow negative values */
	935	if (neg)
	936	return EINVAL;
[a35b458]	937
[7c3fb9b]	938	/*
	939	* Check whether we are at the end of
	940	* the string in strict mode
	941	*/
[30a5470]	942	if ((strict) && (*lendptr != 0))
	943	return EINVAL;
[a35b458]	944
[30a5470]	945	return EOK;
	946	}
	947
/** Scale a value to a decimal order of magnitude.
 *
 * The value is divided by the largest power of 1000 such that the
 * quotient is still greater than 1000 (greater than 10 for the topmost
 * tier, which is limited by the range of uint64_t). The one-character
 * SI prefix matching the divisor is stored to @a suffix.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix SI prefix is stored here: 'E' (10^18), 'P' (10^15),
 *               'T' (10^12), 'G' (10^9), 'M' (10^6), 'k' (10^3)
 *               or ' ' if the value was not scaled.
 *
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/* Divisor is 10^18, i.e. exa */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* Divisor is 10^15, i.e. peta */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
	973
/** Scale a byte count to a binary order of magnitude.
 *
 * The value is divided by the largest power of 1024 such that the
 * quotient is still greater than 1024. The IEC unit matching the
 * divisor is stored to @a suffix as a pointer to a string literal.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Unit is stored here: "PiB" (2^50), "TiB" (2^40),
 *               "GiB" (2^30), "MiB" (2^20), "KiB" (2^10) or the plain
 *               byte unit if the value was not scaled.
 * @param fixed  Selects the space-padded variant of the plain byte unit.
 *
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* Divisor is 2^50, i.e. pebi */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		/*
		 * NOTE(review): the padded literal is reproduced as seen;
		 * confirm its exact width against the repository.
		 */
		if (fixed)
			*suffix = "B ";
		else
			*suffix = "B";
	}
}
	1000
[16da5f8e]	1001	/** @}
	1002	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: