Context Navigation

source: mainline/kernel/generic/src/lib/str.c@ bab75df6

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since bab75df6 was bab75df6, checked in by Jiri Svoboda <jiri@…>, 7 years ago
Let kernel code get printf via the standard stdio header. Clean up unused includes.
Property mode set to `100644`
File size: 24.1 KB

Rev	Line
[16da5f8e]	1	/*
	2	* Copyright (c) 2001-2004 Jakub Jermar
	3	* All rights reserved.
	4	*
	5	* Redistribution and use in source and binary forms, with or without
	6	* modification, are permitted provided that the following conditions
	7	* are met:
	8	*
	9	* - Redistributions of source code must retain the above copyright
	10	* notice, this list of conditions and the following disclaimer.
	11	* - Redistributions in binary form must reproduce the above copyright
	12	* notice, this list of conditions and the following disclaimer in the
	13	* documentation and/or other materials provided with the distribution.
	14	* - The name of the author may not be used to endorse or promote products
	15	* derived from this software without specific prior written permission.
	16	*
	17	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	18	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	19	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	20	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
	21	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	22	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	23	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	24	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	25	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
	26	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	27	*/
	28
[174156fd]	29	/** @addtogroup kernel_generic
[16da5f8e]	30	* @{
	31	*/
	32
	33	/**
	34	* @file
[82bb9c1]	35	* @brief String functions.
	36	*
	37	* Strings and characters use the Universal Character Set (UCS). The standard
	38	* strings, called just strings are encoded in UTF-8. Wide strings (encoded
	39	* in UTF-32) are supported to a limited degree. A single character is
[b888d5f]	40	* represented as wchar_t.@n
[82bb9c1]	41	*
[b888d5f]	42	* Overview of the terminology:@n
[82bb9c1]	43	*
[b888d5f]	44	* Term Meaning
	45	* -------------------- ----------------------------------------------------
	46	* byte 8 bits stored in uint8_t (unsigned 8 bit integer)
[82bb9c1]	47	*
[b888d5f]	48	* character UTF-32 encoded Unicode character, stored in wchar_t
	49	* (signed 32 bit integer), code points 0 .. 1114111
	50	* are valid
[82bb9c1]	51	*
[b888d5f]	52	* ASCII character 7 bit encoded ASCII character, stored in char
	53	* (usually signed 8 bit integer), code points 0 .. 127
	54	* are valid
	55	*
	56	* string UTF-8 encoded NULL-terminated Unicode string, char *
	57	*
	58	* wide string UTF-32 encoded NULL-terminated Unicode string,
	59	* wchar_t *
	60	*
	61	* [wide] string size number of BYTES in a [wide] string (excluding
	62	* the NULL-terminator), size_t
	63	*
	64	* [wide] string length number of CHARACTERS in a [wide] string (excluding
[98000fb]	65	* the NULL-terminator), size_t
[b888d5f]	66	*
	67	* [wide] string width number of display cells on a monospace display taken
[98000fb]	68	* by a [wide] string, size_t
[b888d5f]	69	*
	70	*
	71	* Overview of string metrics:@n
	72	*
	73	* Metric Abbrev. Type Meaning
	74	* ------ ------ ------ -------------------------------------------------
	75	* size n size_t number of BYTES in a string (excluding the
	76	* NULL-terminator)
	77	*
[98000fb]	78	* length l size_t number of CHARACTERS in a string (excluding the
[b888d5f]	79	* null terminator)
	80	*
[98000fb]	81	* width w size_t number of display cells on a monospace display
[b888d5f]	82	* taken by a string
	83	*
	84	*
	85	* Function naming prefixes:@n
	86	*
	87	* chr_ operate on characters
	88	* ascii_ operate on ASCII characters
	89	* str_ operate on strings
	90	* wstr_ operate on wide strings
	91	*
	92	* [w]str_[n|l|w] operate on a prefix limited by size, length
	93	* or width
	94	*
	95	*
	96	* A specific character inside a [wide] string can be referred to by:@n
	97	*
	98	* pointer (char *, wchar_t *)
	99	* byte offset (size_t)
[98000fb]	100	* character index (size_t)
[82bb9c1]	101	*
[16da5f8e]	102	*/
	103
[19f857a]	104	#include <str.h>
[16da5f8e]	105	#include <cpu.h>
	106	#include <arch/asm.h>
	107	#include <arch.h>
[d09f84e6]	108	#include <errno.h>
[b888d5f]	109	#include <align.h>
[63e27ef]	110	#include <assert.h>
[30a5470]	111	#include <macros.h>
[1066041]	112	#include <mm/slab.h>
[16da5f8e]	113
/**
 * Evaluate @a cond only when wchar_t is a signed type.
 *
 * When __WCHAR_UNSIGNED__ is defined the macro expands to true without
 * looking at the condition, so lower-bound tests such as (ch >= 0) are
 * not compiled for an unsigned wchar_t.
 */
#ifdef __WCHAR_UNSIGNED__
#define WCHAR_SIGNED_CHECK(cond)  (true)
#else
#define WCHAR_SIGNED_CHECK(cond)  (cond)
#endif

/**
 * Bit mask consisting of the lowest @a n bits (out of 8), n = 0 .. 8.
 *
 * The shift is done on an unsigned operand.
 */
#define LO_MASK_8(n)  ((uint8_t) ((1U << (n)) - 1))

/**
 * Bit mask consisting of the lowest @a n bits (out of 32), n = 0 .. 32.
 *
 * The shift is done in 64 bits so that n = 31 and n = 32 are well defined.
 */
#define LO_MASK_32(n)  ((uint32_t) ((UINT64_C(1) << (n)) - 1))

/**
 * Bit mask consisting of the highest @a n bits (out of 8), n = 0 .. 8.
 *
 * The complement is truncated back to 8 bits; without the cast integer
 * promotion would turn the result into a negative int.
 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
[0dd1d444]	132
[b888d5f]	133	/** Decode a single character from a string.
[21a639b7]	134	*
[b888d5f]	135	* Decode a single character from a string of size @a size. Decoding starts
[e1813cf]	136	* at @a offset and this offset is moved to the beginning of the next
	137	* character. In case of decoding error, offset generally advances at least
[b888d5f]	138	* by one. However, offset is never moved beyond size.
[21a639b7]	139	*
[b888d5f]	140	* @param str String (not necessarily NULL-terminated).
	141	* @param offset Byte offset in string where to start decoding.
	142	* @param size Size of the string (in bytes).
	143	*
[c8bf88d]	144	* @return Value of decoded character, U_SPECIAL on decoding error or
[b888d5f]	145	* NULL if attempt to decode beyond @a size.
[21a639b7]	146	*
	147	*/
[b888d5f]	148	wchar_t str_decode(const char str, size_t offset, size_t size)
[21a639b7]	149	{
[b888d5f]	150	if (*offset + 1 > size)
	151	return 0;
[a35b458]	152
[b888d5f]	153	/* First byte read from string */
	154	uint8_t b0 = (uint8_t) str[(*offset)++];
[a35b458]	155
[b888d5f]	156	/* Determine code length */
[a35b458]	157
[b888d5f]	158	unsigned int b0_bits; /* Data bits in first byte */
	159	unsigned int cbytes; /* Number of continuation bytes */
[a35b458]	160
[0dd1d444]	161	if ((b0 & 0x80) == 0) {
	162	/* 0xxxxxxx (Plain ASCII) */
	163	b0_bits = 7;
	164	cbytes = 0;
	165	} else if ((b0 & 0xe0) == 0xc0) {
	166	/* 110xxxxx 10xxxxxx */
	167	b0_bits = 5;
	168	cbytes = 1;
	169	} else if ((b0 & 0xf0) == 0xe0) {
	170	/* 1110xxxx 10xxxxxx 10xxxxxx */
	171	b0_bits = 4;
	172	cbytes = 2;
	173	} else if ((b0 & 0xf8) == 0xf0) {
	174	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
	175	b0_bits = 3;
	176	cbytes = 3;
	177	} else {
[b888d5f]	178	/* 10xxxxxx -- unexpected continuation byte */
[c8bf88d]	179	return U_SPECIAL;
[74c8da2c]	180	}
[a35b458]	181
[b888d5f]	182	if (*offset + cbytes > size)
[c8bf88d]	183	return U_SPECIAL;
[a35b458]	184
[b888d5f]	185	wchar_t ch = b0 & LO_MASK_8(b0_bits);
[a35b458]	186
[b888d5f]	187	/* Decode continuation bytes */
[0dd1d444]	188	while (cbytes > 0) {
[b888d5f]	189	uint8_t b = (uint8_t) str[(*offset)++];
[a35b458]	190
[b888d5f]	191	/* Must be 10xxxxxx */
	192	if ((b & 0xc0) != 0x80)
[c8bf88d]	193	return U_SPECIAL;
[a35b458]	194
[b888d5f]	195	/* Shift data bits to ch */
[0dd1d444]	196	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
[b888d5f]	197	cbytes--;
[74c8da2c]	198	}
[a35b458]	199
[0dd1d444]	200	return ch;
[74c8da2c]	201	}
	202
[e1813cf]	203	/** Encode a single character to string representation.
[74c8da2c]	204	*
[e1813cf]	205	* Encode a single character to string representation (i.e. UTF-8) and store
	206	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
	207	* is moved to the position where the next character can be written to.
[74c8da2c]	208	*
[b888d5f]	209	* @param ch Input character.
	210	* @param str Output buffer.
	211	* @param offset Byte offset where to start writing.
	212	* @param size Size of the output buffer (in bytes).
[74c8da2c]	213	*
[d09f84e6]	214	* @return EOK if the character was encoded successfully, EOVERFLOW if there
[8e893ae]	215	* was not enough space in the output buffer or EINVAL if the character
	216	* code was invalid.
[74c8da2c]	217	*/
[b7fd2a0]	218	errno_t chr_encode(const wchar_t ch, char str, size_t offset, size_t size)
[74c8da2c]	219	{
[b888d5f]	220	if (*offset >= size)
[d09f84e6]	221	return EOVERFLOW;
[a35b458]	222
[b888d5f]	223	if (!chr_check(ch))
[d09f84e6]	224	return EINVAL;
[a35b458]	225
[7c3fb9b]	226	/*
	227	* Unsigned version of ch (bit operations should only be done
	228	* on unsigned types).
	229	*/
[b888d5f]	230	uint32_t cc = (uint32_t) ch;
[a35b458]	231
[b888d5f]	232	/* Determine how many continuation bytes are needed */
[a35b458]	233
[b888d5f]	234	unsigned int b0_bits; /* Data bits in first byte */
	235	unsigned int cbytes; /* Number of continuation bytes */
[a35b458]	236
[32704cb]	237	if ((cc & ~LO_MASK_32(7)) == 0) {
	238	b0_bits = 7;
	239	cbytes = 0;
	240	} else if ((cc & ~LO_MASK_32(11)) == 0) {
	241	b0_bits = 5;
	242	cbytes = 1;
	243	} else if ((cc & ~LO_MASK_32(16)) == 0) {
	244	b0_bits = 4;
	245	cbytes = 2;
	246	} else if ((cc & ~LO_MASK_32(21)) == 0) {
	247	b0_bits = 3;
	248	cbytes = 3;
	249	} else {
[b888d5f]	250	/* Codes longer than 21 bits are not supported */
[d09f84e6]	251	return EINVAL;
[74c8da2c]	252	}
[a35b458]	253
[b888d5f]	254	/* Check for available space in buffer */
	255	if (*offset + cbytes >= size)
[d09f84e6]	256	return EOVERFLOW;
[a35b458]	257
[b888d5f]	258	/* Encode continuation bytes */
	259	unsigned int i;
	260	for (i = cbytes; i > 0; i--) {
[e1813cf]	261	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
[32704cb]	262	cc = cc >> CONT_BITS;
[74c8da2c]	263	}
[a35b458]	264
[b888d5f]	265	/* Encode first byte */
[e1813cf]	266	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
[a35b458]	267
[b888d5f]	268	/* Advance offset */
	269	*offset += cbytes + 1;
[a35b458]	270
[d09f84e6]	271	return EOK;
[74c8da2c]	272	}
	273
/** Get size of string.
 *
 * Count the bytes occupied by the string @a str, not counting the
 * terminating zero byte.
 *
 * @param str String to consider.
 *
 * @return Number of bytes used by the string.
 *
 */
size_t str_size(const char *str)
{
	const char *end = str;

	/* Walk to the terminator; the distance travelled is the size */
	while (*end != 0)
		end++;

	return (size_t) (end - str);
}
	293
/** Get size of wide string.
 *
 * Compute the number of bytes occupied by the wide string @a str, not
 * counting the terminating zero character.
 *
 * @param str Wide string to consider.
 *
 * @return Number of bytes used by the wide string.
 *
 */
size_t wstr_size(const wchar_t *str)
{
	size_t chars = wstr_length(str);

	return chars * sizeof(wchar_t);
}
	308
	309	/** Get size of string with length limit.
[74c8da2c]	310	*
[f25b2819]	311	* Get the number of bytes which are used by up to @a max_len first
	312	* characters in the string @a str. If @a max_len is greater than
[b888d5f]	313	* the length of @a str, the entire string is measured (excluding the
	314	* NULL-terminator).
	315	*
	316	* @param str String to consider.
	317	* @param max_len Maximum number of characters to measure.
[74c8da2c]	318	*
[b888d5f]	319	* @return Number of bytes used by the characters.
[74c8da2c]	320	*
	321	*/
[98000fb]	322	size_t str_lsize(const char *str, size_t max_len)
[74c8da2c]	323	{
[98000fb]	324	size_t len = 0;
[b888d5f]	325	size_t offset = 0;
[a35b458]	326
[b888d5f]	327	while (len < max_len) {
	328	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
[b54d2f1]	329	break;
[a35b458]	330
[f25b2819]	331	len++;
[21a639b7]	332	}
[a35b458]	333
[b888d5f]	334	return offset;
[74c8da2c]	335	}
	336
/** Get size of wide string with length limit.
 *
 * Compute the number of bytes occupied by at most @a max_len leading wide
 * characters of the wide string @a str. The terminating zero character is
 * not counted.
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	size_t byte_limit = max_len * sizeof(wchar_t);
	size_t chars = wstr_nlength(str, byte_limit);

	return chars * sizeof(wchar_t);
}
	354
[b888d5f]	355	/** Get number of characters in a string.
[82bb9c1]	356	*
[b888d5f]	357	* @param str NULL-terminated string.
[82bb9c1]	358	*
[b888d5f]	359	* @return Number of characters in string.
[82bb9c1]	360	*
	361	*/
[98000fb]	362	size_t str_length(const char *str)
[82bb9c1]	363	{
[98000fb]	364	size_t len = 0;
[b888d5f]	365	size_t offset = 0;
[a35b458]	366
[b888d5f]	367	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
	368	len++;
[a35b458]	369
[b888d5f]	370	return len;
[82bb9c1]	371	}
	372
/** Get number of characters in a wide string.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return Number of characters in @a wstr (terminator excluded).
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
	size_t idx = 0;

	/* The index of the terminating zero equals the length */
	while (wstr[idx] != 0)
		idx++;

	return idx;
}
	389
/** Get number of characters in a string with size limit.
 *
 * Characters are counted by decoding the string with str_decode(), limited
 * to @a size bytes, until it returns 0.
 *
 * @param str  NULL-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in string.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t pos = 0;
	size_t count = 0;

	for (;;) {
		if (str_decode(str, &pos, size) == 0)
			return count;

		count++;
	}
}
	408
/** Get number of characters in a wide string with size limit.
 *
 * Only whole wide characters that fit into the first @a size bytes are
 * examined; counting stops at the first zero character.
 *
 * @param str  NULL-terminated wide string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in the wide string.
 *
 */
size_t wstr_nlength(const wchar_t *str, size_t size)
{
	/* Number of complete wide characters within the byte limit */
	size_t max_chars = size / sizeof(wchar_t);
	size_t count = 0;

	while ((count < max_chars) && (str[count] != 0))
		count++;

	return count;
}
	430
/** Check whether character is plain ASCII.
 *
 * @param ch Character to test.
 *
 * @return True if the character code lies in the range 0 .. 127.
 *
 */
bool ascii_check(wchar_t ch)
{
	return WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 127);
}
[f25b2819]	443
/** Check whether character is valid.
 *
 * @param ch Character to test.
 *
 * @return True if the character code lies in the range 0 .. 1114111
 *         (0x10ffff).
 *
 */
bool chr_check(wchar_t ch)
{
	return WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 1114111);
}
	456
[b888d5f]	457	/** Compare two NULL terminated strings.
[16da5f8e]	458	*
[b888d5f]	459	* Do a char-by-char comparison of two NULL-terminated strings.
[4efeab5]	460	* The strings are considered equal iff their length is equal
	461	* and both strings consist of the same sequence of characters.
	462	*
[1772e6d]	463	* A string S1 is less than another string S2 if it has a character with
	464	* lower value at the first character position where the strings differ.
	465	* If the strings differ in length, the shorter one is treated as if
	466	* padded by characters with a value of zero.
[16da5f8e]	467	*
[b888d5f]	468	* @param s1 First string to compare.
	469	* @param s2 Second string to compare.
[16da5f8e]	470	*
[1772e6d]	471	* @return 0 if the strings are equal, -1 if the first is less than the second,
	472	* 1 if the second is less than the first.
[16da5f8e]	473	*
	474	*/
[b888d5f]	475	int str_cmp(const char s1, const char s2)
[16da5f8e]	476	{
[a7b1071]	477	wchar_t c1 = 0;
	478	wchar_t c2 = 0;
[a35b458]	479
[b888d5f]	480	size_t off1 = 0;
	481	size_t off2 = 0;
[a7b1071]	482
	483	while (true) {
	484	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
	485	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
	486
[b888d5f]	487	if (c1 < c2)
[16da5f8e]	488	return -1;
[a35b458]	489
[b888d5f]	490	if (c1 > c2)
[16da5f8e]	491	return 1;
[a7b1071]	492
	493	if (c1 == 0 \|\| c2 == 0)
[1b20da0]	494	break;
[16da5f8e]	495	}
[a7b1071]	496
	497	return 0;
[16da5f8e]	498	}
	499
[b888d5f]	500	/** Compare two NULL terminated strings with length limit.
[16da5f8e]	501	*
[b888d5f]	502	* Do a char-by-char comparison of two NULL-terminated strings.
[4efeab5]	503	* The strings are considered equal iff
	504	* min(str_length(s1), max_len) == min(str_length(s2), max_len)
	505	* and both strings consist of the same sequence of characters,
	506	* up to max_len characters.
	507	*
[1772e6d]	508	* A string S1 is less than another string S2 if it has a character with
	509	* lower value at the first character position where the strings differ.
	510	* If the strings differ in length, the shorter one is treated as if
	511	* padded by characters with a value of zero. Only the first max_len
	512	* characters are considered.
[16da5f8e]	513	*
[b888d5f]	514	* @param s1 First string to compare.
	515	* @param s2 Second string to compare.
	516	* @param max_len Maximum number of characters to consider.
	517	*
[1772e6d]	518	* @return 0 if the strings are equal, -1 if the first is less than the second,
	519	* 1 if the second is less than the first.
[16da5f8e]	520	*
	521	*/
[98000fb]	522	int str_lcmp(const char s1, const char s2, size_t max_len)
[16da5f8e]	523	{
[b888d5f]	524	wchar_t c1 = 0;
	525	wchar_t c2 = 0;
[a35b458]	526
[b888d5f]	527	size_t off1 = 0;
	528	size_t off2 = 0;
[a35b458]	529
[98000fb]	530	size_t len = 0;
[a7b1071]	531
	532	while (true) {
	533	if (len >= max_len)
[b888d5f]	534	break;
[a7b1071]	535
	536	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
	537	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
	538
[b888d5f]	539	if (c1 < c2)
[16da5f8e]	540	return -1;
[a7b1071]	541
[b888d5f]	542	if (c1 > c2)
[16da5f8e]	543	return 1;
[a7b1071]	544
	545	if (c1 == 0 \|\| c2 == 0)
	546	break;
	547
[1b20da0]	548	++len;
[16da5f8e]	549	}
[a7b1071]	550
	551	return 0;
	552
[16da5f8e]	553	}
	554
[f4b1535]	555	/** Copy string.
[b888d5f]	556	*
[f4b1535]	557	* Copy source string @a src to destination buffer @a dest.
	558	* No more than @a size bytes are written. If the size of the output buffer
	559	* is at least one byte, the output string will always be well-formed, i.e.
	560	* null-terminated and containing only complete characters.
[b888d5f]	561	*
[abf09311]	562	* @param dest Destination buffer.
[6700ee2]	563	* @param count Size of the destination buffer (must be > 0).
[f4b1535]	564	* @param src Source string.
[abf09311]	565	*
[b888d5f]	566	*/
[f4b1535]	567	void str_cpy(char dest, size_t size, const char src)
[b888d5f]	568	{
[6700ee2]	569	/* There must be space for a null terminator in the buffer. */
[63e27ef]	570	assert(size > 0);
	571	assert(src != NULL);
[a35b458]	572
[abf09311]	573	size_t src_off = 0;
	574	size_t dest_off = 0;
[a35b458]	575
[abf09311]	576	wchar_t ch;
[f4b1535]	577	while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
	578	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
	579	break;
	580	}
[a35b458]	581
[f4b1535]	582	dest[dest_off] = '\0';
	583	}
	584
	585	/** Copy size-limited substring.
	586	*
[6700ee2]	587	* Copy prefix of string @a src of max. size @a size to destination buffer
	588	* @a dest. No more than @a size bytes are written. The output string will
	589	* always be well-formed, i.e. null-terminated and containing only complete
	590	* characters.
[f4b1535]	591	*
	592	* No more than @a n bytes are read from the input string, so it does not
	593	* have to be null-terminated.
	594	*
[abf09311]	595	* @param dest Destination buffer.
[6700ee2]	596	* @param count Size of the destination buffer (must be > 0).
[f4b1535]	597	* @param src Source string.
[abf09311]	598	* @param n Maximum number of bytes to read from @a src.
	599	*
[f4b1535]	600	*/
	601	void str_ncpy(char dest, size_t size, const char src, size_t n)
	602	{
[6700ee2]	603	/* There must be space for a null terminator in the buffer. */
[63e27ef]	604	assert(size > 0);
[a35b458]	605
[abf09311]	606	size_t src_off = 0;
	607	size_t dest_off = 0;
[a35b458]	608
[abf09311]	609	wchar_t ch;
[f4b1535]	610	while ((ch = str_decode(src, &src_off, n)) != 0) {
	611	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
[b888d5f]	612	break;
	613	}
[a35b458]	614
[f4b1535]	615	dest[dest_off] = '\0';
[b888d5f]	616	}
[16da5f8e]	617
/** Duplicate string.
 *
 * Allocate a new string with nfmalloc() and copy characters from the
 * source string into it using str_cpy(). The allocation result is asserted
 * to be non-NULL.
 *
 * The duplicate string is always a well-formed null-terminated UTF-8
 * string, but it can differ from the source string on the byte level
 * because the copy goes through decoding and re-encoding.
 *
 * @param src Source string.
 *
 * @return Duplicate string.
 *
 */
char *str_dup(const char *src)
{
	/* Room for all source bytes plus the terminator */
	size_t size = str_size(src) + 1;
	char *dest = nfmalloc(size);
	assert(dest);

	str_cpy(dest, size, src);
	return dest;
}
	643
	644	/** Duplicate string with size limit.
	645	*
	646	* Allocate a new string and copy up to @max_size bytes from the source
	647	* string into it. The duplicate string is allocated via sleeping
	648	* malloc(), thus this function can sleep in no memory conditions.
	649	* No more than @max_size + 1 bytes is allocated, but if the size
	650	* occupied by the source string is smaller than @max_size + 1,
	651	* less is allocated.
	652	*
	653	* The allocation cannot fail and the return value is always
	654	* a valid pointer. The duplicate string is always a well-formed
	655	* null-terminated UTF-8 string, but it can differ from the source
	656	* string on the byte level.
	657	*
	658	* @param src Source string.
	659	* @param n Maximum number of bytes to duplicate.
	660	*
	661	* @return Duplicate string.
	662	*
	663	*/
	664	char str_ndup(const char src, size_t n)
	665	{
	666	size_t size = str_size(src);
	667	if (size > n)
	668	size = n;
[a35b458]	669
[11b285d]	670	char *dest = nfmalloc(size + 1);
[63e27ef]	671	assert(dest);
[a35b458]	672
[abf09311]	673	str_ncpy(dest, size + 1, src, size);
	674	return dest;
	675	}
	676
[0f06dbc]	677	/** Convert wide string to string.
[b888d5f]	678	*
[0f06dbc]	679	* Convert wide string @a src to string. The output is written to the buffer
	680	* specified by @a dest and @a size. @a size must be non-zero and the string
	681	* written will always be well-formed.
[16da5f8e]	682	*
[0f06dbc]	683	* @param dest Destination buffer.
	684	* @param size Size of the destination buffer.
	685	* @param src Source wide string.
[16da5f8e]	686	*/
[0f06dbc]	687	void wstr_to_str(char dest, size_t size, const wchar_t src)
[16da5f8e]	688	{
[b888d5f]	689	wchar_t ch;
[0f06dbc]	690	size_t src_idx;
	691	size_t dest_off;
	692
	693	/* There must be space for a null terminator in the buffer. */
[63e27ef]	694	assert(size > 0);
[0f06dbc]	695
	696	src_idx = 0;
	697	dest_off = 0;
[a35b458]	698
[b888d5f]	699	while ((ch = src[src_idx++]) != 0) {
[0f06dbc]	700	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
[b888d5f]	701	break;
[16da5f8e]	702	}
[0f06dbc]	703
	704	dest[dest_off] = '\0';
[16da5f8e]	705	}
	706
[20f1597]	707	/** Find first occurence of character in string.
	708	*
[b888d5f]	709	* @param str String to search.
	710	* @param ch Character to look for.
	711	*
	712	* @return Pointer to character in @a str or NULL if not found.
[20f1597]	713	*
	714	*/
[dd2cfa7]	715	char str_chr(const char str, wchar_t ch)
[20f1597]	716	{
[b888d5f]	717	wchar_t acc;
	718	size_t off = 0;
[f2d2c7ba]	719	size_t last = 0;
[a35b458]	720
[a7b1071]	721	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
[b888d5f]	722	if (acc == ch)
[dd2cfa7]	723	return (char *) (str + last);
[f2d2c7ba]	724	last = off;
[20f1597]	725	}
[a35b458]	726
[20f1597]	727	return NULL;
	728	}
	729
/** Insert a wide character into a wide string.
 *
 * Place the wide character @a ch at character index @a pos of the wide
 * string @a str. Everything from @a pos onwards, including the terminator,
 * is moved one position towards the end.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	if (pos > len)
		return false;

	if (pos + 1 > max_pos)
		return false;

	/* Move the tail (terminator first) one slot to the right */
	size_t dst;
	for (dst = len + 1; dst > pos; dst--)
		str[dst] = str[dst - 1];

	str[pos] = ch;

	return true;
}
	759
	760	/** Remove a wide character from a wide string.
	761	*
	762	* Remove a wide character from a wide string at position
	763	* @a pos. The characters after the position are shifted.
	764	*
	765	* @param str String to remove from.
	766	* @param pos Character index to remove.
	767	*
	768	* @return True if the removal was sucessful, false if the position
	769	* is out of bounds.
	770	*
	771	*/
[98000fb]	772	bool wstr_remove(wchar_t *str, size_t pos)
[b888d5f]	773	{
[98000fb]	774	size_t len = wstr_length(str);
[a35b458]	775
[b888d5f]	776	if (pos >= len)
	777	return false;
[a35b458]	778
[98000fb]	779	size_t i;
[b888d5f]	780	for (i = pos + 1; i <= len; i++)
	781	str[i - 1] = str[i];
[a35b458]	782
[b888d5f]	783	return true;
	784	}
	785
[30a5470]	786	/** Convert string to uint64_t (internal variant).
	787	*
	788	* @param nptr Pointer to string.
	789	* @param endptr Pointer to the first invalid character is stored here.
	790	* @param base Zero or number between 2 and 36 inclusive.
	791	* @param neg Indication of unary minus is stored here.
	792	* @apram result Result of the conversion.
	793	*
	794	* @return EOK if conversion was successful.
	795	*
	796	*/
[b7fd2a0]	797	static errno_t str_uint(const char nptr, char *endptr, unsigned int base,
[30a5470]	798	bool neg, uint64_t result)
	799	{
[63e27ef]	800	assert(endptr != NULL);
	801	assert(neg != NULL);
	802	assert(result != NULL);
[a35b458]	803
[30a5470]	804	*neg = false;
	805	const char *str = nptr;
[a35b458]	806
[30a5470]	807	/* Ignore leading whitespace */
	808	while (isspace(*str))
	809	str++;
[a35b458]	810
[30a5470]	811	if (*str == '-') {
	812	*neg = true;
	813	str++;
	814	} else if (*str == '+')
	815	str++;
[a35b458]	816
[30a5470]	817	if (base == 0) {
	818	/* Decode base if not specified */
	819	base = 10;
[a35b458]	820
[30a5470]	821	if (*str == '0') {
	822	base = 8;
	823	str++;
[a35b458]	824
[30a5470]	825	switch (*str) {
	826	case 'b':
	827	case 'B':
	828	base = 2;
	829	str++;
	830	break;
	831	case 'o':
	832	case 'O':
	833	base = 8;
	834	str++;
	835	break;
	836	case 'd':
	837	case 'D':
	838	case 't':
	839	case 'T':
	840	base = 10;
	841	str++;
	842	break;
	843	case 'x':
	844	case 'X':
	845	base = 16;
	846	str++;
	847	break;
[4ce914d4]	848	default:
	849	str--;
[30a5470]	850	}
	851	}
	852	} else {
	853	/* Check base range */
	854	if ((base < 2) \|\| (base > 36)) {
	855	endptr = (char ) str;
	856	return EINVAL;
	857	}
	858	}
[a35b458]	859
[30a5470]	860	*result = 0;
	861	const char *startstr = str;
[a35b458]	862
[30a5470]	863	while (*str != 0) {
	864	unsigned int digit;
[a35b458]	865
[30a5470]	866	if ((str >= 'a') && (str <= 'z'))
	867	digit = *str - 'a' + 10;
	868	else if ((str >= 'A') && (str <= 'Z'))
	869	digit = *str - 'A' + 10;
	870	else if ((str >= '0') && (str <= '9'))
	871	digit = *str - '0';
	872	else
	873	break;
[a35b458]	874
[30a5470]	875	if (digit >= base)
	876	break;
[a35b458]	877
[30a5470]	878	uint64_t prev = *result;
	879	result = (result) * base + digit;
[a35b458]	880
[30a5470]	881	if (*result < prev) {
	882	/* Overflow */
	883	endptr = (char ) str;
	884	return EOVERFLOW;
	885	}
[a35b458]	886
[30a5470]	887	str++;
	888	}
[a35b458]	889
[30a5470]	890	if (str == startstr) {
	891	/*
	892	* No digits were decoded => first invalid character is
	893	* the first character of the string.
	894	*/
	895	str = nptr;
	896	}
[a35b458]	897
[30a5470]	898	endptr = (char ) str;
[a35b458]	899
[30a5470]	900	if (str == nptr)
	901	return EINVAL;
[a35b458]	902
[30a5470]	903	return EOK;
	904	}
	905
	906	/** Convert string to uint64_t.
	907	*
	908	* @param nptr Pointer to string.
	909	* @param endptr If not NULL, pointer to the first invalid character
	910	* is stored here.
	911	* @param base Zero or number between 2 and 36 inclusive.
	912	* @param strict Do not allow any trailing characters.
[4ce914d4]	913	* @param result Result of the conversion.
[30a5470]	914	*
	915	* @return EOK if conversion was successful.
	916	*
	917	*/
[b7fd2a0]	918	errno_t str_uint64_t(const char nptr, char *endptr, unsigned int base,
[30a5470]	919	bool strict, uint64_t *result)
	920	{
[63e27ef]	921	assert(result != NULL);
[a35b458]	922
[30a5470]	923	bool neg;
	924	char *lendptr;
[b7fd2a0]	925	errno_t ret = str_uint(nptr, &lendptr, base, &neg, result);
[a35b458]	926
[30a5470]	927	if (endptr != NULL)
	928	endptr = (char ) lendptr;
[a35b458]	929
[30a5470]	930	if (ret != EOK)
	931	return ret;
[a35b458]	932
[30a5470]	933	/* Do not allow negative values */
	934	if (neg)
	935	return EINVAL;
[a35b458]	936
[7c3fb9b]	937	/*
	938	* Check whether we are at the end of
	939	* the string in strict mode
	940	*/
[30a5470]	941	if ((strict) && (*lendptr != 0))
	942	return EINVAL;
[a35b458]	943
[30a5470]	944	return EOK;
	945	}
	946
/** Scale a value to a decimal order of magnitude.
 *
 * Pick the largest decimal unit such that the scaled value is still
 * greater than 10 000 and store the scaled value together with the SI
 * prefix letter of the unit. Values up to 1 000 000 are stored unscaled
 * with a blank suffix.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Prefix letter of the unit used ('k', 'M', 'G', 'T', 'P',
 *               'E') or ' ' if the value was not scaled is stored here.
 *
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/* Units of 10^18 (exa) */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* Units of 10^15 (peta) */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		/* Units of 10^12 (tera) */
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		/* Units of 10^9 (giga) */
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		/* Units of 10^6 (mega) */
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		/* Units of 10^3 (kilo) */
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
	972
/** Scale a byte count to a binary order of magnitude.
 *
 * Pick the largest binary unit such that the scaled value is still
 * greater than 1024 and store the scaled value together with the name of
 * the unit. Values up to 1 048 576 are stored unscaled.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Pointer to a string literal naming the unit ("KiB", "MiB",
 *               "GiB", "TiB", "PiB" or plain bytes) is stored here.
 * @param fixed  Selects the padded variant of the plain-byte suffix.
 *
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* Above 2^60: units of 2^50 (pebibytes) */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		/* Above 2^50: units of 2^40 (tebibytes) */
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		/* Above 2^40: units of 2^30 (gibibytes) */
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		/* Above 2^30: units of 2^20 (mebibytes) */
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		/* Above 2^20: units of 2^10 (kibibytes) */
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		/*
		 * NOTE(review): the padded literal may have lost whitespace
		 * when this listing was extracted; confirm its intended width
		 * against the repository.
		 */
		if (fixed)
			*suffix = "B ";
		else
			*suffix = "B";
	}
}
	999
[16da5f8e]	1000	/** @}
	1001	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: