Context Navigation

source: mainline/kernel/generic/src/lib/str.c@ d066259

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since d066259 was d066259, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 6 years ago
Synchronize str.c/str.h across boot/kernel/uspace
Property mode set to `100644`
File size: 24.3 KB

Rev	Line
[16da5f8e]	1	/*
	2	* Copyright (c) 2001-2004 Jakub Jermar
[d066259]	3	* Copyright (c) 2005 Martin Decky
	4	* Copyright (c) 2008 Jiri Svoboda
	5	* Copyright (c) 2011 Martin Sucha
	6	* Copyright (c) 2011 Oleg Romanenko
[16da5f8e]	7	* All rights reserved.
	8	*
	9	* Redistribution and use in source and binary forms, with or without
	10	* modification, are permitted provided that the following conditions
	11	* are met:
	12	*
	13	* - Redistributions of source code must retain the above copyright
	14	* notice, this list of conditions and the following disclaimer.
	15	* - Redistributions in binary form must reproduce the above copyright
	16	* notice, this list of conditions and the following disclaimer in the
	17	* documentation and/or other materials provided with the distribution.
	18	* - The name of the author may not be used to endorse or promote products
	19	* derived from this software without specific prior written permission.
	20	*
	21	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	22	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	23	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	24	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
	25	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	26	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	27	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	28	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	29	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
	30	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	31	*/
	32
[174156fd]	33	/** @addtogroup kernel_generic
[16da5f8e]	34	* @{
	35	*/
	36
	37	/**
	38	* @file
[82bb9c1]	39	* @brief String functions.
	40	*
	41	* Strings and characters use the Universal Character Set (UCS). The standard
	42	* strings, called just strings are encoded in UTF-8. Wide strings (encoded
	43	* in UTF-32) are supported to a limited degree. A single character is
[b888d5f]	44	* represented as wchar_t.@n
[82bb9c1]	45	*
[b888d5f]	46	* Overview of the terminology:@n
[82bb9c1]	47	*
[b888d5f]	48	* Term Meaning
	49	* -------------------- ----------------------------------------------------
	50	* byte 8 bits stored in uint8_t (unsigned 8 bit integer)
[82bb9c1]	51	*
[b888d5f]	52	* character UTF-32 encoded Unicode character, stored in wchar_t
	53	* (signed 32 bit integer), code points 0 .. 1114111
	54	* are valid
[82bb9c1]	55	*
[b888d5f]	56	* ASCII character 7 bit encoded ASCII character, stored in char
	57	* (usually signed 8 bit integer), code points 0 .. 127
	58	* are valid
	59	*
	60	* string UTF-8 encoded NULL-terminated Unicode string, char *
	61	*
	62	* wide string UTF-32 encoded NULL-terminated Unicode string,
	63	* wchar_t *
	64	*
	65	* [wide] string size number of BYTES in a [wide] string (excluding
	66	* the NULL-terminator), size_t
	67	*
	68	* [wide] string length number of CHARACTERS in a [wide] string (excluding
[98000fb]	69	* the NULL-terminator), size_t
[b888d5f]	70	*
	71	* [wide] string width number of display cells on a monospace display taken
[98000fb]	72	* by a [wide] string, size_t
[b888d5f]	73	*
	74	*
	75	* Overview of string metrics:@n
	76	*
	77	* Metric Abbrev. Type Meaning
	78	* ------ ------ ------ -------------------------------------------------
	79	* size n size_t number of BYTES in a string (excluding the
	80	* NULL-terminator)
	81	*
[98000fb]	82	* length l size_t number of CHARACTERS in a string (excluding the
[b888d5f]	83	* null terminator)
	84	*
[98000fb]	85	* width w size_t number of display cells on a monospace display
[b888d5f]	86	* taken by a string
	87	*
	88	*
	89	* Function naming prefixes:@n
	90	*
	91	* chr_ operate on characters
	92	* ascii_ operate on ASCII characters
	93	* str_ operate on strings
	94	* wstr_ operate on wide strings
	95	*
	96	* [w]str_[n|l|w] operate on a prefix limited by size, length
	97	* or width
	98	*
	99	*
	100	* A specific character inside a [wide] string can be referred to by:@n
	101	*
	102	* pointer (char *, wchar_t *)
	103	* byte offset (size_t)
[98000fb]	104	* character index (size_t)
[82bb9c1]	105	*
[16da5f8e]	106	*/
	107
[19f857a]	108	#include <str.h>
[d066259]	109
	110	#include <assert.h>
[d09f84e6]	111	#include <errno.h>
[d066259]	112	#include <stdbool.h>
	113	#include <stddef.h>
	114	#include <stdint.h>
	115	#include <stdlib.h>
	116
[b888d5f]	117	#include <align.h>
[30a5470]	118	#include <macros.h>
[16da5f8e]	119
/**
 * Evaluate @a cond only if wchar_t is a signed type.
 *
 * When wchar_t is unsigned (__WCHAR_UNSIGNED__ is defined), a test such as
 * "ch >= 0" is trivially true, so the macro collapses to true and @a cond
 * is not evaluated.
 */
#ifdef __WCHAR_UNSIGNED__
#define WCHAR_SIGNED_CHECK(cond)  (true)
#else
#define WCHAR_SIGNED_CHECK(cond)  (cond)
#endif

/**
 * Byte mask consisting of lowest @a n bits (out of 8), 0 <= n <= 8.
 * The shift is done on an unsigned operand to stay clear of signed-shift
 * pitfalls.
 */
#define LO_MASK_8(n)  ((uint8_t) ((1U << (n)) - 1))

/**
 * Mask consisting of lowest @a n bits (out of 32), 0 <= n <= 31.
 * Uses a 32-bit unsigned one so that n == 31 does not shift into the sign
 * bit of an int.
 */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1))

/**
 * Byte mask consisting of highest @a n bits (out of 8), 0 <= n <= 8.
 * The result is narrowed back to uint8_t; without the cast the complement
 * would be a negative int with all upper bits set.
 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
[0dd1d444]	138
[b888d5f]	139	/** Decode a single character from a string.
[21a639b7]	140	*
[b888d5f]	141	* Decode a single character from a string of size @a size. Decoding starts
[e1813cf]	142	* at @a offset and this offset is moved to the beginning of the next
	143	* character. In case of decoding error, offset generally advances at least
[b888d5f]	144	* by one. However, offset is never moved beyond size.
[21a639b7]	145	*
[b888d5f]	146	* @param str String (not necessarily NULL-terminated).
	147	* @param offset Byte offset in string where to start decoding.
	148	* @param size Size of the string (in bytes).
	149	*
[c8bf88d]	150	* @return Value of decoded character, U_SPECIAL on decoding error or
[b888d5f]	151	* NULL if attempt to decode beyond @a size.
[21a639b7]	152	*
	153	*/
[b888d5f]	154	wchar_t str_decode(const char str, size_t offset, size_t size)
[21a639b7]	155	{
[b888d5f]	156	if (*offset + 1 > size)
	157	return 0;
[a35b458]	158
[b888d5f]	159	/* First byte read from string */
	160	uint8_t b0 = (uint8_t) str[(*offset)++];
[a35b458]	161
[b888d5f]	162	/* Determine code length */
[a35b458]	163
[b888d5f]	164	unsigned int b0_bits; /* Data bits in first byte */
	165	unsigned int cbytes; /* Number of continuation bytes */
[a35b458]	166
[0dd1d444]	167	if ((b0 & 0x80) == 0) {
	168	/* 0xxxxxxx (Plain ASCII) */
	169	b0_bits = 7;
	170	cbytes = 0;
	171	} else if ((b0 & 0xe0) == 0xc0) {
	172	/* 110xxxxx 10xxxxxx */
	173	b0_bits = 5;
	174	cbytes = 1;
	175	} else if ((b0 & 0xf0) == 0xe0) {
	176	/* 1110xxxx 10xxxxxx 10xxxxxx */
	177	b0_bits = 4;
	178	cbytes = 2;
	179	} else if ((b0 & 0xf8) == 0xf0) {
	180	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
	181	b0_bits = 3;
	182	cbytes = 3;
	183	} else {
[b888d5f]	184	/* 10xxxxxx -- unexpected continuation byte */
[c8bf88d]	185	return U_SPECIAL;
[74c8da2c]	186	}
[a35b458]	187
[b888d5f]	188	if (*offset + cbytes > size)
[c8bf88d]	189	return U_SPECIAL;
[a35b458]	190
[b888d5f]	191	wchar_t ch = b0 & LO_MASK_8(b0_bits);
[a35b458]	192
[b888d5f]	193	/* Decode continuation bytes */
[0dd1d444]	194	while (cbytes > 0) {
[b888d5f]	195	uint8_t b = (uint8_t) str[(*offset)++];
[a35b458]	196
[b888d5f]	197	/* Must be 10xxxxxx */
	198	if ((b & 0xc0) != 0x80)
[c8bf88d]	199	return U_SPECIAL;
[a35b458]	200
[b888d5f]	201	/* Shift data bits to ch */
[0dd1d444]	202	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
[b888d5f]	203	cbytes--;
[74c8da2c]	204	}
[a35b458]	205
[0dd1d444]	206	return ch;
[74c8da2c]	207	}
	208
[e1813cf]	209	/** Encode a single character to string representation.
[74c8da2c]	210	*
[e1813cf]	211	* Encode a single character to string representation (i.e. UTF-8) and store
	212	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
	213	* is moved to the position where the next character can be written to.
[74c8da2c]	214	*
[b888d5f]	215	* @param ch Input character.
	216	* @param str Output buffer.
	217	* @param offset Byte offset where to start writing.
	218	* @param size Size of the output buffer (in bytes).
[74c8da2c]	219	*
[d09f84e6]	220	* @return EOK if the character was encoded successfully, EOVERFLOW if there
[8e893ae]	221	* was not enough space in the output buffer or EINVAL if the character
	222	* code was invalid.
[74c8da2c]	223	*/
[b7fd2a0]	224	errno_t chr_encode(const wchar_t ch, char str, size_t offset, size_t size)
[74c8da2c]	225	{
[b888d5f]	226	if (*offset >= size)
[d09f84e6]	227	return EOVERFLOW;
[a35b458]	228
[b888d5f]	229	if (!chr_check(ch))
[d09f84e6]	230	return EINVAL;
[a35b458]	231
[7c3fb9b]	232	/*
	233	* Unsigned version of ch (bit operations should only be done
	234	* on unsigned types).
	235	*/
[b888d5f]	236	uint32_t cc = (uint32_t) ch;
[a35b458]	237
[b888d5f]	238	/* Determine how many continuation bytes are needed */
[a35b458]	239
[b888d5f]	240	unsigned int b0_bits; /* Data bits in first byte */
	241	unsigned int cbytes; /* Number of continuation bytes */
[a35b458]	242
[32704cb]	243	if ((cc & ~LO_MASK_32(7)) == 0) {
	244	b0_bits = 7;
	245	cbytes = 0;
	246	} else if ((cc & ~LO_MASK_32(11)) == 0) {
	247	b0_bits = 5;
	248	cbytes = 1;
	249	} else if ((cc & ~LO_MASK_32(16)) == 0) {
	250	b0_bits = 4;
	251	cbytes = 2;
	252	} else if ((cc & ~LO_MASK_32(21)) == 0) {
	253	b0_bits = 3;
	254	cbytes = 3;
	255	} else {
[b888d5f]	256	/* Codes longer than 21 bits are not supported */
[d09f84e6]	257	return EINVAL;
[74c8da2c]	258	}
[a35b458]	259
[b888d5f]	260	/* Check for available space in buffer */
	261	if (*offset + cbytes >= size)
[d09f84e6]	262	return EOVERFLOW;
[a35b458]	263
[b888d5f]	264	/* Encode continuation bytes */
	265	unsigned int i;
	266	for (i = cbytes; i > 0; i--) {
[e1813cf]	267	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
[32704cb]	268	cc = cc >> CONT_BITS;
[74c8da2c]	269	}
[a35b458]	270
[b888d5f]	271	/* Encode first byte */
[e1813cf]	272	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
[a35b458]	273
[b888d5f]	274	/* Advance offset */
	275	*offset += cbytes + 1;
[a35b458]	276
[d09f84e6]	277	return EOK;
[74c8da2c]	278	}
	279
/** Get size of string.
 *
 * Count the bytes occupied by the string @a str, not counting the
 * terminating zero byte.
 *
 * @param str String to consider.
 *
 * @return Number of bytes used by the string.
 *
 */
size_t str_size(const char *str)
{
	const char *end = str;

	/* Walk to the terminator; the distance travelled is the size */
	while (*end != 0)
		end++;

	return (size_t) (end - str);
}
	299
/** Get size of wide string.
 *
 * Compute how many bytes the wide string @a str occupies, not counting
 * the terminating zero character.
 *
 * @param str Wide string to consider.
 *
 * @return Number of bytes used by the wide string.
 *
 */
size_t wstr_size(const wchar_t *str)
{
	size_t chars = wstr_length(str);

	return chars * sizeof(wchar_t);
}
	314
	315	/** Get size of string with length limit.
[74c8da2c]	316	*
[f25b2819]	317	* Get the number of bytes which are used by up to @a max_len first
	318	* characters in the string @a str. If @a max_len is greater than
[b888d5f]	319	* the length of @a str, the entire string is measured (excluding the
	320	* NULL-terminator).
	321	*
	322	* @param str String to consider.
	323	* @param max_len Maximum number of characters to measure.
[74c8da2c]	324	*
[b888d5f]	325	* @return Number of bytes used by the characters.
[74c8da2c]	326	*
	327	*/
[98000fb]	328	size_t str_lsize(const char *str, size_t max_len)
[74c8da2c]	329	{
[98000fb]	330	size_t len = 0;
[b888d5f]	331	size_t offset = 0;
[a35b458]	332
[b888d5f]	333	while (len < max_len) {
	334	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
[b54d2f1]	335	break;
[a35b458]	336
[f25b2819]	337	len++;
[21a639b7]	338	}
[a35b458]	339
[b888d5f]	340	return offset;
[74c8da2c]	341	}
	342
/** Get size of wide string with length limit.
 *
 * Get the number of bytes which are used by up to @a max_len first
 * wide characters in the wide string @a str. If @a max_len is greater than
 * the length of @a str, the entire wide string is measured (excluding the
 * NUL terminator).
 *
 * The characters are counted directly rather than converting @a max_len
 * to a byte limit first, because that multiplication can wrap around for
 * large values of @a max_len and silently shorten the measurement.
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	size_t len = 0;

	while ((len < max_len) && (str[len] != 0))
		len++;

	return len * sizeof(wchar_t);
}
	360
[b888d5f]	361	/** Get number of characters in a string.
[82bb9c1]	362	*
[b888d5f]	363	* @param str NULL-terminated string.
[82bb9c1]	364	*
[b888d5f]	365	* @return Number of characters in string.
[82bb9c1]	366	*
	367	*/
[98000fb]	368	size_t str_length(const char *str)
[82bb9c1]	369	{
[98000fb]	370	size_t len = 0;
[b888d5f]	371	size_t offset = 0;
[a35b458]	372
[b888d5f]	373	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
	374	len++;
[a35b458]	375
[b888d5f]	376	return len;
[82bb9c1]	377	}
	378
/** Get number of characters in a wide string.
 *
 * @param wstr NUL-terminated wide string.
 *
 * @return Number of characters in @a wstr (terminator not counted).
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
	const wchar_t *cur = wstr;

	/* Advance to the terminating zero character */
	while (*cur != 0)
		cur++;

	return (size_t) (cur - wstr);
}
	395
/** Get number of characters in a string with size limit.
 *
 * Decodes characters until str_decode() returns zero, which happens at
 * the terminator or once @a size bytes have been consumed.
 *
 * @param str  NUL-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in string.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t pos = 0;
	size_t count;

	for (count = 0; str_decode(str, &pos, size) != 0; count++)
		;

	return count;
}
	414
/** Get number of characters in a wide string with size limit.
 *
 * Only whole wide characters lying within the first @a size bytes are
 * examined; @a size is first passed through ALIGN_DOWN() with
 * sizeof(wchar_t) as the alignment.
 *
 * @param str  NUL-terminated wide string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in the wide string.
 *
 */
size_t wstr_nlength(const wchar_t *str, size_t size)
{
	const size_t usable = ALIGN_DOWN(size, sizeof(wchar_t));
	size_t count = 0;

	for (size_t consumed = 0; consumed < usable;
	    consumed += sizeof(wchar_t)) {
		if (str[count] == 0)
			break;

		count++;
	}

	return count;
}
	436
/** Check whether character is plain ASCII.
 *
 * @param ch Character to test.
 *
 * @return True if character is plain ASCII, i.e. lies in 0 .. 127.
 *
 */
bool ascii_check(wchar_t ch)
{
	/* The lower bound is only tested when wchar_t is signed */
	return WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 127);
}
[f25b2819]	449
/** Check whether character is valid.
 *
 * @param ch Character to test.
 *
 * @return True if character is a valid Unicode code point, i.e. lies
 *         in 0 .. 1114111.
 *
 */
bool chr_check(wchar_t ch)
{
	/* The lower bound is only tested when wchar_t is signed */
	return WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 1114111);
}
	462
[b888d5f]	463	/** Compare two NULL terminated strings.
[16da5f8e]	464	*
[b888d5f]	465	* Do a char-by-char comparison of two NULL-terminated strings.
[4efeab5]	466	* The strings are considered equal iff their length is equal
	467	* and both strings consist of the same sequence of characters.
	468	*
[1772e6d]	469	* A string S1 is less than another string S2 if it has a character with
	470	* lower value at the first character position where the strings differ.
	471	* If the strings differ in length, the shorter one is treated as if
	472	* padded by characters with a value of zero.
[16da5f8e]	473	*
[b888d5f]	474	* @param s1 First string to compare.
	475	* @param s2 Second string to compare.
[16da5f8e]	476	*
[1772e6d]	477	* @return 0 if the strings are equal, -1 if the first is less than the second,
	478	* 1 if the second is less than the first.
[16da5f8e]	479	*
	480	*/
[b888d5f]	481	int str_cmp(const char s1, const char s2)
[16da5f8e]	482	{
[a7b1071]	483	wchar_t c1 = 0;
	484	wchar_t c2 = 0;
[a35b458]	485
[b888d5f]	486	size_t off1 = 0;
	487	size_t off2 = 0;
[a7b1071]	488
	489	while (true) {
	490	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
	491	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
	492
[b888d5f]	493	if (c1 < c2)
[16da5f8e]	494	return -1;
[a35b458]	495
[b888d5f]	496	if (c1 > c2)
[16da5f8e]	497	return 1;
[a7b1071]	498
	499	if (c1 == 0 \|\| c2 == 0)
[1b20da0]	500	break;
[16da5f8e]	501	}
[a7b1071]	502
	503	return 0;
[16da5f8e]	504	}
	505
[b888d5f]	506	/** Compare two NULL terminated strings with length limit.
[16da5f8e]	507	*
[b888d5f]	508	* Do a char-by-char comparison of two NULL-terminated strings.
[4efeab5]	509	* The strings are considered equal iff
	510	* min(str_length(s1), max_len) == min(str_length(s2), max_len)
	511	* and both strings consist of the same sequence of characters,
	512	* up to max_len characters.
	513	*
[1772e6d]	514	* A string S1 is less than another string S2 if it has a character with
	515	* lower value at the first character position where the strings differ.
	516	* If the strings differ in length, the shorter one is treated as if
	517	* padded by characters with a value of zero. Only the first max_len
	518	* characters are considered.
[16da5f8e]	519	*
[b888d5f]	520	* @param s1 First string to compare.
	521	* @param s2 Second string to compare.
	522	* @param max_len Maximum number of characters to consider.
	523	*
[1772e6d]	524	* @return 0 if the strings are equal, -1 if the first is less than the second,
	525	* 1 if the second is less than the first.
[16da5f8e]	526	*
	527	*/
[98000fb]	528	int str_lcmp(const char s1, const char s2, size_t max_len)
[16da5f8e]	529	{
[b888d5f]	530	wchar_t c1 = 0;
	531	wchar_t c2 = 0;
[a35b458]	532
[b888d5f]	533	size_t off1 = 0;
	534	size_t off2 = 0;
[a35b458]	535
[98000fb]	536	size_t len = 0;
[a7b1071]	537
	538	while (true) {
	539	if (len >= max_len)
[b888d5f]	540	break;
[a7b1071]	541
	542	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
	543	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
	544
[b888d5f]	545	if (c1 < c2)
[16da5f8e]	546	return -1;
[a7b1071]	547
[b888d5f]	548	if (c1 > c2)
[16da5f8e]	549	return 1;
[a7b1071]	550
	551	if (c1 == 0 \|\| c2 == 0)
	552	break;
	553
[1b20da0]	554	++len;
[16da5f8e]	555	}
[a7b1071]	556
	557	return 0;
	558
[16da5f8e]	559	}
	560
[f4b1535]	561	/** Copy string.
[b888d5f]	562	*
[f4b1535]	563	* Copy source string @a src to destination buffer @a dest.
	564	* No more than @a size bytes are written. If the size of the output buffer
	565	* is at least one byte, the output string will always be well-formed, i.e.
	566	* null-terminated and containing only complete characters.
[b888d5f]	567	*
[abf09311]	568	* @param dest Destination buffer.
[6700ee2]	569	* @param count Size of the destination buffer (must be > 0).
[f4b1535]	570	* @param src Source string.
[abf09311]	571	*
[b888d5f]	572	*/
[f4b1535]	573	void str_cpy(char dest, size_t size, const char src)
[b888d5f]	574	{
[6700ee2]	575	/* There must be space for a null terminator in the buffer. */
[63e27ef]	576	assert(size > 0);
	577	assert(src != NULL);
[a35b458]	578
[abf09311]	579	size_t src_off = 0;
	580	size_t dest_off = 0;
[a35b458]	581
[abf09311]	582	wchar_t ch;
[f4b1535]	583	while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
	584	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
	585	break;
	586	}
[a35b458]	587
[f4b1535]	588	dest[dest_off] = '\0';
	589	}
	590
	591	/** Copy size-limited substring.
	592	*
[6700ee2]	593	* Copy prefix of string @a src of max. size @a size to destination buffer
	594	* @a dest. No more than @a size bytes are written. The output string will
	595	* always be well-formed, i.e. null-terminated and containing only complete
	596	* characters.
[f4b1535]	597	*
	598	* No more than @a n bytes are read from the input string, so it does not
	599	* have to be null-terminated.
	600	*
[abf09311]	601	* @param dest Destination buffer.
[6700ee2]	602	* @param count Size of the destination buffer (must be > 0).
[f4b1535]	603	* @param src Source string.
[abf09311]	604	* @param n Maximum number of bytes to read from @a src.
	605	*
[f4b1535]	606	*/
	607	void str_ncpy(char dest, size_t size, const char src, size_t n)
	608	{
[6700ee2]	609	/* There must be space for a null terminator in the buffer. */
[63e27ef]	610	assert(size > 0);
[a35b458]	611
[abf09311]	612	size_t src_off = 0;
	613	size_t dest_off = 0;
[a35b458]	614
[abf09311]	615	wchar_t ch;
[f4b1535]	616	while ((ch = str_decode(src, &src_off, n)) != 0) {
	617	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
[b888d5f]	618	break;
	619	}
[a35b458]	620
[f4b1535]	621	dest[dest_off] = '\0';
[b888d5f]	622	}
[16da5f8e]	623
[0f06dbc]	624	/** Convert wide string to string.
[b888d5f]	625	*
[0f06dbc]	626	* Convert wide string @a src to string. The output is written to the buffer
	627	* specified by @a dest and @a size. @a size must be non-zero and the string
	628	* written will always be well-formed.
[16da5f8e]	629	*
[0f06dbc]	630	* @param dest Destination buffer.
	631	* @param size Size of the destination buffer.
	632	* @param src Source wide string.
[16da5f8e]	633	*/
[0f06dbc]	634	void wstr_to_str(char dest, size_t size, const wchar_t src)
[16da5f8e]	635	{
[b888d5f]	636	wchar_t ch;
[0f06dbc]	637	size_t src_idx;
	638	size_t dest_off;
	639
	640	/* There must be space for a null terminator in the buffer. */
[63e27ef]	641	assert(size > 0);
[0f06dbc]	642
	643	src_idx = 0;
	644	dest_off = 0;
[a35b458]	645
[b888d5f]	646	while ((ch = src[src_idx++]) != 0) {
[0f06dbc]	647	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
[b888d5f]	648	break;
[16da5f8e]	649	}
[0f06dbc]	650
	651	dest[dest_off] = '\0';
[16da5f8e]	652	}
	653
[20f1597]	654	/** Find first occurence of character in string.
	655	*
[b888d5f]	656	* @param str String to search.
	657	* @param ch Character to look for.
	658	*
	659	* @return Pointer to character in @a str or NULL if not found.
[20f1597]	660	*/
[dd2cfa7]	661	char str_chr(const char str, wchar_t ch)
[20f1597]	662	{
[b888d5f]	663	wchar_t acc;
	664	size_t off = 0;
[f2d2c7ba]	665	size_t last = 0;
[a35b458]	666
[a7b1071]	667	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
[b888d5f]	668	if (acc == ch)
[dd2cfa7]	669	return (char *) (str + last);
[f2d2c7ba]	670	last = off;
[20f1597]	671	}
[a35b458]	672
[20f1597]	673	return NULL;
	674	}
	675
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position
 * @a pos. The characters after the position are shifted.
 *
 * The insertion moves the terminator to index length + 1, so it is refused
 * unless that index still lies inside the buffer. Checking only @a pos
 * against @a max_pos is not enough: inserting anywhere into an already
 * full string would write one element past the end.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert to.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *                NOTE(review): treated here as the buffer capacity in
 *                wchar_t elements including the terminator -- confirm
 *                against callers.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds or the buffer has no room for another character.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	/* Highest index written below is len + 1 (the moved terminator) */
	if ((pos > len) || (len + 1 >= max_pos))
		return false;

	/*
	 * Shift the tail (terminator included) one slot to the right.
	 * The "i + 1 > pos" form lets the unsigned index stop correctly
	 * even when pos is 0.
	 */
	size_t i;
	for (i = len; i + 1 > pos; i--)
		str[i + 1] = str[i];

	str[pos] = ch;

	return true;
}
	705
	706	/** Remove a wide character from a wide string.
	707	*
	708	* Remove a wide character from a wide string at position
	709	* @a pos. The characters after the position are shifted.
	710	*
	711	* @param str String to remove from.
	712	* @param pos Character index to remove.
	713	*
	714	* @return True if the removal was sucessful, false if the position
	715	* is out of bounds.
	716	*
	717	*/
[98000fb]	718	bool wstr_remove(wchar_t *str, size_t pos)
[b888d5f]	719	{
[98000fb]	720	size_t len = wstr_length(str);
[a35b458]	721
[b888d5f]	722	if (pos >= len)
	723	return false;
[a35b458]	724
[98000fb]	725	size_t i;
[b888d5f]	726	for (i = pos + 1; i <= len; i++)
	727	str[i - 1] = str[i];
[a35b458]	728
[b888d5f]	729	return true;
	730	}
	731
/** Duplicate string.
 *
 * Allocate a new string with malloc() and copy characters from the source
 * string into it.
 *
 * On success the duplicate is always a well-formed null-terminated UTF-8
 * string, but it can differ from the source string on the byte level.
 * The caller owns the returned buffer.
 *
 * @param src Source string.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	/* One extra byte for the terminator */
	size_t size = str_size(src) + 1;
	char *dest = malloc(size);
	if (!dest)
		return NULL;

	str_cpy(dest, size, src);
	return dest;
}
	758
	759	/** Duplicate string with size limit.
	760	*
	761	* Allocate a new string and copy up to @max_size bytes from the source
	762	* string into it. The duplicate string is allocated via sleeping
	763	* malloc(), thus this function can sleep in no memory conditions.
	764	* No more than @max_size + 1 bytes is allocated, but if the size
	765	* occupied by the source string is smaller than @max_size + 1,
	766	* less is allocated.
	767	*
	768	* The allocation cannot fail and the return value is always
	769	* a valid pointer. The duplicate string is always a well-formed
	770	* null-terminated UTF-8 string, but it can differ from the source
	771	* string on the byte level.
	772	*
	773	* @param src Source string.
	774	* @param n Maximum number of bytes to duplicate.
	775	*
	776	* @return Duplicate string.
	777	*
	778	*/
	779	char str_ndup(const char src, size_t n)
	780	{
	781	size_t size = str_size(src);
	782	if (size > n)
	783	size = n;
	784
	785	char *dest = malloc(size + 1);
	786	if (!dest)
	787	return NULL;
	788
	789	str_ncpy(dest, size + 1, src, size);
	790	return dest;
	791	}
	792
[30a5470]	793	/** Convert string to uint64_t (internal variant).
	794	*
	795	* @param nptr Pointer to string.
	796	* @param endptr Pointer to the first invalid character is stored here.
	797	* @param base Zero or number between 2 and 36 inclusive.
	798	* @param neg Indication of unary minus is stored here.
	799	* @apram result Result of the conversion.
	800	*
	801	* @return EOK if conversion was successful.
	802	*
	803	*/
[b7fd2a0]	804	static errno_t str_uint(const char nptr, char *endptr, unsigned int base,
[30a5470]	805	bool neg, uint64_t result)
	806	{
[63e27ef]	807	assert(endptr != NULL);
	808	assert(neg != NULL);
	809	assert(result != NULL);
[a35b458]	810
[30a5470]	811	*neg = false;
	812	const char *str = nptr;
[a35b458]	813
[30a5470]	814	/* Ignore leading whitespace */
	815	while (isspace(*str))
	816	str++;
[a35b458]	817
[30a5470]	818	if (*str == '-') {
	819	*neg = true;
	820	str++;
	821	} else if (*str == '+')
	822	str++;
[a35b458]	823
[30a5470]	824	if (base == 0) {
	825	/* Decode base if not specified */
	826	base = 10;
[a35b458]	827
[30a5470]	828	if (*str == '0') {
	829	base = 8;
	830	str++;
[a35b458]	831
[30a5470]	832	switch (*str) {
	833	case 'b':
	834	case 'B':
	835	base = 2;
	836	str++;
	837	break;
	838	case 'o':
	839	case 'O':
	840	base = 8;
	841	str++;
	842	break;
	843	case 'd':
	844	case 'D':
	845	case 't':
	846	case 'T':
	847	base = 10;
	848	str++;
	849	break;
	850	case 'x':
	851	case 'X':
	852	base = 16;
	853	str++;
	854	break;
[4ce914d4]	855	default:
	856	str--;
[30a5470]	857	}
	858	}
	859	} else {
	860	/* Check base range */
	861	if ((base < 2) \|\| (base > 36)) {
	862	endptr = (char ) str;
	863	return EINVAL;
	864	}
	865	}
[a35b458]	866
[30a5470]	867	*result = 0;
	868	const char *startstr = str;
[a35b458]	869
[30a5470]	870	while (*str != 0) {
	871	unsigned int digit;
[a35b458]	872
[30a5470]	873	if ((str >= 'a') && (str <= 'z'))
	874	digit = *str - 'a' + 10;
	875	else if ((str >= 'A') && (str <= 'Z'))
	876	digit = *str - 'A' + 10;
	877	else if ((str >= '0') && (str <= '9'))
	878	digit = *str - '0';
	879	else
	880	break;
[a35b458]	881
[30a5470]	882	if (digit >= base)
	883	break;
[a35b458]	884
[30a5470]	885	uint64_t prev = *result;
	886	result = (result) * base + digit;
[a35b458]	887
[30a5470]	888	if (*result < prev) {
	889	/* Overflow */
	890	endptr = (char ) str;
	891	return EOVERFLOW;
	892	}
[a35b458]	893
[30a5470]	894	str++;
	895	}
[a35b458]	896
[30a5470]	897	if (str == startstr) {
	898	/*
	899	* No digits were decoded => first invalid character is
	900	* the first character of the string.
	901	*/
	902	str = nptr;
	903	}
[a35b458]	904
[30a5470]	905	endptr = (char ) str;
[a35b458]	906
[30a5470]	907	if (str == nptr)
	908	return EINVAL;
[a35b458]	909
[30a5470]	910	return EOK;
	911	}
	912
	913	/** Convert string to uint64_t.
	914	*
	915	* @param nptr Pointer to string.
	916	* @param endptr If not NULL, pointer to the first invalid character
	917	* is stored here.
	918	* @param base Zero or number between 2 and 36 inclusive.
	919	* @param strict Do not allow any trailing characters.
[4ce914d4]	920	* @param result Result of the conversion.
[30a5470]	921	*
	922	* @return EOK if conversion was successful.
	923	*
	924	*/
[b7fd2a0]	925	errno_t str_uint64_t(const char nptr, char *endptr, unsigned int base,
[30a5470]	926	bool strict, uint64_t *result)
	927	{
[63e27ef]	928	assert(result != NULL);
[a35b458]	929
[30a5470]	930	bool neg;
	931	char *lendptr;
[b7fd2a0]	932	errno_t ret = str_uint(nptr, &lendptr, base, &neg, result);
[a35b458]	933
[30a5470]	934	if (endptr != NULL)
	935	endptr = (char ) lendptr;
[a35b458]	936
[30a5470]	937	if (ret != EOK)
	938	return ret;
[a35b458]	939
[30a5470]	940	/* Do not allow negative values */
	941	if (neg)
	942	return EINVAL;
[a35b458]	943
[7c3fb9b]	944	/*
	945	* Check whether we are at the end of
	946	* the string in strict mode
	947	*/
[30a5470]	948	if ((strict) && (*lendptr != 0))
	949	return EINVAL;
[a35b458]	950
[30a5470]	951	return EOK;
	952	}
	953
/** Scale a value to a decimal (SI) order of magnitude for display.
 *
 * The value is divided by the largest power of 1000 that still leaves
 * more than 1000 units, so that up to four significant digits remain.
 * The suffix letter names the divisor: 'k' = 10^3, 'M' = 10^6, 'G' = 10^9,
 * 'T' = 10^12, 'P' = 10^15, 'E' = 10^18, or a space when the value is
 * reported unscaled.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Suffix character is stored here.
 *
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
	979
/** Scale a byte count to a binary (IEC) order of magnitude for display.
 *
 * The value is divided by the largest power of 1024 that still leaves
 * more than 1024 units. The suffix names the divisor: "KiB" = 2^10,
 * "MiB" = 2^20, "GiB" = 2^30, "TiB" = 2^40, "PiB" = 2^50, or plain bytes
 * when the value is reported unscaled.
 *
 * @param val    Value to scale (in bytes).
 * @param rv     Scaled value is stored here.
 * @param suffix Pointer to a constant suffix string is stored here.
 * @param fixed  If true, the plain byte suffix is "B " instead of "B".
 *
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		if (fixed)
			*suffix = "B ";
		else
			*suffix = "B";
	}
}
	1006
[16da5f8e]	1007	/** @}
	1008	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: