Context Navigation

str.c@ ce52c333

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since ce52c333 was 28a5ebd, checked in by Martin Decky <martin@…>, 5 years ago

Use char32_t instead of wchar_t to represent UTF-32 strings

The intention of the native HelenOS string API has been always to
support Unicode in the UTF-8 and UTF-32 encodings as the sole character
representations and ignore the obsolete mess of older single-byte and
multibyte character encodings. Before C11, the wchar_t type has been
slightly misused for the purpose of the UTF-32 strings. The newer
char32_t type is obviously a much more suitable option. The standard
defines char32_t as uint_least32_t, thus we can take the liberty to fix
it to uint32_t.

To maintain compatibility with the C Standard, the putwchar(wchar_t)
functions have been replaced by our custom putuchar(char32_t) functions
where appropriate.

Property mode set to 100644

File size: 21.2 KB

Rev	Line
[16da5f8e]	1	/*
	2	* Copyright (c) 2001-2004 Jakub Jermar
[d066259]	3	* Copyright (c) 2005 Martin Decky
	4	* Copyright (c) 2008 Jiri Svoboda
	5	* Copyright (c) 2011 Martin Sucha
	6	* Copyright (c) 2011 Oleg Romanenko
[16da5f8e]	7	* All rights reserved.
	8	*
	9	* Redistribution and use in source and binary forms, with or without
	10	* modification, are permitted provided that the following conditions
	11	* are met:
	12	*
	13	* - Redistributions of source code must retain the above copyright
	14	* notice, this list of conditions and the following disclaimer.
	15	* - Redistributions in binary form must reproduce the above copyright
	16	* notice, this list of conditions and the following disclaimer in the
	17	* documentation and/or other materials provided with the distribution.
	18	* - The name of the author may not be used to endorse or promote products
	19	* derived from this software without specific prior written permission.
	20	*
	21	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	22	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	23	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	24	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
	25	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	26	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	27	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	28	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	29	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
	30	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	31	*/
	32
[174156fd]	33	/** @addtogroup kernel_generic
[16da5f8e]	34	* @{
	35	*/
	36
	37	/**
	38	* @file
[82bb9c1]	39	* @brief String functions.
	40	*
	41	* Strings and characters use the Universal Character Set (UCS). The standard
	42	* strings, called just strings are encoded in UTF-8. Wide strings (encoded
	43	* in UTF-32) are supported to a limited degree. A single character is
[28a5ebd]	44	* represented as char32_t.@n
[82bb9c1]	45	*
[b888d5f]	46	* Overview of the terminology:@n
[82bb9c1]	47	*
[b888d5f]	48	* Term Meaning
	49	* -------------------- ----------------------------------------------------
	50	* byte 8 bits stored in uint8_t (unsigned 8 bit integer)
[82bb9c1]	51	*
[28a5ebd]	52	* character UTF-32 encoded Unicode character, stored in char32_t
	53	* (unsigned 32 bit integer), code points 0 .. 1114111
[b888d5f]	54	* are valid
[82bb9c1]	55	*
[b888d5f]	56	* ASCII character 7 bit encoded ASCII character, stored in char
	57	* (usually signed 8 bit integer), code points 0 .. 127
	58	* are valid
	59	*
	60	* string UTF-8 encoded NULL-terminated Unicode string, char *
	61	*
	62	* wide string UTF-32 encoded NULL-terminated Unicode string,
[28a5ebd]	63	* char32_t *
[b888d5f]	64	*
	65	* [wide] string size number of BYTES in a [wide] string (excluding
	66	* the NULL-terminator), size_t
	67	*
	68	* [wide] string length number of CHARACTERS in a [wide] string (excluding
[98000fb]	69	* the NULL-terminator), size_t
[b888d5f]	70	*
	71	* [wide] string width number of display cells on a monospace display taken
[98000fb]	72	* by a [wide] string, size_t
[b888d5f]	73	*
	74	*
	75	* Overview of string metrics:@n
	76	*
	77	* Metric Abbrev. Type Meaning
	78	* ------ ------ ------ -------------------------------------------------
	79	* size n size_t number of BYTES in a string (excluding the
	80	* NULL-terminator)
	81	*
[98000fb]	82	* length l size_t number of CHARACTERS in a string (excluding the
[b888d5f]	83	* null terminator)
	84	*
[98000fb]	85	* width w size_t number of display cells on a monospace display
[b888d5f]	86	* taken by a string
	87	*
	88	*
	89	* Function naming prefixes:@n
	90	*
	91	* chr_ operate on characters
	92	* ascii_ operate on ASCII characters
	93	* str_ operate on strings
	94	* wstr_ operate on wide strings
	95	*
	96	* [w]str_[n|l|w] operate on a prefix limited by size, length
	97	* or width
	98	*
	99	*
	100	* A specific character inside a [wide] string can be referred to by:@n
	101	*
[28a5ebd]	102	* pointer (char *, char32_t *)
[b888d5f]	103	* byte offset (size_t)
[98000fb]	104	* character index (size_t)
[82bb9c1]	105	*
[16da5f8e]	106	*/
	107
[19f857a]	108	#include <str.h>
[d066259]	109
	110	#include <assert.h>
[d09f84e6]	111	#include <errno.h>
[d066259]	112	#include <stdbool.h>
	113	#include <stddef.h>
	114	#include <stdint.h>
	115	#include <stdlib.h>
	116
[b888d5f]	117	#include <align.h>
[30a5470]	118	#include <macros.h>
[16da5f8e]	119
/** Mask consisting of the lowest @a n bits of a byte (0 <= n <= 8) */
#define LO_MASK_8(n)  ((uint8_t) ((1u << (n)) - 1))

/**
 * Mask consisting of the lowest @a n bits of a 32-bit word (0 <= n <= 31).
 *
 * The shift is done on an unsigned constant so that n == 31 does not shift
 * into the sign bit of a signed int.
 */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1))

/**
 * Mask consisting of the highest @a n bits of a byte (0 <= n <= 8).
 *
 * The complement is evaluated after integer promotion, so only the low
 * eight bits of the result are meaningful.
 */
#define HI_MASK_8(n)  (~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
[0dd1d444]	131
[b888d5f]	132	/** Decode a single character from a string.
[21a639b7]	133	*
[b888d5f]	134	* Decode a single character from a string of size @a size. Decoding starts
[e1813cf]	135	* at @a offset and this offset is moved to the beginning of the next
	136	* character. In case of decoding error, offset generally advances at least
[b888d5f]	137	* by one. However, offset is never moved beyond size.
[21a639b7]	138	*
[b888d5f]	139	* @param str String (not necessarily NULL-terminated).
	140	* @param offset Byte offset in string where to start decoding.
	141	* @param size Size of the string (in bytes).
	142	*
[c8bf88d]	143	* @return Value of decoded character, U_SPECIAL on decoding error or
[b888d5f]	144	* NULL if attempt to decode beyond @a size.
[21a639b7]	145	*
	146	*/
[28a5ebd]	147	char32_t str_decode(const char str, size_t offset, size_t size)
[21a639b7]	148	{
[b888d5f]	149	if (*offset + 1 > size)
	150	return 0;
[a35b458]	151
[b888d5f]	152	/* First byte read from string */
	153	uint8_t b0 = (uint8_t) str[(*offset)++];
[a35b458]	154
[b888d5f]	155	/* Determine code length */
[a35b458]	156
[b888d5f]	157	unsigned int b0_bits; /* Data bits in first byte */
	158	unsigned int cbytes; /* Number of continuation bytes */
[a35b458]	159
[0dd1d444]	160	if ((b0 & 0x80) == 0) {
	161	/* 0xxxxxxx (Plain ASCII) */
	162	b0_bits = 7;
	163	cbytes = 0;
	164	} else if ((b0 & 0xe0) == 0xc0) {
	165	/* 110xxxxx 10xxxxxx */
	166	b0_bits = 5;
	167	cbytes = 1;
	168	} else if ((b0 & 0xf0) == 0xe0) {
	169	/* 1110xxxx 10xxxxxx 10xxxxxx */
	170	b0_bits = 4;
	171	cbytes = 2;
	172	} else if ((b0 & 0xf8) == 0xf0) {
	173	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
	174	b0_bits = 3;
	175	cbytes = 3;
	176	} else {
[b888d5f]	177	/* 10xxxxxx -- unexpected continuation byte */
[c8bf88d]	178	return U_SPECIAL;
[74c8da2c]	179	}
[a35b458]	180
[b888d5f]	181	if (*offset + cbytes > size)
[c8bf88d]	182	return U_SPECIAL;
[a35b458]	183
[28a5ebd]	184	char32_t ch = b0 & LO_MASK_8(b0_bits);
[a35b458]	185
[b888d5f]	186	/* Decode continuation bytes */
[0dd1d444]	187	while (cbytes > 0) {
[b888d5f]	188	uint8_t b = (uint8_t) str[(*offset)++];
[a35b458]	189
[b888d5f]	190	/* Must be 10xxxxxx */
	191	if ((b & 0xc0) != 0x80)
[c8bf88d]	192	return U_SPECIAL;
[a35b458]	193
[b888d5f]	194	/* Shift data bits to ch */
[28a5ebd]	195	ch = (ch << CONT_BITS) \| (char32_t) (b & LO_MASK_8(CONT_BITS));
[b888d5f]	196	cbytes--;
[74c8da2c]	197	}
[a35b458]	198
[0dd1d444]	199	return ch;
[74c8da2c]	200	}
	201
[e1813cf]	202	/** Encode a single character to string representation.
[74c8da2c]	203	*
[e1813cf]	204	* Encode a single character to string representation (i.e. UTF-8) and store
	205	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
	206	* is moved to the position where the next character can be written to.
[74c8da2c]	207	*
[b888d5f]	208	* @param ch Input character.
	209	* @param str Output buffer.
	210	* @param offset Byte offset where to start writing.
	211	* @param size Size of the output buffer (in bytes).
[74c8da2c]	212	*
[d09f84e6]	213	* @return EOK if the character was encoded successfully, EOVERFLOW if there
[8e893ae]	214	* was not enough space in the output buffer or EINVAL if the character
	215	* code was invalid.
[74c8da2c]	216	*/
[28a5ebd]	217	errno_t chr_encode(const char32_t ch, char str, size_t offset, size_t size)
[74c8da2c]	218	{
[b888d5f]	219	if (*offset >= size)
[d09f84e6]	220	return EOVERFLOW;
[a35b458]	221
[b888d5f]	222	if (!chr_check(ch))
[d09f84e6]	223	return EINVAL;
[a35b458]	224
[7c3fb9b]	225	/*
	226	* Unsigned version of ch (bit operations should only be done
	227	* on unsigned types).
	228	*/
[b888d5f]	229	uint32_t cc = (uint32_t) ch;
[a35b458]	230
[b888d5f]	231	/* Determine how many continuation bytes are needed */
[a35b458]	232
[b888d5f]	233	unsigned int b0_bits; /* Data bits in first byte */
	234	unsigned int cbytes; /* Number of continuation bytes */
[a35b458]	235
[32704cb]	236	if ((cc & ~LO_MASK_32(7)) == 0) {
	237	b0_bits = 7;
	238	cbytes = 0;
	239	} else if ((cc & ~LO_MASK_32(11)) == 0) {
	240	b0_bits = 5;
	241	cbytes = 1;
	242	} else if ((cc & ~LO_MASK_32(16)) == 0) {
	243	b0_bits = 4;
	244	cbytes = 2;
	245	} else if ((cc & ~LO_MASK_32(21)) == 0) {
	246	b0_bits = 3;
	247	cbytes = 3;
	248	} else {
[b888d5f]	249	/* Codes longer than 21 bits are not supported */
[d09f84e6]	250	return EINVAL;
[74c8da2c]	251	}
[a35b458]	252
[b888d5f]	253	/* Check for available space in buffer */
	254	if (*offset + cbytes >= size)
[d09f84e6]	255	return EOVERFLOW;
[a35b458]	256
[b888d5f]	257	/* Encode continuation bytes */
	258	unsigned int i;
	259	for (i = cbytes; i > 0; i--) {
[e1813cf]	260	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
[32704cb]	261	cc = cc >> CONT_BITS;
[74c8da2c]	262	}
[a35b458]	263
[b888d5f]	264	/* Encode first byte */
[e1813cf]	265	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
[a35b458]	266
[b888d5f]	267	/* Advance offset */
	268	*offset += cbytes + 1;
[a35b458]	269
[d09f84e6]	270	return EOK;
[74c8da2c]	271	}
	272
/** Get size of string.
 *
 * Count the bytes occupied by the string @a str, not counting the
 * terminating zero byte.
 *
 * @param str String to consider.
 *
 * @return Number of bytes used by the string
 *
 */
size_t str_size(const char *str)
{
	const char *end = str;

	/* Walk to the terminator; the distance travelled is the size. */
	while (*end != '\0')
		end++;

	return (size_t) (end - str);
}
	292
/** Get size of wide string.
 *
 * Count the bytes occupied by the wide string @a str, not counting the
 * terminating zero character.
 *
 * @param str Wide string to consider.
 *
 * @return Number of bytes used by the wide string
 *
 */
size_t wstr_size(const char32_t *str)
{
	size_t chars = wstr_length(str);

	return chars * sizeof(char32_t);
}
	307
	308	/** Get size of string with length limit.
[74c8da2c]	309	*
[f25b2819]	310	* Get the number of bytes which are used by up to @a max_len first
	311	* characters in the string @a str. If @a max_len is greater than
[b888d5f]	312	* the length of @a str, the entire string is measured (excluding the
	313	* NULL-terminator).
	314	*
	315	* @param str String to consider.
	316	* @param max_len Maximum number of characters to measure.
[74c8da2c]	317	*
[b888d5f]	318	* @return Number of bytes used by the characters.
[74c8da2c]	319	*
	320	*/
[98000fb]	321	size_t str_lsize(const char *str, size_t max_len)
[74c8da2c]	322	{
[98000fb]	323	size_t len = 0;
[b888d5f]	324	size_t offset = 0;
[a35b458]	325
[b888d5f]	326	while (len < max_len) {
	327	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
[b54d2f1]	328	break;
[a35b458]	329
[f25b2819]	330	len++;
[21a639b7]	331	}
[a35b458]	332
[b888d5f]	333	return offset;
[74c8da2c]	334	}
	335
/** Get size of wide string with length limit.
 *
 * Get the number of bytes occupied by at most @a max_len leading wide
 * characters of @a str. If @a max_len exceeds the length of @a str, the
 * entire wide string is measured (excluding the NULL-terminator).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const char32_t *str, size_t max_len)
{
	/* wstr_nlength() takes its limit in bytes, not characters. */
	const size_t byte_limit = max_len * sizeof(char32_t);
	const size_t chars = wstr_nlength(str, byte_limit);

	return chars * sizeof(char32_t);
}
	353
[b888d5f]	354	/** Get number of characters in a string.
[82bb9c1]	355	*
[b888d5f]	356	* @param str NULL-terminated string.
[82bb9c1]	357	*
[b888d5f]	358	* @return Number of characters in string.
[82bb9c1]	359	*
	360	*/
[98000fb]	361	size_t str_length(const char *str)
[82bb9c1]	362	{
[98000fb]	363	size_t len = 0;
[b888d5f]	364	size_t offset = 0;
[a35b458]	365
[b888d5f]	366	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
	367	len++;
[a35b458]	368
[b888d5f]	369	return len;
[82bb9c1]	370	}
	371
/** Get number of characters in a wide string.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return Number of characters in @a wstr.
 *
 */
size_t wstr_length(const char32_t *wstr)
{
	const char32_t *cur = wstr;

	/* Advance to the terminator; the distance travelled is the length. */
	while (*cur != 0)
		cur++;

	return (size_t) (cur - wstr);
}
	388
/** Get number of characters in a string with size limit.
 *
 * @param str  NULL-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in string.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t pos = 0;
	size_t count = 0;

	/* str_decode() yields 0 at the terminator or once @a size is reached. */
	for (;;) {
		if (str_decode(str, &pos, size) == 0)
			break;

		count++;
	}

	return count;
}
	407
/** Get number of characters in a wide string with size limit.
 *
 * @param str  NULL-terminated wide string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in the wide string.
 *
 */
size_t wstr_nlength(const char32_t *str, size_t size)
{
	/* Only whole characters count; a trailing partial one is ignored. */
	const size_t max_chars =
	    ALIGN_DOWN(size, sizeof(char32_t)) / sizeof(char32_t);
	size_t count = 0;

	while ((count < max_chars) && (str[count] != 0))
		count++;

	return count;
}
	429
/** Check whether character is plain ASCII.
 *
 * @param ch Character to test.
 *
 * @return True if character is plain ASCII (code point 0 .. 127).
 *
 */
bool ascii_check(char32_t ch)
{
	return ch <= 127;
}
[f25b2819]	442
/** Check whether character is valid
 *
 * @param ch Character to test.
 *
 * @return True if character is a valid Unicode code point
 *         (0 .. 1114111, i.e. up to U+10FFFF).
 *
 */
bool chr_check(char32_t ch)
{
	return ch <= 0x10ffff;
}
	455
[b888d5f]	456	/** Compare two NULL terminated strings.
[16da5f8e]	457	*
[b888d5f]	458	* Do a char-by-char comparison of two NULL-terminated strings.
[4efeab5]	459	* The strings are considered equal iff their length is equal
	460	* and both strings consist of the same sequence of characters.
	461	*
[1772e6d]	462	* A string S1 is less than another string S2 if it has a character with
	463	* lower value at the first character position where the strings differ.
	464	* If the strings differ in length, the shorter one is treated as if
	465	* padded by characters with a value of zero.
[16da5f8e]	466	*
[b888d5f]	467	* @param s1 First string to compare.
	468	* @param s2 Second string to compare.
[16da5f8e]	469	*
[1772e6d]	470	* @return 0 if the strings are equal, -1 if the first is less than the second,
	471	* 1 if the second is less than the first.
[16da5f8e]	472	*
	473	*/
[b888d5f]	474	int str_cmp(const char s1, const char s2)
[16da5f8e]	475	{
[28a5ebd]	476	char32_t c1 = 0;
	477	char32_t c2 = 0;
[a35b458]	478
[b888d5f]	479	size_t off1 = 0;
	480	size_t off2 = 0;
[a7b1071]	481
	482	while (true) {
	483	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
	484	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
	485
[b888d5f]	486	if (c1 < c2)
[16da5f8e]	487	return -1;
[a35b458]	488
[b888d5f]	489	if (c1 > c2)
[16da5f8e]	490	return 1;
[a7b1071]	491
	492	if (c1 == 0 \|\| c2 == 0)
[1b20da0]	493	break;
[16da5f8e]	494	}
[a7b1071]	495
	496	return 0;
[16da5f8e]	497	}
	498
[b888d5f]	499	/** Compare two NULL terminated strings with length limit.
[16da5f8e]	500	*
[b888d5f]	501	* Do a char-by-char comparison of two NULL-terminated strings.
[4efeab5]	502	* The strings are considered equal iff
	503	* min(str_length(s1), max_len) == min(str_length(s2), max_len)
	504	* and both strings consist of the same sequence of characters,
	505	* up to max_len characters.
	506	*
[1772e6d]	507	* A string S1 is less than another string S2 if it has a character with
	508	* lower value at the first character position where the strings differ.
	509	* If the strings differ in length, the shorter one is treated as if
	510	* padded by characters with a value of zero. Only the first max_len
	511	* characters are considered.
[16da5f8e]	512	*
[b888d5f]	513	* @param s1 First string to compare.
	514	* @param s2 Second string to compare.
	515	* @param max_len Maximum number of characters to consider.
	516	*
[1772e6d]	517	* @return 0 if the strings are equal, -1 if the first is less than the second,
	518	* 1 if the second is less than the first.
[16da5f8e]	519	*
	520	*/
[98000fb]	521	int str_lcmp(const char s1, const char s2, size_t max_len)
[16da5f8e]	522	{
[28a5ebd]	523	char32_t c1 = 0;
	524	char32_t c2 = 0;
[a35b458]	525
[b888d5f]	526	size_t off1 = 0;
	527	size_t off2 = 0;
[a35b458]	528
[98000fb]	529	size_t len = 0;
[a7b1071]	530
	531	while (true) {
	532	if (len >= max_len)
[b888d5f]	533	break;
[a7b1071]	534
	535	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
	536	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
	537
[b888d5f]	538	if (c1 < c2)
[16da5f8e]	539	return -1;
[a7b1071]	540
[b888d5f]	541	if (c1 > c2)
[16da5f8e]	542	return 1;
[a7b1071]	543
	544	if (c1 == 0 \|\| c2 == 0)
	545	break;
	546
[1b20da0]	547	++len;
[16da5f8e]	548	}
[a7b1071]	549
	550	return 0;
	551
[16da5f8e]	552	}
	553
[f4b1535]	554	/** Copy string.
[b888d5f]	555	*
[f4b1535]	556	* Copy source string @a src to destination buffer @a dest.
	557	* No more than @a size bytes are written. If the size of the output buffer
	558	* is at least one byte, the output string will always be well-formed, i.e.
	559	* null-terminated and containing only complete characters.
[b888d5f]	560	*
[abf09311]	561	* @param dest Destination buffer.
[6700ee2]	562	* @param count Size of the destination buffer (must be > 0).
[f4b1535]	563	* @param src Source string.
[abf09311]	564	*
[b888d5f]	565	*/
[f4b1535]	566	void str_cpy(char dest, size_t size, const char src)
[b888d5f]	567	{
[6700ee2]	568	/* There must be space for a null terminator in the buffer. */
[63e27ef]	569	assert(size > 0);
	570	assert(src != NULL);
[a35b458]	571
[abf09311]	572	size_t src_off = 0;
	573	size_t dest_off = 0;
[a35b458]	574
[28a5ebd]	575	char32_t ch;
[f4b1535]	576	while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
	577	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
	578	break;
	579	}
[a35b458]	580
[f4b1535]	581	dest[dest_off] = '\0';
	582	}
	583
	584	/** Copy size-limited substring.
	585	*
[6700ee2]	586	* Copy prefix of string @a src of max. size @a size to destination buffer
	587	* @a dest. No more than @a size bytes are written. The output string will
	588	* always be well-formed, i.e. null-terminated and containing only complete
	589	* characters.
[f4b1535]	590	*
	591	* No more than @a n bytes are read from the input string, so it does not
	592	* have to be null-terminated.
	593	*
[abf09311]	594	* @param dest Destination buffer.
[6700ee2]	595	* @param count Size of the destination buffer (must be > 0).
[f4b1535]	596	* @param src Source string.
[abf09311]	597	* @param n Maximum number of bytes to read from @a src.
	598	*
[f4b1535]	599	*/
	600	void str_ncpy(char dest, size_t size, const char src, size_t n)
	601	{
[6700ee2]	602	/* There must be space for a null terminator in the buffer. */
[63e27ef]	603	assert(size > 0);
[a35b458]	604
[abf09311]	605	size_t src_off = 0;
	606	size_t dest_off = 0;
[a35b458]	607
[28a5ebd]	608	char32_t ch;
[f4b1535]	609	while ((ch = str_decode(src, &src_off, n)) != 0) {
	610	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
[b888d5f]	611	break;
	612	}
[a35b458]	613
[f4b1535]	614	dest[dest_off] = '\0';
[b888d5f]	615	}
[16da5f8e]	616
[0f06dbc]	617	/** Convert wide string to string.
[b888d5f]	618	*
[0f06dbc]	619	* Convert wide string @a src to string. The output is written to the buffer
	620	* specified by @a dest and @a size. @a size must be non-zero and the string
	621	* written will always be well-formed.
[16da5f8e]	622	*
[28a5ebd]	623	* @param dest Destination buffer.
	624	* @param size Size of the destination buffer.
	625	* @param src Source wide string.
[16da5f8e]	626	*/
[28a5ebd]	627	void wstr_to_str(char dest, size_t size, const char32_t src)
[16da5f8e]	628	{
[28a5ebd]	629	char32_t ch;
[0f06dbc]	630	size_t src_idx;
	631	size_t dest_off;
	632
	633	/* There must be space for a null terminator in the buffer. */
[63e27ef]	634	assert(size > 0);
[0f06dbc]	635
	636	src_idx = 0;
	637	dest_off = 0;
[a35b458]	638
[b888d5f]	639	while ((ch = src[src_idx++]) != 0) {
[0f06dbc]	640	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
[b888d5f]	641	break;
[16da5f8e]	642	}
[0f06dbc]	643
	644	dest[dest_off] = '\0';
[16da5f8e]	645	}
	646
[20f1597]	647	/** Find first occurence of character in string.
	648	*
[b888d5f]	649	* @param str String to search.
	650	* @param ch Character to look for.
	651	*
	652	* @return Pointer to character in @a str or NULL if not found.
[20f1597]	653	*/
[28a5ebd]	654	char str_chr(const char str, char32_t ch)
[20f1597]	655	{
[28a5ebd]	656	char32_t acc;
[b888d5f]	657	size_t off = 0;
[f2d2c7ba]	658	size_t last = 0;
[a35b458]	659
[a7b1071]	660	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
[b888d5f]	661	if (acc == ch)
[dd2cfa7]	662	return (char *) (str + last);
[f2d2c7ba]	663	last = off;
[20f1597]	664	}
[a35b458]	665
[20f1597]	666	return NULL;
	667	}
	668
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position
 * @a pos. The characters from that position on (including the
 * terminator) are shifted one place towards the end.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert to.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(char32_t *str, char32_t ch, size_t pos, size_t max_pos)
{
	const size_t length = wstr_length(str);

	if (pos > length)
		return false;

	if (pos + 1 > max_pos)
		return false;

	/* Move the tail (terminator first) one slot to the right. */
	for (size_t dst = length + 1; dst > pos; dst--)
		str[dst] = str[dst - 1];

	str[pos] = ch;

	return true;
}
	698
	/** Remove a wide character from a wide string.
 *
 * Remove a wide character from a wide string at position
 * @a pos. The characters after the position (including the
 * terminator) are shifted one place towards the beginning.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(char32_t *str, size_t pos)
{
	const size_t length = wstr_length(str);

	if (pos >= length)
		return false;

	/* Pull every following character, terminator included, one slot left. */
	for (size_t dst = pos; dst < length; dst++)
		str[dst] = str[dst + 1];

	return true;
}
	724
/** Duplicate string.
 *
 * Allocate a new string with malloc() and copy characters from the
 * source string into it.
 *
 * On success the duplicate string is always a well-formed
 * null-terminated UTF-8 string, but it can differ from the source
 * string on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	/* One extra byte for the null terminator. */
	size_t size = str_size(src) + 1;
	char *dest = malloc(size);
	if (!dest)
		return NULL;

	str_cpy(dest, size, src);
	return dest;
}
	751
	/** Duplicate string with size limit.
 *
 * Allocate a new string with malloc() and copy up to @a n bytes from the
 * source string into it. No more than @a n + 1 bytes are allocated, but
 * if the size occupied by the source string is smaller than @a n + 1,
 * less is allocated.
 *
 * At most @a n bytes of @a src are examined, so the source does not have
 * to be null-terminated if it is at least @a n bytes long.
 *
 * On success the duplicate string is always a well-formed
 * null-terminated UTF-8 string, but it can differ from the source
 * string on the byte level.
 *
 * @param src Source string.
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	/* Measure at most n bytes so that nothing beyond the limit is read. */
	size_t size = 0;
	while ((size < n) && (src[size] != '\0'))
		size++;

	char *dest = malloc(size + 1);
	if (!dest)
		return NULL;

	str_ncpy(dest, size + 1, src, size);
	return dest;
}
	785
/** Scale a value to a decimal order of magnitude.
 *
 * Pick the largest decimal (SI) unit for which the scaled value is still
 * reasonably large and store the scaled value together with the one-letter
 * unit prefix. Values up to one million are passed through unchanged with
 * a space as the suffix.
 *
 * @param val    Value to scale.
 * @param rv     Output: scaled value (rounded down).
 * @param suffix Output: SI prefix character ('k', 'M', 'G', 'T', 'P', 'E')
 *               or ' ' if no scaling was applied.
 *
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/* Divided by 10^18, i.e. exa */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* Divided by 10^15, i.e. peta */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
	811
/** Scale a byte count to a binary order of magnitude.
 *
 * Pick the largest binary (IEC) unit for which the scaled value is still
 * reasonably large and store the scaled value together with the unit name.
 * Values up to 2^20 are passed through unchanged.
 *
 * @param val    Value to scale (in bytes).
 * @param rv     Output: scaled value (rounded down).
 * @param suffix Output: pointer to a constant unit string ("KiB", "MiB",
 *               "GiB", "TiB", "PiB" or "B").
 * @param fixed  If true, the plain byte suffix is padded with spaces to the
 *               width of the other suffixes ("B  ").
 *
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* Divided by 2^50, i.e. pebi */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		if (fixed)
			*suffix = "B  ";
		else
			*suffix = "B";
	}
}
	838
[16da5f8e]	839	/** @}
	840	*/

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: mainline/kernel/generic/src/lib/str.c@ ce52c333

Download in other formats: