Context Navigation

str.c@ 61e29a4d

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since 61e29a4d was 61e29a4d, checked in by Oleg Romanenko <romanenko.oleg@…>, 14 years ago

Modifications in str.c

Add function wstr_is_ascii
Add return value (error code) to functions: wstr_to_str and str_to_wstr

Property mode set to 100644

File size: 29.2 KB

Rev	Line
[936351c1]	1	/*
[df4ed85]	2	* Copyright (c) 2005 Martin Decky
[576845ec]	3	* Copyright (c) 2008 Jiri Svoboda
[936351c1]	4	* All rights reserved.
	5	*
	6	* Redistribution and use in source and binary forms, with or without
	7	* modification, are permitted provided that the following conditions
	8	* are met:
	9	*
	10	* - Redistributions of source code must retain the above copyright
	11	* notice, this list of conditions and the following disclaimer.
	12	* - Redistributions in binary form must reproduce the above copyright
	13	* notice, this list of conditions and the following disclaimer in the
	14	* documentation and/or other materials provided with the distribution.
	15	* - The name of the author may not be used to endorse or promote products
	16	* derived from this software without specific prior written permission.
	17	*
	18	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	19	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	20	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	21	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
	22	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	23	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	24	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	25	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	26	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
	27	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	28	*/
	29
[a46da63]	30	/** @addtogroup libc
[b2951e2]	31	* @{
	32	*/
	33	/** @file
	34	*/
	35
[19f857a]	36	#include <str.h>
[e64c4b2]	37	#include <stdlib.h>
[6700ee2]	38	#include <assert.h>
[9539be6]	39	#include <stdint.h>
[e64c4b2]	40	#include <ctype.h>
[566987b0]	41	#include <malloc.h>
[171f9a1]	42	#include <errno.h>
[f2b8cdc]	43	#include <align.h>
[095003a8]	44	#include <mem.h>
[19f857a]	45	#include <str.h>
[171f9a1]	46
	47	/** Byte mask consisting of lowest @n bits (out of 8) */
	48	#define LO_MASK_8(n) ((uint8_t) ((1 << (n)) - 1))
	49
	50	/** Byte mask consisting of lowest @n bits (out of 32) */
	51	#define LO_MASK_32(n) ((uint32_t) ((1 << (n)) - 1))
	52
	53	/** Byte mask consisting of highest @n bits (out of 8) */
	54	#define HI_MASK_8(n) (~LO_MASK_8(8 - (n)))
	55
	56	/** Number of data bits in a UTF-8 continuation byte */
	57	#define CONT_BITS 6
	58
	59	/** Decode a single character from a string.
	60	*
	61	* Decode a single character from a string of size @a size. Decoding starts
	62	* at @a offset and this offset is moved to the beginning of the next
	63	* character. In case of decoding error, offset generally advances at least
	64	* by one. However, offset is never moved beyond size.
	65	*
	66	* @param str String (not necessarily NULL-terminated).
	67	* @param offset Byte offset in string where to start decoding.
	68	* @param size Size of the string (in bytes).
	69	*
	70	* @return Value of decoded character, U_SPECIAL on decoding error or
	71	* NULL if attempt to decode beyond @a size.
	72	*
	73	*/
	74	wchar_t str_decode(const char str, size_t offset, size_t size)
	75	{
	76	if (*offset + 1 > size)
	77	return 0;
	78
	79	/* First byte read from string */
	80	uint8_t b0 = (uint8_t) str[(*offset)++];
	81
	82	/* Determine code length */
	83
	84	unsigned int b0_bits; /* Data bits in first byte */
	85	unsigned int cbytes; /* Number of continuation bytes */
	86
	87	if ((b0 & 0x80) == 0) {
	88	/* 0xxxxxxx (Plain ASCII) */
	89	b0_bits = 7;
	90	cbytes = 0;
	91	} else if ((b0 & 0xe0) == 0xc0) {
	92	/* 110xxxxx 10xxxxxx */
	93	b0_bits = 5;
	94	cbytes = 1;
	95	} else if ((b0 & 0xf0) == 0xe0) {
	96	/* 1110xxxx 10xxxxxx 10xxxxxx */
	97	b0_bits = 4;
	98	cbytes = 2;
	99	} else if ((b0 & 0xf8) == 0xf0) {
	100	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
	101	b0_bits = 3;
	102	cbytes = 3;
	103	} else {
	104	/* 10xxxxxx -- unexpected continuation byte */
	105	return U_SPECIAL;
	106	}
	107
	108	if (*offset + cbytes > size)
	109	return U_SPECIAL;
	110
	111	wchar_t ch = b0 & LO_MASK_8(b0_bits);
	112
	113	/* Decode continuation bytes */
	114	while (cbytes > 0) {
	115	uint8_t b = (uint8_t) str[(*offset)++];
	116
	117	/* Must be 10xxxxxx */
	118	if ((b & 0xc0) != 0x80)
	119	return U_SPECIAL;
	120
	121	/* Shift data bits to ch */
	122	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
	123	cbytes--;
	124	}
	125
	126	return ch;
	127	}
	128
	129	/** Encode a single character to string representation.
	130	*
	131	* Encode a single character to string representation (i.e. UTF-8) and store
	132	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
	133	* is moved to the position where the next character can be written to.
	134	*
	135	* @param ch Input character.
	136	* @param str Output buffer.
	137	* @param offset Byte offset where to start writing.
	138	* @param size Size of the output buffer (in bytes).
	139	*
	140	* @return EOK if the character was encoded successfully, EOVERFLOW if there
[d4a3ee5]	141	* was not enough space in the output buffer or EINVAL if the character
	142	* code was invalid.
[171f9a1]	143	*/
	144	int chr_encode(const wchar_t ch, char str, size_t offset, size_t size)
	145	{
	146	if (*offset >= size)
	147	return EOVERFLOW;
	148
	149	if (!chr_check(ch))
	150	return EINVAL;
	151
	152	/* Unsigned version of ch (bit operations should only be done
	153	on unsigned types). */
	154	uint32_t cc = (uint32_t) ch;
	155
	156	/* Determine how many continuation bytes are needed */
	157
	158	unsigned int b0_bits; /* Data bits in first byte */
	159	unsigned int cbytes; /* Number of continuation bytes */
	160
	161	if ((cc & ~LO_MASK_32(7)) == 0) {
	162	b0_bits = 7;
	163	cbytes = 0;
	164	} else if ((cc & ~LO_MASK_32(11)) == 0) {
	165	b0_bits = 5;
	166	cbytes = 1;
	167	} else if ((cc & ~LO_MASK_32(16)) == 0) {
	168	b0_bits = 4;
	169	cbytes = 2;
	170	} else if ((cc & ~LO_MASK_32(21)) == 0) {
	171	b0_bits = 3;
	172	cbytes = 3;
	173	} else {
	174	/* Codes longer than 21 bits are not supported */
	175	return EINVAL;
	176	}
	177
	178	/* Check for available space in buffer */
	179	if (*offset + cbytes >= size)
	180	return EOVERFLOW;
	181
	182	/* Encode continuation bytes */
	183	unsigned int i;
	184	for (i = cbytes; i > 0; i--) {
	185	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
	186	cc = cc >> CONT_BITS;
	187	}
	188
	189	/* Encode first byte */
	190	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
	191
	192	/* Advance offset */
	193	*offset += cbytes + 1;
	194
	195	return EOK;
	196	}
	197
/** Get size of string.
 *
 * Get the number of bytes which are used by the string @a str (excluding the
 * NULL-terminator).
 *
 * @param str String to consider.
 *
 * @return Number of bytes used by the string
 *
 */
size_t str_size(const char *str)
{
	size_t size = 0;

	while (str[size] != '\0')
		size++;

	return size;
}
	217
	218	/** Get size of wide string.
	219	*
	220	* Get the number of bytes which are used by the wide string @a str (excluding the
	221	* NULL-terminator).
	222	*
	223	* @param str Wide string to consider.
	224	*
	225	* @return Number of bytes used by the wide string
	226	*
	227	*/
	228	size_t wstr_size(const wchar_t *str)
	229	{
	230	return (wstr_length(str) * sizeof(wchar_t));
	231	}
	232
	233	/** Get size of string with length limit.
	234	*
	235	* Get the number of bytes which are used by up to @a max_len first
	236	* characters in the string @a str. If @a max_len is greater than
	237	* the length of @a str, the entire string is measured (excluding the
	238	* NULL-terminator).
	239	*
	240	* @param str String to consider.
	241	* @param max_len Maximum number of characters to measure.
	242	*
	243	* @return Number of bytes used by the characters.
	244	*
	245	*/
[d4a3ee5]	246	size_t str_lsize(const char *str, size_t max_len)
[f2b8cdc]	247	{
[d4a3ee5]	248	size_t len = 0;
[f2b8cdc]	249	size_t offset = 0;
	250
	251	while (len < max_len) {
	252	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
	253	break;
	254
	255	len++;
	256	}
	257
	258	return offset;
	259	}
	260
	261	/** Get size of wide string with length limit.
	262	*
	263	* Get the number of bytes which are used by up to @a max_len first
	264	* wide characters in the wide string @a str. If @a max_len is greater than
	265	* the length of @a str, the entire wide string is measured (excluding the
	266	* NULL-terminator).
	267	*
	268	* @param str Wide string to consider.
	269	* @param max_len Maximum number of wide characters to measure.
	270	*
	271	* @return Number of bytes used by the wide characters.
	272	*
	273	*/
[d4a3ee5]	274	size_t wstr_lsize(const wchar_t *str, size_t max_len)
[f2b8cdc]	275	{
	276	return (wstr_nlength(str, max_len * sizeof(wchar_t)) * sizeof(wchar_t));
	277	}
	278
	279	/** Get number of characters in a string.
	280	*
	281	* @param str NULL-terminated string.
	282	*
	283	* @return Number of characters in string.
	284	*
	285	*/
[d4a3ee5]	286	size_t str_length(const char *str)
[f2b8cdc]	287	{
[d4a3ee5]	288	size_t len = 0;
[f2b8cdc]	289	size_t offset = 0;
	290
	291	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
	292	len++;
	293
	294	return len;
	295	}
	296
	297	/** Get number of characters in a wide string.
	298	*
	299	* @param str NULL-terminated wide string.
	300	*
	301	* @return Number of characters in @a str.
	302	*
	303	*/
[d4a3ee5]	304	size_t wstr_length(const wchar_t *wstr)
[f2b8cdc]	305	{
[d4a3ee5]	306	size_t len = 0;
[f2b8cdc]	307
	308	while (*wstr++ != 0)
	309	len++;
	310
	311	return len;
	312	}
	313
	314	/** Get number of characters in a string with size limit.
	315	*
	316	* @param str NULL-terminated string.
	317	* @param size Maximum number of bytes to consider.
	318	*
	319	* @return Number of characters in string.
	320	*
	321	*/
[d4a3ee5]	322	size_t str_nlength(const char *str, size_t size)
[f2b8cdc]	323	{
[d4a3ee5]	324	size_t len = 0;
[f2b8cdc]	325	size_t offset = 0;
	326
	327	while (str_decode(str, &offset, size) != 0)
	328	len++;
	329
	330	return len;
	331	}
	332
	333	/** Get number of characters in a string with size limit.
	334	*
	335	* @param str NULL-terminated string.
	336	* @param size Maximum number of bytes to consider.
	337	*
	338	* @return Number of characters in string.
	339	*
	340	*/
[d4a3ee5]	341	size_t wstr_nlength(const wchar_t *str, size_t size)
[f2b8cdc]	342	{
[d4a3ee5]	343	size_t len = 0;
	344	size_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
	345	size_t offset = 0;
[f2b8cdc]	346
	347	while ((offset < limit) && (*str++ != 0)) {
	348	len++;
	349	offset += sizeof(wchar_t);
	350	}
	351
	352	return len;
	353	}
	354
	355	/** Check whether character is plain ASCII.
	356	*
	357	* @return True if character is plain ASCII.
	358	*
	359	*/
	360	bool ascii_check(wchar_t ch)
	361	{
	362	if ((ch >= 0) && (ch <= 127))
	363	return true;
	364
	365	return false;
	366	}
	367
/** Check whether wide string is plain ASCII.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return True if every character of the wide string is plain ASCII.
 *
 */
bool wstr_is_ascii(const wchar_t *wstr)
{
	/* Skip over the ASCII prefix; stop at the terminator or first non-ASCII */
	while ((*wstr != 0) && ascii_check(*wstr))
		wstr++;

	/* Only an all-ASCII string gets as far as its terminator */
	return *wstr == 0;
}
	379
/** Check whether character is valid
 *
 * @param ch Character to check.
 *
 * @return True if character is in the Unicode code point range
 *         (0 to 0x10FFFF inclusive).
 *
 */
bool chr_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 1114111);
}
[936351c1]	392
[f2b8cdc]	393	/** Compare two NULL terminated strings.
	394	*
	395	* Do a char-by-char comparison of two NULL-terminated strings.
	396	* The strings are considered equal iff they consist of the same
	397	* characters on the minimum of their lengths.
	398	*
	399	* @param s1 First string to compare.
	400	* @param s2 Second string to compare.
	401	*
	402	* @return 0 if the strings are equal, -1 if first is smaller,
	403	* 1 if second smaller.
	404	*
	405	*/
	406	int str_cmp(const char s1, const char s2)
	407	{
	408	wchar_t c1 = 0;
	409	wchar_t c2 = 0;
	410
	411	size_t off1 = 0;
	412	size_t off2 = 0;
	413
	414	while (true) {
	415	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
	416	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
	417
	418	if (c1 < c2)
	419	return -1;
	420
	421	if (c1 > c2)
	422	return 1;
	423
	424	if (c1 == 0 \|\| c2 == 0)
	425	break;
	426	}
	427
	428	return 0;
	429	}
	430
	431	/** Compare two NULL terminated strings with length limit.
	432	*
	433	* Do a char-by-char comparison of two NULL-terminated strings.
	434	* The strings are considered equal iff they consist of the same
	435	* characters on the minimum of their lengths and the length limit.
	436	*
	437	* @param s1 First string to compare.
	438	* @param s2 Second string to compare.
	439	* @param max_len Maximum number of characters to consider.
	440	*
	441	* @return 0 if the strings are equal, -1 if first is smaller,
	442	* 1 if second smaller.
	443	*
	444	*/
[d4a3ee5]	445	int str_lcmp(const char s1, const char s2, size_t max_len)
[f2b8cdc]	446	{
	447	wchar_t c1 = 0;
	448	wchar_t c2 = 0;
	449
	450	size_t off1 = 0;
	451	size_t off2 = 0;
	452
[d4a3ee5]	453	size_t len = 0;
[f2b8cdc]	454
	455	while (true) {
	456	if (len >= max_len)
	457	break;
	458
	459	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
	460	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
	461
	462	if (c1 < c2)
	463	return -1;
	464
	465	if (c1 > c2)
	466	return 1;
	467
	468	if (c1 == 0 \|\| c2 == 0)
	469	break;
	470
	471	++len;
	472	}
	473
	474	return 0;
	475
	476	}
	477
[6eb2e96]	478	/** Copy string.
[f2b8cdc]	479	*
[6eb2e96]	480	* Copy source string @a src to destination buffer @a dest.
	481	* No more than @a size bytes are written. If the size of the output buffer
	482	* is at least one byte, the output string will always be well-formed, i.e.
	483	* null-terminated and containing only complete characters.
[f2b8cdc]	484	*
[abf09311]	485	* @param dest Destination buffer.
[6700ee2]	486	* @param count Size of the destination buffer (must be > 0).
[6eb2e96]	487	* @param src Source string.
[f2b8cdc]	488	*/
[6eb2e96]	489	void str_cpy(char dest, size_t size, const char src)
[f2b8cdc]	490	{
[6700ee2]	491	/* There must be space for a null terminator in the buffer. */
	492	assert(size > 0);
[f2b8cdc]	493
[abf09311]	494	size_t src_off = 0;
	495	size_t dest_off = 0;
	496
	497	wchar_t ch;
[6eb2e96]	498	while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
	499	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
	500	break;
	501	}
[abf09311]	502
[6eb2e96]	503	dest[dest_off] = '\0';
	504	}
	505
	506	/** Copy size-limited substring.
	507	*
[6700ee2]	508	* Copy prefix of string @a src of max. size @a size to destination buffer
	509	* @a dest. No more than @a size bytes are written. The output string will
	510	* always be well-formed, i.e. null-terminated and containing only complete
	511	* characters.
[6eb2e96]	512	*
	513	* No more than @a n bytes are read from the input string, so it does not
	514	* have to be null-terminated.
	515	*
[abf09311]	516	* @param dest Destination buffer.
[6700ee2]	517	* @param count Size of the destination buffer (must be > 0).
[6eb2e96]	518	* @param src Source string.
[abf09311]	519	* @param n Maximum number of bytes to read from @a src.
[6eb2e96]	520	*/
	521	void str_ncpy(char dest, size_t size, const char src, size_t n)
	522	{
[6700ee2]	523	/* There must be space for a null terminator in the buffer. */
	524	assert(size > 0);
[f2b8cdc]	525
[abf09311]	526	size_t src_off = 0;
	527	size_t dest_off = 0;
	528
	529	wchar_t ch;
[6eb2e96]	530	while ((ch = str_decode(src, &src_off, n)) != 0) {
	531	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
[f2b8cdc]	532	break;
	533	}
[abf09311]	534
[6eb2e96]	535	dest[dest_off] = '\0';
[f2b8cdc]	536	}
	537
/** Append one string to another.
 *
 * Append source string @a src to string in destination buffer @a dest.
 * Size of the destination buffer is @a size. The output string will always
 * be well-formed, i.e. null-terminated and containing only complete
 * characters. If the string already in @a dest occupies @a size bytes or
 * more, there is no room to append anything and @a dest is left untouched.
 *
 * @param dest Destination buffer (holding a NULL-terminated string).
 * @param size Size of the destination buffer.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	size_t dstr_size = str_size(dest);

	/*
	 * Guard against size - dstr_size wrapping around (or becoming zero),
	 * which would let str_cpy() write past the end of the buffer.
	 */
	if (dstr_size >= size)
		return;

	str_cpy(dest + dstr_size, size - dstr_size, src);
}
	556
[0f06dbc]	557	/** Convert wide string to string.
[f2b8cdc]	558	*
[0f06dbc]	559	* Convert wide string @a src to string. The output is written to the buffer
	560	* specified by @a dest and @a size. @a size must be non-zero and the string
	561	* written will always be well-formed.
[f2b8cdc]	562	*
[0f06dbc]	563	* @param dest Destination buffer.
	564	* @param size Size of the destination buffer.
	565	* @param src Source wide string.
[61e29a4d]	566	*
	567	* @return EOK, if success, negative otherwise.
[f2b8cdc]	568	*/
[61e29a4d]	569	int wstr_to_str(char dest, size_t size, const wchar_t src)
[f2b8cdc]	570	{
[61e29a4d]	571	int rc;
[f2b8cdc]	572	wchar_t ch;
[0f06dbc]	573	size_t src_idx;
	574	size_t dest_off;
	575
	576	/* There must be space for a null terminator in the buffer. */
	577	assert(size > 0);
[f2b8cdc]	578
[0f06dbc]	579	src_idx = 0;
	580	dest_off = 0;
	581
[f2b8cdc]	582	while ((ch = src[src_idx++]) != 0) {
[61e29a4d]	583	rc = chr_encode(ch, dest, &dest_off, size - 1);
	584	if (rc != EOK)
[f2b8cdc]	585	break;
	586	}
[0f06dbc]	587
	588	dest[dest_off] = '\0';
[61e29a4d]	589	return rc;
[f2b8cdc]	590	}
	591
[b67c7d64]	592	/** Convert wide string to new string.
	593	*
	594	* Convert wide string @a src to string. Space for the new string is allocated
	595	* on the heap.
	596	*
	597	* @param src Source wide string.
	598	* @return New string.
	599	*/
	600	char wstr_to_astr(const wchar_t src)
	601	{
	602	char dbuf[STR_BOUNDS(1)];
	603	char *str;
	604	wchar_t ch;
	605
	606	size_t src_idx;
	607	size_t dest_off;
	608	size_t dest_size;
	609
	610	/* Compute size of encoded string. */
	611
	612	src_idx = 0;
	613	dest_size = 0;
	614
	615	while ((ch = src[src_idx++]) != 0) {
	616	dest_off = 0;
	617	if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
	618	break;
	619	dest_size += dest_off;
	620	}
	621
	622	str = malloc(dest_size + 1);
	623	if (str == NULL)
	624	return NULL;
	625
	626	/* Encode string. */
	627
	628	src_idx = 0;
	629	dest_off = 0;
	630
	631	while ((ch = src[src_idx++]) != 0) {
	632	if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
	633	break;
	634	}
	635
	636	str[dest_size] = '\0';
	637	return str;
	638	}
	639
	640
[da2bd08]	641	/** Convert string to wide string.
	642	*
	643	* Convert string @a src to wide string. The output is written to the
[0f06dbc]	644	* buffer specified by @a dest and @a dlen. @a dlen must be non-zero
	645	* and the wide string written will always be null-terminated.
[da2bd08]	646	*
	647	* @param dest Destination buffer.
	648	* @param dlen Length of destination buffer (number of wchars).
	649	* @param src Source string.
[61e29a4d]	650	*
	651	* @return EOK, if success, negative otherwise.
[da2bd08]	652	*/
[61e29a4d]	653	int str_to_wstr(wchar_t dest, size_t dlen, const char src)
[da2bd08]	654	{
[61e29a4d]	655	int rc=EOK;
[da2bd08]	656	size_t offset;
	657	size_t di;
	658	wchar_t c;
	659
	660	assert(dlen > 0);
	661
	662	offset = 0;
	663	di = 0;
	664
	665	do {
[61e29a4d]	666	if (di >= dlen - 1) {
	667	rc = EOVERFLOW;
[da2bd08]	668	break;
[61e29a4d]	669	}
[da2bd08]	670
	671	c = str_decode(src, &offset, STR_NO_LIMIT);
	672	dest[di++] = c;
	673	} while (c != '\0');
	674
	675	dest[dlen - 1] = '\0';
[61e29a4d]	676	return rc;
[da2bd08]	677	}
	678
[f2b8cdc]	679	/** Find first occurence of character in string.
	680	*
	681	* @param str String to search.
	682	* @param ch Character to look for.
	683	*
	684	* @return Pointer to character in @a str or NULL if not found.
	685	*/
[dd2cfa7]	686	char str_chr(const char str, wchar_t ch)
[f2b8cdc]	687	{
	688	wchar_t acc;
	689	size_t off = 0;
[f2d2c7ba]	690	size_t last = 0;
[f2b8cdc]	691
	692	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
	693	if (acc == ch)
[dd2cfa7]	694	return (char *) (str + last);
[f2d2c7ba]	695	last = off;
[f2b8cdc]	696	}
	697
	698	return NULL;
	699	}
	700
[7afb4a5]	701	/** Find last occurence of character in string.
	702	*
	703	* @param str String to search.
	704	* @param ch Character to look for.
	705	*
	706	* @return Pointer to character in @a str or NULL if not found.
	707	*/
[dd2cfa7]	708	char str_rchr(const char str, wchar_t ch)
[7afb4a5]	709	{
	710	wchar_t acc;
	711	size_t off = 0;
[f2d2c7ba]	712	size_t last = 0;
[d4a3ee5]	713	const char *res = NULL;
[f2d2c7ba]	714
[7afb4a5]	715	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
	716	if (acc == ch)
[f2d2c7ba]	717	res = (str + last);
	718	last = off;
[7afb4a5]	719	}
[f2d2c7ba]	720
[dd2cfa7]	721	return (char *) res;
[7afb4a5]	722	}
	723
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position
 * @a pos. The characters after the position (including the
 * terminator) are shifted by one.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert to.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	if ((pos > len) || (pos + 1 > max_pos))
		return false;

	/*
	 * Shift the tail right, starting from the terminator. The condition
	 * is written as i + 1 > pos so that the loop also terminates for
	 * pos == 0 (i wraps around and i + 1 becomes 0).
	 */
	size_t i;
	for (i = len; i + 1 > pos; i--)
		str[i + 1] = str[i];

	str[pos] = ch;

	return true;
}
	753
	754	/** Remove a wide character from a wide string.
	755	*
	756	* Remove a wide character from a wide string at position
	757	* @a pos. The characters after the position are shifted.
	758	*
	759	* @param str String to remove from.
	760	* @param pos Character index to remove.
	761	*
	762	* @return True if the removal was sucessful, false if the position
	763	* is out of bounds.
	764	*
	765	*/
[d4a3ee5]	766	bool wstr_remove(wchar_t *str, size_t pos)
[f2b8cdc]	767	{
[d4a3ee5]	768	size_t len = wstr_length(str);
[f2b8cdc]	769
	770	if (pos >= len)
	771	return false;
	772
[d4a3ee5]	773	size_t i;
[f2b8cdc]	774	for (i = pos + 1; i <= len; i++)
	775	str[i - 1] = str[i];
	776
	777	return true;
	778	}
	779
/** Compare two strings byte-by-byte, ignoring case.
 *
 * Case folding is done by tolower() on individual bytes.
 *
 * @param a First string.
 * @param b Second string.
 *
 * @return Zero if the strings are equal ignoring case, otherwise the
 *         difference of the first pair of lower-cased bytes that differ.
 */
int stricmp(const char *a, const char *b)
{
	size_t i = 0;

	/* tolower() requires a value representable as unsigned char */
	while (a[i] && b[i] &&
	    (tolower((unsigned char) a[i]) == tolower((unsigned char) b[i])))
		i++;

	return (tolower((unsigned char) a[i]) - tolower((unsigned char) b[i]));
}
	789
/** Convert string to a number.
 * Core of strtol and strtoul functions.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @param sgn    It's set to 1 if minus found (otherwise left untouched).
 * @return       Result of conversion (magnitude only), ULONG_MAX on
 *               overflow, 0 if the base is invalid or no digits were found.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
	unsigned long result = 0;
	unsigned long a, b;
	const char *str = nptr;
	const char *tmpptr;

	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*sgn = 1;
		++str;
	} else if (*str == '+')
		++str;

	if (base) {
		if ((base == 1) || (base > 36)) {
			/* FIXME: set errno to EINVAL */
			return 0;
		}
		if ((base == 16) && (*str == '0') && ((str[1] == 'x') ||
		    (str[1] == 'X'))) {
			str += 2;
		}
	} else {
		base = 10;

		if (*str == '0') {
			base = 8;
			if ((str[1] == 'X') || (str[1] == 'x')) {
				base = 16;
				str += 2;
			}
		}
	}

	tmpptr = str;

	while (*str) {
		unsigned char c = (unsigned char) *str;
		int digit;

		if ((c >= '0') && (c <= '9'))
			digit = c - '0';
		else if ((c >= 'a') && (c <= 'z'))
			digit = c - 'a' + 10;
		else if ((c >= 'A') && (c <= 'Z'))
			digit = c - 'A' + 10;
		else
			break;

		/* Valid digits are 0 .. base - 1; base itself is not a digit */
		if (digit >= base)
			break;

		/*
		 * Multiply in two halves (low 8 bits and the rest) so that
		 * overflow can be detected without a wider type.
		 */
		a = (result & 0xff) * base + digit;
		b = (result >> 8) * base + (a >> 8);

		if (b > (ULONG_MAX >> 8)) {
			/* overflow */
			/* FIXME: errno = ERANGE */
			return ULONG_MAX;
		}

		result = (b << 8) + (a & 0xff);
		++str;
	}

	if (str == tmpptr) {
		/*
		 * No number was found => first invalid character is the first
		 * character of the string.
		 */
		/* FIXME: set errno to EINVAL */
		str = nptr;
		result = 0;
	}

	if (endptr)
		*endptr = (char *) str;

	if (nptr == str) {
		/* FIXME: errno = EINVAL */
		return 0;
	}

	return result;
}
	882
	883	/** Convert initial part of string to long int according to given base.
[838e14e2]	884	* The number may begin with an arbitrary number of whitespaces followed by
	885	* optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
	886	* inserted and the number will be taken as hexadecimal one. If the base is 0
	887	* and the number begin with a zero, number will be taken as octal one (as with
	888	* base 8). Otherwise the base 0 is taken as decimal.
	889	*
	890	* @param nptr Pointer to string.
	891	* @param endptr If not NULL, function stores here pointer to the first
	892	* invalid character.
	893	* @param base Zero or number between 2 and 36 inclusive.
	894	* @return Result of conversion.
[672a24d]	895	*/
	896	long int strtol(const char nptr, char *endptr, int base)
	897	{
	898	char sgn = 0;
	899	unsigned long number = 0;
	900
	901	number = _strtoul(nptr, endptr, base, &sgn);
	902
	903	if (number > LONG_MAX) {
[a46da63]	904	if ((sgn) && (number == (unsigned long) (LONG_MAX) + 1)) {
[672a24d]	905	/* FIXME: set 0 to errno */
	906	return number;
	907	}
	908	/* FIXME: set ERANGE to errno */
[a46da63]	909	return (sgn ? LONG_MIN : LONG_MAX);
[672a24d]	910	}
	911
[a46da63]	912	return (sgn ? -number : number);
[672a24d]	913	}
	914
/** Duplicate string.
 *
 * Allocate a new string with malloc() and copy characters from the
 * source string into it. The caller is responsible for freeing the result.
 *
 * The duplicate string is always a well-formed null-terminated UTF-8
 * string, but it can differ from the source string on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string or NULL if memory allocation failed.
 *
 */
char *str_dup(const char *src)
{
	size_t size = str_size(src) + 1;
	char *dest = malloc(size);
	if (dest == NULL)
		return NULL;

	str_cpy(dest, size, src);
	return dest;
}
	941
/** Duplicate string with size limit.
 *
 * Allocate a new string with malloc() and copy up to @a n bytes from the
 * source string into it. No more than @a n + 1 bytes is allocated, but if
 * the size occupied by the source string is smaller than @a n + 1,
 * less is allocated. The caller is responsible for freeing the result.
 *
 * The duplicate string is always a well-formed null-terminated UTF-8
 * string, but it can differ from the source string on the byte level.
 *
 * @param src Source string.
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string or NULL if memory allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	size_t size = str_size(src);
	if (size > n)
		size = n;

	char *dest = malloc(size + 1);
	if (dest == NULL)
		return NULL;

	str_ncpy(dest, size + 1, src, size);
	return dest;
}
	975
[672a24d]	976
	977	/** Convert initial part of string to unsigned long according to given base.
[838e14e2]	978	* The number may begin with an arbitrary number of whitespaces followed by
	979	* optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
	980	* inserted and the number will be taken as hexadecimal one. If the base is 0
	981	* and the number begin with a zero, number will be taken as octal one (as with
	982	* base 8). Otherwise the base 0 is taken as decimal.
	983	*
	984	* @param nptr Pointer to string.
	985	* @param endptr If not NULL, function stores here pointer to the first
	986	* invalid character
	987	* @param base Zero or number between 2 and 36 inclusive.
	988	* @return Result of conversion.
[672a24d]	989	*/
	990	unsigned long strtoul(const char nptr, char *endptr, int base)
	991	{
	992	char sgn = 0;
	993	unsigned long number = 0;
	994
	995	number = _strtoul(nptr, endptr, base, &sgn);
	996
[a46da63]	997	return (sgn ? -number : number);
[672a24d]	998	}
[c594489]	999
/** Split string into tokens (non-reentrant variant).
 *
 * Thin wrapper around strtok_r() that keeps the continuation pointer in
 * a function-local static variable.
 *
 * @param s     String to tokenize, or NULL to continue with the string
 *              from the previous call.
 * @param delim String of delimiter characters.
 *
 * @return Pointer to the next token or NULL if there are no more tokens.
 */
char *strtok(char *s, const char *delim)
{
	static char *next;

	return strtok_r(s, delim, &next);
}
[69df837f]	1006
/** Split string into tokens (reentrant variant).
 *
 * The input string is modified: the delimiter following each token is
 * overwritten with a null terminator.
 *
 * @param s     String to tokenize, or NULL to continue from @a *next.
 * @param delim String of delimiter characters.
 * @param next  Continuation pointer, updated on each call.
 *
 * @return Pointer to the next token or NULL if there are no more tokens.
 */
char *strtok_r(char *s, const char *delim, char **next)
{
	char *start, *end;

	if (s == NULL)
		s = *next;

	/* Nothing to continue from (e.g. first call made with s == NULL). */
	if (s == NULL)
		return NULL;

	/* Skip over leading delimiters. */
	while (*s && (str_chr(delim, *s) != NULL))
		++s;
	start = s;

	/* Skip over token characters. */
	while (*s && (str_chr(delim, *s) == NULL))
		++s;
	end = s;

	/* Resume after the delimiter, or stay at the terminator. */
	*next = (*s ? s + 1 : s);

	if (start == end) {
		return NULL;  /* No more tokens. */
	}

	/* Overwrite delimiter with NULL terminator. */
	*end = '\0';
	return start;
}
	1031
[d47279b]	1032	/** Convert string to uint64_t (internal variant).
	1033	*
	1034	* @param nptr Pointer to string.
	1035	* @param endptr Pointer to the first invalid character is stored here.
	1036	* @param base Zero or number between 2 and 36 inclusive.
	1037	* @param neg Indication of unary minus is stored here.
	1038	* @apram result Result of the conversion.
	1039	*
	1040	* @return EOK if conversion was successful.
	1041	*
	1042	*/
	1043	static int str_uint(const char nptr, char *endptr, unsigned int base,
	1044	bool neg, uint64_t result)
	1045	{
	1046	assert(endptr != NULL);
	1047	assert(neg != NULL);
	1048	assert(result != NULL);
	1049
	1050	*neg = false;
	1051	const char *str = nptr;
	1052
	1053	/* Ignore leading whitespace */
	1054	while (isspace(*str))
	1055	str++;
	1056
	1057	if (*str == '-') {
	1058	*neg = true;
	1059	str++;
	1060	} else if (*str == '+')
	1061	str++;
	1062
	1063	if (base == 0) {
	1064	/* Decode base if not specified */
	1065	base = 10;
	1066
	1067	if (*str == '0') {
	1068	base = 8;
	1069	str++;
	1070
	1071	switch (*str) {
	1072	case 'b':
	1073	case 'B':
	1074	base = 2;
	1075	str++;
	1076	break;
	1077	case 'o':
	1078	case 'O':
	1079	base = 8;
	1080	str++;
	1081	break;
	1082	case 'd':
	1083	case 'D':
	1084	case 't':
	1085	case 'T':
	1086	base = 10;
	1087	str++;
	1088	break;
	1089	case 'x':
	1090	case 'X':
	1091	base = 16;
	1092	str++;
	1093	break;
	1094	default:
	1095	str--;
	1096	}
	1097	}
	1098	} else {
	1099	/* Check base range */
	1100	if ((base < 2) \|\| (base > 36)) {
	1101	endptr = (char ) str;
	1102	return EINVAL;
	1103	}
	1104	}
	1105
	1106	*result = 0;
	1107	const char *startstr = str;
	1108
	1109	while (*str != 0) {
	1110	unsigned int digit;
	1111
	1112	if ((str >= 'a') && (str <= 'z'))
	1113	digit = *str - 'a' + 10;
	1114	else if ((str >= 'A') && (str <= 'Z'))
	1115	digit = *str - 'A' + 10;
	1116	else if ((str >= '0') && (str <= '9'))
	1117	digit = *str - '0';
	1118	else
	1119	break;
	1120
	1121	if (digit >= base)
	1122	break;
	1123
	1124	uint64_t prev = *result;
	1125	result = (result) * base + digit;
	1126
	1127	if (*result < prev) {
	1128	/* Overflow */
	1129	endptr = (char ) str;
	1130	return EOVERFLOW;
	1131	}
	1132
	1133	str++;
	1134	}
	1135
	1136	if (str == startstr) {
	1137	/*
	1138	* No digits were decoded => first invalid character is
	1139	* the first character of the string.
	1140	*/
	1141	str = nptr;
	1142	}
	1143
	1144	endptr = (char ) str;
	1145
	1146	if (str == nptr)
	1147	return EINVAL;
	1148
	1149	return EOK;
	1150	}
	1151
	1152	/** Convert string to uint64_t.
	1153	*
	1154	* @param nptr Pointer to string.
	1155	* @param endptr If not NULL, pointer to the first invalid character
	1156	* is stored here.
	1157	* @param base Zero or number between 2 and 36 inclusive.
	1158	* @param strict Do not allow any trailing characters.
	1159	* @param result Result of the conversion.
	1160	*
	1161	* @return EOK if conversion was successful.
	1162	*
	1163	*/
	1164	int str_uint64(const char nptr, char *endptr, unsigned int base,
	1165	bool strict, uint64_t *result)
	1166	{
	1167	assert(result != NULL);
	1168
	1169	bool neg;
	1170	char *lendptr;
	1171	int ret = str_uint(nptr, &lendptr, base, &neg, result);
	1172
	1173	if (endptr != NULL)
	1174	endptr = (char ) lendptr;
	1175
	1176	if (ret != EOK)
	1177	return ret;
	1178
	1179	/* Do not allow negative values */
	1180	if (neg)
	1181	return EINVAL;
	1182
	1183	/* Check whether we are at the end of
	1184	the string in strict mode */
	1185	if ((strict) && (*lendptr != 0))
	1186	return EINVAL;
	1187
	1188	return EOK;
	1189	}
	1190
	1191	/** Convert string to size_t.
	1192	*
	1193	* @param nptr Pointer to string.
	1194	* @param endptr If not NULL, pointer to the first invalid character
	1195	* is stored here.
	1196	* @param base Zero or number between 2 and 36 inclusive.
	1197	* @param strict Do not allow any trailing characters.
	1198	* @param result Result of the conversion.
	1199	*
	1200	* @return EOK if conversion was successful.
	1201	*
	1202	*/
	1203	int str_size_t(const char nptr, char *endptr, unsigned int base,
	1204	bool strict, size_t *result)
	1205	{
	1206	assert(result != NULL);
	1207
	1208	bool neg;
	1209	char *lendptr;
	1210	uint64_t res;
	1211	int ret = str_uint(nptr, &lendptr, base, &neg, &res);
	1212
	1213	if (endptr != NULL)
	1214	endptr = (char ) lendptr;
	1215
	1216	if (ret != EOK)
	1217	return ret;
	1218
	1219	/* Do not allow negative values */
	1220	if (neg)
	1221	return EINVAL;
	1222
	1223	/* Check whether we are at the end of
	1224	the string in strict mode */
	1225	if ((strict) && (*lendptr != 0))
	1226	return EINVAL;
	1227
	1228	/* Check for overflow */
	1229	size_t _res = (size_t) res;
	1230	if (_res != res)
	1231	return EOVERFLOW;
	1232
	1233	*result = _res;
	1234
	1235	return EOK;
	1236	}
	1237
[e535eeb]	1238	void order_suffix(const uint64_t val, uint64_t rv, char suffix)
	1239	{
[933cadf]	1240	if (val > UINT64_C(10000000000000000000)) {
	1241	*rv = val / UINT64_C(1000000000000000000);
[e535eeb]	1242	*suffix = 'Z';
[933cadf]	1243	} else if (val > UINT64_C(1000000000000000000)) {
	1244	*rv = val / UINT64_C(1000000000000000);
[e535eeb]	1245	*suffix = 'E';
[933cadf]	1246	} else if (val > UINT64_C(1000000000000000)) {
	1247	*rv = val / UINT64_C(1000000000000);
[e535eeb]	1248	*suffix = 'T';
[933cadf]	1249	} else if (val > UINT64_C(1000000000000)) {
	1250	*rv = val / UINT64_C(1000000000);
[e535eeb]	1251	*suffix = 'G';
[933cadf]	1252	} else if (val > UINT64_C(1000000000)) {
	1253	*rv = val / UINT64_C(1000000);
[e535eeb]	1254	*suffix = 'M';
[933cadf]	1255	} else if (val > UINT64_C(1000000)) {
	1256	*rv = val / UINT64_C(1000);
[e535eeb]	1257	*suffix = 'k';
	1258	} else {
	1259	*rv = val;
	1260	*suffix = ' ';
	1261	}
	1262	}
	1263
/** Scale a byte count to a binary (IEC) order of magnitude.
 *
 * The value is divided by the largest power of 1024 for which the
 * quotient still exceeds 1024 * 1024 and a pointer to the matching
 * unit string is stored. Values up to 1048576 are passed through
 * unchanged as plain bytes.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value (integer quotient) is stored here.
 * @param suffix Pointer to a constant unit string is stored here:
 *               "KiB", "MiB", "GiB", "TiB", "PiB" or the byte unit.
 * @param fixed  Selects the space-padded form of the byte unit
 *               instead of plain "B".
 *
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* Units of 2^50 => pebibytes */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		/* Units of 2^40 => tebibytes */
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		/* Units of 2^30 => gibibytes */
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		/* Units of 2^20 => mebibytes */
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		/* Units of 2^10 => kibibytes */
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		if (fixed)
			*suffix = "B ";
		else
			*suffix = "B";
	}
}
	1290
[a46da63]	1291	/** @}
[b2951e2]	1292	*/

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: mainline/uspace/lib/c/generic/str.c@ 61e29a4d

Download in other formats: