Context Navigation

source: mainline/uspace/lib/c/generic/str.c@ 22cf42d9

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since 22cf42d9 was 22cf42d9, checked in by Martin Sucha <sucha14@…>, 14 years ago
Add formatting library and display help message wrapped
Property mode set to `100644`
File size: 31.0 KB

Rev	Line
[936351c1]	1	/*
[df4ed85]	2	* Copyright (c) 2005 Martin Decky
[576845ec]	3	* Copyright (c) 2008 Jiri Svoboda
[22cf42d9]	4	* Copyright (c) 2011 Martin Sucha
[936351c1]	5	* All rights reserved.
	6	*
	7	* Redistribution and use in source and binary forms, with or without
	8	* modification, are permitted provided that the following conditions
	9	* are met:
	10	*
	11	* - Redistributions of source code must retain the above copyright
	12	* notice, this list of conditions and the following disclaimer.
	13	* - Redistributions in binary form must reproduce the above copyright
	14	* notice, this list of conditions and the following disclaimer in the
	15	* documentation and/or other materials provided with the distribution.
	16	* - The name of the author may not be used to endorse or promote products
	17	* derived from this software without specific prior written permission.
	18	*
	19	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	20	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	21	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	22	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
	23	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	24	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	25	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	26	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	27	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
	28	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	29	*/
	30
[a46da63]	31	/** @addtogroup libc
[b2951e2]	32	* @{
	33	*/
	34	/** @file
	35	*/
	36
[19f857a]	37	#include <str.h>
[e64c4b2]	38	#include <stdlib.h>
[6700ee2]	39	#include <assert.h>
[9539be6]	40	#include <stdint.h>
[e64c4b2]	41	#include <ctype.h>
[566987b0]	42	#include <malloc.h>
[171f9a1]	43	#include <errno.h>
[f2b8cdc]	44	#include <align.h>
[095003a8]	45	#include <mem.h>
[19f857a]	46	#include <str.h>
[171f9a1]	47
	48	/** Byte mask consisting of lowest @n bits (out of 8) */
	49	#define LO_MASK_8(n) ((uint8_t) ((1 << (n)) - 1))
	50
	51	/** Byte mask consisting of lowest @n bits (out of 32) */
	52	#define LO_MASK_32(n) ((uint32_t) ((1 << (n)) - 1))
	53
	54	/** Byte mask consisting of highest @n bits (out of 8) */
	55	#define HI_MASK_8(n) (~LO_MASK_8(8 - (n)))
	56
	57	/** Number of data bits in a UTF-8 continuation byte */
	58	#define CONT_BITS 6
	59
	60	/** Decode a single character from a string.
	61	*
	62	* Decode a single character from a string of size @a size. Decoding starts
	63	* at @a offset and this offset is moved to the beginning of the next
	64	* character. In case of decoding error, offset generally advances at least
	65	* by one. However, offset is never moved beyond size.
	66	*
	67	* @param str String (not necessarily NULL-terminated).
	68	* @param offset Byte offset in string where to start decoding.
	69	* @param size Size of the string (in bytes).
	70	*
	71	* @return Value of decoded character, U_SPECIAL on decoding error or
	72	* NULL if attempt to decode beyond @a size.
	73	*
	74	*/
	75	wchar_t str_decode(const char str, size_t offset, size_t size)
	76	{
	77	if (*offset + 1 > size)
	78	return 0;
	79
	80	/* First byte read from string */
	81	uint8_t b0 = (uint8_t) str[(*offset)++];
	82
	83	/* Determine code length */
	84
	85	unsigned int b0_bits; /* Data bits in first byte */
	86	unsigned int cbytes; /* Number of continuation bytes */
	87
	88	if ((b0 & 0x80) == 0) {
	89	/* 0xxxxxxx (Plain ASCII) */
	90	b0_bits = 7;
	91	cbytes = 0;
	92	} else if ((b0 & 0xe0) == 0xc0) {
	93	/* 110xxxxx 10xxxxxx */
	94	b0_bits = 5;
	95	cbytes = 1;
	96	} else if ((b0 & 0xf0) == 0xe0) {
	97	/* 1110xxxx 10xxxxxx 10xxxxxx */
	98	b0_bits = 4;
	99	cbytes = 2;
	100	} else if ((b0 & 0xf8) == 0xf0) {
	101	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
	102	b0_bits = 3;
	103	cbytes = 3;
	104	} else {
	105	/* 10xxxxxx -- unexpected continuation byte */
	106	return U_SPECIAL;
	107	}
	108
	109	if (*offset + cbytes > size)
	110	return U_SPECIAL;
	111
	112	wchar_t ch = b0 & LO_MASK_8(b0_bits);
	113
	114	/* Decode continuation bytes */
	115	while (cbytes > 0) {
	116	uint8_t b = (uint8_t) str[(*offset)++];
	117
	118	/* Must be 10xxxxxx */
	119	if ((b & 0xc0) != 0x80)
	120	return U_SPECIAL;
	121
	122	/* Shift data bits to ch */
	123	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
	124	cbytes--;
	125	}
	126
	127	return ch;
	128	}
	129
	130	/** Encode a single character to string representation.
	131	*
	132	* Encode a single character to string representation (i.e. UTF-8) and store
	133	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
	134	* is moved to the position where the next character can be written to.
	135	*
	136	* @param ch Input character.
	137	* @param str Output buffer.
	138	* @param offset Byte offset where to start writing.
	139	* @param size Size of the output buffer (in bytes).
	140	*
	141	* @return EOK if the character was encoded successfully, EOVERFLOW if there
[d4a3ee5]	142	* was not enough space in the output buffer or EINVAL if the character
	143	* code was invalid.
[171f9a1]	144	*/
	145	int chr_encode(const wchar_t ch, char str, size_t offset, size_t size)
	146	{
	147	if (*offset >= size)
	148	return EOVERFLOW;
	149
	150	if (!chr_check(ch))
	151	return EINVAL;
	152
	153	/* Unsigned version of ch (bit operations should only be done
	154	on unsigned types). */
	155	uint32_t cc = (uint32_t) ch;
	156
	157	/* Determine how many continuation bytes are needed */
	158
	159	unsigned int b0_bits; /* Data bits in first byte */
	160	unsigned int cbytes; /* Number of continuation bytes */
	161
	162	if ((cc & ~LO_MASK_32(7)) == 0) {
	163	b0_bits = 7;
	164	cbytes = 0;
	165	} else if ((cc & ~LO_MASK_32(11)) == 0) {
	166	b0_bits = 5;
	167	cbytes = 1;
	168	} else if ((cc & ~LO_MASK_32(16)) == 0) {
	169	b0_bits = 4;
	170	cbytes = 2;
	171	} else if ((cc & ~LO_MASK_32(21)) == 0) {
	172	b0_bits = 3;
	173	cbytes = 3;
	174	} else {
	175	/* Codes longer than 21 bits are not supported */
	176	return EINVAL;
	177	}
	178
	179	/* Check for available space in buffer */
	180	if (*offset + cbytes >= size)
	181	return EOVERFLOW;
	182
	183	/* Encode continuation bytes */
	184	unsigned int i;
	185	for (i = cbytes; i > 0; i--) {
	186	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
	187	cc = cc >> CONT_BITS;
	188	}
	189
	190	/* Encode first byte */
	191	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
	192
	193	/* Advance offset */
	194	*offset += cbytes + 1;
	195
	196	return EOK;
	197	}
	198
/** Get size of string.
 *
 * Get the number of bytes which are used by the string @a str (excluding the
 * NULL-terminator).
 *
 * @param str NULL-terminated string to consider.
 *
 * @return Number of bytes used by the string.
 *
 */
size_t str_size(const char *str)
{
	const char *end = str;

	/* Walk to the terminating zero byte */
	while (*end != 0)
		end++;

	return (size_t) (end - str);
}
	218
	219	/** Get size of wide string.
	220	*
	221	* Get the number of bytes which are used by the wide string @a str (excluding the
	222	* NULL-terminator).
	223	*
	224	* @param str Wide string to consider.
	225	*
	226	* @return Number of bytes used by the wide string
	227	*
	228	*/
	229	size_t wstr_size(const wchar_t *str)
	230	{
	231	return (wstr_length(str) * sizeof(wchar_t));
	232	}
	233
	234	/** Get size of string with length limit.
	235	*
	236	* Get the number of bytes which are used by up to @a max_len first
	237	* characters in the string @a str. If @a max_len is greater than
	238	* the length of @a str, the entire string is measured (excluding the
	239	* NULL-terminator).
	240	*
	241	* @param str String to consider.
	242	* @param max_len Maximum number of characters to measure.
	243	*
	244	* @return Number of bytes used by the characters.
	245	*
	246	*/
[d4a3ee5]	247	size_t str_lsize(const char *str, size_t max_len)
[f2b8cdc]	248	{
[d4a3ee5]	249	size_t len = 0;
[f2b8cdc]	250	size_t offset = 0;
	251
	252	while (len < max_len) {
	253	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
	254	break;
	255
	256	len++;
	257	}
	258
	259	return offset;
	260	}
	261
	262	/** Get size of wide string with length limit.
	263	*
	264	* Get the number of bytes which are used by up to @a max_len first
	265	* wide characters in the wide string @a str. If @a max_len is greater than
	266	* the length of @a str, the entire wide string is measured (excluding the
	267	* NULL-terminator).
	268	*
	269	* @param str Wide string to consider.
	270	* @param max_len Maximum number of wide characters to measure.
	271	*
	272	* @return Number of bytes used by the wide characters.
	273	*
	274	*/
[d4a3ee5]	275	size_t wstr_lsize(const wchar_t *str, size_t max_len)
[f2b8cdc]	276	{
	277	return (wstr_nlength(str, max_len * sizeof(wchar_t)) * sizeof(wchar_t));
	278	}
	279
	280	/** Get number of characters in a string.
	281	*
	282	* @param str NULL-terminated string.
	283	*
	284	* @return Number of characters in string.
	285	*
	286	*/
[d4a3ee5]	287	size_t str_length(const char *str)
[f2b8cdc]	288	{
[d4a3ee5]	289	size_t len = 0;
[f2b8cdc]	290	size_t offset = 0;
	291
	292	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
	293	len++;
	294
	295	return len;
	296	}
	297
	298	/** Get number of characters in a wide string.
	299	*
	300	* @param str NULL-terminated wide string.
	301	*
	302	* @return Number of characters in @a str.
	303	*
	304	*/
[d4a3ee5]	305	size_t wstr_length(const wchar_t *wstr)
[f2b8cdc]	306	{
[d4a3ee5]	307	size_t len = 0;
[f2b8cdc]	308
	309	while (*wstr++ != 0)
	310	len++;
	311
	312	return len;
	313	}
	314
	315	/** Get number of characters in a string with size limit.
	316	*
	317	* @param str NULL-terminated string.
	318	* @param size Maximum number of bytes to consider.
	319	*
	320	* @return Number of characters in string.
	321	*
	322	*/
[d4a3ee5]	323	size_t str_nlength(const char *str, size_t size)
[f2b8cdc]	324	{
[d4a3ee5]	325	size_t len = 0;
[f2b8cdc]	326	size_t offset = 0;
	327
	328	while (str_decode(str, &offset, size) != 0)
	329	len++;
	330
	331	return len;
	332	}
	333
	334	/** Get number of characters in a string with size limit.
	335	*
	336	* @param str NULL-terminated string.
	337	* @param size Maximum number of bytes to consider.
	338	*
	339	* @return Number of characters in string.
	340	*
	341	*/
[d4a3ee5]	342	size_t wstr_nlength(const wchar_t *str, size_t size)
[f2b8cdc]	343	{
[d4a3ee5]	344	size_t len = 0;
	345	size_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
	346	size_t offset = 0;
[f2b8cdc]	347
	348	while ((offset < limit) && (*str++ != 0)) {
	349	len++;
	350	offset += sizeof(wchar_t);
	351	}
	352
	353	return len;
	354	}
	355
	356	/** Check whether character is plain ASCII.
	357	*
	358	* @return True if character is plain ASCII.
	359	*
	360	*/
	361	bool ascii_check(wchar_t ch)
	362	{
	363	if ((ch >= 0) && (ch <= 127))
	364	return true;
	365
	366	return false;
	367	}
	368
/** Check whether character is valid
 *
 * @param ch Character to check.
 *
 * @return True if character is a valid Unicode code point, i.e. lies
 *         in the range 0 to 0x10FFFF inclusive.
 *
 */
bool chr_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 0x10FFFF);
}
[936351c1]	381
[f2b8cdc]	382	/** Compare two NULL terminated strings.
	383	*
	384	* Do a char-by-char comparison of two NULL-terminated strings.
	385	* The strings are considered equal iff they consist of the same
	386	* characters on the minimum of their lengths.
	387	*
	388	* @param s1 First string to compare.
	389	* @param s2 Second string to compare.
	390	*
	391	* @return 0 if the strings are equal, -1 if first is smaller,
	392	* 1 if second smaller.
	393	*
	394	*/
	395	int str_cmp(const char s1, const char s2)
	396	{
	397	wchar_t c1 = 0;
	398	wchar_t c2 = 0;
	399
	400	size_t off1 = 0;
	401	size_t off2 = 0;
	402
	403	while (true) {
	404	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
	405	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
	406
	407	if (c1 < c2)
	408	return -1;
	409
	410	if (c1 > c2)
	411	return 1;
	412
	413	if (c1 == 0 \|\| c2 == 0)
	414	break;
	415	}
	416
	417	return 0;
	418	}
	419
	420	/** Compare two NULL terminated strings with length limit.
	421	*
	422	* Do a char-by-char comparison of two NULL-terminated strings.
	423	* The strings are considered equal iff they consist of the same
	424	* characters on the minimum of their lengths and the length limit.
	425	*
	426	* @param s1 First string to compare.
	427	* @param s2 Second string to compare.
	428	* @param max_len Maximum number of characters to consider.
	429	*
	430	* @return 0 if the strings are equal, -1 if first is smaller,
	431	* 1 if second smaller.
	432	*
	433	*/
[d4a3ee5]	434	int str_lcmp(const char s1, const char s2, size_t max_len)
[f2b8cdc]	435	{
	436	wchar_t c1 = 0;
	437	wchar_t c2 = 0;
	438
	439	size_t off1 = 0;
	440	size_t off2 = 0;
	441
[d4a3ee5]	442	size_t len = 0;
[f2b8cdc]	443
	444	while (true) {
	445	if (len >= max_len)
	446	break;
	447
	448	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
	449	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
	450
	451	if (c1 < c2)
	452	return -1;
	453
	454	if (c1 > c2)
	455	return 1;
	456
	457	if (c1 == 0 \|\| c2 == 0)
	458	break;
	459
	460	++len;
	461	}
	462
	463	return 0;
	464
	465	}
	466
[6eb2e96]	467	/** Copy string.
[f2b8cdc]	468	*
[6eb2e96]	469	* Copy source string @a src to destination buffer @a dest.
	470	* No more than @a size bytes are written. If the size of the output buffer
	471	* is at least one byte, the output string will always be well-formed, i.e.
	472	* null-terminated and containing only complete characters.
[f2b8cdc]	473	*
[abf09311]	474	* @param dest Destination buffer.
[6700ee2]	475	* @param count Size of the destination buffer (must be > 0).
[6eb2e96]	476	* @param src Source string.
[f2b8cdc]	477	*/
[6eb2e96]	478	void str_cpy(char dest, size_t size, const char src)
[f2b8cdc]	479	{
[6700ee2]	480	/* There must be space for a null terminator in the buffer. */
	481	assert(size > 0);
[f2b8cdc]	482
[abf09311]	483	size_t src_off = 0;
	484	size_t dest_off = 0;
	485
	486	wchar_t ch;
[6eb2e96]	487	while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
	488	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
	489	break;
	490	}
[abf09311]	491
[6eb2e96]	492	dest[dest_off] = '\0';
	493	}
	494
	495	/** Copy size-limited substring.
	496	*
[6700ee2]	497	* Copy prefix of string @a src of max. size @a size to destination buffer
	498	* @a dest. No more than @a size bytes are written. The output string will
	499	* always be well-formed, i.e. null-terminated and containing only complete
	500	* characters.
[6eb2e96]	501	*
	502	* No more than @a n bytes are read from the input string, so it does not
	503	* have to be null-terminated.
	504	*
[abf09311]	505	* @param dest Destination buffer.
[6700ee2]	506	* @param count Size of the destination buffer (must be > 0).
[6eb2e96]	507	* @param src Source string.
[abf09311]	508	* @param n Maximum number of bytes to read from @a src.
[6eb2e96]	509	*/
	510	void str_ncpy(char dest, size_t size, const char src, size_t n)
	511	{
[6700ee2]	512	/* There must be space for a null terminator in the buffer. */
	513	assert(size > 0);
[f2b8cdc]	514
[abf09311]	515	size_t src_off = 0;
	516	size_t dest_off = 0;
	517
	518	wchar_t ch;
[6eb2e96]	519	while ((ch = str_decode(src, &src_off, n)) != 0) {
	520	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
[f2b8cdc]	521	break;
	522	}
[abf09311]	523
[6eb2e96]	524	dest[dest_off] = '\0';
[f2b8cdc]	525	}
	526
/** Append one string to another.
 *
 * Append source string @a src to string in destination buffer @a dest.
 * Size of the destination buffer is @a size. The result is truncated at
 * a character boundary if it does not fit. If the string already in
 * @a dest occupies @a size or more bytes, the buffer is left untouched.
 *
 * @param dest Destination buffer holding a null-terminated string.
 * @param size Size of the destination buffer.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	size_t dstr_size = str_size(dest);

	/* No room left, not even for a terminator */
	if (dstr_size >= size)
		return;

	str_cpy(dest + dstr_size, size - dstr_size, src);
}
	548
[dcb74c0a]	549	/** Convert space-padded ASCII to string.
	550	*
	551	* Common legacy text encoding in hardware is 7-bit ASCII fitted into
	552	* a fixed-with byte buffer (bit 7 always zero), right-padded with spaces
	553	* (ASCII 0x20). Convert space-padded ascii to string representation.
	554	*
	555	* If the text does not fit into the destination buffer, the function converts
	556	* as many characters as possible and returns EOVERFLOW.
	557	*
	558	* If the text contains non-ASCII bytes (with bit 7 set), the whole string is
	559	* converted anyway and invalid characters are replaced with question marks
	560	* (U_SPECIAL) and the function returns EIO.
	561	*
	562	* Regardless of return value upon return @a dest will always be well-formed.
	563	*
	564	* @param dest Destination buffer
	565	* @param size Size of destination buffer
	566	* @param src Space-padded ASCII.
	567	* @param n Size of the source buffer in bytes.
	568	*
	569	* @return EOK on success, EOVERFLOW if the text does not fit
	570	* destination buffer, EIO if the text contains
	571	* non-ASCII bytes.
	572	*/
	573	int spascii_to_str(char dest, size_t size, const uint8_t src, size_t n)
	574	{
	575	size_t sidx;
	576	size_t didx;
	577	size_t dlast;
	578	uint8_t byte;
	579	int rc;
	580	int result;
	581
	582	/* There must be space for a null terminator in the buffer. */
	583	assert(size > 0);
	584	result = EOK;
	585
	586	didx = 0;
	587	dlast = 0;
	588	for (sidx = 0; sidx < n; ++sidx) {
	589	byte = src[sidx];
	590	if (!ascii_check(byte)) {
	591	byte = U_SPECIAL;
	592	result = EIO;
	593	}
	594
	595	rc = chr_encode(byte, dest, &didx, size - 1);
	596	if (rc != EOK) {
	597	assert(rc == EOVERFLOW);
	598	dest[didx] = '\0';
	599	return rc;
	600	}
	601
	602	/* Remember dest index after last non-empty character */
	603	if (byte != 0x20)
	604	dlast = didx;
	605	}
	606
	607	/* Terminate string after last non-empty character */
	608	dest[dlast] = '\0';
	609	return result;
	610	}
	611
[0f06dbc]	612	/** Convert wide string to string.
[f2b8cdc]	613	*
[0f06dbc]	614	* Convert wide string @a src to string. The output is written to the buffer
	615	* specified by @a dest and @a size. @a size must be non-zero and the string
	616	* written will always be well-formed.
[f2b8cdc]	617	*
[0f06dbc]	618	* @param dest Destination buffer.
	619	* @param size Size of the destination buffer.
	620	* @param src Source wide string.
[f2b8cdc]	621	*/
[0f06dbc]	622	void wstr_to_str(char dest, size_t size, const wchar_t src)
[f2b8cdc]	623	{
	624	wchar_t ch;
[0f06dbc]	625	size_t src_idx;
	626	size_t dest_off;
	627
	628	/* There must be space for a null terminator in the buffer. */
	629	assert(size > 0);
[f2b8cdc]	630
[0f06dbc]	631	src_idx = 0;
	632	dest_off = 0;
	633
[f2b8cdc]	634	while ((ch = src[src_idx++]) != 0) {
[0f06dbc]	635	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
[f2b8cdc]	636	break;
	637	}
[0f06dbc]	638
	639	dest[dest_off] = '\0';
[f2b8cdc]	640	}
	641
[b67c7d64]	642	/** Convert wide string to new string.
	643	*
	644	* Convert wide string @a src to string. Space for the new string is allocated
	645	* on the heap.
	646	*
	647	* @param src Source wide string.
	648	* @return New string.
	649	*/
	650	char wstr_to_astr(const wchar_t src)
	651	{
	652	char dbuf[STR_BOUNDS(1)];
	653	char *str;
	654	wchar_t ch;
	655
	656	size_t src_idx;
	657	size_t dest_off;
	658	size_t dest_size;
	659
	660	/* Compute size of encoded string. */
	661
	662	src_idx = 0;
	663	dest_size = 0;
	664
	665	while ((ch = src[src_idx++]) != 0) {
	666	dest_off = 0;
	667	if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
	668	break;
	669	dest_size += dest_off;
	670	}
	671
	672	str = malloc(dest_size + 1);
	673	if (str == NULL)
	674	return NULL;
	675
	676	/* Encode string. */
	677
	678	src_idx = 0;
	679	dest_off = 0;
	680
	681	while ((ch = src[src_idx++]) != 0) {
	682	if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
	683	break;
	684	}
	685
	686	str[dest_size] = '\0';
	687	return str;
	688	}
	689
	690
[da2bd08]	691	/** Convert string to wide string.
	692	*
	693	* Convert string @a src to wide string. The output is written to the
[0f06dbc]	694	* buffer specified by @a dest and @a dlen. @a dlen must be non-zero
	695	* and the wide string written will always be null-terminated.
[da2bd08]	696	*
	697	* @param dest Destination buffer.
	698	* @param dlen Length of destination buffer (number of wchars).
	699	* @param src Source string.
	700	*/
	701	void str_to_wstr(wchar_t dest, size_t dlen, const char src)
	702	{
	703	size_t offset;
	704	size_t di;
	705	wchar_t c;
	706
	707	assert(dlen > 0);
	708
	709	offset = 0;
	710	di = 0;
	711
	712	do {
	713	if (di >= dlen - 1)
	714	break;
	715
	716	c = str_decode(src, &offset, STR_NO_LIMIT);
	717	dest[di++] = c;
	718	} while (c != '\0');
	719
	720	dest[dlen - 1] = '\0';
	721	}
	722
/** Convert string to newly allocated wide string.
 *
 * Convert string @a str to wide string. A new wide NULL-terminated
 * string will be allocated on the heap with calloc().
 *
 * @param str Source string.
 *
 * @return New wide string, or NULL if the allocation failed.
 */
wchar_t *str_to_awstr(const char *str)
{
	/* One extra element for the terminator */
	size_t wlen = str_length(str) + 1;

	wchar_t *wstr = calloc(wlen, sizeof(wchar_t));
	if (wstr == NULL)
		return NULL;

	str_to_wstr(wstr, wlen, str);
	return wstr;
}
	740
[f2b8cdc]	741	/** Find first occurence of character in string.
	742	*
	743	* @param str String to search.
	744	* @param ch Character to look for.
	745	*
	746	* @return Pointer to character in @a str or NULL if not found.
	747	*/
[dd2cfa7]	748	char str_chr(const char str, wchar_t ch)
[f2b8cdc]	749	{
	750	wchar_t acc;
	751	size_t off = 0;
[f2d2c7ba]	752	size_t last = 0;
[f2b8cdc]	753
	754	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
	755	if (acc == ch)
[dd2cfa7]	756	return (char *) (str + last);
[f2d2c7ba]	757	last = off;
[f2b8cdc]	758	}
	759
	760	return NULL;
	761	}
	762
[7afb4a5]	763	/** Find last occurence of character in string.
	764	*
	765	* @param str String to search.
	766	* @param ch Character to look for.
	767	*
	768	* @return Pointer to character in @a str or NULL if not found.
	769	*/
[dd2cfa7]	770	char str_rchr(const char str, wchar_t ch)
[7afb4a5]	771	{
	772	wchar_t acc;
	773	size_t off = 0;
[f2d2c7ba]	774	size_t last = 0;
[d4a3ee5]	775	const char *res = NULL;
[f2d2c7ba]	776
[7afb4a5]	777	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
	778	if (acc == ch)
[f2d2c7ba]	779	res = (str + last);
	780	last = off;
[7afb4a5]	781	}
[f2d2c7ba]	782
[dd2cfa7]	783	return (char *) res;
[7afb4a5]	784	}
	785
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position
 * @a pos. The characters after the position (including the terminator)
 * are shifted by one.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	if ((pos > len) || (pos + 1 > max_pos))
		return false;

	/*
	 * Shift the tail from the terminator backwards. The condition is
	 * written as i + 1 > pos so that the loop also ends for pos == 0,
	 * when i wraps around after reaching zero.
	 */
	size_t i;
	for (i = len; i + 1 > pos; i--)
		str[i + 1] = str[i];

	str[pos] = ch;

	return true;
}
	815
	816	/** Remove a wide character from a wide string.
	817	*
	818	* Remove a wide character from a wide string at position
	819	* @a pos. The characters after the position are shifted.
	820	*
	821	* @param str String to remove from.
	822	* @param pos Character index to remove.
	823	*
	824	* @return True if the removal was sucessful, false if the position
	825	* is out of bounds.
	826	*
	827	*/
[d4a3ee5]	828	bool wstr_remove(wchar_t *str, size_t pos)
[f2b8cdc]	829	{
[d4a3ee5]	830	size_t len = wstr_length(str);
[f2b8cdc]	831
	832	if (pos >= len)
	833	return false;
	834
[d4a3ee5]	835	size_t i;
[f2b8cdc]	836	for (i = pos + 1; i <= len; i++)
	837	str[i - 1] = str[i];
	838
	839	return true;
	840	}
	841
/** Compare two strings byte by byte, ignoring case.
 *
 * Bytes are folded with tolower() before being compared. Each byte is
 * converted to unsigned char first, because passing a negative char
 * value to tolower() is undefined.
 *
 * @param a First NULL-terminated string.
 * @param b Second NULL-terminated string.
 *
 * @return Zero if the strings match, otherwise the difference between
 *         the first pair of folded bytes that differ.
 */
int stricmp(const char *a, const char *b)
{
	size_t i = 0;

	while ((a[i] != '\0') && (b[i] != '\0') &&
	    (tolower((unsigned char) a[i]) == tolower((unsigned char) b[i])))
		i++;

	return (tolower((unsigned char) a[i]) - tolower((unsigned char) b[i]));
}
	851
/** Convert string to a number.
 * Core of strtol and strtoul functions.
 *
 * Leading whitespace and an optional sign are skipped. With base 0 the
 * base is derived from the prefix: `0x'/`0X' followed by a hex digit
 * means 16, a leading `0' means 8, anything else means 10. With base 16
 * the `0x' prefix is optional.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character, or @a nptr if nothing was converted.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @param sgn    It's set to 1 if minus found; otherwise left untouched.
 * @return       Result of conversion, ULONG_MAX on overflow, 0 if
 *               no conversion was done or the base is invalid.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
	const char *str = nptr;

	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*sgn = 1;
		++str;
	} else if (*str == '+') {
		++str;
	}

	if ((base != 0) && ((base < 2) || (base > 36))) {
		/* FIXME: set errno to EINVAL */
		if (endptr)
			*endptr = (char *) nptr;
		return 0;
	}

	/*
	 * The "0x" prefix is consumed only when a hex digit follows it;
	 * otherwise the leading "0" is itself the number.
	 */
	bool hex_prefix = (str[0] == '0') &&
	    ((str[1] == 'x') || (str[1] == 'X')) &&
	    isxdigit((unsigned char) str[2]);

	if (((base == 0) || (base == 16)) && hex_prefix) {
		base = 16;
		str += 2;
	} else if (base == 0) {
		base = (*str == '0') ? 8 : 10;
	}

	const char *digits = str;
	unsigned long result = 0;
	bool overflow = false;

	for (; *str != '\0'; ++str) {
		unsigned char c = (unsigned char) *str;
		unsigned int digit;

		if ((c >= '0') && (c <= '9'))
			digit = c - '0';
		else if ((c >= 'a') && (c <= 'z'))
			digit = c - 'a' + 10;
		else if ((c >= 'A') && (c <= 'Z'))
			digit = c - 'A' + 10;
		else
			break;

		/* Valid digits are 0 .. base - 1 */
		if (digit >= (unsigned int) base)
			break;

		/* After an overflow keep consuming digits for endptr only */
		if (overflow)
			continue;

		if (result > (ULONG_MAX - digit) / (unsigned long) base) {
			/* FIXME: errno = ERANGE */
			overflow = true;
			result = ULONG_MAX;
		} else {
			result = result * (unsigned long) base + digit;
		}
	}

	if (str == digits) {
		/*
		 * No number was found => first invalid character is the first
		 * character of the string.
		 */
		/* FIXME: set errno to EINVAL */
		str = nptr;
		result = 0;
	}

	if (endptr)
		*endptr = (char *) str;

	return result;
}
	944
	945	/** Convert initial part of string to long int according to given base.
[838e14e2]	946	* The number may begin with an arbitrary number of whitespaces followed by
	947	* optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
	948	* inserted and the number will be taken as hexadecimal one. If the base is 0
	949	* and the number begin with a zero, number will be taken as octal one (as with
	950	* base 8). Otherwise the base 0 is taken as decimal.
	951	*
	952	* @param nptr Pointer to string.
	953	* @param endptr If not NULL, function stores here pointer to the first
	954	* invalid character.
	955	* @param base Zero or number between 2 and 36 inclusive.
	956	* @return Result of conversion.
[672a24d]	957	*/
	958	long int strtol(const char nptr, char *endptr, int base)
	959	{
	960	char sgn = 0;
	961	unsigned long number = 0;
	962
	963	number = _strtoul(nptr, endptr, base, &sgn);
	964
	965	if (number > LONG_MAX) {
[a46da63]	966	if ((sgn) && (number == (unsigned long) (LONG_MAX) + 1)) {
[672a24d]	967	/* FIXME: set 0 to errno */
	968	return number;
	969	}
	970	/* FIXME: set ERANGE to errno */
[a46da63]	971	return (sgn ? LONG_MIN : LONG_MAX);
[672a24d]	972	}
	973
[a46da63]	974	return (sgn ? -number : number);
[672a24d]	975	}
	976
/** Duplicate string.
 *
 * Allocate a new string with malloc() and copy characters from the
 * source string into it using str_cpy(). The caller takes over the
 * returned buffer.
 *
 * The duplicate string is always a well-formed null-terminated UTF-8
 * string, but it can differ from the source string on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	/* Bytes of the source plus the terminator */
	size_t size = str_size(src) + 1;

	char *dest = malloc(size);
	if (dest == NULL)
		return NULL;

	str_cpy(dest, size, src);
	return dest;
}
	1003
/** Duplicate string with size limit.
 *
 * Allocate a new string with malloc() and copy up to @a n bytes from the
 * source string into it using str_ncpy(). No more than @a n + 1 bytes are
 * allocated, but if the size occupied by the source string is smaller
 * than @a n, less is allocated. No more than @a n bytes of @a src are
 * inspected.
 *
 * The duplicate string is always a well-formed null-terminated UTF-8
 * string, but it can differ from the source string on the byte level.
 *
 * @param src Source string.
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	/* Size of the source in bytes, looking at no more than n bytes */
	size_t size = 0;
	while ((size < n) && (src[size] != '\0'))
		size++;

	char *dest = malloc(size + 1);
	if (dest == NULL)
		return NULL;

	str_ncpy(dest, size + 1, src, size);
	return dest;
}
	1037
[672a24d]	1038
	1039	/** Convert initial part of string to unsigned long according to given base.
[838e14e2]	1040	* The number may begin with an arbitrary number of whitespaces followed by
	1041	* optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
	1042	* inserted and the number will be taken as hexadecimal one. If the base is 0
	1043	* and the number begin with a zero, number will be taken as octal one (as with
	1044	* base 8). Otherwise the base 0 is taken as decimal.
	1045	*
	1046	* @param nptr Pointer to string.
	1047	* @param endptr If not NULL, function stores here pointer to the first
	1048	* invalid character
	1049	* @param base Zero or number between 2 and 36 inclusive.
	1050	* @return Result of conversion.
[672a24d]	1051	*/
	1052	unsigned long strtoul(const char nptr, char *endptr, int base)
	1053	{
	1054	char sgn = 0;
	1055	unsigned long number = 0;
	1056
	1057	number = _strtoul(nptr, endptr, base, &sgn);
	1058
[a46da63]	1059	return (sgn ? -number : number);
[672a24d]	1060	}
[c594489]	1061
/** Split string into tokens (variant with hidden state).
 *
 * Wrapper around strtok_r() that keeps the continuation pointer in
 * a static variable, so the position is shared by all callers.
 *
 * @param s     String to tokenize, or NULL to continue with the string
 *              passed to the previous call.
 * @param delim String of delimiter characters.
 *
 * @return Pointer to the next token or NULL if there are no more tokens.
 */
char *strtok(char *s, const char *delim)
{
	static char *next;

	return strtok_r(s, delim, &next);
}
[69df837f]	1068
/** Split string into tokens.
 *
 * Find the next token in @a s, i.e. the next run of bytes that are not
 * found in @a delim by str_chr(). The delimiter that ends the token is
 * overwritten with a null terminator, so the input string is modified.
 *
 * @param s     String to tokenize, or NULL to continue from @a *next.
 * @param delim String of delimiter characters.
 * @param next  Continuation pointer; read when @a s is NULL and updated
 *              on every call that has a string to scan.
 *
 * @return Pointer to the next token or NULL if there are no more tokens.
 */
char *strtok_r(char *s, const char *delim, char **next)
{
	char *start, *end;

	if (s == NULL)
		s = *next;

	/* Nothing to continue from. */
	if (s == NULL)
		return NULL;

	/* Skip over leading delimiters. */
	while (*s && (str_chr(delim, *s) != NULL))
		++s;
	start = s;

	/* Skip over token characters. */
	while (*s && (str_chr(delim, *s) == NULL))
		++s;
	end = s;

	/* Continue after the delimiter, or stay on the terminator. */
	*next = (*s ? s + 1 : s);

	if (start == end) {
		return NULL;	/* No more tokens. */
	}

	/* Overwrite delimiter with NULL terminator. */
	*end = '\0';
	return start;
}
	1093
[d47279b]	1094	/** Convert string to uint64_t (internal variant).
	1095	*
	1096	* @param nptr Pointer to string.
	1097	* @param endptr Pointer to the first invalid character is stored here.
	1098	* @param base Zero or number between 2 and 36 inclusive.
	1099	* @param neg Indication of unary minus is stored here.
	1100	* @apram result Result of the conversion.
	1101	*
	1102	* @return EOK if conversion was successful.
	1103	*
	1104	*/
	1105	static int str_uint(const char nptr, char *endptr, unsigned int base,
	1106	bool neg, uint64_t result)
	1107	{
	1108	assert(endptr != NULL);
	1109	assert(neg != NULL);
	1110	assert(result != NULL);
	1111
	1112	*neg = false;
	1113	const char *str = nptr;
	1114
	1115	/* Ignore leading whitespace */
	1116	while (isspace(*str))
	1117	str++;
	1118
	1119	if (*str == '-') {
	1120	*neg = true;
	1121	str++;
	1122	} else if (*str == '+')
	1123	str++;
	1124
	1125	if (base == 0) {
	1126	/* Decode base if not specified */
	1127	base = 10;
	1128
	1129	if (*str == '0') {
	1130	base = 8;
	1131	str++;
	1132
	1133	switch (*str) {
	1134	case 'b':
	1135	case 'B':
	1136	base = 2;
	1137	str++;
	1138	break;
	1139	case 'o':
	1140	case 'O':
	1141	base = 8;
	1142	str++;
	1143	break;
	1144	case 'd':
	1145	case 'D':
	1146	case 't':
	1147	case 'T':
	1148	base = 10;
	1149	str++;
	1150	break;
	1151	case 'x':
	1152	case 'X':
	1153	base = 16;
	1154	str++;
	1155	break;
	1156	default:
	1157	str--;
	1158	}
	1159	}
	1160	} else {
	1161	/* Check base range */
	1162	if ((base < 2) \|\| (base > 36)) {
	1163	endptr = (char ) str;
	1164	return EINVAL;
	1165	}
	1166	}
	1167
	1168	*result = 0;
	1169	const char *startstr = str;
	1170
	1171	while (*str != 0) {
	1172	unsigned int digit;
	1173
	1174	if ((str >= 'a') && (str <= 'z'))
	1175	digit = *str - 'a' + 10;
	1176	else if ((str >= 'A') && (str <= 'Z'))
	1177	digit = *str - 'A' + 10;
	1178	else if ((str >= '0') && (str <= '9'))
	1179	digit = *str - '0';
	1180	else
	1181	break;
	1182
	1183	if (digit >= base)
	1184	break;
	1185
	1186	uint64_t prev = *result;
	1187	result = (result) * base + digit;
	1188
	1189	if (*result < prev) {
	1190	/* Overflow */
	1191	endptr = (char ) str;
	1192	return EOVERFLOW;
	1193	}
	1194
	1195	str++;
	1196	}
	1197
	1198	if (str == startstr) {
	1199	/*
	1200	* No digits were decoded => first invalid character is
	1201	* the first character of the string.
	1202	*/
	1203	str = nptr;
	1204	}
	1205
	1206	endptr = (char ) str;
	1207
	1208	if (str == nptr)
	1209	return EINVAL;
	1210
	1211	return EOK;
	1212	}
	1213
	1214	/** Convert string to uint64_t.
	1215	*
	1216	* @param nptr Pointer to string.
	1217	* @param endptr If not NULL, pointer to the first invalid character
	1218	* is stored here.
	1219	* @param base Zero or number between 2 and 36 inclusive.
	1220	* @param strict Do not allow any trailing characters.
	1221	* @param result Result of the conversion.
	1222	*
	1223	* @return EOK if conversion was successful.
	1224	*
	1225	*/
	1226	int str_uint64(const char nptr, char *endptr, unsigned int base,
	1227	bool strict, uint64_t *result)
	1228	{
	1229	assert(result != NULL);
	1230
	1231	bool neg;
	1232	char *lendptr;
	1233	int ret = str_uint(nptr, &lendptr, base, &neg, result);
	1234
	1235	if (endptr != NULL)
	1236	endptr = (char ) lendptr;
	1237
	1238	if (ret != EOK)
	1239	return ret;
	1240
	1241	/* Do not allow negative values */
	1242	if (neg)
	1243	return EINVAL;
	1244
	1245	/* Check whether we are at the end of
	1246	the string in strict mode */
	1247	if ((strict) && (*lendptr != 0))
	1248	return EINVAL;
	1249
	1250	return EOK;
	1251	}
	1252
	1253	/** Convert string to size_t.
	1254	*
	1255	* @param nptr Pointer to string.
	1256	* @param endptr If not NULL, pointer to the first invalid character
	1257	* is stored here.
	1258	* @param base Zero or number between 2 and 36 inclusive.
	1259	* @param strict Do not allow any trailing characters.
	1260	* @param result Result of the conversion.
	1261	*
	1262	* @return EOK if conversion was successful.
	1263	*
	1264	*/
	1265	int str_size_t(const char nptr, char *endptr, unsigned int base,
	1266	bool strict, size_t *result)
	1267	{
	1268	assert(result != NULL);
	1269
	1270	bool neg;
	1271	char *lendptr;
	1272	uint64_t res;
	1273	int ret = str_uint(nptr, &lendptr, base, &neg, &res);
	1274
	1275	if (endptr != NULL)
	1276	endptr = (char ) lendptr;
	1277
	1278	if (ret != EOK)
	1279	return ret;
	1280
	1281	/* Do not allow negative values */
	1282	if (neg)
	1283	return EINVAL;
	1284
	1285	/* Check whether we are at the end of
	1286	the string in strict mode */
	1287	if ((strict) && (*lendptr != 0))
	1288	return EINVAL;
	1289
	1290	/* Check for overflow */
	1291	size_t _res = (size_t) res;
	1292	if (_res != res)
	1293	return EOVERFLOW;
	1294
	1295	*result = _res;
	1296
	1297	return EOK;
	1298	}
	1299
/** Scale a value and pick the matching decimal (SI) order suffix.
 *
 * A value is moved to the next larger unit only once it exceeds
 * 1000000 of the current unit, so the scaled result keeps at least
 * four significant digits.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Unit prefix character is stored here: ' ' (none), 'k',
 *               'M', 'G', 'T', 'P' or 'E'.
 *
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/* Units of 10^18 are exa. */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* Units of 10^15 are peta. */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
	1325
/** Scale a byte count and pick the matching binary (IEC) unit suffix.
 *
 * A value is moved to the next larger unit only once it exceeds
 * 1048576 (2^20) of the current unit.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Pointer to a constant unit string is stored here:
 *               "B", "KiB", "MiB", "GiB", "TiB" or "PiB".
 * @param fixed  If true, the plain byte suffix is returned with
 *               trailing padding ("B ") instead of "B".
 *
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* Units of 2^50 bytes are pebibytes. */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		/*
		 * NOTE(review): the padded form presumably exists to line up
		 * with the three-character suffixes -- confirm the intended
		 * amount of padding against the callers.
		 */
		if (fixed)
			*suffix = "B ";
		else
			*suffix = "B";
	}
}
	1352
[a46da63]	1353	/** @}
[b2951e2]	1354	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: