Context Navigation

source: mainline/uspace/lib/c/generic/str.c@ b48d046

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since b48d046 was b48d046, checked in by Martin Decky <martin@…>, 14 years ago
cstyle (no change in functionality)
Property mode set to `100644`
File size: 30.9 KB

Line
1	/*
2	* Copyright (c) 2005 Martin Decky
3	* Copyright (c) 2008 Jiri Svoboda
4	* Copyright (c) 2011 Martin Sucha
5	* All rights reserved.
6	*
7	* Redistribution and use in source and binary forms, with or without
8	* modification, are permitted provided that the following conditions
9	* are met:
10	*
11	* - Redistributions of source code must retain the above copyright
12	* notice, this list of conditions and the following disclaimer.
13	* - Redistributions in binary form must reproduce the above copyright
14	* notice, this list of conditions and the following disclaimer in the
15	* documentation and/or other materials provided with the distribution.
16	* - The name of the author may not be used to endorse or promote products
17	* derived from this software without specific prior written permission.
18	*
19	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29	*/
30
31	/** @addtogroup libc
32	* @{
33	*/
34	/** @file
35	*/
36
37	#include <str.h>
38	#include <stdlib.h>
39	#include <assert.h>
40	#include <stdint.h>
41	#include <ctype.h>
42	#include <malloc.h>
43	#include <errno.h>
44	#include <align.h>
45	#include <mem.h>
46	#include <str.h>
47
/** Byte mask consisting of lowest @n bits (out of 8), @n in 0..8 */
#define LO_MASK_8(n)  ((uint8_t) ((1u << (n)) - 1))

/** Mask consisting of lowest @n bits (out of 32), @n must be below 32 */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1))

/** Byte mask consisting of highest @n bits (out of 8)
 *
 * The complement is narrowed back to uint8_t, because operator ~ promotes
 * its operand to int and would otherwise yield a negative value with all
 * upper bits set.
 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
59
60	/** Decode a single character from a string.
61	*
62	* Decode a single character from a string of size @a size. Decoding starts
63	* at @a offset and this offset is moved to the beginning of the next
64	* character. In case of decoding error, offset generally advances at least
65	* by one. However, offset is never moved beyond size.
66	*
67	* @param str String (not necessarily NULL-terminated).
68	* @param offset Byte offset in string where to start decoding.
69	* @param size Size of the string (in bytes).
70	*
71	* @return Value of decoded character, U_SPECIAL on decoding error or
72	* NULL if attempt to decode beyond @a size.
73	*
74	*/
75	wchar_t str_decode(const char str, size_t offset, size_t size)
76	{
77	if (*offset + 1 > size)
78	return 0;
79
80	/* First byte read from string */
81	uint8_t b0 = (uint8_t) str[(*offset)++];
82
83	/* Determine code length */
84
85	unsigned int b0_bits; /* Data bits in first byte */
86	unsigned int cbytes; /* Number of continuation bytes */
87
88	if ((b0 & 0x80) == 0) {
89	/* 0xxxxxxx (Plain ASCII) */
90	b0_bits = 7;
91	cbytes = 0;
92	} else if ((b0 & 0xe0) == 0xc0) {
93	/* 110xxxxx 10xxxxxx */
94	b0_bits = 5;
95	cbytes = 1;
96	} else if ((b0 & 0xf0) == 0xe0) {
97	/* 1110xxxx 10xxxxxx 10xxxxxx */
98	b0_bits = 4;
99	cbytes = 2;
100	} else if ((b0 & 0xf8) == 0xf0) {
101	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
102	b0_bits = 3;
103	cbytes = 3;
104	} else {
105	/* 10xxxxxx -- unexpected continuation byte */
106	return U_SPECIAL;
107	}
108
109	if (*offset + cbytes > size)
110	return U_SPECIAL;
111
112	wchar_t ch = b0 & LO_MASK_8(b0_bits);
113
114	/* Decode continuation bytes */
115	while (cbytes > 0) {
116	uint8_t b = (uint8_t) str[(*offset)++];
117
118	/* Must be 10xxxxxx */
119	if ((b & 0xc0) != 0x80)
120	return U_SPECIAL;
121
122	/* Shift data bits to ch */
123	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
124	cbytes--;
125	}
126
127	return ch;
128	}
129
130	/** Encode a single character to string representation.
131	*
132	* Encode a single character to string representation (i.e. UTF-8) and store
133	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
134	* is moved to the position where the next character can be written to.
135	*
136	* @param ch Input character.
137	* @param str Output buffer.
138	* @param offset Byte offset where to start writing.
139	* @param size Size of the output buffer (in bytes).
140	*
141	* @return EOK if the character was encoded successfully, EOVERFLOW if there
142	* was not enough space in the output buffer or EINVAL if the character
143	* code was invalid.
144	*/
145	int chr_encode(const wchar_t ch, char str, size_t offset, size_t size)
146	{
147	if (*offset >= size)
148	return EOVERFLOW;
149
150	if (!chr_check(ch))
151	return EINVAL;
152
153	/* Unsigned version of ch (bit operations should only be done
154	on unsigned types). */
155	uint32_t cc = (uint32_t) ch;
156
157	/* Determine how many continuation bytes are needed */
158
159	unsigned int b0_bits; /* Data bits in first byte */
160	unsigned int cbytes; /* Number of continuation bytes */
161
162	if ((cc & ~LO_MASK_32(7)) == 0) {
163	b0_bits = 7;
164	cbytes = 0;
165	} else if ((cc & ~LO_MASK_32(11)) == 0) {
166	b0_bits = 5;
167	cbytes = 1;
168	} else if ((cc & ~LO_MASK_32(16)) == 0) {
169	b0_bits = 4;
170	cbytes = 2;
171	} else if ((cc & ~LO_MASK_32(21)) == 0) {
172	b0_bits = 3;
173	cbytes = 3;
174	} else {
175	/* Codes longer than 21 bits are not supported */
176	return EINVAL;
177	}
178
179	/* Check for available space in buffer */
180	if (*offset + cbytes >= size)
181	return EOVERFLOW;
182
183	/* Encode continuation bytes */
184	unsigned int i;
185	for (i = cbytes; i > 0; i--) {
186	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
187	cc = cc >> CONT_BITS;
188	}
189
190	/* Encode first byte */
191	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
192
193	/* Advance offset */
194	*offset += cbytes + 1;
195
196	return EOK;
197	}
198
/** Get size of string.
 *
 * Count the bytes occupied by the string @a str, not including the
 * terminating zero byte.
 *
 * @param str NULL-terminated string to measure.
 *
 * @return Number of bytes used by the string.
 *
 */
size_t str_size(const char *str)
{
	const char *end = str;

	/* Walk to the terminator; the distance travelled is the size */
	while (*end != '\0')
		end++;

	return (size_t) (end - str);
}
218
/** Get size of wide string.
 *
 * Compute the number of bytes occupied by the wide string @a str, not
 * including the terminating zero wide character.
 *
 * @param str NULL-terminated wide string to measure.
 *
 * @return Number of bytes used by the wide string.
 *
 */
size_t wstr_size(const wchar_t *str)
{
	size_t chars = wstr_length(str);

	return chars * sizeof(wchar_t);
}
233
234	/** Get size of string with length limit.
235	*
236	* Get the number of bytes which are used by up to @a max_len first
237	* characters in the string @a str. If @a max_len is greater than
238	* the length of @a str, the entire string is measured (excluding the
239	* NULL-terminator).
240	*
241	* @param str String to consider.
242	* @param max_len Maximum number of characters to measure.
243	*
244	* @return Number of bytes used by the characters.
245	*
246	*/
247	size_t str_lsize(const char *str, size_t max_len)
248	{
249	size_t len = 0;
250	size_t offset = 0;
251
252	while (len < max_len) {
253	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
254	break;
255
256	len++;
257	}
258
259	return offset;
260	}
261
/** Get size of wide string with length limit.
 *
 * Compute the number of bytes used by at most @a max_len leading wide
 * characters of the wide string @a str. When the wide string is shorter
 * than @a max_len characters, the entire wide string is measured
 * (excluding the NULL-terminator).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	/* wstr_nlength() takes its limit in bytes, not in characters */
	size_t byte_limit = max_len * sizeof(wchar_t);
	size_t chars = wstr_nlength(str, byte_limit);

	return chars * sizeof(wchar_t);
}
279
280	/** Get number of characters in a string.
281	*
282	* @param str NULL-terminated string.
283	*
284	* @return Number of characters in string.
285	*
286	*/
287	size_t str_length(const char *str)
288	{
289	size_t len = 0;
290	size_t offset = 0;
291
292	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
293	len++;
294
295	return len;
296	}
297
/** Get number of characters in a wide string.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return Number of wide characters in @a wstr, not counting the
 *         terminator.
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
	const wchar_t *cur = wstr;

	/* Advance to the terminating zero wide character */
	while (*cur != 0)
		cur++;

	return (size_t) (cur - wstr);
}
314
/** Get number of characters in a string with size limit.
 *
 * Characters are counted by decoding the string with str_decode() until
 * it returns 0, which happens at the terminator or once @a size bytes
 * have been consumed.
 *
 * @param str  NULL-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in string.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t offset = 0;
	size_t count;

	for (count = 0; str_decode(str, &offset, size) != 0; count++)
		;

	return count;
}
333
/** Get number of characters in a wide string with size limit.
 *
 * Only wide characters below the byte limit ALIGN_DOWN(@a size,
 * sizeof(wchar_t)) are examined.
 *
 * @param str  NULL-terminated wide string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of wide characters in the wide string.
 *
 */
size_t wstr_nlength(const wchar_t *str, size_t size)
{
	const size_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
	size_t len = 0;

	/* len * sizeof(wchar_t) is the byte offset of the examined character */
	while ((len * sizeof(wchar_t) < limit) && (str[len] != 0))
		len++;

	return len;
}
355
/** Check whether character is plain ASCII.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0 to 127 inclusive.
 *
 */
bool ascii_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 127);
}
368
/** Check whether character is valid
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0 to 1114111 (0x10ffff)
 *         inclusive, i.e. in the Unicode code point range.
 *
 */
bool chr_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 1114111);
}
381
382	/** Compare two NULL terminated strings.
383	*
384	* Do a char-by-char comparison of two NULL-terminated strings.
385	* The strings are considered equal iff they consist of the same
386	* characters on the minimum of their lengths.
387	*
388	* @param s1 First string to compare.
389	* @param s2 Second string to compare.
390	*
391	* @return 0 if the strings are equal, -1 if first is smaller,
392	* 1 if second smaller.
393	*
394	*/
395	int str_cmp(const char s1, const char s2)
396	{
397	wchar_t c1 = 0;
398	wchar_t c2 = 0;
399
400	size_t off1 = 0;
401	size_t off2 = 0;
402
403	while (true) {
404	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
405	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
406
407	if (c1 < c2)
408	return -1;
409
410	if (c1 > c2)
411	return 1;
412
413	if (c1 == 0 \|\| c2 == 0)
414	break;
415	}
416
417	return 0;
418	}
419
420	/** Compare two NULL terminated strings with length limit.
421	*
422	* Do a char-by-char comparison of two NULL-terminated strings.
423	* The strings are considered equal iff they consist of the same
424	* characters on the minimum of their lengths and the length limit.
425	*
426	* @param s1 First string to compare.
427	* @param s2 Second string to compare.
428	* @param max_len Maximum number of characters to consider.
429	*
430	* @return 0 if the strings are equal, -1 if first is smaller,
431	* 1 if second smaller.
432	*
433	*/
434	int str_lcmp(const char s1, const char s2, size_t max_len)
435	{
436	wchar_t c1 = 0;
437	wchar_t c2 = 0;
438
439	size_t off1 = 0;
440	size_t off2 = 0;
441
442	size_t len = 0;
443
444	while (true) {
445	if (len >= max_len)
446	break;
447
448	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
449	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
450
451	if (c1 < c2)
452	return -1;
453
454	if (c1 > c2)
455	return 1;
456
457	if (c1 == 0 \|\| c2 == 0)
458	break;
459
460	++len;
461	}
462
463	return 0;
464
465	}
466
467	/** Copy string.
468	*
469	* Copy source string @a src to destination buffer @a dest.
470	* No more than @a size bytes are written. If the size of the output buffer
471	* is at least one byte, the output string will always be well-formed, i.e.
472	* null-terminated and containing only complete characters.
473	*
474	* @param dest Destination buffer.
475	* @param count Size of the destination buffer (must be > 0).
476	* @param src Source string.
477	*/
478	void str_cpy(char dest, size_t size, const char src)
479	{
480	/* There must be space for a null terminator in the buffer. */
481	assert(size > 0);
482
483	size_t src_off = 0;
484	size_t dest_off = 0;
485
486	wchar_t ch;
487	while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
488	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
489	break;
490	}
491
492	dest[dest_off] = '\0';
493	}
494
495	/** Copy size-limited substring.
496	*
497	* Copy prefix of string @a src of max. size @a size to destination buffer
498	* @a dest. No more than @a size bytes are written. The output string will
499	* always be well-formed, i.e. null-terminated and containing only complete
500	* characters.
501	*
502	* No more than @a n bytes are read from the input string, so it does not
503	* have to be null-terminated.
504	*
505	* @param dest Destination buffer.
506	* @param count Size of the destination buffer (must be > 0).
507	* @param src Source string.
508	* @param n Maximum number of bytes to read from @a src.
509	*/
510	void str_ncpy(char dest, size_t size, const char src, size_t n)
511	{
512	/* There must be space for a null terminator in the buffer. */
513	assert(size > 0);
514
515	size_t src_off = 0;
516	size_t dest_off = 0;
517
518	wchar_t ch;
519	while ((ch = str_decode(src, &src_off, n)) != 0) {
520	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
521	break;
522	}
523
524	dest[dest_off] = '\0';
525	}
526
/** Append one string to another.
 *
 * Append source string @a src to the string held in destination buffer
 * @a dest. The total size of the destination buffer is @a size. When the
 * string already in @a dest occupies @a size bytes or more, nothing is
 * done. Otherwise the copy is carried out by str_cpy() into the unused
 * remainder of the buffer.
 *
 * @param dest Destination buffer.
 * @param size Size of the destination buffer.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	size_t used = str_size(dest);

	if (used < size)
		str_cpy(dest + used, size - used, src);
}
548
549	/** Convert space-padded ASCII to string.
550	*
551	* Common legacy text encoding in hardware is 7-bit ASCII fitted into
552	* a fixed-with byte buffer (bit 7 always zero), right-padded with spaces
553	* (ASCII 0x20). Convert space-padded ascii to string representation.
554	*
555	* If the text does not fit into the destination buffer, the function converts
556	* as many characters as possible and returns EOVERFLOW.
557	*
558	* If the text contains non-ASCII bytes (with bit 7 set), the whole string is
559	* converted anyway and invalid characters are replaced with question marks
560	* (U_SPECIAL) and the function returns EIO.
561	*
562	* Regardless of return value upon return @a dest will always be well-formed.
563	*
564	* @param dest Destination buffer
565	* @param size Size of destination buffer
566	* @param src Space-padded ASCII.
567	* @param n Size of the source buffer in bytes.
568	*
569	* @return EOK on success, EOVERFLOW if the text does not fit
570	* destination buffer, EIO if the text contains
571	* non-ASCII bytes.
572	*/
573	int spascii_to_str(char dest, size_t size, const uint8_t src, size_t n)
574	{
575	size_t sidx;
576	size_t didx;
577	size_t dlast;
578	uint8_t byte;
579	int rc;
580	int result;
581
582	/* There must be space for a null terminator in the buffer. */
583	assert(size > 0);
584	result = EOK;
585
586	didx = 0;
587	dlast = 0;
588	for (sidx = 0; sidx < n; ++sidx) {
589	byte = src[sidx];
590	if (!ascii_check(byte)) {
591	byte = U_SPECIAL;
592	result = EIO;
593	}
594
595	rc = chr_encode(byte, dest, &didx, size - 1);
596	if (rc != EOK) {
597	assert(rc == EOVERFLOW);
598	dest[didx] = '\0';
599	return rc;
600	}
601
602	/* Remember dest index after last non-empty character */
603	if (byte != 0x20)
604	dlast = didx;
605	}
606
607	/* Terminate string after last non-empty character */
608	dest[dlast] = '\0';
609	return result;
610	}
611
612	/** Convert wide string to string.
613	*
614	* Convert wide string @a src to string. The output is written to the buffer
615	* specified by @a dest and @a size. @a size must be non-zero and the string
616	* written will always be well-formed.
617	*
618	* @param dest Destination buffer.
619	* @param size Size of the destination buffer.
620	* @param src Source wide string.
621	*/
622	void wstr_to_str(char dest, size_t size, const wchar_t src)
623	{
624	wchar_t ch;
625	size_t src_idx;
626	size_t dest_off;
627
628	/* There must be space for a null terminator in the buffer. */
629	assert(size > 0);
630
631	src_idx = 0;
632	dest_off = 0;
633
634	while ((ch = src[src_idx++]) != 0) {
635	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
636	break;
637	}
638
639	dest[dest_off] = '\0';
640	}
641
642	/** Convert wide string to new string.
643	*
644	* Convert wide string @a src to string. Space for the new string is allocated
645	* on the heap.
646	*
647	* @param src Source wide string.
648	* @return New string.
649	*/
650	char wstr_to_astr(const wchar_t src)
651	{
652	char dbuf[STR_BOUNDS(1)];
653	char *str;
654	wchar_t ch;
655
656	size_t src_idx;
657	size_t dest_off;
658	size_t dest_size;
659
660	/* Compute size of encoded string. */
661
662	src_idx = 0;
663	dest_size = 0;
664
665	while ((ch = src[src_idx++]) != 0) {
666	dest_off = 0;
667	if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
668	break;
669	dest_size += dest_off;
670	}
671
672	str = malloc(dest_size + 1);
673	if (str == NULL)
674	return NULL;
675
676	/* Encode string. */
677
678	src_idx = 0;
679	dest_off = 0;
680
681	while ((ch = src[src_idx++]) != 0) {
682	if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
683	break;
684	}
685
686	str[dest_size] = '\0';
687	return str;
688	}
689
690
691	/** Convert string to wide string.
692	*
693	* Convert string @a src to wide string. The output is written to the
694	* buffer specified by @a dest and @a dlen. @a dlen must be non-zero
695	* and the wide string written will always be null-terminated.
696	*
697	* @param dest Destination buffer.
698	* @param dlen Length of destination buffer (number of wchars).
699	* @param src Source string.
700	*/
701	void str_to_wstr(wchar_t dest, size_t dlen, const char src)
702	{
703	size_t offset;
704	size_t di;
705	wchar_t c;
706
707	assert(dlen > 0);
708
709	offset = 0;
710	di = 0;
711
712	do {
713	if (di >= dlen - 1)
714	break;
715
716	c = str_decode(src, &offset, STR_NO_LIMIT);
717	dest[di++] = c;
718	} while (c != '\0');
719
720	dest[dlen - 1] = '\0';
721	}
722
/** Convert string to newly allocated wide string.
 *
 * Decode string @a str into a wide NULL-terminated string stored in
 * a zero-initialized buffer obtained from calloc().
 *
 * @param str Source string.
 *
 * @return New wide string, or NULL if the allocation failed.
 */
wchar_t *str_to_awstr(const char *str)
{
	/* One extra slot for the terminating zero */
	size_t slots = str_length(str) + 1;

	wchar_t *wstr = calloc(slots, sizeof(wchar_t));
	if (wstr != NULL)
		str_to_wstr(wstr, slots, str);

	return wstr;
}
741
742	/** Find first occurence of character in string.
743	*
744	* @param str String to search.
745	* @param ch Character to look for.
746	*
747	* @return Pointer to character in @a str or NULL if not found.
748	*/
749	char str_chr(const char str, wchar_t ch)
750	{
751	wchar_t acc;
752	size_t off = 0;
753	size_t last = 0;
754
755	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
756	if (acc == ch)
757	return (char *) (str + last);
758	last = off;
759	}
760
761	return NULL;
762	}
763
764	/** Find last occurence of character in string.
765	*
766	* @param str String to search.
767	* @param ch Character to look for.
768	*
769	* @return Pointer to character in @a str or NULL if not found.
770	*/
771	char str_rchr(const char str, wchar_t ch)
772	{
773	wchar_t acc;
774	size_t off = 0;
775	size_t last = 0;
776	const char *res = NULL;
777
778	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
779	if (acc == ch)
780	res = (str + last);
781	last = off;
782	}
783
784	return (char *) res;
785	}
786
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position @a pos.
 * The characters from that position on, including the terminator,
 * are shifted by one element towards the end.
 *
 * NOTE(review): only @a pos is checked against @a max_pos, not the
 * resulting string length; presumably the caller guarantees that the
 * buffer can hold one more character. Confirm against callers.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	if ((pos > len) || (pos + 1 > max_pos))
		return false;

	/* Move elements pos..len (terminator included) one slot up */
	for (size_t dst = len + 1; dst > pos; dst--)
		str[dst] = str[dst - 1];

	str[pos] = ch;

	return true;
}
816
/** Remove a wide character from a wide string.
 *
 * Remove the wide character at position @a pos. The characters behind
 * it, including the terminator, are shifted by one element towards the
 * beginning.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	size_t len = wstr_length(str);

	if (pos >= len)
		return false;

	/* Pull elements pos+1..len (terminator included) one slot down */
	for (size_t dst = pos; dst < len; dst++)
		str[dst] = str[dst + 1];

	return true;
}
842
/** Compare two strings byte by byte, ignoring case.
 *
 * Bytes are folded with tolower() before being compared. Each byte is
 * converted to unsigned char first, because passing a negative plain
 * char value to tolower() is undefined behavior.
 *
 * @param a First string.
 * @param b Second string.
 *
 * @return Zero if the strings are equal when case is ignored, otherwise
 *         the difference of the first pair of folded bytes that differ
 *         (negative if @a a sorts first, positive if @a b sorts first).
 */
int stricmp(const char *a, const char *b)
{
	size_t i = 0;

	for (;;) {
		int ca = tolower((unsigned char) a[i]);
		int cb = tolower((unsigned char) b[i]);

		/* Stop at the first difference or at the common terminator */
		if ((ca != cb) || (ca == 0))
			return ca - cb;

		i++;
	}
}
852
/** Convert string to a number.
 *
 * Core of strtol and strtoul functions. Leading whitespace and an
 * optional sign are skipped, then digits valid in @a base are consumed.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character. If no number was found or @a base is
 *               invalid, @a nptr is stored.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @param sgn    It's set to 1 if minus found (left untouched otherwise).
 *
 * @return Result of conversion, 0 if no number was found or @a base is
 *         invalid, ULONG_MAX if the number does not fit.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
	unsigned long result = 0;
	bool overflow = false;
	const char *str = nptr;

	/* isspace() must not be handed a negative plain char value */
	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*sgn = 1;
		++str;
	} else if (*str == '+')
		++str;

	if (base) {
		if ((base < 2) || (base > 36)) {
			/* FIXME: set errno to EINVAL */
			if (endptr)
				*endptr = (char *) nptr;
			return 0;
		}
		if ((base == 16) && (*str == '0') && ((str[1] == 'x') ||
		    (str[1] == 'X'))) {
			str += 2;
		}
	} else {
		base = 10;

		if (*str == '0') {
			base = 8;
			if ((str[1] == 'X') || (str[1] == 'x')) {
				base = 16;
				str += 2;
			}
		}
	}

	const char *digits = str;

	while (*str) {
		unsigned char c = (unsigned char) *str;
		unsigned int digit;

		if ((c >= '0') && (c <= '9'))
			digit = c - '0';
		else if ((c >= 'a') && (c <= 'z'))
			digit = c - 'a' + 10;
		else if ((c >= 'A') && (c <= 'Z'))
			digit = c - 'A' + 10;
		else
			break;

		/* Valid digits are 0 .. base - 1 */
		if (digit >= (unsigned int) base)
			break;

		if (!overflow) {
			/*
			 * Multiply in two halves so that the overflow can be
			 * detected without a wider type.
			 */
			unsigned long a = (result & 0xff) * base + digit;
			unsigned long b = (result >> 8) * base + (a >> 8);

			if (b > (ULONG_MAX >> 8)) {
				/* FIXME: errno = ERANGE */
				overflow = true;
			} else
				result = (b << 8) + (a & 0xff);
		}

		/* Keep consuming digits so that endptr ends up behind them */
		++str;
	}

	if (str == digits) {
		/*
		 * No number was found => first invalid character is the first
		 * character of the string.
		 */
		/* FIXME: set errno to EINVAL */
		str = nptr;
		result = 0;
	}

	if (endptr)
		*endptr = (char *) str;

	if (overflow)
		return ULONG_MAX;

	return result;
}
945
/** Convert initial part of string to long int according to given base.
 *
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * Values outside the range of long int are clamped to LONG_MIN or LONG_MAX.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 *
 * @return Result of conversion.
 */
long int strtol(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = _strtoul(nptr, endptr, base, &sgn);

	if (number > LONG_MAX) {
		if ((sgn) && (number == (unsigned long) (LONG_MAX) + 1)) {
			/*
			 * Exactly LONG_MIN, which is still in range.
			 * Returned as a constant, because converting the
			 * out-of-range unsigned value is implementation-defined.
			 */
			/* FIXME: set 0 to errno */
			return LONG_MIN;
		}
		/* FIXME: set ERANGE to errno */
		return (sgn ? LONG_MIN : LONG_MAX);
	}

	/* The magnitude fits into long, negate in the signed domain */
	return (sgn ? -(long) number : (long) number);
}
977
/** Duplicate string.
 *
 * Allocate a new buffer with malloc() and copy the source string into it
 * using str_cpy(). The duplicate is always a null-terminated string, but
 * since it is produced by decoding and re-encoding the source, it can
 * differ from the source string on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	/* Room for all bytes of the source plus the terminator */
	size_t bytes = str_size(src) + 1;

	char *copy = (char *) malloc(bytes);
	if (copy != NULL)
		str_cpy(copy, bytes, src);

	return copy;
}
1004
/** Duplicate string with size limit.
 *
 * Allocate a new buffer with malloc() and copy up to @a n bytes of the
 * source string into it using str_ncpy(). No more than @a n + 1 bytes
 * are allocated; if the source string occupies fewer than @a n bytes,
 * less is allocated.
 *
 * The duplicate is always a null-terminated string, but since it is
 * produced by decoding and re-encoding the source, it can differ from
 * the source string on the byte level.
 *
 * @param src Source string.
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	size_t full = str_size(src);
	size_t bytes = (full > n) ? n : full;

	char *copy = (char *) malloc(bytes + 1);
	if (copy != NULL)
		str_ncpy(copy, bytes + 1, src, bytes);

	return copy;
}
1038
1039
/** Convert initial part of string to unsigned long according to given base.
 *
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * When a minus sign was present, the result is negated in unsigned
 * arithmetic.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 *
 * @return Result of conversion.
 */
unsigned long strtoul(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = _strtoul(nptr, endptr, base, &sgn);

	if (sgn)
		return -number;

	return number;
}
1062
/** Split string into tokens.
 *
 * Wrapper around strtok_r() that keeps the scan position in a static
 * variable shared by all callers.
 *
 * @param s     String to tokenize, or NULL to continue with the string
 *              from the previous call.
 * @param delim String of delimiter characters.
 *
 * @return Next token, or NULL if there are no more tokens.
 */
char *strtok(char *s, const char *delim)
{
	static char *saved;

	return strtok_r(s, delim, &saved);
}
1069
/** Split string into tokens, keeping the scan position in @a next.
 *
 * Skip leading delimiters, find the end of the following token, overwrite
 * the delimiter behind it with a null terminator and remember in @a next
 * where the scan should continue.
 *
 * @param s     String to tokenize, or NULL to continue at @a *next.
 * @param delim String of delimiter characters.
 * @param next  Scan position, updated on every call.
 *
 * @return Next token, or NULL if there are no more tokens.
 */
char *strtok_r(char *s, const char *delim, char **next)
{
	char *cur = (s != NULL) ? s : *next;

	/* Skip over leading delimiters. */
	while ((*cur != '\0') && (str_chr(delim, *cur) != NULL))
		cur++;

	char *token = cur;

	/* Skip over token characters. */
	while ((*cur != '\0') && (str_chr(delim, *cur) == NULL))
		cur++;

	/* Continue behind the delimiter, or stay put at the end of string */
	*next = (*cur != '\0') ? cur + 1 : cur;

	if (token == cur)
		return NULL;  /* No more tokens. */

	/* Overwrite delimiter with NULL terminator. */
	*cur = '\0';
	return token;
}
1094
1095	/** Convert string to uint64_t (internal variant).
1096	*
1097	* @param nptr Pointer to string.
1098	* @param endptr Pointer to the first invalid character is stored here.
1099	* @param base Zero or number between 2 and 36 inclusive.
1100	* @param neg Indication of unary minus is stored here.
1101	* @apram result Result of the conversion.
1102	*
1103	* @return EOK if conversion was successful.
1104	*
1105	*/
1106	static int str_uint(const char nptr, char *endptr, unsigned int base,
1107	bool neg, uint64_t result)
1108	{
1109	assert(endptr != NULL);
1110	assert(neg != NULL);
1111	assert(result != NULL);
1112
1113	*neg = false;
1114	const char *str = nptr;
1115
1116	/* Ignore leading whitespace */
1117	while (isspace(*str))
1118	str++;
1119
1120	if (*str == '-') {
1121	*neg = true;
1122	str++;
1123	} else if (*str == '+')
1124	str++;
1125
1126	if (base == 0) {
1127	/* Decode base if not specified */
1128	base = 10;
1129
1130	if (*str == '0') {
1131	base = 8;
1132	str++;
1133
1134	switch (*str) {
1135	case 'b':
1136	case 'B':
1137	base = 2;
1138	str++;
1139	break;
1140	case 'o':
1141	case 'O':
1142	base = 8;
1143	str++;
1144	break;
1145	case 'd':
1146	case 'D':
1147	case 't':
1148	case 'T':
1149	base = 10;
1150	str++;
1151	break;
1152	case 'x':
1153	case 'X':
1154	base = 16;
1155	str++;
1156	break;
1157	default:
1158	str--;
1159	}
1160	}
1161	} else {
1162	/* Check base range */
1163	if ((base < 2) \|\| (base > 36)) {
1164	endptr = (char ) str;
1165	return EINVAL;
1166	}
1167	}
1168
1169	*result = 0;
1170	const char *startstr = str;
1171
1172	while (*str != 0) {
1173	unsigned int digit;
1174
1175	if ((str >= 'a') && (str <= 'z'))
1176	digit = *str - 'a' + 10;
1177	else if ((str >= 'A') && (str <= 'Z'))
1178	digit = *str - 'A' + 10;
1179	else if ((str >= '0') && (str <= '9'))
1180	digit = *str - '0';
1181	else
1182	break;
1183
1184	if (digit >= base)
1185	break;
1186
1187	uint64_t prev = *result;
1188	result = (result) * base + digit;
1189
1190	if (*result < prev) {
1191	/* Overflow */
1192	endptr = (char ) str;
1193	return EOVERFLOW;
1194	}
1195
1196	str++;
1197	}
1198
1199	if (str == startstr) {
1200	/*
1201	* No digits were decoded => first invalid character is
1202	* the first character of the string.
1203	*/
1204	str = nptr;
1205	}
1206
1207	endptr = (char ) str;
1208
1209	if (str == nptr)
1210	return EINVAL;
1211
1212	return EOK;
1213	}
1214
1215	/** Convert string to uint64_t.
1216	*
1217	* @param nptr Pointer to string.
1218	* @param endptr If not NULL, pointer to the first invalid character
1219	* is stored here.
1220	* @param base Zero or number between 2 and 36 inclusive.
1221	* @param strict Do not allow any trailing characters.
1222	* @param result Result of the conversion.
1223	*
1224	* @return EOK if conversion was successful.
1225	*
1226	*/
1227	int str_uint64(const char nptr, char *endptr, unsigned int base,
1228	bool strict, uint64_t *result)
1229	{
1230	assert(result != NULL);
1231
1232	bool neg;
1233	char *lendptr;
1234	int ret = str_uint(nptr, &lendptr, base, &neg, result);
1235
1236	if (endptr != NULL)
1237	endptr = (char ) lendptr;
1238
1239	if (ret != EOK)
1240	return ret;
1241
1242	/* Do not allow negative values */
1243	if (neg)
1244	return EINVAL;
1245
1246	/* Check whether we are at the end of
1247	the string in strict mode */
1248	if ((strict) && (*lendptr != 0))
1249	return EINVAL;
1250
1251	return EOK;
1252	}
1253
1254	/** Convert string to size_t.
1255	*
1256	* @param nptr Pointer to string.
1257	* @param endptr If not NULL, pointer to the first invalid character
1258	* is stored here.
1259	* @param base Zero or number between 2 and 36 inclusive.
1260	* @param strict Do not allow any trailing characters.
1261	* @param result Result of the conversion.
1262	*
1263	* @return EOK if conversion was successful.
1264	*
1265	*/
1266	int str_size_t(const char nptr, char *endptr, unsigned int base,
1267	bool strict, size_t *result)
1268	{
1269	assert(result != NULL);
1270
1271	bool neg;
1272	char *lendptr;
1273	uint64_t res;
1274	int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1275
1276	if (endptr != NULL)
1277	endptr = (char ) lendptr;
1278
1279	if (ret != EOK)
1280	return ret;
1281
1282	/* Do not allow negative values */
1283	if (neg)
1284	return EINVAL;
1285
1286	/* Check whether we are at the end of
1287	the string in strict mode */
1288	if ((strict) && (*lendptr != 0))
1289	return EINVAL;
1290
1291	/* Check for overflow */
1292	size_t _res = (size_t) res;
1293	if (_res != res)
1294	return EOVERFLOW;
1295
1296	*result = _res;
1297
1298	return EOK;
1299	}
1300
/** Scale a value down and pick the matching decimal unit prefix.
 *
 * The value is divided by the largest power of 1000 for which it exceeds
 * the next higher threshold used below, and the SI prefix of that power
 * is reported: 'k' (10^3), 'M' (10^6), 'G' (10^9), 'T' (10^12),
 * 'P' (10^15) or 'E' (10^18). Small values are passed through unchanged
 * with a space as the suffix.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Unit prefix character is stored here.
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/* Divided by 10^18, i.e. exa */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* Divided by 10^15, i.e. peta */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
1326
/** Scale a byte count down and pick the matching binary unit.
 *
 * The value is divided by the largest power of 1024 for which it exceeds
 * the next higher threshold used below, and the IEC unit of that power
 * is reported: "KiB" (2^10), "MiB" (2^20), "GiB" (2^30), "TiB" (2^40)
 * or "PiB" (2^50). Small values are passed through unchanged.
 *
 * @param val    Value to scale (in bytes).
 * @param rv     Scaled value is stored here.
 * @param suffix Pointer to a constant unit string is stored here.
 * @param fixed  If true, the plain byte unit is padded to three
 *               characters ("B  ") to match the width of the others.
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* Divided by 2^50, i.e. pebibytes */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		if (fixed)
			*suffix = "B  ";
		else
			*suffix = "B";
	}
}
1353
1354	/** @}
1355	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: