Context Navigation

source: mainline/uspace/lib/c/generic/str.c@ fcc3cd8

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since fcc3cd8 was 82374b2, checked in by Oleg Romanenko <romanenko.oleg@…>, 14 years ago
Add utf16_to_str function doing conversion between UTF16 string and UTF8 with surrogate pairs support
Property mode set to `100644`
File size: 31.5 KB

Line
1	/*
2	* Copyright (c) 2005 Martin Decky
3	* Copyright (c) 2008 Jiri Svoboda
4	* Copyright (c) 2011 Oleg Romanenko
5	* All rights reserved.
6	*
7	* Redistribution and use in source and binary forms, with or without
8	* modification, are permitted provided that the following conditions
9	* are met:
10	*
11	* - Redistributions of source code must retain the above copyright
12	* notice, this list of conditions and the following disclaimer.
13	* - Redistributions in binary form must reproduce the above copyright
14	* notice, this list of conditions and the following disclaimer in the
15	* documentation and/or other materials provided with the distribution.
16	* - The name of the author may not be used to endorse or promote products
17	* derived from this software without specific prior written permission.
18	*
19	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29	*/
30
31	/** @addtogroup libc
32	* @{
33	*/
34	/** @file
35	*/
36
37	#include <str.h>
38	#include <stdlib.h>
39	#include <assert.h>
40	#include <stdint.h>
41	#include <ctype.h>
42	#include <malloc.h>
43	#include <errno.h>
44	#include <align.h>
45	#include <mem.h>
46	#include <str.h>
47
/** Byte mask consisting of lowest @n bits (out of 8) */
#define LO_MASK_8(n)  ((uint8_t) ((1 << (n)) - 1))

/**
 * Bit mask consisting of lowest @n bits (out of 32).
 *
 * The shift is done on an unsigned 32-bit value so that no signed
 * overflow can occur. Valid for @n smaller than 32.
 */
#define LO_MASK_32(n)  ((uint32_t) (((uint32_t) 1 << (n)) - 1))

/**
 * Byte mask consisting of highest @n bits (out of 8).
 *
 * The complement is computed after integer promotion, so the result
 * also has bits above the low byte set; it is meant to be stored into
 * (or truncated to) a single byte.
 */
#define HI_MASK_8(n)  (~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
59
60	/** Decode a single character from a string.
61	*
62	* Decode a single character from a string of size @a size. Decoding starts
63	* at @a offset and this offset is moved to the beginning of the next
64	* character. In case of decoding error, offset generally advances at least
65	* by one. However, offset is never moved beyond size.
66	*
67	* @param str String (not necessarily NULL-terminated).
68	* @param offset Byte offset in string where to start decoding.
69	* @param size Size of the string (in bytes).
70	*
71	* @return Value of decoded character, U_SPECIAL on decoding error or
72	* NULL if attempt to decode beyond @a size.
73	*
74	*/
75	wchar_t str_decode(const char str, size_t offset, size_t size)
76	{
77	if (*offset + 1 > size)
78	return 0;
79
80	/* First byte read from string */
81	uint8_t b0 = (uint8_t) str[(*offset)++];
82
83	/* Determine code length */
84
85	unsigned int b0_bits; /* Data bits in first byte */
86	unsigned int cbytes; /* Number of continuation bytes */
87
88	if ((b0 & 0x80) == 0) {
89	/* 0xxxxxxx (Plain ASCII) */
90	b0_bits = 7;
91	cbytes = 0;
92	} else if ((b0 & 0xe0) == 0xc0) {
93	/* 110xxxxx 10xxxxxx */
94	b0_bits = 5;
95	cbytes = 1;
96	} else if ((b0 & 0xf0) == 0xe0) {
97	/* 1110xxxx 10xxxxxx 10xxxxxx */
98	b0_bits = 4;
99	cbytes = 2;
100	} else if ((b0 & 0xf8) == 0xf0) {
101	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
102	b0_bits = 3;
103	cbytes = 3;
104	} else {
105	/* 10xxxxxx -- unexpected continuation byte */
106	return U_SPECIAL;
107	}
108
109	if (*offset + cbytes > size)
110	return U_SPECIAL;
111
112	wchar_t ch = b0 & LO_MASK_8(b0_bits);
113
114	/* Decode continuation bytes */
115	while (cbytes > 0) {
116	uint8_t b = (uint8_t) str[(*offset)++];
117
118	/* Must be 10xxxxxx */
119	if ((b & 0xc0) != 0x80)
120	return U_SPECIAL;
121
122	/* Shift data bits to ch */
123	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
124	cbytes--;
125	}
126
127	return ch;
128	}
129
130	/** Encode a single character to string representation.
131	*
132	* Encode a single character to string representation (i.e. UTF-8) and store
133	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
134	* is moved to the position where the next character can be written to.
135	*
136	* @param ch Input character.
137	* @param str Output buffer.
138	* @param offset Byte offset where to start writing.
139	* @param size Size of the output buffer (in bytes).
140	*
141	* @return EOK if the character was encoded successfully, EOVERFLOW if there
142	* was not enough space in the output buffer or EINVAL if the character
143	* code was invalid.
144	*/
145	int chr_encode(const wchar_t ch, char str, size_t offset, size_t size)
146	{
147	if (*offset >= size)
148	return EOVERFLOW;
149
150	if (!chr_check(ch))
151	return EINVAL;
152
153	/* Unsigned version of ch (bit operations should only be done
154	on unsigned types). */
155	uint32_t cc = (uint32_t) ch;
156
157	/* Determine how many continuation bytes are needed */
158
159	unsigned int b0_bits; /* Data bits in first byte */
160	unsigned int cbytes; /* Number of continuation bytes */
161
162	if ((cc & ~LO_MASK_32(7)) == 0) {
163	b0_bits = 7;
164	cbytes = 0;
165	} else if ((cc & ~LO_MASK_32(11)) == 0) {
166	b0_bits = 5;
167	cbytes = 1;
168	} else if ((cc & ~LO_MASK_32(16)) == 0) {
169	b0_bits = 4;
170	cbytes = 2;
171	} else if ((cc & ~LO_MASK_32(21)) == 0) {
172	b0_bits = 3;
173	cbytes = 3;
174	} else {
175	/* Codes longer than 21 bits are not supported */
176	return EINVAL;
177	}
178
179	/* Check for available space in buffer */
180	if (*offset + cbytes >= size)
181	return EOVERFLOW;
182
183	/* Encode continuation bytes */
184	unsigned int i;
185	for (i = cbytes; i > 0; i--) {
186	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
187	cc = cc >> CONT_BITS;
188	}
189
190	/* Encode first byte */
191	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
192
193	/* Advance offset */
194	*offset += cbytes + 1;
195
196	return EOK;
197	}
198
/** Get size of string.
 *
 * Get the number of bytes which are used by the string @a str (excluding the
 * NULL-terminator).
 *
 * @param str String to consider (must be NULL-terminated).
 *
 * @return Number of bytes used by the string
 *
 */
size_t str_size(const char *str)
{
	size_t size = 0;

	while (*str++ != 0)
		size++;

	return size;
}
218
/** Get size of wide string.
 *
 * Get the number of bytes which are used by the wide string @a str
 * (excluding the NULL-terminator).
 *
 * @param str Wide string to consider (must be NULL-terminated).
 *
 * @return Number of bytes used by the wide string
 *
 */
size_t wstr_size(const wchar_t *str)
{
	return (wstr_length(str) * sizeof(wchar_t));
}
233
234	/** Get size of string with length limit.
235	*
236	* Get the number of bytes which are used by up to @a max_len first
237	* characters in the string @a str. If @a max_len is greater than
238	* the length of @a str, the entire string is measured (excluding the
239	* NULL-terminator).
240	*
241	* @param str String to consider.
242	* @param max_len Maximum number of characters to measure.
243	*
244	* @return Number of bytes used by the characters.
245	*
246	*/
247	size_t str_lsize(const char *str, size_t max_len)
248	{
249	size_t len = 0;
250	size_t offset = 0;
251
252	while (len < max_len) {
253	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
254	break;
255
256	len++;
257	}
258
259	return offset;
260	}
261
/** Get size of wide string with length limit.
 *
 * Get the number of bytes which are used by up to @a max_len first
 * wide characters in the wide string @a str. If @a max_len is greater than
 * the length of @a str, the entire wide string is measured (excluding the
 * NULL-terminator).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	/* wstr_nlength() takes its limit in bytes, not in characters */
	return (wstr_nlength(str, max_len * sizeof(wchar_t)) * sizeof(wchar_t));
}
279
280	/** Get number of characters in a string.
281	*
282	* @param str NULL-terminated string.
283	*
284	* @return Number of characters in string.
285	*
286	*/
287	size_t str_length(const char *str)
288	{
289	size_t len = 0;
290	size_t offset = 0;
291
292	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
293	len++;
294
295	return len;
296	}
297
/** Get number of characters in a wide string.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return Number of characters in @a wstr (excluding the terminator).
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
	size_t len = 0;

	while (*wstr++ != 0)
		len++;

	return len;
}
314
/** Get number of characters in a string with size limit.
 *
 * Counting stops at the NULL-terminator or when @a size bytes have been
 * consumed, whichever comes first.
 *
 * @param str  NULL-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in string.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t len = 0;
	size_t offset = 0;

	while (str_decode(str, &offset, size) != 0)
		len++;

	return len;
}
333
/** Get number of characters in a wide string with size limit.
 *
 * Counting stops at the NULL-terminator or when the size limit is
 * reached, whichever comes first. The limit is given in bytes and is
 * aligned down to a multiple of sizeof(wchar_t).
 *
 * @param str  NULL-terminated wide string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in wide string.
 *
 */
size_t wstr_nlength(const wchar_t *str, size_t size)
{
	size_t len = 0;
	size_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
	size_t offset = 0;

	while ((offset < limit) && (*str++ != 0)) {
		len++;
		offset += sizeof(wchar_t);
	}

	return len;
}
355
/** Check whether character is plain ASCII.
 *
 * @param ch Character to check.
 *
 * @return True if character is plain ASCII (in range 0 to 127).
 *
 */
bool ascii_check(wchar_t ch)
{
	if ((ch >= 0) && (ch <= 127))
		return true;

	return false;
}
368
/** Check whether wide string is plain ASCII.
 *
 * @param wstr NULL-terminated wide string to check.
 *
 * @return True if wide string is plain ASCII.
 *
 */
bool wstr_is_ascii(const wchar_t *wstr)
{
	/* Skip the leading run of ASCII characters */
	while (*wstr && ascii_check(*wstr))
		wstr++;

	/* Pure ASCII iff the run extends to the terminator */
	return *wstr == 0;
}
380
/** Check whether character is valid
 *
 * Only the range is checked (0 to 0x10ffff inclusive).
 *
 * @param ch Character to check.
 *
 * @return True if character is a valid Unicode code point.
 *
 */
bool chr_check(wchar_t ch)
{
	/* 0x10ffff == 1114111, the highest Unicode code point */
	if ((ch >= 0) && (ch <= 0x10ffff))
		return true;

	return false;
}
393
394	/** Compare two NULL terminated strings.
395	*
396	* Do a char-by-char comparison of two NULL-terminated strings.
397	* The strings are considered equal iff they consist of the same
398	* characters on the minimum of their lengths.
399	*
400	* @param s1 First string to compare.
401	* @param s2 Second string to compare.
402	*
403	* @return 0 if the strings are equal, -1 if first is smaller,
404	* 1 if second smaller.
405	*
406	*/
407	int str_cmp(const char s1, const char s2)
408	{
409	wchar_t c1 = 0;
410	wchar_t c2 = 0;
411
412	size_t off1 = 0;
413	size_t off2 = 0;
414
415	while (true) {
416	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
417	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
418
419	if (c1 < c2)
420	return -1;
421
422	if (c1 > c2)
423	return 1;
424
425	if (c1 == 0 \|\| c2 == 0)
426	break;
427	}
428
429	return 0;
430	}
431
432	/** Compare two NULL terminated strings with length limit.
433	*
434	* Do a char-by-char comparison of two NULL-terminated strings.
435	* The strings are considered equal iff they consist of the same
436	* characters on the minimum of their lengths and the length limit.
437	*
438	* @param s1 First string to compare.
439	* @param s2 Second string to compare.
440	* @param max_len Maximum number of characters to consider.
441	*
442	* @return 0 if the strings are equal, -1 if first is smaller,
443	* 1 if second smaller.
444	*
445	*/
446	int str_lcmp(const char s1, const char s2, size_t max_len)
447	{
448	wchar_t c1 = 0;
449	wchar_t c2 = 0;
450
451	size_t off1 = 0;
452	size_t off2 = 0;
453
454	size_t len = 0;
455
456	while (true) {
457	if (len >= max_len)
458	break;
459
460	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
461	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
462
463	if (c1 < c2)
464	return -1;
465
466	if (c1 > c2)
467	return 1;
468
469	if (c1 == 0 \|\| c2 == 0)
470	break;
471
472	++len;
473	}
474
475	return 0;
476
477	}
478
479	/** Copy string.
480	*
481	* Copy source string @a src to destination buffer @a dest.
482	* No more than @a size bytes are written. If the size of the output buffer
483	* is at least one byte, the output string will always be well-formed, i.e.
484	* null-terminated and containing only complete characters.
485	*
486	* @param dest Destination buffer.
487	* @param count Size of the destination buffer (must be > 0).
488	* @param src Source string.
489	*/
490	void str_cpy(char dest, size_t size, const char src)
491	{
492	/* There must be space for a null terminator in the buffer. */
493	assert(size > 0);
494
495	size_t src_off = 0;
496	size_t dest_off = 0;
497
498	wchar_t ch;
499	while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
500	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
501	break;
502	}
503
504	dest[dest_off] = '\0';
505	}
506
507	/** Copy size-limited substring.
508	*
509	* Copy prefix of string @a src of max. size @a size to destination buffer
510	* @a dest. No more than @a size bytes are written. The output string will
511	* always be well-formed, i.e. null-terminated and containing only complete
512	* characters.
513	*
514	* No more than @a n bytes are read from the input string, so it does not
515	* have to be null-terminated.
516	*
517	* @param dest Destination buffer.
518	* @param count Size of the destination buffer (must be > 0).
519	* @param src Source string.
520	* @param n Maximum number of bytes to read from @a src.
521	*/
522	void str_ncpy(char dest, size_t size, const char src, size_t n)
523	{
524	/* There must be space for a null terminator in the buffer. */
525	assert(size > 0);
526
527	size_t src_off = 0;
528	size_t dest_off = 0;
529
530	wchar_t ch;
531	while ((ch = str_decode(src, &src_off, n)) != 0) {
532	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
533	break;
534	}
535
536	dest[dest_off] = '\0';
537	}
538
/** Append one string to another.
 *
 * Append source string @a src to string in destination buffer @a dest.
 * Size of the destination buffer is @a size. The output string will always
 * be well-formed, i.e. null-terminated and containing only complete
 * characters. If the string already in @a dest leaves no room in the
 * buffer (not even for a terminator), nothing is appended.
 *
 * @param dest Destination buffer (holding a NULL-terminated string).
 * @param size Size of the destination buffer.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	size_t dstr_size;

	dstr_size = str_size(dest);

	/* Avoid unsigned underflow of the remaining size below */
	if (dstr_size >= size)
		return;

	str_cpy(dest + dstr_size, size - dstr_size, src);
}
557
558	/** Convert wide string to string.
559	*
560	* Convert wide string @a src to string. The output is written to the buffer
561	* specified by @a dest and @a size. @a size must be non-zero and the string
562	* written will always be well-formed.
563	*
564	* @param dest Destination buffer.
565	* @param size Size of the destination buffer.
566	* @param src Source wide string.
567	*
568	* @return EOK, if success, negative otherwise.
569	*/
570	int wstr_to_str(char dest, size_t size, const wchar_t src)
571	{
572	int rc;
573	wchar_t ch;
574	size_t src_idx;
575	size_t dest_off;
576
577	/* There must be space for a null terminator in the buffer. */
578	assert(size > 0);
579
580	src_idx = 0;
581	dest_off = 0;
582
583	while ((ch = src[src_idx++]) != 0) {
584	rc = chr_encode(ch, dest, &dest_off, size - 1);
585	if (rc != EOK)
586	break;
587	}
588
589	dest[dest_off] = '\0';
590	return rc;
591	}
592
593	/** Convert UTF16 string to string.
594	*
595	* Convert utf16 string @a src to string. The output is written to the buffer
596	* specified by @a dest and @a size. @a size must be non-zero and the string
597	* written will always be well-formed. Surrogate pairs also supported.
598	*
599	* @param dest Destination buffer.
600	* @param size Size of the destination buffer.
601	* @param src Source utf16 string.
602	*
603	* @return EOK, if success, negative otherwise.
604	*/
605	int utf16_to_str(char dest, size_t size, const uint16_t src)
606	{
607	size_t idx=0, dest_off=0;
608	wchar_t ch;
609	int rc = EOK;
610
611	/* There must be space for a null terminator in the buffer. */
612	assert(size > 0);
613
614	while (src[idx]) {
615	if ((src[idx] & 0xfc00) == 0xd800) {
616	if (src[idx+1] && (src[idx+1] & 0xfc00) == 0xdc00) {
617	ch = 0x10000;
618	ch += (src[idx] & 0x03FF) << 10;
619	ch += (src[idx+1] & 0x03FF);
620	idx += 2;
621	}
622	else
623	break;
624	} else {
625	ch = src[idx];
626	idx++;
627	}
628	rc = chr_encode(ch, dest, &dest_off, size-1);
629	if (rc != EOK)
630	break;
631	}
632	dest[dest_off] = '\0';
633	return rc;
634	}
635
636	/** Convert wide string to new string.
637	*
638	* Convert wide string @a src to string. Space for the new string is allocated
639	* on the heap.
640	*
641	* @param src Source wide string.
642	* @return New string.
643	*/
644	char wstr_to_astr(const wchar_t src)
645	{
646	char dbuf[STR_BOUNDS(1)];
647	char *str;
648	wchar_t ch;
649
650	size_t src_idx;
651	size_t dest_off;
652	size_t dest_size;
653
654	/* Compute size of encoded string. */
655
656	src_idx = 0;
657	dest_size = 0;
658
659	while ((ch = src[src_idx++]) != 0) {
660	dest_off = 0;
661	if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
662	break;
663	dest_size += dest_off;
664	}
665
666	str = malloc(dest_size + 1);
667	if (str == NULL)
668	return NULL;
669
670	/* Encode string. */
671
672	src_idx = 0;
673	dest_off = 0;
674
675	while ((ch = src[src_idx++]) != 0) {
676	if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
677	break;
678	}
679
680	str[dest_size] = '\0';
681	return str;
682	}
683
684
685	/** Convert string to wide string.
686	*
687	* Convert string @a src to wide string. The output is written to the
688	* buffer specified by @a dest and @a dlen. @a dlen must be non-zero
689	* and the wide string written will always be null-terminated.
690	*
691	* @param dest Destination buffer.
692	* @param dlen Length of destination buffer (number of wchars).
693	* @param src Source string.
694	*
695	* @return EOK, if success, negative otherwise.
696	*/
697	int str_to_wstr(wchar_t dest, size_t dlen, const char src)
698	{
699	int rc=EOK;
700	size_t offset;
701	size_t di;
702	wchar_t c;
703
704	assert(dlen > 0);
705
706	offset = 0;
707	di = 0;
708
709	do {
710	if (di >= dlen - 1) {
711	rc = EOVERFLOW;
712	break;
713	}
714
715	c = str_decode(src, &offset, STR_NO_LIMIT);
716	dest[di++] = c;
717	} while (c != '\0');
718
719	dest[dlen - 1] = '\0';
720	return rc;
721	}
722
723	/** Find first occurence of character in string.
724	*
725	* @param str String to search.
726	* @param ch Character to look for.
727	*
728	* @return Pointer to character in @a str or NULL if not found.
729	*/
730	char str_chr(const char str, wchar_t ch)
731	{
732	wchar_t acc;
733	size_t off = 0;
734	size_t last = 0;
735
736	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
737	if (acc == ch)
738	return (char *) (str + last);
739	last = off;
740	}
741
742	return NULL;
743	}
744
745	/** Find last occurence of character in string.
746	*
747	* @param str String to search.
748	* @param ch Character to look for.
749	*
750	* @return Pointer to character in @a str or NULL if not found.
751	*/
752	char str_rchr(const char str, wchar_t ch)
753	{
754	wchar_t acc;
755	size_t off = 0;
756	size_t last = 0;
757	const char *res = NULL;
758
759	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
760	if (acc == ch)
761	res = (str + last);
762	last = off;
763	}
764
765	return (char *) res;
766	}
767
/** Find first occurence of character in wide string.
 *
 * The terminator itself is never matched, so searching for the
 * character 0 yields NULL.
 *
 * @param wstr String to search.
 * @param ch   Character to look for.
 *
 * @return Pointer to character in @a wstr or NULL if not found.
 */
wchar_t *wstr_chr(const wchar_t *wstr, wchar_t ch)
{
	while (*wstr && *wstr != ch)
		wstr++;

	/* Stopped either at a match or at the terminator */
	if (*wstr)
		return (wchar_t *) wstr;
	else
		return NULL;
}
784
/** Find last occurence of character in wide string.
 *
 * The terminator itself is never matched, so searching for the
 * character 0 yields NULL.
 *
 * @param wstr String to search.
 * @param ch   Character to look for.
 *
 * @return Pointer to character in @a wstr or NULL if not found.
 */
wchar_t *wstr_rchr(const wchar_t *wstr, wchar_t ch)
{
	const wchar_t *res = NULL;

	while (*wstr) {
		/* Remember the match and keep scanning for a later one */
		if (*wstr == ch)
			res = wstr;
		wstr++;
	}

	return (wchar_t *) res;
}
802
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position
 * @a pos. The characters after the position (including the terminator)
 * are shifted by one.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert to.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was sucessful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	/*
	 * NOTE(review): only pos is checked against max_pos, while the
	 * shift below writes up to str[len + 1]. Presumably the caller
	 * guarantees that the buffer has room for one more character;
	 * confirm against the callers.
	 */
	if ((pos > len) || (pos + 1 > max_pos))
		return false;

	/*
	 * Shift the tail (terminator included) one position to the right.
	 * The condition is written as i + 1 > pos so that the loop also
	 * terminates for pos == 0 (unsigned i wraps around).
	 */
	size_t i;
	for (i = len; i + 1 > pos; i--)
		str[i + 1] = str[i];

	str[pos] = ch;

	return true;
}
832
/** Remove a wide character from a wide string.
 *
 * Remove a wide character from a wide string at position
 * @a pos. The characters after the position (including the terminator)
 * are shifted by one.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was sucessful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	size_t len = wstr_length(str);

	if (pos >= len)
		return false;

	/* Shift the tail (terminator included) one position to the left */
	size_t i;
	for (i = pos + 1; i <= len; i++)
		str[i - 1] = str[i];

	return true;
}
858
/** Compare two strings byte by byte, ignoring case.
 *
 * Bytes are converted with tolower() before comparison. Each byte is
 * cast to unsigned char first, because passing a negative value to
 * tolower() is not defined.
 *
 * @param a First NULL-terminated string.
 * @param b Second NULL-terminated string.
 *
 * @return Zero if the strings are equal ignoring case, otherwise the
 *         difference of the first pair of differing lowercased bytes
 *         (negative if @a a sorts first, positive if @a b sorts first).
 */
int stricmp(const char *a, const char *b)
{
	size_t i = 0;

	while (a[i] && b[i] &&
	    (tolower((unsigned char) a[i]) == tolower((unsigned char) b[i])))
		i++;

	return (tolower((unsigned char) a[i]) - tolower((unsigned char) b[i]));
}
868
/** Convert string to a number.
 * Core of strtol and strtoul functions.
 *
 * Leading whitespace and an optional sign are skipped. With base 0 the
 * base is derived from the prefix ("0x"/"0X" means 16, leading "0"
 * means 8, anything else means 10). With base 16 an optional "0x"/"0X"
 * prefix is skipped.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character. It is not stored when the base is
 *               rejected or when the value overflows.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @param sgn    It's set to 1 if minus found (left untouched otherwise).
 * @return Result of conversion, 0 if the base is invalid or no digits
 *         were found, ULONG_MAX on overflow.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
	unsigned char c;
	unsigned long result = 0;
	unsigned long a, b;
	const char *str = nptr;
	const char *tmpptr;

	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*sgn = 1;
		++str;
	} else if (*str == '+')
		++str;

	if (base) {
		if ((base == 1) || (base > 36)) {
			/* FIXME: set errno to EINVAL */
			return 0;
		}
		if ((base == 16) && (*str == '0') && ((str[1] == 'x') ||
		    (str[1] == 'X'))) {
			str += 2;
		}
	} else {
		base = 10;

		if (*str == '0') {
			base = 8;
			if ((str[1] == 'X') || (str[1] == 'x')) {
				base = 16;
				str += 2;
			}
		}
	}

	tmpptr = str;

	while (*str) {
		/* Map the character to its digit value (0xff or more if none) */
		c = *str;
		c = (c >= 'a' ? c - 'a' + 10 : (c >= 'A' ? c - 'A' + 10 :
		    (c <= '9' ? c - '0' : 0xff)));

		/* Valid digits are 0 .. base - 1 */
		if (c >= base) {
			break;
		}

		/*
		 * Multiply-and-add done separately on the low byte and on
		 * the remaining high part, so that an overflow shows up in
		 * the high part instead of silently wrapping.
		 */
		a = (result & 0xff) * base + c;
		b = (result >> 8) * base + (a >> 8);

		if (b > (ULONG_MAX >> 8)) {
			/* overflow */
			/* FIXME: errno = ERANGE*/
			return ULONG_MAX;
		}

		result = (b << 8) + (a & 0xff);
		++str;
	}

	if (str == tmpptr) {
		/*
		 * No number was found => first invalid character is the first
		 * character of the string.
		 */
		/* FIXME: set errno to EINVAL */
		str = nptr;
		result = 0;
	}

	if (endptr)
		*endptr = (char *) str;

	if (nptr == str) {
		/* FIXME: errno = EINVAL */
		return 0;
	}

	return result;
}
961
/** Convert initial part of string to long int according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return Result of conversion. Values out of range are clamped to
 *         LONG_MIN or LONG_MAX.
 */
long int strtol(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = 0;

	number = _strtoul(nptr, endptr, base, &sgn);

	if (number > LONG_MAX) {
		if ((sgn) && (number == (unsigned long) (LONG_MAX) + 1)) {
			/* Magnitude of LONG_MIN: representable only when negative */
			/* FIXME: set 0 to errno */
			return LONG_MIN;
		}
		/* FIXME: set ERANGE to errno */
		return (sgn ? LONG_MIN : LONG_MAX);
	}

	/* number fits into long here, so the negation cannot overflow */
	return (sgn ? -(long) number : (long) number);
}
993
/** Duplicate string.
 *
 * Allocate a new string with malloc() and copy characters from the source
 * string into it.
 *
 * The duplicate string is always a well-formed null-terminated UTF-8
 * string, but it can differ from the source string on the byte level
 * (the copy is made by decoding and re-encoding the characters).
 *
 * @param src Source string.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	size_t size = str_size(src) + 1;
	char *dest = malloc(size);
	if (dest == NULL)
		return NULL;

	str_cpy(dest, size, src);
	return dest;
}
1020
/** Duplicate string with size limit.
 *
 * Allocate a new string with malloc() and copy up to @a n bytes from the
 * source string into it. No more than @a n + 1 bytes is allocated, but if
 * the size occupied by the source string is smaller than @a n + 1, less
 * is allocated.
 *
 * The duplicate string is always a well-formed null-terminated UTF-8
 * string, but it can differ from the source string on the byte level
 * (the copy is made by decoding and re-encoding the characters).
 *
 * @param src Source string (NULL-terminated; its full size is measured
 *            with str_size() before the limit is applied).
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	size_t size = str_size(src);
	if (size > n)
		size = n;

	char *dest = malloc(size + 1);
	if (dest == NULL)
		return NULL;

	str_ncpy(dest, size + 1, src, size);
	return dest;
}
1054
/** Reverse a range of bytes in place.
 *
 * Swap bytes from both ends of the range towards the middle. The
 * reversal works on individual bytes, not on decoded characters.
 *
 * @param begin Pointer to the first byte of the range.
 * @param end   Pointer to the last byte of the range (inclusive).
 */
void str_reverse(char *begin, char *end)
{
	char aux;

	while (end > begin) {
		aux = *end;
		*end-- = *begin;
		*begin++ = aux;
	}
}
1061
1062	int size_t_str(size_t value, int base, char* str, size_t size)
1063	{
1064	static char num[] = "0123456789abcdefghijklmnopqrstuvwxyz";
1065	char* wstr=str;
1066
1067	if (size == 0)
1068	return EINVAL;
1069	if (base<2 \|\| base>35) {
1070	*str='\0';
1071	return EINVAL;
1072	}
1073
1074	do {
1075	*wstr++ = num[value % base];
1076	if (--size == 0)
1077	return EOVERFLOW;
1078	} while(value /= base);
1079	*wstr='\0';
1080
1081	// Reverse string
1082	str_reverse(str,wstr-1);
1083	return EOK;
1084	}
1085
/** Convert initial part of string to unsigned long according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return Result of conversion. If a minus sign was present, the result
 *         is negated in unsigned arithmetic.
 */
unsigned long strtoul(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = 0;

	number = _strtoul(nptr, endptr, base, &sgn);

	return (sgn ? -number : number);
}
1108
/** Split string into tokens.
 *
 * Wrapper around strtok_r() which keeps the position between calls in
 * a static variable; it is therefore not reentrant.
 *
 * @param s     String to tokenize on the first call, NULL to continue
 *              with the string from the previous call.
 * @param delim String of delimiter characters.
 *
 * @return Pointer to the next token or NULL if there are no more tokens.
 */
char *strtok(char *s, const char *delim)
{
	static char *next;

	return strtok_r(s, delim, &next);
}
1115
/** Split string into tokens (reentrant variant).
 *
 * The input string is modified: the delimiter following each token is
 * overwritten with a NULL terminator.
 *
 * @param s     String to tokenize on the first call, NULL to continue
 *              from the position saved in @a next.
 * @param delim String of delimiter characters.
 * @param next  Storage for the position where the next call continues.
 *
 * @return Pointer to the next token or NULL if there are no more tokens.
 */
char *strtok_r(char *s, const char *delim, char **next)
{
	char *start, *end;

	if (s == NULL)
		s = *next;

	/* Nothing to continue from (no previous call). */
	if (s == NULL)
		return NULL;

	/* Skip over leading delimiters. */
	while (*s && (str_chr(delim, *s) != NULL))
		++s;
	start = s;

	/* Skip over token characters. */
	while (*s && (str_chr(delim, *s) == NULL))
		++s;
	end = s;

	/* Continue after the delimiter, or stay at the end of the string. */
	*next = (*s ? s + 1 : s);

	if (start == end) {
		return NULL;  /* No more tokens. */
	}

	/* Overwrite delimiter with NULL terminator. */
	*end = '\0';
	return start;
}
1140
1141	/** Convert string to uint64_t (internal variant).
1142	*
1143	* @param nptr Pointer to string.
1144	* @param endptr Pointer to the first invalid character is stored here.
1145	* @param base Zero or number between 2 and 36 inclusive.
1146	* @param neg Indication of unary minus is stored here.
1147	* @apram result Result of the conversion.
1148	*
1149	* @return EOK if conversion was successful.
1150	*
1151	*/
1152	static int str_uint(const char nptr, char *endptr, unsigned int base,
1153	bool neg, uint64_t result)
1154	{
1155	assert(endptr != NULL);
1156	assert(neg != NULL);
1157	assert(result != NULL);
1158
1159	*neg = false;
1160	const char *str = nptr;
1161
1162	/* Ignore leading whitespace */
1163	while (isspace(*str))
1164	str++;
1165
1166	if (*str == '-') {
1167	*neg = true;
1168	str++;
1169	} else if (*str == '+')
1170	str++;
1171
1172	if (base == 0) {
1173	/* Decode base if not specified */
1174	base = 10;
1175
1176	if (*str == '0') {
1177	base = 8;
1178	str++;
1179
1180	switch (*str) {
1181	case 'b':
1182	case 'B':
1183	base = 2;
1184	str++;
1185	break;
1186	case 'o':
1187	case 'O':
1188	base = 8;
1189	str++;
1190	break;
1191	case 'd':
1192	case 'D':
1193	case 't':
1194	case 'T':
1195	base = 10;
1196	str++;
1197	break;
1198	case 'x':
1199	case 'X':
1200	base = 16;
1201	str++;
1202	break;
1203	default:
1204	str--;
1205	}
1206	}
1207	} else {
1208	/* Check base range */
1209	if ((base < 2) \|\| (base > 36)) {
1210	endptr = (char ) str;
1211	return EINVAL;
1212	}
1213	}
1214
1215	*result = 0;
1216	const char *startstr = str;
1217
1218	while (*str != 0) {
1219	unsigned int digit;
1220
1221	if ((str >= 'a') && (str <= 'z'))
1222	digit = *str - 'a' + 10;
1223	else if ((str >= 'A') && (str <= 'Z'))
1224	digit = *str - 'A' + 10;
1225	else if ((str >= '0') && (str <= '9'))
1226	digit = *str - '0';
1227	else
1228	break;
1229
1230	if (digit >= base)
1231	break;
1232
1233	uint64_t prev = *result;
1234	result = (result) * base + digit;
1235
1236	if (*result < prev) {
1237	/* Overflow */
1238	endptr = (char ) str;
1239	return EOVERFLOW;
1240	}
1241
1242	str++;
1243	}
1244
1245	if (str == startstr) {
1246	/*
1247	* No digits were decoded => first invalid character is
1248	* the first character of the string.
1249	*/
1250	str = nptr;
1251	}
1252
1253	endptr = (char ) str;
1254
1255	if (str == nptr)
1256	return EINVAL;
1257
1258	return EOK;
1259	}
1260
1261	/** Convert string to uint64_t.
1262	*
1263	* @param nptr Pointer to string.
1264	* @param endptr If not NULL, pointer to the first invalid character
1265	* is stored here.
1266	* @param base Zero or number between 2 and 36 inclusive.
1267	* @param strict Do not allow any trailing characters.
1268	* @param result Result of the conversion.
1269	*
1270	* @return EOK if conversion was successful.
1271	*
1272	*/
1273	int str_uint64(const char nptr, char *endptr, unsigned int base,
1274	bool strict, uint64_t *result)
1275	{
1276	assert(result != NULL);
1277
1278	bool neg;
1279	char *lendptr;
1280	int ret = str_uint(nptr, &lendptr, base, &neg, result);
1281
1282	if (endptr != NULL)
1283	endptr = (char ) lendptr;
1284
1285	if (ret != EOK)
1286	return ret;
1287
1288	/* Do not allow negative values */
1289	if (neg)
1290	return EINVAL;
1291
1292	/* Check whether we are at the end of
1293	the string in strict mode */
1294	if ((strict) && (*lendptr != 0))
1295	return EINVAL;
1296
1297	return EOK;
1298	}
1299
1300	/** Convert string to size_t.
1301	*
1302	* @param nptr Pointer to string.
1303	* @param endptr If not NULL, pointer to the first invalid character
1304	* is stored here.
1305	* @param base Zero or number between 2 and 36 inclusive.
1306	* @param strict Do not allow any trailing characters.
1307	* @param result Result of the conversion.
1308	*
1309	* @return EOK if conversion was successful.
1310	*
1311	*/
1312	int str_size_t(const char nptr, char *endptr, unsigned int base,
1313	bool strict, size_t *result)
1314	{
1315	assert(result != NULL);
1316
1317	bool neg;
1318	char *lendptr;
1319	uint64_t res;
1320	int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1321
1322	if (endptr != NULL)
1323	endptr = (char ) lendptr;
1324
1325	if (ret != EOK)
1326	return ret;
1327
1328	/* Do not allow negative values */
1329	if (neg)
1330	return EINVAL;
1331
1332	/* Check whether we are at the end of
1333	the string in strict mode */
1334	if ((strict) && (*lendptr != 0))
1335	return EINVAL;
1336
1337	/* Check for overflow */
1338	size_t _res = (size_t) res;
1339	if (_res != res)
1340	return EOVERFLOW;
1341
1342	*result = _res;
1343
1344	return EOK;
1345	}
1346
/** Scale a value to a decimal (power of 1000) unit for display.
 *
 * A unit is selected only when the value exceeds 1000 of that unit, so
 * a scaled value is always greater than 1000. The division truncates.
 *
 * @param val    Value to be scaled.
 * @param rv     Scaled value is stored here.
 * @param suffix Unit prefix character is stored here: 'k', 'M', 'G',
 *               'T', 'P' or 'E', or ' ' if the value was not scaled.
 *
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/* Units of 10^18 are exa, not zetta (10^21). */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* Units of 10^15 are peta, not exa (10^18). */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
1372
/** Scale a byte count to a binary (power of 1024) unit for display.
 *
 * A unit is selected only when the value exceeds 1024 of that unit, so
 * a scaled value is always greater than 1024. The division truncates.
 *
 * @param val    Value to be scaled.
 * @param rv     Scaled value is stored here.
 * @param suffix Pointer to a constant unit string is stored here:
 *               "KiB", "MiB", "GiB", "TiB", "PiB" or a plain byte
 *               suffix if the value was not scaled.
 * @param fixed  If true, the plain byte suffix is padded with a
 *               trailing space instead of being just "B".
 *
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* Units of 2^50 are pebibytes, not exbibytes (2^60). */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		/*
		 * NOTE(review): the padded variant is two characters wide
		 * while every other suffix is three; confirm the intended
		 * padding against the callers.
		 */
		if (fixed)
			*suffix = "B ";
		else
			*suffix = "B";
	}
}
1399
1400	/** @}
1401	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: