Context Navigation

source: mainline/uspace/lib/c/generic/str.c@ fc97128

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since fc97128 was fc97128, checked in by Oleg Romanenko <romanenko.oleg@…>, 14 years ago
Update to str.h: new function str_to_utf16
Property mode set to `100644`
File size: 32.0 KB

Line
1	/*
2	* Copyright (c) 2005 Martin Decky
3	* Copyright (c) 2008 Jiri Svoboda
4	* Copyright (c) 2011 Oleg Romanenko
5	* All rights reserved.
6	*
7	* Redistribution and use in source and binary forms, with or without
8	* modification, are permitted provided that the following conditions
9	* are met:
10	*
11	* - Redistributions of source code must retain the above copyright
12	* notice, this list of conditions and the following disclaimer.
13	* - Redistributions in binary form must reproduce the above copyright
14	* notice, this list of conditions and the following disclaimer in the
15	* documentation and/or other materials provided with the distribution.
16	* - The name of the author may not be used to endorse or promote products
17	* derived from this software without specific prior written permission.
18	*
19	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29	*/
30
31	/** @addtogroup libc
32	* @{
33	*/
34	/** @file
35	*/
36
37	#include <str.h>
38	#include <stdlib.h>
39	#include <assert.h>
40	#include <stdint.h>
41	#include <ctype.h>
42	#include <malloc.h>
43	#include <errno.h>
44	#include <align.h>
45	#include <mem.h>
46	#include <str.h>
47
/** Byte mask consisting of lowest @a n bits (out of 8) */
#define LO_MASK_8(n)  ((uint8_t) ((1 << (n)) - 1))

/** Mask consisting of lowest @a n bits (out of 32).
 *
 * The shift is performed on an unsigned 32-bit operand so that n == 31
 * does not overflow a signed int. @a n must be smaller than 32.
 */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1))

/** Byte mask consisting of highest @a n bits (out of 8).
 *
 * The complement is evaluated after integer promotion, so only the low
 * eight bits of the result are meaningful.
 */
#define HI_MASK_8(n)  (~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
59
60	/** Decode a single character from a string.
61	*
62	* Decode a single character from a string of size @a size. Decoding starts
63	* at @a offset and this offset is moved to the beginning of the next
64	* character. In case of decoding error, offset generally advances at least
65	* by one. However, offset is never moved beyond size.
66	*
67	* @param str String (not necessarily NULL-terminated).
68	* @param offset Byte offset in string where to start decoding.
69	* @param size Size of the string (in bytes).
70	*
71	* @return Value of decoded character, U_SPECIAL on decoding error or
72	* NULL if attempt to decode beyond @a size.
73	*
74	*/
75	wchar_t str_decode(const char str, size_t offset, size_t size)
76	{
77	if (*offset + 1 > size)
78	return 0;
79
80	/* First byte read from string */
81	uint8_t b0 = (uint8_t) str[(*offset)++];
82
83	/* Determine code length */
84
85	unsigned int b0_bits; /* Data bits in first byte */
86	unsigned int cbytes; /* Number of continuation bytes */
87
88	if ((b0 & 0x80) == 0) {
89	/* 0xxxxxxx (Plain ASCII) */
90	b0_bits = 7;
91	cbytes = 0;
92	} else if ((b0 & 0xe0) == 0xc0) {
93	/* 110xxxxx 10xxxxxx */
94	b0_bits = 5;
95	cbytes = 1;
96	} else if ((b0 & 0xf0) == 0xe0) {
97	/* 1110xxxx 10xxxxxx 10xxxxxx */
98	b0_bits = 4;
99	cbytes = 2;
100	} else if ((b0 & 0xf8) == 0xf0) {
101	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
102	b0_bits = 3;
103	cbytes = 3;
104	} else {
105	/* 10xxxxxx -- unexpected continuation byte */
106	return U_SPECIAL;
107	}
108
109	if (*offset + cbytes > size)
110	return U_SPECIAL;
111
112	wchar_t ch = b0 & LO_MASK_8(b0_bits);
113
114	/* Decode continuation bytes */
115	while (cbytes > 0) {
116	uint8_t b = (uint8_t) str[(*offset)++];
117
118	/* Must be 10xxxxxx */
119	if ((b & 0xc0) != 0x80)
120	return U_SPECIAL;
121
122	/* Shift data bits to ch */
123	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
124	cbytes--;
125	}
126
127	return ch;
128	}
129
130	/** Encode a single character to string representation.
131	*
132	* Encode a single character to string representation (i.e. UTF-8) and store
133	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
134	* is moved to the position where the next character can be written to.
135	*
136	* @param ch Input character.
137	* @param str Output buffer.
138	* @param offset Byte offset where to start writing.
139	* @param size Size of the output buffer (in bytes).
140	*
141	* @return EOK if the character was encoded successfully, EOVERFLOW if there
142	* was not enough space in the output buffer or EINVAL if the character
143	* code was invalid.
144	*/
145	int chr_encode(const wchar_t ch, char str, size_t offset, size_t size)
146	{
147	if (*offset >= size)
148	return EOVERFLOW;
149
150	if (!chr_check(ch))
151	return EINVAL;
152
153	/* Unsigned version of ch (bit operations should only be done
154	on unsigned types). */
155	uint32_t cc = (uint32_t) ch;
156
157	/* Determine how many continuation bytes are needed */
158
159	unsigned int b0_bits; /* Data bits in first byte */
160	unsigned int cbytes; /* Number of continuation bytes */
161
162	if ((cc & ~LO_MASK_32(7)) == 0) {
163	b0_bits = 7;
164	cbytes = 0;
165	} else if ((cc & ~LO_MASK_32(11)) == 0) {
166	b0_bits = 5;
167	cbytes = 1;
168	} else if ((cc & ~LO_MASK_32(16)) == 0) {
169	b0_bits = 4;
170	cbytes = 2;
171	} else if ((cc & ~LO_MASK_32(21)) == 0) {
172	b0_bits = 3;
173	cbytes = 3;
174	} else {
175	/* Codes longer than 21 bits are not supported */
176	return EINVAL;
177	}
178
179	/* Check for available space in buffer */
180	if (*offset + cbytes >= size)
181	return EOVERFLOW;
182
183	/* Encode continuation bytes */
184	unsigned int i;
185	for (i = cbytes; i > 0; i--) {
186	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
187	cc = cc >> CONT_BITS;
188	}
189
190	/* Encode first byte */
191	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
192
193	/* Advance offset */
194	*offset += cbytes + 1;
195
196	return EOK;
197	}
198
/** Get size of string.
 *
 * Count the bytes occupied by the string @a str, not including the
 * terminating NUL byte.
 *
 * @param str NUL-terminated string to measure.
 *
 * @return Number of bytes preceding the terminator.
 *
 */
size_t str_size(const char *str)
{
	const char *cur = str;

	/* Walk to the terminator; the distance travelled is the size. */
	while (*cur != 0)
		cur++;

	return (size_t) (cur - str);
}
218
/** Get size of wide string.
 *
 * Compute the number of bytes occupied by the wide string @a str, not
 * including the terminating null wide character.
 *
 * @param str Null-terminated wide string to measure.
 *
 * @return Number of bytes used by the wide string.
 *
 */
size_t wstr_size(const wchar_t *str)
{
	size_t nchars = wstr_length(str);

	return nchars * sizeof(wchar_t);
}
233
234	/** Get size of string with length limit.
235	*
236	* Get the number of bytes which are used by up to @a max_len first
237	* characters in the string @a str. If @a max_len is greater than
238	* the length of @a str, the entire string is measured (excluding the
239	* NULL-terminator).
240	*
241	* @param str String to consider.
242	* @param max_len Maximum number of characters to measure.
243	*
244	* @return Number of bytes used by the characters.
245	*
246	*/
247	size_t str_lsize(const char *str, size_t max_len)
248	{
249	size_t len = 0;
250	size_t offset = 0;
251
252	while (len < max_len) {
253	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
254	break;
255
256	len++;
257	}
258
259	return offset;
260	}
261
/** Get size of wide string with length limit.
 *
 * Compute the number of bytes occupied by at most @a max_len leading
 * wide characters of @a str. When the wide string is shorter than
 * @a max_len, the whole string is measured (the terminator is not
 * included).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	/* wstr_nlength() takes its limit in bytes, not characters. */
	size_t byte_limit = max_len * sizeof(wchar_t);
	size_t nchars = wstr_nlength(str, byte_limit);

	return nchars * sizeof(wchar_t);
}
279
280	/** Get number of characters in a string.
281	*
282	* @param str NULL-terminated string.
283	*
284	* @return Number of characters in string.
285	*
286	*/
287	size_t str_length(const char *str)
288	{
289	size_t len = 0;
290	size_t offset = 0;
291
292	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
293	len++;
294
295	return len;
296	}
297
/** Get number of characters in a wide string.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return Number of wide characters preceding the terminator.
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
	const wchar_t *cur = wstr;

	/* Advance to the terminator; the distance is the length. */
	while (*cur != 0)
		cur++;

	return (size_t) (cur - wstr);
}
314
/** Get number of characters in a string with size limit.
 *
 * Characters are counted by decoding with str_decode() until it yields 0,
 * which happens at the terminator or once @a size bytes were consumed.
 *
 * @param str  NULL-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in string.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t pos = 0;
	size_t count;

	for (count = 0; str_decode(str, &pos, size) != 0; count++)
		;

	return count;
}
333
/** Get number of characters in a wide string with size limit.
 *
 * @param str  NULL-terminated wide string.
 * @param size Maximum number of bytes to consider. A trailing partial
 *             wide character (size not a multiple of sizeof(wchar_t))
 *             is ignored.
 *
 * @return Number of wide characters found before the terminator or
 *         before the size limit, whichever comes first.
 *
 */
size_t wstr_nlength(const wchar_t *str, size_t size)
{
	/* Whole wide characters that fit into the byte limit. */
	size_t max_chars = size / sizeof(wchar_t);
	size_t count = 0;

	while ((count < max_chars) && (str[count] != 0))
		count++;

	return count;
}
355
/** Check whether character is plain ASCII.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0 to 127 inclusive.
 *
 */
bool ascii_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 127);
}
368
/** Check whether wide string is plain ASCII.
 *
 * @param wstr NULL-terminated wide string to test.
 *
 * @return True if every character of @a wstr passes ascii_check()
 *         (an empty string therefore yields true).
 *
 */
bool wstr_is_ascii(const wchar_t *wstr)
{
	/* Skip the leading run of ASCII characters. */
	while (*wstr && ascii_check(*wstr))
		wstr++;

	/* Pure ASCII iff the run extended all the way to the terminator. */
	return *wstr == 0;
}
380
/** Check whether character is valid
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0 to 0x10FFFF (1114111)
 *         inclusive.
 *
 */
bool chr_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 0x10FFFF);
}
393
394	/** Compare two NULL terminated strings.
395	*
396	* Do a char-by-char comparison of two NULL-terminated strings.
397	* The strings are considered equal iff they consist of the same
398	* characters on the minimum of their lengths.
399	*
400	* @param s1 First string to compare.
401	* @param s2 Second string to compare.
402	*
403	* @return 0 if the strings are equal, -1 if first is smaller,
404	* 1 if second smaller.
405	*
406	*/
407	int str_cmp(const char s1, const char s2)
408	{
409	wchar_t c1 = 0;
410	wchar_t c2 = 0;
411
412	size_t off1 = 0;
413	size_t off2 = 0;
414
415	while (true) {
416	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
417	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
418
419	if (c1 < c2)
420	return -1;
421
422	if (c1 > c2)
423	return 1;
424
425	if (c1 == 0 \|\| c2 == 0)
426	break;
427	}
428
429	return 0;
430	}
431
432	/** Compare two NULL terminated strings with length limit.
433	*
434	* Do a char-by-char comparison of two NULL-terminated strings.
435	* The strings are considered equal iff they consist of the same
436	* characters on the minimum of their lengths and the length limit.
437	*
438	* @param s1 First string to compare.
439	* @param s2 Second string to compare.
440	* @param max_len Maximum number of characters to consider.
441	*
442	* @return 0 if the strings are equal, -1 if first is smaller,
443	* 1 if second smaller.
444	*
445	*/
446	int str_lcmp(const char s1, const char s2, size_t max_len)
447	{
448	wchar_t c1 = 0;
449	wchar_t c2 = 0;
450
451	size_t off1 = 0;
452	size_t off2 = 0;
453
454	size_t len = 0;
455
456	while (true) {
457	if (len >= max_len)
458	break;
459
460	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
461	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
462
463	if (c1 < c2)
464	return -1;
465
466	if (c1 > c2)
467	return 1;
468
469	if (c1 == 0 \|\| c2 == 0)
470	break;
471
472	++len;
473	}
474
475	return 0;
476
477	}
478
479	/** Copy string.
480	*
481	* Copy source string @a src to destination buffer @a dest.
482	* No more than @a size bytes are written. If the size of the output buffer
483	* is at least one byte, the output string will always be well-formed, i.e.
484	* null-terminated and containing only complete characters.
485	*
486	* @param dest Destination buffer.
487	* @param count Size of the destination buffer (must be > 0).
488	* @param src Source string.
489	*/
490	void str_cpy(char dest, size_t size, const char src)
491	{
492	/* There must be space for a null terminator in the buffer. */
493	assert(size > 0);
494
495	size_t src_off = 0;
496	size_t dest_off = 0;
497
498	wchar_t ch;
499	while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
500	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
501	break;
502	}
503
504	dest[dest_off] = '\0';
505	}
506
507	/** Copy size-limited substring.
508	*
509	* Copy prefix of string @a src of max. size @a size to destination buffer
510	* @a dest. No more than @a size bytes are written. The output string will
511	* always be well-formed, i.e. null-terminated and containing only complete
512	* characters.
513	*
514	* No more than @a n bytes are read from the input string, so it does not
515	* have to be null-terminated.
516	*
517	* @param dest Destination buffer.
518	* @param count Size of the destination buffer (must be > 0).
519	* @param src Source string.
520	* @param n Maximum number of bytes to read from @a src.
521	*/
522	void str_ncpy(char dest, size_t size, const char src, size_t n)
523	{
524	/* There must be space for a null terminator in the buffer. */
525	assert(size > 0);
526
527	size_t src_off = 0;
528	size_t dest_off = 0;
529
530	wchar_t ch;
531	while ((ch = str_decode(src, &src_off, n)) != 0) {
532	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
533	break;
534	}
535
536	dest[dest_off] = '\0';
537	}
538
/** Append one string to another.
 *
 * Append source string @a src to string in destination buffer @a dest.
 * Size of the destination buffer is @a size. If the size of the output buffer
 * is at least one byte, the output string will always be well-formed, i.e.
 * null-terminated and containing only complete characters.
 *
 * If the string already stored in @a dest occupies @a size bytes or more
 * (i.e. there is no room left, not even for a terminator), the buffer is
 * left untouched.
 *
 * @param dest Destination buffer.
 * @param size Size of the destination buffer.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	size_t dstr_size;

	dstr_size = str_size(dest);

	/* Avoid unsigned underflow of the remaining-space computation. */
	if (dstr_size >= size)
		return;

	str_cpy(dest + dstr_size, size - dstr_size, src);
}
557
558	/** Convert wide string to string.
559	*
560	* Convert wide string @a src to string. The output is written to the buffer
561	* specified by @a dest and @a size. @a size must be non-zero and the string
562	* written will always be well-formed.
563	*
564	* @param dest Destination buffer.
565	* @param size Size of the destination buffer.
566	* @param src Source wide string.
567	*
568	* @return EOK, if success, negative otherwise.
569	*/
570	int wstr_to_str(char dest, size_t size, const wchar_t src)
571	{
572	int rc;
573	wchar_t ch;
574	size_t src_idx;
575	size_t dest_off;
576
577	/* There must be space for a null terminator in the buffer. */
578	assert(size > 0);
579
580	src_idx = 0;
581	dest_off = 0;
582
583	while ((ch = src[src_idx++]) != 0) {
584	rc = chr_encode(ch, dest, &dest_off, size - 1);
585	if (rc != EOK)
586	break;
587	}
588
589	dest[dest_off] = '\0';
590	return rc;
591	}
592
593	/** Convert UTF16 string to string.
594	*
595	* Convert utf16 string @a src to string. The output is written to the buffer
596	* specified by @a dest and @a size. @a size must be non-zero and the string
597	* written will always be well-formed. Surrogate pairs also supported.
598	*
599	* @param dest Destination buffer.
600	* @param size Size of the destination buffer.
601	* @param src Source utf16 string.
602	*
603	* @return EOK, if success, negative otherwise.
604	*/
605	int utf16_to_str(char dest, size_t size, const uint16_t src)
606	{
607	size_t idx=0, dest_off=0;
608	wchar_t ch;
609	int rc = EOK;
610
611	/* There must be space for a null terminator in the buffer. */
612	assert(size > 0);
613
614	while (src[idx]) {
615	if ((src[idx] & 0xfc00) == 0xd800) {
616	if (src[idx+1] && (src[idx+1] & 0xfc00) == 0xdc00) {
617	ch = 0x10000;
618	ch += (src[idx] & 0x03FF) << 10;
619	ch += (src[idx+1] & 0x03FF);
620	idx += 2;
621	}
622	else
623	break;
624	} else {
625	ch = src[idx];
626	idx++;
627	}
628	rc = chr_encode(ch, dest, &dest_off, size-1);
629	if (rc != EOK)
630	break;
631	}
632	dest[dest_off] = '\0';
633	return rc;
634	}
635
636	int str_to_utf16(uint16_t dest, size_t size, const char src)
637	{
638	int rc=EOK;
639	size_t offset=0;
640	size_t idx=0;
641	wchar_t c;
642
643	assert(size > 0);
644
645	while ((c = str_decode(src, &offset, STR_NO_LIMIT)) != 0) {
646	if (c > 0x10000) {
647	if (idx+2 >= size-1) {
648	rc=EOVERFLOW;
649	break;
650	}
651	c = (c - 0x10000);
652	dest[idx] = 0xD800 \| (c >> 10);
653	dest[idx+1] = 0xDC00 \| (c & 0x3FF);
654	idx++;
655	} else {
656	dest[idx] = c;
657	}
658
659	idx++;
660	if (idx >= size-1) {
661	rc=EOVERFLOW;
662	break;
663	}
664	}
665
666	dest[idx] = '\0';
667	return rc;
668	}
669
670
671	/** Convert wide string to new string.
672	*
673	* Convert wide string @a src to string. Space for the new string is allocated
674	* on the heap.
675	*
676	* @param src Source wide string.
677	* @return New string.
678	*/
679	char wstr_to_astr(const wchar_t src)
680	{
681	char dbuf[STR_BOUNDS(1)];
682	char *str;
683	wchar_t ch;
684
685	size_t src_idx;
686	size_t dest_off;
687	size_t dest_size;
688
689	/* Compute size of encoded string. */
690
691	src_idx = 0;
692	dest_size = 0;
693
694	while ((ch = src[src_idx++]) != 0) {
695	dest_off = 0;
696	if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
697	break;
698	dest_size += dest_off;
699	}
700
701	str = malloc(dest_size + 1);
702	if (str == NULL)
703	return NULL;
704
705	/* Encode string. */
706
707	src_idx = 0;
708	dest_off = 0;
709
710	while ((ch = src[src_idx++]) != 0) {
711	if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
712	break;
713	}
714
715	str[dest_size] = '\0';
716	return str;
717	}
718
719
720	/** Convert string to wide string.
721	*
722	* Convert string @a src to wide string. The output is written to the
723	* buffer specified by @a dest and @a dlen. @a dlen must be non-zero
724	* and the wide string written will always be null-terminated.
725	*
726	* @param dest Destination buffer.
727	* @param dlen Length of destination buffer (number of wchars).
728	* @param src Source string.
729	*
730	* @return EOK, if success, negative otherwise.
731	*/
732	int str_to_wstr(wchar_t dest, size_t dlen, const char src)
733	{
734	int rc=EOK;
735	size_t offset;
736	size_t di;
737	wchar_t c;
738
739	assert(dlen > 0);
740
741	offset = 0;
742	di = 0;
743
744	do {
745	if (di >= dlen - 1) {
746	rc = EOVERFLOW;
747	break;
748	}
749
750	c = str_decode(src, &offset, STR_NO_LIMIT);
751	dest[di++] = c;
752	} while (c != '\0');
753
754	dest[dlen - 1] = '\0';
755	return rc;
756	}
757
758	/** Find first occurence of character in string.
759	*
760	* @param str String to search.
761	* @param ch Character to look for.
762	*
763	* @return Pointer to character in @a str or NULL if not found.
764	*/
765	char str_chr(const char str, wchar_t ch)
766	{
767	wchar_t acc;
768	size_t off = 0;
769	size_t last = 0;
770
771	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
772	if (acc == ch)
773	return (char *) (str + last);
774	last = off;
775	}
776
777	return NULL;
778	}
779
780	/** Find last occurence of character in string.
781	*
782	* @param str String to search.
783	* @param ch Character to look for.
784	*
785	* @return Pointer to character in @a str or NULL if not found.
786	*/
787	char str_rchr(const char str, wchar_t ch)
788	{
789	wchar_t acc;
790	size_t off = 0;
791	size_t last = 0;
792	const char *res = NULL;
793
794	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
795	if (acc == ch)
796	res = (str + last);
797	last = off;
798	}
799
800	return (char *) res;
801	}
802
/** Find first occurence of character in wide string.
 *
 * The terminator itself is never matched, so searching for 0 yields NULL.
 *
 * @param wstr String to search.
 * @param ch   Character to look for.
 *
 * @return Pointer to character in @a wstr or NULL if not found.
 */
wchar_t *wstr_chr(const wchar_t *wstr, wchar_t ch)
{
	/* Stop at the first match or at the terminator. */
	while (*wstr && *wstr != ch)
		wstr++;

	if (*wstr)
		return (wchar_t *) wstr;
	else
		return NULL;
}
819
/** Find last occurence of character in wide string.
 *
 * The terminator itself is never matched, so searching for 0 yields NULL.
 *
 * @param wstr String to search.
 * @param ch   Character to look for.
 *
 * @return Pointer to character in @a wstr or NULL if not found.
 */
wchar_t *wstr_rchr(const wchar_t *wstr, wchar_t ch)
{
	const wchar_t *res = NULL;

	while (*wstr) {
		/* Remember every match; the final one survives. */
		if (*wstr == ch)
			res = wstr;
		wstr++;
	}

	return (wchar_t *) res;
}
837
/** Insert a wide character into a wide string.
 *
 * Place @a ch at character index @a pos of @a str. All characters from
 * @a pos onwards, including the terminator, move one slot towards the
 * end of the buffer.
 *
 * NOTE(review): only @a pos is compared against @a max_pos; the current
 * length of the string is not. Whether callers must guarantee room for
 * the extra character themselves should be confirmed.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was sucessful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	/* Current length of the string (terminator not counted). */
	size_t count = 0;
	while (str[count] != 0)
		count++;

	if (pos > count)
		return false;

	if (pos + 1 > max_pos)
		return false;

	/* Shift the tail, terminator first, one slot to the right. */
	size_t dst = count + 1;
	while (dst > pos) {
		str[dst] = str[dst - 1];
		dst--;
	}

	str[pos] = ch;
	return true;
}
867
/** Remove a wide character from a wide string.
 *
 * Delete the character at index @a pos of @a str. All characters behind
 * it, including the terminator, move one slot towards the beginning.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was sucessful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	/* Current length of the string (terminator not counted). */
	size_t count = 0;
	while (str[count] != 0)
		count++;

	if (pos >= count)
		return false;

	/* Close the gap; the last step copies the terminator. */
	size_t dst = pos;
	while (dst < count) {
		str[dst] = str[dst + 1];
		dst++;
	}

	return true;
}
893
/** Compare two strings byte by byte, ignoring letter case.
 *
 * Bytes are folded with tolower() before being compared, so only the
 * case mappings known to tolower() are honoured.
 *
 * @param a First NUL-terminated string.
 * @param b Second NUL-terminated string.
 *
 * @return Zero if the strings match, otherwise the difference of the
 *         first pair of folded bytes that differ (negative if @a a
 *         sorts first, positive if @a b sorts first).
 */
int stricmp(const char *a, const char *b)
{
	size_t i = 0;

	/*
	 * tolower() requires a value representable as unsigned char;
	 * passing a negative plain char would be undefined.
	 */
	while (a[i] && b[i] &&
	    (tolower((unsigned char) a[i]) == tolower((unsigned char) b[i])))
		i++;

	return (tolower((unsigned char) a[i]) - tolower((unsigned char) b[i]));
}
903
/** Convert string to a number.
 * Core of strtol and strtoul functions.
 *
 * Leading whitespace and one optional sign are skipped. With base 0 the
 * base is derived from the prefix ("0x"/"0X" hexadecimal, leading "0"
 * octal, decimal otherwise); with base 16 an optional "0x"/"0X" prefix
 * is skipped.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character. Not written when @a base is rejected.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @param sgn    It's set to 1 if minus found (left untouched otherwise).
 * @return Result of conversion, 0 if no digits were found or the base is
 *         invalid, ULONG_MAX on overflow.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
	unsigned char c;
	unsigned long result = 0;
	unsigned long a, b;
	const char *str = nptr;
	const char *tmpptr;

	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*sgn = 1;
		++str;
	} else if (*str == '+')
		++str;

	if (base) {
		if ((base < 2) || (base > 36)) {
			/* FIXME: set errno to EINVAL */
			return 0;
		}
		if ((base == 16) && (*str == '0') && ((str[1] == 'x') ||
		    (str[1] == 'X'))) {
			str += 2;
		}
	} else {
		base = 10;

		if (*str == '0') {
			base = 8;
			if ((str[1] == 'X') || (str[1] == 'x')) {
				base = 16;
				str += 2;
			}
		}
	}

	tmpptr = str;

	while (*str) {
		/* Map the character to its digit value; 0xff marks a non-digit. */
		c = *str;
		c = (c >= 'a' ? c - 'a' + 10 : (c >= 'A' ? c - 'A' + 10 :
		    (c <= '9' ? c - '0' : 0xff)));

		/* Valid digits are 0 .. base - 1. */
		if (c >= base) {
			break;
		}

		/*
		 * Multiply in two halves so that overflow shows up in the
		 * upper part instead of wrapping around silently.
		 */
		a = (result & 0xff) * base + c;
		b = (result >> 8) * base + (a >> 8);

		if (b > (ULONG_MAX >> 8)) {
			/* overflow */
			/* FIXME: errno = ERANGE*/
			return ULONG_MAX;
		}

		result = (b << 8) + (a & 0xff);
		++str;
	}

	if (str == tmpptr) {
		/*
		 * No number was found => first invalid character is the first
		 * character of the string.
		 */
		/* FIXME: set errno to EINVAL */
		str = nptr;
		result = 0;
	}

	if (endptr)
		*endptr = (char *) str;

	if (nptr == str) {
		/* FIXME: errno = EINVAL */
		return 0;
	}

	return result;
}
996
/** Convert initial part of string to long int according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return Result of conversion, clamped to LONG_MIN or LONG_MAX when the
 *         magnitude does not fit.
 */
long int strtol(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = 0;

	number = _strtoul(nptr, endptr, base, &sgn);

	if (number > LONG_MAX) {
		if ((sgn) && (number == (unsigned long) (LONG_MAX) + 1)) {
			/* The magnitude of LONG_MIN: representable when negated. */
			/* FIXME: set 0 to errno */
			return LONG_MIN;
		}
		/* FIXME: set ERANGE to errno */
		return (sgn ? LONG_MIN : LONG_MAX);
	}

	/* number <= LONG_MAX here, so the conversion is value-preserving. */
	return (sgn ? -(long) number : (long) number);
}
1028
/** Duplicate string.
 *
 * Allocate a new string with malloc() and copy characters from the
 * source string into it using str_cpy(). The duplicate string is always
 * a well-formed null-terminated UTF-8 string, but it can differ from the
 * source string on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	/* One extra byte for the terminator. */
	size_t size = str_size(src) + 1;
	char *dest = malloc(size);
	if (dest == NULL)
		return NULL;

	str_cpy(dest, size, src);
	return dest;
}
1055
/** Duplicate string with size limit.
 *
 * Allocate a new string with malloc() and copy up to @a n bytes from the
 * source string into it using str_ncpy(). No more than @a n + 1 bytes are
 * allocated, but if the size occupied by the source string is smaller
 * than @a n + 1, less is allocated.
 *
 * The duplicate string is always a well-formed null-terminated UTF-8
 * string, but it can differ from the source string on the byte level.
 *
 * @param src Source string.
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	size_t size = str_size(src);
	if (size > n)
		size = n;

	/* One extra byte for the terminator. */
	char *dest = malloc(size + 1);
	if (dest == NULL)
		return NULL;

	str_ncpy(dest, size + 1, src, size);
	return dest;
}
1089
/** Reverse a range of bytes in place.
 *
 * Swaps bytes pairwise from both ends of the range towards its middle.
 * The range is reversed byte by byte, so multi-byte characters inside it
 * do not stay intact.
 *
 * @param begin Pointer to the first byte of the range.
 * @param end   Pointer to the last byte of the range (inclusive).
 */
void str_reverse(char *begin, char *end)
{
	while (end > begin) {
		char aux = *end;

		*end-- = *begin;
		*begin++ = aux;
	}
}
1096
1097	int size_t_str(size_t value, int base, char* str, size_t size)
1098	{
1099	static char num[] = "0123456789abcdefghijklmnopqrstuvwxyz";
1100	char* wstr=str;
1101
1102	if (size == 0)
1103	return EINVAL;
1104	if (base<2 \|\| base>35) {
1105	*str='\0';
1106	return EINVAL;
1107	}
1108
1109	do {
1110	*wstr++ = num[value % base];
1111	if (--size == 0)
1112	return EOVERFLOW;
1113	} while(value /= base);
1114	*wstr='\0';
1115
1116	// Reverse string
1117	str_reverse(str,wstr-1);
1118	return EOK;
1119	}
1120
/** Convert initial part of string to unsigned long according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return Result of conversion. When a minus sign was present the value
 *         is negated in unsigned arithmetic.
 */
unsigned long strtoul(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = 0;

	number = _strtoul(nptr, endptr, base, &sgn);

	return (sgn ? -number : number);
}
1143
/** Split a string into tokens.
 *
 * Wrapper around strtok_r() that keeps the continuation pointer in a
 * function-local static variable, so the scanning state is shared by all
 * callers.
 *
 * @param s     String to tokenize, or NULL to continue with the string
 *              passed in an earlier call.
 * @param delim Set of delimiter characters.
 *
 * @return Pointer to the next token, or NULL if there are no more tokens.
 */
char *strtok(char *s, const char *delim)
{
	static char *next;

	return strtok_r(s, delim, &next);
}
1150
/** Split a string into tokens, keeping the state in caller storage.
 *
 * The input string is modified: the delimiter that ends the returned
 * token is overwritten with a terminator.
 *
 * NOTE(review): the input is scanned one byte at a time and each byte is
 * passed to str_chr() as a character, so delimiters are presumably
 * expected to be plain ASCII -- confirm before using multi-byte ones.
 *
 * @param s     String to tokenize, or NULL to continue from @a *next.
 * @param delim Set of delimiter characters.
 * @param next  Continuation pointer; read when @a s is NULL and updated
 *              on every call.
 *
 * @return Pointer to the next token, or NULL if there are no more tokens.
 */
char *strtok_r(char *s, const char *delim, char **next)
{
	char *start, *end;

	if (s == NULL)
		s = *next;

	/* Skip over leading delimiters. */
	while (*s && (str_chr(delim, *s) != NULL))
		++s;
	start = s;

	/* Skip over token characters. */
	while (*s && (str_chr(delim, *s) == NULL))
		++s;
	end = s;

	/* Resume behind the delimiter, or stay on the final terminator. */
	*next = (*s ? s + 1 : s);

	if (start == end) {
		return NULL;  /* No more tokens. */
	}

	/* Overwrite delimiter with NULL terminator. */
	*end = '\0';
	return start;
}
1175
1176	/** Convert string to uint64_t (internal variant).
1177	*
1178	* @param nptr Pointer to string.
1179	* @param endptr Pointer to the first invalid character is stored here.
1180	* @param base Zero or number between 2 and 36 inclusive.
1181	* @param neg Indication of unary minus is stored here.
1182	* @apram result Result of the conversion.
1183	*
1184	* @return EOK if conversion was successful.
1185	*
1186	*/
1187	static int str_uint(const char nptr, char *endptr, unsigned int base,
1188	bool neg, uint64_t result)
1189	{
1190	assert(endptr != NULL);
1191	assert(neg != NULL);
1192	assert(result != NULL);
1193
1194	*neg = false;
1195	const char *str = nptr;
1196
1197	/* Ignore leading whitespace */
1198	while (isspace(*str))
1199	str++;
1200
1201	if (*str == '-') {
1202	*neg = true;
1203	str++;
1204	} else if (*str == '+')
1205	str++;
1206
1207	if (base == 0) {
1208	/* Decode base if not specified */
1209	base = 10;
1210
1211	if (*str == '0') {
1212	base = 8;
1213	str++;
1214
1215	switch (*str) {
1216	case 'b':
1217	case 'B':
1218	base = 2;
1219	str++;
1220	break;
1221	case 'o':
1222	case 'O':
1223	base = 8;
1224	str++;
1225	break;
1226	case 'd':
1227	case 'D':
1228	case 't':
1229	case 'T':
1230	base = 10;
1231	str++;
1232	break;
1233	case 'x':
1234	case 'X':
1235	base = 16;
1236	str++;
1237	break;
1238	default:
1239	str--;
1240	}
1241	}
1242	} else {
1243	/* Check base range */
1244	if ((base < 2) \|\| (base > 36)) {
1245	endptr = (char ) str;
1246	return EINVAL;
1247	}
1248	}
1249
1250	*result = 0;
1251	const char *startstr = str;
1252
1253	while (*str != 0) {
1254	unsigned int digit;
1255
1256	if ((str >= 'a') && (str <= 'z'))
1257	digit = *str - 'a' + 10;
1258	else if ((str >= 'A') && (str <= 'Z'))
1259	digit = *str - 'A' + 10;
1260	else if ((str >= '0') && (str <= '9'))
1261	digit = *str - '0';
1262	else
1263	break;
1264
1265	if (digit >= base)
1266	break;
1267
1268	uint64_t prev = *result;
1269	result = (result) * base + digit;
1270
1271	if (*result < prev) {
1272	/* Overflow */
1273	endptr = (char ) str;
1274	return EOVERFLOW;
1275	}
1276
1277	str++;
1278	}
1279
1280	if (str == startstr) {
1281	/*
1282	* No digits were decoded => first invalid character is
1283	* the first character of the string.
1284	*/
1285	str = nptr;
1286	}
1287
1288	endptr = (char ) str;
1289
1290	if (str == nptr)
1291	return EINVAL;
1292
1293	return EOK;
1294	}
1295
1296	/** Convert string to uint64_t.
1297	*
1298	* @param nptr Pointer to string.
1299	* @param endptr If not NULL, pointer to the first invalid character
1300	* is stored here.
1301	* @param base Zero or number between 2 and 36 inclusive.
1302	* @param strict Do not allow any trailing characters.
1303	* @param result Result of the conversion.
1304	*
1305	* @return EOK if conversion was successful.
1306	*
1307	*/
1308	int str_uint64(const char nptr, char *endptr, unsigned int base,
1309	bool strict, uint64_t *result)
1310	{
1311	assert(result != NULL);
1312
1313	bool neg;
1314	char *lendptr;
1315	int ret = str_uint(nptr, &lendptr, base, &neg, result);
1316
1317	if (endptr != NULL)
1318	endptr = (char ) lendptr;
1319
1320	if (ret != EOK)
1321	return ret;
1322
1323	/* Do not allow negative values */
1324	if (neg)
1325	return EINVAL;
1326
1327	/* Check whether we are at the end of
1328	the string in strict mode */
1329	if ((strict) && (*lendptr != 0))
1330	return EINVAL;
1331
1332	return EOK;
1333	}
1334
1335	/** Convert string to size_t.
1336	*
1337	* @param nptr Pointer to string.
1338	* @param endptr If not NULL, pointer to the first invalid character
1339	* is stored here.
1340	* @param base Zero or number between 2 and 36 inclusive.
1341	* @param strict Do not allow any trailing characters.
1342	* @param result Result of the conversion.
1343	*
1344	* @return EOK if conversion was successful.
1345	*
1346	*/
1347	int str_size_t(const char nptr, char *endptr, unsigned int base,
1348	bool strict, size_t *result)
1349	{
1350	assert(result != NULL);
1351
1352	bool neg;
1353	char *lendptr;
1354	uint64_t res;
1355	int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1356
1357	if (endptr != NULL)
1358	endptr = (char ) lendptr;
1359
1360	if (ret != EOK)
1361	return ret;
1362
1363	/* Do not allow negative values */
1364	if (neg)
1365	return EINVAL;
1366
1367	/* Check whether we are at the end of
1368	the string in strict mode */
1369	if ((strict) && (*lendptr != 0))
1370	return EINVAL;
1371
1372	/* Check for overflow */
1373	size_t _res = (size_t) res;
1374	if (_res != res)
1375	return EOVERFLOW;
1376
1377	*result = _res;
1378
1379	return EOK;
1380	}
1381
/** Scale a value down and pick the matching decimal (SI) suffix.
 *
 * A tier is selected only when the value is strictly greater than the
 * tier's threshold; the value is then divided (truncating) by the
 * tier's divisor. Values up to 1000000 are passed through unchanged
 * with a space as the suffix.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Suffix character is stored here: 'E', 'P', 'T', 'G',
 *               'M', 'k' or ' '.
 *
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	/* Ordered from the largest tier; the first matching row wins. */
	static const struct {
		uint64_t above;    /* Row applies when val > above */
		uint64_t divisor;  /* Power of ten the suffix stands for */
		char suffix;
	} tiers[] = {
		{ UINT64_C(10000000000000000000), UINT64_C(1000000000000000000), 'E' },
		{ UINT64_C(1000000000000000000), UINT64_C(1000000000000000), 'P' },
		{ UINT64_C(1000000000000000), UINT64_C(1000000000000), 'T' },
		{ UINT64_C(1000000000000), UINT64_C(1000000000), 'G' },
		{ UINT64_C(1000000000), UINT64_C(1000000), 'M' },
		{ UINT64_C(1000000), UINT64_C(1000), 'k' }
	};

	for (unsigned int i = 0; i < sizeof(tiers) / sizeof(tiers[0]); i++) {
		if (val > tiers[i].above) {
			*rv = val / tiers[i].divisor;
			*suffix = tiers[i].suffix;
			return;
		}
	}

	/* Small enough to be shown without scaling. */
	*rv = val;
	*suffix = ' ';
}
1407
/** Scale a byte count down and pick the matching binary (IEC) suffix.
 *
 * A tier is selected only when the value is strictly greater than the
 * tier's threshold; the value is then divided (truncating) by the
 * tier's divisor. Values up to 1048576 are passed through unchanged
 * with the plain byte suffix.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Pointer to a constant suffix string is stored here:
 *               "PiB", "TiB", "GiB", "MiB", "KiB" or the byte suffix.
 * @param fixed  If true, the byte suffix is padded with spaces to the
 *               width of the other suffixes ("B  "), otherwise it is
 *               just "B".
 *
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	/* Ordered from the largest tier; the first matching row wins. */
	static const struct {
		uint64_t above;      /* Row applies when val > above */
		uint64_t divisor;    /* Power of two the suffix stands for */
		const char *suffix;
	} tiers[] = {
		{ UINT64_C(1152921504606846976), UINT64_C(1125899906842624), "PiB" },
		{ UINT64_C(1125899906842624), UINT64_C(1099511627776), "TiB" },
		{ UINT64_C(1099511627776), UINT64_C(1073741824), "GiB" },
		{ UINT64_C(1073741824), UINT64_C(1048576), "MiB" },
		{ UINT64_C(1048576), UINT64_C(1024), "KiB" }
	};

	for (unsigned int i = 0; i < sizeof(tiers) / sizeof(tiers[0]); i++) {
		if (val > tiers[i].above) {
			*rv = val / tiers[i].divisor;
			*suffix = tiers[i].suffix;
			return;
		}
	}

	/* Small enough to be shown in plain bytes. */
	*rv = val;
	*suffix = fixed ? "B  " : "B";
}
1434
1435	/** @}
1436	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: