Context Navigation

source: mainline/uspace/lib/c/generic/str.c@ 81e9cb3

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since 81e9cb3 was 81e9cb3, checked in by Jakub Jermar <jakub@…>, 14 years ago
Make wstr_to_str() and str_to_wstr() void functions again.
Property mode set to `100644`
File size: 32.5 KB

Line
1	/*
2	* Copyright (c) 2005 Martin Decky
3	* Copyright (c) 2008 Jiri Svoboda
4	* Copyright (c) 2011 Martin Sucha
5	* Copyright (c) 2011 Oleg Romanenko
6	* All rights reserved.
7	*
8	* Redistribution and use in source and binary forms, with or without
9	* modification, are permitted provided that the following conditions
10	* are met:
11	*
12	* - Redistributions of source code must retain the above copyright
13	* notice, this list of conditions and the following disclaimer.
14	* - Redistributions in binary form must reproduce the above copyright
15	* notice, this list of conditions and the following disclaimer in the
16	* documentation and/or other materials provided with the distribution.
17	* - The name of the author may not be used to endorse or promote products
18	* derived from this software without specific prior written permission.
19	*
20	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30	*/
31
32	/** @addtogroup libc
33	* @{
34	*/
35	/** @file
36	*/
37
38	#include <str.h>
39	#include <stdlib.h>
40	#include <assert.h>
41	#include <stdint.h>
42	#include <ctype.h>
43	#include <malloc.h>
44	#include <errno.h>
45	#include <align.h>
46	#include <mem.h>
47	#include <str.h>
48
/** Byte mask consisting of the lowest @a n bits (out of 8), 0 <= n <= 8 */
#define LO_MASK_8(n)  ((uint8_t) ((1U << (n)) - 1))

/**
 * Word mask consisting of the lowest @a n bits (out of 32), 0 <= n < 32.
 * The shift is done on an unsigned 32-bit operand so that it never
 * overflows a signed int.
 */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1))

/**
 * Byte mask consisting of the highest @a n bits (out of 8), 0 <= n <= 8.
 * The cast keeps the result in the range 0..255; without it the
 * complement is computed on a promoted int and comes out negative.
 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
60
61	/** Decode a single character from a string.
62	*
63	* Decode a single character from a string of size @a size. Decoding starts
64	* at @a offset and this offset is moved to the beginning of the next
65	* character. In case of decoding error, offset generally advances at least
66	* by one. However, offset is never moved beyond size.
67	*
68	* @param str String (not necessarily NULL-terminated).
69	* @param offset Byte offset in string where to start decoding.
70	* @param size Size of the string (in bytes).
71	*
72	* @return Value of decoded character, U_SPECIAL on decoding error or
73	* NULL if attempt to decode beyond @a size.
74	*
75	*/
76	wchar_t str_decode(const char str, size_t offset, size_t size)
77	{
78	if (*offset + 1 > size)
79	return 0;
80
81	/* First byte read from string */
82	uint8_t b0 = (uint8_t) str[(*offset)++];
83
84	/* Determine code length */
85
86	unsigned int b0_bits; /* Data bits in first byte */
87	unsigned int cbytes; /* Number of continuation bytes */
88
89	if ((b0 & 0x80) == 0) {
90	/* 0xxxxxxx (Plain ASCII) */
91	b0_bits = 7;
92	cbytes = 0;
93	} else if ((b0 & 0xe0) == 0xc0) {
94	/* 110xxxxx 10xxxxxx */
95	b0_bits = 5;
96	cbytes = 1;
97	} else if ((b0 & 0xf0) == 0xe0) {
98	/* 1110xxxx 10xxxxxx 10xxxxxx */
99	b0_bits = 4;
100	cbytes = 2;
101	} else if ((b0 & 0xf8) == 0xf0) {
102	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
103	b0_bits = 3;
104	cbytes = 3;
105	} else {
106	/* 10xxxxxx -- unexpected continuation byte */
107	return U_SPECIAL;
108	}
109
110	if (*offset + cbytes > size)
111	return U_SPECIAL;
112
113	wchar_t ch = b0 & LO_MASK_8(b0_bits);
114
115	/* Decode continuation bytes */
116	while (cbytes > 0) {
117	uint8_t b = (uint8_t) str[(*offset)++];
118
119	/* Must be 10xxxxxx */
120	if ((b & 0xc0) != 0x80)
121	return U_SPECIAL;
122
123	/* Shift data bits to ch */
124	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
125	cbytes--;
126	}
127
128	return ch;
129	}
130
131	/** Encode a single character to string representation.
132	*
133	* Encode a single character to string representation (i.e. UTF-8) and store
134	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
135	* is moved to the position where the next character can be written to.
136	*
137	* @param ch Input character.
138	* @param str Output buffer.
139	* @param offset Byte offset where to start writing.
140	* @param size Size of the output buffer (in bytes).
141	*
142	* @return EOK if the character was encoded successfully, EOVERFLOW if there
143	* was not enough space in the output buffer or EINVAL if the character
144	* code was invalid.
145	*/
146	int chr_encode(const wchar_t ch, char str, size_t offset, size_t size)
147	{
148	if (*offset >= size)
149	return EOVERFLOW;
150
151	if (!chr_check(ch))
152	return EINVAL;
153
154	/* Unsigned version of ch (bit operations should only be done
155	on unsigned types). */
156	uint32_t cc = (uint32_t) ch;
157
158	/* Determine how many continuation bytes are needed */
159
160	unsigned int b0_bits; /* Data bits in first byte */
161	unsigned int cbytes; /* Number of continuation bytes */
162
163	if ((cc & ~LO_MASK_32(7)) == 0) {
164	b0_bits = 7;
165	cbytes = 0;
166	} else if ((cc & ~LO_MASK_32(11)) == 0) {
167	b0_bits = 5;
168	cbytes = 1;
169	} else if ((cc & ~LO_MASK_32(16)) == 0) {
170	b0_bits = 4;
171	cbytes = 2;
172	} else if ((cc & ~LO_MASK_32(21)) == 0) {
173	b0_bits = 3;
174	cbytes = 3;
175	} else {
176	/* Codes longer than 21 bits are not supported */
177	return EINVAL;
178	}
179
180	/* Check for available space in buffer */
181	if (*offset + cbytes >= size)
182	return EOVERFLOW;
183
184	/* Encode continuation bytes */
185	unsigned int i;
186	for (i = cbytes; i > 0; i--) {
187	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
188	cc = cc >> CONT_BITS;
189	}
190
191	/* Encode first byte */
192	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
193
194	/* Advance offset */
195	*offset += cbytes + 1;
196
197	return EOK;
198	}
199
/** Get size of string.
 *
 * Count the bytes of @a str that precede its terminating zero byte.
 *
 * @param str String to measure.
 *
 * @return Number of bytes before the terminator.
 *
 */
size_t str_size(const char *str)
{
	const char *end = str;

	/* Walk to the terminator; the distance covered is the size. */
	while (*end != 0)
		end++;

	return (size_t) (end - str);
}
219
/** Get size of wide string.
 *
 * Compute how many bytes the wide characters of @a str occupy, not
 * counting the terminating zero character.
 *
 * @param str Wide string to measure.
 *
 * @return Character count of @a str multiplied by sizeof(wchar_t).
 *
 */
size_t wstr_size(const wchar_t *str)
{
	size_t chars = wstr_length(str);

	return chars * sizeof(wchar_t);
}
234
235	/** Get size of string with length limit.
236	*
237	* Get the number of bytes which are used by up to @a max_len first
238	* characters in the string @a str. If @a max_len is greater than
239	* the length of @a str, the entire string is measured (excluding the
240	* NULL-terminator).
241	*
242	* @param str String to consider.
243	* @param max_len Maximum number of characters to measure.
244	*
245	* @return Number of bytes used by the characters.
246	*
247	*/
248	size_t str_lsize(const char *str, size_t max_len)
249	{
250	size_t len = 0;
251	size_t offset = 0;
252
253	while (len < max_len) {
254	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
255	break;
256
257	len++;
258	}
259
260	return offset;
261	}
262
/** Get size of wide string with length limit.
 *
 * Compute how many bytes are occupied by at most @a max_len leading
 * wide characters of @a str. The terminating zero character is never
 * counted.
 *
 * @param str     Wide string to measure.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the measured wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	/* wstr_nlength() takes its limit in bytes, not in characters. */
	size_t byte_limit = max_len * sizeof(wchar_t);
	size_t chars = wstr_nlength(str, byte_limit);

	return chars * sizeof(wchar_t);
}
280
281	/** Get number of characters in a string.
282	*
283	* @param str NULL-terminated string.
284	*
285	* @return Number of characters in string.
286	*
287	*/
288	size_t str_length(const char *str)
289	{
290	size_t len = 0;
291	size_t offset = 0;
292
293	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
294	len++;
295
296	return len;
297	}
298
/** Get number of characters in a wide string.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return Number of wide characters preceding the terminator.
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
	const wchar_t *cur = wstr;

	/* Advance to the terminator; the distance covered is the length. */
	while (*cur != 0)
		cur++;

	return (size_t) (cur - wstr);
}
315
/** Get number of characters in a string with size limit.
 *
 * Decode @a str character by character until the decoder returns zero,
 * which happens at the terminator or once @a size bytes were consumed.
 *
 * @param str  NULL-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters decoded.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t offset = 0;
	size_t count;

	for (count = 0; str_decode(str, &offset, size) != 0; count++)
		;

	return count;
}
334
/** Get number of characters in a wide string with size limit.
 *
 * Only whole wide characters that fit into the first @a size bytes are
 * examined; a trailing partial character is ignored.
 *
 * @param str  NULL-terminated wide string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of wide characters found before the terminator or
 *         before the size limit, whichever comes first.
 *
 */
size_t wstr_nlength(const wchar_t *str, size_t size)
{
	/* Number of complete wide characters within the byte limit. */
	size_t max_chars = ALIGN_DOWN(size, sizeof(wchar_t)) / sizeof(wchar_t);
	size_t count = 0;

	while ((count < max_chars) && (str[count] != 0))
		count++;

	return count;
}
356
/** Check whether character is plain ASCII.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0 to 127 inclusive.
 *
 */
bool ascii_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 0x7f);
}
369
/** Check whether character is valid.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0 to 0x10FFFF (1114111)
 *         inclusive.
 *
 */
bool chr_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 0x10FFFF);
}
382
383	/** Compare two NULL terminated strings.
384	*
385	* Do a char-by-char comparison of two NULL-terminated strings.
386	* The strings are considered equal iff they consist of the same
387	* characters on the minimum of their lengths.
388	*
389	* @param s1 First string to compare.
390	* @param s2 Second string to compare.
391	*
392	* @return 0 if the strings are equal, -1 if first is smaller,
393	* 1 if second smaller.
394	*
395	*/
396	int str_cmp(const char s1, const char s2)
397	{
398	wchar_t c1 = 0;
399	wchar_t c2 = 0;
400
401	size_t off1 = 0;
402	size_t off2 = 0;
403
404	while (true) {
405	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
406	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
407
408	if (c1 < c2)
409	return -1;
410
411	if (c1 > c2)
412	return 1;
413
414	if (c1 == 0 \|\| c2 == 0)
415	break;
416	}
417
418	return 0;
419	}
420
421	/** Compare two NULL terminated strings with length limit.
422	*
423	* Do a char-by-char comparison of two NULL-terminated strings.
424	* The strings are considered equal iff they consist of the same
425	* characters on the minimum of their lengths and the length limit.
426	*
427	* @param s1 First string to compare.
428	* @param s2 Second string to compare.
429	* @param max_len Maximum number of characters to consider.
430	*
431	* @return 0 if the strings are equal, -1 if first is smaller,
432	* 1 if second smaller.
433	*
434	*/
435	int str_lcmp(const char s1, const char s2, size_t max_len)
436	{
437	wchar_t c1 = 0;
438	wchar_t c2 = 0;
439
440	size_t off1 = 0;
441	size_t off2 = 0;
442
443	size_t len = 0;
444
445	while (true) {
446	if (len >= max_len)
447	break;
448
449	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
450	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
451
452	if (c1 < c2)
453	return -1;
454
455	if (c1 > c2)
456	return 1;
457
458	if (c1 == 0 \|\| c2 == 0)
459	break;
460
461	++len;
462	}
463
464	return 0;
465
466	}
467
468	/** Copy string.
469	*
470	* Copy source string @a src to destination buffer @a dest.
471	* No more than @a size bytes are written. If the size of the output buffer
472	* is at least one byte, the output string will always be well-formed, i.e.
473	* null-terminated and containing only complete characters.
474	*
475	* @param dest Destination buffer.
476	* @param count Size of the destination buffer (must be > 0).
477	* @param src Source string.
478	*/
479	void str_cpy(char dest, size_t size, const char src)
480	{
481	/* There must be space for a null terminator in the buffer. */
482	assert(size > 0);
483
484	size_t src_off = 0;
485	size_t dest_off = 0;
486
487	wchar_t ch;
488	while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
489	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
490	break;
491	}
492
493	dest[dest_off] = '\0';
494	}
495
496	/** Copy size-limited substring.
497	*
498	* Copy prefix of string @a src of max. size @a size to destination buffer
499	* @a dest. No more than @a size bytes are written. The output string will
500	* always be well-formed, i.e. null-terminated and containing only complete
501	* characters.
502	*
503	* No more than @a n bytes are read from the input string, so it does not
504	* have to be null-terminated.
505	*
506	* @param dest Destination buffer.
507	* @param count Size of the destination buffer (must be > 0).
508	* @param src Source string.
509	* @param n Maximum number of bytes to read from @a src.
510	*/
511	void str_ncpy(char dest, size_t size, const char src, size_t n)
512	{
513	/* There must be space for a null terminator in the buffer. */
514	assert(size > 0);
515
516	size_t src_off = 0;
517	size_t dest_off = 0;
518
519	wchar_t ch;
520	while ((ch = str_decode(src, &src_off, n)) != 0) {
521	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
522	break;
523	}
524
525	dest[dest_off] = '\0';
526	}
527
/** Append one string to another.
 *
 * Append source string @a src to string in destination buffer @a dest.
 * Size of the destination buffer is @a size. If the string already in
 * @a dest occupies @a size bytes or more, nothing is done. Otherwise the
 * copy is delegated to str_cpy() on the remaining space.
 *
 * @param dest Destination buffer.
 * @param size Size of the destination buffer.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	size_t dstr_size;

	dstr_size = str_size(dest);
	if (dstr_size >= size)
		return;

	/* Continue writing at the current terminator of dest. */
	str_cpy(dest + dstr_size, size - dstr_size, src);
}
549
550	/** Convert space-padded ASCII to string.
551	*
552	* Common legacy text encoding in hardware is 7-bit ASCII fitted into
553	* a fixed-with byte buffer (bit 7 always zero), right-padded with spaces
554	* (ASCII 0x20). Convert space-padded ascii to string representation.
555	*
556	* If the text does not fit into the destination buffer, the function converts
557	* as many characters as possible and returns EOVERFLOW.
558	*
559	* If the text contains non-ASCII bytes (with bit 7 set), the whole string is
560	* converted anyway and invalid characters are replaced with question marks
561	* (U_SPECIAL) and the function returns EIO.
562	*
563	* Regardless of return value upon return @a dest will always be well-formed.
564	*
565	* @param dest Destination buffer
566	* @param size Size of destination buffer
567	* @param src Space-padded ASCII.
568	* @param n Size of the source buffer in bytes.
569	*
570	* @return EOK on success, EOVERFLOW if the text does not fit
571	* destination buffer, EIO if the text contains
572	* non-ASCII bytes.
573	*/
574	int spascii_to_str(char dest, size_t size, const uint8_t src, size_t n)
575	{
576	size_t sidx;
577	size_t didx;
578	size_t dlast;
579	uint8_t byte;
580	int rc;
581	int result;
582
583	/* There must be space for a null terminator in the buffer. */
584	assert(size > 0);
585	result = EOK;
586
587	didx = 0;
588	dlast = 0;
589	for (sidx = 0; sidx < n; ++sidx) {
590	byte = src[sidx];
591	if (!ascii_check(byte)) {
592	byte = U_SPECIAL;
593	result = EIO;
594	}
595
596	rc = chr_encode(byte, dest, &didx, size - 1);
597	if (rc != EOK) {
598	assert(rc == EOVERFLOW);
599	dest[didx] = '\0';
600	return rc;
601	}
602
603	/* Remember dest index after last non-empty character */
604	if (byte != 0x20)
605	dlast = didx;
606	}
607
608	/* Terminate string after last non-empty character */
609	dest[dlast] = '\0';
610	return result;
611	}
612
613	/** Convert wide string to string.
614	*
615	* Convert wide string @a src to string. The output is written to the buffer
616	* specified by @a dest and @a size. @a size must be non-zero and the string
617	* written will always be well-formed.
618	*
619	* @param dest Destination buffer.
620	* @param size Size of the destination buffer.
621	* @param src Source wide string.
622	*/
623	void wstr_to_str(char dest, size_t size, const wchar_t src)
624	{
625	wchar_t ch;
626	size_t src_idx;
627	size_t dest_off;
628
629	/* There must be space for a null terminator in the buffer. */
630	assert(size > 0);
631
632	src_idx = 0;
633	dest_off = 0;
634
635	while ((ch = src[src_idx++]) != 0) {
636	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
637	break;
638	}
639
640	dest[dest_off] = '\0';
641	}
642
643	/** Convert UTF16 string to string.
644	*
645	* Convert utf16 string @a src to string. The output is written to the buffer
646	* specified by @a dest and @a size. @a size must be non-zero and the string
647	* written will always be well-formed. Surrogate pairs also supported.
648	*
649	* @param dest Destination buffer.
650	* @param size Size of the destination buffer.
651	* @param src Source utf16 string.
652	*
653	* @return EOK, if success, negative otherwise.
654	*/
655	int utf16_to_str(char dest, size_t size, const uint16_t src)
656	{
657	size_t idx=0, dest_off=0;
658	wchar_t ch;
659	int rc = EOK;
660
661	/* There must be space for a null terminator in the buffer. */
662	assert(size > 0);
663
664	while (src[idx]) {
665	if ((src[idx] & 0xfc00) == 0xd800) {
666	if (src[idx+1] && (src[idx+1] & 0xfc00) == 0xdc00) {
667	ch = 0x10000;
668	ch += (src[idx] & 0x03FF) << 10;
669	ch += (src[idx+1] & 0x03FF);
670	idx += 2;
671	}
672	else
673	break;
674	} else {
675	ch = src[idx];
676	idx++;
677	}
678	rc = chr_encode(ch, dest, &dest_off, size-1);
679	if (rc != EOK)
680	break;
681	}
682	dest[dest_off] = '\0';
683	return rc;
684	}
685
686	int str_to_utf16(uint16_t dest, size_t size, const char src)
687	{
688	int rc=EOK;
689	size_t offset=0;
690	size_t idx=0;
691	wchar_t c;
692
693	assert(size > 0);
694
695	while ((c = str_decode(src, &offset, STR_NO_LIMIT)) != 0) {
696	if (c > 0x10000) {
697	if (idx+2 >= size-1) {
698	rc=EOVERFLOW;
699	break;
700	}
701	c = (c - 0x10000);
702	dest[idx] = 0xD800 \| (c >> 10);
703	dest[idx+1] = 0xDC00 \| (c & 0x3FF);
704	idx++;
705	} else {
706	dest[idx] = c;
707	}
708
709	idx++;
710	if (idx >= size-1) {
711	rc=EOVERFLOW;
712	break;
713	}
714	}
715
716	dest[idx] = '\0';
717	return rc;
718	}
719
720
721	/** Convert wide string to new string.
722	*
723	* Convert wide string @a src to string. Space for the new string is allocated
724	* on the heap.
725	*
726	* @param src Source wide string.
727	* @return New string.
728	*/
729	char wstr_to_astr(const wchar_t src)
730	{
731	char dbuf[STR_BOUNDS(1)];
732	char *str;
733	wchar_t ch;
734
735	size_t src_idx;
736	size_t dest_off;
737	size_t dest_size;
738
739	/* Compute size of encoded string. */
740
741	src_idx = 0;
742	dest_size = 0;
743
744	while ((ch = src[src_idx++]) != 0) {
745	dest_off = 0;
746	if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
747	break;
748	dest_size += dest_off;
749	}
750
751	str = malloc(dest_size + 1);
752	if (str == NULL)
753	return NULL;
754
755	/* Encode string. */
756
757	src_idx = 0;
758	dest_off = 0;
759
760	while ((ch = src[src_idx++]) != 0) {
761	if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
762	break;
763	}
764
765	str[dest_size] = '\0';
766	return str;
767	}
768
769
770	/** Convert string to wide string.
771	*
772	* Convert string @a src to wide string. The output is written to the
773	* buffer specified by @a dest and @a dlen. @a dlen must be non-zero
774	* and the wide string written will always be null-terminated.
775	*
776	* @param dest Destination buffer.
777	* @param dlen Length of destination buffer (number of wchars).
778	* @param src Source string.
779	*/
780	void str_to_wstr(wchar_t dest, size_t dlen, const char src)
781	{
782	size_t offset;
783	size_t di;
784	wchar_t c;
785
786	assert(dlen > 0);
787
788	offset = 0;
789	di = 0;
790
791	do {
792	if (di >= dlen - 1)
793	break;
794
795	c = str_decode(src, &offset, STR_NO_LIMIT);
796	dest[di++] = c;
797	} while (c != '\0');
798
799	dest[dlen - 1] = '\0';
800	}
801
/** Convert string to new wide string.
 *
 * Convert string @a str to wide string. A new wide NULL-terminated
 * string is allocated on the heap with calloc().
 *
 * @param str Source string.
 *
 * @return New wide string, or NULL if the allocation failed.
 */
wchar_t *str_to_awstr(const char *str)
{
	size_t len = str_length(str);

	/* One extra element for the terminator. */
	wchar_t *wstr = calloc(len + 1, sizeof(wchar_t));
	if (wstr == NULL)
		return NULL;

	str_to_wstr(wstr, len + 1, str);
	return wstr;
}
820
821	/** Find first occurence of character in string.
822	*
823	* @param str String to search.
824	* @param ch Character to look for.
825	*
826	* @return Pointer to character in @a str or NULL if not found.
827	*/
828	char str_chr(const char str, wchar_t ch)
829	{
830	wchar_t acc;
831	size_t off = 0;
832	size_t last = 0;
833
834	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
835	if (acc == ch)
836	return (char *) (str + last);
837	last = off;
838	}
839
840	return NULL;
841	}
842
843	/** Find last occurence of character in string.
844	*
845	* @param str String to search.
846	* @param ch Character to look for.
847	*
848	* @return Pointer to character in @a str or NULL if not found.
849	*/
850	char str_rchr(const char str, wchar_t ch)
851	{
852	wchar_t acc;
853	size_t off = 0;
854	size_t last = 0;
855	const char *res = NULL;
856
857	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
858	if (acc == ch)
859	res = (str + last);
860	last = off;
861	}
862
863	return (char *) res;
864	}
865
/** Insert a wide character into a wide string.
 *
 * Put @a ch at character index @a pos of @a str. Everything from
 * @a pos onwards, the terminator included, moves one position to the
 * right.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert.
 * @param pos     Character index where to insert.
 * @param max_pos Limit for the insertion position; the request is
 *                refused when @a pos + 1 exceeds it.
 *
 * @return True if the character was inserted, false if @a pos lies
 *         behind the end of the string or violates @a max_pos.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	if (pos > len)
		return false;

	if (pos + 1 > max_pos)
		return false;

	/* Move the tail right by one, starting from the terminator. */
	size_t src = len + 1;
	while (src > pos) {
		src--;
		str[src + 1] = str[src];
	}

	str[pos] = ch;

	return true;
}
895
/** Remove a wide character from a wide string.
 *
 * Drop the character at index @a pos of @a str. Everything behind it,
 * the terminator included, moves one position to the left.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if a character was removed, false if @a pos is not
 *         inside the string.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	size_t len = wstr_length(str);

	if (pos >= len)
		return false;

	/* The last iteration copies the terminator from index len. */
	size_t dst;
	for (dst = pos; dst < len; dst++)
		str[dst] = str[dst + 1];

	return true;
}
921
/** Compare two strings byte by byte, ignoring case.
 *
 * Bytes are folded with tolower() before being compared. Each byte is
 * converted to unsigned char first, because passing a negative char value
 * to the <ctype.h> functions is undefined behavior.
 *
 * @param a First string.
 * @param b Second string.
 *
 * @return Zero if the strings are equal ignoring case, otherwise the
 *         difference of the first pair of folded bytes that differ.
 */
int stricmp(const char *a, const char *b)
{
	size_t i = 0;

	while (a[i] && b[i] &&
	    (tolower((unsigned char) a[i]) == tolower((unsigned char) b[i])))
		i++;

	return (tolower((unsigned char) a[i]) - tolower((unsigned char) b[i]));
}
931
/** Convert string to a number.
 * Core of strtol and strtoul functions.
 *
 * Leading whitespace and one optional sign are skipped. With base 0 the
 * base is derived from the prefix ("0x"/"0X" selects 16, a leading "0"
 * selects 8, anything else 10). With base 16 an optional "0x"/"0X" prefix
 * is skipped. Conversion stops at the first character that is not a digit
 * of the selected base.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character (or @a nptr if no digits were found).
 * @param base   Zero or number between 2 and 36 inclusive.
 * @param sgn    It's set to 1 if minus found; otherwise left untouched.
 * @return       Result of conversion, 0 if the base is invalid or no
 *               digits were found, ULONG_MAX on overflow.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
	unsigned char c;
	unsigned long result = 0;
	unsigned long a, b;
	const char *str = nptr;
	const char *tmpptr;

	/* <ctype.h> functions must not be handed a negative char value. */
	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*sgn = 1;
		++str;
	} else if (*str == '+')
		++str;

	if (base) {
		if ((base < 0) || (base == 1) || (base > 36)) {
			/* FIXME: set errno to EINVAL */
			return 0;
		}
		if ((base == 16) && (*str == '0') && ((str[1] == 'x') ||
		    (str[1] == 'X'))) {
			str += 2;
		}
	} else {
		base = 10;

		if (*str == '0') {
			base = 8;
			if ((str[1] == 'X') || (str[1] == 'x')) {
				base = 16;
				str += 2;
			}
		}
	}

	tmpptr = str;

	while (*str) {
		c = *str;
		/*
		 * Map the character to its digit value. Characters that are
		 * not alphanumeric end up with a value of at least 36 and are
		 * rejected by the range check below.
		 */
		c = (c >= 'a' ? c - 'a' + 10 : (c >= 'A' ? c - 'A' + 10 :
		    (c <= '9' ? c - '0' : 0xff)));
		/* Valid digits are 0 .. base - 1. */
		if (c >= base) {
			break;
		}

		/*
		 * Multiply in two parts (low 8 bits and the rest) so that
		 * overflow can be detected without a wider type.
		 */
		a = (result & 0xff) * base + c;
		b = (result >> 8) * base + (a >> 8);

		if (b > (ULONG_MAX >> 8)) {
			/* overflow */
			/* FIXME: errno = ERANGE */
			return ULONG_MAX;
		}

		result = (b << 8) + (a & 0xff);
		++str;
	}

	if (str == tmpptr) {
		/*
		 * No number was found => first invalid character is the first
		 * character of the string.
		 */
		/* FIXME: set errno to EINVAL */
		str = nptr;
		result = 0;
	}

	if (endptr)
		*endptr = (char *) str;

	if (nptr == str) {
		/* FIXME: errno = EINVAL */
		return 0;
	}

	return result;
}
1024
/** Convert initial part of string to long int according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return       Result of conversion; LONG_MIN or LONG_MAX if the value
 *               does not fit into long int.
 */
long int strtol(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = 0;

	number = _strtoul(nptr, endptr, base, &sgn);

	if (number > LONG_MAX) {
		if ((sgn) && (number == (unsigned long) (LONG_MAX) + 1)) {
			/*
			 * The magnitude is exactly LONG_MAX + 1, so the
			 * negated value is LONG_MIN and still representable.
			 */
			/* FIXME: set 0 to errno */
			return LONG_MIN;
		}
		/* FIXME: set ERANGE to errno */
		return (sgn ? LONG_MIN : LONG_MAX);
	}

	/* number fits into long here, so the conversions are exact. */
	return (sgn ? -(long) number : (long) number);
}
1056
/** Duplicate string.
 *
 * Allocate a new string with malloc() and copy characters from the
 * source string into it using str_cpy().
 *
 * The copy goes through the decoder and encoder, so the duplicate is
 * always null-terminated, but it can differ from the source string on
 * the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	/* One extra byte for the terminator. */
	size_t size = str_size(src) + 1;
	char *dest = malloc(size);
	if (dest == NULL)
		return NULL;

	str_cpy(dest, size, src);
	return dest;
}
1083
/** Duplicate string with size limit.
 *
 * Allocate a new string with malloc() and copy up to @a n bytes from the
 * source string into it using str_ncpy(). No more than @a n + 1 bytes are
 * allocated, but if the size occupied by the source string is smaller
 * than @a n + 1, less is allocated.
 *
 * The copy goes through the decoder and encoder, so the duplicate is
 * always null-terminated, but it can differ from the source string on
 * the byte level.
 *
 * @param src Source string.
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	size_t size = str_size(src);
	if (size > n)
		size = n;

	/* One extra byte for the terminator. */
	char *dest = malloc(size + 1);
	if (dest == NULL)
		return NULL;

	str_ncpy(dest, size + 1, src, size);
	return dest;
}
1117
/** Convert initial part of string to unsigned long according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return       Result of conversion; if a minus sign was present, the
 *               value is negated in unsigned arithmetic.
 */
unsigned long strtoul(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = 0;

	number = _strtoul(nptr, endptr, base, &sgn);

	return (sgn ? -number : number);
}
1140
/** Split string into tokens.
 *
 * Wrapper around strtok_r() that keeps the scanning position in a
 * function-local static variable, which is shared by all callers.
 *
 * @param s     String to tokenize, or NULL to continue with the string
 *              from the previous call.
 * @param delim Set of delimiter characters.
 *
 * @return Pointer to the next token or NULL if there are no more tokens.
 */
char *strtok(char *s, const char *delim)
{
	static char *next;

	return strtok_r(s, delim, &next);
}
1147
/** Split string into tokens (reentrant variant).
 *
 * The string is modified in place: the delimiter that ends the returned
 * token is overwritten with a null terminator.
 *
 * @param s     String to tokenize, or NULL to continue from the position
 *              saved in @a next.
 * @param delim Set of delimiter characters.
 * @param next  Storage for the scanning position between calls.
 *
 * @return Pointer to the next token or NULL if there are no more tokens.
 */
char *strtok_r(char *s, const char *delim, char **next)
{
	char *start, *end;

	if (s == NULL)
		s = *next;

	/* Nothing to scan: no string given and no saved position. */
	if (s == NULL)
		return NULL;

	/* Skip over leading delimiters. */
	while (*s && (str_chr(delim, *s) != NULL))
		++s;
	start = s;

	/* Skip over token characters. */
	while (*s && (str_chr(delim, *s) == NULL))
		++s;
	end = s;

	/* Resume behind the delimiter, or stay on the terminator. */
	*next = (*s ? s + 1 : s);

	if (start == end) {
		return NULL;  /* No more tokens. */
	}

	/* Overwrite delimiter with NULL terminator. */
	*end = '\0';
	return start;
}
1172
/** Convert string to uint64_t (internal variant).
 *
 * Leading whitespace is skipped and an optional sign (`+' or `-') is
 * accepted. If @a base is 0, the base is decoded from the prefix of the
 * number: `0b' binary, `0o' or a plain leading `0' octal, `0d' or `0t'
 * decimal, `0x' hexadecimal (prefix letters in either case), decimal
 * otherwise.
 *
 * @param nptr   Pointer to string.
 * @param endptr Pointer to the first invalid character is stored here.
 *               If no digits were decoded, @a nptr is stored.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @param neg    Indication of unary minus is stored here.
 * @param result Result of the conversion.
 *
 * @return EOK if conversion was successful.
 * @return EINVAL if the base is out of range or no digits were decoded.
 * @return EOVERFLOW if the number does not fit into uint64_t; @a endptr
 *         then points to the digit that caused the overflow.
 *
 */
static int str_uint(const char *nptr, char **endptr, unsigned int base,
    bool *neg, uint64_t *result)
{
	assert(endptr != NULL);
	assert(neg != NULL);
	assert(result != NULL);

	/* Largest representable value, used by the overflow check below. */
	const uint64_t limit = ~(uint64_t) 0;

	*neg = false;
	const char *str = nptr;

	/* Ignore leading whitespace */
	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*neg = true;
		str++;
	} else if (*str == '+')
		str++;

	if (base == 0) {
		/* Decode base if not specified */
		base = 10;

		if (*str == '0') {
			base = 8;
			str++;

			switch (*str) {
			case 'b':
			case 'B':
				base = 2;
				str++;
				break;
			case 'o':
			case 'O':
				base = 8;
				str++;
				break;
			case 'd':
			case 'D':
			case 't':
			case 'T':
				base = 10;
				str++;
				break;
			case 'x':
			case 'X':
				base = 16;
				str++;
				break;
			default:
				/*
				 * No prefix letter: step back so that the
				 * leading zero is decoded as an octal digit.
				 */
				str--;
				break;
			}
		}
	} else {
		/* Check base range */
		if ((base < 2) || (base > 36)) {
			*endptr = (char *) str;
			return EINVAL;
		}
	}

	*result = 0;
	const char *startstr = str;

	while (*str != 0) {
		unsigned int digit;

		if ((*str >= 'a') && (*str <= 'z'))
			digit = *str - 'a' + 10;
		else if ((*str >= 'A') && (*str <= 'Z'))
			digit = *str - 'A' + 10;
		else if ((*str >= '0') && (*str <= '9'))
			digit = *str - '0';
		else
			break;

		if (digit >= base)
			break;

		/*
		 * Check for overflow before multiplying. Comparing the
		 * wrapped product with the previous value is not reliable,
		 * because the product can wrap around and still end up
		 * greater than the previous value.
		 */
		if (*result > (limit - digit) / base) {
			/* Overflow */
			*endptr = (char *) str;
			return EOVERFLOW;
		}

		*result = (*result) * base + digit;
		str++;
	}

	if (str == startstr) {
		/*
		 * No digits were decoded => first invalid character is
		 * the first character of the string.
		 */
		str = nptr;
	}

	*endptr = (char *) str;

	if (str == nptr)
		return EINVAL;

	return EOK;
}
1292
1293	/** Convert string to uint64_t.
1294	*
1295	* @param nptr Pointer to string.
1296	* @param endptr If not NULL, pointer to the first invalid character
1297	* is stored here.
1298	* @param base Zero or number between 2 and 36 inclusive.
1299	* @param strict Do not allow any trailing characters.
1300	* @param result Result of the conversion.
1301	*
1302	* @return EOK if conversion was successful.
1303	*
1304	*/
1305	int str_uint64(const char nptr, char *endptr, unsigned int base,
1306	bool strict, uint64_t *result)
1307	{
1308	assert(result != NULL);
1309
1310	bool neg;
1311	char *lendptr;
1312	int ret = str_uint(nptr, &lendptr, base, &neg, result);
1313
1314	if (endptr != NULL)
1315	endptr = (char ) lendptr;
1316
1317	if (ret != EOK)
1318	return ret;
1319
1320	/* Do not allow negative values */
1321	if (neg)
1322	return EINVAL;
1323
1324	/* Check whether we are at the end of
1325	the string in strict mode */
1326	if ((strict) && (*lendptr != 0))
1327	return EINVAL;
1328
1329	return EOK;
1330	}
1331
1332	/** Convert string to size_t.
1333	*
1334	* @param nptr Pointer to string.
1335	* @param endptr If not NULL, pointer to the first invalid character
1336	* is stored here.
1337	* @param base Zero or number between 2 and 36 inclusive.
1338	* @param strict Do not allow any trailing characters.
1339	* @param result Result of the conversion.
1340	*
1341	* @return EOK if conversion was successful.
1342	*
1343	*/
1344	int str_size_t(const char nptr, char *endptr, unsigned int base,
1345	bool strict, size_t *result)
1346	{
1347	assert(result != NULL);
1348
1349	bool neg;
1350	char *lendptr;
1351	uint64_t res;
1352	int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1353
1354	if (endptr != NULL)
1355	endptr = (char ) lendptr;
1356
1357	if (ret != EOK)
1358	return ret;
1359
1360	/* Do not allow negative values */
1361	if (neg)
1362	return EINVAL;
1363
1364	/* Check whether we are at the end of
1365	the string in strict mode */
1366	if ((strict) && (*lendptr != 0))
1367	return EINVAL;
1368
1369	/* Check for overflow */
1370	size_t _res = (size_t) res;
1371	if (_res != res)
1372	return EOVERFLOW;
1373
1374	*result = _res;
1375
1376	return EOK;
1377	}
1378
/** Scale a value to a decimal order of magnitude for display.
 *
 * The value is divided by the largest power of 1000 for which the
 * scaled result is still greater than 1000, and the SI prefix matching
 * the divisor is reported. Values up to 1000000 are left unscaled.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix SI prefix character matching the divisor is stored here
 *               ('k', 'M', 'G', 'T', 'P', 'E'), or ' ' if the value
 *               was not scaled.
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/* Divided by 10^18 => exa */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* Divided by 10^15 => peta */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		/* Divided by 10^12 => tera */
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		/* Divided by 10^9 => giga */
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		/* Divided by 10^6 => mega */
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		/* Divided by 10^3 => kilo */
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
1404
/** Scale a byte count to a binary order of magnitude for display.
 *
 * The value is divided by the largest power of 1024 for which the
 * scaled result is still greater than 1024, and the IEC unit matching
 * the divisor is reported. Values up to 1048576 are left unscaled.
 *
 * @param val    Value (in bytes) to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Pointer to a constant string with the unit matching the
 *               divisor is stored here ("KiB", "MiB", "GiB", "TiB",
 *               "PiB", or the byte unit if the value was not scaled).
 * @param fixed  If true, the byte unit is padded with spaces to the
 *               same three-character width as the other units.
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* Above 2^60: divided by 2^50 => pebibytes */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		/* Above 2^50: divided by 2^40 => tebibytes */
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		/* Above 2^40: divided by 2^30 => gibibytes */
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		/* Above 2^30: divided by 2^20 => mebibytes */
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		/* Above 2^20: divided by 2^10 => kibibytes */
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		if (fixed)
			*suffix = "B  ";
		else
			*suffix = "B";
	}
}
1431
1432	/** @}
1433	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: