Context Navigation

source: mainline/uspace/lib/c/generic/str.c@ 375ab5e

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since 375ab5e was 375ab5e, checked in by Jakub Jermar <jakub@…>, 14 years ago
Merge from lp:~romanenko-oleg/helenos/fat.
Property mode set to `100644`
File size: 34.2 KB

Line
1	/*
2	* Copyright (c) 2005 Martin Decky
3	* Copyright (c) 2008 Jiri Svoboda
4	* Copyright (c) 2011 Martin Sucha
5	* Copyright (c) 2011 Oleg Romanenko
6	* All rights reserved.
7	*
8	* Redistribution and use in source and binary forms, with or without
9	* modification, are permitted provided that the following conditions
10	* are met:
11	*
12	* - Redistributions of source code must retain the above copyright
13	* notice, this list of conditions and the following disclaimer.
14	* - Redistributions in binary form must reproduce the above copyright
15	* notice, this list of conditions and the following disclaimer in the
16	* documentation and/or other materials provided with the distribution.
17	* - The name of the author may not be used to endorse or promote products
18	* derived from this software without specific prior written permission.
19	*
20	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30	*/
31
32	/** @addtogroup libc
33	* @{
34	*/
35	/** @file
36	*/
37
38	#include <str.h>
39	#include <stdlib.h>
40	#include <assert.h>
41	#include <stdint.h>
42	#include <ctype.h>
43	#include <malloc.h>
44	#include <errno.h>
45	#include <align.h>
46	#include <mem.h>
47	#include <str.h>
48
/** Bit mask consisting of lowest @n bits (out of 8), @n in 0..8 */
#define LO_MASK_8(n)  ((uint8_t) ((1u << (n)) - 1))

/** Bit mask consisting of lowest @n bits (out of 32), @n in 0..31 */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1))

/**
 * Bit mask consisting of highest @n bits (out of 8), @n in 0..8.
 *
 * The result is cast back to uint8_t because operator ~ promotes its
 * operand to int, which would otherwise yield a negative value.
 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
60
61	/** Decode a single character from a string.
62	*
63	* Decode a single character from a string of size @a size. Decoding starts
64	* at @a offset and this offset is moved to the beginning of the next
65	* character. In case of decoding error, offset generally advances at least
66	* by one. However, offset is never moved beyond size.
67	*
68	* @param str String (not necessarily NULL-terminated).
69	* @param offset Byte offset in string where to start decoding.
70	* @param size Size of the string (in bytes).
71	*
72	* @return Value of decoded character, U_SPECIAL on decoding error or
73	* NULL if attempt to decode beyond @a size.
74	*
75	*/
76	wchar_t str_decode(const char str, size_t offset, size_t size)
77	{
78	if (*offset + 1 > size)
79	return 0;
80
81	/* First byte read from string */
82	uint8_t b0 = (uint8_t) str[(*offset)++];
83
84	/* Determine code length */
85
86	unsigned int b0_bits; /* Data bits in first byte */
87	unsigned int cbytes; /* Number of continuation bytes */
88
89	if ((b0 & 0x80) == 0) {
90	/* 0xxxxxxx (Plain ASCII) */
91	b0_bits = 7;
92	cbytes = 0;
93	} else if ((b0 & 0xe0) == 0xc0) {
94	/* 110xxxxx 10xxxxxx */
95	b0_bits = 5;
96	cbytes = 1;
97	} else if ((b0 & 0xf0) == 0xe0) {
98	/* 1110xxxx 10xxxxxx 10xxxxxx */
99	b0_bits = 4;
100	cbytes = 2;
101	} else if ((b0 & 0xf8) == 0xf0) {
102	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
103	b0_bits = 3;
104	cbytes = 3;
105	} else {
106	/* 10xxxxxx -- unexpected continuation byte */
107	return U_SPECIAL;
108	}
109
110	if (*offset + cbytes > size)
111	return U_SPECIAL;
112
113	wchar_t ch = b0 & LO_MASK_8(b0_bits);
114
115	/* Decode continuation bytes */
116	while (cbytes > 0) {
117	uint8_t b = (uint8_t) str[(*offset)++];
118
119	/* Must be 10xxxxxx */
120	if ((b & 0xc0) != 0x80)
121	return U_SPECIAL;
122
123	/* Shift data bits to ch */
124	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
125	cbytes--;
126	}
127
128	return ch;
129	}
130
131	/** Encode a single character to string representation.
132	*
133	* Encode a single character to string representation (i.e. UTF-8) and store
134	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
135	* is moved to the position where the next character can be written to.
136	*
137	* @param ch Input character.
138	* @param str Output buffer.
139	* @param offset Byte offset where to start writing.
140	* @param size Size of the output buffer (in bytes).
141	*
142	* @return EOK if the character was encoded successfully, EOVERFLOW if there
143	* was not enough space in the output buffer or EINVAL if the character
144	* code was invalid.
145	*/
146	int chr_encode(const wchar_t ch, char str, size_t offset, size_t size)
147	{
148	if (*offset >= size)
149	return EOVERFLOW;
150
151	if (!chr_check(ch))
152	return EINVAL;
153
154	/* Unsigned version of ch (bit operations should only be done
155	on unsigned types). */
156	uint32_t cc = (uint32_t) ch;
157
158	/* Determine how many continuation bytes are needed */
159
160	unsigned int b0_bits; /* Data bits in first byte */
161	unsigned int cbytes; /* Number of continuation bytes */
162
163	if ((cc & ~LO_MASK_32(7)) == 0) {
164	b0_bits = 7;
165	cbytes = 0;
166	} else if ((cc & ~LO_MASK_32(11)) == 0) {
167	b0_bits = 5;
168	cbytes = 1;
169	} else if ((cc & ~LO_MASK_32(16)) == 0) {
170	b0_bits = 4;
171	cbytes = 2;
172	} else if ((cc & ~LO_MASK_32(21)) == 0) {
173	b0_bits = 3;
174	cbytes = 3;
175	} else {
176	/* Codes longer than 21 bits are not supported */
177	return EINVAL;
178	}
179
180	/* Check for available space in buffer */
181	if (*offset + cbytes >= size)
182	return EOVERFLOW;
183
184	/* Encode continuation bytes */
185	unsigned int i;
186	for (i = cbytes; i > 0; i--) {
187	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
188	cc = cc >> CONT_BITS;
189	}
190
191	/* Encode first byte */
192	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
193
194	/* Advance offset */
195	*offset += cbytes + 1;
196
197	return EOK;
198	}
199
/** Get size of string.
 *
 * Count the bytes occupied by the string @a str, not counting the
 * terminating null byte.
 *
 * @param str String to measure.
 *
 * @return Number of bytes used by the string.
 *
 */
size_t str_size(const char *str)
{
	const char *cursor = str;

	/* Walk to the terminator; the distance travelled is the size. */
	while (*cursor != 0)
		cursor++;

	return (size_t) (cursor - str);
}
219
/** Get size of wide string.
 *
 * Compute how many bytes the wide string @a str occupies, not counting
 * the terminating null wide character.
 *
 * @param str Wide string to measure.
 *
 * @return Number of bytes used by the wide string.
 *
 */
size_t wstr_size(const wchar_t *str)
{
	size_t nchars = wstr_length(str);

	return nchars * sizeof(wchar_t);
}
234
235	/** Get size of string with length limit.
236	*
237	* Get the number of bytes which are used by up to @a max_len first
238	* characters in the string @a str. If @a max_len is greater than
239	* the length of @a str, the entire string is measured (excluding the
240	* NULL-terminator).
241	*
242	* @param str String to consider.
243	* @param max_len Maximum number of characters to measure.
244	*
245	* @return Number of bytes used by the characters.
246	*
247	*/
248	size_t str_lsize(const char *str, size_t max_len)
249	{
250	size_t len = 0;
251	size_t offset = 0;
252
253	while (len < max_len) {
254	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
255	break;
256
257	len++;
258	}
259
260	return offset;
261	}
262
/** Get size of wide string with length limit.
 *
 * Compute the number of bytes occupied by at most @a max_len leading
 * wide characters of @a str. When the wide string is shorter than
 * @a max_len, the whole string is measured (without the terminator).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	/* wstr_nlength() takes its limit in bytes, not in characters. */
	size_t byte_limit = max_len * sizeof(wchar_t);
	size_t nchars = wstr_nlength(str, byte_limit);

	return nchars * sizeof(wchar_t);
}
280
281	/** Get number of characters in a string.
282	*
283	* @param str NULL-terminated string.
284	*
285	* @return Number of characters in string.
286	*
287	*/
288	size_t str_length(const char *str)
289	{
290	size_t len = 0;
291	size_t offset = 0;
292
293	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
294	len++;
295
296	return len;
297	}
298
/** Get number of characters in a wide string.
 *
 * @param wstr Null-terminated wide string.
 *
 * @return Number of wide characters in @a wstr before the terminator.
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
	const wchar_t *cursor;

	for (cursor = wstr; *cursor != 0; cursor++)
		;

	return (size_t) (cursor - wstr);
}
315
/** Get number of characters in a string with size limit.
 *
 * Counting stops at the terminator or as soon as the decoder reports
 * that @a size bytes have been exhausted, whichever comes first.
 *
 * @param str  Null-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in string.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t pos = 0;
	size_t count;

	for (count = 0; str_decode(str, &pos, size) != 0; count++)
		;

	return count;
}
334
/** Get number of characters in a wide string with size limit.
 *
 * @param str  Null-terminated wide string.
 * @param size Maximum number of bytes to consider; rounded down to
 *             a multiple of the wide character size.
 *
 * @return Number of wide characters in @a str within the limit.
 *
 */
size_t wstr_nlength(const wchar_t *str, size_t size)
{
	size_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
	size_t len = 0;
	size_t offset;

	for (offset = 0; offset < limit; offset += sizeof(wchar_t)) {
		if (str[len] == 0)
			break;

		len++;
	}

	return len;
}
356
/** Check whether character is plain ASCII.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0..127.
 *
 */
bool ascii_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 127);
}
369
/** Check whether wide string is plain ASCII.
 *
 * @param wstr Null-terminated wide string to test.
 *
 * @return True if every character of @a wstr is plain ASCII
 *         (an empty string qualifies).
 *
 */
bool wstr_is_ascii(const wchar_t *wstr)
{
	/* Skip the ASCII prefix; stop at the terminator or first offender. */
	while (*wstr && ascii_check(*wstr))
		wstr++;

	/* Pure ASCII iff the scan got all the way to the terminator. */
	return *wstr == 0;
}
381
/** Check whether character is valid
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0..1114111 (0x10FFFF).
 *
 */
bool chr_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 1114111);
}
394
395	/** Compare two NULL terminated strings.
396	*
397	* Do a char-by-char comparison of two NULL-terminated strings.
398	* The strings are considered equal iff they consist of the same
399	* characters on the minimum of their lengths.
400	*
401	* @param s1 First string to compare.
402	* @param s2 Second string to compare.
403	*
404	* @return 0 if the strings are equal, -1 if first is smaller,
405	* 1 if second smaller.
406	*
407	*/
408	int str_cmp(const char s1, const char s2)
409	{
410	wchar_t c1 = 0;
411	wchar_t c2 = 0;
412
413	size_t off1 = 0;
414	size_t off2 = 0;
415
416	while (true) {
417	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
418	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
419
420	if (c1 < c2)
421	return -1;
422
423	if (c1 > c2)
424	return 1;
425
426	if (c1 == 0 \|\| c2 == 0)
427	break;
428	}
429
430	return 0;
431	}
432
433	/** Compare two NULL terminated strings with length limit.
434	*
435	* Do a char-by-char comparison of two NULL-terminated strings.
436	* The strings are considered equal iff they consist of the same
437	* characters on the minimum of their lengths and the length limit.
438	*
439	* @param s1 First string to compare.
440	* @param s2 Second string to compare.
441	* @param max_len Maximum number of characters to consider.
442	*
443	* @return 0 if the strings are equal, -1 if first is smaller,
444	* 1 if second smaller.
445	*
446	*/
447	int str_lcmp(const char s1, const char s2, size_t max_len)
448	{
449	wchar_t c1 = 0;
450	wchar_t c2 = 0;
451
452	size_t off1 = 0;
453	size_t off2 = 0;
454
455	size_t len = 0;
456
457	while (true) {
458	if (len >= max_len)
459	break;
460
461	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
462	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
463
464	if (c1 < c2)
465	return -1;
466
467	if (c1 > c2)
468	return 1;
469
470	if (c1 == 0 \|\| c2 == 0)
471	break;
472
473	++len;
474	}
475
476	return 0;
477
478	}
479
480	/** Copy string.
481	*
482	* Copy source string @a src to destination buffer @a dest.
483	* No more than @a size bytes are written. If the size of the output buffer
484	* is at least one byte, the output string will always be well-formed, i.e.
485	* null-terminated and containing only complete characters.
486	*
487	* @param dest Destination buffer.
488	* @param count Size of the destination buffer (must be > 0).
489	* @param src Source string.
490	*/
491	void str_cpy(char dest, size_t size, const char src)
492	{
493	/* There must be space for a null terminator in the buffer. */
494	assert(size > 0);
495
496	size_t src_off = 0;
497	size_t dest_off = 0;
498
499	wchar_t ch;
500	while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
501	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
502	break;
503	}
504
505	dest[dest_off] = '\0';
506	}
507
508	/** Copy size-limited substring.
509	*
510	* Copy prefix of string @a src of max. size @a size to destination buffer
511	* @a dest. No more than @a size bytes are written. The output string will
512	* always be well-formed, i.e. null-terminated and containing only complete
513	* characters.
514	*
515	* No more than @a n bytes are read from the input string, so it does not
516	* have to be null-terminated.
517	*
518	* @param dest Destination buffer.
519	* @param count Size of the destination buffer (must be > 0).
520	* @param src Source string.
521	* @param n Maximum number of bytes to read from @a src.
522	*/
523	void str_ncpy(char dest, size_t size, const char src, size_t n)
524	{
525	/* There must be space for a null terminator in the buffer. */
526	assert(size > 0);
527
528	size_t src_off = 0;
529	size_t dest_off = 0;
530
531	wchar_t ch;
532	while ((ch = str_decode(src, &src_off, n)) != 0) {
533	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
534	break;
535	}
536
537	dest[dest_off] = '\0';
538	}
539
/** Append one string to another.
 *
 * Append source string @a src to string in destination buffer @a dest.
 * Size of the destination buffer is @a size. If the string already in
 * @a dest leaves no room in the buffer, nothing is changed. Otherwise the
 * output string will be well-formed, i.e. null-terminated and containing
 * only complete characters.
 *
 * @param dest Destination buffer holding a null-terminated string.
 * @param size Size of the destination buffer.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	size_t dstr_size;

	dstr_size = str_size(dest);
	if (dstr_size >= size)
		return;

	/* Copy into the free tail of the buffer, over the old terminator. */
	str_cpy(dest + dstr_size, size - dstr_size, src);
}
561
562	/** Convert space-padded ASCII to string.
563	*
564	* Common legacy text encoding in hardware is 7-bit ASCII fitted into
565	* a fixed-with byte buffer (bit 7 always zero), right-padded with spaces
566	* (ASCII 0x20). Convert space-padded ascii to string representation.
567	*
568	* If the text does not fit into the destination buffer, the function converts
569	* as many characters as possible and returns EOVERFLOW.
570	*
571	* If the text contains non-ASCII bytes (with bit 7 set), the whole string is
572	* converted anyway and invalid characters are replaced with question marks
573	* (U_SPECIAL) and the function returns EIO.
574	*
575	* Regardless of return value upon return @a dest will always be well-formed.
576	*
577	* @param dest Destination buffer
578	* @param size Size of destination buffer
579	* @param src Space-padded ASCII.
580	* @param n Size of the source buffer in bytes.
581	*
582	* @return EOK on success, EOVERFLOW if the text does not fit
583	* destination buffer, EIO if the text contains
584	* non-ASCII bytes.
585	*/
586	int spascii_to_str(char dest, size_t size, const uint8_t src, size_t n)
587	{
588	size_t sidx;
589	size_t didx;
590	size_t dlast;
591	uint8_t byte;
592	int rc;
593	int result;
594
595	/* There must be space for a null terminator in the buffer. */
596	assert(size > 0);
597	result = EOK;
598
599	didx = 0;
600	dlast = 0;
601	for (sidx = 0; sidx < n; ++sidx) {
602	byte = src[sidx];
603	if (!ascii_check(byte)) {
604	byte = U_SPECIAL;
605	result = EIO;
606	}
607
608	rc = chr_encode(byte, dest, &didx, size - 1);
609	if (rc != EOK) {
610	assert(rc == EOVERFLOW);
611	dest[didx] = '\0';
612	return rc;
613	}
614
615	/* Remember dest index after last non-empty character */
616	if (byte != 0x20)
617	dlast = didx;
618	}
619
620	/* Terminate string after last non-empty character */
621	dest[dlast] = '\0';
622	return result;
623	}
624
625	/** Convert wide string to string.
626	*
627	* Convert wide string @a src to string. The output is written to the buffer
628	* specified by @a dest and @a size. @a size must be non-zero and the string
629	* written will always be well-formed.
630	*
631	* @param dest Destination buffer.
632	* @param size Size of the destination buffer.
633	* @param src Source wide string.
634	*
635	* @return EOK, if success, negative otherwise.
636	*/
637	int wstr_to_str(char dest, size_t size, const wchar_t src)
638	{
639	int rc;
640	wchar_t ch;
641	size_t src_idx;
642	size_t dest_off;
643
644	/* There must be space for a null terminator in the buffer. */
645	assert(size > 0);
646
647	src_idx = 0;
648	dest_off = 0;
649
650	while ((ch = src[src_idx++]) != 0) {
651	rc = chr_encode(ch, dest, &dest_off, size - 1);
652	if (rc != EOK)
653	break;
654	}
655
656	dest[dest_off] = '\0';
657	return rc;
658	}
659
660	/** Convert UTF16 string to string.
661	*
662	* Convert utf16 string @a src to string. The output is written to the buffer
663	* specified by @a dest and @a size. @a size must be non-zero and the string
664	* written will always be well-formed. Surrogate pairs also supported.
665	*
666	* @param dest Destination buffer.
667	* @param size Size of the destination buffer.
668	* @param src Source utf16 string.
669	*
670	* @return EOK, if success, negative otherwise.
671	*/
672	int utf16_to_str(char dest, size_t size, const uint16_t src)
673	{
674	size_t idx=0, dest_off=0;
675	wchar_t ch;
676	int rc = EOK;
677
678	/* There must be space for a null terminator in the buffer. */
679	assert(size > 0);
680
681	while (src[idx]) {
682	if ((src[idx] & 0xfc00) == 0xd800) {
683	if (src[idx+1] && (src[idx+1] & 0xfc00) == 0xdc00) {
684	ch = 0x10000;
685	ch += (src[idx] & 0x03FF) << 10;
686	ch += (src[idx+1] & 0x03FF);
687	idx += 2;
688	}
689	else
690	break;
691	} else {
692	ch = src[idx];
693	idx++;
694	}
695	rc = chr_encode(ch, dest, &dest_off, size-1);
696	if (rc != EOK)
697	break;
698	}
699	dest[dest_off] = '\0';
700	return rc;
701	}
702
703	int str_to_utf16(uint16_t dest, size_t size, const char src)
704	{
705	int rc=EOK;
706	size_t offset=0;
707	size_t idx=0;
708	wchar_t c;
709
710	assert(size > 0);
711
712	while ((c = str_decode(src, &offset, STR_NO_LIMIT)) != 0) {
713	if (c > 0x10000) {
714	if (idx+2 >= size-1) {
715	rc=EOVERFLOW;
716	break;
717	}
718	c = (c - 0x10000);
719	dest[idx] = 0xD800 \| (c >> 10);
720	dest[idx+1] = 0xDC00 \| (c & 0x3FF);
721	idx++;
722	} else {
723	dest[idx] = c;
724	}
725
726	idx++;
727	if (idx >= size-1) {
728	rc=EOVERFLOW;
729	break;
730	}
731	}
732
733	dest[idx] = '\0';
734	return rc;
735	}
736
737
738	/** Convert wide string to new string.
739	*
740	* Convert wide string @a src to string. Space for the new string is allocated
741	* on the heap.
742	*
743	* @param src Source wide string.
744	* @return New string.
745	*/
746	char wstr_to_astr(const wchar_t src)
747	{
748	char dbuf[STR_BOUNDS(1)];
749	char *str;
750	wchar_t ch;
751
752	size_t src_idx;
753	size_t dest_off;
754	size_t dest_size;
755
756	/* Compute size of encoded string. */
757
758	src_idx = 0;
759	dest_size = 0;
760
761	while ((ch = src[src_idx++]) != 0) {
762	dest_off = 0;
763	if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
764	break;
765	dest_size += dest_off;
766	}
767
768	str = malloc(dest_size + 1);
769	if (str == NULL)
770	return NULL;
771
772	/* Encode string. */
773
774	src_idx = 0;
775	dest_off = 0;
776
777	while ((ch = src[src_idx++]) != 0) {
778	if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
779	break;
780	}
781
782	str[dest_size] = '\0';
783	return str;
784	}
785
786
787	/** Convert string to wide string.
788	*
789	* Convert string @a src to wide string. The output is written to the
790	* buffer specified by @a dest and @a dlen. @a dlen must be non-zero
791	* and the wide string written will always be null-terminated.
792	*
793	* @param dest Destination buffer.
794	* @param dlen Length of destination buffer (number of wchars).
795	* @param src Source string.
796	*
797	* @return EOK, if success, negative otherwise.
798	*/
799	int str_to_wstr(wchar_t dest, size_t dlen, const char src)
800	{
801	int rc=EOK;
802	size_t offset;
803	size_t di;
804	wchar_t c;
805
806	assert(dlen > 0);
807
808	offset = 0;
809	di = 0;
810
811	do {
812	if (di >= dlen - 1) {
813	rc = EOVERFLOW;
814	break;
815	}
816
817	c = str_decode(src, &offset, STR_NO_LIMIT);
818	dest[di++] = c;
819	} while (c != '\0');
820
821	dest[dlen - 1] = '\0';
822	return rc;
823	}
824
/** Convert string to new wide string.
 *
 * Convert string @a str to wide string. A new null-terminated wide
 * string will be allocated on the heap.
 *
 * @param str Source string.
 *
 * @return New wide string or NULL if the allocation failed.
 */
wchar_t *str_to_awstr(const char *str)
{
	size_t len = str_length(str);

	/* One extra element for the terminator. */
	wchar_t *wstr = calloc(len + 1, sizeof(wchar_t));
	if (wstr == NULL)
		return NULL;

	str_to_wstr(wstr, len + 1, str);
	return wstr;
}
843
844	/** Find first occurence of character in string.
845	*
846	* @param str String to search.
847	* @param ch Character to look for.
848	*
849	* @return Pointer to character in @a str or NULL if not found.
850	*/
851	char str_chr(const char str, wchar_t ch)
852	{
853	wchar_t acc;
854	size_t off = 0;
855	size_t last = 0;
856
857	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
858	if (acc == ch)
859	return (char *) (str + last);
860	last = off;
861	}
862
863	return NULL;
864	}
865
866	/** Find last occurence of character in string.
867	*
868	* @param str String to search.
869	* @param ch Character to look for.
870	*
871	* @return Pointer to character in @a str or NULL if not found.
872	*/
873	char str_rchr(const char str, wchar_t ch)
874	{
875	wchar_t acc;
876	size_t off = 0;
877	size_t last = 0;
878	const char *res = NULL;
879
880	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
881	if (acc == ch)
882	res = (str + last);
883	last = off;
884	}
885
886	return (char *) res;
887	}
888
/** Find first occurrence of character in wide string.
 *
 * @param wstr String to search.
 * @param ch   Character to look for.
 *
 * @return Pointer to character in @a wstr or NULL if not found.
 *         The terminator itself is never matched.
 */
wchar_t *wstr_chr(const wchar_t *wstr, wchar_t ch)
{
	while (*wstr && *wstr != ch)
		wstr++;

	/* Stopped on the terminator means there was no match. */
	if (*wstr)
		return (wchar_t *) wstr;
	else
		return NULL;
}
905
/** Find last occurrence of character in wide string.
 *
 * @param wstr String to search.
 * @param ch   Character to look for.
 *
 * @return Pointer to character in @a wstr or NULL if not found.
 *         The terminator itself is never matched.
 */
wchar_t *wstr_rchr(const wchar_t *wstr, wchar_t ch)
{
	const wchar_t *res = NULL;

	while (*wstr) {
		/* Keep overwriting so that the last match wins. */
		if (*wstr == ch)
			res = wstr;
		wstr++;
	}

	return (wchar_t *) res;
}
923
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position
 * @a pos. The characters after the position are shifted.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	if ((pos > len) || (pos + 1 > max_pos))
		return false;

	/*
	 * Shift the tail (terminator included) one slot to the right,
	 * starting from the end. The "i + 1 > pos" form keeps the loop
	 * condition correct for pos == 0, where i wraps around after
	 * the last iteration.
	 */
	size_t i;
	for (i = len; i + 1 > pos; i--)
		str[i + 1] = str[i];

	str[pos] = ch;

	return true;
}
953
/** Remove a wide character from a wide string.
 *
 * Delete the wide character at index @a pos and close the gap by
 * moving the rest of the string one position towards the start.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	size_t len = wstr_length(str);

	if (pos >= len)
		return false;

	/* Pull the tail, terminator included, one slot to the left. */
	size_t dst;
	for (dst = pos; dst < len; dst++)
		str[dst] = str[dst + 1];

	return true;
}
979
/** Compare two strings byte by byte, ignoring case.
 *
 * Bytes are folded with tolower() before they are compared.
 *
 * @param a First string.
 * @param b Second string.
 *
 * @return Zero if the strings are equal ignoring case, otherwise the
 *         difference of the first pair of folded bytes that differ.
 */
int stricmp(const char *a, const char *b)
{
	size_t i = 0;

	/* tolower() must not be handed a negative plain char value. */
	while (a[i] && b[i] &&
	    (tolower((unsigned char) a[i]) == tolower((unsigned char) b[i])))
		i++;

	return (tolower((unsigned char) a[i]) - tolower((unsigned char) b[i]));
}
989
/** Convert string to a number.
 * Core of strtol and strtoul functions.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @param sgn    It's set to 1 if minus found (left untouched otherwise).
 * @return       Result of conversion, 0 if no number was found or
 *               ULONG_MAX on overflow.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
	unsigned long result = 0;
	unsigned long a, b;
	const char *str = nptr;
	const char *tmpptr;

	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*sgn = 1;
		++str;
	} else if (*str == '+')
		++str;

	if (base) {
		if ((base == 1) || (base > 36)) {
			/* FIXME: set errno to EINVAL */
			return 0;
		}
		if ((base == 16) && (*str == '0') && ((str[1] == 'x') ||
		    (str[1] == 'X'))) {
			str += 2;
		}
	} else {
		/* Auto-detect the base from the prefix */
		base = 10;

		if (*str == '0') {
			base = 8;
			if ((str[1] == 'X') || (str[1] == 'x')) {
				base = 16;
				str += 2;
			}
		}
	}

	tmpptr = str;

	while (*str) {
		unsigned char c = (unsigned char) *str;
		int digit;

		if ((c >= '0') && (c <= '9'))
			digit = c - '0';
		else if ((c >= 'a') && (c <= 'z'))
			digit = c - 'a' + 10;
		else if ((c >= 'A') && (c <= 'Z'))
			digit = c - 'A' + 10;
		else
			break;

		/* Valid digits are 0 .. base - 1 */
		if (digit >= base)
			break;

		/*
		 * Multiply in two halves so that overflow can be detected
		 * without a wider type.
		 */
		a = (result & 0xff) * base + digit;
		b = (result >> 8) * base + (a >> 8);

		if (b > (ULONG_MAX >> 8)) {
			/* overflow */
			/* FIXME: errno = ERANGE*/
			return ULONG_MAX;
		}

		result = (b << 8) + (a & 0xff);
		++str;
	}

	if (str == tmpptr) {
		/*
		 * No number was found => first invalid character is the first
		 * character of the string.
		 */
		/* FIXME: set errno to EINVAL */
		str = nptr;
		result = 0;
	}

	if (endptr)
		*endptr = (char *) str;

	if (nptr == str) {
		/* FIXME: errno = EINVAL */
		return 0;
	}

	return result;
}
1082
/** Convert initial part of string to long int according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return       Result of conversion, clamped to LONG_MIN or LONG_MAX
 *               when the value is out of range.
 */
long int strtol(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = _strtoul(nptr, endptr, base, &sgn);

	if (number > LONG_MAX) {
		if ((sgn) && (number == (unsigned long) (LONG_MAX) + 1)) {
			/* Magnitude of LONG_MIN: in range, but only negated */
			/* FIXME: set 0 to errno */
			return LONG_MIN;
		}
		/* FIXME: set ERANGE to errno */
		return (sgn ? LONG_MIN : LONG_MAX);
	}

	/* number <= LONG_MAX here, so both conversions are exact. */
	return (sgn ? -(long int) number : (long int) number);
}
1114
/** Duplicate string.
 *
 * Allocate a new string with malloc() and copy characters from the
 * source string into it.
 *
 * The duplicate string is always a well-formed null-terminated UTF-8
 * string, but it can differ from the source string on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	/* Room for all bytes of the source plus the terminator. */
	size_t size = str_size(src) + 1;
	char *dest = malloc(size);
	if (dest == NULL)
		return NULL;

	str_cpy(dest, size, src);
	return dest;
}
1141
/** Duplicate string with size limit.
 *
 * Allocate a new string with malloc() and copy up to @a n bytes from the
 * source string into it. No more than @a n + 1 bytes is allocated, but if
 * the size occupied by the source string is smaller than @a n + 1,
 * less is allocated.
 *
 * The duplicate string is always a well-formed null-terminated UTF-8
 * string, but it can differ from the source string on the byte level.
 *
 * @param src Source string.
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	size_t size = str_size(src);
	if (size > n)
		size = n;

	/* One extra byte for the terminator. */
	char *dest = malloc(size + 1);
	if (dest == NULL)
		return NULL;

	str_ncpy(dest, size + 1, src, size);
	return dest;
}
1175
/** Reverse a range of bytes in place.
 *
 * Bytes are swapped pairwise from both ends towards the middle, so
 * multi-byte characters are not preserved.
 *
 * @param begin Pointer to the first byte of the range.
 * @param end   Pointer to the last byte of the range (inclusive).
 */
void str_reverse(char* begin, char* end)
{
	char aux;

	while (end > begin) {
		aux = *end;
		*end-- = *begin;
		*begin++ = aux;
	}
}
1182
1183	int size_t_str(size_t value, int base, char* str, size_t size)
1184	{
1185	static char num[] = "0123456789abcdefghijklmnopqrstuvwxyz";
1186	char* wstr=str;
1187
1188	if (size == 0)
1189	return EINVAL;
1190	if (base<2 \|\| base>35) {
1191	*str='\0';
1192	return EINVAL;
1193	}
1194
1195	do {
1196	*wstr++ = num[value % base];
1197	if (--size == 0)
1198	return EOVERFLOW;
1199	} while(value /= base);
1200	*wstr='\0';
1201
1202	// Reverse string
1203	str_reverse(str,wstr-1);
1204	return EOK;
1205	}
1206
/** Convert initial part of string to unsigned long according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return       Result of conversion; negated in unsigned arithmetic if
 *               the number was preceded by a minus sign.
 */
unsigned long strtoul(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = 0;

	number = _strtoul(nptr, endptr, base, &sgn);

	return (sgn ? -number : number);
}
1229
/** Split a string into tokens (non-reentrant variant).
 *
 * Thin wrapper around strtok_r() that keeps the continuation position
 * in a function-local static variable. Because of that shared state,
 * only one tokenization can be in progress at a time.
 *
 * @param s     String to tokenize, or NULL to continue with the string
 *              passed in the previous call.
 * @param delim String of delimiter characters.
 *
 * @return Pointer to the next token or NULL if there are no more tokens.
 *
 */
char *strtok(char *s, const char *delim)
{
	static char *next;

	return strtok_r(s, delim, &next);
}
1236
/** Split a string into tokens (reentrant variant).
 *
 * The input string is modified: the delimiter that ends the returned
 * token is overwritten with a null terminator.
 *
 * @param s     String to tokenize, or NULL to continue from the position
 *              stored in @a next.
 * @param delim String of delimiter characters.
 * @param next  Storage for the continuation position. It is read when
 *              @a s is NULL and updated on every call that scans a string.
 *
 * @return Pointer to the next token or NULL if there are no more tokens.
 *
 */
char *strtok_r(char *s, const char *delim, char **next)
{
	if (s == NULL)
		s = *next;

	/* Nothing to continue from (no tokenization was started yet). */
	if (s == NULL)
		return NULL;

	/* Skip over leading delimiters. */
	while (*s && (str_chr(delim, *s) != NULL))
		++s;
	char *start = s;

	/* Skip over token characters. */
	while (*s && (str_chr(delim, *s) == NULL))
		++s;
	char *end = s;

	/*
	 * Continue behind the delimiter next time, or stay at the
	 * terminator if the end of the string was reached.
	 */
	*next = (*s ? s + 1 : s);

	if (start == end)
		return NULL;  /* No more tokens. */

	/* Overwrite delimiter with null terminator. */
	*end = '\0';
	return start;
}
1261
1262	/** Convert string to uint64_t (internal variant).
1263	*
1264	* @param nptr Pointer to string.
1265	* @param endptr Pointer to the first invalid character is stored here.
1266	* @param base Zero or number between 2 and 36 inclusive.
1267	* @param neg Indication of unary minus is stored here.
1268	* @apram result Result of the conversion.
1269	*
1270	* @return EOK if conversion was successful.
1271	*
1272	*/
1273	static int str_uint(const char nptr, char *endptr, unsigned int base,
1274	bool neg, uint64_t result)
1275	{
1276	assert(endptr != NULL);
1277	assert(neg != NULL);
1278	assert(result != NULL);
1279
1280	*neg = false;
1281	const char *str = nptr;
1282
1283	/* Ignore leading whitespace */
1284	while (isspace(*str))
1285	str++;
1286
1287	if (*str == '-') {
1288	*neg = true;
1289	str++;
1290	} else if (*str == '+')
1291	str++;
1292
1293	if (base == 0) {
1294	/* Decode base if not specified */
1295	base = 10;
1296
1297	if (*str == '0') {
1298	base = 8;
1299	str++;
1300
1301	switch (*str) {
1302	case 'b':
1303	case 'B':
1304	base = 2;
1305	str++;
1306	break;
1307	case 'o':
1308	case 'O':
1309	base = 8;
1310	str++;
1311	break;
1312	case 'd':
1313	case 'D':
1314	case 't':
1315	case 'T':
1316	base = 10;
1317	str++;
1318	break;
1319	case 'x':
1320	case 'X':
1321	base = 16;
1322	str++;
1323	break;
1324	default:
1325	str--;
1326	}
1327	}
1328	} else {
1329	/* Check base range */
1330	if ((base < 2) \|\| (base > 36)) {
1331	endptr = (char ) str;
1332	return EINVAL;
1333	}
1334	}
1335
1336	*result = 0;
1337	const char *startstr = str;
1338
1339	while (*str != 0) {
1340	unsigned int digit;
1341
1342	if ((str >= 'a') && (str <= 'z'))
1343	digit = *str - 'a' + 10;
1344	else if ((str >= 'A') && (str <= 'Z'))
1345	digit = *str - 'A' + 10;
1346	else if ((str >= '0') && (str <= '9'))
1347	digit = *str - '0';
1348	else
1349	break;
1350
1351	if (digit >= base)
1352	break;
1353
1354	uint64_t prev = *result;
1355	result = (result) * base + digit;
1356
1357	if (*result < prev) {
1358	/* Overflow */
1359	endptr = (char ) str;
1360	return EOVERFLOW;
1361	}
1362
1363	str++;
1364	}
1365
1366	if (str == startstr) {
1367	/*
1368	* No digits were decoded => first invalid character is
1369	* the first character of the string.
1370	*/
1371	str = nptr;
1372	}
1373
1374	endptr = (char ) str;
1375
1376	if (str == nptr)
1377	return EINVAL;
1378
1379	return EOK;
1380	}
1381
1382	/** Convert string to uint64_t.
1383	*
1384	* @param nptr Pointer to string.
1385	* @param endptr If not NULL, pointer to the first invalid character
1386	* is stored here.
1387	* @param base Zero or number between 2 and 36 inclusive.
1388	* @param strict Do not allow any trailing characters.
1389	* @param result Result of the conversion.
1390	*
1391	* @return EOK if conversion was successful.
1392	*
1393	*/
1394	int str_uint64(const char nptr, char *endptr, unsigned int base,
1395	bool strict, uint64_t *result)
1396	{
1397	assert(result != NULL);
1398
1399	bool neg;
1400	char *lendptr;
1401	int ret = str_uint(nptr, &lendptr, base, &neg, result);
1402
1403	if (endptr != NULL)
1404	endptr = (char ) lendptr;
1405
1406	if (ret != EOK)
1407	return ret;
1408
1409	/* Do not allow negative values */
1410	if (neg)
1411	return EINVAL;
1412
1413	/* Check whether we are at the end of
1414	the string in strict mode */
1415	if ((strict) && (*lendptr != 0))
1416	return EINVAL;
1417
1418	return EOK;
1419	}
1420
1421	/** Convert string to size_t.
1422	*
1423	* @param nptr Pointer to string.
1424	* @param endptr If not NULL, pointer to the first invalid character
1425	* is stored here.
1426	* @param base Zero or number between 2 and 36 inclusive.
1427	* @param strict Do not allow any trailing characters.
1428	* @param result Result of the conversion.
1429	*
1430	* @return EOK if conversion was successful.
1431	*
1432	*/
1433	int str_size_t(const char nptr, char *endptr, unsigned int base,
1434	bool strict, size_t *result)
1435	{
1436	assert(result != NULL);
1437
1438	bool neg;
1439	char *lendptr;
1440	uint64_t res;
1441	int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1442
1443	if (endptr != NULL)
1444	endptr = (char ) lendptr;
1445
1446	if (ret != EOK)
1447	return ret;
1448
1449	/* Do not allow negative values */
1450	if (neg)
1451	return EINVAL;
1452
1453	/* Check whether we are at the end of
1454	the string in strict mode */
1455	if ((strict) && (*lendptr != 0))
1456	return EINVAL;
1457
1458	/* Check for overflow */
1459	size_t _res = (size_t) res;
1460	if (_res != res)
1461	return EOVERFLOW;
1462
1463	*result = _res;
1464
1465	return EOK;
1466	}
1467
/** Scale a value to a decimal order of magnitude suitable for display.
 *
 * A unit is used only once the value exceeds one thousand of that unit,
 * so the scaled value always stays above 1000 whenever a suffix other
 * than a space is reported. The division truncates.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Unit prefix letter is stored here: 'k', 'M', 'G', 'T',
 *               'P' or 'E', or a space if the value was not scaled.
 *
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	/* Ordered from the largest unit; the first match wins. */
	static const struct {
		uint64_t above;    /* Unit applies to values above this. */
		uint64_t divisor;  /* Size of the unit. */
		char suffix;       /* Letter matching the divisor. */
	} orders[] = {
		{ UINT64_C(10000000000000000000), UINT64_C(1000000000000000000), 'E' },
		{ UINT64_C(1000000000000000000), UINT64_C(1000000000000000), 'P' },
		{ UINT64_C(1000000000000000), UINT64_C(1000000000000), 'T' },
		{ UINT64_C(1000000000000), UINT64_C(1000000000), 'G' },
		{ UINT64_C(1000000000), UINT64_C(1000000), 'M' },
		{ UINT64_C(1000000), UINT64_C(1000), 'k' }
	};

	for (size_t i = 0; i < sizeof(orders) / sizeof(orders[0]); i++) {
		if (val > orders[i].above) {
			*rv = val / orders[i].divisor;
			*suffix = orders[i].suffix;
			return;
		}
	}

	/* Small values are reported unscaled. */
	*rv = val;
	*suffix = ' ';
}
1493
/** Scale a byte count to a binary order of magnitude suitable for display.
 *
 * A unit is used only once the value exceeds 1024 of that unit, so the
 * scaled value always stays above 1024 whenever a unit other than plain
 * bytes is reported. The division truncates.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Pointer to a constant unit string is stored here: "KiB",
 *               "MiB", "GiB", "TiB" or "PiB", or a byte suffix if the
 *               value was not scaled.
 * @param fixed  If true, the byte suffix is "B " instead of "B".
 *
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	/* Ordered from the largest unit; the first match wins. */
	static const struct {
		uint64_t above;      /* Unit applies to values above this. */
		uint64_t divisor;    /* Size of the unit. */
		const char *suffix;  /* Name matching the divisor. */
	} orders[] = {
		{ UINT64_C(1152921504606846976), UINT64_C(1125899906842624), "PiB" },
		{ UINT64_C(1125899906842624), UINT64_C(1099511627776), "TiB" },
		{ UINT64_C(1099511627776), UINT64_C(1073741824), "GiB" },
		{ UINT64_C(1073741824), UINT64_C(1048576), "MiB" },
		{ UINT64_C(1048576), UINT64_C(1024), "KiB" }
	};

	for (size_t i = 0; i < sizeof(orders) / sizeof(orders[0]); i++) {
		if (val > orders[i].above) {
			*rv = val / orders[i].divisor;
			*suffix = orders[i].suffix;
			return;
		}
	}

	/* Small values are reported in plain bytes. */
	*rv = val;
	*suffix = fixed ? "B " : "B";
}
1520
1521	/** @}
1522	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: