Context Navigation

str.c@ 972c60ce

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since 972c60ce was 1737bfb, checked in by Jakub Jermar <jakub@…>, 14 years ago

cp improvements
(Thanks to Maurizio Lombardi)

Support for the -r flag (recursive directory copying) and the -f flag

(force copying even if the destination file already exists) has been
added.

Property mode set to 100644

File size: 36.6 KB

Line
1	/*
2	* Copyright (c) 2005 Martin Decky
3	* Copyright (c) 2008 Jiri Svoboda
4	* Copyright (c) 2011 Martin Sucha
5	* Copyright (c) 2011 Oleg Romanenko
6	* All rights reserved.
7	*
8	* Redistribution and use in source and binary forms, with or without
9	* modification, are permitted provided that the following conditions
10	* are met:
11	*
12	* - Redistributions of source code must retain the above copyright
13	* notice, this list of conditions and the following disclaimer.
14	* - Redistributions in binary form must reproduce the above copyright
15	* notice, this list of conditions and the following disclaimer in the
16	* documentation and/or other materials provided with the distribution.
17	* - The name of the author may not be used to endorse or promote products
18	* derived from this software without specific prior written permission.
19	*
20	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30	*/
31
32	/** @addtogroup libc
33	* @{
34	*/
35	/** @file
36	*/
37
38	#include <str.h>
39	#include <stdlib.h>
40	#include <assert.h>
41	#include <stdint.h>
42	#include <ctype.h>
43	#include <malloc.h>
44	#include <errno.h>
45	#include <align.h>
46	#include <mem.h>
47	#include <str.h>
48
/** Byte mask consisting of lowest @n bits (out of 8) */
#define LO_MASK_8(n)  ((uint8_t) ((1u << (n)) - 1))

/** Byte mask consisting of lowest @n bits (out of 32), valid for @n < 32 */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1))

/** Byte mask consisting of highest @n bits (out of 8)
 *
 * The result is cast back to uint8_t because operator ~ promotes its
 * operand to int and would otherwise yield a negative value.
 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
60
61	/** Decode a single character from a string.
62	*
63	* Decode a single character from a string of size @a size. Decoding starts
64	* at @a offset and this offset is moved to the beginning of the next
65	* character. In case of decoding error, offset generally advances at least
66	* by one. However, offset is never moved beyond size.
67	*
68	* @param str String (not necessarily NULL-terminated).
69	* @param offset Byte offset in string where to start decoding.
70	* @param size Size of the string (in bytes).
71	*
72	* @return Value of decoded character, U_SPECIAL on decoding error or
73	* NULL if attempt to decode beyond @a size.
74	*
75	*/
76	wchar_t str_decode(const char str, size_t offset, size_t size)
77	{
78	if (*offset + 1 > size)
79	return 0;
80
81	/* First byte read from string */
82	uint8_t b0 = (uint8_t) str[(*offset)++];
83
84	/* Determine code length */
85
86	unsigned int b0_bits; /* Data bits in first byte */
87	unsigned int cbytes; /* Number of continuation bytes */
88
89	if ((b0 & 0x80) == 0) {
90	/* 0xxxxxxx (Plain ASCII) */
91	b0_bits = 7;
92	cbytes = 0;
93	} else if ((b0 & 0xe0) == 0xc0) {
94	/* 110xxxxx 10xxxxxx */
95	b0_bits = 5;
96	cbytes = 1;
97	} else if ((b0 & 0xf0) == 0xe0) {
98	/* 1110xxxx 10xxxxxx 10xxxxxx */
99	b0_bits = 4;
100	cbytes = 2;
101	} else if ((b0 & 0xf8) == 0xf0) {
102	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
103	b0_bits = 3;
104	cbytes = 3;
105	} else {
106	/* 10xxxxxx -- unexpected continuation byte */
107	return U_SPECIAL;
108	}
109
110	if (*offset + cbytes > size)
111	return U_SPECIAL;
112
113	wchar_t ch = b0 & LO_MASK_8(b0_bits);
114
115	/* Decode continuation bytes */
116	while (cbytes > 0) {
117	uint8_t b = (uint8_t) str[(*offset)++];
118
119	/* Must be 10xxxxxx */
120	if ((b & 0xc0) != 0x80)
121	return U_SPECIAL;
122
123	/* Shift data bits to ch */
124	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
125	cbytes--;
126	}
127
128	return ch;
129	}
130
131	/** Encode a single character to string representation.
132	*
133	* Encode a single character to string representation (i.e. UTF-8) and store
134	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
135	* is moved to the position where the next character can be written to.
136	*
137	* @param ch Input character.
138	* @param str Output buffer.
139	* @param offset Byte offset where to start writing.
140	* @param size Size of the output buffer (in bytes).
141	*
142	* @return EOK if the character was encoded successfully, EOVERFLOW if there
143	* was not enough space in the output buffer or EINVAL if the character
144	* code was invalid.
145	*/
146	int chr_encode(const wchar_t ch, char str, size_t offset, size_t size)
147	{
148	if (*offset >= size)
149	return EOVERFLOW;
150
151	if (!chr_check(ch))
152	return EINVAL;
153
154	/* Unsigned version of ch (bit operations should only be done
155	on unsigned types). */
156	uint32_t cc = (uint32_t) ch;
157
158	/* Determine how many continuation bytes are needed */
159
160	unsigned int b0_bits; /* Data bits in first byte */
161	unsigned int cbytes; /* Number of continuation bytes */
162
163	if ((cc & ~LO_MASK_32(7)) == 0) {
164	b0_bits = 7;
165	cbytes = 0;
166	} else if ((cc & ~LO_MASK_32(11)) == 0) {
167	b0_bits = 5;
168	cbytes = 1;
169	} else if ((cc & ~LO_MASK_32(16)) == 0) {
170	b0_bits = 4;
171	cbytes = 2;
172	} else if ((cc & ~LO_MASK_32(21)) == 0) {
173	b0_bits = 3;
174	cbytes = 3;
175	} else {
176	/* Codes longer than 21 bits are not supported */
177	return EINVAL;
178	}
179
180	/* Check for available space in buffer */
181	if (*offset + cbytes >= size)
182	return EOVERFLOW;
183
184	/* Encode continuation bytes */
185	unsigned int i;
186	for (i = cbytes; i > 0; i--) {
187	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
188	cc = cc >> CONT_BITS;
189	}
190
191	/* Encode first byte */
192	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
193
194	/* Advance offset */
195	*offset += cbytes + 1;
196
197	return EOK;
198	}
199
/** Get size of string.
 *
 * Get the number of bytes which are used by the string @a str (excluding the
 * NULL-terminator).
 *
 * @param str String to consider.
 *
 * @return Number of bytes used by the string
 *
 */
size_t str_size(const char *str)
{
	const char *end = str;

	/* Walk to the terminating zero byte. */
	while (*end != 0)
		end++;

	return (size_t) (end - str);
}
219
/** Get size of wide string.
 *
 * Get the number of bytes which are used by the wide string @a str
 * (excluding the NULL-terminator).
 *
 * @param str Wide string to consider.
 *
 * @return Number of bytes used by the wide string
 *
 */
size_t wstr_size(const wchar_t *str)
{
	size_t chars = wstr_length(str);

	return chars * sizeof(wchar_t);
}
234
235	/** Get size of string with length limit.
236	*
237	* Get the number of bytes which are used by up to @a max_len first
238	* characters in the string @a str. If @a max_len is greater than
239	* the length of @a str, the entire string is measured (excluding the
240	* NULL-terminator).
241	*
242	* @param str String to consider.
243	* @param max_len Maximum number of characters to measure.
244	*
245	* @return Number of bytes used by the characters.
246	*
247	*/
248	size_t str_lsize(const char *str, size_t max_len)
249	{
250	size_t len = 0;
251	size_t offset = 0;
252
253	while (len < max_len) {
254	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
255	break;
256
257	len++;
258	}
259
260	return offset;
261	}
262
/** Get size of wide string with length limit.
 *
 * Get the number of bytes which are used by up to @a max_len first
 * wide characters in the wide string @a str. If @a max_len is greater than
 * the length of @a str, the entire wide string is measured (excluding the
 * NULL-terminator).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	/* wstr_nlength() takes its limit in bytes, not in characters. */
	size_t chars = wstr_nlength(str, max_len * sizeof(wchar_t));

	return chars * sizeof(wchar_t);
}
280
281	/** Get number of characters in a string.
282	*
283	* @param str NULL-terminated string.
284	*
285	* @return Number of characters in string.
286	*
287	*/
288	size_t str_length(const char *str)
289	{
290	size_t len = 0;
291	size_t offset = 0;
292
293	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
294	len++;
295
296	return len;
297	}
298
/** Get number of characters in a wide string.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return Number of characters in @a wstr.
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
	size_t len = 0;

	while (wstr[len] != 0)
		len++;

	return len;
}
315
/** Get number of characters in a string with size limit.
 *
 * @param str  NULL-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in string.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t count;
	size_t offset = 0;

	/* str_decode() returns 0 at the terminator and at the size limit. */
	for (count = 0; str_decode(str, &offset, size) != 0; count++)
		;

	return count;
}
334
/** Get number of characters in a wide string with size limit.
 *
 * @param str  NULL-terminated wide string.
 * @param size Maximum number of bytes to consider. A trailing partial
 *             wide character is ignored.
 *
 * @return Number of characters in the wide string.
 *
 */
size_t wstr_nlength(const wchar_t *str, size_t size)
{
	size_t len = 0;
	/* Only whole wide characters within the limit are examined. */
	size_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
	size_t offset = 0;

	while ((offset < limit) && (*str++ != 0)) {
		len++;
		offset += sizeof(wchar_t);
	}

	return len;
}
356
/** Check whether character is plain ASCII.
 *
 * @param ch Character to check.
 *
 * @return True if character is plain ASCII (0 through 127).
 *
 */
bool ascii_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 127);
}
369
/** Check whether character is valid
 *
 * @param ch Character to check.
 *
 * @return True if character lies in the Unicode code point range
 *         (0 through 0x10FFFF).
 *
 */
bool chr_check(wchar_t ch)
{
	/* 0x10FFFF == 1114111, the highest Unicode code point. */
	return (ch >= 0) && (ch <= 0x10FFFF);
}
382
383	/** Compare two NULL terminated strings.
384	*
385	* Do a char-by-char comparison of two NULL-terminated strings.
386	* The strings are considered equal iff they consist of the same
387	* characters on the minimum of their lengths.
388	*
389	* @param s1 First string to compare.
390	* @param s2 Second string to compare.
391	*
392	* @return 0 if the strings are equal, -1 if first is smaller,
393	* 1 if second smaller.
394	*
395	*/
396	int str_cmp(const char s1, const char s2)
397	{
398	wchar_t c1 = 0;
399	wchar_t c2 = 0;
400
401	size_t off1 = 0;
402	size_t off2 = 0;
403
404	while (true) {
405	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
406	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
407
408	if (c1 < c2)
409	return -1;
410
411	if (c1 > c2)
412	return 1;
413
414	if (c1 == 0 \|\| c2 == 0)
415	break;
416	}
417
418	return 0;
419	}
420
421	/** Compare two NULL terminated strings with length limit.
422	*
423	* Do a char-by-char comparison of two NULL-terminated strings.
424	* The strings are considered equal iff they consist of the same
425	* characters on the minimum of their lengths and the length limit.
426	*
427	* @param s1 First string to compare.
428	* @param s2 Second string to compare.
429	* @param max_len Maximum number of characters to consider.
430	*
431	* @return 0 if the strings are equal, -1 if first is smaller,
432	* 1 if second smaller.
433	*
434	*/
435	int str_lcmp(const char s1, const char s2, size_t max_len)
436	{
437	wchar_t c1 = 0;
438	wchar_t c2 = 0;
439
440	size_t off1 = 0;
441	size_t off2 = 0;
442
443	size_t len = 0;
444
445	while (true) {
446	if (len >= max_len)
447	break;
448
449	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
450	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
451
452	if (c1 < c2)
453	return -1;
454
455	if (c1 > c2)
456	return 1;
457
458	if (c1 == 0 \|\| c2 == 0)
459	break;
460
461	++len;
462	}
463
464	return 0;
465
466	}
467
468	/** Copy string.
469	*
470	* Copy source string @a src to destination buffer @a dest.
471	* No more than @a size bytes are written. If the size of the output buffer
472	* is at least one byte, the output string will always be well-formed, i.e.
473	* null-terminated and containing only complete characters.
474	*
475	* @param dest Destination buffer.
476	* @param count Size of the destination buffer (must be > 0).
477	* @param src Source string.
478	*/
479	void str_cpy(char dest, size_t size, const char src)
480	{
481	/* There must be space for a null terminator in the buffer. */
482	assert(size > 0);
483
484	size_t src_off = 0;
485	size_t dest_off = 0;
486
487	wchar_t ch;
488	while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
489	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
490	break;
491	}
492
493	dest[dest_off] = '\0';
494	}
495
496	/** Copy size-limited substring.
497	*
498	* Copy prefix of string @a src of max. size @a size to destination buffer
499	* @a dest. No more than @a size bytes are written. The output string will
500	* always be well-formed, i.e. null-terminated and containing only complete
501	* characters.
502	*
503	* No more than @a n bytes are read from the input string, so it does not
504	* have to be null-terminated.
505	*
506	* @param dest Destination buffer.
507	* @param count Size of the destination buffer (must be > 0).
508	* @param src Source string.
509	* @param n Maximum number of bytes to read from @a src.
510	*/
511	void str_ncpy(char dest, size_t size, const char src, size_t n)
512	{
513	/* There must be space for a null terminator in the buffer. */
514	assert(size > 0);
515
516	size_t src_off = 0;
517	size_t dest_off = 0;
518
519	wchar_t ch;
520	while ((ch = str_decode(src, &src_off, n)) != 0) {
521	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
522	break;
523	}
524
525	dest[dest_off] = '\0';
526	}
527
/** Append one string to another.
 *
 * Append source string @a src to string in destination buffer @a dest.
 * Size of the destination buffer is @a size. If the size of the output buffer
 * is at least one byte, the output string will always be well-formed, i.e.
 * null-terminated and containing only complete characters.
 *
 * @param dest Destination buffer.
 * @param size Size of the destination buffer.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	size_t dstr_size = str_size(dest);

	/* Buffer already full (or not terminated within size): nothing fits. */
	if (dstr_size >= size)
		return;

	str_cpy(dest + dstr_size, size - dstr_size, src);
}
549
550	/** Convert space-padded ASCII to string.
551	*
552	* Common legacy text encoding in hardware is 7-bit ASCII fitted into
553	* a fixed-width byte buffer (bit 7 always zero), right-padded with spaces
554	* (ASCII 0x20). Convert space-padded ascii to string representation.
555	*
556	* If the text does not fit into the destination buffer, the function converts
557	* as many characters as possible and returns EOVERFLOW.
558	*
559	* If the text contains non-ASCII bytes (with bit 7 set), the whole string is
560	* converted anyway and invalid characters are replaced with question marks
561	* (U_SPECIAL) and the function returns EIO.
562	*
563	* Regardless of return value upon return @a dest will always be well-formed.
564	*
565	* @param dest Destination buffer
566	* @param size Size of destination buffer
567	* @param src Space-padded ASCII.
568	* @param n Size of the source buffer in bytes.
569	*
570	* @return EOK on success, EOVERFLOW if the text does not fit
571	* destination buffer, EIO if the text contains
572	* non-ASCII bytes.
573	*/
574	int spascii_to_str(char dest, size_t size, const uint8_t src, size_t n)
575	{
576	size_t sidx;
577	size_t didx;
578	size_t dlast;
579	uint8_t byte;
580	int rc;
581	int result;
582
583	/* There must be space for a null terminator in the buffer. */
584	assert(size > 0);
585	result = EOK;
586
587	didx = 0;
588	dlast = 0;
589	for (sidx = 0; sidx < n; ++sidx) {
590	byte = src[sidx];
591	if (!ascii_check(byte)) {
592	byte = U_SPECIAL;
593	result = EIO;
594	}
595
596	rc = chr_encode(byte, dest, &didx, size - 1);
597	if (rc != EOK) {
598	assert(rc == EOVERFLOW);
599	dest[didx] = '\0';
600	return rc;
601	}
602
603	/* Remember dest index after last non-empty character */
604	if (byte != 0x20)
605	dlast = didx;
606	}
607
608	/* Terminate string after last non-empty character */
609	dest[dlast] = '\0';
610	return result;
611	}
612
613	/** Convert wide string to string.
614	*
615	* Convert wide string @a src to string. The output is written to the buffer
616	* specified by @a dest and @a size. @a size must be non-zero and the string
617	* written will always be well-formed.
618	*
619	* @param dest Destination buffer.
620	* @param size Size of the destination buffer.
621	* @param src Source wide string.
622	*/
623	void wstr_to_str(char dest, size_t size, const wchar_t src)
624	{
625	wchar_t ch;
626	size_t src_idx;
627	size_t dest_off;
628
629	/* There must be space for a null terminator in the buffer. */
630	assert(size > 0);
631
632	src_idx = 0;
633	dest_off = 0;
634
635	while ((ch = src[src_idx++]) != 0) {
636	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
637	break;
638	}
639
640	dest[dest_off] = '\0';
641	}
642
643	/** Convert UTF16 string to string.
644	*
645	* Convert utf16 string @a src to string. The output is written to the buffer
646	* specified by @a dest and @a size. @a size must be non-zero and the string
647	* written will always be well-formed. Surrogate pairs also supported.
648	*
649	* @param dest Destination buffer.
650	* @param size Size of the destination buffer.
651	* @param src Source utf16 string.
652	*
653	* @return EOK, if success, negative otherwise.
654	*/
655	int utf16_to_str(char dest, size_t size, const uint16_t src)
656	{
657	size_t idx = 0, dest_off = 0;
658	wchar_t ch;
659	int rc = EOK;
660
661	/* There must be space for a null terminator in the buffer. */
662	assert(size > 0);
663
664	while (src[idx]) {
665	if ((src[idx] & 0xfc00) == 0xd800) {
666	if (src[idx + 1] && (src[idx + 1] & 0xfc00) == 0xdc00) {
667	ch = 0x10000;
668	ch += (src[idx] & 0x03FF) << 10;
669	ch += (src[idx + 1] & 0x03FF);
670	idx += 2;
671	}
672	else
673	break;
674	} else {
675	ch = src[idx];
676	idx++;
677	}
678	rc = chr_encode(ch, dest, &dest_off, size - 1);
679	if (rc != EOK)
680	break;
681	}
682	dest[dest_off] = '\0';
683	return rc;
684	}
685
686	int str_to_utf16(uint16_t dest, size_t size, const char src)
687	{
688	int rc = EOK;
689	size_t offset = 0;
690	size_t idx = 0;
691	wchar_t c;
692
693	assert(size > 0);
694
695	while ((c = str_decode(src, &offset, STR_NO_LIMIT)) != 0) {
696	if (c > 0x10000) {
697	if (idx + 2 >= size - 1) {
698	rc = EOVERFLOW;
699	break;
700	}
701	c = (c - 0x10000);
702	dest[idx] = 0xD800 \| (c >> 10);
703	dest[idx + 1] = 0xDC00 \| (c & 0x3FF);
704	idx++;
705	} else {
706	dest[idx] = c;
707	}
708
709	idx++;
710	if (idx >= size - 1) {
711	rc = EOVERFLOW;
712	break;
713	}
714	}
715
716	dest[idx] = '\0';
717	return rc;
718	}
719
720
721	/** Convert wide string to new string.
722	*
723	* Convert wide string @a src to string. Space for the new string is allocated
724	* on the heap.
725	*
726	* @param src Source wide string.
727	* @return New string.
728	*/
729	char wstr_to_astr(const wchar_t src)
730	{
731	char dbuf[STR_BOUNDS(1)];
732	char *str;
733	wchar_t ch;
734
735	size_t src_idx;
736	size_t dest_off;
737	size_t dest_size;
738
739	/* Compute size of encoded string. */
740
741	src_idx = 0;
742	dest_size = 0;
743
744	while ((ch = src[src_idx++]) != 0) {
745	dest_off = 0;
746	if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
747	break;
748	dest_size += dest_off;
749	}
750
751	str = malloc(dest_size + 1);
752	if (str == NULL)
753	return NULL;
754
755	/* Encode string. */
756
757	src_idx = 0;
758	dest_off = 0;
759
760	while ((ch = src[src_idx++]) != 0) {
761	if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
762	break;
763	}
764
765	str[dest_size] = '\0';
766	return str;
767	}
768
769
770	/** Convert string to wide string.
771	*
772	* Convert string @a src to wide string. The output is written to the
773	* buffer specified by @a dest and @a dlen. @a dlen must be non-zero
774	* and the wide string written will always be null-terminated.
775	*
776	* @param dest Destination buffer.
777	* @param dlen Length of destination buffer (number of wchars).
778	* @param src Source string.
779	*/
780	void str_to_wstr(wchar_t dest, size_t dlen, const char src)
781	{
782	size_t offset;
783	size_t di;
784	wchar_t c;
785
786	assert(dlen > 0);
787
788	offset = 0;
789	di = 0;
790
791	do {
792	if (di >= dlen - 1)
793	break;
794
795	c = str_decode(src, &offset, STR_NO_LIMIT);
796	dest[di++] = c;
797	} while (c != '\0');
798
799	dest[dlen - 1] = '\0';
800	}
801
/** Convert string to wide string.
 *
 * Convert string @a str to wide string. A new wide NULL-terminated
 * string will be allocated on the heap.
 *
 * @param str Source string.
 *
 * @return New wide string or NULL if the allocation failed.
 */
wchar_t *str_to_awstr(const char *str)
{
	size_t len = str_length(str);

	/* One extra element for the terminator. */
	wchar_t *wstr = calloc(len + 1, sizeof(wchar_t));
	if (wstr == NULL)
		return NULL;

	str_to_wstr(wstr, len + 1, str);
	return wstr;
}
820
821	/** Find first occurence of character in string.
822	*
823	* @param str String to search.
824	* @param ch Character to look for.
825	*
826	* @return Pointer to character in @a str or NULL if not found.
827	*/
828	char str_chr(const char str, wchar_t ch)
829	{
830	wchar_t acc;
831	size_t off = 0;
832	size_t last = 0;
833
834	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
835	if (acc == ch)
836	return (char *) (str + last);
837	last = off;
838	}
839
840	return NULL;
841	}
842
843	/** Removes specified trailing characters from a string.
844	*
845	* @param str String to remove from.
846	* @param ch Character to remove.
847	*/
848	void str_rtrim(char *str, wchar_t ch)
849	{
850	size_t off = 0;
851	size_t pos = 0;
852	wchar_t c;
853	bool update_last_chunk = true;
854	char *last_chunk = NULL;
855
856	while ((c = str_decode(str, &off, STR_NO_LIMIT))) {
857	if (c != ch) {
858	update_last_chunk = true;
859	last_chunk = NULL;
860	} else if (update_last_chunk) {
861	update_last_chunk = false;
862	last_chunk = (str + pos);
863	}
864	pos = off;
865	}
866
867	if (last_chunk)
868	*last_chunk = '\0';
869	}
870
871	/** Removes specified leading characters from a string.
872	*
873	* @param str String to remove from.
874	* @param ch Character to remove.
875	*/
876	void str_ltrim(char *str, wchar_t ch)
877	{
878	wchar_t acc;
879	size_t off = 0;
880	size_t pos = 0;
881	size_t str_sz = str_size(str);
882
883	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
884	if (acc != ch)
885	break;
886	else
887	pos = off;
888	}
889
890	if (pos > 0) {
891	memmove(str, &str[pos], str_sz - pos);
892	pos = str_sz - pos;
893	str[str_sz - pos] = '\0';
894	}
895	}
896
897	/** Find last occurence of character in string.
898	*
899	* @param str String to search.
900	* @param ch Character to look for.
901	*
902	* @return Pointer to character in @a str or NULL if not found.
903	*/
904	char str_rchr(const char str, wchar_t ch)
905	{
906	wchar_t acc;
907	size_t off = 0;
908	size_t last = 0;
909	const char *res = NULL;
910
911	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
912	if (acc == ch)
913	res = (str + last);
914	last = off;
915	}
916
917	return (char *) res;
918	}
919
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position
 * @a pos. The characters after the position are shifted.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert to.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	/*
	 * NOTE(review): only @a pos is checked against @a max_pos, while the
	 * shift below writes up to str[len + 1]. Confirm that callers
	 * guarantee room for one more character.
	 */
	if ((pos > len) || (pos + 1 > max_pos))
		return false;

	/* Shift the tail, terminator included, one slot to the right. */
	size_t i;
	for (i = len + 1; i > pos; i--)
		str[i] = str[i - 1];

	str[pos] = ch;

	return true;
}
949
/** Remove a wide character from a wide string.
 *
 * Remove a wide character from a wide string at position
 * @a pos. The characters after the position are shifted.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	size_t len = wstr_length(str);

	if (pos >= len)
		return false;

	/* Shift the tail, terminator included, one slot to the left. */
	size_t i;
	for (i = pos; i < len; i++)
		str[i] = str[i + 1];

	return true;
}
975
/** Compare two strings byte by byte, ignoring case.
 *
 * Case folding is done with tolower(), one byte at a time.
 *
 * @param a First string to compare.
 * @param b Second string to compare.
 *
 * @return Zero if the strings are equal ignoring case, otherwise the
 *         difference of the first pair of lowercased bytes that differ.
 */
int stricmp(const char *a, const char *b)
{
	size_t i = 0;

	for (;;) {
		/* tolower() is only defined for unsigned char values and EOF. */
		int la = tolower((unsigned char) a[i]);
		int lb = tolower((unsigned char) b[i]);

		if ((la != lb) || (la == 0))
			return la - lb;

		i++;
	}
}
985
/** Convert string to a number.
 * Core of strtol and strtoul functions.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @param sgn    It's set to 1 if minus found.
 * @return Result of conversion, 0 if no number was found or the base
 *         is invalid, ULONG_MAX on overflow.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
	unsigned char c;
	unsigned long result = 0;
	unsigned long a, b;
	const char *str = nptr;
	const char *tmpptr;

	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*sgn = 1;
		++str;
	} else if (*str == '+')
		++str;

	if (base) {
		if ((base == 1) || (base > 36)) {
			/* FIXME: set errno to EINVAL */
			return 0;
		}
		if ((base == 16) && (*str == '0') && ((str[1] == 'x') ||
		    (str[1] == 'X'))) {
			str += 2;
		}
	} else {
		base = 10;

		if (*str == '0') {
			base = 8;
			if ((str[1] == 'X') || (str[1] == 'x')) {
				base = 16;
				str += 2;
			}
		}
	}

	tmpptr = str;

	while (*str) {
		/* Map the character to its digit value, 0xff if it is none. */
		c = *str;
		c = (c >= 'a' ? c - 'a' + 10 : (c >= 'A' ? c - 'A' + 10 :
		    (c <= '9' ? c - '0' : 0xff)));

		/* Valid digits are 0 .. base - 1. */
		if (c >= base) {
			break;
		}

		/*
		 * Multiply in two parts so that overflow shows up in b
		 * before it can wrap the result.
		 */
		a = (result & 0xff) * base + c;
		b = (result >> 8) * base + (a >> 8);

		if (b > (ULONG_MAX >> 8)) {
			/* overflow */
			/* FIXME: errno = ERANGE*/
			return ULONG_MAX;
		}

		result = (b << 8) + (a & 0xff);
		++str;
	}

	if (str == tmpptr) {
		/*
		 * No number was found => first invalid character is the first
		 * character of the string.
		 */
		/* FIXME: set errno to EINVAL */
		str = nptr;
		result = 0;
	}

	if (endptr)
		*endptr = (char *) str;

	if (nptr == str) {
		/*FIXME: errno = EINVAL*/
		return 0;
	}

	return result;
}
1078
/** Convert initial part of string to long int according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return Result of conversion, clamped to LONG_MIN or LONG_MAX when
 *         the value is out of range.
 */
long int strtol(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = 0;

	number = _strtoul(nptr, endptr, base, &sgn);

	if (number > LONG_MAX) {
		/* The magnitude of LONG_MIN is one more than LONG_MAX. */
		if ((sgn) && (number == (unsigned long) (LONG_MAX) + 1)) {
			/* FIXME: set 0 to errno */
			return number;
		}
		/* FIXME: set ERANGE to errno */
		return (sgn ? LONG_MIN : LONG_MAX);
	}

	return (sgn ? -number : number);
}
1110
/** Duplicate string.
 *
 * Allocate a new string with malloc() and copy characters from the
 * source string into it.
 *
 * The duplicate string is always a well-formed null-terminated UTF-8
 * string, but it can differ from the source string on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	/* One extra byte for the terminator. */
	size_t size = str_size(src) + 1;
	char *dest = (char *) malloc(size);
	if (dest == NULL)
		return (char *) NULL;

	str_cpy(dest, size, src);
	return dest;
}
1137
/** Duplicate string with size limit.
 *
 * Allocate a new string with malloc() and copy up to @a n bytes from the
 * source string into it. No more than @a n + 1 bytes is allocated, but if
 * the size occupied by the source string is smaller than @a n + 1, less
 * is allocated.
 *
 * The duplicate string is always a well-formed null-terminated UTF-8
 * string, but it can differ from the source string on the byte level.
 *
 * @param src Source string.
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	size_t size = str_size(src);
	if (size > n)
		size = n;

	/* One extra byte for the terminator. */
	char *dest = (char *) malloc(size + 1);
	if (dest == NULL)
		return (char *) NULL;

	str_ncpy(dest, size + 1, src, size);
	return dest;
}
1171
/** Convert initial part of string to unsigned long according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return Result of conversion. A leading minus negates the value in
 *         unsigned arithmetic.
 */
unsigned long strtoul(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = 0;

	number = _strtoul(nptr, endptr, base, &sgn);

	return (sgn ? -number : number);
}
1194
/** Split string into tokens (non-reentrant variant).
 *
 * Thin wrapper around strtok_r() that keeps the continuation position in
 * a function-local static variable. Because that state is shared by all
 * callers, interleaved tokenization of two strings (or use from several
 * threads) must use strtok_r() instead.
 *
 * @param s     String to tokenize, or NULL to continue with the string
 *              passed in the previous call.
 * @param delim Set of delimiter characters.
 *
 * @return Pointer to the next token, or NULL if there are no more tokens.
 *
 */
char *strtok(char *s, const char *delim)
{
	static char *next;

	return strtok_r(s, delim, &next);
}
1201
/** Split string into tokens (reentrant variant).
 *
 * The string is modified in place: the delimiter that ends the returned
 * token is overwritten with a null terminator.
 *
 * @param s     String to tokenize, or NULL to continue from the position
 *              saved in @a next by a previous call.
 * @param delim Set of delimiter characters.
 * @param next  Continuation state; updated on every call that scans
 *              the string.
 *
 * @return Pointer to the next token, or NULL if there are no more tokens
 *         (or if there is no string to continue with).
 *
 */
char *strtok_r(char *s, const char *delim, char **next)
{
	char *start, *end;

	if (s == NULL)
		s = *next;

	/* Nothing to continue with (e.g. continuation requested first). */
	if (s == NULL)
		return NULL;

	/* Skip over leading delimiters. */
	while (*s && (str_chr(delim, *s) != NULL))
		++s;
	start = s;

	/* Skip over token characters. */
	while (*s && (str_chr(delim, *s) == NULL))
		++s;
	end = s;

	/*
	 * Resume after the delimiter, or stay on the terminator if the
	 * end of the string has been reached.
	 */
	*next = (*s ? s + 1 : s);

	if (start == end) {
		return NULL;	/* No more tokens. */
	}

	/* Overwrite delimiter with null terminator. */
	*end = '\0';
	return start;
}
1226
1227	/** Convert string to uint64_t (internal variant).
1228	*
1229	* @param nptr Pointer to string.
1230	* @param endptr Pointer to the first invalid character is stored here.
1231	* @param base Zero or number between 2 and 36 inclusive.
1232	* @param neg Indication of unary minus is stored here.
1233	* @apram result Result of the conversion.
1234	*
1235	* @return EOK if conversion was successful.
1236	*
1237	*/
1238	static int str_uint(const char nptr, char *endptr, unsigned int base,
1239	bool neg, uint64_t result)
1240	{
1241	assert(endptr != NULL);
1242	assert(neg != NULL);
1243	assert(result != NULL);
1244
1245	*neg = false;
1246	const char *str = nptr;
1247
1248	/* Ignore leading whitespace */
1249	while (isspace(*str))
1250	str++;
1251
1252	if (*str == '-') {
1253	*neg = true;
1254	str++;
1255	} else if (*str == '+')
1256	str++;
1257
1258	if (base == 0) {
1259	/* Decode base if not specified */
1260	base = 10;
1261
1262	if (*str == '0') {
1263	base = 8;
1264	str++;
1265
1266	switch (*str) {
1267	case 'b':
1268	case 'B':
1269	base = 2;
1270	str++;
1271	break;
1272	case 'o':
1273	case 'O':
1274	base = 8;
1275	str++;
1276	break;
1277	case 'd':
1278	case 'D':
1279	case 't':
1280	case 'T':
1281	base = 10;
1282	str++;
1283	break;
1284	case 'x':
1285	case 'X':
1286	base = 16;
1287	str++;
1288	break;
1289	default:
1290	str--;
1291	}
1292	}
1293	} else {
1294	/* Check base range */
1295	if ((base < 2) \|\| (base > 36)) {
1296	endptr = (char ) str;
1297	return EINVAL;
1298	}
1299	}
1300
1301	*result = 0;
1302	const char *startstr = str;
1303
1304	while (*str != 0) {
1305	unsigned int digit;
1306
1307	if ((str >= 'a') && (str <= 'z'))
1308	digit = *str - 'a' + 10;
1309	else if ((str >= 'A') && (str <= 'Z'))
1310	digit = *str - 'A' + 10;
1311	else if ((str >= '0') && (str <= '9'))
1312	digit = *str - '0';
1313	else
1314	break;
1315
1316	if (digit >= base)
1317	break;
1318
1319	uint64_t prev = *result;
1320	result = (result) * base + digit;
1321
1322	if (*result < prev) {
1323	/* Overflow */
1324	endptr = (char ) str;
1325	return EOVERFLOW;
1326	}
1327
1328	str++;
1329	}
1330
1331	if (str == startstr) {
1332	/*
1333	* No digits were decoded => first invalid character is
1334	* the first character of the string.
1335	*/
1336	str = nptr;
1337	}
1338
1339	endptr = (char ) str;
1340
1341	if (str == nptr)
1342	return EINVAL;
1343
1344	return EOK;
1345	}
1346
1347	/** Convert string to uint8_t.
1348	*
1349	* @param nptr Pointer to string.
1350	* @param endptr If not NULL, pointer to the first invalid character
1351	* is stored here.
1352	* @param base Zero or number between 2 and 36 inclusive.
1353	* @param strict Do not allow any trailing characters.
1354	* @param result Result of the conversion.
1355	*
1356	* @return EOK if conversion was successful.
1357	*
1358	*/
1359	int str_uint8_t(const char nptr, char *endptr, unsigned int base,
1360	bool strict, uint8_t *result)
1361	{
1362	assert(result != NULL);
1363
1364	bool neg;
1365	char *lendptr;
1366	uint64_t res;
1367	int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1368
1369	if (endptr != NULL)
1370	endptr = (char ) lendptr;
1371
1372	if (ret != EOK)
1373	return ret;
1374
1375	/* Do not allow negative values */
1376	if (neg)
1377	return EINVAL;
1378
1379	/* Check whether we are at the end of
1380	the string in strict mode */
1381	if ((strict) && (*lendptr != 0))
1382	return EINVAL;
1383
1384	/* Check for overflow */
1385	uint8_t _res = (uint8_t) res;
1386	if (_res != res)
1387	return EOVERFLOW;
1388
1389	*result = _res;
1390
1391	return EOK;
1392	}
1393
1394	/** Convert string to uint16_t.
1395	*
1396	* @param nptr Pointer to string.
1397	* @param endptr If not NULL, pointer to the first invalid character
1398	* is stored here.
1399	* @param base Zero or number between 2 and 36 inclusive.
1400	* @param strict Do not allow any trailing characters.
1401	* @param result Result of the conversion.
1402	*
1403	* @return EOK if conversion was successful.
1404	*
1405	*/
1406	int str_uint16_t(const char nptr, char *endptr, unsigned int base,
1407	bool strict, uint16_t *result)
1408	{
1409	assert(result != NULL);
1410
1411	bool neg;
1412	char *lendptr;
1413	uint64_t res;
1414	int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1415
1416	if (endptr != NULL)
1417	endptr = (char ) lendptr;
1418
1419	if (ret != EOK)
1420	return ret;
1421
1422	/* Do not allow negative values */
1423	if (neg)
1424	return EINVAL;
1425
1426	/* Check whether we are at the end of
1427	the string in strict mode */
1428	if ((strict) && (*lendptr != 0))
1429	return EINVAL;
1430
1431	/* Check for overflow */
1432	uint16_t _res = (uint16_t) res;
1433	if (_res != res)
1434	return EOVERFLOW;
1435
1436	*result = _res;
1437
1438	return EOK;
1439	}
1440
1441	/** Convert string to uint32_t.
1442	*
1443	* @param nptr Pointer to string.
1444	* @param endptr If not NULL, pointer to the first invalid character
1445	* is stored here.
1446	* @param base Zero or number between 2 and 36 inclusive.
1447	* @param strict Do not allow any trailing characters.
1448	* @param result Result of the conversion.
1449	*
1450	* @return EOK if conversion was successful.
1451	*
1452	*/
1453	int str_uint32_t(const char nptr, char *endptr, unsigned int base,
1454	bool strict, uint32_t *result)
1455	{
1456	assert(result != NULL);
1457
1458	bool neg;
1459	char *lendptr;
1460	uint64_t res;
1461	int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1462
1463	if (endptr != NULL)
1464	endptr = (char ) lendptr;
1465
1466	if (ret != EOK)
1467	return ret;
1468
1469	/* Do not allow negative values */
1470	if (neg)
1471	return EINVAL;
1472
1473	/* Check whether we are at the end of
1474	the string in strict mode */
1475	if ((strict) && (*lendptr != 0))
1476	return EINVAL;
1477
1478	/* Check for overflow */
1479	uint32_t _res = (uint32_t) res;
1480	if (_res != res)
1481	return EOVERFLOW;
1482
1483	*result = _res;
1484
1485	return EOK;
1486	}
1487
1488	/** Convert string to uint64_t.
1489	*
1490	* @param nptr Pointer to string.
1491	* @param endptr If not NULL, pointer to the first invalid character
1492	* is stored here.
1493	* @param base Zero or number between 2 and 36 inclusive.
1494	* @param strict Do not allow any trailing characters.
1495	* @param result Result of the conversion.
1496	*
1497	* @return EOK if conversion was successful.
1498	*
1499	*/
1500	int str_uint64(const char nptr, char *endptr, unsigned int base,
1501	bool strict, uint64_t *result)
1502	{
1503	assert(result != NULL);
1504
1505	bool neg;
1506	char *lendptr;
1507	int ret = str_uint(nptr, &lendptr, base, &neg, result);
1508
1509	if (endptr != NULL)
1510	endptr = (char ) lendptr;
1511
1512	if (ret != EOK)
1513	return ret;
1514
1515	/* Do not allow negative values */
1516	if (neg)
1517	return EINVAL;
1518
1519	/* Check whether we are at the end of
1520	the string in strict mode */
1521	if ((strict) && (*lendptr != 0))
1522	return EINVAL;
1523
1524	return EOK;
1525	}
1526
1527	/** Convert string to size_t.
1528	*
1529	* @param nptr Pointer to string.
1530	* @param endptr If not NULL, pointer to the first invalid character
1531	* is stored here.
1532	* @param base Zero or number between 2 and 36 inclusive.
1533	* @param strict Do not allow any trailing characters.
1534	* @param result Result of the conversion.
1535	*
1536	* @return EOK if conversion was successful.
1537	*
1538	*/
1539	int str_size_t(const char nptr, char *endptr, unsigned int base,
1540	bool strict, size_t *result)
1541	{
1542	assert(result != NULL);
1543
1544	bool neg;
1545	char *lendptr;
1546	uint64_t res;
1547	int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1548
1549	if (endptr != NULL)
1550	endptr = (char ) lendptr;
1551
1552	if (ret != EOK)
1553	return ret;
1554
1555	/* Do not allow negative values */
1556	if (neg)
1557	return EINVAL;
1558
1559	/* Check whether we are at the end of
1560	the string in strict mode */
1561	if ((strict) && (*lendptr != 0))
1562	return EINVAL;
1563
1564	/* Check for overflow */
1565	size_t _res = (size_t) res;
1566	if (_res != res)
1567	return EOVERFLOW;
1568
1569	*result = _res;
1570
1571	return EOK;
1572	}
1573
/** Scale a value down and pick the matching decimal (SI) order suffix.
 *
 * A value is scaled only once it exceeds 1000 times the unit it is
 * scaled to, so the scaled value keeps at least four significant digits.
 * The suffix always names the divisor actually used:
 * 10^3 `k', 10^6 `M', 10^9 `G', 10^12 `T', 10^15 `P', 10^18 `E'.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Suffix character is stored here; a space when the value
 *               is not scaled.
 *
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/* Divisor is 10^18, i.e. exa. */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* Divisor is 10^15, i.e. peta. */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
1599
/** Scale a byte count down and pick the matching binary (IEC) suffix.
 *
 * A value is scaled only once it exceeds 1024 times the unit it is
 * scaled to. The suffix always names the divisor actually used:
 * 2^10 "KiB", 2^20 "MiB", 2^30 "GiB", 2^40 "TiB", 2^50 "PiB".
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Pointer to a constant suffix string is stored here.
 * @param fixed  If true, the suffix of an unscaled value is padded
 *               with a trailing space.
 *
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* Divisor is 2^50, i.e. pebi. */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		/*
		 * NOTE(review): the other suffixes are three characters
		 * wide; confirm whether the padded form is meant to match
		 * that width.
		 */
		if (fixed)
			*suffix = "B ";
		else
			*suffix = "B";
	}
}
1626
1627	/** @}
1628	*/

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: mainline/uspace/lib/c/generic/str.c@ 972c60ce

Download in other formats: