Context Navigation

str.c@ 61e29a4d

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since 61e29a4d was 61e29a4d, checked in by Oleg Romanenko <romanenko.oleg@…>, 14 years ago

Modifications in str.c

Add function wstr_is_ascii
Add return value (error code) to functions: wstr_to_str and str_to_wstr

Property mode set to 100644

File size: 29.2 KB

Line
1	/*
2	* Copyright (c) 2005 Martin Decky
3	* Copyright (c) 2008 Jiri Svoboda
4	* All rights reserved.
5	*
6	* Redistribution and use in source and binary forms, with or without
7	* modification, are permitted provided that the following conditions
8	* are met:
9	*
10	* - Redistributions of source code must retain the above copyright
11	* notice, this list of conditions and the following disclaimer.
12	* - Redistributions in binary form must reproduce the above copyright
13	* notice, this list of conditions and the following disclaimer in the
14	* documentation and/or other materials provided with the distribution.
15	* - The name of the author may not be used to endorse or promote products
16	* derived from this software without specific prior written permission.
17	*
18	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28	*/
29
30	/** @addtogroup libc
31	* @{
32	*/
33	/** @file
34	*/
35
36	#include <str.h>
37	#include <stdlib.h>
38	#include <assert.h>
39	#include <stdint.h>
40	#include <ctype.h>
41	#include <malloc.h>
42	#include <errno.h>
43	#include <align.h>
44	#include <mem.h>
45	#include <str.h>
46
/** Mask selecting the lowest @a n bits of a byte (0 <= n <= 8) */
#define LO_MASK_8(n)  ((uint8_t) ((1U << (n)) - 1U))

/** Mask selecting the lowest @a n bits of a 32-bit word (0 <= n <= 31) */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1U))

/** Mask selecting the highest @a n bits of a byte (0 <= n <= 8) */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
58
59	/** Decode a single character from a string.
60	*
61	* Decode a single character from a string of size @a size. Decoding starts
62	* at @a offset and this offset is moved to the beginning of the next
63	* character. In case of decoding error, offset generally advances at least
64	* by one. However, offset is never moved beyond size.
65	*
66	* @param str String (not necessarily NULL-terminated).
67	* @param offset Byte offset in string where to start decoding.
68	* @param size Size of the string (in bytes).
69	*
70	* @return Value of decoded character, U_SPECIAL on decoding error or
71	* NULL if attempt to decode beyond @a size.
72	*
73	*/
74	wchar_t str_decode(const char str, size_t offset, size_t size)
75	{
76	if (*offset + 1 > size)
77	return 0;
78
79	/* First byte read from string */
80	uint8_t b0 = (uint8_t) str[(*offset)++];
81
82	/* Determine code length */
83
84	unsigned int b0_bits; /* Data bits in first byte */
85	unsigned int cbytes; /* Number of continuation bytes */
86
87	if ((b0 & 0x80) == 0) {
88	/* 0xxxxxxx (Plain ASCII) */
89	b0_bits = 7;
90	cbytes = 0;
91	} else if ((b0 & 0xe0) == 0xc0) {
92	/* 110xxxxx 10xxxxxx */
93	b0_bits = 5;
94	cbytes = 1;
95	} else if ((b0 & 0xf0) == 0xe0) {
96	/* 1110xxxx 10xxxxxx 10xxxxxx */
97	b0_bits = 4;
98	cbytes = 2;
99	} else if ((b0 & 0xf8) == 0xf0) {
100	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
101	b0_bits = 3;
102	cbytes = 3;
103	} else {
104	/* 10xxxxxx -- unexpected continuation byte */
105	return U_SPECIAL;
106	}
107
108	if (*offset + cbytes > size)
109	return U_SPECIAL;
110
111	wchar_t ch = b0 & LO_MASK_8(b0_bits);
112
113	/* Decode continuation bytes */
114	while (cbytes > 0) {
115	uint8_t b = (uint8_t) str[(*offset)++];
116
117	/* Must be 10xxxxxx */
118	if ((b & 0xc0) != 0x80)
119	return U_SPECIAL;
120
121	/* Shift data bits to ch */
122	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
123	cbytes--;
124	}
125
126	return ch;
127	}
128
129	/** Encode a single character to string representation.
130	*
131	* Encode a single character to string representation (i.e. UTF-8) and store
132	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
133	* is moved to the position where the next character can be written to.
134	*
135	* @param ch Input character.
136	* @param str Output buffer.
137	* @param offset Byte offset where to start writing.
138	* @param size Size of the output buffer (in bytes).
139	*
140	* @return EOK if the character was encoded successfully, EOVERFLOW if there
141	* was not enough space in the output buffer or EINVAL if the character
142	* code was invalid.
143	*/
144	int chr_encode(const wchar_t ch, char str, size_t offset, size_t size)
145	{
146	if (*offset >= size)
147	return EOVERFLOW;
148
149	if (!chr_check(ch))
150	return EINVAL;
151
152	/* Unsigned version of ch (bit operations should only be done
153	on unsigned types). */
154	uint32_t cc = (uint32_t) ch;
155
156	/* Determine how many continuation bytes are needed */
157
158	unsigned int b0_bits; /* Data bits in first byte */
159	unsigned int cbytes; /* Number of continuation bytes */
160
161	if ((cc & ~LO_MASK_32(7)) == 0) {
162	b0_bits = 7;
163	cbytes = 0;
164	} else if ((cc & ~LO_MASK_32(11)) == 0) {
165	b0_bits = 5;
166	cbytes = 1;
167	} else if ((cc & ~LO_MASK_32(16)) == 0) {
168	b0_bits = 4;
169	cbytes = 2;
170	} else if ((cc & ~LO_MASK_32(21)) == 0) {
171	b0_bits = 3;
172	cbytes = 3;
173	} else {
174	/* Codes longer than 21 bits are not supported */
175	return EINVAL;
176	}
177
178	/* Check for available space in buffer */
179	if (*offset + cbytes >= size)
180	return EOVERFLOW;
181
182	/* Encode continuation bytes */
183	unsigned int i;
184	for (i = cbytes; i > 0; i--) {
185	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
186	cc = cc >> CONT_BITS;
187	}
188
189	/* Encode first byte */
190	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
191
192	/* Advance offset */
193	*offset += cbytes + 1;
194
195	return EOK;
196	}
197
/** Get size of string.
 *
 * Get the number of bytes which are used by the string @a str (excluding the
 * NULL-terminator).
 *
 * @param str String to consider.
 *
 * @return Number of bytes used by the string
 *
 */
size_t str_size(const char *str)
{
	const char *end = str;

	/* Walk to the terminating zero byte */
	while (*end != '\0')
		end++;

	return (size_t) (end - str);
}
217
/** Get size of wide string.
 *
 * Get the number of bytes which are used by the wide string @a str
 * (excluding the NULL-terminator).
 *
 * @param str Wide string to consider.
 *
 * @return Number of bytes used by the wide string
 *
 */
size_t wstr_size(const wchar_t *str)
{
	size_t chars = wstr_length(str);

	return chars * sizeof(wchar_t);
}
232
233	/** Get size of string with length limit.
234	*
235	* Get the number of bytes which are used by up to @a max_len first
236	* characters in the string @a str. If @a max_len is greater than
237	* the length of @a str, the entire string is measured (excluding the
238	* NULL-terminator).
239	*
240	* @param str String to consider.
241	* @param max_len Maximum number of characters to measure.
242	*
243	* @return Number of bytes used by the characters.
244	*
245	*/
246	size_t str_lsize(const char *str, size_t max_len)
247	{
248	size_t len = 0;
249	size_t offset = 0;
250
251	while (len < max_len) {
252	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
253	break;
254
255	len++;
256	}
257
258	return offset;
259	}
260
/** Get size of wide string with length limit.
 *
 * Get the number of bytes which are used by up to @a max_len first
 * wide characters in the wide string @a str. If @a max_len is greater than
 * the length of @a str, the entire wide string is measured (excluding the
 * NULL-terminator).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	/* wstr_nlength() takes its limit in bytes, not in characters */
	size_t chars = wstr_nlength(str, max_len * sizeof(wchar_t));

	return chars * sizeof(wchar_t);
}
278
279	/** Get number of characters in a string.
280	*
281	* @param str NULL-terminated string.
282	*
283	* @return Number of characters in string.
284	*
285	*/
286	size_t str_length(const char *str)
287	{
288	size_t len = 0;
289	size_t offset = 0;
290
291	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
292	len++;
293
294	return len;
295	}
296
/** Get number of characters in a wide string.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return Number of characters in @a wstr (excluding the terminator).
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
	size_t len;

	for (len = 0; wstr[len] != 0; len++)
		;

	return len;
}
313
/** Get number of characters in a string with size limit.
 *
 * Counting stops when str_decode() returns 0, i.e. at the terminator or
 * when @a size bytes have been consumed.
 *
 * @param str  NULL-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in string.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t offset = 0;
	size_t len;

	for (len = 0; str_decode(str, &offset, size) != 0; len++)
		;

	return len;
}
332
/** Get number of characters in a wide string with size limit.
 *
 * @param str  NULL-terminated wide string.
 * @param size Maximum number of bytes to consider. A trailing partial
 *             wide character (size not a multiple of sizeof(wchar_t))
 *             is ignored.
 *
 * @return Number of wide characters in @a str, at most
 *         @a size / sizeof(wchar_t).
 *
 */
size_t wstr_nlength(const wchar_t *str, size_t size)
{
	/* Only whole wide characters inside the limit are examined */
	size_t max_len = size / sizeof(wchar_t);
	size_t len = 0;

	while ((len < max_len) && (str[len] != 0))
		len++;

	return len;
}
354
/** Check whether character is plain ASCII.
 *
 * @param ch Character to check.
 *
 * @return True if character is plain ASCII, i.e. in the range 0..127.
 *
 */
bool ascii_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 127);
}
367
/** Check whether wide string is plain ASCII.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return True if every character of the wide string is plain ASCII.
 *
 */
bool wstr_is_ascii(const wchar_t *wstr)
{
	/* Skip characters (not the pointer itself) while they are ASCII */
	while ((*wstr != 0) && ascii_check(*wstr))
		wstr++;

	/* Only a fully ASCII string ends the scan on its terminator */
	return *wstr == 0;
}
379
/** Check whether character is valid
 *
 * @param ch Character to check.
 *
 * @return True if character lies in the Unicode code space
 *         (0 .. 0x10FFFF).
 *
 */
bool chr_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 0x10FFFF);
}
392
393	/** Compare two NULL terminated strings.
394	*
395	* Do a char-by-char comparison of two NULL-terminated strings.
396	* The strings are considered equal iff they consist of the same
397	* characters on the minimum of their lengths.
398	*
399	* @param s1 First string to compare.
400	* @param s2 Second string to compare.
401	*
402	* @return 0 if the strings are equal, -1 if first is smaller,
403	* 1 if second smaller.
404	*
405	*/
406	int str_cmp(const char s1, const char s2)
407	{
408	wchar_t c1 = 0;
409	wchar_t c2 = 0;
410
411	size_t off1 = 0;
412	size_t off2 = 0;
413
414	while (true) {
415	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
416	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
417
418	if (c1 < c2)
419	return -1;
420
421	if (c1 > c2)
422	return 1;
423
424	if (c1 == 0 \|\| c2 == 0)
425	break;
426	}
427
428	return 0;
429	}
430
431	/** Compare two NULL terminated strings with length limit.
432	*
433	* Do a char-by-char comparison of two NULL-terminated strings.
434	* The strings are considered equal iff they consist of the same
435	* characters on the minimum of their lengths and the length limit.
436	*
437	* @param s1 First string to compare.
438	* @param s2 Second string to compare.
439	* @param max_len Maximum number of characters to consider.
440	*
441	* @return 0 if the strings are equal, -1 if first is smaller,
442	* 1 if second smaller.
443	*
444	*/
445	int str_lcmp(const char s1, const char s2, size_t max_len)
446	{
447	wchar_t c1 = 0;
448	wchar_t c2 = 0;
449
450	size_t off1 = 0;
451	size_t off2 = 0;
452
453	size_t len = 0;
454
455	while (true) {
456	if (len >= max_len)
457	break;
458
459	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
460	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
461
462	if (c1 < c2)
463	return -1;
464
465	if (c1 > c2)
466	return 1;
467
468	if (c1 == 0 \|\| c2 == 0)
469	break;
470
471	++len;
472	}
473
474	return 0;
475
476	}
477
478	/** Copy string.
479	*
480	* Copy source string @a src to destination buffer @a dest.
481	* No more than @a size bytes are written. If the size of the output buffer
482	* is at least one byte, the output string will always be well-formed, i.e.
483	* null-terminated and containing only complete characters.
484	*
485	* @param dest Destination buffer.
486	* @param count Size of the destination buffer (must be > 0).
487	* @param src Source string.
488	*/
489	void str_cpy(char dest, size_t size, const char src)
490	{
491	/* There must be space for a null terminator in the buffer. */
492	assert(size > 0);
493
494	size_t src_off = 0;
495	size_t dest_off = 0;
496
497	wchar_t ch;
498	while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
499	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
500	break;
501	}
502
503	dest[dest_off] = '\0';
504	}
505
506	/** Copy size-limited substring.
507	*
508	* Copy prefix of string @a src of max. size @a size to destination buffer
509	* @a dest. No more than @a size bytes are written. The output string will
510	* always be well-formed, i.e. null-terminated and containing only complete
511	* characters.
512	*
513	* No more than @a n bytes are read from the input string, so it does not
514	* have to be null-terminated.
515	*
516	* @param dest Destination buffer.
517	* @param count Size of the destination buffer (must be > 0).
518	* @param src Source string.
519	* @param n Maximum number of bytes to read from @a src.
520	*/
521	void str_ncpy(char dest, size_t size, const char src, size_t n)
522	{
523	/* There must be space for a null terminator in the buffer. */
524	assert(size > 0);
525
526	size_t src_off = 0;
527	size_t dest_off = 0;
528
529	wchar_t ch;
530	while ((ch = str_decode(src, &src_off, n)) != 0) {
531	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
532	break;
533	}
534
535	dest[dest_off] = '\0';
536	}
537
/** Append one string to another.
 *
 * Append source string @a src to string in destination buffer @a dest.
 * Size of the destination buffer is @a size. The output string will
 * always be well-formed, i.e. null-terminated and containing only
 * complete characters. If the string already stored in @a dest leaves
 * no room in the buffer, nothing is appended.
 *
 * @param dest Destination buffer holding a NULL-terminated string.
 * @param size Size of the destination buffer.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	size_t dstr_size = str_size(dest);

	/*
	 * Without this guard size - dstr_size would wrap around (or be
	 * zero and trip the assertion in str_cpy()).
	 */
	if (dstr_size >= size)
		return;

	str_cpy(dest + dstr_size, size - dstr_size, src);
}
556
557	/** Convert wide string to string.
558	*
559	* Convert wide string @a src to string. The output is written to the buffer
560	* specified by @a dest and @a size. @a size must be non-zero and the string
561	* written will always be well-formed.
562	*
563	* @param dest Destination buffer.
564	* @param size Size of the destination buffer.
565	* @param src Source wide string.
566	*
567	* @return EOK, if success, negative otherwise.
568	*/
569	int wstr_to_str(char dest, size_t size, const wchar_t src)
570	{
571	int rc;
572	wchar_t ch;
573	size_t src_idx;
574	size_t dest_off;
575
576	/* There must be space for a null terminator in the buffer. */
577	assert(size > 0);
578
579	src_idx = 0;
580	dest_off = 0;
581
582	while ((ch = src[src_idx++]) != 0) {
583	rc = chr_encode(ch, dest, &dest_off, size - 1);
584	if (rc != EOK)
585	break;
586	}
587
588	dest[dest_off] = '\0';
589	return rc;
590	}
591
592	/** Convert wide string to new string.
593	*
594	* Convert wide string @a src to string. Space for the new string is allocated
595	* on the heap.
596	*
597	* @param src Source wide string.
598	* @return New string.
599	*/
600	char wstr_to_astr(const wchar_t src)
601	{
602	char dbuf[STR_BOUNDS(1)];
603	char *str;
604	wchar_t ch;
605
606	size_t src_idx;
607	size_t dest_off;
608	size_t dest_size;
609
610	/* Compute size of encoded string. */
611
612	src_idx = 0;
613	dest_size = 0;
614
615	while ((ch = src[src_idx++]) != 0) {
616	dest_off = 0;
617	if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
618	break;
619	dest_size += dest_off;
620	}
621
622	str = malloc(dest_size + 1);
623	if (str == NULL)
624	return NULL;
625
626	/* Encode string. */
627
628	src_idx = 0;
629	dest_off = 0;
630
631	while ((ch = src[src_idx++]) != 0) {
632	if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
633	break;
634	}
635
636	str[dest_size] = '\0';
637	return str;
638	}
639
640
641	/** Convert string to wide string.
642	*
643	* Convert string @a src to wide string. The output is written to the
644	* buffer specified by @a dest and @a dlen. @a dlen must be non-zero
645	* and the wide string written will always be null-terminated.
646	*
647	* @param dest Destination buffer.
648	* @param dlen Length of destination buffer (number of wchars).
649	* @param src Source string.
650	*
651	* @return EOK, if success, negative otherwise.
652	*/
653	int str_to_wstr(wchar_t dest, size_t dlen, const char src)
654	{
655	int rc=EOK;
656	size_t offset;
657	size_t di;
658	wchar_t c;
659
660	assert(dlen > 0);
661
662	offset = 0;
663	di = 0;
664
665	do {
666	if (di >= dlen - 1) {
667	rc = EOVERFLOW;
668	break;
669	}
670
671	c = str_decode(src, &offset, STR_NO_LIMIT);
672	dest[di++] = c;
673	} while (c != '\0');
674
675	dest[dlen - 1] = '\0';
676	return rc;
677	}
678
679	/** Find first occurence of character in string.
680	*
681	* @param str String to search.
682	* @param ch Character to look for.
683	*
684	* @return Pointer to character in @a str or NULL if not found.
685	*/
686	char str_chr(const char str, wchar_t ch)
687	{
688	wchar_t acc;
689	size_t off = 0;
690	size_t last = 0;
691
692	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
693	if (acc == ch)
694	return (char *) (str + last);
695	last = off;
696	}
697
698	return NULL;
699	}
700
701	/** Find last occurence of character in string.
702	*
703	* @param str String to search.
704	* @param ch Character to look for.
705	*
706	* @return Pointer to character in @a str or NULL if not found.
707	*/
708	char str_rchr(const char str, wchar_t ch)
709	{
710	wchar_t acc;
711	size_t off = 0;
712	size_t last = 0;
713	const char *res = NULL;
714
715	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
716	if (acc == ch)
717	res = (str + last);
718	last = off;
719	}
720
721	return (char *) res;
722	}
723
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position
 * @a pos. The characters after the position (including the
 * terminator) are shifted by one.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert to.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	/*
	 * NOTE(review): only @a pos is checked against @a max_pos; the
	 * shift below writes str[len + 1] -- confirm callers guarantee
	 * that the buffer has room for it.
	 */
	if ((pos > len) || (pos + 1 > max_pos))
		return false;

	/*
	 * Shift the tail from the end. The condition is written as
	 * i + 1 > pos so that the unsigned index terminates for pos == 0.
	 */
	size_t i;
	for (i = len; i + 1 > pos; i--)
		str[i + 1] = str[i];

	str[pos] = ch;

	return true;
}
753
/** Remove a wide character from a wide string.
 *
 * Remove a wide character from a wide string at position
 * @a pos. The characters after the position (including the
 * terminator) are shifted down by one.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	size_t len = wstr_length(str);

	if (pos >= len)
		return false;

	/* i runs up to len inclusive so the terminator moves as well */
	size_t i;
	for (i = pos; i < len; i++)
		str[i] = str[i + 1];

	return true;
}
779
/** Compare two strings byte by byte, ignoring case.
 *
 * Bytes are folded with tolower() before they are compared.
 *
 * @param a First string.
 * @param b Second string.
 *
 * @return Zero if the strings are equal ignoring case, otherwise the
 *         difference of the first pair of folded bytes that differ.
 */
int stricmp(const char *a, const char *b)
{
	size_t i = 0;

	/*
	 * Bytes are passed to tolower() as unsigned char: a negative
	 * plain char is not a valid argument for <ctype.h> functions.
	 */
	while ((a[i] != '\0') && (b[i] != '\0') &&
	    (tolower((unsigned char) a[i]) == tolower((unsigned char) b[i])))
		i++;

	return tolower((unsigned char) a[i]) - tolower((unsigned char) b[i]);
}
789
/** Convert string to a number.
 * Core of strtol and strtoul functions.
 *
 * Leading whitespace and an optional sign are skipped. With base 0 the
 * base is derived from the prefix (`0x' hexadecimal, `0' octal, decimal
 * otherwise); with base 16 an optional `0x' prefix is skipped.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character. Not written when the function returns
 *               early because of an invalid base or an overflow.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @param sgn    It's set to 1 if minus found (left untouched otherwise).
 * @return Result of conversion, 0 if no number was found or the base is
 *         invalid, ULONG_MAX on overflow.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
	unsigned char c;
	unsigned long result = 0;
	unsigned long a, b;
	const char *str = nptr;
	const char *tmpptr;

	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*sgn = 1;
		++str;
	} else if (*str == '+')
		++str;

	if (base) {
		if ((base == 1) || (base > 36)) {
			/* FIXME: set errno to EINVAL */
			return 0;
		}
		if ((base == 16) && (*str == '0') && ((str[1] == 'x') ||
		    (str[1] == 'X'))) {
			str += 2;
		}
	} else {
		base = 10;

		if (*str == '0') {
			base = 8;
			if ((str[1] == 'X') || (str[1] == 'x')) {
				base = 16;
				str += 2;
			}
		}
	}

	tmpptr = str;

	while (*str) {
		/* Map the character to its digit value, 0xff if none */
		c = *str;
		c = (c >= 'a' ? c - 'a' + 10 : (c >= 'A' ? c - 'A' + 10 :
		    (c <= '9' ? c - '0' : 0xff)));

		/* Valid digits are 0 .. base - 1 */
		if (c >= base) {
			break;
		}

		/*
		 * Multiply in two halves (low 8 bits and the rest) so
		 * that an overflow of result * base + c can be detected.
		 */
		a = (result & 0xff) * base + c;
		b = (result >> 8) * base + (a >> 8);

		if (b > (ULONG_MAX >> 8)) {
			/* overflow */
			/* FIXME: errno = ERANGE*/
			return ULONG_MAX;
		}

		result = (b << 8) + (a & 0xff);
		++str;
	}

	if (str == tmpptr) {
		/*
		 * No number was found => first invalid character is the first
		 * character of the string.
		 */
		/* FIXME: set errno to EINVAL */
		str = nptr;
		result = 0;
	}

	if (endptr)
		*endptr = (char *) str;

	if (nptr == str) {
		/*FIXME: errno = EINVAL*/
		return 0;
	}

	return result;
}
882
/** Convert initial part of string to long int according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return Result of conversion; LONG_MIN or LONG_MAX when the value
 *         does not fit into long int.
 */
long int strtol(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = _strtoul(nptr, endptr, base, &sgn);

	if (number > LONG_MAX) {
		/* The magnitude LONG_MAX + 1 is valid only as LONG_MIN */
		if ((sgn) && (number == (unsigned long) (LONG_MAX) + 1)) {
			/* FIXME: set 0 to errno */
			return LONG_MIN;
		}
		/* FIXME: set ERANGE to errno */
		return (sgn ? LONG_MIN : LONG_MAX);
	}

	return (sgn ? -number : number);
}
914
/** Duplicate string.
 *
 * Allocate a new string with malloc() and copy characters from the
 * source string into it using str_cpy().
 *
 * The duplicate string is always a well-formed null-terminated
 * UTF-8 string, but it can differ from the source string on the
 * byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	/* One extra byte for the terminator */
	size_t size = str_size(src) + 1;
	char *dest = malloc(size);
	if (dest == NULL)
		return NULL;

	str_cpy(dest, size, src);
	return dest;
}
941
/** Duplicate string with size limit.
 *
 * Allocate a new string with malloc() and copy up to @a n bytes from the
 * source string into it using str_ncpy(). No more than @a n + 1 bytes is
 * allocated, but if the size occupied by the source string is smaller
 * than @a n + 1, less is allocated.
 *
 * The duplicate string is always a well-formed null-terminated
 * UTF-8 string, but it can differ from the source string on the
 * byte level.
 *
 * @param src Source string (its full size is measured with str_size(),
 *            so it must be NULL-terminated).
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	size_t size = str_size(src);
	if (size > n)
		size = n;

	/* One extra byte for the terminator */
	char *dest = malloc(size + 1);
	if (dest == NULL)
		return NULL;

	str_ncpy(dest, size + 1, src, size);
	return dest;
}
975
976
/** Convert initial part of string to unsigned long according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return Result of conversion; negated (in unsigned arithmetic) when
 *         the number was preceded by a minus sign.
 */
unsigned long strtoul(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = _strtoul(nptr, endptr, base, &sgn);

	return (sgn ? -number : number);
}
999
/** Split string into tokens.
 *
 * Wrapper around strtok_r() that keeps the scan position in a static
 * variable shared by all callers.
 *
 * @param s     String to tokenize, or NULL to continue with the string
 *              from the previous call.
 * @param delim String of delimiter characters.
 *
 * @return Pointer to the next token or NULL if there are no more tokens.
 */
char *strtok(char *s, const char *delim)
{
	static char *next;

	return strtok_r(s, delim, &next);
}
1006
/** Split string into tokens (reentrant variant).
 *
 * The delimiter that ends the token is overwritten with a null
 * terminator, i.e. the input string is modified.
 *
 * @param s     String to tokenize, or NULL to continue from @a *next.
 * @param delim String of delimiter characters.
 * @param next  Scan position; updated to where the next call should
 *              continue.
 *
 * @return Pointer to the next token or NULL if there are no more tokens.
 */
char *strtok_r(char *s, const char *delim, char **next)
{
	char *start, *end;

	if (s == NULL)
		s = *next;

	/* Nothing to continue from (e.g. no previous call). */
	if (s == NULL)
		return NULL;

	/* Skip over leading delimiters. */
	while (*s && (str_chr(delim, *s) != NULL))
		++s;
	start = s;

	/* Skip over token characters. */
	while (*s && (str_chr(delim, *s) == NULL))
		++s;
	end = s;

	/* Continue behind the delimiter, or stay on the terminator. */
	*next = (*s ? s + 1 : s);

	if (start == end) {
		return NULL;  /* No more tokens. */
	}

	/* Overwrite delimiter with NULL terminator. */
	*end = '\0';
	return start;
}
1031
1032	/** Convert string to uint64_t (internal variant).
1033	*
1034	* @param nptr Pointer to string.
1035	* @param endptr Pointer to the first invalid character is stored here.
1036	* @param base Zero or number between 2 and 36 inclusive.
1037	* @param neg Indication of unary minus is stored here.
1038	* @apram result Result of the conversion.
1039	*
1040	* @return EOK if conversion was successful.
1041	*
1042	*/
1043	static int str_uint(const char nptr, char *endptr, unsigned int base,
1044	bool neg, uint64_t result)
1045	{
1046	assert(endptr != NULL);
1047	assert(neg != NULL);
1048	assert(result != NULL);
1049
1050	*neg = false;
1051	const char *str = nptr;
1052
1053	/* Ignore leading whitespace */
1054	while (isspace(*str))
1055	str++;
1056
1057	if (*str == '-') {
1058	*neg = true;
1059	str++;
1060	} else if (*str == '+')
1061	str++;
1062
1063	if (base == 0) {
1064	/* Decode base if not specified */
1065	base = 10;
1066
1067	if (*str == '0') {
1068	base = 8;
1069	str++;
1070
1071	switch (*str) {
1072	case 'b':
1073	case 'B':
1074	base = 2;
1075	str++;
1076	break;
1077	case 'o':
1078	case 'O':
1079	base = 8;
1080	str++;
1081	break;
1082	case 'd':
1083	case 'D':
1084	case 't':
1085	case 'T':
1086	base = 10;
1087	str++;
1088	break;
1089	case 'x':
1090	case 'X':
1091	base = 16;
1092	str++;
1093	break;
1094	default:
1095	str--;
1096	}
1097	}
1098	} else {
1099	/* Check base range */
1100	if ((base < 2) \|\| (base > 36)) {
1101	endptr = (char ) str;
1102	return EINVAL;
1103	}
1104	}
1105
1106	*result = 0;
1107	const char *startstr = str;
1108
1109	while (*str != 0) {
1110	unsigned int digit;
1111
1112	if ((str >= 'a') && (str <= 'z'))
1113	digit = *str - 'a' + 10;
1114	else if ((str >= 'A') && (str <= 'Z'))
1115	digit = *str - 'A' + 10;
1116	else if ((str >= '0') && (str <= '9'))
1117	digit = *str - '0';
1118	else
1119	break;
1120
1121	if (digit >= base)
1122	break;
1123
1124	uint64_t prev = *result;
1125	result = (result) * base + digit;
1126
1127	if (*result < prev) {
1128	/* Overflow */
1129	endptr = (char ) str;
1130	return EOVERFLOW;
1131	}
1132
1133	str++;
1134	}
1135
1136	if (str == startstr) {
1137	/*
1138	* No digits were decoded => first invalid character is
1139	* the first character of the string.
1140	*/
1141	str = nptr;
1142	}
1143
1144	endptr = (char ) str;
1145
1146	if (str == nptr)
1147	return EINVAL;
1148
1149	return EOK;
1150	}
1151
1152	/** Convert string to uint64_t.
1153	*
1154	* @param nptr Pointer to string.
1155	* @param endptr If not NULL, pointer to the first invalid character
1156	* is stored here.
1157	* @param base Zero or number between 2 and 36 inclusive.
1158	* @param strict Do not allow any trailing characters.
1159	* @param result Result of the conversion.
1160	*
1161	* @return EOK if conversion was successful.
1162	*
1163	*/
1164	int str_uint64(const char nptr, char *endptr, unsigned int base,
1165	bool strict, uint64_t *result)
1166	{
1167	assert(result != NULL);
1168
1169	bool neg;
1170	char *lendptr;
1171	int ret = str_uint(nptr, &lendptr, base, &neg, result);
1172
1173	if (endptr != NULL)
1174	endptr = (char ) lendptr;
1175
1176	if (ret != EOK)
1177	return ret;
1178
1179	/* Do not allow negative values */
1180	if (neg)
1181	return EINVAL;
1182
1183	/* Check whether we are at the end of
1184	the string in strict mode */
1185	if ((strict) && (*lendptr != 0))
1186	return EINVAL;
1187
1188	return EOK;
1189	}
1190
1191	/** Convert string to size_t.
1192	*
1193	* @param nptr Pointer to string.
1194	* @param endptr If not NULL, pointer to the first invalid character
1195	* is stored here.
1196	* @param base Zero or number between 2 and 36 inclusive.
1197	* @param strict Do not allow any trailing characters.
1198	* @param result Result of the conversion.
1199	*
1200	* @return EOK if conversion was successful.
1201	*
1202	*/
1203	int str_size_t(const char nptr, char *endptr, unsigned int base,
1204	bool strict, size_t *result)
1205	{
1206	assert(result != NULL);
1207
1208	bool neg;
1209	char *lendptr;
1210	uint64_t res;
1211	int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1212
1213	if (endptr != NULL)
1214	endptr = (char ) lendptr;
1215
1216	if (ret != EOK)
1217	return ret;
1218
1219	/* Do not allow negative values */
1220	if (neg)
1221	return EINVAL;
1222
1223	/* Check whether we are at the end of
1224	the string in strict mode */
1225	if ((strict) && (*lendptr != 0))
1226	return EINVAL;
1227
1228	/* Check for overflow */
1229	size_t _res = (size_t) res;
1230	if (_res != res)
1231	return EOVERFLOW;
1232
1233	*result = _res;
1234
1235	return EOK;
1236	}
1237
/** Scale a value to a decimal (power of 1000) order of magnitude.
 *
 * The value is divided by the largest listed power of 1000 that still
 * leaves more than 1000 units (values up to 1000000 are left unscaled),
 * and the matching SI prefix letter is reported.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix SI prefix letter is stored here ('k', 'M', 'G', 'T',
 *               'P' or 'E'), or ' ' if the value was not scaled.
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/* 10^18 = exa */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* 10^15 = peta */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		/* 10^12 = tera */
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		/* 10^9 = giga */
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		/* 10^6 = mega */
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		/* 10^3 = kilo */
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
1263
/** Scale a byte count to a binary (power of 1024) order of magnitude.
 *
 * The value is divided by the largest listed power of 1024 that still
 * leaves more than 1024 units (values up to 1048576 are left unscaled),
 * and the matching IEC unit name is reported.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Pointer to a constant unit string is stored here
 *               ("KiB", "MiB", "GiB", "TiB", "PiB" or "B").
 * @param fixed  If true, the unscaled unit is reported as "B " instead
 *               of "B".
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* 2^50 = pebi */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		/* 2^40 = tebi */
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		/* 2^30 = gibi */
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		/* 2^20 = mebi */
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		/* 2^10 = kibi */
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		if (fixed)
			*suffix = "B ";
		else
			*suffix = "B";
	}
}
1290
1291	/** @}
1292	*/

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: mainline/uspace/lib/c/generic/str.c@ 61e29a4d

Download in other formats: