Context Navigation

str.c@ a8bc7f8

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since a8bc7f8 was a8bc7f8, checked in by Vojtech Horky <vojtechhorky@…>, 14 years ago

Safer str_append

When the original string has already the size of the buffer, there
is no need to try to append anything to it.

Property mode set to 100644

File size: 28.8 KB

Line
1	/*
2	* Copyright (c) 2005 Martin Decky
3	* Copyright (c) 2008 Jiri Svoboda
4	* All rights reserved.
5	*
6	* Redistribution and use in source and binary forms, with or without
7	* modification, are permitted provided that the following conditions
8	* are met:
9	*
10	* - Redistributions of source code must retain the above copyright
11	* notice, this list of conditions and the following disclaimer.
12	* - Redistributions in binary form must reproduce the above copyright
13	* notice, this list of conditions and the following disclaimer in the
14	* documentation and/or other materials provided with the distribution.
15	* - The name of the author may not be used to endorse or promote products
16	* derived from this software without specific prior written permission.
17	*
18	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28	*/
29
30	/** @addtogroup libc
31	* @{
32	*/
33	/** @file
34	*/
35
36	#include <str.h>
37	#include <stdlib.h>
38	#include <assert.h>
39	#include <stdint.h>
40	#include <ctype.h>
41	#include <malloc.h>
42	#include <errno.h>
43	#include <align.h>
44	#include <mem.h>
45	#include <str.h>
46
/** Byte mask consisting of lowest @n bits (out of 8) */
#define LO_MASK_8(n)  ((uint8_t) ((1 << (n)) - 1))

/**
 * Bit mask consisting of lowest @n bits (out of 32).
 *
 * The shift is done on an unsigned 32-bit one so that @n == 31 does not
 * overflow a signed int.
 */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1))

/**
 * Byte mask consisting of highest @n bits (out of 8).
 *
 * The complement is computed after integer promotion, so the value has
 * bits set above bit 7; only the low byte is meaningful.
 */
#define HI_MASK_8(n)  (~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
58
59	/** Decode a single character from a string.
60	*
61	* Decode a single character from a string of size @a size. Decoding starts
62	* at @a offset and this offset is moved to the beginning of the next
63	* character. In case of decoding error, offset generally advances at least
64	* by one. However, offset is never moved beyond size.
65	*
66	* @param str String (not necessarily NULL-terminated).
67	* @param offset Byte offset in string where to start decoding.
68	* @param size Size of the string (in bytes).
69	*
70	* @return Value of decoded character, U_SPECIAL on decoding error or
71	* NULL if attempt to decode beyond @a size.
72	*
73	*/
74	wchar_t str_decode(const char str, size_t offset, size_t size)
75	{
76	if (*offset + 1 > size)
77	return 0;
78
79	/* First byte read from string */
80	uint8_t b0 = (uint8_t) str[(*offset)++];
81
82	/* Determine code length */
83
84	unsigned int b0_bits; /* Data bits in first byte */
85	unsigned int cbytes; /* Number of continuation bytes */
86
87	if ((b0 & 0x80) == 0) {
88	/* 0xxxxxxx (Plain ASCII) */
89	b0_bits = 7;
90	cbytes = 0;
91	} else if ((b0 & 0xe0) == 0xc0) {
92	/* 110xxxxx 10xxxxxx */
93	b0_bits = 5;
94	cbytes = 1;
95	} else if ((b0 & 0xf0) == 0xe0) {
96	/* 1110xxxx 10xxxxxx 10xxxxxx */
97	b0_bits = 4;
98	cbytes = 2;
99	} else if ((b0 & 0xf8) == 0xf0) {
100	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
101	b0_bits = 3;
102	cbytes = 3;
103	} else {
104	/* 10xxxxxx -- unexpected continuation byte */
105	return U_SPECIAL;
106	}
107
108	if (*offset + cbytes > size)
109	return U_SPECIAL;
110
111	wchar_t ch = b0 & LO_MASK_8(b0_bits);
112
113	/* Decode continuation bytes */
114	while (cbytes > 0) {
115	uint8_t b = (uint8_t) str[(*offset)++];
116
117	/* Must be 10xxxxxx */
118	if ((b & 0xc0) != 0x80)
119	return U_SPECIAL;
120
121	/* Shift data bits to ch */
122	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
123	cbytes--;
124	}
125
126	return ch;
127	}
128
129	/** Encode a single character to string representation.
130	*
131	* Encode a single character to string representation (i.e. UTF-8) and store
132	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
133	* is moved to the position where the next character can be written to.
134	*
135	* @param ch Input character.
136	* @param str Output buffer.
137	* @param offset Byte offset where to start writing.
138	* @param size Size of the output buffer (in bytes).
139	*
140	* @return EOK if the character was encoded successfully, EOVERFLOW if there
141	* was not enough space in the output buffer or EINVAL if the character
142	* code was invalid.
143	*/
144	int chr_encode(const wchar_t ch, char str, size_t offset, size_t size)
145	{
146	if (*offset >= size)
147	return EOVERFLOW;
148
149	if (!chr_check(ch))
150	return EINVAL;
151
152	/* Unsigned version of ch (bit operations should only be done
153	on unsigned types). */
154	uint32_t cc = (uint32_t) ch;
155
156	/* Determine how many continuation bytes are needed */
157
158	unsigned int b0_bits; /* Data bits in first byte */
159	unsigned int cbytes; /* Number of continuation bytes */
160
161	if ((cc & ~LO_MASK_32(7)) == 0) {
162	b0_bits = 7;
163	cbytes = 0;
164	} else if ((cc & ~LO_MASK_32(11)) == 0) {
165	b0_bits = 5;
166	cbytes = 1;
167	} else if ((cc & ~LO_MASK_32(16)) == 0) {
168	b0_bits = 4;
169	cbytes = 2;
170	} else if ((cc & ~LO_MASK_32(21)) == 0) {
171	b0_bits = 3;
172	cbytes = 3;
173	} else {
174	/* Codes longer than 21 bits are not supported */
175	return EINVAL;
176	}
177
178	/* Check for available space in buffer */
179	if (*offset + cbytes >= size)
180	return EOVERFLOW;
181
182	/* Encode continuation bytes */
183	unsigned int i;
184	for (i = cbytes; i > 0; i--) {
185	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
186	cc = cc >> CONT_BITS;
187	}
188
189	/* Encode first byte */
190	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
191
192	/* Advance offset */
193	*offset += cbytes + 1;
194
195	return EOK;
196	}
197
/** Get size of string.
 *
 * Get the number of bytes which are used by the string @a str (excluding the
 * NULL-terminator).
 *
 * @param str String to consider (must be NULL-terminated).
 *
 * @return Number of bytes used by the string
 *
 */
size_t str_size(const char *str)
{
	size_t size = 0;

	while (*str++ != 0)
		size++;

	return size;
}
217
/** Get size of wide string.
 *
 * Get the number of bytes which are used by the wide string @a str
 * (excluding the NULL-terminator).
 *
 * @param str Wide string to consider.
 *
 * @return Number of bytes used by the wide string
 *
 */
size_t wstr_size(const wchar_t *str)
{
	return (wstr_length(str) * sizeof(wchar_t));
}
232
233	/** Get size of string with length limit.
234	*
235	* Get the number of bytes which are used by up to @a max_len first
236	* characters in the string @a str. If @a max_len is greater than
237	* the length of @a str, the entire string is measured (excluding the
238	* NULL-terminator).
239	*
240	* @param str String to consider.
241	* @param max_len Maximum number of characters to measure.
242	*
243	* @return Number of bytes used by the characters.
244	*
245	*/
246	size_t str_lsize(const char *str, size_t max_len)
247	{
248	size_t len = 0;
249	size_t offset = 0;
250
251	while (len < max_len) {
252	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
253	break;
254
255	len++;
256	}
257
258	return offset;
259	}
260
/** Get size of wide string with length limit.
 *
 * Get the number of bytes which are used by up to @a max_len first
 * wide characters in the wide string @a str. If @a max_len is greater than
 * the length of @a str, the entire wide string is measured (excluding the
 * NULL-terminator).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	/* wstr_nlength() takes its limit in bytes, hence the scaling. */
	return (wstr_nlength(str, max_len * sizeof(wchar_t)) * sizeof(wchar_t));
}
278
279	/** Get number of characters in a string.
280	*
281	* @param str NULL-terminated string.
282	*
283	* @return Number of characters in string.
284	*
285	*/
286	size_t str_length(const char *str)
287	{
288	size_t len = 0;
289	size_t offset = 0;
290
291	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
292	len++;
293
294	return len;
295	}
296
/** Get number of characters in a wide string.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return Number of characters in @a wstr.
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
	size_t len = 0;

	while (*wstr++ != 0)
		len++;

	return len;
}
313
/** Get number of characters in a string with size limit.
 *
 * Counting stops at the terminating NUL or once @a size bytes have been
 * consumed, whichever comes first.
 *
 * @param str  NULL-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in string.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t len = 0;
	size_t offset = 0;

	while (str_decode(str, &offset, size) != 0)
		len++;

	return len;
}
332
/** Get number of characters in a wide string with size limit.
 *
 * @param str  NULL-terminated wide string.
 * @param size Maximum number of bytes to consider. It is rounded down
 *             to a multiple of sizeof(wchar_t).
 *
 * @return Number of characters in the wide string.
 *
 */
size_t wstr_nlength(const wchar_t *str, size_t size)
{
	size_t len = 0;
	size_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
	size_t offset = 0;

	while ((offset < limit) && (*str++ != 0)) {
		len++;
		offset += sizeof(wchar_t);
	}

	return len;
}
354
/** Check whether character is plain ASCII.
 *
 * @param ch Character to check.
 *
 * @return True if character is plain ASCII (0 through 127).
 *
 */
bool ascii_check(wchar_t ch)
{
	if ((ch >= 0) && (ch <= 127))
		return true;

	return false;
}
367
/** Check whether character is valid
 *
 * @param ch Character to check.
 *
 * @return True if character is within the Unicode code space
 *         (0 through 0x10ffff).
 *
 */
bool chr_check(wchar_t ch)
{
	/* 1114111 == 0x10ffff, the highest Unicode code point */
	if ((ch >= 0) && (ch <= 1114111))
		return true;

	return false;
}
380
381	/** Compare two NULL terminated strings.
382	*
383	* Do a char-by-char comparison of two NULL-terminated strings.
384	* The strings are considered equal iff they consist of the same
385	* characters on the minimum of their lengths.
386	*
387	* @param s1 First string to compare.
388	* @param s2 Second string to compare.
389	*
390	* @return 0 if the strings are equal, -1 if first is smaller,
391	* 1 if second smaller.
392	*
393	*/
394	int str_cmp(const char s1, const char s2)
395	{
396	wchar_t c1 = 0;
397	wchar_t c2 = 0;
398
399	size_t off1 = 0;
400	size_t off2 = 0;
401
402	while (true) {
403	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
404	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
405
406	if (c1 < c2)
407	return -1;
408
409	if (c1 > c2)
410	return 1;
411
412	if (c1 == 0 \|\| c2 == 0)
413	break;
414	}
415
416	return 0;
417	}
418
419	/** Compare two NULL terminated strings with length limit.
420	*
421	* Do a char-by-char comparison of two NULL-terminated strings.
422	* The strings are considered equal iff they consist of the same
423	* characters on the minimum of their lengths and the length limit.
424	*
425	* @param s1 First string to compare.
426	* @param s2 Second string to compare.
427	* @param max_len Maximum number of characters to consider.
428	*
429	* @return 0 if the strings are equal, -1 if first is smaller,
430	* 1 if second smaller.
431	*
432	*/
433	int str_lcmp(const char s1, const char s2, size_t max_len)
434	{
435	wchar_t c1 = 0;
436	wchar_t c2 = 0;
437
438	size_t off1 = 0;
439	size_t off2 = 0;
440
441	size_t len = 0;
442
443	while (true) {
444	if (len >= max_len)
445	break;
446
447	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
448	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
449
450	if (c1 < c2)
451	return -1;
452
453	if (c1 > c2)
454	return 1;
455
456	if (c1 == 0 \|\| c2 == 0)
457	break;
458
459	++len;
460	}
461
462	return 0;
463
464	}
465
466	/** Copy string.
467	*
468	* Copy source string @a src to destination buffer @a dest.
469	* No more than @a size bytes are written. If the size of the output buffer
470	* is at least one byte, the output string will always be well-formed, i.e.
471	* null-terminated and containing only complete characters.
472	*
473	* @param dest Destination buffer.
474	* @param count Size of the destination buffer (must be > 0).
475	* @param src Source string.
476	*/
477	void str_cpy(char dest, size_t size, const char src)
478	{
479	/* There must be space for a null terminator in the buffer. */
480	assert(size > 0);
481
482	size_t src_off = 0;
483	size_t dest_off = 0;
484
485	wchar_t ch;
486	while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
487	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
488	break;
489	}
490
491	dest[dest_off] = '\0';
492	}
493
494	/** Copy size-limited substring.
495	*
496	* Copy prefix of string @a src of max. size @a size to destination buffer
497	* @a dest. No more than @a size bytes are written. The output string will
498	* always be well-formed, i.e. null-terminated and containing only complete
499	* characters.
500	*
501	* No more than @a n bytes are read from the input string, so it does not
502	* have to be null-terminated.
503	*
504	* @param dest Destination buffer.
505	* @param count Size of the destination buffer (must be > 0).
506	* @param src Source string.
507	* @param n Maximum number of bytes to read from @a src.
508	*/
509	void str_ncpy(char dest, size_t size, const char src, size_t n)
510	{
511	/* There must be space for a null terminator in the buffer. */
512	assert(size > 0);
513
514	size_t src_off = 0;
515	size_t dest_off = 0;
516
517	wchar_t ch;
518	while ((ch = str_decode(src, &src_off, n)) != 0) {
519	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
520	break;
521	}
522
523	dest[dest_off] = '\0';
524	}
525
/** Append one string to another.
 *
 * Append source string @a src to string in destination buffer @a dest.
 * Size of the destination buffer is @a size. If the size of the output buffer
 * is at least one byte, the output string will always be well-formed, i.e.
 * null-terminated and containing only complete characters.
 *
 * If the string already in @a dest occupies @a size bytes or more,
 * nothing is appended and @a dest is left untouched.
 *
 * @param dest Destination buffer.
 * @param size Size of the destination buffer.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	size_t dstr_size;

	dstr_size = str_size(dest);
	if (dstr_size >= size)
		return;

	str_cpy(dest + dstr_size, size - dstr_size, src);
}
547
548	/** Convert wide string to string.
549	*
550	* Convert wide string @a src to string. The output is written to the buffer
551	* specified by @a dest and @a size. @a size must be non-zero and the string
552	* written will always be well-formed.
553	*
554	* @param dest Destination buffer.
555	* @param size Size of the destination buffer.
556	* @param src Source wide string.
557	*/
558	void wstr_to_str(char dest, size_t size, const wchar_t src)
559	{
560	wchar_t ch;
561	size_t src_idx;
562	size_t dest_off;
563
564	/* There must be space for a null terminator in the buffer. */
565	assert(size > 0);
566
567	src_idx = 0;
568	dest_off = 0;
569
570	while ((ch = src[src_idx++]) != 0) {
571	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
572	break;
573	}
574
575	dest[dest_off] = '\0';
576	}
577
578	/** Convert wide string to new string.
579	*
580	* Convert wide string @a src to string. Space for the new string is allocated
581	* on the heap.
582	*
583	* @param src Source wide string.
584	* @return New string.
585	*/
586	char wstr_to_astr(const wchar_t src)
587	{
588	char dbuf[STR_BOUNDS(1)];
589	char *str;
590	wchar_t ch;
591
592	size_t src_idx;
593	size_t dest_off;
594	size_t dest_size;
595
596	/* Compute size of encoded string. */
597
598	src_idx = 0;
599	dest_size = 0;
600
601	while ((ch = src[src_idx++]) != 0) {
602	dest_off = 0;
603	if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
604	break;
605	dest_size += dest_off;
606	}
607
608	str = malloc(dest_size + 1);
609	if (str == NULL)
610	return NULL;
611
612	/* Encode string. */
613
614	src_idx = 0;
615	dest_off = 0;
616
617	while ((ch = src[src_idx++]) != 0) {
618	if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
619	break;
620	}
621
622	str[dest_size] = '\0';
623	return str;
624	}
625
626
627	/** Convert string to wide string.
628	*
629	* Convert string @a src to wide string. The output is written to the
630	* buffer specified by @a dest and @a dlen. @a dlen must be non-zero
631	* and the wide string written will always be null-terminated.
632	*
633	* @param dest Destination buffer.
634	* @param dlen Length of destination buffer (number of wchars).
635	* @param src Source string.
636	*/
637	void str_to_wstr(wchar_t dest, size_t dlen, const char src)
638	{
639	size_t offset;
640	size_t di;
641	wchar_t c;
642
643	assert(dlen > 0);
644
645	offset = 0;
646	di = 0;
647
648	do {
649	if (di >= dlen - 1)
650	break;
651
652	c = str_decode(src, &offset, STR_NO_LIMIT);
653	dest[di++] = c;
654	} while (c != '\0');
655
656	dest[dlen - 1] = '\0';
657	}
658
659	/** Find first occurence of character in string.
660	*
661	* @param str String to search.
662	* @param ch Character to look for.
663	*
664	* @return Pointer to character in @a str or NULL if not found.
665	*/
666	char str_chr(const char str, wchar_t ch)
667	{
668	wchar_t acc;
669	size_t off = 0;
670	size_t last = 0;
671
672	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
673	if (acc == ch)
674	return (char *) (str + last);
675	last = off;
676	}
677
678	return NULL;
679	}
680
681	/** Find last occurence of character in string.
682	*
683	* @param str String to search.
684	* @param ch Character to look for.
685	*
686	* @return Pointer to character in @a str or NULL if not found.
687	*/
688	char str_rchr(const char str, wchar_t ch)
689	{
690	wchar_t acc;
691	size_t off = 0;
692	size_t last = 0;
693	const char *res = NULL;
694
695	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
696	if (acc == ch)
697	res = (str + last);
698	last = off;
699	}
700
701	return (char *) res;
702	}
703
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position
 * @a pos. The characters after the position are shifted.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert to.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	if ((pos > len) || (pos + 1 > max_pos))
		return false;

	/*
	 * Shift the tail (terminator included) one slot to the right.
	 * "i + 1 > pos" instead of "i >= pos" keeps the loop finite
	 * for pos == 0 with an unsigned index.
	 */
	size_t i;
	for (i = len; i + 1 > pos; i--)
		str[i + 1] = str[i];

	str[pos] = ch;

	return true;
}
733
/** Remove a wide character from a wide string.
 *
 * Remove a wide character from a wide string at position
 * @a pos. The characters after the position are shifted.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	size_t len = wstr_length(str);

	if (pos >= len)
		return false;

	/* Shift the tail (terminator included) one slot to the left */
	size_t i;
	for (i = pos + 1; i <= len; i++)
		str[i - 1] = str[i];

	return true;
}
759
/** Compare two strings byte by byte, ignoring case.
 *
 * Case folding is done with tolower() on individual bytes.
 *
 * @param a First string.
 * @param b Second string.
 *
 * @return Zero if the strings are equal ignoring case, otherwise the
 *         difference of the first pair of lower-cased bytes that differ.
 */
int stricmp(const char *a, const char *b)
{
	size_t c = 0;

	/* tolower() requires a value representable as unsigned char */
	while (a[c] && b[c] &&
	    (tolower((unsigned char) a[c]) == tolower((unsigned char) b[c])))
		c++;

	return (tolower((unsigned char) a[c]) - tolower((unsigned char) b[c]));
}
769
/** Convert string to a number.
 * Core of strtol and strtoul functions.
 *
 * Leading whitespace and an optional sign are skipped. With @a base 0 the
 * base is deduced from the prefix ("0x"/"0X" hexadecimal, leading "0"
 * octal, decimal otherwise). With @a base 16 an optional "0x"/"0X" prefix
 * is skipped.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character. It is not written when the function
 *               bails out early on an invalid base or on overflow.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @param sgn    It's set to 1 if minus found (left untouched otherwise).
 * @return Result of conversion, 0 if no number was found or the base is
 *         invalid, ULONG_MAX on overflow.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
	unsigned char c;
	unsigned long result = 0;
	unsigned long a, b;
	const char *str = nptr;
	const char *tmpptr;

	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*sgn = 1;
		++str;
	} else if (*str == '+')
		++str;

	if (base) {
		if ((base == 1) || (base > 36)) {
			/* FIXME: set errno to EINVAL */
			return 0;
		}
		if ((base == 16) && (*str == '0') && ((str[1] == 'x') ||
		    (str[1] == 'X'))) {
			str += 2;
		}
	} else {
		base = 10;

		if (*str == '0') {
			base = 8;
			if ((str[1] == 'X') || (str[1] == 'x')) {
				base = 16;
				str += 2;
			}
		}
	}

	tmpptr = str;

	while (*str) {
		/* Map the character to its digit value, 0xff if not a digit */
		c = *str;
		c = (c >= 'a' ? c - 'a' + 10 : (c >= 'A' ? c - 'A' + 10 :
		    (c <= '9' ? c - '0' : 0xff)));

		/* Valid digits are 0 .. base - 1 */
		if (c >= base) {
			break;
		}

		/*
		 * Multiply-and-add split into the low byte and the rest so
		 * that the overflow can be detected before it happens.
		 */
		a = (result & 0xff) * base + c;
		b = (result >> 8) * base + (a >> 8);

		if (b > (ULONG_MAX >> 8)) {
			/* overflow */
			/* FIXME: errno = ERANGE*/
			return ULONG_MAX;
		}

		result = (b << 8) + (a & 0xff);
		++str;
	}

	if (str == tmpptr) {
		/*
		 * No number was found => first invalid character is the first
		 * character of the string.
		 */
		/* FIXME: set errno to EINVAL */
		str = nptr;
		result = 0;
	}

	if (endptr)
		*endptr = (char *) str;

	if (nptr == str) {
		/* FIXME: errno = EINVAL */
		return 0;
	}

	return result;
}
862
/** Convert initial part of string to long int according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return Result of conversion, clamped to LONG_MIN / LONG_MAX when the
 *         value does not fit.
 */
long int strtol(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = _strtoul(nptr, endptr, base, &sgn);

	if (number > LONG_MAX) {
		if ((sgn) && (number == (unsigned long) (LONG_MAX) + 1)) {
			/* Exactly the magnitude of LONG_MIN: still in range */
			/* FIXME: set 0 to errno */
			return LONG_MIN;
		}
		/* FIXME: set ERANGE to errno */
		return (sgn ? LONG_MIN : LONG_MAX);
	}

	/* number <= LONG_MAX here, so both conversions are exact */
	return (sgn ? -(long) number : (long) number);
}
894
/** Duplicate string.
 *
 * Allocate a new string with malloc() and copy characters from the
 * source string into it.
 *
 * The duplicate string is always a well-formed null-terminated UTF-8
 * string, but it can differ from the source string on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	size_t size = str_size(src) + 1;
	char *dest = malloc(size);
	if (dest == NULL)
		return NULL;

	str_cpy(dest, size, src);
	return dest;
}
921
/** Duplicate string with size limit.
 *
 * Allocate a new string with malloc() and copy up to @a n bytes from the
 * source string into it. No more than @a n + 1 bytes is allocated, but if
 * the size occupied by the source string is smaller than @a n + 1, less
 * is allocated.
 *
 * The duplicate string is always a well-formed null-terminated UTF-8
 * string, but it can differ from the source string on the byte level.
 *
 * @param src Source string.
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	size_t size = str_size(src);
	if (size > n)
		size = n;

	char *dest = malloc(size + 1);
	if (dest == NULL)
		return NULL;

	str_ncpy(dest, size + 1, src, size);
	return dest;
}
955
956
/** Convert initial part of string to unsigned long according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return Result of conversion. A leading minus negates the value in
 *         unsigned (modulo) arithmetic.
 */
unsigned long strtoul(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = 0;

	number = _strtoul(nptr, endptr, base, &sgn);

	return (sgn ? -number : number);
}
979
/** Split string into tokens (variant with hidden static state).
 *
 * Thin wrapper around strtok_r() that keeps the continuation pointer in
 * a function-local static variable, so the state is shared by all callers.
 *
 * @param s     String to tokenize or NULL to continue with the previous one.
 * @param delim Set of delimiter characters.
 *
 * @return Next token or NULL if there are no more tokens.
 */
char *strtok(char *s, const char *delim)
{
	static char *next;

	return strtok_r(s, delim, &next);
}
986
/** Split string into tokens.
 *
 * The input string is modified: the delimiter following each returned
 * token is overwritten with a null terminator.
 *
 * @param s     String to tokenize or NULL to continue from @a *next.
 * @param delim Set of delimiter characters.
 * @param next  Continuation pointer, updated on every call that scans
 *              the string.
 *
 * @return Next token or NULL if there are no more tokens.
 */
char *strtok_r(char *s, const char *delim, char **next)
{
	char *start, *end;

	if (s == NULL)
		s = *next;

	/* Nothing to continue from (e.g. very first call made with NULL). */
	if (s == NULL)
		return NULL;

	/* Skip over leading delimiters. */
	while (*s && (str_chr(delim, *s) != NULL))
		++s;
	start = s;

	/* Skip over token characters. */
	while (*s && (str_chr(delim, *s) == NULL))
		++s;
	end = s;

	/* Continue after the delimiter, or stay on the terminator. */
	*next = (*s ? s + 1 : s);

	if (start == end) {
		return NULL;  /* No more tokens. */
	}

	/* Overwrite delimiter with NULL terminator. */
	*end = '\0';
	return start;
}
1011
1012	/** Convert string to uint64_t (internal variant).
1013	*
1014	* @param nptr Pointer to string.
1015	* @param endptr Pointer to the first invalid character is stored here.
1016	* @param base Zero or number between 2 and 36 inclusive.
1017	* @param neg Indication of unary minus is stored here.
1018	* @apram result Result of the conversion.
1019	*
1020	* @return EOK if conversion was successful.
1021	*
1022	*/
1023	static int str_uint(const char nptr, char *endptr, unsigned int base,
1024	bool neg, uint64_t result)
1025	{
1026	assert(endptr != NULL);
1027	assert(neg != NULL);
1028	assert(result != NULL);
1029
1030	*neg = false;
1031	const char *str = nptr;
1032
1033	/* Ignore leading whitespace */
1034	while (isspace(*str))
1035	str++;
1036
1037	if (*str == '-') {
1038	*neg = true;
1039	str++;
1040	} else if (*str == '+')
1041	str++;
1042
1043	if (base == 0) {
1044	/* Decode base if not specified */
1045	base = 10;
1046
1047	if (*str == '0') {
1048	base = 8;
1049	str++;
1050
1051	switch (*str) {
1052	case 'b':
1053	case 'B':
1054	base = 2;
1055	str++;
1056	break;
1057	case 'o':
1058	case 'O':
1059	base = 8;
1060	str++;
1061	break;
1062	case 'd':
1063	case 'D':
1064	case 't':
1065	case 'T':
1066	base = 10;
1067	str++;
1068	break;
1069	case 'x':
1070	case 'X':
1071	base = 16;
1072	str++;
1073	break;
1074	default:
1075	str--;
1076	}
1077	}
1078	} else {
1079	/* Check base range */
1080	if ((base < 2) \|\| (base > 36)) {
1081	endptr = (char ) str;
1082	return EINVAL;
1083	}
1084	}
1085
1086	*result = 0;
1087	const char *startstr = str;
1088
1089	while (*str != 0) {
1090	unsigned int digit;
1091
1092	if ((str >= 'a') && (str <= 'z'))
1093	digit = *str - 'a' + 10;
1094	else if ((str >= 'A') && (str <= 'Z'))
1095	digit = *str - 'A' + 10;
1096	else if ((str >= '0') && (str <= '9'))
1097	digit = *str - '0';
1098	else
1099	break;
1100
1101	if (digit >= base)
1102	break;
1103
1104	uint64_t prev = *result;
1105	result = (result) * base + digit;
1106
1107	if (*result < prev) {
1108	/* Overflow */
1109	endptr = (char ) str;
1110	return EOVERFLOW;
1111	}
1112
1113	str++;
1114	}
1115
1116	if (str == startstr) {
1117	/*
1118	* No digits were decoded => first invalid character is
1119	* the first character of the string.
1120	*/
1121	str = nptr;
1122	}
1123
1124	endptr = (char ) str;
1125
1126	if (str == nptr)
1127	return EINVAL;
1128
1129	return EOK;
1130	}
1131
1132	/** Convert string to uint64_t.
1133	*
1134	* @param nptr Pointer to string.
1135	* @param endptr If not NULL, pointer to the first invalid character
1136	* is stored here.
1137	* @param base Zero or number between 2 and 36 inclusive.
1138	* @param strict Do not allow any trailing characters.
1139	* @param result Result of the conversion.
1140	*
1141	* @return EOK if conversion was successful.
1142	*
1143	*/
1144	int str_uint64(const char nptr, char *endptr, unsigned int base,
1145	bool strict, uint64_t *result)
1146	{
1147	assert(result != NULL);
1148
1149	bool neg;
1150	char *lendptr;
1151	int ret = str_uint(nptr, &lendptr, base, &neg, result);
1152
1153	if (endptr != NULL)
1154	endptr = (char ) lendptr;
1155
1156	if (ret != EOK)
1157	return ret;
1158
1159	/* Do not allow negative values */
1160	if (neg)
1161	return EINVAL;
1162
1163	/* Check whether we are at the end of
1164	the string in strict mode */
1165	if ((strict) && (*lendptr != 0))
1166	return EINVAL;
1167
1168	return EOK;
1169	}
1170
1171	/** Convert string to size_t.
1172	*
1173	* @param nptr Pointer to string.
1174	* @param endptr If not NULL, pointer to the first invalid character
1175	* is stored here.
1176	* @param base Zero or number between 2 and 36 inclusive.
1177	* @param strict Do not allow any trailing characters.
1178	* @param result Result of the conversion.
1179	*
1180	* @return EOK if conversion was successful.
1181	*
1182	*/
1183	int str_size_t(const char nptr, char *endptr, unsigned int base,
1184	bool strict, size_t *result)
1185	{
1186	assert(result != NULL);
1187
1188	bool neg;
1189	char *lendptr;
1190	uint64_t res;
1191	int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1192
1193	if (endptr != NULL)
1194	endptr = (char ) lendptr;
1195
1196	if (ret != EOK)
1197	return ret;
1198
1199	/* Do not allow negative values */
1200	if (neg)
1201	return EINVAL;
1202
1203	/* Check whether we are at the end of
1204	the string in strict mode */
1205	if ((strict) && (*lendptr != 0))
1206	return EINVAL;
1207
1208	/* Check for overflow */
1209	size_t _res = (size_t) res;
1210	if (_res != res)
1211	return EOVERFLOW;
1212
1213	*result = _res;
1214
1215	return EOK;
1216	}
1217
/** Scale a value down and pick a decimal order-of-magnitude suffix.
 *
 * Values up to one million are passed through with a blank suffix. Larger
 * values are divided by a power of 1000 chosen so that the quotient stays
 * above one thousand.
 *
 * NOTE(review): the two largest ranges divide by 10^18 and 10^15 but are
 * labelled 'Z' and 'E', and there is no 'P' range; the SI prefixes for
 * those divisors would be 'E' and 'P'. Behaviour is kept as is -- confirm
 * the intent before changing the labels.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Suffix character is stored here.
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'Z';
	} else if (val > UINT64_C(1000000000000000000)) {
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000)) {
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
1243
/** Scale a value down and pick a binary order-of-magnitude suffix.
 *
 * Values up to 2^20 are passed through with the suffix "B" (or "B  "
 * padded to the width of the other suffixes when @a fixed is set). Larger
 * values are divided by a power of 1024 chosen so that the quotient stays
 * above 1024.
 *
 * NOTE(review): the largest range divides by 2^50 but is labelled "EiB",
 * and there is no "PiB" range; the IEC prefix for that divisor would be
 * "PiB". Behaviour is kept as is -- confirm the intent before changing
 * the label.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Pointer to a constant suffix string is stored here.
 * @param fixed  Pad the plain byte suffix to three characters.
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "EiB";
	} else if (val > UINT64_C(1125899906842624)) {
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		if (fixed)
			*suffix = "B ";
		else
			*suffix = "B";
	}
}
1270
1271	/** @}
1272	*/

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: mainline/uspace/lib/c/generic/str.c@ a8bc7f8

Download in other formats: