Context Navigation

source: mainline/uspace/lib/c/generic/str.c@ 933cadf

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since 933cadf was 933cadf, checked in by Martin Decky <martin@…>, 14 years ago
use binary suffixes in printouts where appropriate
Property mode set to `100644`
File size: 28.8 KB

Line
1	/*
2	* Copyright (c) 2005 Martin Decky
3	* Copyright (c) 2008 Jiri Svoboda
4	* All rights reserved.
5	*
6	* Redistribution and use in source and binary forms, with or without
7	* modification, are permitted provided that the following conditions
8	* are met:
9	*
10	* - Redistributions of source code must retain the above copyright
11	* notice, this list of conditions and the following disclaimer.
12	* - Redistributions in binary form must reproduce the above copyright
13	* notice, this list of conditions and the following disclaimer in the
14	* documentation and/or other materials provided with the distribution.
15	* - The name of the author may not be used to endorse or promote products
16	* derived from this software without specific prior written permission.
17	*
18	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28	*/
29
30	/** @addtogroup libc
31	* @{
32	*/
33	/** @file
34	*/
35
36	#include <str.h>
37	#include <stdlib.h>
38	#include <assert.h>
39	#include <stdint.h>
40	#include <ctype.h>
41	#include <malloc.h>
42	#include <errno.h>
43	#include <align.h>
44	#include <mem.h>
45	#include <str.h>
46
/** Byte mask consisting of lowest @n bits (out of 8), 0 <= n <= 8 */
#define LO_MASK_8(n)  ((uint8_t) ((1U << (n)) - 1))

/** Mask consisting of lowest @n bits (out of 32), 0 <= n <= 31.
 *
 * The shifted constant is unsigned so that n == 31 does not overflow
 * a signed int.
 */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1))

/** Byte mask consisting of highest @n bits (out of 8), 0 <= n <= 8.
 *
 * The complement is computed on a promoted int, hence the cast back
 * to uint8_t to drop the bits above bit 7.
 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
58
59	/** Decode a single character from a string.
60	*
61	* Decode a single character from a string of size @a size. Decoding starts
62	* at @a offset and this offset is moved to the beginning of the next
63	* character. In case of decoding error, offset generally advances at least
64	* by one. However, offset is never moved beyond size.
65	*
66	* @param str String (not necessarily NULL-terminated).
67	* @param offset Byte offset in string where to start decoding.
68	* @param size Size of the string (in bytes).
69	*
70	* @return Value of decoded character, U_SPECIAL on decoding error or
71	* NULL if attempt to decode beyond @a size.
72	*
73	*/
74	wchar_t str_decode(const char str, size_t offset, size_t size)
75	{
76	if (*offset + 1 > size)
77	return 0;
78
79	/* First byte read from string */
80	uint8_t b0 = (uint8_t) str[(*offset)++];
81
82	/* Determine code length */
83
84	unsigned int b0_bits; /* Data bits in first byte */
85	unsigned int cbytes; /* Number of continuation bytes */
86
87	if ((b0 & 0x80) == 0) {
88	/* 0xxxxxxx (Plain ASCII) */
89	b0_bits = 7;
90	cbytes = 0;
91	} else if ((b0 & 0xe0) == 0xc0) {
92	/* 110xxxxx 10xxxxxx */
93	b0_bits = 5;
94	cbytes = 1;
95	} else if ((b0 & 0xf0) == 0xe0) {
96	/* 1110xxxx 10xxxxxx 10xxxxxx */
97	b0_bits = 4;
98	cbytes = 2;
99	} else if ((b0 & 0xf8) == 0xf0) {
100	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
101	b0_bits = 3;
102	cbytes = 3;
103	} else {
104	/* 10xxxxxx -- unexpected continuation byte */
105	return U_SPECIAL;
106	}
107
108	if (*offset + cbytes > size)
109	return U_SPECIAL;
110
111	wchar_t ch = b0 & LO_MASK_8(b0_bits);
112
113	/* Decode continuation bytes */
114	while (cbytes > 0) {
115	uint8_t b = (uint8_t) str[(*offset)++];
116
117	/* Must be 10xxxxxx */
118	if ((b & 0xc0) != 0x80)
119	return U_SPECIAL;
120
121	/* Shift data bits to ch */
122	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
123	cbytes--;
124	}
125
126	return ch;
127	}
128
129	/** Encode a single character to string representation.
130	*
131	* Encode a single character to string representation (i.e. UTF-8) and store
132	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
133	* is moved to the position where the next character can be written to.
134	*
135	* @param ch Input character.
136	* @param str Output buffer.
137	* @param offset Byte offset where to start writing.
138	* @param size Size of the output buffer (in bytes).
139	*
140	* @return EOK if the character was encoded successfully, EOVERFLOW if there
141	* was not enough space in the output buffer or EINVAL if the character
142	* code was invalid.
143	*/
144	int chr_encode(const wchar_t ch, char str, size_t offset, size_t size)
145	{
146	if (*offset >= size)
147	return EOVERFLOW;
148
149	if (!chr_check(ch))
150	return EINVAL;
151
152	/* Unsigned version of ch (bit operations should only be done
153	on unsigned types). */
154	uint32_t cc = (uint32_t) ch;
155
156	/* Determine how many continuation bytes are needed */
157
158	unsigned int b0_bits; /* Data bits in first byte */
159	unsigned int cbytes; /* Number of continuation bytes */
160
161	if ((cc & ~LO_MASK_32(7)) == 0) {
162	b0_bits = 7;
163	cbytes = 0;
164	} else if ((cc & ~LO_MASK_32(11)) == 0) {
165	b0_bits = 5;
166	cbytes = 1;
167	} else if ((cc & ~LO_MASK_32(16)) == 0) {
168	b0_bits = 4;
169	cbytes = 2;
170	} else if ((cc & ~LO_MASK_32(21)) == 0) {
171	b0_bits = 3;
172	cbytes = 3;
173	} else {
174	/* Codes longer than 21 bits are not supported */
175	return EINVAL;
176	}
177
178	/* Check for available space in buffer */
179	if (*offset + cbytes >= size)
180	return EOVERFLOW;
181
182	/* Encode continuation bytes */
183	unsigned int i;
184	for (i = cbytes; i > 0; i--) {
185	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
186	cc = cc >> CONT_BITS;
187	}
188
189	/* Encode first byte */
190	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
191
192	/* Advance offset */
193	*offset += cbytes + 1;
194
195	return EOK;
196	}
197
/** Get size of string.
 *
 * Get the number of bytes which are used by the string @a str (excluding the
 * NULL-terminator).
 *
 * @param str String to consider.
 *
 * @return Number of bytes used by the string
 *
 */
size_t str_size(const char *str)
{
	size_t size = 0;

	while (str[size] != 0)
		size++;

	return size;
}
217
/** Get size of wide string.
 *
 * Get the number of bytes which are used by the wide string @a str
 * (excluding the NULL-terminator).
 *
 * @param str Wide string to consider.
 *
 * @return Number of bytes used by the wide string
 *
 */
size_t wstr_size(const wchar_t *str)
{
	return (wstr_length(str) * sizeof(wchar_t));
}
232
233	/** Get size of string with length limit.
234	*
235	* Get the number of bytes which are used by up to @a max_len first
236	* characters in the string @a str. If @a max_len is greater than
237	* the length of @a str, the entire string is measured (excluding the
238	* NULL-terminator).
239	*
240	* @param str String to consider.
241	* @param max_len Maximum number of characters to measure.
242	*
243	* @return Number of bytes used by the characters.
244	*
245	*/
246	size_t str_lsize(const char *str, size_t max_len)
247	{
248	size_t len = 0;
249	size_t offset = 0;
250
251	while (len < max_len) {
252	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
253	break;
254
255	len++;
256	}
257
258	return offset;
259	}
260
/** Get size of wide string with length limit.
 *
 * Get the number of bytes which are used by up to @a max_len first
 * wide characters in the wide string @a str. If @a max_len is greater than
 * the length of @a str, the entire wide string is measured (excluding the
 * NULL-terminator).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	size_t len = 0;

	/*
	 * Count characters directly instead of converting max_len to
	 * a byte count, which could wrap around for large limits.
	 */
	while ((len < max_len) && (str[len] != 0))
		len++;

	return (len * sizeof(wchar_t));
}
278
279	/** Get number of characters in a string.
280	*
281	* @param str NULL-terminated string.
282	*
283	* @return Number of characters in string.
284	*
285	*/
286	size_t str_length(const char *str)
287	{
288	size_t len = 0;
289	size_t offset = 0;
290
291	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
292	len++;
293
294	return len;
295	}
296
/** Get number of characters in a wide string.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return Number of characters in @a wstr.
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
	size_t len = 0;

	while (wstr[len] != 0)
		len++;

	return len;
}
313
/** Get number of characters in a string with size limit.
 *
 * Counting stops at the NULL-terminator or once @a size bytes have been
 * examined, whichever comes first.
 *
 * @param str  NULL-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in string.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t len = 0;
	size_t offset = 0;

	while (str_decode(str, &offset, size) != 0)
		len++;

	return len;
}
332
/** Get number of characters in a wide string with size limit.
 *
 * @param str  NULL-terminated wide string.
 * @param size Maximum number of bytes to consider. A trailing partial
 *             wide character (when @a size is not a multiple of
 *             sizeof(wchar_t)) is ignored.
 *
 * @return Number of characters in wide string.
 *
 */
size_t wstr_nlength(const wchar_t *str, size_t size)
{
	/* Number of whole wide characters that fit into @a size bytes */
	size_t limit = size / sizeof(wchar_t);
	size_t len = 0;

	while ((len < limit) && (str[len] != 0))
		len++;

	return len;
}
354
/** Check whether character is plain ASCII.
 *
 * @param ch Character to check.
 *
 * @return True if character is plain ASCII (0 to 127 inclusive).
 *
 */
bool ascii_check(wchar_t ch)
{
	return ((ch >= 0) && (ch <= 127));
}
367
/** Check whether character is valid
 *
 * @param ch Character to check.
 *
 * @return True if character is a valid Unicode code point, i.e. lies
 *         in the range 0 to 0x10ffff inclusive.
 *
 */
bool chr_check(wchar_t ch)
{
	/* 0x10ffff (1114111) is the highest Unicode code point */
	return ((ch >= 0) && (ch <= 0x10ffff));
}
380
381	/** Compare two NULL terminated strings.
382	*
383	* Do a char-by-char comparison of two NULL-terminated strings.
384	* The strings are considered equal iff they consist of the same
385	* characters on the minimum of their lengths.
386	*
387	* @param s1 First string to compare.
388	* @param s2 Second string to compare.
389	*
390	* @return 0 if the strings are equal, -1 if first is smaller,
391	* 1 if second smaller.
392	*
393	*/
394	int str_cmp(const char s1, const char s2)
395	{
396	wchar_t c1 = 0;
397	wchar_t c2 = 0;
398
399	size_t off1 = 0;
400	size_t off2 = 0;
401
402	while (true) {
403	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
404	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
405
406	if (c1 < c2)
407	return -1;
408
409	if (c1 > c2)
410	return 1;
411
412	if (c1 == 0 \|\| c2 == 0)
413	break;
414	}
415
416	return 0;
417	}
418
419	/** Compare two NULL terminated strings with length limit.
420	*
421	* Do a char-by-char comparison of two NULL-terminated strings.
422	* The strings are considered equal iff they consist of the same
423	* characters on the minimum of their lengths and the length limit.
424	*
425	* @param s1 First string to compare.
426	* @param s2 Second string to compare.
427	* @param max_len Maximum number of characters to consider.
428	*
429	* @return 0 if the strings are equal, -1 if first is smaller,
430	* 1 if second smaller.
431	*
432	*/
433	int str_lcmp(const char s1, const char s2, size_t max_len)
434	{
435	wchar_t c1 = 0;
436	wchar_t c2 = 0;
437
438	size_t off1 = 0;
439	size_t off2 = 0;
440
441	size_t len = 0;
442
443	while (true) {
444	if (len >= max_len)
445	break;
446
447	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
448	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
449
450	if (c1 < c2)
451	return -1;
452
453	if (c1 > c2)
454	return 1;
455
456	if (c1 == 0 \|\| c2 == 0)
457	break;
458
459	++len;
460	}
461
462	return 0;
463
464	}
465
466	/** Copy string.
467	*
468	* Copy source string @a src to destination buffer @a dest.
469	* No more than @a size bytes are written. If the size of the output buffer
470	* is at least one byte, the output string will always be well-formed, i.e.
471	* null-terminated and containing only complete characters.
472	*
473	* @param dest Destination buffer.
474	* @param count Size of the destination buffer (must be > 0).
475	* @param src Source string.
476	*/
477	void str_cpy(char dest, size_t size, const char src)
478	{
479	/* There must be space for a null terminator in the buffer. */
480	assert(size > 0);
481
482	size_t src_off = 0;
483	size_t dest_off = 0;
484
485	wchar_t ch;
486	while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
487	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
488	break;
489	}
490
491	dest[dest_off] = '\0';
492	}
493
494	/** Copy size-limited substring.
495	*
496	* Copy prefix of string @a src of max. size @a size to destination buffer
497	* @a dest. No more than @a size bytes are written. The output string will
498	* always be well-formed, i.e. null-terminated and containing only complete
499	* characters.
500	*
501	* No more than @a n bytes are read from the input string, so it does not
502	* have to be null-terminated.
503	*
504	* @param dest Destination buffer.
505	* @param count Size of the destination buffer (must be > 0).
506	* @param src Source string.
507	* @param n Maximum number of bytes to read from @a src.
508	*/
509	void str_ncpy(char dest, size_t size, const char src, size_t n)
510	{
511	/* There must be space for a null terminator in the buffer. */
512	assert(size > 0);
513
514	size_t src_off = 0;
515	size_t dest_off = 0;
516
517	wchar_t ch;
518	while ((ch = str_decode(src, &src_off, n)) != 0) {
519	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
520	break;
521	}
522
523	dest[dest_off] = '\0';
524	}
525
/** Append one string to another.
 *
 * Append source string @a src to string in destination buffer @a dest.
 * Size of the destination buffer is @a size. If the size of the output
 * buffer is at least one byte, the output string will always be
 * well-formed, i.e. null-terminated and containing only complete
 * characters. If the string in @a dest already fills the whole buffer,
 * nothing is appended.
 *
 * @param dest Destination buffer.
 * @param size Size of the destination buffer.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	size_t dstr_size = str_size(dest);

	/*
	 * No room left, not even for a terminator. Without this check
	 * the subtraction below would wrap around to a huge size.
	 */
	if (dstr_size >= size)
		return;

	str_cpy(dest + dstr_size, size - dstr_size, src);
}
544
545	/** Convert wide string to string.
546	*
547	* Convert wide string @a src to string. The output is written to the buffer
548	* specified by @a dest and @a size. @a size must be non-zero and the string
549	* written will always be well-formed.
550	*
551	* @param dest Destination buffer.
552	* @param size Size of the destination buffer.
553	* @param src Source wide string.
554	*/
555	void wstr_to_str(char dest, size_t size, const wchar_t src)
556	{
557	wchar_t ch;
558	size_t src_idx;
559	size_t dest_off;
560
561	/* There must be space for a null terminator in the buffer. */
562	assert(size > 0);
563
564	src_idx = 0;
565	dest_off = 0;
566
567	while ((ch = src[src_idx++]) != 0) {
568	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
569	break;
570	}
571
572	dest[dest_off] = '\0';
573	}
574
575	/** Convert wide string to new string.
576	*
577	* Convert wide string @a src to string. Space for the new string is allocated
578	* on the heap.
579	*
580	* @param src Source wide string.
581	* @return New string.
582	*/
583	char wstr_to_astr(const wchar_t src)
584	{
585	char dbuf[STR_BOUNDS(1)];
586	char *str;
587	wchar_t ch;
588
589	size_t src_idx;
590	size_t dest_off;
591	size_t dest_size;
592
593	/* Compute size of encoded string. */
594
595	src_idx = 0;
596	dest_size = 0;
597
598	while ((ch = src[src_idx++]) != 0) {
599	dest_off = 0;
600	if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
601	break;
602	dest_size += dest_off;
603	}
604
605	str = malloc(dest_size + 1);
606	if (str == NULL)
607	return NULL;
608
609	/* Encode string. */
610
611	src_idx = 0;
612	dest_off = 0;
613
614	while ((ch = src[src_idx++]) != 0) {
615	if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
616	break;
617	}
618
619	str[dest_size] = '\0';
620	return str;
621	}
622
623
624	/** Convert string to wide string.
625	*
626	* Convert string @a src to wide string. The output is written to the
627	* buffer specified by @a dest and @a dlen. @a dlen must be non-zero
628	* and the wide string written will always be null-terminated.
629	*
630	* @param dest Destination buffer.
631	* @param dlen Length of destination buffer (number of wchars).
632	* @param src Source string.
633	*/
634	void str_to_wstr(wchar_t dest, size_t dlen, const char src)
635	{
636	size_t offset;
637	size_t di;
638	wchar_t c;
639
640	assert(dlen > 0);
641
642	offset = 0;
643	di = 0;
644
645	do {
646	if (di >= dlen - 1)
647	break;
648
649	c = str_decode(src, &offset, STR_NO_LIMIT);
650	dest[di++] = c;
651	} while (c != '\0');
652
653	dest[dlen - 1] = '\0';
654	}
655
656	/** Find first occurence of character in string.
657	*
658	* @param str String to search.
659	* @param ch Character to look for.
660	*
661	* @return Pointer to character in @a str or NULL if not found.
662	*/
663	char str_chr(const char str, wchar_t ch)
664	{
665	wchar_t acc;
666	size_t off = 0;
667	size_t last = 0;
668
669	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
670	if (acc == ch)
671	return (char *) (str + last);
672	last = off;
673	}
674
675	return NULL;
676	}
677
678	/** Find last occurence of character in string.
679	*
680	* @param str String to search.
681	* @param ch Character to look for.
682	*
683	* @return Pointer to character in @a str or NULL if not found.
684	*/
685	char str_rchr(const char str, wchar_t ch)
686	{
687	wchar_t acc;
688	size_t off = 0;
689	size_t last = 0;
690	const char *res = NULL;
691
692	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
693	if (acc == ch)
694	res = (str + last);
695	last = off;
696	}
697
698	return (char *) res;
699	}
700
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position
 * @a pos. The characters after the position are shifted.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert to.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer. NOTE(review): treated as the
 *                maximum string length excluding the terminator, which
 *                is what the pre-existing append check implied; confirm
 *                against callers.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds or the string cannot grow any further.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	/*
	 * The string grows by one character, so the whole string (not just
	 * the insert position) has to stay within max_pos.
	 */
	if ((pos > len) || (len >= max_pos))
		return false;

	/* Shift the tail, including the terminator, one slot to the right */
	size_t i;
	for (i = len + 1; i > pos; i--)
		str[i] = str[i - 1];

	str[pos] = ch;

	return true;
}
730
/** Remove a wide character from a wide string.
 *
 * Remove a wide character from a wide string at position
 * @a pos. The characters after the position are shifted.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	size_t len = wstr_length(str);

	if (pos >= len)
		return false;

	/* Shift the tail, including the terminator, one slot to the left */
	size_t i;
	for (i = pos; i < len; i++)
		str[i] = str[i + 1];

	return true;
}
756
/** Compare two strings byte by byte, ignoring case.
 *
 * Bytes are folded with tolower() before being compared.
 *
 * @param a First string.
 * @param b Second string.
 *
 * @return Zero if the strings are equal, otherwise the difference of the
 *         first pair of folded bytes that differ (negative if @a a sorts
 *         first, positive if @a b sorts first).
 */
int stricmp(const char *a, const char *b)
{
	size_t i = 0;

	/* tolower() requires a value representable as unsigned char */
	while (a[i] && b[i] &&
	    (tolower((unsigned char) a[i]) == tolower((unsigned char) b[i])))
		i++;

	return (tolower((unsigned char) a[i]) - tolower((unsigned char) b[i]));
}
766
/** Convert string to a number.
 * Core of strtol and strtoul functions.
 *
 * Leading whitespace and an optional sign are skipped. With base 0 the
 * base is taken from the prefix ("0x"/"0X" hexadecimal, "0" octal,
 * decimal otherwise); with base 16 an optional "0x"/"0X" prefix is skipped.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character (or @a nptr if no number was found).
 * @param base   Zero or number between 2 and 36 inclusive.
 * @param sgn    It's set to 1 if minus found (left untouched otherwise).
 * @return Result of conversion, 0 if no number was found or the base is
 *         invalid, ULONG_MAX on overflow.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
	unsigned long result = 0;
	bool overflow = false;
	const char *str = nptr;
	const char *tmpptr;

	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*sgn = 1;
		++str;
	} else if (*str == '+')
		++str;

	if (base) {
		if ((base < 2) || (base > 36)) {
			/* FIXME: set errno to EINVAL */
			if (endptr)
				*endptr = (char *) nptr;
			return 0;
		}
		if ((base == 16) && (*str == '0') && ((str[1] == 'x') ||
		    (str[1] == 'X'))) {
			str += 2;
		}
	} else {
		base = 10;

		if (*str == '0') {
			base = 8;
			if ((str[1] == 'X') || (str[1] == 'x')) {
				base = 16;
				str += 2;
			}
		}
	}

	tmpptr = str;

	while (*str) {
		unsigned char c = (unsigned char) *str;
		unsigned int digit;

		if ((c >= '0') && (c <= '9'))
			digit = c - '0';
		else if ((c >= 'a') && (c <= 'z'))
			digit = c - 'a' + 10;
		else if ((c >= 'A') && (c <= 'Z'))
			digit = c - 'A' + 10;
		else
			break;

		/* Valid digits are 0 .. base - 1 */
		if (digit >= (unsigned int) base)
			break;

		if (!overflow) {
			if (result > (ULONG_MAX - digit) / (unsigned long) base) {
				/* FIXME: errno = ERANGE */
				overflow = true;
			} else
				result = result * (unsigned long) base + digit;
		}

		/* Keep consuming digits so that endptr ends up past them */
		++str;
	}

	if (str == tmpptr) {
		/*
		 * No number was found => first invalid character is the first
		 * character of the string.
		 */
		/* FIXME: set errno to EINVAL */
		str = nptr;
		result = 0;
	}

	if (endptr)
		*endptr = (char *) str;

	if (overflow)
		return ULONG_MAX;

	return result;
}
859
/** Convert initial part of string to long int according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return Result of conversion, clamped to LONG_MIN or LONG_MAX when the
 *         value does not fit.
 */
long int strtol(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = _strtoul(nptr, endptr, base, &sgn);

	if (number > LONG_MAX) {
		if ((sgn) && (number == (unsigned long) (LONG_MAX) + 1)) {
			/*
			 * Magnitude of LONG_MIN: in range, but only
			 * representable as a negative value.
			 */
			/* FIXME: set 0 to errno */
			return LONG_MIN;
		}
		/* FIXME: set ERANGE to errno */
		return (sgn ? LONG_MIN : LONG_MAX);
	}

	/* number <= LONG_MAX here, so both conversions are exact */
	return (sgn ? -(long) number : (long) number);
}
891
/** Duplicate string.
 *
 * Allocate a new string with malloc() and copy characters from the
 * source string into it using str_cpy().
 *
 * The duplicate string is always a well-formed null-terminated UTF-8
 * string, but it can differ from the source string on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	size_t size = str_size(src) + 1;
	char *dest = malloc(size);
	if (dest == NULL)
		return NULL;

	str_cpy(dest, size, src);
	return dest;
}
918
/** Duplicate string with size limit.
 *
 * Allocate a new string with malloc() and copy up to @a n bytes from the
 * source string into it using str_ncpy(). No more than @a n + 1 bytes is
 * allocated, but if the size occupied by the source string is smaller
 * than @a n + 1, less is allocated.
 *
 * The duplicate string is always a well-formed null-terminated UTF-8
 * string, but it can differ from the source string on the byte level.
 *
 * @param src Source string.
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	size_t size = str_size(src);
	if (size > n)
		size = n;

	char *dest = malloc(size + 1);
	if (dest == NULL)
		return NULL;

	str_ncpy(dest, size + 1, src, size);
	return dest;
}
952
953
/** Convert initial part of string to unsigned long according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return Result of conversion. If a minus sign was present, the value
 *         is negated in unsigned arithmetic.
 */
unsigned long strtoul(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = _strtoul(nptr, endptr, base, &sgn);

	return (sgn ? -number : number);
}
976
/** Split string into tokens.
 *
 * Wrapper around strtok_r() that keeps the continuation pointer in a
 * function-local static variable, which is shared by all callers.
 *
 * @param s     String to tokenize, or NULL to continue with the string
 *              passed in a previous call.
 * @param delim String of delimiter characters.
 *
 * @return Pointer to the next token or NULL if there are no more tokens.
 */
char *strtok(char *s, const char *delim)
{
	static char *next;

	return strtok_r(s, delim, &next);
}
983
/** Split string into tokens (reentrant variant).
 *
 * The delimiter terminating the returned token is overwritten with
 * a null character, i.e. the input string is modified.
 *
 * @param s     String to tokenize, or NULL to continue from the position
 *              saved in @a next by a previous call.
 * @param delim String of delimiter characters.
 * @param next  Place where the continuation position is kept between
 *              calls.
 *
 * @return Pointer to the next token or NULL if there are no more tokens.
 */
char *strtok_r(char *s, const char *delim, char **next)
{
	char *start, *end;

	if (s == NULL)
		s = *next;

	/* Nothing to continue from. */
	if (s == NULL)
		return NULL;

	/* Skip over leading delimiters. */
	while (*s && (str_chr(delim, *s) != NULL))
		++s;
	start = s;

	/* Skip over token characters. */
	while (*s && (str_chr(delim, *s) == NULL))
		++s;
	end = s;

	/* Resume after the delimiter, or stay on the terminator. */
	*next = (*s ? s + 1 : s);

	if (start == end) {
		return NULL;  /* No more tokens. */
	}

	/* Overwrite delimiter with NULL terminator. */
	*end = '\0';
	return start;
}
1008
1009	/** Convert string to uint64_t (internal variant).
1010	*
1011	* @param nptr Pointer to string.
1012	* @param endptr Pointer to the first invalid character is stored here.
1013	* @param base Zero or number between 2 and 36 inclusive.
1014	* @param neg Indication of unary minus is stored here.
1015	* @apram result Result of the conversion.
1016	*
1017	* @return EOK if conversion was successful.
1018	*
1019	*/
1020	static int str_uint(const char nptr, char *endptr, unsigned int base,
1021	bool neg, uint64_t result)
1022	{
1023	assert(endptr != NULL);
1024	assert(neg != NULL);
1025	assert(result != NULL);
1026
1027	*neg = false;
1028	const char *str = nptr;
1029
1030	/* Ignore leading whitespace */
1031	while (isspace(*str))
1032	str++;
1033
1034	if (*str == '-') {
1035	*neg = true;
1036	str++;
1037	} else if (*str == '+')
1038	str++;
1039
1040	if (base == 0) {
1041	/* Decode base if not specified */
1042	base = 10;
1043
1044	if (*str == '0') {
1045	base = 8;
1046	str++;
1047
1048	switch (*str) {
1049	case 'b':
1050	case 'B':
1051	base = 2;
1052	str++;
1053	break;
1054	case 'o':
1055	case 'O':
1056	base = 8;
1057	str++;
1058	break;
1059	case 'd':
1060	case 'D':
1061	case 't':
1062	case 'T':
1063	base = 10;
1064	str++;
1065	break;
1066	case 'x':
1067	case 'X':
1068	base = 16;
1069	str++;
1070	break;
1071	default:
1072	str--;
1073	}
1074	}
1075	} else {
1076	/* Check base range */
1077	if ((base < 2) \|\| (base > 36)) {
1078	endptr = (char ) str;
1079	return EINVAL;
1080	}
1081	}
1082
1083	*result = 0;
1084	const char *startstr = str;
1085
1086	while (*str != 0) {
1087	unsigned int digit;
1088
1089	if ((str >= 'a') && (str <= 'z'))
1090	digit = *str - 'a' + 10;
1091	else if ((str >= 'A') && (str <= 'Z'))
1092	digit = *str - 'A' + 10;
1093	else if ((str >= '0') && (str <= '9'))
1094	digit = *str - '0';
1095	else
1096	break;
1097
1098	if (digit >= base)
1099	break;
1100
1101	uint64_t prev = *result;
1102	result = (result) * base + digit;
1103
1104	if (*result < prev) {
1105	/* Overflow */
1106	endptr = (char ) str;
1107	return EOVERFLOW;
1108	}
1109
1110	str++;
1111	}
1112
1113	if (str == startstr) {
1114	/*
1115	* No digits were decoded => first invalid character is
1116	* the first character of the string.
1117	*/
1118	str = nptr;
1119	}
1120
1121	endptr = (char ) str;
1122
1123	if (str == nptr)
1124	return EINVAL;
1125
1126	return EOK;
1127	}
1128
1129	/** Convert string to uint64_t.
1130	*
1131	* @param nptr Pointer to string.
1132	* @param endptr If not NULL, pointer to the first invalid character
1133	* is stored here.
1134	* @param base Zero or number between 2 and 36 inclusive.
1135	* @param strict Do not allow any trailing characters.
1136	* @param result Result of the conversion.
1137	*
1138	* @return EOK if conversion was successful.
1139	*
1140	*/
1141	int str_uint64(const char nptr, char *endptr, unsigned int base,
1142	bool strict, uint64_t *result)
1143	{
1144	assert(result != NULL);
1145
1146	bool neg;
1147	char *lendptr;
1148	int ret = str_uint(nptr, &lendptr, base, &neg, result);
1149
1150	if (endptr != NULL)
1151	endptr = (char ) lendptr;
1152
1153	if (ret != EOK)
1154	return ret;
1155
1156	/* Do not allow negative values */
1157	if (neg)
1158	return EINVAL;
1159
1160	/* Check whether we are at the end of
1161	the string in strict mode */
1162	if ((strict) && (*lendptr != 0))
1163	return EINVAL;
1164
1165	return EOK;
1166	}
1167
1168	/** Convert string to size_t.
1169	*
1170	* @param nptr Pointer to string.
1171	* @param endptr If not NULL, pointer to the first invalid character
1172	* is stored here.
1173	* @param base Zero or number between 2 and 36 inclusive.
1174	* @param strict Do not allow any trailing characters.
1175	* @param result Result of the conversion.
1176	*
1177	* @return EOK if conversion was successful.
1178	*
1179	*/
1180	int str_size_t(const char nptr, char *endptr, unsigned int base,
1181	bool strict, size_t *result)
1182	{
1183	assert(result != NULL);
1184
1185	bool neg;
1186	char *lendptr;
1187	uint64_t res;
1188	int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1189
1190	if (endptr != NULL)
1191	endptr = (char ) lendptr;
1192
1193	if (ret != EOK)
1194	return ret;
1195
1196	/* Do not allow negative values */
1197	if (neg)
1198	return EINVAL;
1199
1200	/* Check whether we are at the end of
1201	the string in strict mode */
1202	if ((strict) && (*lendptr != 0))
1203	return EINVAL;
1204
1205	/* Check for overflow */
1206	size_t _res = (size_t) res;
1207	if (_res != res)
1208	return EOVERFLOW;
1209
1210	*result = _res;
1211
1212	return EOK;
1213	}
1214
/** Scale a value down and pick the matching decimal (SI) suffix.
 *
 * A suffix is used only once the value exceeds 1000 units of it times
 * 1000 (e.g. values above 1000000 are reported in 'k'), so the scaled
 * value keeps at least four significant digits.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Suffix character is stored here: 'k', 'M', 'G', 'T',
 *               'P', 'E' or a space when the value is not scaled.
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/* 10^18 = exa */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* 10^15 = peta */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		/* 10^12 = tera */
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		/* 10^9 = giga */
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		/* 10^6 = mega */
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		/* 10^3 = kilo */
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
1240
/** Scale a value down and pick the matching binary (IEC) suffix.
 *
 * A suffix is used only once the value exceeds 1024 units of it times
 * 1024 (e.g. values above 1048576 are reported in KiB).
 *
 * @param val    Value to scale (in bytes).
 * @param rv     Scaled value is stored here.
 * @param suffix Pointer to a constant suffix string is stored here:
 *               "KiB", "MiB", "GiB", "TiB", "PiB" or "B".
 * @param fixed  If true, the plain byte suffix is padded with spaces to
 *               the same width as the other suffixes.
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* 2^50 = pebi */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		/* 2^40 = tebi */
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		/* 2^30 = gibi */
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		/* 2^20 = mebi */
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		/* 2^10 = kibi */
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		if (fixed)
			*suffix = "B  ";
		else
			*suffix = "B";
	}
}
1267
1268	/** @}
1269	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: