Context Navigation

str.c@ 2e839dda

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since 2e839dda was 2e839dda, checked in by Oleg Romanenko <romanenko.oleg@…>, 14 years ago

New functions for string library:

Lookup for character in wide string

wstr_chr
wstr_rchr

Convert size_t to string: size_t_str
Reverse string: str_reverse

Property mode set to 100644

File size: 30.5 KB

Line
1	/*
2	* Copyright (c) 2005 Martin Decky
3	* Copyright (c) 2008 Jiri Svoboda
4	* All rights reserved.
5	*
6	* Redistribution and use in source and binary forms, with or without
7	* modification, are permitted provided that the following conditions
8	* are met:
9	*
10	* - Redistributions of source code must retain the above copyright
11	* notice, this list of conditions and the following disclaimer.
12	* - Redistributions in binary form must reproduce the above copyright
13	* notice, this list of conditions and the following disclaimer in the
14	* documentation and/or other materials provided with the distribution.
15	* - The name of the author may not be used to endorse or promote products
16	* derived from this software without specific prior written permission.
17	*
18	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28	*/
29
30	/** @addtogroup libc
31	* @{
32	*/
33	/** @file
34	*/
35
36	#include <str.h>
37	#include <stdlib.h>
38	#include <assert.h>
39	#include <stdint.h>
40	#include <ctype.h>
41	#include <malloc.h>
42	#include <errno.h>
43	#include <align.h>
44	#include <mem.h>
45	#include <str.h>
46
/** Byte mask consisting of lowest @n bits (out of 8) */
#define LO_MASK_8(n)  ((uint8_t) ((1 << (n)) - 1))

/**
 * Mask consisting of lowest @n bits (out of 32).
 *
 * The shifted constant is an unsigned 32-bit one so that the shift never
 * overflows a signed int for large @n.
 */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1))

/** Byte mask consisting of highest @n bits (out of 8) */
#define HI_MASK_8(n)  (~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
58
59	/** Decode a single character from a string.
60	*
61	* Decode a single character from a string of size @a size. Decoding starts
62	* at @a offset and this offset is moved to the beginning of the next
63	* character. In case of decoding error, offset generally advances at least
64	* by one. However, offset is never moved beyond size.
65	*
66	* @param str String (not necessarily NULL-terminated).
67	* @param offset Byte offset in string where to start decoding.
68	* @param size Size of the string (in bytes).
69	*
70	* @return Value of decoded character, U_SPECIAL on decoding error or
71	* NULL if attempt to decode beyond @a size.
72	*
73	*/
74	wchar_t str_decode(const char str, size_t offset, size_t size)
75	{
76	if (*offset + 1 > size)
77	return 0;
78
79	/* First byte read from string */
80	uint8_t b0 = (uint8_t) str[(*offset)++];
81
82	/* Determine code length */
83
84	unsigned int b0_bits; /* Data bits in first byte */
85	unsigned int cbytes; /* Number of continuation bytes */
86
87	if ((b0 & 0x80) == 0) {
88	/* 0xxxxxxx (Plain ASCII) */
89	b0_bits = 7;
90	cbytes = 0;
91	} else if ((b0 & 0xe0) == 0xc0) {
92	/* 110xxxxx 10xxxxxx */
93	b0_bits = 5;
94	cbytes = 1;
95	} else if ((b0 & 0xf0) == 0xe0) {
96	/* 1110xxxx 10xxxxxx 10xxxxxx */
97	b0_bits = 4;
98	cbytes = 2;
99	} else if ((b0 & 0xf8) == 0xf0) {
100	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
101	b0_bits = 3;
102	cbytes = 3;
103	} else {
104	/* 10xxxxxx -- unexpected continuation byte */
105	return U_SPECIAL;
106	}
107
108	if (*offset + cbytes > size)
109	return U_SPECIAL;
110
111	wchar_t ch = b0 & LO_MASK_8(b0_bits);
112
113	/* Decode continuation bytes */
114	while (cbytes > 0) {
115	uint8_t b = (uint8_t) str[(*offset)++];
116
117	/* Must be 10xxxxxx */
118	if ((b & 0xc0) != 0x80)
119	return U_SPECIAL;
120
121	/* Shift data bits to ch */
122	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
123	cbytes--;
124	}
125
126	return ch;
127	}
128
129	/** Encode a single character to string representation.
130	*
131	* Encode a single character to string representation (i.e. UTF-8) and store
132	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
133	* is moved to the position where the next character can be written to.
134	*
135	* @param ch Input character.
136	* @param str Output buffer.
137	* @param offset Byte offset where to start writing.
138	* @param size Size of the output buffer (in bytes).
139	*
140	* @return EOK if the character was encoded successfully, EOVERFLOW if there
141	* was not enough space in the output buffer or EINVAL if the character
142	* code was invalid.
143	*/
144	int chr_encode(const wchar_t ch, char str, size_t offset, size_t size)
145	{
146	if (*offset >= size)
147	return EOVERFLOW;
148
149	if (!chr_check(ch))
150	return EINVAL;
151
152	/* Unsigned version of ch (bit operations should only be done
153	on unsigned types). */
154	uint32_t cc = (uint32_t) ch;
155
156	/* Determine how many continuation bytes are needed */
157
158	unsigned int b0_bits; /* Data bits in first byte */
159	unsigned int cbytes; /* Number of continuation bytes */
160
161	if ((cc & ~LO_MASK_32(7)) == 0) {
162	b0_bits = 7;
163	cbytes = 0;
164	} else if ((cc & ~LO_MASK_32(11)) == 0) {
165	b0_bits = 5;
166	cbytes = 1;
167	} else if ((cc & ~LO_MASK_32(16)) == 0) {
168	b0_bits = 4;
169	cbytes = 2;
170	} else if ((cc & ~LO_MASK_32(21)) == 0) {
171	b0_bits = 3;
172	cbytes = 3;
173	} else {
174	/* Codes longer than 21 bits are not supported */
175	return EINVAL;
176	}
177
178	/* Check for available space in buffer */
179	if (*offset + cbytes >= size)
180	return EOVERFLOW;
181
182	/* Encode continuation bytes */
183	unsigned int i;
184	for (i = cbytes; i > 0; i--) {
185	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
186	cc = cc >> CONT_BITS;
187	}
188
189	/* Encode first byte */
190	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
191
192	/* Advance offset */
193	*offset += cbytes + 1;
194
195	return EOK;
196	}
197
/** Get size of string.
 *
 * Count the bytes occupied by the string @a str, not including the
 * terminating zero byte.
 *
 * @param str String to consider.
 *
 * @return Number of bytes used by the string.
 *
 */
size_t str_size(const char *str)
{
	const char *end = str;

	/* Walk to the terminator; the distance travelled is the size. */
	while (*end != 0)
		end++;

	return (size_t) (end - str);
}
217
/** Get size of wide string.
 *
 * Compute the number of bytes occupied by the wide string @a str, not
 * including the terminating zero character.
 *
 * @param str Wide string to consider.
 *
 * @return Number of bytes used by the wide string.
 *
 */
size_t wstr_size(const wchar_t *str)
{
	size_t chars = wstr_length(str);

	return chars * sizeof(wchar_t);
}
232
233	/** Get size of string with length limit.
234	*
235	* Get the number of bytes which are used by up to @a max_len first
236	* characters in the string @a str. If @a max_len is greater than
237	* the length of @a str, the entire string is measured (excluding the
238	* NULL-terminator).
239	*
240	* @param str String to consider.
241	* @param max_len Maximum number of characters to measure.
242	*
243	* @return Number of bytes used by the characters.
244	*
245	*/
246	size_t str_lsize(const char *str, size_t max_len)
247	{
248	size_t len = 0;
249	size_t offset = 0;
250
251	while (len < max_len) {
252	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
253	break;
254
255	len++;
256	}
257
258	return offset;
259	}
260
/** Get size of wide string with length limit.
 *
 * Compute the number of bytes occupied by at most @a max_len leading
 * wide characters of @a str. The terminating zero character is never
 * counted.
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	/* wstr_nlength() takes its limit in bytes, not in characters. */
	size_t byte_limit = max_len * sizeof(wchar_t);
	size_t chars = wstr_nlength(str, byte_limit);

	return chars * sizeof(wchar_t);
}
278
279	/** Get number of characters in a string.
280	*
281	* @param str NULL-terminated string.
282	*
283	* @return Number of characters in string.
284	*
285	*/
286	size_t str_length(const char *str)
287	{
288	size_t len = 0;
289	size_t offset = 0;
290
291	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
292	len++;
293
294	return len;
295	}
296
/** Get number of characters in a wide string.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return Number of characters in @a wstr (terminator excluded).
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
	const wchar_t *end = wstr;

	/* Advance to the terminator and measure the distance. */
	while (*end != 0)
		end++;

	return (size_t) (end - wstr);
}
313
/** Get number of characters in a string with size limit.
 *
 * Decoding stops at the first zero character or once @a size bytes
 * have been consumed, whichever comes first.
 *
 * @param str  NULL-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in string.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t pos = 0;
	size_t count;

	for (count = 0; str_decode(str, &pos, size) != 0; count++)
		;

	return count;
}
332
/** Get number of characters in a wide string with size limit.
 *
 * @param str  NULL-terminated wide string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of wide characters found before the terminator or
 *         before the (aligned-down) byte limit was reached.
 *
 */
size_t wstr_nlength(const wchar_t *str, size_t size)
{
	size_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
	size_t count = 0;
	size_t consumed;

	for (consumed = 0; consumed < limit; consumed += sizeof(wchar_t)) {
		if (str[count] == 0)
			break;

		count++;
	}

	return count;
}
354
/** Check whether character is plain ASCII.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0..127.
 *
 */
bool ascii_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 127);
}
367
/** Check whether wide string is plain ASCII.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return True if every character of @a wstr is plain ASCII.
 *
 */
bool wstr_is_ascii(const wchar_t *wstr)
{
	/* Skip the leading run of non-zero ASCII characters */
	while (*wstr && ascii_check(*wstr))
		wstr++;

	/* Pure ASCII iff the run reached the terminator */
	return *wstr == 0;
}
379
/** Check whether character is valid
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0..1114111 (0x10ffff).
 *
 */
bool chr_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 1114111);
}
392
393	/** Compare two NULL terminated strings.
394	*
395	* Do a char-by-char comparison of two NULL-terminated strings.
396	* The strings are considered equal iff they consist of the same
397	* characters on the minimum of their lengths.
398	*
399	* @param s1 First string to compare.
400	* @param s2 Second string to compare.
401	*
402	* @return 0 if the strings are equal, -1 if first is smaller,
403	* 1 if second smaller.
404	*
405	*/
406	int str_cmp(const char s1, const char s2)
407	{
408	wchar_t c1 = 0;
409	wchar_t c2 = 0;
410
411	size_t off1 = 0;
412	size_t off2 = 0;
413
414	while (true) {
415	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
416	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
417
418	if (c1 < c2)
419	return -1;
420
421	if (c1 > c2)
422	return 1;
423
424	if (c1 == 0 \|\| c2 == 0)
425	break;
426	}
427
428	return 0;
429	}
430
431	/** Compare two NULL terminated strings with length limit.
432	*
433	* Do a char-by-char comparison of two NULL-terminated strings.
434	* The strings are considered equal iff they consist of the same
435	* characters on the minimum of their lengths and the length limit.
436	*
437	* @param s1 First string to compare.
438	* @param s2 Second string to compare.
439	* @param max_len Maximum number of characters to consider.
440	*
441	* @return 0 if the strings are equal, -1 if first is smaller,
442	* 1 if second smaller.
443	*
444	*/
445	int str_lcmp(const char s1, const char s2, size_t max_len)
446	{
447	wchar_t c1 = 0;
448	wchar_t c2 = 0;
449
450	size_t off1 = 0;
451	size_t off2 = 0;
452
453	size_t len = 0;
454
455	while (true) {
456	if (len >= max_len)
457	break;
458
459	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
460	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
461
462	if (c1 < c2)
463	return -1;
464
465	if (c1 > c2)
466	return 1;
467
468	if (c1 == 0 \|\| c2 == 0)
469	break;
470
471	++len;
472	}
473
474	return 0;
475
476	}
477
478	/** Copy string.
479	*
480	* Copy source string @a src to destination buffer @a dest.
481	* No more than @a size bytes are written. If the size of the output buffer
482	* is at least one byte, the output string will always be well-formed, i.e.
483	* null-terminated and containing only complete characters.
484	*
485	* @param dest Destination buffer.
486	* @param count Size of the destination buffer (must be > 0).
487	* @param src Source string.
488	*/
489	void str_cpy(char dest, size_t size, const char src)
490	{
491	/* There must be space for a null terminator in the buffer. */
492	assert(size > 0);
493
494	size_t src_off = 0;
495	size_t dest_off = 0;
496
497	wchar_t ch;
498	while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
499	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
500	break;
501	}
502
503	dest[dest_off] = '\0';
504	}
505
506	/** Copy size-limited substring.
507	*
508	* Copy prefix of string @a src of max. size @a size to destination buffer
509	* @a dest. No more than @a size bytes are written. The output string will
510	* always be well-formed, i.e. null-terminated and containing only complete
511	* characters.
512	*
513	* No more than @a n bytes are read from the input string, so it does not
514	* have to be null-terminated.
515	*
516	* @param dest Destination buffer.
517	* @param count Size of the destination buffer (must be > 0).
518	* @param src Source string.
519	* @param n Maximum number of bytes to read from @a src.
520	*/
521	void str_ncpy(char dest, size_t size, const char src, size_t n)
522	{
523	/* There must be space for a null terminator in the buffer. */
524	assert(size > 0);
525
526	size_t src_off = 0;
527	size_t dest_off = 0;
528
529	wchar_t ch;
530	while ((ch = str_decode(src, &src_off, n)) != 0) {
531	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
532	break;
533	}
534
535	dest[dest_off] = '\0';
536	}
537
/** Append one string to another.
 *
 * Append source string @a src to string in destination buffer @a dest.
 * Size of the destination buffer is @a size. If the size of the output buffer
 * is at least one byte, the output string will always be well-formed, i.e.
 * null-terminated and containing only complete characters.
 *
 * @param dest Destination buffer (holding a NULL-terminated string).
 * @param size Size of the destination buffer.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	size_t dstr_size;

	/* Continue writing right where the existing string ends */
	dstr_size = str_size(dest);
	str_cpy(dest + dstr_size, size - dstr_size, src);
}
556
557	/** Convert wide string to string.
558	*
559	* Convert wide string @a src to string. The output is written to the buffer
560	* specified by @a dest and @a size. @a size must be non-zero and the string
561	* written will always be well-formed.
562	*
563	* @param dest Destination buffer.
564	* @param size Size of the destination buffer.
565	* @param src Source wide string.
566	*
567	* @return EOK, if success, negative otherwise.
568	*/
569	int wstr_to_str(char dest, size_t size, const wchar_t src)
570	{
571	int rc;
572	wchar_t ch;
573	size_t src_idx;
574	size_t dest_off;
575
576	/* There must be space for a null terminator in the buffer. */
577	assert(size > 0);
578
579	src_idx = 0;
580	dest_off = 0;
581
582	while ((ch = src[src_idx++]) != 0) {
583	rc = chr_encode(ch, dest, &dest_off, size - 1);
584	if (rc != EOK)
585	break;
586	}
587
588	dest[dest_off] = '\0';
589	return rc;
590	}
591
592	/** Convert wide string to new string.
593	*
594	* Convert wide string @a src to string. Space for the new string is allocated
595	* on the heap.
596	*
597	* @param src Source wide string.
598	* @return New string.
599	*/
600	char wstr_to_astr(const wchar_t src)
601	{
602	char dbuf[STR_BOUNDS(1)];
603	char *str;
604	wchar_t ch;
605
606	size_t src_idx;
607	size_t dest_off;
608	size_t dest_size;
609
610	/* Compute size of encoded string. */
611
612	src_idx = 0;
613	dest_size = 0;
614
615	while ((ch = src[src_idx++]) != 0) {
616	dest_off = 0;
617	if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
618	break;
619	dest_size += dest_off;
620	}
621
622	str = malloc(dest_size + 1);
623	if (str == NULL)
624	return NULL;
625
626	/* Encode string. */
627
628	src_idx = 0;
629	dest_off = 0;
630
631	while ((ch = src[src_idx++]) != 0) {
632	if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
633	break;
634	}
635
636	str[dest_size] = '\0';
637	return str;
638	}
639
640
641	/** Convert string to wide string.
642	*
643	* Convert string @a src to wide string. The output is written to the
644	* buffer specified by @a dest and @a dlen. @a dlen must be non-zero
645	* and the wide string written will always be null-terminated.
646	*
647	* @param dest Destination buffer.
648	* @param dlen Length of destination buffer (number of wchars).
649	* @param src Source string.
650	*
651	* @return EOK, if success, negative otherwise.
652	*/
653	int str_to_wstr(wchar_t dest, size_t dlen, const char src)
654	{
655	int rc=EOK;
656	size_t offset;
657	size_t di;
658	wchar_t c;
659
660	assert(dlen > 0);
661
662	offset = 0;
663	di = 0;
664
665	do {
666	if (di >= dlen - 1) {
667	rc = EOVERFLOW;
668	break;
669	}
670
671	c = str_decode(src, &offset, STR_NO_LIMIT);
672	dest[di++] = c;
673	} while (c != '\0');
674
675	dest[dlen - 1] = '\0';
676	return rc;
677	}
678
679	/** Find first occurence of character in string.
680	*
681	* @param str String to search.
682	* @param ch Character to look for.
683	*
684	* @return Pointer to character in @a str or NULL if not found.
685	*/
686	char str_chr(const char str, wchar_t ch)
687	{
688	wchar_t acc;
689	size_t off = 0;
690	size_t last = 0;
691
692	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
693	if (acc == ch)
694	return (char *) (str + last);
695	last = off;
696	}
697
698	return NULL;
699	}
700
701	/** Find last occurence of character in string.
702	*
703	* @param str String to search.
704	* @param ch Character to look for.
705	*
706	* @return Pointer to character in @a str or NULL if not found.
707	*/
708	char str_rchr(const char str, wchar_t ch)
709	{
710	wchar_t acc;
711	size_t off = 0;
712	size_t last = 0;
713	const char *res = NULL;
714
715	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
716	if (acc == ch)
717	res = (str + last);
718	last = off;
719	}
720
721	return (char *) res;
722	}
723
/** Find first occurrence of character in wide string.
 *
 * @param wstr String to search.
 * @param ch   Character to look for.
 *
 * @return Pointer to character in @a wstr or NULL if not found.
 */
wchar_t *wstr_chr(const wchar_t *wstr, wchar_t ch)
{
	/* Advance until either the terminator or a match is reached */
	while (*wstr && *wstr != ch)
		wstr++;

	if (*wstr)
		return (wchar_t *) wstr;
	else
		return NULL;
}
740
/** Find last occurrence of character in wide string.
 *
 * @param wstr String to search.
 * @param ch   Character to look for.
 *
 * @return Pointer to character in @a wstr or NULL if not found.
 */
wchar_t *wstr_rchr(const wchar_t *wstr, wchar_t ch)
{
	const wchar_t *res = NULL;

	/* Scan the whole string, remembering the most recent match */
	while (*wstr) {
		if (*wstr == ch)
			res = wstr;
		wstr++;
	}

	return (wchar_t *) res;
}
758
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position
 * @a pos. The characters after the position are shifted.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert to.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	/*
	 * NOTE(review): only @a pos is checked against @a max_pos, not the
	 * resulting string length -- confirm that callers guarantee room for
	 * one more character plus the terminator.
	 */
	if ((pos > len) || (pos + 1 > max_pos))
		return false;

	/*
	 * Shift the tail (terminator included) one position to the right.
	 * The "i + 1 > pos" form lets the loop terminate for pos == 0 even
	 * though i is unsigned.
	 */
	size_t i;
	for (i = len; i + 1 > pos; i--)
		str[i + 1] = str[i];

	str[pos] = ch;

	return true;
}
788
/** Remove a wide character from a wide string.
 *
 * Delete the wide character at index @a pos and close the gap by moving
 * the rest of the string (terminator included) one position to the left.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	size_t total = wstr_length(str);

	if (pos >= total)
		return false;

	/* Pull every following element, up to the terminator, one slot left */
	size_t dst;
	for (dst = pos; dst < total; dst++)
		str[dst] = str[dst + 1];

	return true;
}
814
/** Compare two strings byte-by-byte, ignoring case.
 *
 * Bytes are folded with tolower() before being compared.
 *
 * @param a First string.
 * @param b Second string.
 *
 * @return Zero if the strings are equal ignoring case, otherwise the
 *         difference of the first pair of (lower-cased) bytes that differ.
 */
int stricmp(const char *a, const char *b)
{
	size_t c = 0;

	/* tolower() requires a value representable as unsigned char */
	while (a[c] && b[c] &&
	    (tolower((unsigned char) a[c]) == tolower((unsigned char) b[c])))
		c++;

	return (tolower((unsigned char) a[c]) - tolower((unsigned char) b[c]));
}
824
/** Convert string to a number.
 * Core of strtol and strtoul functions.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @param sgn    It's set to 1 if minus found (left untouched otherwise).
 * @return Result of conversion; 0 if nothing was converted or the base
 *         is invalid, ULONG_MAX on overflow.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
	unsigned char c;
	unsigned long result = 0;
	unsigned long a, b;
	const char *str = nptr;
	const char *tmpptr;

	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*sgn = 1;
		++str;
	} else if (*str == '+')
		++str;

	if (base) {
		if ((base < 2) || (base > 36)) {
			/* FIXME: set errno to EINVAL */
			if (endptr)
				*endptr = (char *) nptr;
			return 0;
		}
		if ((base == 16) && (*str == '0') && ((str[1] == 'x') ||
		    (str[1] == 'X'))) {
			str += 2;
		}
	} else {
		/* Autodetect the base from the prefix */
		base = 10;

		if (*str == '0') {
			base = 8;
			if ((str[1] == 'X') || (str[1] == 'x')) {
				base = 16;
				str += 2;
			}
		}
	}

	tmpptr = str;

	while (*str) {
		/* Map the character to its digit value (0xff = not a digit) */
		c = *str;
		c = (c >= 'a' ? c - 'a' + 10 : (c >= 'A' ? c - 'A' + 10 :
		    (c <= '9' ? c - '0' : 0xff)));

		/* Valid digits are 0 .. base - 1 */
		if (c >= base) {
			break;
		}

		/*
		 * Multiply-and-add split into a low byte and the remaining
		 * high part so that overflow can be detected.
		 */
		a = (result & 0xff) * base + c;
		b = (result >> 8) * base + (a >> 8);

		if (b > (ULONG_MAX >> 8)) {
			/* overflow */
			/* FIXME: errno = ERANGE*/
			return ULONG_MAX;
		}

		result = (b << 8) + (a & 0xff);
		++str;
	}

	if (str == tmpptr) {
		/*
		 * No number was found => first invalid character is the first
		 * character of the string.
		 */
		/* FIXME: set errno to EINVAL */
		str = nptr;
		result = 0;
	}

	if (endptr)
		*endptr = (char *) str;

	if (nptr == str) {
		/* FIXME: errno = EINVAL */
		return 0;
	}

	return result;
}
917
/** Convert initial part of string to long int according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return Result of conversion, clamped to LONG_MIN / LONG_MAX when the
 *         magnitude does not fit.
 */
long int strtol(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = 0;

	number = _strtoul(nptr, endptr, base, &sgn);

	if (number > LONG_MAX) {
		if ((sgn) && (number == (unsigned long) (LONG_MAX) + 1)) {
			/* Magnitude LONG_MAX + 1 with a minus sign: fits */
			/* FIXME: set 0 to errno */
			return number;
		}
		/* FIXME: set ERANGE to errno */
		return (sgn ? LONG_MIN : LONG_MAX);
	}

	return (sgn ? -number : number);
}
949
/** Duplicate string.
 *
 * Allocate a new string and copy characters from the source
 * string into it.
 *
 * The duplicate string is always a well-formed
 * null-terminated UTF-8 string, but it can differ from the source
 * string on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	/* One extra byte for the terminator */
	size_t size = str_size(src) + 1;
	char *dest = (char *) malloc(size);
	if (dest == NULL)
		return (char *) NULL;

	str_cpy(dest, size, src);
	return dest;
}
976
/** Duplicate string with size limit.
 *
 * Allocate a new string and copy up to @a n bytes from the source
 * string into it. No more than @a n + 1 bytes is allocated, but if the
 * size occupied by the source string is smaller than @a n + 1,
 * less is allocated.
 *
 * The duplicate string is always a well-formed
 * null-terminated UTF-8 string, but it can differ from the source
 * string on the byte level.
 *
 * @param src Source string.
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	size_t size = str_size(src);
	if (size > n)
		size = n;

	/* One extra byte for the terminator */
	char *dest = (char *) malloc(size + 1);
	if (dest == NULL)
		return (char *) NULL;

	str_ncpy(dest, size + 1, src, size);
	return dest;
}
1010
/** Reverse a range of bytes in place.
 *
 * Swaps bytes from both ends of the range towards the middle.
 *
 * @param begin Pointer to the first byte of the range.
 * @param end   Pointer to the last byte of the range (inclusive).
 */
void str_reverse(char *begin, char *end)
{
	char aux;

	while (end > begin) {
		aux = *end;
		*end-- = *begin;
		*begin++ = aux;
	}
}
1017
1018	int size_t_str(size_t value, int base, char* str, size_t size)
1019	{
1020	static char num[] = "0123456789abcdefghijklmnopqrstuvwxyz";
1021	char* wstr=str;
1022
1023	if (size == 0)
1024	return EINVAL;
1025	if (base<2 \|\| base>35) {
1026	*str='\0';
1027	return EINVAL;
1028	}
1029
1030	do {
1031	*wstr++ = num[value % base];
1032	if (--size == 0)
1033	return EOVERFLOW;
1034	} while(value /= base);
1035	*wstr='\0';
1036
1037	// Reverse string
1038	str_reverse(str,wstr-1);
1039	return EOK;
1040	}
1041
/** Convert initial part of string to unsigned long according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return Result of conversion (negated in unsigned arithmetic if a minus
 *         sign was present).
 */
unsigned long strtoul(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = 0;

	number = _strtoul(nptr, endptr, base, &sgn);

	return (sgn ? -number : number);
}
1064
/** Split string into tokens (variant keeping its state in a static variable).
 *
 * Thin wrapper around strtok_r() that stores the continuation pointer in
 * a function-local static variable.
 *
 * @param s     String to tokenize, or NULL to continue with the previous one.
 * @param delim String of delimiter characters.
 *
 * @return Whatever strtok_r() returns for these arguments.
 */
char *strtok(char *s, const char *delim)
{
	static char *next;

	return strtok_r(s, delim, &next);
}
1071
/** Split string into tokens (variant with caller-provided state).
 *
 * The input string is modified: the delimiter that ends the returned token
 * is overwritten with a null terminator.
 *
 * @param s     String to tokenize, or NULL to continue from @a *next.
 * @param delim String of delimiter characters.
 * @param next  Continuation state; updated to point past the returned token.
 *
 * @return Pointer to the next token or NULL if there are no more tokens.
 */
char *strtok_r(char *s, const char *delim, char **next)
{
	char *start, *end;

	if (s == NULL)
		s = *next;

	/* Continuation requested but no string has been supplied yet. */
	if (s == NULL)
		return NULL;

	/* Skip over leading delimiters. */
	while (*s && (str_chr(delim, *s) != NULL))
		++s;
	start = s;

	/* Skip over token characters. */
	while (*s && (str_chr(delim, *s) == NULL))
		++s;
	end = s;

	/* Resume after the delimiter, or stay on the terminator. */
	*next = (*s ? s + 1 : s);

	if (start == end) {
		return NULL;  /* No more tokens. */
	}

	/* Overwrite delimiter with NULL terminator. */
	*end = '\0';
	return start;
}
1096
1097	/** Convert string to uint64_t (internal variant).
1098	*
1099	* @param nptr Pointer to string.
1100	* @param endptr Pointer to the first invalid character is stored here.
1101	* @param base Zero or number between 2 and 36 inclusive.
1102	* @param neg Indication of unary minus is stored here.
1103	* @apram result Result of the conversion.
1104	*
1105	* @return EOK if conversion was successful.
1106	*
1107	*/
1108	static int str_uint(const char nptr, char *endptr, unsigned int base,
1109	bool neg, uint64_t result)
1110	{
1111	assert(endptr != NULL);
1112	assert(neg != NULL);
1113	assert(result != NULL);
1114
1115	*neg = false;
1116	const char *str = nptr;
1117
1118	/* Ignore leading whitespace */
1119	while (isspace(*str))
1120	str++;
1121
1122	if (*str == '-') {
1123	*neg = true;
1124	str++;
1125	} else if (*str == '+')
1126	str++;
1127
1128	if (base == 0) {
1129	/* Decode base if not specified */
1130	base = 10;
1131
1132	if (*str == '0') {
1133	base = 8;
1134	str++;
1135
1136	switch (*str) {
1137	case 'b':
1138	case 'B':
1139	base = 2;
1140	str++;
1141	break;
1142	case 'o':
1143	case 'O':
1144	base = 8;
1145	str++;
1146	break;
1147	case 'd':
1148	case 'D':
1149	case 't':
1150	case 'T':
1151	base = 10;
1152	str++;
1153	break;
1154	case 'x':
1155	case 'X':
1156	base = 16;
1157	str++;
1158	break;
1159	default:
1160	str--;
1161	}
1162	}
1163	} else {
1164	/* Check base range */
1165	if ((base < 2) \|\| (base > 36)) {
1166	endptr = (char ) str;
1167	return EINVAL;
1168	}
1169	}
1170
1171	*result = 0;
1172	const char *startstr = str;
1173
1174	while (*str != 0) {
1175	unsigned int digit;
1176
1177	if ((str >= 'a') && (str <= 'z'))
1178	digit = *str - 'a' + 10;
1179	else if ((str >= 'A') && (str <= 'Z'))
1180	digit = *str - 'A' + 10;
1181	else if ((str >= '0') && (str <= '9'))
1182	digit = *str - '0';
1183	else
1184	break;
1185
1186	if (digit >= base)
1187	break;
1188
1189	uint64_t prev = *result;
1190	result = (result) * base + digit;
1191
1192	if (*result < prev) {
1193	/* Overflow */
1194	endptr = (char ) str;
1195	return EOVERFLOW;
1196	}
1197
1198	str++;
1199	}
1200
1201	if (str == startstr) {
1202	/*
1203	* No digits were decoded => first invalid character is
1204	* the first character of the string.
1205	*/
1206	str = nptr;
1207	}
1208
1209	endptr = (char ) str;
1210
1211	if (str == nptr)
1212	return EINVAL;
1213
1214	return EOK;
1215	}
1216
1217	/** Convert string to uint64_t.
1218	*
1219	* @param nptr Pointer to string.
1220	* @param endptr If not NULL, pointer to the first invalid character
1221	* is stored here.
1222	* @param base Zero or number between 2 and 36 inclusive.
1223	* @param strict Do not allow any trailing characters.
1224	* @param result Result of the conversion.
1225	*
1226	* @return EOK if conversion was successful.
1227	*
1228	*/
1229	int str_uint64(const char nptr, char *endptr, unsigned int base,
1230	bool strict, uint64_t *result)
1231	{
1232	assert(result != NULL);
1233
1234	bool neg;
1235	char *lendptr;
1236	int ret = str_uint(nptr, &lendptr, base, &neg, result);
1237
1238	if (endptr != NULL)
1239	endptr = (char ) lendptr;
1240
1241	if (ret != EOK)
1242	return ret;
1243
1244	/* Do not allow negative values */
1245	if (neg)
1246	return EINVAL;
1247
1248	/* Check whether we are at the end of
1249	the string in strict mode */
1250	if ((strict) && (*lendptr != 0))
1251	return EINVAL;
1252
1253	return EOK;
1254	}
1255
1256	/** Convert string to size_t.
1257	*
1258	* @param nptr Pointer to string.
1259	* @param endptr If not NULL, pointer to the first invalid character
1260	* is stored here.
1261	* @param base Zero or number between 2 and 36 inclusive.
1262	* @param strict Do not allow any trailing characters.
1263	* @param result Result of the conversion.
1264	*
1265	* @return EOK if conversion was successful.
1266	*
1267	*/
1268	int str_size_t(const char nptr, char *endptr, unsigned int base,
1269	bool strict, size_t *result)
1270	{
1271	assert(result != NULL);
1272
1273	bool neg;
1274	char *lendptr;
1275	uint64_t res;
1276	int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1277
1278	if (endptr != NULL)
1279	endptr = (char ) lendptr;
1280
1281	if (ret != EOK)
1282	return ret;
1283
1284	/* Do not allow negative values */
1285	if (neg)
1286	return EINVAL;
1287
1288	/* Check whether we are at the end of
1289	the string in strict mode */
1290	if ((strict) && (*lendptr != 0))
1291	return EINVAL;
1292
1293	/* Check for overflow */
1294	size_t _res = (size_t) res;
1295	if (_res != res)
1296	return EOVERFLOW;
1297
1298	*result = _res;
1299
1300	return EOK;
1301	}
1302
/** Scale a value to a decimal (SI) order of magnitude.
 *
 * The value is divided by the largest power of 1000 that still leaves
 * more than three integer digits, and the matching SI prefix is reported.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix SI prefix character is stored here ('k', 'M', 'G', 'T',
 *               'P', 'E' or a space when no scaling was applied).
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/* Divided by 10^18 => exa */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* Divided by 10^15 => peta */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
1328
/** Scale a byte count to a binary (IEC) order of magnitude.
 *
 * The value is divided by the largest power of 1024 that still leaves
 * a quotient above 1024, and the matching IEC unit is reported.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Pointer to a constant unit string is stored here.
 * @param fixed  If true, the unscaled unit is "B " (two characters wide)
 *               instead of "B".
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* Divided by 2^50 => pebi */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		if (fixed)
			*suffix = "B ";
		else
			*suffix = "B";
	}
}
1355
1356	/** @}
1357	*/

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: mainline/uspace/lib/c/generic/str.c@ 2e839dda

Download in other formats: