Context Navigation

source: mainline/uspace/lib/c/generic/str.c@ 616e73c

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since 616e73c was c4bbca8, checked in by Oleg Romanenko <romanenko.oleg@…>, 14 years ago
Add copyrights
Property mode set to `100644`
File size: 30.5 KB

Line
1	/*
2	* Copyright (c) 2005 Martin Decky
3	* Copyright (c) 2008 Jiri Svoboda
4	* Copyright (c) 2011 Oleg Romanenko
5	* All rights reserved.
6	*
7	* Redistribution and use in source and binary forms, with or without
8	* modification, are permitted provided that the following conditions
9	* are met:
10	*
11	* - Redistributions of source code must retain the above copyright
12	* notice, this list of conditions and the following disclaimer.
13	* - Redistributions in binary form must reproduce the above copyright
14	* notice, this list of conditions and the following disclaimer in the
15	* documentation and/or other materials provided with the distribution.
16	* - The name of the author may not be used to endorse or promote products
17	* derived from this software without specific prior written permission.
18	*
19	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29	*/
30
31	/** @addtogroup libc
32	* @{
33	*/
34	/** @file
35	*/
36
37	#include <str.h>
38	#include <stdlib.h>
39	#include <assert.h>
40	#include <stdint.h>
41	#include <ctype.h>
42	#include <malloc.h>
43	#include <errno.h>
44	#include <align.h>
45	#include <mem.h>
46	#include <str.h>
47
/** Byte mask consisting of lowest @a n bits (out of 8), 0 <= n <= 8 */
#define LO_MASK_8(n)  ((uint8_t) ((1 << (n)) - 1))

/** Mask consisting of lowest @a n bits (out of 32), 0 <= n <= 31
 *
 * The shift is done on an unsigned 32-bit constant so that n = 31 does
 * not overflow a signed int.
 */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1))

/** Byte mask consisting of highest @a n bits (out of 8)
 *
 * The result has type int (integer promotion of the complemented byte);
 * only its low 8 bits are meaningful.
 */
#define HI_MASK_8(n)  (~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
59
60	/** Decode a single character from a string.
61	*
62	* Decode a single character from a string of size @a size. Decoding starts
63	* at @a offset and this offset is moved to the beginning of the next
64	* character. In case of decoding error, offset generally advances at least
65	* by one. However, offset is never moved beyond size.
66	*
67	* @param str String (not necessarily NULL-terminated).
68	* @param offset Byte offset in string where to start decoding.
69	* @param size Size of the string (in bytes).
70	*
71	* @return Value of decoded character, U_SPECIAL on decoding error or
72	* NULL if attempt to decode beyond @a size.
73	*
74	*/
75	wchar_t str_decode(const char str, size_t offset, size_t size)
76	{
77	if (*offset + 1 > size)
78	return 0;
79
80	/* First byte read from string */
81	uint8_t b0 = (uint8_t) str[(*offset)++];
82
83	/* Determine code length */
84
85	unsigned int b0_bits; /* Data bits in first byte */
86	unsigned int cbytes; /* Number of continuation bytes */
87
88	if ((b0 & 0x80) == 0) {
89	/* 0xxxxxxx (Plain ASCII) */
90	b0_bits = 7;
91	cbytes = 0;
92	} else if ((b0 & 0xe0) == 0xc0) {
93	/* 110xxxxx 10xxxxxx */
94	b0_bits = 5;
95	cbytes = 1;
96	} else if ((b0 & 0xf0) == 0xe0) {
97	/* 1110xxxx 10xxxxxx 10xxxxxx */
98	b0_bits = 4;
99	cbytes = 2;
100	} else if ((b0 & 0xf8) == 0xf0) {
101	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
102	b0_bits = 3;
103	cbytes = 3;
104	} else {
105	/* 10xxxxxx -- unexpected continuation byte */
106	return U_SPECIAL;
107	}
108
109	if (*offset + cbytes > size)
110	return U_SPECIAL;
111
112	wchar_t ch = b0 & LO_MASK_8(b0_bits);
113
114	/* Decode continuation bytes */
115	while (cbytes > 0) {
116	uint8_t b = (uint8_t) str[(*offset)++];
117
118	/* Must be 10xxxxxx */
119	if ((b & 0xc0) != 0x80)
120	return U_SPECIAL;
121
122	/* Shift data bits to ch */
123	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
124	cbytes--;
125	}
126
127	return ch;
128	}
129
130	/** Encode a single character to string representation.
131	*
132	* Encode a single character to string representation (i.e. UTF-8) and store
133	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
134	* is moved to the position where the next character can be written to.
135	*
136	* @param ch Input character.
137	* @param str Output buffer.
138	* @param offset Byte offset where to start writing.
139	* @param size Size of the output buffer (in bytes).
140	*
141	* @return EOK if the character was encoded successfully, EOVERFLOW if there
142	* was not enough space in the output buffer or EINVAL if the character
143	* code was invalid.
144	*/
145	int chr_encode(const wchar_t ch, char str, size_t offset, size_t size)
146	{
147	if (*offset >= size)
148	return EOVERFLOW;
149
150	if (!chr_check(ch))
151	return EINVAL;
152
153	/* Unsigned version of ch (bit operations should only be done
154	on unsigned types). */
155	uint32_t cc = (uint32_t) ch;
156
157	/* Determine how many continuation bytes are needed */
158
159	unsigned int b0_bits; /* Data bits in first byte */
160	unsigned int cbytes; /* Number of continuation bytes */
161
162	if ((cc & ~LO_MASK_32(7)) == 0) {
163	b0_bits = 7;
164	cbytes = 0;
165	} else if ((cc & ~LO_MASK_32(11)) == 0) {
166	b0_bits = 5;
167	cbytes = 1;
168	} else if ((cc & ~LO_MASK_32(16)) == 0) {
169	b0_bits = 4;
170	cbytes = 2;
171	} else if ((cc & ~LO_MASK_32(21)) == 0) {
172	b0_bits = 3;
173	cbytes = 3;
174	} else {
175	/* Codes longer than 21 bits are not supported */
176	return EINVAL;
177	}
178
179	/* Check for available space in buffer */
180	if (*offset + cbytes >= size)
181	return EOVERFLOW;
182
183	/* Encode continuation bytes */
184	unsigned int i;
185	for (i = cbytes; i > 0; i--) {
186	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
187	cc = cc >> CONT_BITS;
188	}
189
190	/* Encode first byte */
191	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
192
193	/* Advance offset */
194	*offset += cbytes + 1;
195
196	return EOK;
197	}
198
/** Get size of string.
 *
 * Count the bytes occupied by the string @a str, not counting the
 * terminating zero byte.
 *
 * @param str Zero-terminated string to measure.
 *
 * @return Number of bytes used by the string.
 *
 */
size_t str_size(const char *str)
{
	const char *end = str;

	/* Walk to the terminator; the distance walked is the size. */
	while (*end != 0)
		end++;

	return (size_t) (end - str);
}
218
/** Get size of wide string.
 *
 * Compute the number of bytes occupied by the wide string @a str,
 * not counting the terminating zero character.
 *
 * @param str Zero-terminated wide string to measure.
 *
 * @return Number of bytes used by the wide string.
 *
 */
size_t wstr_size(const wchar_t *str)
{
	size_t chars = wstr_length(str);

	return chars * sizeof(wchar_t);
}
233
234	/** Get size of string with length limit.
235	*
236	* Get the number of bytes which are used by up to @a max_len first
237	* characters in the string @a str. If @a max_len is greater than
238	* the length of @a str, the entire string is measured (excluding the
239	* NULL-terminator).
240	*
241	* @param str String to consider.
242	* @param max_len Maximum number of characters to measure.
243	*
244	* @return Number of bytes used by the characters.
245	*
246	*/
247	size_t str_lsize(const char *str, size_t max_len)
248	{
249	size_t len = 0;
250	size_t offset = 0;
251
252	while (len < max_len) {
253	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
254	break;
255
256	len++;
257	}
258
259	return offset;
260	}
261
/** Get size of wide string with length limit.
 *
 * Get the number of bytes which are used by up to @a max_len first
 * wide characters in the wide string @a str. If @a max_len is greater
 * than the length of @a str, the entire wide string is measured
 * (excluding the zero terminator).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	/* wstr_nlength() takes its limit in bytes, not characters. */
	size_t byte_limit = max_len * sizeof(wchar_t);
	size_t chars = wstr_nlength(str, byte_limit);

	return chars * sizeof(wchar_t);
}
279
280	/** Get number of characters in a string.
281	*
282	* @param str NULL-terminated string.
283	*
284	* @return Number of characters in string.
285	*
286	*/
287	size_t str_length(const char *str)
288	{
289	size_t len = 0;
290	size_t offset = 0;
291
292	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
293	len++;
294
295	return len;
296	}
297
/** Get number of characters in a wide string.
 *
 * @param wstr Zero-terminated wide string.
 *
 * @return Number of characters in @a wstr (terminator not counted).
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
	size_t idx;

	for (idx = 0; wstr[idx] != 0; idx++)
		;

	return idx;
}
314
/** Get number of characters in a string with size limit.
 *
 * Counting stops at the first character that decodes to 0 or when
 * @a size bytes have been consumed, whichever comes first.
 *
 * @param str  Zero-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in string.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t offset = 0;
	size_t count;

	for (count = 0; str_decode(str, &offset, size) != 0; count++)
		;

	return count;
}
333
/** Get number of characters in a wide string with size limit.
 *
 * The byte limit is first rounded down to a whole number of wide
 * characters; counting stops at the terminator or at that limit.
 *
 * @param str  Zero-terminated wide string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in the wide string.
 *
 */
size_t wstr_nlength(const wchar_t *str, size_t size)
{
	size_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
	size_t count = 0;
	size_t consumed;

	for (consumed = 0; consumed < limit; consumed += sizeof(wchar_t)) {
		if (str[count] == 0)
			break;

		count++;
	}

	return count;
}
355
/** Check whether character is plain ASCII.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0 to 127 inclusive.
 *
 */
bool ascii_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 127);
}
368
/** Check whether wide string is plain ASCII.
 *
 * @param wstr Zero-terminated wide string.
 *
 * @return True if every character of @a wstr is plain ASCII
 *         (an empty string qualifies).
 *
 */
bool wstr_is_ascii(const wchar_t *wstr)
{
	/* Skip the ASCII prefix; we end on the terminator iff all is ASCII. */
	while ((*wstr != 0) && ascii_check(*wstr))
		wstr++;

	return *wstr == 0;
}
380
/** Check whether character is valid.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0 to 1114111 (0x10ffff)
 *         inclusive.
 *
 */
bool chr_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 1114111);
}
393
394	/** Compare two NULL terminated strings.
395	*
396	* Do a char-by-char comparison of two NULL-terminated strings.
397	* The strings are considered equal iff they consist of the same
398	* characters on the minimum of their lengths.
399	*
400	* @param s1 First string to compare.
401	* @param s2 Second string to compare.
402	*
403	* @return 0 if the strings are equal, -1 if first is smaller,
404	* 1 if second smaller.
405	*
406	*/
407	int str_cmp(const char s1, const char s2)
408	{
409	wchar_t c1 = 0;
410	wchar_t c2 = 0;
411
412	size_t off1 = 0;
413	size_t off2 = 0;
414
415	while (true) {
416	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
417	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
418
419	if (c1 < c2)
420	return -1;
421
422	if (c1 > c2)
423	return 1;
424
425	if (c1 == 0 \|\| c2 == 0)
426	break;
427	}
428
429	return 0;
430	}
431
432	/** Compare two NULL terminated strings with length limit.
433	*
434	* Do a char-by-char comparison of two NULL-terminated strings.
435	* The strings are considered equal iff they consist of the same
436	* characters on the minimum of their lengths and the length limit.
437	*
438	* @param s1 First string to compare.
439	* @param s2 Second string to compare.
440	* @param max_len Maximum number of characters to consider.
441	*
442	* @return 0 if the strings are equal, -1 if first is smaller,
443	* 1 if second smaller.
444	*
445	*/
446	int str_lcmp(const char s1, const char s2, size_t max_len)
447	{
448	wchar_t c1 = 0;
449	wchar_t c2 = 0;
450
451	size_t off1 = 0;
452	size_t off2 = 0;
453
454	size_t len = 0;
455
456	while (true) {
457	if (len >= max_len)
458	break;
459
460	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
461	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
462
463	if (c1 < c2)
464	return -1;
465
466	if (c1 > c2)
467	return 1;
468
469	if (c1 == 0 \|\| c2 == 0)
470	break;
471
472	++len;
473	}
474
475	return 0;
476
477	}
478
479	/** Copy string.
480	*
481	* Copy source string @a src to destination buffer @a dest.
482	* No more than @a size bytes are written. If the size of the output buffer
483	* is at least one byte, the output string will always be well-formed, i.e.
484	* null-terminated and containing only complete characters.
485	*
486	* @param dest Destination buffer.
487	* @param count Size of the destination buffer (must be > 0).
488	* @param src Source string.
489	*/
490	void str_cpy(char dest, size_t size, const char src)
491	{
492	/* There must be space for a null terminator in the buffer. */
493	assert(size > 0);
494
495	size_t src_off = 0;
496	size_t dest_off = 0;
497
498	wchar_t ch;
499	while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
500	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
501	break;
502	}
503
504	dest[dest_off] = '\0';
505	}
506
507	/** Copy size-limited substring.
508	*
509	* Copy prefix of string @a src of max. size @a size to destination buffer
510	* @a dest. No more than @a size bytes are written. The output string will
511	* always be well-formed, i.e. null-terminated and containing only complete
512	* characters.
513	*
514	* No more than @a n bytes are read from the input string, so it does not
515	* have to be null-terminated.
516	*
517	* @param dest Destination buffer.
518	* @param count Size of the destination buffer (must be > 0).
519	* @param src Source string.
520	* @param n Maximum number of bytes to read from @a src.
521	*/
522	void str_ncpy(char dest, size_t size, const char src, size_t n)
523	{
524	/* There must be space for a null terminator in the buffer. */
525	assert(size > 0);
526
527	size_t src_off = 0;
528	size_t dest_off = 0;
529
530	wchar_t ch;
531	while ((ch = str_decode(src, &src_off, n)) != 0) {
532	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
533	break;
534	}
535
536	dest[dest_off] = '\0';
537	}
538
/** Append one string to another.
 *
 * Append source string @a src to string in destination buffer @a dest.
 * Size of the destination buffer is @a size. The output string will
 * always be well-formed, i.e. zero-terminated and containing only
 * complete characters. If the string already in @a dest leaves no room
 * in the buffer, nothing is appended.
 *
 * @param dest Destination buffer holding a zero-terminated string.
 * @param size Size of the destination buffer.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	size_t dstr_size = str_size(dest);

	/*
	 * Without this guard size - dstr_size would be zero (tripping the
	 * assertion in str_cpy()) or wrap around to a huge value.
	 */
	if (dstr_size >= size)
		return;

	str_cpy(dest + dstr_size, size - dstr_size, src);
}
557
558	/** Convert wide string to string.
559	*
560	* Convert wide string @a src to string. The output is written to the buffer
561	* specified by @a dest and @a size. @a size must be non-zero and the string
562	* written will always be well-formed.
563	*
564	* @param dest Destination buffer.
565	* @param size Size of the destination buffer.
566	* @param src Source wide string.
567	*
568	* @return EOK, if success, negative otherwise.
569	*/
570	int wstr_to_str(char dest, size_t size, const wchar_t src)
571	{
572	int rc;
573	wchar_t ch;
574	size_t src_idx;
575	size_t dest_off;
576
577	/* There must be space for a null terminator in the buffer. */
578	assert(size > 0);
579
580	src_idx = 0;
581	dest_off = 0;
582
583	while ((ch = src[src_idx++]) != 0) {
584	rc = chr_encode(ch, dest, &dest_off, size - 1);
585	if (rc != EOK)
586	break;
587	}
588
589	dest[dest_off] = '\0';
590	return rc;
591	}
592
593	/** Convert wide string to new string.
594	*
595	* Convert wide string @a src to string. Space for the new string is allocated
596	* on the heap.
597	*
598	* @param src Source wide string.
599	* @return New string.
600	*/
601	char wstr_to_astr(const wchar_t src)
602	{
603	char dbuf[STR_BOUNDS(1)];
604	char *str;
605	wchar_t ch;
606
607	size_t src_idx;
608	size_t dest_off;
609	size_t dest_size;
610
611	/* Compute size of encoded string. */
612
613	src_idx = 0;
614	dest_size = 0;
615
616	while ((ch = src[src_idx++]) != 0) {
617	dest_off = 0;
618	if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
619	break;
620	dest_size += dest_off;
621	}
622
623	str = malloc(dest_size + 1);
624	if (str == NULL)
625	return NULL;
626
627	/* Encode string. */
628
629	src_idx = 0;
630	dest_off = 0;
631
632	while ((ch = src[src_idx++]) != 0) {
633	if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
634	break;
635	}
636
637	str[dest_size] = '\0';
638	return str;
639	}
640
641
642	/** Convert string to wide string.
643	*
644	* Convert string @a src to wide string. The output is written to the
645	* buffer specified by @a dest and @a dlen. @a dlen must be non-zero
646	* and the wide string written will always be null-terminated.
647	*
648	* @param dest Destination buffer.
649	* @param dlen Length of destination buffer (number of wchars).
650	* @param src Source string.
651	*
652	* @return EOK, if success, negative otherwise.
653	*/
654	int str_to_wstr(wchar_t dest, size_t dlen, const char src)
655	{
656	int rc=EOK;
657	size_t offset;
658	size_t di;
659	wchar_t c;
660
661	assert(dlen > 0);
662
663	offset = 0;
664	di = 0;
665
666	do {
667	if (di >= dlen - 1) {
668	rc = EOVERFLOW;
669	break;
670	}
671
672	c = str_decode(src, &offset, STR_NO_LIMIT);
673	dest[di++] = c;
674	} while (c != '\0');
675
676	dest[dlen - 1] = '\0';
677	return rc;
678	}
679
680	/** Find first occurence of character in string.
681	*
682	* @param str String to search.
683	* @param ch Character to look for.
684	*
685	* @return Pointer to character in @a str or NULL if not found.
686	*/
687	char str_chr(const char str, wchar_t ch)
688	{
689	wchar_t acc;
690	size_t off = 0;
691	size_t last = 0;
692
693	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
694	if (acc == ch)
695	return (char *) (str + last);
696	last = off;
697	}
698
699	return NULL;
700	}
701
702	/** Find last occurence of character in string.
703	*
704	* @param str String to search.
705	* @param ch Character to look for.
706	*
707	* @return Pointer to character in @a str or NULL if not found.
708	*/
709	char str_rchr(const char str, wchar_t ch)
710	{
711	wchar_t acc;
712	size_t off = 0;
713	size_t last = 0;
714	const char *res = NULL;
715
716	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
717	if (acc == ch)
718	res = (str + last);
719	last = off;
720	}
721
722	return (char *) res;
723	}
724
/** Find first occurrence of character in wide string.
 *
 * @param wstr String to search.
 * @param ch   Character to look for.
 *
 * @return Pointer to character in @a wstr or NULL if not found.
 *         The terminator itself is never matched.
 */
wchar_t *wstr_chr(const wchar_t *wstr, wchar_t ch)
{
	while ((*wstr != 0) && (*wstr != ch))
		wstr++;

	/* Stopping on the terminator means the character was not found. */
	if (*wstr != 0)
		return (wchar_t *) wstr;

	return NULL;
}
741
/** Find last occurrence of character in wide string.
 *
 * @param wstr String to search.
 * @param ch   Character to look for.
 *
 * @return Pointer to character in @a wstr or NULL if not found.
 *         The terminator itself is never matched.
 */
wchar_t *wstr_rchr(const wchar_t *wstr, wchar_t ch)
{
	const wchar_t *res = NULL;

	for (; *wstr != 0; wstr++) {
		if (*wstr == ch)
			res = wstr;
	}

	return (wchar_t *) res;
}
759
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position
 * @a pos. The characters after the position are shifted.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert to.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	/*
	 * NOTE(review): only pos is checked against max_pos, not the
	 * resulting length, so a string that already fills the buffer can
	 * be grown past it. Confirm the intended meaning of max_pos with
	 * the callers before tightening this check.
	 */
	if ((pos > len) || (pos + 1 > max_pos))
		return false;

	/*
	 * Shift the tail (terminator included) one position up. For pos == 0
	 * the loop ends when i wraps around and i + 1 becomes 0.
	 */
	size_t i;
	for (i = len; i + 1 > pos; i--)
		str[i + 1] = str[i];

	str[pos] = ch;

	return true;
}
789
/** Remove a wide character from a wide string.
 *
 * Remove the wide character at position @a pos from a wide string.
 * The characters after the position are shifted down by one.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	size_t len = wstr_length(str);

	if (pos >= len)
		return false;

	/* Pull every later character, terminator included, one slot down. */
	size_t dst;
	for (dst = pos; dst < len; dst++)
		str[dst] = str[dst + 1];

	return true;
}
815
/** Compare two strings byte by byte, ignoring case.
 *
 * Bytes are folded with tolower() before comparison, so only the case of
 * characters known to the C locale is ignored.
 *
 * @param a First string.
 * @param b Second string.
 *
 * @return Zero if the strings are equal, otherwise the difference of the
 *         first pair of folded bytes that differ.
 */
int stricmp(const char *a, const char *b)
{
	size_t i = 0;

	/* tolower() requires values representable as unsigned char. */
	while (a[i] && b[i] &&
	    (tolower((unsigned char) a[i]) == tolower((unsigned char) b[i])))
		i++;

	return tolower((unsigned char) a[i]) - tolower((unsigned char) b[i]);
}
825
/** Convert string to a number.
 *
 * Core of strtol and strtoul functions.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character. Nothing is stored when the base is
 *               invalid or the value overflows.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @param sgn    Set to 1 if a minus sign was found; left untouched
 *               otherwise, so the caller must initialize it.
 *
 * @return Result of conversion (magnitude only), 0 if the base is invalid
 *         or no digits were found, ULONG_MAX on overflow.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
	unsigned long result = 0;
	unsigned long a, b;
	const char *str = nptr;
	const char *tmpptr;

	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*sgn = 1;
		++str;
	} else if (*str == '+') {
		++str;
	}

	if (base != 0) {
		if ((base < 2) || (base > 36)) {
			/* FIXME: set errno to EINVAL */
			return 0;
		}
		if ((base == 16) && (*str == '0') &&
		    ((str[1] == 'x') || (str[1] == 'X'))) {
			str += 2;
		}
	} else {
		base = 10;

		if (*str == '0') {
			base = 8;
			if ((str[1] == 'X') || (str[1] == 'x')) {
				base = 16;
				str += 2;
			}
		}
	}

	tmpptr = str;

	while (*str != '\0') {
		unsigned char c = (unsigned char) *str;
		unsigned int digit;

		if ((c >= '0') && (c <= '9'))
			digit = c - '0';
		else if ((c >= 'a') && (c <= 'z'))
			digit = c - 'a' + 10;
		else if ((c >= 'A') && (c <= 'Z'))
			digit = c - 'A' + 10;
		else
			break;

		/* Valid digits are 0 .. base - 1; base itself is not a digit. */
		if (digit >= (unsigned int) base)
			break;

		/*
		 * Multiply in two halves (low 8 bits and the rest) so that an
		 * overflow shows up in b instead of silently wrapping.
		 */
		a = (result & 0xff) * base + digit;
		b = (result >> 8) * base + (a >> 8);

		if (b > (ULONG_MAX >> 8)) {
			/* overflow */
			/* FIXME: errno = ERANGE */
			return ULONG_MAX;
		}

		result = (b << 8) + (a & 0xff);
		++str;
	}

	if (str == tmpptr) {
		/*
		 * No number was found => first invalid character is the first
		 * character of the string.
		 */
		/* FIXME: set errno to EINVAL */
		str = nptr;
		result = 0;
	}

	if (endptr != NULL)
		*endptr = (char *) str;

	if (nptr == str) {
		/* FIXME: errno = EINVAL */
		return 0;
	}

	return result;
}
918
/** Convert initial part of string to long int according to given base.
 *
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 *
 * @return Result of conversion; LONG_MIN or LONG_MAX if the value is
 *         out of range.
 */
long int strtol(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = 0;

	number = _strtoul(nptr, endptr, base, &sgn);

	if (number > LONG_MAX) {
		/* The magnitude LONG_MAX + 1 is in range only when negated. */
		if ((sgn) && (number == (unsigned long) (LONG_MAX) + 1)) {
			/* FIXME: set 0 to errno */
			return number;
		}
		/* FIXME: set ERANGE to errno */
		return (sgn ? LONG_MIN : LONG_MAX);
	}

	return (sgn ? -number : number);
}
950
/** Duplicate string.
 *
 * Allocate a new string with malloc() and copy characters from the
 * source string into it.
 *
 * The duplicate string is always a well-formed zero-terminated UTF-8
 * string, but it can differ from the source string on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	size_t size = str_size(src) + 1;
	char *dest = malloc(size);

	if (dest == NULL)
		return NULL;

	str_cpy(dest, size, src);
	return dest;
}
977
/** Duplicate string with size limit.
 *
 * Allocate a new string with malloc() and copy up to @a n bytes from the
 * source string into it. No more than @a n + 1 bytes are allocated, but
 * if the size occupied by the source string is smaller than @a n + 1,
 * less is allocated.
 *
 * The size of @a src is measured with str_size() first, so @a src must
 * be zero-terminated.
 *
 * The duplicate string is always a well-formed zero-terminated UTF-8
 * string, but it can differ from the source string on the byte level.
 *
 * @param src Source string.
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	size_t size = str_size(src);

	if (size > n)
		size = n;

	char *dest = malloc(size + 1);

	if (dest == NULL)
		return NULL;

	str_ncpy(dest, size + 1, src, size);
	return dest;
}
1011
/** Reverse a range of bytes in place.
 *
 * @param begin Pointer to the first byte of the range.
 * @param end   Pointer to the last byte of the range (inclusive).
 */
void str_reverse(char *begin, char *end)
{
	/* Swap the outermost bytes and move both ends inwards. */
	while (end > begin) {
		char aux = *end;

		*end-- = *begin;
		*begin++ = aux;
	}
}
1018
1019	int size_t_str(size_t value, int base, char* str, size_t size)
1020	{
1021	static char num[] = "0123456789abcdefghijklmnopqrstuvwxyz";
1022	char* wstr=str;
1023
1024	if (size == 0)
1025	return EINVAL;
1026	if (base<2 \|\| base>35) {
1027	*str='\0';
1028	return EINVAL;
1029	}
1030
1031	do {
1032	*wstr++ = num[value % base];
1033	if (--size == 0)
1034	return EOVERFLOW;
1035	} while(value /= base);
1036	*wstr='\0';
1037
1038	// Reverse string
1039	str_reverse(str,wstr-1);
1040	return EOK;
1041	}
1042
/** Convert initial part of string to unsigned long according to given base.
 *
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 *
 * @return Result of conversion; a leading minus sign negates the value
 *         in unsigned arithmetic.
 */
unsigned long strtoul(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = 0;

	number = _strtoul(nptr, endptr, base, &sgn);

	return (sgn ? -number : number);
}
1065
/** Split string into tokens.
 *
 * Same as strtok_r(), with the position kept in a static variable
 * shared by all callers.
 *
 * @param s     String to tokenize, or NULL to continue with the string
 *              from the previous call.
 * @param delim Set of delimiter characters.
 *
 * @return Next token or NULL if there are no more tokens.
 */
char *strtok(char *s, const char *delim)
{
	static char *next;

	return strtok_r(s, delim, &next);
}
1072
/** Split string into tokens, keeping the position in caller's storage.
 *
 * The input string is modified: the delimiter that ends the returned
 * token is overwritten with a zero terminator.
 *
 * @param s     String to tokenize, or NULL to continue where the previous
 *              call with the same @a next left off.
 * @param delim Set of delimiter characters.
 * @param next  Storage for the position where the next call continues.
 *
 * @return Next token or NULL if there are no more tokens.
 */
char *strtok_r(char *s, const char *delim, char **next)
{
	char *start, *end;

	if (s == NULL)
		s = *next;

	/* Nothing to continue from (no previous call has set *next). */
	if (s == NULL)
		return NULL;

	/* Skip over leading delimiters. */
	while (*s && (str_chr(delim, *s) != NULL))
		++s;
	start = s;

	/* Skip over token characters. */
	while (*s && (str_chr(delim, *s) == NULL))
		++s;
	end = s;

	/* Continue after the delimiter, or stay on the terminator. */
	*next = (*s ? s + 1 : s);

	if (start == end) {
		return NULL;  /* No more tokens. */
	}

	/* Overwrite delimiter with NULL terminator. */
	*end = '\0';
	return start;
}
1097
1098	/** Convert string to uint64_t (internal variant).
1099	*
1100	* @param nptr Pointer to string.
1101	* @param endptr Pointer to the first invalid character is stored here.
1102	* @param base Zero or number between 2 and 36 inclusive.
1103	* @param neg Indication of unary minus is stored here.
1104	* @apram result Result of the conversion.
1105	*
1106	* @return EOK if conversion was successful.
1107	*
1108	*/
1109	static int str_uint(const char nptr, char *endptr, unsigned int base,
1110	bool neg, uint64_t result)
1111	{
1112	assert(endptr != NULL);
1113	assert(neg != NULL);
1114	assert(result != NULL);
1115
1116	*neg = false;
1117	const char *str = nptr;
1118
1119	/* Ignore leading whitespace */
1120	while (isspace(*str))
1121	str++;
1122
1123	if (*str == '-') {
1124	*neg = true;
1125	str++;
1126	} else if (*str == '+')
1127	str++;
1128
1129	if (base == 0) {
1130	/* Decode base if not specified */
1131	base = 10;
1132
1133	if (*str == '0') {
1134	base = 8;
1135	str++;
1136
1137	switch (*str) {
1138	case 'b':
1139	case 'B':
1140	base = 2;
1141	str++;
1142	break;
1143	case 'o':
1144	case 'O':
1145	base = 8;
1146	str++;
1147	break;
1148	case 'd':
1149	case 'D':
1150	case 't':
1151	case 'T':
1152	base = 10;
1153	str++;
1154	break;
1155	case 'x':
1156	case 'X':
1157	base = 16;
1158	str++;
1159	break;
1160	default:
1161	str--;
1162	}
1163	}
1164	} else {
1165	/* Check base range */
1166	if ((base < 2) \|\| (base > 36)) {
1167	endptr = (char ) str;
1168	return EINVAL;
1169	}
1170	}
1171
1172	*result = 0;
1173	const char *startstr = str;
1174
1175	while (*str != 0) {
1176	unsigned int digit;
1177
1178	if ((str >= 'a') && (str <= 'z'))
1179	digit = *str - 'a' + 10;
1180	else if ((str >= 'A') && (str <= 'Z'))
1181	digit = *str - 'A' + 10;
1182	else if ((str >= '0') && (str <= '9'))
1183	digit = *str - '0';
1184	else
1185	break;
1186
1187	if (digit >= base)
1188	break;
1189
1190	uint64_t prev = *result;
1191	result = (result) * base + digit;
1192
1193	if (*result < prev) {
1194	/* Overflow */
1195	endptr = (char ) str;
1196	return EOVERFLOW;
1197	}
1198
1199	str++;
1200	}
1201
1202	if (str == startstr) {
1203	/*
1204	* No digits were decoded => first invalid character is
1205	* the first character of the string.
1206	*/
1207	str = nptr;
1208	}
1209
1210	endptr = (char ) str;
1211
1212	if (str == nptr)
1213	return EINVAL;
1214
1215	return EOK;
1216	}
1217
1218	/** Convert string to uint64_t.
1219	*
1220	* @param nptr Pointer to string.
1221	* @param endptr If not NULL, pointer to the first invalid character
1222	* is stored here.
1223	* @param base Zero or number between 2 and 36 inclusive.
1224	* @param strict Do not allow any trailing characters.
1225	* @param result Result of the conversion.
1226	*
1227	* @return EOK if conversion was successful.
1228	*
1229	*/
1230	int str_uint64(const char nptr, char *endptr, unsigned int base,
1231	bool strict, uint64_t *result)
1232	{
1233	assert(result != NULL);
1234
1235	bool neg;
1236	char *lendptr;
1237	int ret = str_uint(nptr, &lendptr, base, &neg, result);
1238
1239	if (endptr != NULL)
1240	endptr = (char ) lendptr;
1241
1242	if (ret != EOK)
1243	return ret;
1244
1245	/* Do not allow negative values */
1246	if (neg)
1247	return EINVAL;
1248
1249	/* Check whether we are at the end of
1250	the string in strict mode */
1251	if ((strict) && (*lendptr != 0))
1252	return EINVAL;
1253
1254	return EOK;
1255	}
1256
1257	/** Convert string to size_t.
1258	*
1259	* @param nptr Pointer to string.
1260	* @param endptr If not NULL, pointer to the first invalid character
1261	* is stored here.
1262	* @param base Zero or number between 2 and 36 inclusive.
1263	* @param strict Do not allow any trailing characters.
1264	* @param result Result of the conversion.
1265	*
1266	* @return EOK if conversion was successful.
1267	*
1268	*/
1269	int str_size_t(const char nptr, char *endptr, unsigned int base,
1270	bool strict, size_t *result)
1271	{
1272	assert(result != NULL);
1273
1274	bool neg;
1275	char *lendptr;
1276	uint64_t res;
1277	int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1278
1279	if (endptr != NULL)
1280	endptr = (char ) lendptr;
1281
1282	if (ret != EOK)
1283	return ret;
1284
1285	/* Do not allow negative values */
1286	if (neg)
1287	return EINVAL;
1288
1289	/* Check whether we are at the end of
1290	the string in strict mode */
1291	if ((strict) && (*lendptr != 0))
1292	return EINVAL;
1293
1294	/* Check for overflow */
1295	size_t _res = (size_t) res;
1296	if (_res != res)
1297	return EOVERFLOW;
1298
1299	*result = _res;
1300
1301	return EOK;
1302	}
1303
/** Scale a value to a decimal (SI) order of magnitude.
 *
 * The value is divided by a power of 1000 chosen so that the result keeps
 * roughly four to five significant digits, and the matching SI prefix
 * letter is reported ('k', 'M', 'G', 'T', 'P', 'E', or a space when the
 * value is not scaled).
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Prefix letter matching the divisor is stored here.
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/* Units of 10^18 = exa */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* Units of 10^15 = peta */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
1329
/** Scale a byte count to a binary (IEC) order of magnitude.
 *
 * The value is divided by a power of 1024 and the matching unit string
 * is reported ("KiB", "MiB", "GiB", "TiB", "PiB", or "B" when the value
 * is not scaled).
 *
 * @param val    Value to scale (in bytes).
 * @param rv     Scaled value is stored here.
 * @param suffix Pointer to a static unit string is stored here.
 * @param fixed  If true, the unscaled unit is reported as "B " (padded
 *               with a space) instead of "B".
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* Units of 2^50 = pebi */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		if (fixed)
			*suffix = "B ";
		else
			*suffix = "B";
	}
}
1356
1357	/** @}
1358	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: