Context Navigation

source: mainline/uspace/lib/c/generic/str.c@ a33f0a6

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since a33f0a6 was a33f0a6, checked in by Oleg Romanenko <romanenko.oleg@…>, 14 years ago
Merge from mainline
Property mode set to `100644`
File size: 33.8 KB

Line
1	/*
2	* Copyright (c) 2005 Martin Decky
3	* Copyright (c) 2008 Jiri Svoboda
4	* Copyright (c) 2011 Oleg Romanenko
5	* All rights reserved.
6	*
7	* Redistribution and use in source and binary forms, with or without
8	* modification, are permitted provided that the following conditions
9	* are met:
10	*
11	* - Redistributions of source code must retain the above copyright
12	* notice, this list of conditions and the following disclaimer.
13	* - Redistributions in binary form must reproduce the above copyright
14	* notice, this list of conditions and the following disclaimer in the
15	* documentation and/or other materials provided with the distribution.
16	* - The name of the author may not be used to endorse or promote products
17	* derived from this software without specific prior written permission.
18	*
19	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29	*/
30
31	/** @addtogroup libc
32	* @{
33	*/
34	/** @file
35	*/
36
37	#include <str.h>
38	#include <stdlib.h>
39	#include <assert.h>
40	#include <stdint.h>
41	#include <ctype.h>
42	#include <malloc.h>
43	#include <errno.h>
44	#include <align.h>
45	#include <mem.h>
46	#include <str.h>
47
/** Byte mask consisting of lowest @a n bits (out of 8), valid for n <= 8 */
#define LO_MASK_8(n)  ((uint8_t) ((1U << (n)) - 1))

/** Mask consisting of lowest @a n bits (out of 32), valid for n < 32 */
#define LO_MASK_32(n)  ((uint32_t) ((((uint32_t) 1) << (n)) - 1))

/** Byte mask consisting of highest @a n bits (out of 8), valid for n <= 8 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
59
60	/** Decode a single character from a string.
61	*
62	* Decode a single character from a string of size @a size. Decoding starts
63	* at @a offset and this offset is moved to the beginning of the next
64	* character. In case of decoding error, offset generally advances at least
65	* by one. However, offset is never moved beyond size.
66	*
67	* @param str String (not necessarily NULL-terminated).
68	* @param offset Byte offset in string where to start decoding.
69	* @param size Size of the string (in bytes).
70	*
71	* @return Value of decoded character, U_SPECIAL on decoding error or
72	* NULL if attempt to decode beyond @a size.
73	*
74	*/
75	wchar_t str_decode(const char str, size_t offset, size_t size)
76	{
77	if (*offset + 1 > size)
78	return 0;
79
80	/* First byte read from string */
81	uint8_t b0 = (uint8_t) str[(*offset)++];
82
83	/* Determine code length */
84
85	unsigned int b0_bits; /* Data bits in first byte */
86	unsigned int cbytes; /* Number of continuation bytes */
87
88	if ((b0 & 0x80) == 0) {
89	/* 0xxxxxxx (Plain ASCII) */
90	b0_bits = 7;
91	cbytes = 0;
92	} else if ((b0 & 0xe0) == 0xc0) {
93	/* 110xxxxx 10xxxxxx */
94	b0_bits = 5;
95	cbytes = 1;
96	} else if ((b0 & 0xf0) == 0xe0) {
97	/* 1110xxxx 10xxxxxx 10xxxxxx */
98	b0_bits = 4;
99	cbytes = 2;
100	} else if ((b0 & 0xf8) == 0xf0) {
101	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
102	b0_bits = 3;
103	cbytes = 3;
104	} else {
105	/* 10xxxxxx -- unexpected continuation byte */
106	return U_SPECIAL;
107	}
108
109	if (*offset + cbytes > size)
110	return U_SPECIAL;
111
112	wchar_t ch = b0 & LO_MASK_8(b0_bits);
113
114	/* Decode continuation bytes */
115	while (cbytes > 0) {
116	uint8_t b = (uint8_t) str[(*offset)++];
117
118	/* Must be 10xxxxxx */
119	if ((b & 0xc0) != 0x80)
120	return U_SPECIAL;
121
122	/* Shift data bits to ch */
123	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
124	cbytes--;
125	}
126
127	return ch;
128	}
129
130	/** Encode a single character to string representation.
131	*
132	* Encode a single character to string representation (i.e. UTF-8) and store
133	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
134	* is moved to the position where the next character can be written to.
135	*
136	* @param ch Input character.
137	* @param str Output buffer.
138	* @param offset Byte offset where to start writing.
139	* @param size Size of the output buffer (in bytes).
140	*
141	* @return EOK if the character was encoded successfully, EOVERFLOW if there
142	* was not enough space in the output buffer or EINVAL if the character
143	* code was invalid.
144	*/
145	int chr_encode(const wchar_t ch, char str, size_t offset, size_t size)
146	{
147	if (*offset >= size)
148	return EOVERFLOW;
149
150	if (!chr_check(ch))
151	return EINVAL;
152
153	/* Unsigned version of ch (bit operations should only be done
154	on unsigned types). */
155	uint32_t cc = (uint32_t) ch;
156
157	/* Determine how many continuation bytes are needed */
158
159	unsigned int b0_bits; /* Data bits in first byte */
160	unsigned int cbytes; /* Number of continuation bytes */
161
162	if ((cc & ~LO_MASK_32(7)) == 0) {
163	b0_bits = 7;
164	cbytes = 0;
165	} else if ((cc & ~LO_MASK_32(11)) == 0) {
166	b0_bits = 5;
167	cbytes = 1;
168	} else if ((cc & ~LO_MASK_32(16)) == 0) {
169	b0_bits = 4;
170	cbytes = 2;
171	} else if ((cc & ~LO_MASK_32(21)) == 0) {
172	b0_bits = 3;
173	cbytes = 3;
174	} else {
175	/* Codes longer than 21 bits are not supported */
176	return EINVAL;
177	}
178
179	/* Check for available space in buffer */
180	if (*offset + cbytes >= size)
181	return EOVERFLOW;
182
183	/* Encode continuation bytes */
184	unsigned int i;
185	for (i = cbytes; i > 0; i--) {
186	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
187	cc = cc >> CONT_BITS;
188	}
189
190	/* Encode first byte */
191	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
192
193	/* Advance offset */
194	*offset += cbytes + 1;
195
196	return EOK;
197	}
198
/** Get size of string.
 *
 * Get the number of bytes which are used by the string @a str (excluding the
 * NULL-terminator).
 *
 * @param str String to consider.
 *
 * @return Number of bytes used by the string
 *
 */
size_t str_size(const char *str)
{
	size_t size = 0;

	while (str[size] != '\0')
		size++;

	return size;
}
218
/** Get size of wide string.
 *
 * Get the number of bytes which are used by the wide string @a str
 * (excluding the NULL-terminator).
 *
 * @param str Wide string to consider.
 *
 * @return Number of bytes used by the wide string
 *
 */
size_t wstr_size(const wchar_t *str)
{
	return (wstr_length(str) * sizeof(wchar_t));
}
233
234	/** Get size of string with length limit.
235	*
236	* Get the number of bytes which are used by up to @a max_len first
237	* characters in the string @a str. If @a max_len is greater than
238	* the length of @a str, the entire string is measured (excluding the
239	* NULL-terminator).
240	*
241	* @param str String to consider.
242	* @param max_len Maximum number of characters to measure.
243	*
244	* @return Number of bytes used by the characters.
245	*
246	*/
247	size_t str_lsize(const char *str, size_t max_len)
248	{
249	size_t len = 0;
250	size_t offset = 0;
251
252	while (len < max_len) {
253	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
254	break;
255
256	len++;
257	}
258
259	return offset;
260	}
261
/** Get size of wide string with length limit.
 *
 * Get the number of bytes which are used by up to @a max_len first
 * wide characters in the wide string @a str. If @a max_len is greater than
 * the length of @a str, the entire wide string is measured (excluding the
 * NULL-terminator).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	return (wstr_nlength(str, max_len * sizeof(wchar_t)) * sizeof(wchar_t));
}
279
280	/** Get number of characters in a string.
281	*
282	* @param str NULL-terminated string.
283	*
284	* @return Number of characters in string.
285	*
286	*/
287	size_t str_length(const char *str)
288	{
289	size_t len = 0;
290	size_t offset = 0;
291
292	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
293	len++;
294
295	return len;
296	}
297
/** Get number of characters in a wide string.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return Number of characters in @a wstr.
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
	size_t len = 0;

	while (wstr[len] != 0)
		len++;

	return len;
}
314
/** Get number of characters in a string with size limit.
 *
 * Counting stops at the NULL-terminator or after @a size bytes,
 * whichever comes first.
 *
 * @param str  NULL-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in string.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t len = 0;
	size_t offset = 0;

	while (str_decode(str, &offset, size) != 0)
		len++;

	return len;
}
333
/** Get number of characters in a wide string with size limit.
 *
 * Only whole wide characters within the first @a size bytes are examined.
 *
 * @param str  NULL-terminated wide string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in the wide string.
 *
 */
size_t wstr_nlength(const wchar_t *str, size_t size)
{
	size_t len = 0;
	/* Ignore a trailing partial wide character */
	size_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
	size_t offset = 0;

	while ((offset < limit) && (*str++ != 0)) {
		len++;
		offset += sizeof(wchar_t);
	}

	return len;
}
355
/** Check whether character is plain ASCII.
 *
 * @param ch Character to check.
 *
 * @return True if character is plain ASCII (code 0 to 127 inclusive).
 *
 */
bool ascii_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 127);
}
368
/** Check whether wide string is plain ASCII.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return True if every character of the wide string is plain ASCII.
 *
 */
bool wstr_is_ascii(const wchar_t *wstr)
{
	/* Stop at the terminator or at the first non-ASCII character */
	while (*wstr && ascii_check(*wstr))
		wstr++;

	return *wstr == 0;
}
380
/** Check whether character is valid
 *
 * @param ch Character to check.
 *
 * @return True if character lies in the Unicode code space
 *         (0 to 0x10FFFF inclusive).
 *
 */
bool chr_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 0x10FFFF);
}
393
394	/** Compare two NULL terminated strings.
395	*
396	* Do a char-by-char comparison of two NULL-terminated strings.
397	* The strings are considered equal iff they consist of the same
398	* characters on the minimum of their lengths.
399	*
400	* @param s1 First string to compare.
401	* @param s2 Second string to compare.
402	*
403	* @return 0 if the strings are equal, -1 if first is smaller,
404	* 1 if second smaller.
405	*
406	*/
407	int str_cmp(const char s1, const char s2)
408	{
409	wchar_t c1 = 0;
410	wchar_t c2 = 0;
411
412	size_t off1 = 0;
413	size_t off2 = 0;
414
415	while (true) {
416	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
417	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
418
419	if (c1 < c2)
420	return -1;
421
422	if (c1 > c2)
423	return 1;
424
425	if (c1 == 0 \|\| c2 == 0)
426	break;
427	}
428
429	return 0;
430	}
431
432	/** Compare two NULL terminated strings with length limit.
433	*
434	* Do a char-by-char comparison of two NULL-terminated strings.
435	* The strings are considered equal iff they consist of the same
436	* characters on the minimum of their lengths and the length limit.
437	*
438	* @param s1 First string to compare.
439	* @param s2 Second string to compare.
440	* @param max_len Maximum number of characters to consider.
441	*
442	* @return 0 if the strings are equal, -1 if first is smaller,
443	* 1 if second smaller.
444	*
445	*/
446	int str_lcmp(const char s1, const char s2, size_t max_len)
447	{
448	wchar_t c1 = 0;
449	wchar_t c2 = 0;
450
451	size_t off1 = 0;
452	size_t off2 = 0;
453
454	size_t len = 0;
455
456	while (true) {
457	if (len >= max_len)
458	break;
459
460	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
461	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
462
463	if (c1 < c2)
464	return -1;
465
466	if (c1 > c2)
467	return 1;
468
469	if (c1 == 0 \|\| c2 == 0)
470	break;
471
472	++len;
473	}
474
475	return 0;
476
477	}
478
479	/** Copy string.
480	*
481	* Copy source string @a src to destination buffer @a dest.
482	* No more than @a size bytes are written. If the size of the output buffer
483	* is at least one byte, the output string will always be well-formed, i.e.
484	* null-terminated and containing only complete characters.
485	*
486	* @param dest Destination buffer.
487	* @param count Size of the destination buffer (must be > 0).
488	* @param src Source string.
489	*/
490	void str_cpy(char dest, size_t size, const char src)
491	{
492	/* There must be space for a null terminator in the buffer. */
493	assert(size > 0);
494
495	size_t src_off = 0;
496	size_t dest_off = 0;
497
498	wchar_t ch;
499	while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
500	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
501	break;
502	}
503
504	dest[dest_off] = '\0';
505	}
506
507	/** Copy size-limited substring.
508	*
509	* Copy prefix of string @a src of max. size @a size to destination buffer
510	* @a dest. No more than @a size bytes are written. The output string will
511	* always be well-formed, i.e. null-terminated and containing only complete
512	* characters.
513	*
514	* No more than @a n bytes are read from the input string, so it does not
515	* have to be null-terminated.
516	*
517	* @param dest Destination buffer.
518	* @param count Size of the destination buffer (must be > 0).
519	* @param src Source string.
520	* @param n Maximum number of bytes to read from @a src.
521	*/
522	void str_ncpy(char dest, size_t size, const char src, size_t n)
523	{
524	/* There must be space for a null terminator in the buffer. */
525	assert(size > 0);
526
527	size_t src_off = 0;
528	size_t dest_off = 0;
529
530	wchar_t ch;
531	while ((ch = str_decode(src, &src_off, n)) != 0) {
532	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
533	break;
534	}
535
536	dest[dest_off] = '\0';
537	}
538
/** Append one string to another.
 *
 * Append source string @a src to string in destination buffer @a dest.
 * Size of the destination buffer is @a size. If the string already in
 * @a dest fills the whole buffer, nothing is appended. Otherwise the
 * output string will always be well-formed, i.e. null-terminated and
 * containing only complete characters.
 *
 * @param dest Destination buffer (must hold a NULL-terminated string).
 * @param size Size of the destination buffer.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	size_t dstr_size;

	dstr_size = str_size(dest);
	if (dstr_size >= size)
		return;

	/* Copy into the unused tail of the buffer */
	str_cpy(dest + dstr_size, size - dstr_size, src);
}
560
561	/** Convert space-padded ASCII to string.
562	*
563	* Common legacy text encoding in hardware is 7-bit ASCII fitted into
564	* a fixed-with byte buffer (bit 7 always zero), right-padded with spaces
565	* (ASCII 0x20). Convert space-padded ascii to string representation.
566	*
567	* If the text does not fit into the destination buffer, the function converts
568	* as many characters as possible and returns EOVERFLOW.
569	*
570	* If the text contains non-ASCII bytes (with bit 7 set), the whole string is
571	* converted anyway and invalid characters are replaced with question marks
572	* (U_SPECIAL) and the function returns EIO.
573	*
574	* Regardless of return value upon return @a dest will always be well-formed.
575	*
576	* @param dest Destination buffer
577	* @param size Size of destination buffer
578	* @param src Space-padded ASCII.
579	* @param n Size of the source buffer in bytes.
580	*
581	* @return EOK on success, EOVERFLOW if the text does not fit
582	* destination buffer, EIO if the text contains
583	* non-ASCII bytes.
584	*/
585	int spascii_to_str(char dest, size_t size, const uint8_t src, size_t n)
586	{
587	size_t sidx;
588	size_t didx;
589	size_t dlast;
590	uint8_t byte;
591	int rc;
592	int result;
593
594	/* There must be space for a null terminator in the buffer. */
595	assert(size > 0);
596	result = EOK;
597
598	didx = 0;
599	dlast = 0;
600	for (sidx = 0; sidx < n; ++sidx) {
601	byte = src[sidx];
602	if (!ascii_check(byte)) {
603	byte = U_SPECIAL;
604	result = EIO;
605	}
606
607	rc = chr_encode(byte, dest, &didx, size - 1);
608	if (rc != EOK) {
609	assert(rc == EOVERFLOW);
610	dest[didx] = '\0';
611	return rc;
612	}
613
614	/* Remember dest index after last non-empty character */
615	if (byte != 0x20)
616	dlast = didx;
617	}
618
619	/* Terminate string after last non-empty character */
620	dest[dlast] = '\0';
621	return result;
622	}
623
624	/** Convert wide string to string.
625	*
626	* Convert wide string @a src to string. The output is written to the buffer
627	* specified by @a dest and @a size. @a size must be non-zero and the string
628	* written will always be well-formed.
629	*
630	* @param dest Destination buffer.
631	* @param size Size of the destination buffer.
632	* @param src Source wide string.
633	*
634	* @return EOK, if success, negative otherwise.
635	*/
636	int wstr_to_str(char dest, size_t size, const wchar_t src)
637	{
638	int rc;
639	wchar_t ch;
640	size_t src_idx;
641	size_t dest_off;
642
643	/* There must be space for a null terminator in the buffer. */
644	assert(size > 0);
645
646	src_idx = 0;
647	dest_off = 0;
648
649	while ((ch = src[src_idx++]) != 0) {
650	rc = chr_encode(ch, dest, &dest_off, size - 1);
651	if (rc != EOK)
652	break;
653	}
654
655	dest[dest_off] = '\0';
656	return rc;
657	}
658
659	/** Convert UTF16 string to string.
660	*
661	* Convert utf16 string @a src to string. The output is written to the buffer
662	* specified by @a dest and @a size. @a size must be non-zero and the string
663	* written will always be well-formed. Surrogate pairs also supported.
664	*
665	* @param dest Destination buffer.
666	* @param size Size of the destination buffer.
667	* @param src Source utf16 string.
668	*
669	* @return EOK, if success, negative otherwise.
670	*/
671	int utf16_to_str(char dest, size_t size, const uint16_t src)
672	{
673	size_t idx=0, dest_off=0;
674	wchar_t ch;
675	int rc = EOK;
676
677	/* There must be space for a null terminator in the buffer. */
678	assert(size > 0);
679
680	while (src[idx]) {
681	if ((src[idx] & 0xfc00) == 0xd800) {
682	if (src[idx+1] && (src[idx+1] & 0xfc00) == 0xdc00) {
683	ch = 0x10000;
684	ch += (src[idx] & 0x03FF) << 10;
685	ch += (src[idx+1] & 0x03FF);
686	idx += 2;
687	}
688	else
689	break;
690	} else {
691	ch = src[idx];
692	idx++;
693	}
694	rc = chr_encode(ch, dest, &dest_off, size-1);
695	if (rc != EOK)
696	break;
697	}
698	dest[dest_off] = '\0';
699	return rc;
700	}
701
702	int str_to_utf16(uint16_t dest, size_t size, const char src)
703	{
704	int rc=EOK;
705	size_t offset=0;
706	size_t idx=0;
707	wchar_t c;
708
709	assert(size > 0);
710
711	while ((c = str_decode(src, &offset, STR_NO_LIMIT)) != 0) {
712	if (c > 0x10000) {
713	if (idx+2 >= size-1) {
714	rc=EOVERFLOW;
715	break;
716	}
717	c = (c - 0x10000);
718	dest[idx] = 0xD800 \| (c >> 10);
719	dest[idx+1] = 0xDC00 \| (c & 0x3FF);
720	idx++;
721	} else {
722	dest[idx] = c;
723	}
724
725	idx++;
726	if (idx >= size-1) {
727	rc=EOVERFLOW;
728	break;
729	}
730	}
731
732	dest[idx] = '\0';
733	return rc;
734	}
735
736
737	/** Convert wide string to new string.
738	*
739	* Convert wide string @a src to string. Space for the new string is allocated
740	* on the heap.
741	*
742	* @param src Source wide string.
743	* @return New string.
744	*/
745	char wstr_to_astr(const wchar_t src)
746	{
747	char dbuf[STR_BOUNDS(1)];
748	char *str;
749	wchar_t ch;
750
751	size_t src_idx;
752	size_t dest_off;
753	size_t dest_size;
754
755	/* Compute size of encoded string. */
756
757	src_idx = 0;
758	dest_size = 0;
759
760	while ((ch = src[src_idx++]) != 0) {
761	dest_off = 0;
762	if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
763	break;
764	dest_size += dest_off;
765	}
766
767	str = malloc(dest_size + 1);
768	if (str == NULL)
769	return NULL;
770
771	/* Encode string. */
772
773	src_idx = 0;
774	dest_off = 0;
775
776	while ((ch = src[src_idx++]) != 0) {
777	if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
778	break;
779	}
780
781	str[dest_size] = '\0';
782	return str;
783	}
784
785
786	/** Convert string to wide string.
787	*
788	* Convert string @a src to wide string. The output is written to the
789	* buffer specified by @a dest and @a dlen. @a dlen must be non-zero
790	* and the wide string written will always be null-terminated.
791	*
792	* @param dest Destination buffer.
793	* @param dlen Length of destination buffer (number of wchars).
794	* @param src Source string.
795	*
796	* @return EOK, if success, negative otherwise.
797	*/
798	int str_to_wstr(wchar_t dest, size_t dlen, const char src)
799	{
800	int rc=EOK;
801	size_t offset;
802	size_t di;
803	wchar_t c;
804
805	assert(dlen > 0);
806
807	offset = 0;
808	di = 0;
809
810	do {
811	if (di >= dlen - 1) {
812	rc = EOVERFLOW;
813	break;
814	}
815
816	c = str_decode(src, &offset, STR_NO_LIMIT);
817	dest[di++] = c;
818	} while (c != '\0');
819
820	dest[dlen - 1] = '\0';
821	return rc;
822	}
823
824	/** Find first occurence of character in string.
825	*
826	* @param str String to search.
827	* @param ch Character to look for.
828	*
829	* @return Pointer to character in @a str or NULL if not found.
830	*/
831	char str_chr(const char str, wchar_t ch)
832	{
833	wchar_t acc;
834	size_t off = 0;
835	size_t last = 0;
836
837	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
838	if (acc == ch)
839	return (char *) (str + last);
840	last = off;
841	}
842
843	return NULL;
844	}
845
846	/** Find last occurence of character in string.
847	*
848	* @param str String to search.
849	* @param ch Character to look for.
850	*
851	* @return Pointer to character in @a str or NULL if not found.
852	*/
853	char str_rchr(const char str, wchar_t ch)
854	{
855	wchar_t acc;
856	size_t off = 0;
857	size_t last = 0;
858	const char *res = NULL;
859
860	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
861	if (acc == ch)
862	res = (str + last);
863	last = off;
864	}
865
866	return (char *) res;
867	}
868
/** Find first occurrence of character in wide string.
 *
 * @param wstr String to search.
 * @param ch   Character to look for.
 *
 * @return Pointer to character in @a wstr or NULL if not found
 *         (the terminator itself is never reported as a match).
 */
wchar_t *wstr_chr(const wchar_t *wstr, wchar_t ch)
{
	while (*wstr && *wstr != ch)
		wstr++;

	if (*wstr)
		return (wchar_t *) wstr;

	return NULL;
}
885
/** Find last occurrence of character in wide string.
 *
 * @param wstr String to search.
 * @param ch   Character to look for.
 *
 * @return Pointer to character in @a wstr or NULL if not found.
 */
wchar_t *wstr_rchr(const wchar_t *wstr, wchar_t ch)
{
	const wchar_t *res = NULL;

	for (; *wstr; wstr++) {
		if (*wstr == ch)
			res = wstr;
	}

	return (wchar_t *) res;
}
903
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position
 * @a pos. The characters after the position (including the
 * terminator) are shifted by one.
 *
 * NOTE(review): only @a pos is checked against @a max_pos; the shift
 * writes up to index length + 1, so the caller presumably has to
 * guarantee that much room. Confirm against callers.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert to.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	if ((pos > len) || (pos + 1 > max_pos))
		return false;

	/*
	 * Shift from the terminator down to pos. The "i + 1 > pos" form
	 * terminates correctly for pos == 0 despite i being unsigned.
	 */
	size_t i;
	for (i = len; i + 1 > pos; i--)
		str[i + 1] = str[i];

	str[pos] = ch;

	return true;
}
933
/** Remove a wide character from a wide string.
 *
 * Remove a wide character from a wide string at position
 * @a pos. The characters after the position (including the
 * terminator) are shifted down by one.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	size_t len = wstr_length(str);

	if (pos >= len)
		return false;

	/* i runs up to len inclusive so the terminator is moved as well */
	size_t i;
	for (i = pos + 1; i <= len; i++)
		str[i - 1] = str[i];

	return true;
}
959
/** Compare two strings byte by byte, ignoring case.
 *
 * Case folding is done with tolower(), i.e. on single bytes only.
 *
 * @param a First string.
 * @param b Second string.
 *
 * @return Zero if the strings are equal ignoring case, otherwise the
 *         difference of the first pair of lower-cased bytes that differ.
 */
int stricmp(const char *a, const char *b)
{
	size_t i = 0;

	/* tolower() requires a value representable as unsigned char */
	while (a[i] && b[i] &&
	    (tolower((unsigned char) a[i]) == tolower((unsigned char) b[i])))
		i++;

	return tolower((unsigned char) a[i]) - tolower((unsigned char) b[i]);
}
969
/** Convert string to a number.
 * Core of strtol and strtoul functions.
 *
 * Leading whitespace and an optional sign are skipped. With base 0 the
 * base is derived from the prefix (0x/0X hexadecimal, 0 octal, otherwise
 * decimal); with base 16 an optional 0x/0X prefix is skipped.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character (or @a nptr if nothing was converted).
 *               It is not updated when the value overflows.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @param sgn    It's set to 1 if minus found (left untouched otherwise).
 * @return Result of conversion, 0 if no conversion could be done or
 *         ULONG_MAX on overflow.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
	unsigned long result = 0;
	unsigned long a, b;
	const char *str = nptr;
	const char *tmpptr;

	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*sgn = 1;
		++str;
	} else if (*str == '+')
		++str;

	if (base) {
		if ((base < 2) || (base > 36)) {
			/* FIXME: set errno to EINVAL */
			if (endptr)
				*endptr = (char *) nptr;
			return 0;
		}
		if ((base == 16) && (*str == '0') && ((str[1] == 'x') ||
		    (str[1] == 'X'))) {
			str += 2;
		}
	} else {
		base = 10;

		if (*str == '0') {
			base = 8;
			if ((str[1] == 'X') || (str[1] == 'x')) {
				base = 16;
				str += 2;
			}
		}
	}

	tmpptr = str;

	while (*str) {
		unsigned char c = (unsigned char) *str;
		unsigned int digit;

		if ((c >= '0') && (c <= '9'))
			digit = c - '0';
		else if ((c >= 'a') && (c <= 'z'))
			digit = c - 'a' + 10;
		else if ((c >= 'A') && (c <= 'Z'))
			digit = c - 'A' + 10;
		else
			break;

		/* A valid digit is strictly smaller than the base */
		if (digit >= (unsigned int) base)
			break;

		/*
		 * Compute result * base + digit in two parts (low 8 bits and
		 * the rest) so that overflow can be detected.
		 */
		a = (result & 0xff) * base + digit;
		b = (result >> 8) * base + (a >> 8);

		if (b > (ULONG_MAX >> 8)) {
			/* overflow */
			/* FIXME: errno = ERANGE*/
			return ULONG_MAX;
		}

		result = (b << 8) + (a & 0xff);
		++str;
	}

	if (str == tmpptr) {
		/*
		 * No number was found => first invalid character is the first
		 * character of the string.
		 */
		/* FIXME: set errno to EINVAL */
		str = nptr;
		result = 0;
	}

	if (endptr)
		*endptr = (char *) str;

	return result;
}
1062
/** Convert initial part of string to long int according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return Result of conversion, clamped to LONG_MIN / LONG_MAX when the
 *         value is out of range.
 */
long int strtol(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = 0;

	number = _strtoul(nptr, endptr, base, &sgn);

	if (number > LONG_MAX) {
		if ((sgn) && (number == (unsigned long) (LONG_MAX) + 1)) {
			/* Magnitude of LONG_MIN: representable only when negative */
			/* FIXME: set 0 to errno */
			return LONG_MIN;
		}
		/* FIXME: set ERANGE to errno */
		return (sgn ? LONG_MIN : LONG_MAX);
	}

	return (sgn ? -(long) number : (long) number);
}
1094
/** Duplicate string.
 *
 * Allocate a new string with malloc() and copy characters from the
 * source string into it. The caller owns the result.
 *
 * The duplicate string is always a well-formed null-terminated UTF-8
 * string, but it can differ from the source string on the byte level
 * (it is produced by decoding and re-encoding the source).
 *
 * @param src Source string.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	size_t size = str_size(src) + 1;
	char *dest = malloc(size);
	if (dest == NULL)
		return NULL;

	str_cpy(dest, size, src);
	return dest;
}
1121
/** Duplicate string with size limit.
 *
 * Allocate a new string with malloc() and copy up to @a n bytes from the
 * source string into it. No more than @a n + 1 bytes are allocated, but
 * if the size occupied by the source string is smaller than @a n + 1,
 * less is allocated. The caller owns the result.
 *
 * The duplicate string is always a well-formed null-terminated UTF-8
 * string, but it can differ from the source string on the byte level
 * (it is produced by decoding and re-encoding the source).
 *
 * @param src Source string.
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	size_t size = str_size(src);
	if (size > n)
		size = n;

	char *dest = malloc(size + 1);
	if (dest == NULL)
		return NULL;

	str_ncpy(dest, size + 1, src, size);
	return dest;
}
1155
/** Reverse a range of bytes in place.
 *
 * The range is reversed byte by byte, so multi-byte characters inside
 * it are not preserved.
 *
 * @param begin Pointer to the first byte of the range.
 * @param end   Pointer to the last byte of the range (inclusive).
 */
void str_reverse(char *begin, char *end)
{
	while (end > begin) {
		char aux = *end;

		*end-- = *begin;
		*begin++ = aux;
	}
}
1162
1163	int size_t_str(size_t value, int base, char* str, size_t size)
1164	{
1165	static char num[] = "0123456789abcdefghijklmnopqrstuvwxyz";
1166	char* wstr=str;
1167
1168	if (size == 0)
1169	return EINVAL;
1170	if (base<2 \|\| base>35) {
1171	*str='\0';
1172	return EINVAL;
1173	}
1174
1175	do {
1176	*wstr++ = num[value % base];
1177	if (--size == 0)
1178	return EOVERFLOW;
1179	} while(value /= base);
1180	*wstr='\0';
1181
1182	// Reverse string
1183	str_reverse(str,wstr-1);
1184	return EOK;
1185	}
1186
/** Convert initial part of string to unsigned long according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return Result of conversion; with a leading minus the value is
 *         negated in unsigned arithmetic.
 */
unsigned long strtoul(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = 0;

	number = _strtoul(nptr, endptr, base, &sgn);

	return (sgn ? -number : number);
}
1209
/** Split a string into tokens (non-reentrant variant).
 *
 * Thin wrapper around strtok_r() that keeps the scan position in a
 * function-local static variable, so the position is shared by all callers.
 *
 * @param s     String to tokenize, or NULL to continue with the string
 *              from the previous call.
 * @param delim Set of delimiter characters.
 *
 * @return Pointer to the next token or NULL if there are no more tokens.
 *
 */
char *strtok(char *s, const char *delim)
{
	static char *next;

	return strtok_r(s, delim, &next);
}
1216
/** Split a string into tokens (reentrant variant).
 *
 * The input string is modified: the delimiter that ends the returned token
 * is overwritten with a null terminator.
 *
 * @param s     String to tokenize, or NULL to continue from the position
 *              saved in next.
 * @param delim Set of delimiter characters.
 * @param next  Storage for the scan position between calls.
 *
 * @return Pointer to the next token or NULL if there are no more tokens.
 *
 */
char *strtok_r(char *s, const char *delim, char **next)
{
	if (s == NULL)
		s = *next;

	/* Continuation requested but no position was ever saved. */
	if (s == NULL)
		return NULL;

	/* Skip over leading delimiters. */
	while (*s && (str_chr(delim, *s) != NULL))
		++s;
	char *start = s;

	/* Skip over token characters. */
	while (*s && (str_chr(delim, *s) == NULL))
		++s;
	char *end = s;

	/* Resume after the delimiter, or stay on the string terminator. */
	*next = (*s ? s + 1 : s);

	if (start == end)
		return NULL;  /* No more tokens. */

	/* Overwrite delimiter with null terminator. */
	*end = '\0';
	return start;
}
1241
1242	/** Convert string to uint64_t (internal variant).
1243	*
1244	* @param nptr Pointer to string.
1245	* @param endptr Pointer to the first invalid character is stored here.
1246	* @param base Zero or number between 2 and 36 inclusive.
1247	* @param neg Indication of unary minus is stored here.
1248	* @apram result Result of the conversion.
1249	*
1250	* @return EOK if conversion was successful.
1251	*
1252	*/
1253	static int str_uint(const char nptr, char *endptr, unsigned int base,
1254	bool neg, uint64_t result)
1255	{
1256	assert(endptr != NULL);
1257	assert(neg != NULL);
1258	assert(result != NULL);
1259
1260	*neg = false;
1261	const char *str = nptr;
1262
1263	/* Ignore leading whitespace */
1264	while (isspace(*str))
1265	str++;
1266
1267	if (*str == '-') {
1268	*neg = true;
1269	str++;
1270	} else if (*str == '+')
1271	str++;
1272
1273	if (base == 0) {
1274	/* Decode base if not specified */
1275	base = 10;
1276
1277	if (*str == '0') {
1278	base = 8;
1279	str++;
1280
1281	switch (*str) {
1282	case 'b':
1283	case 'B':
1284	base = 2;
1285	str++;
1286	break;
1287	case 'o':
1288	case 'O':
1289	base = 8;
1290	str++;
1291	break;
1292	case 'd':
1293	case 'D':
1294	case 't':
1295	case 'T':
1296	base = 10;
1297	str++;
1298	break;
1299	case 'x':
1300	case 'X':
1301	base = 16;
1302	str++;
1303	break;
1304	default:
1305	str--;
1306	}
1307	}
1308	} else {
1309	/* Check base range */
1310	if ((base < 2) \|\| (base > 36)) {
1311	endptr = (char ) str;
1312	return EINVAL;
1313	}
1314	}
1315
1316	*result = 0;
1317	const char *startstr = str;
1318
1319	while (*str != 0) {
1320	unsigned int digit;
1321
1322	if ((str >= 'a') && (str <= 'z'))
1323	digit = *str - 'a' + 10;
1324	else if ((str >= 'A') && (str <= 'Z'))
1325	digit = *str - 'A' + 10;
1326	else if ((str >= '0') && (str <= '9'))
1327	digit = *str - '0';
1328	else
1329	break;
1330
1331	if (digit >= base)
1332	break;
1333
1334	uint64_t prev = *result;
1335	result = (result) * base + digit;
1336
1337	if (*result < prev) {
1338	/* Overflow */
1339	endptr = (char ) str;
1340	return EOVERFLOW;
1341	}
1342
1343	str++;
1344	}
1345
1346	if (str == startstr) {
1347	/*
1348	* No digits were decoded => first invalid character is
1349	* the first character of the string.
1350	*/
1351	str = nptr;
1352	}
1353
1354	endptr = (char ) str;
1355
1356	if (str == nptr)
1357	return EINVAL;
1358
1359	return EOK;
1360	}
1361
1362	/** Convert string to uint64_t.
1363	*
1364	* @param nptr Pointer to string.
1365	* @param endptr If not NULL, pointer to the first invalid character
1366	* is stored here.
1367	* @param base Zero or number between 2 and 36 inclusive.
1368	* @param strict Do not allow any trailing characters.
1369	* @param result Result of the conversion.
1370	*
1371	* @return EOK if conversion was successful.
1372	*
1373	*/
1374	int str_uint64(const char nptr, char *endptr, unsigned int base,
1375	bool strict, uint64_t *result)
1376	{
1377	assert(result != NULL);
1378
1379	bool neg;
1380	char *lendptr;
1381	int ret = str_uint(nptr, &lendptr, base, &neg, result);
1382
1383	if (endptr != NULL)
1384	endptr = (char ) lendptr;
1385
1386	if (ret != EOK)
1387	return ret;
1388
1389	/* Do not allow negative values */
1390	if (neg)
1391	return EINVAL;
1392
1393	/* Check whether we are at the end of
1394	the string in strict mode */
1395	if ((strict) && (*lendptr != 0))
1396	return EINVAL;
1397
1398	return EOK;
1399	}
1400
1401	/** Convert string to size_t.
1402	*
1403	* @param nptr Pointer to string.
1404	* @param endptr If not NULL, pointer to the first invalid character
1405	* is stored here.
1406	* @param base Zero or number between 2 and 36 inclusive.
1407	* @param strict Do not allow any trailing characters.
1408	* @param result Result of the conversion.
1409	*
1410	* @return EOK if conversion was successful.
1411	*
1412	*/
1413	int str_size_t(const char nptr, char *endptr, unsigned int base,
1414	bool strict, size_t *result)
1415	{
1416	assert(result != NULL);
1417
1418	bool neg;
1419	char *lendptr;
1420	uint64_t res;
1421	int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1422
1423	if (endptr != NULL)
1424	endptr = (char ) lendptr;
1425
1426	if (ret != EOK)
1427	return ret;
1428
1429	/* Do not allow negative values */
1430	if (neg)
1431	return EINVAL;
1432
1433	/* Check whether we are at the end of
1434	the string in strict mode */
1435	if ((strict) && (*lendptr != 0))
1436	return EINVAL;
1437
1438	/* Check for overflow */
1439	size_t _res = (size_t) res;
1440	if (_res != res)
1441	return EOVERFLOW;
1442
1443	*result = _res;
1444
1445	return EOK;
1446	}
1447
/** Scale a value down and pick the matching decimal order suffix.
 *
 * A value is scaled only when it is strictly greater than one thousand
 * times the divisor, so scaled results keep at least four digits. The
 * suffix always corresponds to the divisor used: k (10^3), M (10^6),
 * G (10^9), T (10^12), P (10^15), E (10^18). Unscaled values get a space
 * as the suffix.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Suffix character is stored here.
 *
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
1473
/** Scale a value down and pick the matching binary order suffix.
 *
 * A value is scaled only when it is strictly greater than 1024 times the
 * divisor. The suffix always corresponds to the divisor used: KiB (2^10),
 * MiB (2^20), GiB (2^30), TiB (2^40), PiB (2^50). Unscaled values get
 * the plain byte suffix.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Pointer to a constant suffix string is stored here.
 * @param fixed  If true, the byte suffix is padded with spaces to the
 *               three-character width of the other suffixes.
 *
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		if (fixed)
			*suffix = "B  ";
		else
			*suffix = "B";
	}
}
1500
1501	/** @}
1502	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: