Context Navigation

source: mainline/uspace/lib/c/generic/str.c@ c065743

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since c065743 was c065743, checked in by Jakub Jermar <jakub@…>, 14 years ago
Remove str_reverse() and size_t_str().
Property mode set to `100644`
File size: 33.4 KB

Line
1	/*
2	* Copyright (c) 2005 Martin Decky
3	* Copyright (c) 2008 Jiri Svoboda
4	* Copyright (c) 2011 Martin Sucha
5	* Copyright (c) 2011 Oleg Romanenko
6	* All rights reserved.
7	*
8	* Redistribution and use in source and binary forms, with or without
9	* modification, are permitted provided that the following conditions
10	* are met:
11	*
12	* - Redistributions of source code must retain the above copyright
13	* notice, this list of conditions and the following disclaimer.
14	* - Redistributions in binary form must reproduce the above copyright
15	* notice, this list of conditions and the following disclaimer in the
16	* documentation and/or other materials provided with the distribution.
17	* - The name of the author may not be used to endorse or promote products
18	* derived from this software without specific prior written permission.
19	*
20	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30	*/
31
32	/** @addtogroup libc
33	* @{
34	*/
35	/** @file
36	*/
37
38	#include <str.h>
39	#include <stdlib.h>
40	#include <assert.h>
41	#include <stdint.h>
42	#include <ctype.h>
43	#include <malloc.h>
44	#include <errno.h>
45	#include <align.h>
46	#include <mem.h>
47	#include <str.h>
48
/** Byte mask consisting of lowest @a n bits (out of 8), 0 <= n <= 8 */
#define LO_MASK_8(n)  ((uint8_t) ((1U << (n)) - 1))

/**
 * Mask consisting of lowest @a n bits (out of 32), 0 <= n <= 32.
 *
 * The shift is done in 64 bits so that n == 32 is well defined.
 */
#define LO_MASK_32(n)  ((uint32_t) ((UINT64_C(1) << (n)) - 1))

/**
 * Byte mask consisting of highest @a n bits (out of 8), 0 <= n <= 8.
 *
 * The complement is cast back to uint8_t; without the cast integer
 * promotion would produce a negative int instead of a byte mask.
 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
60
61	/** Decode a single character from a string.
62	*
63	* Decode a single character from a string of size @a size. Decoding starts
64	* at @a offset and this offset is moved to the beginning of the next
65	* character. In case of decoding error, offset generally advances at least
66	* by one. However, offset is never moved beyond size.
67	*
68	* @param str String (not necessarily NULL-terminated).
69	* @param offset Byte offset in string where to start decoding.
70	* @param size Size of the string (in bytes).
71	*
72	* @return Value of decoded character, U_SPECIAL on decoding error or
73	* NULL if attempt to decode beyond @a size.
74	*
75	*/
76	wchar_t str_decode(const char str, size_t offset, size_t size)
77	{
78	if (*offset + 1 > size)
79	return 0;
80
81	/* First byte read from string */
82	uint8_t b0 = (uint8_t) str[(*offset)++];
83
84	/* Determine code length */
85
86	unsigned int b0_bits; /* Data bits in first byte */
87	unsigned int cbytes; /* Number of continuation bytes */
88
89	if ((b0 & 0x80) == 0) {
90	/* 0xxxxxxx (Plain ASCII) */
91	b0_bits = 7;
92	cbytes = 0;
93	} else if ((b0 & 0xe0) == 0xc0) {
94	/* 110xxxxx 10xxxxxx */
95	b0_bits = 5;
96	cbytes = 1;
97	} else if ((b0 & 0xf0) == 0xe0) {
98	/* 1110xxxx 10xxxxxx 10xxxxxx */
99	b0_bits = 4;
100	cbytes = 2;
101	} else if ((b0 & 0xf8) == 0xf0) {
102	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
103	b0_bits = 3;
104	cbytes = 3;
105	} else {
106	/* 10xxxxxx -- unexpected continuation byte */
107	return U_SPECIAL;
108	}
109
110	if (*offset + cbytes > size)
111	return U_SPECIAL;
112
113	wchar_t ch = b0 & LO_MASK_8(b0_bits);
114
115	/* Decode continuation bytes */
116	while (cbytes > 0) {
117	uint8_t b = (uint8_t) str[(*offset)++];
118
119	/* Must be 10xxxxxx */
120	if ((b & 0xc0) != 0x80)
121	return U_SPECIAL;
122
123	/* Shift data bits to ch */
124	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
125	cbytes--;
126	}
127
128	return ch;
129	}
130
131	/** Encode a single character to string representation.
132	*
133	* Encode a single character to string representation (i.e. UTF-8) and store
134	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
135	* is moved to the position where the next character can be written to.
136	*
137	* @param ch Input character.
138	* @param str Output buffer.
139	* @param offset Byte offset where to start writing.
140	* @param size Size of the output buffer (in bytes).
141	*
142	* @return EOK if the character was encoded successfully, EOVERFLOW if there
143	* was not enough space in the output buffer or EINVAL if the character
144	* code was invalid.
145	*/
146	int chr_encode(const wchar_t ch, char str, size_t offset, size_t size)
147	{
148	if (*offset >= size)
149	return EOVERFLOW;
150
151	if (!chr_check(ch))
152	return EINVAL;
153
154	/* Unsigned version of ch (bit operations should only be done
155	on unsigned types). */
156	uint32_t cc = (uint32_t) ch;
157
158	/* Determine how many continuation bytes are needed */
159
160	unsigned int b0_bits; /* Data bits in first byte */
161	unsigned int cbytes; /* Number of continuation bytes */
162
163	if ((cc & ~LO_MASK_32(7)) == 0) {
164	b0_bits = 7;
165	cbytes = 0;
166	} else if ((cc & ~LO_MASK_32(11)) == 0) {
167	b0_bits = 5;
168	cbytes = 1;
169	} else if ((cc & ~LO_MASK_32(16)) == 0) {
170	b0_bits = 4;
171	cbytes = 2;
172	} else if ((cc & ~LO_MASK_32(21)) == 0) {
173	b0_bits = 3;
174	cbytes = 3;
175	} else {
176	/* Codes longer than 21 bits are not supported */
177	return EINVAL;
178	}
179
180	/* Check for available space in buffer */
181	if (*offset + cbytes >= size)
182	return EOVERFLOW;
183
184	/* Encode continuation bytes */
185	unsigned int i;
186	for (i = cbytes; i > 0; i--) {
187	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
188	cc = cc >> CONT_BITS;
189	}
190
191	/* Encode first byte */
192	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
193
194	/* Advance offset */
195	*offset += cbytes + 1;
196
197	return EOK;
198	}
199
/** Get size of string.
 *
 * Count the bytes occupied by the string @a str, not counting the
 * terminating zero byte.
 *
 * @param str String to measure (must be zero-terminated).
 *
 * @return Number of bytes preceding the terminator.
 *
 */
size_t str_size(const char *str)
{
	const char *end = str;

	/* Walk to the terminating zero byte */
	while (*end != '\0')
		end++;

	return (size_t) (end - str);
}
219
/** Get size of wide string.
 *
 * Compute the number of bytes occupied by the wide string @a str,
 * not counting the terminating zero character.
 *
 * @param str Wide string to measure.
 *
 * @return Character count of @a str multiplied by sizeof(wchar_t).
 *
 */
size_t wstr_size(const wchar_t *str)
{
	size_t nchars = wstr_length(str);

	return nchars * sizeof(wchar_t);
}
234
235	/** Get size of string with length limit.
236	*
237	* Get the number of bytes which are used by up to @a max_len first
238	* characters in the string @a str. If @a max_len is greater than
239	* the length of @a str, the entire string is measured (excluding the
240	* NULL-terminator).
241	*
242	* @param str String to consider.
243	* @param max_len Maximum number of characters to measure.
244	*
245	* @return Number of bytes used by the characters.
246	*
247	*/
248	size_t str_lsize(const char *str, size_t max_len)
249	{
250	size_t len = 0;
251	size_t offset = 0;
252
253	while (len < max_len) {
254	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
255	break;
256
257	len++;
258	}
259
260	return offset;
261	}
262
/** Get size of wide string with length limit.
 *
 * Compute the number of bytes occupied by at most @a max_len leading
 * wide characters of @a str. When @a str is shorter than @a max_len
 * characters, the whole wide string is measured (the terminating zero
 * character is not counted).
 *
 * @param str     Wide string to measure.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the measured wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	/* wstr_nlength() takes its limit in bytes, not in characters */
	size_t byte_limit = max_len * sizeof(wchar_t);
	size_t nchars = wstr_nlength(str, byte_limit);

	return nchars * sizeof(wchar_t);
}
280
281	/** Get number of characters in a string.
282	*
283	* @param str NULL-terminated string.
284	*
285	* @return Number of characters in string.
286	*
287	*/
288	size_t str_length(const char *str)
289	{
290	size_t len = 0;
291	size_t offset = 0;
292
293	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
294	len++;
295
296	return len;
297	}
298
/** Get number of characters in a wide string.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return Number of wide characters preceding the terminator.
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
	const wchar_t *cur = wstr;

	/* Advance to the terminating zero character */
	while (*cur != 0)
		cur++;

	return (size_t) (cur - wstr);
}
315
/** Get number of characters in a string with size limit.
 *
 * Characters are counted by decoding with str_decode() until it
 * yields zero, which happens at the terminator or once @a size
 * bytes have been consumed.
 *
 * @param str  NULL-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters decoded.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t pos = 0;
	size_t count;

	for (count = 0; str_decode(str, &pos, size) != 0; count++)
		;

	return count;
}
334
/** Get number of characters in a wide string with size limit.
 *
 * @param str  NULL-terminated wide string.
 * @param size Maximum number of bytes to consider; it is rounded down
 *             to a multiple of sizeof(wchar_t) using ALIGN_DOWN.
 *
 * @return Number of wide characters found before the terminator or
 *         before the byte limit, whichever comes first.
 *
 */
size_t wstr_nlength(const wchar_t *str, size_t size)
{
	const size_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
	size_t count = 0;
	size_t used;

	/* 'used' tracks bytes consumed, 'count' the characters seen */
	for (used = 0; used < limit; used += sizeof(wchar_t)) {
		if (str[count] == 0)
			break;

		count++;
	}

	return count;
}
356
/** Check whether character is plain ASCII.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0 to 127 inclusive.
 *
 */
bool ascii_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 127);
}
369
/** Check whether character is valid
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0 to 1114111 (0x10FFFF)
 *         inclusive.
 *
 */
bool chr_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 1114111);
}
382
383	/** Compare two NULL terminated strings.
384	*
385	* Do a char-by-char comparison of two NULL-terminated strings.
386	* The strings are considered equal iff they consist of the same
387	* characters on the minimum of their lengths.
388	*
389	* @param s1 First string to compare.
390	* @param s2 Second string to compare.
391	*
392	* @return 0 if the strings are equal, -1 if first is smaller,
393	* 1 if second smaller.
394	*
395	*/
396	int str_cmp(const char s1, const char s2)
397	{
398	wchar_t c1 = 0;
399	wchar_t c2 = 0;
400
401	size_t off1 = 0;
402	size_t off2 = 0;
403
404	while (true) {
405	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
406	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
407
408	if (c1 < c2)
409	return -1;
410
411	if (c1 > c2)
412	return 1;
413
414	if (c1 == 0 \|\| c2 == 0)
415	break;
416	}
417
418	return 0;
419	}
420
421	/** Compare two NULL terminated strings with length limit.
422	*
423	* Do a char-by-char comparison of two NULL-terminated strings.
424	* The strings are considered equal iff they consist of the same
425	* characters on the minimum of their lengths and the length limit.
426	*
427	* @param s1 First string to compare.
428	* @param s2 Second string to compare.
429	* @param max_len Maximum number of characters to consider.
430	*
431	* @return 0 if the strings are equal, -1 if first is smaller,
432	* 1 if second smaller.
433	*
434	*/
435	int str_lcmp(const char s1, const char s2, size_t max_len)
436	{
437	wchar_t c1 = 0;
438	wchar_t c2 = 0;
439
440	size_t off1 = 0;
441	size_t off2 = 0;
442
443	size_t len = 0;
444
445	while (true) {
446	if (len >= max_len)
447	break;
448
449	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
450	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
451
452	if (c1 < c2)
453	return -1;
454
455	if (c1 > c2)
456	return 1;
457
458	if (c1 == 0 \|\| c2 == 0)
459	break;
460
461	++len;
462	}
463
464	return 0;
465
466	}
467
468	/** Copy string.
469	*
470	* Copy source string @a src to destination buffer @a dest.
471	* No more than @a size bytes are written. If the size of the output buffer
472	* is at least one byte, the output string will always be well-formed, i.e.
473	* null-terminated and containing only complete characters.
474	*
475	* @param dest Destination buffer.
476	* @param count Size of the destination buffer (must be > 0).
477	* @param src Source string.
478	*/
479	void str_cpy(char dest, size_t size, const char src)
480	{
481	/* There must be space for a null terminator in the buffer. */
482	assert(size > 0);
483
484	size_t src_off = 0;
485	size_t dest_off = 0;
486
487	wchar_t ch;
488	while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
489	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
490	break;
491	}
492
493	dest[dest_off] = '\0';
494	}
495
496	/** Copy size-limited substring.
497	*
498	* Copy prefix of string @a src of max. size @a size to destination buffer
499	* @a dest. No more than @a size bytes are written. The output string will
500	* always be well-formed, i.e. null-terminated and containing only complete
501	* characters.
502	*
503	* No more than @a n bytes are read from the input string, so it does not
504	* have to be null-terminated.
505	*
506	* @param dest Destination buffer.
507	* @param count Size of the destination buffer (must be > 0).
508	* @param src Source string.
509	* @param n Maximum number of bytes to read from @a src.
510	*/
511	void str_ncpy(char dest, size_t size, const char src, size_t n)
512	{
513	/* There must be space for a null terminator in the buffer. */
514	assert(size > 0);
515
516	size_t src_off = 0;
517	size_t dest_off = 0;
518
519	wchar_t ch;
520	while ((ch = str_decode(src, &src_off, n)) != 0) {
521	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
522	break;
523	}
524
525	dest[dest_off] = '\0';
526	}
527
/** Append one string to another.
 *
 * Append source string @a src to string in destination buffer @a dest.
 * Size of the destination buffer is @a size. If the string already in
 * @a dest occupies @a size bytes or more, nothing is appended. Otherwise
 * the result is well-formed, i.e. null-terminated and containing only
 * complete characters.
 *
 * @param dest Destination buffer (must hold a null-terminated string).
 * @param size Size of the destination buffer.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	size_t dstr_size;

	dstr_size = str_size(dest);
	if (dstr_size >= size)
		return;

	/* Copy into the unused tail, overwriting the old terminator */
	str_cpy(dest + dstr_size, size - dstr_size, src);
}
549
550	/** Convert space-padded ASCII to string.
551	*
552	* Common legacy text encoding in hardware is 7-bit ASCII fitted into
553	* a fixed-with byte buffer (bit 7 always zero), right-padded with spaces
554	* (ASCII 0x20). Convert space-padded ascii to string representation.
555	*
556	* If the text does not fit into the destination buffer, the function converts
557	* as many characters as possible and returns EOVERFLOW.
558	*
559	* If the text contains non-ASCII bytes (with bit 7 set), the whole string is
560	* converted anyway and invalid characters are replaced with question marks
561	* (U_SPECIAL) and the function returns EIO.
562	*
563	* Regardless of return value upon return @a dest will always be well-formed.
564	*
565	* @param dest Destination buffer
566	* @param size Size of destination buffer
567	* @param src Space-padded ASCII.
568	* @param n Size of the source buffer in bytes.
569	*
570	* @return EOK on success, EOVERFLOW if the text does not fit
571	* destination buffer, EIO if the text contains
572	* non-ASCII bytes.
573	*/
574	int spascii_to_str(char dest, size_t size, const uint8_t src, size_t n)
575	{
576	size_t sidx;
577	size_t didx;
578	size_t dlast;
579	uint8_t byte;
580	int rc;
581	int result;
582
583	/* There must be space for a null terminator in the buffer. */
584	assert(size > 0);
585	result = EOK;
586
587	didx = 0;
588	dlast = 0;
589	for (sidx = 0; sidx < n; ++sidx) {
590	byte = src[sidx];
591	if (!ascii_check(byte)) {
592	byte = U_SPECIAL;
593	result = EIO;
594	}
595
596	rc = chr_encode(byte, dest, &didx, size - 1);
597	if (rc != EOK) {
598	assert(rc == EOVERFLOW);
599	dest[didx] = '\0';
600	return rc;
601	}
602
603	/* Remember dest index after last non-empty character */
604	if (byte != 0x20)
605	dlast = didx;
606	}
607
608	/* Terminate string after last non-empty character */
609	dest[dlast] = '\0';
610	return result;
611	}
612
613	/** Convert wide string to string.
614	*
615	* Convert wide string @a src to string. The output is written to the buffer
616	* specified by @a dest and @a size. @a size must be non-zero and the string
617	* written will always be well-formed.
618	*
619	* @param dest Destination buffer.
620	* @param size Size of the destination buffer.
621	* @param src Source wide string.
622	*
623	* @return EOK, if success, negative otherwise.
624	*/
625	int wstr_to_str(char dest, size_t size, const wchar_t src)
626	{
627	int rc;
628	wchar_t ch;
629	size_t src_idx;
630	size_t dest_off;
631
632	/* There must be space for a null terminator in the buffer. */
633	assert(size > 0);
634
635	src_idx = 0;
636	dest_off = 0;
637
638	while ((ch = src[src_idx++]) != 0) {
639	rc = chr_encode(ch, dest, &dest_off, size - 1);
640	if (rc != EOK)
641	break;
642	}
643
644	dest[dest_off] = '\0';
645	return rc;
646	}
647
648	/** Convert UTF16 string to string.
649	*
650	* Convert utf16 string @a src to string. The output is written to the buffer
651	* specified by @a dest and @a size. @a size must be non-zero and the string
652	* written will always be well-formed. Surrogate pairs also supported.
653	*
654	* @param dest Destination buffer.
655	* @param size Size of the destination buffer.
656	* @param src Source utf16 string.
657	*
658	* @return EOK, if success, negative otherwise.
659	*/
660	int utf16_to_str(char dest, size_t size, const uint16_t src)
661	{
662	size_t idx=0, dest_off=0;
663	wchar_t ch;
664	int rc = EOK;
665
666	/* There must be space for a null terminator in the buffer. */
667	assert(size > 0);
668
669	while (src[idx]) {
670	if ((src[idx] & 0xfc00) == 0xd800) {
671	if (src[idx+1] && (src[idx+1] & 0xfc00) == 0xdc00) {
672	ch = 0x10000;
673	ch += (src[idx] & 0x03FF) << 10;
674	ch += (src[idx+1] & 0x03FF);
675	idx += 2;
676	}
677	else
678	break;
679	} else {
680	ch = src[idx];
681	idx++;
682	}
683	rc = chr_encode(ch, dest, &dest_off, size-1);
684	if (rc != EOK)
685	break;
686	}
687	dest[dest_off] = '\0';
688	return rc;
689	}
690
691	int str_to_utf16(uint16_t dest, size_t size, const char src)
692	{
693	int rc=EOK;
694	size_t offset=0;
695	size_t idx=0;
696	wchar_t c;
697
698	assert(size > 0);
699
700	while ((c = str_decode(src, &offset, STR_NO_LIMIT)) != 0) {
701	if (c > 0x10000) {
702	if (idx+2 >= size-1) {
703	rc=EOVERFLOW;
704	break;
705	}
706	c = (c - 0x10000);
707	dest[idx] = 0xD800 \| (c >> 10);
708	dest[idx+1] = 0xDC00 \| (c & 0x3FF);
709	idx++;
710	} else {
711	dest[idx] = c;
712	}
713
714	idx++;
715	if (idx >= size-1) {
716	rc=EOVERFLOW;
717	break;
718	}
719	}
720
721	dest[idx] = '\0';
722	return rc;
723	}
724
725
726	/** Convert wide string to new string.
727	*
728	* Convert wide string @a src to string. Space for the new string is allocated
729	* on the heap.
730	*
731	* @param src Source wide string.
732	* @return New string.
733	*/
734	char wstr_to_astr(const wchar_t src)
735	{
736	char dbuf[STR_BOUNDS(1)];
737	char *str;
738	wchar_t ch;
739
740	size_t src_idx;
741	size_t dest_off;
742	size_t dest_size;
743
744	/* Compute size of encoded string. */
745
746	src_idx = 0;
747	dest_size = 0;
748
749	while ((ch = src[src_idx++]) != 0) {
750	dest_off = 0;
751	if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
752	break;
753	dest_size += dest_off;
754	}
755
756	str = malloc(dest_size + 1);
757	if (str == NULL)
758	return NULL;
759
760	/* Encode string. */
761
762	src_idx = 0;
763	dest_off = 0;
764
765	while ((ch = src[src_idx++]) != 0) {
766	if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
767	break;
768	}
769
770	str[dest_size] = '\0';
771	return str;
772	}
773
774
775	/** Convert string to wide string.
776	*
777	* Convert string @a src to wide string. The output is written to the
778	* buffer specified by @a dest and @a dlen. @a dlen must be non-zero
779	* and the wide string written will always be null-terminated.
780	*
781	* @param dest Destination buffer.
782	* @param dlen Length of destination buffer (number of wchars).
783	* @param src Source string.
784	*
785	* @return EOK, if success, negative otherwise.
786	*/
787	int str_to_wstr(wchar_t dest, size_t dlen, const char src)
788	{
789	int rc=EOK;
790	size_t offset;
791	size_t di;
792	wchar_t c;
793
794	assert(dlen > 0);
795
796	offset = 0;
797	di = 0;
798
799	do {
800	if (di >= dlen - 1) {
801	rc = EOVERFLOW;
802	break;
803	}
804
805	c = str_decode(src, &offset, STR_NO_LIMIT);
806	dest[di++] = c;
807	} while (c != '\0');
808
809	dest[dlen - 1] = '\0';
810	return rc;
811	}
812
/** Convert string to wide string.
 *
 * Convert string @a str to wide string. A new wide NULL-terminated
 * string will be allocated on the heap with calloc().
 *
 * @param str Source string.
 *
 * @return New wide string, or NULL if the allocation failed.
 */
wchar_t *str_to_awstr(const char *str)
{
	size_t len = str_length(str);

	/* One extra element for the terminator */
	wchar_t *wstr = calloc(len + 1, sizeof(wchar_t));
	if (wstr == NULL)
		return NULL;

	str_to_wstr(wstr, len + 1, str);
	return wstr;
}
831
832	/** Find first occurence of character in string.
833	*
834	* @param str String to search.
835	* @param ch Character to look for.
836	*
837	* @return Pointer to character in @a str or NULL if not found.
838	*/
839	char str_chr(const char str, wchar_t ch)
840	{
841	wchar_t acc;
842	size_t off = 0;
843	size_t last = 0;
844
845	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
846	if (acc == ch)
847	return (char *) (str + last);
848	last = off;
849	}
850
851	return NULL;
852	}
853
854	/** Find last occurence of character in string.
855	*
856	* @param str String to search.
857	* @param ch Character to look for.
858	*
859	* @return Pointer to character in @a str or NULL if not found.
860	*/
861	char str_rchr(const char str, wchar_t ch)
862	{
863	wchar_t acc;
864	size_t off = 0;
865	size_t last = 0;
866	const char *res = NULL;
867
868	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
869	if (acc == ch)
870	res = (str + last);
871	last = off;
872	}
873
874	return (char *) res;
875	}
876
/** Find first occurence of character in wide string.
 *
 * @param wstr String to search.
 * @param ch   Character to look for.
 *
 * @return Pointer to character in @a wstr or NULL if not found.
 *         Searching for the terminator (0) yields NULL.
 */
wchar_t *wstr_chr(const wchar_t *wstr, wchar_t ch)
{
	/* Stop at the terminator or at the first match */
	while (*wstr && *wstr != ch)
		wstr++;

	if (*wstr)
		return (wchar_t *) wstr;
	else
		return NULL;
}
893
/** Find last occurence of character in wide string.
 *
 * @param wstr String to search.
 * @param ch   Character to look for.
 *
 * @return Pointer to character in @a wstr or NULL if not found.
 */
wchar_t *wstr_rchr(const wchar_t *wstr, wchar_t ch)
{
	const wchar_t *res = NULL;

	/* Scan the whole string, remembering the most recent match */
	while (*wstr) {
		if (*wstr == ch)
			res = wstr;
		wstr++;
	}

	return (wchar_t *) res;
}
911
/** Insert a wide character into a wide string.
 *
 * Place @a ch at character index @a pos of @a str. The characters
 * from @a pos onwards, including the terminator, are moved one
 * position towards the end.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if @a pos is
 *         beyond the end of the string or if @a pos + 1 exceeds
 *         @a max_pos.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	const size_t length = wstr_length(str);

	if (pos > length)
		return false;

	if (pos + 1 > max_pos)
		return false;

	/* Shift the tail (terminator included) right, back to front */
	size_t idx = length + 1;
	while (idx > pos) {
		str[idx] = str[idx - 1];
		idx--;
	}

	str[pos] = ch;

	return true;
}
941
/** Remove a wide character from a wide string.
 *
 * Delete the character at index @a pos of @a str. The characters
 * following it, including the terminator, are moved one position
 * towards the beginning.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was successful, false if @a pos is not
 *         the index of a character of the string.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	const size_t length = wstr_length(str);

	if (pos >= length)
		return false;

	/* Shift the tail (terminator included) left, front to back */
	size_t idx;
	for (idx = pos; idx < length; idx++)
		str[idx] = str[idx + 1];

	return true;
}
967
/** Compare two strings byte by byte, ignoring case.
 *
 * Each byte is passed through tolower() before comparison. Bytes are
 * converted to unsigned char first, because passing a negative value
 * to tolower() is not portable.
 *
 * @param a First string to compare.
 * @param b Second string to compare.
 *
 * @return Zero if the strings are equal ignoring case, otherwise the
 *         difference of the first pair of lowercased bytes that differ
 *         (negative if @a a sorts first, positive if @a b does).
 */
int stricmp(const char *a, const char *b)
{
	size_t i = 0;

	while (true) {
		int ca = tolower((unsigned char) a[i]);
		int cb = tolower((unsigned char) b[i]);

		/* Stop at the first difference or at the common terminator */
		if ((ca != cb) || (ca == 0))
			return ca - cb;

		i++;
	}
}
977
/** Convert string to a number.
 * Core of strtol and strtoul functions.
 *
 * Leading whitespace and an optional sign are skipped. With base 0 the
 * base is derived from the prefix ("0x"/"0X" hexadecimal, leading "0"
 * octal, decimal otherwise); with base 16 an optional "0x"/"0X" prefix
 * is skipped.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character (or @a nptr if no number was found
 *               or the base was invalid).
 * @param base   Zero or number between 2 and 36 inclusive.
 * @param sgn    It's set to 1 if minus found; it is left untouched
 *               otherwise.
 * @return       Result of conversion, ULONG_MAX on overflow, 0 if no
 *               number was found or the base was invalid.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
	unsigned long result = 0;
	unsigned long a, b;
	bool overflow = false;
	const char *str = nptr;
	const char *tmpptr;

	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*sgn = 1;
		++str;
	} else if (*str == '+')
		++str;

	if (base) {
		if ((base < 2) || (base > 36)) {
			/* FIXME: set errno to EINVAL */
			if (endptr)
				*endptr = (char *) nptr;
			return 0;
		}
		if ((base == 16) && (*str == '0') && ((str[1] == 'x') ||
		    (str[1] == 'X'))) {
			str += 2;
		}
	} else {
		base = 10;

		if (*str == '0') {
			base = 8;
			if ((str[1] == 'X') || (str[1] == 'x')) {
				base = 16;
				str += 2;
			}
		}
	}

	tmpptr = str;

	while (*str) {
		unsigned char c = (unsigned char) *str;
		unsigned int digit;

		if ((c >= '0') && (c <= '9'))
			digit = c - '0';
		else if ((c >= 'a') && (c <= 'z'))
			digit = c - 'a' + 10;
		else if ((c >= 'A') && (c <= 'Z'))
			digit = c - 'A' + 10;
		else
			break;

		/* Valid digits are 0 .. base - 1 */
		if (digit >= (unsigned int) base)
			break;

		if (!overflow) {
			/*
			 * Multiply in two parts (low 8 bits and the rest)
			 * so that overflow can be detected before it happens.
			 */
			a = (result & 0xff) * base + digit;
			b = (result >> 8) * base + (a >> 8);

			if (b > (ULONG_MAX >> 8)) {
				/* FIXME: errno = ERANGE */
				overflow = true;
			} else
				result = (b << 8) + (a & 0xff);
		}

		/* Keep consuming digits even after overflow */
		++str;
	}

	if (str == tmpptr) {
		/*
		 * No number was found => first invalid character is the first
		 * character of the string.
		 */
		/* FIXME: set errno to EINVAL */
		str = nptr;
		result = 0;
	}

	if (endptr)
		*endptr = (char *) str;

	if (nptr == str) {
		/* FIXME: errno = EINVAL */
		return 0;
	}

	if (overflow)
		return ULONG_MAX;

	return result;
}
1070
/** Convert initial part of string to long int according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return       Result of conversion; LONG_MIN or LONG_MAX if the value
 *               does not fit into long int.
 */
long int strtol(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = _strtoul(nptr, endptr, base, &sgn);

	if (number > LONG_MAX) {
		if ((sgn) && (number == (unsigned long) (LONG_MAX) + 1)) {
			/*
			 * Exactly the magnitude of LONG_MIN: a valid result,
			 * returned explicitly instead of relying on an
			 * implementation-defined conversion.
			 */
			/* FIXME: set 0 to errno */
			return LONG_MIN;
		}
		/* FIXME: set ERANGE to errno */
		return (sgn ? LONG_MIN : LONG_MAX);
	}

	/* number fits into long here, so the cast and negation are safe */
	return (sgn ? -(long) number : (long) number);
}
1102
/** Duplicate string.
 *
 * Allocate a new string with malloc() and copy characters from the
 * source string into it using str_cpy().
 *
 * The duplicate string is always a well-formed null-terminated
 * string, but it can differ from the source string on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	/* Bytes of the source plus one for the terminator */
	size_t size = str_size(src) + 1;
	char *dest = malloc(size);
	if (dest == NULL)
		return NULL;

	str_cpy(dest, size, src);
	return dest;
}
1129
/** Duplicate string with size limit.
 *
 * Allocate a new string with malloc() and copy up to @a n bytes from
 * the source string into it using str_ncpy(). No more than @a n + 1
 * bytes is allocated, but if the size occupied by the source string
 * is smaller than @a n, less is allocated.
 *
 * The duplicate string is always a well-formed null-terminated
 * string, but it can differ from the source string on the byte level.
 *
 * @param src Source string (must be null-terminated, its full size is
 *            measured with str_size()).
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	size_t size = str_size(src);
	if (size > n)
		size = n;

	/* One extra byte for the terminator */
	char *dest = malloc(size + 1);
	if (dest == NULL)
		return NULL;

	str_ncpy(dest, size + 1, src, size);
	return dest;
}
1163
/** Convert initial part of string to unsigned long according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return       Result of conversion; a value preceded by a minus sign
 *               is negated in unsigned arithmetic.
 */
unsigned long strtoul(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = _strtoul(nptr, endptr, base, &sgn);

	return (sgn ? -number : number);
}
1186
/** Split string into tokens.
 *
 * Wrapper around strtok_r() that keeps the scanning position in a
 * function-local static variable, so it is not reentrant.
 *
 * @param s     String to tokenize on the first call, NULL to continue
 *              with the string from the previous call.
 * @param delim String of delimiter characters.
 *
 * @return Pointer to the next token or NULL if there are no more tokens.
 */
char *strtok(char *s, const char *delim)
{
	static char *next;

	return strtok_r(s, delim, &next);
}
1193
/** Split string into tokens (reentrant variant).
 *
 * The string is scanned byte by byte; each byte is looked up in
 * @a delim with str_chr(). The delimiter that ends a token is
 * overwritten with a null terminator, so @a s is modified.
 *
 * @param s     String to tokenize on the first call, NULL to continue
 *              from the position saved in @a next.
 * @param delim String of delimiter characters.
 * @param next  Storage for the scanning position between calls.
 *
 * @return Pointer to the next token or NULL if there are no more tokens.
 */
char *strtok_r(char *s, const char *delim, char **next)
{
	char *start, *end;

	if (s == NULL)
		s = *next;

	/* Nothing to scan: continuation requested without a saved position */
	if (s == NULL)
		return NULL;

	/* Skip over leading delimiters. */
	while (*s && (str_chr(delim, *s) != NULL))
		++s;
	start = s;

	/* Skip over token characters. */
	while (*s && (str_chr(delim, *s) == NULL))
		++s;
	end = s;

	/* Resume after the delimiter, or stay on the terminator */
	*next = (*s ? s + 1 : s);

	if (start == end) {
		return NULL;  /* No more tokens. */
	}

	/* Overwrite delimiter with NULL terminator. */
	*end = '\0';
	return start;
}
1218
1219	/** Convert string to uint64_t (internal variant).
1220	*
1221	* @param nptr Pointer to string.
1222	* @param endptr Pointer to the first invalid character is stored here.
1223	* @param base Zero or number between 2 and 36 inclusive.
1224	* @param neg Indication of unary minus is stored here.
1225	* @apram result Result of the conversion.
1226	*
1227	* @return EOK if conversion was successful.
1228	*
1229	*/
1230	static int str_uint(const char nptr, char *endptr, unsigned int base,
1231	bool neg, uint64_t result)
1232	{
1233	assert(endptr != NULL);
1234	assert(neg != NULL);
1235	assert(result != NULL);
1236
1237	*neg = false;
1238	const char *str = nptr;
1239
1240	/* Ignore leading whitespace */
1241	while (isspace(*str))
1242	str++;
1243
1244	if (*str == '-') {
1245	*neg = true;
1246	str++;
1247	} else if (*str == '+')
1248	str++;
1249
1250	if (base == 0) {
1251	/* Decode base if not specified */
1252	base = 10;
1253
1254	if (*str == '0') {
1255	base = 8;
1256	str++;
1257
1258	switch (*str) {
1259	case 'b':
1260	case 'B':
1261	base = 2;
1262	str++;
1263	break;
1264	case 'o':
1265	case 'O':
1266	base = 8;
1267	str++;
1268	break;
1269	case 'd':
1270	case 'D':
1271	case 't':
1272	case 'T':
1273	base = 10;
1274	str++;
1275	break;
1276	case 'x':
1277	case 'X':
1278	base = 16;
1279	str++;
1280	break;
1281	default:
1282	str--;
1283	}
1284	}
1285	} else {
1286	/* Check base range */
1287	if ((base < 2) \|\| (base > 36)) {
1288	endptr = (char ) str;
1289	return EINVAL;
1290	}
1291	}
1292
1293	*result = 0;
1294	const char *startstr = str;
1295
1296	while (*str != 0) {
1297	unsigned int digit;
1298
1299	if ((str >= 'a') && (str <= 'z'))
1300	digit = *str - 'a' + 10;
1301	else if ((str >= 'A') && (str <= 'Z'))
1302	digit = *str - 'A' + 10;
1303	else if ((str >= '0') && (str <= '9'))
1304	digit = *str - '0';
1305	else
1306	break;
1307
1308	if (digit >= base)
1309	break;
1310
1311	uint64_t prev = *result;
1312	result = (result) * base + digit;
1313
1314	if (*result < prev) {
1315	/* Overflow */
1316	endptr = (char ) str;
1317	return EOVERFLOW;
1318	}
1319
1320	str++;
1321	}
1322
1323	if (str == startstr) {
1324	/*
1325	* No digits were decoded => first invalid character is
1326	* the first character of the string.
1327	*/
1328	str = nptr;
1329	}
1330
1331	endptr = (char ) str;
1332
1333	if (str == nptr)
1334	return EINVAL;
1335
1336	return EOK;
1337	}
1338
1339	/** Convert string to uint64_t.
1340	*
1341	* @param nptr Pointer to string.
1342	* @param endptr If not NULL, pointer to the first invalid character
1343	* is stored here.
1344	* @param base Zero or number between 2 and 36 inclusive.
1345	* @param strict Do not allow any trailing characters.
1346	* @param result Result of the conversion.
1347	*
1348	* @return EOK if conversion was successful.
1349	*
1350	*/
1351	int str_uint64(const char nptr, char *endptr, unsigned int base,
1352	bool strict, uint64_t *result)
1353	{
1354	assert(result != NULL);
1355
1356	bool neg;
1357	char *lendptr;
1358	int ret = str_uint(nptr, &lendptr, base, &neg, result);
1359
1360	if (endptr != NULL)
1361	endptr = (char ) lendptr;
1362
1363	if (ret != EOK)
1364	return ret;
1365
1366	/* Do not allow negative values */
1367	if (neg)
1368	return EINVAL;
1369
1370	/* Check whether we are at the end of
1371	the string in strict mode */
1372	if ((strict) && (*lendptr != 0))
1373	return EINVAL;
1374
1375	return EOK;
1376	}
1377
1378	/** Convert string to size_t.
1379	*
1380	* @param nptr Pointer to string.
1381	* @param endptr If not NULL, pointer to the first invalid character
1382	* is stored here.
1383	* @param base Zero or number between 2 and 36 inclusive.
1384	* @param strict Do not allow any trailing characters.
1385	* @param result Result of the conversion.
1386	*
1387	* @return EOK if conversion was successful.
1388	*
1389	*/
1390	int str_size_t(const char nptr, char *endptr, unsigned int base,
1391	bool strict, size_t *result)
1392	{
1393	assert(result != NULL);
1394
1395	bool neg;
1396	char *lendptr;
1397	uint64_t res;
1398	int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1399
1400	if (endptr != NULL)
1401	endptr = (char ) lendptr;
1402
1403	if (ret != EOK)
1404	return ret;
1405
1406	/* Do not allow negative values */
1407	if (neg)
1408	return EINVAL;
1409
1410	/* Check whether we are at the end of
1411	the string in strict mode */
1412	if ((strict) && (*lendptr != 0))
1413	return EINVAL;
1414
1415	/* Check for overflow */
1416	size_t _res = (size_t) res;
1417	if (_res != res)
1418	return EOVERFLOW;
1419
1420	*result = _res;
1421
1422	return EOK;
1423	}
1424
/** Scale a value down and select a matching decimal order suffix.
 *
 * A value is scaled to a given unit only once it exceeds one thousand
 * of that unit, so the scaled number can be as large as one million.
 * Values up to one million are returned unchanged with a space as the
 * suffix.
 *
 * @param val    Value to scale.
 * @param rv     Place to store the scaled value.
 * @param suffix Place to store the suffix character (' ', 'k', 'M',
 *               'G', 'T', 'P' or 'E').
 *
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/*
		 * Units of 10^18 (exa). The threshold is only ten units
		 * because one thousand would not fit into uint64_t.
		 */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* Units of 10^15 (peta). */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		/* Units of 10^12 (tera). */
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		/* Units of 10^9 (giga). */
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		/* Units of 10^6 (mega). */
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		/* Units of 10^3 (kilo). */
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
1450
/** Scale a value down and select a matching binary order suffix.
 *
 * A value is scaled to a given unit only once it exceeds 1024 of that
 * unit. Values up to 1048576 are returned unchanged with a plain byte
 * suffix.
 *
 * @param val    Value to scale.
 * @param rv     Place to store the scaled value.
 * @param suffix Place to store a pointer to the suffix string literal
 *               ("B", "KiB", "MiB", "GiB", "TiB" or "PiB").
 * @param fixed  If true, the plain byte suffix is returned with
 *               trailing padding.
 *
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* Units of 2^50 bytes (pebibytes). */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		/* Units of 2^40 bytes (tebibytes). */
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		/* Units of 2^30 bytes (gibibytes). */
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		/* Units of 2^20 bytes (mebibytes). */
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		/* Units of 2^10 bytes (kibibytes). */
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		/*
		 * NOTE(review): the padded form is shorter than the
		 * three-character suffixes above; confirm the intended
		 * width.
		 */
		if (fixed)
			*suffix = "B ";
		else
			*suffix = "B";
	}
}
1477
1478	/** @}
1479	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: