Context Navigation

source: mainline/uspace/lib/c/generic/str.c@ e406736

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since e406736 was 22cf42d9, checked in by Martin Sucha <sucha14@…>, 14 years ago
Add formatting library and display help message wrapped
Property mode set to `100644`
File size: 31.0 KB

Line
1	/*
2	* Copyright (c) 2005 Martin Decky
3	* Copyright (c) 2008 Jiri Svoboda
4	* Copyright (c) 2011 Martin Sucha
5	* All rights reserved.
6	*
7	* Redistribution and use in source and binary forms, with or without
8	* modification, are permitted provided that the following conditions
9	* are met:
10	*
11	* - Redistributions of source code must retain the above copyright
12	* notice, this list of conditions and the following disclaimer.
13	* - Redistributions in binary form must reproduce the above copyright
14	* notice, this list of conditions and the following disclaimer in the
15	* documentation and/or other materials provided with the distribution.
16	* - The name of the author may not be used to endorse or promote products
17	* derived from this software without specific prior written permission.
18	*
19	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29	*/
30
31	/** @addtogroup libc
32	* @{
33	*/
34	/** @file
35	*/
36
37	#include <str.h>
38	#include <stdlib.h>
39	#include <assert.h>
40	#include <stdint.h>
41	#include <ctype.h>
42	#include <malloc.h>
43	#include <errno.h>
44	#include <align.h>
45	#include <mem.h>
46	#include <str.h>
47
/** Byte mask consisting of the lowest @a n bits (out of 8), 0 <= n <= 8. */
#define LO_MASK_8(n)  ((uint8_t) ((1U << (n)) - 1))

/** Mask consisting of the lowest @a n bits (out of 32), 0 <= n <= 31.
 *
 * The shift is done on an unsigned 32-bit operand so that no signed
 * overflow can occur.
 */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1))

/** Byte mask consisting of the highest @a n bits (out of 8), 0 <= n <= 8. */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
59
60	/** Decode a single character from a string.
61	*
62	* Decode a single character from a string of size @a size. Decoding starts
63	* at @a offset and this offset is moved to the beginning of the next
64	* character. In case of decoding error, offset generally advances at least
65	* by one. However, offset is never moved beyond size.
66	*
67	* @param str String (not necessarily NULL-terminated).
68	* @param offset Byte offset in string where to start decoding.
69	* @param size Size of the string (in bytes).
70	*
71	* @return Value of decoded character, U_SPECIAL on decoding error or
72	* NULL if attempt to decode beyond @a size.
73	*
74	*/
75	wchar_t str_decode(const char str, size_t offset, size_t size)
76	{
77	if (*offset + 1 > size)
78	return 0;
79
80	/* First byte read from string */
81	uint8_t b0 = (uint8_t) str[(*offset)++];
82
83	/* Determine code length */
84
85	unsigned int b0_bits; /* Data bits in first byte */
86	unsigned int cbytes; /* Number of continuation bytes */
87
88	if ((b0 & 0x80) == 0) {
89	/* 0xxxxxxx (Plain ASCII) */
90	b0_bits = 7;
91	cbytes = 0;
92	} else if ((b0 & 0xe0) == 0xc0) {
93	/* 110xxxxx 10xxxxxx */
94	b0_bits = 5;
95	cbytes = 1;
96	} else if ((b0 & 0xf0) == 0xe0) {
97	/* 1110xxxx 10xxxxxx 10xxxxxx */
98	b0_bits = 4;
99	cbytes = 2;
100	} else if ((b0 & 0xf8) == 0xf0) {
101	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
102	b0_bits = 3;
103	cbytes = 3;
104	} else {
105	/* 10xxxxxx -- unexpected continuation byte */
106	return U_SPECIAL;
107	}
108
109	if (*offset + cbytes > size)
110	return U_SPECIAL;
111
112	wchar_t ch = b0 & LO_MASK_8(b0_bits);
113
114	/* Decode continuation bytes */
115	while (cbytes > 0) {
116	uint8_t b = (uint8_t) str[(*offset)++];
117
118	/* Must be 10xxxxxx */
119	if ((b & 0xc0) != 0x80)
120	return U_SPECIAL;
121
122	/* Shift data bits to ch */
123	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
124	cbytes--;
125	}
126
127	return ch;
128	}
129
130	/** Encode a single character to string representation.
131	*
132	* Encode a single character to string representation (i.e. UTF-8) and store
133	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
134	* is moved to the position where the next character can be written to.
135	*
136	* @param ch Input character.
137	* @param str Output buffer.
138	* @param offset Byte offset where to start writing.
139	* @param size Size of the output buffer (in bytes).
140	*
141	* @return EOK if the character was encoded successfully, EOVERFLOW if there
142	* was not enough space in the output buffer or EINVAL if the character
143	* code was invalid.
144	*/
145	int chr_encode(const wchar_t ch, char str, size_t offset, size_t size)
146	{
147	if (*offset >= size)
148	return EOVERFLOW;
149
150	if (!chr_check(ch))
151	return EINVAL;
152
153	/* Unsigned version of ch (bit operations should only be done
154	on unsigned types). */
155	uint32_t cc = (uint32_t) ch;
156
157	/* Determine how many continuation bytes are needed */
158
159	unsigned int b0_bits; /* Data bits in first byte */
160	unsigned int cbytes; /* Number of continuation bytes */
161
162	if ((cc & ~LO_MASK_32(7)) == 0) {
163	b0_bits = 7;
164	cbytes = 0;
165	} else if ((cc & ~LO_MASK_32(11)) == 0) {
166	b0_bits = 5;
167	cbytes = 1;
168	} else if ((cc & ~LO_MASK_32(16)) == 0) {
169	b0_bits = 4;
170	cbytes = 2;
171	} else if ((cc & ~LO_MASK_32(21)) == 0) {
172	b0_bits = 3;
173	cbytes = 3;
174	} else {
175	/* Codes longer than 21 bits are not supported */
176	return EINVAL;
177	}
178
179	/* Check for available space in buffer */
180	if (*offset + cbytes >= size)
181	return EOVERFLOW;
182
183	/* Encode continuation bytes */
184	unsigned int i;
185	for (i = cbytes; i > 0; i--) {
186	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
187	cc = cc >> CONT_BITS;
188	}
189
190	/* Encode first byte */
191	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
192
193	/* Advance offset */
194	*offset += cbytes + 1;
195
196	return EOK;
197	}
198
/** Get size of string.
 *
 * Get the number of bytes which are used by the string @a str (excluding
 * the null terminator).
 *
 * @param str Null-terminated string to consider.
 *
 * @return Number of bytes used by the string.
 *
 */
size_t str_size(const char *str)
{
	size_t size = 0;

	while (str[size] != 0)
		size++;

	return size;
}
218
/** Get size of wide string.
 *
 * Get the number of bytes which are used by the wide string @a str
 * (excluding the null terminator).
 *
 * @param str Null-terminated wide string to consider.
 *
 * @return Number of bytes used by the wide string.
 *
 */
size_t wstr_size(const wchar_t *str)
{
	return (wstr_length(str) * sizeof(wchar_t));
}
233
234	/** Get size of string with length limit.
235	*
236	* Get the number of bytes which are used by up to @a max_len first
237	* characters in the string @a str. If @a max_len is greater than
238	* the length of @a str, the entire string is measured (excluding the
239	* NULL-terminator).
240	*
241	* @param str String to consider.
242	* @param max_len Maximum number of characters to measure.
243	*
244	* @return Number of bytes used by the characters.
245	*
246	*/
247	size_t str_lsize(const char *str, size_t max_len)
248	{
249	size_t len = 0;
250	size_t offset = 0;
251
252	while (len < max_len) {
253	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
254	break;
255
256	len++;
257	}
258
259	return offset;
260	}
261
/** Get size of wide string with length limit.
 *
 * Get the number of bytes which are used by up to @a max_len first
 * wide characters in the wide string @a str. If @a max_len is greater
 * than the length of @a str, the entire wide string is measured
 * (excluding the null terminator).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	/*
	 * Convert the character limit to a byte limit, saturating
	 * instead of wrapping around for very large limits.
	 */
	size_t max_size;
	if (max_len > SIZE_MAX / sizeof(wchar_t))
		max_size = SIZE_MAX;
	else
		max_size = max_len * sizeof(wchar_t);

	return (wstr_nlength(str, max_size) * sizeof(wchar_t));
}
279
280	/** Get number of characters in a string.
281	*
282	* @param str NULL-terminated string.
283	*
284	* @return Number of characters in string.
285	*
286	*/
287	size_t str_length(const char *str)
288	{
289	size_t len = 0;
290	size_t offset = 0;
291
292	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
293	len++;
294
295	return len;
296	}
297
/** Get number of characters in a wide string.
 *
 * @param wstr Null-terminated wide string.
 *
 * @return Number of characters in @a wstr (not counting the terminator).
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
	size_t len = 0;

	while (wstr[len] != 0)
		len++;

	return len;
}
314
/** Get number of characters in a string with size limit.
 *
 * Counting stops at the first decoded 0, which is either the string
 * terminator or the point where @a size bytes have been consumed.
 *
 * @param str  String to consider.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in the string.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t len = 0;
	size_t offset = 0;

	while (str_decode(str, &offset, size) != 0)
		len++;

	return len;
}
333
/** Get number of characters in a wide string with size limit.
 *
 * @param str  Wide string to consider.
 * @param size Maximum number of bytes to consider. A trailing partial
 *             wide character (size not a multiple of sizeof(wchar_t))
 *             is ignored.
 *
 * @return Number of wide characters before the terminator or the size
 *         limit, whichever comes first.
 *
 */
size_t wstr_nlength(const wchar_t *str, size_t size)
{
	/* Number of whole wide characters that fit into size bytes */
	size_t max_len = size / sizeof(wchar_t);
	size_t len = 0;

	while ((len < max_len) && (str[len] != 0))
		len++;

	return len;
}
355
/** Check whether character is plain ASCII.
 *
 * @param ch Character to check.
 *
 * @return True if the character is in the range 0..127.
 *
 */
bool ascii_check(wchar_t ch)
{
	return ((ch >= 0) && (ch <= 127));
}
368
/** Check whether character is valid.
 *
 * @param ch Character to check.
 *
 * @return True if the character lies in the Unicode code space
 *         (0 .. 0x10FFFF).
 *
 */
bool chr_check(wchar_t ch)
{
	/* 0x10FFFF == 1114111, the highest Unicode code point */
	return ((ch >= 0) && (ch <= 0x10FFFF));
}
381
382	/** Compare two NULL terminated strings.
383	*
384	* Do a char-by-char comparison of two NULL-terminated strings.
385	* The strings are considered equal iff they consist of the same
386	* characters on the minimum of their lengths.
387	*
388	* @param s1 First string to compare.
389	* @param s2 Second string to compare.
390	*
391	* @return 0 if the strings are equal, -1 if first is smaller,
392	* 1 if second smaller.
393	*
394	*/
395	int str_cmp(const char s1, const char s2)
396	{
397	wchar_t c1 = 0;
398	wchar_t c2 = 0;
399
400	size_t off1 = 0;
401	size_t off2 = 0;
402
403	while (true) {
404	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
405	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
406
407	if (c1 < c2)
408	return -1;
409
410	if (c1 > c2)
411	return 1;
412
413	if (c1 == 0 \|\| c2 == 0)
414	break;
415	}
416
417	return 0;
418	}
419
420	/** Compare two NULL terminated strings with length limit.
421	*
422	* Do a char-by-char comparison of two NULL-terminated strings.
423	* The strings are considered equal iff they consist of the same
424	* characters on the minimum of their lengths and the length limit.
425	*
426	* @param s1 First string to compare.
427	* @param s2 Second string to compare.
428	* @param max_len Maximum number of characters to consider.
429	*
430	* @return 0 if the strings are equal, -1 if first is smaller,
431	* 1 if second smaller.
432	*
433	*/
434	int str_lcmp(const char s1, const char s2, size_t max_len)
435	{
436	wchar_t c1 = 0;
437	wchar_t c2 = 0;
438
439	size_t off1 = 0;
440	size_t off2 = 0;
441
442	size_t len = 0;
443
444	while (true) {
445	if (len >= max_len)
446	break;
447
448	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
449	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
450
451	if (c1 < c2)
452	return -1;
453
454	if (c1 > c2)
455	return 1;
456
457	if (c1 == 0 \|\| c2 == 0)
458	break;
459
460	++len;
461	}
462
463	return 0;
464
465	}
466
467	/** Copy string.
468	*
469	* Copy source string @a src to destination buffer @a dest.
470	* No more than @a size bytes are written. If the size of the output buffer
471	* is at least one byte, the output string will always be well-formed, i.e.
472	* null-terminated and containing only complete characters.
473	*
474	* @param dest Destination buffer.
475	* @param count Size of the destination buffer (must be > 0).
476	* @param src Source string.
477	*/
478	void str_cpy(char dest, size_t size, const char src)
479	{
480	/* There must be space for a null terminator in the buffer. */
481	assert(size > 0);
482
483	size_t src_off = 0;
484	size_t dest_off = 0;
485
486	wchar_t ch;
487	while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
488	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
489	break;
490	}
491
492	dest[dest_off] = '\0';
493	}
494
495	/** Copy size-limited substring.
496	*
497	* Copy prefix of string @a src of max. size @a size to destination buffer
498	* @a dest. No more than @a size bytes are written. The output string will
499	* always be well-formed, i.e. null-terminated and containing only complete
500	* characters.
501	*
502	* No more than @a n bytes are read from the input string, so it does not
503	* have to be null-terminated.
504	*
505	* @param dest Destination buffer.
506	* @param count Size of the destination buffer (must be > 0).
507	* @param src Source string.
508	* @param n Maximum number of bytes to read from @a src.
509	*/
510	void str_ncpy(char dest, size_t size, const char src, size_t n)
511	{
512	/* There must be space for a null terminator in the buffer. */
513	assert(size > 0);
514
515	size_t src_off = 0;
516	size_t dest_off = 0;
517
518	wchar_t ch;
519	while ((ch = str_decode(src, &src_off, n)) != 0) {
520	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
521	break;
522	}
523
524	dest[dest_off] = '\0';
525	}
526
/** Append one string to another.
 *
 * Append source string @a src to the string in destination buffer
 * @a dest. Size of the destination buffer is @a size. The result is
 * always well-formed, i.e. null-terminated and containing only complete
 * characters. If the string already in @a dest fills the whole buffer
 * (or is longer than @a size), nothing is appended.
 *
 * @param dest Destination buffer holding a null-terminated string.
 * @param size Size of the destination buffer.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	size_t dstr_size;

	dstr_size = str_size(dest);
	if (dstr_size >= size)
		return;

	/* Copy into the free tail of the buffer, over the old terminator. */
	str_cpy(dest + dstr_size, size - dstr_size, src);
}
548
549	/** Convert space-padded ASCII to string.
550	*
551	* Common legacy text encoding in hardware is 7-bit ASCII fitted into
552	* a fixed-with byte buffer (bit 7 always zero), right-padded with spaces
553	* (ASCII 0x20). Convert space-padded ascii to string representation.
554	*
555	* If the text does not fit into the destination buffer, the function converts
556	* as many characters as possible and returns EOVERFLOW.
557	*
558	* If the text contains non-ASCII bytes (with bit 7 set), the whole string is
559	* converted anyway and invalid characters are replaced with question marks
560	* (U_SPECIAL) and the function returns EIO.
561	*
562	* Regardless of return value upon return @a dest will always be well-formed.
563	*
564	* @param dest Destination buffer
565	* @param size Size of destination buffer
566	* @param src Space-padded ASCII.
567	* @param n Size of the source buffer in bytes.
568	*
569	* @return EOK on success, EOVERFLOW if the text does not fit
570	* destination buffer, EIO if the text contains
571	* non-ASCII bytes.
572	*/
573	int spascii_to_str(char dest, size_t size, const uint8_t src, size_t n)
574	{
575	size_t sidx;
576	size_t didx;
577	size_t dlast;
578	uint8_t byte;
579	int rc;
580	int result;
581
582	/* There must be space for a null terminator in the buffer. */
583	assert(size > 0);
584	result = EOK;
585
586	didx = 0;
587	dlast = 0;
588	for (sidx = 0; sidx < n; ++sidx) {
589	byte = src[sidx];
590	if (!ascii_check(byte)) {
591	byte = U_SPECIAL;
592	result = EIO;
593	}
594
595	rc = chr_encode(byte, dest, &didx, size - 1);
596	if (rc != EOK) {
597	assert(rc == EOVERFLOW);
598	dest[didx] = '\0';
599	return rc;
600	}
601
602	/* Remember dest index after last non-empty character */
603	if (byte != 0x20)
604	dlast = didx;
605	}
606
607	/* Terminate string after last non-empty character */
608	dest[dlast] = '\0';
609	return result;
610	}
611
612	/** Convert wide string to string.
613	*
614	* Convert wide string @a src to string. The output is written to the buffer
615	* specified by @a dest and @a size. @a size must be non-zero and the string
616	* written will always be well-formed.
617	*
618	* @param dest Destination buffer.
619	* @param size Size of the destination buffer.
620	* @param src Source wide string.
621	*/
622	void wstr_to_str(char dest, size_t size, const wchar_t src)
623	{
624	wchar_t ch;
625	size_t src_idx;
626	size_t dest_off;
627
628	/* There must be space for a null terminator in the buffer. */
629	assert(size > 0);
630
631	src_idx = 0;
632	dest_off = 0;
633
634	while ((ch = src[src_idx++]) != 0) {
635	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
636	break;
637	}
638
639	dest[dest_off] = '\0';
640	}
641
642	/** Convert wide string to new string.
643	*
644	* Convert wide string @a src to string. Space for the new string is allocated
645	* on the heap.
646	*
647	* @param src Source wide string.
648	* @return New string.
649	*/
650	char wstr_to_astr(const wchar_t src)
651	{
652	char dbuf[STR_BOUNDS(1)];
653	char *str;
654	wchar_t ch;
655
656	size_t src_idx;
657	size_t dest_off;
658	size_t dest_size;
659
660	/* Compute size of encoded string. */
661
662	src_idx = 0;
663	dest_size = 0;
664
665	while ((ch = src[src_idx++]) != 0) {
666	dest_off = 0;
667	if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
668	break;
669	dest_size += dest_off;
670	}
671
672	str = malloc(dest_size + 1);
673	if (str == NULL)
674	return NULL;
675
676	/* Encode string. */
677
678	src_idx = 0;
679	dest_off = 0;
680
681	while ((ch = src[src_idx++]) != 0) {
682	if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
683	break;
684	}
685
686	str[dest_size] = '\0';
687	return str;
688	}
689
690
691	/** Convert string to wide string.
692	*
693	* Convert string @a src to wide string. The output is written to the
694	* buffer specified by @a dest and @a dlen. @a dlen must be non-zero
695	* and the wide string written will always be null-terminated.
696	*
697	* @param dest Destination buffer.
698	* @param dlen Length of destination buffer (number of wchars).
699	* @param src Source string.
700	*/
701	void str_to_wstr(wchar_t dest, size_t dlen, const char src)
702	{
703	size_t offset;
704	size_t di;
705	wchar_t c;
706
707	assert(dlen > 0);
708
709	offset = 0;
710	di = 0;
711
712	do {
713	if (di >= dlen - 1)
714	break;
715
716	c = str_decode(src, &offset, STR_NO_LIMIT);
717	dest[di++] = c;
718	} while (c != '\0');
719
720	dest[dlen - 1] = '\0';
721	}
722
/** Convert string to new wide string.
 *
 * Convert string @a str to wide string. A new null-terminated wide
 * string is allocated on the heap; the caller is responsible for
 * freeing it.
 *
 * @param str Source string.
 *
 * @return New wide string or NULL if the allocation failed.
 */
wchar_t *str_to_awstr(const char *str)
{
	size_t len = str_length(str);

	wchar_t *wstr = calloc(len + 1, sizeof(wchar_t));
	if (wstr == NULL)
		return NULL;

	str_to_wstr(wstr, len + 1, str);
	return wstr;
}
740
741	/** Find first occurence of character in string.
742	*
743	* @param str String to search.
744	* @param ch Character to look for.
745	*
746	* @return Pointer to character in @a str or NULL if not found.
747	*/
748	char str_chr(const char str, wchar_t ch)
749	{
750	wchar_t acc;
751	size_t off = 0;
752	size_t last = 0;
753
754	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
755	if (acc == ch)
756	return (char *) (str + last);
757	last = off;
758	}
759
760	return NULL;
761	}
762
763	/** Find last occurence of character in string.
764	*
765	* @param str String to search.
766	* @param ch Character to look for.
767	*
768	* @return Pointer to character in @a str or NULL if not found.
769	*/
770	char str_rchr(const char str, wchar_t ch)
771	{
772	wchar_t acc;
773	size_t off = 0;
774	size_t last = 0;
775	const char *res = NULL;
776
777	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
778	if (acc == ch)
779	res = (str + last);
780	last = off;
781	}
782
783	return (char *) res;
784	}
785
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position @a pos.
 * The characters from that position on (including the terminator)
 * are shifted by one towards the end.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer; the insertion is refused
 *                when @a pos + 1 exceeds this value.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	if ((pos > len) || (pos + 1 > max_pos))
		return false;

	/*
	 * Shift the tail, terminator first. The condition is written as
	 * i + 1 > pos so that the loop also terminates for pos == 0.
	 */
	size_t i;
	for (i = len; i + 1 > pos; i--)
		str[i + 1] = str[i];

	str[pos] = ch;

	return true;
}
815
/** Remove a wide character from a wide string.
 *
 * Remove the wide character at position @a pos from a wide string.
 * The characters after the position (including the terminator) are
 * shifted by one towards the beginning.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	size_t len = wstr_length(str);

	if (pos >= len)
		return false;

	/* i runs up to len inclusive so that the terminator moves too. */
	size_t i;
	for (i = pos + 1; i <= len; i++)
		str[i - 1] = str[i];

	return true;
}
841
/** Compare two strings byte by byte, ignoring letter case.
 *
 * Each byte is passed through tolower() before comparing. Bytes are
 * converted to unsigned char first, because the ctype functions are
 * only defined for values representable as unsigned char (and EOF).
 *
 * @param a First null-terminated string.
 * @param b Second null-terminated string.
 *
 * @return 0 if the strings are equal ignoring case, otherwise the
 *         difference of the first pair of lower-cased bytes that differ
 *         (negative if @a a sorts first, positive if @a b sorts first).
 */
int stricmp(const char *a, const char *b)
{
	size_t i = 0;
	int ca;
	int cb;

	do {
		ca = tolower((unsigned char) a[i]);
		cb = tolower((unsigned char) b[i]);
		i++;
	} while ((ca != 0) && (ca == cb));

	return ca - cb;
}
851
/** Convert string to a number.
 *
 * Core of strtol and strtoul functions. Skips leading whitespace,
 * accepts an optional sign, an optional "0x"/"0X" prefix (for base 16
 * or base 0) and then as many digits valid in the given base as
 * possible.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character (or @a nptr if no number was found or
 *               the base is invalid).
 * @param base   Zero or number between 2 and 36 inclusive. Zero selects
 *               the base from the prefix (0x -> 16, 0 -> 8, else 10).
 * @param sgn    Set to 1 if a minus sign was found; left untouched
 *               otherwise, so the caller must initialize it.
 *
 * @return Magnitude of the converted number, ULONG_MAX on overflow,
 *         0 if no number was found or the base is invalid.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
	unsigned long result = 0;
	int overflow = 0;
	const char *str = nptr;

	/* ctype functions require values representable as unsigned char */
	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*sgn = 1;
		++str;
	} else if (*str == '+') {
		++str;
	}

	if (base != 0) {
		if ((base < 2) || (base > 36)) {
			/* FIXME: set errno to EINVAL */
			if (endptr)
				*endptr = (char *) nptr;
			return 0;
		}
		if ((base == 16) && (*str == '0') &&
		    ((str[1] == 'x') || (str[1] == 'X'))) {
			str += 2;
		}
	} else {
		base = 10;

		if (*str == '0') {
			base = 8;
			if ((str[1] == 'X') || (str[1] == 'x')) {
				base = 16;
				str += 2;
			}
		}
	}

	const char *digits = str;

	while (*str != '\0') {
		unsigned char c = (unsigned char) *str;
		unsigned int digit;

		if ((c >= '0') && (c <= '9'))
			digit = c - '0';
		else if ((c >= 'a') && (c <= 'z'))
			digit = c - 'a' + 10;
		else if ((c >= 'A') && (c <= 'Z'))
			digit = c - 'A' + 10;
		else
			break;

		/* Valid digits are 0 .. base - 1 */
		if (digit >= (unsigned int) base)
			break;

		if (!overflow) {
			if (result > (ULONG_MAX - digit) / (unsigned long) base) {
				/* FIXME: errno = ERANGE */
				overflow = 1;
			} else {
				result = result * (unsigned long) base + digit;
			}
		}

		/* Keep consuming digits even after overflow was detected. */
		++str;
	}

	if (str == digits) {
		/*
		 * No number was found => first invalid character is the first
		 * character of the string.
		 */
		/* FIXME: set errno to EINVAL */
		if (endptr)
			*endptr = (char *) nptr;
		return 0;
	}

	if (endptr)
		*endptr = (char *) str;

	return (overflow ? ULONG_MAX : result);
}
944
/** Convert initial part of string to long int according to given base.
 *
 * The number may begin with an arbitrary number of whitespaces followed
 * by optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x'
 * may be inserted and the number will be taken as hexadecimal one. If the
 * base is 0 and the number begins with a zero, the number will be taken
 * as octal one (as with base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 *
 * @return Result of conversion; LONG_MIN or LONG_MAX if the value is
 *         out of range.
 */
long int strtol(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = _strtoul(nptr, endptr, base, &sgn);

	if (number > LONG_MAX) {
		if ((sgn) && (number == (unsigned long) (LONG_MAX) + 1)) {
			/* Exactly LONG_MIN, which is representable. */
			/* FIXME: set 0 to errno */
			return LONG_MIN;
		}
		/* FIXME: set ERANGE to errno */
		return (sgn ? LONG_MIN : LONG_MAX);
	}

	return (sgn ? -(long) number : (long) number);
}
976
/** Duplicate string.
 *
 * Allocate a new string with malloc() and copy characters from the
 * source string into it. The caller is responsible for freeing the
 * result.
 *
 * The duplicate string is always a well-formed null-terminated UTF-8
 * string, but it can differ from the source string on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	size_t size = str_size(src) + 1;

	char *dest = malloc(size);
	if (dest == NULL)
		return NULL;

	str_cpy(dest, size, src);
	return dest;
}
1003
/** Duplicate string with size limit.
 *
 * Allocate a new string with malloc() and copy up to @a n bytes from
 * the source string into it. No more than @a n + 1 bytes are allocated;
 * if the source string occupies fewer than @a n bytes, less is allocated.
 * The caller is responsible for freeing the result.
 *
 * The duplicate string is always a well-formed null-terminated UTF-8
 * string, but it can differ from the source string on the byte level.
 *
 * @param src Source string (must be null-terminated, its full size is
 *            measured first).
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	size_t size = str_size(src);
	if (size > n)
		size = n;

	char *dest = malloc(size + 1);
	if (dest == NULL)
		return NULL;

	str_ncpy(dest, size + 1, src, size);
	return dest;
}
1037
1038
/** Convert initial part of string to unsigned long according to given base.
 *
 * The number may begin with an arbitrary number of whitespaces followed
 * by optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x'
 * may be inserted and the number will be taken as hexadecimal one. If the
 * base is 0 and the number begins with a zero, the number will be taken
 * as octal one (as with base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 *
 * @return Result of conversion. A leading minus sign negates the value
 *         in unsigned arithmetic.
 */
unsigned long strtoul(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = _strtoul(nptr, endptr, base, &sgn);

	return (sgn ? -number : number);
}
1061
/** Split string into tokens (non-reentrant variant).
 *
 * Thin wrapper around strtok_r() that keeps the scan position in a
 * static variable shared by all callers, so it must not be used for two
 * strings at once or from several threads concurrently.
 *
 * @param s     String to tokenize, or NULL to continue with the string
 *              from the previous call.
 * @param delim String of delimiter characters.
 *
 * @return Pointer to the next token or NULL if there are no more tokens.
 */
char *strtok(char *s, const char *delim)
{
	static char *next;

	return strtok_r(s, delim, &next);
}
1068
/** Split string into tokens (reentrant variant).
 *
 * The input string is modified: the delimiter that ends each token is
 * overwritten with a null terminator. The string is scanned byte by
 * byte.
 *
 * @param s     String to tokenize, or NULL to continue from the position
 *              saved in @a *next by a previous call.
 * @param delim String of delimiter characters.
 * @param next  Storage for the scan position between calls.
 *
 * @return Pointer to the next token or NULL if there are no more tokens.
 */
char *strtok_r(char *s, const char *delim, char **next)
{
	char *start, *end;

	if (s == NULL)
		s = *next;

	/* Nothing to continue from (no previous call stored a position). */
	if (s == NULL)
		return NULL;

	/* Skip over leading delimiters. */
	while (*s && (str_chr(delim, *s) != NULL))
		++s;
	start = s;

	/* Skip over token characters. */
	while (*s && (str_chr(delim, *s) == NULL))
		++s;
	end = s;

	/* Resume after the delimiter, or stay on the terminator. */
	*next = (*s ? s + 1 : s);

	if (start == end)
		return NULL;  /* No more tokens. */

	/* Overwrite delimiter with null terminator. */
	*end = '\0';
	return start;
}
1093
1094	/** Convert string to uint64_t (internal variant).
1095	*
1096	* @param nptr Pointer to string.
1097	* @param endptr Pointer to the first invalid character is stored here.
1098	* @param base Zero or number between 2 and 36 inclusive.
1099	* @param neg Indication of unary minus is stored here.
1100	* @apram result Result of the conversion.
1101	*
1102	* @return EOK if conversion was successful.
1103	*
1104	*/
1105	static int str_uint(const char nptr, char *endptr, unsigned int base,
1106	bool neg, uint64_t result)
1107	{
1108	assert(endptr != NULL);
1109	assert(neg != NULL);
1110	assert(result != NULL);
1111
1112	*neg = false;
1113	const char *str = nptr;
1114
1115	/* Ignore leading whitespace */
1116	while (isspace(*str))
1117	str++;
1118
1119	if (*str == '-') {
1120	*neg = true;
1121	str++;
1122	} else if (*str == '+')
1123	str++;
1124
1125	if (base == 0) {
1126	/* Decode base if not specified */
1127	base = 10;
1128
1129	if (*str == '0') {
1130	base = 8;
1131	str++;
1132
1133	switch (*str) {
1134	case 'b':
1135	case 'B':
1136	base = 2;
1137	str++;
1138	break;
1139	case 'o':
1140	case 'O':
1141	base = 8;
1142	str++;
1143	break;
1144	case 'd':
1145	case 'D':
1146	case 't':
1147	case 'T':
1148	base = 10;
1149	str++;
1150	break;
1151	case 'x':
1152	case 'X':
1153	base = 16;
1154	str++;
1155	break;
1156	default:
1157	str--;
1158	}
1159	}
1160	} else {
1161	/* Check base range */
1162	if ((base < 2) \|\| (base > 36)) {
1163	endptr = (char ) str;
1164	return EINVAL;
1165	}
1166	}
1167
1168	*result = 0;
1169	const char *startstr = str;
1170
1171	while (*str != 0) {
1172	unsigned int digit;
1173
1174	if ((str >= 'a') && (str <= 'z'))
1175	digit = *str - 'a' + 10;
1176	else if ((str >= 'A') && (str <= 'Z'))
1177	digit = *str - 'A' + 10;
1178	else if ((str >= '0') && (str <= '9'))
1179	digit = *str - '0';
1180	else
1181	break;
1182
1183	if (digit >= base)
1184	break;
1185
1186	uint64_t prev = *result;
1187	result = (result) * base + digit;
1188
1189	if (*result < prev) {
1190	/* Overflow */
1191	endptr = (char ) str;
1192	return EOVERFLOW;
1193	}
1194
1195	str++;
1196	}
1197
1198	if (str == startstr) {
1199	/*
1200	* No digits were decoded => first invalid character is
1201	* the first character of the string.
1202	*/
1203	str = nptr;
1204	}
1205
1206	endptr = (char ) str;
1207
1208	if (str == nptr)
1209	return EINVAL;
1210
1211	return EOK;
1212	}
1213
1214	/** Convert string to uint64_t.
1215	*
1216	* @param nptr Pointer to string.
1217	* @param endptr If not NULL, pointer to the first invalid character
1218	* is stored here.
1219	* @param base Zero or number between 2 and 36 inclusive.
1220	* @param strict Do not allow any trailing characters.
1221	* @param result Result of the conversion.
1222	*
1223	* @return EOK if conversion was successful.
1224	*
1225	*/
1226	int str_uint64(const char nptr, char *endptr, unsigned int base,
1227	bool strict, uint64_t *result)
1228	{
1229	assert(result != NULL);
1230
1231	bool neg;
1232	char *lendptr;
1233	int ret = str_uint(nptr, &lendptr, base, &neg, result);
1234
1235	if (endptr != NULL)
1236	endptr = (char ) lendptr;
1237
1238	if (ret != EOK)
1239	return ret;
1240
1241	/* Do not allow negative values */
1242	if (neg)
1243	return EINVAL;
1244
1245	/* Check whether we are at the end of
1246	the string in strict mode */
1247	if ((strict) && (*lendptr != 0))
1248	return EINVAL;
1249
1250	return EOK;
1251	}
1252
1253	/** Convert string to size_t.
1254	*
1255	* @param nptr Pointer to string.
1256	* @param endptr If not NULL, pointer to the first invalid character
1257	* is stored here.
1258	* @param base Zero or number between 2 and 36 inclusive.
1259	* @param strict Do not allow any trailing characters.
1260	* @param result Result of the conversion.
1261	*
1262	* @return EOK if conversion was successful.
1263	*
1264	*/
1265	int str_size_t(const char nptr, char *endptr, unsigned int base,
1266	bool strict, size_t *result)
1267	{
1268	assert(result != NULL);
1269
1270	bool neg;
1271	char *lendptr;
1272	uint64_t res;
1273	int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1274
1275	if (endptr != NULL)
1276	endptr = (char ) lendptr;
1277
1278	if (ret != EOK)
1279	return ret;
1280
1281	/* Do not allow negative values */
1282	if (neg)
1283	return EINVAL;
1284
1285	/* Check whether we are at the end of
1286	the string in strict mode */
1287	if ((strict) && (*lendptr != 0))
1288	return EINVAL;
1289
1290	/* Check for overflow */
1291	size_t _res = (size_t) res;
1292	if (_res != res)
1293	return EOVERFLOW;
1294
1295	*result = _res;
1296
1297	return EOK;
1298	}
1299
/** Scale a value down and pick a matching decimal (SI) order suffix.
 *
 * The value is divided by the largest power of 1000 that still leaves
 * more than 1000 units, so the scaled value keeps up to four significant
 * digits before the suffix.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Suffix character is stored here: ' ' (no scaling),
 *               'k' (10^3), 'M' (10^6), 'G' (10^9), 'T' (10^12),
 *               'P' (10^15) or 'E' (10^18).
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/* Divided by 10^18 => exa */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* Divided by 10^15 => peta */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
1325
/** Scale a byte count down and pick a matching binary (IEC) unit.
 *
 * The value is divided by the largest power of 1024 that still leaves
 * more than 1024 units.
 *
 * @param val    Value (number of bytes) to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Pointer to a constant unit string is stored here:
 *               "B", "KiB" (2^10), "MiB" (2^20), "GiB" (2^30),
 *               "TiB" (2^40) or "PiB" (2^50).
 * @param fixed  If true, the unscaled unit is returned as "B " (with a
 *               trailing space) instead of "B".
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* Above 2^60: divided by 2^50 => pebibytes */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		if (fixed)
			*suffix = "B ";
		else
			*suffix = "B";
	}
}
1352
1353	/** @}
1354	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: