Context Navigation

source: mainline/uspace/lib/c/generic/str.c@ 58cbf8d5

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since 58cbf8d5 was dcb74c0a, checked in by Jiri Svoboda <jiri@…>, 14 years ago
Add function to str.c to convert space-padded ASCII to standard string representation. Use for decoding SCSI strings.
Property mode set to `100644`
File size: 30.5 KB

Line
1	/*
2	* Copyright (c) 2005 Martin Decky
3	* Copyright (c) 2008 Jiri Svoboda
4	* All rights reserved.
5	*
6	* Redistribution and use in source and binary forms, with or without
7	* modification, are permitted provided that the following conditions
8	* are met:
9	*
10	* - Redistributions of source code must retain the above copyright
11	* notice, this list of conditions and the following disclaimer.
12	* - Redistributions in binary form must reproduce the above copyright
13	* notice, this list of conditions and the following disclaimer in the
14	* documentation and/or other materials provided with the distribution.
15	* - The name of the author may not be used to endorse or promote products
16	* derived from this software without specific prior written permission.
17	*
18	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28	*/
29
30	/** @addtogroup libc
31	* @{
32	*/
33	/** @file
34	*/
35
36	#include <str.h>
37	#include <stdlib.h>
38	#include <assert.h>
39	#include <stdint.h>
40	#include <ctype.h>
41	#include <malloc.h>
42	#include <errno.h>
43	#include <align.h>
44	#include <mem.h>
45	#include <str.h>
46
/** Byte mask consisting of lowest @n bits (out of 8), valid for 0 <= n <= 8 */
#define LO_MASK_8(n)  ((uint8_t) ((1U << (n)) - 1))

/**
 * Bit mask consisting of lowest @n bits (out of 32), valid for 0 <= n < 32.
 * The shift is done on an unsigned 32-bit one so that n == 31 is well defined.
 */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1))

/**
 * Byte mask consisting of highest @n bits (out of 8).
 * The complement is cast back to uint8_t because integer promotion would
 * otherwise set all the upper bits of the resulting int.
 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
58
59	/** Decode a single character from a string.
60	*
61	* Decode a single character from a string of size @a size. Decoding starts
62	* at @a offset and this offset is moved to the beginning of the next
63	* character. In case of decoding error, offset generally advances at least
64	* by one. However, offset is never moved beyond size.
65	*
66	* @param str String (not necessarily NULL-terminated).
67	* @param offset Byte offset in string where to start decoding.
68	* @param size Size of the string (in bytes).
69	*
70	* @return Value of decoded character, U_SPECIAL on decoding error or
71	* NULL if attempt to decode beyond @a size.
72	*
73	*/
74	wchar_t str_decode(const char str, size_t offset, size_t size)
75	{
76	if (*offset + 1 > size)
77	return 0;
78
79	/* First byte read from string */
80	uint8_t b0 = (uint8_t) str[(*offset)++];
81
82	/* Determine code length */
83
84	unsigned int b0_bits; /* Data bits in first byte */
85	unsigned int cbytes; /* Number of continuation bytes */
86
87	if ((b0 & 0x80) == 0) {
88	/* 0xxxxxxx (Plain ASCII) */
89	b0_bits = 7;
90	cbytes = 0;
91	} else if ((b0 & 0xe0) == 0xc0) {
92	/* 110xxxxx 10xxxxxx */
93	b0_bits = 5;
94	cbytes = 1;
95	} else if ((b0 & 0xf0) == 0xe0) {
96	/* 1110xxxx 10xxxxxx 10xxxxxx */
97	b0_bits = 4;
98	cbytes = 2;
99	} else if ((b0 & 0xf8) == 0xf0) {
100	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
101	b0_bits = 3;
102	cbytes = 3;
103	} else {
104	/* 10xxxxxx -- unexpected continuation byte */
105	return U_SPECIAL;
106	}
107
108	if (*offset + cbytes > size)
109	return U_SPECIAL;
110
111	wchar_t ch = b0 & LO_MASK_8(b0_bits);
112
113	/* Decode continuation bytes */
114	while (cbytes > 0) {
115	uint8_t b = (uint8_t) str[(*offset)++];
116
117	/* Must be 10xxxxxx */
118	if ((b & 0xc0) != 0x80)
119	return U_SPECIAL;
120
121	/* Shift data bits to ch */
122	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
123	cbytes--;
124	}
125
126	return ch;
127	}
128
129	/** Encode a single character to string representation.
130	*
131	* Encode a single character to string representation (i.e. UTF-8) and store
132	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
133	* is moved to the position where the next character can be written to.
134	*
135	* @param ch Input character.
136	* @param str Output buffer.
137	* @param offset Byte offset where to start writing.
138	* @param size Size of the output buffer (in bytes).
139	*
140	* @return EOK if the character was encoded successfully, EOVERFLOW if there
141	* was not enough space in the output buffer or EINVAL if the character
142	* code was invalid.
143	*/
144	int chr_encode(const wchar_t ch, char str, size_t offset, size_t size)
145	{
146	if (*offset >= size)
147	return EOVERFLOW;
148
149	if (!chr_check(ch))
150	return EINVAL;
151
152	/* Unsigned version of ch (bit operations should only be done
153	on unsigned types). */
154	uint32_t cc = (uint32_t) ch;
155
156	/* Determine how many continuation bytes are needed */
157
158	unsigned int b0_bits; /* Data bits in first byte */
159	unsigned int cbytes; /* Number of continuation bytes */
160
161	if ((cc & ~LO_MASK_32(7)) == 0) {
162	b0_bits = 7;
163	cbytes = 0;
164	} else if ((cc & ~LO_MASK_32(11)) == 0) {
165	b0_bits = 5;
166	cbytes = 1;
167	} else if ((cc & ~LO_MASK_32(16)) == 0) {
168	b0_bits = 4;
169	cbytes = 2;
170	} else if ((cc & ~LO_MASK_32(21)) == 0) {
171	b0_bits = 3;
172	cbytes = 3;
173	} else {
174	/* Codes longer than 21 bits are not supported */
175	return EINVAL;
176	}
177
178	/* Check for available space in buffer */
179	if (*offset + cbytes >= size)
180	return EOVERFLOW;
181
182	/* Encode continuation bytes */
183	unsigned int i;
184	for (i = cbytes; i > 0; i--) {
185	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
186	cc = cc >> CONT_BITS;
187	}
188
189	/* Encode first byte */
190	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
191
192	/* Advance offset */
193	*offset += cbytes + 1;
194
195	return EOK;
196	}
197
/** Get size of string.
 *
 * Count the bytes occupied by the string @a str, not counting the
 * terminating NUL byte.
 *
 * @param str String to consider.
 *
 * @return Number of bytes used by the string.
 *
 */
size_t str_size(const char *str)
{
	const char *end = str;

	/* Walk to the terminator; the pointer distance is the byte count. */
	while (*end != 0)
		end++;

	return (size_t) (end - str);
}
217
/** Get size of wide string.
 *
 * Compute the number of bytes occupied by the wide string @a str, not
 * counting the terminating null wide character.
 *
 * @param str Wide string to consider.
 *
 * @return Number of bytes used by the wide string.
 *
 */
size_t wstr_size(const wchar_t *str)
{
	size_t chars = wstr_length(str);

	return chars * sizeof(wchar_t);
}
232
233	/** Get size of string with length limit.
234	*
235	* Get the number of bytes which are used by up to @a max_len first
236	* characters in the string @a str. If @a max_len is greater than
237	* the length of @a str, the entire string is measured (excluding the
238	* NULL-terminator).
239	*
240	* @param str String to consider.
241	* @param max_len Maximum number of characters to measure.
242	*
243	* @return Number of bytes used by the characters.
244	*
245	*/
246	size_t str_lsize(const char *str, size_t max_len)
247	{
248	size_t len = 0;
249	size_t offset = 0;
250
251	while (len < max_len) {
252	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
253	break;
254
255	len++;
256	}
257
258	return offset;
259	}
260
/** Get size of wide string with length limit.
 *
 * Compute the number of bytes occupied by at most the first @a max_len
 * wide characters of @a str. When the wide string is shorter than
 * @a max_len, the whole wide string is measured (without its terminator).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const wchar_t *str, size_t max_len)
{
	/* wstr_nlength() takes its limit in bytes, not in characters. */
	size_t byte_limit = max_len * sizeof(wchar_t);
	size_t chars = wstr_nlength(str, byte_limit);

	return chars * sizeof(wchar_t);
}
278
279	/** Get number of characters in a string.
280	*
281	* @param str NULL-terminated string.
282	*
283	* @return Number of characters in string.
284	*
285	*/
286	size_t str_length(const char *str)
287	{
288	size_t len = 0;
289	size_t offset = 0;
290
291	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
292	len++;
293
294	return len;
295	}
296
/** Get number of characters in a wide string.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return Number of wide characters in @a wstr, excluding the terminator.
 *
 */
size_t wstr_length(const wchar_t *wstr)
{
	const wchar_t *cur = wstr;

	/* Advance to the terminator; the distance is the character count. */
	while (*cur != 0)
		cur++;

	return (size_t) (cur - wstr);
}
313
/** Get number of characters in a string with size limit.
 *
 * Characters are counted by decoding with str_decode() limited to
 * @a size bytes; counting stops as soon as the decoder yields 0.
 *
 * @param str  NULL-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in string.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t pos = 0;
	size_t count;

	for (count = 0; str_decode(str, &pos, size) != 0; count++)
		;

	return count;
}
332
/** Get number of characters in a wide string with size limit.
 *
 * @param str  NULL-terminated wide string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of wide characters found before the terminator or
 *         before the byte limit is reached.
 *
 */
size_t wstr_nlength(const wchar_t *str, size_t size)
{
	/* Only whole wide characters inside the limit are examined. */
	size_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
	size_t count = 0;
	size_t bytes;

	for (bytes = 0; (bytes < limit) && (str[count] != 0);
	    bytes += sizeof(wchar_t))
		count++;

	return count;
}
354
/** Check whether character is plain ASCII.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0..127.
 *
 */
bool ascii_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 0x7f);
}
367
/** Check whether character is valid.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0..0x10FFFF (1114111), i.e.
 *         within the Unicode code space.
 *
 */
bool chr_check(wchar_t ch)
{
	return (ch >= 0) && (ch <= 0x10ffff);
}
380
381	/** Compare two NULL terminated strings.
382	*
383	* Do a char-by-char comparison of two NULL-terminated strings.
384	* The strings are considered equal iff they consist of the same
385	* characters on the minimum of their lengths.
386	*
387	* @param s1 First string to compare.
388	* @param s2 Second string to compare.
389	*
390	* @return 0 if the strings are equal, -1 if first is smaller,
391	* 1 if second smaller.
392	*
393	*/
394	int str_cmp(const char s1, const char s2)
395	{
396	wchar_t c1 = 0;
397	wchar_t c2 = 0;
398
399	size_t off1 = 0;
400	size_t off2 = 0;
401
402	while (true) {
403	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
404	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
405
406	if (c1 < c2)
407	return -1;
408
409	if (c1 > c2)
410	return 1;
411
412	if (c1 == 0 \|\| c2 == 0)
413	break;
414	}
415
416	return 0;
417	}
418
419	/** Compare two NULL terminated strings with length limit.
420	*
421	* Do a char-by-char comparison of two NULL-terminated strings.
422	* The strings are considered equal iff they consist of the same
423	* characters on the minimum of their lengths and the length limit.
424	*
425	* @param s1 First string to compare.
426	* @param s2 Second string to compare.
427	* @param max_len Maximum number of characters to consider.
428	*
429	* @return 0 if the strings are equal, -1 if first is smaller,
430	* 1 if second smaller.
431	*
432	*/
433	int str_lcmp(const char s1, const char s2, size_t max_len)
434	{
435	wchar_t c1 = 0;
436	wchar_t c2 = 0;
437
438	size_t off1 = 0;
439	size_t off2 = 0;
440
441	size_t len = 0;
442
443	while (true) {
444	if (len >= max_len)
445	break;
446
447	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
448	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
449
450	if (c1 < c2)
451	return -1;
452
453	if (c1 > c2)
454	return 1;
455
456	if (c1 == 0 \|\| c2 == 0)
457	break;
458
459	++len;
460	}
461
462	return 0;
463
464	}
465
466	/** Copy string.
467	*
468	* Copy source string @a src to destination buffer @a dest.
469	* No more than @a size bytes are written. If the size of the output buffer
470	* is at least one byte, the output string will always be well-formed, i.e.
471	* null-terminated and containing only complete characters.
472	*
473	* @param dest Destination buffer.
474	* @param count Size of the destination buffer (must be > 0).
475	* @param src Source string.
476	*/
477	void str_cpy(char dest, size_t size, const char src)
478	{
479	/* There must be space for a null terminator in the buffer. */
480	assert(size > 0);
481
482	size_t src_off = 0;
483	size_t dest_off = 0;
484
485	wchar_t ch;
486	while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
487	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
488	break;
489	}
490
491	dest[dest_off] = '\0';
492	}
493
494	/** Copy size-limited substring.
495	*
496	* Copy prefix of string @a src of max. size @a size to destination buffer
497	* @a dest. No more than @a size bytes are written. The output string will
498	* always be well-formed, i.e. null-terminated and containing only complete
499	* characters.
500	*
501	* No more than @a n bytes are read from the input string, so it does not
502	* have to be null-terminated.
503	*
504	* @param dest Destination buffer.
505	* @param count Size of the destination buffer (must be > 0).
506	* @param src Source string.
507	* @param n Maximum number of bytes to read from @a src.
508	*/
509	void str_ncpy(char dest, size_t size, const char src, size_t n)
510	{
511	/* There must be space for a null terminator in the buffer. */
512	assert(size > 0);
513
514	size_t src_off = 0;
515	size_t dest_off = 0;
516
517	wchar_t ch;
518	while ((ch = str_decode(src, &src_off, n)) != 0) {
519	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
520	break;
521	}
522
523	dest[dest_off] = '\0';
524	}
525
/** Append one string to another.
 *
 * Append source string @a src to string in destination buffer @a dest.
 * Size of the destination buffer is @a size. If the size of the output buffer
 * is at least one byte, the output string will always be well-formed, i.e.
 * null-terminated and containing only complete characters.
 *
 * If the string already in @a dest occupies @a size bytes or more, nothing
 * is appended.
 *
 * @param dest Destination buffer (must hold a null-terminated string).
 * @param size Size of the destination buffer.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	size_t dstr_size;

	dstr_size = str_size(dest);
	if (dstr_size >= size)
		return;

	/* Copy into the unused tail of the buffer */
	str_cpy(dest + dstr_size, size - dstr_size, src);
}
547
548	/** Convert space-padded ASCII to string.
549	*
550	* Common legacy text encoding in hardware is 7-bit ASCII fitted into
551	* a fixed-with byte buffer (bit 7 always zero), right-padded with spaces
552	* (ASCII 0x20). Convert space-padded ascii to string representation.
553	*
554	* If the text does not fit into the destination buffer, the function converts
555	* as many characters as possible and returns EOVERFLOW.
556	*
557	* If the text contains non-ASCII bytes (with bit 7 set), the whole string is
558	* converted anyway and invalid characters are replaced with question marks
559	* (U_SPECIAL) and the function returns EIO.
560	*
561	* Regardless of return value upon return @a dest will always be well-formed.
562	*
563	* @param dest Destination buffer
564	* @param size Size of destination buffer
565	* @param src Space-padded ASCII.
566	* @param n Size of the source buffer in bytes.
567	*
568	* @return EOK on success, EOVERFLOW if the text does not fit
569	* destination buffer, EIO if the text contains
570	* non-ASCII bytes.
571	*/
572	int spascii_to_str(char dest, size_t size, const uint8_t src, size_t n)
573	{
574	size_t sidx;
575	size_t didx;
576	size_t dlast;
577	uint8_t byte;
578	int rc;
579	int result;
580
581	/* There must be space for a null terminator in the buffer. */
582	assert(size > 0);
583	result = EOK;
584
585	didx = 0;
586	dlast = 0;
587	for (sidx = 0; sidx < n; ++sidx) {
588	byte = src[sidx];
589	if (!ascii_check(byte)) {
590	byte = U_SPECIAL;
591	result = EIO;
592	}
593
594	rc = chr_encode(byte, dest, &didx, size - 1);
595	if (rc != EOK) {
596	assert(rc == EOVERFLOW);
597	dest[didx] = '\0';
598	return rc;
599	}
600
601	/* Remember dest index after last non-empty character */
602	if (byte != 0x20)
603	dlast = didx;
604	}
605
606	/* Terminate string after last non-empty character */
607	dest[dlast] = '\0';
608	return result;
609	}
610
611	/** Convert wide string to string.
612	*
613	* Convert wide string @a src to string. The output is written to the buffer
614	* specified by @a dest and @a size. @a size must be non-zero and the string
615	* written will always be well-formed.
616	*
617	* @param dest Destination buffer.
618	* @param size Size of the destination buffer.
619	* @param src Source wide string.
620	*/
621	void wstr_to_str(char dest, size_t size, const wchar_t src)
622	{
623	wchar_t ch;
624	size_t src_idx;
625	size_t dest_off;
626
627	/* There must be space for a null terminator in the buffer. */
628	assert(size > 0);
629
630	src_idx = 0;
631	dest_off = 0;
632
633	while ((ch = src[src_idx++]) != 0) {
634	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
635	break;
636	}
637
638	dest[dest_off] = '\0';
639	}
640
641	/** Convert wide string to new string.
642	*
643	* Convert wide string @a src to string. Space for the new string is allocated
644	* on the heap.
645	*
646	* @param src Source wide string.
647	* @return New string.
648	*/
649	char wstr_to_astr(const wchar_t src)
650	{
651	char dbuf[STR_BOUNDS(1)];
652	char *str;
653	wchar_t ch;
654
655	size_t src_idx;
656	size_t dest_off;
657	size_t dest_size;
658
659	/* Compute size of encoded string. */
660
661	src_idx = 0;
662	dest_size = 0;
663
664	while ((ch = src[src_idx++]) != 0) {
665	dest_off = 0;
666	if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
667	break;
668	dest_size += dest_off;
669	}
670
671	str = malloc(dest_size + 1);
672	if (str == NULL)
673	return NULL;
674
675	/* Encode string. */
676
677	src_idx = 0;
678	dest_off = 0;
679
680	while ((ch = src[src_idx++]) != 0) {
681	if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
682	break;
683	}
684
685	str[dest_size] = '\0';
686	return str;
687	}
688
689
690	/** Convert string to wide string.
691	*
692	* Convert string @a src to wide string. The output is written to the
693	* buffer specified by @a dest and @a dlen. @a dlen must be non-zero
694	* and the wide string written will always be null-terminated.
695	*
696	* @param dest Destination buffer.
697	* @param dlen Length of destination buffer (number of wchars).
698	* @param src Source string.
699	*/
700	void str_to_wstr(wchar_t dest, size_t dlen, const char src)
701	{
702	size_t offset;
703	size_t di;
704	wchar_t c;
705
706	assert(dlen > 0);
707
708	offset = 0;
709	di = 0;
710
711	do {
712	if (di >= dlen - 1)
713	break;
714
715	c = str_decode(src, &offset, STR_NO_LIMIT);
716	dest[di++] = c;
717	} while (c != '\0');
718
719	dest[dlen - 1] = '\0';
720	}
721
722	/** Find first occurence of character in string.
723	*
724	* @param str String to search.
725	* @param ch Character to look for.
726	*
727	* @return Pointer to character in @a str or NULL if not found.
728	*/
729	char str_chr(const char str, wchar_t ch)
730	{
731	wchar_t acc;
732	size_t off = 0;
733	size_t last = 0;
734
735	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
736	if (acc == ch)
737	return (char *) (str + last);
738	last = off;
739	}
740
741	return NULL;
742	}
743
744	/** Find last occurence of character in string.
745	*
746	* @param str String to search.
747	* @param ch Character to look for.
748	*
749	* @return Pointer to character in @a str or NULL if not found.
750	*/
751	char str_rchr(const char str, wchar_t ch)
752	{
753	wchar_t acc;
754	size_t off = 0;
755	size_t last = 0;
756	const char *res = NULL;
757
758	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
759	if (acc == ch)
760	res = (str + last);
761	last = off;
762	}
763
764	return (char *) res;
765	}
766
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position
 * @a pos. The characters after the position are shifted.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert to.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	/*
	 * NOTE(review): only @a pos is checked against @a max_pos; whether the
	 * grown string (len + 1 characters plus terminator) fits the buffer
	 * appears to be left to the caller -- confirm.
	 */
	if ((pos > len) || (pos + 1 > max_pos))
		return false;

	/*
	 * Shift the tail (including the terminator) one slot to the right.
	 * The condition is written as i + 1 > pos so that the loop also
	 * terminates for pos == 0, when i wraps around after reaching zero.
	 */
	size_t i;
	for (i = len; i + 1 > pos; i--)
		str[i + 1] = str[i];

	str[pos] = ch;

	return true;
}
796
/** Remove a wide character from a wide string.
 *
 * Delete the wide character at index @a pos and close the gap by moving
 * the remainder of the string (terminator included) one slot to the left.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	size_t len = wstr_length(str);
	size_t dst;

	if (pos >= len)
		return false;

	/* The final iteration copies the terminator from index len. */
	for (dst = pos; dst < len; dst++)
		str[dst] = str[dst + 1];

	return true;
}
822
/** Compare two strings byte-wise, ignoring letter case.
 *
 * Bytes are folded with tolower() before comparison, so only the case
 * handled by tolower() is ignored.
 *
 * @param a First string to compare.
 * @param b Second string to compare.
 *
 * @return Zero if the strings are equal, otherwise the difference of the
 *         first pair of lower-cased bytes that differ.
 */
int stricmp(const char *a, const char *b)
{
	size_t i = 0;

	/*
	 * Bytes are passed to tolower() as unsigned char: a plain char may be
	 * negative, which is outside the domain of the <ctype.h> functions.
	 */
	while ((a[i] != '\0') && (b[i] != '\0') &&
	    (tolower((unsigned char) a[i]) == tolower((unsigned char) b[i])))
		i++;

	return (tolower((unsigned char) a[i]) - tolower((unsigned char) b[i]));
}
832
/** Convert string to a number.
 * Core of strtol and strtoul functions.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @param sgn    It's set to 1 if minus found (left untouched otherwise).
 * @return Result of conversion (magnitude only), ULONG_MAX on overflow
 *         or 0 if no conversion could be performed.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
	unsigned char c;
	unsigned long result = 0;
	unsigned long a, b;
	const char *str = nptr;
	const char *tmpptr;

	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*sgn = 1;
		++str;
	} else if (*str == '+')
		++str;

	if (base) {
		if ((base == 1) || (base > 36)) {
			/* FIXME: set errno to EINVAL */
			return 0;
		}
		if ((base == 16) && (*str == '0') && ((str[1] == 'x') ||
		    (str[1] == 'X'))) {
			str += 2;
		}
	} else {
		base = 10;

		if (*str == '0') {
			base = 8;
			if ((str[1] == 'X') || (str[1] == 'x')) {
				base = 16;
				str += 2;
			}
		}
	}

	tmpptr = str;

	while (*str) {
		/* Map the character to its digit value, 0xff if not a digit */
		c = *str;
		c = (c >= 'a' ? c - 'a' + 10 : (c >= 'A' ? c - 'A' + 10 :
		    (c <= '9' ? c - '0' : 0xff)));

		/* Valid digits are 0 .. base - 1 */
		if (c >= base) {
			break;
		}

		/*
		 * Multiply in two parts (low byte and the rest) so that
		 * overflow can be detected without a wider type.
		 */
		a = (result & 0xff) * base + c;
		b = (result >> 8) * base + (a >> 8);

		if (b > (ULONG_MAX >> 8)) {
			/* overflow */
			/* FIXME: errno = ERANGE*/
			return ULONG_MAX;
		}

		result = (b << 8) + (a & 0xff);
		++str;
	}

	if (str == tmpptr) {
		/*
		 * No number was found => first invalid character is the first
		 * character of the string.
		 */
		/* FIXME: set errno to EINVAL */
		str = nptr;
		result = 0;
	}

	if (endptr)
		*endptr = (char *) str;

	if (nptr == str) {
		/* FIXME: errno = EINVAL */
		return 0;
	}

	return result;
}
925
/** Convert initial part of string to long int according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return Result of conversion, clamped to LONG_MIN / LONG_MAX when the
 *         magnitude does not fit into long int.
 */
long int strtol(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = 0;

	number = _strtoul(nptr, endptr, base, &sgn);

	if (number > LONG_MAX) {
		if ((sgn) && (number == (unsigned long) (LONG_MAX) + 1)) {
			/* Magnitude of LONG_MIN: representable only when negative */
			/* FIXME: set 0 to errno */
			return LONG_MIN;
		}
		/* FIXME: set ERANGE to errno */
		return (sgn ? LONG_MIN : LONG_MAX);
	}

	/* number <= LONG_MAX here, so both conversions are exact */
	return (sgn ? -(long) number : (long) number);
}
957
/** Duplicate string.
 *
 * Allocate a new string with malloc() and copy characters from the source
 * string into it. The caller is responsible for freeing the result.
 *
 * The duplicate string is always a well-formed null-terminated UTF-8
 * string, but it can differ from the source string on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string or NULL if memory could not be allocated.
 *
 */
char *str_dup(const char *src)
{
	/* One extra byte for the null terminator */
	size_t size = str_size(src) + 1;
	char *dest = malloc(size);
	if (dest == NULL)
		return NULL;

	str_cpy(dest, size, src);
	return dest;
}
984
/** Duplicate string with size limit.
 *
 * Allocate a new string with malloc() and copy up to @a n bytes from the
 * source string into it. No more than @a n + 1 bytes is allocated, but if
 * the size occupied by the source string is smaller than @a n + 1,
 * less is allocated. The caller is responsible for freeing the result.
 *
 * The duplicate string is always a well-formed null-terminated UTF-8
 * string, but it can differ from the source string on the byte level.
 *
 * Note that the size of @a src is measured with str_size(), so @a src
 * must be null-terminated even when it is longer than @a n bytes.
 *
 * @param src Source string.
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string or NULL if memory could not be allocated.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	size_t size = str_size(src);
	if (size > n)
		size = n;

	/* One extra byte for the null terminator */
	char *dest = malloc(size + 1);
	if (dest == NULL)
		return NULL;

	str_ncpy(dest, size + 1, src, size);
	return dest;
}
1018
1019
/** Convert initial part of string to unsigned long according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return Result of conversion. If a minus sign was present, the result
 *         is negated in unsigned arithmetic.
 */
unsigned long strtoul(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = 0;

	number = _strtoul(nptr, endptr, base, &sgn);

	return (sgn ? -number : number);
}
1042
/** Split string into tokens (non-reentrant variant).
 *
 * Thin wrapper around strtok_r() which keeps the scanning position in
 * a static variable shared by all callers.
 *
 * @param s     String to tokenize, or NULL to continue with the string
 *              from the previous call. The string is modified.
 * @param delim Set of delimiter characters.
 *
 * @return Pointer to the next token or NULL if there are no more tokens.
 */
char *strtok(char *s, const char *delim)
{
	static char *next;

	return strtok_r(s, delim, &next);
}
1049
/** Split string into tokens (reentrant variant).
 *
 * Delimiters are matched byte by byte using str_chr() on @a delim.
 *
 * @param s     String to tokenize, or NULL to continue from the position
 *              saved in @a next. The string is modified: the delimiter
 *              terminating each token is overwritten with a null byte.
 * @param delim Set of delimiter characters.
 * @param next  Storage for the scanning position between calls.
 *
 * @return Pointer to the next token or NULL if there are no more tokens.
 */
char *strtok_r(char *s, const char *delim, char **next)
{
	char *start, *end;

	if (s == NULL)
		s = *next;

	/* Nothing to continue from (no previous call stored a position). */
	if (s == NULL)
		return NULL;

	/* Skip over leading delimiters. */
	while (*s && (str_chr(delim, *s) != NULL))
		++s;
	start = s;

	/* Skip over token characters. */
	while (*s && (str_chr(delim, *s) == NULL))
		++s;
	end = s;

	/* Resume after the delimiter, or stay on the terminator at the end. */
	*next = (*s ? s + 1 : s);

	if (start == end) {
		return NULL;  /* No more tokens. */
	}

	/* Overwrite delimiter with NULL terminator. */
	*end = '\0';
	return start;
}
1074
1075	/** Convert string to uint64_t (internal variant).
1076	*
1077	* @param nptr Pointer to string.
1078	* @param endptr Pointer to the first invalid character is stored here.
1079	* @param base Zero or number between 2 and 36 inclusive.
1080	* @param neg Indication of unary minus is stored here.
1081	* @apram result Result of the conversion.
1082	*
1083	* @return EOK if conversion was successful.
1084	*
1085	*/
1086	static int str_uint(const char nptr, char *endptr, unsigned int base,
1087	bool neg, uint64_t result)
1088	{
1089	assert(endptr != NULL);
1090	assert(neg != NULL);
1091	assert(result != NULL);
1092
1093	*neg = false;
1094	const char *str = nptr;
1095
1096	/* Ignore leading whitespace */
1097	while (isspace(*str))
1098	str++;
1099
1100	if (*str == '-') {
1101	*neg = true;
1102	str++;
1103	} else if (*str == '+')
1104	str++;
1105
1106	if (base == 0) {
1107	/* Decode base if not specified */
1108	base = 10;
1109
1110	if (*str == '0') {
1111	base = 8;
1112	str++;
1113
1114	switch (*str) {
1115	case 'b':
1116	case 'B':
1117	base = 2;
1118	str++;
1119	break;
1120	case 'o':
1121	case 'O':
1122	base = 8;
1123	str++;
1124	break;
1125	case 'd':
1126	case 'D':
1127	case 't':
1128	case 'T':
1129	base = 10;
1130	str++;
1131	break;
1132	case 'x':
1133	case 'X':
1134	base = 16;
1135	str++;
1136	break;
1137	default:
1138	str--;
1139	}
1140	}
1141	} else {
1142	/* Check base range */
1143	if ((base < 2) \|\| (base > 36)) {
1144	endptr = (char ) str;
1145	return EINVAL;
1146	}
1147	}
1148
1149	*result = 0;
1150	const char *startstr = str;
1151
1152	while (*str != 0) {
1153	unsigned int digit;
1154
1155	if ((str >= 'a') && (str <= 'z'))
1156	digit = *str - 'a' + 10;
1157	else if ((str >= 'A') && (str <= 'Z'))
1158	digit = *str - 'A' + 10;
1159	else if ((str >= '0') && (str <= '9'))
1160	digit = *str - '0';
1161	else
1162	break;
1163
1164	if (digit >= base)
1165	break;
1166
1167	uint64_t prev = *result;
1168	result = (result) * base + digit;
1169
1170	if (*result < prev) {
1171	/* Overflow */
1172	endptr = (char ) str;
1173	return EOVERFLOW;
1174	}
1175
1176	str++;
1177	}
1178
1179	if (str == startstr) {
1180	/*
1181	* No digits were decoded => first invalid character is
1182	* the first character of the string.
1183	*/
1184	str = nptr;
1185	}
1186
1187	endptr = (char ) str;
1188
1189	if (str == nptr)
1190	return EINVAL;
1191
1192	return EOK;
1193	}
1194
1195	/** Convert string to uint64_t.
1196	*
1197	* @param nptr Pointer to string.
1198	* @param endptr If not NULL, pointer to the first invalid character
1199	* is stored here.
1200	* @param base Zero or number between 2 and 36 inclusive.
1201	* @param strict Do not allow any trailing characters.
1202	* @param result Result of the conversion.
1203	*
1204	* @return EOK if conversion was successful.
1205	*
1206	*/
1207	int str_uint64(const char nptr, char *endptr, unsigned int base,
1208	bool strict, uint64_t *result)
1209	{
1210	assert(result != NULL);
1211
1212	bool neg;
1213	char *lendptr;
1214	int ret = str_uint(nptr, &lendptr, base, &neg, result);
1215
1216	if (endptr != NULL)
1217	endptr = (char ) lendptr;
1218
1219	if (ret != EOK)
1220	return ret;
1221
1222	/* Do not allow negative values */
1223	if (neg)
1224	return EINVAL;
1225
1226	/* Check whether we are at the end of
1227	the string in strict mode */
1228	if ((strict) && (*lendptr != 0))
1229	return EINVAL;
1230
1231	return EOK;
1232	}
1233
1234	/** Convert string to size_t.
1235	*
1236	* @param nptr Pointer to string.
1237	* @param endptr If not NULL, pointer to the first invalid character
1238	* is stored here.
1239	* @param base Zero or number between 2 and 36 inclusive.
1240	* @param strict Do not allow any trailing characters.
1241	* @param result Result of the conversion.
1242	*
1243	* @return EOK if conversion was successful.
1244	*
1245	*/
1246	int str_size_t(const char nptr, char *endptr, unsigned int base,
1247	bool strict, size_t *result)
1248	{
1249	assert(result != NULL);
1250
1251	bool neg;
1252	char *lendptr;
1253	uint64_t res;
1254	int ret = str_uint(nptr, &lendptr, base, &neg, &res);
1255
1256	if (endptr != NULL)
1257	endptr = (char ) lendptr;
1258
1259	if (ret != EOK)
1260	return ret;
1261
1262	/* Do not allow negative values */
1263	if (neg)
1264	return EINVAL;
1265
1266	/* Check whether we are at the end of
1267	the string in strict mode */
1268	if ((strict) && (*lendptr != 0))
1269	return EINVAL;
1270
1271	/* Check for overflow */
1272	size_t _res = (size_t) res;
1273	if (_res != res)
1274	return EOVERFLOW;
1275
1276	*result = _res;
1277
1278	return EOK;
1279	}
1280
/** Scale a value down by a power of 1000 and select a matching suffix.
 *
 * The scale is chosen so that the scaled value keeps at least four
 * significant digits (e.g. values above 1000000 are reported in thousands).
 *
 * NOTE(review): the letters used for the two largest scales ('Z' for
 * a divisor of 10^18 and 'E' for 10^15) do not match the SI prefixes for
 * those powers (exa and peta); kept as is -- confirm the intended letters.
 *
 * @param val    Value to scale.
 * @param rv     Scaled value is stored here.
 * @param suffix Suffix character is stored here (' ' if not scaled).
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'Z';
	} else if (val > UINT64_C(1000000000000000000)) {
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000)) {
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
1306
/** Scale a byte count down by a power of 1024 and select a matching suffix.
 *
 * The scale is chosen so that the scaled value stays above 1024 (e.g.
 * values above 1048576 are reported in KiB).
 *
 * @param val    Value to scale (in bytes).
 * @param rv     Scaled value is stored here.
 * @param suffix Pointer to a constant suffix string is stored here.
 * @param fixed  If true, the suffix used for unscaled values is padded
 *               with a trailing space.
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "EiB";
	} else if (val > UINT64_C(1125899906842624)) {
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		/*
		 * NOTE(review): presumably the padded form is meant to match
		 * the width of the three-letter suffixes -- confirm the number
		 * of trailing spaces against the upstream source.
		 */
		if (fixed)
			*suffix = "B ";
		else
			*suffix = "B";
	}
}
1333
1334	/** @}
1335	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: