Context Navigation

str.c@ 08e103d4

Visit:

Last change on this file since 08e103d4 was 08e103d4, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 6 years ago

Use clearer naming for string length functions

This and the following commit change the names of functions, as well as
their documentation, to use unambiguous terms "bytes" and "code points"
instead of ambiguous terms "size", "length", and "characters".

Property mode set to 100644

File size: 45.0 KB

Line
1	/*
2	* Copyright (c) 2001-2004 Jakub Jermar
3	* Copyright (c) 2005 Martin Decky
4	* Copyright (c) 2008 Jiri Svoboda
5	* Copyright (c) 2011 Martin Sucha
6	* Copyright (c) 2011 Oleg Romanenko
7	* All rights reserved.
8	*
9	* Redistribution and use in source and binary forms, with or without
10	* modification, are permitted provided that the following conditions
11	* are met:
12	*
13	* - Redistributions of source code must retain the above copyright
14	* notice, this list of conditions and the following disclaimer.
15	* - Redistributions in binary form must reproduce the above copyright
16	* notice, this list of conditions and the following disclaimer in the
17	* documentation and/or other materials provided with the distribution.
18	* - The name of the author may not be used to endorse or promote products
19	* derived from this software without specific prior written permission.
20	*
21	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31	*/
32
33	/** @addtogroup libc
34	* @{
35	*/
36
37	/**
38	* @file
39	* @brief String functions.
40	*
41	* Strings and characters use the Universal Character Set (UCS). The standard
42	* strings, called just strings are encoded in UTF-8. Wide strings (encoded
43	* in UTF-32) are supported to a limited degree. A single character is
44	* represented as wchar_t.@n
45	*
46	* Overview of the terminology:@n
47	*
48	* Term Meaning
49	* -------------------- ----------------------------------------------------
50	* byte 8 bits stored in uint8_t (unsigned 8 bit integer)
51	*
52	* character UTF-32 encoded Unicode character, stored in wchar_t
53	* (signed 32 bit integer), code points 0 .. 1114111
54	* are valid
55	*
56	* ASCII character 7 bit encoded ASCII character, stored in char
57	* (usually signed 8 bit integer), code points 0 .. 127
58	* are valid
59	*
60	* string UTF-8 encoded NULL-terminated Unicode string, char *
61	*
62	* wide string UTF-32 encoded NULL-terminated Unicode string,
63	* wchar_t *
64	*
65	* [wide] string size number of BYTES in a [wide] string (excluding
66	* the NULL-terminator), size_t
67	*
68	* [wide] string length number of CHARACTERS in a [wide] string (excluding
69	* the NULL-terminator), size_t
70	*
71	* [wide] string width number of display cells on a monospace display taken
72	* by a [wide] string, size_t
73	*
74	*
75	* Overview of string metrics:@n
76	*
77	* Metric Abbrev. Type Meaning
78	* ------ ------ ------ -------------------------------------------------
79	* size n size_t number of BYTES in a string (excluding the
80	* NULL-terminator)
81	*
82	* length l size_t number of CHARACTERS in a string (excluding the
83	* null terminator)
84	*
85	* width w size_t number of display cells on a monospace display
86	* taken by a string
87	*
88	*
89	* Function naming prefixes:@n
90	*
91	* chr_ operate on characters
92	* ascii_ operate on ASCII characters
93	* str_ operate on strings
94	* wstr_ operate on wide strings
95	*
96	* [w]str_[n|l|w] operate on a prefix limited by size, length
97	* or width
98	*
99	*
100	* A specific character inside a [wide] string can be referred to by:@n
101	*
102	* pointer (char *, wchar_t *)
103	* byte offset (size_t)
104	* character index (size_t)
105	*
106	*/
107
108	#include <str.h>
109
110	#include <assert.h>
111	#include <ctype.h>
112	#include <errno.h>
113	#include <stdbool.h>
114	#include <stddef.h>
115	#include <stdint.h>
116	#include <stdlib.h>
117
118	#include <align.h>
119	#include <mem.h>
120
/** Evaluate @a cond only if wchar_t is signed.
 *
 * When the compiler defines __WCHAR_UNSIGNED__ the macro expands to a
 * constant true and @a cond is not evaluated at all.
 */
#ifdef __WCHAR_UNSIGNED__
#define WCHAR_SIGNED_CHECK(cond)  (true)
#else
#define WCHAR_SIGNED_CHECK(cond)  (cond)
#endif

/** Byte mask consisting of lowest @a n bits (out of 8), n in 0 .. 8 */
#define LO_MASK_8(n)  ((uint8_t) ((1u << (n)) - 1))

/**
 * Bit mask consisting of lowest @a n bits (out of 32), n in 0 .. 31.
 * The shift is done on an unsigned 32-bit one so that no signed overflow
 * can occur.
 */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1))

/**
 * Byte mask consisting of highest @a n bits (out of 8).
 * The result is an integer-promoted complement, so only its low 8 bits are
 * meaningful; it is intended to be stored into a byte.
 */
#define HI_MASK_8(n)  (~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
139
140	/** Decode a single character from a string.
141	*
142	* Decode a single character from a string of size @a size. Decoding starts
143	* at @a offset and this offset is moved to the beginning of the next
144	* character. In case of decoding error, offset generally advances at least
145	* by one. However, offset is never moved beyond size.
146	*
147	* @param str String (not necessarily NULL-terminated).
148	* @param offset Byte offset in string where to start decoding.
149	* @param size Size of the string (in bytes).
150	*
151	* @return Value of decoded character, U_SPECIAL on decoding error or
152	* NULL if attempt to decode beyond @a size.
153	*
154	*/
155	wchar_t str_decode(const char str, size_t offset, size_t size)
156	{
157	if (*offset + 1 > size)
158	return 0;
159
160	/* First byte read from string */
161	uint8_t b0 = (uint8_t) str[(*offset)++];
162
163	/* Determine code length */
164
165	unsigned int b0_bits; /* Data bits in first byte */
166	unsigned int cbytes; /* Number of continuation bytes */
167
168	if ((b0 & 0x80) == 0) {
169	/* 0xxxxxxx (Plain ASCII) */
170	b0_bits = 7;
171	cbytes = 0;
172	} else if ((b0 & 0xe0) == 0xc0) {
173	/* 110xxxxx 10xxxxxx */
174	b0_bits = 5;
175	cbytes = 1;
176	} else if ((b0 & 0xf0) == 0xe0) {
177	/* 1110xxxx 10xxxxxx 10xxxxxx */
178	b0_bits = 4;
179	cbytes = 2;
180	} else if ((b0 & 0xf8) == 0xf0) {
181	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
182	b0_bits = 3;
183	cbytes = 3;
184	} else {
185	/* 10xxxxxx -- unexpected continuation byte */
186	return U_SPECIAL;
187	}
188
189	if (*offset + cbytes > size)
190	return U_SPECIAL;
191
192	wchar_t ch = b0 & LO_MASK_8(b0_bits);
193
194	/* Decode continuation bytes */
195	while (cbytes > 0) {
196	uint8_t b = (uint8_t) str[(*offset)++];
197
198	/* Must be 10xxxxxx */
199	if ((b & 0xc0) != 0x80)
200	return U_SPECIAL;
201
202	/* Shift data bits to ch */
203	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
204	cbytes--;
205	}
206
207	return ch;
208	}
209
210	/** Decode a single character from a string to the left.
211	*
212	* Decode a single character from a string of size @a size. Decoding starts
213	* at @a offset and this offset is moved to the beginning of the previous
214	* character. In case of decoding error, offset generally decreases at least
215	* by one. However, offset is never moved before 0.
216	*
217	* @param str String (not necessarily NULL-terminated).
218	* @param offset Byte offset in string where to start decoding.
219	* @param size Size of the string (in bytes).
220	*
221	* @return Value of decoded character, U_SPECIAL on decoding error or
222	* NULL if attempt to decode beyond @a start of str.
223	*
224	*/
225	wchar_t str_decode_reverse(const char str, size_t offset, size_t size)
226	{
227	if (*offset == 0)
228	return 0;
229
230	size_t processed = 0;
231	/* Continue while continuation bytes found */
232	while (*offset > 0 && processed < 4) {
233	uint8_t b = (uint8_t) str[--(*offset)];
234
235	if (processed == 0 && (b & 0x80) == 0) {
236	/* 0xxxxxxx (Plain ASCII) */
237	return b & 0x7f;
238	} else if ((b & 0xe0) == 0xc0 \|\| (b & 0xf0) == 0xe0 \|\|
239	(b & 0xf8) == 0xf0) {
240	/* Start byte */
241	size_t start_offset = *offset;
242	return str_decode(str, &start_offset, size);
243	} else if ((b & 0xc0) != 0x80) {
244	/* Not a continuation byte */
245	return U_SPECIAL;
246	}
247	processed++;
248	}
249	/* Too many continuation bytes */
250	return U_SPECIAL;
251	}
252
253	/** Encode a single character to string representation.
254	*
255	* Encode a single character to string representation (i.e. UTF-8) and store
256	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
257	* is moved to the position where the next character can be written to.
258	*
259	* @param ch Input character.
260	* @param str Output buffer.
261	* @param offset Byte offset where to start writing.
262	* @param size Size of the output buffer (in bytes).
263	*
264	* @return EOK if the character was encoded successfully, EOVERFLOW if there
265	* was not enough space in the output buffer or EINVAL if the character
266	* code was invalid.
267	*/
268	errno_t chr_encode(const wchar_t ch, char str, size_t offset, size_t size)
269	{
270	if (*offset >= size)
271	return EOVERFLOW;
272
273	if (!chr_check(ch))
274	return EINVAL;
275
276	/*
277	* Unsigned version of ch (bit operations should only be done
278	* on unsigned types).
279	*/
280	uint32_t cc = (uint32_t) ch;
281
282	/* Determine how many continuation bytes are needed */
283
284	unsigned int b0_bits; /* Data bits in first byte */
285	unsigned int cbytes; /* Number of continuation bytes */
286
287	if ((cc & ~LO_MASK_32(7)) == 0) {
288	b0_bits = 7;
289	cbytes = 0;
290	} else if ((cc & ~LO_MASK_32(11)) == 0) {
291	b0_bits = 5;
292	cbytes = 1;
293	} else if ((cc & ~LO_MASK_32(16)) == 0) {
294	b0_bits = 4;
295	cbytes = 2;
296	} else if ((cc & ~LO_MASK_32(21)) == 0) {
297	b0_bits = 3;
298	cbytes = 3;
299	} else {
300	/* Codes longer than 21 bits are not supported */
301	return EINVAL;
302	}
303
304	/* Check for available space in buffer */
305	if (*offset + cbytes >= size)
306	return EOVERFLOW;
307
308	/* Encode continuation bytes */
309	unsigned int i;
310	for (i = cbytes; i > 0; i--) {
311	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
312	cc = cc >> CONT_BITS;
313	}
314
315	/* Encode first byte */
316	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
317
318	/* Advance offset */
319	*offset += cbytes + 1;
320
321	return EOK;
322	}
323
/** Get size of string.
 *
 * Count the bytes that precede the NULL-terminator of @a str.
 *
 * @param str String to consider.
 *
 * @return Number of bytes used by the string (terminator excluded).
 *
 */
size_t str_bytes(const char *str)
{
	const char *end = str;

	/* Walk to the terminator; the pointer distance is the size. */
	while (*end != '\0')
		end++;

	return (size_t) (end - str);
}
343
/** Get size of wide string.
 *
 * Get the number of bytes occupied by the wide string @a str, not counting
 * the NULL-terminator: the number of wide characters times sizeof(wchar_t).
 *
 * @param str Wide string to consider.
 *
 * @return Number of bytes used by the wide string
 *
 */
size_t wstr_bytes(const wchar_t *str)
{
	size_t code_points = wstr_code_points(str);

	return code_points * sizeof(wchar_t);
}
358
359	/** Get size of string with length limit.
360	*
361	* Get the number of bytes which are used by up to @a max_len first
362	* characters in the string @a str. If @a max_len is greater than
363	* the length of @a str, the entire string is measured (excluding the
364	* NULL-terminator).
365	*
366	* @param str String to consider.
367	* @param max_len Maximum number of characters to measure.
368	*
369	* @return Number of bytes used by the characters.
370	*
371	*/
372	size_t str_lbytes(const char *str, size_t max_len)
373	{
374	size_t len = 0;
375	size_t offset = 0;
376
377	while (len < max_len) {
378	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
379	break;
380
381	len++;
382	}
383
384	return offset;
385	}
386
/** Get size of string with size limit.
 *
 * Get the number of bytes which are used by the string @a str
 * (excluding the NULL-terminator), but no more than @a max_size bytes.
 * At most @a max_size bytes of @a str are read, so the string does not
 * have to be NULL-terminated within that range.
 *
 * @param str      String to consider.
 * @param max_size Maximum number of bytes to measure.
 *
 * @return Number of bytes used by the string
 *
 */
size_t str_nbytes(const char *str, size_t max_size)
{
	size_t size = 0;

	/* Check the limit first so that str[max_size] is never read. */
	while ((size < max_size) && (str[size] != '\0'))
		size++;

	return size;
}
407
/** Get size of wide string with size limit.
 *
 * Get the number of bytes which are used by the wide string @a str
 * (excluding the NULL-terminator), but no more than @a max_size bytes.
 * The character count is obtained from wstr_ncode_points() and scaled
 * by sizeof(wchar_t).
 *
 * @param str      Wide string to consider.
 * @param max_size Maximum number of bytes to measure.
 *
 * @return Number of bytes used by the wide string
 *
 */
size_t wstr_nbytes(const wchar_t *str, size_t max_size)
{
	size_t code_points = wstr_ncode_points(str, max_size);

	return code_points * sizeof(wchar_t);
}
423
/** Get size of wide string with length limit.
 *
 * Get the number of bytes which are used by up to @a max_len first
 * wide characters in the wide string @a str. If @a max_len is greater than
 * the length of @a str, the entire wide string is measured (excluding the
 * NULL-terminator).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lbytes(const wchar_t *str, size_t max_len)
{
	/*
	 * Clamp the limit so that converting it to bytes cannot wrap around
	 * and silently shrink the limit for very large max_len values.
	 */
	const size_t max_chars = SIZE_MAX / sizeof(wchar_t);

	if (max_len > max_chars)
		max_len = max_chars;

	return (wstr_ncode_points(str, max_len * sizeof(wchar_t)) * sizeof(wchar_t));
}
441
442	/** Get number of characters in a string.
443	*
444	* @param str NULL-terminated string.
445	*
446	* @return Number of characters in string.
447	*
448	*/
449	size_t str_code_points(const char *str)
450	{
451	size_t len = 0;
452	size_t offset = 0;
453
454	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
455	len++;
456
457	return len;
458	}
459
/** Get number of characters in a wide string.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return Number of characters in @a wstr (terminator excluded).
 *
 */
size_t wstr_code_points(const wchar_t *wstr)
{
	const wchar_t *cur = wstr;

	/* Advance to the terminator; the element distance is the length. */
	while (*cur != 0)
		cur++;

	return (size_t) (cur - wstr);
}
476
/** Get number of characters in a string with size limit.
 *
 * Counts characters until either the terminator is decoded or @a size
 * bytes have been consumed.
 *
 * @param str  NULL-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in string.
 *
 */
size_t str_ncode_points(const char *str, size_t size)
{
	size_t offset = 0;
	size_t count = 0;

	for (;;) {
		if (str_decode(str, &offset, size) == 0)
			return count;

		count++;
	}
}
495
/** Get number of characters in a wide string with size limit.
 *
 * Only whole wide characters that fit into the first @a size bytes are
 * considered; a trailing partial wchar_t is ignored.
 *
 * @param str  NULL-terminated wide string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in the wide string.
 *
 */
size_t wstr_ncode_points(const wchar_t *str, size_t size)
{
	/* Number of whole wide characters that fit into size bytes */
	size_t max_chars = size / sizeof(wchar_t);
	size_t len = 0;

	while ((len < max_chars) && (str[len] != 0))
		len++;

	return len;
}
517
/** Get character display width on a character cell display.
 *
 * Every character is currently reported as one cell wide; @a ch is not
 * inspected.
 *
 * @param ch Character
 * @return Width of character in cells.
 */
size_t chr_width(wchar_t ch)
{
	(void) ch;

	return 1;
}
527
528	/** Get string display width on a character cell display.
529	*
530	* @param str String
531	* @return Width of string in cells.
532	*/
533	size_t str_width(const char *str)
534	{
535	size_t width = 0;
536	size_t offset = 0;
537	wchar_t ch;
538
539	while ((ch = str_decode(str, &offset, STR_NO_LIMIT)) != 0)
540	width += chr_width(ch);
541
542	return width;
543	}
544
/** Check whether character is plain ASCII.
 *
 * @param ch Character to check.
 *
 * @return True if @a ch lies in the range 0 .. 127.
 *
 */
bool ascii_check(wchar_t ch)
{
	/* The lower bound is tested only when wchar_t is signed. */
	return WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 127);
}
557
/** Check whether character is valid
 *
 * @param ch Character to check.
 *
 * @return True if @a ch lies in the Unicode code point range
 *         0 .. 1114111 (0x10ffff).
 *
 */
bool chr_check(wchar_t ch)
{
	/* The lower bound is tested only when wchar_t is signed. */
	return WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 1114111);
}
570
571	/** Compare two NULL terminated strings.
572	*
573	* Do a char-by-char comparison of two NULL-terminated strings.
574	* The strings are considered equal iff their length is equal
575	* and both strings consist of the same sequence of characters.
576	*
577	* A string S1 is less than another string S2 if it has a character with
578	* lower value at the first character position where the strings differ.
579	* If the strings differ in length, the shorter one is treated as if
580	* padded by characters with a value of zero.
581	*
582	* @param s1 First string to compare.
583	* @param s2 Second string to compare.
584	*
585	* @return 0 if the strings are equal, -1 if the first is less than the second,
586	* 1 if the second is less than the first.
587	*
588	*/
589	int str_cmp(const char s1, const char s2)
590	{
591	wchar_t c1 = 0;
592	wchar_t c2 = 0;
593
594	size_t off1 = 0;
595	size_t off2 = 0;
596
597	while (true) {
598	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
599	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
600
601	if (c1 < c2)
602	return -1;
603
604	if (c1 > c2)
605	return 1;
606
607	if (c1 == 0 \|\| c2 == 0)
608	break;
609	}
610
611	return 0;
612	}
613
614	/** Compare two NULL terminated strings with length limit.
615	*
616	* Do a char-by-char comparison of two NULL-terminated strings.
617	* The strings are considered equal iff
618	* min(str_code_points(s1), max_len) == min(str_code_points(s2), max_len)
619	* and both strings consist of the same sequence of characters,
620	* up to max_len characters.
621	*
622	* A string S1 is less than another string S2 if it has a character with
623	* lower value at the first character position where the strings differ.
624	* If the strings differ in length, the shorter one is treated as if
625	* padded by characters with a value of zero. Only the first max_len
626	* characters are considered.
627	*
628	* @param s1 First string to compare.
629	* @param s2 Second string to compare.
630	* @param max_len Maximum number of characters to consider.
631	*
632	* @return 0 if the strings are equal, -1 if the first is less than the second,
633	* 1 if the second is less than the first.
634	*
635	*/
636	int str_lcmp(const char s1, const char s2, size_t max_len)
637	{
638	wchar_t c1 = 0;
639	wchar_t c2 = 0;
640
641	size_t off1 = 0;
642	size_t off2 = 0;
643
644	size_t len = 0;
645
646	while (true) {
647	if (len >= max_len)
648	break;
649
650	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
651	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
652
653	if (c1 < c2)
654	return -1;
655
656	if (c1 > c2)
657	return 1;
658
659	if (c1 == 0 \|\| c2 == 0)
660	break;
661
662	++len;
663	}
664
665	return 0;
666
667	}
668
669	/** Compare two NULL terminated strings in case-insensitive manner.
670	*
671	* Do a char-by-char comparison of two NULL-terminated strings.
672	* The strings are considered equal iff their length is equal
673	* and both strings consist of the same sequence of characters
674	* when converted to lower case.
675	*
676	* A string S1 is less than another string S2 if it has a character with
677	* lower value at the first character position where the strings differ.
678	* If the strings differ in length, the shorter one is treated as if
679	* padded by characters with a value of zero.
680	*
681	* @param s1 First string to compare.
682	* @param s2 Second string to compare.
683	*
684	* @return 0 if the strings are equal, -1 if the first is less than the second,
685	* 1 if the second is less than the first.
686	*
687	*/
688	int str_casecmp(const char s1, const char s2)
689	{
690	wchar_t c1 = 0;
691	wchar_t c2 = 0;
692
693	size_t off1 = 0;
694	size_t off2 = 0;
695
696	while (true) {
697	c1 = tolower(str_decode(s1, &off1, STR_NO_LIMIT));
698	c2 = tolower(str_decode(s2, &off2, STR_NO_LIMIT));
699
700	if (c1 < c2)
701	return -1;
702
703	if (c1 > c2)
704	return 1;
705
706	if (c1 == 0 \|\| c2 == 0)
707	break;
708	}
709
710	return 0;
711	}
712
713	/** Compare two NULL terminated strings with length limit in case-insensitive
714	* manner.
715	*
716	* Do a char-by-char comparison of two NULL-terminated strings.
717	* The strings are considered equal iff
718	* min(str_code_points(s1), max_len) == min(str_code_points(s2), max_len)
719	* and both strings consist of the same sequence of characters,
720	* up to max_len characters.
721	*
722	* A string S1 is less than another string S2 if it has a character with
723	* lower value at the first character position where the strings differ.
724	* If the strings differ in length, the shorter one is treated as if
725	* padded by characters with a value of zero. Only the first max_len
726	* characters are considered.
727	*
728	* @param s1 First string to compare.
729	* @param s2 Second string to compare.
730	* @param max_len Maximum number of characters to consider.
731	*
732	* @return 0 if the strings are equal, -1 if the first is less than the second,
733	* 1 if the second is less than the first.
734	*
735	*/
736	int str_lcasecmp(const char s1, const char s2, size_t max_len)
737	{
738	wchar_t c1 = 0;
739	wchar_t c2 = 0;
740
741	size_t off1 = 0;
742	size_t off2 = 0;
743
744	size_t len = 0;
745
746	while (true) {
747	if (len >= max_len)
748	break;
749
750	c1 = tolower(str_decode(s1, &off1, STR_NO_LIMIT));
751	c2 = tolower(str_decode(s2, &off2, STR_NO_LIMIT));
752
753	if (c1 < c2)
754	return -1;
755
756	if (c1 > c2)
757	return 1;
758
759	if (c1 == 0 \|\| c2 == 0)
760	break;
761
762	++len;
763	}
764
765	return 0;
766
767	}
768
769	/** Test whether p is a prefix of s.
770	*
771	* Do a char-by-char comparison of two NULL-terminated strings
772	* and determine if p is a prefix of s.
773	*
774	* @param s The string in which to look
775	* @param p The string to check if it is a prefix of s
776	*
777	* @return true iff p is prefix of s else false
778	*
779	*/
780	bool str_test_prefix(const char s, const char p)
781	{
782	wchar_t c1 = 0;
783	wchar_t c2 = 0;
784
785	size_t off1 = 0;
786	size_t off2 = 0;
787
788	while (true) {
789	c1 = str_decode(s, &off1, STR_NO_LIMIT);
790	c2 = str_decode(p, &off2, STR_NO_LIMIT);
791
792	if (c2 == 0)
793	return true;
794
795	if (c1 != c2)
796	return false;
797
798	if (c1 == 0)
799	break;
800	}
801
802	return false;
803	}
804
805	/** Copy string.
806	*
807	* Copy source string @a src to destination buffer @a dest.
808	* No more than @a size bytes are written. If the size of the output buffer
809	* is at least one byte, the output string will always be well-formed, i.e.
810	* null-terminated and containing only complete characters.
811	*
812	* @param dest Destination buffer.
813	* @param count Size of the destination buffer (must be > 0).
814	* @param src Source string.
815	*
816	*/
817	void str_cpy(char dest, size_t size, const char src)
818	{
819	/* There must be space for a null terminator in the buffer. */
820	assert(size > 0);
821	assert(src != NULL);
822
823	size_t src_off = 0;
824	size_t dest_off = 0;
825
826	wchar_t ch;
827	while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
828	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
829	break;
830	}
831
832	dest[dest_off] = '\0';
833	}
834
835	/** Copy size-limited substring.
836	*
837	* Copy prefix of string @a src of max. size @a size to destination buffer
838	* @a dest. No more than @a size bytes are written. The output string will
839	* always be well-formed, i.e. null-terminated and containing only complete
840	* characters.
841	*
842	* No more than @a n bytes are read from the input string, so it does not
843	* have to be null-terminated.
844	*
845	* @param dest Destination buffer.
846	* @param count Size of the destination buffer (must be > 0).
847	* @param src Source string.
848	* @param n Maximum number of bytes to read from @a src.
849	*
850	*/
851	void str_ncpy(char dest, size_t size, const char src, size_t n)
852	{
853	/* There must be space for a null terminator in the buffer. */
854	assert(size > 0);
855
856	size_t src_off = 0;
857	size_t dest_off = 0;
858
859	wchar_t ch;
860	while ((ch = str_decode(src, &src_off, n)) != 0) {
861	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
862	break;
863	}
864
865	dest[dest_off] = '\0';
866	}
867
/** Append one string to another.
 *
 * Append source string @a src to string in destination buffer @a dest.
 * Size of the destination buffer is @a size. If the string already in
 * @a dest occupies @a size bytes or more, nothing is appended. Otherwise
 * the output string will be well-formed, i.e. null-terminated and
 * containing only complete characters.
 *
 * @param dest Destination buffer.
 * @param size Size of the destination buffer.
 * @param src  Source string.
 */
void str_append(char *dest, size_t size, const char *src)
{
	size_t used = str_bytes(dest);

	/* No room left, not even for a terminator */
	if (used >= size)
		return;

	/* Copy into the unused tail of the buffer */
	str_cpy(dest + used, size - used, src);
}
889
890	/** Convert space-padded ASCII to string.
891	*
892	* Common legacy text encoding in hardware is 7-bit ASCII fitted into
893	* a fixed-width byte buffer (bit 7 always zero), right-padded with spaces
894	* (ASCII 0x20). Convert space-padded ascii to string representation.
895	*
896	* If the text does not fit into the destination buffer, the function converts
897	* as many characters as possible and returns EOVERFLOW.
898	*
899	* If the text contains non-ASCII bytes (with bit 7 set), the whole string is
900	* converted anyway and invalid characters are replaced with question marks
901	* (U_SPECIAL) and the function returns EIO.
902	*
903	* Regardless of return value upon return @a dest will always be well-formed.
904	*
905	* @param dest Destination buffer
906	* @param size Size of destination buffer
907	* @param src Space-padded ASCII.
908	* @param n Size of the source buffer in bytes.
909	*
910	* @return EOK on success, EOVERFLOW if the text does not fit
911	* destination buffer, EIO if the text contains
912	* non-ASCII bytes.
913	*/
914	errno_t spascii_to_str(char dest, size_t size, const uint8_t src, size_t n)
915	{
916	size_t sidx;
917	size_t didx;
918	size_t dlast;
919	uint8_t byte;
920	errno_t rc;
921	errno_t result;
922
923	/* There must be space for a null terminator in the buffer. */
924	assert(size > 0);
925	result = EOK;
926
927	didx = 0;
928	dlast = 0;
929	for (sidx = 0; sidx < n; ++sidx) {
930	byte = src[sidx];
931	if (!ascii_check(byte)) {
932	byte = U_SPECIAL;
933	result = EIO;
934	}
935
936	rc = chr_encode(byte, dest, &didx, size - 1);
937	if (rc != EOK) {
938	assert(rc == EOVERFLOW);
939	dest[didx] = '\0';
940	return rc;
941	}
942
943	/* Remember dest index after last non-empty character */
944	if (byte != 0x20)
945	dlast = didx;
946	}
947
948	/* Terminate string after last non-empty character */
949	dest[dlast] = '\0';
950	return result;
951	}
952
953	/** Convert wide string to string.
954	*
955	* Convert wide string @a src to string. The output is written to the buffer
956	* specified by @a dest and @a size. @a size must be non-zero and the string
957	* written will always be well-formed.
958	*
959	* @param dest Destination buffer.
960	* @param size Size of the destination buffer.
961	* @param src Source wide string.
962	*/
963	void wstr_to_str(char dest, size_t size, const wchar_t src)
964	{
965	wchar_t ch;
966	size_t src_idx;
967	size_t dest_off;
968
969	/* There must be space for a null terminator in the buffer. */
970	assert(size > 0);
971
972	src_idx = 0;
973	dest_off = 0;
974
975	while ((ch = src[src_idx++]) != 0) {
976	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
977	break;
978	}
979
980	dest[dest_off] = '\0';
981	}
982
983	/** Convert UTF16 string to string.
984	*
985	* Convert utf16 string @a src to string. The output is written to the buffer
986	* specified by @a dest and @a size. @a size must be non-zero and the string
987	* written will always be well-formed. Surrogate pairs also supported.
988	*
989	* @param dest Destination buffer.
990	* @param size Size of the destination buffer.
991	* @param src Source utf16 string.
992	*
993	* @return EOK, if success, an error code otherwise.
994	*/
995	errno_t utf16_to_str(char dest, size_t size, const uint16_t src)
996	{
997	size_t idx = 0, dest_off = 0;
998	wchar_t ch;
999	errno_t rc = EOK;
1000
1001	/* There must be space for a null terminator in the buffer. */
1002	assert(size > 0);
1003
1004	while (src[idx]) {
1005	if ((src[idx] & 0xfc00) == 0xd800) {
1006	if (src[idx + 1] && (src[idx + 1] & 0xfc00) == 0xdc00) {
1007	ch = 0x10000;
1008	ch += (src[idx] & 0x03FF) << 10;
1009	ch += (src[idx + 1] & 0x03FF);
1010	idx += 2;
1011	} else
1012	break;
1013	} else {
1014	ch = src[idx];
1015	idx++;
1016	}
1017	rc = chr_encode(ch, dest, &dest_off, size - 1);
1018	if (rc != EOK)
1019	break;
1020	}
1021	dest[dest_off] = '\0';
1022	return rc;
1023	}
1024
1025	/** Convert string to UTF16 string.
1026	*
1027	* Convert string @a src to utf16 string. The output is written to the buffer
1028	* specified by @a dest and @a dlen. @a dlen must be non-zero and the string
1029	* written will always be well-formed. Surrogate pairs also supported.
1030	*
1031	* @param dest Destination buffer.
1032	* @param dlen Number of utf16 characters that fit in the destination buffer.
1033	* @param src Source string.
1034	*
1035	* @return EOK, if success, an error code otherwise.
1036	*/
1037	errno_t str_to_utf16(uint16_t dest, size_t dlen, const char src)
1038	{
1039	errno_t rc = EOK;
1040	size_t offset = 0;
1041	size_t idx = 0;
1042	wchar_t c;
1043
1044	assert(dlen > 0);
1045
1046	while ((c = str_decode(src, &offset, STR_NO_LIMIT)) != 0) {
1047	if (c > 0x10000) {
1048	if (idx + 2 >= dlen - 1) {
1049	rc = EOVERFLOW;
1050	break;
1051	}
1052	c = (c - 0x10000);
1053	dest[idx] = 0xD800 \| (c >> 10);
1054	dest[idx + 1] = 0xDC00 \| (c & 0x3FF);
1055	idx++;
1056	} else {
1057	dest[idx] = c;
1058	}
1059
1060	idx++;
1061	if (idx >= dlen - 1) {
1062	rc = EOVERFLOW;
1063	break;
1064	}
1065	}
1066
1067	dest[idx] = '\0';
1068	return rc;
1069	}
1070
/** Get size of UTF-16 string.
 *
 * Count the 16-bit words that precede the terminating zero word of the
 * UTF-16 string @a ustr. Each half of a surrogate pair counts as one word.
 *
 * @param ustr UTF-16 string to consider.
 *
 * @return Number of words used by the UTF-16 string
 *
 */
size_t utf16_wsize(const uint16_t *ustr)
{
	const uint16_t *end = ustr;

	/* Walk to the terminator; the element distance is the word count. */
	while (*end != 0)
		end++;

	return (size_t) (end - ustr);
}
1090
1091	/** Convert wide string to new string.
1092	*
1093	* Convert wide string @a src to string. Space for the new string is allocated
1094	* on the heap.
1095	*
1096	* @param src Source wide string.
1097	* @return New string.
1098	*/
1099	char wstr_to_astr(const wchar_t src)
1100	{
1101	char dbuf[STR_BOUNDS(1)];
1102	char *str;
1103	wchar_t ch;
1104
1105	size_t src_idx;
1106	size_t dest_off;
1107	size_t dest_size;
1108
1109	/* Compute size of encoded string. */
1110
1111	src_idx = 0;
1112	dest_size = 0;
1113
1114	while ((ch = src[src_idx++]) != 0) {
1115	dest_off = 0;
1116	if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
1117	break;
1118	dest_size += dest_off;
1119	}
1120
1121	str = malloc(dest_size + 1);
1122	if (str == NULL)
1123	return NULL;
1124
1125	/* Encode string. */
1126
1127	src_idx = 0;
1128	dest_off = 0;
1129
1130	while ((ch = src[src_idx++]) != 0) {
1131	if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
1132	break;
1133	}
1134
1135	str[dest_size] = '\0';
1136	return str;
1137	}
1138
1139	/** Convert string to wide string.
1140	*
1141	* Convert string @a src to wide string. The output is written to the
1142	* buffer specified by @a dest and @a dlen. @a dlen must be non-zero
1143	* and the wide string written will always be null-terminated.
1144	*
1145	* @param dest Destination buffer.
1146	* @param dlen Length of destination buffer (number of wchars).
1147	* @param src Source string.
1148	*/
1149	void str_to_wstr(wchar_t dest, size_t dlen, const char src)
1150	{
1151	size_t offset;
1152	size_t di;
1153	wchar_t c;
1154
1155	assert(dlen > 0);
1156
1157	offset = 0;
1158	di = 0;
1159
1160	do {
1161	if (di >= dlen - 1)
1162	break;
1163
1164	c = str_decode(src, &offset, STR_NO_LIMIT);
1165	dest[di++] = c;
1166	} while (c != '\0');
1167
1168	dest[dlen - 1] = '\0';
1169	}
1170
/** Convert string to newly allocated wide string.
 *
 * Convert string @a str to wide string. A new wide null-terminated
 * string is allocated with calloc(), sized by the number of code points
 * in @a str plus one for the terminator.
 *
 * @param str Source string.
 *
 * @return New wide string, or NULL if the allocation failed.
 *
 */
wchar_t *str_to_awstr(const char *str)
{
	size_t len = str_code_points(str);

	wchar_t *wstr = calloc(len + 1, sizeof(wchar_t));
	if (wstr == NULL)
		return NULL;

	str_to_wstr(wstr, len + 1, str);
	return wstr;
}
1189
1190	/** Find first occurence of character in string.
1191	*
1192	* @param str String to search.
1193	* @param ch Character to look for.
1194	*
1195	* @return Pointer to character in @a str or NULL if not found.
1196	*/
1197	char str_chr(const char str, wchar_t ch)
1198	{
1199	wchar_t acc;
1200	size_t off = 0;
1201	size_t last = 0;
1202
1203	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
1204	if (acc == ch)
1205	return (char *) (str + last);
1206	last = off;
1207	}
1208
1209	return NULL;
1210	}
1211
1212	/** Find first occurence of substring in string.
1213	*
1214	* @param hs Haystack (string)
1215	* @param n Needle (substring to look for)
1216	*
1217	* @return Pointer to character in @a hs or @c NULL if not found.
1218	*/
1219	char str_str(const char hs, const char *n)
1220	{
1221	size_t off = 0;
1222
1223	if (str_lcmp(hs, n, str_code_points(n)) == 0)
1224	return (char *)hs;
1225
1226	while (str_decode(hs, &off, STR_NO_LIMIT) != 0) {
1227	if (str_lcmp(hs + off, n, str_code_points(n)) == 0)
1228	return (char *)(hs + off);
1229	}
1230
1231	return NULL;
1232	}
1233
1234	/** Removes specified trailing characters from a string.
1235	*
1236	* @param str String to remove from.
1237	* @param ch Character to remove.
1238	*/
1239	void str_rtrim(char *str, wchar_t ch)
1240	{
1241	size_t off = 0;
1242	size_t pos = 0;
1243	wchar_t c;
1244	bool update_last_chunk = true;
1245	char *last_chunk = NULL;
1246
1247	while ((c = str_decode(str, &off, STR_NO_LIMIT))) {
1248	if (c != ch) {
1249	update_last_chunk = true;
1250	last_chunk = NULL;
1251	} else if (update_last_chunk) {
1252	update_last_chunk = false;
1253	last_chunk = (str + pos);
1254	}
1255	pos = off;
1256	}
1257
1258	if (last_chunk)
1259	*last_chunk = '\0';
1260	}
1261
1262	/** Removes specified leading characters from a string.
1263	*
1264	* @param str String to remove from.
1265	* @param ch Character to remove.
1266	*/
1267	void str_ltrim(char *str, wchar_t ch)
1268	{
1269	wchar_t acc;
1270	size_t off = 0;
1271	size_t pos = 0;
1272	size_t str_sz = str_bytes(str);
1273
1274	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
1275	if (acc != ch)
1276	break;
1277	else
1278	pos = off;
1279	}
1280
1281	if (pos > 0) {
1282	memmove(str, &str[pos], str_sz - pos);
1283	pos = str_sz - pos;
1284	str[pos] = '\0';
1285	}
1286	}
1287
1288	/** Find last occurence of character in string.
1289	*
1290	* @param str String to search.
1291	* @param ch Character to look for.
1292	*
1293	* @return Pointer to character in @a str or NULL if not found.
1294	*/
1295	char str_rchr(const char str, wchar_t ch)
1296	{
1297	wchar_t acc;
1298	size_t off = 0;
1299	size_t last = 0;
1300	const char *res = NULL;
1301
1302	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
1303	if (acc == ch)
1304	res = (str + last);
1305	last = off;
1306	}
1307
1308	return (char *) res;
1309	}
1310
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position
 * @a pos. The characters after the position are shifted.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds or the string already holds @a max_pos
 *         characters.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_code_points(str);

	if (pos > len)
		return false;

	/*
	 * The shift below stores the terminator at index len + 1, so the
	 * string must hold fewer than max_pos characters. Since pos <= len,
	 * this also covers the pos + 1 > max_pos case.
	 * NOTE(review): this assumes the buffer has room for max_pos
	 * characters plus the terminator, which is what appending at
	 * pos == len already required -- confirm against callers.
	 */
	if (len >= max_pos)
		return false;

	/* Shift the tail, terminator included, one slot to the right. */
	size_t i;
	for (i = len + 1; i > pos; i--)
		str[i] = str[i - 1];

	str[pos] = ch;

	return true;
}
1340
/** Delete one wide character from a wide string.
 *
 * The character at index @a pos is dropped and everything behind it,
 * the terminator included, moves one slot towards the beginning.
 *
 * @param str String to modify in place.
 * @param pos Index of the character to delete.
 *
 * @return True if a character was deleted, false if @a pos does not
 *         refer to a character of the string.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	size_t count = wstr_code_points(str);

	if (count <= pos)
		return false;

	/* Pull each following element, terminator included, one slot left. */
	wchar_t *slot = str + pos;
	wchar_t *last = str + count;

	while (slot < last) {
		slot[0] = slot[1];
		slot++;
	}

	return true;
}
1366
/** Duplicate string.
 *
 * Allocate a new string with malloc() and copy characters from the
 * source string into it using str_cpy().
 *
 * The duplicate string is always a well-formed
 * null-terminated UTF-8 string, but it can differ from the source
 * string on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	/* One extra byte for the terminator. */
	size_t size = str_bytes(src) + 1;
	char *dest = malloc(size);
	if (!dest)
		return NULL;

	str_cpy(dest, size, src);
	return dest;
}
1393
/** Duplicate string with size limit.
 *
 * Allocate a new string with malloc() and copy up to @a n bytes from
 * the source string into it using str_ncpy().
 * No more than @a n + 1 bytes is allocated, but if the size
 * occupied by the source string is smaller than @a n + 1,
 * less is allocated.
 *
 * The duplicate string is always a well-formed
 * null-terminated UTF-8 string, but it can differ from the source
 * string on the byte level.
 *
 * @param src Source string.
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	/* Never allocate more than the source actually needs. */
	size_t size = str_bytes(src);
	if (size > n)
		size = n;

	char *dest = malloc(size + 1);
	if (!dest)
		return NULL;

	str_ncpy(dest, size + 1, src, size);
	return dest;
}
1427
/** Split string by delimiters.
 *
 * The first token of @a s is terminated in place by overwriting the
 * delimiter that follows it.
 *
 * @param s     String to be tokenized. If NULL, NULL is returned
 *              and @a next is left untouched.
 * @param delim String with the delimiters.
 * @param next  Variable which will receive the pointer to the
 *              continuation of the string following the first
 *              occurrence of any of the delimiter characters.
 *              May be NULL.
 *
 * @return Pointer to the prefix of @a s before the first
 *         delimiter character. NULL if no such prefix
 *         exists.
 *
 */
char *str_tok(char *s, const char *delim, char **next)
{
	char *start, *end;

	if (!s)
		return NULL;

	size_t len = str_bytes(s);
	size_t cur;
	size_t tmp;
	wchar_t ch;

	/* Skip over leading delimiters. */
	tmp = 0;
	cur = 0;
	while ((ch = str_decode(s, &tmp, len)) && str_chr(delim, ch))
		cur = tmp;
	start = &s[cur];

	/* Skip over token characters. */
	tmp = cur;
	while ((ch = str_decode(s, &tmp, len)) && !str_chr(delim, ch))
		cur = tmp;
	end = &s[cur];

	/*
	 * If a delimiter stopped the scan, continue right behind it;
	 * otherwise the end of the string was reached.
	 */
	if (next)
		*next = (ch ? &s[tmp] : &s[cur]);

	if (start == end)
		return NULL;	/* No more tokens. */

	/* Overwrite delimiter with NULL terminator. */
	*end = '\0';
	return start;
}
1474
1475	/** Convert string to uint64_t (internal variant).
1476	*
1477	* @param nptr Pointer to string.
1478	* @param endptr Pointer to the first invalid character is stored here.
1479	* @param base Zero or number between 2 and 36 inclusive.
1480	* @param neg Indication of unary minus is stored here.
1481	* @apram result Result of the conversion.
1482	*
1483	* @return EOK if conversion was successful.
1484	*
1485	*/
1486	static errno_t str_uint(const char nptr, char *endptr, unsigned int base,
1487	bool neg, uint64_t result)
1488	{
1489	assert(endptr != NULL);
1490	assert(neg != NULL);
1491	assert(result != NULL);
1492
1493	*neg = false;
1494	const char *str = nptr;
1495
1496	/* Ignore leading whitespace */
1497	while (isspace(*str))
1498	str++;
1499
1500	if (*str == '-') {
1501	*neg = true;
1502	str++;
1503	} else if (*str == '+')
1504	str++;
1505
1506	if (base == 0) {
1507	/* Decode base if not specified */
1508	base = 10;
1509
1510	if (*str == '0') {
1511	base = 8;
1512	str++;
1513
1514	switch (*str) {
1515	case 'b':
1516	case 'B':
1517	base = 2;
1518	str++;
1519	break;
1520	case 'o':
1521	case 'O':
1522	base = 8;
1523	str++;
1524	break;
1525	case 'd':
1526	case 'D':
1527	case 't':
1528	case 'T':
1529	base = 10;
1530	str++;
1531	break;
1532	case 'x':
1533	case 'X':
1534	base = 16;
1535	str++;
1536	break;
1537	default:
1538	str--;
1539	}
1540	}
1541	} else {
1542	/* Check base range */
1543	if ((base < 2) \|\| (base > 36)) {
1544	endptr = (char ) str;
1545	return EINVAL;
1546	}
1547	}
1548
1549	*result = 0;
1550	const char *startstr = str;
1551
1552	while (*str != 0) {
1553	unsigned int digit;
1554
1555	if ((str >= 'a') && (str <= 'z'))
1556	digit = *str - 'a' + 10;
1557	else if ((str >= 'A') && (str <= 'Z'))
1558	digit = *str - 'A' + 10;
1559	else if ((str >= '0') && (str <= '9'))
1560	digit = *str - '0';
1561	else
1562	break;
1563
1564	if (digit >= base)
1565	break;
1566
1567	uint64_t prev = *result;
1568	result = (result) * base + digit;
1569
1570	if (*result < prev) {
1571	/* Overflow */
1572	endptr = (char ) str;
1573	return EOVERFLOW;
1574	}
1575
1576	str++;
1577	}
1578
1579	if (str == startstr) {
1580	/*
1581	* No digits were decoded => first invalid character is
1582	* the first character of the string.
1583	*/
1584	str = nptr;
1585	}
1586
1587	endptr = (char ) str;
1588
1589	if (str == nptr)
1590	return EINVAL;
1591
1592	return EOK;
1593	}
1594
1595	/** Convert string to uint8_t.
1596	*
1597	* @param nptr Pointer to string.
1598	* @param endptr If not NULL, pointer to the first invalid character
1599	* is stored here.
1600	* @param base Zero or number between 2 and 36 inclusive.
1601	* @param strict Do not allow any trailing characters.
1602	* @param result Result of the conversion.
1603	*
1604	* @return EOK if conversion was successful.
1605	*
1606	*/
1607	errno_t str_uint8_t(const char nptr, const char *endptr, unsigned int base,
1608	bool strict, uint8_t *result)
1609	{
1610	assert(result != NULL);
1611
1612	bool neg;
1613	char *lendptr;
1614	uint64_t res;
1615	errno_t ret = str_uint(nptr, &lendptr, base, &neg, &res);
1616
1617	if (endptr != NULL)
1618	endptr = (char ) lendptr;
1619
1620	if (ret != EOK)
1621	return ret;
1622
1623	/* Do not allow negative values */
1624	if (neg)
1625	return EINVAL;
1626
1627	/*
1628	* Check whether we are at the end of
1629	* the string in strict mode
1630	*/
1631	if ((strict) && (*lendptr != 0))
1632	return EINVAL;
1633
1634	/* Check for overflow */
1635	uint8_t _res = (uint8_t) res;
1636	if (_res != res)
1637	return EOVERFLOW;
1638
1639	*result = _res;
1640
1641	return EOK;
1642	}
1643
1644	/** Convert string to uint16_t.
1645	*
1646	* @param nptr Pointer to string.
1647	* @param endptr If not NULL, pointer to the first invalid character
1648	* is stored here.
1649	* @param base Zero or number between 2 and 36 inclusive.
1650	* @param strict Do not allow any trailing characters.
1651	* @param result Result of the conversion.
1652	*
1653	* @return EOK if conversion was successful.
1654	*
1655	*/
1656	errno_t str_uint16_t(const char nptr, const char *endptr, unsigned int base,
1657	bool strict, uint16_t *result)
1658	{
1659	assert(result != NULL);
1660
1661	bool neg;
1662	char *lendptr;
1663	uint64_t res;
1664	errno_t ret = str_uint(nptr, &lendptr, base, &neg, &res);
1665
1666	if (endptr != NULL)
1667	endptr = (char ) lendptr;
1668
1669	if (ret != EOK)
1670	return ret;
1671
1672	/* Do not allow negative values */
1673	if (neg)
1674	return EINVAL;
1675
1676	/*
1677	* Check whether we are at the end of
1678	* the string in strict mode
1679	*/
1680	if ((strict) && (*lendptr != 0))
1681	return EINVAL;
1682
1683	/* Check for overflow */
1684	uint16_t _res = (uint16_t) res;
1685	if (_res != res)
1686	return EOVERFLOW;
1687
1688	*result = _res;
1689
1690	return EOK;
1691	}
1692
1693	/** Convert string to uint32_t.
1694	*
1695	* @param nptr Pointer to string.
1696	* @param endptr If not NULL, pointer to the first invalid character
1697	* is stored here.
1698	* @param base Zero or number between 2 and 36 inclusive.
1699	* @param strict Do not allow any trailing characters.
1700	* @param result Result of the conversion.
1701	*
1702	* @return EOK if conversion was successful.
1703	*
1704	*/
1705	errno_t str_uint32_t(const char nptr, const char *endptr, unsigned int base,
1706	bool strict, uint32_t *result)
1707	{
1708	assert(result != NULL);
1709
1710	bool neg;
1711	char *lendptr;
1712	uint64_t res;
1713	errno_t ret = str_uint(nptr, &lendptr, base, &neg, &res);
1714
1715	if (endptr != NULL)
1716	endptr = (char ) lendptr;
1717
1718	if (ret != EOK)
1719	return ret;
1720
1721	/* Do not allow negative values */
1722	if (neg)
1723	return EINVAL;
1724
1725	/*
1726	* Check whether we are at the end of
1727	* the string in strict mode
1728	*/
1729	if ((strict) && (*lendptr != 0))
1730	return EINVAL;
1731
1732	/* Check for overflow */
1733	uint32_t _res = (uint32_t) res;
1734	if (_res != res)
1735	return EOVERFLOW;
1736
1737	*result = _res;
1738
1739	return EOK;
1740	}
1741
1742	/** Convert string to uint64_t.
1743	*
1744	* @param nptr Pointer to string.
1745	* @param endptr If not NULL, pointer to the first invalid character
1746	* is stored here.
1747	* @param base Zero or number between 2 and 36 inclusive.
1748	* @param strict Do not allow any trailing characters.
1749	* @param result Result of the conversion.
1750	*
1751	* @return EOK if conversion was successful.
1752	*
1753	*/
1754	errno_t str_uint64_t(const char nptr, const char *endptr, unsigned int base,
1755	bool strict, uint64_t *result)
1756	{
1757	assert(result != NULL);
1758
1759	bool neg;
1760	char *lendptr;
1761	errno_t ret = str_uint(nptr, &lendptr, base, &neg, result);
1762
1763	if (endptr != NULL)
1764	endptr = (char ) lendptr;
1765
1766	if (ret != EOK)
1767	return ret;
1768
1769	/* Do not allow negative values */
1770	if (neg)
1771	return EINVAL;
1772
1773	/*
1774	* Check whether we are at the end of
1775	* the string in strict mode
1776	*/
1777	if ((strict) && (*lendptr != 0))
1778	return EINVAL;
1779
1780	return EOK;
1781	}
1782
1783	/** Convert string to int64_t.
1784	*
1785	* @param nptr Pointer to string.
1786	* @param endptr If not NULL, pointer to the first invalid character
1787	* is stored here.
1788	* @param base Zero or number between 2 and 36 inclusive.
1789	* @param strict Do not allow any trailing characters.
1790	* @param result Result of the conversion.
1791	*
1792	* @return EOK if conversion was successful.
1793	*
1794	*/
1795	int str_int64_t(const char nptr, const char *endptr, unsigned int base,
1796	bool strict, int64_t *result)
1797	{
1798	assert(result != NULL);
1799
1800	bool neg;
1801	char *lendptr;
1802	uint64_t unsigned_result;
1803	int ret = str_uint(nptr, &lendptr, base, &neg, &unsigned_result);
1804
1805	if (endptr != NULL)
1806	endptr = (char ) lendptr;
1807
1808	if (ret != EOK)
1809	return ret;
1810
1811	/* Do not allow negative values */
1812	if (neg) {
1813	if (unsigned_result == UINT64_MAX)
1814	return EINVAL;
1815
1816	*result = -(int64_t) unsigned_result;
1817	} else
1818	*result = unsigned_result;
1819
1820	/*
1821	* Check whether we are at the end of
1822	* the string in strict mode
1823	*/
1824	if ((strict) && (*lendptr != 0))
1825	return EINVAL;
1826
1827	return EOK;
1828	}
1829
1830	/** Convert string to size_t.
1831	*
1832	* @param nptr Pointer to string.
1833	* @param endptr If not NULL, pointer to the first invalid character
1834	* is stored here.
1835	* @param base Zero or number between 2 and 36 inclusive.
1836	* @param strict Do not allow any trailing characters.
1837	* @param result Result of the conversion.
1838	*
1839	* @return EOK if conversion was successful.
1840	*
1841	*/
1842	errno_t str_size_t(const char nptr, const char *endptr, unsigned int base,
1843	bool strict, size_t *result)
1844	{
1845	assert(result != NULL);
1846
1847	bool neg;
1848	char *lendptr;
1849	uint64_t res;
1850	errno_t ret = str_uint(nptr, &lendptr, base, &neg, &res);
1851
1852	if (endptr != NULL)
1853	endptr = (char ) lendptr;
1854
1855	if (ret != EOK)
1856	return ret;
1857
1858	/* Do not allow negative values */
1859	if (neg)
1860	return EINVAL;
1861
1862	/*
1863	* Check whether we are at the end of
1864	* the string in strict mode
1865	*/
1866	if ((strict) && (*lendptr != 0))
1867	return EINVAL;
1868
1869	/* Check for overflow */
1870	size_t _res = (size_t) res;
1871	if (_res != res)
1872	return EOVERFLOW;
1873
1874	*result = _res;
1875
1876	return EOK;
1877	}
1878
/** Scale a value down and pick the matching decimal order suffix.
 *
 * A unit is used only once the value exceeds one thousand of that unit,
 * so the scaled value stays above one thousand whenever a suffix other
 * than ' ' is chosen.
 *
 * @param val    Value to scale.
 * @param rv     Place to store the scaled value.
 * @param suffix Place to store the suffix character: 'E', 'P', 'T',
 *               'G', 'M', 'k' or ' ' if the value is not scaled.
 *
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/* Dividing by 10^18 yields exa units. */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* Dividing by 10^15 yields peta units. */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
1904
/** Scale a value down and pick the matching binary order suffix.
 *
 * A unit is used only once the value exceeds 1024 of that unit.
 *
 * @param val    Value to scale.
 * @param rv     Place to store the scaled value.
 * @param suffix Place to store a pointer to the suffix string: "PiB",
 *               "TiB", "GiB", "MiB", "KiB", or a plain byte suffix if
 *               the value is not scaled.
 * @param fixed  Selects the padded variant of the plain byte suffix.
 *
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* Dividing by 2^50 yields pebibytes. */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		/*
		 * NOTE(review): if the padded form is meant to match the
		 * three-character width of the other suffixes, it needs two
		 * trailing spaces -- confirm the literal against upstream.
		 */
		if (fixed)
			*suffix = "B ";
		else
			*suffix = "B";
	}
}
1931
1932	/** @}
1933	*/

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: mainline/uspace/lib/c/generic/str.c@ 08e103d4

Download in other formats: