Context Navigation

source: mainline/common/str.c@ f94a11f

Visit:

Last change on this file since f94a11f was 0600976, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 3 months ago
Reject invalid non-shortest UTF-8 forms and fix some other issues in str
Property mode set to `100644`
File size: 40.9 KB

Line
1	/*
2	* Copyright (c) 2001-2004 Jakub Jermar
3	* Copyright (c) 2005 Martin Decky
4	* Copyright (c) 2008 Jiri Svoboda
5	* Copyright (c) 2011 Martin Sucha
6	* Copyright (c) 2011 Oleg Romanenko
7	* All rights reserved.
8	*
9	* Redistribution and use in source and binary forms, with or without
10	* modification, are permitted provided that the following conditions
11	* are met:
12	*
13	* - Redistributions of source code must retain the above copyright
14	* notice, this list of conditions and the following disclaimer.
15	* - Redistributions in binary form must reproduce the above copyright
16	* notice, this list of conditions and the following disclaimer in the
17	* documentation and/or other materials provided with the distribution.
18	* - The name of the author may not be used to endorse or promote products
19	* derived from this software without specific prior written permission.
20	*
21	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31	*/
32
33	/** @addtogroup libc
34	* @{
35	*/
36
37	/**
38	* @file
39	* @brief String functions.
40	*
41	* Strings and characters use the Universal Character Set (UCS). The standard
42	* strings, called just strings are encoded in UTF-8. Wide strings (encoded
43	* in UTF-32) are supported to a limited degree. A single character is
44	* represented as char32_t.@n
45	*
46	* Overview of the terminology:@n
47	*
48	* Term Meaning
49	* -------------------- ----------------------------------------------------
50	* byte 8 bits stored in uint8_t (unsigned 8 bit integer)
51	*
52	* character UTF-32 encoded Unicode character, stored in char32_t
53	* (unsigned 32 bit integer), code points 0 .. 1114111
54	* are valid
55	*
56	* Note that Unicode characters do not match
57	* one-to-one with displayed characters or glyphs on
58	* screen. For that level of precision, look up
59	* Grapheme Clusters.
60	*
61	* ASCII character 7 bit encoded ASCII character, stored in char
62	* (usually signed 8 bit integer), code points 0 .. 127
63	* are valid
64	*
65	* string UTF-8 encoded NULL-terminated Unicode string, char *
66	*
67	* wide string UTF-32 encoded NULL-terminated Unicode string,
68	* char32_t *
69	*
70	* [wide] string size number of BYTES in a [wide] string (excluding
71	* the NULL-terminator), size_t
72	*
73	* [wide] string length number of CHARACTERS in a [wide] string (excluding
74	* the NULL-terminator), size_t
75	*
76	* [wide] string width number of display cells on a monospace display taken
77	* by a [wide] string, size_t
78	*
79	* This is virtually impossible to determine exactly for
80	* all strings without knowing specifics of the display
81	* device, due to various factors affecting text output.
82	* If you have the option to query the terminal for
83	* position change caused by outputting the string,
84	* it is preferable to determine width that way.
85	*
86	*
87	* Overview of string metrics:@n
88	*
89	* Metric Abbrev. Type Meaning
90	* ------ ------ ------ -------------------------------------------------
91	* size n size_t number of BYTES in a string (excluding the
92	* NULL-terminator)
93	*
94	* length l size_t number of CHARACTERS in a string (excluding the
95	* null terminator)
96	*
97	* width w size_t number of display cells on a monospace display
98	* taken by a string
99	*
100	*
101	* Function naming prefixes:@n
102	*
103	* chr_ operate on characters
104	* ascii_ operate on ASCII characters
105	* str_ operate on strings
106	* wstr_ operate on wide strings
107	*
108	* [w]str_[n|l|w] operate on a prefix limited by size, length
109	* or width
110	*
111	*
112	* A specific character inside a [wide] string can be referred to by:@n
113	*
114	* pointer (char *, char32_t *)
115	* byte offset (size_t)
116	* character index (size_t)
117	*
118	*/
119
120	#include <str.h>
121
122	#include <align.h>
123	#include <assert.h>
124	#include <ctype.h>
125	#include <errno.h>
126	#include <macros.h>
127	#include <mem.h>
128	#include <stdbool.h>
129	#include <stddef.h>
130	#include <stdint.h>
131	#include <stdlib.h>
132	#include <uchar.h>
133
/** Byte mask consisting of lowest @n bits (out of 8), for 0 <= n <= 8 */
#define LO_MASK_8(n)  ((uint8_t) ((1 << (n)) - 1))

/**
 * Byte mask consisting of lowest @n bits (out of 32), for 0 <= n <= 31.
 *
 * The shift operand is an unsigned 32-bit constant so that n == 31 does not
 * shift a one into the sign bit of a signed int (undefined behaviour).
 */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1))

/**
 * Byte mask consisting of highest @n bits (out of 8).
 *
 * The operand of ~ undergoes integer promotion, so the expression has bits
 * above the low eight set as well; only the low byte is meaningful.
 */
#define HI_MASK_8(n)  (~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
145
/** Tell whether a byte has its top bit clear, i.e. is a 7-bit ASCII value. */
static inline bool _is_ascii(uint8_t b)
{
	return (b & 0x80) == 0;
}
150
/** Tell whether a byte has the form 10xxxxxx (UTF-8 continuation byte). */
static inline bool _is_continuation_byte(uint8_t b)
{
	/* The two most significant bits must be exactly "10". */
	return (b >> 6) == 0x2;
}
155
/** Number of continuation bytes needed to encode a character in UTF-8.
 *
 * @param c Character value.
 *
 * @return 0 to 3 for values that fit into 7, 11, 16 and 21 bits
 *         respectively, -1 for anything wider.
 */
static inline int _char_continuation_bytes(char32_t c)
{
	/* Fits into 7 bits */
	if (c < 0x80)
		return 0;

	/* Fits into 11 bits */
	if (c < 0x800)
		return 1;

	/* Fits into 16 bits */
	if (c < 0x10000)
		return 2;

	/* Fits into 21 bits */
	if (c < 0x200000)
		return 3;

	/* Codes longer than 21 bits are not supported */
	return -1;
}
173
/** Number of continuation bytes announced by a UTF-8 lead byte.
 *
 * @param b First byte of a sequence.
 *
 * @return 0 to 3 for a recognised lead byte, -1 for a byte that cannot
 *         start a sequence (10xxxxxx or 11111xxx).
 */
static inline int _continuation_bytes(uint8_t b)
{
	/* 0xxxxxxx */
	if (b < 0x80)
		return 0;

	/* 10xxxxxx -- not a lead byte */
	if (b < 0xc0)
		return -1;

	/* 110xxxxx 10xxxxxx */
	if (b < 0xe0)
		return 1;

	/* 1110xxxx 10xxxxxx 10xxxxxx */
	if (b < 0xf0)
		return 2;

	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
	if (b < 0xf8)
		return 3;

	/* 11111xxx */
	return -1;
}
194
195	/** Decode a single character from a string.
196	*
197	* Decode a single character from a string of size @a size. Decoding starts
198	* at @a offset and this offset is moved to the beginning of the next
199	* character. In case of decoding error, offset generally advances at least
200	* by one. However, offset is never moved beyond size.
201	*
202	* @param str String (not necessarily NULL-terminated).
203	* @param offset Byte offset in string where to start decoding.
204	* @param size Size of the string (in bytes).
205	*
206	* @return Value of decoded character, U_SPECIAL on decoding error or
207	* NULL if attempt to decode beyond @a size.
208	*
209	*/
210	char32_t str_decode(const char str, size_t offset, size_t size)
211	{
212	if (*offset >= size)
213	return 0;
214
215	/* First byte read from string */
216	uint8_t b0 = (uint8_t) str[(*offset)++];
217
218	/* Fast exit for the most common case. */
219	if (_is_ascii(b0))
220	return b0;
221
222	/* 10xxxxxx -- unexpected continuation byte */
223	if (_is_continuation_byte(b0))
224	return U_SPECIAL;
225
226	/* Determine code length */
227
228	int cbytes = _continuation_bytes(b0);
229	int b0_bits = 6 - cbytes; /* Data bits in first byte */
230
231	if (cbytes < 0 \|\| *offset + cbytes > size)
232	return U_SPECIAL;
233
234	char32_t ch = b0 & LO_MASK_8(b0_bits);
235
236	/* Decode continuation bytes */
237	for (int i = 0; i < cbytes; i++) {
238	uint8_t b = (uint8_t) str[*offset];
239
240	if (!_is_continuation_byte(b))
241	return U_SPECIAL;
242
243	(*offset)++;
244
245	/* Shift data bits to ch */
246	ch = (ch << CONT_BITS) \| (char32_t) (b & LO_MASK_8(CONT_BITS));
247	}
248
249	/*
250	* Reject non-shortest form encodings.
251	* See https://www.unicode.org/versions/corrigendum1.html
252	*/
253	if (cbytes != _char_continuation_bytes(ch))
254	return U_SPECIAL;
255
256	return ch;
257	}
258
259	/** Decode a single character from a string to the left.
260	*
261	* Decode a single character from a string of size @a size. Decoding starts
262	* at @a offset and this offset is moved to the beginning of the previous
263	* character. In case of decoding error, offset generally decreases at least
264	* by one. However, offset is never moved before 0.
265	*
266	* @param str String (not necessarily NULL-terminated).
267	* @param offset Byte offset in string where to start decoding.
268	* @param size Size of the string (in bytes).
269	*
270	* @return Value of decoded character, U_SPECIAL on decoding error or
271	* NULL if attempt to decode beyond @a start of str.
272	*
273	*/
274	char32_t str_decode_reverse(const char str, size_t offset, size_t size)
275	{
276	if (*offset == 0)
277	return 0;
278
279	int cbytes = 0;
280	/* Continue while continuation bytes found */
281	while (*offset > 0 && cbytes < 4) {
282	uint8_t b = (uint8_t) str[--(*offset)];
283
284	if (_is_continuation_byte(b)) {
285	cbytes++;
286	continue;
287	}
288
289	/* Invalid byte. */
290	if (cbytes != _continuation_bytes(b))
291	return U_SPECIAL;
292
293	/* Start byte */
294	size_t start_offset = *offset;
295	return str_decode(str, &start_offset, size);
296	}
297
298	/* Too many continuation bytes */
299	return U_SPECIAL;
300	}
301
302	/** Encode a single character to string representation.
303	*
304	* Encode a single character to string representation (i.e. UTF-8) and store
305	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
306	* is moved to the position where the next character can be written to.
307	*
308	* @param ch Input character.
309	* @param str Output buffer.
310	* @param offset Byte offset where to start writing.
311	* @param size Size of the output buffer (in bytes).
312	*
313	* @return EOK if the character was encoded successfully, EOVERFLOW if there
314	* was not enough space in the output buffer or EINVAL if the character
315	* code was invalid.
316	*/
317	errno_t chr_encode(char32_t ch, char str, size_t offset, size_t size)
318	{
319	if (*offset >= size)
320	return EOVERFLOW;
321
322	/* Fast exit for the most common case. */
323	if (ch < 0x80) {
324	str[(*offset)++] = (char) ch;
325	return EOK;
326	}
327
328	/* Codes longer than 21 bits are not supported */
329	if (!chr_check(ch))
330	return EINVAL;
331
332	/* Determine how many continuation bytes are needed */
333
334	unsigned int cbytes = _char_continuation_bytes(ch);
335	unsigned int b0_bits = 6 - cbytes; /* Data bits in first byte */
336
337	/* Check for available space in buffer */
338	if (*offset + cbytes >= size)
339	return EOVERFLOW;
340
341	/* Encode continuation bytes */
342	unsigned int i;
343	for (i = cbytes; i > 0; i--) {
344	str[*offset + i] = 0x80 \| (ch & LO_MASK_32(CONT_BITS));
345	ch >>= CONT_BITS;
346	}
347
348	/* Encode first byte */
349	str[*offset] = (ch & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
350
351	/* Advance offset */
352	*offset += cbytes + 1;
353
354	return EOK;
355	}
356
357	/* Convert in place any bytes that don't form a valid character into U_SPECIAL. */
358	static void _sanitize_string(char *str, size_t n)
359	{
360	uint8_t b = (uint8_t ) str;
361
362	for (; *b && n > 0; b++, n--) {
363	int cont = _continuation_bytes(b[0]);
364	if (__builtin_expect(cont, 0) == 0)
365	continue;
366
367	if (cont < 0 \|\| n <= (size_t) cont) {
368	b[0] = U_SPECIAL;
369	continue;
370	}
371
372	/* Check continuation bytes. */
373	for (int i = 1; i <= cont; i++) {
374	if (!_is_continuation_byte(b[i])) {
375	b[0] = U_SPECIAL;
376	continue;
377	}
378	}
379
380	/*
381	* Check for non-shortest form encoding.
382	* See https://www.unicode.org/versions/corrigendum1.html
383	*/
384
385	switch (cont) {
386	case 1:
387	/* 0b110!!!!x 0b10xxxxxx */
388	if (!(b[0] & 0b00011110))
389	b[0] = U_SPECIAL;
390
391	continue;
392	case 2:
393	/* 0b1110!!!! 0b10!xxxxx 0b10xxxxxx */
394	if (!(b[0] & 0b00001111) && !(b[1] & 0b00100000))
395	b[0] = U_SPECIAL;
396
397	continue;
398	case 3:
399	/* 0b11110!!! 0b10!!xxxx 0b10xxxxxx 0b10xxxxxx */
400	if (!(b[0] & 0b00000111) && !(b[1] & 0b00110000))
401	b[0] = U_SPECIAL;
402
403	continue;
404	}
405	}
406	}
407
/** Count the bytes of a string up to, but excluding, its zero terminator. */
static size_t _str_size(const char *str)
{
	const char *end = str;

	while (*end != 0)
		end++;

	return (size_t) (end - str);
}
417
/** Get size of string.
 *
 * Report how many bytes the string @a str occupies, not counting the
 * NULL-terminator.
 *
 * @param str String to consider.
 *
 * @return Number of bytes used by the string
 *
 */
size_t str_size(const char *str)
{
	size_t nbytes = _str_size(str);

	return nbytes;
}
432
/** Get size of wide string.
 *
 * Report how many bytes the wide string @a str occupies, not counting the
 * NULL-terminator.
 *
 * @param str Wide string to consider.
 *
 * @return Number of bytes used by the wide string
 *
 */
size_t wstr_size(const char32_t *str)
{
	size_t nchars = wstr_length(str);

	return nchars * sizeof(char32_t);
}
447
448	/** Get size of string with length limit.
449	*
450	* Get the number of bytes which are used by up to @a max_len first
451	* characters in the string @a str. If @a max_len is greater than
452	* the length of @a str, the entire string is measured (excluding the
453	* NULL-terminator).
454	*
455	* @param str String to consider.
456	* @param max_len Maximum number of characters to measure.
457	*
458	* @return Number of bytes used by the characters.
459	*
460	*/
461	size_t str_lsize(const char *str, size_t max_len)
462	{
463	size_t len = 0;
464	size_t offset = 0;
465
466	while (len < max_len) {
467	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
468	break;
469
470	len++;
471	}
472
473	return offset;
474	}
475
/** Count the bytes of a string, examining at most @a max_size bytes.
 *
 * The bound is tested before each byte is read, so @a str need not be
 * terminated within the first @a max_size bytes.
 *
 * @param str      String to measure.
 * @param max_size Maximum number of bytes to examine.
 *
 * @return Number of bytes before the terminator, or @a max_size if no
 *         terminator was found within the limit.
 */
static size_t _str_nsize(const char *str, size_t max_size)
{
	size_t size = 0;

	while (size < max_size && str[size] != 0)
		size++;

	return size;
}
485
/** Get size of string with size limit.
 *
 * Report how many bytes the string @a str occupies (excluding the
 * NULL-terminator), capped at @a max_size bytes.
 *
 * @param str      String to consider.
 * @param max_size Maximum number of bytes to measure.
 *
 * @return Number of bytes used by the string
 *
 */
size_t str_nsize(const char *str, size_t max_size)
{
	size_t nbytes = _str_nsize(str, max_size);

	return nbytes;
}
501
/** Get size of wide string with size limit.
 *
 * Report how many bytes the wide string @a str occupies (excluding the
 * NULL-terminator), capped at @a max_size bytes.
 *
 * @param str      Wide string to consider.
 * @param max_size Maximum number of bytes to measure.
 *
 * @return Number of bytes used by the wide string
 *
 */
size_t wstr_nsize(const char32_t *str, size_t max_size)
{
	size_t nchars = wstr_nlength(str, max_size);

	return nchars * sizeof(char32_t);
}
517
/** Get size of wide string with length limit.
 *
 * Get the number of bytes which are used by up to @a max_len first
 * wide characters in the wide string @a str. If @a max_len is greater than
 * the length of @a str, the entire wide string is measured (excluding the
 * NULL-terminator).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of wide characters to measure.
 *
 * @return Number of bytes used by the wide characters.
 *
 */
size_t wstr_lsize(const char32_t *str, size_t max_len)
{
	size_t len = 0;

	/*
	 * Count characters directly rather than converting max_len to a
	 * byte count, which wraps around for very large limits.
	 */
	while (len < max_len && str[len] != 0)
		len++;

	return len * sizeof(char32_t);
}
535
536	/** Get number of characters in a string.
537	*
538	* @param str NULL-terminated string.
539	*
540	* @return Number of characters in string.
541	*
542	*/
543	size_t str_length(const char *str)
544	{
545	size_t len = 0;
546	size_t offset = 0;
547
548	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
549	len++;
550
551	return len;
552	}
553
/** Get number of characters in a wide string.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return Number of characters in @a wstr.
 *
 */
size_t wstr_length(const char32_t *wstr)
{
	const char32_t *end = wstr;

	while (*end != 0)
		end++;

	return (size_t) (end - wstr);
}
570
/** Get number of characters in a string with size limit.
 *
 * Decodes characters until a zero is decoded, which also happens once
 * @a size bytes have been consumed.
 *
 * @param str  NULL-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in string.
 *
 */
size_t str_nlength(const char *str, size_t size)
{
	size_t count = 0;

	for (size_t pos = 0; str_decode(str, &pos, size) != 0; )
		count++;

	return count;
}
589
/** Get number of characters in a wide string with size limit.
 *
 * Only whole characters that fit into @a size bytes are considered.
 *
 * @param str  NULL-terminated wide string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of characters in wide string.
 *
 */
size_t wstr_nlength(const char32_t *str, size_t size)
{
	/* Number of complete characters that fit into size bytes. */
	size_t max_chars = size / sizeof(char32_t);
	size_t count = 0;

	while (count < max_chars && str[count] != 0)
		count++;

	return count;
}
611
/** Get character display width on a character cell display.
 *
 * Every character is currently reported as occupying a single cell.
 *
 * @param ch Character
 * @return Width of character in cells.
 */
size_t chr_width(char32_t ch)
{
	(void) ch;

	return 1;
}
621
622	/** Get string display width on a character cell display.
623	*
624	* @param str String
625	* @return Width of string in cells.
626	*/
627	size_t str_width(const char *str)
628	{
629	size_t width = 0;
630	size_t offset = 0;
631	char32_t ch;
632
633	while ((ch = str_decode(str, &offset, STR_NO_LIMIT)) != 0)
634	width += chr_width(ch);
635
636	return width;
637	}
638
/** Check whether character is plain ASCII.
 *
 * @param ch Character to test.
 *
 * @return True if character is plain ASCII (code point 0 .. 127).
 *
 */
bool ascii_check(char32_t ch)
{
	return ch < 0x80;
}
651
/** Check whether character is valid
 *
 * @param ch Character to test.
 *
 * @return True if character lies in the range 0 .. 0x10FFFF (1114111).
 *
 */
bool chr_check(char32_t ch)
{
	return ch <= 0x10FFFF;
}
664
/** Compare two NULL terminated strings.
 *
 * Do a char-by-char comparison of two NULL-terminated strings.
 * The strings are considered equal iff their length is equal
 * and both strings consist of the same sequence of characters.
 *
 * A string S1 is less than another string S2 if it has a character with
 * lower value at the first character position where the strings differ.
 * If the strings differ in length, the shorter one is treated as if
 * padded by characters with a value of zero.
 *
 * @param s1 First string to compare.
 * @param s2 Second string to compare.
 *
 * @return 0 if the strings are equal, -1 if the first is less than the second,
 *         1 if the second is less than the first.
 *
 */
int str_cmp(const char *s1, const char *s2)
{
	/*
	 * UTF-8 has the nice property that lexicographic ordering on bytes is
	 * the same as the lexicographic ordering of the character sequences.
	 * That only holds for unsigned byte values, so compare as
	 * unsigned char: with a signed plain char, bytes >= 0x80 would sort
	 * before ASCII.
	 */
	const unsigned char *a = (const unsigned char *) s1;
	const unsigned char *b = (const unsigned char *) s2;

	while (*a == *b && *a != 0) {
		a++;
		b++;
	}

	if (*a == *b)
		return 0;

	return (*a < *b) ? -1 : 1;
}
699
700	/** Compare two NULL terminated strings with length limit.
701	*
702	* Do a char-by-char comparison of two NULL-terminated strings.
703	* The strings are considered equal iff
704	* min(str_length(s1), max_len) == min(str_length(s2), max_len)
705	* and both strings consist of the same sequence of characters,
706	* up to max_len characters.
707	*
708	* A string S1 is less than another string S2 if it has a character with
709	* lower value at the first character position where the strings differ.
710	* If the strings differ in length, the shorter one is treated as if
711	* padded by characters with a value of zero. Only the first max_len
712	* characters are considered.
713	*
714	* @param s1 First string to compare.
715	* @param s2 Second string to compare.
716	* @param max_len Maximum number of characters to consider.
717	*
718	* @return 0 if the strings are equal, -1 if the first is less than the second,
719	* 1 if the second is less than the first.
720	*
721	*/
722	int str_lcmp(const char s1, const char s2, size_t max_len)
723	{
724	char32_t c1 = 0;
725	char32_t c2 = 0;
726
727	size_t off1 = 0;
728	size_t off2 = 0;
729
730	size_t len = 0;
731
732	while (true) {
733	if (len >= max_len)
734	break;
735
736	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
737	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
738
739	if (c1 < c2)
740	return -1;
741
742	if (c1 > c2)
743	return 1;
744
745	if (c1 == 0 \|\| c2 == 0)
746	break;
747
748	++len;
749	}
750
751	return 0;
752
753	}
754
755	/** Compare two NULL terminated strings in case-insensitive manner.
756	*
757	* Do a char-by-char comparison of two NULL-terminated strings.
758	* The strings are considered equal iff their length is equal
759	* and both strings consist of the same sequence of characters
760	* when converted to lower case.
761	*
762	* A string S1 is less than another string S2 if it has a character with
763	* lower value at the first character position where the strings differ.
764	* If the strings differ in length, the shorter one is treated as if
765	* padded by characters with a value of zero.
766	*
767	* @param s1 First string to compare.
768	* @param s2 Second string to compare.
769	*
770	* @return 0 if the strings are equal, -1 if the first is less than the second,
771	* 1 if the second is less than the first.
772	*
773	*/
774	int str_casecmp(const char s1, const char s2)
775	{
776	// FIXME: doesn't work for non-ASCII caseful characters
777
778	char32_t c1 = 0;
779	char32_t c2 = 0;
780
781	size_t off1 = 0;
782	size_t off2 = 0;
783
784	while (true) {
785	c1 = tolower(str_decode(s1, &off1, STR_NO_LIMIT));
786	c2 = tolower(str_decode(s2, &off2, STR_NO_LIMIT));
787
788	if (c1 < c2)
789	return -1;
790
791	if (c1 > c2)
792	return 1;
793
794	if (c1 == 0 \|\| c2 == 0)
795	break;
796	}
797
798	return 0;
799	}
800
801	/** Compare two NULL terminated strings with length limit in case-insensitive
802	* manner.
803	*
804	* Do a char-by-char comparison of two NULL-terminated strings.
805	* The strings are considered equal iff
806	* min(str_length(s1), max_len) == min(str_length(s2), max_len)
807	* and both strings consist of the same sequence of characters,
808	* up to max_len characters.
809	*
810	* A string S1 is less than another string S2 if it has a character with
811	* lower value at the first character position where the strings differ.
812	* If the strings differ in length, the shorter one is treated as if
813	* padded by characters with a value of zero. Only the first max_len
814	* characters are considered.
815	*
816	* @param s1 First string to compare.
817	* @param s2 Second string to compare.
818	* @param max_len Maximum number of characters to consider.
819	*
820	* @return 0 if the strings are equal, -1 if the first is less than the second,
821	* 1 if the second is less than the first.
822	*
823	*/
824	int str_lcasecmp(const char s1, const char s2, size_t max_len)
825	{
826	// FIXME: doesn't work for non-ASCII caseful characters
827
828	char32_t c1 = 0;
829	char32_t c2 = 0;
830
831	size_t off1 = 0;
832	size_t off2 = 0;
833
834	size_t len = 0;
835
836	while (true) {
837	if (len >= max_len)
838	break;
839
840	c1 = tolower(str_decode(s1, &off1, STR_NO_LIMIT));
841	c2 = tolower(str_decode(s2, &off2, STR_NO_LIMIT));
842
843	if (c1 < c2)
844	return -1;
845
846	if (c1 > c2)
847	return 1;
848
849	if (c1 == 0 \|\| c2 == 0)
850	break;
851
852	++len;
853	}
854
855	return 0;
856
857	}
858
/** Byte-wise test whether @a p is a prefix of @a s.
 *
 * @param s The string in which to look.
 * @param p The candidate prefix.
 *
 * @return true iff every byte of @a p matches the start of @a s.
 */
static bool _test_prefix(const char *s, const char *p)
{
	/* Advance while the bytes (not the pointers) agree. */
	while (*s == *p && *s != 0) {
		s++;
		p++;
	}

	/* p is a prefix exactly when it was consumed to its terminator. */
	return *p == 0;
}
868
/** Test whether p is a prefix of s.
 *
 * Do a char-by-char comparison of two NULL-terminated strings
 * and determine if p is a prefix of s.
 *
 * @param s The string in which to look
 * @param p The string to check if it is a prefix of s
 *
 * @return true iff p is prefix of s else false
 *
 */
bool str_test_prefix(const char *s, const char *p)
{
	return _test_prefix(s, p);
}
884
885	/** Get a string suffix.
886	*
887	* Return a string suffix defined by the prefix length.
888	*
889	* @param s The string to get the suffix from.
890	* @param prefix_length Number of prefix characters to ignore.
891	*
892	* @return String suffix.
893	*
894	*/
895	const char str_suffix(const char s, size_t prefix_length)
896	{
897	size_t off = 0;
898	size_t i = 0;
899
900	while (true) {
901	str_decode(s, &off, STR_NO_LIMIT);
902	i++;
903
904	if (i >= prefix_length)
905	break;
906	}
907
908	return s + off;
909	}
910
/** Copy string as a sequence of bytes.
 *
 * Copies bytes from @a src up to and including the zero terminator.
 * No bound is applied to @a dest.
 *
 * @param dest Destination buffer.
 * @param src  Source string.
 */
static void _str_cpy(char *dest, const char *src)
{
	while (*src)
		*(dest++) = *(src++);

	*dest = 0;
}
919
920	/** Copy string as a sequence of bytes. */
921	static void _str_cpyn(char dest, size_t size, const char src)
922	{
923	assert(dest && src && size);
924
925	if (!dest \|\| !src \|\| !size)
926	return;
927
928	if (size == STR_NO_LIMIT)
929	return _str_cpy(dest, src);
930
931	char *dest_top = dest + size - 1;
932	assert(size == 1 \|\| dest < dest_top);
933
934	while (*src && dest < dest_top)
935	(dest++) = (src++);
936
937	*dest = 0;
938	}
939
940	/** Copy string.
941	*
942	* Copy source string @a src to destination buffer @a dest.
943	* No more than @a size bytes are written. If the size of the output buffer
944	* is at least one byte, the output string will always be well-formed, i.e.
945	* null-terminated and containing only complete characters.
946	*
947	* @param dest Destination buffer.
948	* @param count Size of the destination buffer (must be > 0).
949	* @param src Source string.
950	*
951	*/
952	void str_cpy(char dest, size_t size, const char src)
953	{
954	/* There must be space for a null terminator in the buffer. */
955	assert(size > 0);
956	assert(src != NULL);
957	assert(dest != NULL);
958	assert(size == STR_NO_LIMIT \|\| dest + size > dest);
959
960	/* Copy data. */
961	_str_cpyn(dest, size, src);
962
963	/* In-place translate invalid bytes to U_SPECIAL. */
964	_sanitize_string(dest, size);
965	}
966
/** Copy size-limited substring.
 *
 * Copy prefix of string @a src of max. size @a n to destination buffer
 * @a dest. No more than @a size bytes are written. The output string will
 * always be well-formed, i.e. null-terminated and containing only complete
 * characters.
 *
 * No more than @a n bytes are read from the input string, so it does not
 * have to be null-terminated.
 *
 * @param dest Destination buffer.
 * @param size Size of the destination buffer (must be > 0).
 * @param src  Source string.
 * @param n    Maximum number of bytes to read from @a src.
 *
 */
void str_ncpy(char *dest, size_t size, const char *src, size_t n)
{
	/* There must be space for a null terminator in the buffer. */
	assert(size > 0);
	assert(src != NULL);

	/*
	 * Effective buffer size is min(size, n + 1), computed so that
	 * n + 1 cannot wrap to zero when n is the maximum size_t value.
	 */
	size_t limit = (n < size) ? n + 1 : size;

	/* Copy data. */
	_str_cpyn(dest, limit, src);

	/* In-place translate invalid bytes to U_SPECIAL. */
	_sanitize_string(dest, size);
}
995
996	/** Append one string to another.
997	*
998	* Append source string @a src to string in destination buffer @a dest.
999	* Size of the destination buffer is @a dest. If the size of the output buffer
1000	* is at least one byte, the output string will always be well-formed, i.e.
1001	* null-terminated and containing only complete characters.
1002	*
1003	* @param dest Destination buffer.
1004	* @param count Size of the destination buffer.
1005	* @param src Source string.
1006	*/
1007	void str_append(char dest, size_t size, const char src)
1008	{
1009	assert(src != NULL);
1010	assert(dest != NULL);
1011	assert(size > 0);
1012	assert(size == STR_NO_LIMIT \|\| dest + size > dest);
1013
1014	size_t dstr_size = _str_nsize(dest, size);
1015	if (dstr_size < size) {
1016	_str_cpyn(dest + dstr_size, size - dstr_size, src);
1017	_sanitize_string(dest + dstr_size, size - dstr_size);
1018	}
1019	}
1020
1021	/** Convert space-padded ASCII to string.
1022	*
1023	* Common legacy text encoding in hardware is 7-bit ASCII fitted into
1024	* a fixed-width byte buffer (bit 7 always zero), right-padded with spaces
1025	* (ASCII 0x20). Convert space-padded ascii to string representation.
1026	*
1027	* If the text does not fit into the destination buffer, the function converts
1028	* as many characters as possible and returns EOVERFLOW.
1029	*
1030	* If the text contains non-ASCII bytes (with bit 7 set), the whole string is
1031	* converted anyway and invalid characters are replaced with question marks
1032	* (U_SPECIAL) and the function returns EIO.
1033	*
1034	* Regardless of return value upon return @a dest will always be well-formed.
1035	*
1036	* @param dest Destination buffer
1037	* @param size Size of destination buffer
1038	* @param src Space-padded ASCII.
1039	* @param n Size of the source buffer in bytes.
1040	*
1041	* @return EOK on success, EOVERFLOW if the text does not fit
1042	* destination buffer, EIO if the text contains
1043	* non-ASCII bytes.
1044	*/
1045	errno_t spascii_to_str(char dest, size_t size, const uint8_t src, size_t n)
1046	{
1047	size_t len = 0;
1048
1049	/* Determine the length of the source string. */
1050	for (size_t i = 0; i < n; i++) {
1051	if (src[i] == 0)
1052	break;
1053
1054	if (src[i] != ' ')
1055	len = i + 1;
1056	}
1057
1058	errno_t result = EOK;
1059	size_t out_len = min(len, size - 1);
1060
1061	/* Copy characters */
1062	for (size_t i = 0; i < out_len; i++) {
1063	dest[i] = src[i];
1064
1065	if (dest[i] < 0) {
1066	dest[i] = U_SPECIAL;
1067	result = EIO;
1068	}
1069	}
1070
1071	dest[out_len] = 0;
1072
1073	if (out_len < len)
1074	return EOVERFLOW;
1075
1076	return result;
1077	}
1078
1079	/** Convert wide string to string.
1080	*
1081	* Convert wide string @a src to string. The output is written to the buffer
1082	* specified by @a dest and @a size. @a size must be non-zero and the string
1083	* written will always be well-formed.
1084	*
1085	* @param dest Destination buffer.
1086	* @param size Size of the destination buffer.
1087	* @param src Source wide string.
1088	*/
1089	void wstr_to_str(char dest, size_t size, const char32_t src)
1090	{
1091	char32_t ch;
1092	size_t src_idx;
1093	size_t dest_off;
1094
1095	/* There must be space for a null terminator in the buffer. */
1096	assert(size > 0);
1097
1098	src_idx = 0;
1099	dest_off = 0;
1100
1101	while ((ch = src[src_idx++]) != 0) {
1102	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
1103	break;
1104	}
1105
1106	dest[dest_off] = '\0';
1107	}
1108
1109	/** Convert UTF16 string to string.
1110	*
1111	* Convert utf16 string @a src to string. The output is written to the buffer
1112	* specified by @a dest and @a size. @a size must be non-zero and the string
1113	* written will always be well-formed. Surrogate pairs also supported.
1114	*
1115	* @param dest Destination buffer.
1116	* @param size Size of the destination buffer.
1117	* @param src Source utf16 string.
1118	*
1119	* @return EOK, if success, an error code otherwise.
1120	*/
1121	errno_t utf16_to_str(char dest, size_t size, const uint16_t src)
1122	{
1123	size_t idx = 0, dest_off = 0;
1124	char32_t ch;
1125	errno_t rc = EOK;
1126
1127	/* There must be space for a null terminator in the buffer. */
1128	assert(size > 0);
1129
1130	while (src[idx]) {
1131	if ((src[idx] & 0xfc00) == 0xd800) {
1132	if (src[idx + 1] && (src[idx + 1] & 0xfc00) == 0xdc00) {
1133	ch = 0x10000;
1134	ch += (src[idx] & 0x03FF) << 10;
1135	ch += (src[idx + 1] & 0x03FF);
1136	idx += 2;
1137	} else
1138	break;
1139	} else {
1140	ch = src[idx];
1141	idx++;
1142	}
1143	rc = chr_encode(ch, dest, &dest_off, size - 1);
1144	if (rc != EOK)
1145	break;
1146	}
1147	dest[dest_off] = '\0';
1148	return rc;
1149	}
1150
1151	/** Convert string to UTF16 string.
1152	*
1153	* Convert string @a src to utf16 string. The output is written to the buffer
1154	* specified by @a dest and @a dlen. @a dlen must be non-zero and the string
1155	* written will always be well-formed. Surrogate pairs also supported.
1156	*
1157	* @param dest Destination buffer.
1158	* @param dlen Number of utf16 characters that fit in the destination buffer.
1159	* @param src Source string.
1160	*
1161	* @return EOK, if success, an error code otherwise.
1162	*/
1163	errno_t str_to_utf16(uint16_t dest, size_t dlen, const char src)
1164	{
1165	errno_t rc = EOK;
1166	size_t offset = 0;
1167	size_t idx = 0;
1168	char32_t c;
1169
1170	assert(dlen > 0);
1171
1172	while ((c = str_decode(src, &offset, STR_NO_LIMIT)) != 0) {
1173	if (c > 0x10000) {
1174	if (idx + 2 >= dlen - 1) {
1175	rc = EOVERFLOW;
1176	break;
1177	}
1178	c = (c - 0x10000);
1179	dest[idx] = 0xD800 \| (c >> 10);
1180	dest[idx + 1] = 0xDC00 \| (c & 0x3FF);
1181	idx++;
1182	} else {
1183	dest[idx] = c;
1184	}
1185
1186	idx++;
1187	if (idx >= dlen - 1) {
1188	rc = EOVERFLOW;
1189	break;
1190	}
1191	}
1192
1193	dest[idx] = '\0';
1194	return rc;
1195	}
1196
/** Get size of UTF-16 string.
 *
 * Count the 16-bit words that precede the terminating zero word of
 * @a ustr. A surrogate pair therefore counts as two words.
 *
 * @param ustr UTF-16 string to consider.
 *
 * @return Number of words used by the UTF-16 string, terminator excluded.
 *
 */
size_t utf16_wsize(const uint16_t *ustr)
{
	const uint16_t *end = ustr;

	/* Walk to the terminator; the distance covered is the word count. */
	while (*end != 0)
		end++;

	return (size_t) (end - ustr);
}
1216
1217	/** Convert wide string to new string.
1218	*
1219	* Convert wide string @a src to string. Space for the new string is allocated
1220	* on the heap.
1221	*
1222	* @param src Source wide string.
1223	* @return New string.
1224	*/
1225	char wstr_to_astr(const char32_t src)
1226	{
1227	char dbuf[STR_BOUNDS(1)];
1228	char *str;
1229	char32_t ch;
1230
1231	size_t src_idx;
1232	size_t dest_off;
1233	size_t dest_size;
1234
1235	/* Compute size of encoded string. */
1236
1237	src_idx = 0;
1238	dest_size = 0;
1239
1240	while ((ch = src[src_idx++]) != 0) {
1241	dest_off = 0;
1242	if (chr_encode(ch, dbuf, &dest_off, STR_BOUNDS(1)) != EOK)
1243	break;
1244	dest_size += dest_off;
1245	}
1246
1247	str = malloc(dest_size + 1);
1248	if (str == NULL)
1249	return NULL;
1250
1251	/* Encode string. */
1252
1253	src_idx = 0;
1254	dest_off = 0;
1255
1256	while ((ch = src[src_idx++]) != 0) {
1257	if (chr_encode(ch, str, &dest_off, dest_size) != EOK)
1258	break;
1259	}
1260
1261	str[dest_size] = '\0';
1262	return str;
1263	}
1264
1265	/** Convert string to wide string.
1266	*
1267	* Convert string @a src to wide string. The output is written to the
1268	* buffer specified by @a dest and @a dlen. @a dlen must be non-zero
1269	* and the wide string written will always be null-terminated.
1270	*
1271	* @param dest Destination buffer.
1272	* @param dlen Length of destination buffer (number of wchars).
1273	* @param src Source string.
1274	*/
1275	void str_to_wstr(char32_t dest, size_t dlen, const char src)
1276	{
1277	size_t offset;
1278	size_t di;
1279	char32_t c;
1280
1281	assert(dlen > 0);
1282
1283	offset = 0;
1284	di = 0;
1285
1286	do {
1287	if (di >= dlen - 1)
1288	break;
1289
1290	c = str_decode(src, &offset, STR_NO_LIMIT);
1291	dest[di++] = c;
1292	} while (c != '\0');
1293
1294	dest[dlen - 1] = '\0';
1295	}
1296
/** Convert string to newly allocated wide string.
 *
 * Convert string @a str to wide string. A new wide null-terminated
 * string is allocated on the heap with calloc(); the caller owns it.
 *
 * @param str Source string.
 *
 * @return New wide string, or NULL if the allocation failed.
 */
char32_t *str_to_awstr(const char *str)
{
	size_t len = str_length(str);

	/* One extra element for the terminator. */
	char32_t *wstr = calloc(len + 1, sizeof(char32_t));
	if (wstr == NULL)
		return NULL;

	str_to_wstr(wstr, len + 1, str);
	return wstr;
}
1315
/** Find first occurrence of a byte in a string.
 *
 * Byte-wise search. When @a c is 0, the pointer to the string's
 * terminator is returned.
 *
 * @param str String to search.
 * @param c   Byte to look for.
 *
 * @return Pointer to the first matching byte in @a str or NULL if not found.
 */
static char *_strchr(const char *str, char c)
{
	while (*str != 0 && *str != c)
		str++;

	return (*str == c) ? (char *) str : NULL;
}
1323
1324	/** Find first occurence of character in string.
1325	*
1326	* @param str String to search.
1327	* @param ch Character to look for.
1328	*
1329	* @return Pointer to character in @a str or NULL if not found.
1330	*/
1331	char str_chr(const char str, char32_t ch)
1332	{
1333	/* Fast path for an ASCII character. */
1334	if (ascii_check(ch))
1335	return _strchr(str, ch);
1336
1337	/* Convert character to UTF-8. */
1338	char utf8[STR_BOUNDS(1) + 1];
1339	size_t offset = 0;
1340
1341	if (chr_encode(ch, utf8, &offset, sizeof(utf8)) != EOK \|\| offset == 0)
1342	return NULL;
1343
1344	utf8[offset] = '\0';
1345
1346	/* Find the first byte, then check if all of them are correct. */
1347	while (*str != 0) {
1348	str = _strchr(str, utf8[0]);
1349	if (!str)
1350	return NULL;
1351
1352	if (_test_prefix(str, utf8))
1353	return (char *) str;
1354
1355	str++;
1356	}
1357
1358	return NULL;
1359	}
1360
/** Find first occurrence of substring in string.
 *
 * Tries every start position in @a hs at which the remaining part of the
 * haystack is still at least as long as the needle.
 *
 * @param hs Haystack (string)
 * @param n  Needle (substring to look for)
 *
 * @return Pointer to character in @a hs or @c NULL if not found.
 */
char *str_str(const char *hs, const char *n)
{
	size_t hsize = _str_size(hs);
	size_t nsize = _str_size(n);

	/* hsize tracks the size of the haystack remaining from hs onwards. */
	while (hsize >= nsize) {
		if (_test_prefix(hs, n))
			return (char *) hs;

		hs++;
		hsize--;
	}

	return NULL;
}
1383
/** Remove trailing occurrences of a byte from a string (in place).
 *
 * The string is cut right after the last byte that differs from @a c.
 * A string consisting only of @a c becomes empty, and an empty string is
 * left untouched.
 *
 * @param str String to trim.
 * @param c   Byte to remove from the end of @a str.
 */
static void _str_rtrim(char *str, char c)
{
	/* One past the last byte to keep; str itself if nothing is kept. */
	char *keep_end = str;

	for (char *p = str; *p != 0; p++) {
		if (*p != c)
			keep_end = p + 1;
	}

	/*
	 * Truncate string. keep_end never points past the original
	 * terminator, so this write is always inside the string.
	 */
	*keep_end = 0;
}
1398
1399	/** Removes specified trailing characters from a string.
1400	*
1401	* @param str String to remove from.
1402	* @param ch Character to remove.
1403	*/
1404	void str_rtrim(char *str, char32_t ch)
1405	{
1406	/* Fast path for the ASCII case. */
1407	if (ascii_check(ch)) {
1408	_str_rtrim(str, ch);
1409	return;
1410	}
1411
1412	size_t off = 0;
1413	size_t pos = 0;
1414	char32_t c;
1415	bool update_last_chunk = true;
1416	char *last_chunk = NULL;
1417
1418	while ((c = str_decode(str, &off, STR_NO_LIMIT))) {
1419	if (c != ch) {
1420	update_last_chunk = true;
1421	last_chunk = NULL;
1422	} else if (update_last_chunk) {
1423	update_last_chunk = false;
1424	last_chunk = (str + pos);
1425	}
1426	pos = off;
1427	}
1428
1429	if (last_chunk)
1430	*last_chunk = '\0';
1431	}
1432
/** Remove leading occurrences of a byte from a string (in place).
 *
 * Counts the leading bytes equal to @a c and, if there are any, copies the
 * rest of the string to the front with _str_cpy().
 *
 * @param str String to trim.
 * @param c   Byte to remove from the start of @a str.
 */
static void _str_ltrim(char *str, char c)
{
	size_t skip = 0;

	while (str[skip] == c)
		skip++;

	/* Nothing to move when the string does not start with c. */
	if (skip != 0)
		_str_cpy(str, str + skip);
}
1443
1444	/** Removes specified leading characters from a string.
1445	*
1446	* @param str String to remove from.
1447	* @param ch Character to remove.
1448	*/
1449	void str_ltrim(char *str, char32_t ch)
1450	{
1451	/* Fast path for the ASCII case. */
1452	if (ascii_check(ch)) {
1453	_str_ltrim(str, ch);
1454	return;
1455	}
1456
1457	char32_t acc;
1458	size_t off = 0;
1459	size_t pos = 0;
1460	size_t str_sz = str_size(str);
1461
1462	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
1463	if (acc != ch)
1464	break;
1465	else
1466	pos = off;
1467	}
1468
1469	if (pos > 0) {
1470	memmove(str, &str[pos], str_sz - pos);
1471	pos = str_sz - pos;
1472	str[pos] = '\0';
1473	}
1474	}
1475
/** Find last occurrence of a byte in a string.
 *
 * Byte-wise search. The terminator is never examined, so searching for 0
 * yields NULL.
 *
 * @param str String to search.
 * @param c   Byte to look for.
 *
 * @return Pointer to the last matching byte in @a str or NULL if not found.
 */
static char *_str_rchr(const char *str, char c)
{
	const char *last = NULL;

	for (; *str != 0; str++) {
		if (*str == c)
			last = str;
	}

	return (char *) last;
}
1489
1490	/** Find last occurence of character in string.
1491	*
1492	* @param str String to search.
1493	* @param ch Character to look for.
1494	*
1495	* @return Pointer to character in @a str or NULL if not found.
1496	*/
1497	char str_rchr(const char str, char32_t ch)
1498	{
1499	if (ascii_check(ch))
1500	return _str_rchr(str, ch);
1501
1502	char32_t acc;
1503	size_t off = 0;
1504	size_t last = 0;
1505	const char *res = NULL;
1506
1507	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
1508	if (acc == ch)
1509	res = (str + last);
1510	last = off;
1511	}
1512
1513	return (char *) res;
1514	}
1515
/** Insert a wide character into a wide string.
 *
 * Insert a wide character into a wide string at position
 * @a pos. The characters after the position are shifted.
 *
 * NOTE(review): the bounds check only limits @a pos against @a max_pos,
 * not the length of the string after the insertion. Whether a string that
 * already fills the buffer can overflow depends on how callers define
 * @a max_pos -- confirm against the call sites.
 *
 * @param str     String to insert to.
 * @param ch      Character to insert to.
 * @param pos     Character index where to insert.
 * @param max_pos Characters in the buffer.
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_linsert(char32_t *str, char32_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_length(str);

	if ((pos > len) || (pos + 1 > max_pos))
		return false;

	/*
	 * Shift str[pos..len] (terminator included) one slot to the right.
	 * The condition is written as i + 1 > pos so that the loop also
	 * terminates for pos == 0, where i wraps around after reaching 0.
	 */
	for (size_t i = len; i + 1 > pos; i--)
		str[i + 1] = str[i];

	str[pos] = ch;

	return true;
}
1545
/** Remove a wide character from a wide string.
 *
 * Delete the character at index @a pos and close the gap by moving every
 * following character, the terminator included, one slot to the left.
 *
 * @param str String to remove from.
 * @param pos Character index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(char32_t *str, size_t pos)
{
	size_t len = wstr_length(str);

	if (pos >= len)
		return false;

	/* The last iteration copies the terminator from index len. */
	for (size_t dst = pos; dst < len; dst++)
		str[dst] = str[dst + 1];

	return true;
}
1571
/** Duplicate string.
 *
 * Allocate a new string with malloc() and copy the bytes of the source
 * string, terminator included, into it. The copy is then passed through
 * _sanitize_string(), so it can differ from the source string on the
 * byte level.
 *
 * The caller owns the returned string.
 *
 * @param src Source string.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_dup(const char *src)
{
	/* Size in bytes including the null terminator. */
	size_t size = _str_size(src) + 1;
	char *dest = malloc(size);
	if (!dest)
		return NULL;

	memcpy(dest, src, size);
	_sanitize_string(dest, size);
	return dest;
}
1599
/** Duplicate string with size limit.
 *
 * Allocate a new string with malloc() and copy up to @a n bytes from the
 * source string into it. No more than @a n + 1 bytes is allocated, but if
 * the size occupied by the source string is smaller than @a n + 1, less is
 * allocated.
 *
 * The copied bytes are passed through _sanitize_string() and then
 * null-terminated, so the duplicate can differ from the source string on
 * the byte level. The caller owns the returned string.
 *
 * @param src Source string.
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string, or NULL if the allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	/* Number of bytes to copy, terminator excluded. */
	size_t size = _str_nsize(src, n);

	char *dest = malloc(size + 1);
	if (!dest)
		return NULL;

	memcpy(dest, src, size);
	_sanitize_string(dest, size);
	dest[size] = 0;
	return dest;
}
1633
/** Split string by delimiters.
 *
 * Skips leading delimiter characters, then scans the token up to the next
 * delimiter (or the end of the string) and overwrites that delimiter with
 * a null terminator. @a s is modified in place.
 *
 * @param s     String to be tokenized. If NULL, NULL is returned and
 *              @a next is left untouched.
 * @param delim String with the delimiters.
 * @param next  Variable which will receive the pointer to the
 *              continuation of the string following the first
 *              occurrence of any of the delimiter characters.
 *              May be NULL.
 * @return Pointer to the prefix of @a s before the first
 *         delimiter character. NULL if no such prefix
 *         exists.
 */
char *str_tok(char *s, const char *delim, char **next)
{
	if (!s)
		return NULL;

	size_t len = str_size(s);
	size_t cur = 0;
	size_t tmp = 0;
	char32_t ch;

	/* Skip over leading delimiters. */
	while ((ch = str_decode(s, &tmp, len)) && str_chr(delim, ch))
		cur = tmp;
	char *start = &s[cur];

	/* Skip over token characters. */
	tmp = cur;
	while ((ch = str_decode(s, &tmp, len)) && !str_chr(delim, ch))
		cur = tmp;
	char *end = &s[cur];

	/*
	 * If the scan stopped on a delimiter (ch != 0), continue after it;
	 * otherwise the string ended and next points at its terminator.
	 */
	if (next)
		*next = (ch ? &s[tmp] : &s[cur]);

	if (start == end)
		return NULL;	/* No more tokens. */

	/* Overwrite delimiter with NULL terminator. */
	*end = '\0';
	return start;
}
1680
/** Scale a value down and pick a matching decimal (SI) unit prefix.
 *
 * The value is divided by the largest power of 1000 that still leaves more
 * than 1000 units (more than 10 units for the topmost range, which is
 * bounded by the width of uint64_t). Values up to 1000000 are returned
 * unscaled with a blank suffix.
 *
 * @param val    Value to scale.
 * @param rv     Receives the scaled value.
 * @param suffix Receives the prefix character: 'E' (10^18), 'P' (10^15),
 *               'T' (10^12), 'G' (10^9), 'M' (10^6), 'k' (10^3)
 *               or ' ' (unscaled).
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/* Divisor is 10^18, i.e. exa. */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* Divisor is 10^15, i.e. peta. */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
1706
/** Scale a byte count down and pick a matching binary (IEC) unit.
 *
 * The value is divided by the largest power of 1024 that still leaves more
 * than 1024 units. Values up to 1048576 are returned unscaled in bytes.
 *
 * @param val    Value to scale.
 * @param rv     Receives the scaled value.
 * @param suffix Receives a pointer to a constant unit string: "PiB" (2^50),
 *               "TiB" (2^40), "GiB" (2^30), "MiB" (2^20), "KiB" (2^10)
 *               or the byte unit.
 * @param fixed  Selects the space-padded byte unit "B " instead of "B" for
 *               unscaled values.
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* Divisor is 2^50, i.e. pebi. */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		if (fixed)
			*suffix = "B ";
		else
			*suffix = "B";
	}
}
1733
1734	/** @}
1735	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: