Context Navigation

source: mainline/kernel/generic/src/lib/str.c@ 1d2f85e

Visit:

Last change on this file since 1d2f85e was 1d2f85e, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 6 years ago
Change documentation of <str.h> functions to use unambiguous terms
Property mode set to `100644`
File size: 24.3 KB

Line
1	/*
2	* Copyright (c) 2001-2004 Jakub Jermar
3	* Copyright (c) 2005 Martin Decky
4	* Copyright (c) 2008 Jiri Svoboda
5	* Copyright (c) 2011 Martin Sucha
6	* Copyright (c) 2011 Oleg Romanenko
7	* All rights reserved.
8	*
9	* Redistribution and use in source and binary forms, with or without
10	* modification, are permitted provided that the following conditions
11	* are met:
12	*
13	* - Redistributions of source code must retain the above copyright
14	* notice, this list of conditions and the following disclaimer.
15	* - Redistributions in binary form must reproduce the above copyright
16	* notice, this list of conditions and the following disclaimer in the
17	* documentation and/or other materials provided with the distribution.
18	* - The name of the author may not be used to endorse or promote products
19	* derived from this software without specific prior written permission.
20	*
21	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
22	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
23	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
24	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
25	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
26	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
30	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31	*/
32
33	/** @addtogroup kernel_generic
34	* @{
35	*/
36
37	/**
38	* @file
39	* @brief String functions.
40	*
41	* Strings and characters use the Universal Character Set (UCS). The standard
42	* strings, called just strings are encoded in UTF-8. Wide strings (encoded
43	* in UTF-32) are supported to a limited degree. A single code point is
44	* represented as wchar_t.@n
45	*
46	* Overview of the terminology:@n
47	*
48	* Term Meaning
49	* -------------------- ----------------------------------------------------
50	* byte 8 bits stored in uint8_t (unsigned 8 bit integer)
51	*
52	* character UTF-32 encoded Unicode code point, stored in wchar_t
53	* (signed 32 bit integer), code points 0 .. 1114111
54	* are valid
55	*
56	* ASCII character 7 bit encoded ASCII character, stored in char
57	* (usually signed 8 bit integer), code points 0 .. 127
58	* are valid
59	*
60	* string UTF-8 encoded NULL-terminated Unicode string, char *
61	*
62	* wide string UTF-32 encoded NULL-terminated Unicode string,
63	* wchar_t *
64	*
65	* [wide] string size number of BYTES in a [wide] string (excluding
66	* the NULL-terminator), size_t
67	*
68	* [wide] string length number of CODE POINTS in a [wide] string (excluding
69	* the NULL-terminator), size_t
70	*
71	* [wide] string width number of display cells on a monospace display taken
72	* by a [wide] string, size_t
73	*
74	*
75	* Overview of string metrics:@n
76	*
77	* Metric Abbrev. Type Meaning
78	* ------ ------ ------ -------------------------------------------------
79	* size n size_t number of BYTES in a string (excluding the
80	* NULL-terminator)
81	*
82	* length l size_t number of CODE POINTS in a string (excluding the
83	* null terminator)
84	*
85	* width w size_t number of display cells on a monospace display
86	* taken by a string
87	*
88	*
89	* Function naming prefixes:@n
90	*
91	* chr_ operate on code points
92	* ascii_ operate on ASCII characters
93	* str_ operate on strings
94	* wstr_ operate on wide strings
95	*
96	* [w]str_[n|l|w] operate on a prefix limited by size, length
97	* or width
98	*
99	*
100	* A specific character inside a [wide] string can be referred to by:@n
101	*
102	* pointer (char *, wchar_t *)
103	* byte offset (size_t)
104	* code point index (size_t)
105	*
106	*/
107
108	#include <str.h>
109
110	#include <assert.h>
111	#include <errno.h>
112	#include <stdbool.h>
113	#include <stddef.h>
114	#include <stdint.h>
115	#include <stdlib.h>
116
117	#include <align.h>
118	#include <macros.h>
119
/** Evaluate @a cond only where wchar_t is a signed type.
 *
 * If the compiler defines __WCHAR_UNSIGNED__, the macro expands to a
 * constant true and @a cond is not evaluated at all. This is meant for
 * checks such as (ch >= 0), which are trivially true for an unsigned type.
 */
#ifdef __WCHAR_UNSIGNED__
#define WCHAR_SIGNED_CHECK(cond)  (true)
#else
#define WCHAR_SIGNED_CHECK(cond)  (cond)
#endif

/** Byte mask consisting of the lowest @a n bits (out of 8), 0 <= n <= 8 */
#define LO_MASK_8(n)  ((uint8_t) ((1u << (n)) - 1))

/** Mask consisting of the lowest @a n bits (out of 32), 0 <= n < 32 */
#define LO_MASK_32(n)  ((uint32_t) ((UINT32_C(1) << (n)) - 1))

/** Byte mask consisting of the highest @a n bits (out of 8), 0 <= n <= 8
 *
 * The result is cast back to uint8_t because the ~ operator works on the
 * promoted int and would otherwise also set every bit above the low byte.
 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
138
139	/** Decode a single code point from an UTF-8 encoded string.
140	*
141	* Decode a single code point from a string of size @a size. Decoding starts
142	* at @a offset and this offset is moved to the beginning of the next
143	* code point. In case of decoding error, offset generally advances at least
144	* by one. However, offset is never moved beyond size.
145	*
146	* @param str String (not necessarily NULL-terminated).
147	* @param offset Byte offset in string where to start decoding.
148	* @param size Size of the string (in bytes).
149	*
150	* @return Value of decoded code point, U_SPECIAL on decoding error or
151	* NULL if attempt to decode beyond @a size.
152	*
153	*/
154	wchar_t str_decode(const char str, size_t offset, size_t size)
155	{
156	if (*offset + 1 > size)
157	return 0;
158
159	/* First byte read from string */
160	uint8_t b0 = (uint8_t) str[(*offset)++];
161
162	/* Determine code length */
163
164	unsigned int b0_bits; /* Data bits in first byte */
165	unsigned int cbytes; /* Number of continuation bytes */
166
167	if ((b0 & 0x80) == 0) {
168	/* 0xxxxxxx (Plain ASCII) */
169	b0_bits = 7;
170	cbytes = 0;
171	} else if ((b0 & 0xe0) == 0xc0) {
172	/* 110xxxxx 10xxxxxx */
173	b0_bits = 5;
174	cbytes = 1;
175	} else if ((b0 & 0xf0) == 0xe0) {
176	/* 1110xxxx 10xxxxxx 10xxxxxx */
177	b0_bits = 4;
178	cbytes = 2;
179	} else if ((b0 & 0xf8) == 0xf0) {
180	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
181	b0_bits = 3;
182	cbytes = 3;
183	} else {
184	/* 10xxxxxx -- unexpected continuation byte */
185	return U_SPECIAL;
186	}
187
188	if (*offset + cbytes > size)
189	return U_SPECIAL;
190
191	wchar_t ch = b0 & LO_MASK_8(b0_bits);
192
193	/* Decode continuation bytes */
194	while (cbytes > 0) {
195	uint8_t b = (uint8_t) str[(*offset)++];
196
197	/* Must be 10xxxxxx */
198	if ((b & 0xc0) != 0x80)
199	return U_SPECIAL;
200
201	/* Shift data bits to ch */
202	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
203	cbytes--;
204	}
205
206	return ch;
207	}
208
209	/** Encode a single code point to a UTF-8 string representation.
210	*
211	* Encode a single code point to a UTF-8 string representation and store
212	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
213	* is moved to the position where the next code point can be written to.
214	*
215	* @param ch Input code point.
216	* @param str Output buffer.
217	* @param offset Byte offset where to start writing.
218	* @param size Size of the output buffer (in bytes).
219	*
220	* @return EOK if the code point was encoded successfully, EOVERFLOW if there
221	* was not enough space in the output buffer or EINVAL if the code point
222	* code was invalid.
223	*/
224	errno_t chr_encode(const wchar_t ch, char str, size_t offset, size_t size)
225	{
226	if (*offset >= size)
227	return EOVERFLOW;
228
229	if (!chr_check(ch))
230	return EINVAL;
231
232	/*
233	* Unsigned version of ch (bit operations should only be done
234	* on unsigned types).
235	*/
236	uint32_t cc = (uint32_t) ch;
237
238	/* Determine how many continuation bytes are needed */
239
240	unsigned int b0_bits; /* Data bits in first byte */
241	unsigned int cbytes; /* Number of continuation bytes */
242
243	if ((cc & ~LO_MASK_32(7)) == 0) {
244	b0_bits = 7;
245	cbytes = 0;
246	} else if ((cc & ~LO_MASK_32(11)) == 0) {
247	b0_bits = 5;
248	cbytes = 1;
249	} else if ((cc & ~LO_MASK_32(16)) == 0) {
250	b0_bits = 4;
251	cbytes = 2;
252	} else if ((cc & ~LO_MASK_32(21)) == 0) {
253	b0_bits = 3;
254	cbytes = 3;
255	} else {
256	/* Codes longer than 21 bits are not supported */
257	return EINVAL;
258	}
259
260	/* Check for available space in buffer */
261	if (*offset + cbytes >= size)
262	return EOVERFLOW;
263
264	/* Encode continuation bytes */
265	unsigned int i;
266	for (i = cbytes; i > 0; i--) {
267	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
268	cc = cc >> CONT_BITS;
269	}
270
271	/* Encode first byte */
272	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
273
274	/* Advance offset */
275	*offset += cbytes + 1;
276
277	return EOK;
278	}
279
/** Get size of string.
 *
 * Get the number of bytes which are used by the string @a str (excluding the
 * NULL-terminator).
 *
 * @param str String to consider.
 *
 * @return Number of bytes used by the string
 *
 */
size_t str_bytes(const char *str)
{
	size_t size = 0;

	while (str[size] != 0)
		size++;

	return size;
}
299
/** Get size of wide string.
 *
 * Get the number of bytes which are used by the wide string @a str
 * (excluding the NULL-terminator).
 *
 * @param str Wide string to consider.
 *
 * @return Number of bytes used by the wide string
 *
 */
size_t wstr_bytes(const wchar_t *str)
{
	return (wstr_code_points(str) * sizeof(wchar_t));
}
314
315	/** Get size of string with code point count limit.
316	*
317	* Get the number of bytes which are used by up to @a max_len first
318	* code points in the string @a str. If @a max_len is greater than
319	* the number of code points in @a str, the entire string is measured
320	* (excluding the NULL-terminator).
321	*
322	* @param str String to consider.
323	* @param max_len Maximum number of code points to measure.
324	*
325	* @return Number of bytes used by the code points.
326	*
327	*/
328	size_t str_lbytes(const char *str, size_t max_len)
329	{
330	size_t len = 0;
331	size_t offset = 0;
332
333	while (len < max_len) {
334	if (str_decode(str, &offset, STR_NO_LIMIT) == 0)
335	break;
336
337	len++;
338	}
339
340	return offset;
341	}
342
/** Get size of wide string with length limit.
 *
 * Get the number of bytes which are used by up to @a max_len first
 * code points in the wide string @a str. If @a max_len is greater than
 * the length of @a str, the entire wide string is measured (excluding the
 * NULL-terminator).
 *
 * @param str     Wide string to consider.
 * @param max_len Maximum number of code points to measure.
 *
 * @return Number of bytes used by the code points.
 *
 */
size_t wstr_lbytes(const wchar_t *str, size_t max_len)
{
	/*
	 * Convert the code point limit to a byte limit, saturating instead
	 * of letting the multiplication wrap around for a huge max_len.
	 */
	size_t max_size = SIZE_MAX;

	if (max_len <= SIZE_MAX / sizeof(wchar_t))
		max_size = max_len * sizeof(wchar_t);

	return (wstr_ncode_points(str, max_size) * sizeof(wchar_t));
}
360
361	/** Get number of unicode code points in a UTF-8 encoded string.
362	*
363	* @param str NULL-terminated UTF-8 string.
364	*
365	* @return Number of code points in the string.
366	*
367	*/
368	size_t str_code_points(const char *str)
369	{
370	size_t len = 0;
371	size_t offset = 0;
372
373	while (str_decode(str, &offset, STR_NO_LIMIT) != 0)
374	len++;
375
376	return len;
377	}
378
/** Get number of code points in a wide string.
 *
 * @param wstr NULL-terminated wide string.
 *
 * @return Number of code points in @a wstr (excluding the terminator).
 *
 */
size_t wstr_code_points(const wchar_t *wstr)
{
	size_t len = 0;

	while (wstr[len] != 0)
		len++;

	return len;
}
395
/** Get number of code points in a string with size limit.
 *
 * Counting stops at the terminator or once @a size bytes have been
 * consumed, whichever comes first.
 *
 * @param str  NULL-terminated string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of code points in string.
 *
 */
size_t str_ncode_points(const char *str, size_t size)
{
	size_t len = 0;
	size_t offset = 0;

	while (str_decode(str, &offset, size) != 0)
		len++;

	return len;
}
414
/** Get number of code points in a wide string with size limit.
 *
 * Only whole wchar_t elements within the first @a size bytes are examined;
 * a trailing partial element is ignored.
 *
 * @param str  NULL-terminated wide string.
 * @param size Maximum number of bytes to consider.
 *
 * @return Number of code points in the wide string.
 *
 */
size_t wstr_ncode_points(const wchar_t *str, size_t size)
{
	/* Number of complete wchar_t elements that fit into size bytes. */
	size_t max_len = size / sizeof(wchar_t);
	size_t len = 0;

	while ((len < max_len) && (str[len] != 0))
		len++;

	return len;
}
436
/** Check whether code point is plain ASCII.
 *
 * @param ch Code point to check.
 *
 * @return True if code point is plain ASCII (0 .. 127).
 *
 */
bool ascii_check(wchar_t ch)
{
	/* The lower bound only needs checking when wchar_t is signed. */
	return (WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 127));
}
449
/** Check whether code point is valid.
 *
 * Only the range 0 .. 1114111 (0x10FFFF) is checked.
 *
 * @param ch Code point to check.
 *
 * @return True if code point is within the Unicode code point range.
 *
 */
bool chr_check(wchar_t ch)
{
	/* The lower bound only needs checking when wchar_t is signed. */
	return (WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 1114111));
}
462
463	/** Compare two NULL terminated strings.
464	*
465	* Do a char-by-char comparison of two NULL-terminated strings.
466	* The strings are considered equal iff their length is equal
467	* and both strings consist of the same sequence of code points.
468	*
469	* A string S1 is less than another string S2 if it has a code point with
470	* lower value at the first code point position where the strings differ.
471	* If the strings differ in length, the shorter one is treated as if
472	* padded by code points with a value of zero.
473	*
474	* @param s1 First string to compare.
475	* @param s2 Second string to compare.
476	*
477	* @return 0 if the strings are equal, -1 if the first is less than the second,
478	* 1 if the second is less than the first.
479	*
480	*/
481	int str_cmp(const char s1, const char s2)
482	{
483	wchar_t c1 = 0;
484	wchar_t c2 = 0;
485
486	size_t off1 = 0;
487	size_t off2 = 0;
488
489	while (true) {
490	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
491	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
492
493	if (c1 < c2)
494	return -1;
495
496	if (c1 > c2)
497	return 1;
498
499	if (c1 == 0 \|\| c2 == 0)
500	break;
501	}
502
503	return 0;
504	}
505
506	/** Compare two NULL terminated strings with length limit.
507	*
508	* Do a char-by-char comparison of two NULL-terminated strings.
509	* The strings are considered equal iff
510	* min(str_code_points(s1), max_len) == min(str_code_points(s2), max_len)
511	* and both strings consist of the same sequence of code points,
512	* up to max_len code points.
513	*
514	* A string S1 is less than another string S2 if it has a code point with
515	* lower value at the first code point position where the strings differ.
516	* If the strings differ in length, the shorter one is treated as if
517	* padded by code points with a value of zero. Only the first max_len
518	* code points are considered.
519	*
520	* @param s1 First string to compare.
521	* @param s2 Second string to compare.
522	* @param max_len Maximum number of code points to consider.
523	*
524	* @return 0 if the strings are equal, -1 if the first is less than the second,
525	* 1 if the second is less than the first.
526	*
527	*/
528	int str_lcmp(const char s1, const char s2, size_t max_len)
529	{
530	wchar_t c1 = 0;
531	wchar_t c2 = 0;
532
533	size_t off1 = 0;
534	size_t off2 = 0;
535
536	size_t len = 0;
537
538	while (true) {
539	if (len >= max_len)
540	break;
541
542	c1 = str_decode(s1, &off1, STR_NO_LIMIT);
543	c2 = str_decode(s2, &off2, STR_NO_LIMIT);
544
545	if (c1 < c2)
546	return -1;
547
548	if (c1 > c2)
549	return 1;
550
551	if (c1 == 0 \|\| c2 == 0)
552	break;
553
554	++len;
555	}
556
557	return 0;
558
559	}
560
561	/** Copy string.
562	*
563	* Copy source string @a src to destination buffer @a dest.
564	* No more than @a size bytes are written. If the size of the output buffer
565	* is at least one byte, the output string will always be well-formed, i.e.
566	* null-terminated and containing only complete code points.
567	*
568	* @param dest Destination buffer.
569	* @param count Size of the destination buffer (must be > 0).
570	* @param src Source string.
571	*
572	*/
573	void str_cpy(char dest, size_t size, const char src)
574	{
575	/* There must be space for a null terminator in the buffer. */
576	assert(size > 0);
577	assert(src != NULL);
578
579	size_t src_off = 0;
580	size_t dest_off = 0;
581
582	wchar_t ch;
583	while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
584	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
585	break;
586	}
587
588	dest[dest_off] = '\0';
589	}
590
591	/** Copy size-limited substring.
592	*
593	* Copy prefix of string @a src of max. size @a size to destination buffer
594	* @a dest. No more than @a size bytes are written. The output string will
595	* always be well-formed, i.e. null-terminated and containing only complete
596	* code points.
597	*
598	* No more than @a n bytes are read from the input string, so it does not
599	* have to be null-terminated.
600	*
601	* @param dest Destination buffer.
602	* @param count Size of the destination buffer (must be > 0).
603	* @param src Source string.
604	* @param n Maximum number of bytes to read from @a src.
605	*
606	*/
607	void str_ncpy(char dest, size_t size, const char src, size_t n)
608	{
609	/* There must be space for a null terminator in the buffer. */
610	assert(size > 0);
611
612	size_t src_off = 0;
613	size_t dest_off = 0;
614
615	wchar_t ch;
616	while ((ch = str_decode(src, &src_off, n)) != 0) {
617	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
618	break;
619	}
620
621	dest[dest_off] = '\0';
622	}
623
624	/** Convert wide string to string.
625	*
626	* Convert wide string @a src to string. The output is written to the buffer
627	* specified by @a dest and @a size. @a size must be non-zero and the string
628	* written will always be well-formed.
629	*
630	* @param dest Destination buffer.
631	* @param size Size of the destination buffer.
632	* @param src Source wide string.
633	*/
634	void wstr_to_str(char dest, size_t size, const wchar_t src)
635	{
636	wchar_t ch;
637	size_t src_idx;
638	size_t dest_off;
639
640	/* There must be space for a null terminator in the buffer. */
641	assert(size > 0);
642
643	src_idx = 0;
644	dest_off = 0;
645
646	while ((ch = src[src_idx++]) != 0) {
647	if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
648	break;
649	}
650
651	dest[dest_off] = '\0';
652	}
653
654	/** Find first occurence of code point in string.
655	*
656	* @param str String to search.
657	* @param ch code point to look for.
658	*
659	* @return Pointer to code point in @a str or NULL if not found.
660	*/
661	char str_chr(const char str, wchar_t ch)
662	{
663	wchar_t acc;
664	size_t off = 0;
665	size_t last = 0;
666
667	while ((acc = str_decode(str, &off, STR_NO_LIMIT)) != 0) {
668	if (acc == ch)
669	return (char *) (str + last);
670	last = off;
671	}
672
673	return NULL;
674	}
675
/** Insert a code point into a wide string.
 *
 * Insert a code point into a wide string at position
 * @a pos. The code points after the position (including the terminator)
 * are shifted by one.
 *
 * @param str     String to insert to.
 * @param ch      Code point to insert.
 * @param pos     Code point index where to insert.
 * @param max_pos Number of code points that fit in the buffer (not counting
 *                the terminator).
 *
 * @return True if the insertion was successful, false if the position
 *         is out of bounds or the resulting string would not fit.
 *
 */
bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
{
	size_t len = wstr_code_points(str);

	/*
	 * The string grows to len + 1 code points no matter where the new
	 * one goes, so the capacity check must use len rather than pos.
	 */
	if ((pos > len) || (len >= max_pos))
		return false;

	/* Shift the tail (terminator first) one element to the right. */
	size_t i;
	for (i = len + 1; i > pos; i--)
		str[i] = str[i - 1];

	str[pos] = ch;

	return true;
}
705
/** Remove a code point from a wide string.
 *
 * Remove a code point from a wide string at position
 * @a pos. The code points after the position (including the terminator)
 * are shifted by one.
 *
 * @param str String to remove from.
 * @param pos Code point index to remove.
 *
 * @return True if the removal was successful, false if the position
 *         is out of bounds.
 *
 */
bool wstr_remove(wchar_t *str, size_t pos)
{
	size_t len = wstr_code_points(str);

	if (pos >= len)
		return false;

	/* Shift the tail one element to the left; str[len] is the terminator. */
	size_t i;
	for (i = pos; i < len; i++)
		str[i] = str[i + 1];

	return true;
}
731
/** Duplicate string.
 *
 * Allocate a new string and copy the contents of the source string into it.
 * The duplicate string is allocated as if by malloc().
 *
 * If successful, the duplicate string is always a well-formed
 * null-terminated UTF-8 string, but it can differ from the source
 * string on the byte level.
 *
 * @param src Source string.
 *
 * @return Duplicate string, or NULL if allocation failed.
 *
 */
char *str_dup(const char *src)
{
	/* One extra byte for the terminator. */
	size_t size = str_bytes(src) + 1;

	char *dest = malloc(size);
	if (dest == NULL)
		return NULL;

	str_cpy(dest, size, src);
	return dest;
}
756
/** Duplicate string with size limit.
 *
 * Allocate a new string and copy up to @a n bytes from the source
 * string into it. The duplicate string is allocated as if by malloc().
 * No more than @a n + 1 bytes is allocated, but if the size
 * occupied by the source string is smaller than @a n + 1,
 * less is allocated.
 *
 * If successful, the duplicate string is always a well-formed
 * null-terminated UTF-8 string, but it can differ from the source
 * string on the byte level.
 *
 * The size of the whole source string is measured first, so @a src has
 * to be null-terminated even when it is longer than @a n bytes.
 *
 * @param src Source string.
 * @param n   Maximum number of bytes to duplicate.
 *
 * @return Duplicate string, or NULL if allocation failed.
 *
 */
char *str_ndup(const char *src, size_t n)
{
	size_t size = str_bytes(src);
	if (size > n)
		size = n;

	/* One extra byte for the terminator. */
	char *dest = malloc(size + 1);
	if (dest == NULL)
		return NULL;

	str_ncpy(dest, size + 1, src, size);
	return dest;
}
788
789	/** Convert string to uint64_t (internal variant).
790	*
791	* @param nptr Pointer to string.
792	* @param endptr Pointer to the first invalid character is stored here.
793	* @param base Zero or number between 2 and 36 inclusive.
794	* @param neg Indication of unary minus is stored here.
795	* @apram result Result of the conversion.
796	*
797	* @return EOK if conversion was successful.
798	*
799	*/
800	static errno_t str_uint(const char nptr, char *endptr, unsigned int base,
801	bool neg, uint64_t result)
802	{
803	assert(endptr != NULL);
804	assert(neg != NULL);
805	assert(result != NULL);
806
807	*neg = false;
808	const char *str = nptr;
809
810	/* Ignore leading whitespace */
811	while (isspace(*str))
812	str++;
813
814	if (*str == '-') {
815	*neg = true;
816	str++;
817	} else if (*str == '+')
818	str++;
819
820	if (base == 0) {
821	/* Decode base if not specified */
822	base = 10;
823
824	if (*str == '0') {
825	base = 8;
826	str++;
827
828	switch (*str) {
829	case 'b':
830	case 'B':
831	base = 2;
832	str++;
833	break;
834	case 'o':
835	case 'O':
836	base = 8;
837	str++;
838	break;
839	case 'd':
840	case 'D':
841	case 't':
842	case 'T':
843	base = 10;
844	str++;
845	break;
846	case 'x':
847	case 'X':
848	base = 16;
849	str++;
850	break;
851	default:
852	str--;
853	}
854	}
855	} else {
856	/* Check base range */
857	if ((base < 2) \|\| (base > 36)) {
858	endptr = (char ) str;
859	return EINVAL;
860	}
861	}
862
863	*result = 0;
864	const char *startstr = str;
865
866	while (*str != 0) {
867	unsigned int digit;
868
869	if ((str >= 'a') && (str <= 'z'))
870	digit = *str - 'a' + 10;
871	else if ((str >= 'A') && (str <= 'Z'))
872	digit = *str - 'A' + 10;
873	else if ((str >= '0') && (str <= '9'))
874	digit = *str - '0';
875	else
876	break;
877
878	if (digit >= base)
879	break;
880
881	uint64_t prev = *result;
882	result = (result) * base + digit;
883
884	if (*result < prev) {
885	/* Overflow */
886	endptr = (char ) str;
887	return EOVERFLOW;
888	}
889
890	str++;
891	}
892
893	if (str == startstr) {
894	/*
895	* No digits were decoded => first invalid character is
896	* the first character of the string.
897	*/
898	str = nptr;
899	}
900
901	endptr = (char ) str;
902
903	if (str == nptr)
904	return EINVAL;
905
906	return EOK;
907	}
908
909	/** Convert string to uint64_t.
910	*
911	* @param nptr Pointer to string.
912	* @param endptr If not NULL, pointer to the first invalid character
913	* is stored here.
914	* @param base Zero or number between 2 and 36 inclusive.
915	* @param strict Do not allow any trailing characters.
916	* @param result Result of the conversion.
917	*
918	* @return EOK if conversion was successful.
919	*
920	*/
921	errno_t str_uint64_t(const char nptr, char *endptr, unsigned int base,
922	bool strict, uint64_t *result)
923	{
924	assert(result != NULL);
925
926	bool neg;
927	char *lendptr;
928	errno_t ret = str_uint(nptr, &lendptr, base, &neg, result);
929
930	if (endptr != NULL)
931	endptr = (char ) lendptr;
932
933	if (ret != EOK)
934	return ret;
935
936	/* Do not allow negative values */
937	if (neg)
938	return EINVAL;
939
940	/*
941	* Check whether we are at the end of
942	* the string in strict mode
943	*/
944	if ((strict) && (*lendptr != 0))
945	return EINVAL;
946
947	return EOK;
948	}
949
/** Scale a value down for display with a decimal (SI) order suffix.
 *
 * A scale is only applied once the value exceeds one thousand times
 * that scale, so the result has at least four significant digits.
 *
 * @param val    Value to scale.
 * @param rv     The scaled value (rounded down) is stored here.
 * @param suffix The suffix character is stored here: 'k', 'M', 'G', 'T',
 *               'P', 'E', or ' ' if the value was not scaled.
 *
 */
void order_suffix(const uint64_t val, uint64_t *rv, char *suffix)
{
	if (val > UINT64_C(10000000000000000000)) {
		/* 10^18 is exa */
		*rv = val / UINT64_C(1000000000000000000);
		*suffix = 'E';
	} else if (val > UINT64_C(1000000000000000000)) {
		/* 10^15 is peta */
		*rv = val / UINT64_C(1000000000000000);
		*suffix = 'P';
	} else if (val > UINT64_C(1000000000000000)) {
		*rv = val / UINT64_C(1000000000000);
		*suffix = 'T';
	} else if (val > UINT64_C(1000000000000)) {
		*rv = val / UINT64_C(1000000000);
		*suffix = 'G';
	} else if (val > UINT64_C(1000000000)) {
		*rv = val / UINT64_C(1000000);
		*suffix = 'M';
	} else if (val > UINT64_C(1000000)) {
		*rv = val / UINT64_C(1000);
		*suffix = 'k';
	} else {
		*rv = val;
		*suffix = ' ';
	}
}
975
/** Scale a byte count down for display with a binary (IEC) order suffix.
 *
 * A scale is only applied once the value exceeds 1024 times that scale.
 *
 * @param val    Value to scale.
 * @param rv     The scaled value (rounded down) is stored here.
 * @param suffix Pointer to the suffix string literal is stored here:
 *               "KiB", "MiB", "GiB", "TiB", "PiB", or "B" if the value
 *               was not scaled.
 * @param fixed  If true, the unscaled suffix is "B  " (padded to the width
 *               of the other suffixes) instead of "B".
 *
 */
void bin_order_suffix(const uint64_t val, uint64_t *rv, const char **suffix,
    bool fixed)
{
	if (val > UINT64_C(1152921504606846976)) {
		/* Dividing by 2^50 yields pebibytes */
		*rv = val / UINT64_C(1125899906842624);
		*suffix = "PiB";
	} else if (val > UINT64_C(1125899906842624)) {
		*rv = val / UINT64_C(1099511627776);
		*suffix = "TiB";
	} else if (val > UINT64_C(1099511627776)) {
		*rv = val / UINT64_C(1073741824);
		*suffix = "GiB";
	} else if (val > UINT64_C(1073741824)) {
		*rv = val / UINT64_C(1048576);
		*suffix = "MiB";
	} else if (val > UINT64_C(1048576)) {
		*rv = val / UINT64_C(1024);
		*suffix = "KiB";
	} else {
		*rv = val;
		if (fixed)
			*suffix = "B  ";
		else
			*suffix = "B";
	}
}
1002
1003	/** @}
1004	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: