Context Navigation

source: mainline/uspace/lib/libc/generic/string.c@ cb01e1e

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since cb01e1e was 171f9a1, checked in by Jiri Svoboda <jirik.svoboda@…>, 16 years ago
Character encoding/decoding in uspace. Partially fix klog application.
Property mode set to `100644`
File size: 12.1 KB

Line
1	/*
2	* Copyright (c) 2005 Martin Decky
3	* Copyright (c) 2008 Jiri Svoboda
4	* All rights reserved.
5	*
6	* Redistribution and use in source and binary forms, with or without
7	* modification, are permitted provided that the following conditions
8	* are met:
9	*
10	* - Redistributions of source code must retain the above copyright
11	* notice, this list of conditions and the following disclaimer.
12	* - Redistributions in binary form must reproduce the above copyright
13	* notice, this list of conditions and the following disclaimer in the
14	* documentation and/or other materials provided with the distribution.
15	* - The name of the author may not be used to endorse or promote products
16	* derived from this software without specific prior written permission.
17	*
18	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28	*/
29
30	/** @addtogroup libc
31	* @{
32	*/
33	/** @file
34	*/
35
36	#include <string.h>
37	#include <stdlib.h>
38	#include <limits.h>
39	#include <ctype.h>
40	#include <malloc.h>
41	#include <errno.h>
42	#include <string.h>
43
/** Byte mask consisting of lowest @a n bits (out of 8), 0 <= n <= 8. */
#define LO_MASK_8(n)  ((uint8_t) ((1u << (n)) - 1))

/** Mask consisting of lowest @a n bits (out of 32), 0 <= n < 32. */
#define LO_MASK_32(n)  ((uint32_t) (((uint32_t) 1 << (n)) - 1))

/** Byte mask consisting of highest @a n bits (out of 8), 0 <= n <= 8.
 *
 * The complement is cast back to uint8_t: without the cast, integer
 * promotion turns the result into a negative int with all upper bits set.
 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte */
#define CONT_BITS  6
55
56	/** Decode a single character from a string.
57	*
58	* Decode a single character from a string of size @a size. Decoding starts
59	* at @a offset and this offset is moved to the beginning of the next
60	* character. In case of decoding error, offset generally advances at least
61	* by one. However, offset is never moved beyond size.
62	*
63	* @param str String (not necessarily NULL-terminated).
64	* @param offset Byte offset in string where to start decoding.
65	* @param size Size of the string (in bytes).
66	*
67	* @return Value of decoded character, U_SPECIAL on decoding error or
68	* NULL if attempt to decode beyond @a size.
69	*
70	*/
71	wchar_t str_decode(const char str, size_t offset, size_t size)
72	{
73	if (*offset + 1 > size)
74	return 0;
75
76	/* First byte read from string */
77	uint8_t b0 = (uint8_t) str[(*offset)++];
78
79	/* Determine code length */
80
81	unsigned int b0_bits; /* Data bits in first byte */
82	unsigned int cbytes; /* Number of continuation bytes */
83
84	if ((b0 & 0x80) == 0) {
85	/* 0xxxxxxx (Plain ASCII) */
86	b0_bits = 7;
87	cbytes = 0;
88	} else if ((b0 & 0xe0) == 0xc0) {
89	/* 110xxxxx 10xxxxxx */
90	b0_bits = 5;
91	cbytes = 1;
92	} else if ((b0 & 0xf0) == 0xe0) {
93	/* 1110xxxx 10xxxxxx 10xxxxxx */
94	b0_bits = 4;
95	cbytes = 2;
96	} else if ((b0 & 0xf8) == 0xf0) {
97	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
98	b0_bits = 3;
99	cbytes = 3;
100	} else {
101	/* 10xxxxxx -- unexpected continuation byte */
102	return U_SPECIAL;
103	}
104
105	if (*offset + cbytes > size)
106	return U_SPECIAL;
107
108	wchar_t ch = b0 & LO_MASK_8(b0_bits);
109
110	/* Decode continuation bytes */
111	while (cbytes > 0) {
112	uint8_t b = (uint8_t) str[(*offset)++];
113
114	/* Must be 10xxxxxx */
115	if ((b & 0xc0) != 0x80)
116	return U_SPECIAL;
117
118	/* Shift data bits to ch */
119	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
120	cbytes--;
121	}
122
123	return ch;
124	}
125
126	/** Encode a single character to string representation.
127	*
128	* Encode a single character to string representation (i.e. UTF-8) and store
129	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
130	* is moved to the position where the next character can be written to.
131	*
132	* @param ch Input character.
133	* @param str Output buffer.
134	* @param offset Byte offset where to start writing.
135	* @param size Size of the output buffer (in bytes).
136	*
137	* @return EOK if the character was encoded successfully, EOVERFLOW if there
138	* was not enough space in the output buffer or EINVAL if the character
139	* code was invalid.
140	*/
141	int chr_encode(const wchar_t ch, char str, size_t offset, size_t size)
142	{
143	if (*offset >= size)
144	return EOVERFLOW;
145
146	if (!chr_check(ch))
147	return EINVAL;
148
149	/* Unsigned version of ch (bit operations should only be done
150	on unsigned types). */
151	uint32_t cc = (uint32_t) ch;
152
153	/* Determine how many continuation bytes are needed */
154
155	unsigned int b0_bits; /* Data bits in first byte */
156	unsigned int cbytes; /* Number of continuation bytes */
157
158	if ((cc & ~LO_MASK_32(7)) == 0) {
159	b0_bits = 7;
160	cbytes = 0;
161	} else if ((cc & ~LO_MASK_32(11)) == 0) {
162	b0_bits = 5;
163	cbytes = 1;
164	} else if ((cc & ~LO_MASK_32(16)) == 0) {
165	b0_bits = 4;
166	cbytes = 2;
167	} else if ((cc & ~LO_MASK_32(21)) == 0) {
168	b0_bits = 3;
169	cbytes = 3;
170	} else {
171	/* Codes longer than 21 bits are not supported */
172	return EINVAL;
173	}
174
175	/* Check for available space in buffer */
176	if (*offset + cbytes >= size)
177	return EOVERFLOW;
178
179	/* Encode continuation bytes */
180	unsigned int i;
181	for (i = cbytes; i > 0; i--) {
182	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
183	cc = cc >> CONT_BITS;
184	}
185
186	/* Encode first byte */
187	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
188
189	/* Advance offset */
190	*offset += cbytes + 1;
191
192	return EOK;
193	}
194
/** Check whether a character is valid.
 *
 * @param ch Character code to check.
 *
 * @return True if @a ch lies in the Unicode code point range
 *         0 .. 0x10FFFF (inclusive), false otherwise.
 *
 */
bool chr_check(const wchar_t ch)
{
	return (ch >= 0) && (ch <= 0x10FFFF);
}
207
/** Get the length of a string in bytes, not including the terminating 0.
 *
 * @param str NUL-terminated string.
 * @return Number of bytes preceding the terminating 0.
 */
size_t strlen(const char *str)
{
	const char *end = str;

	/* Walk to the terminator; the distance covered is the length. */
	while (*end != '\0')
		end++;

	return (size_t) (end - str);
}
222
/** Compare two NUL-terminated strings.
 *
 * Bytes are compared as unsigned char, so the ordering does not depend on
 * whether plain char is signed on the target platform.
 *
 * @param a First string.
 * @param b Second string.
 * @return Zero if the strings are equal, a negative value if @a a sorts
 *         before @a b, a positive value otherwise.
 */
int strcmp(const char *a, const char *b)
{
	size_t i = 0;

	/* a[i] == b[i] together with a[i] != 0 implies b[i] != 0 as well. */
	while ((a[i] != '\0') && (a[i] == b[i]))
		i++;

	return (int) (unsigned char) a[i] - (int) (unsigned char) b[i];
}
232
/** Compare at most @a n leading bytes of two NUL-terminated strings.
 *
 * Bytes are compared as unsigned char, so the ordering does not depend on
 * whether plain char is signed on the target platform.
 *
 * @param a First string.
 * @param b Second string.
 * @param n Maximum number of bytes to compare.
 * @return Zero if the first @a n bytes (or the whole strings, if shorter)
 *         are equal, a negative value if @a a sorts before @a b,
 *         a positive value otherwise.
 */
int strncmp(const char *a, const char *b, size_t n)
{
	size_t i = 0;

	while ((i < n) && (a[i] != '\0') && (a[i] == b[i]))
		i++;

	/* All n bytes matched; the byte at index n must not be inspected. */
	if (i == n)
		return 0;

	return (int) (unsigned char) a[i] - (int) (unsigned char) b[i];
}
243
/** Compare two NUL-terminated strings, ignoring case.
 *
 * Each byte is folded with tolower() before comparison. The bytes are
 * converted to unsigned char first because passing a negative value other
 * than EOF to tolower() is undefined.
 *
 * @param a First string.
 * @param b Second string.
 * @return Zero if the strings are equal ignoring case, a negative value if
 *         @a a sorts before @a b, a positive value otherwise.
 */
int stricmp(const char *a, const char *b)
{
	size_t i = 0;

	while ((a[i] != '\0') && (b[i] != '\0') &&
	    (tolower((unsigned char) a[i]) == tolower((unsigned char) b[i])))
		i++;

	return tolower((unsigned char) a[i]) - tolower((unsigned char) b[i]);
}
253
/** Return pointer to the first occurrence of character c in string.
 *
 * The terminating NUL is considered part of the string, so searching for
 * '\0' yields a pointer to the terminator (as required by ISO C).
 *
 * @param str Scanned string.
 * @param c   Searched character (taken as one byte).
 * @return Pointer to the matched character or NULL if it is not
 *         found in given string.
 */
char *strchr(const char *str, int c)
{
	const char ch = (char) c;

	for (;; str++) {
		/* Test for a match first so that ch == '\0' finds the terminator. */
		if (*str == ch)
			return (char *) str;

		if (*str == '\0')
			return NULL;
	}
}
271
/** Return pointer to the last occurrence of character c in string.
 *
 * The terminating NUL is considered part of the string, so searching for
 * '\0' yields a pointer to the terminator (as required by ISO C).
 *
 * @param str Scanned string.
 * @param c   Searched character (taken as one byte).
 * @return Pointer to the matched character or NULL if it is not
 *         found in given string.
 */
char *strrchr(const char *str, int c)
{
	const char ch = (char) c;
	const char *last = NULL;

	/* The loop body also runs for the terminator itself. */
	do {
		if (*str == ch)
			last = str;
	} while (*str++ != '\0');

	return (char *) last;
}
291
/** Convert string to a number.
 * Core of strtol and strtoul functions.
 *
 * Leading whitespace and an optional sign are skipped, then the longest
 * run of digits valid in @a base is converted. Letters stand for the
 * digits 10..35 regardless of case (contiguous letter codes, i.e. ASCII,
 * are assumed).
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character, or @a nptr if no conversion was done.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @param sgn    It's set to 1 if minus found; left untouched otherwise.
 * @return Magnitude of the converted number, ULONG_MAX on overflow or
 *         0 if no conversion could be performed.
 */
static unsigned long
_strtoul(const char *nptr, char **endptr, int base, char *sgn)
{
	const char *str = nptr;

	/* <ctype.h> functions require a value representable as unsigned char. */
	while (isspace((unsigned char) *str))
		str++;

	if (*str == '-') {
		*sgn = 1;
		str++;
	} else if (*str == '+') {
		str++;
	}

	if ((base < 0) || (base == 1) || (base > 36)) {
		/* FIXME: set errno to EINVAL */
		if (endptr)
			*endptr = (char *) nptr;
		return 0;
	}

	/*
	 * A "0x"/"0X" prefix only counts as a prefix when a hexadecimal digit
	 * follows it. Otherwise the leading "0" is the number and 'x' is the
	 * first invalid character.
	 */
	int hex_prefix = (str[0] == '0') &&
	    ((str[1] == 'x') || (str[1] == 'X')) &&
	    isxdigit((unsigned char) str[2]);

	if (base == 0) {
		if (hex_prefix)
			base = 16;
		else if (*str == '0')
			base = 8;
		else
			base = 10;
	}

	if ((base == 16) && hex_prefix)
		str += 2;

	const char *digits = str;  /* Start of the digit sequence */
	unsigned long result = 0;
	int overflow = 0;

	for (; *str != '\0'; str++) {
		unsigned char c = (unsigned char) *str;
		unsigned int digit;

		if ((c >= '0') && (c <= '9'))
			digit = c - '0';
		else if ((c >= 'a') && (c <= 'z'))
			digit = c - 'a' + 10;
		else if ((c >= 'A') && (c <= 'Z'))
			digit = c - 'A' + 10;
		else
			break;

		/* Valid digits are 0 .. base - 1. */
		if (digit >= (unsigned int) base)
			break;

		/* After an overflow keep consuming digits, but stop computing. */
		if (overflow)
			continue;

		/* Would result * base + digit exceed ULONG_MAX? */
		if (result > (ULONG_MAX - digit) / (unsigned long) base) {
			overflow = 1;
			continue;
		}

		result = result * (unsigned long) base + digit;
	}

	if (str == digits) {
		/*
		 * No number was found => first invalid character is the first
		 * character of the string.
		 */
		/* FIXME: set errno to EINVAL */
		if (endptr)
			*endptr = (char *) nptr;
		return 0;
	}

	if (endptr)
		*endptr = (char *) str;

	if (overflow) {
		/* FIXME: errno = ERANGE */
		return ULONG_MAX;
	}

	return result;
}
384
/** Convert initial part of string to long int according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * Values outside the range of long int are clamped to LONG_MIN or LONG_MAX.
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return Result of conversion.
 */
long int strtol(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = _strtoul(nptr, endptr, base, &sgn);

	if (number > (unsigned long) LONG_MAX) {
		if (sgn && (number == (unsigned long) LONG_MAX + 1)) {
			/*
			 * The magnitude is exactly that of LONG_MIN. Return the
			 * constant instead of relying on an implementation-defined
			 * unsigned to signed conversion.
			 */
			/* FIXME: set 0 to errno */
			return LONG_MIN;
		}
		/* FIXME: set ERANGE to errno */
		return (sgn ? LONG_MIN : LONG_MAX);
	}

	/* number <= LONG_MAX here, so both the cast and the negation are safe. */
	return (sgn ? -(long) number : (long) number);
}
416
417
/** Convert initial part of string to unsigned long according to given base.
 * The number may begin with an arbitrary number of whitespaces followed by
 * optional sign (`+' or `-'). If the base is 0 or 16, the prefix `0x' may be
 * inserted and the number will be taken as hexadecimal one. If the base is 0
 * and the number begin with a zero, number will be taken as octal one (as with
 * base 8). Otherwise the base 0 is taken as decimal.
 *
 * If a minus sign was present, the result is negated in unsigned
 * arithmetic (i.e. modulo ULONG_MAX + 1).
 *
 * @param nptr   Pointer to string.
 * @param endptr If not NULL, function stores here pointer to the first
 *               invalid character.
 * @param base   Zero or number between 2 and 36 inclusive.
 * @return Result of conversion.
 */
unsigned long strtoul(const char *nptr, char **endptr, int base)
{
	char sgn = 0;
	unsigned long number = _strtoul(nptr, endptr, base, &sgn);

	return (sgn ? -number : number);
}
440
/** Copy a NUL-terminated string, including the terminator.
 *
 * The destination must be large enough to hold the whole source string;
 * no bounds checking is performed.
 *
 * @param dest Destination buffer.
 * @param src  Source string.
 * @return @a dest.
 */
char *strcpy(char *dest, const char *src)
{
	char *orig = dest;

	/* Copy byte by byte; the loop ends once the terminator was copied. */
	while ((*dest++ = *src++) != '\0')
		;

	return orig;
}
449
/** Copy at most @a n bytes of a NUL-terminated string.
 *
 * Copying stops after the terminating NUL has been copied or after @a n
 * bytes have been written, whichever comes first. If @a src is @a n bytes
 * or longer, the destination is NOT NUL-terminated. Unlike ISO C strncpy,
 * the rest of the destination is not padded with zeros (this keeps the
 * existing behavior of this implementation). With @a n equal to zero
 * nothing is written.
 *
 * @param dest Destination buffer.
 * @param src  Source string.
 * @param n    Maximum number of bytes to write to @a dest.
 * @return @a dest.
 */
char *strncpy(char *dest, const char *src, size_t n)
{
	char *orig = dest;

	/* The bound is tested before each write, so n == 0 copies nothing. */
	while (n-- > 0) {
		if ((*dest++ = *src++) == '\0')
			break;
	}

	return orig;
}
458
/** Append a NUL-terminated string to another one.
 *
 * The destination must have room for the combined string including the
 * terminator; no bounds checking is performed.
 *
 * @param dest Destination string to append to.
 * @param src  String to append.
 * @return @a dest.
 */
char *strcat(char *dest, const char *src)
{
	char *end = dest;

	/* Find the terminator of the destination string. */
	while (*end != '\0')
		end++;

	/* Copy the source over it, including the source's terminator. */
	while ((*end++ = *src++) != '\0')
		;

	return dest;
}
469
/** Duplicate a NUL-terminated string into newly allocated memory.
 *
 * The copy is obtained from malloc(); releasing it is up to the caller.
 *
 * @param s1 String to duplicate.
 * @return Pointer to the copy, or NULL if the allocation failed.
 */
char *strdup(const char *s1)
{
	/* Size of the copy, including the terminator. */
	size_t size = strlen(s1) + 1;
	char *copy = malloc(size);

	if (copy != NULL)
		memcpy(copy, s1, size);

	return copy;
}
480
/** Split a string into tokens (non-reentrant variant).
 *
 * Thin wrapper around strtok_r() that keeps the position for subsequent
 * calls in a static variable. That state is shared by all callers, so the
 * tokenization of two strings must not be interleaved.
 *
 * @param s     String to tokenize on the first call, NULL to continue with
 *              the string from the previous call. The string is modified.
 * @param delim Set of delimiter characters.
 * @return Pointer to the next token or NULL if there are no more tokens.
 */
char *strtok(char *s, const char *delim)
{
	static char *next;

	return strtok_r(s, delim, &next);
}
487
/** Split a string into tokens (reentrant variant).
 *
 * The delimiter that ends a token is overwritten with a NUL byte, i.e. the
 * input string is modified in place.
 *
 * @param s     String to tokenize on the first call, NULL to continue
 *              from the position saved in @a *next.
 * @param delim Set of delimiter characters.
 * @param next  Storage for the position where the next call continues.
 * @return Pointer to the next token or NULL if there are no more tokens.
 */
char *strtok_r(char *s, const char *delim, char **next)
{
	if (s == NULL)
		s = *next;

	/* Continuation requested, but no position has been saved yet. */
	if (s == NULL)
		return NULL;

	/* Skip over leading delimiters. */
	while ((*s != '\0') && (strchr(delim, *s) != NULL))
		s++;
	char *start = s;

	/* Skip over token characters. */
	while ((*s != '\0') && (strchr(delim, *s) == NULL))
		s++;
	char *end = s;

	/* Continue after the delimiter, or stay on the terminator. */
	*next = (*s != '\0') ? s + 1 : s;

	if (start == end)
		return NULL;  /* No more tokens. */

	/* Overwrite delimiter with NUL terminator. */
	*end = '\0';
	return start;
}
512
513	/** @}
514	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: