Context Navigation

source: mainline/kernel/generic/src/lib/string.c@ c18e666

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since c18e666 was 74c8da2c, checked in by Martin Decky <martin@…>, 16 years ago
more Unicode functions
Property mode set to `100644`
File size: 9.7 KB

Line
1	/*
2	* Copyright (c) 2001-2004 Jakub Jermar
3	* All rights reserved.
4	*
5	* Redistribution and use in source and binary forms, with or without
6	* modification, are permitted provided that the following conditions
7	* are met:
8	*
9	* - Redistributions of source code must retain the above copyright
10	* notice, this list of conditions and the following disclaimer.
11	* - Redistributions in binary form must reproduce the above copyright
12	* notice, this list of conditions and the following disclaimer in the
13	* documentation and/or other materials provided with the distribution.
14	* - The name of the author may not be used to endorse or promote products
15	* derived from this software without specific prior written permission.
16	*
17	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27	*/
28
29	/** @addtogroup generic
30	* @{
31	*/
32
33	/**
34	* @file
35	* @brief String and UTF-8 manipulation functions.
36	*/
37
38	#include <string.h>
39	#include <print.h>
40	#include <cpu.h>
41	#include <arch/asm.h>
42	#include <arch.h>
43	#include <console/kconsole.h>
44
45	char invalch = '?';	/* Substitute character returned by utf8_decode() when the input cannot be decoded. */
46
47	/** Decode a single UTF-8 character from a NULL-terminated string.
48	*
49	* Decode a single UTF-8 character from a plain char NULL-terminated
50	* string. Decoding starts at @index and this index is incremented
51	* if the current UTF-8 string is encoded in more than a single byte.
52	*
53	* @param str Plain character NULL-terminated string.
54	* @param index Index (counted in plain characters) where to start
55	* the decoding.
56	* @param limit Maximal allowed value of index.
57	*
58	* @return Decoded character in UTF-32 or '?' if the encoding is wrong.
59	*
60	*/
61	wchar_t utf8_decode(const char str, index_t index, index_t limit)
62	{
63	uint8_t c1; /* First plain character from str */
64	uint8_t c2; /* Second plain character from str */
65	uint8_t c3; /* Third plain character from str */
66	uint8_t c4; /* Fourth plain character from str */
67
68	if (*index > limit)
69	return invalch;
70
71	c1 = (uint8_t) str[*index];
72
73	if ((c1 & 0x80) == 0) {
74	/* Plain ASCII (code points 0 .. 127) */
75	return (wchar_t) c1;
76	}
77
78	if ((c1 & 0xe0) == 0xc0) {
79	/* Code points 128 .. 2047 */
80	if (*index + 1 > limit)
81	return invalch;
82
83	c2 = (uint8_t) str[*index + 1];
84	if ((c2 & 0xc0) == 0x80) {
85	(*index)++;
86	return ((wchar_t) ((c1 & 0x1f) << 6) \| (c2 & 0x3f));
87	} else
88	return invalch;
89	}
90
91	if ((c1 & 0xf0) == 0xe0) {
92	/* Code points 2048 .. 65535 */
93	if (*index + 2 > limit)
94	return invalch;
95
96	c2 = (uint8_t) str[*index + 1];
97	if ((c2 & 0xc0) == 0x80) {
98	(*index)++;
99	c3 = (uint8_t) str[*index + 1];
100	if ((c3 & 0xc0) == 0x80) {
101	(*index)++;
102	return ((wchar_t) ((c1 & 0x0f) << 12) \| ((c2 & 0x3f) << 6) \| (c3 & 0x3f));
103	} else
104	return invalch;
105	} else
106	return invalch;
107	}
108
109	if ((c1 & 0xf8) == 0xf0) {
110	/* Code points 65536 .. 1114111 */
111	if (*index + 3 > limit)
112	return invalch;
113
114	c2 = (uint8_t) str[*index + 1];
115	if ((c2 & 0xc0) == 0x80) {
116	(*index)++;
117	c3 = (uint8_t) str[*index + 1];
118	if ((c3 & 0xc0) == 0x80) {
119	(*index)++;
120	c4 = (uint8_t) str[*index + 1];
121	if ((c4 & 0xc0) == 0x80) {
122	(*index)++;
123	return ((wchar_t) ((c1 & 0x07) << 18) \| ((c2 & 0x3f) << 12) \| ((c3 & 0x3f) << 6) \| (c4 & 0x3f));
124	} else
125	return invalch;
126	} else
127	return invalch;
128	} else
129	return invalch;
130	}
131
132	return invalch;
133	}
134
135	/** Encode a single UTF-32 character as UTF-8
136	*
137	* Encode a single UTF-32 character as UTF-8 and store it into
138	* the given buffer at @index. Encoding starts at @index and
139	* this index is incremented if the UTF-8 character takes
140	* more than a single byte.
141	*
142	* @param ch Input UTF-32 character.
143	* @param str Output buffer.
144	* @param index Index (counted in plain characters) where to start
145	* the encoding
146	* @param limit Maximal allowed value of index.
147	*
148	* @return True if the character was encoded or false if there is not
149	* enought space in the output buffer or the character is invalid
150	* Unicode code point.
151	*
152	*/
153	bool utf8_encode(const wchar_t ch, char str, index_t index, index_t limit)
154	{
155	if (*index > limit)
156	return false;
157
158	if ((ch >= 0) && (ch <= 127)) {
159	/* Plain ASCII (code points 0 .. 127) */
160	str[*index] = ch & 0x7f;
161	return true;
162	}
163
164	if ((ch >= 128) && (ch <= 2047)) {
165	/* Code points 128 .. 2047 */
166	if (*index + 1 > limit)
167	return false;
168
169	str[*index] = 0xc0 \| ((ch >> 6) & 0x1f);
170	(*index)++;
171	str[*index] = 0x80 \| (ch & 0x3f);
172	return true;
173	}
174
175	if ((ch >= 2048) && (ch <= 65535)) {
176	/* Code points 2048 .. 65535 */
177	if (*index + 2 > limit)
178	return false;
179
180	str[*index] = 0xe0 \| ((ch >> 12) & 0x0f);
181	(*index)++;
182	str[*index] = 0x80 \| ((ch >> 6) & 0x3f);
183	(*index)++;
184	str[*index] = 0x80 \| (ch & 0x3f);
185	return true;
186	}
187
188	if ((ch >= 65536) && (ch <= 1114111)) {
189	/* Code points 65536 .. 1114111 */
190	if (*index + 3 > limit)
191	return false;
192
193	str[*index] = 0xf0 \| ((ch >> 18) & 0x07);
194	(*index)++;
195	str[*index] = 0x80 \| ((ch >> 12) & 0x3f);
196	(*index)++;
197	str[*index] = 0x80 \| ((ch >> 6) & 0x3f);
198	(*index)++;
199	str[*index] = 0x80 \| (ch & 0x3f);
200	return true;
201	}
202
203	return false;
204	}
205
206	/** Get bytes used by UTF-8 characters.
207	*
208	* Get the number of bytes (count of plain characters) which
209	* are used by a given count of UTF-8 characters in a string.
210	* As UTF-8 encoding is multibyte, there is no constant
211	* correspondence between number of characters and used bytes.
212	*
213	* @param str UTF-8 string to consider.
214	* @param count Number of UTF-8 characters to count.
215	*
216	* @return Number of bytes used by the characters.
217	*
218	*/
219	size_t utf8_count_bytes(const char *str, count_t count)
220	{
221	size_t size = 0;
222	index_t index = 0;
223
224	while ((utf8_decode(str, &index, UTF8_NO_LIMIT) != 0) && (size < count)) {
225	size++;
226	index++;
227	}
228
229	return index;
230	}
231
/** Check whether character is plain ASCII.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0 .. 127.
 *
 */
bool ascii_check(const wchar_t ch)
{
	return ((ch >= 0) && (ch <= 127));
}
244
/** Check whether character is Unicode.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0 .. 1114111.
 *
 */
bool unicode_check(const wchar_t ch)
{
	return ((ch >= 0) && (ch <= 1114111));
}
257
/** Return number of plain characters in a string.
 *
 * @param str NULL-terminated string.
 *
 * @return Number of plain characters in @a str, not counting the
 *         terminating zero.
 *
 */
size_t strlen(const char *str)
{
	size_t size = 0;

	while (str[size] != '\0')
		size++;

	return size;
}
272
273	/** Return number of UTF-8 characters in a string.
274	*
275	* @param str NULL-terminated UTF-8 string.
276	*
277	* @return Number of UTF-8 characters in str.
278	*
279	*/
280	size_t strlen_utf8(const char *str)
281	{
282	size_t size = 0;
283	index_t index = 0;
284
285	while (utf8_decode(str, &index, UTF8_NO_LIMIT) != 0) {
286	size++;
287	index++;
288	}
289
290	return size;
291	}
292
/** Return number of UTF-32 characters in a string.
 *
 * @param str NULL-terminated UTF-32 string.
 *
 * @return Number of UTF-32 characters in @a str, not counting the
 *         terminating zero.
 *
 */
size_t strlen_utf32(const wchar_t *str)
{
	size_t size = 0;

	while (str[size] != 0)
		size++;

	return size;
}
307
/** Compare two NULL terminated strings.
 *
 * Do a char-by-char comparison of two NULL terminated strings.
 * The strings are equal only if they have the same length and
 * consist of the same characters. If one string is a proper prefix
 * of the other, the shorter one is considered smaller.
 *
 * Characters are compared as plain char.
 *
 * @param src First string to compare.
 * @param dst Second string to compare.
 *
 * @return 0 if the strings are equal, -1 if first is smaller,
 *         1 if second smaller.
 *
 */
int strcmp(const char *src, const char *dst)
{
	for (; *src && *dst; src++, dst++) {
		if (*src < *dst)
			return -1;

		if (*src > *dst)
			return 1;
	}

	/* At least one string has ended; equal only if both have */
	if (*src == *dst)
		return 0;

	if (!*src)
		return -1;

	return 1;
}
336
337
/** Compare two NULL terminated strings up to a maximal length.
 *
 * Do a char-by-char comparison of two NULL terminated strings,
 * looking at no more than @a len leading characters. The strings are
 * considered equal if their first @a len characters match, or if both
 * end at the same position before that with all characters matching.
 * If one string ends first, it is considered smaller.
 *
 * Characters are compared as plain char.
 *
 * @param src First string to compare.
 * @param dst Second string to compare.
 * @param len Maximal length for comparison.
 *
 * @return 0 if the strings are equal, -1 if first is smaller,
 *         1 if second smaller.
 *
 */
int strncmp(const char *src, const char *dst, size_t len)
{
	size_t i;

	for (i = 0; (*src) && (*dst) && (i < len); src++, dst++, i++) {
		if (*src < *dst)
			return -1;

		if (*src > *dst)
			return 1;
	}

	/* Either the whole window matched or both strings ended together */
	if (i == len || *src == *dst)
		return 0;

	if (!*src)
		return -1;

	return 1;
}
372
373
374
/** Copy NULL terminated string.
 *
 * Copy string 'src' into the buffer 'dest' of size 'len'. At most
 * 'len' - 1 characters are copied and the result is always terminated
 * by '\0'; a longer source string is truncated. The rest of the buffer
 * behind the terminator is left untouched.
 *
 * If 'len' is zero, nothing is written at all.
 *
 * @param dest Destination buffer.
 * @param src  Source string.
 * @param len  Size of destination buffer.
 *
 */
void strncpy(char *dest, const char *src, size_t len)
{
	size_t i;

	/*
	 * A zero-sized buffer cannot even hold the terminator. Without
	 * this guard the final store would go to dest[-1].
	 */
	if (len == 0)
		return;

	for (i = 0; i < len; i++) {
		if (!(dest[i] = src[i]))
			return;
	}

	/* Source did not fit; overwrite the last copied character */
	dest[len - 1] = '\0';
}
397
/** Find first occurrence of character in string.
 *
 * The terminating zero is not considered part of the string, so
 * searching for '\0' always yields NULL.
 *
 * @param s String to search.
 * @param i Character to look for.
 *
 * @return Pointer to character in @a s or NULL if not found.
 */
char *strchr(const char *s, int i)
{
	while (*s != '\0') {
		if (*s == i)
			return (char *) s;
		++s;
	}

	return NULL;
}
415
416	/** @}
417	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: