Context Navigation

source: mainline/kernel/generic/src/lib/string.c@ 0dd1d444

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since 0dd1d444 was 0dd1d444, checked in by Jiri Svoboda <jirik.svoboda@…>, 16 years ago
Slightly 'decompile' character decoder.
Property mode set to `100644`
File size: 9.3 KB

Line
1	/*
2	* Copyright (c) 2001-2004 Jakub Jermar
3	* All rights reserved.
4	*
5	* Redistribution and use in source and binary forms, with or without
6	* modification, are permitted provided that the following conditions
7	* are met:
8	*
9	* - Redistributions of source code must retain the above copyright
10	* notice, this list of conditions and the following disclaimer.
11	* - Redistributions in binary form must reproduce the above copyright
12	* notice, this list of conditions and the following disclaimer in the
13	* documentation and/or other materials provided with the distribution.
14	* - The name of the author may not be used to endorse or promote products
15	* derived from this software without specific prior written permission.
16	*
17	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27	*/
28
29	/** @addtogroup generic
30	* @{
31	*/
32
33	/**
34	* @file
35	* @brief Miscellaneous functions.
36	*/
37
38	#include <string.h>
39	#include <print.h>
40	#include <cpu.h>
41	#include <arch/asm.h>
42	#include <arch.h>
43	#include <console/kconsole.h>
44
45	char invalch = '?';
46
47	/** Byte mask consisting of bits 0 - (@n - 1) */
48	#define LO_MASK_8(n) ((uint8_t)((1 << (n)) - 1))
49
50	/** Number of data bits in a UTF-8 continuation byte. */
51	#define CONT_BITS 6
52
53	/** Decode a single UTF-8 character from a NULL-terminated string.
54	*
55	* Decode a single UTF-8 character from a plain char NULL-terminated
56	* string. Decoding starts at @index and this index is incremented
57	* if the current UTF-8 string is encoded in more than a single byte.
58	*
59	* @param str Plain character NULL-terminated string.
60	* @param index Index (counted in plain characters) where to start
61	* the decoding.
62	* @param limit Maximal allowed value of index.
63	*
64	* @return Decoded character in UTF-32 or '?' if the encoding is wrong.
65	*
66	*/
67	wchar_t utf8_decode(const char str, index_t index, index_t limit)
68	{
69	uint8_t b0, b; /* Bytes read from str. */
70	wchar_t ch;
71
72	int b0_bits; /* Data bits in first byte. */
73	int cbytes; /* Number of continuation bytes. */
74
75	if (*index > limit)
76	return invalch;
77
78	b0 = (uint8_t) str[*index];
79
80	/* Determine code length. */
81
82	if ((b0 & 0x80) == 0) {
83	/* 0xxxxxxx (Plain ASCII) */
84	b0_bits = 7;
85	cbytes = 0;
86	} else if ((b0 & 0xe0) == 0xc0) {
87	/* 110xxxxx 10xxxxxx */
88	b0_bits = 5;
89	cbytes = 1;
90	} else if ((b0 & 0xf0) == 0xe0) {
91	/* 1110xxxx 10xxxxxx 10xxxxxx */
92	b0_bits = 4;
93	cbytes = 2;
94	} else if ((b0 & 0xf8) == 0xf0) {
95	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
96	b0_bits = 3;
97	cbytes = 3;
98	} else {
99	/* 10xxxxxx -- unexpected continuation byte. */
100	return invalch;
101	}
102
103	if (*index + cbytes > limit) {
104	return invalch;
105	}
106
107	ch = b0 & LO_MASK_8(b0_bits);
108
109	/* Decode continuation bytes. */
110	while (cbytes > 0) {
111	b = (uint8_t) str[*index + 1];
112	++(*index);
113
114	/* Must be 10xxxxxx. */
115	if ((b & 0xc0) != 0x80) {
116	return invalch;
117	}
118
119	/* Shift data bits to ch. */
120	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
121	--cbytes;
122	}
123
124	return ch;
125	}
126
127	/** Encode a single UTF-32 character as UTF-8
128	*
129	* Encode a single UTF-32 character as UTF-8 and store it into
130	* the given buffer at @index. Encoding starts at @index and
131	* this index is incremented if the UTF-8 character takes
132	* more than a single byte.
133	*
134	* @param ch Input UTF-32 character.
135	* @param str Output buffer.
136	* @param index Index (counted in plain characters) where to start
137	* the encoding
138	* @param limit Maximal allowed value of index.
139	*
140	* @return True if the character was encoded or false if there is not
141	* enought space in the output buffer or the character is invalid
142	* Unicode code point.
143	*
144	*/
145	bool utf8_encode(const wchar_t ch, char str, index_t index, index_t limit)
146	{
147	if (*index > limit)
148	return false;
149
150	if ((ch >= 0) && (ch <= 127)) {
151	/* Plain ASCII (code points 0 .. 127) */
152	str[*index] = ch & 0x7f;
153	return true;
154	}
155
156	if ((ch >= 128) && (ch <= 2047)) {
157	/* Code points 128 .. 2047 */
158	if (*index + 1 > limit)
159	return false;
160
161	str[*index] = 0xc0 \| ((ch >> 6) & 0x1f);
162	(*index)++;
163	str[*index] = 0x80 \| (ch & 0x3f);
164	return true;
165	}
166
167	if ((ch >= 2048) && (ch <= 65535)) {
168	/* Code points 2048 .. 65535 */
169	if (*index + 2 > limit)
170	return false;
171
172	str[*index] = 0xe0 \| ((ch >> 12) & 0x0f);
173	(*index)++;
174	str[*index] = 0x80 \| ((ch >> 6) & 0x3f);
175	(*index)++;
176	str[*index] = 0x80 \| (ch & 0x3f);
177	return true;
178	}
179
180	if ((ch >= 65536) && (ch <= 1114111)) {
181	/* Code points 65536 .. 1114111 */
182	if (*index + 3 > limit)
183	return false;
184
185	str[*index] = 0xf0 \| ((ch >> 18) & 0x07);
186	(*index)++;
187	str[*index] = 0x80 \| ((ch >> 12) & 0x3f);
188	(*index)++;
189	str[*index] = 0x80 \| ((ch >> 6) & 0x3f);
190	(*index)++;
191	str[*index] = 0x80 \| (ch & 0x3f);
192	return true;
193	}
194
195	return false;
196	}
197
198	/** Get bytes used by UTF-8 characters.
199	*
200	* Get the number of bytes (count of plain characters) which
201	* are used by a given count of UTF-8 characters in a string.
202	* As UTF-8 encoding is multibyte, there is no constant
203	* correspondence between number of characters and used bytes.
204	*
205	* @param str UTF-8 string to consider.
206	* @param count Number of UTF-8 characters to count.
207	*
208	* @return Number of bytes used by the characters.
209	*
210	*/
211	size_t utf8_count_bytes(const char *str, count_t count)
212	{
213	size_t size = 0;
214	index_t index = 0;
215
216	while ((utf8_decode(str, &index, UTF8_NO_LIMIT) != 0) && (size < count)) {
217	size++;
218	index++;
219	}
220
221	return index;
222	}
223
/** Test whether a character lies in the plain ASCII range.
 *
 * @param ch Character to test.
 *
 * @return True if ch is in the range 0 .. 127, false otherwise.
 *
 */
bool ascii_check(const wchar_t ch)
{
	return (ch >= 0) && (ch <= 0x7f);
}
236
/** Test whether a character lies in the Unicode code point range.
 *
 * @param ch Character to test.
 *
 * @return True if ch is in the range 0 .. 1114111 (0x10ffff),
 *         false otherwise.
 *
 */
bool unicode_check(const wchar_t ch)
{
	return (ch >= 0) && (ch <= 0x10ffff);
}
249
/** Return number of plain characters in a string.
 *
 * The terminating zero byte is not counted.
 *
 * @param str NULL-terminated string.
 *
 * @return Number of characters (bytes) in str.
 *
 */
size_t strlen(const char *str)
{
	const char *end = str;

	/* Walk to the terminator; the distance travelled is the length. */
	while (*end != '\0')
		end++;

	return (size_t) (end - str);
}
264
265	/** Return number of UTF-8 characters in a string.
266	*
267	* @param str NULL-terminated UTF-8 string.
268	*
269	* @return Number of UTF-8 characters in str.
270	*
271	*/
272	size_t strlen_utf8(const char *str)
273	{
274	size_t size = 0;
275	index_t index = 0;
276
277	while (utf8_decode(str, &index, UTF8_NO_LIMIT) != 0) {
278	size++;
279	index++;
280	}
281
282	return size;
283	}
284
/** Return number of UTF-32 characters in a string.
 *
 * The terminating zero character is not counted.
 *
 * @param str NULL-terminated UTF-32 string.
 *
 * @return Number of UTF-32 characters in str.
 *
 */
size_t strlen_utf32(const wchar_t *str)
{
	size_t count = 0;

	while (str[count] != 0)
		count++;

	return count;
}
299
/** Compare two NULL terminated strings.
 *
 * Do a byte-by-byte comparison of two NULL terminated strings.
 * The strings are equal iff they have the same length and consist
 * of the same characters. A string that is a proper prefix of the
 * other one is considered smaller.
 *
 * Bytes are compared as unsigned values, so the ordering does not
 * depend on whether plain char is signed on the target.
 *
 * @param src First string to compare.
 * @param dst Second string to compare.
 *
 * @return 0 if the strings are equal, -1 if first is smaller,
 *         1 if second smaller.
 *
 */
int strcmp(const char *src, const char *dst)
{
	unsigned char a;
	unsigned char b;

	/* Skip the common prefix; stop at the first difference or at the end. */
	while ((*src != '\0') && (*src == *dst)) {
		src++;
		dst++;
	}

	/*
	 * Either both strings ended here (equal) or the bytes differ.
	 * A terminator compares lower than any other byte, which makes
	 * the shorter string the smaller one.
	 */
	a = (unsigned char) *src;
	b = (unsigned char) *dst;

	if (a < b)
		return -1;

	if (a > b)
		return 1;

	return 0;
}
328
329
/** Compare two NULL terminated strings up to a maximal length.
 *
 * Do a byte-by-byte comparison of at most len leading characters of
 * two NULL terminated strings. The strings are considered equal iff
 * those leading parts have the same length and consist of the same
 * characters. No byte at index len or beyond is ever read.
 *
 * Bytes are compared as unsigned values, so the ordering does not
 * depend on whether plain char is signed on the target.
 *
 * @param src First string to compare.
 * @param dst Second string to compare.
 * @param len Maximal length for comparison.
 *
 * @return 0 if the strings are equal, -1 if first is smaller,
 *         1 if second smaller.
 *
 */
int strncmp(const char *src, const char *dst, size_t len)
{
	size_t i;
	unsigned char a;
	unsigned char b;

	for (i = 0; i < len; i++) {
		a = (unsigned char) src[i];
		b = (unsigned char) dst[i];

		/* Also covers src ending first (a is the terminator). */
		if (a < b)
			return -1;

		/* Also covers dst ending first (b is the terminator). */
		if (a > b)
			return 1;

		/* Bytes are equal; both strings end here. */
		if (a == '\0')
			return 0;
	}

	/* The first len characters match. */
	return 0;
}
364
365
366
/** Copy NULL terminated string.
 *
 * Copy characters from string 'src' to 'dest', writing at most 'len'
 * bytes including the terminator. Unlike the ISO C function of the
 * same name, the result is always NULL-terminated when 'len' is
 * non-zero (a longer source is truncated to 'len' - 1 characters),
 * the rest of the buffer is not padded and nothing is returned.
 *
 * If 'len' is zero, nothing is written.
 *
 * @param dest Destination buffer.
 * @param src  Source string.
 * @param len  Size of destination buffer.
 *
 */
void strncpy(char *dest, const char *src, size_t len)
{
	size_t i;

	/* No room even for the terminator; dest[len - 1] would be out of bounds. */
	if (len == 0)
		return;

	/* Copy at most len - 1 characters, keeping one byte for the terminator. */
	for (i = 0; (i < len - 1) && (src[i] != '\0'); i++)
		dest[i] = src[i];

	dest[i] = '\0';
}
389
/** Find first occurrence of character in string.
 *
 * The terminator itself is never matched: unlike the ISO C function
 * of the same name, searching for '\0' returns NULL.
 *
 * @param s String to search.
 * @param i Character to look for.
 *
 * @return Pointer to character in @a s or NULL if not found.
 */
extern char *strchr(const char *s, int i)
{
	for (; *s != '\0'; ++s) {
		if (*s == i) {
			/* The interface returns a non-const pointer into s. */
			return (char *) s;
		}
	}

	return NULL;
}
407
408	/** @}
409	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: