source: mainline/kernel/generic/src/lib/string.c@ ac7c8d12

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since ac7c8d12 was 21a639b7, checked in by Martin Decky <martin@…>, 16 years ago

UTF-8 decoding routine

  • Property mode set to 100644
File size: 6.1 KB
Line 
1/*
2 * Copyright (c) 2001-2004 Jakub Jermar
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup generic
30 * @{
31 */
32
33/**
34 * @file
35 * @brief Miscellaneous functions.
36 */
37
38#include <string.h>
39#include <print.h>
40#include <cpu.h>
41#include <arch/asm.h>
42#include <arch.h>
43#include <console/kconsole.h>
44
45/** Decode a single UTF-8 character from a NULL-terminated string.
46 *
47 * Decode a single UTF-8 character from a plain char NULL-terminated
48 * string. Decoding starts at @index and this index is incremented
49 * if the current UTF-8 string is encoded in more than a single byte.
50 *
51 * @param str Plain character NULL-terminated string.
52 * @param index Index (counted in plain characters) where to start
53 * the decoding.
54 *
55 * @return Decoded character in UTF-32 or '?' if the encoding is wrong.
56 *
57 */
58wchar_t utf8_decode(const char *str, index_t *index)
59{
60 uint8_t c1; /* First plain character from str */
61 uint8_t c2; /* Second plain character from str */
62 uint8_t c3; /* Third plain character from str */
63 uint8_t c4; /* Fourth plain character from str */
64
65 c1 = (uint8_t) str[*index];
66
67 if ((c1 & 0x80) == 0) {
68 /* Plain ASCII (code points 0 .. 127) */
69 return (wchar_t) c1;
70 } else if ((c1 & 0xe0) == 0xc0) {
71 /* Code points 128 .. 2047 */
72 c2 = (uint8_t) str[*index + 1];
73 if ((c2 & 0xc0) == 0x80) {
74 (*index)++;
75 return ((wchar_t) ((c1 & 0x1f) << 6) | (c2 & 0x3f));
76 } else
77 return ((wchar_t) '?');
78 } else if ((c1 & 0xf0) == 0xe0) {
79 /* Code points 2048 .. 65535 */
80 c2 = (uint8_t) str[*index + 1];
81 if ((c2 & 0xc0) == 0x80) {
82 (*index)++;
83 c3 = (uint8_t) str[*index + 1];
84 if ((c3 & 0xc0) == 0x80) {
85 (*index)++;
86 return ((wchar_t) ((c1 & 0x0f) << 12) | ((c2 & 0x3f) << 6) | (c3 & 0x3f));
87 } else
88 return ((wchar_t) '?');
89 } else
90 return ((wchar_t) '?');
91 } else if ((c1 & 0xf8) == 0xf0) {
92 /* Code points 65536 .. 1114111 */
93 c2 = (uint8_t) str[*index + 1];
94 if ((c2 & 0xc0) == 0x80) {
95 (*index)++;
96 c3 = (uint8_t) str[*index + 1];
97 if ((c3 & 0xc0) == 0x80) {
98 (*index)++;
99 c4 = (uint8_t) str[*index + 1];
100 if ((c4 & 0xc0) == 0x80) {
101 (*index)++;
102 return ((wchar_t) ((c1 & 0x07) << 18) | ((c2 & 0x3f) << 12) | ((c3 & 0x3f) << 6) | (c4 & 0x3f));
103 } else
104 return ((wchar_t) '?');
105 } else
106 return ((wchar_t) '?');
107 } else
108 return ((wchar_t) '?');
109 }
110
111 return ((wchar_t) '?');
112}
113
/** Return number of characters in a string.
 *
 * Counts the plain characters (bytes) that precede the terminating
 * NUL character.
 *
 * @param str NULL terminated string.
 *
 * @return Number of plain characters in str, not counting the terminator.
 *
 */
size_t strlen(const char *str)
{
	/* size_t matches the return type and cannot overflow into UB */
	size_t len = 0;
	
	while (str[len] != '\0')
		len++;
	
	return len;
}
129
/** Compare two NULL terminated strings
 *
 * Do a char-by-char comparison of two NULL terminated strings.
 * Characters are compared as unsigned values, so the ordering does
 * not depend on whether plain char is signed on the target. The
 * strings are equal only if they have the same length and the same
 * characters; a string that is a proper prefix of the other one is
 * considered smaller.
 *
 * @param src First string to compare.
 * @param dst Second string to compare.
 *
 * @return 0 if the strings are equal, -1 if first is smaller, 1 if second smaller.
 *
 */
int strcmp(const char *src, const char *dst)
{
	unsigned char a;
	unsigned char b;
	
	for (;; src++, dst++) {
		a = (unsigned char) *src;
		b = (unsigned char) *dst;
		
		/* Also covers one string ending first: NUL sorts lowest */
		if (a != b)
			return (a < b) ? -1 : 1;
		
		/* Both strings ended at the same position */
		if (a == '\0')
			return 0;
	}
}
158
159
/** Compare two NULL terminated strings
 *
 * Do a char-by-char comparison of two NULL terminated strings,
 * looking at no more than @a len characters of each. Characters are
 * compared as unsigned values, so the ordering does not depend on
 * whether plain char is signed on the target. If one string ends
 * within the first @a len characters and the other does not, the
 * shorter one is considered smaller. No character at or beyond
 * position @a len is read.
 *
 * @param src First string to compare.
 * @param dst Second string to compare.
 * @param len Maximal length for comparison.
 *
 * @return 0 if the strings are equal, -1 if first is smaller, 1 if second smaller.
 *
 */
int strncmp(const char *src, const char *dst, size_t len)
{
	size_t i;
	unsigned char a;
	unsigned char b;
	
	/* The bound is tested before either string is dereferenced */
	for (i = 0; i < len; i++) {
		a = (unsigned char) src[i];
		b = (unsigned char) dst[i];
		
		/* Also covers one string ending first: NUL sorts lowest */
		if (a != b)
			return (a < b) ? -1 : 1;
		
		/* Both strings ended before the limit was reached */
		if (a == '\0')
			return 0;
	}
	
	/* The first len characters are identical */
	return 0;
}
194
195
196
/** Copy NULL terminated string.
 *
 * Copy at most 'len' characters from string 'src' to 'dest',
 * including the terminating '\0'. If 'src' does not fit, the copy is
 * truncated and 'dest[len - 1]' is overwritten with '\0', so 'dest'
 * is always terminated when 'len' is non-zero. Unlike the ISO C
 * function of the same name, the rest of 'dest' is not padded and
 * nothing is returned.
 *
 * If 'len' is zero, 'dest' is not touched at all.
 *
 * @param dest Destination buffer.
 * @param src  Source string.
 * @param len  Size of destination buffer.
 *
 */
void strncpy(char *dest, const char *src, size_t len)
{
	size_t i;
	
	/* Nothing fits; writing dest[len - 1] would be out of bounds */
	if (len == 0)
		return;
	
	for (i = 0; i < len; i++) {
		dest[i] = src[i];
		
		/* The terminator has been copied, the string is complete */
		if (dest[i] == '\0')
			return;
	}
	
	/* Source did not fit: sacrifice the last character for '\0' */
	dest[len - 1] = '\0';
}
219
220/** Find first occurence of character in string.
221 *
222 * @param s String to search.
223 * @param i Character to look for.
224 *
225 * @return Pointer to character in @a s or NULL if not found.
226 */
227extern char *strchr(const char *s, int i)
228{
229 while (*s != '\0') {
230 if (*s == i)
231 return (char *) s;
232 ++s;
233 }
234
235 return NULL;
236}
237
238/** @}
239 */
Note: See TracBrowser for help on using the repository browser.