Context Navigation

source: mainline/kernel/generic/src/lib/string.c@ 0dd1d444

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since 0dd1d444 was 0dd1d444, checked in by Jiri Svoboda <jirik.svoboda@…>, 16 years ago
Slightly 'decompile' character decoder.
Property mode set to `100644`
File size: 9.3 KB

Rev	Line
[16da5f8e]	1	/*
	2	* Copyright (c) 2001-2004 Jakub Jermar
	3	* All rights reserved.
	4	*
	5	* Redistribution and use in source and binary forms, with or without
	6	* modification, are permitted provided that the following conditions
	7	* are met:
	8	*
	9	* - Redistributions of source code must retain the above copyright
	10	* notice, this list of conditions and the following disclaimer.
	11	* - Redistributions in binary form must reproduce the above copyright
	12	* notice, this list of conditions and the following disclaimer in the
	13	* documentation and/or other materials provided with the distribution.
	14	* - The name of the author may not be used to endorse or promote products
	15	* derived from this software without specific prior written permission.
	16	*
	17	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	18	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	19	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	20	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
	21	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	22	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	23	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	24	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	25	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
	26	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	27	*/
	28
[2f57690]	29	/** @addtogroup generic
[16da5f8e]	30	* @{
	31	*/
	32
	33	/**
	34	* @file
[2f57690]	35	* @brief Miscellaneous functions.
[16da5f8e]	36	*/
	37
	38	#include <string.h>
	39	#include <print.h>
	40	#include <cpu.h>
	41	#include <arch/asm.h>
	42	#include <arch.h>
	43	#include <console/kconsole.h>
	44
/** Character returned by utf8_decode() in place of an undecodable sequence. */
char invalch = '?';

/** Byte-wide mask with the low @a n bits set (bits 0 .. n - 1). */
#define LO_MASK_8(n)  ((uint8_t) ((1 << (n)) - 1))

/** Payload bits carried by a single UTF-8 continuation byte (10xxxxxx). */
#define CONT_BITS  6
	52
[21a639b7]	53	/** Decode a single UTF-8 character from a NULL-terminated string.
	54	*
	55	* Decode a single UTF-8 character from a plain char NULL-terminated
	56	* string. Decoding starts at @index and this index is incremented
	57	* if the current UTF-8 string is encoded in more than a single byte.
	58	*
	59	* @param str Plain character NULL-terminated string.
	60	* @param index Index (counted in plain characters) where to start
	61	* the decoding.
[74c8da2c]	62	* @param limit Maximal allowed value of index.
[21a639b7]	63	*
	64	* @return Decoded character in UTF-32 or '?' if the encoding is wrong.
	65	*
	66	*/
[74c8da2c]	67	wchar_t utf8_decode(const char str, index_t index, index_t limit)
[21a639b7]	68	{
[0dd1d444]	69	uint8_t b0, b; /* Bytes read from str. */
	70	wchar_t ch;
	71
	72	int b0_bits; /* Data bits in first byte. */
	73	int cbytes; /* Number of continuation bytes. */
	74
[74c8da2c]	75	if (*index > limit)
	76	return invalch;
[0dd1d444]	77
	78	b0 = (uint8_t) str[*index];
	79
	80	/* Determine code length. */
	81
	82	if ((b0 & 0x80) == 0) {
	83	/* 0xxxxxxx (Plain ASCII) */
	84	b0_bits = 7;
	85	cbytes = 0;
	86	} else if ((b0 & 0xe0) == 0xc0) {
	87	/* 110xxxxx 10xxxxxx */
	88	b0_bits = 5;
	89	cbytes = 1;
	90	} else if ((b0 & 0xf0) == 0xe0) {
	91	/* 1110xxxx 10xxxxxx 10xxxxxx */
	92	b0_bits = 4;
	93	cbytes = 2;
	94	} else if ((b0 & 0xf8) == 0xf0) {
	95	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
	96	b0_bits = 3;
	97	cbytes = 3;
	98	} else {
	99	/* 10xxxxxx -- unexpected continuation byte. */
	100	return invalch;
[74c8da2c]	101	}
[0dd1d444]	102
	103	if (*index + cbytes > limit) {
	104	return invalch;
[74c8da2c]	105	}
[0dd1d444]	106
	107	ch = b0 & LO_MASK_8(b0_bits);
	108
	109	/* Decode continuation bytes. */
	110	while (cbytes > 0) {
	111	b = (uint8_t) str[*index + 1];
	112	++(*index);
	113
	114	/* Must be 10xxxxxx. */
	115	if ((b & 0xc0) != 0x80) {
[74c8da2c]	116	return invalch;
[0dd1d444]	117	}
	118
	119	/* Shift data bits to ch. */
	120	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
	121	--cbytes;
[74c8da2c]	122	}
[0dd1d444]	123
	124	return ch;
[74c8da2c]	125	}
	126
	127	/** Encode a single UTF-32 character as UTF-8
	128	*
	129	* Encode a single UTF-32 character as UTF-8 and store it into
	130	* the given buffer at @index. Encoding starts at @index and
	131	* this index is incremented if the UTF-8 character takes
	132	* more than a single byte.
	133	*
	134	* @param ch Input UTF-32 character.
	135	* @param str Output buffer.
	136	* @param index Index (counted in plain characters) where to start
	137	* the encoding
	138	* @param limit Maximal allowed value of index.
	139	*
	140	* @return True if the character was encoded or false if there is not
	141	* enought space in the output buffer or the character is invalid
	142	* Unicode code point.
	143	*
	144	*/
	145	bool utf8_encode(const wchar_t ch, char str, index_t index, index_t limit)
	146	{
	147	if (*index > limit)
	148	return false;
	149
	150	if ((ch >= 0) && (ch <= 127)) {
	151	/* Plain ASCII (code points 0 .. 127) */
	152	str[*index] = ch & 0x7f;
	153	return true;
	154	}
	155
	156	if ((ch >= 128) && (ch <= 2047)) {
	157	/* Code points 128 .. 2047 */
	158	if (*index + 1 > limit)
	159	return false;
	160
	161	str[*index] = 0xc0 \| ((ch >> 6) & 0x1f);
	162	(*index)++;
	163	str[*index] = 0x80 \| (ch & 0x3f);
	164	return true;
	165	}
	166
	167	if ((ch >= 2048) && (ch <= 65535)) {
	168	/* Code points 2048 .. 65535 */
	169	if (*index + 2 > limit)
	170	return false;
	171
	172	str[*index] = 0xe0 \| ((ch >> 12) & 0x0f);
	173	(*index)++;
	174	str[*index] = 0x80 \| ((ch >> 6) & 0x3f);
	175	(*index)++;
	176	str[*index] = 0x80 \| (ch & 0x3f);
	177	return true;
	178	}
	179
	180	if ((ch >= 65536) && (ch <= 1114111)) {
	181	/* Code points 65536 .. 1114111 */
	182	if (*index + 3 > limit)
	183	return false;
	184
	185	str[*index] = 0xf0 \| ((ch >> 18) & 0x07);
	186	(*index)++;
	187	str[*index] = 0x80 \| ((ch >> 12) & 0x3f);
	188	(*index)++;
	189	str[*index] = 0x80 \| ((ch >> 6) & 0x3f);
	190	(*index)++;
	191	str[*index] = 0x80 \| (ch & 0x3f);
	192	return true;
	193	}
	194
	195	return false;
	196	}
	197
	198	/** Get bytes used by UTF-8 characters.
	199	*
	200	* Get the number of bytes (count of plain characters) which
	201	* are used by a given count of UTF-8 characters in a string.
	202	* As UTF-8 encoding is multibyte, there is no constant
	203	* correspondence between number of characters and used bytes.
	204	*
	205	* @param str UTF-8 string to consider.
	206	* @param count Number of UTF-8 characters to count.
	207	*
	208	* @return Number of bytes used by the characters.
	209	*
	210	*/
	211	size_t utf8_count_bytes(const char *str, count_t count)
	212	{
	213	size_t size = 0;
	214	index_t index = 0;
	215
	216	while ((utf8_decode(str, &index, UTF8_NO_LIMIT) != 0) && (size < count)) {
	217	size++;
	218	index++;
[21a639b7]	219	}
	220
[74c8da2c]	221	return index;
	222	}
	223
	224	/** Check whether character is plain ASCII.
	225	*
	226	* @return True if character is plain ASCII.
	227	*
	228	*/
	229	bool ascii_check(const wchar_t ch)
	230	{
	231	if ((ch >= 0) && (ch <= 127))
	232	return true;
	233
	234	return false;
	235	}
	236
	237	/** Check whether character is Unicode.
	238	*
	239	* @return True if character is valid Unicode code point.
	240	*
	241	*/
	242	bool unicode_check(const wchar_t ch)
	243	{
	244	if ((ch >= 0) && (ch <= 1114111))
	245	return true;
	246
	247	return false;
[21a639b7]	248	}
	249
/** Return number of plain characters in a string.
 *
 * @param str NUL-terminated string.
 *
 * @return Number of bytes preceding the terminator in @a str.
 *
 */
size_t strlen(const char *str)
{
	const char *end = str;

	/* Walk to the terminator; the distance covered is the length. */
	while (*end != '\0')
		end++;

	return (size_t) (end - str);
}
	264
	265	/** Return number of UTF-8 characters in a string.
	266	*
	267	* @param str NULL-terminated UTF-8 string.
	268	*
	269	* @return Number of UTF-8 characters in str.
	270	*
	271	*/
	272	size_t strlen_utf8(const char *str)
	273	{
	274	size_t size = 0;
	275	index_t index = 0;
[16da5f8e]	276
[74c8da2c]	277	while (utf8_decode(str, &index, UTF8_NO_LIMIT) != 0) {
	278	size++;
	279	index++;
	280	}
	281
	282	return size;
	283	}
	284
	285	/** Return number of UTF-32 characters in a string.
	286	*
	287	* @param str NULL-terminated UTF-32 string.
	288	*
	289	* @return Number of UTF-32 characters in str.
	290	*
	291	*/
	292	size_t strlen_utf32(const wchar_t *str)
	293	{
	294	size_t size;
	295	for (size = 0; str[size]; size++);
[16da5f8e]	296
[74c8da2c]	297	return size;
[16da5f8e]	298	}
	299
	300	/** Compare two NULL terminated strings
	301	*
	302	* Do a char-by-char comparison of two NULL terminated strings.
	303	* The strings are considered equal iff they consist of the same
	304	* characters on the minimum of their lengths.
	305	*
	306	* @param src First string to compare.
	307	* @param dst Second string to compare.
	308	*
	309	* @return 0 if the strings are equal, -1 if first is smaller, 1 if second smaller.
	310	*
	311	*/
	312	int strcmp(const char src, const char dst)
	313	{
	314	for (; src && dst; src++, dst++) {
	315	if (src < dst)
	316	return -1;
	317	if (src > dst)
	318	return 1;
	319	}
	320	if (src == dst)
	321	return 0;
[2f57690]	322
[16da5f8e]	323	if (!*src)
	324	return -1;
[2f57690]	325
[16da5f8e]	326	return 1;
	327	}
	328
	329
	330	/** Compare two NULL terminated strings
	331	*
	332	* Do a char-by-char comparison of two NULL terminated strings.
	333	* The strings are considered equal iff they consist of the same
	334	* characters on the minimum of their lengths and specified maximal
	335	* length.
	336	*
	337	* @param src First string to compare.
	338	* @param dst Second string to compare.
	339	* @param len Maximal length for comparison.
	340	*
	341	* @return 0 if the strings are equal, -1 if first is smaller, 1 if second smaller.
	342	*
	343	*/
	344	int strncmp(const char src, const char dst, size_t len)
	345	{
	346	unsigned int i;
	347
	348	for (i = 0; (src) && (dst) && (i < len); src++, dst++, i++) {
	349	if (src < dst)
	350	return -1;
[2f57690]	351
[16da5f8e]	352	if (src > dst)
	353	return 1;
	354	}
[2f57690]	355
[16da5f8e]	356	if (i == len \|\| src == dst)
	357	return 0;
[2f57690]	358
[16da5f8e]	359	if (!*src)
	360	return -1;
[2f57690]	361
[16da5f8e]	362	return 1;
	363	}
	364
	365
	366
	367	/** Copy NULL terminated string.
	368	*
	369	* Copy at most 'len' characters from string 'src' to 'dest'.
	370	* If 'src' is shorter than 'len', '\0' is inserted behind the
	371	* last copied character.
	372	*
[2f57690]	373	* @param src Source string.
[16da5f8e]	374	* @param dest Destination buffer.
[2f57690]	375	* @param len Size of destination buffer.
	376	*
[16da5f8e]	377	*/
	378	void strncpy(char dest, const char src, size_t len)
	379	{
	380	unsigned int i;
[2f57690]	381
[16da5f8e]	382	for (i = 0; i < len; i++) {
	383	if (!(dest[i] = src[i]))
	384	return;
	385	}
[2f57690]	386
[16da5f8e]	387	dest[i - 1] = '\0';
	388	}
	389
/** Find first occurrence of character in string.
 *
 * The terminating NUL is not part of the searched range, so looking for
 * '\0' yields NULL.
 *
 * @param s String to search.
 * @param i Character to look for.
 *
 * @return Pointer to character in @a s or NULL if not found.
 */
char *strchr(const char *s, int i)
{
	for (; *s != '\0'; s++) {
		if (*s == i)
			return (char *) s;
	}

	return NULL;
}
	407
[16da5f8e]	408	/** @}
	409	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: