Context Navigation

source: mainline/kernel/generic/src/lib/string.c@ 7ce3cb2

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since 7ce3cb2 was 82bb9c1, checked in by Jiri Svoboda <jirik.svoboda@…>, 16 years ago
Consider character display width somewhat. Explain naming scheme.
Property mode set to `100644`
File size: 11.8 KB

Rev	Line
[16da5f8e]	1	/*
	2	* Copyright (c) 2001-2004 Jakub Jermar
	3	* All rights reserved.
	4	*
	5	* Redistribution and use in source and binary forms, with or without
	6	* modification, are permitted provided that the following conditions
	7	* are met:
	8	*
	9	* - Redistributions of source code must retain the above copyright
	10	* notice, this list of conditions and the following disclaimer.
	11	* - Redistributions in binary form must reproduce the above copyright
	12	* notice, this list of conditions and the following disclaimer in the
	13	* documentation and/or other materials provided with the distribution.
	14	* - The name of the author may not be used to endorse or promote products
	15	* derived from this software without specific prior written permission.
	16	*
	17	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	18	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	19	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	20	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
	21	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	22	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	23	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	24	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	25	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
	26	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	27	*/
	28
[2f57690]	29	/** @addtogroup generic
[16da5f8e]	30	* @{
	31	*/
	32
	33	/**
	34	* @file
[82bb9c1]	35	* @brief String functions.
	36	*
	37	* Strings and characters use the Universal Character Set (UCS). Standard
	38	* strings, referred to simply as strings, are encoded in UTF-8. Wide strings
	39	* (encoded in UTF-32) are supported to a limited degree. A single character
	40	* is represented as wchar_t.
	41	*
	42	* Strings have the following metrics:
	43	*
	44	* Metric Abbrev. Meaning
	45	* ------ ------ -------
	46	* size n Number of bytes the string is encoded into, excluding
	47	* the null terminator.
	48	* length l The number of characters in the string, excluding
	49	* the null terminator.
	50	* width w The number of character cells the string takes up on a
	51	* monospace display.
	52	*
	53	* Naming scheme:
	54	*
	55	* chr_xxx operate on characters
	56	* str_xxx operate on strings
	57	* wstr_xxx operate on wide strings
	58	*
	59	* [w]str_[n|l|w]xxx operate on a prefix limited by size, length
	60	* or width.
[16da5f8e]	61	*/
	62
	63	#include <string.h>
	64	#include <print.h>
	65	#include <cpu.h>
	66	#include <arch/asm.h>
	67	#include <arch.h>
[d09f84e6]	68	#include <errno.h>
[16da5f8e]	69	#include <console/kconsole.h>
	70
/** Character returned by chr_decode() when the input cannot be decoded. */
char invalch = '?';

/** Byte mask consisting of lowest @a n bits (out of eight), 0 <= n <= 8. */
#define LO_MASK_8(n)  ((uint8_t) ((1U << (n)) - 1))

/**
 * Mask consisting of lowest @a n bits (out of 32), 0 <= n <= 31.
 *
 * The shift is done on an unsigned 32-bit operand so that n = 31 does not
 * overflow a signed int.
 */
#define LO_MASK_32(n)  ((uint32_t) (((uint32_t) 1 << (n)) - 1))

/**
 * Byte mask consisting of highest @a n bits (out of eight), 0 <= n <= 8.
 *
 * The result is truncated back to eight bits; without the cast the integer
 * promotion performed by '~' would set all bits above the low byte.
 */
#define HI_MASK_8(n)  ((uint8_t) ~LO_MASK_8(8 - (n)))

/** Number of data bits in a UTF-8 continuation byte. */
#define CONT_BITS  6
	84
[e1813cf]	85	/** Decode a single character from a substring.
[21a639b7]	86	*
[e1813cf]	87	* Decode a single character from a substring of size @a sz. Decoding starts
	88	* at @a offset and this offset is moved to the beginning of the next
	89	* character. In case of decoding error, offset generally advances at least
	90	* by one. However, offset is never moved beyond (str + sz).
[21a639b7]	91	*
[e1813cf]	92	* @param str String (not necessarily NULL-terminated).
[21a639b7]	93	* @param index Index (counted in plain characters) where to start
	94	* the decoding.
[e1813cf]	95	* @param limit Size of the substring.
[21a639b7]	96	*
[e1813cf]	97	* @return Value of decoded character or '?' on decoding error.
[21a639b7]	98	*/
[e1813cf]	99	wchar_t chr_decode(const char str, size_t offset, size_t sz)
[21a639b7]	100	{
[0dd1d444]	101	uint8_t b0, b; /* Bytes read from str. */
	102	wchar_t ch;
	103
	104	int b0_bits; /* Data bits in first byte. */
	105	int cbytes; /* Number of continuation bytes. */
	106
[e1813cf]	107	if (*offset + 1 > sz)
[74c8da2c]	108	return invalch;
[0dd1d444]	109
[e1813cf]	110	b0 = (uint8_t) str[(*offset)++];
[0dd1d444]	111
	112	/* Determine code length. */
	113
	114	if ((b0 & 0x80) == 0) {
	115	/* 0xxxxxxx (Plain ASCII) */
	116	b0_bits = 7;
	117	cbytes = 0;
	118	} else if ((b0 & 0xe0) == 0xc0) {
	119	/* 110xxxxx 10xxxxxx */
	120	b0_bits = 5;
	121	cbytes = 1;
	122	} else if ((b0 & 0xf0) == 0xe0) {
	123	/* 1110xxxx 10xxxxxx 10xxxxxx */
	124	b0_bits = 4;
	125	cbytes = 2;
	126	} else if ((b0 & 0xf8) == 0xf0) {
	127	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
	128	b0_bits = 3;
	129	cbytes = 3;
	130	} else {
	131	/* 10xxxxxx -- unexpected continuation byte. */
	132	return invalch;
[74c8da2c]	133	}
[0dd1d444]	134
[e1813cf]	135	if (*offset + cbytes > sz) {
[0dd1d444]	136	return invalch;
[74c8da2c]	137	}
[0dd1d444]	138
	139	ch = b0 & LO_MASK_8(b0_bits);
	140
	141	/* Decode continuation bytes. */
	142	while (cbytes > 0) {
[e1813cf]	143	b = (uint8_t) str[(*offset)++];
[0dd1d444]	144
	145	/* Must be 10xxxxxx. */
	146	if ((b & 0xc0) != 0x80) {
[74c8da2c]	147	return invalch;
[0dd1d444]	148	}
	149
	150	/* Shift data bits to ch. */
	151	ch = (ch << CONT_BITS) \| (wchar_t) (b & LO_MASK_8(CONT_BITS));
	152	--cbytes;
[74c8da2c]	153	}
[0dd1d444]	154
	155	return ch;
[74c8da2c]	156	}
	157
[e1813cf]	158	/** Encode a single character to string representation.
[74c8da2c]	159	*
[e1813cf]	160	* Encode a single character to string representation (i.e. UTF-8) and store
	161	* it into a buffer at @a offset. Encoding starts at @a offset and this offset
	162	* is moved to the position where the next character can be written to.
[74c8da2c]	163	*
[e1813cf]	164	* @param ch Input character.
	165	* @param str Output buffer.
	166	* @param offset Offset (in bytes) where to start writing.
	167	* @param sz Size of the output buffer.
[74c8da2c]	168	*
[d09f84e6]	169	* @return EOK if the character was encoded successfully, EOVERFLOW if there
	170	* was not enough space in the output buffer or EINVAL if the character
	171	* code was invalid.
[74c8da2c]	172	*/
[82bb9c1]	173	int chr_encode(wchar_t ch, char str, size_t offset, size_t sz)
[74c8da2c]	174	{
[32704cb]	175	uint32_t cc; /* Unsigned version of ch. */
	176
	177	int cbytes; /* Number of continuation bytes. */
	178	int b0_bits; /* Number of data bits in first byte. */
	179	int i;
	180
[e1813cf]	181	if (*offset >= sz)
[d09f84e6]	182	return EOVERFLOW;
[32704cb]	183
	184	if (ch < 0)
[d09f84e6]	185	return EINVAL;
[32704cb]	186
	187	/* Bit operations should only be done on unsigned numbers. */
	188	cc = (uint32_t) ch;
	189
	190	/* Determine how many continuation bytes are needed. */
	191	if ((cc & ~LO_MASK_32(7)) == 0) {
	192	b0_bits = 7;
	193	cbytes = 0;
	194	} else if ((cc & ~LO_MASK_32(11)) == 0) {
	195	b0_bits = 5;
	196	cbytes = 1;
	197	} else if ((cc & ~LO_MASK_32(16)) == 0) {
	198	b0_bits = 4;
	199	cbytes = 2;
	200	} else if ((cc & ~LO_MASK_32(21)) == 0) {
	201	b0_bits = 3;
	202	cbytes = 3;
	203	} else {
	204	/* Codes longer than 21 bits are not supported. */
[d09f84e6]	205	return EINVAL;
[74c8da2c]	206	}
[32704cb]	207
	208	/* Check for available space in buffer. */
[e1813cf]	209	if (*offset + cbytes >= sz)
[d09f84e6]	210	return EOVERFLOW;
[32704cb]	211
	212	/* Encode continuation bytes. */
	213	for (i = cbytes; i > 0; --i) {
[e1813cf]	214	str[*offset + i] = 0x80 \| (cc & LO_MASK_32(CONT_BITS));
[32704cb]	215	cc = cc >> CONT_BITS;
[74c8da2c]	216	}
[32704cb]	217
	218	/* Encode first byte. */
[e1813cf]	219	str[*offset] = (cc & LO_MASK_32(b0_bits)) \| HI_MASK_8(8 - b0_bits - 1);
[32704cb]	220
[e1813cf]	221	/* Advance offset. */
	222	*offset += (1 + cbytes);
[74c8da2c]	223
[d09f84e6]	224	return EOK;
[74c8da2c]	225	}
	226
[82bb9c1]	227	/** Get display width of character.
	228	*
	229	* @param ch The character.
	230	* @return Character width in display cells.
	231	*/
	232	count_t chr_width(wchar_t ch)
	233	{
	234	return 1;
	235	}
	236
[f25b2819]	237	/** Get size of string, with length limit.
[74c8da2c]	238	*
[f25b2819]	239	* Get the number of bytes which are used by up to @a max_len first
	240	* characters in the string @a str. If @a max_len is greater than
	241	* the length of @a str, the entire string is measured.
[74c8da2c]	242	*
[82bb9c1]	243	* @param str String to consider.
	244	* @param count Maximum number of characters to measure.
[74c8da2c]	245	*
[82bb9c1]	246	* @return Number of bytes used by the characters.
[74c8da2c]	247	*/
[f25b2819]	248	size_t str_lsize(const char *str, count_t max_len)
[74c8da2c]	249	{
[f25b2819]	250	count_t len = 0;
	251	size_t cur = 0;
	252	size_t prev;
[b54d2f1]	253	wchar_t ch;
[f25b2819]	254
[b54d2f1]	255	while (true) {
[f25b2819]	256	prev = cur;
	257	if (len >= max_len)
[b54d2f1]	258	break;
[f25b2819]	259	ch = chr_decode(str, &cur, UTF8_NO_LIMIT);
[b54d2f1]	260	if (ch == '\0') break;
	261
[f25b2819]	262	len++;
[21a639b7]	263	}
[f25b2819]	264
	265	return prev;
[74c8da2c]	266	}
	267
[82bb9c1]	268	/** Get size of string, with width limit.
	269	*
	270	* Get the number of bytes which are used by the longest prefix of @a str
	271	* that can fit into @a max_width display cells.
	272	*
	273	* @param str String to consider.
	274	* @param count Maximum number of display cells.
	275	*
	276	* @return Number of bytes used by the characters that fit.
	277	*/
	278	size_t str_wsize(const char *str, count_t max_width)
	279	{
	280	count_t width = 0;
	281	size_t cur = 0;
	282	size_t prev;
	283	wchar_t ch;
	284
	285	while (true) {
	286	prev = cur;
	287	if (width >= max_width)
	288	break;
	289	ch = chr_decode(str, &cur, UTF8_NO_LIMIT);
	290	if (ch == '\0') break;
	291
	292	width += chr_width(ch);
	293	}
	294
	295	return prev;
	296	}
	297
	298
	299	/** Get length of wide string, with width limit.
	300	*
	301	* Get the number of characters in a wide string that can fit into @a max_width
	302	* display cells.
	303	*
	304	* @param wstr Wide string to consider.
	305	* @param count Maximum number of display cells.
	306	*
	307	* @return Number of bytes used by the characters that fit.
	308	*/
	309	count_t wstr_wlength(const wchar_t *wstr, count_t max_width)
	310	{
	311	count_t width = 0;
	312	index_t cur = 0;
	313
	314	while (true) {
	315	if (width >= max_width)
	316	break;
	317	if (wstr[cur] == '\0') break;
	318
	319	width += chr_width(wstr[cur]);
	320	++cur;
	321	}
	322
	323	return (count_t) cur;
	324	}
	325
/** Check whether character is plain ASCII.
 *
 * @param ch Character to test.
 *
 * @return True if @a ch lies in the range 0 to 127 inclusive.
 */
bool ascii_check(const wchar_t ch)
{
	return (ch >= 0) && (ch <= 127);
}
	338
	339	/** Check whether character is Unicode.
	340	*
	341	* @return True if character is valid Unicode code point.
	342	*/
	343	bool unicode_check(const wchar_t ch)
	344	{
	345	if ((ch >= 0) && (ch <= 1114111))
	346	return true;
	347
	348	return false;
[21a639b7]	349	}
	350
/** Return number of bytes the string occupies.
 *
 * @param str A NULL-terminated string.
 *
 * @return Number of bytes in @a str excluding the null terminator.
 */
size_t str_size(const char *str)
{
	const char *end = str;

	/* Walk to the terminator; the distance travelled is the size. */
	while (*end != '\0')
		end++;

	return (size_t) (end - str);
}
	366
[f25b2819]	367	/** Return number of characters in a string.
[74c8da2c]	368	*
[f25b2819]	369	* @param str NULL-terminated string.
	370	* @return Number of characters in string.
[74c8da2c]	371	*/
[f25b2819]	372	count_t str_length(const char *str)
[74c8da2c]	373	{
[f25b2819]	374	count_t len = 0;
	375	size_t offset = 0;
	376
	377	while (chr_decode(str, &offset, UTF8_NO_LIMIT) != 0) {
	378	len++;
[74c8da2c]	379	}
[f25b2819]	380
	381	return len;
[74c8da2c]	382	}
	383
[f25b2819]	384	/** Return number of characters in a wide string.
[74c8da2c]	385	*
[82bb9c1]	386	* @param str NULL-terminated wide string.
	387	* @return Number of characters in @a str.
[74c8da2c]	388	*/
[f25b2819]	389	count_t wstr_length(const wchar_t *wstr)
[74c8da2c]	390	{
[f25b2819]	391	count_t len;
	392
	393	len = 0;
	394	while (*wstr++ != '\0')
	395	++len;
	396
	397	return len;
[16da5f8e]	398	}
	399
	400	/** Compare two NULL terminated strings
	401	*
	402	* Do a char-by-char comparison of two NULL terminated strings.
	403	* The strings are considered equal iff they consist of the same
	404	* characters on the minimum of their lengths.
	405	*
	406	* @param src First string to compare.
	407	* @param dst Second string to compare.
	408	*
	409	* @return 0 if the strings are equal, -1 if first is smaller, 1 if second smaller.
	410	*
	411	*/
	412	int strcmp(const char src, const char dst)
	413	{
	414	for (; src && dst; src++, dst++) {
	415	if (src < dst)
	416	return -1;
	417	if (src > dst)
	418	return 1;
	419	}
	420	if (src == dst)
	421	return 0;
[2f57690]	422
[16da5f8e]	423	if (!*src)
	424	return -1;
[2f57690]	425
[16da5f8e]	426	return 1;
	427	}
	428
	429
	430	/** Compare two NULL terminated strings
	431	*
	432	* Do a char-by-char comparison of two NULL terminated strings.
	433	* The strings are considered equal iff they consist of the same
	434	* characters on the minimum of their lengths and specified maximal
	435	* length.
	436	*
	437	* @param src First string to compare.
	438	* @param dst Second string to compare.
	439	* @param len Maximal length for comparison.
	440	*
	441	* @return 0 if the strings are equal, -1 if first is smaller, 1 if second smaller.
	442	*/
	443	int strncmp(const char src, const char dst, size_t len)
	444	{
	445	unsigned int i;
	446
	447	for (i = 0; (src) && (dst) && (i < len); src++, dst++, i++) {
	448	if (src < dst)
	449	return -1;
[2f57690]	450
[16da5f8e]	451	if (src > dst)
	452	return 1;
	453	}
[2f57690]	454
[16da5f8e]	455	if (i == len \|\| src == dst)
	456	return 0;
[2f57690]	457
[16da5f8e]	458	if (!*src)
	459	return -1;
[2f57690]	460
[16da5f8e]	461	return 1;
	462	}
	463
	464
	465
	466	/** Copy NULL terminated string.
	467	*
	468	* Copy at most 'len' characters from string 'src' to 'dest'.
	469	* If 'src' is shorter than 'len', '\0' is inserted behind the
	470	* last copied character.
	471	*
[2f57690]	472	* @param src Source string.
[16da5f8e]	473	* @param dest Destination buffer.
[2f57690]	474	* @param len Size of destination buffer.
[16da5f8e]	475	*/
	476	void strncpy(char dest, const char src, size_t len)
	477	{
	478	unsigned int i;
[2f57690]	479
[16da5f8e]	480	for (i = 0; i < len; i++) {
	481	if (!(dest[i] = src[i]))
	482	return;
	483	}
[2f57690]	484
[16da5f8e]	485	dest[i - 1] = '\0';
	486	}
	487
/** Find first occurrence of character in string.
 *
 * The character is converted to char before comparison, so byte values
 * above 0x7f passed as a non-negative int are matched regardless of whether
 * plain char is signed. The terminating '\0' is never matched.
 *
 * @param s String to search.
 * @param i Character to look for.
 *
 * @return Pointer to character in @a s or NULL if not found.
 */
extern char *strchr(const char *s, int i)
{
	const char wanted = (char) i;

	for (; *s != '\0'; ++s) {
		if (*s == wanted)
			return (char *) s;
	}

	return NULL;
}
	505
[16da5f8e]	506	/** @}
	507	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: