Changeset 171f9a1 in mainline for uspace/lib/libc/generic/string.c
- Timestamp:
- 2009-04-03T20:39:33Z (15 years ago)
- Branches:
- lfn, master, serial, ticket/834-toolchain-update, topic/msim-upgrade
- Children:
- cb01e1e
- Parents:
- 7a2c479
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
uspace/lib/libc/generic/string.c
r7a2c479 r171f9a1 39 39 #include <ctype.h> 40 40 #include <malloc.h> 41 #include <errno.h> 42 #include <string.h> 43 44 /** Byte mask consisting of lowest @n bits (out of 8) */ 45 #define LO_MASK_8(n) ((uint8_t) ((1 << (n)) - 1)) 46 47 /** Byte mask consisting of lowest @n bits (out of 32) */ 48 #define LO_MASK_32(n) ((uint32_t) ((1 << (n)) - 1)) 49 50 /** Byte mask consisting of highest @n bits (out of 8) */ 51 #define HI_MASK_8(n) (~LO_MASK_8(8 - (n))) 52 53 /** Number of data bits in a UTF-8 continuation byte */ 54 #define CONT_BITS 6 55 56 /** Decode a single character from a string. 57 * 58 * Decode a single character from a string of size @a size. Decoding starts 59 * at @a offset and this offset is moved to the beginning of the next 60 * character. In case of decoding error, offset generally advances at least 61 * by one. However, offset is never moved beyond size. 62 * 63 * @param str String (not necessarily NULL-terminated). 64 * @param offset Byte offset in string where to start decoding. 65 * @param size Size of the string (in bytes). 66 * 67 * @return Value of decoded character, U_SPECIAL on decoding error or 68 * NULL if attempt to decode beyond @a size. 69 * 70 */ 71 wchar_t str_decode(const char *str, size_t *offset, size_t size) 72 { 73 if (*offset + 1 > size) 74 return 0; 75 76 /* First byte read from string */ 77 uint8_t b0 = (uint8_t) str[(*offset)++]; 78 79 /* Determine code length */ 80 81 unsigned int b0_bits; /* Data bits in first byte */ 82 unsigned int cbytes; /* Number of continuation bytes */ 83 84 if ((b0 & 0x80) == 0) { 85 /* 0xxxxxxx (Plain ASCII) */ 86 b0_bits = 7; 87 cbytes = 0; 88 } else if ((b0 & 0xe0) == 0xc0) { 89 /* 110xxxxx 10xxxxxx */ 90 b0_bits = 5; 91 cbytes = 1; 92 } else if ((b0 & 0xf0) == 0xe0) { 93 /* 1110xxxx 10xxxxxx 10xxxxxx */ 94 b0_bits = 4; 95 cbytes = 2; 96 } else if ((b0 & 0xf8) == 0xf0) { 97 /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ 98 b0_bits = 3; 99 cbytes = 3; 100 } else { 101 /* 10xxxxxx -- unexpected continuation byte */ 102 return U_SPECIAL; 103 } 104 105 if (*offset + cbytes > size) 106 return U_SPECIAL; 107 108 wchar_t ch = b0 & LO_MASK_8(b0_bits); 109 110 /* Decode continuation bytes */ 111 while (cbytes > 0) { 112 uint8_t b = (uint8_t) str[(*offset)++]; 113 114 /* Must be 10xxxxxx */ 115 if ((b & 0xc0) != 0x80) 116 return U_SPECIAL; 117 118 /* Shift data bits to ch */ 119 ch = (ch << CONT_BITS) | (wchar_t) (b & LO_MASK_8(CONT_BITS)); 120 cbytes--; 121 } 122 123 return ch; 124 } 125 126 /** Encode a single character to string representation. 127 * 128 * Encode a single character to string representation (i.e. UTF-8) and store 129 * it into a buffer at @a offset. Encoding starts at @a offset and this offset 130 * is moved to the position where the next character can be written to. 131 * 132 * @param ch Input character. 133 * @param str Output buffer. 134 * @param offset Byte offset where to start writing. 135 * @param size Size of the output buffer (in bytes). 136 * 137 * @return EOK if the character was encoded successfully, EOVERFLOW if there 138 * was not enough space in the output buffer or EINVAL if the character 139 * code was invalid. 140 */ 141 int chr_encode(const wchar_t ch, char *str, size_t *offset, size_t size) 142 { 143 if (*offset >= size) 144 return EOVERFLOW; 145 146 if (!chr_check(ch)) 147 return EINVAL; 148 149 /* Unsigned version of ch (bit operations should only be done 150 on unsigned types). */ 151 uint32_t cc = (uint32_t) ch; 152 153 /* Determine how many continuation bytes are needed */ 154 155 unsigned int b0_bits; /* Data bits in first byte */ 156 unsigned int cbytes; /* Number of continuation bytes */ 157 158 if ((cc & ~LO_MASK_32(7)) == 0) { 159 b0_bits = 7; 160 cbytes = 0; 161 } else if ((cc & ~LO_MASK_32(11)) == 0) { 162 b0_bits = 5; 163 cbytes = 1; 164 } else if ((cc & ~LO_MASK_32(16)) == 0) { 165 b0_bits = 4; 166 cbytes = 2; 167 } else if ((cc & ~LO_MASK_32(21)) == 0) { 168 b0_bits = 3; 169 cbytes = 3; 170 } else { 171 /* Codes longer than 21 bits are not supported */ 172 return EINVAL; 173 } 174 175 /* Check for available space in buffer */ 176 if (*offset + cbytes >= size) 177 return EOVERFLOW; 178 179 /* Encode continuation bytes */ 180 unsigned int i; 181 for (i = cbytes; i > 0; i--) { 182 str[*offset + i] = 0x80 | (cc & LO_MASK_32(CONT_BITS)); 183 cc = cc >> CONT_BITS; 184 } 185 186 /* Encode first byte */ 187 str[*offset] = (cc & LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1); 188 189 /* Advance offset */ 190 *offset += cbytes + 1; 191 192 return EOK; 193 } 194 195 /** Check whether character is valid 196 * 197 * @return True if character is a valid Unicode code point. 198 * 199 */ 200 bool chr_check(const wchar_t ch) 201 { 202 if ((ch >= 0) && (ch <= 1114111)) 203 return true; 204 205 return false; 206 } 41 207 42 208 /** Count the number of characters in the string, not including terminating 0.
Note:
See TracChangeset
for help on using the changeset viewer.