Changes in uspace/lib/c/generic/str.c [28a5ebd:1c9bf292] in mainline
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
uspace/lib/c/generic/str.c
r28a5ebd r1c9bf292 42 42 * strings, called just strings are encoded in UTF-8. Wide strings (encoded 43 43 * in UTF-32) are supported to a limited degree. A single character is 44 * represented as char32_t.@n44 * represented as wchar_t.@n 45 45 * 46 46 * Overview of the terminology:@n … … 50 50 * byte 8 bits stored in uint8_t (unsigned 8 bit integer) 51 51 * 52 * character UTF-32 encoded Unicode character, stored in char32_t53 * ( unsigned 32 bit integer), code points 0 .. 111411152 * character UTF-32 encoded Unicode character, stored in wchar_t 53 * (signed 32 bit integer), code points 0 .. 1114111 54 54 * are valid 55 55 * … … 61 61 * 62 62 * wide string UTF-32 encoded NULL-terminated Unicode string, 63 * char32_t *63 * wchar_t * 64 64 * 65 65 * [wide] string size number of BYTES in a [wide] string (excluding … … 100 100 * A specific character inside a [wide] string can be referred to by:@n 101 101 * 102 * pointer (char *, char32_t *)102 * pointer (char *, wchar_t *) 103 103 * byte offset (size_t) 104 104 * character index (size_t) … … 119 119 #include <mem.h> 120 120 121 /** Check the condition if wchar_t is signed */ 122 #ifdef __WCHAR_UNSIGNED__ 123 #define WCHAR_SIGNED_CHECK(cond) (true) 124 #else 125 #define WCHAR_SIGNED_CHECK(cond) (cond) 126 #endif 127 121 128 /** Byte mask consisting of lowest @n bits (out of 8) */ 122 129 #define LO_MASK_8(n) ((uint8_t) ((1 << (n)) - 1)) … … 146 153 * 147 154 */ 148 char32_t str_decode(const char *str, size_t *offset, size_t size)155 wchar_t str_decode(const char *str, size_t *offset, size_t size) 149 156 { 150 157 if (*offset + 1 > size) … … 183 190 return U_SPECIAL; 184 191 185 char32_t ch = b0 & LO_MASK_8(b0_bits);192 wchar_t ch = b0 & LO_MASK_8(b0_bits); 186 193 187 194 /* Decode continuation bytes */ … … 194 201 195 202 /* Shift data bits to ch */ 196 ch = (ch << CONT_BITS) | ( char32_t) (b & LO_MASK_8(CONT_BITS));203 ch = (ch << CONT_BITS) | (wchar_t) (b & LO_MASK_8(CONT_BITS)); 197 204 cbytes--; 198 205 } … … 216 223 * 217 224 */ 218 char32_t str_decode_reverse(const char *str, size_t *offset, size_t size)225 wchar_t str_decode_reverse(const char *str, size_t *offset, size_t size) 219 226 { 220 227 if (*offset == 0) … … 259 266 * code was invalid. 260 267 */ 261 errno_t chr_encode(const char32_t ch, char *str, size_t *offset, size_t size)268 errno_t chr_encode(const wchar_t ch, char *str, size_t *offset, size_t size) 262 269 { 263 270 if (*offset >= size) … … 345 352 * 346 353 */ 347 size_t wstr_size(const char32_t *str)348 { 349 return (wstr_length(str) * sizeof( char32_t));354 size_t wstr_size(const wchar_t *str) 355 { 356 return (wstr_length(str) * sizeof(wchar_t)); 350 357 } 351 358 … … 410 417 * 411 418 */ 412 size_t wstr_nsize(const char32_t *str, size_t max_size)413 { 414 return (wstr_nlength(str, max_size) * sizeof( char32_t));419 size_t wstr_nsize(const wchar_t *str, size_t max_size) 420 { 421 return (wstr_nlength(str, max_size) * sizeof(wchar_t)); 415 422 } 416 423 … … 428 435 * 429 436 */ 430 size_t wstr_lsize(const char32_t *str, size_t max_len)431 { 432 return (wstr_nlength(str, max_len * sizeof( char32_t)) * sizeof(char32_t));437 size_t wstr_lsize(const wchar_t *str, size_t max_len) 438 { 439 return (wstr_nlength(str, max_len * sizeof(wchar_t)) * sizeof(wchar_t)); 433 440 } 434 441 … … 458 465 * 459 466 */ 460 size_t wstr_length(const char32_t *wstr)467 size_t wstr_length(const wchar_t *wstr) 461 468 { 462 469 size_t len = 0; … … 495 502 * 496 503 */ 497 size_t wstr_nlength(const char32_t *str, size_t size)504 size_t wstr_nlength(const wchar_t *str, size_t size) 498 505 { 499 506 size_t len = 0; 500 size_t limit = ALIGN_DOWN(size, sizeof( char32_t));507 size_t limit = ALIGN_DOWN(size, sizeof(wchar_t)); 501 508 size_t offset = 0; 502 509 503 510 while ((offset < limit) && (*str++ != 0)) { 504 511 len++; 505 offset += sizeof( char32_t);512 offset += sizeof(wchar_t); 506 513 } 507 514 … … 514 521 * @return Width of character in cells. 515 522 */ 516 size_t chr_width( char32_t ch)523 size_t chr_width(wchar_t ch) 517 524 { 518 525 return 1; … … 528 535 size_t width = 0; 529 536 size_t offset = 0; 530 char32_t ch;537 wchar_t ch; 531 538 532 539 while ((ch = str_decode(str, &offset, STR_NO_LIMIT)) != 0) … … 541 548 * 542 549 */ 543 bool ascii_check( char32_t ch)544 { 545 if ( ch <= 127)550 bool ascii_check(wchar_t ch) 551 { 552 if (WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 127)) 546 553 return true; 547 554 … … 554 561 * 555 562 */ 556 bool chr_check( char32_t ch)557 { 558 if ( ch <= 1114111)563 bool chr_check(wchar_t ch) 564 { 565 if (WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 1114111)) 559 566 return true; 560 567 … … 582 589 int str_cmp(const char *s1, const char *s2) 583 590 { 584 char32_t c1 = 0;585 char32_t c2 = 0;591 wchar_t c1 = 0; 592 wchar_t c2 = 0; 586 593 587 594 size_t off1 = 0; … … 629 636 int str_lcmp(const char *s1, const char *s2, size_t max_len) 630 637 { 631 char32_t c1 = 0;632 char32_t c2 = 0;638 wchar_t c1 = 0; 639 wchar_t c2 = 0; 633 640 634 641 size_t off1 = 0; … … 681 688 int str_casecmp(const char *s1, const char *s2) 682 689 { 683 char32_t c1 = 0;684 char32_t c2 = 0;690 wchar_t c1 = 0; 691 wchar_t c2 = 0; 685 692 686 693 size_t off1 = 0; … … 729 736 int str_lcasecmp(const char *s1, const char *s2, size_t max_len) 730 737 { 731 char32_t c1 = 0;732 char32_t c2 = 0;738 wchar_t c1 = 0; 739 wchar_t c2 = 0; 733 740 734 741 size_t off1 = 0; … … 773 780 bool str_test_prefix(const char *s, const char *p) 774 781 { 775 char32_t c1 = 0;776 char32_t c2 = 0;782 wchar_t c1 = 0; 783 wchar_t c2 = 0; 777 784 778 785 size_t off1 = 0; … … 794 801 795 802 return false; 796 }797 798 /** Get a string suffix.799 *800 * Return a string suffix defined by the prefix length.801 *802 * @param s The string to get the suffix from.803 * @param prefix_length Number of prefix characters to ignore.804 *805 * @return String suffix.806 *807 */808 const char *str_suffix(const char *s, size_t prefix_length)809 {810 size_t off = 0;811 size_t i = 0;812 813 while (true) {814 str_decode(s, &off, STR_NO_LIMIT);815 i++;816 817 if (i >= prefix_length)818 break;819 }820 821 return s + off;822 803 } 823 804 … … 843 824 size_t dest_off = 0; 844 825 845 char32_t ch;826 wchar_t ch; 846 827 while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) { 847 828 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK) … … 876 857 size_t dest_off = 0; 877 858 878 char32_t ch;859 wchar_t ch; 879 860 while ((ch = str_decode(src, &src_off, n)) != 0) { 880 861 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK) … … 980 961 * @param src Source wide string. 981 962 */ 982 void wstr_to_str(char *dest, size_t size, const char32_t *src)983 { 984 char32_t ch;963 void wstr_to_str(char *dest, size_t size, const wchar_t *src) 964 { 965 wchar_t ch; 985 966 size_t src_idx; 986 967 size_t dest_off; … … 1015 996 { 1016 997 size_t idx = 0, dest_off = 0; 1017 char32_t ch;998 wchar_t ch; 1018 999 errno_t rc = EOK; 1019 1000 … … 1059 1040 size_t offset = 0; 1060 1041 size_t idx = 0; 1061 char32_t c;1042 wchar_t c; 1062 1043 1063 1044 assert(dlen > 0); … … 1116 1097 * @return New string. 1117 1098 */ 1118 char *wstr_to_astr(const char32_t *src)1099 char *wstr_to_astr(const wchar_t *src) 1119 1100 { 1120 1101 char dbuf[STR_BOUNDS(1)]; 1121 1102 char *str; 1122 char32_t ch;1103 wchar_t ch; 1123 1104 1124 1105 size_t src_idx; … … 1166 1147 * @param src Source string. 1167 1148 */ 1168 void str_to_wstr( char32_t *dest, size_t dlen, const char *src)1149 void str_to_wstr(wchar_t *dest, size_t dlen, const char *src) 1169 1150 { 1170 1151 size_t offset; 1171 1152 size_t di; 1172 char32_t c;1153 wchar_t c; 1173 1154 1174 1155 assert(dlen > 0); … … 1195 1176 * @param src Source string. 1196 1177 */ 1197 char32_t *str_to_awstr(const char *str)1178 wchar_t *str_to_awstr(const char *str) 1198 1179 { 1199 1180 size_t len = str_length(str); 1200 1181 1201 char32_t *wstr = calloc(len + 1, sizeof(char32_t));1182 wchar_t *wstr = calloc(len + 1, sizeof(wchar_t)); 1202 1183 if (wstr == NULL) 1203 1184 return NULL; … … 1214 1195 * @return Pointer to character in @a str or NULL if not found. 1215 1196 */ 1216 char *str_chr(const char *str, char32_t ch)1217 { 1218 char32_t acc;1197 char *str_chr(const char *str, wchar_t ch) 1198 { 1199 wchar_t acc; 1219 1200 size_t off = 0; 1220 1201 size_t last = 0; … … 1256 1237 * @param ch Character to remove. 1257 1238 */ 1258 void str_rtrim(char *str, char32_t ch)1239 void str_rtrim(char *str, wchar_t ch) 1259 1240 { 1260 1241 size_t off = 0; 1261 1242 size_t pos = 0; 1262 char32_t c;1243 wchar_t c; 1263 1244 bool update_last_chunk = true; 1264 1245 char *last_chunk = NULL; … … 1284 1265 * @param ch Character to remove. 1285 1266 */ 1286 void str_ltrim(char *str, char32_t ch)1287 { 1288 char32_t acc;1267 void str_ltrim(char *str, wchar_t ch) 1268 { 1269 wchar_t acc; 1289 1270 size_t off = 0; 1290 1271 size_t pos = 0; … … 1312 1293 * @return Pointer to character in @a str or NULL if not found. 1313 1294 */ 1314 char *str_rchr(const char *str, char32_t ch)1315 { 1316 char32_t acc;1295 char *str_rchr(const char *str, wchar_t ch) 1296 { 1297 wchar_t acc; 1317 1298 size_t off = 0; 1318 1299 size_t last = 0; … … 1342 1323 * 1343 1324 */ 1344 bool wstr_linsert( char32_t *str, char32_t ch, size_t pos, size_t max_pos)1325 bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos) 1345 1326 { 1346 1327 size_t len = wstr_length(str); … … 1370 1351 * 1371 1352 */ 1372 bool wstr_remove( char32_t *str, size_t pos)1353 bool wstr_remove(wchar_t *str, size_t pos) 1373 1354 { 1374 1355 size_t len = wstr_length(str); … … 1467 1448 size_t cur; 1468 1449 size_t tmp; 1469 char32_t ch;1450 wchar_t ch; 1470 1451 1471 1452 /* Skip over leading delimiters. */
Note:
See TracChangeset
for help on using the changeset viewer.