Changes in uspace/lib/c/generic/str.c [1c9bf292:28a5ebd] in mainline
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
uspace/lib/c/generic/str.c
r1c9bf292 r28a5ebd 42 42 * strings, called just strings are encoded in UTF-8. Wide strings (encoded 43 43 * in UTF-32) are supported to a limited degree. A single character is 44 * represented as wchar_t.@n44 * represented as char32_t.@n 45 45 * 46 46 * Overview of the terminology:@n … … 50 50 * byte 8 bits stored in uint8_t (unsigned 8 bit integer) 51 51 * 52 * character UTF-32 encoded Unicode character, stored in wchar_t53 * ( signed 32 bit integer), code points 0 .. 111411152 * character UTF-32 encoded Unicode character, stored in char32_t 53 * (unsigned 32 bit integer), code points 0 .. 1114111 54 54 * are valid 55 55 * … … 61 61 * 62 62 * wide string UTF-32 encoded NULL-terminated Unicode string, 63 * wchar_t *63 * char32_t * 64 64 * 65 65 * [wide] string size number of BYTES in a [wide] string (excluding … … 100 100 * A specific character inside a [wide] string can be referred to by:@n 101 101 * 102 * pointer (char *, wchar_t *)102 * pointer (char *, char32_t *) 103 103 * byte offset (size_t) 104 104 * character index (size_t) … … 119 119 #include <mem.h> 120 120 121 /** Check the condition if wchar_t is signed */122 #ifdef __WCHAR_UNSIGNED__123 #define WCHAR_SIGNED_CHECK(cond) (true)124 #else125 #define WCHAR_SIGNED_CHECK(cond) (cond)126 #endif127 128 121 /** Byte mask consisting of lowest @n bits (out of 8) */ 129 122 #define LO_MASK_8(n) ((uint8_t) ((1 << (n)) - 1)) … … 153 146 * 154 147 */ 155 wchar_t str_decode(const char *str, size_t *offset, size_t size)148 char32_t str_decode(const char *str, size_t *offset, size_t size) 156 149 { 157 150 if (*offset + 1 > size) … … 190 183 return U_SPECIAL; 191 184 192 wchar_t ch = b0 & LO_MASK_8(b0_bits);185 char32_t ch = b0 & LO_MASK_8(b0_bits); 193 186 194 187 /* Decode continuation bytes */ … … 201 194 202 195 /* Shift data bits to ch */ 203 ch = (ch << CONT_BITS) | ( wchar_t) (b & LO_MASK_8(CONT_BITS));196 ch = (ch << CONT_BITS) | (char32_t) (b & LO_MASK_8(CONT_BITS)); 204 197 cbytes--; 205 198 } … … 223 216 * 
224 217 */ 225 wchar_t str_decode_reverse(const char *str, size_t *offset, size_t size)218 char32_t str_decode_reverse(const char *str, size_t *offset, size_t size) 226 219 { 227 220 if (*offset == 0) … … 266 259 * code was invalid. 267 260 */ 268 errno_t chr_encode(const wchar_t ch, char *str, size_t *offset, size_t size)261 errno_t chr_encode(const char32_t ch, char *str, size_t *offset, size_t size) 269 262 { 270 263 if (*offset >= size) … … 352 345 * 353 346 */ 354 size_t wstr_size(const wchar_t *str)355 { 356 return (wstr_length(str) * sizeof( wchar_t));347 size_t wstr_size(const char32_t *str) 348 { 349 return (wstr_length(str) * sizeof(char32_t)); 357 350 } 358 351 … … 417 410 * 418 411 */ 419 size_t wstr_nsize(const wchar_t *str, size_t max_size)420 { 421 return (wstr_nlength(str, max_size) * sizeof( wchar_t));412 size_t wstr_nsize(const char32_t *str, size_t max_size) 413 { 414 return (wstr_nlength(str, max_size) * sizeof(char32_t)); 422 415 } 423 416 … … 435 428 * 436 429 */ 437 size_t wstr_lsize(const wchar_t *str, size_t max_len)438 { 439 return (wstr_nlength(str, max_len * sizeof( wchar_t)) * sizeof(wchar_t));430 size_t wstr_lsize(const char32_t *str, size_t max_len) 431 { 432 return (wstr_nlength(str, max_len * sizeof(char32_t)) * sizeof(char32_t)); 440 433 } 441 434 … … 465 458 * 466 459 */ 467 size_t wstr_length(const wchar_t *wstr)460 size_t wstr_length(const char32_t *wstr) 468 461 { 469 462 size_t len = 0; … … 502 495 * 503 496 */ 504 size_t wstr_nlength(const wchar_t *str, size_t size)497 size_t wstr_nlength(const char32_t *str, size_t size) 505 498 { 506 499 size_t len = 0; 507 size_t limit = ALIGN_DOWN(size, sizeof( wchar_t));500 size_t limit = ALIGN_DOWN(size, sizeof(char32_t)); 508 501 size_t offset = 0; 509 502 510 503 while ((offset < limit) && (*str++ != 0)) { 511 504 len++; 512 offset += sizeof( wchar_t);505 offset += sizeof(char32_t); 513 506 } 514 507 … … 521 514 * @return Width of character in cells. 
522 515 */ 523 size_t chr_width( wchar_t ch)516 size_t chr_width(char32_t ch) 524 517 { 525 518 return 1; … … 535 528 size_t width = 0; 536 529 size_t offset = 0; 537 wchar_t ch;530 char32_t ch; 538 531 539 532 while ((ch = str_decode(str, &offset, STR_NO_LIMIT)) != 0) … … 548 541 * 549 542 */ 550 bool ascii_check( wchar_t ch)551 { 552 if ( WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 127))543 bool ascii_check(char32_t ch) 544 { 545 if (ch <= 127) 553 546 return true; 554 547 … … 561 554 * 562 555 */ 563 bool chr_check( wchar_t ch)564 { 565 if ( WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 1114111))556 bool chr_check(char32_t ch) 557 { 558 if (ch <= 1114111) 566 559 return true; 567 560 … … 589 582 int str_cmp(const char *s1, const char *s2) 590 583 { 591 wchar_t c1 = 0;592 wchar_t c2 = 0;584 char32_t c1 = 0; 585 char32_t c2 = 0; 593 586 594 587 size_t off1 = 0; … … 636 629 int str_lcmp(const char *s1, const char *s2, size_t max_len) 637 630 { 638 wchar_t c1 = 0;639 wchar_t c2 = 0;631 char32_t c1 = 0; 632 char32_t c2 = 0; 640 633 641 634 size_t off1 = 0; … … 688 681 int str_casecmp(const char *s1, const char *s2) 689 682 { 690 wchar_t c1 = 0;691 wchar_t c2 = 0;683 char32_t c1 = 0; 684 char32_t c2 = 0; 692 685 693 686 size_t off1 = 0; … … 736 729 int str_lcasecmp(const char *s1, const char *s2, size_t max_len) 737 730 { 738 wchar_t c1 = 0;739 wchar_t c2 = 0;731 char32_t c1 = 0; 732 char32_t c2 = 0; 740 733 741 734 size_t off1 = 0; … … 780 773 bool str_test_prefix(const char *s, const char *p) 781 774 { 782 wchar_t c1 = 0;783 wchar_t c2 = 0;775 char32_t c1 = 0; 776 char32_t c2 = 0; 784 777 785 778 size_t off1 = 0; … … 801 794 802 795 return false; 796 } 797 798 /** Get a string suffix. 799 * 800 * Return a string suffix defined by the prefix length. 801 * 802 * @param s The string to get the suffix from. 803 * @param prefix_length Number of prefix characters to ignore. 804 * 805 * @return String suffix. 
806 * 807 */ 808 const char *str_suffix(const char *s, size_t prefix_length) 809 { 810 size_t off = 0; 811 size_t i = 0; 812 813 while (true) { 814 str_decode(s, &off, STR_NO_LIMIT); 815 i++; 816 817 if (i >= prefix_length) 818 break; 819 } 820 821 return s + off; 803 822 } 804 823 … … 824 843 size_t dest_off = 0; 825 844 826 wchar_t ch;845 char32_t ch; 827 846 while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) { 828 847 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK) … … 857 876 size_t dest_off = 0; 858 877 859 wchar_t ch;878 char32_t ch; 860 879 while ((ch = str_decode(src, &src_off, n)) != 0) { 861 880 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK) … … 961 980 * @param src Source wide string. 962 981 */ 963 void wstr_to_str(char *dest, size_t size, const wchar_t *src)964 { 965 wchar_t ch;982 void wstr_to_str(char *dest, size_t size, const char32_t *src) 983 { 984 char32_t ch; 966 985 size_t src_idx; 967 986 size_t dest_off; … … 996 1015 { 997 1016 size_t idx = 0, dest_off = 0; 998 wchar_t ch;1017 char32_t ch; 999 1018 errno_t rc = EOK; 1000 1019 … … 1040 1059 size_t offset = 0; 1041 1060 size_t idx = 0; 1042 wchar_t c;1061 char32_t c; 1043 1062 1044 1063 assert(dlen > 0); … … 1097 1116 * @return New string. 1098 1117 */ 1099 char *wstr_to_astr(const wchar_t *src)1118 char *wstr_to_astr(const char32_t *src) 1100 1119 { 1101 1120 char dbuf[STR_BOUNDS(1)]; 1102 1121 char *str; 1103 wchar_t ch;1122 char32_t ch; 1104 1123 1105 1124 size_t src_idx; … … 1147 1166 * @param src Source string. 1148 1167 */ 1149 void str_to_wstr( wchar_t *dest, size_t dlen, const char *src)1168 void str_to_wstr(char32_t *dest, size_t dlen, const char *src) 1150 1169 { 1151 1170 size_t offset; 1152 1171 size_t di; 1153 wchar_t c;1172 char32_t c; 1154 1173 1155 1174 assert(dlen > 0); … … 1176 1195 * @param src Source string. 
1177 1196 */ 1178 wchar_t *str_to_awstr(const char *str)1197 char32_t *str_to_awstr(const char *str) 1179 1198 { 1180 1199 size_t len = str_length(str); 1181 1200 1182 wchar_t *wstr = calloc(len + 1, sizeof(wchar_t));1201 char32_t *wstr = calloc(len + 1, sizeof(char32_t)); 1183 1202 if (wstr == NULL) 1184 1203 return NULL; … … 1195 1214 * @return Pointer to character in @a str or NULL if not found. 1196 1215 */ 1197 char *str_chr(const char *str, wchar_t ch)1198 { 1199 wchar_t acc;1216 char *str_chr(const char *str, char32_t ch) 1217 { 1218 char32_t acc; 1200 1219 size_t off = 0; 1201 1220 size_t last = 0; … … 1237 1256 * @param ch Character to remove. 1238 1257 */ 1239 void str_rtrim(char *str, wchar_t ch)1258 void str_rtrim(char *str, char32_t ch) 1240 1259 { 1241 1260 size_t off = 0; 1242 1261 size_t pos = 0; 1243 wchar_t c;1262 char32_t c; 1244 1263 bool update_last_chunk = true; 1245 1264 char *last_chunk = NULL; … … 1265 1284 * @param ch Character to remove. 1266 1285 */ 1267 void str_ltrim(char *str, wchar_t ch)1268 { 1269 wchar_t acc;1286 void str_ltrim(char *str, char32_t ch) 1287 { 1288 char32_t acc; 1270 1289 size_t off = 0; 1271 1290 size_t pos = 0; … … 1293 1312 * @return Pointer to character in @a str or NULL if not found. 
1294 1313 */ 1295 char *str_rchr(const char *str, wchar_t ch)1296 { 1297 wchar_t acc;1314 char *str_rchr(const char *str, char32_t ch) 1315 { 1316 char32_t acc; 1298 1317 size_t off = 0; 1299 1318 size_t last = 0; … … 1323 1342 * 1324 1343 */ 1325 bool wstr_linsert( wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)1344 bool wstr_linsert(char32_t *str, char32_t ch, size_t pos, size_t max_pos) 1326 1345 { 1327 1346 size_t len = wstr_length(str); … … 1351 1370 * 1352 1371 */ 1353 bool wstr_remove( wchar_t *str, size_t pos)1372 bool wstr_remove(char32_t *str, size_t pos) 1354 1373 { 1355 1374 size_t len = wstr_length(str); … … 1448 1467 size_t cur; 1449 1468 size_t tmp; 1450 wchar_t ch;1469 char32_t ch; 1451 1470 1452 1471 /* Skip over leading delimiters. */
Note: See TracChangeset for help on using the changeset viewer.