Changeset 28a5ebd in mainline for uspace/lib/c/generic/str.c
- Timestamp:
- 2020-06-18T15:39:50Z (4 years ago)
- Branches:
- lfn, master, serial, ticket/834-toolchain-update, topic/msim-upgrade, topic/simplify-dev-export
- Children:
- ce52c333
- Parents:
- 4f663f3e
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
uspace/lib/c/generic/str.c
r4f663f3e r28a5ebd
  42    42    * strings, called just strings are encoded in UTF-8. Wide strings (encoded
  43    43    * in UTF-32) are supported to a limited degree. A single character is
  44        - * represented as wchar_t.@n
        44  + * represented as char32_t.@n
  45    45    *
  46    46    * Overview of the terminology:@n
   …     …
  50    50    * byte 8 bits stored in uint8_t (unsigned 8 bit integer)
  51    51    *
  52        - * character UTF-32 encoded Unicode character, stored in wchar_t
  53        - * (signed 32 bit integer), code points 0 .. 1114111
        52  + * character UTF-32 encoded Unicode character, stored in char32_t
        53  + * (unsigned 32 bit integer), code points 0 .. 1114111
  54    54    * are valid
  55    55    *
   …     …
  61    61    *
  62    62    * wide string UTF-32 encoded NULL-terminated Unicode string,
  63        - * wchar_t *
        63  + * char32_t *
  64    64    *
  65    65    * [wide] string size number of BYTES in a [wide] string (excluding
   …     …
 100   100    * A specific character inside a [wide] string can be referred to by:@n
 101   101    *
 102        - * pointer (char *, wchar_t *)
       102  + * pointer (char *, char32_t *)
 103   103    * byte offset (size_t)
 104   104    * character index (size_t)
   …     …
 119   119    #include <mem.h>
 120   120
 121        - /** Check the condition if wchar_t is signed */
 122        - #ifdef __WCHAR_UNSIGNED__
 123        - #define WCHAR_SIGNED_CHECK(cond) (true)
 124        - #else
 125        - #define WCHAR_SIGNED_CHECK(cond) (cond)
 126        - #endif
 127        -
 128   121    /** Byte mask consisting of lowest @n bits (out of 8) */
 129   122    #define LO_MASK_8(n) ((uint8_t) ((1 << (n)) - 1))
   …     …
 153   146    *
 154   147    */
 155        - wchar_t str_decode(const char *str, size_t *offset, size_t size)
       148  + char32_t str_decode(const char *str, size_t *offset, size_t size)
 156   149    {
 157   150    if (*offset + 1 > size)
   …     …
 190   183    return U_SPECIAL;
 191   184
 192        - wchar_t ch = b0 & LO_MASK_8(b0_bits);
       185  + char32_t ch = b0 & LO_MASK_8(b0_bits);
 193   186
 194   187    /* Decode continuation bytes */
   …     …
 201   194
 202   195    /* Shift data bits to ch */
 203        - ch = (ch << CONT_BITS) | (wchar_t) (b & LO_MASK_8(CONT_BITS));
       196  + ch = (ch << CONT_BITS) | (char32_t) (b & LO_MASK_8(CONT_BITS));
 204   197    cbytes--;
 205   198    }
   …     …
 223   216    *
 224   217    */
 225        - wchar_t str_decode_reverse(const char *str, size_t *offset, size_t size)
       218  + char32_t str_decode_reverse(const char *str, size_t *offset, size_t size)
 226   219    {
 227   220    if (*offset == 0)
   …     …
 266   259    * code was invalid.
 267   260    */
 268        - errno_t chr_encode(const wchar_t ch, char *str, size_t *offset, size_t size)
       261  + errno_t chr_encode(const char32_t ch, char *str, size_t *offset, size_t size)
 269   262    {
 270   263    if (*offset >= size)
   …     …
 352   345    *
 353   346    */
 354        - size_t wstr_size(const wchar_t *str)
 355        - {
 356        - return (wstr_length(str) * sizeof(wchar_t));
       347  + size_t wstr_size(const char32_t *str)
       348  + {
       349  + return (wstr_length(str) * sizeof(char32_t));
 357   350    }
 358   351
   …     …
 417   410    *
 418   411    */
 419        - size_t wstr_nsize(const wchar_t *str, size_t max_size)
 420        - {
 421        - return (wstr_nlength(str, max_size) * sizeof(wchar_t));
       412  + size_t wstr_nsize(const char32_t *str, size_t max_size)
       413  + {
       414  + return (wstr_nlength(str, max_size) * sizeof(char32_t));
 422   415    }
 423   416
   …     …
 435   428    *
 436   429    */
 437        - size_t wstr_lsize(const wchar_t *str, size_t max_len)
 438        - {
 439        - return (wstr_nlength(str, max_len * sizeof(wchar_t)) * sizeof(wchar_t));
       430  + size_t wstr_lsize(const char32_t *str, size_t max_len)
       431  + {
       432  + return (wstr_nlength(str, max_len * sizeof(char32_t)) * sizeof(char32_t));
 440   433    }
 441   434
   …     …
 465   458    *
 466   459    */
 467        - size_t wstr_length(const wchar_t *wstr)
       460  + size_t wstr_length(const char32_t *wstr)
 468   461    {
 469   462    size_t len = 0;
   …     …
 502   495    *
 503   496    */
 504        - size_t wstr_nlength(const wchar_t *str, size_t size)
       497  + size_t wstr_nlength(const char32_t *str, size_t size)
 505   498    {
 506   499    size_t len = 0;
 507        - size_t limit = ALIGN_DOWN(size, sizeof(wchar_t));
       500  + size_t limit = ALIGN_DOWN(size, sizeof(char32_t));
 508   501    size_t offset = 0;
 509   502
 510   503    while ((offset < limit) && (*str++ != 0)) {
 511   504    len++;
 512        - offset += sizeof(wchar_t);
       505  + offset += sizeof(char32_t);
 513   506    }
 514   507
   …     …
 521   514    * @return Width of character in cells.
 522   515    */
 523        - size_t chr_width(wchar_t ch)
       516  + size_t chr_width(char32_t ch)
 524   517    {
 525   518    return 1;
   …     …
 535   528    size_t width = 0;
 536   529    size_t offset = 0;
 537        - wchar_t ch;
       530  + char32_t ch;
 538   531
 539   532    while ((ch = str_decode(str, &offset, STR_NO_LIMIT)) != 0)
   …     …
 548   541    *
 549   542    */
 550        - bool ascii_check(wchar_t ch)
 551        - {
 552        - if (WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 127))
       543  + bool ascii_check(char32_t ch)
       544  + {
       545  + if (ch <= 127)
 553   546    return true;
 554   547
   …     …
 561   554    *
 562   555    */
 563        - bool chr_check(wchar_t ch)
 564        - {
 565        - if (WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 1114111))
       556  + bool chr_check(char32_t ch)
       557  + {
       558  + if (ch <= 1114111)
 566   559    return true;
 567   560
   …     …
 589   582    int str_cmp(const char *s1, const char *s2)
 590   583    {
 591        - wchar_t c1 = 0;
 592        - wchar_t c2 = 0;
       584  + char32_t c1 = 0;
       585  + char32_t c2 = 0;
 593   586
 594   587    size_t off1 = 0;
   …     …
 636   629    int str_lcmp(const char *s1, const char *s2, size_t max_len)
 637   630    {
 638        - wchar_t c1 = 0;
 639        - wchar_t c2 = 0;
       631  + char32_t c1 = 0;
       632  + char32_t c2 = 0;
 640   633
 641   634    size_t off1 = 0;
   …     …
 688   681    int str_casecmp(const char *s1, const char *s2)
 689   682    {
 690        - wchar_t c1 = 0;
 691        - wchar_t c2 = 0;
       683  + char32_t c1 = 0;
       684  + char32_t c2 = 0;
 692   685
 693   686    size_t off1 = 0;
   …     …
 736   729    int str_lcasecmp(const char *s1, const char *s2, size_t max_len)
 737   730    {
 738        - wchar_t c1 = 0;
 739        - wchar_t c2 = 0;
       731  + char32_t c1 = 0;
       732  + char32_t c2 = 0;
 740   733
 741   734    size_t off1 = 0;
   …     …
 780   773    bool str_test_prefix(const char *s, const char *p)
 781   774    {
 782        - wchar_t c1 = 0;
 783        - wchar_t c2 = 0;
       775  + char32_t c1 = 0;
       776  + char32_t c2 = 0;
 784   777
 785   778    size_t off1 = 0;
   …     …
 850   843    size_t dest_off = 0;
 851   844
 852        - wchar_t ch;
       845  + char32_t ch;
 853   846    while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
 854   847    if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
   …     …
 883   876    size_t dest_off = 0;
 884   877
 885        - wchar_t ch;
       878  + char32_t ch;
 886   879    while ((ch = str_decode(src, &src_off, n)) != 0) {
 887   880    if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
   …     …
 987   980    * @param src Source wide string.
 988   981    */
 989        - void wstr_to_str(char *dest, size_t size, const wchar_t *src)
 990        - {
 991        - wchar_t ch;
       982  + void wstr_to_str(char *dest, size_t size, const char32_t *src)
       983  + {
       984  + char32_t ch;
 992   985    size_t src_idx;
 993   986    size_t dest_off;
   …     …
1022  1015    {
1023  1016    size_t idx = 0, dest_off = 0;
1024        - wchar_t ch;
      1017  + char32_t ch;
1025  1018    errno_t rc = EOK;
1026  1019
   …     …
1066  1059    size_t offset = 0;
1067  1060    size_t idx = 0;
1068        - wchar_t c;
      1061  + char32_t c;
1069  1062
1070  1063    assert(dlen > 0);
   …     …
1123  1116    * @return New string.
1124  1117    */
1125        - char *wstr_to_astr(const wchar_t *src)
      1118  + char *wstr_to_astr(const char32_t *src)
1126  1119    {
1127  1120    char dbuf[STR_BOUNDS(1)];
1128  1121    char *str;
1129        - wchar_t ch;
      1122  + char32_t ch;
1130  1123
1131  1124    size_t src_idx;
   …     …
1173  1166    * @param src Source string.
1174  1167    */
1175        - void str_to_wstr(wchar_t *dest, size_t dlen, const char *src)
      1168  + void str_to_wstr(char32_t *dest, size_t dlen, const char *src)
1176  1169    {
1177  1170    size_t offset;
1178  1171    size_t di;
1179        - wchar_t c;
      1172  + char32_t c;
1180  1173
1181  1174    assert(dlen > 0);
   …     …
1202  1195    * @param src Source string.
1203  1196    */
1204        - wchar_t *str_to_awstr(const char *str)
      1197  + char32_t *str_to_awstr(const char *str)
1205  1198    {
1206  1199    size_t len = str_length(str);
1207  1200
1208        - wchar_t *wstr = calloc(len + 1, sizeof(wchar_t));
      1201  + char32_t *wstr = calloc(len + 1, sizeof(char32_t));
1209  1202    if (wstr == NULL)
1210  1203    return NULL;
   …     …
1221  1214    * @return Pointer to character in @a str or NULL if not found.
1222  1215    */
1223        - char *str_chr(const char *str, wchar_t ch)
1224        - {
1225        - wchar_t acc;
      1216  + char *str_chr(const char *str, char32_t ch)
      1217  + {
      1218  + char32_t acc;
1226  1219    size_t off = 0;
1227  1220    size_t last = 0;
   …     …
1263  1256    * @param ch Character to remove.
1264  1257    */
1265        - void str_rtrim(char *str, wchar_t ch)
      1258  + void str_rtrim(char *str, char32_t ch)
1266  1259    {
1267  1260    size_t off = 0;
1268  1261    size_t pos = 0;
1269        - wchar_t c;
      1262  + char32_t c;
1270  1263    bool update_last_chunk = true;
1271  1264    char *last_chunk = NULL;
   …     …
1291  1284    * @param ch Character to remove.
1292  1285    */
1293        - void str_ltrim(char *str, wchar_t ch)
1294        - {
1295        - wchar_t acc;
      1286  + void str_ltrim(char *str, char32_t ch)
      1287  + {
      1288  + char32_t acc;
1296  1289    size_t off = 0;
1297  1290    size_t pos = 0;
   …     …
1319  1312    * @return Pointer to character in @a str or NULL if not found.
1320  1313    */
1321        - char *str_rchr(const char *str, wchar_t ch)
1322        - {
1323        - wchar_t acc;
      1314  + char *str_rchr(const char *str, char32_t ch)
      1315  + {
      1316  + char32_t acc;
1324  1317    size_t off = 0;
1325  1318    size_t last = 0;
   …     …
1349  1342    *
1350  1343    */
1351        - bool wstr_linsert(wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)
      1344  + bool wstr_linsert(char32_t *str, char32_t ch, size_t pos, size_t max_pos)
1352  1345    {
1353  1346    size_t len = wstr_length(str);
   …     …
1377  1370    *
1378  1371    */
1379        - bool wstr_remove(wchar_t *str, size_t pos)
      1372  + bool wstr_remove(char32_t *str, size_t pos)
1380  1373    {
1381  1374    size_t len = wstr_length(str);
   …     …
1474  1467    size_t cur;
1475  1468    size_t tmp;
1476        - wchar_t ch;
      1469  + char32_t ch;
1477  1470
1478  1471    /* Skip over leading delimiters. */
Note:
See TracChangeset
for help on using the changeset viewer.