Changes in kernel/generic/src/lib/str.c [42e91ae:28a5ebd] in mainline
- Files: 1 edited
Legend:
- Unmodified
- Added
- Removed
kernel/generic/src/lib/str.c
r42e91ae r28a5ebd 42 42 * strings, called just strings are encoded in UTF-8. Wide strings (encoded 43 43 * in UTF-32) are supported to a limited degree. A single character is 44 * represented as wchar_t.@n44 * represented as char32_t.@n 45 45 * 46 46 * Overview of the terminology:@n … … 50 50 * byte 8 bits stored in uint8_t (unsigned 8 bit integer) 51 51 * 52 * character UTF-32 encoded Unicode character, stored in wchar_t53 * ( signed 32 bit integer), code points 0 .. 111411152 * character UTF-32 encoded Unicode character, stored in char32_t 53 * (unsigned 32 bit integer), code points 0 .. 1114111 54 54 * are valid 55 55 * … … 61 61 * 62 62 * wide string UTF-32 encoded NULL-terminated Unicode string, 63 * wchar_t *63 * char32_t * 64 64 * 65 65 * [wide] string size number of BYTES in a [wide] string (excluding … … 100 100 * A specific character inside a [wide] string can be referred to by:@n 101 101 * 102 * pointer (char *, wchar_t *)102 * pointer (char *, char32_t *) 103 103 * byte offset (size_t) 104 104 * character index (size_t) … … 118 118 #include <macros.h> 119 119 120 /** Check the condition if wchar_t is signed */121 #ifdef __WCHAR_UNSIGNED__122 #define WCHAR_SIGNED_CHECK(cond) (true)123 #else124 #define WCHAR_SIGNED_CHECK(cond) (cond)125 #endif126 127 120 /** Byte mask consisting of lowest @n bits (out of 8) */ 128 121 #define LO_MASK_8(n) ((uint8_t) ((1 << (n)) - 1)) … … 152 145 * 153 146 */ 154 wchar_t str_decode(const char *str, size_t *offset, size_t size)147 char32_t str_decode(const char *str, size_t *offset, size_t size) 155 148 { 156 149 if (*offset + 1 > size) … … 189 182 return U_SPECIAL; 190 183 191 wchar_t ch = b0 & LO_MASK_8(b0_bits);184 char32_t ch = b0 & LO_MASK_8(b0_bits); 192 185 193 186 /* Decode continuation bytes */ … … 200 193 201 194 /* Shift data bits to ch */ 202 ch = (ch << CONT_BITS) | ( wchar_t) (b & LO_MASK_8(CONT_BITS));195 ch = (ch << CONT_BITS) | (char32_t) (b & LO_MASK_8(CONT_BITS)); 203 196 cbytes--; 204 197 } … … 222 215 * 
code was invalid. 223 216 */ 224 errno_t chr_encode(const wchar_t ch, char *str, size_t *offset, size_t size)217 errno_t chr_encode(const char32_t ch, char *str, size_t *offset, size_t size) 225 218 { 226 219 if (*offset >= size) … … 308 301 * 309 302 */ 310 size_t wstr_size(const wchar_t *str)311 { 312 return (wstr_length(str) * sizeof( wchar_t));303 size_t wstr_size(const char32_t *str) 304 { 305 return (wstr_length(str) * sizeof(char32_t)); 313 306 } 314 307 … … 354 347 * 355 348 */ 356 size_t wstr_lsize(const wchar_t *str, size_t max_len)357 { 358 return (wstr_nlength(str, max_len * sizeof( wchar_t)) * sizeof(wchar_t));349 size_t wstr_lsize(const char32_t *str, size_t max_len) 350 { 351 return (wstr_nlength(str, max_len * sizeof(char32_t)) * sizeof(char32_t)); 359 352 } 360 353 … … 384 377 * 385 378 */ 386 size_t wstr_length(const wchar_t *wstr)379 size_t wstr_length(const char32_t *wstr) 387 380 { 388 381 size_t len = 0; … … 421 414 * 422 415 */ 423 size_t wstr_nlength(const wchar_t *str, size_t size)416 size_t wstr_nlength(const char32_t *str, size_t size) 424 417 { 425 418 size_t len = 0; 426 size_t limit = ALIGN_DOWN(size, sizeof( wchar_t));419 size_t limit = ALIGN_DOWN(size, sizeof(char32_t)); 427 420 size_t offset = 0; 428 421 429 422 while ((offset < limit) && (*str++ != 0)) { 430 423 len++; 431 offset += sizeof( wchar_t);424 offset += sizeof(char32_t); 432 425 } 433 426 … … 440 433 * 441 434 */ 442 bool ascii_check( wchar_t ch)443 { 444 if ( WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 127))435 bool ascii_check(char32_t ch) 436 { 437 if (ch <= 127) 445 438 return true; 446 439 … … 453 446 * 454 447 */ 455 bool chr_check( wchar_t ch)456 { 457 if ( WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 1114111))448 bool chr_check(char32_t ch) 449 { 450 if (ch <= 1114111) 458 451 return true; 459 452 … … 481 474 int str_cmp(const char *s1, const char *s2) 482 475 { 483 wchar_t c1 = 0;484 wchar_t c2 = 0;476 char32_t c1 = 0; 477 char32_t c2 = 0; 485 478 486 479 size_t off1 = 0; … … 
528 521 int str_lcmp(const char *s1, const char *s2, size_t max_len) 529 522 { 530 wchar_t c1 = 0;531 wchar_t c2 = 0;523 char32_t c1 = 0; 524 char32_t c2 = 0; 532 525 533 526 size_t off1 = 0; … … 580 573 size_t dest_off = 0; 581 574 582 wchar_t ch;575 char32_t ch; 583 576 while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) { 584 577 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK) … … 613 606 size_t dest_off = 0; 614 607 615 wchar_t ch;608 char32_t ch; 616 609 while ((ch = str_decode(src, &src_off, n)) != 0) { 617 610 if (chr_encode(ch, dest, &dest_off, size - 1) != EOK) … … 628 621 * written will always be well-formed. 629 622 * 630 * @param dest 631 * @param size 632 * @param src 633 */ 634 void wstr_to_str(char *dest, size_t size, const wchar_t *src)635 { 636 wchar_t ch;623 * @param dest Destination buffer. 624 * @param size Size of the destination buffer. 625 * @param src Source wide string. 626 */ 627 void wstr_to_str(char *dest, size_t size, const char32_t *src) 628 { 629 char32_t ch; 637 630 size_t src_idx; 638 631 size_t dest_off; … … 659 652 * @return Pointer to character in @a str or NULL if not found. 660 653 */ 661 char *str_chr(const char *str, wchar_t ch)662 { 663 wchar_t acc;654 char *str_chr(const char *str, char32_t ch) 655 { 656 char32_t acc; 664 657 size_t off = 0; 665 658 size_t last = 0; … … 688 681 * 689 682 */ 690 bool wstr_linsert( wchar_t *str, wchar_t ch, size_t pos, size_t max_pos)683 bool wstr_linsert(char32_t *str, char32_t ch, size_t pos, size_t max_pos) 691 684 { 692 685 size_t len = wstr_length(str); … … 716 709 * 717 710 */ 718 bool wstr_remove( wchar_t *str, size_t pos)711 bool wstr_remove(char32_t *str, size_t pos) 719 712 { 720 713 size_t len = wstr_length(str);
Note: See TracChangeset for help on using the changeset viewer.