Changeset 28a5ebd in mainline for boot


Ignore:
Timestamp:
2020-06-18T15:39:50Z (5 years ago)
Author:
Martin Decky <martin@…>
Branches:
lfn, master, serial, ticket/834-toolchain-update, topic/msim-upgrade, topic/simplify-dev-export
Children:
ce52c333
Parents:
4f663f3e
Message:

Use char32_t instead of wchar_t to represent UTF-32 strings

The intention of the native HelenOS string API has always been to
support Unicode in the UTF-8 and UTF-32 encodings as the sole character
representations and to ignore the obsolete mess of older single-byte and
multibyte character encodings. Before C11, the wchar_t type was
slightly misused for the purpose of UTF-32 strings. The newer
char32_t type is obviously a much more suitable option. The standard
defines char32_t as uint_least32_t, thus we can take the liberty of
fixing it to uint32_t.

To maintain compatibility with the C Standard, the putwchar(wchar_t)
functions have been replaced by our custom putuchar(char32_t) functions
where appropriate.

Location:
boot
Files:
1 added
13 edited

Legend:

Unmodified
Added
Removed
  • boot/arch/arm32/src/putchar.c

    r4f663f3e r28a5ebd  
    193193/** Display a character
    194194 *
    195  * @param ch    Character to display
    196  */
    197 void putwchar(const wchar_t ch)
     195 * @param ch Character to display
     196 *
     197 */
     198void putuchar(const char32_t ch)
    198199{
    199200        if (ch == '\n')
  • boot/arch/arm64/src/main.c

    r4f663f3e r28a5ebd  
    9999 *
    100100 * @param ch Character to display.
    101  */
    102 void putwchar(wchar_t ch)
     101 *
     102 */
     103void putuchar(char32_t ch)
    103104{
    104105        if (ch == '\n')
  • boot/arch/ia64/src/putchar.c

    r4f663f3e r28a5ebd  
    3333#include <arch/ski.h>
    3434
    35 void putwchar(const wchar_t ch)
     35void putuchar(const char32_t ch)
    3636{
    3737#ifdef MACHINE_ski
  • boot/arch/mips32/src/putchar.c

    r4f663f3e r28a5ebd  
    5757#endif
    5858
    59 void putwchar(const wchar_t ch)
     59void putuchar(const char32_t ch)
    6060{
    6161        if (ascii_check(ch))
  • boot/arch/ppc32/src/ofw.c

    r4f663f3e r28a5ebd  
    3939}
    4040
    41 void putwchar(wchar_t ch)
     41void putuchar(char32_t ch)
    4242{
    4343        if (ch == '\n')
  • boot/arch/riscv64/src/putchar.c

    r4f663f3e r28a5ebd  
    3333#include <arch/ucb.h>
    3434
    35 void putwchar(wchar_t ch)
     35void putuchar(char32_t ch)
    3636{
    3737        if (ascii_check(ch))
  • boot/arch/sparc64/src/ofw.c

    r4f663f3e r28a5ebd  
    4343#include <str.h>
    4444
    45 void putwchar(wchar_t ch)
     45void putuchar(char32_t ch)
    4646{
    4747        if (ch == '\n')
  • boot/generic/include/putchar.h

    r4f663f3e r28a5ebd  
    3737
    3838#include <stddef.h>
     39#include <uchar.h>
    3940
    40 extern void putwchar(wchar_t);
     41extern void putuchar(char32_t);
    4142
    4243#endif
  • boot/generic/include/str.h

    r4f663f3e r28a5ebd  
    3838#include <stdbool.h>
    3939#include <stddef.h>
     40#include <uchar.h>
    4041
    4142/* Common Unicode characters */
    42 #define U_SPECIAL      '?'
     43#define U_SPECIAL  '?'
    4344
    4445/** No size limit constant */
    4546#define STR_NO_LIMIT  ((size_t) -1)
    4647
    47 extern wchar_t str_decode(const char *str, size_t *offset, size_t sz);
    48 extern errno_t chr_encode(wchar_t ch, char *str, size_t *offset, size_t sz);
     48extern char32_t str_decode(const char *str, size_t *offset, size_t sz);
     49extern errno_t chr_encode(char32_t ch, char *str, size_t *offset, size_t sz);
    4950
    5051extern size_t str_size(const char *str);
     
    5253extern size_t str_length(const char *str);
    5354
    54 extern bool ascii_check(wchar_t ch);
    55 extern bool chr_check(wchar_t ch);
     55extern bool ascii_check(char32_t ch);
     56extern bool chr_check(char32_t ch);
    5657
    5758extern int str_cmp(const char *s1, const char *s2);
  • boot/generic/include/tar.h

    r4f663f3e r28a5ebd  
    3333 */
    3434
    35 #ifndef TAR_H_
    36 #define TAR_H_
     35#ifndef BOOT_TAR_H_
     36#define BOOT_TAR_H_
    3737
    3838#include <stdbool.h>
  • boot/generic/src/printf_core.c

    r4f663f3e r28a5ebd  
    504504        while (true) {
    505505                i = nxt;
    506                 wchar_t uc = str_decode(fmt, &nxt, STR_NO_LIMIT);
     506                char32_t uc = str_decode(fmt, &nxt, STR_NO_LIMIT);
    507507
    508508                if (uc == 0)
  • boot/generic/src/str.c

    r4f663f3e r28a5ebd  
    3838 * strings, called just strings are encoded in UTF-8. Wide strings (encoded
    3939 * in UTF-32) are supported to a limited degree. A single character is
    40  * represented as wchar_t.@n
     40 * represented as char32_t.@n
    4141 *
    4242 * Overview of the terminology:@n
     
    4646 *  byte                  8 bits stored in uint8_t (unsigned 8 bit integer)
    4747 *
    48  *  character             UTF-32 encoded Unicode character, stored in wchar_t
    49  *                        (signed 32 bit integer), code points 0 .. 1114111
     48 *  character             UTF-32 encoded Unicode character, stored in char32_t
     49 *                        (unsigned 32 bit integer), code points 0 .. 1114111
    5050 *                        are valid
    5151 *
     
    5757 *
    5858 *  wide string           UTF-32 encoded NULL-terminated Unicode string,
    59  *                        wchar_t *
     59 *                        char32_t *
    6060 *
    6161 *  [wide] string size    number of BYTES in a [wide] string (excluding
     
    9696 * A specific character inside a [wide] string can be referred to by:@n
    9797 *
    98  *  pointer (char *, wchar_t *)
     98 *  pointer (char *, char32_t *)
    9999 *  byte offset (size_t)
    100100 *  character index (size_t)
     
    109109#include <stdint.h>
    110110
    111 /** Check the condition if wchar_t is signed */
    112 #ifdef __WCHAR_UNSIGNED__
    113 #define WCHAR_SIGNED_CHECK(cond)  (true)
    114 #else
    115 #define WCHAR_SIGNED_CHECK(cond)  (cond)
    116 #endif
    117 
    118111/** Byte mask consisting of lowest @n bits (out of 8) */
    119112#define LO_MASK_8(n)  ((uint8_t) ((1 << (n)) - 1))
     
    143136 *
    144137 */
    145 wchar_t str_decode(const char *str, size_t *offset, size_t size)
     138char32_t str_decode(const char *str, size_t *offset, size_t size)
    146139{
    147140        if (*offset + 1 > size)
     
    180173                return U_SPECIAL;
    181174
    182         wchar_t ch = b0 & LO_MASK_8(b0_bits);
     175        char32_t ch = b0 & LO_MASK_8(b0_bits);
    183176
    184177        /* Decode continuation bytes */
     
    191184
    192185                /* Shift data bits to ch */
    193                 ch = (ch << CONT_BITS) | (wchar_t) (b & LO_MASK_8(CONT_BITS));
     186                ch = (ch << CONT_BITS) | (char32_t) (b & LO_MASK_8(CONT_BITS));
    194187                cbytes--;
    195188        }
     
    213206 *         code was invalid.
    214207 */
    215 errno_t chr_encode(const wchar_t ch, char *str, size_t *offset, size_t size)
     208errno_t chr_encode(const char32_t ch, char *str, size_t *offset, size_t size)
    216209{
    217210        if (*offset >= size)
     
    340333 *
    341334 */
    342 bool ascii_check(wchar_t ch)
    343 {
    344         if (WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 127))
     335bool ascii_check(char32_t ch)
     336{
     337        if (ch <= 127)
    345338                return true;
    346339
     
    353346 *
    354347 */
    355 bool chr_check(wchar_t ch)
    356 {
    357         if (WCHAR_SIGNED_CHECK(ch >= 0) && (ch <= 1114111))
     348bool chr_check(char32_t ch)
     349{
     350        if (ch <= 1114111)
    358351                return true;
    359352
     
    381374int str_cmp(const char *s1, const char *s2)
    382375{
    383         wchar_t c1 = 0;
    384         wchar_t c2 = 0;
     376        char32_t c1 = 0;
     377        char32_t c2 = 0;
    385378
    386379        size_t off1 = 0;
     
    421414        size_t dest_off = 0;
    422415
    423         wchar_t ch;
     416        char32_t ch;
    424417        while ((ch = str_decode(src, &src_off, STR_NO_LIMIT)) != 0) {
    425418                if (chr_encode(ch, dest, &dest_off, size - 1) != EOK)
  • boot/generic/src/vprintf.c

    r4f663f3e r28a5ebd  
    4242
    4343        while (offset < size) {
    44                 putwchar(str_decode(str, &offset, size));
     44                putuchar(str_decode(str, &offset, size));
    4545                chars++;
    4646        }
     
    5353        size_t offset = 0;
    5454        size_t chars = 0;
    55         wchar_t uc;
     55        char32_t uc;
    5656
    5757        while ((uc = str_decode(str, &offset, STR_NO_LIMIT)) != 0) {
    58                 putwchar(uc);
     58                putuchar(uc);
    5959                chars++;
    6060        }
    6161
    62         putwchar('\n');
     62        putuchar('\n');
    6363        return chars;
    6464}
Note: See TracChangeset for help on using the changeset viewer.