Changeset e1813cf in mainline for kernel/generic/src/lib/string.c


Ignore:
Timestamp:
2009-03-31T22:51:41Z (15 years ago)
Author:
Jiri Svoboda <jirik.svoboda@…>
Branches:
lfn, master, serial, ticket/834-toolchain-update, topic/msim-upgrade, topic/simplify-dev-export
Children:
ce87a8aa
Parents:
b54d2f1
Message:

Start converting string functions according to the terminology agreed upon.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • kernel/generic/src/lib/string.c

    rb54d2f1 re1813cf  
    5757#define CONT_BITS 6
    5858
    59 /** Decode a single UTF-8 character from a NULL-terminated string.
    60  *
    61  * Decode a single UTF-8 character from a plain char NULL-terminated
    62  * string. Decoding starts at @index and this index is moved to the
    63  * beginning of the next character. In case of decoding error,
    64  * index advances. However, index is never moved beyond (str+limit).
    65  *
    66  * @param str   Plain character NULL-terminated string.
     59/** Decode a single character from a substring.
     60 *
     61 * Decode a single character from a substring of size @a sz. Decoding starts
     62 * at @a offset and this offset is moved to the beginning of the next
     63 * character. In case of decoding error, offset generally advances at least
     64 * by one. However, offset is never moved beyond (str + sz).
     65 *
     66 * @param str   String (not necessarily NULL-terminated).
    6767 * @param index Index (counted in plain characters) where to start
    6868 *              the decoding.
    69  * @param limit Maximal allowed value of index.
    70  *
    71  * @return Decoded character in UTF-32 or '?' if the encoding is wrong.
    72  *
    73  */
    74 wchar_t utf8_decode(const char *str, index_t *index, index_t limit)
     69 * @param sz    Size of the substring.
     70 *
     71 * @return      Value of decoded character or '?' on decoding error.
     72 *
     73 */
     74wchar_t chr_decode(const char *str, size_t *offset, size_t sz)
    7575{
    7676        uint8_t b0, b;          /* Bytes read from str. */
     
    8080        int cbytes;             /* Number of continuation bytes. */
    8181
    82         if (*index + 1 > limit)
     82        if (*offset + 1 > sz)
    8383                return invalch;
    8484
    85         b0 = (uint8_t) str[(*index)++];
     85        b0 = (uint8_t) str[(*offset)++];
    8686
    8787        /* Determine code length. */
     
    108108        }
    109109
    110         if (*index + cbytes > limit) {
     110        if (*offset + cbytes > sz) {
    111111                return invalch;
    112112        }
     
    116116        /* Decode continuation bytes. */
    117117        while (cbytes > 0) {
    118                 b = (uint8_t) str[(*index)++];
     118                b = (uint8_t) str[(*offset)++];
    119119
    120120                /* Must be 10xxxxxx. */
     
    131131}
    132132
    133 /** Encode a single UTF-32 character as UTF-8
    134  *
    135  * Encode a single UTF-32 character as UTF-8 and store it into
    136  * the given buffer at @index. Encoding starts at @index and
    137  * this index is moved at the position where the next character
    138  * can be written to.
    139  *
    140  * @param ch    Input UTF-32 character.
    141  * @param str   Output buffer.
    142  * @param index Index (counted in plain characters) where to start
    143  *              the encoding
    144  * @param limit Maximal allowed value of index.
    145  *
    146  * @return True if the character was encoded or false if there is not
    147  *         enough space in the output buffer or the character is invalid
    148  *         Unicode code point.
    149  *
    150  */
    151 bool utf8_encode(const wchar_t ch, char *str, index_t *index, index_t limit)
     133/** Encode a single character to string representation.
     134 *
     135 * Encode a single character to string representation (i.e. UTF-8) and store
     136 * it into a buffer at @a offset. Encoding starts at @a offset and this offset
     137 * is moved to the position where the next character can be written to.
     138 *
     139 * @param ch            Input character.
     140 * @param str           Output buffer.
     141 * @param offset        Offset (in bytes) where to start writing.
     142 * @param sz            Size of the output buffer.
     143 *
     144 * @return True if the character was encoded successfully or false if there
     145 *         was not enough space in the output buffer or the character code
     146 *         was invalid.
     147 */
     148bool chr_encode(const wchar_t ch, char *str, size_t *offset, size_t sz)
    152149{
    153150        uint32_t cc;            /* Unsigned version of ch. */
     
    157154        int i;
    158155
    159         if (*index >= limit)
     156        if (*offset >= sz)
    160157                return false;
    161158
     
    185182
    186183        /* Check for available space in buffer. */
    187         if (*index + cbytes >= limit)
     184        if (*offset + cbytes >= sz)
    188185                return false;
    189186
    190187        /* Encode continuation bytes. */
    191188        for (i = cbytes; i > 0; --i) {
    192                 str[*index + i] = 0x80 | (cc & LO_MASK_32(CONT_BITS));
     189                str[*offset + i] = 0x80 | (cc & LO_MASK_32(CONT_BITS));
    193190                cc = cc >> CONT_BITS;
    194191        }
    195192
    196193        /* Encode first byte. */
    197         str[*index] = (cc & LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1);
    198 
    199         /* Advance index. */
    200         *index += (1 + cbytes);
     194        str[*offset] = (cc & LO_MASK_32(b0_bits)) | HI_MASK_8(8 - b0_bits - 1);
     195
     196        /* Advance offset. */
     197        *offset += (1 + cbytes);
    201198       
    202199        return true;
     
    227224                if (size >= count)
    228225                        break;
    229                 ch = utf8_decode(str, &index, UTF8_NO_LIMIT);
     226                ch = chr_decode(str, &index, UTF8_NO_LIMIT);
    230227                if (ch == '\0') break;
    231228
     
    289286        index_t index = 0;
    290287       
    291         while (utf8_decode(str, &index, UTF8_NO_LIMIT) != 0) {
     288        while (chr_decode(str, &index, UTF8_NO_LIMIT) != 0) {
    292289                size++;
    293290        }
Note: See TracChangeset for help on using the changeset viewer.