Ignore:
File:
1 edited

Legend:

Unmodified
Added
Removed
  • uspace/lib/c/test/str.c

    rb31323f r09ab0a9a  
    2727 */
    2828
    29 #include "pcut/asserts.h"
    30 #include <assert.h>
    31 #include <stdint.h>
    3229#include <stdio.h>
    3330#include <str.h>
     
    4845{
    4946        memset(buffer, 0, BUFFER_SIZE);
    50 }
    51 
    /**
     * Render a byte string as a double-quoted, printable ASCII string for
     * diagnostic messages.
     *
     * Printable ASCII bytes (32..126) other than the double quote and the
     * backslash are copied verbatim; every other byte is written as a
     * four-character \xNN escape with lowercase hex digits. Conversion stops
     * at the first NUL byte in s or after len bytes, whichever comes first.
     *
     * @param out Destination buffer. No size is passed in, so the caller must
     *            provide room for the worst case of 4 * len + 3 bytes (every
     *            byte escaped, two quotes and the terminating NUL).
     * @param s   Bytes to render; they need not form valid UTF-8.
     * @param len Maximum number of bytes of s to examine.
     */
    static void print_string_hex(char *out, const char *s, size_t len)
    {
            static const char hex_digits[] = "0123456789abcdef";

            *out++ = '"';
            for (size_t i = 0; i < len && s[i]; i++) {
                    /* Work on the unsigned value: plain char may be signed. */
                    unsigned char byte = (unsigned char) s[i];

                    /*
                     * The quote and the backslash are escaped too, so the
                     * rendered text cannot be mistaken for a delimiter or
                     * for an escape sequence.
                     */
                    if (byte >= 32 && byte <= 126 && byte != '"' && byte != '\\') {
                            *out++ = (char) byte;
                    } else {
                            /*
                             * Written by hand rather than through a formatting
                             * call, so there is no return value that has to be
                             * checked before advancing out.
                             */
                            *out++ = '\\';
                            *out++ = 'x';
                            *out++ = hex_digits[byte >> 4];
                            *out++ = hex_digits[byte & 0x0f];
                    }
            }
            *out++ = '"';
            *out = '\0';
    }
    6548
     
    132115}
    133116
    134 PCUT_TEST(str_non_shortest)
    135 {
    136         /* Overlong zero. */
    137         const char overlong1[] = "\xC0\x80";
    138         const char overlong2[] = "\xE0\x80\x80";
    139         const char overlong3[] = "\xF0\x80\x80\x80";
    140 
    141         const char overlong4[] = "\xC1\xBF";
    142         const char overlong5[] = "\xE0\x9F\xBF";
    143         const char overlong6[] = "\xF0\x8F\xBF\xBF";
    144 
    145         size_t offset = 0;
    146         PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong1, &offset, sizeof(overlong1)));
    147         offset = 0;
    148         PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong2, &offset, sizeof(overlong2)));
    149         offset = 0;
    150         PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong3, &offset, sizeof(overlong3)));
    151         offset = 0;
    152         PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong4, &offset, sizeof(overlong4)));
    153         offset = 0;
    154         PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong5, &offset, sizeof(overlong5)));
    155         offset = 0;
    156         PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong6, &offset, sizeof(overlong6)));
    157 }
    158 
    /** One sanitizing case: the raw input bytes and the bytes expected back. */
    struct sanitize_test {
            const char *input;
            const char *output;
    };

    /*
     * Table of sanitizing cases. In every entry the output has the same byte
     * length as the input: each offending byte is shown replaced by a single
     * '?' and everything else is unchanged.
     */
    static const struct sanitize_test sanitize_tests[] = {
            /* Empty string. */
            { .input = "", .output = "" },
            /* ASCII only. */
            { .input = "Hello, world!", .output = "Hello, world!" },
            /* Valid multi-byte sequences of two, three and four bytes. */
            { .input = "Aπ你🐱", .output = "Aπ你🐱" },
            /* U+D7FF, the last code point before the surrogate range. */
            { .input = "A\xED\x9F\xBFZ", .output = "A\xED\x9F\xBFZ" },
            /* U+10FFFF, the highest legal code point. */
            { .input = "A\xF4\x8F\xBF\xBFZ", .output = "A\xF4\x8F\xBF\xBFZ" },

            /* Lead byte with its continuation byte missing. */
            { .input = "A\xC2Z", .output = "A?Z" },
            /* Multi-byte sequence cut off by the end of the buffer. */
            { .input = "A\xE2\x82", .output = "A??" },
            /* Continuation bytes (0x80-0xBF) appearing without a lead byte. */
            { .input = "A\x80Y\xBFZ", .output = "A?Y?Z" },

            /* Overlong two-byte form of 'A' (U+0041, properly 0x41). */
            { .input = "\xC1\x81X", .output = "??X" },

            /* Overlong three-byte form of U+00A2 (properly 0xC2 0xA2). */
            { .input = "\xE0\x82\xA2X", .output = "???X" },

            /* Overlong four-byte form of U+00A2 (properly 0xC2 0xA2). */
            { .input = "\xF0\x80\x82\xA2X", .output = "????X" },

            /* Overlong four-byte form of U+20AC (properly 0xE2 0x82 0xAC). */
            { .input = "\xF0\x82\x82\xACX", .output = "????X" },

            /* 0xC0 0x80 as an overlong form of NUL (properly just 0x00). */
            { .input = "\xC0\x80X", .output = "??X" },

            /* 0xED 0xA0 0x80 spells the surrogate half U+D800. */
            { .input = "A\xED\xA0\x80Z", .output = "A???Z" },

            /* 0x110000 lies beyond the last legal code point. */
            { .input = "A\xF4\x90\x80\x80Z", .output = "A????Z" },

            /* Valid and invalid sequences interleaved. */
            {
                    .input = "A\xC2\xA9\xE2\x28\xA1\xF0\x9F\x98\x81\x80Z",
                    .output = "A©?(?😁?Z",
            },
    };
    211 
    /**
     * Count the positions at which two byte sequences disagree.
     *
     * @param a First sequence; at least n bytes are read from it.
     * @param b Second sequence; at least n bytes are read from it.
     * @param n Number of leading bytes to compare.
     *
     * @return Number of indices i < n for which a[i] != b[i].
     */
    static size_t count_diff(const char *a, const char *b, size_t n)
    {
            size_t mismatches = 0;

            /* Walk both sequences in lock step until a reaches its end. */
            for (const char *stop = a + n; a != stop; a++, b++) {
                    if (*a != *b)
                            mismatches++;
            }

            return mismatches;
    }
    223 
    224 PCUT_TEST(str_sanitize)
    225 {
    226         char replacement = '?';
    227         char buffer2[255];
    228 
    229         for (size_t i = 0; i < sizeof(sanitize_tests) / sizeof(sanitize_tests[0]); i++) {
    230                 const char *in = sanitize_tests[i].input;
    231                 const char *out = sanitize_tests[i].output;
    232                 size_t n = str_size(in) + 1;
    233                 assert(str_size(out) + 1 == n);
    234 
    235                 memcpy(buffer, in, n);
    236                 size_t replaced = str_sanitize(buffer, n, replacement);
    237                 if (memcmp(buffer, out, n) != 0) {
    238                         print_string_hex(buffer2, buffer, n);
    239                         print_string_hex(buffer, out, n);
    240                         PCUT_ASSERTION_FAILED("Expected %s, got %s", buffer, buffer2);
    241                 }
    242 
    243                 size_t expect_replaced = count_diff(buffer, in, n);
    244                 PCUT_ASSERT_INT_EQUALS(expect_replaced, replaced);
    245         }
    246 
    247         // Test with n smaller than string length - truncated valid encoding for €
    248         const char *in = "ABC€";
    249         const char *out = "ABC??\xAC";
    250         size_t n = str_size(in) + 1;
    251         memcpy(buffer, in, n);
    252         size_t replaced = str_sanitize(buffer, 5, replacement);
    253         if (memcmp(buffer, out, n) != 0) {
    254                 print_string_hex(buffer2, buffer, n);
    255                 print_string_hex(buffer, out, n);
    256                 PCUT_ASSERTION_FAILED("Expected %s, got %s", buffer, buffer2);
    257         }
    258 
    259         PCUT_ASSERT_INT_EQUALS(2, replaced);
    260 }
    261 
    262117PCUT_EXPORT(str);
Note: See TracChangeset for help on using the changeset viewer.