Changes in uspace/lib/c/test/str.c [b31323f:09ab0a9a] in mainline
- Files: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
uspace/lib/c/test/str.c
rb31323f r09ab0a9a 27 27 */ 28 28 29 #include "pcut/asserts.h"30 #include <assert.h>31 #include <stdint.h>32 29 #include <stdio.h> 33 30 #include <str.h> … … 48 45 { 49 46 memset(buffer, 0, BUFFER_SIZE); 50 }51 52 /* Helper to display string contents for debugging */53 static void print_string_hex(char *out, const char *s, size_t len)54 {55 *out++ = '"';56 for (size_t i = 0; i < len && s[i]; i++) {57 if (s[i] >= 32 && s[i] <= 126)58 *out++ = s[i];59 else60 out += snprintf(out, 5, "\\x%02x", (uint8_t) s[i]);61 }62 *out++ = '"';63 *out++ = 0;64 47 } 65 48 … … 132 115 } 133 116 134 PCUT_TEST(str_non_shortest)135 {136 /* Overlong zero. */137 const char overlong1[] = "\xC0\x80";138 const char overlong2[] = "\xE0\x80\x80";139 const char overlong3[] = "\xF0\x80\x80\x80";140 141 const char overlong4[] = "\xC1\xBF";142 const char overlong5[] = "\xE0\x9F\xBF";143 const char overlong6[] = "\xF0\x8F\xBF\xBF";144 145 size_t offset = 0;146 PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong1, &offset, sizeof(overlong1)));147 offset = 0;148 PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong2, &offset, sizeof(overlong2)));149 offset = 0;150 PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong3, &offset, sizeof(overlong3)));151 offset = 0;152 PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong4, &offset, sizeof(overlong4)));153 offset = 0;154 PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong5, &offset, sizeof(overlong5)));155 offset = 0;156 PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong6, &offset, sizeof(overlong6)));157 }158 159 struct sanitize_test {160 const char *input;161 const char *output;162 };163 164 static const struct sanitize_test sanitize_tests[] = {165 // Empty string166 { "", "" },167 // ASCII only168 { "Hello, world!", "Hello, world!" 
},169 // Valid multi-byte sequences170 { "Aπ你🐱", "Aπ你🐱" },171 // U+D7FF is last valid before surrogates172 { "A\xED\x9F\xBFZ", "A\xED\x9F\xBFZ" },173 // 0x10FFFF is the highest legal code point174 { "A\xF4\x8F\xBF\xBFZ", "A\xF4\x8F\xBF\xBFZ" },175 176 // Missing continuation byte177 { "A\xC2Z", "A?Z" },178 // Truncated multi-byte at buffer end179 { "A\xE2\x82", "A??" },180 // Continuation byte without leading byte (0x80-0xBF are never valid first bytes)181 { "A\x80Y\xBFZ", "A?Y?Z" },182 183 // 'A' (U+0041) normally encoded as 0x41184 // Overlong 2-byte encoding: 0xC1 0x81185 { "\xC1\x81X", "??X" },186 187 // ¢ (U+00A2) normally encoded as 0xC2 0xA2188 // Overlong 3-byte encoding: 0xE0 0x82 0xA2189 { "\xE0\x82\xA2X", "???X" },190 191 // ¢ (U+00A2) normally encoded as 0xC2 0xA2192 // Overlong 4-byte encoding: 0xF0 0x80 0x82 0xA2193 { "\xF0\x80\x82\xA2X", "????X" },194 195 // € (U+20AC) normally encoded as 0xE2 0x82 0xAC196 // Overlong 4-byte encoding: 0xF0 0x82 0x82 0xAC197 { "\xF0\x82\x82\xACX", "????X" },198 199 // Using 0xC0 0x80 as overlong encoding for NUL (which should be just 0x00)200 { "\xC0\x80X", "??X" },201 202 // 0xED 0xA0 0x80 encodes a surrogate half (U+D800), not allowed in UTF-8203 { "A\xED\xA0\x80Z", "A???Z" },204 205 // 0x110000 is not a legal code point206 { "A\xF4\x90\x80\x80Z", "A????Z" },207 208 // Mix of valid and invalid sequences209 { "A\xC2\xA9\xE2\x28\xA1\xF0\x9F\x98\x81\x80Z", "A©?(?😁?Z" },210 };211 212 static size_t count_diff(const char *a, const char *b, size_t n)213 {214 size_t count = 0;215 216 for (size_t i = 0; i < n; i++) {217 if (a[i] != b[i])218 count++;219 }220 221 return count;222 }223 224 PCUT_TEST(str_sanitize)225 {226 char replacement = '?';227 char buffer2[255];228 229 for (size_t i = 0; i < sizeof(sanitize_tests) / sizeof(sanitize_tests[0]); i++) {230 const char *in = sanitize_tests[i].input;231 const char *out = sanitize_tests[i].output;232 size_t n = str_size(in) + 1;233 assert(str_size(out) + 1 == n);234 235 
memcpy(buffer, in, n);236 size_t replaced = str_sanitize(buffer, n, replacement);237 if (memcmp(buffer, out, n) != 0) {238 print_string_hex(buffer2, buffer, n);239 print_string_hex(buffer, out, n);240 PCUT_ASSERTION_FAILED("Expected %s, got %s", buffer, buffer2);241 }242 243 size_t expect_replaced = count_diff(buffer, in, n);244 PCUT_ASSERT_INT_EQUALS(expect_replaced, replaced);245 }246 247 // Test with n smaller than string length - truncated valid encoding for €248 const char *in = "ABC€";249 const char *out = "ABC??\xAC";250 size_t n = str_size(in) + 1;251 memcpy(buffer, in, n);252 size_t replaced = str_sanitize(buffer, 5, replacement);253 if (memcmp(buffer, out, n) != 0) {254 print_string_hex(buffer2, buffer, n);255 print_string_hex(buffer, out, n);256 PCUT_ASSERTION_FAILED("Expected %s, got %s", buffer, buffer2);257 }258 259 PCUT_ASSERT_INT_EQUALS(2, replaced);260 }261 262 117 PCUT_EXPORT(str);
Note: See TracChangeset for help on using the changeset viewer.