source: mainline/uspace/lib/c/test/str.c@ b31323f

Last change on this file since b31323f was b31323f, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 2 months ago

Test, fix and extend string sanitization

  • Property mode set to 100644
File size: 7.0 KB
RevLine 
/*
 * Copyright (c) 2015 Michal Koutny
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
28
[0600976]29#include "pcut/asserts.h"
[b31323f]30#include <assert.h>
31#include <stdint.h>
[a18a8b9]32#include <stdio.h>
33#include <str.h>
34#include <pcut/pcut.h>
35
/* Size in bytes of the scratch buffer shared by the tests in this file. */
#define BUFFER_SIZE 256

/*
 * Load a string into the shared scratch buffer. snprintf() bounds the copy
 * to BUFFER_SIZE bytes including the terminating NUL.
 */
#define SET_BUFFER(str) snprintf(buffer, BUFFER_SIZE, "%s", (str))

/* Shorthand for a PCUT string-equality assertion. */
#define EQ(expected, value) PCUT_ASSERT_STR_EQUALS(expected, value)

PCUT_INIT;

PCUT_TEST_SUITE(str);

/* Scratch buffer the tests read from and write to. */
static char buffer[BUFFER_SIZE];
46
[3bacee1]47PCUT_TEST_BEFORE
48{
[a18a8b9]49 memset(buffer, 0, BUFFER_SIZE);
50}
51
/**
 * Render a byte string as a double-quoted, printable ASCII string.
 *
 * Helper for displaying string contents when debugging a failed test.
 * Bytes in the printable ASCII range (32..126) are copied verbatim; every
 * other byte is written as a four-character \xNN escape using lowercase
 * hexadecimal digits. Rendering stops at the first NUL byte in s or after
 * len bytes, whichever comes first.
 *
 * @param out Destination buffer. No bounds checking is performed, so it must
 *            have room for the worst case of 4 * len + 3 bytes (every byte
 *            escaped, the two quotes and the terminating NUL).
 * @param s   Bytes to render.
 * @param len Maximum number of bytes of s to examine.
 */
static void print_string_hex(char *out, const char *s, size_t len)
{
	static const char hex_digits[] = "0123456789abcdef";

	*out++ = '"';
	for (size_t i = 0; i < len && s[i] != '\0'; i++) {
		/* Use the unsigned value so the test is independent of char signedness. */
		unsigned char byte = (unsigned char) s[i];

		if (byte >= 32 && byte <= 126) {
			*out++ = (char) byte;
		} else {
			/*
			 * Emit the escape by hand: the write position then never
			 * depends on a formatting function's return value.
			 */
			*out++ = '\\';
			*out++ = 'x';
			*out++ = hex_digits[byte >> 4];
			*out++ = hex_digits[byte & 0x0F];
		}
	}
	*out++ = '"';
	*out = '\0';
}
65
[3bacee1]66PCUT_TEST(rtrim)
67{
[a18a8b9]68 SET_BUFFER("foobar");
69 str_rtrim(buffer, ' ');
70 EQ("foobar", buffer);
71
72 SET_BUFFER(" foobar ");
73 str_rtrim(buffer, ' ');
74 EQ(" foobar", buffer);
75
76 SET_BUFFER(" ššš ");
77 str_rtrim(buffer, ' ');
78 EQ(" ššš", buffer);
79
80 SET_BUFFER("ššAAAšš");
81 str_rtrim(buffer, L'š');
82 EQ("ššAAA", buffer);
83}
84
[3bacee1]85PCUT_TEST(ltrim)
86{
[a18a8b9]87 SET_BUFFER("foobar");
88 str_ltrim(buffer, ' ');
89 EQ("foobar", buffer);
90
91 SET_BUFFER(" foobar ");
92 str_ltrim(buffer, ' ');
93 EQ("foobar ", buffer);
94
95 SET_BUFFER(" ššš ");
96 str_ltrim(buffer, ' ');
97 EQ("ššš ", buffer);
98
99 SET_BUFFER("ššAAAšš");
100 str_ltrim(buffer, L'š');
101 EQ("AAAšš", buffer);
102}
103
/* str_str(): a needle that occurs in the haystack yields a pointer to the match. */
PCUT_TEST(str_str_found)
{
	const char *hs = "abracadabra";
	const char *n = "raca";
	char *p;

	p = str_str(hs, n);
	/* "raca" starts at byte offset 2 of "abracadabra". */
	PCUT_ASSERT_TRUE((const char *)p == hs + 2);
}
113
/* str_str(): a needle that does not occur in the haystack yields NULL. */
PCUT_TEST(str_str_not_found)
{
	const char *hs = "abracadabra";
	const char *n = "racab";
	char *p;

	p = str_str(hs, n);
	PCUT_ASSERT_NULL(p);
}
123
/* str_str(): an empty needle matches at the very start of the haystack. */
PCUT_TEST(str_str_empty_n)
{
	const char *hs = "abracadabra";
	const char *n = "";
	char *p;

	p = str_str(hs, n);
	PCUT_ASSERT_TRUE((const char *)p == hs);
}
[a18a8b9]133
[0600976]134PCUT_TEST(str_non_shortest)
135{
136 /* Overlong zero. */
[b31323f]137 const char overlong1[] = "\xC0\x80";
138 const char overlong2[] = "\xE0\x80\x80";
139 const char overlong3[] = "\xF0\x80\x80\x80";
[0600976]140
[b31323f]141 const char overlong4[] = "\xC1\xBF";
142 const char overlong5[] = "\xE0\x9F\xBF";
143 const char overlong6[] = "\xF0\x8F\xBF\xBF";
[0600976]144
145 size_t offset = 0;
146 PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong1, &offset, sizeof(overlong1)));
147 offset = 0;
148 PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong2, &offset, sizeof(overlong2)));
149 offset = 0;
150 PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong3, &offset, sizeof(overlong3)));
151 offset = 0;
152 PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong4, &offset, sizeof(overlong4)));
153 offset = 0;
154 PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong5, &offset, sizeof(overlong5)));
155 offset = 0;
156 PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong6, &offset, sizeof(overlong6)));
[b31323f]157}
158
/*
 * One sanitization case: the raw input bytes and the bytes expected after
 * sanitizing with '?' as the replacement character.
 */
struct sanitize_test {
	const char *input;
	const char *output;
};

/*
 * Sanitization test vectors. In every entry the output has the same byte
 * length as the input: each rejected byte is replaced by exactly one '?'.
 */
static const struct sanitize_test sanitize_tests[] = {
	// Empty string
	{ "", "" },
	// ASCII only
	{ "Hello, world!", "Hello, world!" },
	// Valid multi-byte sequences
	{ "Aπ你🐱", "Aπ你🐱" },
	// U+D7FF is last valid before surrogates
	{ "A\xED\x9F\xBFZ", "A\xED\x9F\xBFZ" },
	// 0x10FFFF is the highest legal code point
	{ "A\xF4\x8F\xBF\xBFZ", "A\xF4\x8F\xBF\xBFZ" },

	// Missing continuation byte
	{ "A\xC2Z", "A?Z" },
	// Truncated multi-byte at buffer end
	{ "A\xE2\x82", "A??" },
	// Continuation byte without leading byte (0x80-0xBF are never valid first bytes)
	{ "A\x80Y\xBFZ", "A?Y?Z" },

	// 'A' (U+0041) normally encoded as 0x41
	// Overlong 2-byte encoding: 0xC1 0x81
	{ "\xC1\x81X", "??X" },

	// ¢ (U+00A2) normally encoded as 0xC2 0xA2
	// Overlong 3-byte encoding: 0xE0 0x82 0xA2
	{ "\xE0\x82\xA2X", "???X" },

	// ¢ (U+00A2) normally encoded as 0xC2 0xA2
	// Overlong 4-byte encoding: 0xF0 0x80 0x82 0xA2
	{ "\xF0\x80\x82\xA2X", "????X" },

	// € (U+20AC) normally encoded as 0xE2 0x82 0xAC
	// Overlong 4-byte encoding: 0xF0 0x82 0x82 0xAC
	{ "\xF0\x82\x82\xACX", "????X" },

	// Using 0xC0 0x80 as overlong encoding for NUL (which should be just 0x00)
	{ "\xC0\x80X", "??X" },

	// 0xED 0xA0 0x80 encodes a surrogate half (U+D800), not allowed in UTF-8
	{ "A\xED\xA0\x80Z", "A???Z" },

	// 0x110000 is not a legal code point
	{ "A\xF4\x90\x80\x80Z", "A????Z" },

	// Mix of valid and invalid sequences
	{ "A\xC2\xA9\xE2\x28\xA1\xF0\x9F\x98\x81\x80Z", "A©?(?😁?Z" },
};
211
/**
 * Count the positions at which two byte buffers differ.
 *
 * @param a First buffer, at least n bytes long.
 * @param b Second buffer, at least n bytes long.
 * @param n Number of bytes to compare.
 *
 * @return Number of indices i in [0, n) for which a[i] != b[i].
 */
static size_t count_diff(const char *a, const char *b, size_t n)
{
	size_t count = 0;

	for (size_t i = 0; i < n; i++) {
		if (a[i] != b[i]) {
			count++;
		}
	}

	return count;
}
223
224PCUT_TEST(str_sanitize)
225{
226 char replacement = '?';
227 char buffer2[255];
228
229 for (size_t i = 0; i < sizeof(sanitize_tests) / sizeof(sanitize_tests[0]); i++) {
230 const char *in = sanitize_tests[i].input;
231 const char *out = sanitize_tests[i].output;
232 size_t n = str_size(in) + 1;
233 assert(str_size(out) + 1 == n);
234
235 memcpy(buffer, in, n);
236 size_t replaced = str_sanitize(buffer, n, replacement);
237 if (memcmp(buffer, out, n) != 0) {
238 print_string_hex(buffer2, buffer, n);
239 print_string_hex(buffer, out, n);
240 PCUT_ASSERTION_FAILED("Expected %s, got %s", buffer, buffer2);
241 }
242
243 size_t expect_replaced = count_diff(buffer, in, n);
244 PCUT_ASSERT_INT_EQUALS(expect_replaced, replaced);
245 }
246
247 // Test with n smaller than string length - truncated valid encoding for €
248 const char *in = "ABC€";
249 const char *out = "ABC??\xAC";
250 size_t n = str_size(in) + 1;
251 memcpy(buffer, in, n);
252 size_t replaced = str_sanitize(buffer, 5, replacement);
253 if (memcmp(buffer, out, n) != 0) {
254 print_string_hex(buffer2, buffer, n);
255 print_string_hex(buffer, out, n);
256 PCUT_ASSERTION_FAILED("Expected %s, got %s", buffer, buffer2);
257 }
[0600976]258
[b31323f]259 PCUT_ASSERT_INT_EQUALS(2, replaced);
[0600976]260}
261
PCUT_EXPORT(str);
Note: See TracBrowser for help on using the repository browser.