Context Navigation

source: mainline/uspace/lib/c/test/str.c

Visit:

Last change on this file was b31323f, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 2 months ago
Test, fix and extend string sanitization
Property mode set to `100644`
File size: 7.0 KB

Line
1	/*
2	* Copyright (c) 2015 Michal Koutny
3	* All rights reserved.
4	*
5	* Redistribution and use in source and binary forms, with or without
6	* modification, are permitted provided that the following conditions
7	* are met:
8	*
9	* - Redistributions of source code must retain the above copyright
10	* notice, this list of conditions and the following disclaimer.
11	* - Redistributions in binary form must reproduce the above copyright
12	* notice, this list of conditions and the following disclaimer in the
13	* documentation and/or other materials provided with the distribution.
14	* - The name of the author may not be used to endorse or promote products
15	* derived from this software without specific prior written permission.
16	*
17	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27	*/
28
29	#include "pcut/asserts.h"
30	#include <assert.h>
31	#include <stdint.h>
32	#include <stdio.h>
33	#include <str.h>
34	#include <pcut/pcut.h>
35
/** Capacity, in bytes, of the scratch buffer shared by the tests below. */
#define BUFFER_SIZE 256

/** Scratch buffer that the tests fill, modify in place and then inspect. */
static char buffer[BUFFER_SIZE];

/** Copy a NUL-terminated string into the scratch buffer; snprintf bounds the copy. */
#define SET_BUFFER(str) snprintf(buffer, sizeof(buffer), "%s", (str))

/** Shorthand for the PCUT string-equality assertion. */
#define EQ(expected, value) PCUT_ASSERT_STR_EQUALS(expected, value)

PCUT_INIT;

PCUT_TEST_SUITE(str);
46
47	PCUT_TEST_BEFORE
48	{
49	memset(buffer, 0, BUFFER_SIZE);
50	}
51
/**
 * Render a byte string as a double-quoted, printable literal for debugging.
 *
 * Bytes in the printable ASCII range (32..126) are copied verbatim; every
 * other byte is written as a four-character \xNN escape. Rendering stops at
 * the first NUL byte in s or after len bytes, whichever comes first.
 *
 * @param out  Destination buffer. No bounds checking is done here, so the
 *             caller must provide at least 4 * len + 3 bytes (worst case of
 *             four characters per input byte, two quotes and the NUL).
 * @param s    Bytes to render.
 * @param len  Maximum number of bytes of s to examine.
 */
static void print_string_hex(char *out, const char *s, size_t len)
{
	*out++ = '"';

	for (size_t i = 0; i < len && s[i] != '\0'; i++) {
		if (s[i] >= 32 && s[i] <= 126) {
			*out++ = s[i];
		} else {
			/*
			 * The escape is exactly four characters; the size of 5
			 * leaves room for the NUL that snprintf appends.
			 */
			out += snprintf(out, 5, "\\x%02x", (uint8_t) s[i]);
		}
	}

	*out++ = '"';
	*out = '\0';
}
65
66	PCUT_TEST(rtrim)
67	{
68	SET_BUFFER("foobar");
69	str_rtrim(buffer, ' ');
70	EQ("foobar", buffer);
71
72	SET_BUFFER(" foobar ");
73	str_rtrim(buffer, ' ');
74	EQ(" foobar", buffer);
75
76	SET_BUFFER(" ššš ");
77	str_rtrim(buffer, ' ');
78	EQ(" ššš", buffer);
79
80	SET_BUFFER("ššAAAšš");
81	str_rtrim(buffer, L'š');
82	EQ("ššAAA", buffer);
83	}
84
85	PCUT_TEST(ltrim)
86	{
87	SET_BUFFER("foobar");
88	str_ltrim(buffer, ' ');
89	EQ("foobar", buffer);
90
91	SET_BUFFER(" foobar ");
92	str_ltrim(buffer, ' ');
93	EQ("foobar ", buffer);
94
95	SET_BUFFER(" ššš ");
96	str_ltrim(buffer, ' ');
97	EQ("ššš ", buffer);
98
99	SET_BUFFER("ššAAAšš");
100	str_ltrim(buffer, L'š');
101	EQ("AAAšš", buffer);
102	}
103
/* A needle that occurs in the haystack yields a pointer to its first byte. */
PCUT_TEST(str_str_found)
{
	const char *haystack = "abracadabra";
	char *match = str_str(haystack, "raca");

	/* "raca" begins at byte offset 2 of "abracadabra". */
	PCUT_ASSERT_TRUE((const char *) match == haystack + 2);
}
113
/* A needle that does not occur in the haystack yields NULL. */
PCUT_TEST(str_str_not_found)
{
	const char *haystack = "abracadabra";
	char *match = str_str(haystack, "racab");

	PCUT_ASSERT_TRUE(match == NULL);
}
123
/* An empty needle matches at the very start of the haystack. */
PCUT_TEST(str_str_empty_n)
{
	const char *haystack = "abracadabra";
	char *match = str_str(haystack, "");

	PCUT_ASSERT_TRUE((const char *) match == haystack);
}
133
134	PCUT_TEST(str_non_shortest)
135	{
136	/* Overlong zero. */
137	const char overlong1[] = "\xC0\x80";
138	const char overlong2[] = "\xE0\x80\x80";
139	const char overlong3[] = "\xF0\x80\x80\x80";
140
141	const char overlong4[] = "\xC1\xBF";
142	const char overlong5[] = "\xE0\x9F\xBF";
143	const char overlong6[] = "\xF0\x8F\xBF\xBF";
144
145	size_t offset = 0;
146	PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong1, &offset, sizeof(overlong1)));
147	offset = 0;
148	PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong2, &offset, sizeof(overlong2)));
149	offset = 0;
150	PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong3, &offset, sizeof(overlong3)));
151	offset = 0;
152	PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong4, &offset, sizeof(overlong4)));
153	offset = 0;
154	PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong5, &offset, sizeof(overlong5)));
155	offset = 0;
156	PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong6, &offset, sizeof(overlong6)));
157	}
158
/** One sanitization case: raw input bytes and the expected sanitized bytes. */
struct sanitize_test {
	const char *input;
	const char *output;
};

/*
 * Cases for the str_sanitize test. Input and output of every entry have the
 * same byte length; in the expected output each replaced byte is a '?'.
 */
static const struct sanitize_test sanitize_tests[] = {
	/* Empty string */
	{ .input = "", .output = "" },
	/* ASCII only */
	{ .input = "Hello, world!", .output = "Hello, world!" },
	/* Valid multi-byte sequences */
	{ .input = "Aπ你🐱", .output = "Aπ你🐱" },
	/* U+D7FF is last valid before surrogates */
	{ .input = "A\xED\x9F\xBFZ", .output = "A\xED\x9F\xBFZ" },
	/* 0x10FFFF is the highest legal code point */
	{ .input = "A\xF4\x8F\xBF\xBFZ", .output = "A\xF4\x8F\xBF\xBFZ" },

	/* Missing continuation byte */
	{ .input = "A\xC2Z", .output = "A?Z" },
	/* Truncated multi-byte at buffer end */
	{ .input = "A\xE2\x82", .output = "A??" },
	/* Continuation byte without leading byte (0x80-0xBF are never valid first bytes) */
	{ .input = "A\x80Y\xBFZ", .output = "A?Y?Z" },

	/*
	 * 'A' (U+0041) normally encoded as 0x41
	 * Overlong 2-byte encoding: 0xC1 0x81
	 */
	{ .input = "\xC1\x81X", .output = "??X" },

	/*
	 * ¢ (U+00A2) normally encoded as 0xC2 0xA2
	 * Overlong 3-byte encoding: 0xE0 0x82 0xA2
	 */
	{ .input = "\xE0\x82\xA2X", .output = "???X" },

	/*
	 * ¢ (U+00A2) normally encoded as 0xC2 0xA2
	 * Overlong 4-byte encoding: 0xF0 0x80 0x82 0xA2
	 */
	{ .input = "\xF0\x80\x82\xA2X", .output = "????X" },

	/*
	 * € (U+20AC) normally encoded as 0xE2 0x82 0xAC
	 * Overlong 4-byte encoding: 0xF0 0x82 0x82 0xAC
	 */
	{ .input = "\xF0\x82\x82\xACX", .output = "????X" },

	/* Using 0xC0 0x80 as overlong encoding for NUL (which should be just 0x00) */
	{ .input = "\xC0\x80X", .output = "??X" },

	/* 0xED 0xA0 0x80 encodes a surrogate half (U+D800), not allowed in UTF-8 */
	{ .input = "A\xED\xA0\x80Z", .output = "A???Z" },

	/* 0x110000 is not a legal code point */
	{ .input = "A\xF4\x90\x80\x80Z", .output = "A????Z" },

	/* Mix of valid and invalid sequences */
	{
		.input = "A\xC2\xA9\xE2\x28\xA1\xF0\x9F\x98\x81\x80Z",
		.output = "A©?(?😁?Z",
	},
};
211
/**
 * Count the positions at which two byte buffers differ.
 *
 * @param a  First buffer, at least n bytes long.
 * @param b  Second buffer, at least n bytes long.
 * @param n  Number of leading bytes to compare.
 *
 * @return Number of indices i in [0, n) for which a[i] != b[i].
 */
static size_t count_diff(const char *a, const char *b, size_t n)
{
	size_t count = 0;

	for (size_t i = 0; i < n; i++) {
		if (a[i] != b[i])
			count++;
	}

	return count;
}
223
224	PCUT_TEST(str_sanitize)
225	{
226	char replacement = '?';
227	char buffer2[255];
228
229	for (size_t i = 0; i < sizeof(sanitize_tests) / sizeof(sanitize_tests[0]); i++) {
230	const char *in = sanitize_tests[i].input;
231	const char *out = sanitize_tests[i].output;
232	size_t n = str_size(in) + 1;
233	assert(str_size(out) + 1 == n);
234
235	memcpy(buffer, in, n);
236	size_t replaced = str_sanitize(buffer, n, replacement);
237	if (memcmp(buffer, out, n) != 0) {
238	print_string_hex(buffer2, buffer, n);
239	print_string_hex(buffer, out, n);
240	PCUT_ASSERTION_FAILED("Expected %s, got %s", buffer, buffer2);
241	}
242
243	size_t expect_replaced = count_diff(buffer, in, n);
244	PCUT_ASSERT_INT_EQUALS(expect_replaced, replaced);
245	}
246
247	// Test with n smaller than string length - truncated valid encoding for €
248	const char *in = "ABC€";
249	const char *out = "ABC??\xAC";
250	size_t n = str_size(in) + 1;
251	memcpy(buffer, in, n);
252	size_t replaced = str_sanitize(buffer, 5, replacement);
253	if (memcmp(buffer, out, n) != 0) {
254	print_string_hex(buffer2, buffer, n);
255	print_string_hex(buffer, out, n);
256	PCUT_ASSERTION_FAILED("Expected %s, got %s", buffer, buffer2);
257	}
258
259	PCUT_ASSERT_INT_EQUALS(2, replaced);
260	}
261
262	PCUT_EXPORT(str);

Note: See TracBrowser for help on using the repository browser.

Download in other formats: