Context Navigation

source: mainline/uspace/lib/c/test/str.c@ b31323f

Visit:

Last change on this file since b31323f was b31323f, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 2 months ago
Test, fix and extend string sanitization
Property mode set to `100644`
File size: 7.0 KB

Rev	Line
[a18a8b9]	1	/*
	2	* Copyright (c) 2015 Michal Koutny
	3	* All rights reserved.
	4	*
	5	* Redistribution and use in source and binary forms, with or without
	6	* modification, are permitted provided that the following conditions
	7	* are met:
	8	*
	9	* - Redistributions of source code must retain the above copyright
	10	* notice, this list of conditions and the following disclaimer.
	11	* - Redistributions in binary form must reproduce the above copyright
	12	* notice, this list of conditions and the following disclaimer in the
	13	* documentation and/or other materials provided with the distribution.
	14	* - The name of the author may not be used to endorse or promote products
	15	* derived from this software without specific prior written permission.
	16	*
	17	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
	18	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
	19	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
	20	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
	21	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
	22	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	23	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	24	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	25	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
	26	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	27	*/
	28
[0600976]	29	#include "pcut/asserts.h"
[b31323f]	30	#include <assert.h>
	31	#include <stdint.h>
[a18a8b9]	32	#include <stdio.h>
	33	#include <str.h>
	34	#include <pcut/pcut.h>
	35
	36	#define BUFFER_SIZE 256
	37
	38	#define SET_BUFFER(str) snprintf(buffer, BUFFER_SIZE, "%s", str)
	39	#define EQ(expected, value) PCUT_ASSERT_STR_EQUALS(expected, value)
	40
[3f932a7e]	41	PCUT_INIT;
[a18a8b9]	42
	43	PCUT_TEST_SUITE(str);
	44
	45	static char buffer[BUFFER_SIZE];
	46
[3bacee1]	47	PCUT_TEST_BEFORE
	48	{
[a18a8b9]	49	memset(buffer, 0, BUFFER_SIZE);
	50	}
	51
/**
 * Render a byte string as a quoted, printable literal for failure messages.
 *
 * Bytes in the printable ASCII range (32..126) are copied verbatim; every
 * other byte is written as a four-character \xNN escape.  Rendering stops at
 * the first NUL byte in @a s or after @a len bytes, whichever comes first.
 *
 * @param out Destination buffer.  No bounds checking is done here, so the
 *            caller must provide room for the worst case of 4 * len + 3
 *            bytes (all escapes, two quotes and the terminator).
 * @param s   Bytes to render.
 * @param len Maximum number of bytes of @a s to examine.
 */
static void print_string_hex(char *out, const char *s, size_t len)
{
	*out++ = '"';
	for (size_t i = 0; i < len && s[i]; i++) {
		if (s[i] >= 32 && s[i] <= 126) {
			*out++ = s[i];
		} else {
			/* An escape is 4 characters plus the terminator snprintf adds. */
			out += snprintf(out, 5, "\\x%02x", (uint8_t) s[i]);
		}
	}
	*out++ = '"';
	*out++ = 0;
}
	65
[3bacee1]	66	PCUT_TEST(rtrim)
	67	{
[a18a8b9]	68	SET_BUFFER("foobar");
	69	str_rtrim(buffer, ' ');
	70	EQ("foobar", buffer);
	71
	72	SET_BUFFER(" foobar ");
	73	str_rtrim(buffer, ' ');
	74	EQ(" foobar", buffer);
	75
	76	SET_BUFFER(" ššš ");
	77	str_rtrim(buffer, ' ');
	78	EQ(" ššš", buffer);
	79
	80	SET_BUFFER("ššAAAšš");
	81	str_rtrim(buffer, L'š');
	82	EQ("ššAAA", buffer);
	83	}
	84
[3bacee1]	85	PCUT_TEST(ltrim)
	86	{
[a18a8b9]	87	SET_BUFFER("foobar");
	88	str_ltrim(buffer, ' ');
	89	EQ("foobar", buffer);
	90
	91	SET_BUFFER(" foobar ");
	92	str_ltrim(buffer, ' ');
	93	EQ("foobar ", buffer);
	94
	95	SET_BUFFER(" ššš ");
	96	str_ltrim(buffer, ' ');
	97	EQ("ššš ", buffer);
	98
	99	SET_BUFFER("ššAAAšš");
	100	str_ltrim(buffer, L'š');
	101	EQ("AAAšš", buffer);
	102	}
	103
/* str_str() returns a pointer to the first occurrence of the needle. */
PCUT_TEST(str_str_found)
{
	const char *hs = "abracadabra";
	const char *n = "raca";
	char *p;

	p = str_str(hs, n);
	/* "raca" starts at byte offset 2 of "abracadabra". */
	PCUT_ASSERT_TRUE((const char *)p == hs + 2);
}
	113
	114	PCUT_TEST(str_str_not_found)
	115	{
	116	const char *hs = "abracadabra";
	117	const char *n = "racab";
	118	char *p;
	119
	120	p = str_str(hs, n);
	121	PCUT_ASSERT_TRUE(p == NULL);
	122	}
	123
	124	PCUT_TEST(str_str_empty_n)
	125	{
	126	const char *hs = "abracadabra";
	127	const char *n = "";
	128	char *p;
	129
	130	p = str_str(hs, n);
	131	PCUT_ASSERT_TRUE((const char *)p == hs);
	132	}
[a18a8b9]	133
[0600976]	134	PCUT_TEST(str_non_shortest)
	135	{
	136	/* Overlong zero. */
[b31323f]	137	const char overlong1[] = "\xC0\x80";
	138	const char overlong2[] = "\xE0\x80\x80";
	139	const char overlong3[] = "\xF0\x80\x80\x80";
[0600976]	140
[b31323f]	141	const char overlong4[] = "\xC1\xBF";
	142	const char overlong5[] = "\xE0\x9F\xBF";
	143	const char overlong6[] = "\xF0\x8F\xBF\xBF";
[0600976]	144
	145	size_t offset = 0;
	146	PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong1, &offset, sizeof(overlong1)));
	147	offset = 0;
	148	PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong2, &offset, sizeof(overlong2)));
	149	offset = 0;
	150	PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong3, &offset, sizeof(overlong3)));
	151	offset = 0;
	152	PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong4, &offset, sizeof(overlong4)));
	153	offset = 0;
	154	PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong5, &offset, sizeof(overlong5)));
	155	offset = 0;
	156	PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong6, &offset, sizeof(overlong6)));
[b31323f]	157	}
	158
	159	struct sanitize_test {
	160	const char *input;
	161	const char *output;
	162	};
	163
	164	static const struct sanitize_test sanitize_tests[] = {
	165	// Empty string
	166	{ "", "" },
	167	// ASCII only
	168	{ "Hello, world!", "Hello, world!" },
	169	// Valid multi-byte sequences
	170	{ "Aπ你🐱", "Aπ你🐱" },
	171	// U+D7FF is last valid before surrogates
	172	{ "A\xED\x9F\xBFZ", "A\xED\x9F\xBFZ" },
	173	// 0x10FFFF is the highest legal code point
	174	{ "A\xF4\x8F\xBF\xBFZ", "A\xF4\x8F\xBF\xBFZ" },
	175
	176	// Missing continuation byte
	177	{ "A\xC2Z", "A?Z" },
	178	// Truncated multi-byte at buffer end
	179	{ "A\xE2\x82", "A??" },
	180	// Continuation byte without leading byte (0x80-0xBF are never valid first bytes)
	181	{ "A\x80Y\xBFZ", "A?Y?Z" },
	182
	183	// 'A' (U+0041) normally encoded as 0x41
	184	// Overlong 2-byte encoding: 0xC1 0x81
	185	{ "\xC1\x81X", "??X" },
	186
	187	// ¢ (U+00A2) normally encoded as 0xC2 0xA2
	188	// Overlong 3-byte encoding: 0xE0 0x82 0xA2
	189	{ "\xE0\x82\xA2X", "???X" },
	190
	191	// ¢ (U+00A2) normally encoded as 0xC2 0xA2
	192	// Overlong 4-byte encoding: 0xF0 0x80 0x82 0xA2
	193	{ "\xF0\x80\x82\xA2X", "????X" },
	194
	195	// € (U+20AC) normally encoded as 0xE2 0x82 0xAC
	196	// Overlong 4-byte encoding: 0xF0 0x82 0x82 0xAC
	197	{ "\xF0\x82\x82\xACX", "????X" },
	198
	199	// Using 0xC0 0x80 as overlong encoding for NUL (which should be just 0x00)
	200	{ "\xC0\x80X", "??X" },
	201
	202	// 0xED 0xA0 0x80 encodes a surrogate half (U+D800), not allowed in UTF-8
	203	{ "A\xED\xA0\x80Z", "A???Z" },
	204
	205	// 0x110000 is not a legal code point
	206	{ "A\xF4\x90\x80\x80Z", "A????Z" },
	207
	208	// Mix of valid and invalid sequences
	209	{ "A\xC2\xA9\xE2\x28\xA1\xF0\x9F\x98\x81\x80Z", "A©?(?😁?Z" },
	210	};
	211
	212	static size_t count_diff(const char a, const char b, size_t n)
	213	{
	214	size_t count = 0;
	215
	216	for (size_t i = 0; i < n; i++) {
	217	if (a[i] != b[i])
	218	count++;
	219	}
	220
	221	return count;
	222	}
	223
	224	PCUT_TEST(str_sanitize)
	225	{
	226	char replacement = '?';
	227	char buffer2[255];
	228
	229	for (size_t i = 0; i < sizeof(sanitize_tests) / sizeof(sanitize_tests[0]); i++) {
	230	const char *in = sanitize_tests[i].input;
	231	const char *out = sanitize_tests[i].output;
	232	size_t n = str_size(in) + 1;
	233	assert(str_size(out) + 1 == n);
	234
	235	memcpy(buffer, in, n);
	236	size_t replaced = str_sanitize(buffer, n, replacement);
	237	if (memcmp(buffer, out, n) != 0) {
	238	print_string_hex(buffer2, buffer, n);
	239	print_string_hex(buffer, out, n);
	240	PCUT_ASSERTION_FAILED("Expected %s, got %s", buffer, buffer2);
	241	}
	242
	243	size_t expect_replaced = count_diff(buffer, in, n);
	244	PCUT_ASSERT_INT_EQUALS(expect_replaced, replaced);
	245	}
	246
	247	// Test with n smaller than string length - truncated valid encoding for €
	248	const char *in = "ABC€";
	249	const char *out = "ABC??\xAC";
	250	size_t n = str_size(in) + 1;
	251	memcpy(buffer, in, n);
	252	size_t replaced = str_sanitize(buffer, 5, replacement);
	253	if (memcmp(buffer, out, n) != 0) {
	254	print_string_hex(buffer2, buffer, n);
	255	print_string_hex(buffer, out, n);
	256	PCUT_ASSERTION_FAILED("Expected %s, got %s", buffer, buffer2);
	257	}
[0600976]	258
[b31323f]	259	PCUT_ASSERT_INT_EQUALS(2, replaced);
[0600976]	260	}
	261
/* Make the "str" suite declared above available to the PCUT test runner. */
PCUT_EXPORT(str);

Note: See TracBrowser for help on using the repository browser.

Download in other formats: