Index: common/include/str.h
===================================================================
--- common/include/str.h	(revision 65bf08437fcd848d357a9e3d4004b178ef9adafd)
+++ common/include/str.h	(revision b31323f46d9857fd4d01382a6541e53d03de5c11)
@@ -162,4 +162,6 @@
 extern void bin_order_suffix(const uint64_t, uint64_t *, const char **, bool);
 
+extern size_t str_sanitize(char *str, size_t n, uint8_t replacement);
+
 /*
  * TODO: Get rid of this.
Index: common/str.c
===================================================================
--- common/str.c	(revision 65bf08437fcd848d357a9e3d4004b178ef9adafd)
+++ common/str.c	(revision b31323f46d9857fd4d01382a6541e53d03de5c11)
@@ -234,4 +234,9 @@
 }
 
+static bool _is_surrogate(const mbstate_t *mb, uint8_t b)
+{
+	return (mb->state == 0b1111110000001101 && b >= 0xa0); /* True when b would continue a surrogate (U+D800..U+DFFF) encoding; the state constant presumably marks a pending 0xED lead byte -- TODO confirm against the decoder's state layout. */
+}
+
 #define _likely(expr) __builtin_expect((expr), true)
 #define _unlikely(expr) __builtin_expect((expr), false)
@@ -299,4 +304,8 @@
 					return CHAR_INVALID;
 
+				/* Reject surrogates */
+				if (_unlikely(ch >= 0xD800 && ch < 0xE000))
+					return CHAR_INVALID;
+
 				return ch;
 			}
@@ -323,4 +332,8 @@
 					return CHAR_INVALID;
 
+				/* Reject out-of-range characters. */
+				if (_unlikely(ch >= 0x110000))
+					return CHAR_INVALID;
+
 				return ch;
 			}
@@ -339,5 +352,5 @@
 		uint8_t b = s[*offset];
 
-		if (!_is_continuation(b) || _is_non_shortest(mb, b)) {
+		if (!_is_continuation(b) || _is_non_shortest(mb, b) || _is_surrogate(mb, b)) {
 			mb->state = 0;
 			return CHAR_INVALID;
@@ -523,10 +536,11 @@
 }
 
-/* Convert in place any bytes that don't form a valid character into U_SPECIAL. */
-static void _sanitize_string(char *str, size_t n)
+/* Replace in place every byte that does not form a valid character with replacement; returns the number of bytes replaced. */
+static size_t _str_sanitize(char *str, size_t n, uint8_t replacement)
 {
 	uint8_t *b = (uint8_t *) str;
-
-	for (; *b && n > 0; b++, n--) {
+	size_t count = 0;
+
+	for (; n > 0 && b[0]; b++, n--) {
 		int cont = _continuation_bytes(b[0]);
 		if (__builtin_expect(cont, 0) == 0)
@@ -534,14 +548,22 @@
 
 		if (cont < 0 || n <= (size_t) cont) {
-			b[0] = U_SPECIAL;
+			b[0] = replacement;
+			count++;
 			continue;
 		}
 
 		/* Check continuation bytes. */
+		bool valid = true;
 		for (int i = 1; i <= cont; i++) {
 			if (!_is_continuation(b[i])) {
-				b[0] = U_SPECIAL;
-				continue;
+				valid = false;
+				break;
 			}
+		}
+
+		if (!valid) {
+			b[0] = replacement;
+			count++;
+			continue;
 		}
 
@@ -551,25 +573,49 @@
 		 */
 
-		switch (cont) {
-		case 1:
-			/* 0b110!!!!x 0b10xxxxxx */
-			if (!(b[0] & 0b00011110))
-				b[0] = U_SPECIAL;
-
-			continue;
-		case 2:
-			/* 0b1110!!!! 0b10!xxxxx 0b10xxxxxx */
-			if (!(b[0] & 0b00001111) && !(b[1] & 0b00100000))
-				b[0] = U_SPECIAL;
-
-			continue;
-		case 3:
-			/* 0b11110!!! 0b10!!xxxx 0b10xxxxxx 0b10xxxxxx */
-			if (!(b[0] & 0b00000111) && !(b[1] & 0b00110000))
-				b[0] = U_SPECIAL;
-
+		/* 0b110!!!!x 0b10xxxxxx */
+		if (cont == 1 && !(b[0] & 0b00011110)) {
+			b[0] = replacement;
+			count++;
 			continue;
 		}
-	}
+
+		/* 0b1110!!!! 0b10!xxxxx 0b10xxxxxx */
+		if (cont == 2 && !(b[0] & 0b00001111) && !(b[1] & 0b00100000)) {
+			b[0] = replacement;
+			count++;
+			continue;
+		}
+
+		/* 0b11110!!! 0b10!!xxxx 0b10xxxxxx 0b10xxxxxx */
+		if (cont == 3 && !(b[0] & 0b00000111) && !(b[1] & 0b00110000)) {
+			b[0] = replacement;
+			count++;
+			continue;
+		}
+
+		/* Check for surrogate character encoding. */
+		if (cont == 2 && b[0] == 0xED && b[1] >= 0xA0) {
+			b[0] = replacement;
+			count++;
+			continue;
+		}
+
+		/* Check for out-of-range code points. */
+		if (cont == 3 && (b[0] > 0xF4 || (b[0] == 0xF4 && b[1] >= 0x90))) {
+			b[0] = replacement;
+			count++;
+			continue;
+		}
+
+		b += cont;
+		n -= cont;
+	}
+
+	return count;
+}
+
+size_t str_sanitize(char *str, size_t n, uint8_t replacement)
+{
+	return _str_sanitize(str, n, replacement); /* Exported entry point: forwards unchanged to the file-local implementation and returns its replacement count. */
+}
 
@@ -1130,5 +1176,5 @@
 
 	/* In-place translate invalid bytes to U_SPECIAL. */
-	_sanitize_string(dest, size);
+	_str_sanitize(dest, size, U_SPECIAL);
 }
 
@@ -1159,5 +1205,5 @@
 
 	/* In-place translate invalid bytes to U_SPECIAL. */
-	_sanitize_string(dest, size);
+	_str_sanitize(dest, size, U_SPECIAL);
 }
 
@@ -1183,5 +1229,5 @@
 	if (dstr_size < size) {
 		_str_cpyn(dest + dstr_size, size - dstr_size, src);
-		_sanitize_string(dest + dstr_size, size - dstr_size);
+		_str_sanitize(dest + dstr_size, size - dstr_size, U_SPECIAL);
 	}
 }
@@ -1762,5 +1808,5 @@
 
 	memcpy(dest, src, size);
-	_sanitize_string(dest, size);
+	_str_sanitize(dest, size, U_SPECIAL);
 	return dest;
 }
@@ -1795,5 +1841,5 @@
 
 	memcpy(dest, src, size);
-	_sanitize_string(dest, size);
+	_str_sanitize(dest, size, U_SPECIAL);
 	dest[size] = 0;
 	return dest;
Index: uspace/lib/c/test/str.c
===================================================================
--- uspace/lib/c/test/str.c	(revision 65bf08437fcd848d357a9e3d4004b178ef9adafd)
+++ uspace/lib/c/test/str.c	(revision b31323f46d9857fd4d01382a6541e53d03de5c11)
@@ -28,4 +28,6 @@
 
 #include "pcut/asserts.h"
+#include <assert.h>
+#include <stdint.h>
 #include <stdio.h>
 #include <str.h>
@@ -48,4 +50,18 @@
 }
 
+/* Debug helper: write s (at most len bytes, stopping at the first NUL) to out as a double-quoted string, showing bytes outside 32..126 as \xNN; out is not bounds-checked. */
+static void print_string_hex(char *out, const char *s, size_t len)
+{
+	*out++ = '"';
+	for (const char *p = s; p != s + len && *p; p++) {
+		if (*p < 32 || *p > 126)
+			out += snprintf(out, 5, "\\x%02x", (uint8_t) *p);
+		else
+			*out++ = *p;
+	}
+	*out++ = '"';
+	*out = 0;
+}
+
 PCUT_TEST(rtrim)
 {
@@ -119,11 +135,11 @@
 {
 	/* Overlong zero. */
-	const char overlong1[] = { 0b11000000, 0b10000000, 0 };
-	const char overlong2[] = { 0b11100000, 0b10000000, 0 };
-	const char overlong3[] = { 0b11110000, 0b10000000, 0 };
-
-	const char overlong4[] = { 0b11000001, 0b10111111, 0 };
-	const char overlong5[] = { 0b11100000, 0b10011111, 0b10111111, 0 };
-	const char overlong6[] = { 0b11110000, 0b10001111, 0b10111111, 0b10111111, 0 };
+	const char overlong1[] = "\xC0\x80";
+	const char overlong2[] = "\xE0\x80\x80";
+	const char overlong3[] = "\xF0\x80\x80\x80";
+
+	const char overlong4[] = "\xC1\xBF";
+	const char overlong5[] = "\xE0\x9F\xBF";
+	const char overlong6[] = "\xF0\x8F\xBF\xBF";
 
 	size_t offset = 0;
@@ -139,18 +155,107 @@
 	offset = 0;
 	PCUT_ASSERT_INT_EQUALS(U_SPECIAL, str_decode(overlong6, &offset, sizeof(overlong6)));
-
-	char sanitized[sizeof(overlong6)];
-	str_cpy(sanitized, STR_NO_LIMIT, overlong1);
-	PCUT_ASSERT_INT_EQUALS(U_SPECIAL, sanitized[0]);
-	str_cpy(sanitized, STR_NO_LIMIT, overlong2);
-	PCUT_ASSERT_INT_EQUALS(U_SPECIAL, sanitized[0]);
-	str_cpy(sanitized, STR_NO_LIMIT, overlong3);
-	PCUT_ASSERT_INT_EQUALS(U_SPECIAL, sanitized[0]);
-	str_cpy(sanitized, STR_NO_LIMIT, overlong4);
-	PCUT_ASSERT_INT_EQUALS(U_SPECIAL, sanitized[0]);
-	str_cpy(sanitized, STR_NO_LIMIT, overlong5);
-	PCUT_ASSERT_INT_EQUALS(U_SPECIAL, sanitized[0]);
-	str_cpy(sanitized, STR_NO_LIMIT, overlong6);
-	PCUT_ASSERT_INT_EQUALS(U_SPECIAL, sanitized[0]);
+}
+
+struct sanitize_test {
+	const char *input;	/* string handed to str_sanitize() */
+	const char *output;	/* expected buffer contents afterwards; the test asserts it has the same size as input */
+};
+
+static const struct sanitize_test sanitize_tests[] = {	/* { input, expected output }; each byte of an invalid sequence is expected to become '?' */
+	// Empty string
+	{ "", "" },
+	// ASCII only
+	{ "Hello, world!", "Hello, world!" },
+	// Valid 2-, 3- and 4-byte sequences
+	{ "Aπ你🐱", "Aπ你🐱" },
+	// U+D7FF is the last valid code point before the surrogate range
+	{ "A\xED\x9F\xBFZ", "A\xED\x9F\xBFZ" },
+	// U+10FFFF is the highest legal code point
+	{ "A\xF4\x8F\xBF\xBFZ", "A\xF4\x8F\xBF\xBFZ" },
+
+	// Lead byte with its continuation byte missing
+	{ "A\xC2Z", "A?Z" },
+	// Multi-byte sequence cut short by the end of the string
+	{ "A\xE2\x82", "A??" },
+	// Continuation bytes without a lead byte (0x80-0xBF are never valid first bytes)
+	{ "A\x80Y\xBFZ", "A?Y?Z" },
+
+	// 'A' (U+0041) is normally encoded as 0x41
+	// Overlong 2-byte encoding: 0xC1 0x81
+	{ "\xC1\x81X", "??X" },
+
+	// ¢ (U+00A2) is normally encoded as 0xC2 0xA2
+	// Overlong 3-byte encoding: 0xE0 0x82 0xA2
+	{ "\xE0\x82\xA2X", "???X" },
+
+	// ¢ (U+00A2) is normally encoded as 0xC2 0xA2
+	// Overlong 4-byte encoding: 0xF0 0x80 0x82 0xA2
+	{ "\xF0\x80\x82\xA2X", "????X" },
+
+	// € (U+20AC) is normally encoded as 0xE2 0x82 0xAC
+	// Overlong 4-byte encoding: 0xF0 0x82 0x82 0xAC
+	{ "\xF0\x82\x82\xACX", "????X" },
+
+	// 0xC0 0x80 is an overlong encoding of NUL (which should be just 0x00)
+	{ "\xC0\x80X", "??X" },
+
+	// 0xED 0xA0 0x80 encodes a surrogate half (U+D800), which is not allowed in UTF-8
+	{ "A\xED\xA0\x80Z", "A???Z" },
+
+	// 0xF4 0x90 0x80 0x80 encodes U+110000, one past the highest legal code point
+	{ "A\xF4\x90\x80\x80Z", "A????Z" },
+
+	// Mix: valid ©, 0xE2 without continuation, '(', stray 0xA1, valid 😁, stray 0x80
+	{ "A\xC2\xA9\xE2\x28\xA1\xF0\x9F\x98\x81\x80Z", "A©?(?😁?Z" },
+};
+
+/* Count the positions among the first n bytes at which a and b differ. */
+static size_t count_diff(const char *a, const char *b, size_t n)
+{
+	size_t mismatches = 0;
+	const char *end = a + n;
+
+	while (a != end)
+		mismatches += (*a++ != *b++);
+
+	return mismatches;
+}
+
+PCUT_TEST(str_sanitize)
+{
+	char replacement = '?';
+	char buffer2[255];	/* scratch space for the hex dump of the actual result */
+
+	for (size_t i = 0; i < sizeof(sanitize_tests) / sizeof(sanitize_tests[0]); i++) {
+		const char *in = sanitize_tests[i].input;
+		const char *out = sanitize_tests[i].output;
+		size_t n = str_size(in) + 1;	/* presumably the byte length plus the terminating NUL -- confirm str_size() semantics */
+		assert(str_size(out) + 1 == n);	/* table sanity check: expected output must have the same size as the input */
+
+		memcpy(buffer, in, n);
+		size_t replaced = str_sanitize(buffer, n, replacement);
+		if (memcmp(buffer, out, n) != 0) {
+			print_string_hex(buffer2, buffer, n);	/* dump the actual result first ... */
+			print_string_hex(buffer, out, n);	/* ... because buffer is then overwritten with the expected text */
+			PCUT_ASSERTION_FAILED("Expected %s, got %s", buffer, buffer2);	/* NOTE(review): code below assumes this does not return, since buffer no longer holds the result -- confirm */
+		}
+
+		size_t expect_replaced = count_diff(buffer, in, n);	/* number of bytes that differ between the sanitized buffer and the input */
+		PCUT_ASSERT_INT_EQUALS(expect_replaced, replaced);
+	}
+
+	// n smaller than the string: the limit of 5 cuts the 3-byte € after its first two bytes, so both are replaced and \xAC past the limit stays untouched
+	const char *in = "ABC€";
+	const char *out = "ABC??\xAC";
+	size_t n = str_size(in) + 1;
+	memcpy(buffer, in, n);
+	size_t replaced = str_sanitize(buffer, 5, replacement);
+	if (memcmp(buffer, out, n) != 0) {	/* compares the whole string, including the bytes beyond the limit of 5 */
+		print_string_hex(buffer2, buffer, n);
+		print_string_hex(buffer, out, n);
+		PCUT_ASSERTION_FAILED("Expected %s, got %s", buffer, buffer2);
+	}
+
+	PCUT_ASSERT_INT_EQUALS(2, replaced);
+}
 
