From 2f0dda747f0b5be771303f26b016b0dd428dcd19 Mon Sep 17 00:00:00 2001
From: Anthony Ryan <anthonyryan1@gmail.com>
Date: Sun, 21 Jun 2026 20:16:48 -0400
Subject: [PATCH] Update JSON::safeEncode to sanitize, rather than attempting
 to convert/repair

UTF::utf8ize previously attempted to "repair" as many mangled strings
as possible, but I've observed a number of examples over the years where
it choked and broke ruTorrent.

I would argue that it isn't possible to take arbitrary strings of bytes
and convert them with perfect accuracy into valid UTF-8 when those strings
come to us mangled, corrupt, and always with unknown encodings.

Here's a handful of examples of byte sequences that choke the UTF8 repair code:

```php
require('php/utility/json.php');
var_dump(JSON::safeEncode("\xC0\x80"));
var_dump(JSON::safeEncode("\xED\xA0\x80"));
var_dump(JSON::safeEncode("\xED\xBF\xBF"));
var_dump(JSON::safeEncode("\xF5\x80\x80\x80"));
var_dump(JSON::safeEncode("\xF7\xBF\xBF\xBF"));
```

The problem is that I think this list is nowhere near comprehensive, and
even if we fix all of these, there will be many more corrupt sequences in
the wild we can't predict.

Instead of trying to repair all this corrupt data, let's just substitute the
U+FFFD replacement character for invalid sequences. Users who add files with
corrupt strings will see the replacement character (adding some awareness), but
that becomes a content problem and not a "ruTorrent is broken" problem.
---
 php/utility/json.php |  5 ++--
 php/utility/utf.php  | 55 --------------------------------------------
 2 files changed, 2 insertions(+), 58 deletions(-)

diff --git a/php/utility/json.php b/php/utility/json.php
index acb6bf2d8..5afad655f 100644
--- a/php/utility/json.php
+++ b/php/utility/json.php
@@ -6,7 +6,6 @@ class JSON
 {	
 	public static function safeEncode($value)
 	{
-		$encoded = json_encode($value);
-		return(!function_exists('json_last_error') || json_last_error()==JSON_ERROR_NONE ? $encoded : json_encode(UTF::utf8ize($value)));
+		return json_encode($value, JSON_THROW_ON_ERROR|JSON_INVALID_UTF8_SUBSTITUTE);
 	}
-}
\ No newline at end of file
+}
diff --git a/php/utility/utf.php b/php/utility/utf.php
index 7091687ce..fe9aee9a3 100644
--- a/php/utility/utf.php
+++ b/php/utility/utf.php
@@ -107,61 +107,6 @@ public static function win2utf($str)
 		return($outstr);
 	}
 
-	private static function mix2utf($str, $inv = '_') 
-	{
-		$len = strlen($str);
-		for($i = 0; $i < $len; $i++)
-		{
-			$c = ord($str[$i]);
-			if($c > 128) 
-			{
-				$bytes = 0;
-				if(($c > 247)) $str[$i] = $inv;
-				elseif($c > 239) $bytes = 4;
-				elseif($c > 223) $bytes = 3;
-				elseif($c > 191) $bytes = 2;
-				else $str[$i] = $inv;
-				if($bytes)
-				{
-					if(($i + $bytes) > $len) $str[$i] = $inv;
-					else
-					{
-						$start = $i;
-						$cnt = $bytes;
-						while($bytes > 1) 
-						{
-							$i++;
-							$b = ord($str[$i]);
-							if($b < 128 || $b > 191) 
-							{
-								$str[$start] = $inv;
-								$i = $start;
-								break;
-							}
-							$bytes--;
-						}
-					}
-				}
-			}
-		}
-		return($str);
-	}
-
-	public static function utf8ize($mixed) 
-	{
-		if(is_array($mixed) || is_object($mixed)) 
-		{
-				foreach($mixed as $key => $value) 
-				{
-					$mixed[$key] = self::utf8ize($value);
-				}
-			} 
-			else 
-				if(is_string($mixed)) 
-					$mixed = self::mix2utf($mixed);
-		return($mixed);
-	}
-
 	private static function maybe_mb_convert_to_utf8($string) {
 		return function_exists('mb_convert_encoding') ? mb_convert_encoding($string, "UTF-8", "auto") : $string;
 	}