Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 16 additions & 8 deletions slugify/slugify.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,20 @@
# Separator character used by default; presumably placed between the words
# of a generated slug (its use is not visible in this excerpt; confirm).
DEFAULT_SEPARATOR = '-'


def _convert_decimal_reference(match: re.Match[str]) -> str:
    """Return the character named by one decimal character reference.

    Meant to be passed as the replacement callable to a regex substitution:
    group 1 of ``match`` is parsed as a base-10 code point and converted
    with ``chr``.

    Args:
        match: A regex match whose first group holds the decimal digits.

    Returns:
        The character for that code point, or the full matched text
        unchanged when the number cannot be converted to a character.
    """
    try:
        return chr(int(match.group(1)))
    except (OverflowError, ValueError):
        # The number is not a usable code point (out of range or too large
        # to convert); keep the reference as literal text so a single bad
        # reference does not abort the whole substitution.
        return match.group(0)


def _convert_hexadecimal_reference(match: re.Match[str]) -> str:
    """Return the character named by one hexadecimal character reference.

    Meant to be passed as the replacement callable to a regex substitution:
    group 1 of ``match`` is parsed as a base-16 code point and converted
    with ``chr``.

    Args:
        match: A regex match whose first group holds the hexadecimal digits.

    Returns:
        The character for that code point, or the full matched text
        unchanged when the number cannot be converted to a character.
    """
    try:
        return chr(int(match.group(1), 16))
    except (OverflowError, ValueError):
        # The number is not a usable code point (out of range or too large
        # to convert); keep the reference as literal text so a single bad
        # reference does not abort the whole substitution.
        return match.group(0)


def smart_truncate(
string: str,
max_length: int = 0,
Expand Down Expand Up @@ -134,17 +148,11 @@ def slugify(

# decimal character reference
if decimal:
try:
text = DECIMAL_PATTERN.sub(lambda m: chr(int(m.group(1))), text)
except Exception:
pass
text = DECIMAL_PATTERN.sub(_convert_decimal_reference, text)

# hexadecimal character reference
if hexadecimal:
try:
text = HEX_PATTERN.sub(lambda m: chr(int(m.group(1), 16)), text)
except Exception:
pass
text = HEX_PATTERN.sub(_convert_hexadecimal_reference, text)

# re normalize text
if allow_unicode:
Expand Down
10 changes: 10 additions & 0 deletions test.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,11 @@ def test_html_decimal_on(self):
r = slugify(txt, decimal=True)
self.assertEqual(r, 'z')

def test_html_decimal_on_with_invalid_reference(self):
    # The first reference is a valid code point and is decoded; the second
    # is too large to be a character, so it must survive as literal text
    # and contribute only its digits to the slug.
    txt = '&#381; &#9999999999;'
    r = slugify(txt, decimal=True)
    self.assertEqual(r, 'z-9999999999')

def test_html_decimal_off(self):
txt = 'Ž'
r = slugify(txt, entities=False, decimal=False)
Expand All @@ -176,6 +181,11 @@ def test_html_hexadecimal_on(self):
r = slugify(txt, hexadecimal=True)
self.assertEqual(r, 'z')

def test_html_hexadecimal_on_with_invalid_reference(self):
    # The first reference is a valid code point and is decoded; the second
    # (0x110000) is one past the last Unicode code point, so it must
    # survive as literal text and contribute "x110000" to the slug.
    txt = '&#x17D; &#x110000;'
    r = slugify(txt, hexadecimal=True)
    self.assertEqual(r, 'z-x110000')

def test_html_hexadecimal_off(self):
txt = 'Ž'
r = slugify(txt, hexadecimal=False)
Expand Down