From f242c7db62ca26d856421050b993e00dd55a026f Mon Sep 17 00:00:00 2001
From: Sarath Francis
Date: Mon, 1 Jun 2026 22:31:37 -0400
Subject: [PATCH] Strip all whitespace before commas in reindented identifier lists

When reindenting an identifier list, only the single whitespace token
immediately preceding a comma was removed. If a comma was preceded by
more than one whitespace token (for example multiple spaces, tabs, or a
newline followed by a space, as in "a  , b"), one whitespace token was
left in place. StripWhitespaceFilter._stripws_default then collapsed it
to a single space, producing output such as "a ," with a stray space
before the comma.

That stray space was removed on a subsequent format pass, so formatting
was not idempotent: format(format(sql)) != format(sql) for any
identifier list containing extra whitespace before a comma.

Track the full run of consecutive whitespace tokens before a comma and
remove all of them, so the comma always hugs the preceding token and the
output is stable when the formatter is applied to its own output.

This extends the fix for issue140, which only handled a single
whitespace token before a comma.
---
 CHANGELOG                  |  5 +++++
 sqlparse/filters/others.py | 16 +++++++++++-----
 tests/test_format.py       | 21 +++++++++++++++++++++
 3 files changed, 37 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index 63076fab..bd4d09b3 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -15,6 +15,11 @@ Bug Fixes
 
 * Fix statement splitting (issue845).
 * Fix a late-binding closure bug in `TokenList.token_not_matching`.
+* Fix non-idempotent reindent output when a comma in an identifier list is
+  preceded by more than one whitespace token (e.g. ``a  , b``). Previously
+  only a single whitespace token before a comma was stripped, leaving a stray
+  space that was removed on a subsequent format pass. All whitespace before a
+  comma is now removed.
 
 
 Release 0.5.5 (Dec 19, 2025)
diff --git a/sqlparse/filters/others.py b/sqlparse/filters/others.py
index 95bc436c..fb3d2ba9 100644
--- a/sqlparse/filters/others.py
+++ b/sqlparse/filters/others.py
@@ -98,12 +98,18 @@ def _stripws_default(tlist):
             is_first_char = False
 
     def _stripws_identifierlist(self, tlist):
-        # Removes newlines before commas, see issue140
-        last_nl = None
+        # Removes whitespace before commas, see issue140. All consecutive
+        # whitespace tokens before a comma are removed so that the result is
+        # stable when the formatter is applied to its own output (issue140
+        # only handled a single preceding whitespace token, which left a
+        # stray space when the comma was preceded by multiple whitespace
+        # tokens, e.g. ``a  , b``).
+        last_ws = []
         for token in list(tlist.tokens):
-            if last_nl and token.ttype is T.Punctuation and token.value == ',':
-                tlist.tokens.remove(last_nl)
-            last_nl = token if token.is_whitespace else None
+            if last_ws and token.ttype is T.Punctuation and token.value == ',':
+                for ws in last_ws:
+                    tlist.tokens.remove(ws)
+            last_ws = last_ws + [token] if token.is_whitespace else []
 
             # next_ = tlist.token_next(token, skip_ws=False)
             # if (next_ and not next_.is_whitespace and
diff --git a/tests/test_format.py b/tests/test_format.py
index 0cdbcf88..168361b3 100644
--- a/tests/test_format.py
+++ b/tests/test_format.py
@@ -491,6 +491,27 @@ def test_identifier_list(self):
             'from a,',
             '     b'])
 
+    def test_identifier_list_whitespace_before_comma(self):
+        # issue140 only removed a single whitespace token before a comma, so
+        # input with multiple whitespace tokens before a comma left a stray
+        # space (``a ,``) which made formatting non-idempotent. All whitespace
+        # before a comma must be removed.
+        f = lambda sql: sqlparse.format(sql, reindent=True)
+        expected = '\n'.join([
+            'select a,',
+            '       b',
+            'from t'])
+        for s in (
+            'select a  , b from t',  # multiple spaces before comma
+            'select a     , b from t',  # several spaces before comma
+            'select a\t\t, b from t',  # tabs before comma
+            'select a \n , b from t',  # newline + space before comma
+        ):
+            formatted = f(s)
+            assert formatted == expected
+            # formatting must be idempotent
+            assert f(formatted) == formatted
+
     def test_identifier_list_with_wrap_after(self):
         f = lambda sql: sqlparse.format(sql, reindent=True, wrap_after=14)
         s = 'select foo, bar, baz from table1, table2 where 1 = 2'