diff --git a/CHANGELOG b/CHANGELOG index 63076fab..bd4d09b3 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -15,6 +15,11 @@ Bug Fixes * Fix statement splitting (issue845). * Fix a late-binding closure bug in `TokenList.token_not_matching`. +* Fix non-idempotent reindent output when a comma in an identifier list is + preceded by more than one whitespace token (e.g. ``a , b``). Previously + only a single whitespace token before a comma was stripped, leaving a stray + space that was removed on a subsequent format pass. All whitespace before a + comma is now removed. Release 0.5.5 (Dec 19, 2025) diff --git a/sqlparse/filters/others.py b/sqlparse/filters/others.py index 95bc436c..fb3d2ba9 100644 --- a/sqlparse/filters/others.py +++ b/sqlparse/filters/others.py @@ -98,12 +98,18 @@ def _stripws_default(tlist): is_first_char = False def _stripws_identifierlist(self, tlist): - # Removes newlines before commas, see issue140 - last_nl = None + # Removes whitespace before commas, see issue140. All consecutive + # whitespace tokens before a comma are removed so that the result is + # stable when the formatter is applied to its own output (issue140 + # only handled a single preceding whitespace token, which left a + # stray space when the comma was preceded by multiple whitespace + # tokens, e.g. ``a , b``). + last_ws = [] for token in list(tlist.tokens): - if last_nl and token.ttype is T.Punctuation and token.value == ',': - tlist.tokens.remove(last_nl) - last_nl = token if token.is_whitespace else None + if last_ws and token.ttype is T.Punctuation and token.value == ',': + for ws in last_ws: + tlist.tokens.remove(ws) + last_ws = last_ws + [token] if token.is_whitespace else [] # next_ = tlist.token_next(token, skip_ws=False) # if (next_ and not next_.is_whitespace and diff --git a/tests/test_format.py b/tests/test_format.py index 0cdbcf88..168361b3 100644 --- a/tests/test_format.py +++ b/tests/test_format.py @@ -491,6 +491,27 @@ def test_identifier_list(self): 'from a,', ' b']) + def test_identifier_list_whitespace_before_comma(self): + # issue140 only removed a single whitespace token before a comma, so + # input with multiple whitespace tokens before a comma left a stray + # space (``a ,``) which made formatting non-idempotent. All whitespace + # before a comma must be removed. + f = lambda sql: sqlparse.format(sql, reindent=True) + expected = '\n'.join([ + 'select a,', + ' b', + 'from t']) + for s in ( + 'select a , b from t', # multiple spaces before comma + 'select a , b from t', # several spaces before comma + 'select a\t\t, b from t', # tabs before comma + 'select a \n , b from t', # newline + space before comma + ): + formatted = f(s) + assert formatted == expected + # formatting must be idempotent + assert f(formatted) == formatted + def test_identifier_list_with_wrap_after(self): f = lambda sql: sqlparse.format(sql, reindent=True, wrap_after=14) s = 'select foo, bar, baz from table1, table2 where 1 = 2'