From 7c8943d6cffbf4ff5706cca33c6f1f28a22c122b Mon Sep 17 00:00:00 2001 From: Artur Shiriev Date: Sun, 31 May 2026 13:36:50 +0300 Subject: [PATCH] validate path arg, fix non-cwd resolution, refactor _fix_file - Reject non-directory paths up front via `parser.error` instead of letting `pathspec` raise an obscure error later (#6). - Open files `rb` in `--check` mode so read-only files don't raise `OSError`; the write paths in `_fix_file` were already guarded (#8). - Resolve walked filenames against the supplied `path`, not cwd. Previously `eof-fixer /some/dir` from elsewhere raised `FileNotFoundError` on every file. - Split `_fix_file` into `_detect_trailing` (pure inspection) + dispatch, dropping `# noqa: C901, PLR0911` (#10). - README: note that an appended terminator is always LF regardless of the file's existing CRLF/CR style (#5). - Add tests for CRLF / CR / BOM / late null byte / symlinks / read-only-in-check / arg validation / non-cwd invocation, all encoding current behavior (#9). Coverage stays at 100%. Co-Authored-By: Claude Opus 4.7 (1M context) --- README.md | 6 + eof_fixer/main.py | 80 ++++++------ tests/test_end_of_file_fixer.py | 207 ++++++++++++++++++++++++++++++++ 3 files changed, 258 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index 43a14de..40cdaf2 100644 --- a/README.md +++ b/README.md @@ -68,6 +68,12 @@ The eof-fixer processes files in the following way: | `hello world\n\n\n` | `hello world\n` | | `` (empty file) | `` (unchanged) | +> **Note on line endings:** when appending a missing terminator, eof-fixer always +> writes an LF (`\n`), regardless of the existing line-ending style of the file. +> A file that otherwise uses CRLF or CR will end up with a mixed terminator on +> its last line. This matches the behavior of pre-commit's `end-of-file-fixer`. +> Files that already end with a single CRLF, CR, or LF are left untouched. + ## Configuration The tool automatically respects patterns in your `.gitignore` file, so it won't process files that are ignored by Git. Only the `.gitignore` at the root of the supplied path is consulted; nested `.gitignore` files in subdirectories are not read. Additionally, it always ignores: diff --git a/eof_fixer/main.py b/eof_fixer/main.py index 4d25f4c..73e6194 100644 --- a/eof_fixer/main.py +++ b/eof_fixer/main.py @@ -15,56 +15,60 @@ def _is_binary(file_obj: IO[bytes]) -> bool: return b"\x00" in sample -def _fix_file(file_obj: IO[bytes], check: bool) -> int: # noqa: C901, PLR0911 - # Skip binary files - if _is_binary(file_obj): - return 0 - - # Test for newline at end of file - # Empty files will throw IOError here +def _detect_trailing(file_obj: IO[bytes]) -> tuple[str, int]: + """Inspect the end of `file_obj` and return the action needed. + + Returns one of: + - ("none", 0) — file is empty, or already ends with exactly one terminator. + - ("append_lf", 0) — file lacks a trailing newline; an LF should be appended. + - ("truncate", offset) — file has excess trailing newlines; truncate to `offset` + (0 means truncate to empty). + """ try: file_obj.seek(-1, os.SEEK_END) except OSError: - return 0 + return ("none", 0) last_character = file_obj.read(1) - # last_character will be '' for an empty file - if last_character not in {b"\n", b"\r"} and last_character != b"": - if not check: - # Needs this seek for windows, otherwise IOError - file_obj.seek(0, os.SEEK_END) - file_obj.write(b"\n") - return 1 + if last_character not in {b"\n", b"\r"}: + return ("append_lf", 0) while last_character in {b"\n", b"\r"}: - # Deal with the beginning of the file if file_obj.tell() == 1: - if not check: - # If we've reached the beginning of the file and it is all - # linebreaks then we can make this file empty - file_obj.seek(0) - file_obj.truncate() - return 1 - - # Go back two bytes and read a character + # All bytes are line terminators — truncate to empty. + return ("truncate", 0) file_obj.seek(-2, os.SEEK_CUR) last_character = file_obj.read(1) - # Our current position is at the end of the file just before any amount of - # newlines. If we find extraneous newlines, then backtrack and trim them. position = file_obj.tell() remaining = file_obj.read() for sequence in (b"\n", b"\r\n", b"\r"): if remaining == sequence: - return 0 - + return ("none", 0) if remaining.startswith(sequence): - if not check: - file_obj.seek(position + len(sequence)) - file_obj.truncate() - return 1 + return ("truncate", position + len(sequence)) + + return ("none", 0) # pragma: no cover + + +def _fix_file(file_obj: IO[bytes], check: bool) -> int: + if _is_binary(file_obj): + return 0 - return 0 # pragma: no cover + action, offset = _detect_trailing(file_obj) + if action == "none": + return 0 + + if not check: + if action == "append_lf": + # Needs this seek for windows, otherwise IOError + file_obj.seek(0, os.SEEK_END) + file_obj.write(b"\n") + else: # action == "truncate" + file_obj.seek(offset) + file_obj.truncate() + + return 1 def main() -> int: @@ -75,8 +79,11 @@ def main() -> int: path: pathlib.Path = args.path check: bool = args.check - gitignore_path = path / ".gitignore" + if not path.is_dir(): + parser.error(f"path is not a directory: {path}") + + gitignore_path = path / ".gitignore" ignore_patterns = [ ".git", ".cache", # for uv cache @@ -87,10 +94,13 @@ def main() -> int: ignore_patterns.extend(f.readlines()) gitignore_spec = pathspec.GitIgnoreSpec.from_lines(ignore_patterns) + open_mode = "rb" if check else "rb+" retv = 0 for filename in gitignore_spec.match_tree_files(path, negate=True): - with pathlib.Path(filename).open("rb+") as f: + # match_tree_files yields paths relative to `path`, not cwd — resolve + # them so the tool works regardless of the caller's working directory. + with (path / filename).open(open_mode) as f: ret_for_file = _fix_file(f, check=check) if ret_for_file: sys.stdout.write(f"Fixing {filename}\n") diff --git a/tests/test_end_of_file_fixer.py b/tests/test_end_of_file_fixer.py index 0c4c872..d77aaa9 100644 --- a/tests/test_end_of_file_fixer.py +++ b/tests/test_end_of_file_fixer.py @@ -1,13 +1,40 @@ import os import shutil +import stat import sys import tempfile +from collections.abc import Iterator +from contextlib import contextmanager from io import StringIO from pathlib import Path +import pytest + from eof_fixer.main import main +@contextmanager +def _run_main_in(temp_dir: Path, argv: list[str]) -> Iterator[tuple[StringIO, StringIO]]: + """Swap cwd, argv, stdout, stderr around a ``main()`` call.""" + captured_stdout = StringIO() + captured_stderr = StringIO() + original_cwd = Path.cwd() + original_argv = sys.argv + original_stdout = sys.stdout + original_stderr = sys.stderr + try: + os.chdir(temp_dir) + sys.argv = argv + sys.stdout = captured_stdout + sys.stderr = captured_stderr + yield captured_stdout, captured_stderr + finally: + os.chdir(original_cwd) + sys.argv = original_argv + sys.stdout = original_stdout + sys.stderr = original_stderr + + def test_end_of_file_fixer_command_with_check_false() -> None: # Create a temporary directory with test files with tempfile.TemporaryDirectory() as temp_dir: @@ -243,3 +270,183 @@ def test_end_of_file_fixer_skips_binary_files() -> None: # Check that binary file was not modified binary_content = (temp_path / "binary_file.bin").read_bytes() assert binary_content == b"\x00\x01\x02\x03\x04\x05" + + +def test_crlf_no_trailing_newline_appends_lf() -> None: + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + target = temp_path / "crlf_no_newline.txt" + target.write_bytes(b"a\r\nb") + + with _run_main_in(temp_path, ["eof-fixer", "."]) as (stdout, _stderr): + result = main() + + assert result == 1 + assert "Fixing crlf_no_newline.txt\n" in stdout.getvalue() + # LF is appended even though the file uses CRLF — documented behavior. + assert target.read_bytes() == b"a\r\nb\n" + + +def test_crlf_perfect_unchanged() -> None: + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + target = temp_path / "crlf_perfect.txt" + target.write_bytes(b"a\r\n") + + with _run_main_in(temp_path, ["eof-fixer", "."]) as (stdout, _stderr): + result = main() + + assert result == 0 + assert stdout.getvalue() == "" + assert target.read_bytes() == b"a\r\n" + + +def test_crlf_multiple_trailing_truncated_to_one() -> None: + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + target = temp_path / "crlf_multiple.txt" + target.write_bytes(b"a\r\n\r\n\r\n") + + with _run_main_in(temp_path, ["eof-fixer", "."]) as (stdout, _stderr): + result = main() + + assert result == 1 + assert "Fixing crlf_multiple.txt\n" in stdout.getvalue() + assert target.read_bytes() == b"a\r\n" + + +def test_cr_only_multiple_trailing_truncated_to_one() -> None: + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + target = temp_path / "cr_only.txt" + target.write_bytes(b"a\r\r\r") + + with _run_main_in(temp_path, ["eof-fixer", "."]) as (stdout, _stderr): + result = main() + + assert result == 1 + assert "Fixing cr_only.txt\n" in stdout.getvalue() + assert target.read_bytes() == b"a\r" + + +def test_bom_prefixed_file_is_treated_as_text() -> None: + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + target = temp_path / "bom_no_newline.txt" + target.write_bytes(b"\xef\xbb\xbfhello") + + with _run_main_in(temp_path, ["eof-fixer", "."]) as (stdout, _stderr): + result = main() + + assert result == 1 + assert "Fixing bom_no_newline.txt\n" in stdout.getvalue() + assert target.read_bytes() == b"\xef\xbb\xbfhello\n" + + +def test_null_byte_beyond_first_1024_bytes_is_treated_as_text() -> None: + # _is_binary inspects only the first 1024 bytes, so a null byte past that + # boundary does NOT mark the file as binary. Document the current behavior. + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + target = temp_path / "late_null.txt" + payload = b"a" * 2000 + b"\x00" + b"b" + target.write_bytes(payload) + + with _run_main_in(temp_path, ["eof-fixer", "."]) as (stdout, _stderr): + result = main() + + assert result == 1 + assert "Fixing late_null.txt\n" in stdout.getvalue() + assert target.read_bytes() == payload + b"\n" + + +def test_symlink_in_tree_does_not_crash() -> None: + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + target = temp_path / "target.txt" + target.write_bytes(b"contents without newline") + link = temp_path / "link.txt" + try: + link.symlink_to(target) + except (OSError, NotImplementedError): # pragma: no cover + pytest.skip("symlinks not available on this platform") + + with _run_main_in(temp_path, ["eof-fixer", "."]) as (_stdout, _stderr): + result = main() + + # Either path (link followed or skipped) is acceptable — but the + # walked target itself must end up fixed and the tool must not crash. + assert result == 1 + assert target.read_bytes().endswith(b"\n") + + +def test_path_arg_rejects_file() -> None: + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + not_a_dir = temp_path / "regular.txt" + not_a_dir.write_text("hi\n") + + with ( + _run_main_in(temp_path, ["eof-fixer", str(not_a_dir)]) as (_stdout, stderr), + pytest.raises(SystemExit) as exc_info, + ): + main() + + assert exc_info.value.code != 0 + assert "is not a directory" in stderr.getvalue() + assert str(not_a_dir) in stderr.getvalue() + + +def test_path_arg_rejects_nonexistent_path() -> None: + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + missing = temp_path / "does-not-exist" + + with ( + _run_main_in(temp_path, ["eof-fixer", str(missing)]) as (_stdout, stderr), + pytest.raises(SystemExit) as exc_info, + ): + main() + + assert exc_info.value.code != 0 + assert "is not a directory" in stderr.getvalue() + + +def test_readonly_file_in_check_mode_does_not_raise() -> None: + # In --check mode no writes occur, so the file should be opened read-only + # and not raise on read-only files. + with tempfile.TemporaryDirectory() as temp_dir: + temp_path = Path(temp_dir) + readonly = temp_path / "readonly.txt" + readonly.write_text("no newline") + readonly.chmod(stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH) + + try: + with _run_main_in(temp_path, ["eof-fixer", ".", "--check"]) as (stdout, _stderr): + result = main() + + assert result == 1 + assert "Fixing readonly.txt\n" in stdout.getvalue() + # File must still be unchanged in check mode. + assert readonly.read_text() == "no newline" + finally: + # Restore writable bits so the tempdir can be cleaned up. + readonly.chmod(stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH) + + +def test_runs_from_unrelated_cwd_with_absolute_path() -> None: + # Regression test: the tool should work when invoked with an absolute path + # that is not the caller's cwd. Previously yielded relative names were + # opened against cwd, causing FileNotFoundError. + with tempfile.TemporaryDirectory() as work_dir, tempfile.TemporaryDirectory() as target_dir: + work_path = Path(work_dir) + target_path = Path(target_dir) + target_file = target_path / "needs_fix.txt" + target_file.write_text("no newline") + + with _run_main_in(work_path, ["eof-fixer", str(target_path)]) as (stdout, _stderr): + result = main() + + assert result == 1 + assert "Fixing needs_fix.txt\n" in stdout.getvalue() + assert target_file.read_text() == "no newline\n"