Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,12 @@ The eof-fixer processes files in the following way:
| `hello world\n\n\n` | `hello world\n` |
| `` (empty file) | `` (unchanged) |

> **Note on line endings:** when appending a missing terminator, eof-fixer always
> writes an LF (`\n`), regardless of the existing line-ending style of the file.
> A file that otherwise uses CRLF or CR will end up with a mixed terminator on
> its last line. This matches the behavior of pre-commit's `end-of-file-fixer`.
> Files that already end with a single CRLF, CR, or LF are left untouched.

## Configuration

The tool automatically respects patterns in your `.gitignore` file, so it won't process files that are ignored by Git. Only the `.gitignore` at the root of the supplied path is consulted; nested `.gitignore` files in subdirectories are not read. Additionally, it always ignores:
Expand Down
80 changes: 45 additions & 35 deletions eof_fixer/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,56 +15,60 @@ def _is_binary(file_obj: IO[bytes]) -> bool:
return b"\x00" in sample


def _fix_file(file_obj: IO[bytes], check: bool) -> int: # noqa: C901, PLR0911
# Skip binary files
if _is_binary(file_obj):
return 0

# Test for newline at end of file
# Empty files will throw IOError here
def _detect_trailing(file_obj: IO[bytes]) -> tuple[str, int]:
"""Inspect the end of `file_obj` and return the action needed.

Returns one of:
- ("none", 0) — file is empty, or already ends with exactly one terminator.
- ("append_lf", 0) — file lacks a trailing newline; an LF should be appended.
- ("truncate", offset) — file has excess trailing newlines; truncate to `offset`
(0 means truncate to empty).
"""
try:
file_obj.seek(-1, os.SEEK_END)
except OSError:
return 0
return ("none", 0)

last_character = file_obj.read(1)
# last_character will be '' for an empty file
if last_character not in {b"\n", b"\r"} and last_character != b"":
if not check:
# Needs this seek for windows, otherwise IOError
file_obj.seek(0, os.SEEK_END)
file_obj.write(b"\n")
return 1
if last_character not in {b"\n", b"\r"}:
return ("append_lf", 0)

while last_character in {b"\n", b"\r"}:
# Deal with the beginning of the file
if file_obj.tell() == 1:
if not check:
# If we've reached the beginning of the file and it is all
# linebreaks then we can make this file empty
file_obj.seek(0)
file_obj.truncate()
return 1

# Go back two bytes and read a character
# All bytes are line terminators — truncate to empty.
return ("truncate", 0)
file_obj.seek(-2, os.SEEK_CUR)
last_character = file_obj.read(1)

# Our current position is at the end of the file just before any amount of
# newlines. If we find extraneous newlines, then backtrack and trim them.
position = file_obj.tell()
remaining = file_obj.read()
for sequence in (b"\n", b"\r\n", b"\r"):
if remaining == sequence:
return 0

return ("none", 0)
if remaining.startswith(sequence):
if not check:
file_obj.seek(position + len(sequence))
file_obj.truncate()
return 1
return ("truncate", position + len(sequence))

return ("none", 0) # pragma: no cover


def _fix_file(file_obj: IO[bytes], check: bool) -> int:
if _is_binary(file_obj):
return 0

return 0 # pragma: no cover
action, offset = _detect_trailing(file_obj)
if action == "none":
return 0

if not check:
if action == "append_lf":
# Needs this seek for windows, otherwise IOError
file_obj.seek(0, os.SEEK_END)
file_obj.write(b"\n")
else: # action == "truncate"
file_obj.seek(offset)
file_obj.truncate()

return 1


def main() -> int:
Expand All @@ -75,8 +79,11 @@ def main() -> int:

path: pathlib.Path = args.path
check: bool = args.check
gitignore_path = path / ".gitignore"

if not path.is_dir():
parser.error(f"path is not a directory: {path}")

gitignore_path = path / ".gitignore"
ignore_patterns = [
".git",
".cache", # for uv cache
Expand All @@ -87,10 +94,13 @@ def main() -> int:
ignore_patterns.extend(f.readlines())

gitignore_spec = pathspec.GitIgnoreSpec.from_lines(ignore_patterns)
open_mode = "rb" if check else "rb+"

retv = 0
for filename in gitignore_spec.match_tree_files(path, negate=True):
with pathlib.Path(filename).open("rb+") as f:
# match_tree_files yields paths relative to `path`, not cwd — resolve
# them so the tool works regardless of the caller's working directory.
with (path / filename).open(open_mode) as f:
ret_for_file = _fix_file(f, check=check)
if ret_for_file:
sys.stdout.write(f"Fixing {filename}\n")
Expand Down
207 changes: 207 additions & 0 deletions tests/test_end_of_file_fixer.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,40 @@
import os
import shutil
import stat
import sys
import tempfile
from collections.abc import Iterator
from contextlib import contextmanager
from io import StringIO
from pathlib import Path

import pytest

from eof_fixer.main import main


@contextmanager
def _run_main_in(temp_dir: Path, argv: list[str]) -> Iterator[tuple[StringIO, StringIO]]:
"""Swap cwd, argv, stdout, stderr around a ``main()`` call."""
captured_stdout = StringIO()
captured_stderr = StringIO()
original_cwd = Path.cwd()
original_argv = sys.argv
original_stdout = sys.stdout
original_stderr = sys.stderr
try:
os.chdir(temp_dir)
sys.argv = argv
sys.stdout = captured_stdout
sys.stderr = captured_stderr
yield captured_stdout, captured_stderr
finally:
os.chdir(original_cwd)
sys.argv = original_argv
sys.stdout = original_stdout
sys.stderr = original_stderr


def test_end_of_file_fixer_command_with_check_false() -> None:
# Create a temporary directory with test files
with tempfile.TemporaryDirectory() as temp_dir:
Expand Down Expand Up @@ -243,3 +270,183 @@ def test_end_of_file_fixer_skips_binary_files() -> None:
# Check that binary file was not modified
binary_content = (temp_path / "binary_file.bin").read_bytes()
assert binary_content == b"\x00\x01\x02\x03\x04\x05"


def test_crlf_no_trailing_newline_appends_lf() -> None:
with tempfile.TemporaryDirectory() as temp_dir:
temp_path = Path(temp_dir)
target = temp_path / "crlf_no_newline.txt"
target.write_bytes(b"a\r\nb")

with _run_main_in(temp_path, ["eof-fixer", "."]) as (stdout, _stderr):
result = main()

assert result == 1
assert "Fixing crlf_no_newline.txt\n" in stdout.getvalue()
# LF is appended even though the file uses CRLF — documented behavior.
assert target.read_bytes() == b"a\r\nb\n"


def test_crlf_perfect_unchanged() -> None:
with tempfile.TemporaryDirectory() as temp_dir:
temp_path = Path(temp_dir)
target = temp_path / "crlf_perfect.txt"
target.write_bytes(b"a\r\n")

with _run_main_in(temp_path, ["eof-fixer", "."]) as (stdout, _stderr):
result = main()

assert result == 0
assert stdout.getvalue() == ""
assert target.read_bytes() == b"a\r\n"


def test_crlf_multiple_trailing_truncated_to_one() -> None:
with tempfile.TemporaryDirectory() as temp_dir:
temp_path = Path(temp_dir)
target = temp_path / "crlf_multiple.txt"
target.write_bytes(b"a\r\n\r\n\r\n")

with _run_main_in(temp_path, ["eof-fixer", "."]) as (stdout, _stderr):
result = main()

assert result == 1
assert "Fixing crlf_multiple.txt\n" in stdout.getvalue()
assert target.read_bytes() == b"a\r\n"


def test_cr_only_multiple_trailing_truncated_to_one() -> None:
with tempfile.TemporaryDirectory() as temp_dir:
temp_path = Path(temp_dir)
target = temp_path / "cr_only.txt"
target.write_bytes(b"a\r\r\r")

with _run_main_in(temp_path, ["eof-fixer", "."]) as (stdout, _stderr):
result = main()

assert result == 1
assert "Fixing cr_only.txt\n" in stdout.getvalue()
assert target.read_bytes() == b"a\r"


def test_bom_prefixed_file_is_treated_as_text() -> None:
with tempfile.TemporaryDirectory() as temp_dir:
temp_path = Path(temp_dir)
target = temp_path / "bom_no_newline.txt"
target.write_bytes(b"\xef\xbb\xbfhello")

with _run_main_in(temp_path, ["eof-fixer", "."]) as (stdout, _stderr):
result = main()

assert result == 1
assert "Fixing bom_no_newline.txt\n" in stdout.getvalue()
assert target.read_bytes() == b"\xef\xbb\xbfhello\n"


def test_null_byte_beyond_first_1024_bytes_is_treated_as_text() -> None:
# _is_binary inspects only the first 1024 bytes, so a null byte past that
# boundary does NOT mark the file as binary. Document the current behavior.
with tempfile.TemporaryDirectory() as temp_dir:
temp_path = Path(temp_dir)
target = temp_path / "late_null.txt"
payload = b"a" * 2000 + b"\x00" + b"b"
target.write_bytes(payload)

with _run_main_in(temp_path, ["eof-fixer", "."]) as (stdout, _stderr):
result = main()

assert result == 1
assert "Fixing late_null.txt\n" in stdout.getvalue()
assert target.read_bytes() == payload + b"\n"


def test_symlink_in_tree_does_not_crash() -> None:
with tempfile.TemporaryDirectory() as temp_dir:
temp_path = Path(temp_dir)
target = temp_path / "target.txt"
target.write_bytes(b"contents without newline")
link = temp_path / "link.txt"
try:
link.symlink_to(target)
except (OSError, NotImplementedError): # pragma: no cover
pytest.skip("symlinks not available on this platform")

with _run_main_in(temp_path, ["eof-fixer", "."]) as (_stdout, _stderr):
result = main()

# Either path (link followed or skipped) is acceptable — but the
# walked target itself must end up fixed and the tool must not crash.
assert result == 1
assert target.read_bytes().endswith(b"\n")


def test_path_arg_rejects_file() -> None:
with tempfile.TemporaryDirectory() as temp_dir:
temp_path = Path(temp_dir)
not_a_dir = temp_path / "regular.txt"
not_a_dir.write_text("hi\n")

with (
_run_main_in(temp_path, ["eof-fixer", str(not_a_dir)]) as (_stdout, stderr),
pytest.raises(SystemExit) as exc_info,
):
main()

assert exc_info.value.code != 0
assert "is not a directory" in stderr.getvalue()
assert str(not_a_dir) in stderr.getvalue()


def test_path_arg_rejects_nonexistent_path() -> None:
with tempfile.TemporaryDirectory() as temp_dir:
temp_path = Path(temp_dir)
missing = temp_path / "does-not-exist"

with (
_run_main_in(temp_path, ["eof-fixer", str(missing)]) as (_stdout, stderr),
pytest.raises(SystemExit) as exc_info,
):
main()

assert exc_info.value.code != 0
assert "is not a directory" in stderr.getvalue()


def test_readonly_file_in_check_mode_does_not_raise() -> None:
# In --check mode no writes occur, so the file should be opened read-only
# and not raise on read-only files.
with tempfile.TemporaryDirectory() as temp_dir:
temp_path = Path(temp_dir)
readonly = temp_path / "readonly.txt"
readonly.write_text("no newline")
readonly.chmod(stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)

try:
with _run_main_in(temp_path, ["eof-fixer", ".", "--check"]) as (stdout, _stderr):
result = main()

assert result == 1
assert "Fixing readonly.txt\n" in stdout.getvalue()
# File must still be unchanged in check mode.
assert readonly.read_text() == "no newline"
finally:
# Restore writable bits so the tempdir can be cleaned up.
readonly.chmod(stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH)


def test_runs_from_unrelated_cwd_with_absolute_path() -> None:
# Regression test: the tool should work when invoked with an absolute path
# that is not the caller's cwd. Previously yielded relative names were
# opened against cwd, causing FileNotFoundError.
with tempfile.TemporaryDirectory() as work_dir, tempfile.TemporaryDirectory() as target_dir:
work_path = Path(work_dir)
target_path = Path(target_dir)
target_file = target_path / "needs_fix.txt"
target_file.write_text("no newline")

with _run_main_in(work_path, ["eof-fixer", str(target_path)]) as (stdout, _stderr):
result = main()

assert result == 1
assert "Fixing needs_fix.txt\n" in stdout.getvalue()
assert target_file.read_text() == "no newline\n"
Loading