Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 99 additions & 21 deletions pkg/commands/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,20 @@ package commands

import (
"bufio"
"bytes"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"strings"
)

// File describes a commands_file reference found in a pipeline definition and
// holds the commands read from it.
type File struct {
	// FilePath is the commands_file path as written in the pipeline YAML. A
	// leading '/' means the path starts at the repository root; otherwise it
	// is relative to the directory of the pipeline YAML.
	FilePath string
	// ParentPath is not used by the code visible here.
	// NOTE(review): presumably the location of the reference inside the YAML
	// document - confirm against the callers.
	ParentPath []string
	// YamlPath is the path of the pipeline YAML that references the file.
	YamlPath string
	// Commands holds the lines read from the commands_file, one per entry.
	Commands []string
}

func (f *File) Extract() error {
Expand All @@ -22,31 +25,83 @@ func (f *File) Extract() error {
return fmt.Errorf("failed to resolved the file path for file %s, error: %w", absoluteFilePath, err)
}

// Open the file
file, err := os.Open(filepath.Clean((absoluteFilePath)))
commands, err := f.readCommands(absoluteFilePath)
if err != nil {
return fmt.Errorf("failed to open the commands_file at %s, error: %w", absoluteFilePath, err)
return err
}

// If no commands were read, return an error indicating that the file is empty
if len(commands) == 0 {
return fmt.Errorf("the commands_file at location %s is empty", absoluteFilePath)
}

f.Commands = commands
return nil
}

// readCommands reads the commands_file at absoluteFilePath from the working
// tree and returns its lines.
//
// When the file is not present on disk - which happens when the repository was
// checked out with a sparse working tree (for example the pipeline
// initialization job, which only materializes the pipeline directory) - it
// falls back to reading the file content directly from Git, so commands_file
// references outside the sparse paths keep working.
//
// If the fallback fails as well, the original open error is returned; any
// other open error is returned without attempting the fallback.
func (f *File) readCommands(absoluteFilePath string) ([]string, error) {
	file, err := os.Open(filepath.Clean(absoluteFilePath))
	if err != nil {
		if os.IsNotExist(err) {
			if commands, gitErr := f.readCommandsFromGit(); gitErr == nil {
				return commands, nil
			}
		}

		return nil, fmt.Errorf("failed to open the commands_file at %s, error: %w", absoluteFilePath, err)
	}
	defer file.Close()

	return readLines(file)
}

// readCommandsFromGit reads the commands_file content from the checked-out
// revision (HEAD) without requiring the file to be present in the working
// tree, and returns its lines.
//
// It uses `git cat-file blob` rather than `git show`: both print the raw
// content of a blob, but `git show` also succeeds on tree objects and prints a
// directory listing, which would then be handed back as commands when the path
// points at a directory. `git cat-file blob` fails for anything that is not a
// blob, so such paths surface as an error instead.
func (f *File) readCommandsFromGit() ([]string, error) {
	relPath, err := f.repoRelativePath()
	if err != nil {
		return nil, err
	}

	// #nosec G204 - relPath is derived from the pipeline definition that is
	// checked out in the repository, addressed via an explicit "HEAD:" revision.
	cmd := exec.Command("git", "cat-file", "blob", "HEAD:"+relPath)

	var stdout, stderr bytes.Buffer
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		// Git's own diagnostics go to stderr; include them so the caller can
		// see why the object could not be read.
		return nil, fmt.Errorf(
			"failed to read commands_file %s from git: %s: %w",
			relPath, strings.TrimSpace(stderr.String()), err,
		)
	}

	return readLines(bytes.NewReader(stdout.Bytes()))
}

// readLines reads reader to the end and returns its content split into lines,
// without the line terminators. It returns a nil slice when the input contains
// no lines, and a wrapped error when scanning fails (for example when a single
// line exceeds bufio.Scanner's maximum token size).
func readLines(reader io.Reader) ([]string, error) {
	var lines []string

	scanner := bufio.NewScanner(reader)
	for scanner.Scan() {
		lines = append(lines, scanner.Text())
	}

	// Scan returns false both at EOF and on failure; Err tells them apart.
	if err := scanner.Err(); err != nil {
		return nil, fmt.Errorf("error reading file: %w", err)
	}

	return lines, nil
}

func (f *File) getAbsoluteFilePath() (string, error) {
Expand All @@ -66,4 +121,27 @@ func (f *File) getAbsoluteFilePath() (string, error) {
// and then File path is relative to that YML directory path
return filepath.Join(ymlDirPath, f.FilePath), nil
}
}
}

// repoRelativePath returns the commands_file path relative to the repository
// root, using forward slashes, so it can be addressed in Git as
// `HEAD:<path>`. A leading '/' in FilePath means a path from the repository
// root; otherwise the path is relative to the directory of the pipeline YAML.
//
// It returns an error when the cleaned path escapes the repository root or
// resolves to the repository root itself, since neither can name a file
// inside the repository.
func (f *File) repoRelativePath() (string, error) {
	var relPath string

	if strings.HasPrefix(f.FilePath, "/") {
		// Strip every leading slash, not just the first: "//scripts/x" must
		// become "scripts/x", otherwise the result keeps a leading '/' and is
		// not a valid repository-relative path.
		relPath = strings.TrimLeft(f.FilePath, "/")
	} else {
		relPath = filepath.Join(filepath.Dir(f.YamlPath), f.FilePath)
	}

	relPath = filepath.ToSlash(filepath.Clean(relPath))

	switch {
	case relPath == ".." || strings.HasPrefix(relPath, "../"):
		return "", fmt.Errorf("commands_file path %q escapes the repository root", relPath)
	case relPath == ".":
		// Clean turns an empty path into ".", i.e. the repository root.
		return "", fmt.Errorf("commands_file path %q resolves to the repository root, not a file", f.FilePath)
	}

	return relPath, nil
}
75 changes: 72 additions & 3 deletions pkg/commands/file_test.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,19 @@
package commands

import (
"os"
"os/exec"
"path/filepath"
"testing"

assert "github.com/stretchr/testify/assert"
)

// Permission bits used by the tests in this file when creating fixtures on
// disk.
const (
// testDirPerm is passed to os.MkdirAll for fixture directories.
testDirPerm = 0o755
// testFilePerm is passed to os.WriteFile for fixture files.
testFilePerm = 0o644
)

func Test__Extract(t *testing.T) {
// If commands file does not exist, it returns the error
file := File{
Expand Down Expand Up @@ -33,7 +41,7 @@ func Test__Extract(t *testing.T) {
// Commands are read successfully from the valid file with relative path.
file.FilePath = "valid_commands_file.txt"
err = file.Extract()

assert.Nil(t, err)

expectedCommands := []string{"echo 1", "echo 12", "echo 123"}
Expand All @@ -43,7 +51,68 @@ func Test__Extract(t *testing.T) {
file.FilePath = "/../../test/fixtures/valid_commands_file.txt"
file.Commands = []string{}
err = file.Extract()

assert.Nil(t, err)
assert.Equal(t, file.Commands, expectedCommands)
}
}

// When the commands_file is not present in the working tree (e.g. a sparse
// checkout that only materializes the pipeline directory), its content is read
// from Git instead, so references outside the sparse paths keep working.
//
// The test builds a throw-away repository, commits a script, deletes it from
// disk and then checks that Extract still returns its lines for both a
// repository-root path and a path relative to the pipeline YAML.
func Test__ExtractFromGitWhenMissingFromWorkingTree(t *testing.T) {
	if _, err := exec.LookPath("git"); err != nil {
		t.Skip("git is not available")
	}

	repoDir := t.TempDir()

	// mustSucceed aborts the test on a setup failure. Continuing after a
	// failed setup step (most importantly a failed chdir) would run Extract
	// against the wrong directory and only produce misleading follow-up
	// failures.
	mustSucceed := func(err error, msgAndArgs ...interface{}) {
		t.Helper()
		if !assert.NoError(t, err, msgAndArgs...) {
			t.FailNow()
		}
	}

	runGit := func(args ...string) {
		t.Helper()
		cmd := exec.Command("git", args...)
		cmd.Dir = repoDir
		out, err := cmd.CombinedOutput()
		mustSucceed(err, string(out))
	}

	runGit("init", "-q")
	runGit("config", "user.email", "test@example.com")
	runGit("config", "user.name", "test")
	// Keep the commit independent of any global signing configuration.
	runGit("config", "commit.gpgsign", "false")

	// The commands_file lives outside the pipeline directory.
	scriptsDir := filepath.Join(repoDir, "scripts")
	mustSucceed(os.MkdirAll(scriptsDir, testDirPerm))
	scriptPath := filepath.Join(scriptsDir, "build.sh")
	mustSucceed(os.WriteFile(scriptPath, []byte("echo a\necho b\necho c\n"), testFilePerm))

	runGit("add", ".")
	runGit("commit", "-q", "-m", "init")

	// Simulate a sparse checkout: the file exists in Git but not on disk.
	mustSucceed(os.Remove(scriptPath))

	originalWd, err := os.Getwd()
	mustSucceed(err)
	mustSucceed(os.Chdir(repoDir))
	// Registered after t.TempDir, so this runs before the temporary
	// directory is removed.
	t.Cleanup(func() { _ = os.Chdir(originalWd) })

	expectedCommands := []string{"echo a", "echo b", "echo c"}

	// Absolute path (from repository root).
	absFile := File{
		FilePath:   "/scripts/build.sh",
		ParentPath: []string{},
		YamlPath:   ".semaphore/semaphore.yml",
		Commands:   []string{},
	}
	assert.NoError(t, absFile.Extract())
	assert.Equal(t, expectedCommands, absFile.Commands)

	// Relative path (relative to the pipeline YAML directory).
	relFile := File{
		FilePath:   "../scripts/build.sh",
		ParentPath: []string{},
		YamlPath:   ".semaphore/semaphore.yml",
		Commands:   []string{},
	}
	assert.NoError(t, relFile.Extract())
	assert.Equal(t, expectedCommands, relFile.Commands)
}