From f3460802c28e4a8e92da9c752b418ca2732dedcd Mon Sep 17 00:00:00 2001 From: Jake Keuhlen Date: Fri, 1 Aug 2025 13:08:32 -0600 Subject: [PATCH 1/3] Create a new flag to --exclude-columns from ripoff-export --- .gitignore | 1 + README.md | 23 ++++- cmd/ripoff-export/ripoff_export.go | 10 +- export.go | 83 ++++++++++++++-- export_test.go | 150 +++++++++++++++++++++++++++-- 5 files changed, 245 insertions(+), 22 deletions(-) diff --git a/.gitignore b/.gitignore index 9885b66..46d9564 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ .vscode tmp /ripoff +/ripoff-export .DS_Store /export diff --git a/README.md b/README.md index baf6bc7..8a839a0 100644 --- a/README.md +++ b/README.md @@ -95,13 +95,34 @@ rows: An experimental command has been added to generate ripoff files from your database. This may be useful to users just starting to use ripoff who don't have so much fake data that templating is required yet. -Currently, it attempts to export all data from all tables into a single ripoff file. You can use the `--exclude` flag to exclude specific tables from the export: +Currently, it attempts to export all data from all tables into a single ripoff file. 
You can use the `--exclude` flag to exclude specific tables from the export, and the `--exclude-columns` flag to exclude specific columns: ```bash # Export all tables except 'users' and 'audit_logs' ripoff-export --exclude users --exclude audit_logs /path/to/export + +# Exclude created_at and updated_at columns from all tables +ripoff-export --exclude-columns created_at --exclude-columns updated_at /path/to/export + +# Exclude email column only from users table +ripoff-export --exclude-columns users.email /path/to/export + +# Combine exclusions: exclude created_at globally and email from users table +ripoff-export --exclude-columns created_at --exclude-columns users.email /path/to/export + +# Combine table and column exclusions +ripoff-export --exclude audit_logs --exclude-columns created_at --exclude-columns users.email /path/to/export ``` +## Column Exclusion Format + +The `--exclude-columns` flag accepts two formats: + +- `table.column` - Excludes a specific column from a specific table +- `column` - Excludes the column from ALL tables + +The latter format is especially useful if you have generated columns on every table like `created_at` or `updated_at` to avoid noisy updates when you re-export your data. + In the future, additional flags may be added to allow you to include tables, add arbitrary `WHERE` conditions, modify the row id/key, export multiple files, or use existing templates. ## Installation diff --git a/cmd/ripoff-export/ripoff_export.go b/cmd/ripoff-export/ripoff_export.go index 3da8e45..8039f8d 100644 --- a/cmd/ripoff-export/ripoff_export.go +++ b/cmd/ripoff-export/ripoff_export.go @@ -23,11 +23,13 @@ func errAttr(err error) slog.Attr { func main() { // Define flags var excludeTables stringSliceFlag + var excludeColumns stringSliceFlag flag.Var(&excludeTables, "exclude", "Exclude specific tables from export (can be specified multiple times)") - + flag.Var(&excludeColumns, "exclude-columns", "Exclude specific columns from export. 
// parseColumnExclusions sorts column exclusion specifications into two groups:
// exclusions scoped to a single table, keyed by table name, and exclusions
// that apply to every table.
//
// A spec that contains a "." is read as "table.column" and is split at the
// first dot only, so any further dots stay part of the column name. A spec
// without a dot is recorded unchanged as a global column name.
func parseColumnExclusions(excludeColumns []string) (map[string][]string, []string) {
	var global []string
	perTable := map[string][]string{}

	for _, entry := range excludeColumns {
		table, column, qualified := strings.Cut(entry, ".")
		if !qualified {
			// Bare column name: excluded from all tables.
			global = append(global, entry)
			continue
		}
		perTable[table] = append(perTable[table], column)
	}

	return perTable, global
}
// shouldExcludeColumn reports whether column of table is covered by an
// exclusion rule. It returns true when column appears in globalColumns or in
// the list stored under table in tableSpecific, and false otherwise.
func shouldExcludeColumn(table, column string, tableSpecific map[string][]string, globalColumns []string) bool {
	// Global rules are consulted first, then the rules for this table. A table
	// with no entry yields a nil slice, which the inner loop simply skips.
	for _, candidates := range [][]string{globalColumns, tableSpecific[table]} {
		for _, name := range candidates {
			if name == column {
				return true
			}
		}
	}
	return false
}
singleColumnFkeyMap := map[[2]string]*ForeignKey{} // A map from [table,constraintName,values] -> rowKey. @@ -59,18 +106,34 @@ func ExportToRipoff(ctx context.Context, tx pgx.Tx, excludeTables []string) (Rip missingDependencies := []RowMissingDependency{} for table, primaryKeys := range primaryKeyResult { - columns := make([]string, len(foreignKeyResult[table].Columns)) - // Due to yaml limitations, ripoff treats all data as nullable text on import and export. - for i, column := range foreignKeyResult[table].Columns { - columns[i] = fmt.Sprintf("CAST(%s AS TEXT)", pq.QuoteIdentifier(column)) + // Filter out excluded columns from the foreign key result columns + var filteredColumns []string + var filteredColumnNames []string + for _, column := range foreignKeyResult[table].Columns { + if !shouldExcludeColumn(table, column, tableSpecificExclusions, globalColumnExclusions) { + filteredColumns = append(filteredColumns, fmt.Sprintf("CAST(%s AS TEXT)", pq.QuoteIdentifier(column))) + filteredColumnNames = append(filteredColumnNames, column) + } } - selectQuery := fmt.Sprintf("SELECT %s FROM %s;", strings.Join(columns, ", "), pq.QuoteIdentifier(table)) + + // Skip table if no columns remain after filtering + if len(filteredColumns) == 0 { + continue + } + + selectQuery := fmt.Sprintf("SELECT %s FROM %s;", strings.Join(filteredColumns, ", "), pq.QuoteIdentifier(table)) rows, err := tx.Query(ctx, selectQuery) if err != nil { return RipoffFile{}, err } defer rows.Close() fields := rows.FieldDescriptions() + + // Verify that field descriptions match our filtered columns (sanity check) + if len(fields) != len(filteredColumnNames) { + return RipoffFile{}, fmt.Errorf("mismatch between query fields (%d) and filtered columns (%d) for table %s", len(fields), len(filteredColumnNames), table) + } + for rows.Next() { columnsRaw, err := rows.Values() if err != nil { diff --git a/export_test.go b/export_test.go index 6d72d77..9205eb0 100644 --- a/export_test.go +++ b/export_test.go @@ 
-23,7 +23,7 @@ func runExportTestData(t *testing.T, ctx context.Context, tx pgx.Tx, testDir str require.NoError(t, err) // Generate new ripoff file. - ripoffFile, err := ExportToRipoff(ctx, tx, []string{}) + ripoffFile, err := ExportToRipoff(ctx, tx, []string{}, []string{}) require.NoError(t, err) // Ensure ripoff file matches expected output. @@ -140,7 +140,7 @@ func TestExcludeFlag(t *testing.T) { // Test 1: Exclude a single table t.Run("Single exclude", func(t *testing.T) { - ripoffFile, err := ExportToRipoff(ctx, tx, []string{"exclude_me"}) + ripoffFile, err := ExportToRipoff(ctx, tx, []string{"exclude_me"}, []string{}) require.NoError(t, err) // Verify that ripoffFile.Rows contains rows from include_me but not exclude_me @@ -165,10 +165,10 @@ func TestExcludeFlag(t *testing.T) { // We should have rows from include_me require.True(t, hasIncludeMe, "Expected to find rows from include_me table") - + // We should NOT have rows from exclude_me require.False(t, hasExcludeMe, "Found rows from exclude_me table even though it was excluded") - + // We should have rows from also_exclude_me (since it wasn't excluded in this test) require.True(t, hasAlsoExcludeMe, "Expected to find rows from also_exclude_me table") @@ -199,7 +199,7 @@ func TestExcludeFlag(t *testing.T) { // Test 2: Exclude multiple tables t.Run("Multiple excludes", func(t *testing.T) { - ripoffFile, err := ExportToRipoff(ctx, tx, []string{"exclude_me", "also_exclude_me"}) + ripoffFile, err := ExportToRipoff(ctx, tx, []string{"exclude_me", "also_exclude_me"}, []string{}) require.NoError(t, err) // Verify that ripoffFile.Rows contains rows from include_me but not from the excluded tables @@ -224,10 +224,10 @@ func TestExcludeFlag(t *testing.T) { // We should have rows from include_me require.True(t, hasIncludeMe, "Expected to find rows from include_me table") - + // We should NOT have rows from exclude_me require.False(t, hasExcludeMe, "Found rows from exclude_me table even though it was excluded") - + // We 
should NOT have rows from also_exclude_me require.False(t, hasAlsoExcludeMe, "Found rows from also_exclude_me table even though it was excluded") @@ -256,3 +256,139 @@ func TestExcludeFlag(t *testing.T) { require.Equal(t, 0, alsoExcludeCount, "Expected 0 rows from also_exclude_me table") }) } + +// TestExcludeColumnsFlag tests that the exclude-columns flag properly excludes columns from export +func TestExcludeColumnsFlag(t *testing.T) { + envUrl := os.Getenv("RIPOFF_TEST_DATABASE_URL") + if envUrl == "" { + envUrl = "postgres:///ripoff-test-db" + } + ctx := context.Background() + conn, err := pgx.Connect(ctx, envUrl) + if err != nil { + require.NoError(t, err) + } + defer conn.Close(ctx) + + // Start a transaction that we'll roll back at the end + tx, err := conn.Begin(ctx) + require.NoError(t, err) + defer func() { + err := tx.Rollback(ctx) + require.NoError(t, err) + }() + + // Create test tables with timestamped columns + _, err = tx.Exec(ctx, ` + CREATE TABLE users ( + id SERIAL PRIMARY KEY, + name TEXT, + email TEXT, + created_at TIMESTAMP DEFAULT NOW(), + updated_at TIMESTAMP DEFAULT NOW() + ); + + CREATE TABLE posts ( + id SERIAL PRIMARY KEY, + title TEXT, + content TEXT, + user_id INTEGER REFERENCES users(id), + created_at TIMESTAMP DEFAULT NOW(), + updated_at TIMESTAMP DEFAULT NOW() + ); + + INSERT INTO users (name, email) VALUES ('Alice', 'alice@example.com'), ('Bob', 'bob@example.com'); + INSERT INTO posts (title, content, user_id) VALUES + ('Post 1', 'Content 1', 1), + ('Post 2', 'Content 2', 1), + ('Post 3', 'Content 3', 2); + `) + require.NoError(t, err) + + // Test 1: Exclude global columns (created_at, updated_at) + t.Run("Global column exclusion", func(t *testing.T) { + ripoffFile, err := ExportToRipoff(ctx, tx, []string{}, []string{"created_at", "updated_at"}) + require.NoError(t, err) + + // Verify that no row contains created_at or updated_at columns + for rowId, row := range ripoffFile.Rows { + _, hasCreatedAt := row["created_at"] + _, 
hasUpdatedAt := row["updated_at"] + require.False(t, hasCreatedAt, "Row %s should not have created_at column", rowId) + require.False(t, hasUpdatedAt, "Row %s should not have updated_at column", rowId) + + // But should still have other columns + tableName := strings.Split(rowId, ":")[0] + switch tableName { + case "users": + _, hasName := row["name"] + _, hasEmail := row["email"] + require.True(t, hasName, "Row %s should have name column", rowId) + require.True(t, hasEmail, "Row %s should have email column", rowId) + case "posts": + _, hasTitle := row["title"] + _, hasContent := row["content"] + require.True(t, hasTitle, "Row %s should have title column", rowId) + require.True(t, hasContent, "Row %s should have content column", rowId) + } + } + }) + + // Test 2: Exclude table-specific column (users.email) + t.Run("Table-specific column exclusion", func(t *testing.T) { + ripoffFile, err := ExportToRipoff(ctx, tx, []string{}, []string{"users.email"}) + require.NoError(t, err) + + // Verify that user rows don't have email but post rows still have all columns + for rowId, row := range ripoffFile.Rows { + tableName := strings.Split(rowId, ":")[0] + switch tableName { + case "users": + _, hasEmail := row["email"] + require.False(t, hasEmail, "User row %s should not have email column", rowId) + // Should still have other columns + _, hasName := row["name"] + require.True(t, hasName, "User row %s should have name column", rowId) + case "posts": + // Posts should have all columns including created_at/updated_at since only users.email was excluded + _, hasTitle := row["title"] + _, hasCreatedAt := row["created_at"] + require.True(t, hasTitle, "Post row %s should have title column", rowId) + require.True(t, hasCreatedAt, "Post row %s should have created_at column", rowId) + } + } + }) + + // Test 3: Combine both exclusion types + t.Run("Combined exclusions", func(t *testing.T) { + ripoffFile, err := ExportToRipoff(ctx, tx, []string{}, []string{"created_at", "users.email"}) + 
require.NoError(t, err) + + // Verify exclusions are applied correctly + for rowId, row := range ripoffFile.Rows { + tableName := strings.Split(rowId, ":")[0] + + // No row should have created_at (global exclusion) + _, hasCreatedAt := row["created_at"] + require.False(t, hasCreatedAt, "Row %s should not have created_at column", rowId) + + switch tableName { + case "users": + // Users should not have email (table-specific exclusion) + _, hasEmail := row["email"] + require.False(t, hasEmail, "User row %s should not have email column", rowId) + // But should have name and updated_at + _, hasName := row["name"] + _, hasUpdatedAt := row["updated_at"] + require.True(t, hasName, "User row %s should have name column", rowId) + require.True(t, hasUpdatedAt, "User row %s should have updated_at column", rowId) + case "posts": + // Posts should have all columns except created_at + _, hasTitle := row["title"] + _, hasUpdatedAt := row["updated_at"] + require.True(t, hasTitle, "Post row %s should have title column", rowId) + require.True(t, hasUpdatedAt, "Post row %s should have updated_at column", rowId) + } + } + }) +} From ea7e837ac0ba1be6dac045b28eda295d5fbf10d2 Mon Sep 17 00:00:00 2001 From: Jake Keuhlen Date: Fri, 1 Aug 2025 19:56:15 -0600 Subject: [PATCH 2/3] Remove extraneous sanity check and improve one of the test cases --- export.go | 5 ----- export_test.go | 14 ++++++++------ 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/export.go b/export.go index aa67773..80138cd 100644 --- a/export.go +++ b/export.go @@ -129,11 +129,6 @@ func ExportToRipoff(ctx context.Context, tx pgx.Tx, excludeTables []string, excl defer rows.Close() fields := rows.FieldDescriptions() - // Verify that field descriptions match our filtered columns (sanity check) - if len(fields) != len(filteredColumnNames) { - return RipoffFile{}, fmt.Errorf("mismatch between query fields (%d) and filtered columns (%d) for table %s", len(fields), len(filteredColumnNames), table) - } - for 
rows.Next() { columnsRaw, err := rows.Values() if err != nil { diff --git a/export_test.go b/export_test.go index 9205eb0..b0b5d8b 100644 --- a/export_test.go +++ b/export_test.go @@ -334,23 +334,25 @@ func TestExcludeColumnsFlag(t *testing.T) { } }) - // Test 2: Exclude table-specific column (users.email) + // Test 2: Exclude table-specific column (users.created_at) - shared column name t.Run("Table-specific column exclusion", func(t *testing.T) { - ripoffFile, err := ExportToRipoff(ctx, tx, []string{}, []string{"users.email"}) + ripoffFile, err := ExportToRipoff(ctx, tx, []string{}, []string{"users.created_at"}) require.NoError(t, err) - // Verify that user rows don't have email but post rows still have all columns + // Verify that user rows don't have created_at but post rows still have created_at for rowId, row := range ripoffFile.Rows { tableName := strings.Split(rowId, ":")[0] switch tableName { case "users": - _, hasEmail := row["email"] - require.False(t, hasEmail, "User row %s should not have email column", rowId) + _, hasCreatedAt := row["created_at"] + require.False(t, hasCreatedAt, "User row %s should not have created_at column", rowId) // Should still have other columns _, hasName := row["name"] + _, hasEmail := row["email"] require.True(t, hasName, "User row %s should have name column", rowId) + require.True(t, hasEmail, "User row %s should have email column", rowId) case "posts": - // Posts should have all columns including created_at/updated_at since only users.email was excluded + // Posts should have created_at since only users.created_at was excluded _, hasTitle := row["title"] _, hasCreatedAt := row["created_at"] require.True(t, hasTitle, "Post row %s should have title column", rowId) From 0ac6d211a42f77d335103ed6b007744f2f78a97c Mon Sep 17 00:00:00 2001 From: Jake Keuhlen Date: Fri, 1 Aug 2025 20:07:49 -0600 Subject: [PATCH 3/3] lint unused variable too --- export.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/export.go b/export.go index 
80138cd..1fee25d 100644 --- a/export.go +++ b/export.go @@ -108,11 +108,9 @@ func ExportToRipoff(ctx context.Context, tx pgx.Tx, excludeTables []string, excl for table, primaryKeys := range primaryKeyResult { // Filter out excluded columns from the foreign key result columns var filteredColumns []string - var filteredColumnNames []string for _, column := range foreignKeyResult[table].Columns { if !shouldExcludeColumn(table, column, tableSpecificExclusions, globalColumnExclusions) { filteredColumns = append(filteredColumns, fmt.Sprintf("CAST(%s AS TEXT)", pq.QuoteIdentifier(column))) - filteredColumnNames = append(filteredColumnNames, column) } }