-
Notifications
You must be signed in to change notification settings - Fork 1
Generate ripoff queries asynchronously to help plugin performance #27
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e3bce6b
f2900ce
0792a1e
2e60a9f
a98962c
600c161
f9261c8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,12 +1,15 @@ | ||
| package main | ||
|
|
||
| import ( | ||
| "bufio" | ||
| "context" | ||
| "flag" | ||
| "fmt" | ||
| "log/slog" | ||
| "os" | ||
| "path" | ||
| "slices" | ||
| "strings" | ||
|
|
||
| "github.com/jackc/pgx/v5" | ||
|
|
||
|
|
@@ -17,9 +20,54 @@ func errAttr(err error) slog.Attr { | |
| return slog.Any("error", err) | ||
| } | ||
|
|
||
| func confirmPluginsSafe(plugins map[string]ripoff.RipoffPlugin) { | ||
|
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is slightly weird but better than code execution? I would consider this prompt for any ripoff file as well |
||
| baseDir, err := os.UserHomeDir() | ||
| if err != nil { | ||
| baseDir = os.TempDir() | ||
| } | ||
| consentFilePath := path.Join(baseDir, ".ripoff-consent") | ||
| consentFile, err := os.ReadFile(consentFilePath) | ||
| if err != nil && !os.IsNotExist(err) { | ||
| slog.Error("Could not read from consent file", errAttr(err), slog.String("filepath", consentFilePath)) | ||
| } | ||
| consentFileLines := strings.Split(string(consentFile), "\n") | ||
| scanner := bufio.NewScanner(os.Stdin) | ||
| newConsentLines := []string{} | ||
| for _, plugin := range plugins { | ||
| cmdJoined := strings.Join(append([]string{plugin.Address, " -> "}, plugin.Command...), " ") | ||
| if !slices.Contains(consentFileLines, cmdJoined) { | ||
| newConsentLines = append(newConsentLines, cmdJoined) | ||
| } | ||
| } | ||
| if len(newConsentLines) > 0 { | ||
| fmt.Printf("You have not run these ripoff plugins before, please confirm that the following commands are safe to run on your machine: \n") | ||
| fmt.Println() | ||
| for _, consentLine := range newConsentLines { | ||
| fmt.Printf(" %s\n", consentLine) | ||
| } | ||
| fmt.Println() | ||
| fmt.Println("Run the above? (Y/N)") | ||
| scanner.Scan() | ||
| input := scanner.Text() | ||
| if input == "y" || input == "Y" { | ||
| consentFileLines = append(consentFileLines, newConsentLines...) | ||
| err = os.WriteFile(consentFilePath, []byte(strings.Join(consentFileLines, "\n")), 0644) | ||
| if err != nil { | ||
| slog.Error("Could not append to the consent file", errAttr(err), slog.String("filepath", consentFilePath)) | ||
| } | ||
| fmt.Println("Proceeding...") | ||
| } else { | ||
| fmt.Println("ABORT") | ||
| os.Exit(1) | ||
| } | ||
| } | ||
| } | ||
|
|
||
| func main() { | ||
| verbosePtr := flag.Bool("v", false, "enable verbose output") | ||
| softPtr := flag.Bool("s", false, "do not commit generated queries") | ||
| maxConcurrencyPtr := flag.Int("c", ripoff.DEFAULT_MAX_CONCURRENCY, "maximum number of rows to generate queries for at one time. defaults at 1000") | ||
| unsafePluginPtr := flag.Bool("u", false, "execute new plugin commands without prompting. only for use in CI or trusted environments") | ||
| flag.Parse() | ||
|
|
||
| if *verbosePtr { | ||
|
|
@@ -77,7 +125,11 @@ func main() { | |
| os.Exit(1) | ||
| } | ||
|
|
||
| err = ripoff.RunRipoff(ctx, tx, totalRipoff) | ||
| if !*unsafePluginPtr && len(totalRipoff.Plugins) > 0 { | ||
| confirmPluginsSafe(totalRipoff.Plugins) | ||
| } | ||
|
|
||
| err = ripoff.RunRipoff(ctx, tx, totalRipoff, *maxConcurrencyPtr) | ||
| if err != nil { | ||
| slog.Error("Could not run ripoff", errAttr(err)) | ||
| os.Exit(1) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,6 +10,7 @@ import ( | |
| "regexp" | ||
| "slices" | ||
| "strings" | ||
| "sync" | ||
| "time" | ||
|
|
||
| "github.com/brianvoe/gofakeit/v7" | ||
|
|
@@ -19,9 +20,11 @@ import ( | |
| "github.com/tj/go-naturaldate" | ||
| ) | ||
|
|
||
| const DEFAULT_MAX_CONCURRENCY = 1000 | ||
|
|
||
| // Runs ripoff from start to finish, without committing the transaction. | ||
| func RunRipoff(ctx context.Context, tx pgx.Tx, totalRipoff RipoffFile) error { | ||
| manager, err := NewPluginManager(totalRipoff.Plugins) | ||
| func RunRipoff(ctx context.Context, tx pgx.Tx, totalRipoff RipoffFile, maxConcurrency int) error { | ||
| manager, err := NewPluginManager(ctx, totalRipoff.Plugins) | ||
| if err != nil { | ||
| return err | ||
| } | ||
|
|
@@ -32,7 +35,7 @@ func RunRipoff(ctx context.Context, tx pgx.Tx, totalRipoff RipoffFile) error { | |
| return err | ||
| } | ||
|
|
||
| queries, err := buildQueriesForRipoff(manager, primaryKeys, totalRipoff) | ||
| queries, err := buildQueriesForRipoff(maxConcurrency, manager, primaryKeys, totalRipoff) | ||
| if err != nil { | ||
| return err | ||
| } | ||
|
|
@@ -163,10 +166,11 @@ func prepareValue(manager *PluginManager, rawValue string) (string, error) { | |
| return fakerResult, nil | ||
| } | ||
|
|
||
| func buildQueryForRow(manager *PluginManager, primaryKeys PrimaryKeysResult, rowId string, row Row, dependencyGraph map[string][]string) (string, error) { | ||
| func buildQueryForRow(manager *PluginManager, primaryKeys PrimaryKeysResult, rowId string, row Row) (string, []string, error) { | ||
| dependencyResult := []string{} | ||
| parts := strings.Split(rowId, ":") | ||
| if len(parts) < 2 { | ||
| return "", fmt.Errorf("invalid id: %s", rowId) | ||
| return "", dependencyResult, fmt.Errorf("invalid id: %s", rowId) | ||
| } | ||
| table := parts[0] | ||
| primaryKeysForTable, hasPrimaryKeysForTable := primaryKeys[table] | ||
|
|
@@ -210,10 +214,10 @@ func buildQueryForRow(manager *PluginManager, primaryKeys PrimaryKeysResult, row | |
| case []string: | ||
| dependencies = v | ||
| default: | ||
| return "", fmt.Errorf("cannot parse ~dependencies value in row %s", rowId) | ||
| return "", dependencyResult, fmt.Errorf("cannot parse ~dependencies value in row %s", rowId) | ||
| } | ||
| dependencyGraph[rowId] = append(dependencyGraph[rowId], dependencies...) | ||
| dependencyGraph[rowId] = slices.Compact(dependencyGraph[rowId]) | ||
| dependencyResult = append(dependencyResult, dependencies...) | ||
| dependencyResult = slices.Compact(dependencyResult) | ||
| continue | ||
| } | ||
|
|
||
|
|
@@ -230,14 +234,14 @@ func buildQueryForRow(manager *PluginManager, primaryKeys PrimaryKeysResult, row | |
| addEdge := referenceRegex.MatchString(value) | ||
| // Don't add edges to and from the same row. | ||
| if addEdge && rowId != value { | ||
| dependencyGraph[rowId] = append(dependencyGraph[rowId], value) | ||
| dependencyGraph[rowId] = slices.Compact(dependencyGraph[rowId]) | ||
| dependencyResult = append(dependencyResult, value) | ||
| dependencyResult = slices.Compact(dependencyResult) | ||
| } | ||
|
|
||
| columns = append(columns, pq.QuoteIdentifier(column)) | ||
| valuePrepared, err := prepareValue(manager, value) | ||
| if err != nil { | ||
| return "", err | ||
| return "", dependencyResult, err | ||
| } | ||
| // Assume this column is the primary key. | ||
| if rowId == value && onConflictColumn == "" { | ||
|
|
@@ -249,7 +253,7 @@ func buildQueryForRow(manager *PluginManager, primaryKeys PrimaryKeysResult, row | |
| } | ||
|
|
||
| if onConflictColumn == "" { | ||
| return "", fmt.Errorf("cannot determine column to conflict with for: %s, saw %s", rowId, row) | ||
| return "", dependencyResult, fmt.Errorf("cannot determine column to conflict with for: %s, saw %s", rowId, row) | ||
| } | ||
|
|
||
| // Extremely smart query builder. | ||
|
|
@@ -263,11 +267,11 @@ func buildQueryForRow(manager *PluginManager, primaryKeys PrimaryKeysResult, row | |
| strings.Join(values, ","), | ||
| onConflictColumn, | ||
| strings.Join(setStatements, ","), | ||
| ), nil | ||
| ), dependencyResult, nil | ||
| } | ||
|
|
||
| // Returns a sorted array of queries to run based on a given ripoff file. | ||
| func buildQueriesForRipoff(manager *PluginManager, primaryKeys PrimaryKeysResult, totalRipoff RipoffFile) ([]string, error) { | ||
| func buildQueriesForRipoff(maxConcurrency int, manager *PluginManager, primaryKeys PrimaryKeysResult, totalRipoff RipoffFile) ([]string, error) { | ||
| dependencyGraph := map[string][]string{} | ||
| queries := map[string]string{} | ||
|
|
||
|
|
@@ -277,12 +281,37 @@ func buildQueriesForRipoff(manager *PluginManager, primaryKeys PrimaryKeysResult | |
| } | ||
|
|
||
| // Build queries. | ||
| var wg sync.WaitGroup | ||
| semaphore := make(chan struct{}, maxConcurrency) | ||
| type rowChanItem struct { | ||
| rowId string | ||
| query string | ||
| dependencies []string | ||
| err error | ||
|
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I considered
Owner
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (or, at least, better than the semaphore) |
||
| } | ||
| rowChan := make(chan rowChanItem, len(totalRipoff.Rows)) | ||
| for rowId, row := range totalRipoff.Rows { | ||
| query, err := buildQueryForRow(manager, primaryKeys, rowId, row, dependencyGraph) | ||
| if err != nil { | ||
| return []string{}, err | ||
| semaphore <- struct{}{} | ||
| wg.Add(1) | ||
| go func(rowId string, row Row) { | ||
| defer wg.Done() | ||
| defer func() { <-semaphore }() | ||
| query, dependencies, err := buildQueryForRow(manager, primaryKeys, rowId, row) | ||
| rowChan <- rowChanItem{rowId, query, dependencies, err} | ||
| }(rowId, row) | ||
| } | ||
|
|
||
| go func() { | ||
| wg.Wait() | ||
| close(rowChan) | ||
| }() | ||
|
|
||
| for rowItem := range rowChan { | ||
| if rowItem.err != nil { | ||
| return []string{}, rowItem.err | ||
| } | ||
| queries[rowId] = query | ||
| dependencyGraph[rowItem.rowId] = rowItem.dependencies | ||
| queries[rowItem.rowId] = rowItem.query | ||
| } | ||
|
|
||
| // Sort and reverse the graph, so queries are in order of least (hopefully none) to most dependencies. | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This didn't really do anything useful, IMO. Mostly I added it before I had process group IDs working