diff --git a/docs/plans/trt-1989-partitioning-prep.md b/docs/plans/trt-1989-partitioning-prep.md index e2f4e62e7..b47ef5a5a 100644 --- a/docs/plans/trt-1989-partitioning-prep.md +++ b/docs/plans/trt-1989-partitioning-prep.md @@ -83,9 +83,8 @@ When `prow_job_runs` is partitioned: `(id, prow_job_release, timestamp)`). 2. All tables with FKs into `prow_job_runs` must reference the full composite key — meaning they need the partition key columns too. -3. Tables with FKs **from** `prow_job_runs` to non-partitioned tables - (annotations, pull request join table) must either be co-partitioned - or have their FKs dropped. +3. Tables with FKs **to** `prow_job_runs` (annotations, pull request + join table) must either be co-partitioned or have their FKs dropped. This means `prow_job_runs`, `prow_job_run_annotations`, and `prow_job_run_prow_pull_requests` must all migrate to partitioned in a diff --git a/docs/plans/trt-1989-phase2-indexes.md b/docs/plans/trt-1989-phase2-indexes.md new file mode 100644 index 000000000..0a2f6d90f --- /dev/null +++ b/docs/plans/trt-1989-phase2-indexes.md @@ -0,0 +1,128 @@ +# TRT-1989 Phase 2: Composite Indexes on Denormalized Columns + +**Date:** 2026-05-19 +**JIRA:** [TRT-1989](https://redhat.atlassian.net/browse/TRT-1989) +**Depends on:** Phase 1 — column prep (`trt-1989-partitioning-prep.md`) + +## Purpose + +Phase 1 added denormalized `release` and `timestamp` columns to every table +that will be partitioned or holds a FK into a partitioned table. Phase 2 +adds composite indexes on those columns so the query planner can use them +immediately — before partitioning is applied. + +These indexes mirror the future partition key `(release, timestamp)`. Once +the tables are partitioned, each partition inherits a local copy of the +index, and the planner uses partition pruning instead. The indexes added +here serve two purposes: + +1. 
**Immediate benefit** — queries migrated in Phase 3 to filter on the + denormalized columns will use these indexes on the current + non-partitioned tables. +2. **Validation** — exercising the indexes under production workload + confirms the column data is correct before committing to partitioning. + +## Changes + +All changes are GORM index tags on model structs in +`pkg/db/models/prow.go`. GORM `AutoMigrate` creates the indexes +automatically on the next migration run. + +### prow_job_runs + +Added composite index `idx_prow_job_runs_release_timestamp` across +`ProwJobRelease` and `Timestamp`. + +Also added a standalone index on `ProwJobRunTest.ProwJobID` (column +`prow_job_run_tests.prow_job_id`; its SQL is listed under +`prow_job_run_tests` below) to support +variant queries that previously required joining through `prow_job_runs`. + +### prow_job_run_tests + +Added composite index `idx_prow_job_run_tests_release_timestamp` across +`ProwJobRunTimestamp` and `ProwJobRunRelease`. + +### prow_job_run_test_outputs + +Added composite index `idx_prow_job_run_test_outputs_release_timestamp` +across `ProwJobRunTestTimestamp` and `ProwJobRunTestRelease`. + +### prow_job_run_prow_pull_requests + +Added composite index +`idx_prow_job_run_prow_pull_requests_release_timestamp` across +`ProwJobRunRelease` and `ProwJobRunTimestamp`. + +### prow_job_run_annotations + +Added composite index `idx_prow_job_run_annotations_release_timestamp` +across `ProwJobRunRelease` and `ProwJobRunTimestamp`. + +## Explicit SQL + +GORM `AutoMigrate` will create these indexes on the next migration run. +If you prefer to create them manually — for example, using `CONCURRENTLY` +to avoid locking production tables — run these statements directly. + +`CREATE INDEX CONCURRENTLY` cannot run inside a transaction, so each +statement must be executed individually (not wrapped in `BEGIN`/`COMMIT`). 
+ +### prow_job_runs + +```sql +CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_prow_job_runs_release_timestamp + ON prow_job_runs (prow_job_release, "timestamp"); +``` + +### prow_job_run_tests + +```sql +CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_prow_job_run_tests_prow_job_id + ON prow_job_run_tests (prow_job_id); + +CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_prow_job_run_tests_release_timestamp + ON prow_job_run_tests (prow_job_run_timestamp, prow_job_run_release); +``` + +### prow_job_run_test_outputs + +```sql +CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_prow_job_run_test_outputs_release_timestamp + ON prow_job_run_test_outputs (prow_job_run_test_timestamp, prow_job_run_test_release); +``` + +### prow_job_run_prow_pull_requests + +```sql +CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_prow_job_run_prow_pull_requests_release_timestamp + ON prow_job_run_prow_pull_requests (prow_job_run_release, prow_job_run_timestamp); +``` + +### prow_job_run_annotations + +```sql +CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_prow_job_run_annotations_release_timestamp + ON prow_job_run_annotations (prow_job_run_release, prow_job_run_timestamp); +``` + +## Notes + +- **Safe to create before deploying model updates.** GORM `AutoMigrate` + only adds — it never drops indexes, columns, or tables it doesn't + recognize. Indexes created manually will persist through any number of + `AutoMigrate` runs on the old model. Once the updated model with index + tags is deployed, `AutoMigrate` sees the indexes already exist and + skips them. There is no rollback risk. +- `CONCURRENTLY` avoids the write-blocking lock that a plain + `CREATE INDEX` holds on the table, allowing + reads and writes to continue during index creation. It is slower but + safe for production use. +- If the index already exists (e.g., GORM created it during a prior + migration), `IF NOT EXISTS` makes the statement a no-op. This includes + an `INVALID` index left behind by a failed `CONCURRENTLY` build — drop + it before retrying. +- GORM `AutoMigrate` does **not** use `CONCURRENTLY` — it builds the + index under a regular, non-concurrent lock.
On large tables this can block writes for the duration of the + index build. For production deployments, prefer creating the indexes + manually with the SQL above ahead of the code deploy, so that + `AutoMigrate` finds them already in place. +- Column order in the index matches the expected query pattern: most + queries filter on release first (equality), then timestamp (range). + The `prow_job_run_tests` index leads with timestamp because the + materialized view queries filter primarily on timestamp ranges. diff --git a/docs/plans/trt-1989-phase3-query-optimization.md b/docs/plans/trt-1989-phase3-query-optimization.md new file mode 100644 index 000000000..3884ff557 --- /dev/null +++ b/docs/plans/trt-1989-phase3-query-optimization.md @@ -0,0 +1,172 @@ +# TRT-1989 Phase 3: Query Optimization Using Denormalized Columns + +**Date:** 2026-05-19 +**JIRA:** [TRT-1989](https://redhat.atlassian.net/browse/TRT-1989) +**Depends on:** Phase 1 (column prep), Phase 2 (indexes) + +## Purpose + +Phase 1 added denormalized `release` and `timestamp` columns to child +tables (`prow_job_run_tests`, `prow_job_run_test_outputs`, +`prow_job_run_prow_pull_requests`, `prow_job_run_annotations`). Phase 2 +added composite indexes on those columns. + +Nearly every significant query in sippy filters on +`prow_job_runs.timestamp` and/or `prow_jobs.release` via joins. Once these +tables are partitioned, those join-based filters **won't help the planner +prune child table partitions** — the planner needs WHERE clauses on each +partitioned table's own partition key columns. + +This phase adds filters on the denormalized columns and drops joins where +all referenced columns have local replacements. This is safe to ship +before partitioning — the extra WHERE clauses let the planner use the +composite indexes from Phase 2. After partitioning, they become the +primary mechanism for partition pruning. + +## Guiding Principles + +1. 
**Add filters first, then replace when validated** — keep existing + join-based filters alongside new local filters during rollout. + After local denormalized columns are validated, replace old filters + and drop no-longer-needed joins where safe. + +2. **Drop joins only when safe** — a join can be dropped only if *every* + column it provides (in SELECT, WHERE, GROUP BY, ORDER BY, FILTER) has + a local replacement. + +3. **Materialized views use `|||TIMENOW|||` templates** — filters added + to mat views must use the same template tokens, not `$1`-style params. + +4. **SQL functions use `$N` params** — new WHERE clauses reuse existing + params from the function signature. + +5. **GORM queries use `?` placeholders** — pass the same Go variables + already available in the function scope. + +## Changes by Query + +### Group A: Queries starting from `prow_job_run_tests` + +#### A1. `prowJobFailedTestsMatView` — `pkg/db/views.go` + +Rewritten to start from `prow_job_run_tests`. Replaced +`prow_job_runs."timestamp"` with `pjrt.prow_job_run_timestamp` and +`prow_job_runs.prow_job_id` with `pjrt.prow_job_id`. **Dropped JOIN +`prow_job_runs`**. + +#### A2. `testAnalysisByJobMatView` — `pkg/db/views.go` + +Replaced all `prow_job_runs."timestamp"` references with +`prow_job_run_tests.prow_job_run_timestamp`. Replaced `prow_jobs.release` +with `prow_job_run_tests.prow_job_run_release`. **Dropped JOIN +`prow_job_runs`**. Rewired JOIN `prow_jobs` via +`prow_job_run_tests.prow_job_id`. + +#### A3. `testReportMatView` — `pkg/db/views.go` + +Replaced all `prow_job_runs."timestamp"` in WHERE and FILTER clauses with +`prow_job_run_tests.prow_job_run_timestamp`. Replaced `prow_jobs.release` +with `prow_job_run_tests.prow_job_run_release`. **Dropped JOIN +`prow_job_runs`**. Rewired JOIN `prow_jobs` via +`prow_job_run_tests.prow_job_id`. JOIN `prow_jobs` kept for +`prow_jobs.variants`. + +#### A4. 
`test_results()` function — `pkg/db/functions.go` + +Added `WHERE prow_job_run_tests.prow_job_run_timestamp BETWEEN $1 AND $3` +to limit the scan. Replaced `timestamp` in all CASE expressions with +`prow_job_run_tests.prow_job_run_timestamp`. Replaced `prow_jobs.release` +with `prow_job_run_tests.prow_job_run_release`. **Dropped JOINs +`prow_job_runs` and `prow_jobs`**. + +#### A5. `ProwJobHistoricalTestCounts` — `pkg/db/query/job_queries.go` + +Replaced `prow_job_runs.prow_job_id` with +`prow_job_run_tests.prow_job_id` and `prow_job_runs.timestamp` with +`prow_job_run_tests.prow_job_run_timestamp`. **Dropped JOIN +`prow_job_runs`**. + +#### A6. `GetRecentTestFailures` — `pkg/api/recent_test_failures.go` + +Added redundant local filters (`prow_job_run_tests.prow_job_run_timestamp` +and `prow_job_run_tests.prow_job_run_release`) to all four queries: +main query, NOT EXISTS subquery, last-pass lookback, and failure outputs. +Joins kept — `prow_job_runs` still needed for `timestamp` in SELECT and +`url`; `prow_jobs` still needed for `name`. + +#### A7. `testStatusQuery` (CR) — `pkg/api/componentreadiness/.../provider.go` + +Added `pjrt.prow_job_run_release = ?` and `pjrt.prow_job_run_timestamp` +range filters to the CTE WHERE clause. Joins kept — `prow_job_runs` +needed for `labels`, `prow_jobs` needed for variant lookup. + +#### A8. `testDetailQuery` (CR) — `pkg/api/componentreadiness/.../provider.go` + +Same pattern as A7 — added local release and timestamp filters. Joins +kept — `pjr.url`, `pjr.timestamp`, `pjr.labels`, `pj.name`, `pj.id` +still needed in SELECT. + +#### A9. `payloadTestFailuresMatView` — `pkg/db/views.go` + +Added `pjrt.prow_job_run_timestamp > (|||TIMENOW||| - '14 days'::interval)` +to WHERE. Joins kept — `release_tags`, `release_job_runs`, `prow_jobs`, +`prow_job_runs` still needed for other columns. + +### Group B: Queries starting from `prow_job_run_test_outputs` + +#### B1. 
`TestOutputs` — `pkg/db/query/test_queries.go` + +Added `prow_job_run_test_outputs.prow_job_run_test_timestamp`, +`prow_job_run_test_outputs.prow_job_run_test_release`, +`prow_job_run_tests.prow_job_run_timestamp`, and +`prow_job_run_tests.prow_job_run_release` filters. Joins kept — +`prow_job_runs` for URL, `prow_jobs` for variants. + +#### B2. `TestDurations` — `pkg/db/query/test_queries.go` + +Replaced `prow_job_runs.timestamp` filter with +`prow_job_run_tests.prow_job_run_timestamp`. Replaced ambiguous +`"timestamp"` in SELECT/GROUP BY/ORDER BY with explicit +`prow_job_run_tests.prow_job_run_timestamp`. Replaced `prow_jobs.release` +with `prow_job_run_tests.prow_job_run_release`. **Dropped JOIN +`prow_job_runs`**. JOIN `prow_jobs` rewired via +`prow_job_run_tests.prow_job_id` (needed for variants). + +### Group C: Queries on `prow_job_runs` directly + +#### C1. `BuildClusterHealth` — `pkg/db/query/build_clusters.go` + +Added `WHERE prow_job_runs.timestamp BETWEEN @start AND @end` so the +planner can use the timestamp index to limit the scan. The `@start` and +`@end` params already exist in the function signature. + +#### C2-C4. No changes + +- `BuildClusterAnalysis` — already has timestamp in WHERE, cross-release +- `HasBuildClusterData` — existence check, timestamp bound would be wrong +- `ProwJobRunIDs` — simple lookup, already indexed + +### Group D: SQL functions with PR join tables + +#### D1. `job_results()` — `pkg/db/functions.go` + +- **`repo_org_jobs` CTE**: Added `WHERE prow_job_runs.prow_job_release = $1 AND prow_job_runs.timestamp BETWEEN $2 AND $4`, plus the matching `prow_job_run_release = $1` and `prow_job_run_timestamp BETWEEN $2 AND $4` filters on `prow_job_run_prow_pull_requests` +- **`merged_prs` CTE**: Added `AND prow_job_runs.timestamp BETWEEN $2 AND $4` and `AND prow_job_runs.prow_job_release = $1`, plus the matching `prow_job_run_release = $1` and `prow_job_run_timestamp BETWEEN $2 AND $4` filters on `prow_job_run_prow_pull_requests` +- **`results` CTE**: Added `WHERE prow_job_runs.prow_job_release = $1` +- **`last_pass` CTE**: No change — intentionally cross-release + +#### D2. `jobRunsReportMatView` — No change + +The CTEs materialize all data. Adding filters would require +parameterizing the mat view. Deferred to a future change. 
+ +## Joins Dropped Summary + +| Query | Join dropped | Columns replaced | +|-------|-------------|-----------------| +| `prowJobFailedTestsMatView` | `prow_job_runs` | `timestamp` → `pjrt.prow_job_run_timestamp`, `prow_job_id` → `pjrt.prow_job_id` | +| `testAnalysisByJobMatView` | `prow_job_runs` | `timestamp` → local, `prow_job_id` → local; `prow_jobs` rewired via `prow_job_run_tests.prow_job_id` | +| `testReportMatView` | `prow_job_runs` | `timestamp` → local; `prow_jobs` rewired via `prow_job_run_tests.prow_job_id` | +| `test_results()` | `prow_job_runs` + `prow_jobs` | `timestamp` → local, `release` → local | +| `ProwJobHistoricalTestCounts` | `prow_job_runs` | `prow_job_id` → local, `timestamp` → local | +| `TestDurations` | `prow_job_runs` | `timestamp` → local; `prow_jobs` rewired via `prow_job_run_tests.prow_job_id` | diff --git a/pkg/api/componentreadiness/dataprovider/postgres/provider.go b/pkg/api/componentreadiness/dataprovider/postgres/provider.go index a830f7d09..206f0dac4 100644 --- a/pkg/api/componentreadiness/dataprovider/postgres/provider.go +++ b/pkg/api/componentreadiness/dataprovider/postgres/provider.go @@ -310,6 +310,9 @@ WITH deduped AS ( JOIN prow_jobs pj ON pj.id = pjr.prow_job_id WHERE pj.release = ? AND pjr.timestamp >= ? AND pjr.timestamp < ? + AND pjr.prow_job_release = ? + AND pjrt.prow_job_run_release = ? + AND pjrt.prow_job_run_timestamp >= ? AND pjrt.prow_job_run_timestamp < ? 
AND pjrt.deleted_at IS NULL AND pjr.deleted_at IS NULL AND pj.deleted_at IS NULL AND (pjr.labels IS NULL OR NOT pjr.labels @> ARRAY['InfraFailure']) ORDER BY pjrt.prow_job_run_id, pjrt.test_id, pjrt.suite_id, @@ -340,7 +343,7 @@ func (p *PostgresProvider) queryTestStatus(ctx context.Context, release string, dbGroupBy map[string]bool) (map[string]crstatus.TestStatus, []error) { var rows []testStatusRow - if err := p.dbc.DB.WithContext(ctx).Raw(testStatusQuery, release, start, end).Scan(&rows).Error; err != nil { + if err := p.dbc.DB.WithContext(ctx).Raw(testStatusQuery, release, start, end, release, release, start, end).Scan(&rows).Error; err != nil { return nil, []error{fmt.Errorf("querying test status: %w", err)} } @@ -517,6 +520,9 @@ JOIN test_ownerships tow ON tow.test_id = pjrt.test_id AND (tow.suite_id = pjrt.suite_id OR (tow.suite_id IS NULL AND pjrt.suite_id IS NULL)) WHERE pj.release = ? AND pjr.timestamp >= ? AND pjr.timestamp < ? + AND pjr.prow_job_release = ? + AND pjrt.prow_job_run_release = ? + AND pjrt.prow_job_run_timestamp >= ? AND pjrt.prow_job_run_timestamp < ? AND pjrt.deleted_at IS NULL AND pjr.deleted_at IS NULL AND pj.deleted_at IS NULL AND tow.staff_approved_obsolete = false AND (pjr.labels IS NULL OR NOT pjr.labels @> ARRAY['InfraFailure']) @@ -528,7 +534,7 @@ func (p *PostgresProvider) queryTestDetails(ctx context.Context, release string, includeVariants map[string][]string) (map[string][]crstatus.TestJobRunRows, []error) { var rows []testDetailRow - if err := p.dbc.DB.WithContext(ctx).Raw(testDetailQuery, release, start, end).Scan(&rows).Error; err != nil { + if err := p.dbc.DB.WithContext(ctx).Raw(testDetailQuery, release, start, end, release, release, start, end).Scan(&rows).Error; err != nil { return nil, []error{fmt.Errorf("querying test details: %w", err)} } @@ -680,11 +686,12 @@ func (p *PostgresProvider) QueryJobRuns(ctx context.Context, reqOptions reqopts. JOIN prow_job_runs pjr ON pjr.prow_job_id = pj.id WHERE pj.release = ? 
AND pjr.timestamp >= ? AND pjr.timestamp < ? + AND pjr.prow_job_release = ? AND pj.deleted_at IS NULL AND pjr.deleted_at IS NULL AND (pj.name LIKE 'periodic-%%' OR pj.name LIKE 'release-%%' OR pj.name LIKE 'aggregator-%%') GROUP BY pj.name ORDER BY pj.name - `, release, start, end).Scan(&rows).Error + `, release, start, end, release).Scan(&rows).Error if err != nil { return nil, fmt.Errorf("querying job runs: %w", err) } diff --git a/pkg/api/job_analysis.go b/pkg/api/job_analysis.go index a33fa334a..9176e8705 100644 --- a/pkg/api/job_analysis.go +++ b/pkg/api/job_analysis.go @@ -62,6 +62,8 @@ func PrintJobAnalysisJSONFromDB( sum(case when overall_result = 'A' then 1 else 0 end) AS "A"`, period). Joins("INNER JOIN prow_jobs ON prow_job_runs.prow_job_id = prow_jobs.id"). Where("prow_jobs.id IN ?", jobs). + Where("prow_job_runs.prow_job_release = ?", release). + Where("prow_job_runs.timestamp BETWEEN ? AND ?", start, end). Group("period") sumResults.Scan(&sums) diff --git a/pkg/api/job_runs.go b/pkg/api/job_runs.go index bac8d07b5..7764dc8ce 100644 --- a/pkg/api/job_runs.go +++ b/pkg/api/job_runs.go @@ -144,7 +144,11 @@ func JobsRunsReportFromDB(dbc *db.DB, filterOpts *filter.FilterOptions, release ids[i] = jr.ID } var annotations []models.ProwJobRunAnnotation - if err := dbc.DB.Where("prow_job_run_id IN ?", ids).Find(&annotations).Error; err != nil { + annotationQuery := dbc.DB.Where("prow_job_run_id IN ?", ids) + if len(release) > 0 { + annotationQuery = annotationQuery.Where("prow_job_run_release = ?", release) + } + if err := annotationQuery.Find(&annotations).Error; err != nil { return nil, err } annotationsByRun := make(map[string]apitype.AnnotationMap) diff --git a/pkg/api/recent_test_failures.go b/pkg/api/recent_test_failures.go index ce4ff0f83..c6b133ee3 100644 --- a/pkg/api/recent_test_failures.go +++ b/pkg/api/recent_test_failures.go @@ -30,8 +30,11 @@ func GetRecentTestFailures( Joins("LEFT JOIN suites ON suites.id = prow_job_run_tests.suite_id"). 
Joins("LEFT JOIN test_ownerships ON test_ownerships.test_id = tests.id"). Where("prow_job_runs.timestamp >= ? AND prow_job_runs.timestamp < ?", periodStart, reportEnd). + Where("prow_job_runs.prow_job_release = ?", release). + Where("prow_job_run_tests.prow_job_run_timestamp >= ? AND prow_job_run_tests.prow_job_run_timestamp < ?", periodStart, reportEnd). Where("prow_job_run_tests.status = ?", int(sippyprocessingv1.TestStatusFailure)). Where("prow_jobs.release = ?", release). + Where("prow_job_run_tests.prow_job_run_release = ?", release). Where("prow_job_run_tests.deleted_at IS NULL"). Where("prow_job_runs.deleted_at IS NULL"). Where("prow_jobs.deleted_at IS NULL"). @@ -56,11 +59,14 @@ func GetRecentTestFailures( WHERE prev_t.test_id = tests.id AND prev_t.status = ? AND prev_r.timestamp >= ? AND prev_r.timestamp < ? + AND prev_r.prow_job_release = ? + AND prev_t.prow_job_run_timestamp >= ? AND prev_t.prow_job_run_timestamp < ? AND prev_j.release = ? + AND prev_t.prow_job_run_release = ? AND prev_t.deleted_at IS NULL AND prev_r.deleted_at IS NULL AND prev_j.deleted_at IS NULL - )`, int(sippyprocessingv1.TestStatusFailure), prevStart, periodStart, release) + )`, int(sippyprocessingv1.TestStatusFailure), prevStart, periodStart, release, prevStart, periodStart, release, release) } // Wrap the aggregated query as a subquery so we can apply filters/sort/pagination @@ -113,7 +119,10 @@ func GetRecentTestFailures( Where("prow_job_run_tests.test_id IN ?", testIDs). Where("prow_job_run_tests.status = ?", int(sippyprocessingv1.TestStatusSuccess)). Where("prow_job_runs.timestamp >= ?", lastPassLookback). + Where("prow_job_runs.prow_job_release = ?", release). + Where("prow_job_run_tests.prow_job_run_timestamp >= ?", lastPassLookback). Where("prow_jobs.release = ?", release). + Where("prow_job_run_tests.prow_job_run_release = ?", release). Where("prow_job_run_tests.deleted_at IS NULL"). Where("prow_job_runs.deleted_at IS NULL"). Where("prow_jobs.deleted_at IS NULL"). 
@@ -148,7 +157,10 @@ func GetRecentTestFailures( Where("prow_job_run_tests.test_id IN ?", testIDs). Where("prow_job_run_tests.status = ?", int(sippyprocessingv1.TestStatusFailure)). Where("prow_job_runs.timestamp >= ? AND prow_job_runs.timestamp < ?", periodStart, reportEnd). + Where("prow_job_runs.prow_job_release = ?", release). + Where("prow_job_run_tests.prow_job_run_timestamp >= ? AND prow_job_run_tests.prow_job_run_timestamp < ?", periodStart, reportEnd). Where("prow_jobs.release = ?", release). + Where("prow_job_run_tests.prow_job_run_release = ?", release). Where("prow_job_run_tests.deleted_at IS NULL"). Where("prow_job_runs.deleted_at IS NULL"). Where("prow_jobs.deleted_at IS NULL"). diff --git a/pkg/api/tests.go b/pkg/api/tests.go index 3ea06f4d8..a5efd87bc 100644 --- a/pkg/api/tests.go +++ b/pkg/api/tests.go @@ -396,7 +396,7 @@ func GetJobRunTestsCountByLookback(dbc *db.DB, lookbackDays int) (int64, int64, err := dbc.DB.Table("prow_job_run_tests"). Select("count(distinct prow_job_run_id) as job_runs_count, count(distinct test_id) as test_ids_count"). - Where("created_at > ?", truncatedTime). + Where("prow_job_run_timestamp > ?", truncatedTime). Scan(&queryCounts). 
Error diff --git a/pkg/db/functions.go b/pkg/db/functions.go index 0c99bbcf6..b3540b483 100644 --- a/pkg/db/functions.go +++ b/pkg/db/functions.go @@ -39,20 +39,19 @@ CREATE FUNCTION public.test_results(start timestamp without time zone, boundary WITH results AS ( SELECT tests.id AS id, - coalesce(count(case when status = 1 AND timestamp BETWEEN $1 AND $2 then 1 end), 0) AS previous_successes, - coalesce(count(case when status = 13 AND timestamp BETWEEN $1 AND $2 then 1 end), 0) AS previous_flakes, - coalesce(count(case when status = 12 AND timestamp BETWEEN $1 AND $2 then 1 end), 0) AS previous_failures, - coalesce(count(case when timestamp BETWEEN $1 AND $2 then 1 end), 0) as previous_runs, - coalesce(count(case when status = 1 AND timestamp BETWEEN $2 AND $3 then 1 end), 0) AS current_successes, - coalesce(count(case when status = 13 AND timestamp BETWEEN $2 AND $3 then 1 end), 0) AS current_flakes, - coalesce(count(case when status = 12 AND timestamp BETWEEN $2 AND $3 then 1 end), 0) AS current_failures, - coalesce(count(case when timestamp BETWEEN $2 AND $3 then 1 end), 0) as current_runs, - prow_jobs.release + coalesce(count(case when status = 1 AND prow_job_run_tests.prow_job_run_timestamp BETWEEN $1 AND $2 then 1 end), 0) AS previous_successes, + coalesce(count(case when status = 13 AND prow_job_run_tests.prow_job_run_timestamp BETWEEN $1 AND $2 then 1 end), 0) AS previous_flakes, + coalesce(count(case when status = 12 AND prow_job_run_tests.prow_job_run_timestamp BETWEEN $1 AND $2 then 1 end), 0) AS previous_failures, + coalesce(count(case when prow_job_run_tests.prow_job_run_timestamp BETWEEN $1 AND $2 then 1 end), 0) as previous_runs, + coalesce(count(case when status = 1 AND prow_job_run_tests.prow_job_run_timestamp BETWEEN $2 AND $3 then 1 end), 0) AS current_successes, + coalesce(count(case when status = 13 AND prow_job_run_tests.prow_job_run_timestamp BETWEEN $2 AND $3 then 1 end), 0) AS current_flakes, + coalesce(count(case when status = 12 AND 
prow_job_run_tests.prow_job_run_timestamp BETWEEN $2 AND $3 then 1 end), 0) AS current_failures, + coalesce(count(case when prow_job_run_tests.prow_job_run_timestamp BETWEEN $2 AND $3 then 1 end), 0) as current_runs, + prow_job_run_tests.prow_job_run_release AS release FROM prow_job_run_tests JOIN tests ON tests.id = prow_job_run_tests.test_id - JOIN prow_job_runs ON prow_job_runs.id = prow_job_run_tests.prow_job_run_id - JOIN prow_jobs ON prow_job_runs.prow_job_id = prow_jobs.id -GROUP BY tests.id, prow_jobs.release +WHERE prow_job_run_tests.prow_job_run_timestamp BETWEEN $1 AND $3 +GROUP BY tests.id, prow_job_run_tests.prow_job_run_release ) SELECT tests.id, tests.name, @@ -85,6 +84,10 @@ WITH repo_org_jobs AS ( INNER JOIN prow_job_run_prow_pull_requests on prow_job_run_prow_pull_requests.prow_job_run_id = prow_job_runs.id INNER JOIN prow_pull_requests on prow_pull_requests.id = prow_job_run_prow_pull_requests.prow_pull_request_id INNER JOIN prow_jobs ON prow_job_runs.prow_job_id = prow_jobs.id + WHERE prow_job_runs.prow_job_release = $1 + AND prow_job_runs.timestamp BETWEEN $2 AND $4 + AND prow_job_run_prow_pull_requests.prow_job_run_release = $1 + AND prow_job_run_prow_pull_requests.prow_job_run_timestamp BETWEEN $2 AND $4 GROUP BY prow_pull_requests.org, prow_pull_requests.repo, prow_jobs.id ), merged_prs AS @@ -94,6 +97,10 @@ merged_prs AS INNER JOIN prow_pull_requests on prow_pull_requests.id = prow_job_run_prow_pull_requests.prow_pull_request_id INNER JOIN prow_jobs ON prow_job_runs.prow_job_id = prow_jobs.id WHERE prow_pull_requests.merged_at BETWEEN $2::timestamp AND $4::timestamp + AND prow_job_runs.timestamp BETWEEN $2 AND $4 + AND prow_job_runs.prow_job_release = $1 + AND prow_job_run_prow_pull_requests.prow_job_run_release = $1 + AND prow_job_run_prow_pull_requests.prow_job_run_timestamp BETWEEN $2 AND $4 AND prow_job_runs.overall_result != 'S' AND prow_job_runs.overall_result != 'A' GROUP BY prow_jobs.id, prow_pull_requests.id, 
prow_pull_requests.link), @@ -117,6 +124,7 @@ results AS ( AND timestamp BETWEEN $2 AND $4 LEFT JOIN bug_jobs on prow_jobs.id = bug_jobs.prow_job_id LEFT JOIN bugs on bugs.id = bug_jobs.bug_id AND lower(bugs.status) NOT IN ('verified', 'modified', 'closed', 'on_qa') + WHERE prow_job_runs.prow_job_release = $1 group by prow_jobs.name, prow_jobs.variants ), last_pass AS ( diff --git a/pkg/db/models/prow.go b/pkg/db/models/prow.go index 5d9cbb650..db3d607de 100644 --- a/pkg/db/models/prow.go +++ b/pkg/db/models/prow.go @@ -40,7 +40,7 @@ type ProwJobRun struct { ProwJob ProwJob ProwJobID uint `gorm:"index"` // Used for partitioning - ProwJobRelease string + ProwJobRelease string `gorm:"index:idx_prow_job_runs_release_timestamp"` // Cluster is the cluster where the prow job was run. Cluster string @@ -57,7 +57,7 @@ type ProwJobRun struct { // KnownFailure is true if the job run failed, but we found a bug that is likely related already filed. KnownFailure bool Succeeded bool - Timestamp time.Time `gorm:"index;index:idx_prow_job_runs_timestamp_date,expression:DATE(timestamp AT TIME ZONE 'UTC')"` + Timestamp time.Time `gorm:"index;index:idx_prow_job_runs_timestamp_date,expression:DATE(timestamp AT TIME ZONE 'UTC');index:idx_prow_job_runs_release_timestamp"` Duration time.Duration OverallResult v1.JobOverallResult `gorm:"index"` // Labels stores the IDs of labels applied to this job run @@ -72,10 +72,10 @@ type ProwJobRun struct { // between ProwJobRun and ProwPullRequest. Release and timestamp are denormalized from // ProwJobRun to support future partitioning. 
type ProwJobRunProwPullRequest struct { - ProwJobRunID uint `gorm:"primaryKey"` - ProwPullRequestID uint `gorm:"primaryKey"` - ProwJobRunRelease string - ProwJobRunTimestamp time.Time + ProwJobRunID uint `gorm:"primaryKey"` + ProwPullRequestID uint `gorm:"primaryKey"` + ProwJobRunRelease string `gorm:"index:idx_prow_job_run_prow_pull_requests_release_timestamp"` + ProwJobRunTimestamp time.Time `gorm:"index:idx_prow_job_run_prow_pull_requests_release_timestamp"` } // ProwJobRunAnnotation stores a single key-value annotation for a ProwJobRun. @@ -84,8 +84,8 @@ type ProwJobRunAnnotation struct { ProwJobRunID uint `gorm:"index;uniqueIndex:idx_prow_job_run_annotations_key"` Key string `gorm:"uniqueIndex:idx_prow_job_run_annotations_key"` Value string - ProwJobRunRelease string - ProwJobRunTimestamp time.Time + ProwJobRunRelease string `gorm:"index:idx_prow_job_run_annotations_release_timestamp"` + ProwJobRunTimestamp time.Time `gorm:"index:idx_prow_job_run_annotations_release_timestamp"` } type Test struct { @@ -103,12 +103,12 @@ type ProwJobRunTest struct { ProwJobRun ProwJobRun // used for variants // skips joining on ProwJobRunID just to get ProwJobID - ProwJobID uint + ProwJobID uint `gorm:"index"` // used for partitioning - ProwJobRunTimestamp time.Time + ProwJobRunTimestamp time.Time `gorm:"index:idx_prow_job_run_tests_release_timestamp,priority:2"` // used for partitioning - ProwJobRunRelease string - TestID uint `gorm:"index;index:idx_prow_job_run_tests_test_id_status"` + ProwJobRunRelease string `gorm:"index:idx_prow_job_run_tests_release_timestamp,priority:1"` + TestID uint `gorm:"index;index:idx_prow_job_run_tests_test_id_status"` Test Test // SuiteID may be nil if no suite name could be parsed from the testgrid test name. SuiteID *uint `gorm:"index"` @@ -129,9 +129,9 @@ type ProwJobRunTestOutput struct { // Output stores the output of a ProwJobRunTest. 
Output string // used for partitioning - ProwJobRunTestTimestamp time.Time + ProwJobRunTestTimestamp time.Time `gorm:"index:idx_prow_job_run_test_outputs_release_timestamp,priority:2"` // used for partitioning - ProwJobRunTestRelease string + ProwJobRunTestRelease string `gorm:"index:idx_prow_job_run_test_outputs_release_timestamp,priority:1"` } // Suite defines a junit testsuite. Used to differentiate the same test being run in different suites in ProwJobRunTest. diff --git a/pkg/db/query/build_clusters.go b/pkg/db/query/build_clusters.go index 7edec8efe..9796c76bd 100644 --- a/pkg/db/query/build_clusters.go +++ b/pkg/db/query/build_clusters.go @@ -32,6 +32,7 @@ func BuildClusterHealth(dbc *db.DB, start, boundary, end time.Time) ([]models.Bu Joins("JOIN prow_jobs ON prow_job_runs.prow_job_id = prow_jobs.id"). Where(`cluster != '' AND cluster IS NOT NULL`). Where("prow_jobs.kind = 'periodic'"). + Where("prow_job_runs.timestamp BETWEEN @start AND @end", sql.Named("start", start), sql.Named("end", end)). Group("cluster") q := dbc.DB.Table("(?) as results", rawResults). diff --git a/pkg/db/query/job_queries.go b/pkg/db/query/job_queries.go index a07959a8b..5afdb5107 100644 --- a/pkg/db/query/job_queries.go +++ b/pkg/db/query/job_queries.go @@ -71,11 +71,11 @@ func ProwJobRunIDs(dbc *db.DB, prowJobID uint) ([]uint, error) { func ProwJobHistoricalTestCounts(dbc *db.DB, prowJobID uint) (int, error) { var historicalProwJobRunTestCount float64 - q := dbc.DB.Raw(`SELECT avg(count) - FROM (SELECT count(*) - FROM prow_job_run_tests INNER JOIN prow_job_runs ON prow_job_runs.id = prow_job_run_tests.prow_job_run_id - WHERE prow_job_runs.prow_job_id = ? - AND prow_job_runs.timestamp >= CURRENT_DATE - interval '14' day + q := dbc.DB.Raw(`SELECT avg(count) + FROM (SELECT count(*) + FROM prow_job_run_tests + WHERE prow_job_run_tests.prow_job_id = ? 
+ AND prow_job_run_tests.prow_job_run_timestamp >= CURRENT_DATE - interval '14' day GROUP BY prow_job_run_id) t`, prowJobID) if q.Error != nil { @@ -119,11 +119,12 @@ WITH results AS ( coalesce(count(case when succeeded = true AND timestamp BETWEEN @boundary AND @end then 1 end), 0) as current_passes, coalesce(count(case when succeeded = false AND timestamp BETWEEN @boundary AND @end then 1 end), 0) as current_fails, coalesce(count(case when timestamp BETWEEN @boundary AND @end then 1 end), 0) as current_runs - FROM prow_job_runs - JOIN prow_jobs - ON prow_jobs.id = prow_job_runs.prow_job_id + FROM prow_job_runs + JOIN prow_jobs + ON prow_jobs.id = prow_job_runs.prow_job_id AND prow_jobs.release = @release - AND timestamp BETWEEN @start AND @end + AND timestamp BETWEEN @start AND @end + WHERE prow_job_runs.prow_job_release = @release group by variant ) SELECT variant as name, diff --git a/pkg/db/query/pull_request_queries.go b/pkg/db/query/pull_request_queries.go index e1eb205d6..a5b51f4af 100644 --- a/pkg/db/query/pull_request_queries.go +++ b/pkg/db/query/pull_request_queries.go @@ -28,6 +28,8 @@ func PullRequestReport(dbc *db.DB, filterOpts *filter.FilterOptions, release str Joins("INNER JOIN prow_job_runs on prow_job_run_prow_pull_requests.prow_job_run_id = prow_job_runs.id"). Joins("INNER JOIN prow_jobs on prow_job_runs.prow_job_id = prow_jobs.id"). Where("prow_jobs.release = ?", release). + Where("prow_job_runs.prow_job_release = ?", release). + Where("prow_job_run_prow_pull_requests.prow_job_run_release = ?", release). 
Select("DISTINCT ON(prow_pull_requests.link) prow_pull_requests.*, ci.release_tag AS first_ci_payload, ci.phase AS first_ci_payload_phase, ci.release as first_ci_payload_release, nightly.release_tag as first_nightly_payload, nightly.phase as first_nightly_payload_phase, nightly.release as first_nightly_payload_release") results := make([]api.PullRequest, 0) diff --git a/pkg/db/query/repository_queries.go b/pkg/db/query/repository_queries.go index 0b0b4a12b..3faccdc58 100644 --- a/pkg/db/query/repository_queries.go +++ b/pkg/db/query/repository_queries.go @@ -26,6 +26,8 @@ func RepositoryReport(dbc *db.DB, filterOpts *filter.FilterOptions, release stri Joins("LEFT JOIN (?) revert_count ON revert_count.org = prow_pull_requests.org AND revert_count.repo = prow_pull_requests.repo", revertCount). Joins("LEFT JOIN (?) premerge_failures ON premerge_failures.prow_job_ID = prow_jobs.id", averageByJob). Where("prow_jobs.release = ?", release). + Where("prow_job_runs.prow_job_release = ?", release). + Where("prow_job_run_prow_pull_requests.prow_job_run_release = ?", release). Group("prow_pull_requests.org, prow_pull_requests.repo"). Select("ROW_NUMBER() OVER() as id, prow_pull_requests.org, prow_pull_requests.repo, max(revert_count) as revert_count, coalesce(max(average_premerge_job_failures), 0) as worst_premerge_job_failures, count(distinct(prow_jobs.id)) as job_count") diff --git a/pkg/db/query/test_queries.go b/pkg/db/query/test_queries.go index 83c1acd89..cd9fe640d 100644 --- a/pkg/db/query/test_queries.go +++ b/pkg/db/query/test_queries.go @@ -283,8 +283,12 @@ func TestOutputs(dbc *db.DB, release, test string, includedVariants, excludedVar Joins("JOIN prow_job_runs ON prow_job_run_tests.prow_job_run_id = prow_job_runs.id"). Joins("JOIN prow_jobs ON prow_job_runs.prow_job_id = prow_jobs.id"). Where("prow_job_runs.timestamp > current_date - interval '14' day"). - Where("prow_job_run_tests.test_id = (?)", testQuery). 
- Where("prow_jobs.release = ?", release) + Where("prow_job_runs.prow_job_release = ?", release). + Where("prow_job_run_test_outputs.prow_job_run_test_timestamp > current_date - interval '14' day"). + Where("prow_job_run_test_outputs.prow_job_run_test_release = ?", release). + Where("prow_job_run_tests.prow_job_run_timestamp > current_date - interval '14' day"). + Where("prow_job_run_tests.prow_job_run_release = ?", release). + Where("prow_job_run_tests.test_id = (?)", testQuery) for _, variant := range includedVariants { q = q.Where("? = any(prow_jobs.variants)", variant) @@ -314,11 +318,10 @@ func TestDurations(dbc *db.DB, release, test string, includedVariants, excludedV testQuery := dbc.DB.Table("tests").Where("name = ?", test).Select("id") q := dbc.DB.Table("prow_job_run_tests"). Joins("JOIN tests ON prow_job_run_tests.test_id = tests.id"). - Joins("JOIN prow_job_runs ON prow_job_run_tests.prow_job_run_id = prow_job_runs.id"). - Joins("JOIN prow_jobs ON prow_job_runs.prow_job_id = prow_jobs.id"). - Where("prow_job_runs.timestamp > current_date - interval '14' day"). + Joins("JOIN prow_jobs ON prow_jobs.id = prow_job_run_tests.prow_job_id"). + Where("prow_job_run_tests.prow_job_run_timestamp > current_date - interval '14' day"). Where("prow_job_run_tests.test_id = (?)", testQuery). - Where("prow_jobs.release = ?", release) + Where("prow_job_run_tests.prow_job_run_release = ?", release) for _, variant := range includedVariants { q = q.Where("? = any(prow_jobs.variants)", variant) @@ -330,10 +333,10 @@ func TestDurations(dbc *db.DB, release, test string, includedVariants, excludedV res := q. Select(` - date("timestamp" AT TIME ZONE 'UTC'::text) as period, + date(prow_job_run_tests.prow_job_run_timestamp AT TIME ZONE 'UTC'::text) as period, AVG(prow_job_run_tests.duration) as average_duration`). - Group(`date("timestamp" AT TIME ZONE 'UTC'::text)`). - Order(`date("timestamp" AT TIME ZONE 'UTC'::text)`). 
+ Group(`date(prow_job_run_tests.prow_job_run_timestamp AT TIME ZONE 'UTC'::text)`). + Order(`date(prow_job_run_tests.prow_job_run_timestamp AT TIME ZONE 'UTC'::text)`). Scan(&rows) for _, row := range rows { diff --git a/pkg/db/views.go b/pkg/db/views.go index 84b906a9a..a15f03593 100644 --- a/pkg/db/views.go +++ b/pkg/db/views.go @@ -240,18 +240,18 @@ SELECT tests.name, suites.name AS suite_name, jira_components.name AS jira_component, - jira_components.id AS jira_component_id, - COUNT(*) FILTER (WHERE prow_job_run_tests.status = 1 AND prow_job_runs."timestamp" BETWEEN |||START||| AND |||BOUNDARY|||) AS previous_successes, - COUNT(*) FILTER (WHERE prow_job_run_tests.status = 13 AND prow_job_runs."timestamp" BETWEEN |||START||| AND |||BOUNDARY|||) AS previous_flakes, - COUNT(*) FILTER (WHERE prow_job_run_tests.status = 12 AND prow_job_runs."timestamp" BETWEEN |||START||| AND |||BOUNDARY|||) AS previous_failures, - COUNT(*) FILTER (WHERE prow_job_runs."timestamp" BETWEEN |||START||| AND |||BOUNDARY|||) AS previous_runs, - COUNT(*) FILTER (WHERE prow_job_run_tests.status = 1 AND prow_job_runs."timestamp" BETWEEN |||BOUNDARY||| AND |||END|||) AS current_successes, - COUNT(*) FILTER (WHERE prow_job_run_tests.status = 13 AND prow_job_runs."timestamp" BETWEEN |||BOUNDARY||| AND |||END|||) AS current_flakes, - COUNT(*) FILTER (WHERE prow_job_run_tests.status = 12 AND prow_job_runs."timestamp" BETWEEN |||BOUNDARY||| AND |||END|||) AS current_failures, - COUNT(*) FILTER (WHERE prow_job_runs."timestamp" BETWEEN |||BOUNDARY||| AND |||END|||) AS current_runs, + jira_components.id AS jira_component_id, + COUNT(*) FILTER (WHERE prow_job_run_tests.status = 1 AND prow_job_run_tests.prow_job_run_timestamp BETWEEN |||START||| AND |||BOUNDARY|||) AS previous_successes, + COUNT(*) FILTER (WHERE prow_job_run_tests.status = 13 AND prow_job_run_tests.prow_job_run_timestamp BETWEEN |||START||| AND |||BOUNDARY|||) AS previous_flakes, + COUNT(*) FILTER (WHERE prow_job_run_tests.status = 12 
AND prow_job_run_tests.prow_job_run_timestamp BETWEEN |||START||| AND |||BOUNDARY|||) AS previous_failures, + COUNT(*) FILTER (WHERE prow_job_run_tests.prow_job_run_timestamp BETWEEN |||START||| AND |||BOUNDARY|||) AS previous_runs, + COUNT(*) FILTER (WHERE prow_job_run_tests.status = 1 AND prow_job_run_tests.prow_job_run_timestamp BETWEEN |||BOUNDARY||| AND |||END|||) AS current_successes, + COUNT(*) FILTER (WHERE prow_job_run_tests.status = 13 AND prow_job_run_tests.prow_job_run_timestamp BETWEEN |||BOUNDARY||| AND |||END|||) AS current_flakes, + COUNT(*) FILTER (WHERE prow_job_run_tests.status = 12 AND prow_job_run_tests.prow_job_run_timestamp BETWEEN |||BOUNDARY||| AND |||END|||) AS current_failures, + COUNT(*) FILTER (WHERE prow_job_run_tests.prow_job_run_timestamp BETWEEN |||BOUNDARY||| AND |||END|||) AS current_runs, open_bugs.open_bugs AS open_bugs, prow_jobs.variants, - prow_jobs.release + prow_job_run_tests.prow_job_run_release AS release FROM prow_job_run_tests JOIN tests ON tests.id = prow_job_run_tests.test_id @@ -259,12 +259,11 @@ FROM LEFT JOIN suites ON suites.id = prow_job_run_tests.suite_id LEFT JOIN test_ownerships ON (tests.id = test_ownerships.test_id AND prow_job_run_tests.suite_id = test_ownerships.suite_id) LEFT JOIN jira_components ON test_ownerships.jira_component = jira_components.name - JOIN prow_job_runs ON prow_job_runs.id = prow_job_run_tests.prow_job_run_id - JOIN prow_jobs ON prow_job_runs.prow_job_id = prow_jobs.id + JOIN prow_jobs ON prow_jobs.id = prow_job_run_tests.prow_job_id WHERE - prow_job_run_tests.created_at >= |||START||| AND prow_job_runs.timestamp >= |||START||| + prow_job_run_tests.prow_job_run_timestamp >= |||START||| GROUP BY - tests.id, tests.name, jira_components.name, jira_components.id, suites.name, open_bugs.open_bugs, prow_jobs.variants, prow_jobs.release + tests.id, tests.name, jira_components.name, jira_components.id, suites.name, open_bugs.open_bugs, prow_jobs.variants, 
prow_job_run_tests.prow_job_run_release ` const testAnalysisByVariantView = ` @@ -292,34 +291,32 @@ const testAnalysisByJobMatView = ` SELECT tests.id AS test_id, tests.name AS test_name, - date(prow_job_runs."timestamp") AS date, - prow_jobs.release, + date(prow_job_run_tests.prow_job_run_timestamp) AS date, + prow_job_run_tests.prow_job_run_release AS release, prow_jobs.name AS job_name, - COUNT(*) FILTER (WHERE prow_job_runs."timestamp" >= (|||TIMENOW||| - '14 days'::interval) AND prow_job_runs."timestamp" <= |||TIMENOW|||) AS runs, - COUNT(*) FILTER (WHERE prow_job_run_tests.status = 1 AND prow_job_runs."timestamp" >= (|||TIMENOW||| - '14 days'::interval) AND prow_job_runs."timestamp" <= |||TIMENOW|||) AS passes, - COUNT(*) FILTER (WHERE prow_job_run_tests.status = 13 AND prow_job_runs."timestamp" >= (|||TIMENOW||| - '14 days'::interval) AND prow_job_runs."timestamp" <= |||TIMENOW|||) AS flakes, - COUNT(*) FILTER (WHERE prow_job_run_tests.status = 12 AND prow_job_runs."timestamp" >= (|||TIMENOW||| - '14 days'::interval) AND prow_job_runs."timestamp" <= |||TIMENOW|||) AS failures + COUNT(*) FILTER (WHERE prow_job_run_tests.prow_job_run_timestamp >= (|||TIMENOW||| - '14 days'::interval) AND prow_job_run_tests.prow_job_run_timestamp <= |||TIMENOW|||) AS runs, + COUNT(*) FILTER (WHERE prow_job_run_tests.status = 1 AND prow_job_run_tests.prow_job_run_timestamp >= (|||TIMENOW||| - '14 days'::interval) AND prow_job_run_tests.prow_job_run_timestamp <= |||TIMENOW|||) AS passes, + COUNT(*) FILTER (WHERE prow_job_run_tests.status = 13 AND prow_job_run_tests.prow_job_run_timestamp >= (|||TIMENOW||| - '14 days'::interval) AND prow_job_run_tests.prow_job_run_timestamp <= |||TIMENOW|||) AS flakes, + COUNT(*) FILTER (WHERE prow_job_run_tests.status = 12 AND prow_job_run_tests.prow_job_run_timestamp >= (|||TIMENOW||| - '14 days'::interval) AND prow_job_run_tests.prow_job_run_timestamp <= |||TIMENOW|||) AS failures FROM prow_job_run_tests JOIN tests ON tests.id = 
prow_job_run_tests.test_id - JOIN prow_job_runs ON prow_job_runs.id = prow_job_run_tests.prow_job_run_id - JOIN prow_jobs ON prow_jobs.id = prow_job_runs.prow_job_id + JOIN prow_jobs ON prow_jobs.id = prow_job_run_tests.prow_job_id WHERE - prow_job_run_tests.created_at > (|||TIMENOW||| - '14 days'::interval) AND prow_job_runs."timestamp" > (|||TIMENOW||| - '14 days'::interval) + prow_job_run_tests.prow_job_run_timestamp > (|||TIMENOW||| - '14 days'::interval) GROUP BY - tests.name, tests.id, date(prow_job_runs."timestamp"), prow_jobs.release, prow_jobs.name + tests.name, tests.id, date(prow_job_run_tests.prow_job_run_timestamp), prow_job_run_tests.prow_job_run_release, prow_jobs.name ` const prowJobFailedTestsMatView = ` -SELECT date_trunc('|||BY|||'::text, prow_job_runs."timestamp") AS period, - prow_job_runs.prow_job_id, +SELECT date_trunc('|||BY|||'::text, pjrt.prow_job_run_timestamp) AS period, + pjrt.prow_job_id, tests.name AS test_name, count(tests.name) AS count -FROM prow_job_runs - JOIN prow_job_run_tests pjrt ON prow_job_runs.id = pjrt.prow_job_run_id +FROM prow_job_run_tests pjrt JOIN tests tests ON pjrt.test_id = tests.id WHERE pjrt.status = 12 -GROUP BY tests.name, (date_trunc('|||BY|||'::text, prow_job_runs."timestamp")), prow_job_runs.prow_job_id +GROUP BY tests.name, (date_trunc('|||BY|||'::text, pjrt.prow_job_run_timestamp)), pjrt.prow_job_id ` // TODO: remove distinct once bug fixed re dupes in release_job_runs @@ -346,6 +343,8 @@ FROM prow_job_runs pjr WHERE rt.release_time > (|||TIMENOW||| - '14 days'::interval) + AND pjrt.prow_job_run_timestamp > (|||TIMENOW||| - '14 days'::interval) + AND pjr.timestamp > (|||TIMENOW||| - '14 days'::interval) AND rjr.release_tag_id = rt.id AND rjr.kind = 'Blocking' AND rjr.State = 'Failed' diff --git a/pkg/flags/postgres_benchmarking_test.go b/pkg/flags/postgres_benchmarking_test.go index bf4368bfe..9387ae0b4 100644 --- a/pkg/flags/postgres_benchmarking_test.go +++ b/pkg/flags/postgres_benchmarking_test.go @@ 
-9,9 +9,12 @@ import ( "time" "github.com/openshift/sippy/pkg/api" + apitype "github.com/openshift/sippy/pkg/apis/api" "github.com/openshift/sippy/pkg/db" + "github.com/openshift/sippy/pkg/db/models" "github.com/openshift/sippy/pkg/db/query" "github.com/openshift/sippy/pkg/filter" + "github.com/openshift/sippy/pkg/util" log "github.com/sirupsen/logrus" ) @@ -33,7 +36,19 @@ type benchmarkResult struct { max time.Duration } -func printSummaryTable(results []benchmarkResult) { +func extractConnectionName(dsn string) string { + atIdx := strings.Index(dsn, "@") + if atIdx < 0 { + return "" + } + host := dsn[atIdx+1:] + if dotIdx := strings.Index(host, "."); dotIdx > 0 { + return host[:dotIdx] + } + return "" +} + +func printSummaryTable(t *testing.T, results []benchmarkResult, connName string) { nameWidth := 4 for _, r := range results { if len(r.name) > nameWidth { @@ -45,16 +60,35 @@ func printSummaryTable(results []benchmarkResult) { return results[i].avg > results[j].avg }) + var sb strings.Builder header := fmt.Sprintf(" %-*s %5s %12s %12s %12s %12s", nameWidth, "Name", "Iters", "Total", "Avg", "Min", "Max") - fmt.Println() - fmt.Println(header) - fmt.Println(" " + strings.Repeat("-", len(header)-2)) + sb.WriteString("\n") + sb.WriteString(header + "\n") + sb.WriteString(" " + strings.Repeat("-", len(header)-2) + "\n") for _, r := range results { - fmt.Printf(" %-*s %5d %12s %12s %12s %12s\n", + fmt.Fprintf(&sb, " %-*s %5d %12s %12s %12s %12s\n", nameWidth, r.name, r.iterations, r.total, r.avg, r.min, r.max) } - fmt.Println() + sb.WriteString("\n") + fmt.Print(sb.String()) + + // optional helper to track results + benchmarkFilePath := os.Getenv("benchmarking_file_path") + if connName != "" && len(benchmarkFilePath) > 0 { + + if !strings.HasSuffix(benchmarkFilePath, "/") { + benchmarkFilePath += "/" + } + + ts := time.Now().UTC().Format("2006-01-02T15-04-05") + filename := fmt.Sprintf("benchmark-%s-%s.txt", connName, ts) + if err := 
os.WriteFile(benchmarkFilePath+filename, []byte(sb.String()), 0600); err != nil { + t.Logf("failed to write benchmark report to %s: %v", filename, err) + } else { + t.Logf("benchmark report written to %s", filename) + } + } } func runBenchmarkCase(t *testing.T, dbc *db.DB, bc benchmarkCase, iterations int) benchmarkResult { @@ -97,11 +131,9 @@ func getIndividualBenchmarkCases() map[string]benchmarkCase { SELECT DISTINCT t.id, t.name FROM tests t JOIN prow_job_run_tests pjrt ON pjrt.test_id = t.id - JOIN prow_job_runs pjr ON pjr.id = pjrt.prow_job_run_id - JOIN prow_jobs pj ON pj.id = pjr.prow_job_id - WHERE pj.release = ? + WHERE pjrt.prow_job_run_release = ? AND t.name LIKE ? - AND pjrt.created_at > NOW() - INTERVAL '14 days' + AND pjrt.prow_job_run_timestamp > NOW() - INTERVAL '14 days' ORDER BY t.name LIMIT 20`, benchmarkRelease, "%events should not repeat%").Scan(&results) if res.Error != nil { @@ -237,10 +269,8 @@ func getBenchmarkCases(asOf time.Time) []benchmarkCase { SELECT count(distinct pjrt.prow_job_run_id) as job_runs_count, count(distinct pjrt.test_id) as test_ids_count FROM prow_job_run_tests pjrt - JOIN prow_job_runs pjr ON pjr.id = pjrt.prow_job_run_id - JOIN prow_jobs pj ON pj.id = pjr.prow_job_id - WHERE pjrt.created_at > ? - AND pj.release = ?`, truncatedTime, benchmarkRelease).Scan(&result) + WHERE pjrt.prow_job_run_timestamp > ? + AND pjrt.prow_job_run_release = ?`, truncatedTime, benchmarkRelease).Scan(&result) if res.Error != nil { return res.Error } @@ -262,10 +292,8 @@ func getBenchmarkCases(asOf time.Time) []benchmarkCase { SELECT count(distinct pjrt.prow_job_run_id) as job_runs_count, count(distinct pjrt.test_id) as test_ids_count FROM prow_job_run_tests pjrt - JOIN prow_job_runs pjr ON pjr.id = pjrt.prow_job_run_id - JOIN prow_jobs pj ON pj.id = pjr.prow_job_id - WHERE pjrt.created_at > ? - AND pj.release = ?`, truncatedTime, benchmarkRelease).Scan(&result) + WHERE pjrt.prow_job_run_timestamp > ? 
+ AND pjrt.prow_job_run_release = ?`, truncatedTime, benchmarkRelease).Scan(&result) if res.Error != nil { return res.Error } @@ -274,6 +302,97 @@ func getBenchmarkCases(asOf time.Time) []benchmarkCase { return nil }, }, + { + name: "VariantReports", + fn: func(dbc *db.DB) error { + start, boundary, end := util.PeriodToDates("default", asOf) + results, err := query.VariantReports(dbc, benchmarkRelease, start, boundary, end) + if err == nil { + log.Printf("VariantReports: %d variants", len(results)) + } + return err + }, + }, + { + name: "JobReports", + fn: func(dbc *db.DB) error { + start, boundary, end := util.PeriodToDates("default", asOf) + results, err := query.JobReports(dbc, &filter.FilterOptions{Filter: &filter.Filter{}}, benchmarkRelease, start, boundary, end) + if err == nil { + log.Printf("JobReports: %d jobs", len(results)) + } + return err + }, + }, + { + name: "BuildClusterHealth", + fn: func(dbc *db.DB) error { + start, boundary, end := util.PeriodToDates("default", asOf) + results, err := query.BuildClusterHealth(dbc, start, boundary, end) + if err == nil { + log.Printf("BuildClusterHealth: %d clusters", len(results)) + } + return err + }, + }, + { + name: "RecentTestFailures", + fn: func(dbc *db.DB) error { + period := 7 * 24 * time.Hour + previousPeriod := 7 * 24 * time.Hour + pagination := &apitype.Pagination{PerPage: 20, Page: 0} + result, err := api.GetRecentTestFailures(dbc, benchmarkRelease, period, &previousPeriod, false, &filter.FilterOptions{Filter: &filter.Filter{}}, pagination, asOf) + if err == nil { + log.Printf("RecentTestFailures: %d rows", result.TotalRows) + } + return err + }, + }, + { + name: "PullRequestReport", + fn: func(dbc *db.DB) error { + results, err := query.PullRequestReport(dbc, &filter.FilterOptions{Filter: &filter.Filter{}}, benchmarkRelease) + if err == nil { + log.Printf("PullRequestReport: %d PRs", len(results)) + } + return err + }, + }, + { + name: "RepositoryReport", + fn: func(dbc *db.DB) error { + results, err 
:= query.RepositoryReport(dbc, &filter.FilterOptions{Filter: &filter.Filter{}}, benchmarkRelease, asOf) + if err == nil { + log.Printf("RepositoryReport: %d repos", len(results)) + } + return err + }, + }, + { + name: "JobsRunsReport", + fn: func(dbc *db.DB) error { + pagination := &apitype.Pagination{PerPage: 20, Page: 0} + result, err := api.JobsRunsReportFromDB(dbc, &filter.FilterOptions{Filter: &filter.Filter{}}, benchmarkRelease, pagination, asOf) + if err == nil { + log.Printf("JobsRunsReport: %d rows", result.TotalRows) + } + return err + }, + }, + { + name: "ProwJobHistoricalTestCounts", + fn: func(dbc *db.DB) error { + var prowJob models.ProwJob + if err := dbc.DB.Where("name = ? AND release = ?", benchmarkJobName, benchmarkRelease).First(&prowJob).Error; err != nil { + return err + } + count, err := query.ProwJobHistoricalTestCounts(dbc, prowJob.ID) + if err == nil { + log.Printf("ProwJobHistoricalTestCounts for %s: %d", benchmarkJobName, count) + } + return err + }, + }, { name: "TestAnalysisPassRate", fn: func(dbc *db.DB) error { @@ -298,7 +417,7 @@ func getBenchmarkCases(asOf time.Time) []benchmarkCase { } } -func getBenchmarkDBClient(t *testing.T) *db.DB { +func getBenchmarkDBClient(t *testing.T) (*db.DB, string) { t.Helper() dsn := os.Getenv("db_benchmarking_dsn") if dsn == "" { @@ -323,11 +442,11 @@ func getBenchmarkDBClient(t *testing.T) *db.DB { t.Logf("failed to close DB client: %v", err) } }) - return dbc + return dbc, extractConnectionName(dsn) } func Test_BenchmarkIndividual(t *testing.T) { - dbc := getBenchmarkDBClient(t) + dbc, connName := getBenchmarkDBClient(t) asOf := time.Now().UTC() iterations := 3 cases := getBenchmarkCases(asOf) @@ -339,11 +458,11 @@ func Test_BenchmarkIndividual(t *testing.T) { results = append(results, r) }) } - printSummaryTable(results) + printSummaryTable(t, results, connName) } func Test_BenchmarkFindTestsByRelease(t *testing.T) { - dbc := getBenchmarkDBClient(t) + dbc, connName := getBenchmarkDBClient(t) 
iterations := 1 bc, ok := getIndividualBenchmarkCases()["FindTestsByRelease"] if !ok { @@ -351,11 +470,11 @@ func Test_BenchmarkFindTestsByRelease(t *testing.T) { } r := runBenchmarkCase(t, dbc, bc, iterations) - printSummaryTable([]benchmarkResult{r}) + printSummaryTable(t, []benchmarkResult{r}, connName) } func Test_BenchmarkCombined(t *testing.T) { - dbc := getBenchmarkDBClient(t) + dbc, connName := getBenchmarkDBClient(t) asOf := time.Now().UTC() iterations := 3 @@ -372,11 +491,11 @@ func Test_BenchmarkCombined(t *testing.T) { results = append(results, r) }) } - printSummaryTable(results) + printSummaryTable(t, results, connName) } func Test_BenchmarkGroup(t *testing.T) { - dbc := getBenchmarkDBClient(t) + dbc, connName := getBenchmarkDBClient(t) asOf := time.Now().UTC() iterations := 1 cases := getBenchmarkCases(asOf) @@ -402,5 +521,5 @@ func Test_BenchmarkGroup(t *testing.T) { fmt.Printf(" group iteration %d: %s\n", i+1, elapsed) } group.avg = group.total / time.Duration(iterations) - printSummaryTable([]benchmarkResult{group}) + printSummaryTable(t, []benchmarkResult{group}, connName) }