From bb4951a7b6a71757ec7b3af5607bef29400aa068 Mon Sep 17 00:00:00 2001
From: zuchka <zuchka@users.noreply.github.com>
Date: Fri, 8 May 2026 18:31:23 -0700
Subject: [PATCH 1/6] feat(evaluator): humanize_duration + default template
 helpers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pure functions, not yet wired into renderMessage — that lands in the
next commit. Covered by 19 table-driven test cases across both helpers.

humanize_duration: any numeric seconds value through time.Duration.String()
default: returns fallback only on nil or empty string (narrower than
sprig — 0 and false pass through to avoid {{ .exit_code | default 0 }}
footgun)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 internal/evaluator/template_funcs.go      | 57 +++++++++++++++++++
 internal/evaluator/template_funcs_test.go | 68 +++++++++++++++++++++++
 2 files changed, 125 insertions(+)
 create mode 100644 internal/evaluator/template_funcs.go
 create mode 100644 internal/evaluator/template_funcs_test.go
diff --git a/internal/evaluator/template_funcs.go b/internal/evaluator/template_funcs.go
new file mode 100644
index 0000000..6de1d9c
--- /dev/null
+++ b/internal/evaluator/template_funcs.go
@@ -0,0 +1,57 @@
+package evaluator
+
+import (
+	"fmt"
+	"time"
+)
+
+// humanizeDuration renders a seconds count as a time.Duration.String()
+// value (e.g. 30m43s, 4m7.3s, 500ms). Only the types named in the switch
+// are treated as numbers; anything else (int8, int16, uint8 and uint16
+// included) is returned via fmt.Sprint, so bad data never crashes a
+// template and shows up as odd output such as "<nil>" or "[1 2]".
+//
+// Used as a text/template FuncMap entry. Pipe form in templates:
+//
+//	{{ .duration_seconds | humanize_duration }}
+func humanizeDuration(seconds interface{}) string {
+	var f float64 // the input in seconds, widened so fractional values survive
+	switch v := seconds.(type) {
+	case float64:
+		f = v
+	case float32:
+		f = float64(v)
+	case int:
+		f = float64(v)
+	case int32:
+		f = float64(v)
+	case int64:
+		f = float64(v)
+	case uint:
+		f = float64(v)
+	case uint32:
+		f = float64(v)
+	case uint64:
+		f = float64(v)
+	default: // nil, strings, slices, and any numeric type not listed above
+		return fmt.Sprint(seconds)
+	}
+	return time.Duration(f * float64(time.Second)).String() // seconds -> ns; sub-ns fraction is truncated, NaN/Inf/overflow are not guarded
+}
+
+// defaultValue yields fallback when value is a nil interface or exactly the
+// string ""; every other value (0, false, "0", ...) is returned unchanged.
+// Intentionally narrower than sprig's default to avoid the "0/false fall back" footgun.
+//
+// Used as a text/template FuncMap entry; the piped value arrives as the last argument:
+//
+//	{{ .branch | default "unknown" }}
+func defaultValue(fallback interface{}, value interface{}) interface{} {
+	if value == nil { // true only for a nil interface; a typed nil pointer does not match
+		return fallback
+	}
+	if s, ok := value.(string); ok && s == "" { // exact string type only, and only when empty
+		return fallback
+	}
+	return value
+}
diff --git a/internal/evaluator/template_funcs_test.go b/internal/evaluator/template_funcs_test.go
new file mode 100644
index 0000000..89a4df3
--- /dev/null
+++ b/internal/evaluator/template_funcs_test.go
@@ -0,0 +1,68 @@
+package evaluator
+
+import (
+	"testing"
+)
+
+// TestHumanizeDuration checks that numeric seconds are rendered in
+// time.Duration.String() form (e.g. 30m43s, 4m7.3s, 500ms) and that a
+// non-numeric input comes back as its fmt.Sprint text instead of failing.
+// NOTE(review): float32, int32, uint32 and uint64 inputs are not covered.
+func TestHumanizeDuration(t *testing.T) {
+	tests := []struct {
+		name string
+		in   interface{}
+		want string
+	}{
+		{"zero", 0, "0s"}, // untyped constants are stored as int, or float64 when fractional
+		{"subsecond_float", 0.5, "500ms"},
+		{"int_seconds", 7, "7s"},
+		{"float_minutes_with_decimal", 247.3, "4m7.3s"},
+		{"int_half_hour_plus", 1843, "30m43s"},
+		{"int_two_hours_with_zero_minutes", 7245, "2h0m45s"},
+		{"negative_seconds", -30, "-30s"},
+		{"explicit_float64", float64(60), "1m0s"},
+		{"explicit_int64", int64(60), "1m0s"},
+		{"explicit_uint", uint(120), "2m0s"},
+		{"non_numeric_string_passthrough", "hello", "hello"},
+		{"nil_passthrough", nil, "<nil>"},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := humanizeDuration(tt.in)
+			if got != tt.want {
+				t.Errorf("humanizeDuration(%v) = %q, want %q", tt.in, got, tt.want)
+			}
+		})
+	}
+}
+
+// TestDefault checks that defaultValue substitutes the fallback only for
+// a nil value or the empty string. Sprig's "0/false/empty-collection
+// falls back" behavior is intentionally NOT replicated: those are real
+// values, and replacing them would be a footgun for rules written like
+// {{ .exit_code | default 0 }}.
+func TestDefault(t *testing.T) {
+	tests := []struct {
+		name     string
+		fallback interface{}
+		value    interface{}
+		want     interface{}
+	}{
+		{"nil_value_uses_fallback", "main", nil, "main"},
+		{"empty_string_uses_fallback", "main", "", "main"},
+		{"non_empty_string_passes_through", "main", "dev", "dev"},
+		{"zero_int_passes_through", 99, 0, 0},
+		{"false_passes_through", true, false, false},
+		{"string_zero_passes_through", "fallback", "0", "0"},
+		{"string_false_passes_through", "fallback", "false", "false"},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := defaultValue(tt.fallback, tt.value)
+			if got != tt.want { // interface comparison; every table value is a comparable type
+				t.Errorf("defaultValue(%v, %v) = %v, want %v", tt.fallback, tt.value, got, tt.want)
+			}
+		})
+	}
+}

From 06bc624b3e69678cf6bfab382f675a5a7f8e6164 Mon Sep 17 00:00:00 2001
From: zuchka <zuchka@users.noreply.github.com>
Date: Fri, 8 May 2026 18:38:49 -0700
Subject: [PATCH 2/6] feat(evaluator): wire template helpers into renderMessage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds template.FuncMap registration for humanize_duration and default
plus Option("missingkey=zero") so missing fields surface via Go's zero
interface handling. Two new integration tests exercise both helpers
through the full renderMessage pipeline.

Note: missingkey=zero on map[string]interface{} still renders missing
keys as <no value> (nil interface prints that way in Go templates), so
TestRenderMessage_MissingFieldStillRendersGracefully required no update
— its existing <no value> assertion remains correct.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 internal/evaluator/engine.go              |  8 ++++-
 internal/evaluator/template_funcs_test.go | 44 +++++++++++++++++++++++
 2 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/internal/evaluator/engine.go b/internal/evaluator/engine.go
index 4363366..14e9376 100644
--- a/internal/evaluator/engine.go
+++ b/internal/evaluator/engine.go
@@ -457,7 +457,13 @@ func renderMessage(tmpl string, alert Alert) string {
 	if tmpl == "" {
 		return fmt.Sprintf("rule %q fired (metric=%s value=%v)", alert.Rule, alert.Metric, alert.Value)
 	}
-	t, err := template.New("msg").Parse(tmpl)
+	t, err := template.New("msg").
+		Option("missingkey=zero").
+		Funcs(template.FuncMap{
+			"humanize_duration": humanizeDuration,
+			"default":           defaultValue,
+		}).
+		Parse(tmpl)
 	if err != nil {
 		return tmpl // return raw if template is invalid
 	}
diff --git a/internal/evaluator/template_funcs_test.go b/internal/evaluator/template_funcs_test.go
index 89a4df3..1a11b32 100644
--- a/internal/evaluator/template_funcs_test.go
+++ b/internal/evaluator/template_funcs_test.go
@@ -2,6 +2,7 @@ package evaluator
 
 import (
 	"testing"
+	"time"
 )
 
 // TestHumanizeDuration verifies the duration humanization helper renders
@@ -66,3 +67,46 @@ func TestDefault(t *testing.T) {
 		})
 	}
 }
+
+// TestRenderMessage_DefaultHelper_FillsMissingField drives the default
+// helper through the full renderMessage pipeline with .branch, a field the
+// alert below never sets (Labels only has "runner"; Floats is left nil).
+// With missingkey=zero wired in, the missing field is expected to reach
+// default as nil, so the rendered text uses the "unknown" fallback.
+func TestRenderMessage_DefaultHelper_FillsMissingField(t *testing.T) {
+	alert := Alert{
+		Rule:    "build_failed",
+		Metric:  "run.exit",
+		Value:   1,
+		Labels:  map[string]string{"runner": "github-actions"},
+		FiredAt: time.Now(),
+	}
+
+	got := renderMessage(`Build on {{ .branch | default "unknown" }} failed`, alert)
+
+	want := "Build on unknown failed"
+	if got != want {
+		t.Errorf("renderMessage default helper: got %q, want %q", got, want)
+	}
+}
+
+// TestRenderMessage_HumanizeDuration_ThroughPipeline drives humanize_duration
+// through the full renderMessage pipeline with a run.exit-shaped alert whose
+// Floats carry duration_seconds = 1843, expecting it to render as 30m43s.
+func TestRenderMessage_HumanizeDuration_ThroughPipeline(t *testing.T) {
+	alert := Alert{
+		Rule:    "training_failed",
+		Metric:  "run.exit",
+		Value:   1,
+		Labels:  map[string]string{"runner": "mlflow"},
+		Floats:  map[string]float64{"duration_seconds": 1843},
+		FiredAt: time.Now(),
+	}
+
+	got := renderMessage("Run failed after {{ .duration_seconds | humanize_duration }}", alert)
+
+	want := "Run failed after 30m43s"
+	if got != want {
+		t.Errorf("renderMessage humanize_duration: got %q, want %q", got, want)
+	}
+}

From 2a862bd5e36fc4a892466b2f02ccc3f54ca8e772 Mon Sep 17 00:00:00 2001
From: zuchka <zuchka@users.noreply.github.com>
Date: Fri, 8 May 2026 18:54:50 -0700
Subject: [PATCH 3/6] docs(recipes): use humanize_duration for duration_seconds

Sweeps the {{ .duration_seconds }}s pattern across all 9 platform
recipes, the canonical _template.md, and ding.yaml.example. Renders
human-readable durations (30m43s, 4m7.3s) instead of raw float seconds
(1843, 247.3) in the example alert messages.

Verified end-to-end via ding test-rule with a synthetic run.exit event.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 ding.yaml.example               | 2 +-
 docs/recipes/_template.md       | 2 +-
 docs/recipes/argo-workflows.md  | 2 +-
 docs/recipes/buildkite.md       | 2 +-
 docs/recipes/gitlab-ci.md       | 2 +-
 docs/recipes/jenkins.md         | 2 +-
 docs/recipes/kubernetes-jobs.md | 4 ++--
 docs/recipes/mlflow.md          | 2 +-
 docs/recipes/modal.md           | 2 +-
 docs/recipes/ray.md             | 2 +-
 10 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/ding.yaml.example b/ding.yaml.example
index b1be8a7..691c007 100644
--- a/ding.yaml.example
+++ b/ding.yaml.example
@@ -143,7 +143,7 @@ rules:
   #   match:
   #     metric: run.exit
   #   condition: value > 0
-  #   message: "job failed: exit code {{ .value }} after {{ .duration_seconds }}s"
+  #   message: "job failed: exit code {{ .value }} after {{ .duration_seconds | humanize_duration }}"
   #   alert:
   #     - notifier: github_actions
 
diff --git a/docs/recipes/_template.md b/docs/recipes/_template.md
index 01ebcf4..20aa0cc 100644
--- a/docs/recipes/_template.md
+++ b/docs/recipes/_template.md
@@ -32,7 +32,7 @@ rules:
     match:
       metric: run.exit
     condition: value > 0
-    message: "Job failed (exit {{ .exit_code }} after {{ .duration_seconds }}s)"
+    message: "Job failed (exit {{ .exit_code }} after {{ .duration_seconds | humanize_duration }})"
     alert:
       - notifier: slack
 ```
diff --git a/docs/recipes/argo-workflows.md b/docs/recipes/argo-workflows.md
index 22d90bb..cb23ea9 100644
--- a/docs/recipes/argo-workflows.md
+++ b/docs/recipes/argo-workflows.md
@@ -49,7 +49,7 @@ data:
         match:
           metric: run.exit
         condition: value > 0
-        message: "Argo step {{ .pod }} (workflow {{ .workflow }}) failed with exit {{ .exit_code }} after {{ .duration_seconds }}s"
+        message: "Argo step {{ .pod }} (workflow {{ .workflow }}) failed with exit {{ .exit_code }} after {{ .duration_seconds | humanize_duration }}"
         alert:
           - notifier: slack
 ---
diff --git a/docs/recipes/buildkite.md b/docs/recipes/buildkite.md
index 05902bf..1009bc0 100644
--- a/docs/recipes/buildkite.md
+++ b/docs/recipes/buildkite.md
@@ -33,7 +33,7 @@ rules:
     match:
       metric: run.exit
     condition: value > 0
-    message: "{{ .repo }}@{{ .branch }} failed (exit {{ .exit_code }} after {{ .duration_seconds }}s)"
+    message: "{{ .repo }}@{{ .branch }} failed (exit {{ .exit_code }} after {{ .duration_seconds | humanize_duration }})"
     alert:
       - notifier: slack
 ```
diff --git a/docs/recipes/gitlab-ci.md b/docs/recipes/gitlab-ci.md
index 7a6704b..2693ca9 100644
--- a/docs/recipes/gitlab-ci.md
+++ b/docs/recipes/gitlab-ci.md
@@ -35,7 +35,7 @@ rules:
     match:
       metric: run.exit
     condition: value > 0
-    message: "Pipeline {{ .branch }} failed (exit {{ .exit_code }} after {{ .duration_seconds }}s)"
+    message: "Pipeline {{ .branch }} failed (exit {{ .exit_code }} after {{ .duration_seconds | humanize_duration }})"
     alert:
       - notifier: slack
 ```
diff --git a/docs/recipes/jenkins.md b/docs/recipes/jenkins.md
index f1d906a..19e3496 100644
--- a/docs/recipes/jenkins.md
+++ b/docs/recipes/jenkins.md
@@ -47,7 +47,7 @@ rules:
     match:
       metric: run.exit
     condition: value > 0
-    message: "{{ .job }} build {{ .build }} failed (exit {{ .exit_code }} after {{ .duration_seconds }}s)"
+    message: "{{ .job }} build {{ .build }} failed (exit {{ .exit_code }} after {{ .duration_seconds | humanize_duration }})"
     alert:
       - notifier: slack
 ```
diff --git a/docs/recipes/kubernetes-jobs.md b/docs/recipes/kubernetes-jobs.md
index 6898509..741a616 100644
--- a/docs/recipes/kubernetes-jobs.md
+++ b/docs/recipes/kubernetes-jobs.md
@@ -58,7 +58,7 @@ data:
         match:
           metric: run.exit
         condition: value > 0
-        message: "{{ .pod }} (Job {{ .job_name }}) failed with exit {{ .exit_code }} after {{ .duration_seconds }}s"
+        message: "{{ .pod }} (Job {{ .job_name }}) failed with exit {{ .exit_code }} after {{ .duration_seconds | humanize_duration }}"
         alert:
           - notifier: slack
 ---
@@ -225,7 +225,7 @@ rules:
   - name: job_failed
     match: { metric: run.exit }
     condition: value > 0
-    message: "Job failed (exit {{ .exit_code }} after {{ .duration_seconds }}s)"
+    message: "Job failed (exit {{ .exit_code }} after {{ .duration_seconds | humanize_duration }})"
     alert:
       - notifier: k8s
 ```
diff --git a/docs/recipes/mlflow.md b/docs/recipes/mlflow.md
index 98e767b..07903d2 100644
--- a/docs/recipes/mlflow.md
+++ b/docs/recipes/mlflow.md
@@ -49,7 +49,7 @@ rules:
     match: { metric: run.exit }
     condition: value > 0
     message: |
-      MLflow run failed (exit {{ .exit_code }} after {{ .duration_seconds }}s)
+      MLflow run failed (exit {{ .exit_code }} after {{ .duration_seconds | humanize_duration }})
       <{{ .tracking_uri }}/#/experiments/{{ .experiment_id }}/runs/{{ .run_id }}|View run in MLflow UI>
     alert:
       - notifier: slack
diff --git a/docs/recipes/modal.md b/docs/recipes/modal.md
index acb9781..5c53990 100644
--- a/docs/recipes/modal.md
+++ b/docs/recipes/modal.md
@@ -67,7 +67,7 @@ rules:
   - name: training_failed
     match: { metric: run.exit }
     condition: value > 0
-    message: "Modal function {{ .function_name }} (task {{ .modal_task_id }}) failed (exit {{ .exit_code }} after {{ .duration_seconds }}s)"
+    message: "Modal function {{ .function_name }} (task {{ .modal_task_id }}) failed (exit {{ .exit_code }} after {{ .duration_seconds | humanize_duration }})"
     alert:
       - notifier: slack
 ```
diff --git a/docs/recipes/ray.md b/docs/recipes/ray.md
index f1d445f..0736546 100644
--- a/docs/recipes/ray.md
+++ b/docs/recipes/ray.md
@@ -39,7 +39,7 @@ rules:
   - name: training_failed
     match: { metric: run.exit }
     condition: value > 0
-    message: "Ray job {{ .run_id }} failed (exit {{ .exit_code }} after {{ .duration_seconds }}s)"
+    message: "Ray job {{ .run_id }} failed (exit {{ .exit_code }} after {{ .duration_seconds | humanize_duration }})"
     alert:
       - notifier: slack
 ```

From 0cfccb39ea29c62fd7c5c04ff095eda7201171ad Mon Sep 17 00:00:00 2001
From: zuchka <zuchka@users.noreply.github.com>
Date: Fri, 8 May 2026 18:58:27 -0700
Subject: [PATCH 4/6] docs(recipes): update rendered-output examples to match
 humanize_duration

The "What you get" prose example in modal.md and ray.md showed raw
seconds (287s, 1843s) but the YAML now produces humanized output via
humanize_duration. Update prose to match: 287s -> 4m47s, 1843s -> 30m43s.

mlflow.md:90 was already consistent (42 humanizes to 42s, no change).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 docs/recipes/modal.md | 2 +-
 docs/recipes/ray.md   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/recipes/modal.md b/docs/recipes/modal.md
index 5c53990..d4a722c 100644
--- a/docs/recipes/modal.md
+++ b/docs/recipes/modal.md
@@ -91,7 +91,7 @@ A Slack message during training when `val_loss` exceeds threshold:
 …and on function exit:
 
 > 🔔 `training_failed`
-> Modal function trainer (task ta-abc123def) failed (exit 1 after 287s)
+> Modal function trainer (task ta-abc123def) failed (exit 1 after 4m47s)
 
 The `modal_task_id` matches the task ID visible in the Modal dashboard, so the Slack alert is one click away from the function's logs and metrics.
 
diff --git a/docs/recipes/ray.md b/docs/recipes/ray.md
index 0736546..c01d296 100644
--- a/docs/recipes/ray.md
+++ b/docs/recipes/ray.md
@@ -112,7 +112,7 @@ A Slack message during training when `val_loss` exceeds threshold:
 …and on training-process exit:
 
 > 🔔 `training_failed`
-> Ray job raysubmit_abcdef1234567890 failed (exit 1 after 1843s)
+> Ray job raysubmit_abcdef1234567890 failed (exit 1 after 30m43s)
 
 All Path A alerts are auto-tagged with `run_id` + `runner=ray`. The `run_id` matches the UUID printed by `ray job list`.
 

From d37dfbdee64fd911dcde5f4bb1769626ced66dee Mon Sep 17 00:00:00 2001
From: zuchka <zuchka@users.noreply.github.com>
Date: Fri, 8 May 2026 18:59:22 -0700
Subject: [PATCH 5/6] docs(configuration): document template helpers section

Adds a new ### Template helpers subsection covering humanize_duration
and default, with input/output tables matching the implementation
contract and a note on the deliberate divergence from sprig's default
semantics for 0/false values.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 docs/configuration.md | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/docs/configuration.md b/docs/configuration.md
index bb0a8c6..af2f0cd 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -362,6 +362,43 @@ wall-clock windows in long-running serve deployments.
 | `.sum` | windowed | Sum over window |
 | `.count` | windowed | Event count over window |
 
+### Template helpers
+
+Two helper functions are available in message templates beyond Go's default `text/template` syntax:
+
+#### `humanize_duration`
+
+Renders a numeric seconds value as a human-readable duration string using Go's native `time.Duration.String()` format. Useful for the `{{ .duration_seconds }}` field on `run.exit` events.
+
+```yaml
+message: "Job failed after {{ .duration_seconds | humanize_duration }}"
+```
+
+| Input (seconds) | Rendered |
+|----------------:|----------|
+| `0` | `0s` |
+| `0.5` | `500ms` |
+| `7` | `7s` |
+| `247.3` | `4m7.3s` |
+| `1843` | `30m43s` |
+| `7245` | `2h0m45s` |
+
+Accepts `int`, `int32`, `int64`, `uint`, `uint32`, `uint64`, `float32`, and `float64` values, interpreted as seconds. Any other input (including the smaller integer types such as `int8` or `uint16`) passes through unchanged via `fmt.Sprint`, so a typo or a missing field renders something visibly wrong rather than crashing the template.
+
+#### `default`
+
+Returns a fallback when the piped value is `nil` (typically a missing field) or the empty string. Numeric `0` and boolean `false` pass through unchanged — they are real values, not absences. This is intentionally narrower than sprig's `default` to avoid the `{{ .exit_code | default 0 }}` footgun.
+
+```yaml
+message: "Build on {{ .branch | default \"unknown\" }} failed"
+```
+
+| `.branch` value | Rendered |
+|---|---|
+| `"main"` | `Build on main failed` |
+| `""` | `Build on unknown failed` |
+| missing | `Build on unknown failed` |
+
 ### Per-label-set cooldowns
 
 Cooldowns are tracked independently per unique label combination. A noisy `web-01` does not suppress alerts from `web-02`.

From 7af63c9c707800b4cfea937ed4a4a0aceacad96d Mon Sep 17 00:00:00 2001
From: zuchka <zuchka@users.noreply.github.com>
Date: Fri, 8 May 2026 19:07:03 -0700
Subject: [PATCH 6/6] docs(readme): use humanize_duration in run.exit example

The README's run.exit example still showed the raw {{ .duration_seconds }}s
form. Final-review caught the inconsistency: the recipe sweep updated all
9 platform recipes + _template.md + ding.yaml.example, but README.md
contains the canonical run.exit example a new user reads first.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 8cd3a36..bc98c91 100644
--- a/README.md
+++ b/README.md
@@ -167,7 +167,7 @@ Match it like any other metric:
 - name: nonzero_exit
   match: { metric: run.exit }
   condition: value > 0
-  message: "job failed with exit code {{ .value }} after {{ .duration_seconds }}s"
+  message: "job failed with exit code {{ .value }} after {{ .duration_seconds | humanize_duration }}"
   alert: [{ notifier: github_actions }]
 ```