Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,14 @@ When your command exits, DING:

SIGTERM and SIGINT are forwarded to the child for graceful shutdown.

After writing a rule, preview it without a real workload:

```sh
echo '{"metric":"loss","value":1.5}' | ding test-rule --config ding.yaml
```

For a full preview against a real run without sending notifications, use `ding run --dry-run -- <your-cmd>`.

### Run context, auto-detected

DING reads the runner's environment variables and attaches labels automatically. No config required.
Expand Down
5 changes: 5 additions & 0 deletions ding.yaml.example
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
# events the command emits + the synthetic run.exit
# ding serve long-running HTTP daemon; rules evaluate against
# events POSTed to /ingest or piped via stdin
#
# Preview rules without sending notifications:
# echo '{"metric":"name","value":42}' | ding test-rule --config ding.yaml
# ding run --dry-run --config ding.yaml -- ./your-script.sh
# See: docs/configuration.md#testing-rules-without-a-workload

server:
port: 8080
Expand Down
38 changes: 38 additions & 0 deletions docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,44 @@ For typical secrets (Slack URLs, PagerDuty tokens, API keys, opaque ID strings)
- No `${VAR:-default}` for inline defaults — set the env var to the default before launching DING.
- No `$${VAR}` escape for writing literal `${VAR}` — the use case is rare; if you hit it, file an issue.

## Testing rules without a workload

DING ships two preview surfaces so you can verify rules before turning on real notifications.

### `ding test-rule` — replay synthetic events

Pipe JSONL events on stdin, or pass a file of them as an argument, to evaluate them against a config; matching rules render their messages as if they were about to fire, but no notifications go out.

```sh
# Pipe events from any source
echo '{"metric":"loss","value":1.5}' | ding test-rule --config ding.yaml

# Read from a file (use - for explicit stdin)
ding test-rule events.jsonl
```

Each input line is a JSON event in DING's normal shape: a `metric` field for matching, a `value` field for numeric conditions, and any other key/value pairs as labels (string) or floats (number). An optional `timestamp` field (RFC3339 string or Unix epoch number) controls the event's time for windowed rules; events without `timestamp` get sequential synthetic times starting from now.

Output format auto-detects: human-readable text when stdout is a terminal, JSON (one object per line) when piped. Override with `--format text|json`. Disable color with `--no-color`.

End-of-run rules (`mode: end-of-run`) fire after the last input event.

### `ding run --dry-run` — wrap a real workload, suppress sends

Same as `ding run`, but the dispatch boundary is swapped for a logging one: your wrapped command runs normally, events flow through the engine normally, the synthetic `run.exit` event is still emitted, end-of-run rules still fire, and the wrapped command's exit code still propagates. Only the final dispatch step is bypassed — no `notifier.Send` calls are made and nothing is written to the alert log.

```sh
# Preview what alerts would fire on a real failing build
ding run --dry-run --config ding.yaml -- pytest tests/

# JSON output for piping (preview is on stderr; redirect to stdout for jq)
ding run --dry-run --format json --config ding.yaml -- ./train.sh 2>&1 | jq
```

Preview output goes to stderr alongside the wrapped command's own stderr; the wrapped command's stdout stays clean for downstream tools that read it.

---

## Platform-specific examples

See [Recipes](recipes/index.md) for end-to-end configurations on specific CI/CD platforms (GitLab CI, Jenkins, Buildkite). Each recipe shows the auto-captured labels and the minimal `ding.yaml` for that platform.
42 changes: 42 additions & 0 deletions internal/cli/dispatcher.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
package cli

import (
"log"

"github.com/ding-labs/ding/internal/evaluator"
"github.com/ding-labs/ding/internal/notifier"
)

// Dispatcher routes alerts to their final destination. Implementations live
// here (NotifierDispatcher — production sends) and in internal/dryrun
// (LoggingDispatcher — preview-only, no sends).
//
// Dispatch has no return value, so an implementation must deal with delivery
// failures itself; NotifierDispatcher, for example, reports them through
// log.Printf instead of surfacing them to the caller.
type Dispatcher interface {
// Dispatch handles one batch of alerts.
//
// NOTE(review): runRun appears to share a single Dispatcher between its
// stdout and stderr ingest goroutines, so implementations presumably need
// to tolerate concurrent Dispatch calls — confirm against the callers.
Dispatch(alerts []evaluator.Alert)
}

// NotifierDispatcher is the production Dispatcher: writes each alert to the
// alert log (if configured) then calls Send on each named notifier.
type NotifierDispatcher struct {
// Notifiers maps a notifier name to the notifier Dispatch calls Send on.
// Dispatch looks up each name listed in an alert's Notifiers field here;
// a name with no entry is logged as unknown and skipped.
Notifiers map[string]notifier.Notifier
// AlertLogger, when non-nil, has Log called with every alert before any
// notifier is invoked. A nil AlertLogger disables alert-log writes.
AlertLogger *notifier.AlertLogger
}

// Dispatch delivers the alerts one at a time, in slice order. For each alert
// it first writes the alert to the alert log when d.AlertLogger is set, then
// passes the alert to every notifier named in the alert's Notifiers list.
//
// Nothing here aborts the batch: an alert-log write error, a notifier name
// missing from d.Notifiers, and a Send error are each reported with
// log.Printf, after which processing continues with the next notifier or
// alert.
func (d *NotifierDispatcher) Dispatch(alerts []evaluator.Alert) {
	for _, a := range alerts {
		// The alert log is optional; record the alert before any send.
		if logger := d.AlertLogger; logger != nil {
			if logErr := logger.Log(a); logErr != nil {
				log.Printf("ding: alert log write error: %v", logErr)
			}
		}

		for _, target := range a.Notifiers {
			sink, registered := d.Notifiers[target]
			if registered {
				if sendErr := sink.Send(a); sendErr != nil {
					log.Printf("ding: notifier %q error: %v", target, sendErr)
				}
				continue
			}
			// The rule references a notifier that was never configured.
			log.Printf("ding: unknown notifier %q for rule %q", target, a.Rule)
		}
	}
}
1 change: 1 addition & 0 deletions internal/cli/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Two modes:
newValidateCmd(),
newVersionCmd(version),
newInstallCmd(),
newTestRuleCmd(),
)
return root
}
92 changes: 48 additions & 44 deletions internal/cli/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"github.com/spf13/cobra"

"github.com/ding-labs/ding/internal/config"
"github.com/ding-labs/ding/internal/dryrun"
"github.com/ding-labs/ding/internal/evaluator"
"github.com/ding-labs/ding/internal/ingester"
"github.com/ding-labs/ding/internal/metrics"
Expand All @@ -27,6 +28,9 @@ import (
func newRunCmd() *cobra.Command {
var configPath string
var runIDOverride string
var dryRun bool
var format string
var noColor bool

cmd := &cobra.Command{
Use: "run [flags] -- <command> [args...]",
Expand Down Expand Up @@ -55,25 +59,53 @@ is safe.`,
ding run -- python train.py --epochs 100

# Override the auto-detected run ID
ding run --run-id manual-debug -- ./flaky-script.sh`,
ding run --run-id manual-debug -- ./flaky-script.sh

# Preview alerts without sending to notifiers
ding run --dry-run --config alerts.yaml -- ./script.sh`,
DisableFlagsInUseLine: true,
Args: cobra.MinimumNArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
return runRun(configPath, runIDOverride, args)
return runRun(configPath, runIDOverride, args, dryRun, format, noColor)
},
}
cmd.Flags().StringVar(&configPath, "config", "ding.yaml", "path to config file")
cmd.Flags().StringVar(&runIDOverride, "run-id", "", "override auto-detected run ID")
cmd.Flags().BoolVar(&dryRun, "dry-run", false, "preview alerts without sending to notifiers")
cmd.Flags().StringVar(&format, "format", "auto", "output format when --dry-run is set: auto, text, json")
cmd.Flags().BoolVar(&noColor, "no-color", false, "disable ANSI color in dry-run text output")
return cmd
}

func runRun(configPath, runIDOverride string, args []string) error {
func runRun(configPath, runIDOverride string, args []string, dryRun bool, format string, noColor bool) error {
// Validate dry-run format BEFORE loading config so we don't pay the
// config-load + notifier-construction cost just to reject a typo.
if dryRun {
switch format {
case "auto", "text", "json":
// ok
default:
return fmt.Errorf("invalid --format %q: must be auto, text, or json", format)
}
}

collector := metrics.NewCollector()

eng, cfg, notifiers, alertLogger, jqCode, err := server.BuildFromConfig(configPath, collector)
if err != nil {
return fmt.Errorf("loading config: %w", err)
}

var dispatcher Dispatcher
if dryRun {
formatter := pickFormatter(format, noColor, os.Stderr)
dispatcher = dryrun.NewLoggingDispatcher(formatter, os.Stderr)
} else {
dispatcher = &NotifierDispatcher{
Notifiers: notifiers,
AlertLogger: alertLogger,
}
}
// Drain handler — runs from both the deferred path (covers early returns
// from config errors, command-start failures, etc.) and the explicit path
// before os.Exit (covers the non-zero-exit case where defers don't run).
Expand All @@ -85,9 +117,11 @@ func runRun(configPath, runIDOverride string, args []string) error {
return
}
drained = true
drainNotifiers(notifiers, cfg.Server.DrainTimeout.Duration)
if alertLogger != nil {
_ = alertLogger.Close()
if !dryRun {
drainNotifiers(notifiers, cfg.Server.DrainTimeout.Duration)
if alertLogger != nil {
_ = alertLogger.Close()
}
}
}
defer drainOnce()
Expand Down Expand Up @@ -134,11 +168,11 @@ func runRun(configPath, runIDOverride string, args []string) error {
wg.Add(2)
go func() {
defer wg.Done()
ingestStream(stdoutPipe, os.Stdout, eng, notifiers, alertLogger, cfg, jqCode, rc)
ingestStream(stdoutPipe, os.Stdout, eng, dispatcher, cfg, jqCode, rc)
}()
go func() {
defer wg.Done()
ingestStream(stderrPipe, os.Stderr, eng, notifiers, alertLogger, cfg, jqCode, rc)
ingestStream(stderrPipe, os.Stderr, eng, dispatcher, cfg, jqCode, rc)
}()

wg.Wait()
Expand All @@ -162,11 +196,11 @@ func runRun(configPath, runIDOverride string, args []string) error {
// Synthetic run.exit event flows through the engine like any other —
// during-run rules matching metric: run.exit fire here.
summary := rc.SummaryEvent(exitCode)
dispatchEvent(summary, eng, notifiers, alertLogger)
dispatchEvent(summary, eng, dispatcher)

// End-of-run rules accumulate state during the run; fire them now.
endAlerts := eng.ProcessEndOfRun(time.Now())
dispatchAlerts(endAlerts, notifiers, alertLogger)
dispatcher.Dispatch(endAlerts)

log.Printf("ding: run end — run_id=%s exit_code=%d duration=%.1fs",
rc.RunID, exitCode, time.Since(rc.StartedAt).Seconds())
Expand Down Expand Up @@ -209,8 +243,7 @@ func ingestStream(
r io.Reader,
mirror io.Writer,
eng *evaluator.Engine,
notifiers map[string]notifier.Notifier,
alertLogger *notifier.AlertLogger,
dispatcher Dispatcher,
cfg *config.Config,
jqCode *gojq.Code,
rc *runctx.Context,
Expand Down Expand Up @@ -246,7 +279,7 @@ func ingestStream(
}
for _, ev := range events {
ev.Labels = rc.Apply(ev.Labels)
dispatchEvent(ev, eng, notifiers, alertLogger)
dispatchEvent(ev, eng, dispatcher)
}
}
// Scanner errors on closed pipes are expected at EOF; only log surprising ones.
Expand All @@ -255,36 +288,7 @@ func ingestStream(
}
}

func dispatchEvent(
ev ingester.Event,
eng *evaluator.Engine,
notifiers map[string]notifier.Notifier,
alertLogger *notifier.AlertLogger,
) {
func dispatchEvent(ev ingester.Event, eng *evaluator.Engine, dispatcher Dispatcher) {
alerts := eng.Process(ev, time.Now())
dispatchAlerts(alerts, notifiers, alertLogger)
}

func dispatchAlerts(
alerts []evaluator.Alert,
notifiers map[string]notifier.Notifier,
alertLogger *notifier.AlertLogger,
) {
for _, alert := range alerts {
if alertLogger != nil {
if err := alertLogger.Log(alert); err != nil {
log.Printf("ding: alert log write error: %v", err)
}
}
for _, name := range alert.Notifiers {
n, ok := notifiers[name]
if !ok {
log.Printf("ding: unknown notifier %q for rule %q", name, alert.Rule)
continue
}
if err := n.Send(alert); err != nil {
log.Printf("ding: notifier %q error: %v", name, err)
}
}
}
dispatcher.Dispatch(alerts)
}
50 changes: 48 additions & 2 deletions internal/cli/run_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"time"

"github.com/ding-labs/ding/internal/config"
"github.com/ding-labs/ding/internal/dryrun"
"github.com/ding-labs/ding/internal/evaluator"
"github.com/ding-labs/ding/internal/notifier"
"github.com/ding-labs/ding/internal/runctx"
Expand Down Expand Up @@ -71,7 +72,8 @@ not a json line — mirrored only
`)
var mirror bytes.Buffer

ingestStream(input, &mirror, eng, notifierMap, nil, cfg, nil, rc)
dispatcher := &NotifierDispatcher{Notifiers: notifierMap, AlertLogger: nil}
ingestStream(input, &mirror, eng, dispatcher, cfg, nil, rc)

alerts := cap.snapshot()
if len(alerts) != 2 {
Expand Down Expand Up @@ -123,7 +125,8 @@ FAILED test_b
some random shell output here
`)
var mirror bytes.Buffer
ingestStream(input, &mirror, eng, notifierMap, nil, cfg, nil, rc)
dispatcher := &NotifierDispatcher{Notifiers: notifierMap, AlertLogger: nil}
ingestStream(input, &mirror, eng, dispatcher, cfg, nil, rc)

if got := cap.snapshot(); len(got) != 0 {
t.Errorf("expected no alerts on non-event input, got %d: %#v", len(got), got)
Expand Down Expand Up @@ -206,3 +209,46 @@ func TestDrainNotifiers_HandlesEmpty(t *testing.T) {
// no panic, no return value to check — the helper just returns
}

// TestIngestStream_DryRun_NoNotifierSends pushes a single event that trips the
// "spike" rule through ingestStream using a dry-run LoggingDispatcher. It
// checks that the alert is rendered to the dispatcher's writer and that the
// capture notifier records nothing.
func TestIngestStream_DryRun_NoNotifierSends(t *testing.T) {
	spikeRule := evaluator.EngineRule{
		Name:      "spike",
		Match:     map[string]string{"metric": "latency"},
		Condition: "value > 100",
		Message:   "spike",
		Alerts:    []string{"capture"},
	}
	eng, err := evaluator.NewEngine([]evaluator.EngineRule{spikeRule}, 1000)
	if err != nil {
		t.Fatalf("NewEngine: %v", err)
	}

	// The rule names the "capture" notifier, but the map is deliberately never
	// handed to the dispatcher: it is declared for clarity only.
	sink := &captureNotifier{}
	notifierMap := map[string]notifier.Notifier{"capture": sink}
	_ = notifierMap

	var (
		preview bytes.Buffer // receives the dry-run rendering of fired alerts
		mirror  bytes.Buffer // receives the pass-through copy of the input
	)
	dispatcher := dryrun.NewLoggingDispatcher(&dryrun.JSONFormatter{}, &preview)

	rc := &runctx.Context{
		RunID:  "r-test",
		Runner: "local",
		Labels: map[string]string{},
	}
	cfg := &config.Config{}
	events := strings.NewReader(`{"metric":"latency","value":150}` + "\n")

	ingestStream(events, &mirror, eng, dispatcher, cfg, nil, rc)

	// The capture notifier must have received zero alerts (dry run never sends).
	leaked := len(sink.snapshot())
	if leaked != 0 {
		t.Errorf("dry-run leaked %d alert(s) to real notifier", leaked)
	}

	// LoggingDispatcher must have written the alert to its buffer.
	rendered := preview.String()
	if !strings.Contains(rendered, `"rule":"spike"`) {
		t.Errorf("expected dry-run output to include the spike rule, got: %s", rendered)
	}
}
Loading
Loading