Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 140 additions & 0 deletions admin/app/sla-tracking/page.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
'use client';

import React, { useEffect, useState } from 'react';
import { LineChart } from '../components/LineChart';

/**
 * One row of the SLA metrics payload, as consumed by this page.
 */
interface SLAMetric {
  /** Numeric contract key; used to look the contract up in the `contracts` map. */
  contract: number;
  /** Most recent SLA value, rendered as a percentage; `null` when there is none. */
  latest_sla: number | null;
  /** Hour associated with `latest_sla` (not read by this page); `null` when absent. */
  latest_hour: string | null;
  /** Average SLA value, rendered as a percentage; `null` when there is none. */
  avg_sla: number | null;
  /** Violation count; any value above zero is highlighted in red. */
  violations: number;
}

/**
 * Contract record as returned in the `results` list of the contracts endpoint.
 */
interface Contract {
  /** Numeric key; the page indexes its contract lookup map by this value. */
  id: number;
  /** String contract identifier (not displayed by this page). */
  contract_id: string;
  /** Contract name; used as the display label when `alias` is empty. */
  name: string;
  /** Preferred display label when non-empty. */
  alias: string;
}

/**
 * Maps an SLA percentage to the Tailwind text-colour class used to render it.
 *
 * @param sla - SLA percentage, or `null`/`undefined` when no value is available.
 * @returns Grey for a missing value, green at 98 and above, yellow at 95 and
 *   above, red for everything else.
 */
function getSLAColor(sla: number | null | undefined): string {
  // `== null` matches both null and undefined.
  if (sla == null) {
    return 'text-zinc-400';
  }
  return sla >= 98 ? 'text-green-400' : sla >= 95 ? 'text-yellow-400' : 'text-red-400';
}

/**
 * Maps an SLA percentage to the status label shown next to it.
 *
 * @param sla - SLA percentage, or `null`/`undefined` when no value is available.
 * @returns `'No Data'` for a missing value, `'Good'` at 98 and above,
 *   `'Warning'` at 95 and above, `'Critical'` otherwise.
 */
function getSLAStatus(sla: number | null | undefined): string {
  const goodThreshold = 98;
  const warningThreshold = 95;

  if (sla === undefined || sla === null) {
    return 'No Data';
  }
  if (sla >= goodThreshold) {
    return 'Good';
  }
  if (sla >= warningThreshold) {
    return 'Warning';
  }
  return 'Critical';
}

/** Resolves the display label for a contract: alias, then name, then a generic fallback. */
function getContractLabel(contracts: Record<number, Contract>, contractId: number): string {
  const contract = contracts[contractId];
  // `||` is deliberate: an empty alias/name should fall through to the next option.
  return contract?.alias || contract?.name || `Contract ${contractId}`;
}

/** Formats an SLA percentage for display; a missing value renders as plain "N/A" (no "%"). */
function formatSLAPercent(value: number | null | undefined): string {
  return value === null || value === undefined ? 'N/A' : `${value.toFixed(1)}%`;
}

/**
 * SLA tracking page.
 *
 * On mount it loads the SLA metrics list and the contract list, then renders a
 * chart of the latest SLA per contract and a table with one row per metric.
 *
 * Chart data is derived from `metrics` and `contracts` on render rather than
 * stored in state, so labels update once the contract list arrives regardless
 * of which request finishes first.
 */
export default function SLAMetricsPage() {
  const [metrics, setMetrics] = useState<SLAMetric[]>([]);
  const [contracts, setContracts] = useState<Record<number, Contract>>({});
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);

  useEffect(() => {
    // Prevents state updates after the component has unmounted.
    let cancelled = false;

    const fetchMetrics = async (): Promise<void> => {
      setLoading(true);
      setError(null);
      try {
        const response = await fetch('/api/ingest/admin/sla-metrics/');
        if (!response.ok) {
          throw new Error(`request failed with status ${response.status}`);
        }
        const data: unknown = await response.json();
        // The table and chart both call .map on this value, so reject anything else.
        if (!Array.isArray(data)) {
          throw new Error('unexpected response shape');
        }
        if (!cancelled) {
          setMetrics(data as SLAMetric[]);
        }
      } catch (e: unknown) {
        if (!cancelled) {
          setError(e instanceof Error ? e.message : 'unknown error');
        }
      } finally {
        if (!cancelled) {
          setLoading(false);
        }
      }
    };

    const fetchContracts = async (): Promise<void> => {
      try {
        const response = await fetch('/api/contracts/');
        if (!response.ok) {
          throw new Error(`request failed with status ${response.status}`);
        }
        const data = (await response.json()) as { results?: Contract[] } | null;
        const contractsMap: Record<number, Contract> = {};
        if (Array.isArray(data?.results)) {
          for (const c of data.results) {
            contractsMap[c.id] = c;
          }
        }
        if (!cancelled) {
          setContracts(contractsMap);
        }
      } catch (e: unknown) {
        // Non-fatal: rows and chart points fall back to "Contract <id>" labels.
        console.error('Failed to load contracts', e);
      }
    };

    void fetchMetrics();
    void fetchContracts();

    return () => {
      cancelled = true;
    };
  }, []);

  // One chart point per metric; a missing latest SLA is plotted as 0.
  const trendData = React.useMemo(
    () =>
      metrics.map((m) => ({
        name: getContractLabel(contracts, m.contract),
        sla: m.latest_sla ?? 0,
      })),
    [metrics, contracts],
  );

  if (loading) {
    return (
      <div className="min-h-screen bg-zinc-950 p-6">
        <div className="text-zinc-400 font-mono">Loading SLA metrics...</div>
      </div>
    );
  }

  return (
    <div className="min-h-screen bg-zinc-950 p-6">
      <div className="mb-6">
        <h1 className="text-2xl font-mono text-zinc-100 uppercase tracking-wider mb-2">
          SLA Tracking
        </h1>
        <p className="text-sm font-mono text-zinc-500">
          Event completeness SLA metrics for tracked contracts
        </p>
      </div>

      {error !== null && (
        <div role="alert" className="mb-6 text-sm font-mono text-red-400">
          Failed to load SLA metrics: {error}
        </div>
      )}

      <div className="mb-6">
        <LineChart
          data={trendData}
          lines={[
            { dataKey: 'sla', name: 'SLA %', stroke: '#00ff00' },
          ]}
          title="SLA Trend"
          description="Current SLA percentage by contract"
        />
      </div>

      <div className="bg-zinc-900 border border-zinc-800 rounded-lg p-4">
        <table className="w-full font-mono text-xs">
          <thead>
            <tr className="text-left text-zinc-500 uppercase">
              <th className="py-2 px-3">Contract</th>
              <th className="py-2 px-3">Latest SLA</th>
              <th className="py-2 px-3">Status</th>
              <th className="py-2 px-3">Avg SLA</th>
              <th className="py-2 px-3">Violations</th>
            </tr>
          </thead>
          <tbody className="divide-y divide-zinc-800">
            {metrics.length === 0 ? (
              <tr>
                <td colSpan={5} className="py-2 px-3 text-zinc-500">
                  No SLA data available
                </td>
              </tr>
            ) : (
              metrics.map((metric) => (
                <tr key={metric.contract}>
                  <td className="py-2 px-3 text-zinc-300">
                    {getContractLabel(contracts, metric.contract)}
                  </td>
                  <td className="py-2 px-3">
                    <span className={getSLAColor(metric.latest_sla)}>
                      {formatSLAPercent(metric.latest_sla)}
                    </span>
                  </td>
                  <td className="py-2 px-3 text-zinc-500">
                    {getSLAStatus(metric.latest_sla)}
                  </td>
                  <td className="py-2 px-3 text-zinc-500">
                    {formatSLAPercent(metric.avg_sla)}
                  </td>
                  <td className="py-2 px-3">
                    <span className={metric.violations > 0 ? 'text-red-400' : 'text-green-400'}>
                      {metric.violations}
                    </span>
                  </td>
                </tr>
              ))
            )}
          </tbody>
        </table>
      </div>
    </div>
  );
}
179 changes: 179 additions & 0 deletions django-backend/soroscan/ingest/management/commands/calculate_sla.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
from datetime import datetime, timedelta

from django.conf import settings
from django.core.management.base import BaseCommand
from django.utils import timezone

from soroscan.ingest.models import (
ContractCompletenessSLA,
ContractEvent,
SLAAlert,
TrackedContract,
)
from soroscan.ingest.stellar_client import SorobanClient


# Fallback hourly event baseline, used when Django settings do not define
# SLA_DEFAULT_EXPECTED_EVENTS_PER_HOUR.
DEFAULT_EXPECTED_EVENTS_PER_HOUR = 100


class Command(BaseCommand):
    """Compute hourly event-completeness SLA records for tracked contracts.

    For each active contract (or the single one selected with
    ``--contract-id``) and each of the last ``--hours`` complete hours, the
    command counts indexed ``ContractEvent`` rows, compares that count with an
    expected count, upserts a ``ContractCompletenessSLA`` row keyed by
    ``(contract, hour_start)`` and creates ``SLAAlert`` rows.
    """

    help = "Calculate event completeness SLA for all active contracts for the previous hour."

    def add_arguments(self, parser):
        """Register the ``--contract-id`` and ``--hours`` options."""
        parser.add_argument(
            "--contract-id",
            type=str,
            default=None,
            help="Optional contract ID to calculate SLA for a specific contract",
        )
        parser.add_argument(
            "--hours",
            type=int,
            default=1,
            help="Number of past hours to process (default: 1)",
        )

    def handle(self, *args, **options):
        """Select the contracts to process and calculate SLA for each one."""
        contract_id = options["contract_id"]
        hours = max(1, min(options["hours"], 168))  # Limit to 1-168 hours

        if contract_id:
            contracts = TrackedContract.objects.filter(
                contract_id=contract_id,
                is_active=True,
            )
            if not contracts.exists():
                self.stdout.write(
                    self.style.WARNING(
                        f"No active contract found with ID: {contract_id}"
                    )
                )
                return
        else:
            contracts = TrackedContract.objects.filter(is_active=True)

        for contract in contracts:
            self._calculate_sla_for_contract(contract, hours)

        self.stdout.write(
            self.style.SUCCESS(
                f"Processed SLA calculations for {contracts.count()} contract(s)"
            )
        )

    def _calculate_sla_for_contract(self, contract: TrackedContract, hours: int):
        """Calculate SLA for a single contract across multiple hours.

        Processes the ``hours`` most recent complete hours, newest first. An
        SLA percentage below 95.0 is recorded as a violation.
        """
        use_rpc = getattr(settings, "SLA_USE_RPC_EXPECTED_EVENTS", False)
        client = SorobanClient() if use_rpc else None

        # Read the clock once so every bucket in this run is aligned to the
        # same reference hour, even if the run crosses an hour boundary.
        current_hour = timezone.now().replace(
            minute=0,
            second=0,
            microsecond=0,
        )

        for hour_offset in range(hours):
            hour_start = current_hour - timedelta(hours=hour_offset + 1)
            hour_end = self._hour_end_for_offset(hour_start, hour_offset)

            # Count indexed events in this hour
            indexed_count = ContractEvent.objects.filter(
                contract=contract,
                timestamp__gte=hour_start,
                timestamp__lt=hour_end,
            ).count()

            # Estimate expected events locally unless RPC-backed counts are enabled.
            expected_count = self._get_expected_event_count(
                client,
                contract,
                indexed_count,
            )

            # An expected count of zero is treated as fully compliant.
            sla_percentage = (
                (indexed_count / expected_count * 100)
                if expected_count > 0
                else 100.0
            )
            is_violated = sla_percentage < 95.0

            sla_record, created = ContractCompletenessSLA.objects.update_or_create(
                contract=contract,
                hour_start=hour_start,
                defaults={
                    "events_expected": expected_count,
                    "events_indexed": indexed_count,
                    "sla_percentage": sla_percentage,
                    "is_violated": is_violated,
                },
            )

            if is_violated and not sla_record.alert_sent:
                SLAAlert.objects.create(
                    sla_record=sla_record,
                    alert_type=SLAAlert.ALERT_TYPE_SLA_VIOLATION,
                    contract=contract,
                    message=(
                        f"SLA violation detected for {contract.name}: "
                        f"{sla_percentage:.1f}% events indexed "
                        f"(expected {expected_count}, got {indexed_count})"
                    ),
                )
                # Mark the record so re-runs for the same hour do not alert again.
                sla_record.alert_sent = True
                sla_record.save(update_fields=["alert_sent"])
                self.stdout.write(
                    self.style.WARNING(
                        f"SLA violation: {contract.name} @ {hour_start}: "
                        f"{sla_percentage:.1f}%"
                    )
                )
            elif not is_violated and created:
                # NOTE(review): this fires for every newly created healthy
                # record, whether or not an earlier hour was in violation.
                # Confirm whether a recovery alert should require a preceding
                # violated record.
                SLAAlert.objects.create(
                    sla_record=sla_record,
                    alert_type=SLAAlert.ALERT_TYPE_RECOVERY,
                    contract=contract,
                    message=(
                        f"SLA recovered for {contract.name}: "
                        f"{sla_percentage:.1f}% events indexed"
                    ),
                )

    def _hour_end_for_offset(self, hour_start: datetime, hour_offset: int) -> datetime:
        """Return the exclusive end of the one-hour bucket starting at ``hour_start``.

        ``hour_offset`` is kept for backward compatibility and no longer
        affects the result. Every bucket spans exactly one hour, so events in
        the current, still-running hour are not counted towards the previous
        hour's record and then counted again on the next run.
        """
        return hour_start + timedelta(hours=1)

    def _get_expected_event_count(
        self,
        client: SorobanClient | None,
        contract: TrackedContract,
        indexed_count: int,
    ) -> int:
        """Return the number of events the contract is expected to have.

        Without a client the result is a local estimate: 0 when nothing was
        indexed, otherwise the larger of ``indexed_count`` and the configured
        hourly baseline. With a client, the RPC event count is used when it
        exceeds that estimate. Any RPC failure falls back to the estimate.
        """
        baseline = int(
            getattr(
                settings,
                "SLA_DEFAULT_EXPECTED_EVENTS_PER_HOUR",
                DEFAULT_EXPECTED_EVENTS_PER_HOUR,
            )
        )

        def estimated_count() -> int:
            # With nothing indexed the estimate is 0, which the caller
            # treats as 100% compliant.
            if indexed_count <= 0:
                return 0
            return max(indexed_count, baseline)

        if client is None:
            return estimated_count()

        try:
            # NOTE(review): this ledger range is not limited to the hour being
            # evaluated, so the RPC count presumably covers every event the
            # endpoint returns for the contract. Confirm against
            # SorobanClient.get_events_range before enabling
            # SLA_USE_RPC_EXPECTED_EVENTS.
            events = client.get_events_range(
                contract_id=contract.contract_id,
                start_ledger=0,
                end_ledger=999999999999,
            )
            rpc_count = len(events) if events else 0
            return max(rpc_count, estimated_count())
        except Exception as exc:
            # Best effort: report the failure, then estimate from indexed
            # events and the configured baseline.
            self.stderr.write(
                self.style.WARNING(
                    f"RPC expected-event lookup failed for {contract.contract_id}: {exc}"
                )
            )
            return estimated_count()
Loading
Loading