Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions hack/e2e/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ The default `all` command runs:
1. Build the local `aks-flex-node` binary unless `--binary` or `--skip-build` is used.
2. Deploy AKS and three VMs with Bicep.
3. Join all three VMs.
4. Validate node readiness and run smoke workloads.
4. Validate node readiness and node-problem-detector status, then run smoke workloads.
5. Unjoin all Flex Nodes and verify they are absent.
6. Rejoin all Flex Nodes and validate again.
7. Run local-machine-driven repave validation.
Expand All @@ -51,7 +51,7 @@ The default `all` command runs:
| `unjoin-msi` | Unjoin only the managed-identity node. |
| `unjoin-token` | Unjoin only the bootstrap-token node. |
| `unjoin-kubeadm` | Unjoin only the kubeadm-style node. |
| `validate` | Verify joined nodes and run smoke tests. |
| `validate` | Verify joined nodes and node-problem-detector status, then run smoke tests. |
| `validate-absent` | Verify Flex Node objects are absent after unjoin. |
| `smoke` | Run smoke workloads only. |
| `upgrade-drift` | Validate local-machine-driven repave to the alternate nspawn side. |
Expand Down Expand Up @@ -197,6 +197,6 @@ Logs are collected under `$E2E_WORK_DIR/logs/`.
- **Missing prerequisites:** run `./hack/e2e/run.sh --help` and confirm `az`, `jq`, `kubectl`, `ssh`, `scp`, and `openssl` are available.
- **Azure auth failures:** run `az account show` and `az login` if needed.
- **SSH failures:** inspect `state.json` for VM public IPs and confirm the SSH key configured by `E2E_SSH_KEY_FILE` is available.
- **Node join failures:** run `./hack/e2e/run.sh logs` and inspect agent, bootstrap unit, kubelet, and containerd logs.
- **Node join failures:** run `./hack/e2e/run.sh logs` and inspect agent, bootstrap unit, kubelet, containerd, and node-problem-detector logs.
- **Repave failures:** check `aks-flex-node-agent` logs, `machinectl list`, and kubelet versions inside `kube1` and `kube2`.
- **Leftover resources:** run `E2E_RESOURCE_GROUP=<rg> ./hack/e2e/run.sh cleanup`.
33 changes: 33 additions & 0 deletions hack/e2e/lib/cleanup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,39 @@ _collect_vm_logs() {
fi" \
> "${E2E_LOG_DIR}/${prefix}-containerd.log" 2>/dev/null || true

remote_exec "${vm_ip}" "bash -s" <<'REMOTE' > "${E2E_LOG_DIR}/${prefix}-npd.log" 2>&1 || true
npd_service="node-problem-detector.service"
active_machine="$(sudo python3 - <<'PY'
import json
import sys

try:
with open("/etc/aks-flex-node/daemon-state.json", encoding="utf-8") as state:
active_machine = json.load(state).get("activeMachine", "")
if active_machine:
print(active_machine)
else:
print("daemon state does not include activeMachine", file=sys.stderr)
except FileNotFoundError as exc:
print(f"daemon state not found: {exc}", file=sys.stderr)
except json.JSONDecodeError as exc:
print(f"daemon state is not valid JSON: {exc}", file=sys.stderr)
except PermissionError as exc:
print(f"daemon state permission denied: {exc}", file=sys.stderr)
PY
)"
if [ -n "${active_machine}" ]; then
echo "=== ${npd_service} logs (${active_machine}) ==="
# Match the agent and kubelet log depth; NPD entries are sparse but useful across node lifecycle phases.
sudo systemd-run --machine="${active_machine}" --quiet --pipe journalctl -u "${npd_service}" -n 500 --no-pager || \
echo "warning: failed to collect ${npd_service} logs from ${active_machine}"
else
echo "warning: active machine unknown; falling back to host journal"
sudo journalctl -u "${npd_service}" -n 500 --no-pager || \
echo "warning: failed to collect ${npd_service} logs from host"
fi
REMOTE

# Collect CNI config and nspawn machine state for networking diagnostics.
# Read directly from the nspawn rootfs at /var/lib/machines/kube1/.
local kube1_root="/var/lib/machines/kube1"
Expand Down
95 changes: 95 additions & 0 deletions hack/e2e/lib/validate.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# Functions:
# validate_node_joined <vm_name> - Wait for a specific node to appear in kubectl
# validate_all_nodes - Verify MSI, token, and kubeadm nodes joined
# validate_npd_status <vm_name> <vm_ip> - Verify node-problem-detector is active
# validate_node_absent <vm_name> - Wait for a node to disappear from kubectl
# validate_all_nodes_absent - Verify all flex nodes are gone after unjoin
# smoke_test <vm_name> <label> - Schedule an nginx pod on a node
Expand Down Expand Up @@ -73,6 +74,93 @@ validate_node_ip() {
return 1
}

# ---------------------------------------------------------------------------
# validate_npd_status - Ensure node-problem-detector is active and reporting
#
# Arguments:
#   $1 - vm_name: Kubernetes node name queried with kubectl
#   $2 - vm_ip:   address handed to remote_exec to reach the VM
#
# Two checks run in sequence, each polling for up to E2E_NODE_JOIN_TIMEOUT
# seconds:
#   1. On the VM, node-problem-detector.service must be active inside the
#      machine named by "activeMachine" in /etc/aks-flex-node/daemon-state.json.
#   2. The node's KernelDeadlock condition must read "False".
#
# Returns 0 only when both checks pass, 1 otherwise.
# ---------------------------------------------------------------------------
validate_npd_status() {
    local vm_name="$1"
    local vm_ip="$2"
    local timeout="${E2E_NODE_JOIN_TIMEOUT}"
    local elapsed=0
    local npd_condition_jsonpath='{.status.conditions[?(@.type=="KernelDeadlock")].status}'
    local condition_error="${E2E_WORK_DIR}/npd-condition-${vm_name}.err"
    local quoted_timeout
    local remote_rc=0

    log_info "Validating node-problem-detector on '${vm_name}'..."

    # The timeout is interpolated into the remote command line, so reject
    # anything that is not a plain non-negative integer before quoting it.
    if ! [[ "${timeout}" =~ ^[0-9]+$ ]]; then
        log_error "E2E_NODE_JOIN_TIMEOUT must be numeric, got '${timeout}'"
        return 1
    fi
    printf -v quoted_timeout "%q" "${timeout}"

    # Capture the remote exit status explicitly. Callers invoke this function
    # as `validate_npd_status ... || failed=1`, which disables errexit inside
    # the function body, so an unchecked failure here would be silently ignored.
    remote_exec "${vm_ip}" "E2E_NODE_JOIN_TIMEOUT=${quoted_timeout} bash -s" <<'REMOTE' || remote_rc=$?
set -euo pipefail

deadline=$((SECONDS + E2E_NODE_JOIN_TIMEOUT))
active_machine_error="/tmp/aks-flex-node-e2e-active-machine-$$.err"
status_error="/tmp/aks-flex-node-e2e-npd-status-$$.err"
# Remove the scratch files on every exit path so repeated runs do not litter /tmp.
trap 'rm -f "${active_machine_error}" "${status_error}"' EXIT
while true; do
    if [[ ! -f /etc/aks-flex-node/daemon-state.json ]]; then
        active_machine=""
        echo "/etc/aks-flex-node/daemon-state.json is missing" > "${active_machine_error}"
    else
        # Parse failures are tolerated here; stderr is kept for the timeout report.
        active_machine="$(sudo python3 - <<'PY' 2>"${active_machine_error}" || true
import json
with open("/etc/aks-flex-node/daemon-state.json", encoding="utf-8") as state:
    print(json.load(state).get("activeMachine", ""))
PY
)"
    fi
    if [[ -n "${active_machine}" ]] && sudo machinectl show "${active_machine}" &>/dev/null; then
        status="$(sudo systemd-run --machine="${active_machine}" --quiet --pipe systemctl is-active node-problem-detector.service 2>"${status_error}" || true)"
        if [[ "${status}" == "active" ]]; then
            echo "node-problem-detector.service is active in ${active_machine}"
            exit 0
        fi
    fi

    if (( SECONDS >= deadline )); then
        # Out of time: dump everything gathered so far to aid debugging.
        echo "node-problem-detector.service did not become active"
        if [[ -s "${active_machine_error}" ]]; then
            cat "${active_machine_error}"
        fi
        if [[ -s "${status_error}" ]]; then
            cat "${status_error}"
        fi
        sudo machinectl list --no-pager || true
        if [[ -n "${active_machine:-}" ]]; then
            sudo systemd-run --machine="${active_machine}" --quiet --pipe systemctl status node-problem-detector.service --no-pager -l || true
            sudo systemd-run --machine="${active_machine}" --quiet --pipe journalctl -u node-problem-detector.service -n 50 --no-pager || true
        fi
        exit 1
    fi

    sleep 5
done
REMOTE

    if [[ "${remote_rc}" -ne 0 ]]; then
        log_error "node-problem-detector.service is not active on '${vm_name}' (remote check exited with ${remote_rc})"
        return 1
    fi

    # The unit is running; now wait until it has published its node condition.
    local kernel_deadlock
    while [[ "${elapsed}" -lt "${timeout}" ]]; do
        kernel_deadlock="$(kubectl get node "${vm_name}" -o jsonpath="${npd_condition_jsonpath}" 2>"${condition_error}" || true)"
        if [[ "${kernel_deadlock}" == "False" ]]; then
            log_success "node-problem-detector is active and reporting on '${vm_name}'"
            return 0
        fi

        sleep 10
        elapsed=$((elapsed + 10))
        log_debug "Waiting for node-problem-detector condition on ${vm_name}... (${elapsed}/${timeout}s)"
    done

    log_error "node-problem-detector did not report KernelDeadlock=False on '${vm_name}' within ${timeout}s"
    if [[ -s "${condition_error}" ]]; then
        cat "${condition_error}" >&2
    fi
    kubectl describe node "${vm_name}" 2>&1 || true
    return 1
}

# ---------------------------------------------------------------------------
# validate_all_nodes - Check all MSI, token, and kubeadm VMs joined
# ---------------------------------------------------------------------------
Expand All @@ -91,17 +179,24 @@ validate_all_nodes() {
--admin

local msi_vm_name token_vm_name kubeadm_vm_name
local msi_vm_ip token_vm_ip kubeadm_vm_ip
local token_vm_private_ip
msi_vm_name="$(state_get msi_vm_name)"
token_vm_name="$(state_get token_vm_name)"
kubeadm_vm_name="$(state_get kubeadm_vm_name)"
msi_vm_ip="$(state_get msi_vm_ip)"
token_vm_ip="$(state_get token_vm_ip)"
kubeadm_vm_ip="$(state_get kubeadm_vm_ip)"
token_vm_private_ip="$(state_get token_vm_private_ip)"

local failed=0
validate_node_joined "${msi_vm_name}" || failed=1
validate_node_joined "${token_vm_name}" || failed=1
validate_node_joined "${kubeadm_vm_name}" || failed=1
validate_node_ip "${token_vm_name}" "${token_vm_private_ip}" || failed=1
validate_npd_status "${msi_vm_name}" "${msi_vm_ip}" || failed=1
validate_npd_status "${token_vm_name}" "${token_vm_ip}" || failed=1
validate_npd_status "${kubeadm_vm_name}" "${kubeadm_vm_ip}" || failed=1

if [[ "${failed}" -eq 1 ]]; then
log_error "One or more nodes failed to join"
Expand Down
2 changes: 1 addition & 1 deletion pkg/daemon/nodeoperator.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ func (o *nspawnNodeOperator) RestartNode(ctx context.Context, log *slog.Logger)
nodestop.StopNode(log, active.Name),
nodestart.StartNode(log, gs.NodeStart),
nodestart.WaitForKubelet(log, active.Name),
npd.Start(cfg, log, gs.RootFS.MachineDir, active.Name),
npd.Start(log, gs.NodeStart),
).Do(ctx)
}

Expand Down
2 changes: 1 addition & 1 deletion pkg/daemon/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ func StartNode(
),
nodestart.StartNode(log, gs.NodeStart),
nodestart.WaitForKubelet(log, machineName),
npd.Start(cfg, log, gs.RootFS.MachineDir, machineName),
npd.Start(log, gs.NodeStart),
saveState(store, state),
)
}
2 changes: 1 addition & 1 deletion pkg/npd/assets/node-problem-detector.service
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ Description=Node Problem Detector
After=network.target

[Service]
ExecStart={{.NPDBinaryPath}} --apiserver-override="{{.APIServerURL}}?inClusterConfig=false&auth={{.KubeconfigPath}}" --config.system-log-monitor={{.NPDConfigPath}}
ExecStart={{.NPDBinaryPath}} --hostname-override={{.NodeName}} --apiserver-override="{{.APIServerURL}}?inClusterConfig=false&auth={{.KubeconfigPath}}" --config.system-log-monitor={{.NPDConfigPath}}
Restart=on-failure
RestartSec=5s

Expand Down
18 changes: 10 additions & 8 deletions pkg/npd/start.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,17 @@ import (
"path/filepath"
"text/template"

"github.com/Azure/AKSFlexNode/pkg/config"
"github.com/Azure/AKSFlexNode/pkg/utils/utilexec"
"github.com/Azure/AKSFlexNode/pkg/utils/utilio"
"github.com/Azure/unbounded/pkg/agent/goalstates"
"github.com/Azure/unbounded/pkg/agent/phases"
)

//go:embed assets/node-problem-detector.service
var serviceTemplate string

const (
KubeletKubeconfigPath = "/var/lib/kubelet/kubelet/kubeconfig"
systemdUnitNPD = "node-problem-detector.service"
systemdUnitNPD = "node-problem-detector.service"
)

var tmpl = template.Must(template.New("npd-service").Parse(serviceTemplate))
Expand All @@ -33,18 +32,20 @@ type startTask struct {
kubeconfigPath string
machineDir string
machineName string
nodeName string
}

// Start returns a task that renders the NPD systemd unit file into the
// nspawn machine rootfs and ensures the service is running inside the
// container via systemd-run --machine.
func Start(cfg *config.Config, log *slog.Logger, machineDir, machineName string) phases.Task {
func Start(log *slog.Logger, nodeStart *goalstates.NodeStart) phases.Task {
return &startTask{
log: log,
apiServer: cfg.Node.Kubelet.ServerURL,
kubeconfigPath: KubeletKubeconfigPath,
machineDir: machineDir,
machineName: machineName,
apiServer: nodeStart.Kubelet.APIServer,
kubeconfigPath: goalstates.KubeletKubeconfigPath,
machineDir: nodeStart.MachineDir,
machineName: nodeStart.MachineName,
nodeName: nodeStart.NodeName,
}
}

Expand All @@ -66,6 +67,7 @@ func (t *startTask) ensureServiceFile() (updated bool, err error) {
"APIServerURL": t.apiServer,
"KubeconfigPath": t.kubeconfigPath,
"NPDConfigPath": npdConfigPath,
"NodeName": t.nodeName,
}); err != nil {
return false, fmt.Errorf("render npd service template: %w", err)
}
Expand Down
70 changes: 70 additions & 0 deletions pkg/npd/start_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package npd

import (
"log/slog"
"os"
"path/filepath"
"strings"
"testing"

"github.com/Azure/unbounded/pkg/agent/goalstates"
)

// wantKubeconfigPath is the path the kubelet actually writes its kubeconfig to.
// Both tests in this file compare against it.
//
// The historical bug used a doubled segment ("/var/lib/kubelet/kubelet/kubeconfig"),
// which made node-problem-detector panic on startup and crash-loop forever:
//
//	panic: stat /var/lib/kubelet/kubelet/kubeconfig: no such file or directory
const wantKubeconfigPath = "/var/lib/kubelet/kubeconfig"

// TestCanonicalKubeletKubeconfigPath guards the value of the shared library
// constant so the doubled-segment typo cannot reappear from a dependency bump.
func TestCanonicalKubeletKubeconfigPath(t *testing.T) {
Comment thread
bcho marked this conversation as resolved.
t.Parallel()
if goalstates.KubeletKubeconfigPath != wantKubeconfigPath {
t.Fatalf("goalstates.KubeletKubeconfigPath = %q, want %q",
goalstates.KubeletKubeconfigPath, wantKubeconfigPath)
}
}

// TestRenderedNPDUnitUsesCanonicalKubeconfig drives Start() through the real
// service-file rendering and asserts on the kubeconfig path that ends up in the
// node-problem-detector systemd unit's ExecStart. Asserting on the rendered unit
// (the externally observable artifact) rather than internal fields keeps the
// test stable across refactors while still exercising the Start() wiring.
func TestRenderedNPDUnitUsesCanonicalKubeconfig(t *testing.T) {
Comment thread
bcho marked this conversation as resolved.
t.Parallel()
machineDir := t.TempDir()
nodeStart := &goalstates.NodeStart{
MachineDir: machineDir,
MachineName: "kube1",
NodeName: "vm-e2e-token-1781659839",
Kubelet: goalstates.Kubelet{
APIServer: "https://example.hcp.westus.azmk8s.io:443",
},
}
task, ok := Start(slog.Default(), nodeStart).(*startTask)
if !ok {
t.Fatalf("Start did not return *startTask")
}
if _, err := task.ensureServiceFile(); err != nil {
t.Fatalf("ensureServiceFile: %v", err)
}

unitPath := filepath.Join(machineDir, "etc/systemd/system", systemdUnitNPD)
data, err := os.ReadFile(unitPath) //nolint:gosec // path built from test TempDir
if err != nil {
t.Fatalf("read rendered unit: %v", err)
}
rendered := string(data)

if !strings.Contains(rendered, "auth="+wantKubeconfigPath) {
t.Fatalf("rendered unit missing auth=%s:\n%s", wantKubeconfigPath, rendered)
}
if strings.Contains(rendered, "kubelet/kubelet") {
t.Fatalf("rendered unit contains doubled 'kubelet' segment:\n%s", rendered)
}
if !strings.Contains(rendered, "--hostname-override=vm-e2e-token-1781659839") {
t.Fatalf("rendered unit missing hostname override:\n%s", rendered)
}
}
Loading