Skip to content
6 changes: 3 additions & 3 deletions hack/e2e/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ The default `all` command runs:
1. Build the local `aks-flex-node` binary unless `--binary` or `--skip-build` is used.
2. Deploy AKS and three VMs with Bicep.
3. Join all three VMs.
4. Validate node readiness and run smoke workloads.
4. Validate node readiness and node-problem-detector status, then run smoke workloads.
5. Unjoin all Flex Nodes and verify they are absent.
6. Rejoin all Flex Nodes and validate again.
7. Run local-machine-driven repave validation.
Expand All @@ -51,7 +51,7 @@ The default `all` command runs:
| `unjoin-msi` | Unjoin only the managed-identity node. |
| `unjoin-token` | Unjoin only the bootstrap-token node. |
| `unjoin-kubeadm` | Unjoin only the kubeadm-style node. |
| `validate` | Verify joined nodes and run smoke tests. |
| `validate` | Verify joined nodes and node-problem-detector status, then run smoke tests. |
| `validate-absent` | Verify Flex Node objects are absent after unjoin. |
| `smoke` | Run smoke workloads only. |
| `upgrade-drift` | Validate local-machine-driven repave to the alternate nspawn side. |
Expand Down Expand Up @@ -197,6 +197,6 @@ Logs are collected under `$E2E_WORK_DIR/logs/`.
- **Missing prerequisites:** run `./hack/e2e/run.sh --help` and confirm `az`, `jq`, `kubectl`, `ssh`, `scp`, and `openssl` are available.
- **Azure auth failures:** run `az account show` and `az login` if needed.
- **SSH failures:** inspect `state.json` for VM public IPs and confirm the SSH key configured by `E2E_SSH_KEY_FILE` is available.
- **Node join failures:** run `./hack/e2e/run.sh logs` and inspect agent, bootstrap unit, kubelet, and containerd logs.
- **Node join failures:** run `./hack/e2e/run.sh logs` and inspect agent, bootstrap unit, kubelet, containerd, and node-problem-detector logs.
- **Repave failures:** check `aks-flex-node-agent` logs, `machinectl list`, and kubelet versions inside `kube1` and `kube2`.
- **Leftover resources:** run `E2E_RESOURCE_GROUP=<rg> ./hack/e2e/run.sh cleanup`.
33 changes: 33 additions & 0 deletions hack/e2e/lib/cleanup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,39 @@ _collect_vm_logs() {
fi" \
> "${E2E_LOG_DIR}/${prefix}-containerd.log" 2>/dev/null || true

remote_exec "${vm_ip}" "bash -s" <<'REMOTE' > "${E2E_LOG_DIR}/${prefix}-npd.log" 2>&1 || true
npd_service="node-problem-detector.service"
active_machine="$(sudo python3 - <<'PY'
import json
import sys

try:
with open("/etc/aks-flex-node/daemon-state.json", encoding="utf-8") as state:
active_machine = json.load(state).get("activeMachine", "")
if active_machine:
print(active_machine)
else:
print("daemon state does not include activeMachine", file=sys.stderr)
except FileNotFoundError as exc:
print(f"daemon state not found: {exc}", file=sys.stderr)
except json.JSONDecodeError as exc:
print(f"daemon state is not valid JSON: {exc}", file=sys.stderr)
except PermissionError as exc:
print(f"daemon state permission denied: {exc}", file=sys.stderr)
PY
)"
if [ -n "${active_machine}" ]; then
echo "=== ${npd_service} logs (${active_machine}) ==="
# Match the agent and kubelet log depth; NPD entries are sparse but useful across node lifecycle phases.
sudo systemd-run --machine="${active_machine}" --quiet --pipe journalctl -u "${npd_service}" -n 500 --no-pager || \
echo "warning: failed to collect ${npd_service} logs from ${active_machine}"
else
echo "warning: active machine unknown; falling back to host journal"
sudo journalctl -u "${npd_service}" -n 500 --no-pager || \
echo "warning: failed to collect ${npd_service} logs from host"
fi
REMOTE

# Collect CNI config and nspawn machine state for networking diagnostics.
# Read directly from the nspawn rootfs at /var/lib/machines/kube1/.
local kube1_root="/var/lib/machines/kube1"
Expand Down
95 changes: 95 additions & 0 deletions hack/e2e/lib/validate.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# Functions:
# validate_node_joined <vm_name> - Wait for a specific node to appear in kubectl
# validate_all_nodes - Verify MSI, token, and kubeadm nodes joined
# validate_npd_status <vm_name> <vm_ip> - Verify node-problem-detector is active
# validate_node_absent <vm_name> - Wait for a node to disappear from kubectl
# validate_all_nodes_absent - Verify all flex nodes are gone after unjoin
# smoke_test <vm_name> <label> - Schedule an nginx pod on a node
Expand Down Expand Up @@ -73,6 +74,93 @@ validate_node_ip() {
return 1
}

# ---------------------------------------------------------------------------
# validate_npd_status - Ensure node-problem-detector is active and reporting
#
# Runs two phases, each bounded by E2E_NODE_JOIN_TIMEOUT seconds:
#   1. On the VM: read activeMachine from /etc/aks-flex-node/daemon-state.json
#      and poll `systemctl is-active node-problem-detector.service` inside
#      that machine.
#   2. From the local machine: poll the Kubernetes node object until its
#      KernelDeadlock condition has status "False".
#
# Arguments:
#   $1 - vm_name: node name passed to kubectl
#   $2 - vm_ip:   address passed to remote_exec
# Globals:
#   E2E_NODE_JOIN_TIMEOUT (read) - per-phase timeout; must be all digits
#   E2E_WORK_DIR (read)          - directory that receives kubectl's stderr
# Returns:
#   0 only when both phases succeed; 1 otherwise.
# ---------------------------------------------------------------------------
validate_npd_status() {
  local vm_name="$1"
  local vm_ip="$2"
  local timeout="${E2E_NODE_JOIN_TIMEOUT}"
  local elapsed=0
  local npd_condition_jsonpath='{.status.conditions[?(@.type=="KernelDeadlock")].status}'
  local condition_error="${E2E_WORK_DIR}/npd-condition-${vm_name}.err"
  local quoted_timeout
  local kernel_deadlock=""

  log_info "Validating node-problem-detector on '${vm_name}'..."

  # The value is interpolated into the remote command line and used in
  # arithmetic on both sides, so reject anything that is not plain digits.
  if ! [[ "${timeout}" =~ ^[0-9]+$ ]]; then
    log_error "E2E_NODE_JOIN_TIMEOUT must be numeric, got '${timeout}'"
    return 1
  fi
  printf -v quoted_timeout "%q" "${timeout}"

  # Phase 1: service state inside the active machine.
  # The exit status is tested explicitly: callers run this function on the
  # left of `||`, where errexit is ignored, so an unchecked failure here
  # would fall through to phase 2 and could be reported as success.
  if ! remote_exec "${vm_ip}" "E2E_NODE_JOIN_TIMEOUT=${quoted_timeout} bash -s" <<'REMOTE'
set -euo pipefail

deadline=$((SECONDS + E2E_NODE_JOIN_TIMEOUT))

# Unpredictable scratch files for captured stderr; removed on any exit path.
active_machine_error="$(mktemp /tmp/aks-flex-node-e2e-active-machine.XXXXXX)"
status_error="$(mktemp /tmp/aks-flex-node-e2e-npd-status.XXXXXX)"
trap 'rm -f -- "${active_machine_error}" "${status_error}"' EXIT

while true; do
  if [[ ! -f /etc/aks-flex-node/daemon-state.json ]]; then
    active_machine=""
    echo "/etc/aks-flex-node/daemon-state.json is missing" > "${active_machine_error}"
  else
    # A parse failure leaves active_machine empty and the traceback in the
    # error file; the loop simply retries until the deadline.
    active_machine="$(sudo python3 - <<'PY' 2>"${active_machine_error}" || true
import json
with open("/etc/aks-flex-node/daemon-state.json", encoding="utf-8") as state:
    print(json.load(state).get("activeMachine", ""))
PY
)"
  fi

  if [[ -n "${active_machine}" ]] && machinectl show "${active_machine}" &>/dev/null; then
    status="$(sudo systemd-run --machine="${active_machine}" --quiet --pipe systemctl is-active node-problem-detector.service 2>"${status_error}" || true)"
    if [[ "${status}" == "active" ]]; then
      echo "node-problem-detector.service is active in ${active_machine}"
      exit 0
    fi
  fi

  if (( SECONDS >= deadline )); then
    # Timed out: print everything captured so far plus live diagnostics.
    echo "node-problem-detector.service did not become active"
    if [[ -s "${active_machine_error}" ]]; then
      cat "${active_machine_error}"
    fi
    if [[ -s "${status_error}" ]]; then
      cat "${status_error}"
    fi
    machinectl list --no-pager || true
    if [[ -n "${active_machine:-}" ]]; then
      sudo systemd-run --machine="${active_machine}" --quiet --pipe systemctl status node-problem-detector.service --no-pager -l || true
      sudo systemd-run --machine="${active_machine}" --quiet --pipe journalctl -u node-problem-detector.service -n 50 --no-pager || true
    fi
    exit 1
  fi

  sleep 5
done
REMOTE
  then
    log_error "node-problem-detector.service is not active on '${vm_name}'"
    return 1
  fi

  # Phase 2: the node object must carry KernelDeadlock=False.
  # kubectl's stderr is kept in a file so only the last attempt's error is
  # printed if the wait times out.
  while [[ "${elapsed}" -lt "${timeout}" ]]; do
    kernel_deadlock="$(kubectl get node "${vm_name}" -o jsonpath="${npd_condition_jsonpath}" 2>"${condition_error}" || true)"
    if [[ "${kernel_deadlock}" == "False" ]]; then
      log_success "node-problem-detector is active and reporting on '${vm_name}'"
      return 0
    fi

    sleep 10
    elapsed=$((elapsed + 10))
    log_debug "Waiting for node-problem-detector condition on ${vm_name}... (${elapsed}/${timeout}s)"
  done

  log_error "node-problem-detector did not report KernelDeadlock=False on '${vm_name}' within ${timeout}s"
  if [[ -s "${condition_error}" ]]; then
    cat "${condition_error}" >&2
  fi
  kubectl describe node "${vm_name}" 2>&1 || true
  return 1
}

# ---------------------------------------------------------------------------
# validate_all_nodes - Check all MSI, token, and kubeadm VMs joined
# ---------------------------------------------------------------------------
Expand All @@ -91,17 +179,24 @@ validate_all_nodes() {
--admin

local msi_vm_name token_vm_name kubeadm_vm_name
local msi_vm_ip token_vm_ip kubeadm_vm_ip
local token_vm_private_ip
msi_vm_name="$(state_get msi_vm_name)"
token_vm_name="$(state_get token_vm_name)"
kubeadm_vm_name="$(state_get kubeadm_vm_name)"
msi_vm_ip="$(state_get msi_vm_ip)"
token_vm_ip="$(state_get token_vm_ip)"
kubeadm_vm_ip="$(state_get kubeadm_vm_ip)"
token_vm_private_ip="$(state_get token_vm_private_ip)"

local failed=0
validate_node_joined "${msi_vm_name}" || failed=1
validate_node_joined "${token_vm_name}" || failed=1
validate_node_joined "${kubeadm_vm_name}" || failed=1
validate_node_ip "${token_vm_name}" "${token_vm_private_ip}" || failed=1
validate_npd_status "${msi_vm_name}" "${msi_vm_ip}" || failed=1
validate_npd_status "${token_vm_name}" "${token_vm_ip}" || failed=1
validate_npd_status "${kubeadm_vm_name}" "${kubeadm_vm_ip}" || failed=1

if [[ "${failed}" -eq 1 ]]; then
log_error "One or more nodes failed to join"
Expand Down
Loading