From 8d9c6d571e367e0ee63c391f8ecadb004378db38 Mon Sep 17 00:00:00 2001 From: Khalefa Date: Fri, 5 Jun 2026 04:08:03 +0300 Subject: [PATCH 1/2] fix(deploy): disable rp_filter so multi-hop cascade egress works MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cascade routing is asymmetric — a device's packets reach the exit on the inner-link adapter while the return path leaves a different interface. Reverse-path filtering (even Ubuntu's default "loose" rp_filter=2) silently drops those packets, so 2-hop egress black-holes while single-hop works. Set net.ipv4.conf.{all,default}.rp_filter=0 in the node's sysctl prep (next to ip_forward); new awg interfaces inherit the default. Confirmed live: nyc(entry)→ams(exit) cascade — client egress flipped from the entry's IP to the exit's IP the instant rp_filter was turned off. Co-Authored-By: Claude Opus 4.8 (1M context) --- deploy/cloud-init.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/deploy/cloud-init.sh b/deploy/cloud-init.sh index 7a67602..0c46007 100644 --- a/deploy/cloud-init.sh +++ b/deploy/cloud-init.sh @@ -22,6 +22,13 @@ apt-get install -y "linux-headers-$(uname -r)" amneziawg amneziawg-tools cat >/etc/sysctl.d/99-pharos.conf <<'EOF' net.ipv4.ip_forward=1 net.ipv6.conf.all.forwarding=1 +# Cascade routing is asymmetric: a device's packets arrive on the inner-link +# adapter (awg1/awg0) while the return path leaves a different interface, so +# reverse-path filtering — even Ubuntu's default "loose" (2) — silently drops +# them and multi-hop egress black-holes. Turn rp_filter off; new awg interfaces +# inherit the default. (decision 16 / node cascade.) 
+net.ipv4.conf.all.rp_filter=0 +net.ipv4.conf.default.rp_filter=0 EOF sysctl --system From eb4af6638bbcdbace3e8191a660a39d41d6e3ee7 Mon Sep 17 00:00:00 2001 From: Khalefa Date: Fri, 5 Jun 2026 04:19:38 +0300 Subject: [PATCH 2/2] fix(netpolicy): correctly relax rp_filter + idempotent transit route MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cascade black-hole had two code causes the live test surfaced: - rp_filter: the transit rules set only `conf.all.rp_filter=0`, but the effective value is max(conf.all, conf.<iface>), so the receiving inner interface stayed at its inherited default (2) and returns were still dropped — `all=0` alone was a no-op. Now relax `default` too (new wg interfaces inherit 0) and stop resetting `all` to 2 on teardown (that re-broke any other transit still up). - `ip route add default dev <inner-iface> table <table>` is non-idempotent: a 2nd device binding the same path 502s with "File exists". Use `ip route replace`. netpolicy_test pins both (and that teardown never sets rp_filter=2); kept byte-identical with coxswain's renderer. Co-Authored-By: Claude Opus 4.8 (1M context) --- internal/netpolicy/netpolicy.go | 21 +++++++++++++------- internal/netpolicy/netpolicy_test.go | 29 ++++++++++++++++++++-------- 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/internal/netpolicy/netpolicy.go b/internal/netpolicy/netpolicy.go index 1790ac5..e9e1e5c 100644 --- a/internal/netpolicy/netpolicy.go +++ b/internal/netpolicy/netpolicy.go @@ -129,13 +129,18 @@ func (p Policy) Rules() Rules { // // A return from the exit arrives on the inner interface, but the route back // to its source (the public destination) is the egress interface — an - // asymmetric path that reverse-path filtering drops, even in loose mode, so - // the entry silently fails to forward returns to the client. Relax rp_filter - // while the node carries transits. 
The effective value is max(all, iface), - // so `all` must be relaxed — relaxing only the inner interface is a no-op. + // asymmetric path that reverse-path filtering drops, even in loose mode (2), + // so the entry silently fails to forward returns to the client. The effective + // value is max(conf.all, conf.<iface>): relaxing `all` ALONE is a no-op while + // the receiving interface keeps the inherited default (2) — both must be 0. + // Relax `default` so every wg interface inherits 0 when it is created (covers + // the inner interface without racing its bring-up). Not restored on teardown: + // resetting `all` to 2 would re-break any other transit still up. (Proven + // live 2026-06: `all=0` alone left awg1 at 2 and the cascade black-holed.) if len(p.Transits) > 0 { - r.PreUp = append(r.PreUp, "sysctl -w net.ipv4.conf.all.rp_filter=0") - r.PostDown = append(r.PostDown, "sysctl -w net.ipv4.conf.all.rp_filter=2") + r.PreUp = append(r.PreUp, + "sysctl -w net.ipv4.conf.all.rp_filter=0", + "sysctl -w net.ipv4.conf.default.rp_filter=0") } for _, t := range p.Transits { mark := strconv.FormatUint(uint64(t.Mark), 10) @@ -143,7 +148,9 @@ func (p Policy) Rules() Rules { r.PostUp = append(r.PostUp, "iptables -t mangle -A PREROUTING -i "+ifaceToken+" -s "+t.DeviceCIDR+" -j MARK --set-mark "+mark, "ip rule add fwmark "+mark+" lookup "+table, - "ip route add default dev "+t.InnerInterface+" table "+table) + // `replace` not `add`: a 2nd device binding the same path reuses this + // per-path table+inner interface; `add` fails with "File exists". 
+ "ip route replace default dev "+t.InnerInterface+" table "+table) r.PostDown = append(r.PostDown, "ip route del default dev "+t.InnerInterface+" table "+table, "ip rule del fwmark "+mark+" lookup "+table, diff --git a/internal/netpolicy/netpolicy_test.go b/internal/netpolicy/netpolicy_test.go index b43cf31..fd96af9 100644 --- a/internal/netpolicy/netpolicy_test.go +++ b/internal/netpolicy/netpolicy_test.go @@ -8,6 +8,7 @@ import ( "errors" "path/filepath" "reflect" + "strings" "testing" ) @@ -133,13 +134,18 @@ func TestTransitRulesCanonical(t *testing.T) { wantUp := []string{ "iptables -t mangle -A PREROUTING -i %i -s 10.8.0.5/32 -j MARK --set-mark 100", "ip rule add fwmark 100 lookup 100", - "ip route add default dev awg1 table 100", + // `replace`, not `add` — idempotent so a 2nd device on the same path + // doesn't fail with "File exists" (the live cascade-bind regression). + "ip route replace default dev awg1 table 100", } for _, w := range wantUp { if !containsLine(r.PostUp, w) { t.Errorf("PostUp missing %q\n got: %#v", w, r.PostUp) } } + if containsLine(r.PostUp, "ip route add default dev awg1 table 100") { + t.Errorf("transit route must use `ip route replace`, not `add` (idempotency)\n got: %#v", r.PostUp) + } wantDown := []string{ "ip route del default dev awg1 table 100", "ip rule del fwmark 100 lookup 100", @@ -152,13 +158,20 @@ func TestTransitRulesCanonical(t *testing.T) { } // A transit node forwards returns asymmetrically (in on the inner interface, - // route-back via egress), which rp_filter drops — so the cascade entry must - // relax it while it carries transits, and restore it on teardown. - if !containsLine(r.PreUp, "sysctl -w net.ipv4.conf.all.rp_filter=0") { - t.Errorf("PreUp missing the rp_filter relax\n got: %#v", r.PreUp) - } - if !containsLine(r.PostDown, "sysctl -w net.ipv4.conf.all.rp_filter=2") { - t.Errorf("PostDown missing the rp_filter restore\n got: %#v", r.PostDown) + // route-back via egress), which rp_filter drops. 
The effective value is + // max(conf.all, conf.<iface>), so BOTH all and default must be relaxed — + // relaxing `all` alone leaves the interface at its inherited 2 and the cascade + // black-holes (the live regression this guards). + if !containsLine(r.PreUp, "sysctl -w net.ipv4.conf.all.rp_filter=0") || + !containsLine(r.PreUp, "sysctl -w net.ipv4.conf.default.rp_filter=0") { + t.Errorf("PreUp must relax both all AND default rp_filter (all alone is a no-op)\n got: %#v", r.PreUp) + } + // Must NOT reset rp_filter to 2 on teardown — that re-breaks any other transit + // still up. + for _, d := range r.PostDown { + if strings.Contains(d, "rp_filter=2") { + t.Errorf("PostDown must not reset rp_filter to 2\n got: %#v", r.PostDown) + } } }