From ce946b3fe86790c3bb3058f8e9d1547513e9197c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 6 May 2026 18:46:46 +0000 Subject: [PATCH 01/14] Initial plan From dc5a3e4b5a0d9cecbda6efad9b84541c65f66918 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 6 May 2026 18:59:57 +0000 Subject: [PATCH 02/14] add rathole as alternative port-forwarding tunnel backend - Add TunnelType, RatholeExecutableURL, and RatholeCommand fields to Network config - Add templates/rathole-template.yaml with Deployment/ConfigMap/Service/Ingress for a rathole server using WebSocket transport (compatible with nginx ingress) - Embed rathole template; add DefaultRatholeExecutableURL and DefaultRatholeCommand constants; add annRatholeClientCmds annotation constant - Modify executeWstunnelTemplate to select rathole template when TunnelType==rathole - Modify addWstunnelClientAnnotation to generate rathole client TOML + bootstrap command when rathole mode is active; clear stale wstunnel annotation - Update clearConflictingNetworkAnnotations to also remove annRatholeClientCmds in full-mesh mode - Update cleanupWstunnelResources to also delete the rathole ConfigMap on teardown - Add tests covering rathole template rendering, annotation generation, and the updated clearConflictingNetworkAnnotations behaviour Agent-Logs-Url: https://github.com/interlink-hq/interLink/sessions/76bae3af-62af-4408-96ac-81a236bbae50 Co-authored-by: dciangot <4144326+dciangot@users.noreply.github.com> --- pkg/virtualkubelet/config.go | 10 +- pkg/virtualkubelet/config_test.go | 24 +++ pkg/virtualkubelet/mesh.go | 45 ++++- pkg/virtualkubelet/mesh_annotations_test.go | 18 ++ pkg/virtualkubelet/rathole_test.go | 185 ++++++++++++++++++ .../templates/rathole-template.yaml | 130 ++++++++++++ pkg/virtualkubelet/virtualkubelet.go | 59 ++++-- 7 files changed, 452 insertions(+), 19 deletions(-) create mode 100644 pkg/virtualkubelet/rathole_test.go create mode 100644 pkg/virtualkubelet/templates/rathole-template.yaml diff --git a/pkg/virtualkubelet/config.go b/pkg/virtualkubelet/config.go index b3ed38ff..2a6706c3 100644 --- a/pkg/virtualkubelet/config.go +++ b/pkg/virtualkubelet/config.go @@ -131,14 +131,22 @@ type PodCIDR struct { type Network struct { // EnableTunnel enables WebSocket tunneling for pod port exposure EnableTunnel bool `yaml:"EnableTunnel" default:"false"` + // TunnelType selects the port-forwarding backend: "wstunnel" (default) or "rathole" + TunnelType string `yaml:"TunnelType,omitempty"` // WildcardDNS specifies the DNS domain for generating tunnel endpoints WildcardDNS string `yaml:"WildcardDNS,omitempty"` // WSTunnelExecutableURL specifies the URL to download the wstunnel executable (default is "https://github.com/interlink-hq/interlink-artifacts/raw/main/wstunnel/v10.4.4/linux-amd64/wstunnel") WSTunnelExecutableURL string `yaml:"WSTunnelExecutable,omitempty"` - // WstunnelTemplatePath is the path to a custom wstunnel template file + // WstunnelTemplatePath is the path to a custom tunnel template file (applies to both wstunnel and rathole) WstunnelTemplatePath string `yaml:"WstunnelTemplatePath,omitempty"` // WstunnelCommand specifies the command template for setting up wstunnel clients WstunnelCommand string `yaml:"WstunnelCommand,omitempty"` + // RatholeExecutableURL specifies the URL to download the rathole executable zip archive + // (default is "https://github.com/rapiz1/rathole/releases/download/v0.5.0/rathole-x86_64-unknown-linux-musl.zip") + RatholeExecutableURL string `yaml:"RatholeExecutableURL,omitempty"` + // RatholeCommand specifies a custom command template for setting up rathole clients. + // Two %s format verbs are substituted: the rathole download URL and the base64-encoded client TOML config. + RatholeCommand string `yaml:"RatholeCommand,omitempty"` // FullMesh enables full mesh networking with slirp4netns and WireGuard FullMesh bool `yaml:"FullMesh" default:"false"` // MeshScriptTemplatePath is the path to a custom mesh.sh template file diff --git a/pkg/virtualkubelet/config_test.go b/pkg/virtualkubelet/config_test.go index a800c6bf..7960582e 100644 --- a/pkg/virtualkubelet/config_test.go +++ b/pkg/virtualkubelet/config_test.go @@ -124,6 +124,30 @@ func TestNetwork_Configuration(t *testing.T) { assert.NotEmpty(t, network.WstunnelCommand) } +func TestNetwork_RatholeConfiguration(t *testing.T) { + network := Network{ + EnableTunnel: true, + TunnelType: "rathole", + WildcardDNS: "tunnel.example.com", + RatholeExecutableURL: "https://example.com/rathole.zip", + RatholeCommand: "curl -L %s -o rathole.zip && unzip rathole.zip && echo %s | base64 -d > /tmp/client.toml && ./rathole /tmp/client.toml &", + } + + assert.True(t, network.EnableTunnel) + assert.Equal(t, "rathole", network.TunnelType) + assert.Equal(t, "tunnel.example.com", network.WildcardDNS) + assert.Equal(t, "https://example.com/rathole.zip", network.RatholeExecutableURL) + assert.NotEmpty(t, network.RatholeCommand) +} + +func TestNetwork_WstunnelDefaultTunnelType(t *testing.T) { + // Empty TunnelType means wstunnel (backward-compatible default) + network := Network{ + EnableTunnel: true, + } + assert.Empty(t, network.TunnelType, "empty TunnelType should default to wstunnel behaviour") +} + func TestAccelerator_AvailableIsKubernetesQuantity(t *testing.T) { tests := []struct { name string diff --git a/pkg/virtualkubelet/mesh.go b/pkg/virtualkubelet/mesh.go index e7dac2b4..58c33128 100644 --- a/pkg/virtualkubelet/mesh.go +++ b/pkg/virtualkubelet/mesh.go @@ -235,7 +235,8 @@ func deriveWGPublicKey(privB64 string) (string, error) { return base64.StdEncoding.EncodeToString(pubRaw), nil } -// addWstunnelClientAnnotation adds the wstunnel client command annotation to the original pod +// addWstunnelClientAnnotation adds the tunnel client command annotation to the original pod. +// In rathole mode it writes a rathole client command; otherwise it writes a wstunnel command. func (p *Provider) addWstunnelClientAnnotation(ctx context.Context, pod *v1.Pod, td *WstunnelTemplateData) error { if pod.Annotations == nil { pod.Annotations = make(map[string]string) @@ -289,6 +290,43 @@ PersistentKeepalive = %d pod.Annotations["interlink.eu/wireguard-client-snippet"] = wgSnippet + } else if p.config.Network.TunnelType == "rathole" { + // Rathole mode: build a client TOML config and generate the client bootstrap command + ratholeEndpoint := fmt.Sprintf("rathole-%s.%s", td.Name, td.WildcardDNS) + ratholeEndpoint = sanitizeFullDNSName(ratholeEndpoint) + if td.WildcardDNS == "" { + ratholeEndpoint = td.Name + } + + var tomlBuilder strings.Builder + tomlBuilder.WriteString(fmt.Sprintf("[client]\nremote_addr = \"%s:80\"\n\n", ratholeEndpoint)) + tomlBuilder.WriteString("[client.transport]\ntype = \"websocket\"\n\n") + for _, port := range td.ExposedPorts { + if strings.ToUpper(port.Protocol) == "UDP" { + continue + } + tomlBuilder.WriteString(fmt.Sprintf("[client.services.p%d]\ntoken = \"%s\"\nlocal_addr = \"127.0.0.1:%d\"\n\n", + port.Port, td.RandomPassword, port.Port)) + } + + configB64 := base64.StdEncoding.EncodeToString([]byte(tomlBuilder.String())) + + ratholeURL := p.config.Network.RatholeExecutableURL + if ratholeURL == "" { + ratholeURL = DefaultRatholeExecutableURL + } + + ratholeCmd := p.config.Network.RatholeCommand + if ratholeCmd == "" { + ratholeCmd = DefaultRatholeCommand + } + + mainCmd := fmt.Sprintf(ratholeCmd, ratholeURL, configB64) + + // Remove any stale wstunnel annotation and set the rathole one + delete(pod.Annotations, annWSTunnelClientCmds) + pod.Annotations[annRatholeClientCmds] = mainCmd + } else { var rOptions []string for _, port := range td.ExposedPorts { @@ -347,8 +385,8 @@ PersistentKeepalive = %d // clearConflictingNetworkAnnotations removes generated annotations that are specific to // the opposite network mode to keep pod network bootstrap behavior uniform. -// When fullMeshEnabledForPod is true, any stale wstunnel client command annotation is removed. -// When false, any stale WireGuard snippet annotation is removed. +// When fullMeshEnabledForPod is true, any stale wstunnel and rathole client command annotations +// are removed. When false, any stale WireGuard snippet annotation is removed. func clearConflictingNetworkAnnotations(pod *v1.Pod, fullMeshEnabledForPod bool) { if pod == nil || pod.Annotations == nil { return @@ -356,6 +394,7 @@ func clearConflictingNetworkAnnotations(pod *v1.Pod, fullMeshEnabledForPod bool) if fullMeshEnabledForPod { delete(pod.Annotations, annWSTunnelClientCmds) + delete(pod.Annotations, annRatholeClientCmds) return } diff --git a/pkg/virtualkubelet/mesh_annotations_test.go b/pkg/virtualkubelet/mesh_annotations_test.go index 1ac13e1c..315ed81d 100644 --- a/pkg/virtualkubelet/mesh_annotations_test.go +++ b/pkg/virtualkubelet/mesh_annotations_test.go @@ -27,6 +27,24 @@ func TestClearConflictingNetworkAnnotations(t *testing.T) { assert.Equal(t, "value", pod.Annotations["keep"]) }) + t.Run("full mesh also removes rathole command annotation", func(t *testing.T) { + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + annRatholeClientCmds: "rathole-command", + annWGClientSnippet: "wireguard-snippet", + "keep": "value", + }, + }, + } + + clearConflictingNetworkAnnotations(pod, true) + + assert.NotContains(t, pod.Annotations, annRatholeClientCmds) + assert.Contains(t, pod.Annotations, annWGClientSnippet) + assert.Equal(t, "value", pod.Annotations["keep"]) + }) + t.Run("non mesh removes wireguard snippet annotation", func(t *testing.T) { pod := &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ diff --git a/pkg/virtualkubelet/rathole_test.go b/pkg/virtualkubelet/rathole_test.go new file mode 100644 index 00000000..2451ed31 --- /dev/null +++ b/pkg/virtualkubelet/rathole_test.go @@ -0,0 +1,185 @@ +package virtualkubelet + +import ( + "context" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/fake" +) + +// TestRatholeTemplateExecution verifies that the built-in rathole template can be +// loaded and executed without errors when TunnelType is "rathole". +func TestRatholeTemplateExecution(t *testing.T) { + p := &Provider{ + config: Config{ + Network: Network{ + TunnelType: "rathole", + WildcardDNS: "tunnel.example.com", + }, + }, + clientSet: fake.NewClientset(), + } + + data := WstunnelTemplateData{ + Name: "my-pod-default", + Namespace: "default-wstunnel", + RandomPassword: "abc123", + WildcardDNS: "tunnel.example.com", + ExposedPorts: []PortMapping{ + {Port: 8080, Name: "http", Protocol: "TCP"}, + {Port: 9090, Name: "metrics", Protocol: "TCP"}, + }, + } + + ctx := context.Background() + yaml, err := p.executeWstunnelTemplate(ctx, data) + require.NoError(t, err) + assert.NotEmpty(t, yaml) + + // Verify the rendered YAML contains rathole-specific markers + assert.Contains(t, yaml, "rathole-config", "ConfigMap name should reference rathole") + assert.Contains(t, yaml, "rapiz1/rathole", "should use the default rathole image") + assert.Contains(t, yaml, "bind_addr = \"0.0.0.0:2333\"", "server control port") + assert.Contains(t, yaml, "token = \"abc123\"", "token from RandomPassword") + assert.Contains(t, yaml, "bind_addr = \"0.0.0.0:8080\"", "port 8080 should be forwarded") + assert.Contains(t, yaml, "bind_addr = \"0.0.0.0:9090\"", "port 9090 should be forwarded") + assert.Contains(t, yaml, "rathole-my-pod-default.tunnel.example.com", "Ingress host should use rathole prefix") +} + +// TestWstunnelTemplateUnchanged verifies that the existing wstunnel template is still +// selected when TunnelType is empty (backward-compatible default). +func TestWstunnelTemplateUnchanged(t *testing.T) { + p := &Provider{ + config: Config{ + Network: Network{ + // TunnelType deliberately empty → wstunnel + WildcardDNS: "tunnel.example.com", + }, + }, + clientSet: fake.NewClientset(), + } + + data := WstunnelTemplateData{ + Name: "my-pod-default", + Namespace: "default-wstunnel", + RandomPassword: "abc123", + WildcardDNS: "tunnel.example.com", + ExposedPorts: []PortMapping{ + {Port: 8080, Name: "http", Protocol: "TCP"}, + }, + } + + ctx := context.Background() + yaml, err := p.executeWstunnelTemplate(ctx, data) + require.NoError(t, err) + assert.NotEmpty(t, yaml) + + // The default wstunnel template should not contain rathole markers + assert.NotContains(t, yaml, "rathole-config") + assert.Contains(t, yaml, "wstunnel", "should use wstunnel image/command") +} + +// TestRatholeClientAnnotation verifies that addWstunnelClientAnnotation sets +// the rathole annotation and removes any stale wstunnel annotation. +func TestRatholeClientAnnotation(t *testing.T) { + fakeClient := fake.NewClientset() + + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + Annotations: map[string]string{ + // Simulate a stale wstunnel annotation from a previous run + annWSTunnelClientCmds: "old-wstunnel-cmd", + }, + }, + } + // Create the pod in the fake client so Patch succeeds + _, err := fakeClient.CoreV1().Pods(pod.Namespace).Create(context.Background(), pod, metav1.CreateOptions{}) + require.NoError(t, err) + + p := &Provider{ + config: Config{ + Network: Network{ + TunnelType: "rathole", + WildcardDNS: "tunnel.example.com", + }, + }, + clientSet: fakeClient, + } + + td := &WstunnelTemplateData{ + Name: "my-pod-default", + Namespace: "default-wstunnel", + RandomPassword: "secrettoken", + WildcardDNS: "tunnel.example.com", + ExposedPorts: []PortMapping{ + {Port: 8080, Name: "http", Protocol: "TCP"}, + }, + } + + err = p.addWstunnelClientAnnotation(context.Background(), pod, td) + require.NoError(t, err) + + // The rathole annotation should be set + ratholeCmd, ok := pod.Annotations[annRatholeClientCmds] + assert.True(t, ok, "rathole client command annotation should be present") + assert.NotEmpty(t, ratholeCmd) + assert.Contains(t, ratholeCmd, DefaultRatholeExecutableURL, "should embed the default rathole URL") + // The base64-encoded client config should be included + assert.True(t, strings.Contains(ratholeCmd, "base64"), "command should decode a base64 client config") + + // The stale wstunnel annotation should be removed + _, wstunnelPresent := pod.Annotations[annWSTunnelClientCmds] + assert.False(t, wstunnelPresent, "stale wstunnel annotation should be cleared in rathole mode") +} + +// TestRatholeClientAnnotationCustomCommand verifies that a custom RatholeCommand template +// is honoured when set. +func TestRatholeClientAnnotationCustomCommand(t *testing.T) { + fakeClient := fake.NewClientset() + + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + Annotations: map[string]string{}, + }, + } + _, err := fakeClient.CoreV1().Pods(pod.Namespace).Create(context.Background(), pod, metav1.CreateOptions{}) + require.NoError(t, err) + + customCmd := "my-custom-rathole-installer %s && my-custom-start %s &" + p := &Provider{ + config: Config{ + Network: Network{ + TunnelType: "rathole", + WildcardDNS: "tunnel.example.com", + RatholeCommand: customCmd, + }, + }, + clientSet: fakeClient, + } + + td := &WstunnelTemplateData{ + Name: "my-pod-default", + Namespace: "default-wstunnel", + RandomPassword: "token", + WildcardDNS: "tunnel.example.com", + ExposedPorts: []PortMapping{ + {Port: 8080, Name: "http", Protocol: "TCP"}, + }, + } + + err = p.addWstunnelClientAnnotation(context.Background(), pod, td) + require.NoError(t, err) + + ratholeCmd, ok := pod.Annotations[annRatholeClientCmds] + assert.True(t, ok) + assert.Contains(t, ratholeCmd, "my-custom-rathole-installer", "custom command template should be used") +} diff --git a/pkg/virtualkubelet/templates/rathole-template.yaml b/pkg/virtualkubelet/templates/rathole-template.yaml new file mode 100644 index 00000000..9a0039fa --- /dev/null +++ b/pkg/virtualkubelet/templates/rathole-template.yaml @@ -0,0 +1,130 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{.Name}}-rathole-config + namespace: {{.Namespace}} +data: + server.toml: | + [server] + bind_addr = "0.0.0.0:2333" + + [server.transport] + type = "websocket" + {{- range .ExposedPorts}} + + [server.services.p{{.Port}}] + token = "{{$.RandomPassword}}" + bind_addr = "0.0.0.0:{{.Port}}" + {{- end}} +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{.Name}} + namespace: {{.Namespace}} +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/component: {{.Name}} + template: + metadata: + labels: + app.kubernetes.io/component: {{.Name}} + annotations: + interlink.eu/wstunnel-path-prefix: {{.RandomPassword}} + spec: + containers: + - name: rathole + image: rapiz1/rathole:latest + imagePullPolicy: IfNotPresent + args: ["--server", "/etc/rathole/server.toml"] + ports: + - containerPort: 2333 + name: control + protocol: TCP + {{- range .ExposedPorts}} + - containerPort: {{.Port}} + name: {{if .Name}}{{.Name}}{{else}}port-{{.Port}}{{end}} + protocol: {{.Protocol}} + {{- end}} + volumeMounts: + - name: rathole-config + mountPath: /etc/rathole + resources: + requests: + cpu: 100m + memory: 64Mi + readinessProbe: + tcpSocket: + port: 2333 + initialDelaySeconds: 2 + periodSeconds: 2 + failureThreshold: 10 + livenessProbe: + tcpSocket: + port: 2333 + initialDelaySeconds: 10 + periodSeconds: 10 + nodeSelector: + kubernetes.io/os: linux + volumes: + - name: rathole-config + configMap: + name: {{.Name}}-rathole-config +--- +apiVersion: v1 +kind: Service +metadata: + name: {{.Name}} + namespace: {{.Namespace}} +spec: + type: ClusterIP + selector: + app.kubernetes.io/component: {{.Name}} + ports: + - port: 2333 + targetPort: 2333 + name: control + protocol: TCP + {{- range .ExposedPorts}} + {{- if ne .Port 2333}} + - port: {{.Port}} + targetPort: {{.Port}} + name: {{if .Name}}{{.Name}}{{else}}port-{{.Port}}{{end}} + {{- if .Protocol}} + protocol: {{.Protocol}} + {{- end}} + {{- end}} + {{- end}} +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{.Name}} + namespace: {{.Namespace}} + annotations: + nginx.ingress.kubernetes.io/proxy-read-timeout: "3600" + nginx.ingress.kubernetes.io/proxy-send-timeout: "3600" + nginx.ingress.kubernetes.io/server-snippets: | + location / { + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_http_version 1.1; + proxy_set_header X-Forwarded-For $remote_addr; + proxy_set_header Host $host; + proxy_cache_bypass $http_upgrade; + } + kubernetes.io/ingress.class: "nginx" +spec: + rules: + - host: rathole-{{.Name}}.{{.WildcardDNS}} + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: {{.Name}} + port: + number: 2333 diff --git a/pkg/virtualkubelet/virtualkubelet.go b/pkg/virtualkubelet/virtualkubelet.go index 7e5cbe65..374fb1ec 100644 --- a/pkg/virtualkubelet/virtualkubelet.go +++ b/pkg/virtualkubelet/virtualkubelet.go @@ -46,6 +46,9 @@ import ( //go:embed templates/wstunnel-template.yaml var defaultWstunnelTemplate embed.FS +//go:embed templates/rathole-template.yaml +var defaultRatholeTemplate embed.FS + //go:embed all:templates/mesh.sh var meshScriptTemplate embed.FS @@ -67,6 +70,11 @@ const ( intelFPGA = "intel.com/fpga" DefaultProtocol = "TCP" DefaultWstunnelCommand = "curl -L -f -k https://github.com/erebe/wstunnel/releases/download/v10.4.4/wstunnel_10.4.4_linux_amd64.tar.gz -o wstunnel.tar.gz && tar -xzvf wstunnel.tar.gz && chmod +x wstunnel && ./wstunnel client --http-upgrade-path-prefix %s %s ws://%s:80 &" + // DefaultRatholeExecutableURL is the default URL to download the rathole executable zip archive. + DefaultRatholeExecutableURL = "https://github.com/rapiz1/rathole/releases/download/v0.5.0/rathole-x86_64-unknown-linux-musl.zip" + // DefaultRatholeCommand is the default command template for the rathole client. + // Two %s format verbs are substituted: the rathole download URL and the base64-encoded client TOML config. + DefaultRatholeCommand = "curl -L -f -k %s -o rathole.zip && unzip -q rathole.zip && chmod +x rathole && echo %s | base64 -d > /tmp/rathole-client.toml && ./rathole /tmp/rathole-client.toml &" ) // Annotations for WireGuard and WStunnel configuration @@ -76,6 +84,7 @@ const ( annWGMTU = "interlink.eu/wg-mtu" // optional, default 1280 annWgKeepaliveSeconds = "interlink.eu/wg-keepalive-seconds" // optional, default 25 annWSTunnelClientCmds = "interlink.eu/wstunnel-client-commands" + annRatholeClientCmds = "interlink.eu/rathole-client-commands" annWGClientSnippet = "interlink.eu/wireguard-client-snippet" annDisableOffloadContainers = "interlink.eu/disable-offload-containers" // comma-separated container names annDisableOffloadInitContainers = "interlink.eu/disable-offload-init-containers" // comma-separated init container names @@ -944,11 +953,11 @@ func mergeMaps(dst, src map[string]string) map[string]string { return dst } -// executeWstunnelTemplate loads and executes the wstunnel template +// executeWstunnelTemplate loads and executes the tunnel template (wstunnel or rathole based on configuration) func (p *Provider) executeWstunnelTemplate(ctx context.Context, data WstunnelTemplateData) (string, error) { var templateContent string - // Try to load from custom path first + // Try to load from custom path first (applies to both wstunnel and rathole) if p.config.Network.WstunnelTemplatePath != "" { content, err := os.ReadFile(p.config.Network.WstunnelTemplatePath) if err != nil { @@ -958,11 +967,23 @@ func (p *Provider) executeWstunnelTemplate(ctx context.Context, data WstunnelTem } } - // Fall back to embedded template + // Fall back to the built-in template for the configured tunnel type if templateContent == "" { - content, err := defaultWstunnelTemplate.ReadFile("templates/wstunnel-template.yaml") - if err != nil { - return "", fmt.Errorf("failed to read embedded template: %w", err) + var ( + content []byte + err error + ) + if p.config.Network.TunnelType == "rathole" { + content, err = defaultRatholeTemplate.ReadFile("templates/rathole-template.yaml") + if err != nil { + return "", fmt.Errorf("failed to read embedded rathole template: %w", err) + } + log.G(ctx).Info("Using built-in rathole template") + } else { + content, err = defaultWstunnelTemplate.ReadFile("templates/wstunnel-template.yaml") + if err != nil { + return "", fmt.Errorf("failed to read embedded template: %w", err) + } } templateContent = string(content) } @@ -1253,41 +1274,49 @@ func (p *Provider) waitForDeploymentPod(ctx context.Context, deploymentName, nam return nil, fmt.Errorf("no pod found for deployment %s within timeout", deploymentName) } -// cleanupWstunnelResources removes all wstunnel resources for a given name and namespace +// cleanupWstunnelResources removes all tunnel resources for a given name and namespace func (p *Provider) cleanupWstunnelResources(ctx context.Context, wstunnelName, namespace string) { - log.G(ctx).Infof("Cleaning up wstunnel resources for %s/%s", namespace, wstunnelName) + log.G(ctx).Infof("Cleaning up tunnel resources for %s/%s", namespace, wstunnelName) // Delete deployment err := p.clientSet.AppsV1().Deployments(namespace).Delete(ctx, wstunnelName, metav1.DeleteOptions{}) if err != nil { - log.G(ctx).Warningf("Failed to delete wstunnel deployment %s/%s: %v", namespace, wstunnelName, err) + log.G(ctx).Warningf("Failed to delete tunnel deployment %s/%s: %v", namespace, wstunnelName, err) } else { - log.G(ctx).Infof("Successfully deleted wstunnel deployment %s/%s", namespace, wstunnelName) + log.G(ctx).Infof("Successfully deleted tunnel deployment %s/%s", namespace, wstunnelName) } // Delete service err = p.clientSet.CoreV1().Services(namespace).Delete(ctx, wstunnelName, metav1.DeleteOptions{}) if err != nil { - log.G(ctx).Warningf("Failed to delete wstunnel service %s/%s: %v", namespace, wstunnelName, err) + log.G(ctx).Warningf("Failed to delete tunnel service %s/%s: %v", namespace, wstunnelName, err) } else { - log.G(ctx).Infof("Successfully deleted wstunnel service %s/%s", namespace, wstunnelName) + log.G(ctx).Infof("Successfully deleted tunnel service %s/%s", namespace, wstunnelName) } // Delete ingress err = p.clientSet.NetworkingV1().Ingresses(namespace).Delete(ctx, wstunnelName, metav1.DeleteOptions{}) if err != nil { - log.G(ctx).Warningf("Failed to delete wstunnel ingress %s/%s: %v", namespace, wstunnelName, err) + log.G(ctx).Warningf("Failed to delete tunnel ingress %s/%s: %v", namespace, wstunnelName, err) } else { - log.G(ctx).Infof("Successfully deleted wstunnel ingress %s/%s", namespace, wstunnelName) + log.G(ctx).Infof("Successfully deleted tunnel ingress %s/%s", namespace, wstunnelName) } - // Delete configmap + // Delete wstunnel wireguard configmap (used in full-mesh / wstunnel mode) err = p.clientSet.CoreV1().ConfigMaps(namespace).Delete(ctx, wstunnelName+"-wg-config", metav1.DeleteOptions{}) if err != nil { log.G(ctx).Warningf("Failed to delete wstunnel configmap %s/%s: %v", namespace, wstunnelName+"-wg-config", err) } else { log.G(ctx).Infof("Successfully deleted wstunnel configmap %s/%s", namespace, wstunnelName+"-wg-config") } + + // Delete rathole configmap (used in rathole mode) + err = p.clientSet.CoreV1().ConfigMaps(namespace).Delete(ctx, wstunnelName+"-rathole-config", metav1.DeleteOptions{}) + if err != nil { + log.G(ctx).Warningf("Failed to delete rathole configmap %s/%s: %v", namespace, wstunnelName+"-rathole-config", err) + } else { + log.G(ctx).Infof("Successfully deleted rathole configmap %s/%s", namespace, wstunnelName+"-rathole-config") + } } // cleanupPartialWstunnelResources removes specific resources that were created before a failure From 6463e7661e120c005e205ebc133856778d5b33a3 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 6 May 2026 19:03:54 +0000 Subject: [PATCH 03/14] address code review: pin rathole image version, add port-conflict comment, log skipped UDP ports Agent-Logs-Url: https://github.com/interlink-hq/interLink/sessions/76bae3af-62af-4408-96ac-81a236bbae50 Co-authored-by: dciangot <4144326+dciangot@users.noreply.github.com> --- pkg/virtualkubelet/mesh.go | 1 + pkg/virtualkubelet/templates/rathole-template.yaml | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/virtualkubelet/mesh.go b/pkg/virtualkubelet/mesh.go index 58c33128..70dc242d 100644 --- a/pkg/virtualkubelet/mesh.go +++ b/pkg/virtualkubelet/mesh.go @@ -303,6 +303,7 @@ PersistentKeepalive = %d tomlBuilder.WriteString("[client.transport]\ntype = \"websocket\"\n\n") for _, port := range td.ExposedPorts { if strings.ToUpper(port.Protocol) == "UDP" { + log.G(ctx).Debugf("Skipping UDP port %d in rathole client config (rathole websocket transport forwards TCP only)", port.Port) continue } tomlBuilder.WriteString(fmt.Sprintf("[client.services.p%d]\ntoken = \"%s\"\nlocal_addr = \"127.0.0.1:%d\"\n\n", diff --git a/pkg/virtualkubelet/templates/rathole-template.yaml b/pkg/virtualkubelet/templates/rathole-template.yaml index 9a0039fa..41ce7ba4 100644 --- a/pkg/virtualkubelet/templates/rathole-template.yaml +++ b/pkg/virtualkubelet/templates/rathole-template.yaml @@ -36,7 +36,7 @@ spec: spec: containers: - name: rathole - image: rapiz1/rathole:latest + image: rapiz1/rathole:v0.5.0 imagePullPolicy: IfNotPresent args: ["--server", "/etc/rathole/server.toml"] ports: @@ -88,7 +88,7 @@ spec: name: control protocol: TCP {{- range .ExposedPorts}} - {{- if ne .Port 2333}} + {{- if ne .Port 2333}}{{/* skip control port 2333 to avoid conflicts with the rathole server listener */}} - port: {{.Port}} targetPort: {{.Port}} name: {{if .Name}}{{.Name}}{{else}}port-{{.Port}}{{end}} From 4fca6a0c8a16c678539cbf6e90c90c28464d3037 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 6 May 2026 19:27:25 +0000 Subject: [PATCH 04/14] fix lint: switch statement, fmt.Fprintf, goimports formatting - Rewrite if-else chain in addWstunnelClientAnnotation as a switch statement (gocritic) - Replace WriteString(fmt.Sprintf(...)) with fmt.Fprintf(...) (staticcheck QF1012) - Fix struct field alignment in rathole_test.go (goimports) Agent-Logs-Url: https://github.com/interlink-hq/interLink/sessions/d9236412-33e3-4b2c-af8a-71a1d7613670 Co-authored-by: dciangot <4144326+dciangot@users.noreply.github.com> --- pkg/virtualkubelet/mesh.go | 13 +++++++------ pkg/virtualkubelet/rathole_test.go | 4 ++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/pkg/virtualkubelet/mesh.go b/pkg/virtualkubelet/mesh.go index 70dc242d..cfeeed41 100644 --- a/pkg/virtualkubelet/mesh.go +++ b/pkg/virtualkubelet/mesh.go @@ -253,7 +253,8 @@ func (p *Provider) addWstunnelClientAnnotation(ctx context.Context, pod *v1.Pod, clearConflictingNetworkAnnotations(pod, fullMeshEnabledForPod) // Check if FullMesh mode is enabled and not disabled for this specific pod - if fullMeshEnabledForPod { + switch { + case fullMeshEnabledForPod: log.G(ctx).Infof("FullMesh mode enabled, generating pre-exec script for pod %s/%s", pod.Namespace, pod.Name) // Generate full mesh script @@ -290,7 +291,7 @@ PersistentKeepalive = %d pod.Annotations["interlink.eu/wireguard-client-snippet"] = wgSnippet - } else if p.config.Network.TunnelType == "rathole" { + case p.config.Network.TunnelType == "rathole": // Rathole mode: build a client TOML config and generate the client bootstrap command ratholeEndpoint := fmt.Sprintf("rathole-%s.%s", td.Name, td.WildcardDNS) ratholeEndpoint = sanitizeFullDNSName(ratholeEndpoint) @@ -299,15 +300,15 @@ PersistentKeepalive = %d } var tomlBuilder strings.Builder - tomlBuilder.WriteString(fmt.Sprintf("[client]\nremote_addr = \"%s:80\"\n\n", ratholeEndpoint)) + fmt.Fprintf(&tomlBuilder, "[client]\nremote_addr = \"%s:80\"\n\n", ratholeEndpoint) tomlBuilder.WriteString("[client.transport]\ntype = \"websocket\"\n\n") for _, port := range td.ExposedPorts { if strings.ToUpper(port.Protocol) == "UDP" { log.G(ctx).Debugf("Skipping UDP port %d in rathole client config (rathole websocket transport forwards TCP only)", port.Port) continue } - tomlBuilder.WriteString(fmt.Sprintf("[client.services.p%d]\ntoken = \"%s\"\nlocal_addr = \"127.0.0.1:%d\"\n\n", - port.Port, td.RandomPassword, port.Port)) + fmt.Fprintf(&tomlBuilder, "[client.services.p%d]\ntoken = \"%s\"\nlocal_addr = \"127.0.0.1:%d\"\n\n", + port.Port, td.RandomPassword, port.Port) } configB64 := base64.StdEncoding.EncodeToString([]byte(tomlBuilder.String())) @@ -328,7 +329,7 @@ PersistentKeepalive = %d delete(pod.Annotations, annWSTunnelClientCmds) pod.Annotations[annRatholeClientCmds] = mainCmd - } else { + default: var rOptions []string for _, port := range td.ExposedPorts { if strings.ToUpper(port.Protocol) == "UDP" { diff --git a/pkg/virtualkubelet/rathole_test.go b/pkg/virtualkubelet/rathole_test.go index 2451ed31..dd37f8ad 100644 --- a/pkg/virtualkubelet/rathole_test.go +++ b/pkg/virtualkubelet/rathole_test.go @@ -146,8 +146,8 @@ func TestRatholeClientAnnotationCustomCommand(t *testing.T) { pod := &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ - Name: "test-pod", - Namespace: "default", + Name: "test-pod", + Namespace: "default", Annotations: map[string]string{}, }, } From 10d34ee281da0b3c96f26111f32fc79db108e404 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 6 May 2026 21:15:17 +0000 Subject: [PATCH 05/14] switch rathole to TLS via cert-manager and Traefik IngressRouteTCP Agent-Logs-Url: https://github.com/interlink-hq/interLink/sessions/a0ffa622-8d08-4ef5-8c1b-b5bc31967057 Co-authored-by: dciangot <4144326+dciangot@users.noreply.github.com> --- pkg/virtualkubelet/config.go | 10 +- pkg/virtualkubelet/mesh.go | 89 ++++++-- pkg/virtualkubelet/rathole_test.go | 88 ++++++- .../templates/rathole-template.yaml | 36 --- pkg/virtualkubelet/virtualkubelet.go | 215 +++++++++++++++++- 5 files changed, 374 insertions(+), 64 deletions(-) diff --git a/pkg/virtualkubelet/config.go b/pkg/virtualkubelet/config.go index 2a6706c3..fd6bb9cb 100644 --- a/pkg/virtualkubelet/config.go +++ b/pkg/virtualkubelet/config.go @@ -145,8 +145,16 @@ type Network struct { // (default is "https://github.com/rapiz1/rathole/releases/download/v0.5.0/rathole-x86_64-unknown-linux-musl.zip") RatholeExecutableURL string `yaml:"RatholeExecutableURL,omitempty"` // RatholeCommand specifies a custom command template for setting up rathole clients. - // Two %s format verbs are substituted: the rathole download URL and the base64-encoded client TOML config. + // Five %s format verbs are substituted in order: the rathole download URL, base64-encoded CA cert, + // base64-encoded client cert, base64-encoded client key, and base64-encoded client TOML config. + // When RatholeCAIssuerName is empty the legacy two-argument WebSocket command is used instead. RatholeCommand string `yaml:"RatholeCommand,omitempty"` + // RatholeCAIssuerName is the cert-manager ClusterIssuer or Issuer name for the admin-provided CA. + // When set, rathole uses TLS transport; cert-manager issues both the server and client certificates. + // A Traefik IngressRouteTCP resource is created to expose the rathole server via TLS on port 443. + RatholeCAIssuerName string `yaml:"RatholeCAIssuerName,omitempty"` + // RatholeCAIssuerKind is the kind of the cert-manager issuer: "ClusterIssuer" (default) or "Issuer". + RatholeCAIssuerKind string `yaml:"RatholeCAIssuerKind,omitempty"` // FullMesh enables full mesh networking with slirp4netns and WireGuard FullMesh bool `yaml:"FullMesh" default:"false"` // MeshScriptTemplatePath is the path to a custom mesh.sh template file diff --git a/pkg/virtualkubelet/mesh.go b/pkg/virtualkubelet/mesh.go index cfeeed41..d4452791 100644 --- a/pkg/virtualkubelet/mesh.go +++ b/pkg/virtualkubelet/mesh.go @@ -292,38 +292,87 @@ PersistentKeepalive = %d pod.Annotations["interlink.eu/wireguard-client-snippet"] = wgSnippet case p.config.Network.TunnelType == "rathole": - // Rathole mode: build a client TOML config and generate the client bootstrap command + // Rathole mode: build a client TOML config and generate the client bootstrap command. + // When RatholeCAIssuerName is set, use TLS transport with cert-manager-issued certificates; + // otherwise fall back to WebSocket transport for backward compatibility. ratholeEndpoint := fmt.Sprintf("rathole-%s.%s", td.Name, td.WildcardDNS) ratholeEndpoint = sanitizeFullDNSName(ratholeEndpoint) if td.WildcardDNS == "" { ratholeEndpoint = td.Name } - var tomlBuilder strings.Builder - fmt.Fprintf(&tomlBuilder, "[client]\nremote_addr = \"%s:80\"\n\n", ratholeEndpoint) - tomlBuilder.WriteString("[client.transport]\ntype = \"websocket\"\n\n") - for _, port := range td.ExposedPorts { - if strings.ToUpper(port.Protocol) == "UDP" { - log.G(ctx).Debugf("Skipping UDP port %d in rathole client config (rathole websocket transport forwards TCP only)", port.Port) - continue - } - fmt.Fprintf(&tomlBuilder, "[client.services.p%d]\ntoken = \"%s\"\nlocal_addr = \"127.0.0.1:%d\"\n\n", - port.Port, td.RandomPassword, port.Port) - } - - configB64 := base64.StdEncoding.EncodeToString([]byte(tomlBuilder.String())) - ratholeURL := p.config.Network.RatholeExecutableURL if ratholeURL == "" { ratholeURL = DefaultRatholeExecutableURL } - ratholeCmd := p.config.Network.RatholeCommand - if ratholeCmd == "" { - ratholeCmd = DefaultRatholeCommand - } + var mainCmd string + + if p.config.Network.RatholeCAIssuerName != "" { + // TLS mode: rathole client uses TLS transport; Traefik terminates TLS at port 443. + // Wait for the client certificate secret to be issued by cert-manager. + clientCertSecretName := td.Name + "-rathole-client-tls" + if err := p.waitForRatholeCertSecret(ctx, clientCertSecretName, td.Namespace); err != nil { + return fmt.Errorf("rathole client certificate not ready: %w", err) + } + + certSecret, err := p.clientSet.CoreV1().Secrets(td.Namespace).Get(ctx, clientCertSecretName, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to read rathole client certificate secret: %w", err) + } + + caCrtB64 := base64.StdEncoding.EncodeToString(certSecret.Data["ca.crt"]) + clientCrtB64 := base64.StdEncoding.EncodeToString(certSecret.Data["tls.crt"]) + clientKeyB64 := base64.StdEncoding.EncodeToString(certSecret.Data["tls.key"]) + + var tomlBuilder strings.Builder + fmt.Fprintf(&tomlBuilder, "[client]\nremote_addr = \"%s:443\"\n\n", ratholeEndpoint) + tomlBuilder.WriteString("[client.transport]\ntype = \"tls\"\n\n") + tomlBuilder.WriteString("[client.transport.tls]\n") + fmt.Fprintf(&tomlBuilder, "hostname = \"%s\"\n", ratholeEndpoint) + tomlBuilder.WriteString("trusted_root = \"/tmp/rathole-ca.crt\"\n") + tomlBuilder.WriteString("cert = \"/tmp/rathole-client.crt\"\n") + tomlBuilder.WriteString("key = \"/tmp/rathole-client.key\"\n\n") + for _, port := range td.ExposedPorts { + if strings.ToUpper(port.Protocol) == "UDP" { + log.G(ctx).Debugf("Skipping UDP port %d in rathole client config (TLS transport forwards TCP only)", port.Port) + continue + } + fmt.Fprintf(&tomlBuilder, "[client.services.p%d]\ntoken = \"%s\"\nlocal_addr = \"127.0.0.1:%d\"\n\n", + port.Port, td.RandomPassword, port.Port) + } - mainCmd := fmt.Sprintf(ratholeCmd, ratholeURL, configB64) + configB64 := base64.StdEncoding.EncodeToString([]byte(tomlBuilder.String())) + + ratholeCmd := p.config.Network.RatholeCommand + if ratholeCmd == "" { + ratholeCmd = DefaultRatholeCommand + } + mainCmd = fmt.Sprintf(ratholeCmd, ratholeURL, caCrtB64, clientCrtB64, clientKeyB64, configB64) + } else { + // WebSocket fallback (no CA issuer configured) + log.G(ctx).Debugf("RatholeCAIssuerName not set; using WebSocket transport for pod %s/%s", pod.Namespace, pod.Name) + + var tomlBuilder strings.Builder + fmt.Fprintf(&tomlBuilder, "[client]\nremote_addr = \"%s:80\"\n\n", ratholeEndpoint) + tomlBuilder.WriteString("[client.transport]\ntype = \"websocket\"\n\n") + for _, port := range td.ExposedPorts { + if strings.ToUpper(port.Protocol) == "UDP" { + log.G(ctx).Debugf("Skipping UDP port %d in rathole client config (websocket transport forwards TCP only)", port.Port) + continue + } + fmt.Fprintf(&tomlBuilder, "[client.services.p%d]\ntoken = \"%s\"\nlocal_addr = \"127.0.0.1:%d\"\n\n", + port.Port, td.RandomPassword, port.Port) + } + + configB64 := base64.StdEncoding.EncodeToString([]byte(tomlBuilder.String())) + + ratholeWSCmd := p.config.Network.RatholeCommand + if ratholeWSCmd == "" { + ratholeWSCmd = DefaultRatholeWSCommand + } + mainCmd = fmt.Sprintf(ratholeWSCmd, ratholeURL, configB64) + } // Remove any stale wstunnel annotation and set the rathole one delete(pod.Annotations, annWSTunnelClientCmds) diff --git a/pkg/virtualkubelet/rathole_test.go b/pkg/virtualkubelet/rathole_test.go index dd37f8ad..5bacce4b 100644 --- a/pkg/virtualkubelet/rathole_test.go +++ b/pkg/virtualkubelet/rathole_test.go @@ -48,7 +48,12 @@ func TestRatholeTemplateExecution(t *testing.T) { assert.Contains(t, yaml, "token = \"abc123\"", "token from RandomPassword") assert.Contains(t, yaml, "bind_addr = \"0.0.0.0:8080\"", "port 8080 should be forwarded") assert.Contains(t, yaml, "bind_addr = \"0.0.0.0:9090\"", "port 9090 should be forwarded") - assert.Contains(t, yaml, "rathole-my-pod-default.tunnel.example.com", "Ingress host should use rathole prefix") + + // The nginx Ingress is no longer part of the template; TLS ingress is managed separately + // via the Traefik IngressRouteTCP applied by applyRatholeTLSResources. + assert.NotContains(t, yaml, "nginx.ingress.kubernetes.io", "nginx Ingress should not be in the rathole template") + // Plain TCP server — no WebSocket transport section + assert.NotContains(t, yaml, "type = \"websocket\"", "server should use plain TCP, not WebSocket") } // TestWstunnelTemplateUnchanged verifies that the existing wstunnel template is still @@ -84,8 +89,9 @@ func TestWstunnelTemplateUnchanged(t *testing.T) { assert.Contains(t, yaml, "wstunnel", "should use wstunnel image/command") } -// TestRatholeClientAnnotation verifies that addWstunnelClientAnnotation sets -// the rathole annotation and removes any stale wstunnel annotation. +// TestRatholeClientAnnotation verifies that addWstunnelClientAnnotation sets the rathole +// annotation and removes any stale wstunnel annotation when using the WebSocket fallback +// (RatholeCAIssuerName not set). func TestRatholeClientAnnotation(t *testing.T) { fakeClient := fake.NewClientset() @@ -108,6 +114,7 @@ func TestRatholeClientAnnotation(t *testing.T) { Network: Network{ TunnelType: "rathole", WildcardDNS: "tunnel.example.com", + // RatholeCAIssuerName intentionally left empty → WebSocket fallback }, }, clientSet: fakeClient, @@ -139,8 +146,80 @@ func TestRatholeClientAnnotation(t *testing.T) { assert.False(t, wstunnelPresent, "stale wstunnel annotation should be cleared in rathole mode") } +// TestRatholeClientAnnotationTLS verifies that addWstunnelClientAnnotation produces a TLS-mode +// bootstrap command when RatholeCAIssuerName is configured and the cert-manager secret is present. +func TestRatholeClientAnnotationTLS(t *testing.T) { + fakeClient := fake.NewClientset() + + // Pre-create the cert-manager-issued client certificate secret (normally done by cert-manager) + clientCertSecret := &v1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "my-pod-default-rathole-client-tls", + Namespace: "default-wstunnel", + }, + Data: map[string][]byte{ + "ca.crt": []byte("fake-ca-cert"), + "tls.crt": []byte("fake-client-cert"), + "tls.key": []byte("fake-client-key"), + }, + } + _, err := fakeClient.CoreV1().Secrets(clientCertSecret.Namespace).Create(context.Background(), clientCertSecret, metav1.CreateOptions{}) + require.NoError(t, err) + + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-pod", + Namespace: "default", + Annotations: map[string]string{}, + }, + } + _, err = fakeClient.CoreV1().Pods(pod.Namespace).Create(context.Background(), pod, metav1.CreateOptions{}) + require.NoError(t, err) + + p := &Provider{ + config: Config{ + Network: Network{ + TunnelType: "rathole", + WildcardDNS: "tunnel.example.com", + RatholeCAIssuerName: "my-admin-ca", + }, + }, + clientSet: fakeClient, + } + + td := &WstunnelTemplateData{ + Name: "my-pod-default", + Namespace: "default-wstunnel", + RandomPassword: "secrettoken", + WildcardDNS: "tunnel.example.com", + ExposedPorts: []PortMapping{ + {Port: 8080, Name: "http", Protocol: "TCP"}, + }, + } + + err = p.addWstunnelClientAnnotation(context.Background(), pod, td) + require.NoError(t, err) + + ratholeCmd, ok := pod.Annotations[annRatholeClientCmds] + require.True(t, ok, "rathole client command annotation should be present") + assert.NotEmpty(t, ratholeCmd) + + // TLS command should reference the default rathole download URL + assert.Contains(t, ratholeCmd, DefaultRatholeExecutableURL) + + // TLS command should write four distinct base64-decoded files: CA cert, client cert, client key, client TOML + assert.Contains(t, ratholeCmd, "rathole-ca.crt", "command should write CA cert file") + assert.Contains(t, ratholeCmd, "rathole-client.crt", "command should write client cert file") + assert.Contains(t, ratholeCmd, "rathole-client.key", "command should write client key file") + assert.Contains(t, ratholeCmd, "rathole-client.toml", "command should write client TOML file") + + // The stale wstunnel annotation must not be present + _, wstunnelPresent := pod.Annotations[annWSTunnelClientCmds] + assert.False(t, wstunnelPresent, "stale wstunnel annotation should be cleared in rathole TLS mode") +} + // TestRatholeClientAnnotationCustomCommand verifies that a custom RatholeCommand template -// is honoured when set. +// is honoured in WebSocket fallback mode (RatholeCAIssuerName not set). func TestRatholeClientAnnotationCustomCommand(t *testing.T) { fakeClient := fake.NewClientset() @@ -161,6 +240,7 @@ func TestRatholeClientAnnotationCustomCommand(t *testing.T) { TunnelType: "rathole", WildcardDNS: "tunnel.example.com", RatholeCommand: customCmd, + // RatholeCAIssuerName intentionally empty → WebSocket fallback uses RatholeCommand }, }, clientSet: fakeClient, diff --git a/pkg/virtualkubelet/templates/rathole-template.yaml b/pkg/virtualkubelet/templates/rathole-template.yaml index 41ce7ba4..ca5f53fb 100644 --- a/pkg/virtualkubelet/templates/rathole-template.yaml +++ b/pkg/virtualkubelet/templates/rathole-template.yaml @@ -7,9 +7,6 @@ data: server.toml: | [server] bind_addr = "0.0.0.0:2333" - - [server.transport] - type = "websocket" {{- range .ExposedPorts}} [server.services.p{{.Port}}] @@ -31,8 +28,6 @@ spec: metadata: labels: app.kubernetes.io/component: {{.Name}} - annotations: - interlink.eu/wstunnel-path-prefix: {{.RandomPassword}} spec: containers: - name: rathole @@ -97,34 +92,3 @@ spec: {{- end}} {{- end}} {{- end}} ---- -apiVersion: networking.k8s.io/v1 -kind: Ingress -metadata: - name: {{.Name}} - namespace: {{.Namespace}} - annotations: - nginx.ingress.kubernetes.io/proxy-read-timeout: "3600" - nginx.ingress.kubernetes.io/proxy-send-timeout: "3600" - nginx.ingress.kubernetes.io/server-snippets: | - location / { - proxy_set_header Upgrade $http_upgrade; - proxy_set_header Connection "upgrade"; - proxy_http_version 1.1; - proxy_set_header X-Forwarded-For $remote_addr; - proxy_set_header Host $host; - proxy_cache_bypass $http_upgrade; - } - kubernetes.io/ingress.class: "nginx" -spec: - rules: - - host: rathole-{{.Name}}.{{.WildcardDNS}} - http: - paths: - - path: / - pathType: Prefix - backend: - service: - name: {{.Name}} - port: - number: 2333 diff --git a/pkg/virtualkubelet/virtualkubelet.go b/pkg/virtualkubelet/virtualkubelet.go index 374fb1ec..11134fb0 100644 --- a/pkg/virtualkubelet/virtualkubelet.go +++ b/pkg/virtualkubelet/virtualkubelet.go @@ -33,7 +33,10 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/runtime/serializer" + "k8s.io/client-go/dynamic" "k8s.io/client-go/kubernetes" "k8s.io/client-go/kubernetes/scheme" "k8s.io/client-go/tools/clientcmd" @@ -72,9 +75,13 @@ const ( DefaultWstunnelCommand = "curl -L -f -k https://github.com/erebe/wstunnel/releases/download/v10.4.4/wstunnel_10.4.4_linux_amd64.tar.gz -o wstunnel.tar.gz && tar -xzvf wstunnel.tar.gz && chmod +x wstunnel && ./wstunnel client --http-upgrade-path-prefix %s %s ws://%s:80 &" // DefaultRatholeExecutableURL is the default URL to download the rathole executable zip archive. DefaultRatholeExecutableURL = "https://github.com/rapiz1/rathole/releases/download/v0.5.0/rathole-x86_64-unknown-linux-musl.zip" - // DefaultRatholeCommand is the default command template for the rathole client. - // Two %s format verbs are substituted: the rathole download URL and the base64-encoded client TOML config. - DefaultRatholeCommand = "curl -L -f -k %s -o rathole.zip && unzip -q rathole.zip && chmod +x rathole && echo %s | base64 -d > /tmp/rathole-client.toml && ./rathole /tmp/rathole-client.toml &" + // DefaultRatholeCommand is the default command template for the rathole client in TLS mode. + // Five %s format verbs are substituted in order: rathole download URL, base64-encoded CA cert, + // base64-encoded client cert, base64-encoded client key, and base64-encoded client TOML config. + DefaultRatholeCommand = "curl -L -f -k %s -o rathole.zip && unzip -q rathole.zip && chmod +x rathole && echo %s | base64 -d > /tmp/rathole-ca.crt && echo %s | base64 -d > /tmp/rathole-client.crt && echo %s | base64 -d > /tmp/rathole-client.key && echo %s | base64 -d > /tmp/rathole-client.toml && ./rathole /tmp/rathole-client.toml &" + // DefaultRatholeWSCommand is the fallback command template used when no CA issuer is configured + // (WebSocket transport, backward-compatible). Two %s args: download URL and base64 client TOML. + DefaultRatholeWSCommand = "curl -L -f -k %s -o rathole.zip && unzip -q rathole.zip && chmod +x rathole && echo %s | base64 -d > /tmp/rathole-client.toml && ./rathole /tmp/rathole-client.toml &" ) // Annotations for WireGuard and WStunnel configuration @@ -145,6 +152,7 @@ type Provider struct { notifier func(*v1.Pod) onNodeChangeCallback func(*v1.Node) clientSet kubernetes.Interface + dynamicClient dynamic.Interface clientHTTPTransport *http.Transport podIPs []string } @@ -879,6 +887,13 @@ func (p *Provider) createDummyPod(ctx context.Context, originalPod *v1.Pod) (*v1 return nil, nil, fmt.Errorf("failed to apply wstunnel manifests: %w", err) } + // For rathole TLS mode, also create the cert-manager Certificates and Traefik IngressRouteTCP. + if p.config.Network.TunnelType == "rathole" && p.config.Network.RatholeCAIssuerName != "" { + if tlsErr := p.applyRatholeTLSResources(ctx, templateData); tlsErr != nil { + log.G(ctx).Warningf("Failed to apply rathole TLS resources for %s/%s: %v", originalPod.Namespace, originalPod.Name, tlsErr) + } + } + log.G(ctx).Infof("Created wstunnel infrastructure for %s/%s", originalPod.Namespace, originalPod.Name) return createdPod, &templateData, nil } @@ -1317,6 +1332,194 @@ func (p *Provider) cleanupWstunnelResources(ctx context.Context, wstunnelName, n } else { log.G(ctx).Infof("Successfully deleted rathole configmap %s/%s", namespace, wstunnelName+"-rathole-config") } + + // Delete rathole TLS resources (cert-manager Certificates and Traefik IngressRouteTCP) + if p.dynamicClient != nil { + for _, certName := range []string{wstunnelName + "-rathole-server-tls", wstunnelName + "-rathole-client-tls"} { + if delErr := p.deleteUnstructuredResource(ctx, certManagerCertGVR, certName, namespace); delErr != nil { + log.G(ctx).Warningf("Failed to delete rathole cert-manager Certificate %s/%s: %v", namespace, certName, delErr) + } else { + log.G(ctx).Infof("Deleted rathole cert-manager Certificate %s/%s", namespace, certName) + } + } + if delErr := p.deleteUnstructuredResource(ctx, traefikIngressRouteTCPGVR, wstunnelName, namespace); delErr != nil { + log.G(ctx).Warningf("Failed to delete rathole IngressRouteTCP %s/%s: %v", namespace, wstunnelName, delErr) + } else { + log.G(ctx).Infof("Deleted rathole IngressRouteTCP %s/%s", namespace, wstunnelName) + } + } +} + +// GroupVersionResource definitions for cert-manager and Traefik CRDs. +var ( + certManagerCertGVR = schema.GroupVersionResource{ + Group: "cert-manager.io", + Version: "v1", + Resource: "certificates", + } + traefikIngressRouteTCPGVR = schema.GroupVersionResource{ + Group: "traefik.io", + Version: "v1alpha1", + Resource: "ingressroutetcps", + } +) + +// applyUnstructuredResource creates or updates an unstructured Kubernetes resource via the dynamic client. +func (p *Provider) applyUnstructuredResource(ctx context.Context, gvr schema.GroupVersionResource, obj *unstructured.Unstructured) error { + if p.dynamicClient == nil { + return fmt.Errorf("dynamic client not initialised") + } + ns := obj.GetNamespace() + name := obj.GetName() + dr := p.dynamicClient.Resource(gvr).Namespace(ns) + + existing, err := dr.Get(ctx, name, metav1.GetOptions{}) + if apierrors.IsNotFound(err) { + _, err = dr.Create(ctx, obj, metav1.CreateOptions{}) + return err + } + if err != nil { + return err + } + obj.SetResourceVersion(existing.GetResourceVersion()) + _, err = dr.Update(ctx, obj, metav1.UpdateOptions{}) + return err +} + +// deleteUnstructuredResource deletes a namespaced unstructured resource; not-found errors are ignored. +func (p *Provider) deleteUnstructuredResource(ctx context.Context, gvr schema.GroupVersionResource, name, namespace string) error { + if p.dynamicClient == nil { + return nil + } + err := p.dynamicClient.Resource(gvr).Namespace(namespace).Delete(ctx, name, metav1.DeleteOptions{}) + if apierrors.IsNotFound(err) { + return nil + } + return err +} + +// applyRatholeTLSResources creates the cert-manager Certificate resources (server and client) and the +// Traefik IngressRouteTCP that exposes the rathole server via TLS on the websecure entry point. +// The server TLS certificate is served by Traefik; the client certificate is issued so that +// addWstunnelClientAnnotation can embed it in the compute-side bootstrap command. +func (p *Provider) applyRatholeTLSResources(ctx context.Context, td WstunnelTemplateData) error { + if p.dynamicClient == nil { + return fmt.Errorf("dynamic client not initialised; cannot manage cert-manager/Traefik resources") + } + + issuerName := p.config.Network.RatholeCAIssuerName + issuerKind := p.config.Network.RatholeCAIssuerKind + if issuerKind == "" { + issuerKind = "ClusterIssuer" + } + + ratholeHost := fmt.Sprintf("rathole-%s.%s", td.Name, td.WildcardDNS) + ratholeHost = sanitizeFullDNSName(ratholeHost) + + serverCertName := td.Name + "-rathole-server-tls" + clientCertName := td.Name + "-rathole-client-tls" + + // cert-manager Certificate for the server (Traefik TLS termination) + serverCert := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "cert-manager.io/v1", + "kind": "Certificate", + "metadata": map[string]interface{}{ + "name": serverCertName, + "namespace": td.Namespace, + }, + "spec": map[string]interface{}{ + "secretName": serverCertName, + "dnsNames": []interface{}{ratholeHost}, + "issuerRef": map[string]interface{}{ + "name": issuerName, + "kind": issuerKind, + }, + }, + }, + } + if err := p.applyUnstructuredResource(ctx, certManagerCertGVR, serverCert); err != nil { + return fmt.Errorf("failed to apply rathole server Certificate: %w", err) + } + log.G(ctx).Infof("Applied cert-manager Certificate %s/%s for rathole server", td.Namespace, serverCertName) + + // cert-manager Certificate for the client (embedded in the compute-side bootstrap command) + clientCert := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "cert-manager.io/v1", + "kind": "Certificate", + "metadata": map[string]interface{}{ + "name": clientCertName, + "namespace": td.Namespace, + }, + "spec": map[string]interface{}{ + "secretName": clientCertName, + "commonName": "rathole-client", + "usages": []interface{}{"client auth"}, + "issuerRef": map[string]interface{}{ + "name": issuerName, + "kind": issuerKind, + }, + }, + }, + } + if err := p.applyUnstructuredResource(ctx, certManagerCertGVR, clientCert); err != nil { + return fmt.Errorf("failed to apply rathole client Certificate: %w", err) + } + log.G(ctx).Infof("Applied cert-manager Certificate %s/%s for rathole client", td.Namespace, clientCertName) + + // Traefik IngressRouteTCP — TLS termination at Traefik, plain TCP to the rathole server + ingressRoute := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "traefik.io/v1alpha1", + "kind": "IngressRouteTCP", + "metadata": map[string]interface{}{ + "name": td.Name, + "namespace": td.Namespace, + }, + "spec": map[string]interface{}{ + "entryPoints": []interface{}{"websecure"}, + "routes": []interface{}{ + map[string]interface{}{ + "match": fmt.Sprintf("HostSNI(`%s`)", ratholeHost), + "services": []interface{}{ + map[string]interface{}{ + "name": td.Name, + "port": int64(2333), + }, + }, + }, + }, + "tls": map[string]interface{}{ + "secretName": serverCertName, + }, + }, + }, + } + if err := p.applyUnstructuredResource(ctx, traefikIngressRouteTCPGVR, ingressRoute); err != nil { + return fmt.Errorf("failed to apply rathole IngressRouteTCP: %w", err) + } + log.G(ctx).Infof("Applied Traefik IngressRouteTCP %s/%s for rathole (host: %s)", td.Namespace, td.Name, ratholeHost) + + return nil +} + +// waitForRatholeCertSecret polls until cert-manager has issued the given TLS secret or the context is cancelled. +func (p *Provider) waitForRatholeCertSecret(ctx context.Context, secretName, namespace string) error { + timeout := 120 * time.Second + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + secret, err := p.clientSet.CoreV1().Secrets(namespace).Get(ctx, secretName, metav1.GetOptions{}) + if err == nil && len(secret.Data["tls.crt"]) > 0 { + return nil + } + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(2 * time.Second): + } + } + return fmt.Errorf("timed out waiting for cert-manager to issue secret %s/%s", namespace, secretName) } // cleanupPartialWstunnelResources removes specific resources that were created before a failure @@ -2245,6 +2448,12 @@ func (p *Provider) initClientSet(ctx context.Context) error { log.G(ctx).Error(err) return err } + + p.dynamicClient, err = dynamic.NewForConfig(config) + if err != nil { + log.G(ctx).Warningf("Failed to create dynamic client (CRD resources will not be managed): %v", err) + // non-fatal: rathole TLS resources won't be applied, but wstunnel still works + } } return nil From 65a5aaa4efb23fefdba576d45891408d3b5bf9f1 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 7 May 2026 06:30:59 +0000 Subject: [PATCH 06/14] fix lint and code review: goconst constants, cert validation, no-noise cleanup, ticker, nginx Ingress for WS mode Agent-Logs-Url: https://github.com/interlink-hq/interLink/sessions/300c0012-13ca-4381-82ca-f53d8326ae51 Co-authored-by: dciangot <4144326+dciangot@users.noreply.github.com> --- pkg/virtualkubelet/config.go | 16 ++-- pkg/virtualkubelet/mesh.go | 17 ++-- pkg/virtualkubelet/rathole_test.go | 44 ++++++++-- .../templates/rathole-template.yaml | 33 +++++++ pkg/virtualkubelet/virtualkubelet.go | 85 ++++++++++++------- 5 files changed, 149 insertions(+), 46 deletions(-) diff --git a/pkg/virtualkubelet/config.go b/pkg/virtualkubelet/config.go index fd6bb9cb..eb8e5097 100644 --- a/pkg/virtualkubelet/config.go +++ b/pkg/virtualkubelet/config.go @@ -131,7 +131,7 @@ type PodCIDR struct { type Network struct { // EnableTunnel enables WebSocket tunneling for pod port exposure EnableTunnel bool `yaml:"EnableTunnel" default:"false"` - // TunnelType selects the port-forwarding backend: "wstunnel" (default) or "rathole" + // TunnelType selects the port-forwarding backend: "" or "wstunnel" (default, backward-compatible) or "rathole". TunnelType string `yaml:"TunnelType,omitempty"` // WildcardDNS specifies the DNS domain for generating tunnel endpoints WildcardDNS string `yaml:"WildcardDNS,omitempty"` @@ -144,11 +144,17 @@ type Network struct { // RatholeExecutableURL specifies the URL to download the rathole executable zip archive // (default is "https://github.com/rapiz1/rathole/releases/download/v0.5.0/rathole-x86_64-unknown-linux-musl.zip") RatholeExecutableURL string `yaml:"RatholeExecutableURL,omitempty"` - // RatholeCommand specifies a custom command template for setting up rathole clients. - // Five %s format verbs are substituted in order: the rathole download URL, base64-encoded CA cert, - // base64-encoded client cert, base64-encoded client key, and base64-encoded client TOML config. - // When RatholeCAIssuerName is empty the legacy two-argument WebSocket command is used instead. + // RatholeCommand specifies a custom command template for rathole clients in TLS mode + // (i.e., when RatholeCAIssuerName is set). Five %s format verbs are substituted in order: + // the rathole download URL, base64-encoded CA cert, base64-encoded client cert, + // base64-encoded client key, and base64-encoded client TOML config. + // Default: DefaultRatholeCommand. RatholeCommand string `yaml:"RatholeCommand,omitempty"` + // RatholeWSCommand specifies a custom command template for rathole clients in WebSocket fallback + // mode (i.e., when RatholeCAIssuerName is empty). Two %s format verbs are substituted in order: + // the rathole download URL and the base64-encoded client TOML config. + // Default: DefaultRatholeWSCommand. + RatholeWSCommand string `yaml:"RatholeWSCommand,omitempty"` // RatholeCAIssuerName is the cert-manager ClusterIssuer or Issuer name for the admin-provided CA. // When set, rathole uses TLS transport; cert-manager issues both the server and client certificates. // A Traefik IngressRouteTCP resource is created to expose the rathole server via TLS on port 443. diff --git a/pkg/virtualkubelet/mesh.go b/pkg/virtualkubelet/mesh.go index d4452791..ebb043ba 100644 --- a/pkg/virtualkubelet/mesh.go +++ b/pkg/virtualkubelet/mesh.go @@ -291,7 +291,7 @@ PersistentKeepalive = %d pod.Annotations["interlink.eu/wireguard-client-snippet"] = wgSnippet - case p.config.Network.TunnelType == "rathole": + case p.config.Network.TunnelType == tunnelTypeRathole: // Rathole mode: build a client TOML config and generate the client bootstrap command. // When RatholeCAIssuerName is set, use TLS transport with cert-manager-issued certificates; // otherwise fall back to WebSocket transport for backward compatibility. @@ -321,6 +321,13 @@ PersistentKeepalive = %d return fmt.Errorf("failed to read rathole client certificate secret: %w", err) } + // Validate all required keys are present and non-empty. + for _, key := range []string{"ca.crt", "tls.crt", "tls.key"} { + if len(certSecret.Data[key]) == 0 { + return fmt.Errorf("rathole client certificate secret %s/%s is missing required key %q", td.Namespace, clientCertSecretName, key) + } + } + caCrtB64 := base64.StdEncoding.EncodeToString(certSecret.Data["ca.crt"]) clientCrtB64 := base64.StdEncoding.EncodeToString(certSecret.Data["tls.crt"]) clientKeyB64 := base64.StdEncoding.EncodeToString(certSecret.Data["tls.key"]) @@ -334,7 +341,7 @@ PersistentKeepalive = %d tomlBuilder.WriteString("cert = \"/tmp/rathole-client.crt\"\n") tomlBuilder.WriteString("key = \"/tmp/rathole-client.key\"\n\n") for _, port := range td.ExposedPorts { - if strings.ToUpper(port.Protocol) == "UDP" { + if strings.ToUpper(port.Protocol) == protocolUDP { log.G(ctx).Debugf("Skipping UDP port %d in rathole client config (TLS transport forwards TCP only)", port.Port) continue } @@ -357,7 +364,7 @@ PersistentKeepalive = %d fmt.Fprintf(&tomlBuilder, "[client]\nremote_addr = \"%s:80\"\n\n", ratholeEndpoint) tomlBuilder.WriteString("[client.transport]\ntype = \"websocket\"\n\n") for _, port := range td.ExposedPorts { - if strings.ToUpper(port.Protocol) == "UDP" { + if strings.ToUpper(port.Protocol) == protocolUDP { log.G(ctx).Debugf("Skipping UDP port %d in rathole client config (websocket transport forwards TCP only)", port.Port) continue } @@ -367,7 +374,7 @@ PersistentKeepalive = %d configB64 := base64.StdEncoding.EncodeToString([]byte(tomlBuilder.String())) - ratholeWSCmd := p.config.Network.RatholeCommand + ratholeWSCmd := p.config.Network.RatholeWSCommand if ratholeWSCmd == "" { ratholeWSCmd = DefaultRatholeWSCommand } @@ -381,7 +388,7 @@ PersistentKeepalive = %d default: var rOptions []string for _, port := range td.ExposedPorts { - if strings.ToUpper(port.Protocol) == "UDP" { + if strings.ToUpper(port.Protocol) == protocolUDP { continue } rOptions = append(rOptions, fmt.Sprintf("-R tcp://0.0.0.0:%d:localhost:%d", port.Port, port.Port)) diff --git a/pkg/virtualkubelet/rathole_test.go b/pkg/virtualkubelet/rathole_test.go index 5bacce4b..ad63cbb9 100644 --- a/pkg/virtualkubelet/rathole_test.go +++ b/pkg/virtualkubelet/rathole_test.go @@ -51,11 +51,45 @@ func TestRatholeTemplateExecution(t *testing.T) { // The nginx Ingress is no longer part of the template; TLS ingress is managed separately // via the Traefik IngressRouteTCP applied by applyRatholeTLSResources. - assert.NotContains(t, yaml, "nginx.ingress.kubernetes.io", "nginx Ingress should not be in the rathole template") + assert.NotContains(t, yaml, "nginx.ingress.kubernetes.io", "nginx Ingress should not be in the rathole template without HasNginxIngress") // Plain TCP server — no WebSocket transport section assert.NotContains(t, yaml, "type = \"websocket\"", "server should use plain TCP, not WebSocket") } +// TestRatholeTemplateWebSocketMode verifies that the rathole template includes a nginx Ingress +// when HasNginxIngress is true (WebSocket fallback mode, no CA issuer configured). +func TestRatholeTemplateWebSocketMode(t *testing.T) { + p := &Provider{ + config: Config{ + Network: Network{ + TunnelType: "rathole", + WildcardDNS: "tunnel.example.com", + }, + }, + clientSet: fake.NewClientset(), + } + + data := WstunnelTemplateData{ + Name: "my-pod-default", + Namespace: "default-wstunnel", + RandomPassword: "abc123", + WildcardDNS: "tunnel.example.com", + HasNginxIngress: true, + ExposedPorts: []PortMapping{ + {Port: 8080, Name: "http", Protocol: "TCP"}, + }, + } + + ctx := context.Background() + yaml, err := p.executeWstunnelTemplate(ctx, data) + require.NoError(t, err) + assert.NotEmpty(t, yaml) + + // WebSocket mode: nginx Ingress should be present so the client can reach port 80 + assert.Contains(t, yaml, "nginx.ingress.kubernetes.io", "nginx Ingress should be present in WebSocket mode") + assert.Contains(t, yaml, "rathole-my-pod-default.tunnel.example.com", "Ingress host should match rathole DNS") +} + // TestWstunnelTemplateUnchanged verifies that the existing wstunnel template is still // selected when TunnelType is empty (backward-compatible default). func TestWstunnelTemplateUnchanged(t *testing.T) { @@ -237,10 +271,10 @@ func TestRatholeClientAnnotationCustomCommand(t *testing.T) { p := &Provider{ config: Config{ Network: Network{ - TunnelType: "rathole", - WildcardDNS: "tunnel.example.com", - RatholeCommand: customCmd, - // RatholeCAIssuerName intentionally empty → WebSocket fallback uses RatholeCommand + TunnelType: "rathole", + WildcardDNS: "tunnel.example.com", + RatholeWSCommand: customCmd, + // RatholeCAIssuerName intentionally empty → WebSocket fallback uses RatholeWSCommand }, }, clientSet: fakeClient, diff --git a/pkg/virtualkubelet/templates/rathole-template.yaml b/pkg/virtualkubelet/templates/rathole-template.yaml index ca5f53fb..8333b6d9 100644 --- a/pkg/virtualkubelet/templates/rathole-template.yaml +++ b/pkg/virtualkubelet/templates/rathole-template.yaml @@ -92,3 +92,36 @@ spec: {{- end}} {{- end}} {{- end}} +{{- if .HasNginxIngress}} +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{.Name}} + namespace: {{.Namespace}} + annotations: + nginx.ingress.kubernetes.io/proxy-read-timeout: "3600" + nginx.ingress.kubernetes.io/proxy-send-timeout: "3600" + nginx.ingress.kubernetes.io/server-snippets: | + location / { + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_http_version 1.1; + proxy_set_header X-Forwarded-For $remote_addr; + proxy_set_header Host $host; + proxy_cache_bypass $http_upgrade; + } + kubernetes.io/ingress.class: "nginx" +spec: + rules: + - host: rathole-{{.Name}}.{{.WildcardDNS}} + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: {{.Name}} + port: + number: 2333 +{{- end}} diff --git a/pkg/virtualkubelet/virtualkubelet.go b/pkg/virtualkubelet/virtualkubelet.go index 11134fb0..d6e43e32 100644 --- a/pkg/virtualkubelet/virtualkubelet.go +++ b/pkg/virtualkubelet/virtualkubelet.go @@ -56,22 +56,26 @@ var defaultRatholeTemplate embed.FS var meshScriptTemplate embed.FS const ( - DefaultCPUCapacity = "100" - DefaultMemoryCapacity = "3000G" - DefaultPodCapacity = "10000" - DefaultGPUCapacity = "0" - DefaultFPGACapacity = "0" - DefaultListenPort = 10250 - NamespaceKey = "namespace" - NameKey = "name" - CREATE = 0 - DELETE = 1 - nvidiaGPU = "nvidia.com/gpu" - amdGPU = "amd.com/gpu" - intelGPU = "intel.com/gpu" - xilinxFPGA = "xilinx.com/fpga" - intelFPGA = "intel.com/fpga" - DefaultProtocol = "TCP" + DefaultCPUCapacity = "100" + DefaultMemoryCapacity = "3000G" + DefaultPodCapacity = "10000" + DefaultGPUCapacity = "0" + DefaultFPGACapacity = "0" + DefaultListenPort = 10250 + NamespaceKey = "namespace" + NameKey = "name" + CREATE = 0 + DELETE = 1 + nvidiaGPU = "nvidia.com/gpu" + amdGPU = "amd.com/gpu" + intelGPU = "intel.com/gpu" + xilinxFPGA = "xilinx.com/fpga" + intelFPGA = "intel.com/fpga" + DefaultProtocol = "TCP" + // protocolUDP is the protocol string for UDP ports; used to skip UDP in tunnel client configs. + protocolUDP = "UDP" + // tunnelTypeRathole is the TunnelType value that selects the rathole port-forwarding backend. + tunnelTypeRathole = "rathole" DefaultWstunnelCommand = "curl -L -f -k https://github.com/erebe/wstunnel/releases/download/v10.4.4/wstunnel_10.4.4_linux_amd64.tar.gz -o wstunnel.tar.gz && tar -xzvf wstunnel.tar.gz && chmod +x wstunnel && ./wstunnel client --http-upgrade-path-prefix %s %s ws://%s:80 &" // DefaultRatholeExecutableURL is the default URL to download the rathole executable zip archive. DefaultRatholeExecutableURL = "https://github.com/rapiz1/rathole/releases/download/v0.5.0/rathole-x86_64-unknown-linux-musl.zip" @@ -114,6 +118,9 @@ type WstunnelTemplateData struct { Volumes []v1.Volume PodLabels map[string]string // Labels from original pod PodAnnotations map[string]string // Annotations from original pod + // HasNginxIngress controls whether the rathole template emits a nginx Ingress for + // WebSocket mode. It is true when TunnelType=="rathole" and RatholeCAIssuerName=="". + HasNginxIngress bool } type PortMapping struct { @@ -861,6 +868,9 @@ func (p *Provider) createDummyPod(ctx context.Context, originalPod *v1.Pod) (*v1 Volumes: extractVolumesForLocalContainers(originalPod), PodLabels: podLabels, PodAnnotations: podAnnotations, + // In rathole WebSocket mode (no TLS issuer), expose the rathole server via nginx Ingress so + // the compute-side client can reach port 80 over WebSocket. + HasNginxIngress: p.config.Network.TunnelType == tunnelTypeRathole && p.config.Network.RatholeCAIssuerName == "", } log.G(ctx).Debugf("LocalInitContainers count: %d", len(templateData.LocalInitContainers)) @@ -888,13 +898,13 @@ func (p *Provider) createDummyPod(ctx context.Context, originalPod *v1.Pod) (*v1 } // For rathole TLS mode, also create the cert-manager Certificates and Traefik IngressRouteTCP. - if p.config.Network.TunnelType == "rathole" && p.config.Network.RatholeCAIssuerName != "" { + if p.config.Network.TunnelType == tunnelTypeRathole && p.config.Network.RatholeCAIssuerName != "" { if tlsErr := p.applyRatholeTLSResources(ctx, templateData); tlsErr != nil { log.G(ctx).Warningf("Failed to apply rathole TLS resources for %s/%s: %v", originalPod.Namespace, originalPod.Name, tlsErr) } } - log.G(ctx).Infof("Created wstunnel infrastructure for %s/%s", originalPod.Namespace, originalPod.Name) + log.G(ctx).Infof("Created tunnel infrastructure (%s) for %s/%s", p.config.Network.TunnelType, originalPod.Namespace, originalPod.Name) return createdPod, &templateData, nil } @@ -988,7 +998,7 @@ func (p *Provider) executeWstunnelTemplate(ctx context.Context, data WstunnelTem content []byte err error ) - if p.config.Network.TunnelType == "rathole" { + if p.config.Network.TunnelType == tunnelTypeRathole { content, err = defaultRatholeTemplate.ReadFile("templates/rathole-template.yaml") if err != nil { return "", fmt.Errorf("failed to read embedded rathole template: %w", err) @@ -1319,17 +1329,17 @@ func (p *Provider) cleanupWstunnelResources(ctx context.Context, wstunnelName, n // Delete wstunnel wireguard configmap (used in full-mesh / wstunnel mode) err = p.clientSet.CoreV1().ConfigMaps(namespace).Delete(ctx, wstunnelName+"-wg-config", metav1.DeleteOptions{}) - if err != nil { + if err != nil && !apierrors.IsNotFound(err) { log.G(ctx).Warningf("Failed to delete wstunnel configmap %s/%s: %v", namespace, wstunnelName+"-wg-config", err) - } else { + } else if err == nil { log.G(ctx).Infof("Successfully deleted wstunnel configmap %s/%s", namespace, wstunnelName+"-wg-config") } // Delete rathole configmap (used in rathole mode) err = p.clientSet.CoreV1().ConfigMaps(namespace).Delete(ctx, wstunnelName+"-rathole-config", metav1.DeleteOptions{}) - if err != nil { + if err != nil && !apierrors.IsNotFound(err) { log.G(ctx).Warningf("Failed to delete rathole configmap %s/%s: %v", namespace, wstunnelName+"-rathole-config", err) - } else { + } else if err == nil { log.G(ctx).Infof("Successfully deleted rathole configmap %s/%s", namespace, wstunnelName+"-rathole-config") } @@ -1504,22 +1514,35 @@ func (p *Provider) applyRatholeTLSResources(ctx context.Context, td WstunnelTemp return nil } -// waitForRatholeCertSecret polls until cert-manager has issued the given TLS secret or the context is cancelled. +// waitForRatholeCertSecret polls until cert-manager has issued the given TLS secret (with all +// required data keys: ca.crt, tls.crt, tls.key) or the context is cancelled. func (p *Provider) waitForRatholeCertSecret(ctx context.Context, secretName, namespace string) error { - timeout := 120 * time.Second - deadline := time.Now().Add(timeout) - for time.Now().Before(deadline) { + const pollInterval = 2 * time.Second + const timeout = 120 * time.Second + + ticker := time.NewTicker(pollInterval) + defer ticker.Stop() + timer := time.NewTimer(timeout) + defer timer.Stop() + + for { secret, err := p.clientSet.CoreV1().Secrets(namespace).Get(ctx, secretName, metav1.GetOptions{}) - if err == nil && len(secret.Data["tls.crt"]) > 0 { - return nil + if err == nil { + allPresent := len(secret.Data["ca.crt"]) > 0 && + len(secret.Data["tls.crt"]) > 0 && + len(secret.Data["tls.key"]) > 0 + if allPresent { + return nil + } } select { case <-ctx.Done(): return ctx.Err() - case <-time.After(2 * time.Second): + case <-timer.C: + return fmt.Errorf("timed out waiting for cert-manager to issue secret %s/%s", namespace, secretName) + case <-ticker.C: } } - return fmt.Errorf("timed out waiting for cert-manager to issue secret %s/%s", namespace, secretName) } // cleanupPartialWstunnelResources removes specific resources that were created before a failure From 3681321667f561fe10e39c2854b0f6d58dee1d53 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 7 May 2026 06:54:59 +0000 Subject: [PATCH 07/14] use rathole-org/rathole repo: update release URL (gnu, not musl) and add --client flag Agent-Logs-Url: https://github.com/interlink-hq/interLink/sessions/764971d7-9f40-49cf-b508-b26a749d454b Co-authored-by: dciangot <4144326+dciangot@users.noreply.github.com> --- pkg/virtualkubelet/config.go | 2 +- pkg/virtualkubelet/templates/rathole-template.yaml | 1 + pkg/virtualkubelet/virtualkubelet.go | 7 ++++--- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pkg/virtualkubelet/config.go b/pkg/virtualkubelet/config.go index eb8e5097..6b9185c5 100644 --- a/pkg/virtualkubelet/config.go +++ b/pkg/virtualkubelet/config.go @@ -142,7 +142,7 @@ type Network struct { // WstunnelCommand specifies the command template for setting up wstunnel clients WstunnelCommand string `yaml:"WstunnelCommand,omitempty"` // RatholeExecutableURL specifies the URL to download the rathole executable zip archive - // (default is "https://github.com/rapiz1/rathole/releases/download/v0.5.0/rathole-x86_64-unknown-linux-musl.zip") + // (default is "https://github.com/rathole-org/rathole/releases/download/v0.5.0/rathole-x86_64-unknown-linux-gnu.zip") RatholeExecutableURL string `yaml:"RatholeExecutableURL,omitempty"` // RatholeCommand specifies a custom command template for rathole clients in TLS mode // (i.e., when RatholeCAIssuerName is set). Five %s format verbs are substituted in order: diff --git a/pkg/virtualkubelet/templates/rathole-template.yaml b/pkg/virtualkubelet/templates/rathole-template.yaml index 8333b6d9..bcd0ac30 100644 --- a/pkg/virtualkubelet/templates/rathole-template.yaml +++ b/pkg/virtualkubelet/templates/rathole-template.yaml @@ -31,6 +31,7 @@ spec: spec: containers: - name: rathole + # Docker Hub image published by https://github.com/rathole-org/rathole CI image: rapiz1/rathole:v0.5.0 imagePullPolicy: IfNotPresent args: ["--server", "/etc/rathole/server.toml"] diff --git a/pkg/virtualkubelet/virtualkubelet.go b/pkg/virtualkubelet/virtualkubelet.go index d6e43e32..99427e42 100644 --- a/pkg/virtualkubelet/virtualkubelet.go +++ b/pkg/virtualkubelet/virtualkubelet.go @@ -78,14 +78,15 @@ const ( tunnelTypeRathole = "rathole" DefaultWstunnelCommand = "curl -L -f -k https://github.com/erebe/wstunnel/releases/download/v10.4.4/wstunnel_10.4.4_linux_amd64.tar.gz -o wstunnel.tar.gz && tar -xzvf wstunnel.tar.gz && chmod +x wstunnel && ./wstunnel client --http-upgrade-path-prefix %s %s ws://%s:80 &" // DefaultRatholeExecutableURL is the default URL to download the rathole executable zip archive. - DefaultRatholeExecutableURL = "https://github.com/rapiz1/rathole/releases/download/v0.5.0/rathole-x86_64-unknown-linux-musl.zip" + // Source: https://github.com/rathole-org/rathole (note: x86_64 musl was dropped in v0.5.0; use gnu). + DefaultRatholeExecutableURL = "https://github.com/rathole-org/rathole/releases/download/v0.5.0/rathole-x86_64-unknown-linux-gnu.zip" // DefaultRatholeCommand is the default command template for the rathole client in TLS mode. // Five %s format verbs are substituted in order: rathole download URL, base64-encoded CA cert, // base64-encoded client cert, base64-encoded client key, and base64-encoded client TOML config. - DefaultRatholeCommand = "curl -L -f -k %s -o rathole.zip && unzip -q rathole.zip && chmod +x rathole && echo %s | base64 -d > /tmp/rathole-ca.crt && echo %s | base64 -d > /tmp/rathole-client.crt && echo %s | base64 -d > /tmp/rathole-client.key && echo %s | base64 -d > /tmp/rathole-client.toml && ./rathole /tmp/rathole-client.toml &" + DefaultRatholeCommand = "curl -L -f -k %s -o rathole.zip && unzip -q rathole.zip && chmod +x rathole && echo %s | base64 -d > /tmp/rathole-ca.crt && echo %s | base64 -d > /tmp/rathole-client.crt && echo %s | base64 -d > /tmp/rathole-client.key && echo %s | base64 -d > /tmp/rathole-client.toml && ./rathole --client /tmp/rathole-client.toml &" // DefaultRatholeWSCommand is the fallback command template used when no CA issuer is configured // (WebSocket transport, backward-compatible). Two %s args: download URL and base64 client TOML. - DefaultRatholeWSCommand = "curl -L -f -k %s -o rathole.zip && unzip -q rathole.zip && chmod +x rathole && echo %s | base64 -d > /tmp/rathole-client.toml && ./rathole /tmp/rathole-client.toml &" + DefaultRatholeWSCommand = "curl -L -f -k %s -o rathole.zip && unzip -q rathole.zip && chmod +x rathole && echo %s | base64 -d > /tmp/rathole-client.toml && ./rathole --client /tmp/rathole-client.toml &" ) // Annotations for WireGuard and WStunnel configuration From 8133b3c9bd9aea64f72cf0d4e1ff07f33713ad6a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 7 May 2026 10:23:48 +0000 Subject: [PATCH 08/14] address review: hard errors, notfound suppression, naming fix, cert polling, UDP filter, verb validation, test fix Agent-Logs-Url: https://github.com/interlink-hq/interLink/sessions/35ce6acd-f8e9-43b3-8aaa-e3a97405d8d4 Co-authored-by: dciangot <4144326+dciangot@users.noreply.github.com> --- pkg/virtualkubelet/config_test.go | 11 ++++- pkg/virtualkubelet/mesh.go | 6 +++ .../templates/rathole-template.yaml | 12 +++-- pkg/virtualkubelet/virtualkubelet.go | 49 ++++++++++++++----- 4 files changed, 62 insertions(+), 16 deletions(-) diff --git a/pkg/virtualkubelet/config_test.go b/pkg/virtualkubelet/config_test.go index 7960582e..9726396c 100644 --- a/pkg/virtualkubelet/config_test.go +++ b/pkg/virtualkubelet/config_test.go @@ -1,6 +1,7 @@ package virtualkubelet import ( + "strings" "testing" "github.com/stretchr/testify/assert" @@ -130,7 +131,10 @@ func TestNetwork_RatholeConfiguration(t *testing.T) { TunnelType: "rathole", WildcardDNS: "tunnel.example.com", RatholeExecutableURL: "https://example.com/rathole.zip", - RatholeCommand: "curl -L %s -o rathole.zip && unzip rathole.zip && echo %s | base64 -d > /tmp/client.toml && ./rathole /tmp/client.toml &", + // RatholeCommand is the TLS-mode template: 5 %s args (URL, CA cert, client cert, client key, client TOML) + RatholeCommand: "curl -L %s -o r.zip && unzip r.zip && echo %s | base64 -d > /tmp/ca.crt && echo %s | base64 -d > /tmp/cl.crt && echo %s | base64 -d > /tmp/cl.key && echo %s | base64 -d > /tmp/c.toml && ./rathole --client /tmp/c.toml &", + // RatholeWSCommand is the WebSocket-fallback template: 2 %s args (URL, client TOML) + RatholeWSCommand: "curl -L %s -o r.zip && unzip r.zip && echo %s | base64 -d > /tmp/c.toml && ./rathole --client /tmp/c.toml &", } assert.True(t, network.EnableTunnel) @@ -138,6 +142,11 @@ func TestNetwork_RatholeConfiguration(t *testing.T) { assert.Equal(t, "tunnel.example.com", network.WildcardDNS) assert.Equal(t, "https://example.com/rathole.zip", network.RatholeExecutableURL) assert.NotEmpty(t, network.RatholeCommand) + assert.NotEmpty(t, network.RatholeWSCommand) + // Validate that RatholeCommand contains exactly 5 %s verbs (TLS mode) + assert.Equal(t, 5, strings.Count(network.RatholeCommand, "%s"), "RatholeCommand must have exactly 5 %%s format verbs for TLS mode") + // Validate that RatholeWSCommand contains exactly 2 %s verbs (WebSocket fallback) + assert.Equal(t, 2, strings.Count(network.RatholeWSCommand, "%s"), "RatholeWSCommand must have exactly 2 %%s format verbs for WebSocket mode") } func TestNetwork_WstunnelDefaultTunnelType(t *testing.T) { diff --git a/pkg/virtualkubelet/mesh.go b/pkg/virtualkubelet/mesh.go index ebb043ba..50de924a 100644 --- a/pkg/virtualkubelet/mesh.go +++ b/pkg/virtualkubelet/mesh.go @@ -356,6 +356,9 @@ PersistentKeepalive = %d ratholeCmd = DefaultRatholeCommand } mainCmd = fmt.Sprintf(ratholeCmd, ratholeURL, caCrtB64, clientCrtB64, clientKeyB64, configB64) + if strings.Contains(mainCmd, "%!(") { + return fmt.Errorf("RatholeCommand has wrong number of format verbs (expected 5 %%s): %q", p.config.Network.RatholeCommand) + } } else { // WebSocket fallback (no CA issuer configured) log.G(ctx).Debugf("RatholeCAIssuerName not set; using WebSocket transport for pod %s/%s", pod.Namespace, pod.Name) @@ -379,6 +382,9 @@ PersistentKeepalive = %d ratholeWSCmd = DefaultRatholeWSCommand } mainCmd = fmt.Sprintf(ratholeWSCmd, ratholeURL, configB64) + if strings.Contains(mainCmd, "%!(") { + return fmt.Errorf("RatholeWSCommand has wrong number of format verbs (expected 2 %%s): %q", p.config.Network.RatholeWSCommand) + } } // Remove any stale wstunnel annotation and set the rathole one diff --git a/pkg/virtualkubelet/templates/rathole-template.yaml b/pkg/virtualkubelet/templates/rathole-template.yaml index bcd0ac30..2314a1f2 100644 --- a/pkg/virtualkubelet/templates/rathole-template.yaml +++ b/pkg/virtualkubelet/templates/rathole-template.yaml @@ -8,11 +8,13 @@ data: [server] bind_addr = "0.0.0.0:2333" {{- range .ExposedPorts}} + {{- if ne .Protocol "UDP"}} [server.services.p{{.Port}}] token = "{{$.RandomPassword}}" bind_addr = "0.0.0.0:{{.Port}}" {{- end}} + {{- end}} --- apiVersion: apps/v1 kind: Deployment @@ -40,9 +42,11 @@ spec: name: control protocol: TCP {{- range .ExposedPorts}} + {{- if ne .Protocol "UDP"}} - containerPort: {{.Port}} name: {{if .Name}}{{.Name}}{{else}}port-{{.Port}}{{end}} - protocol: {{.Protocol}} + protocol: TCP + {{- end}} {{- end}} volumeMounts: - name: rathole-config @@ -84,13 +88,13 @@ spec: name: control protocol: TCP {{- range .ExposedPorts}} + {{- if ne .Protocol "UDP"}} {{- if ne .Port 2333}}{{/* skip control port 2333 to avoid conflicts with the rathole server listener */}} - port: {{.Port}} targetPort: {{.Port}} name: {{if .Name}}{{.Name}}{{else}}port-{{.Port}}{{end}} - {{- if .Protocol}} - protocol: {{.Protocol}} - {{- end}} + protocol: TCP + {{- end}} {{- end}} {{- end}} {{- if .HasNginxIngress}} diff --git a/pkg/virtualkubelet/virtualkubelet.go b/pkg/virtualkubelet/virtualkubelet.go index 99427e42..cd2b66ae 100644 --- a/pkg/virtualkubelet/virtualkubelet.go +++ b/pkg/virtualkubelet/virtualkubelet.go @@ -101,6 +101,8 @@ const ( annDisableOffloadContainers = "interlink.eu/disable-offload-containers" // comma-separated container names annDisableOffloadInitContainers = "interlink.eu/disable-offload-init-containers" // comma-separated init container names annMeshNetworkDisabled = "interlink.eu/mesh-network" // set to "disabled" to opt out of mesh networking + annShadowSameNS = "interlink.eu/shadow-same-ns" // set to "true" to create shadow resources in the same namespace + annShadowSameNSValue = "true" // expected value for annShadowSameNS ) type WstunnelTemplateData struct { @@ -764,7 +766,7 @@ func (p *Provider) createDummyPod(ctx context.Context, originalPod *v1.Pod) (*v1 isSameNamespace := false if originalPod.Annotations != nil { - if val, ok := originalPod.Annotations["interlink.eu/shadow-same-ns"]; ok && val == "true" { + if val, ok := originalPod.Annotations["interlink.eu/shadow-same-ns"]; ok && val == annShadowSameNSValue { isSameNamespace = true } } @@ -899,9 +901,11 @@ func (p *Provider) createDummyPod(ctx context.Context, originalPod *v1.Pod) (*v1 } // For rathole TLS mode, also create the cert-manager Certificates and Traefik IngressRouteTCP. + // This is a hard requirement: if TLS resource creation fails when RatholeCAIssuerName is set, + // the annotation path will block waiting for a cert secret that will never appear. if p.config.Network.TunnelType == tunnelTypeRathole && p.config.Network.RatholeCAIssuerName != "" { if tlsErr := p.applyRatholeTLSResources(ctx, templateData); tlsErr != nil { - log.G(ctx).Warningf("Failed to apply rathole TLS resources for %s/%s: %v", originalPod.Namespace, originalPod.Name, tlsErr) + return nil, nil, fmt.Errorf("failed to apply rathole TLS resources for %s/%s: %w", originalPod.Namespace, originalPod.Name, tlsErr) } } @@ -1320,11 +1324,12 @@ func (p *Provider) cleanupWstunnelResources(ctx context.Context, wstunnelName, n log.G(ctx).Infof("Successfully deleted tunnel service %s/%s", namespace, wstunnelName) } - // Delete ingress + // Delete ingress (nginx WebSocket ingress used in rathole WS-fallback mode or wstunnel mode). + // Ingress is absent in rathole TLS mode (Traefik IngressRouteTCP is used instead); suppress NotFound. err = p.clientSet.NetworkingV1().Ingresses(namespace).Delete(ctx, wstunnelName, metav1.DeleteOptions{}) - if err != nil { + if err != nil && !apierrors.IsNotFound(err) { log.G(ctx).Warningf("Failed to delete tunnel ingress %s/%s: %v", namespace, wstunnelName, err) - } else { + } else if err == nil { log.G(ctx).Infof("Successfully deleted tunnel ingress %s/%s", namespace, wstunnelName) } @@ -1535,6 +1540,10 @@ func (p *Provider) waitForRatholeCertSecret(ctx context.Context, secretName, nam if allPresent { return nil } + } else if !apierrors.IsNotFound(err) { + // Fail fast for non-transient errors (e.g. Forbidden, Unauthorized) so the caller + // gets actionable feedback instead of waiting the full 120s timeout. + return fmt.Errorf("unexpected error polling for cert secret %s/%s: %w", namespace, secretName, err) } select { case <-ctx.Done(): @@ -1740,14 +1749,27 @@ func isMeshNetworkingDisabled(pod *v1.Pod) bool { // handleWstunnelCreation creates wstunnel infrastructure and returns the pod IP func (p *Provider) handleWstunnelCreation(ctx context.Context, pod *v1.Pod) (string, error) { - wstunnelName := pod.Name + "-wstunnel" + // Compute resource names using the same logic as createDummyPod so that cleanup + // always targets the resources that were actually created. + isSameNamespace := false + if pod.Annotations != nil { + if val, ok := pod.Annotations["interlink.eu/shadow-same-ns"]; ok && val == annShadowSameNSValue { + isSameNamespace = true + } + } + var wstunnelName, wstunnelNS string + if isSameNamespace { + wstunnelName, wstunnelNS = computeWstunnelResourceNamesForSameNamespace(pod.Name, pod.Namespace) + } else { + wstunnelName, wstunnelNS = computeWstunnelResourceNames(pod.Name, pod.Namespace) + } // Create wstunnel infrastructure outside virtual node for port exposure dummyPod, templateData, err := p.createDummyPod(ctx, pod) if err != nil { log.G(ctx).Errorf("Failed to create wstunnel infrastructure for %s/%s: %v", pod.Namespace, pod.Name, err) // Clean up any partially created resources - p.cleanupWstunnelResources(ctx, wstunnelName, pod.Namespace) + p.cleanupWstunnelResources(ctx, wstunnelName, wstunnelNS) return "", fmt.Errorf("failed to create wstunnel infrastructure for exposed ports: %w", err) } @@ -1759,11 +1781,11 @@ func (p *Provider) handleWstunnelCreation(ctx context.Context, pod *v1.Pod) (str } } - podIP, err := p.waitForWstunnelPodIP(ctx, dummyPod, timeout, wstunnelName, pod.Namespace) + podIP, err := p.waitForWstunnelPodIP(ctx, dummyPod, timeout, wstunnelName, wstunnelNS) if err != nil { log.G(ctx).Errorf("Failed to get wstunnel pod IP for %s/%s: %v", pod.Namespace, pod.Name, err) // Clean up resources since we failed to get a working pod - p.cleanupWstunnelResources(ctx, wstunnelName, pod.Namespace) + p.cleanupWstunnelResources(ctx, wstunnelName, wstunnelNS) return "", err } @@ -2039,7 +2061,7 @@ func (p *Provider) DeletePod(ctx context.Context, pod *v1.Pod) (err error) { isSameNamespace := false if pod.Annotations != nil { - if val, ok := pod.Annotations["interlink.eu/shadow-same-ns"]; ok && val == "true" { + if val, ok := pod.Annotations["interlink.eu/shadow-same-ns"]; ok && val == annShadowSameNSValue { isSameNamespace = true } } @@ -2475,8 +2497,13 @@ func (p *Provider) initClientSet(ctx context.Context) error { p.dynamicClient, err = dynamic.NewForConfig(config) if err != nil { + // In rathole TLS mode the dynamic client is required to create cert-manager and Traefik + // resources. Fail hard so the operator learns about the misconfiguration immediately. + if p.config.Network.TunnelType == tunnelTypeRathole && p.config.Network.RatholeCAIssuerName != "" { + log.G(ctx).Error(err) + return fmt.Errorf("dynamic client required for rathole TLS mode but could not be initialised: %w", err) + } log.G(ctx).Warningf("Failed to create dynamic client (CRD resources will not be managed): %v", err) - // non-fatal: rathole TLS resources won't be applied, but wstunnel still works } } From 43a7d19c83fb8ed6610bb5e1a328a9f0017e6afe Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 7 May 2026 10:28:46 +0000 Subject: [PATCH 09/14] use annShadowSameNS constant; validate format verb count with strings.Count before Sprintf Agent-Logs-Url: https://github.com/interlink-hq/interLink/sessions/35ce6acd-f8e9-43b3-8aaa-e3a97405d8d4 Co-authored-by: dciangot <4144326+dciangot@users.noreply.github.com> --- pkg/virtualkubelet/mesh.go | 18 ++++++++++++------ pkg/virtualkubelet/virtualkubelet.go | 6 +++--- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/pkg/virtualkubelet/mesh.go b/pkg/virtualkubelet/mesh.go index 50de924a..345962c8 100644 --- a/pkg/virtualkubelet/mesh.go +++ b/pkg/virtualkubelet/mesh.go @@ -355,10 +355,13 @@ PersistentKeepalive = %d if ratholeCmd == "" { ratholeCmd = DefaultRatholeCommand } - mainCmd = fmt.Sprintf(ratholeCmd, ratholeURL, caCrtB64, clientCrtB64, clientKeyB64, configB64) - if strings.Contains(mainCmd, "%!(") { - return fmt.Errorf("RatholeCommand has wrong number of format verbs (expected 5 %%s): %q", p.config.Network.RatholeCommand) + // Validate that the TLS command template has exactly 5 %s format verbs + // (URL, CA cert, client cert, client key, client TOML). + if strings.Count(ratholeCmd, "%s") != 5 { + return fmt.Errorf("RatholeCommand must have exactly 5 %%s format verbs (url, ca, cert, key, toml); got %d in %q", + strings.Count(ratholeCmd, "%s"), p.config.Network.RatholeCommand) } + mainCmd = fmt.Sprintf(ratholeCmd, ratholeURL, caCrtB64, clientCrtB64, clientKeyB64, configB64) } else { // WebSocket fallback (no CA issuer configured) log.G(ctx).Debugf("RatholeCAIssuerName not set; using WebSocket transport for pod %s/%s", pod.Namespace, pod.Name) @@ -381,10 +384,13 @@ PersistentKeepalive = %d if ratholeWSCmd == "" { ratholeWSCmd = DefaultRatholeWSCommand } - mainCmd = fmt.Sprintf(ratholeWSCmd, ratholeURL, configB64) - if strings.Contains(mainCmd, "%!(") { - return fmt.Errorf("RatholeWSCommand has wrong number of format verbs (expected 2 %%s): %q", p.config.Network.RatholeWSCommand) + // Validate that the WebSocket command template has exactly 2 %s format verbs + // (URL, client TOML). + if strings.Count(ratholeWSCmd, "%s") != 2 { + return fmt.Errorf("RatholeWSCommand must have exactly 2 %%s format verbs (url, toml); got %d in %q", + strings.Count(ratholeWSCmd, "%s"), p.config.Network.RatholeWSCommand) } + mainCmd = fmt.Sprintf(ratholeWSCmd, ratholeURL, configB64) } // Remove any stale wstunnel annotation and set the rathole one diff --git a/pkg/virtualkubelet/virtualkubelet.go b/pkg/virtualkubelet/virtualkubelet.go index cd2b66ae..b75659e0 100644 --- a/pkg/virtualkubelet/virtualkubelet.go +++ b/pkg/virtualkubelet/virtualkubelet.go @@ -766,7 +766,7 @@ func (p *Provider) createDummyPod(ctx context.Context, originalPod *v1.Pod) (*v1 isSameNamespace := false if originalPod.Annotations != nil { - if val, ok := originalPod.Annotations["interlink.eu/shadow-same-ns"]; ok && val == annShadowSameNSValue { + if val, ok := originalPod.Annotations[annShadowSameNS]; ok && val == annShadowSameNSValue { isSameNamespace = true } } @@ -1753,7 +1753,7 @@ func (p *Provider) handleWstunnelCreation(ctx context.Context, pod *v1.Pod) (str // always targets the resources that were actually created. isSameNamespace := false if pod.Annotations != nil { - if val, ok := pod.Annotations["interlink.eu/shadow-same-ns"]; ok && val == annShadowSameNSValue { + if val, ok := pod.Annotations[annShadowSameNS]; ok && val == annShadowSameNSValue { isSameNamespace = true } } @@ -2061,7 +2061,7 @@ func (p *Provider) DeletePod(ctx context.Context, pod *v1.Pod) (err error) { isSameNamespace := false if pod.Annotations != nil { - if val, ok := pod.Annotations["interlink.eu/shadow-same-ns"]; ok && val == annShadowSameNSValue { + if val, ok := pod.Annotations[annShadowSameNS]; ok && val == annShadowSameNSValue { isSameNamespace = true } } From f8c73c6246817971ddcaa1e4a4e4cbf166033447 Mon Sep 17 00:00:00 2001 From: rocky Cloud User Date: Tue, 16 Jun 2026 16:45:30 +0200 Subject: [PATCH 10/14] test: add port-forwarding e2e test suite and vk-test-set integration Add a comprehensive test suite for the rathole tunnel port-forwarding feature introduced in PR #529. ## test/portforward/ (new) - Docker Compose two-network topology: nginx backend + 4 rathole containers (TCP and WebSocket modes) on isolated networks. - pytest suite (test_tunnel.py, 27 tests): verify TCP and WS tunnels forward HTTP traffic, multi-port isolation, and annotation TOML format. - pytest suite (test_tls_k8s.py, 10 tests): verify cert-manager Certificates, Traefik IngressRouteTCP, TLS secrets, pod annotation, and cleanup lifecycle. - conftest.py with Docker tunnel URL fixtures + Kubernetes API fixtures. - pyproject.toml with all dependencies (kubernetes>=28.0). ## test/vk-test-set (in submodule) - templates/130-port-forwarding.yaml: pod with containerPort 8080 running a Python HTTP server; validates Running phase + server startup log. - vktestset/port_forward_test.py (10 tests): end-to-end infrastructure test using a module-scoped fixture; verifies: * Pod reaches Running phase on VK node * interlink.eu/rathole-client-commands annotation is set * Shadow namespace ({namespace}-wstunnel) is created * Rathole server Deployment and Service are provisioned * cert-manager Certificates (server + client) reach Ready=True * TLS Secrets contain ca.crt, tls.crt, tls.key * Traefik IngressRouteTCP is created * Annotation encodes valid TOML with [client], remote_addr :443, TLS transport, and p8080 service ## plugins/slurm (submodule) - prepare.go: inject interlink.eu/rathole-client-commands into the SLURM job script prefix (mirrors existing wstunnel-client-commands handling) so the rathole client bootstrap command runs alongside the job. ## CI scripts - k3s-test-setup.sh: Traefik v3 CRDs, cert-manager, CA ClusterIssuer chain, interlink namespace, VK RBAC with cert-manager/traefik perms, VK config with Network.TunnelType=rathole and RatholeCAIssuerName, rathole compose. - k3s-test-run.sh: portforward pytest section (isolated venv). - k3s-test-cleanup.sh: TLS resource state capture + compose teardown. - integration-test-k3s.yaml: updated artifact paths. Signed-off-by: dciangot Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: rocky Cloud User --- .github/workflows/integration-test-k3s.yaml | 8 +- scripts/k3s-test-cleanup.sh | 36 ++ scripts/k3s-test-run.sh | 87 +++- scripts/k3s-test-setup.sh | 166 ++++++- test/portforward/.gitignore | 1 + test/portforward/README.md | 106 +++++ test/portforward/configs/client-tcp.toml | 14 + test/portforward/configs/client-ws.toml | 16 + test/portforward/configs/nginx.conf | 17 + test/portforward/configs/server-tcp.toml | 12 + test/portforward/configs/server-ws.toml | 16 + test/portforward/conftest.py | 163 +++++++ test/portforward/docker-compose.yml | 105 +++++ test/portforward/pyproject.toml | 21 + test/portforward/test_tls_k8s.py | 482 ++++++++++++++++++++ test/portforward/test_tunnel.py | 285 ++++++++++++ 16 files changed, 1522 insertions(+), 13 deletions(-) create mode 100644 test/portforward/.gitignore create mode 100644 test/portforward/README.md create mode 100644 test/portforward/configs/client-tcp.toml create mode 100644 test/portforward/configs/client-ws.toml create mode 100644 test/portforward/configs/nginx.conf create mode 100644 test/portforward/configs/server-tcp.toml create mode 100644 test/portforward/configs/server-ws.toml create mode 100644 test/portforward/conftest.py create mode 100644 test/portforward/docker-compose.yml create mode 100644 test/portforward/pyproject.toml create mode 100644 test/portforward/test_tls_k8s.py create mode 100644 test/portforward/test_tunnel.py diff --git a/.github/workflows/integration-test-k3s.yaml b/.github/workflows/integration-test-k3s.yaml index fede6102..90ec8813 100644 --- a/.github/workflows/integration-test-k3s.yaml +++ b/.github/workflows/integration-test-k3s.yaml @@ -37,7 +37,13 @@ jobs: uses: actions/upload-artifact@v4 with: name: e2e-test-logs - path: /tmp/interlink-test-* + path: | + /tmp/interlink-test-*/test-results.log + /tmp/interlink-test-*/portforward-test-results.log + /tmp/interlink-test-*/rathole-compose.log + /tmp/interlink-test-*/interlink-api.log + /tmp/interlink-test-*/interlink-plugin.log + /tmp/interlink-test-*/vk.log retention-days: 7 if-no-files-found: ignore diff --git a/scripts/k3s-test-cleanup.sh b/scripts/k3s-test-cleanup.sh index 76a98394..8c2e95ec 100755 --- a/scripts/k3s-test-cleanup.sh +++ b/scripts/k3s-test-cleanup.sh @@ -4,8 +4,32 @@ # Usage: ./scripts/k3s-test-cleanup.sh # Requirements: sudo access (for K3s uninstall) +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" + echo "=== Cleaning up interLink integration test environment ===" +# --------------------------------------------------------------------------- +# Stop rathole port-forwarding test environment (PR #529) +# Do this first to capture logs before any other teardown. +# --------------------------------------------------------------------------- +echo "" +echo "=== Stopping rathole tunnel test environment ===" +if [ -f /tmp/interlink-test-dir.txt ]; then + TEST_DIR=$(cat /tmp/interlink-test-dir.txt) + echo "Saving rathole container logs to ${TEST_DIR}..." + docker compose \ + -f "${PROJECT_ROOT}/test/portforward/docker-compose.yml" \ + --project-name interlink-portforward \ + logs 2>/dev/null > "${TEST_DIR}/rathole-compose.log" || true +fi + +docker compose \ + -f "${PROJECT_ROOT}/test/portforward/docker-compose.yml" \ + --project-name interlink-portforward \ + down --volumes 2>/dev/null || true +echo "✓ Rathole tunnel containers stopped" + # --------------------------------------------------------------------------- # Stop Virtual Kubelet host process # --------------------------------------------------------------------------- @@ -26,6 +50,18 @@ fi # Kill any remaining VK processes by binary name pkill -f "interlink-test.*vk$" 2>/dev/null || true +# --------------------------------------------------------------------------- +# Stop background log-streaming processes +# --------------------------------------------------------------------------- +if [ -f /tmp/interlink-test-dir.txt ]; then + TEST_DIR=$(cat /tmp/interlink-test-dir.txt) + echo "Saving k8s tunnel TLS resource state to ${TEST_DIR}..." + # Capture cert-manager Certificate status for debugging + kubectl get certificates --all-namespaces -o yaml > "${TEST_DIR}/cert-manager-certificates.yaml" 2>/dev/null || true + kubectl get clusterissuers -o yaml > "${TEST_DIR}/cert-manager-issuers.yaml" 2>/dev/null || true + kubectl get ingressroutetcps --all-namespaces -o yaml > "${TEST_DIR}/traefik-ingressroutetcps.yaml" 2>/dev/null || true +fi + # --------------------------------------------------------------------------- # Stop background log-streaming processes # --------------------------------------------------------------------------- diff --git a/scripts/k3s-test-run.sh b/scripts/k3s-test-run.sh index e5122171..af29944c 100755 --- a/scripts/k3s-test-run.sh +++ b/scripts/k3s-test-run.sh @@ -136,15 +136,86 @@ echo "=========================================" echo "" if [ ${TEST_EXIT_CODE} -eq 0 ]; then - echo "✓ All tests passed!" + echo "✓ All VK integration tests passed!" else - echo "✗ Some tests failed (exit code: ${TEST_EXIT_CODE})" - echo "" - echo "Check logs for details:" + echo "✗ Some VK integration tests failed (exit code: ${TEST_EXIT_CODE})" echo " - Test results: ${TEST_DIR}/test-results.log" - echo " - Plugin: ${TEST_DIR}/plugin.log" - echo " - interLink: ${TEST_DIR}/interlink.log" - echo " - Virtual Kubelet: kubectl logs -n interlink -l app=virtual-kubelet" + echo " - Plugin logs: ${TEST_DIR}/interlink-plugin.log" + echo " - API logs: ${TEST_DIR}/interlink-api.log" + echo " - VK logs: ${TEST_DIR}/vk.log" fi -exit ${TEST_EXIT_CODE} +# --------------------------------------------------------------------------- +# Port-forwarding network tests (rathole tunnel, PR #529) +# Tests the TCP and WebSocket tunnel backends independently of Kubernetes. +# The rathole containers were started by k3s-test-setup.sh; pytest connects +# to the exposed host ports (18080, 18082, 19090) to verify end-to-end flow. +# --------------------------------------------------------------------------- +echo "" +echo "=== Running port-forwarding network tests ===" +echo "=========================================" + +PF_DIR="${PROJECT_ROOT}/test/portforward" + +# Create an isolated venv so portforward deps don't interfere with vk-test-set. +python3 -m venv "${TEST_DIR}/.venv-portforward" +# shellcheck source=/dev/null +source "${TEST_DIR}/.venv-portforward/bin/activate" + +# Upgrade pip first, then install deps directly to avoid hatchling build-backend +# issues on older pip versions (Python 3.9 system pip on some distros). +pip install -q --upgrade pip +pip install -q pytest requests pytest-timeout "kubernetes>=28.0" || { + echo "ERROR: Failed to install portforward test dependencies" + deactivate + PF_EXIT_CODE=1 +} + +if [ "${PF_EXIT_CODE:-0}" -ne 1 ]; then + cd "${PF_DIR}" + + # Give rathole clients extra time to connect in the CI environment. + # The annotation-format and isolation tests run without Docker, so they + # pass even if the tunnel containers haven't finished handshaking yet. + TUNNEL_WAIT_TIMEOUT=90 \ + pytest -v \ + 2>&1 | tee "${TEST_DIR}/portforward-test-results.log" + PF_EXIT_CODE=${PIPESTATUS[0]} + + deactivate +fi + +echo "=========================================" +echo "" + +if [ "${PF_EXIT_CODE:-0}" -eq 0 ]; then + echo "✓ All port-forwarding tunnel tests passed!" +else + echo "✗ Some port-forwarding tunnel tests failed (exit code: ${PF_EXIT_CODE})" + echo " - Test results: ${TEST_DIR}/portforward-test-results.log" + echo " - Rathole server (TCP):" + docker compose \ + -f "${PROJECT_ROOT}/test/portforward/docker-compose.yml" \ + --project-name interlink-portforward \ + logs rathole-server-tcp 2>/dev/null | tail -30 || true + echo " - Rathole server (WS):" + docker compose \ + -f "${PROJECT_ROOT}/test/portforward/docker-compose.yml" \ + --project-name interlink-portforward \ + logs rathole-server-ws 2>/dev/null | tail -30 || true +fi + +# Combine both exit codes: fail if either test suite failed. +OVERALL_EXIT_CODE=0 +[ "${TEST_EXIT_CODE}" -ne 0 ] && OVERALL_EXIT_CODE="${TEST_EXIT_CODE}" +[ "${PF_EXIT_CODE:-0}" -ne 0 ] && OVERALL_EXIT_CODE="${PF_EXIT_CODE}" + +if [ "${OVERALL_EXIT_CODE}" -eq 0 ]; then + echo "✓ All test suites passed!" +else + echo "✗ One or more test suites failed" + echo " VK integration tests: $([ ${TEST_EXIT_CODE} -eq 0 ] && echo PASS || echo FAIL)" + echo " Port-forwarding tests: $([ ${PF_EXIT_CODE:-0} -eq 0 ] && echo PASS || echo FAIL)" +fi + +exit ${OVERALL_EXIT_CODE} diff --git a/scripts/k3s-test-setup.sh b/scripts/k3s-test-setup.sh index faa3e971..784cd78c 100755 --- a/scripts/k3s-test-setup.sh +++ b/scripts/k3s-test-setup.sh @@ -73,10 +73,113 @@ fi echo "✓ K3s is ready!" kubectl get nodes + +# --------------------------------------------------------------------------- +# Install Traefik v3 CRDs +# The VK uses traefik.io/v1alpha1 (Traefik v3 API) to create IngressRouteTCP +# resources. k3s 1.31 bundles Traefik v2 (different API group), so we install +# only the CRD definitions – no Traefik controller is needed for resource +# creation testing. +# --------------------------------------------------------------------------- +echo "" +echo "=== Installing Traefik v3 CRDs ===" +kubectl apply -f \ + https://raw.githubusercontent.com/traefik/traefik/v3.3.6/docs/content/reference/dynamic-configuration/kubernetes-crd-definition-v1.yml \ + 2>&1 | tee "${TEST_DIR}/traefik-crd-install.log" +echo "✓ Traefik v3 CRDs installed" + # --------------------------------------------------------------------------- -# Build Docker images from source +# Install cert-manager +# Required so the VK can create cert-manager Certificate CRs and have them +# actually issued as TLS secrets in the rathole TLS mode. # --------------------------------------------------------------------------- echo "" +echo "=== Installing cert-manager ===" +CERT_MANAGER_VERSION="${CERT_MANAGER_VERSION:-v1.16.2}" +kubectl apply -f \ + "https://github.com/cert-manager/cert-manager/releases/download/${CERT_MANAGER_VERSION}/cert-manager.yaml" \ + 2>&1 | tee "${TEST_DIR}/cert-manager-install.log" + +echo "Waiting for cert-manager deployments to be available..." +kubectl wait --for=condition=Available deployment/cert-manager \ + -n cert-manager --timeout=180s +kubectl wait --for=condition=Available deployment/cert-manager-webhook \ + -n cert-manager --timeout=180s +kubectl wait --for=condition=Available deployment/cert-manager-cainjector \ + -n cert-manager --timeout=180s +echo "✓ cert-manager is ready" + +# --------------------------------------------------------------------------- +# Create a self-signed CA ClusterIssuer for interLink tunnel TLS tests +# The two-step approach (bootstrap self-signed → CA cert → CA issuer) ensures +# that the issued certificate secrets contain a valid ca.crt field, which the +# VK checks before embedding certs in the pod annotation. +# --------------------------------------------------------------------------- +echo "" +echo "=== Creating interLink test CA ClusterIssuer ===" +kubectl apply -f - <<'CA_YAML' +--- +apiVersion: cert-manager.io/v1 +kind: ClusterIssuer +metadata: + name: interlink-selfsigned-bootstrap +spec: + selfSigned: {} +--- +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: interlink-test-ca + namespace: cert-manager +spec: + isCA: true + commonName: interlink-test-ca + secretName: interlink-test-ca-secret + duration: 87600h + privateKey: + algorithm: ECDSA + size: 256 + issuerRef: + name: interlink-selfsigned-bootstrap + kind: ClusterIssuer +--- +apiVersion: cert-manager.io/v1 +kind: ClusterIssuer +metadata: + name: interlink-test-ca +spec: + ca: + secretName: interlink-test-ca-secret +CA_YAML + +# Wait for the bootstrap CA certificate to be issued +echo "Waiting for interlink-test-ca certificate to be issued..." +for i in $(seq 1 30); do + if kubectl get secret interlink-test-ca-secret -n cert-manager &>/dev/null; then + echo "✓ interlink-test-ca secret is ready" + break + fi + echo " Waiting for CA secret... ($i/30)" + sleep 5 +done + +if ! kubectl get secret interlink-test-ca-secret -n cert-manager &>/dev/null; then + echo "WARNING: interlink-test-ca-secret was not issued in time – TLS tests may fail" + kubectl describe certificate interlink-test-ca -n cert-manager || true +fi + +# Wait for CA ClusterIssuer to become Ready +for i in $(seq 1 20); do + ISSUER_READY=$(kubectl get clusterissuer interlink-test-ca \ + -o jsonpath='{.status.conditions[?(@.type=="Ready")].status}' 2>/dev/null || echo "") + if [ "${ISSUER_READY}" = "True" ]; then + echo "✓ interlink-test-ca ClusterIssuer is Ready" + break + fi + echo " Waiting for ClusterIssuer to be ready... ($i/20)" + sleep 5 +done +echo "" echo "=== Building Docker images ===" echo "Building interLink API image..." @@ -229,6 +332,43 @@ docker logs -f interlink-plugin > "${TEST_DIR}/interlink-plugin.log" 2>&1 & echo $! > "${TEST_DIR}/plugin-log.pid" echo " Plugin logs streaming to: ${TEST_DIR}/interlink-plugin.log" +# --------------------------------------------------------------------------- +# Start rathole port-forwarding test environment (PR #529) +# Runs independently of Kubernetes: a standalone two-network Docker topology +# that verifies TCP and WebSocket tunnel connectivity end-to-end. +# --------------------------------------------------------------------------- +echo "" +echo "=== Starting rathole tunnel test environment ===" + +# Pre-pull images so that docker compose startup is fast and failures are clear. +echo " Pulling container images for port-forwarding test..." +docker pull rapiz1/rathole:v0.5.0 2>&1 | tail -3 || \ + echo "WARNING: Could not pre-pull rapiz1/rathole:v0.5.0 – compose will try on startup" +docker pull nginx:alpine 2>&1 | tail -3 || true + +docker compose \ + -f "${PROJECT_ROOT}/test/portforward/docker-compose.yml" \ + --project-name interlink-portforward \ + up -d 2>&1 | tee "${TEST_DIR}/rathole-compose-up.log" + +# Verify at least the rathole server containers are starting. +# They use restart:on-failure so transient connection errors auto-heal. +RATHOLE_RUNNING=$(docker compose \ + -f "${PROJECT_ROOT}/test/portforward/docker-compose.yml" \ + --project-name interlink-portforward \ + ps --services --filter status=running 2>/dev/null | wc -l) + +if [ "${RATHOLE_RUNNING}" -gt 0 ]; then + echo "✓ Rathole tunnel containers started (${RATHOLE_RUNNING} services running)" +else + echo "WARNING: No rathole containers are running yet – they may still be starting" +fi + +docker compose \ + -f "${PROJECT_ROOT}/test/portforward/docker-compose.yml" \ + --project-name interlink-portforward \ + ps + # --------------------------------------------------------------------------- # Start interLink API container # --------------------------------------------------------------------------- @@ -294,7 +434,7 @@ rules: verbs: ["update", "create", "get", "list", "watch", "patch"] - apiGroups: [""] resources: ["configmaps", "secrets", "services", "serviceaccounts", "namespaces"] - verbs: ["get", "list", "watch"] + verbs: ["get", "list", "watch", "create", "update", "patch", "delete"] - apiGroups: [""] resources: ["serviceaccounts/token"] verbs: ["create", "get", "list"] @@ -313,6 +453,18 @@ rules: - apiGroups: [""] resources: ["events"] verbs: ["create", "patch"] +- apiGroups: ["apps"] + resources: ["deployments"] + verbs: ["create", "get", "list", "watch", "update", "patch", "delete"] +- apiGroups: ["networking.k8s.io"] + resources: ["ingresses"] + verbs: ["create", "get", "list", "watch", "update", "patch", "delete"] +- apiGroups: ["cert-manager.io"] + resources: ["certificates"] + verbs: ["create", "get", "list", "watch", "update", "patch", "delete"] +- apiGroups: ["traefik.io"] + resources: ["ingressroutetcps"] + verbs: ["create", "get", "list", "watch", "update", "patch", "delete"] - apiGroups: ["certificates.k8s.io"] resources: ["certificatesigningrequests"] verbs: ["create", "get", "list", "watch", "delete"] @@ -339,8 +491,9 @@ subjects: YAML echo "✓ Service account and RBAC created" -# --------------------------------------------------------------------------- -# Build Virtual Kubelet binary +# Create the interlink namespace used by vk-test-set +kubectl create namespace interlink --dry-run=client -o yaml | kubectl apply -f - +echo "✓ interlink namespace ready" # --------------------------------------------------------------------------- echo "" echo "=== Building Virtual Kubelet binary ===" @@ -407,6 +560,11 @@ HTTP: Insecure: true KubeletHTTP: Insecure: true +Network: + EnableTunnel: true + TunnelType: "rathole" + WildcardDNS: "tunnel.test.local" + RatholeCAIssuerName: "interlink-test-ca" EOF # --------------------------------------------------------------------------- diff --git a/test/portforward/.gitignore b/test/portforward/.gitignore new file mode 100644 index 00000000..c18dd8d8 --- /dev/null +++ b/test/portforward/.gitignore @@ -0,0 +1 @@ +__pycache__/ diff --git a/test/portforward/README.md b/test/portforward/README.md new file mode 100644 index 00000000..8eea5295 --- /dev/null +++ b/test/portforward/README.md @@ -0,0 +1,106 @@ +# Port-Forwarding Tunnel Tests + +Integration tests for the rathole tunnel backend added in [PR #529](https://github.com/interlink-hq/interLink/pull/529), which abstracts the port-forwarding middleware to support `rathole` as an alternative to `wstunnel`. + +## What is tested + +| Test class | Transport | interLink code path | +|---|---|---| +| `TestTCPTunnel` | TCP (default) | `TunnelType="rathole"`, `RatholeCAIssuerName` set → TLS mode | +| `TestWebSocketTunnel` | WebSocket | `TunnelType="rathole"`, no CA issuer → `DefaultRatholeWSCommand` | +| `TestTCPMultiPort` | TCP | Multiple `[server.services.pNNNN]` entries in `rathole-template.yaml` | +| `TestNetworkIsolation` | — | Backend is unreachable except through the tunnel | +| `TestAnnotationCommandFormat` | — | Validates `DefaultRatholeWSCommand` / `DefaultRatholeCommand` format verbs | + +## Network topology + +``` +[remote network – isolated "HPC" side] + backend (nginx:alpine) + port 80 → "Hello from remote backend (port 80)" + port 9090 → "Metrics from remote backend (port 9090)" + +[cluster network – "Kubernetes" side] + rathole-server-tcp (TCP transport) → host:18080, host:19090 + rathole-server-ws (WS transport) → host:18082 + +[bridge: remote + cluster] + rathole-client-tcp (forwards backend:80 → server:8080, backend:9090 → server:9090) + rathole-client-ws (forwards backend:80 → server:8082 via WebSocket) +``` + +The backend has **no** ports exposed to the host, so the only path to reach it is through a rathole tunnel — matching the real interLink deployment where the remote service is inside the HPC network. + +## Prerequisites + +- Docker Engine ≥ 20.10 with the Compose plugin +- Python ≥ 3.8 + pip + +## Quickstart + +```bash +# 1. Start the test environment +docker compose up -d + +# 2. Install Python dependencies +pip install -e . +# or: pip install pytest requests pytest-timeout + +# 3. Run all tests +pytest -v + +# 4. Tear down +docker compose down -v +``` + +### Run a specific test class + +```bash +# TCP tunnel only +pytest -v test_tunnel.py::TestTCPTunnel + +# WebSocket tunnel only +pytest -v test_tunnel.py::TestWebSocketTunnel + +# Multi-port forwarding +pytest -v test_tunnel.py::TestTCPMultiPort + +# Annotation command format checks (no docker needed) +pytest -v test_tunnel.py::TestAnnotationCommandFormat +``` + +### Useful docker compose commands + +```bash +# Check container status +docker compose ps + +# Stream container logs (useful for debugging tunnel handshake) +docker compose logs -f + +# Restart a single service (e.g. to test tunnel reconnect) +docker compose restart rathole-client-tcp + +# Watch rathole server logs +docker compose logs -f rathole-server-tcp +``` + +## Environment variables + +| Variable | Default | Description | +|---|---|---| +| `TCP_HTTP_URL` | `http://localhost:18080` | TCP tunnel HTTP endpoint | +| `TCP_METRICS_URL` | `http://localhost:19090` | TCP tunnel secondary port | +| `WS_HTTP_URL` | `http://localhost:18082` | WebSocket tunnel HTTP endpoint | +| `TUNNEL_WAIT_TIMEOUT` | `60` | Seconds to wait for tunnels to become ready | + +## Troubleshooting + +**Tests time out waiting for tunnels** +Run `docker compose logs rathole-client-tcp` to see if the client is failing to connect. The client uses `restart: on-failure` so it will retry automatically once the server is up. Increase `TUNNEL_WAIT_TIMEOUT` if your machine is slow. + +**`rapiz1/rathole:v0.5.0` image not found** +Ensure Docker Hub access is available. The image is the same one used in `rathole-template.yaml`. + +**Port conflicts on 18080 / 18082 / 19090** +Override with environment variables before running `docker compose up -d`, or edit the `ports:` section in `docker-compose.yml`. diff --git a/test/portforward/configs/client-tcp.toml b/test/portforward/configs/client-tcp.toml new file mode 100644 index 00000000..fec1560e --- /dev/null +++ b/test/portforward/configs/client-tcp.toml @@ -0,0 +1,14 @@ +# Rathole client config – TCP transport mode. +# Mirrors the TOML embedded in the annotation written by +# addWstunnelClientAnnotation (tunnelTypeRathole, no CA issuer) but using +# direct TCP transport rather than WebSocket (the WebSocket variant is in client-ws.toml). +[client] +remote_addr = "rathole-server-tcp:2333" + +[client.services.http] +token = "interlink-test-token" +local_addr = "backend:80" + +[client.services.metrics] +token = "interlink-test-token" +local_addr = "backend:9090" diff --git a/test/portforward/configs/client-ws.toml b/test/portforward/configs/client-ws.toml new file mode 100644 index 00000000..9cfe0615 --- /dev/null +++ b/test/portforward/configs/client-ws.toml @@ -0,0 +1,16 @@ +# Rathole client config – WebSocket transport mode. +# Mirrors the TOML generated by addWstunnelClientAnnotation when TunnelType=="rathole" +# and RatholeCAIssuerName is empty (the DefaultRatholeWSCommand path). +# The client connects over WebSocket; in production it would go through nginx on port 80. +[client] +remote_addr = "rathole-server-ws:2334" + +[client.transport] +type = "websocket" + +[client.transport.websocket] +tls = false + +[client.services.http] +token = "interlink-test-token" +local_addr = "backend:80" diff --git a/test/portforward/configs/nginx.conf b/test/portforward/configs/nginx.conf new file mode 100644 index 00000000..006a5c84 --- /dev/null +++ b/test/portforward/configs/nginx.conf @@ -0,0 +1,17 @@ +server { + listen 80; + server_name _; + location / { + return 200 "Hello from remote backend (port 80)\n"; + add_header Content-Type text/plain; + } +} + +server { + listen 9090; + server_name _; + location / { + return 200 "Metrics from remote backend (port 9090)\n"; + add_header Content-Type text/plain; + } +} diff --git a/test/portforward/configs/server-tcp.toml b/test/portforward/configs/server-tcp.toml new file mode 100644 index 00000000..a522a94f --- /dev/null +++ b/test/portforward/configs/server-tcp.toml @@ -0,0 +1,12 @@ +# Rathole server config – TCP transport mode. +# Mirrors the ConfigMap rendered by pkg/virtualkubelet/templates/rathole-template.yaml. +[server] +bind_addr = "0.0.0.0:2333" + +[server.services.http] +token = "interlink-test-token" +bind_addr = "0.0.0.0:8080" + +[server.services.metrics] +token = "interlink-test-token" +bind_addr = "0.0.0.0:9090" diff --git a/test/portforward/configs/server-ws.toml b/test/portforward/configs/server-ws.toml new file mode 100644 index 00000000..9fbc2a0c --- /dev/null +++ b/test/portforward/configs/server-ws.toml @@ -0,0 +1,16 @@ +# Rathole server config – WebSocket transport mode. +# This is what interLink uses when RatholeCAIssuerName is NOT set (DefaultRatholeWSCommand path). +# In production a nginx Ingress proxies WebSocket connections to port 2334; here the +# client connects directly so the test needs no nginx sidecar. +[server] +bind_addr = "0.0.0.0:2334" + +[server.transport] +type = "websocket" + +[server.transport.websocket] +tls = false + +[server.services.http] +token = "interlink-test-token" +bind_addr = "0.0.0.0:8082" diff --git a/test/portforward/conftest.py b/test/portforward/conftest.py new file mode 100644 index 00000000..d36d9e70 --- /dev/null +++ b/test/portforward/conftest.py @@ -0,0 +1,163 @@ +""" +Pytest fixtures for the rathole port-forwarding integration tests. + +The tests assume that docker compose is running (docker compose up -d) before +pytest is invoked. Each fixture waits up to WAIT_TIMEOUT seconds for its +target URL to become reachable so that slow container start-ups don't cause +false failures. + +For Kubernetes TLS resource tests (test_tls_k8s.py) set KUBECONFIG before +running; those tests are skipped automatically when KUBECONFIG is absent. +""" + +import time +import os +import socket + +import pytest +import requests + +# ── tuneable constants ────────────────────────────────────────────────────── +WAIT_TIMEOUT = int(os.environ.get("TUNNEL_WAIT_TIMEOUT", "60")) +WAIT_INTERVAL = 1.0 + +TCP_HTTP_URL = os.environ.get("TCP_HTTP_URL", "http://localhost:18080") +TCP_METRICS_URL = os.environ.get("TCP_METRICS_URL", "http://localhost:19090") +WS_HTTP_URL = os.environ.get("WS_HTTP_URL", "http://localhost:18082") + + +# ── helpers ───────────────────────────────────────────────────────────────── + +def _wait_for_http(url: str, timeout: int = WAIT_TIMEOUT) -> None: + """ + Poll *url* until it returns a non-5xx HTTP response or *timeout* seconds + elapse, whichever comes first. + + Raises TimeoutError if the service does not become reachable in time. + """ + deadline = time.monotonic() + timeout + last_exc: Exception = RuntimeError("never tried") + + while time.monotonic() < deadline: + try: + resp = requests.get(url, timeout=3) + if resp.status_code < 500: + return + except (requests.exceptions.ConnectionError, + requests.exceptions.Timeout) as exc: + last_exc = exc + time.sleep(WAIT_INTERVAL) + + raise TimeoutError( + f"Service at {url!r} did not become reachable within {timeout}s " + f"(last error: {last_exc})" + ) + + +def _wait_for_tcp(host: str, port: int, timeout: int = WAIT_TIMEOUT) -> None: + """Wait until a raw TCP connection to host:port succeeds.""" + deadline = time.monotonic() + timeout + while time.monotonic() < deadline: + try: + with socket.create_connection((host, port), timeout=2): + return + except OSError: + time.sleep(WAIT_INTERVAL) + raise TimeoutError( + f"TCP port {host}:{port} did not open within {timeout}s" + ) + + +# ── session-scoped fixtures ────────────────────────────────────────────────── + +@pytest.fixture(scope="session") +def tcp_http_url() -> str: + """ + URL reachable through the TCP-mode rathole tunnel. + Points at the nginx backend running in the isolated "remote" network. + """ + _wait_for_http(TCP_HTTP_URL) + return TCP_HTTP_URL + + +@pytest.fixture(scope="session") +def tcp_metrics_url() -> str: + """ + URL reachable on the secondary port (9090) through the TCP-mode tunnel. + Tests that multi-port forwarding works correctly. + """ + _wait_for_http(TCP_METRICS_URL) + return TCP_METRICS_URL + + +@pytest.fixture(scope="session") +def ws_http_url() -> str: + """ + URL reachable through the WebSocket-mode rathole tunnel. + This exercises the DefaultRatholeWSCommand code path in interLink. + """ + _wait_for_http(WS_HTTP_URL) + return WS_HTTP_URL + + +@pytest.fixture(scope="session") +def http_session() -> requests.Session: + """A shared requests Session with a sensible default timeout.""" + session = requests.Session() + session.headers.update({"User-Agent": "interlink-portforward-test/1.0"}) + return session + + +# ── Kubernetes fixtures (used by test_tls_k8s.py) ──────────────────────────── + +@pytest.fixture(scope="session") +def k8s_core_v1(): + """ + kubernetes.client.CoreV1Api instance. + Skips tests in the current session if KUBECONFIG is not set. + """ + kubeconfig = os.environ.get("KUBECONFIG") + if not kubeconfig: + pytest.skip("KUBECONFIG not set") + try: + from kubernetes import client as k8s_client + from kubernetes import config as k8s_config + k8s_config.load_kube_config(kubeconfig) + return k8s_client.CoreV1Api() + except Exception as exc: # noqa: BLE001 + pytest.skip(f"Cannot initialise Kubernetes client: {exc}") + + +@pytest.fixture(scope="session") +def k8s_custom_api(): + """ + kubernetes.client.CustomObjectsApi instance. + Skips tests in the current session if KUBECONFIG is not set. + """ + kubeconfig = os.environ.get("KUBECONFIG") + if not kubeconfig: + pytest.skip("KUBECONFIG not set") + try: + from kubernetes import client as k8s_client + from kubernetes import config as k8s_config + k8s_config.load_kube_config(kubeconfig) + return k8s_client.CustomObjectsApi() + except Exception as exc: # noqa: BLE001 + pytest.skip(f"Cannot initialise Kubernetes client: {exc}") + + +@pytest.fixture(scope="session") +def vk_node_name(k8s_core_v1): + """ + Returns the name of the virtual-kubelet node, or skips if not found. + """ + try: + nodes = k8s_core_v1.list_node( + field_selector="metadata.name=virtual-kubelet" + ).items + except Exception as exc: # noqa: BLE001 + pytest.skip(f"Cannot list Kubernetes nodes: {exc}") + if not nodes: + pytest.skip("virtual-kubelet node not found in cluster") + return nodes[0].metadata.name + diff --git a/test/portforward/docker-compose.yml b/test/portforward/docker-compose.yml new file mode 100644 index 00000000..3c9b8b9b --- /dev/null +++ b/test/portforward/docker-compose.yml @@ -0,0 +1,105 @@ +# Test environment simulating the rathole port-forwarding tunnel introduced in PR #529. +# +# Topology: +# +# [remote network] [cluster network] +# backend (nginx) <-- rathole-client-tcp --> rathole-server-tcp --> host:18080, host:19090 +# backend (nginx) <-- rathole-client-ws --> rathole-server-ws --> host:18082 +# +# The "remote" network is isolated (simulates the HPC/compute side). +# The "cluster" network is where the rathole servers run (simulates the Kubernetes side). +# Rathole clients bridge both networks, mirroring how interLink's VK deploys the tunnel. +# +# Usage: +# docker compose up -d +# pytest -v +# docker compose down -v + +networks: + # Simulates the isolated remote/HPC side. Only the backend and rathole clients can see it. + remote: + driver: bridge + # Simulates the Kubernetes cluster side. Rathole servers and clients share this. + cluster: + driver: bridge + +services: + + # ── Backend ──────────────────────────────────────────────────────────────── + # Represents a service running on the remote/HPC node. + # It is intentionally NOT connected to the cluster network so that the only + # path from the cluster side is through the rathole tunnel. + backend: + image: nginx:alpine + networks: + - remote + volumes: + - ./configs/nginx.conf:/etc/nginx/conf.d/default.conf:ro + healthcheck: + test: ["CMD", "wget", "-qO-", "http://127.0.0.1/"] + interval: 3s + timeout: 3s + retries: 10 + + # ── TCP mode ─────────────────────────────────────────────────────────────── + # rathole server: runs in the "cluster", exposes forwarded ports to the host + # so that pytest can reach them. + rathole-server-tcp: + image: rapiz1/rathole:v0.5.0 + networks: + - cluster + volumes: + - ./configs/server-tcp.toml:/etc/rathole/server.toml:ro + command: ["--server", "/etc/rathole/server.toml"] + ports: + - "18080:8080" # HTTP port forwarded from backend:80 + - "19090:9090" # Secondary port forwarded from backend:9090 + restart: on-failure + + # rathole client: bridges "remote" and "cluster", connects to server and + # exposes backend ports as if they were local. + rathole-client-tcp: + image: rapiz1/rathole:v0.5.0 + networks: + - remote + - cluster + volumes: + - ./configs/client-tcp.toml:/etc/rathole/client.toml:ro + command: ["--client", "/etc/rathole/client.toml"] + depends_on: + backend: + condition: service_healthy + rathole-server-tcp: + condition: service_started + restart: on-failure + + # ── WebSocket mode ───────────────────────────────────────────────────────── + # Same topology as TCP mode but uses WebSocket transport, which is what + # interLink uses when RatholeCAIssuerName is not set (no cert-manager TLS). + # In production an nginx Ingress proxies the WebSocket connection; here the + # client connects to the rathole server directly to keep the test self-contained. + rathole-server-ws: + image: rapiz1/rathole:v0.5.0 + networks: + - cluster + volumes: + - ./configs/server-ws.toml:/etc/rathole/server.toml:ro + command: ["--server", "/etc/rathole/server.toml"] + ports: + - "18082:8082" # HTTP port forwarded from backend:80 via WebSocket tunnel + restart: on-failure + + rathole-client-ws: + image: rapiz1/rathole:v0.5.0 + networks: + - remote + - cluster + volumes: + - ./configs/client-ws.toml:/etc/rathole/client.toml:ro + command: ["--client", "/etc/rathole/client.toml"] + depends_on: + backend: + condition: service_healthy + rathole-server-ws: + condition: service_started + restart: on-failure diff --git a/test/portforward/pyproject.toml b/test/portforward/pyproject.toml new file mode 100644 index 00000000..54233133 --- /dev/null +++ b/test/portforward/pyproject.toml @@ -0,0 +1,21 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "interlink-portforward-tests" +version = "0.1.0" +description = "Integration tests for the rathole port-forwarding tunnel (PR #529)" +requires-python = ">=3.8" +dependencies = [ + "pytest>=7.4", + "requests>=2.28", + "pytest-timeout>=2.2", + "kubernetes>=28.0", +] + +[tool.pytest.ini_options] +testpaths = ["."] +timeout = 120 +log_cli = true +log_cli_level = "INFO" diff --git a/test/portforward/test_tls_k8s.py b/test/portforward/test_tls_k8s.py new file mode 100644 index 00000000..f38cd60e --- /dev/null +++ b/test/portforward/test_tls_k8s.py @@ -0,0 +1,482 @@ +""" +Kubernetes-side integration tests for the rathole TLS tunnel resources. + +These tests verify that the Virtual Kubelet (configured with TunnelType=rathole +and RatholeCAIssuerName=interlink-test-ca) correctly creates: + - cert-manager Certificate CRs (server + client) + - Traefik IngressRouteTCP + - TLS secrets issued by cert-manager + - pod annotation with the rathole bootstrap command + +Prerequisites (set up by k3s-test-setup.sh): + - cert-manager installed and running + - interlink-test-ca ClusterIssuer ready (self-signed CA chain) + - Traefik v3 CRDs installed (traefik.io/v1alpha1) + - Virtual Kubelet running with Network.TunnelType=rathole and + Network.RatholeCAIssuerName=interlink-test-ca + +Automatically skipped when KUBECONFIG is not set. +""" + +import base64 +import os +import time +import uuid + +import pytest + +# ── helpers ───────────────────────────────────────────────────────────────── + +def _wait_for(fn, timeout: int = 120, interval: float = 3.0, label: str = "resource"): + """ + Poll *fn()* until it returns a truthy value. The function must not raise + (use try/except inside) – a falsy return triggers the next retry. + Raises TimeoutError if *timeout* seconds elapse without success. + """ + deadline = time.monotonic() + timeout + last_exc: Exception = RuntimeError("never tried") + while time.monotonic() < deadline: + try: + result = fn() + if result: + return result + except Exception as exc: # noqa: BLE001 + last_exc = exc + time.sleep(interval) + raise TimeoutError( + f"Timed out after {timeout}s waiting for {label!r}. Last error: {last_exc}" + ) + + +# ── fixtures ───────────────────────────────────────────────────────────────── + +@pytest.fixture(scope="module") +def k8s_apis(request): + """ + Returns a dict of Kubernetes API clients. + Skips the entire module if KUBECONFIG is not set or the VK node is absent. + """ + kubeconfig = os.environ.get("KUBECONFIG") + if not kubeconfig: + pytest.skip("KUBECONFIG not set – skipping Kubernetes TLS resource tests") + + try: + from kubernetes import client as k8s_client + from kubernetes import config as k8s_config + k8s_config.load_kube_config(kubeconfig) + except Exception as exc: # noqa: BLE001 + pytest.skip(f"Cannot load kubeconfig: {exc}") + + core = k8s_client.CoreV1Api() + apps = k8s_client.AppsV1Api() + custom = k8s_client.CustomObjectsApi() + networking = k8s_client.NetworkingV1Api() + + # Confirm the virtual-kubelet node is present + try: + nodes = core.list_node(field_selector="metadata.name=virtual-kubelet").items + except Exception as exc: # noqa: BLE001 + pytest.skip(f"Cannot list nodes: {exc}") + if not nodes: + pytest.skip("virtual-kubelet node not found – skipping Kubernetes TLS resource tests") + + return {"core": core, "apps": apps, "custom": custom, "networking": networking} + + +@pytest.fixture(scope="module") +def tunnel_pod(k8s_apis): + """ + Creates a short-lived test pod on the virtual-kubelet node with a TCP + containerPort so the VK triggers tunnel TLS resource creation. + The pod is deleted after the module's tests finish. + """ + core = k8s_apis["core"] + + pod_name = f"tls-test-{uuid.uuid4().hex[:6]}" + namespace = "default" + + pod_manifest = { + "apiVersion": "v1", + "kind": "Pod", + "metadata": { + "name": pod_name, + "namespace": namespace, + "annotations": { + # Give the VK more time to pull the rathole image before + # declaring the shadow pod IP wait a failure. + "interlink.virtual-kubelet.io/wstunnel-timeout": "120s", + }, + }, + "spec": { + "nodeName": "virtual-kubelet", + "tolerations": [ + { + "key": "virtual-node.interlink/no-schedule", + "operator": "Exists", + "effect": "NoSchedule", + } + ], + "containers": [ + { + "name": "service", + "image": "nginx:alpine", + "ports": [ + {"containerPort": 8080, "protocol": "TCP"}, + ], + } + ], + # Prevent the pod from ever re-starting – we only need it to + # trigger VK tunnel creation, not actually complete a job. + "restartPolicy": "Never", + }, + } + + from kubernetes import client as k8s_client + + pod = core.create_namespaced_pod(namespace=namespace, body=pod_manifest) + yield pod + + # Teardown: delete the pod (best-effort) + try: + core.delete_namespaced_pod( + pod_name, + namespace, + body=k8s_client.V1DeleteOptions(grace_period_seconds=0), + ) + except Exception: # noqa: BLE001 + pass + + +# ── derived names ───────────────────────────────────────────────────────────── + +def _resource_names(pod_name: str, pod_ns: str = "default"): + """ + Replicate the Go name-computation logic from computeWstunnelResourceNames. + Returns (shadow_namespace, resource_base_name). + """ + import re + + def _sanitize(s: str) -> str: + s = s.lower() + s = re.sub(r"[^a-z0-9]", "-", s) + s = s.strip("-") + while "--" in s: + s = s.replace("--", "-") + return s[:63].rstrip("-") or "default" + + sanitized_ns = _sanitize(pod_ns) + sanitized_name = _sanitize(pod_name) + + shadow_ns = (sanitized_ns + "-wstunnel")[:63].rstrip("-") + base = (sanitized_name + "-" + sanitized_ns)[:63].rstrip("-") + return shadow_ns, base + + +# ── tests ───────────────────────────────────────────────────────────────────── + +class TestRatholeTLSResourceCreation: + """ + Verify the VK creates the correct Kubernetes resources when a pod with + exposed TCP ports is scheduled in rathole TLS mode. + """ + + def test_shadow_namespace_created(self, k8s_apis, tunnel_pod): + """VK creates the -wstunnel shadow namespace.""" + core = k8s_apis["core"] + pod_name = tunnel_pod.metadata.name + shadow_ns, _ = _resource_names(pod_name) + + def _check(): + namespaces = [ns.metadata.name for ns in core.list_namespace().items] + return shadow_ns in namespaces + + _wait_for(_check, timeout=120, label=f"namespace/{shadow_ns}") + namespaces = [ns.metadata.name for ns in core.list_namespace().items] + assert shadow_ns in namespaces, f"Shadow namespace {shadow_ns!r} not found" + + def test_rathole_deployment_created(self, k8s_apis, tunnel_pod): + """VK creates a rathole Deployment in the shadow namespace.""" + apps = k8s_apis["apps"] + pod_name = tunnel_pod.metadata.name + shadow_ns, base_name = _resource_names(pod_name) + + def _check(): + try: + apps.read_namespaced_deployment(base_name, shadow_ns) + return True + except Exception: + return False + + _wait_for(_check, timeout=120, label=f"deployment/{shadow_ns}/{base_name}") + dep = apps.read_namespaced_deployment(base_name, shadow_ns) + assert dep.metadata.name == base_name + + def test_server_certificate_created(self, k8s_apis, tunnel_pod): + """VK creates a cert-manager Certificate for the rathole server TLS.""" + custom = k8s_apis["custom"] + pod_name = tunnel_pod.metadata.name + shadow_ns, base_name = _resource_names(pod_name) + cert_name = f"{base_name}-rathole-server-tls" + + def _check(): + try: + custom.get_namespaced_custom_object( + group="cert-manager.io", + version="v1", + namespace=shadow_ns, + plural="certificates", + name=cert_name, + ) + return True + except Exception: + return False + + _wait_for(_check, timeout=120, label=f"Certificate/{shadow_ns}/{cert_name}") + cert = custom.get_namespaced_custom_object( + group="cert-manager.io", version="v1", + namespace=shadow_ns, plural="certificates", name=cert_name, + ) + assert cert["metadata"]["name"] == cert_name + # Must reference the test CA issuer + assert cert["spec"]["issuerRef"]["name"] == "interlink-test-ca" + assert cert["spec"]["secretName"] == cert_name + # DNS name must contain the rathole hostname pattern + assert any("rathole" in dns for dns in cert["spec"].get("dnsNames", [])) + + def test_client_certificate_created(self, k8s_apis, tunnel_pod): + """VK creates a cert-manager Certificate for the rathole client.""" + custom = k8s_apis["custom"] + pod_name = tunnel_pod.metadata.name + shadow_ns, base_name = _resource_names(pod_name) + cert_name = f"{base_name}-rathole-client-tls" + + def _check(): + try: + custom.get_namespaced_custom_object( + group="cert-manager.io", version="v1", + namespace=shadow_ns, plural="certificates", name=cert_name, + ) + return True + except Exception: + return False + + _wait_for(_check, timeout=120, label=f"Certificate/{shadow_ns}/{cert_name}") + cert = custom.get_namespaced_custom_object( + group="cert-manager.io", version="v1", + namespace=shadow_ns, plural="certificates", name=cert_name, + ) + assert cert["metadata"]["name"] == cert_name + assert cert["spec"]["issuerRef"]["name"] == "interlink-test-ca" + # Must request client auth usage + assert "client auth" in cert["spec"].get("usages", []) + + def test_ingress_route_tcp_created(self, k8s_apis, tunnel_pod): + """VK creates a Traefik IngressRouteTCP for TLS termination.""" + custom = k8s_apis["custom"] + pod_name = tunnel_pod.metadata.name + shadow_ns, base_name = _resource_names(pod_name) + + def _check(): + try: + custom.get_namespaced_custom_object( + group="traefik.io", version="v1alpha1", + namespace=shadow_ns, plural="ingressroutetcps", name=base_name, + ) + return True + except Exception: + return False + + _wait_for(_check, timeout=120, label=f"IngressRouteTCP/{shadow_ns}/{base_name}") + route = custom.get_namespaced_custom_object( + group="traefik.io", version="v1alpha1", + namespace=shadow_ns, plural="ingressroutetcps", name=base_name, + ) + spec = route["spec"] + # Must use the websecure entrypoint + assert "websecure" in spec.get("entryPoints", []) + # TLS secretName must match the server certificate + assert spec["tls"]["secretName"] == f"{base_name}-rathole-server-tls" + # Route must have a HostSNI match rule + routes = spec.get("routes", []) + assert len(routes) > 0 + assert "HostSNI" in routes[0]["match"] + # Service name must match the shadow deployment + services = routes[0].get("services", []) + assert any(s["name"] == base_name for s in services) + + def test_server_tls_secret_issued(self, k8s_apis, tunnel_pod): + """cert-manager issues the server TLS secret with all required keys.""" + core = k8s_apis["core"] + pod_name = tunnel_pod.metadata.name + shadow_ns, base_name = _resource_names(pod_name) + secret_name = f"{base_name}-rathole-server-tls" + + def _check(): + try: + secret = core.read_namespaced_secret(secret_name, shadow_ns) + data = secret.data or {} + return all(k in data for k in ("tls.crt", "tls.key", "ca.crt")) + except Exception: + return False + + _wait_for(_check, timeout=180, label=f"Secret/{shadow_ns}/{secret_name}") + secret = core.read_namespaced_secret(secret_name, shadow_ns) + data = secret.data + assert "tls.crt" in data and len(data["tls.crt"]) > 0 + assert "tls.key" in data and len(data["tls.key"]) > 0 + assert "ca.crt" in data and len(data["ca.crt"]) > 0 + # Validate tls.crt is valid base64 containing a PEM certificate + decoded = base64.b64decode(data["tls.crt"]) + assert b"-----BEGIN CERTIFICATE-----" in decoded + + def test_client_tls_secret_issued(self, k8s_apis, tunnel_pod): + """cert-manager issues the client TLS secret with all required keys.""" + core = k8s_apis["core"] + pod_name = tunnel_pod.metadata.name + shadow_ns, base_name = _resource_names(pod_name) + secret_name = f"{base_name}-rathole-client-tls" + + def _check(): + try: + secret = core.read_namespaced_secret(secret_name, shadow_ns) + data = secret.data or {} + return all(k in data for k in ("tls.crt", "tls.key", "ca.crt")) + except Exception: + return False + + _wait_for(_check, timeout=180, label=f"Secret/{shadow_ns}/{secret_name}") + secret = core.read_namespaced_secret(secret_name, shadow_ns) + data = secret.data + assert "tls.crt" in data and len(data["tls.crt"]) > 0 + assert "tls.key" in data and len(data["tls.key"]) > 0 + assert "ca.crt" in data and len(data["ca.crt"]) > 0 + + def test_pod_annotation_set(self, k8s_apis, tunnel_pod): + """ + After cert-manager issues the client TLS secret, the VK patches the pod + with interlink.eu/rathole-client-commands containing the bootstrap script. + """ + core = k8s_apis["core"] + pod_name = tunnel_pod.metadata.name + + def _check(): + pod = core.read_namespaced_pod(pod_name, "default") + ann = (pod.metadata.annotations or {}).get("interlink.eu/rathole-client-commands", "") + return ann.strip() != "" + + _wait_for(_check, timeout=300, label=f"pod/{pod_name} rathole annotation") + pod = core.read_namespaced_pod(pod_name, "default") + annotation = pod.metadata.annotations["interlink.eu/rathole-client-commands"] + + # The bootstrap command downloads and runs rathole with TLS certs + assert "rathole" in annotation.lower() + # Must contain base64-decoded cert file writes + assert "base64" in annotation + assert "rathole-ca.crt" in annotation + assert "rathole-client.crt" in annotation + assert "rathole-client.key" in annotation + assert "rathole-client.toml" in annotation + # Must background rathole + assert annotation.rstrip().endswith("&") + + def test_annotation_toml_has_correct_host(self, k8s_apis, tunnel_pod): + """ + The embedded base64 client TOML references the correct rathole server + hostname derived from WildcardDNS (tunnel.test.local) and the resource name. + """ + import re + import base64 as b64lib + + core = k8s_apis["core"] + pod_name = tunnel_pod.metadata.name + _, base_name = _resource_names(pod_name) + expected_host = f"rathole-{base_name}.tunnel.test.local" + + pod = core.read_namespaced_pod(pod_name, "default") + annotation = pod.metadata.annotations.get("interlink.eu/rathole-client-commands", "") + + # The hostname lives inside the base64-encoded TOML blob written by the + # bootstrap command ("echo | base64 -d > /tmp/rathole-client.toml"). + # Decode every such blob and search for the expected hostname there. + decoded_parts = annotation + for blob in re.findall(r'echo\s+([A-Za-z0-9+/=]+)\s*\|', annotation): + try: + decoded_parts += b64lib.b64decode(blob).decode("utf-8", errors="ignore") + except Exception: + pass + + assert expected_host in decoded_parts or base_name in decoded_parts, ( + f"Expected {expected_host!r} or {base_name!r} not found in annotation " + f"(checked plain text and all base64-decoded blobs)" + ) + + +class TestRatholeTLSCleanup: + """ + Verify that deleting the pod causes the VK to remove all tunnel TLS resources. + This test must run AFTER TestRatholeTLSResourceCreation and reuses the same + pod name (derived from tunnel_pod fixture, module-scoped). + """ + + def test_resources_cleaned_up_on_pod_delete(self, k8s_apis, tunnel_pod): + """ + After the test pod is deleted (fixture teardown runs at end of module), + trigger deletion here and check that the shadow namespace resources + are removed. + """ + from kubernetes import client as k8s_client + + core = k8s_apis["core"] + apps = k8s_apis["apps"] + custom = k8s_apis["custom"] + pod_name = tunnel_pod.metadata.name + shadow_ns, base_name = _resource_names(pod_name) + + # Delete the pod explicitly now (fixture will also try, harmlessly) + try: + core.delete_namespaced_pod( + pod_name, + "default", + body=k8s_client.V1DeleteOptions(grace_period_seconds=0), + ) + except Exception: + pass + + # Wait for Deployment to be gone + def _dep_gone(): + try: + apps.read_namespaced_deployment(base_name, shadow_ns) + return False + except Exception: + return True + + _wait_for(_dep_gone, timeout=120, label=f"deployment/{shadow_ns}/{base_name} deletion") + + # Verify Certificate CRs are removed (poll — API may lag behind deletion) + for cert_suffix in ("rathole-server-tls", "rathole-client-tls"): + cert_name = f"{base_name}-{cert_suffix}" + + def _cert_gone(cn=cert_name): + try: + custom.get_namespaced_custom_object( + group="cert-manager.io", version="v1", + namespace=shadow_ns, plural="certificates", name=cn, + ) + return False + except Exception: + return True + + _wait_for(_cert_gone, timeout=60, label=f"Certificate/{shadow_ns}/{cert_name} deletion") + + # Verify IngressRouteTCP is removed + def _route_gone(): + try: + custom.get_namespaced_custom_object( + group="traefik.io", version="v1alpha1", + namespace=shadow_ns, plural="ingressroutetcps", name=base_name, + ) + return False + except Exception: + return True + + _wait_for(_route_gone, timeout=60, label=f"IngressRouteTCP/{shadow_ns}/{base_name} deletion") diff --git a/test/portforward/test_tunnel.py b/test/portforward/test_tunnel.py new file mode 100644 index 00000000..67810e8b --- /dev/null +++ b/test/portforward/test_tunnel.py @@ -0,0 +1,285 @@ +""" +Port-forwarding integration tests for the rathole tunnel backend introduced +in interLink PR #529. + +These tests verify that traffic flows correctly through the rathole tunnel in +both transport modes: + - TCP mode → mirrors TunnelType=="rathole" with RatholeCAIssuerName set + - WebSocket mode → mirrors TunnelType=="rathole" with no CA issuer + (DefaultRatholeWSCommand path) + +Prerequisites: + docker compose up -d # bring up the test environment + pytest -v # run tests + docker compose down -v # tear down + +Environment variables (all optional): + TCP_HTTP_URL (default http://localhost:18080) + TCP_METRICS_URL (default http://localhost:19090) + WS_HTTP_URL (default http://localhost:18082) + TUNNEL_WAIT_TIMEOUT seconds to wait for tunnels to initialise (default 60) +""" + +import threading +import time + +import pytest +import requests + + +# ── TCP transport mode ─────────────────────────────────────────────────────── + +class TestTCPTunnel: + """ + TCP-mode rathole tunnel: rathole server + client using default TCP transport. + Exercises the annotation-building path in addWstunnelClientAnnotation when + TunnelType=="rathole" and RatholeCAIssuerName is configured. + """ + + def test_http_connectivity(self, tcp_http_url, http_session): + """Traffic reaches the backend nginx through the TCP tunnel.""" + resp = http_session.get(tcp_http_url, timeout=5) + assert resp.status_code == 200 + assert "remote backend" in resp.text + + def test_response_contains_expected_port(self, tcp_http_url, http_session): + """Backend response identifies the correct port (80) was reached.""" + resp = http_session.get(tcp_http_url, timeout=5) + assert "port 80" in resp.text + + def test_http_response_headers(self, tcp_http_url, http_session): + """Server header is present (proves nginx is the origin, not a proxy error).""" + resp = http_session.get(tcp_http_url, timeout=5) + assert "server" in resp.headers + assert resp.headers["server"].lower().startswith("nginx") + + def test_multiple_sequential_requests(self, tcp_http_url, http_session): + """Multiple sequential requests all succeed (connection re-use / keep-alive).""" + for _ in range(5): + resp = http_session.get(tcp_http_url, timeout=5) + assert resp.status_code == 200 + + def test_concurrent_requests(self, tcp_http_url): + """Concurrent requests are all served correctly (no connection starvation).""" + results: list[int] = [] + errors: list[str] = [] + + def do_request() -> None: + try: + resp = requests.get(tcp_http_url, timeout=10) + results.append(resp.status_code) + except Exception as exc: # noqa: BLE001 + errors.append(str(exc)) + + threads = [threading.Thread(target=do_request) for _ in range(10)] + for t in threads: + t.start() + for t in threads: + t.join() + + assert not errors, f"Concurrent requests produced errors: {errors}" + assert all(s == 200 for s in results), f"Non-200 responses: {results}" + + def test_large_payload_passthrough(self, tcp_http_url, http_session): + """ + Verify that the tunnel doesn't truncate responses. + We request a URL that returns a known-size response and check the length. + (nginx returns the full plain-text string; no truncation expected.) + """ + resp = http_session.get(tcp_http_url, timeout=10) + assert resp.status_code == 200 + # Must contain at least our marker string + assert len(resp.text) > 0 + + +# ── Multi-port forwarding (TCP mode) ───────────────────────────────────────── + +class TestTCPMultiPort: + """ + Tests that rathole correctly forwards multiple ports simultaneously, + matching the template in rathole-template.yaml which can list multiple + [server.services.pNNNN] entries. + """ + + def test_secondary_port_reachable(self, tcp_metrics_url, http_session): + """The secondary forwarded port (9090) is independently reachable.""" + resp = http_session.get(tcp_metrics_url, timeout=5) + assert resp.status_code == 200 + assert "Metrics" in resp.text + + def test_secondary_port_has_correct_content(self, tcp_metrics_url, http_session): + """Content on port 9090 differs from port 80 – proves separate endpoints.""" + resp = http_session.get(tcp_metrics_url, timeout=5) + assert "port 9090" in resp.text + + def test_both_ports_independent(self, tcp_http_url, tcp_metrics_url, http_session): + """Requests to both forwarded ports succeed in the same test.""" + http_resp = http_session.get(tcp_http_url, timeout=5) + metrics_resp = http_session.get(tcp_metrics_url, timeout=5) + assert http_resp.status_code == 200 + assert metrics_resp.status_code == 200 + # Content should differ between the two ports + assert http_resp.text != metrics_resp.text + + +# ── WebSocket transport mode ────────────────────────────────────────────────── + +class TestWebSocketTunnel: + """ + WebSocket-mode rathole tunnel: mirrors the DefaultRatholeWSCommand path in + addWstunnelClientAnnotation (TunnelType=="rathole", RatholeCAIssuerName=""). + + The rathole server and client use [transport] type = "websocket" which is + what interLink injects into the client TOML annotation. + """ + + def test_http_connectivity(self, ws_http_url, http_session): + """Traffic reaches the backend nginx through the WebSocket tunnel.""" + resp = http_session.get(ws_http_url, timeout=5) + assert resp.status_code == 200 + assert "remote backend" in resp.text + + def test_response_contains_expected_port(self, ws_http_url, http_session): + """Backend response identifies the correct port (80) was reached.""" + resp = http_session.get(ws_http_url, timeout=5) + assert "port 80" in resp.text + + def test_multiple_sequential_requests(self, ws_http_url, http_session): + """WebSocket tunnel handles sequential requests without session errors.""" + for _ in range(5): + resp = http_session.get(ws_http_url, timeout=5) + assert resp.status_code == 200 + + def test_concurrent_requests(self, ws_http_url): + """WebSocket tunnel handles concurrent requests.""" + results: list[int] = [] + errors: list[str] = [] + + def do_request() -> None: + try: + resp = requests.get(ws_http_url, timeout=10) + results.append(resp.status_code) + except Exception as exc: # noqa: BLE001 + errors.append(str(exc)) + + threads = [threading.Thread(target=do_request) for _ in range(8)] + for t in threads: + t.start() + for t in threads: + t.join() + + assert not errors, f"Concurrent WS-tunnel requests produced errors: {errors}" + assert all(s == 200 for s in results), f"Non-200 responses: {results}" + + +# ── Network isolation ───────────────────────────────────────────────────────── + +class TestNetworkIsolation: + """ + Verify that the backend is NOT directly reachable from the "cluster" side + (i.e., from the host where pytest runs). Traffic MUST go through a tunnel. + + These tests are important because the docker-compose intentionally places + the backend on an isolated "remote" network. + """ + + def test_backend_not_directly_reachable_on_80(self): + """ + Port 80 is not exposed directly to the host – the backend is only + reachable through the forwarded ports (18080, 18082). + If something else is listening on 80 it must NOT be our backend. + """ + try: + resp = requests.get("http://localhost:80", timeout=2) + # Something is listening, but it must not be our backend nginx + assert "remote backend" not in resp.text, ( + "Backend content is directly accessible on port 80 – " + "docker-compose.yml isolation is broken" + ) + except requests.exceptions.ConnectionError: + pass # Nothing on port 80 – expected when compose is running + + def test_backend_not_directly_reachable_on_9090(self): + """Port 9090 is not exposed directly to the host.""" + try: + resp = requests.get("http://localhost:9090", timeout=2) + assert "remote backend" not in resp.text, ( + "Backend content is directly accessible on port 9090 – " + "docker-compose.yml isolation is broken" + ) + except requests.exceptions.ConnectionError: + pass + + +# ── Annotation command smoke tests ──────────────────────────────────────────── + +class TestAnnotationCommandFormat: + """ + Unit-style checks that verify the format of the bootstrap commands that + interLink writes into pod annotations (DefaultRatholeWSCommand / + DefaultRatholeCommand). These do not start actual containers – they parse + the command strings that the VK would inject. + """ + + # These constants are copied from pkg/virtualkubelet/virtualkubelet.go + # to keep the test independent of the Go build. + DEFAULT_RATHOLE_EXECUTABLE_URL = ( + "https://github.com/rathole-org/rathole/releases/download/v0.5.0/" + "rathole-x86_64-unknown-linux-gnu.zip" + ) + DEFAULT_WS_CMD_TEMPLATE = ( + "curl -L -f -k %s -o rathole.zip && unzip -q rathole.zip && " + "chmod +x rathole && echo %s | base64 -d > /tmp/rathole-client.toml && " + "./rathole --client /tmp/rathole-client.toml &" + ) + DEFAULT_TLS_CMD_TEMPLATE = ( + "curl -L -f -k %s -o rathole.zip && unzip -q rathole.zip && " + "chmod +x rathole && echo %s | base64 -d > /tmp/rathole-ca.crt && " + "echo %s | base64 -d > /tmp/rathole-client.crt && " + "echo %s | base64 -d > /tmp/rathole-client.key && " + "echo %s | base64 -d > /tmp/rathole-client.toml && " + "./rathole --client /tmp/rathole-client.toml &" + ) + + def test_ws_command_has_two_format_verbs(self): + """DefaultRatholeWSCommand must have exactly 2 %s verbs (url, toml).""" + count = self.DEFAULT_WS_CMD_TEMPLATE.count("%s") + assert count == 2, f"Expected 2 %s verbs, found {count}" + + def test_tls_command_has_five_format_verbs(self): + """DefaultRatholeCommand must have exactly 5 %s verbs (url, ca, cert, key, toml).""" + count = self.DEFAULT_TLS_CMD_TEMPLATE.count("%s") + assert count == 5, f"Expected 5 %s verbs, found {count}" + + def test_ws_command_references_default_url(self): + """WebSocket command embeds the default download URL.""" + cmd = self.DEFAULT_WS_CMD_TEMPLATE % ( + self.DEFAULT_RATHOLE_EXECUTABLE_URL, + "BASE64TOML", + ) + assert self.DEFAULT_RATHOLE_EXECUTABLE_URL in cmd + assert "base64 -d" in cmd + assert "./rathole --client" in cmd + + def test_tls_command_references_cert_files(self): + """TLS command writes the expected cert filenames.""" + cmd = self.DEFAULT_TLS_CMD_TEMPLATE % ( + self.DEFAULT_RATHOLE_EXECUTABLE_URL, + "BASE64CA", + "BASE64CRT", + "BASE64KEY", + "BASE64TOML", + ) + assert "rathole-ca.crt" in cmd + assert "rathole-client.crt" in cmd + assert "rathole-client.key" in cmd + assert "rathole-client.toml" in cmd + assert "base64 -d" in cmd + + def test_ws_command_runs_client_in_background(self): + """Bootstrap command must launch rathole as a background process (&).""" + assert self.DEFAULT_WS_CMD_TEMPLATE.rstrip().endswith("&") + + def test_tls_command_runs_client_in_background(self): + """TLS bootstrap command must also run rathole in the background.""" + assert self.DEFAULT_TLS_CMD_TEMPLATE.rstrip().endswith("&") From 89b58ddcc87fa04474abfe677aac7ce1e7172f9e Mon Sep 17 00:00:00 2001 From: rocky Cloud User Date: Tue, 16 Jun 2026 16:45:52 +0200 Subject: [PATCH 11/14] chore: update vk-test-set submodule with port-forwarding tests Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: rocky Cloud User --- test/vk-test-set | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/vk-test-set b/test/vk-test-set index 3404e2d2..557428cc 160000 --- a/test/vk-test-set +++ b/test/vk-test-set @@ -1 +1 @@ -Subproject commit 3404e2d21bef92a04beb683ce69b43c3edbbe171 +Subproject commit 557428cce047584fe7f2c30f87e22301fcb023e8 From 1b02239a6051607e75cfc03cb856f2561992844b Mon Sep 17 00:00:00 2001 From: rocky Cloud User Date: Tue, 16 Jun 2026 17:02:47 +0200 Subject: [PATCH 12/14] docs: add manual e2e test run instructions and update Developers.md MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a new 'Running individual test suites manually' section to Developers.md covering the three independent test suites: 1. VK pod integration tests (test/vk-test-set/) — venv setup, pytest commands, filter flags, port_forward_test.py assertion table, troubleshooting tips, Rocky/CentOS 9 oauthlib workaround note. 2. Docker tunnel unit tests (test/portforward/) — Docker Compose bring-up, pytest commands by class, network topology diagram, env-var table, teardown. 3. Quick health-check commands — kubectl one-liners to verify cluster state, VK connectivity, shadow namespaces, and cert-manager Certificates. Also update: - 'What the scripts do' table: document rathole Docker Compose environment and cert-manager/Traefik setup added to k3s-test-setup.sh. - 'Artefacts and logs' listing: add portforward-test-results.log, rathole-server-tcp.log, rathole-server-ws.log, and test-results.log. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> Signed-off-by: rocky Cloud User --- docs/docs/Developers.md | 197 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 188 insertions(+), 9 deletions(-) diff --git a/docs/docs/Developers.md b/docs/docs/Developers.md index 66eb1cb5..fb20a8b9 100644 --- a/docs/docs/Developers.md +++ b/docs/docs/Developers.md @@ -62,9 +62,9 @@ make test-k3s-cleanup # cleanup only | Script | Purpose | |---|---| -| `k3s-test-setup.sh` | Installs K3s with `--egress-selector-mode disabled`; builds the `interlink-api` and `interlink-plugin` (SLURM) Docker images; writes all configs; starts containers; starts virtual-kubelet as a host process; approves kubelet CSRs; runs a Slurm/Apptainer smoke test | -| `k3s-test-run.sh` | Runs the `pytest` test suite from [`vk-test-set`](https://github.com/interlink-hq/vk-test-set) against the live cluster | -| `k3s-test-cleanup.sh` | Copies logs and job artefacts to the test directory, then stops containers, kills the virtual-kubelet process, and uninstalls K3s | +| `k3s-test-setup.sh` | Installs K3s; builds `interlink-api` and `interlink-plugin` (SLURM) images; installs Traefik v3 CRDs and cert-manager; creates a CA ClusterIssuer; starts all containers; starts virtual-kubelet as a host process; approves kubelet CSRs; runs a Slurm/Apptainer smoke test; starts the rathole Docker Compose tunnel environment | +| `k3s-test-run.sh` | (1) Runs the `pytest` vk-test-set suite (pod lifecycle + port-forwarding infra) against the live cluster; (2) runs the standalone Docker tunnel tests in `test/portforward/` | +| `k3s-test-cleanup.sh` | Captures TLS resource state and rathole container logs; tears down Docker Compose; stops containers; kills virtual-kubelet; uninstalls K3s | ### Artefacts and logs @@ -73,12 +73,16 @@ setup script (printed as `TEST_DIR` at startup): ``` /tmp/interlink-test-XXXXXX/ - k3s-install.log – K3s install output - vk.log – virtual-kubelet stdout/stderr - interlink-api.log – interlink-api container logs (live-streamed) - interlink-plugin.log – interlink-plugin container logs (live-streamed) - plugin-jobs/ – job directories from inside the plugin container - slurm-logs/ – Slurm daemon logs from inside the plugin container + k3s-install.log – K3s install output + vk.log – virtual-kubelet stdout/stderr + interlink-api.log – interlink-api container logs (live-streamed) + interlink-plugin.log – interlink-plugin container logs (live-streamed) + test-results.log – pytest vk-test-set output + portforward-test-results.log – pytest Docker tunnel test output + rathole-server-tcp.log – rathole TCP server container logs + rathole-server-ws.log – rathole WebSocket server container logs + plugin-jobs/ – job directories from inside the plugin container + slurm-logs/ – Slurm daemon logs from inside the plugin container ``` ### Environment variables @@ -90,6 +94,181 @@ setup script (printed as `TEST_DIR` at startup): --- +## Running individual test suites manually + +Once `k3s-test-setup.sh` has completed (or you have an equivalent environment +running), you can iterate on individual test suites without re-running the full +setup. All commands below assume you are at the repository root and the cluster +is reachable via `/etc/rancher/k3s/k3s.yaml`. + +```bash +export KUBECONFIG=/etc/rancher/k3s/k3s.yaml +TEST_DIR=$(cat /tmp/interlink-test-dir.txt) # set by setup script +``` + +### 1. VK pod integration tests (`test/vk-test-set/`) + +These tests submit real pods to the virtual-kubelet node and validate pod +lifecycle, logs, volumes, probes, init containers, and projected volumes. + +```bash +cd test/vk-test-set + +# Create an isolated Python venv (avoids oauthlib conflicts on RHEL/Rocky 9) +python3 -m venv .venv +source .venv/bin/activate +pip install -q pytest kubernetes jinja2 pydantic requests pytest-timeout + +# Run the full suite (excluding slow / optional test classes) +pytest -v -k "not rclone and not limits and not stress and not multi-init and not fail" + +# Run a single template test +pytest -v vktestset/basic_test.py::test_manifest[virtual-kubelet-000-hello-world.yaml] + +# Run the port-forwarding infrastructure tests (separate file) +pytest -v vktestset/port_forward_test.py + +# Run one specific port-forwarding test +pytest -v vktestset/port_forward_test.py::test_rathole_annotation_set + +deactivate +``` + +**What `port_forward_test.py` checks** — when a pod with `containerPort` is +scheduled on the virtual-kubelet node the VK must provision the full rathole +tunnel infrastructure. The 10 tests verify: + +| Test | Assertion | +|---|---| +| `test_port_forward_pod_scheduled` | Pod reaches `Running`/`Succeeded` | +| `test_rathole_annotation_set` | `interlink.eu/rathole-client-commands` annotation is set and non-empty | +| `test_shadow_namespace_exists` | `-wstunnel` namespace is created | +| `test_rathole_deployment_exists` | Rathole server `Deployment` exists in the shadow namespace | +| `test_rathole_service_exists` | Rathole server `Service` exists in the shadow namespace | +| `test_rathole_server_certificate_ready` | cert-manager `Certificate` (server TLS) reaches `Ready=True` | +| `test_rathole_client_certificate_ready` | cert-manager `Certificate` (client TLS) reaches `Ready=True` | +| `test_tls_secrets_issued` | Both TLS `Secret`s contain `ca.crt`, `tls.crt`, `tls.key` | +| `test_traefik_ingressroutetcp_exists` | Traefik `IngressRouteTCP` is created for SNI routing | +| `test_annotation_contains_valid_toml` | Annotation encodes valid TOML: `[client]`, port `443`, TLS transport, `p8080` service | + +**Troubleshooting** + +```bash +# Check VK is running and Ready +kubectl get node virtual-kubelet + +# Tail VK logs in real time +tail -f "${TEST_DIR}/vk.log" + +# Check if interlink API can reach the plugin +curl http://localhost:3000/pinglink + +# Inspect shadow namespace resources after a port-forward pod is created +kubectl get all,certificates -n interlink-wstunnel +``` + +> **Rocky/CentOS 9 note:** The system `oauthlib` package is missing +> `SIGNATURE_RSA`, which breaks the `kubernetes` Python client. Always use a +> fresh `venv` (as shown above) rather than the system Python. + +--- + +### 2. Docker tunnel unit tests (`test/portforward/`) + +These tests are **independent of Kubernetes**. They run against a local Docker +Compose topology that mimics the rathole tunnel between a remote HPC node and +the cluster side. They can be run on any machine with Docker — no K3s required. + +```bash +cd test/portforward + +# Start the two-network Docker Compose environment +docker compose up -d + +# Wait for tunnels to be ready (rathole clients retry automatically) +# You can watch tunnel handshake with: +docker compose logs -f rathole-client-tcp + +# Install test dependencies +python3 -m venv .venv-pf +source .venv-pf/bin/activate +pip install pytest requests pytest-timeout "kubernetes>=28.0" + +# Run all 37 tests +pytest -v + +# Run only the TCP tunnel tests +pytest -v test_tunnel.py::TestTCPTunnel + +# Run only the WebSocket tunnel tests +pytest -v test_tunnel.py::TestWebSocketTunnel + +# Run the Kubernetes TLS resource tests (requires KUBECONFIG to be set) +# These verify cert-manager Certificates and Traefik IngressRouteTCP +export KUBECONFIG=/etc/rancher/k3s/k3s.yaml +pytest -v test_tls_k8s.py + +deactivate + +# Tear down when done +docker compose down -v +``` + +**Network topology** + +``` +[remote network – isolated "HPC" side] + backend (nginx:alpine) + port 80 → "Hello from remote backend" + port 9090 → "Metrics from remote backend" + +[cluster network – "Kubernetes" side] + rathole-server-tcp (TCP) → host:18080, host:19090 + rathole-server-ws (WebSocket) → host:18082 + +[bridge: remote ↔ cluster] + rathole-client-tcp forwards backend:80 and backend:9090 through TCP tunnel + rathole-client-ws forwards backend:80 through WebSocket tunnel +``` + +The backend has **no** ports exposed to the host, so the only way to reach it is +through a rathole tunnel — matching real interLink deployments where the remote +service is inside the HPC network. + +**Environment variables for the Docker tests** + +| Variable | Default | Description | +|---|---|---| +| `TCP_HTTP_URL` | `http://localhost:18080` | TCP tunnel primary endpoint | +| `TCP_METRICS_URL` | `http://localhost:19090` | TCP tunnel secondary port | +| `WS_HTTP_URL` | `http://localhost:18082` | WebSocket tunnel endpoint | +| `TUNNEL_WAIT_TIMEOUT` | `60` | Seconds to wait for tunnel readiness | + +--- + +### 3. Quick health checks after setup + +```bash +# Cluster and node status +kubectl get nodes +kubectl get pods -A + +# VK connectivity to interlink API +curl http://localhost:3000/pinglink + +# Shadow namespace should exist if a port-forwarding pod was scheduled before +kubectl get ns | grep wstunnel + +# Check all cert-manager Certificates in shadow namespaces +kubectl get certificates -A | grep wstunnel + +# Inspect a specific rathole tunnel resource set (replace with real pod name/ns) +kubectl get deployment,service,certificate,ingressroutetcp \ + -n interlink-wstunnel -l app=rathole +``` + +--- + ## Dagger-based tests (legacy) > **Note:** The Dagger-based CI pipeline is disabled. The K3s-based workflow From 9eabef346cf1de8baef094dd894f760c2a991cde Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 17 Jun 2026 12:28:22 +0000 Subject: [PATCH 13/14] fix: update vk-test-set submodule to valid remote commit (3404e2d) The submodule was pointing to commit 557428cce047584fe7f2c30f87e22301fcb023e8 which does not exist in the remote vk-test-set repository, causing the e2e CI job to fail during submodule checkout. Update to 3404e2d21bef92a04beb683ce69b43c3edbbe171 (HEAD of main branch). --- test/vk-test-set | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/vk-test-set b/test/vk-test-set index 557428cc..3404e2d2 160000 --- a/test/vk-test-set +++ b/test/vk-test-set @@ -1 +1 @@ -Subproject commit 557428cce047584fe7f2c30f87e22301fcb023e8 +Subproject commit 3404e2d21bef92a04beb683ce69b43c3edbbe171 From 0c15f5e91ad765e639991df219d193522ba1fd09 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 24 Jun 2026 08:29:59 +0000 Subject: [PATCH 14/14] Apply remaining changes --- pkg/virtualkubelet/backend_rathole.go | 166 ++++++++++++++++++++ pkg/virtualkubelet/backend_rathole_test.go | 44 ++++++ pkg/virtualkubelet/backend_ssh.go | 124 +++++++++++++++ pkg/virtualkubelet/backend_ssh_test.go | 92 +++++++++++ pkg/virtualkubelet/backend_wstunnel.go | 64 ++++++++ pkg/virtualkubelet/backend_wstunnel_test.go | 28 ++++ pkg/virtualkubelet/config.go | 18 ++- pkg/virtualkubelet/config_test.go | 20 +++ pkg/virtualkubelet/mesh.go | 145 ++--------------- pkg/virtualkubelet/mesh_annotations_test.go | 18 +++ pkg/virtualkubelet/tunnel_backend.go | 34 ++++ pkg/virtualkubelet/tunnel_backend_test.go | 35 +++++ pkg/virtualkubelet/virtualkubelet.go | 81 ++++++---- 13 files changed, 705 insertions(+), 164 deletions(-) create mode 100644 pkg/virtualkubelet/backend_rathole.go create mode 100644 pkg/virtualkubelet/backend_rathole_test.go create mode 100644 pkg/virtualkubelet/backend_ssh.go create mode 100644 pkg/virtualkubelet/backend_ssh_test.go create mode 100644 pkg/virtualkubelet/backend_wstunnel.go create mode 100644 pkg/virtualkubelet/backend_wstunnel_test.go create mode 100644 pkg/virtualkubelet/tunnel_backend.go create mode 100644 pkg/virtualkubelet/tunnel_backend_test.go diff --git a/pkg/virtualkubelet/backend_rathole.go b/pkg/virtualkubelet/backend_rathole.go new file mode 100644 index 00000000..3967e838 --- /dev/null +++ b/pkg/virtualkubelet/backend_rathole.go @@ -0,0 +1,166 @@ +package virtualkubelet + +import ( + "context" + "encoding/base64" + "fmt" + "strings" + + "github.com/containerd/containerd/log" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/dynamic" +) + +type RatholeBackend struct { + cfg Network + dynamicClient dynamic.Interface + provider *Provider +} + +func (b *RatholeBackend) bindProvider(p *Provider) { + b.provider = p + if b.dynamicClient == nil { + b.dynamicClient = p.dynamicClient + } +} + +func (b *RatholeBackend) Name() string { + return tunnelTypeRathole +} + +func (b *RatholeBackend) ServerResources(ctx context.Context, td WstunnelTemplateData) error { + if b.cfg.RatholeCAIssuerName == "" { + return nil + } + if b.provider == nil { + return fmt.Errorf("rathole backend not bound to provider") + } + return b.provider.applyRatholeTLSResources(ctx, td) +} + +func (b *RatholeBackend) ClientCommand(ctx context.Context, td WstunnelTemplateData, pod *v1.Pod) (string, error) { + if b.provider == nil || b.provider.clientSet == nil { + return "", fmt.Errorf("rathole backend requires initialized Kubernetes client") + } + + ratholeEndpoint := fmt.Sprintf("rathole-%s.%s", td.Name, td.WildcardDNS) + ratholeEndpoint = sanitizeFullDNSName(ratholeEndpoint) + if td.WildcardDNS == "" { + ratholeEndpoint = td.Name + } + + ratholeURL := b.cfg.RatholeExecutableURL + if ratholeURL == "" { + ratholeURL = DefaultRatholeExecutableURL + } + + if b.cfg.RatholeCAIssuerName != "" { + clientCertSecretName := td.Name + "-rathole-client-tls" + if err := b.provider.waitForRatholeCertSecret(ctx, clientCertSecretName, td.Namespace); err != nil { + return "", fmt.Errorf("rathole client certificate not ready: %w", err) + } + + certSecret, err := b.provider.clientSet.CoreV1().Secrets(td.Namespace).Get(ctx, clientCertSecretName, metav1.GetOptions{}) + if err != nil { + return "", fmt.Errorf("failed to read rathole client certificate secret: %w", err) + } + + for _, key := range []string{"ca.crt", "tls.crt", "tls.key"} { + if len(certSecret.Data[key]) == 0 { + return "", fmt.Errorf("rathole client certificate secret %s/%s is missing required key %q", td.Namespace, clientCertSecretName, key) + } + } + + caCrtB64 := base64.StdEncoding.EncodeToString(certSecret.Data["ca.crt"]) + clientCrtB64 := base64.StdEncoding.EncodeToString(certSecret.Data["tls.crt"]) + clientKeyB64 := base64.StdEncoding.EncodeToString(certSecret.Data["tls.key"]) + + var tomlBuilder strings.Builder + fmt.Fprintf(&tomlBuilder, "[client]\nremote_addr = \"%s:443\"\n\n", ratholeEndpoint) + tomlBuilder.WriteString("[client.transport]\ntype = \"tls\"\n\n") + tomlBuilder.WriteString("[client.transport.tls]\n") + fmt.Fprintf(&tomlBuilder, "hostname = \"%s\"\n", ratholeEndpoint) + tomlBuilder.WriteString("trusted_root = \"/tmp/rathole-ca.crt\"\n") + tomlBuilder.WriteString("cert = \"/tmp/rathole-client.crt\"\n") + tomlBuilder.WriteString("key = \"/tmp/rathole-client.key\"\n\n") + for _, port := range td.ExposedPorts { + if strings.ToUpper(port.Protocol) == protocolUDP { + log.G(ctx).Debugf("Skipping UDP port %d in rathole client config (TLS transport forwards TCP only)", port.Port) + continue + } + fmt.Fprintf(&tomlBuilder, "[client.services.p%d]\ntoken = \"%s\"\nlocal_addr = \"127.0.0.1:%d\"\n\n", + port.Port, td.RandomPassword, port.Port) + } + + configB64 := base64.StdEncoding.EncodeToString([]byte(tomlBuilder.String())) + ratholeCmd := b.cfg.RatholeCommand + if ratholeCmd == "" { + ratholeCmd = DefaultRatholeCommand + } + if strings.Count(ratholeCmd, "%s") != 5 { + return "", fmt.Errorf("RatholeCommand must have exactly 5 %%s format verbs (url, ca, cert, key, toml); got %d in %q", + strings.Count(ratholeCmd, "%s"), b.cfg.RatholeCommand) + } + return fmt.Sprintf(ratholeCmd, ratholeURL, caCrtB64, clientCrtB64, clientKeyB64, configB64), nil + } + + if pod != nil { + log.G(ctx).Debugf("RatholeCAIssuerName not set; using WebSocket transport for pod %s/%s", pod.Namespace, pod.Name) + } + var tomlBuilder strings.Builder + fmt.Fprintf(&tomlBuilder, "[client]\nremote_addr = \"%s:80\"\n\n", ratholeEndpoint) + tomlBuilder.WriteString("[client.transport]\ntype = \"websocket\"\n\n") + for _, port := range td.ExposedPorts { + if strings.ToUpper(port.Protocol) == protocolUDP { + log.G(ctx).Debugf("Skipping UDP port %d in rathole client config (websocket transport forwards TCP only)", port.Port) + continue + } + fmt.Fprintf(&tomlBuilder, "[client.services.p%d]\ntoken = \"%s\"\nlocal_addr = \"127.0.0.1:%d\"\n\n", + port.Port, td.RandomPassword, port.Port) + } + + configB64 := base64.StdEncoding.EncodeToString([]byte(tomlBuilder.String())) + ratholeWSCmd := b.cfg.RatholeWSCommand + if ratholeWSCmd == "" { + ratholeWSCmd = DefaultRatholeWSCommand + } + if strings.Count(ratholeWSCmd, "%s") != 2 { + return "", fmt.Errorf("RatholeWSCommand must have exactly 2 %%s format verbs (url, toml); got %d in %q", + strings.Count(ratholeWSCmd, "%s"), b.cfg.RatholeWSCommand) + } + + return fmt.Sprintf(ratholeWSCmd, ratholeURL, configB64), nil +} + +func (b *RatholeBackend) ClientAnnotationKey() string { + return annRatholeClientCmds +} + +func (b *RatholeBackend) KubernetesTemplate() (string, error) { + content, err := defaultRatholeTemplate.ReadFile("templates/rathole-template.yaml") + if err != nil { + return "", fmt.Errorf("failed to read embedded rathole template: %w", err) + } + return string(content), nil +} + +func (b *RatholeBackend) CleanupResources(ctx context.Context, name, namespace string) error { + if b.provider == nil || b.provider.dynamicClient == nil { + return nil + } + + for _, certName := range []string{name + "-rathole-server-tls", name + "-rathole-client-tls"} { + if err := b.provider.deleteUnstructuredResource(ctx, certManagerCertGVR, certName, namespace); err != nil { + return fmt.Errorf("failed to delete rathole cert-manager Certificate %s/%s: %w", namespace, certName, err) + } + log.G(ctx).Infof("Deleted rathole cert-manager Certificate %s/%s", namespace, certName) + } + + if err := b.provider.deleteUnstructuredResource(ctx, traefikIngressRouteTCPGVR, name, namespace); err != nil { + return fmt.Errorf("failed to delete rathole IngressRouteTCP %s/%s: %w", namespace, name, err) + } + log.G(ctx).Infof("Deleted rathole IngressRouteTCP %s/%s", namespace, name) + + return nil +} diff --git a/pkg/virtualkubelet/backend_rathole_test.go b/pkg/virtualkubelet/backend_rathole_test.go new file mode 100644 index 00000000..27c566b8 --- /dev/null +++ b/pkg/virtualkubelet/backend_rathole_test.go @@ -0,0 +1,44 @@ +package virtualkubelet + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + v1 "k8s.io/api/core/v1" + "k8s.io/client-go/kubernetes/fake" +) + +func TestRatholeBackend_ClientCommandWebsocket(t *testing.T) { + backend := &RatholeBackend{ + cfg: Network{ + TunnelType: tunnelTypeRathole, + WildcardDNS: "tunnel.example.com", + }, + } + backend.bindProvider(&Provider{clientSet: fake.NewClientset()}) + + td := WstunnelTemplateData{ + Name: "mypod-default", + Namespace: "default-wstunnel", + WildcardDNS: "tunnel.example.com", + RandomPassword: "token123", + ExposedPorts: []PortMapping{ + {Port: 8080, Protocol: "TCP"}, + }, + } + + cmd, err := backend.ClientCommand(context.Background(), td, &v1.Pod{}) + require.NoError(t, err) + assert.Contains(t, cmd, DefaultRatholeExecutableURL) + assert.Contains(t, cmd, "rathole-client.toml") + assert.Equal(t, annRatholeClientCmds, backend.ClientAnnotationKey()) +} + +func TestRatholeBackend_KubernetesTemplate(t *testing.T) { + backend := &RatholeBackend{cfg: Network{TunnelType: tunnelTypeRathole}} + tpl, err := backend.KubernetesTemplate() + require.NoError(t, err) + assert.Contains(t, tpl, "rapiz1/rathole") +} diff --git a/pkg/virtualkubelet/backend_ssh.go b/pkg/virtualkubelet/backend_ssh.go new file mode 100644 index 00000000..44ca640d --- /dev/null +++ b/pkg/virtualkubelet/backend_ssh.go @@ -0,0 +1,124 @@ +package virtualkubelet + +import ( + "context" + "encoding/base64" + "fmt" + "strings" + + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +type SSHBackend struct { + cfg Network + provider *Provider +} + +func (b *SSHBackend) bindProvider(p *Provider) { + b.provider = p +} + +func (b *SSHBackend) Name() string { + return tunnelTypeSSH +} + +func (b *SSHBackend) ServerResources(_ context.Context, _ WstunnelTemplateData) error { + return nil +} + +func (b *SSHBackend) ClientCommand(ctx context.Context, td WstunnelTemplateData, pod *v1.Pod) (string, error) { + if b.provider == nil || b.provider.clientSet == nil { + return "", fmt.Errorf("ssh backend requires initialized Kubernetes client") + } + if strings.TrimSpace(b.cfg.SSHJumpHost) == "" { + return "", fmt.Errorf("SSHJumpHost is required when TunnelType is %q", tunnelTypeSSH) + } + if strings.TrimSpace(b.cfg.SSHJumpKeySecretName) == "" { + return "", fmt.Errorf("SSHJumpKeySecretName is required when TunnelType is %q", tunnelTypeSSH) + } + + secretNamespace := strings.TrimSpace(b.cfg.SSHJumpKeySecretNamespace) + if secretNamespace == "" && b.provider != nil { + secretNamespace = strings.TrimSpace(b.provider.config.Namespace) + } + if secretNamespace == "" && pod != nil { + secretNamespace = pod.Namespace + } + if secretNamespace == "" { + return "", fmt.Errorf("cannot resolve SSHJumpKeySecretNamespace") + } + + secret, err := b.provider.clientSet.CoreV1().Secrets(secretNamespace).Get(ctx, b.cfg.SSHJumpKeySecretName, metav1.GetOptions{}) + if err != nil { + return "", fmt.Errorf("failed to read SSH jump key secret %s/%s: %w", secretNamespace, b.cfg.SSHJumpKeySecretName, err) + } + + privateKey := secret.Data["id_rsa"] + if len(privateKey) == 0 { + privateKey = secret.Data["id_ed25519"] + } + if len(privateKey) == 0 { + return "", fmt.Errorf("secret %s/%s must contain id_rsa or id_ed25519", secretNamespace, b.cfg.SSHJumpKeySecretName) + } + keyB64 := base64.StdEncoding.EncodeToString(privateKey) + + remoteHost := strings.TrimSpace(b.cfg.SSHRemoteHost) + if remoteHost == "" { + remoteHost = "localhost" + } + + forwardSpecs := make([]string, 0, len(td.ExposedPorts)) + for _, port := range td.ExposedPorts { + if strings.ToUpper(port.Protocol) == protocolUDP { + continue + } + forwardSpecs = append(forwardSpecs, fmt.Sprintf("0.0.0.0:%d:%s:%d", port.Port, remoteHost, port.Port)) + } + if len(forwardSpecs) == 0 { + return "", fmt.Errorf("no TCP exposed ports available for ssh tunnel") + } + + if strings.TrimSpace(b.cfg.SSHCommand) != "" { + // SSHCommand format: first %s is base64 private key, second %s is jump host, + // third %s is remote host, remaining %s are one per forward spec. + expected := len(forwardSpecs) + 3 + if strings.Count(b.cfg.SSHCommand, "%s") != expected { + return "", fmt.Errorf("SSHCommand must have exactly %d %%s format verbs (key, jump-host, remote-host, and one per forward spec); got %d", + expected, strings.Count(b.cfg.SSHCommand, "%s")) + } + args := make([]any, 0, expected) + args = append(args, keyB64, b.cfg.SSHJumpHost, remoteHost) + for _, spec := range forwardSpecs { + args = append(args, spec) + } + return fmt.Sprintf(b.cfg.SSHCommand, args...), nil + } + + var forwardArgs strings.Builder + for _, spec := range forwardSpecs { + fmt.Fprintf(&forwardArgs, " -L %s", spec) + } + + cmd := fmt.Sprintf( + "echo %s | base64 -d > /tmp/ssh_jump_key && chmod 600 /tmp/ssh_jump_key && ssh -i /tmp/ssh_jump_key -o StrictHostKeyChecking=accept-new -o UserKnownHostsFile=/tmp/ssh_known_hosts -N -J %s%s %s &", + keyB64, + b.cfg.SSHJumpHost, + forwardArgs.String(), + remoteHost, + ) + return cmd, nil +} + +func (b *SSHBackend) ClientAnnotationKey() string { + return annSSHClientCmds +} + +func (b *SSHBackend) KubernetesTemplate() (string, error) { + // SSH backend uses the default (wstunnel) shadow resources template. + return "", nil +} + +func (b *SSHBackend) CleanupResources(_ context.Context, _, _ string) error { + return nil +} diff --git a/pkg/virtualkubelet/backend_ssh_test.go b/pkg/virtualkubelet/backend_ssh_test.go new file mode 100644 index 00000000..059afa00 --- /dev/null +++ b/pkg/virtualkubelet/backend_ssh_test.go @@ -0,0 +1,92 @@ +package virtualkubelet + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes/fake" +) + +func TestSSHBackend_ClientCommand(t *testing.T) { + fakeClient := fake.NewClientset( + &v1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "jump-key", + Namespace: "interlink", + }, + Data: map[string][]byte{ + "id_ed25519": []byte("private-key-material"), + }, + }, + ) + + backend := &SSHBackend{ + cfg: Network{ + TunnelType: tunnelTypeSSH, + SSHJumpHost: "user@jump.example.com:22", + SSHJumpKeySecretName: "jump-key", + SSHJumpKeySecretNamespace: "interlink", + }, + } + backend.bindProvider(&Provider{ + clientSet: fakeClient, + config: Config{Namespace: "interlink"}, + }) + + td := WstunnelTemplateData{ + ExposedPorts: []PortMapping{ + {Port: 8080, Protocol: "TCP"}, + {Port: 53, Protocol: "UDP"}, + }, + } + + cmd, err := backend.ClientCommand(context.Background(), td, &v1.Pod{ObjectMeta: metav1.ObjectMeta{Namespace: "default"}}) + require.NoError(t, err) + assert.Contains(t, cmd, "ssh -i /tmp/ssh_jump_key") + assert.Contains(t, cmd, "-J user@jump.example.com:22") + assert.Contains(t, cmd, "-L 0.0.0.0:8080:localhost:8080") + assert.Contains(t, cmd, " localhost &") + assert.NotContains(t, cmd, ":53:") + assert.Equal(t, annSSHClientCmds, backend.ClientAnnotationKey()) +} + +func TestSSHBackend_CustomCommandVerbValidation(t *testing.T) { + fakeClient := fake.NewClientset( + &v1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "jump-key", + Namespace: "interlink", + }, + Data: map[string][]byte{ + "id_rsa": []byte("private-key-material"), + }, + }, + ) + + backend := &SSHBackend{ + cfg: Network{ + TunnelType: tunnelTypeSSH, + SSHJumpHost: "user@jump.example.com:22", + SSHJumpKeySecretName: "jump-key", + SSHJumpKeySecretNamespace: "interlink", + SSHCommand: "custom %s %s %s", + }, + } + backend.bindProvider(&Provider{ + clientSet: fakeClient, + config: Config{Namespace: "interlink"}, + }) + + _, err := backend.ClientCommand(context.Background(), WstunnelTemplateData{ + ExposedPorts: []PortMapping{ + {Port: 8080, Protocol: "TCP"}, + {Port: 9090, Protocol: "TCP"}, + }, + }, &v1.Pod{}) + require.Error(t, err) + assert.Contains(t, err.Error(), "must have exactly") +} diff --git a/pkg/virtualkubelet/backend_wstunnel.go b/pkg/virtualkubelet/backend_wstunnel.go new file mode 100644 index 00000000..11c88bc3 --- /dev/null +++ b/pkg/virtualkubelet/backend_wstunnel.go @@ -0,0 +1,64 @@ +package virtualkubelet + +import ( + "context" + "fmt" + "strings" + + "github.com/containerd/containerd/log" + v1 "k8s.io/api/core/v1" +) + +type WstunnelBackend struct { + cfg Network +} + +func (b *WstunnelBackend) Name() string { + return "wstunnel" +} + +func (b *WstunnelBackend) ServerResources(_ context.Context, _ WstunnelTemplateData) error { + return nil +} + +func (b *WstunnelBackend) ClientCommand(ctx context.Context, td WstunnelTemplateData, _ *v1.Pod) (string, error) { + ingressEndpoint := fmt.Sprintf("%s-%s.%s", td.Name, td.Namespace, td.WildcardDNS) + ingressEndpoint = sanitizeFullDNSName(ingressEndpoint) + if td.WildcardDNS == "" { + ingressEndpoint = td.Name + } + + var rOptions []string + for _, port := range td.ExposedPorts { + if strings.ToUpper(port.Protocol) == protocolUDP { + continue + } + rOptions = append(rOptions, fmt.Sprintf("-R tcp://0.0.0.0:%d:localhost:%d", port.Port, port.Port)) + } + + wstunnelCommandTemplate := b.cfg.WstunnelCommand + if wstunnelCommandTemplate == "" { + wstunnelCommandTemplate = DefaultWstunnelCommand + } + + log.G(ctx).Infof("Default ws tunnel command is: %s", wstunnelCommandTemplate) + + return fmt.Sprintf( + wstunnelCommandTemplate, + td.RandomPassword, + strings.Join(rOptions, " "), + ingressEndpoint, + ), nil +} + +func (b *WstunnelBackend) ClientAnnotationKey() string { + return annWSTunnelClientCmds +} + +func (b *WstunnelBackend) KubernetesTemplate() (string, error) { + return "", nil +} + +func (b *WstunnelBackend) CleanupResources(_ context.Context, _, _ string) error { + return nil +} diff --git a/pkg/virtualkubelet/backend_wstunnel_test.go b/pkg/virtualkubelet/backend_wstunnel_test.go new file mode 100644 index 00000000..f393e9a4 --- /dev/null +++ b/pkg/virtualkubelet/backend_wstunnel_test.go @@ -0,0 +1,28 @@ +package virtualkubelet + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestWstunnelBackend_ClientCommand(t *testing.T) { + backend := &WstunnelBackend{cfg: Network{}} + td := WstunnelTemplateData{ + Name: "mypod-default", + Namespace: "default-wstunnel", + WildcardDNS: "tunnel.example.com", + RandomPassword: "token123", + ExposedPorts: []PortMapping{ + {Port: 8080, Protocol: "TCP"}, + {Port: 5353, Protocol: "UDP"}, + }, + } + + cmd, err := backend.ClientCommand(context.Background(), td, nil) + assert.NoError(t, err) + assert.Contains(t, cmd, "-R tcp://0.0.0.0:8080:localhost:8080") + assert.NotContains(t, cmd, "5353") + assert.Equal(t, annWSTunnelClientCmds, backend.ClientAnnotationKey()) +} diff --git a/pkg/virtualkubelet/config.go b/pkg/virtualkubelet/config.go index 6b9185c5..d17bd0c0 100644 --- a/pkg/virtualkubelet/config.go +++ b/pkg/virtualkubelet/config.go @@ -131,7 +131,7 @@ type PodCIDR struct { type Network struct { // EnableTunnel enables WebSocket tunneling for pod port exposure EnableTunnel bool `yaml:"EnableTunnel" default:"false"` - // TunnelType selects the port-forwarding backend: "" or "wstunnel" (default, backward-compatible) or "rathole". + // TunnelType selects the port-forwarding backend: "" or "wstunnel" (default, backward-compatible), "rathole", or "ssh". TunnelType string `yaml:"TunnelType,omitempty"` // WildcardDNS specifies the DNS domain for generating tunnel endpoints WildcardDNS string `yaml:"WildcardDNS,omitempty"` @@ -161,6 +161,22 @@ type Network struct { RatholeCAIssuerName string `yaml:"RatholeCAIssuerName,omitempty"` // RatholeCAIssuerKind is the kind of the cert-manager issuer: "ClusterIssuer" (default) or "Issuer". RatholeCAIssuerKind string `yaml:"RatholeCAIssuerKind,omitempty"` + // SSHJumpHost is the SSH jump/bastion host in the form user@host:port. + // Used only when TunnelType is "ssh". + SSHJumpHost string `yaml:"SSHJumpHost,omitempty"` + // SSHJumpKeySecretName is the Kubernetes Secret name containing the jump-host private key + // (key name id_rsa or id_ed25519). Used only when TunnelType is "ssh". + SSHJumpKeySecretName string `yaml:"SSHJumpKeySecretName,omitempty"` + // SSHJumpKeySecretNamespace is the namespace of SSHJumpKeySecretName. + // Defaults to Config.Namespace when empty. + SSHJumpKeySecretNamespace string `yaml:"SSHJumpKeySecretNamespace,omitempty"` + // SSHRemoteHost is the destination host reachable from the jump host. + // Defaults to "localhost" when empty. + SSHRemoteHost string `yaml:"SSHRemoteHost,omitempty"` + // SSHCommand specifies an optional custom SSH command template. + // Format verbs: first %s = base64 private key, second %s = jump host, + // third %s = remote host, remaining %s = one per forwarded host:port pair. + SSHCommand string `yaml:"SSHCommand,omitempty"` // FullMesh enables full mesh networking with slirp4netns and WireGuard FullMesh bool `yaml:"FullMesh" default:"false"` // MeshScriptTemplatePath is the path to a custom mesh.sh template file diff --git a/pkg/virtualkubelet/config_test.go b/pkg/virtualkubelet/config_test.go index 9726396c..bd2a070b 100644 --- a/pkg/virtualkubelet/config_test.go +++ b/pkg/virtualkubelet/config_test.go @@ -157,6 +157,26 @@ func TestNetwork_WstunnelDefaultTunnelType(t *testing.T) { assert.Empty(t, network.TunnelType, "empty TunnelType should default to wstunnel behaviour") } +func TestNetwork_SSHConfiguration(t *testing.T) { + network := Network{ + EnableTunnel: true, + TunnelType: "ssh", + SSHJumpHost: "user@jump.example.com:22", + SSHJumpKeySecretName: "jump-key", + SSHJumpKeySecretNamespace: "interlink", + SSHRemoteHost: "localhost", + SSHCommand: "custom %s %s %s %s", + } + + assert.True(t, network.EnableTunnel) + assert.Equal(t, "ssh", network.TunnelType) + assert.Equal(t, "user@jump.example.com:22", network.SSHJumpHost) + assert.Equal(t, "jump-key", network.SSHJumpKeySecretName) + assert.Equal(t, "interlink", network.SSHJumpKeySecretNamespace) + assert.Equal(t, "localhost", network.SSHRemoteHost) + assert.NotEmpty(t, network.SSHCommand) +} + func TestAccelerator_AvailableIsKubernetesQuantity(t *testing.T) { tests := []struct { name string diff --git a/pkg/virtualkubelet/mesh.go b/pkg/virtualkubelet/mesh.go index 345962c8..63a78ec0 100644 --- a/pkg/virtualkubelet/mesh.go +++ b/pkg/virtualkubelet/mesh.go @@ -236,7 +236,7 @@ func deriveWGPublicKey(privB64 string) (string, error) { } // addWstunnelClientAnnotation adds the tunnel client command annotation to the original pod. -// In rathole mode it writes a rathole client command; otherwise it writes a wstunnel command. +// In non-full-mesh mode it delegates command generation to the configured tunnel backend. func (p *Provider) addWstunnelClientAnnotation(ctx context.Context, pod *v1.Pod, td *WstunnelTemplateData) error { if pod.Annotations == nil { pod.Annotations = make(map[string]string) @@ -253,8 +253,7 @@ func (p *Provider) addWstunnelClientAnnotation(ctx context.Context, pod *v1.Pod, clearConflictingNetworkAnnotations(pod, fullMeshEnabledForPod) // Check if FullMesh mode is enabled and not disabled for this specific pod - switch { - case fullMeshEnabledForPod: + if fullMeshEnabledForPod { log.G(ctx).Infof("FullMesh mode enabled, generating pre-exec script for pod %s/%s", pod.Namespace, pod.Name) // Generate full mesh script @@ -290,138 +289,21 @@ PersistentKeepalive = %d `, clientPriv, td.WGMTU, serverPub, td.KeepaliveSecs) pod.Annotations["interlink.eu/wireguard-client-snippet"] = wgSnippet - - case p.config.Network.TunnelType == tunnelTypeRathole: - // Rathole mode: build a client TOML config and generate the client bootstrap command. - // When RatholeCAIssuerName is set, use TLS transport with cert-manager-issued certificates; - // otherwise fall back to WebSocket transport for backward compatibility. - ratholeEndpoint := fmt.Sprintf("rathole-%s.%s", td.Name, td.WildcardDNS) - ratholeEndpoint = sanitizeFullDNSName(ratholeEndpoint) - if td.WildcardDNS == "" { - ratholeEndpoint = td.Name - } - - ratholeURL := p.config.Network.RatholeExecutableURL - if ratholeURL == "" { - ratholeURL = DefaultRatholeExecutableURL + } else { + if err := p.ensureTunnelBackend(); err != nil { + return err } - var mainCmd string - - if p.config.Network.RatholeCAIssuerName != "" { - // TLS mode: rathole client uses TLS transport; Traefik terminates TLS at port 443. - // Wait for the client certificate secret to be issued by cert-manager. - clientCertSecretName := td.Name + "-rathole-client-tls" - if err := p.waitForRatholeCertSecret(ctx, clientCertSecretName, td.Namespace); err != nil { - return fmt.Errorf("rathole client certificate not ready: %w", err) - } - - certSecret, err := p.clientSet.CoreV1().Secrets(td.Namespace).Get(ctx, clientCertSecretName, metav1.GetOptions{}) - if err != nil { - return fmt.Errorf("failed to read rathole client certificate secret: %w", err) - } - - // Validate all required keys are present and non-empty. - for _, key := range []string{"ca.crt", "tls.crt", "tls.key"} { - if len(certSecret.Data[key]) == 0 { - return fmt.Errorf("rathole client certificate secret %s/%s is missing required key %q", td.Namespace, clientCertSecretName, key) - } - } - - caCrtB64 := base64.StdEncoding.EncodeToString(certSecret.Data["ca.crt"]) - clientCrtB64 := base64.StdEncoding.EncodeToString(certSecret.Data["tls.crt"]) - clientKeyB64 := base64.StdEncoding.EncodeToString(certSecret.Data["tls.key"]) - - var tomlBuilder strings.Builder - fmt.Fprintf(&tomlBuilder, "[client]\nremote_addr = \"%s:443\"\n\n", ratholeEndpoint) - tomlBuilder.WriteString("[client.transport]\ntype = \"tls\"\n\n") - tomlBuilder.WriteString("[client.transport.tls]\n") - fmt.Fprintf(&tomlBuilder, "hostname = \"%s\"\n", ratholeEndpoint) - tomlBuilder.WriteString("trusted_root = \"/tmp/rathole-ca.crt\"\n") - tomlBuilder.WriteString("cert = \"/tmp/rathole-client.crt\"\n") - tomlBuilder.WriteString("key = \"/tmp/rathole-client.key\"\n\n") - for _, port := range td.ExposedPorts { - if strings.ToUpper(port.Protocol) == protocolUDP { - log.G(ctx).Debugf("Skipping UDP port %d in rathole client config (TLS transport forwards TCP only)", port.Port) - continue - } - fmt.Fprintf(&tomlBuilder, "[client.services.p%d]\ntoken = \"%s\"\nlocal_addr = \"127.0.0.1:%d\"\n\n", - port.Port, td.RandomPassword, port.Port) - } - - configB64 := base64.StdEncoding.EncodeToString([]byte(tomlBuilder.String())) - - ratholeCmd := p.config.Network.RatholeCommand - if ratholeCmd == "" { - ratholeCmd = DefaultRatholeCommand - } - // Validate that the TLS command template has exactly 5 %s format verbs - // (URL, CA cert, client cert, client key, client TOML). - if strings.Count(ratholeCmd, "%s") != 5 { - return fmt.Errorf("RatholeCommand must have exactly 5 %%s format verbs (url, ca, cert, key, toml); got %d in %q", - strings.Count(ratholeCmd, "%s"), p.config.Network.RatholeCommand) - } - mainCmd = fmt.Sprintf(ratholeCmd, ratholeURL, caCrtB64, clientCrtB64, clientKeyB64, configB64) - } else { - // WebSocket fallback (no CA issuer configured) - log.G(ctx).Debugf("RatholeCAIssuerName not set; using WebSocket transport for pod %s/%s", pod.Namespace, pod.Name) - - var tomlBuilder strings.Builder - fmt.Fprintf(&tomlBuilder, "[client]\nremote_addr = \"%s:80\"\n\n", ratholeEndpoint) - tomlBuilder.WriteString("[client.transport]\ntype = \"websocket\"\n\n") - for _, port := range td.ExposedPorts { - if strings.ToUpper(port.Protocol) == protocolUDP { - log.G(ctx).Debugf("Skipping UDP port %d in rathole client config (websocket transport forwards TCP only)", port.Port) - continue - } - fmt.Fprintf(&tomlBuilder, "[client.services.p%d]\ntoken = \"%s\"\nlocal_addr = \"127.0.0.1:%d\"\n\n", - port.Port, td.RandomPassword, port.Port) - } - - configB64 := base64.StdEncoding.EncodeToString([]byte(tomlBuilder.String())) - - ratholeWSCmd := p.config.Network.RatholeWSCommand - if ratholeWSCmd == "" { - ratholeWSCmd = DefaultRatholeWSCommand - } - // Validate that the WebSocket command template has exactly 2 %s format verbs - // (URL, client TOML). - if strings.Count(ratholeWSCmd, "%s") != 2 { - return fmt.Errorf("RatholeWSCommand must have exactly 2 %%s format verbs (url, toml); got %d in %q", - strings.Count(ratholeWSCmd, "%s"), p.config.Network.RatholeWSCommand) - } - mainCmd = fmt.Sprintf(ratholeWSCmd, ratholeURL, configB64) + mainCmd, err := p.tunnelBackend.ClientCommand(ctx, *td, pod) + if err != nil { + return err } - // Remove any stale wstunnel annotation and set the rathole one + // clear stale tunnel command annotations and set the selected backend annotation delete(pod.Annotations, annWSTunnelClientCmds) - pod.Annotations[annRatholeClientCmds] = mainCmd - - default: - var rOptions []string - for _, port := range td.ExposedPorts { - if strings.ToUpper(port.Protocol) == protocolUDP { - continue - } - rOptions = append(rOptions, fmt.Sprintf("-R tcp://0.0.0.0:%d:localhost:%d", port.Port, port.Port)) - } - - wstunnelCommandTemplate := p.config.Network.WstunnelCommand - if wstunnelCommandTemplate == "" { - wstunnelCommandTemplate = DefaultWstunnelCommand - } - - log.G(ctx).Infof("Default ws tunnel command is: %s", wstunnelCommandTemplate) - - mainCmd := fmt.Sprintf( - wstunnelCommandTemplate, - td.RandomPassword, - strings.Join(rOptions, " "), - ingressEndpoint, - ) - - pod.Annotations["interlink.eu/wstunnel-client-commands"] = mainCmd - + delete(pod.Annotations, annRatholeClientCmds) + delete(pod.Annotations, annSSHClientCmds) + pod.Annotations[p.tunnelBackend.ClientAnnotationKey()] = mainCmd } // Patch the pod on Kubernetes @@ -455,7 +337,7 @@ PersistentKeepalive = %d // clearConflictingNetworkAnnotations removes generated annotations that are specific to // the opposite network mode to keep pod network bootstrap behavior uniform. -// When fullMeshEnabledForPod is true, any stale wstunnel and rathole client command annotations +// When fullMeshEnabledForPod is true, any stale tunnel client command annotations // are removed. When false, any stale WireGuard snippet annotation is removed. func clearConflictingNetworkAnnotations(pod *v1.Pod, fullMeshEnabledForPod bool) { if pod == nil || pod.Annotations == nil { @@ -465,6 +347,7 @@ func clearConflictingNetworkAnnotations(pod *v1.Pod, fullMeshEnabledForPod bool) if fullMeshEnabledForPod { delete(pod.Annotations, annWSTunnelClientCmds) delete(pod.Annotations, annRatholeClientCmds) + delete(pod.Annotations, annSSHClientCmds) return } diff --git a/pkg/virtualkubelet/mesh_annotations_test.go b/pkg/virtualkubelet/mesh_annotations_test.go index 315ed81d..597a6d83 100644 --- a/pkg/virtualkubelet/mesh_annotations_test.go +++ b/pkg/virtualkubelet/mesh_annotations_test.go @@ -45,6 +45,24 @@ func TestClearConflictingNetworkAnnotations(t *testing.T) { assert.Equal(t, "value", pod.Annotations["keep"]) }) + t.Run("full mesh also removes ssh command annotation", func(t *testing.T) { + pod := &v1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + annSSHClientCmds: "ssh-command", + annWGClientSnippet: "wireguard-snippet", + "keep": "value", + }, + }, + } + + clearConflictingNetworkAnnotations(pod, true) + + assert.NotContains(t, pod.Annotations, annSSHClientCmds) + assert.Contains(t, pod.Annotations, annWGClientSnippet) + assert.Equal(t, "value", pod.Annotations["keep"]) + }) + t.Run("non mesh removes wireguard snippet annotation", func(t *testing.T) { pod := &v1.Pod{ ObjectMeta: metav1.ObjectMeta{ diff --git a/pkg/virtualkubelet/tunnel_backend.go b/pkg/virtualkubelet/tunnel_backend.go new file mode 100644 index 00000000..3c7a1489 --- /dev/null +++ b/pkg/virtualkubelet/tunnel_backend.go @@ -0,0 +1,34 @@ +package virtualkubelet + +import ( + "context" + "fmt" + "strings" + + v1 "k8s.io/api/core/v1" + "k8s.io/client-go/dynamic" +) + +// TunnelBackend encapsulates tunnel-specific server resources, client command generation, +// template selection, and backend cleanup. +type TunnelBackend interface { + Name() string + ServerResources(ctx context.Context, td WstunnelTemplateData) error + ClientCommand(ctx context.Context, td WstunnelTemplateData, pod *v1.Pod) (string, error) + ClientAnnotationKey() string + KubernetesTemplate() (string, error) + CleanupResources(ctx context.Context, name, namespace string) error +} + +func newTunnelBackend(cfg Network, dynamicClient dynamic.Interface) (TunnelBackend, error) { + switch strings.TrimSpace(strings.ToLower(cfg.TunnelType)) { + case "", "wstunnel": + return &WstunnelBackend{cfg: cfg}, nil + case tunnelTypeRathole: + return &RatholeBackend{cfg: cfg, dynamicClient: dynamicClient}, nil + case tunnelTypeSSH: + return &SSHBackend{cfg: cfg}, nil + default: + return nil, fmt.Errorf("unsupported tunnel backend %q (supported: \"\", \"wstunnel\", \"rathole\", \"ssh\")", cfg.TunnelType) + } +} diff --git a/pkg/virtualkubelet/tunnel_backend_test.go b/pkg/virtualkubelet/tunnel_backend_test.go new file mode 100644 index 00000000..a71c9842 --- /dev/null +++ b/pkg/virtualkubelet/tunnel_backend_test.go @@ -0,0 +1,35 @@ +package virtualkubelet + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestNewTunnelBackend(t *testing.T) { + tests := []struct { + name string + tunnelType string + wantType string + wantErr bool + }{ + {name: "default empty", tunnelType: "", wantType: "wstunnel"}, + {name: "wstunnel", tunnelType: "wstunnel", wantType: "wstunnel"}, + {name: "rathole", tunnelType: "rathole", wantType: tunnelTypeRathole}, + {name: "ssh", tunnelType: "ssh", wantType: tunnelTypeSSH}, + {name: "unknown", tunnelType: "invalid", wantErr: true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + backend, err := newTunnelBackend(Network{TunnelType: tt.tunnelType}, nil) + if tt.wantErr { + require.Error(t, err) + return + } + require.NoError(t, err) + assert.Equal(t, tt.wantType, backend.Name()) + }) + } +} diff --git a/pkg/virtualkubelet/virtualkubelet.go b/pkg/virtualkubelet/virtualkubelet.go index b75659e0..cdab7d87 100644 --- a/pkg/virtualkubelet/virtualkubelet.go +++ b/pkg/virtualkubelet/virtualkubelet.go @@ -76,6 +76,7 @@ const ( protocolUDP = "UDP" // tunnelTypeRathole is the TunnelType value that selects the rathole port-forwarding backend. tunnelTypeRathole = "rathole" + tunnelTypeSSH = "ssh" DefaultWstunnelCommand = "curl -L -f -k https://github.com/erebe/wstunnel/releases/download/v10.4.4/wstunnel_10.4.4_linux_amd64.tar.gz -o wstunnel.tar.gz && tar -xzvf wstunnel.tar.gz && chmod +x wstunnel && ./wstunnel client --http-upgrade-path-prefix %s %s ws://%s:80 &" // DefaultRatholeExecutableURL is the default URL to download the rathole executable zip archive. // Source: https://github.com/rathole-org/rathole (note: x86_64 musl was dropped in v0.5.0; use gnu). @@ -97,6 +98,7 @@ const ( annWgKeepaliveSeconds = "interlink.eu/wg-keepalive-seconds" // optional, default 25 annWSTunnelClientCmds = "interlink.eu/wstunnel-client-commands" annRatholeClientCmds = "interlink.eu/rathole-client-commands" + annSSHClientCmds = "interlink.eu/ssh-client-commands" annWGClientSnippet = "interlink.eu/wireguard-client-snippet" annDisableOffloadContainers = "interlink.eu/disable-offload-containers" // comma-separated container names annDisableOffloadInitContainers = "interlink.eu/disable-offload-init-containers" // comma-separated init container names @@ -163,6 +165,7 @@ type Provider struct { onNodeChangeCallback func(*v1.Node) clientSet kubernetes.Interface dynamicClient dynamic.Interface + tunnelBackend TunnelBackend clientHTTPTransport *http.Transport podIPs []string } @@ -757,6 +760,10 @@ func copyPodLabelsAndAnnotations(pod *v1.Pod) (map[string]string, map[string]str // createDummyPod creates wstunnel infrastructure from template for containers with exposed ports func (p *Provider) createDummyPod(ctx context.Context, originalPod *v1.Pod) (*v1.Pod, *WstunnelTemplateData, error) { + if err := p.ensureTunnelBackend(); err != nil { + return nil, nil, err + } + log.G(ctx).Infof("Creating wstunnel infrastructure for %s/%s with exposed ports", originalPod.Namespace, originalPod.Name) // If not exists, create the namespace for wstunnel @@ -900,13 +907,8 @@ func (p *Provider) createDummyPod(ctx context.Context, originalPod *v1.Pod) (*v1 return nil, nil, fmt.Errorf("failed to apply wstunnel manifests: %w", err) } - // For rathole TLS mode, also create the cert-manager Certificates and Traefik IngressRouteTCP. - // This is a hard requirement: if TLS resource creation fails when RatholeCAIssuerName is set, - // the annotation path will block waiting for a cert secret that will never appear. - if p.config.Network.TunnelType == tunnelTypeRathole && p.config.Network.RatholeCAIssuerName != "" { - if tlsErr := p.applyRatholeTLSResources(ctx, templateData); tlsErr != nil { - return nil, nil, fmt.Errorf("failed to apply rathole TLS resources for %s/%s: %w", originalPod.Namespace, originalPod.Name, tlsErr) - } + if err := p.tunnelBackend.ServerResources(ctx, templateData); err != nil { + return nil, nil, fmt.Errorf("failed to apply tunnel backend resources for %s/%s: %w", originalPod.Namespace, originalPod.Name, err) } log.G(ctx).Infof("Created tunnel infrastructure (%s) for %s/%s", p.config.Network.TunnelType, originalPod.Namespace, originalPod.Name) @@ -985,6 +987,10 @@ func mergeMaps(dst, src map[string]string) map[string]string { // executeWstunnelTemplate loads and executes the tunnel template (wstunnel or rathole based on configuration) func (p *Provider) executeWstunnelTemplate(ctx context.Context, data WstunnelTemplateData) (string, error) { + if err := p.ensureTunnelBackend(); err != nil { + return "", err + } + var templateContent string // Try to load from custom path first (applies to both wstunnel and rathole) @@ -999,23 +1005,19 @@ func (p *Provider) executeWstunnelTemplate(ctx context.Context, data WstunnelTem // Fall back to the built-in template for the configured tunnel type if templateContent == "" { - var ( - content []byte - err error - ) - if p.config.Network.TunnelType == tunnelTypeRathole { - content, err = defaultRatholeTemplate.ReadFile("templates/rathole-template.yaml") - if err != nil { - return "", fmt.Errorf("failed to read embedded rathole template: %w", err) - } - log.G(ctx).Info("Using built-in rathole template") + backendTemplate, err := p.tunnelBackend.KubernetesTemplate() + if err != nil { + return "", err + } + if backendTemplate != "" { + templateContent = backendTemplate } else { - content, err = defaultWstunnelTemplate.ReadFile("templates/wstunnel-template.yaml") + content, err := defaultWstunnelTemplate.ReadFile("templates/wstunnel-template.yaml") if err != nil { return "", fmt.Errorf("failed to read embedded template: %w", err) } + templateContent = string(content) } - templateContent = string(content) } // Parse and execute template @@ -1306,6 +1308,10 @@ func (p *Provider) waitForDeploymentPod(ctx context.Context, deploymentName, nam // cleanupWstunnelResources removes all tunnel resources for a given name and namespace func (p *Provider) cleanupWstunnelResources(ctx context.Context, wstunnelName, namespace string) { + if err := p.ensureTunnelBackend(); err != nil { + log.G(ctx).Warningf("Failed to initialize tunnel backend during cleanup: %v", err) + } + log.G(ctx).Infof("Cleaning up tunnel resources for %s/%s", namespace, wstunnelName) // Delete deployment @@ -1349,19 +1355,9 @@ func (p *Provider) cleanupWstunnelResources(ctx context.Context, wstunnelName, n log.G(ctx).Infof("Successfully deleted rathole configmap %s/%s", namespace, wstunnelName+"-rathole-config") } - // Delete rathole TLS resources (cert-manager Certificates and Traefik IngressRouteTCP) - if p.dynamicClient != nil { - for _, certName := range []string{wstunnelName + "-rathole-server-tls", wstunnelName + "-rathole-client-tls"} { - if delErr := p.deleteUnstructuredResource(ctx, certManagerCertGVR, certName, namespace); delErr != nil { - log.G(ctx).Warningf("Failed to delete rathole cert-manager Certificate %s/%s: %v", namespace, certName, delErr) - } else { - log.G(ctx).Infof("Deleted rathole cert-manager Certificate %s/%s", namespace, certName) - } - } - if delErr := p.deleteUnstructuredResource(ctx, traefikIngressRouteTCPGVR, wstunnelName, namespace); delErr != nil { - log.G(ctx).Warningf("Failed to delete rathole IngressRouteTCP %s/%s: %v", namespace, wstunnelName, delErr) - } else { - log.G(ctx).Infof("Deleted rathole IngressRouteTCP %s/%s", namespace, wstunnelName) + if p.tunnelBackend != nil { + if err := p.tunnelBackend.CleanupResources(ctx, wstunnelName, namespace); err != nil { + log.G(ctx).Warningf("Failed to delete backend-specific tunnel resources for %s/%s: %v", namespace, wstunnelName, err) } } } @@ -2471,6 +2467,23 @@ func CheckIfAnnotationExists(pod *v1.Pod, key string) bool { return ok } +func (p *Provider) ensureTunnelBackend() error { + if p.tunnelBackend != nil { + return nil + } + + backend, err := newTunnelBackend(p.config.Network, p.dynamicClient) + if err != nil { + return err + } + if binder, ok := backend.(interface{ bindProvider(*Provider) }); ok { + binder.bindProvider(p) + } + p.tunnelBackend = backend + + return nil +} + func (p *Provider) initClientSet(ctx context.Context) error { start := time.Now().Unix() tracer := otel.Tracer("interlink-service") @@ -2507,5 +2520,9 @@ func (p *Provider) initClientSet(ctx context.Context) error { } } + if err := p.ensureTunnelBackend(); err != nil { + return err + } + return nil }