Skip to content

Commit d7577f1

Browse files
committed
test(helm): Add kube gateway e2e tests
Signed-off-by: Taylor Mutch <taylormutch@gmail.com>
1 parent 084c93b commit d7577f1

8 files changed

Lines changed: 286 additions & 0 deletions

File tree

e2e/rust/e2e-helm.sh

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#!/usr/bin/env bash
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Run a Rust e2e test against a Helm-deployed OpenShell gateway.
#
# Environment:
#   OPENSHELL_E2E_KUBE_CONTEXT  kubectl context of an existing cluster to
#                               target; when unset, an ephemeral k3d cluster
#                               is created and torn down by
#                               with-kube-gateway.sh.
#   OPENSHELL_E2E_KUBE_TEST     e2e test target passed to `cargo test --test`
#                               (default: smoke).

set -euo pipefail

ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
E2E_TEST="${OPENSHELL_E2E_KUBE_TEST:-smoke}"

# Build the CLI from the repository root instead of the caller's working
# directory, so the script behaves the same wherever it is invoked from. The
# subshell keeps the working directory of the exec'd command below unchanged.
(
  cd "${ROOT}"
  cargo build -p openshell-cli --features openshell-core/dev-settings
)

# Replace this shell with the gateway wrapper, which runs the cargo test
# command it is given as arguments.
exec "${ROOT}/e2e/with-kube-gateway.sh" \
  cargo test --manifest-path "${ROOT}/e2e/rust/Cargo.toml" \
    --features e2e \
    --test "${E2E_TEST}" \
    -- --nocapture

e2e/rust/src/harness/driver.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
//! Active compute-driver detection for tests with driver-specific assumptions.
5+
6+
/// Reports whether the e2e suite is running against the Kubernetes compute
/// driver, printing a skip notice to stderr when it is.
///
/// The driver is read from the `OPENSHELL_E2E_DRIVER` environment variable and
/// must equal `kubernetes` exactly. An unset, non-Unicode, or different value
/// yields `false` and prints nothing.
///
/// Tests that depend on docker/podman host-network features (e.g.
/// `host.openshell.internal` reachability, sibling-container test servers)
/// can early-return when this is true. `reason` is appended to the notice so
/// the test log records why the test body did not run.
///
/// Marked `#[must_use]` because discarding the result would let the test run
/// on a driver it does not support.
#[must_use = "return early from the test when this is true"]
pub fn skip_if_kube(reason: &str) -> bool {
    let on_kube = matches!(
        std::env::var("OPENSHELL_E2E_DRIVER").as_deref(),
        Ok("kubernetes")
    );
    if on_kube {
        eprintln!("skipping on kubernetes driver: {reason}");
    }
    on_kube
}

e2e/rust/src/harness/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
66
pub mod binary;
77
pub mod container;
8+
pub mod driver;
89
pub mod gateway;
910
pub mod output;
1011
pub mod port;

e2e/rust/tests/forward_proxy_graphql_l7.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
use std::io::Write;
1414

1515
use openshell_e2e::harness::container::ContainerHttpServer;
16+
use openshell_e2e::harness::driver::skip_if_kube;
1617
use openshell_e2e::harness::sandbox::SandboxGuard;
1718
use tempfile::NamedTempFile;
1819

@@ -131,6 +132,9 @@ network_policies:
131132
#[tokio::test]
132133
#[allow(clippy::too_many_lines)]
133134
async fn graphql_l7_enforces_allow_and_deny_rules_on_forward_and_connect_paths() {
135+
if skip_if_kube("uses host.openshell.internal to reach a sibling container") {
136+
return;
137+
}
134138
let server = start_test_server().await.expect("start test server");
135139
let policy = write_graphql_policy(&server.host, server.port).expect("write custom policy");
136140
let policy_path = policy

e2e/rust/tests/forward_proxy_l7_bypass.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
use std::io::Write;
1212

1313
use openshell_e2e::harness::container::ContainerHttpServer;
14+
use openshell_e2e::harness::driver::skip_if_kube;
1415
use openshell_e2e::harness::sandbox::SandboxGuard;
1516
use tempfile::NamedTempFile;
1617

@@ -98,6 +99,9 @@ network_policies:
9899
/// GET /allowed should succeed — the L7 policy explicitly allows it.
99100
#[tokio::test]
100101
async fn forward_proxy_allows_l7_permitted_request() {
102+
if skip_if_kube("uses host.openshell.internal to reach a sibling container") {
103+
return;
104+
}
101105
let server = start_test_server().await.expect("start test server");
102106
let policy =
103107
write_policy_with_l7_rules(&server.host, server.port).expect("write custom policy");
@@ -138,6 +142,9 @@ except Exception as e:
138142
/// POST /allowed should be denied — the L7 policy only allows GET.
139143
#[tokio::test]
140144
async fn forward_proxy_denies_l7_blocked_request() {
145+
if skip_if_kube("uses host.openshell.internal to reach a sibling container") {
146+
return;
147+
}
141148
let server = start_test_server().await.expect("start test server");
142149
let policy =
143150
write_policy_with_l7_rules(&server.host, server.port).expect("write custom policy");

e2e/rust/tests/host_gateway_alias.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use std::process::Stdio;
88
use std::sync::Mutex;
99

1010
use openshell_e2e::harness::binary::openshell_cmd;
11+
use openshell_e2e::harness::driver::skip_if_kube;
1112
use openshell_e2e::harness::sandbox::SandboxGuard;
1213
use tempfile::NamedTempFile;
1314
use tokio::io::AsyncReadExt;
@@ -190,6 +191,9 @@ network_policies:
190191

191192
#[tokio::test]
192193
async fn sandbox_reaches_host_openshell_internal_via_host_gateway_alias() {
194+
if skip_if_kube("requires host.openshell.internal alias") {
195+
return;
196+
}
193197
let server = HostServer::start(r#"{"message":"hello-from-host"}"#)
194198
.await
195199
.expect("start host echo server");
@@ -225,6 +229,9 @@ async fn sandbox_reaches_host_openshell_internal_via_host_gateway_alias() {
225229

226230
#[tokio::test]
227231
async fn sandbox_inference_local_routes_to_host_openshell_internal() {
232+
if skip_if_kube("requires host.openshell.internal alias") {
233+
return;
234+
}
228235
let _inference_lock = INFERENCE_ROUTE_LOCK
229236
.lock()
230237
.unwrap_or_else(std::sync::PoisonError::into_inner);
@@ -301,6 +308,9 @@ async fn sandbox_inference_local_routes_to_host_openshell_internal() {
301308

302309
#[tokio::test]
303310
async fn inference_set_supports_no_verify_for_unreachable_endpoint() {
311+
if skip_if_kube("uses host.openshell.internal as the unreachable target") {
312+
return;
313+
}
304314
let _inference_lock = INFERENCE_ROUTE_LOCK
305315
.lock()
306316
.unwrap_or_else(std::sync::PoisonError::into_inner);

e2e/with-kube-gateway.sh

Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
#!/usr/bin/env bash
2+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
# Run an e2e command against a Helm-deployed OpenShell gateway in Kubernetes.
6+
#
7+
# Modes:
8+
# - OPENSHELL_E2E_KUBE_CONTEXT set:
9+
#     Target the named kubectl context, install the chart into the "openshell" namespace
10+
#     (created if missing, removed on exit after a successful install), and port-forward the
11+
#     gateway. Cluster lifecycle is the caller's responsibility (e.g. CI provisions kind via helm/kind-action).
12+
# - OPENSHELL_E2E_KUBE_CONTEXT unset:
13+
# Create a local k3d cluster via tasks/scripts/helm-k3s-local.sh, install
14+
# the chart, port-forward, and tear the cluster down on exit.
15+
#
16+
# Helm e2e currently uses plaintext gateway traffic (ci/values-tls-disabled.yaml).
17+
#
18+
# Image source: helm install pulls from ${OPENSHELL_REGISTRY}/{gateway,supervisor}:${IMAGE_TAG}
19+
# (defaults: ghcr.io/nvidia/openshell, latest). CI sets IMAGE_TAG to the commit SHA;
20+
# local devs should set it to a tag pulled from a registry the cluster can reach,
21+
# or build and import images via a separate bootstrap step before running this script.
22+
23+
set -euo pipefail
24+
25+
if [ "$#" -eq 0 ]; then
26+
echo "Usage: e2e/with-kube-gateway.sh <command> [args...]" >&2
27+
exit 2
28+
fi
29+
30+
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
31+
# shellcheck source=e2e/support/gateway-common.sh
32+
source "${ROOT}/e2e/support/gateway-common.sh"
33+
34+
WORKDIR_PARENT="${TMPDIR:-/tmp}"
35+
WORKDIR_PARENT="${WORKDIR_PARENT%/}"
36+
WORKDIR="$(mktemp -d "${WORKDIR_PARENT}/openshell-e2e-kube.XXXXXX")"
37+
38+
CLUSTER_CREATED_BY_US=0
39+
CLUSTER_NAME=""
40+
KUBE_CONTEXT=""
41+
NAMESPACE="openshell"
42+
RELEASE_NAME="openshell"
43+
PORTFORWARD_PID=""
44+
PORTFORWARD_LOG="${WORKDIR}/portforward.log"
45+
HELM_INSTALLED=0
46+
47+
# Isolate CLI/SDK gateway metadata from the developer's real config.
48+
export XDG_CONFIG_HOME="${WORKDIR}/config"
49+
export XDG_DATA_HOME="${WORKDIR}/data"
50+
51+
# kubectl wrapper: always runs against the context held in KUBE_CONTEXT and
# forwards every argument unchanged.
kctl() { kubectl --context "${KUBE_CONTEXT}" "$@"; }
54+
55+
# helm wrapper: always runs against the kube context held in KUBE_CONTEXT and
# forwards every argument unchanged.
helmctl() { helm --kube-context "${KUBE_CONTEXT}" "$@"; }
58+
59+
# EXIT-trap handler. In order: stops the background port-forward, prints
# diagnostics when the script is exiting non-zero, removes the Helm release and
# namespace installed by this run, deletes the k3d cluster if this run created
# it, and removes WORKDIR. External commands are guarded with `|| true` so a
# failing teardown step does not prevent the remaining ones from running.
cleanup() {
  # Must come first: any later command would overwrite $?.
  local status=$?

  # Most teardown steps need both a kube context and a namespace to act on.
  local have_target=0
  if [[ -n "${KUBE_CONTEXT}" && -n "${NAMESPACE}" ]]; then
    have_target=1
  fi

  if [[ -n "${PORTFORWARD_PID}" ]]; then
    kill "${PORTFORWARD_PID}" >/dev/null 2>&1 || true
    wait "${PORTFORWARD_PID}" >/dev/null 2>&1 || true
  fi

  # On failure, dump cluster state before anything is uninstalled below.
  if [[ "${status}" -ne 0 && "${have_target}" -eq 1 ]]; then
    if command -v kubectl >/dev/null 2>&1 &&
      kctl get namespace "${NAMESPACE}" >/dev/null 2>&1; then
      echo "=== gateway pod state (preserved for debugging) ==="
      kctl -n "${NAMESPACE}" get pods -o wide 2>&1 || true
      echo "=== gateway events ==="
      kctl -n "${NAMESPACE}" get events --sort-by=.lastTimestamp 2>&1 |
        tail -n 80 || true
      echo "=== gateway logs (last 200 lines) ==="
      kctl -n "${NAMESPACE}" logs \
        -l "app.kubernetes.io/instance=${RELEASE_NAME}" --tail=200 \
        --all-containers --prefix 2>&1 || true
      echo "=== end gateway debug output ==="
    fi
    if [[ -f "${PORTFORWARD_LOG}" ]]; then
      echo "=== port-forward log ==="
      cat "${PORTFORWARD_LOG}" || true
      echo "=== end port-forward log ==="
    fi
  fi

  # Only remove the release and namespace once the install has succeeded.
  if [[ "${HELM_INSTALLED}" = "1" && "${have_target}" -eq 1 ]]; then
    if command -v helm >/dev/null 2>&1; then
      helmctl uninstall "${RELEASE_NAME}" --namespace "${NAMESPACE}" --wait \
        --timeout 60s >/dev/null 2>&1 || true
    fi
    if command -v kubectl >/dev/null 2>&1; then
      kctl delete namespace "${NAMESPACE}" --wait=false \
        --ignore-not-found >/dev/null 2>&1 || true
    fi
  fi

  # Never touch a cluster this script did not create.
  if [[ "${CLUSTER_CREATED_BY_US}" = "1" && -n "${CLUSTER_NAME}" ]]; then
    if command -v k3d >/dev/null 2>&1 &&
      k3d cluster list "${CLUSTER_NAME}" >/dev/null 2>&1; then
      echo "Deleting ephemeral k3d cluster ${CLUSTER_NAME}..."
      k3d cluster delete "${CLUSTER_NAME}" >/dev/null 2>&1 || true
    fi
  fi

  rm -rf "${WORKDIR}" 2>/dev/null || true
}
109+
trap cleanup EXIT
110+
111+
# Abort the script with exit status 2 unless the command named by $1 can be
# found by `command -v`.
require_cmd() {
  command -v "$1" >/dev/null 2>&1 && return 0
  echo "ERROR: $1 is required to run Helm-backed e2e tests" >&2
  exit 2
}
117+
118+
require_cmd helm
119+
require_cmd kubectl
120+
require_cmd curl
121+
122+
# Select the cluster to test against: the caller-supplied kubectl context, or a
# freshly created, uniquely named k3d cluster.
if [ -n "${OPENSHELL_E2E_KUBE_CONTEXT:-}" ]; then
  KUBE_CONTEXT="${OPENSHELL_E2E_KUBE_CONTEXT}"
  echo "Using existing kubectl context: ${KUBE_CONTEXT}"
  # Fail fast with a clear message rather than a later kubectl/helm error.
  if ! kctl cluster-info >/dev/null 2>&1; then
    echo "ERROR: kubectl context '${KUBE_CONTEXT}' is not reachable." >&2
    exit 2
  fi
else
  require_cmd k3d
  # The PID plus the trailing digits of the epoch time make the name unique
  # per run.
  CLUSTER_NAME="oshe2e-$$-$(date +%s | tail -c 8)"
  echo "Creating ephemeral k3d cluster ${CLUSTER_NAME}..."
  # Claim ownership before creating. If the create script fails partway, the
  # EXIT trap can still delete whatever was provisioned under this name; it
  # only acts when `k3d cluster list` finds the cluster, so an early failure
  # is harmless.
  CLUSTER_CREATED_BY_US=1
  HELM_K3S_CLUSTER_NAME="${CLUSTER_NAME}" \
    HELM_K3S_KUBECONFIG="${WORKDIR}/kubeconfig" \
    bash "${ROOT}/tasks/scripts/helm-k3s-local.sh" create
  export KUBECONFIG="${WORKDIR}/kubeconfig"
  KUBE_CONTEXT="k3d-${CLUSTER_NAME}"
fi
140+
141+
IMAGE_TAG_VALUE="${IMAGE_TAG:-latest}"
142+
REGISTRY_VALUE="${OPENSHELL_REGISTRY:-ghcr.io/nvidia/openshell}"
143+
REGISTRY_VALUE="${REGISTRY_VALUE%/}"
144+
145+
# When this script created the cluster, import locally-available gateway and
146+
# supervisor images so devs without a registry login can iterate. Best-effort:
147+
# missing images fall through to the cluster's pull behavior at install time.
148+
if [ "${CLUSTER_CREATED_BY_US}" = "1" ]; then
149+
for image in \
150+
"${REGISTRY_VALUE}/gateway:${IMAGE_TAG_VALUE}" \
151+
"${REGISTRY_VALUE}/supervisor:${IMAGE_TAG_VALUE}"; do
152+
if docker image inspect "${image}" >/dev/null 2>&1; then
153+
echo "Importing ${image} into k3d cluster ${CLUSTER_NAME}..."
154+
k3d image import "${image}" --cluster "${CLUSTER_NAME}" \
155+
--mode direct >/dev/null
156+
fi
157+
done
158+
fi
159+
160+
# The Kubernetes compute driver creates and watches Sandbox CRs reconciled
161+
# by the upstream agent-sandbox-controller. Without the CRD + controller,
162+
# every gateway K8s call 404s and CreateSandbox never produces a Pod.
163+
echo "Installing agent-sandbox CRDs and controller..."
164+
kctl apply -f "${ROOT}/deploy/kube/manifests/agent-sandbox.yaml"
165+
kctl wait --for=condition=Established crd/sandboxes.agents.x-k8s.io --timeout=120s
166+
kctl -n agent-sandbox-system rollout status statefulset/agent-sandbox-controller --timeout=300s
167+
168+
echo "Installing Helm chart (release=${RELEASE_NAME}, namespace=${NAMESPACE}, tag=${IMAGE_TAG_VALUE})..."
169+
helmctl install "${RELEASE_NAME}" "${ROOT}/deploy/helm/openshell" \
170+
--namespace "${NAMESPACE}" --create-namespace \
171+
--values "${ROOT}/deploy/helm/openshell/ci/values-tls-disabled.yaml" \
172+
--set "fullnameOverride=openshell" \
173+
--set "image.repository=${REGISTRY_VALUE}/gateway" \
174+
--set "image.tag=${IMAGE_TAG_VALUE}" \
175+
--set "supervisor.image.repository=${REGISTRY_VALUE}/supervisor" \
176+
--set "supervisor.image.tag=${IMAGE_TAG_VALUE}" \
177+
--wait --timeout 5m
178+
HELM_INSTALLED=1
179+
180+
LOCAL_PORT="$(e2e_pick_port)"
181+
echo "Starting kubectl port-forward svc/openshell ${LOCAL_PORT}:8080..."
182+
kctl -n "${NAMESPACE}" port-forward "svc/openshell" \
183+
"${LOCAL_PORT}:8080" >"${PORTFORWARD_LOG}" 2>&1 &
184+
PORTFORWARD_PID=$!
185+
186+
elapsed=0
187+
timeout=30
188+
while [ "${elapsed}" -lt "${timeout}" ]; do
189+
if ! kill -0 "${PORTFORWARD_PID}" 2>/dev/null; then
190+
echo "ERROR: kubectl port-forward exited before becoming reachable" >&2
191+
cat "${PORTFORWARD_LOG}" >&2 || true
192+
exit 1
193+
fi
194+
if curl -s -o /dev/null --connect-timeout 1 "http://127.0.0.1:${LOCAL_PORT}"; then
195+
break
196+
fi
197+
sleep 1
198+
elapsed=$((elapsed + 1))
199+
done
200+
if [ "${elapsed}" -ge "${timeout}" ]; then
201+
echo "ERROR: port-forward did not accept TCP within ${timeout}s" >&2
202+
cat "${PORTFORWARD_LOG}" >&2 || true
203+
exit 1
204+
fi
205+
206+
GATEWAY_NAME="openshell-e2e-kube-${LOCAL_PORT}"
207+
GATEWAY_ENDPOINT="http://127.0.0.1:${LOCAL_PORT}"
208+
e2e_register_plaintext_gateway \
209+
"${XDG_CONFIG_HOME}" \
210+
"${GATEWAY_NAME}" \
211+
"${GATEWAY_ENDPOINT}" \
212+
"${LOCAL_PORT}"
213+
214+
export OPENSHELL_GATEWAY="${GATEWAY_NAME}"
215+
export OPENSHELL_E2E_DRIVER="kubernetes"
216+
export OPENSHELL_E2E_SANDBOX_NAMESPACE="${NAMESPACE}"
217+
export OPENSHELL_PROVISION_TIMEOUT="${OPENSHELL_PROVISION_TIMEOUT:-300}"
218+
219+
echo "Running e2e command against ${GATEWAY_ENDPOINT}: $*"
220+
"$@"

tasks/test.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,10 @@ run = "e2e/with-docker-gateway.sh uv run pytest -o python_files='test_*.py' -m g
5050
description = "Run Rust CLI e2e tests against a Podman-backed gateway"
5151
run = "e2e/rust/e2e-podman.sh"
5252

53+
["e2e:helm"]
54+
description = "Run smoke e2e against a Helm-deployed gateway (set OPENSHELL_E2E_KUBE_CONTEXT to reuse a cluster, otherwise creates a local k3d cluster)"
55+
run = "e2e/rust/e2e-helm.sh"
56+
5357
["e2e:vm"]
5458
description = "Start openshell-gateway with the VM compute driver and run the cluster-agnostic smoke e2e"
5559
run = "e2e/rust/e2e-vm.sh"

0 commit comments

Comments
 (0)