#!/usr/bin/env python3 # Covers a real k3s deployment smoke path for vllm-serve: admin-created ordinary # user with integer GPU memory quota, tenant namespace/ResourceQuota creation, # Harbor chart deployment with the requested vLLM image, diagnostics fetch, and # cleanup of the instance and test user. import json import os import subprocess import sys import tempfile import time import uuid from dataclasses import dataclass from typing import Any from urllib.error import HTTPError, URLError from urllib.parse import quote, urljoin from urllib.request import Request, urlopen RAW_BASE_URL = os.environ.get("BASE_URL", "http://localhost:18081/api/v1").rstrip("/") BASE_URL = RAW_BASE_URL + "/" ADMIN_USER = os.environ.get("ADMIN_USER", os.environ.get("BOOTSTRAP_ADMIN_USER", "admin")) ADMIN_PASS = os.environ.get("ADMIN_PASS", os.environ.get("BOOTSTRAP_ADMIN_PASS", "")) TARGET_CLUSTER_NAME = os.environ.get("TARGET_CLUSTER_NAME", "k3s") TARGET_REGISTRY_NAME = os.environ.get("TARGET_REGISTRY_NAME", "harbor-bwgdi") CHART_REPOSITORY = os.environ.get("VLLM_CHART_REPOSITORY", "charts/vllm-serve") CHART_TAG = os.environ.get("VLLM_CHART_TAG", "0.6.0") VLLM_IMAGE = os.environ.get("VLLM_IMAGE", "harbor.bwgdi.com/library/vllm-openai:v0.17.1") MODEL_NAME = os.environ.get("VLLM_MODEL", "Qwen/Qwen2.5-0.5B-Instruct") GPU_MEM_MB = os.environ.get("GPU_MEM_MB", "10000") @dataclass class Response: status: int headers: dict[str, str] body: str json: Any def parse_json(body: str) -> Any: if not body: return None try: return json.loads(body) except json.JSONDecodeError: return None def request(method: str, path: str, token: str | None = None, payload: Any = None, timeout: int = 30) -> Response: url = path if path.startswith("http") else urljoin(BASE_URL, path.lstrip("/")) data = None headers = {"Accept": "application/json"} if payload is not None: data = json.dumps(payload).encode("utf-8") headers["Content-Type"] = "application/json" if token: headers["Authorization"] = f"Bearer {token}" req = Request(url, data=data, headers=headers, method=method) try: with urlopen(req, timeout=timeout) as res: body = res.read().decode("utf-8", errors="replace") return Response(res.status, dict(res.headers), body, parse_json(body)) except HTTPError as exc: body = exc.read().decode("utf-8", errors="replace") return Response(exc.code, dict(exc.headers), body, parse_json(body)) except URLError as exc: raise AssertionError(f"Cannot reach BASE_URL={BASE_URL}: {exc}") from exc def assert_status(resp: Response, expected: set[int], context: str) -> None: if resp.status not in expected: raise AssertionError(f"{context}: expected HTTP {sorted(expected)}, got {resp.status}. Body: {resp.body[:800]}") def login(username: str, password: str) -> str: resp = request("POST", "/auth/login", payload={"username": username, "password": password}) assert_status(resp, {200}, f"login {username}") return str(resp.json["accessToken"]) def list_items(path: str, token: str, context: str) -> list[dict[str, Any]]: resp = request("GET", path, token) assert_status(resp, {200}, context) if isinstance(resp.json, list): return [item for item in resp.json if isinstance(item, dict)] if isinstance(resp.json, dict): for key in ("items", "clusters", "registries", "instances"): if isinstance(resp.json.get(key), list): return [item for item in resp.json[key] if isinstance(item, dict)] raise AssertionError(f"{context}: expected list response. Body: {resp.body[:800]}") def find_by_name(items: list[dict[str, Any]], name: str, context: str) -> dict[str, Any]: for item in items: if item.get("name") == name: return item raise AssertionError(f"{context}: could not find {name!r}. Available: {[item.get('name') for item in items]}") def issue_kubeconfig(token: str, workspace_id: str, cluster_id: str) -> str: resp = request( "POST", f"/workspaces/{workspace_id}/kubeconfig", token, {"clusterId": cluster_id, "ttlSeconds": 7200}, ) assert_status(resp, {200}, "issue tenant kubeconfig") return str(resp.json["kubeconfig"]) def issue_current_kubeconfig(token: str) -> str: resp = request("GET", "/workspaces/credentials/kubeconfig", token) assert_status(resp, {200}, "issue current default-cluster kubeconfig") if "server:" not in resp.body or "token:" not in resp.body: raise AssertionError(f"current kubeconfig response does not look like kubeconfig YAML: {resp.body[:300]}") return resp.body def kubectl_json(kubeconfig: str, args: list[str]) -> Any: with tempfile.NamedTemporaryFile("w", delete=False) as handle: handle.write(kubeconfig) kubeconfig_path = handle.name try: proc = subprocess.run( ["kubectl", "--kubeconfig", kubeconfig_path, *args, "-o", "json"], text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=60, check=False, ) if proc.returncode != 0: raise AssertionError(f"kubectl {' '.join(args)} failed: {proc.stderr.strip()}") return json.loads(proc.stdout) finally: os.unlink(kubeconfig_path) def values_yaml() -> str: return f"""vllm: image: "{VLLM_IMAGE}" model: huggingfaceName: "{MODEL_NAME}" resources: gpuLimit: 1 gpuMem: {GPU_MEM_MB} cpuRequest: 4 memoryLimit: "12Gi" shmSize: "4Gi" replicaCount: 1 workerSize: 1 initContainers: enabled: false """ def main() -> int: if not ADMIN_PASS: raise AssertionError("ADMIN_PASS or BOOTSTRAP_ADMIN_PASS is required") suffix = uuid.uuid4().hex[:6] username = f"vllm-k3s-{suffix}" password = "VllmK3s123!" namespace = f"ocdp-u-vllm-{suffix}" release = f"ocdp-vllm-k3s-{suffix}" admin_token = login(ADMIN_USER, ADMIN_PASS) user_id = "" instance_id = "" cluster_id = "" try: clusters = list_items("/clusters", admin_token, "list clusters") cluster = find_by_name(clusters, TARGET_CLUSTER_NAME, "select target cluster") cluster_id = str(cluster["id"]) registries = list_items("/registries", admin_token, "list registries") registry = find_by_name(registries, TARGET_REGISTRY_NAME, "select target registry") registry_id = str(registry["id"]) artifacts = request( "GET", f"/registries/{registry_id}/repositories/{quote(CHART_REPOSITORY, safe='')}/artifacts?media_type=chart", admin_token, ) assert_status(artifacts, {200}, "verify vllm chart artifacts") if CHART_TAG not in artifacts.body: raise AssertionError(f"{CHART_REPOSITORY}:{CHART_TAG} was not visible in Harbor artifacts") created = request( "POST", "/users", admin_token, { "username": username, "password": password, "role": "user", "namespace": namespace, "defaultClusterId": cluster_id, "quotaCpu": "6", "quotaMemory": "16Gi", "quotaGpu": "1", "quotaGpuMemory": GPU_MEM_MB, "isActive": True, "mustChangePassword": False, }, ) assert_status(created, {201}, "create vllm smoke user") user_id = str(created.json["id"]) if str(created.json.get("quotaGpuMemory")) != GPU_MEM_MB: raise AssertionError(f"quotaGpuMemory should stay integer {GPU_MEM_MB}, got {created.json.get('quotaGpuMemory')}") user_token = login(username, password) workspaces = list_items("/workspaces", user_token, "user lists own workspace") workspace = workspaces[0] workspace_id = str(workspace["id"]) kubeconfig = issue_current_kubeconfig(user_token) quota = kubectl_json(kubeconfig, ["get", "resourcequota", "tenant-quota", "-n", namespace]) hard = quota.get("status", {}).get("hard") or quota.get("spec", {}).get("hard") or {} gpumem_hard = str(hard.get("requests.nvidia.com/gpumem", "")) if gpumem_hard not in {GPU_MEM_MB, "10k"}: raise AssertionError(f"ResourceQuota gpumem should be {GPU_MEM_MB} or Kubernetes canonical 10k, got {gpumem_hard!r}") print(f"quota gpumem={gpumem_hard}") payload = { "name": release, "namespace": namespace, "registryId": registry_id, "repository": CHART_REPOSITORY, "tag": CHART_TAG, "description": f"smoke deploy {MODEL_NAME}", "valuesYaml": values_yaml(), } created_instance = request("POST", f"/clusters/{cluster_id}/instances", user_token, payload, timeout=1200) assert_status(created_instance, {201}, "create vllm instance") instance_id = str(created_instance.json["id"]) print(f"instance={instance_id} release={release} cluster={TARGET_CLUSTER_NAME} namespace={namespace}") current = created_instance for attempt in range(1, 61): current = request("GET", f"/clusters/{cluster_id}/instances/{instance_id}", user_token) assert_status(current, {200}, "poll vllm instance") status = str(current.json.get("status")) print(f"poll={attempt} status={status}") if status == "deployed": break if status == "failed": raise AssertionError(f"vLLM instance failed: {current.body[:1200]}") time.sleep(10) else: raise AssertionError(f"vLLM instance did not reach deployed. Last: {current.body[:1200]}") diagnostics = request("GET", f"/clusters/{cluster_id}/instances/{instance_id}/diagnostics?tailLines=80", user_token, timeout=60) assert_status(diagnostics, {200}, "fetch diagnostics") pods = diagnostics.json.get("pods", []) if isinstance(diagnostics.json, dict) else [] services = diagnostics.json.get("services", []) if isinstance(diagnostics.json, dict) else [] logs = diagnostics.json.get("logs", []) if isinstance(diagnostics.json, dict) else [] print(f"diagnostics pods={len(pods)} services={len(services)} logs={len(logs)}") live_services = kubectl_json(kubeconfig, ["get", "svc", "-n", namespace]) service_names = {item.get("metadata", {}).get("name") for item in live_services.get("items", [])} if f"{release}-svc" not in service_names: raise AssertionError(f"expected service {release}-svc in tenant namespace {namespace}, got {sorted(service_names)}") live_deployments = kubectl_json(kubeconfig, ["get", "deployments", "-n", namespace]) deployment_names = {item.get("metadata", {}).get("name") for item in live_deployments.get("items", [])} if release not in deployment_names: raise AssertionError(f"expected deployment {release} in tenant namespace {namespace}, got {sorted(deployment_names)}") print(f"tenant namespace resources service={release}-svc deployment={release}") return 0 finally: if instance_id and cluster_id: cleanup = request("DELETE", f"/clusters/{cluster_id}/instances/{instance_id}", admin_token, timeout=300) print(f"cleanup instance http={cleanup.status}") if user_id: cleanup_user = request("DELETE", f"/users/{user_id}", admin_token) print(f"cleanup user http={cleanup_user.status}") if __name__ == "__main__": try: raise SystemExit(main()) except AssertionError as exc: print(f"FAIL: {exc}", file=sys.stderr) raise SystemExit(1)