refactor: full-stack restructure with multi-tenancy, workspace management, and K8s diagnostics

- Add Workspace domain (entity, repository, service, handler, DTO)
- Add multi-tenant K8s client with tenant binding and quota management
- Add K8s diagnostics client (instance diagnostics)
- Add authorization middleware (authz package)
- Restructure frontend to feature-based architecture (features/)
- Add User Management page in configuration
- Add AccessDenied page and route guards
- Refactor shared components (form inputs, layout, UI)
- Update Tailwind config for new design system
- Add comprehensive documentation (docs/, tasks/, plans)
- Improve cluster service with better kubeconfig handling
- Add tests for crypto, config, helm client, tenant binding
This commit is contained in:
Ivan087
2026-05-12 16:15:14 +08:00
parent c5e51ed069
commit 7f238a3168
172 changed files with 15703 additions and 3162 deletions

View File

@ -0,0 +1,77 @@
#!/usr/bin/env python3
# Covers Harbor Helm chart defaults retrieval: authenticate as admin, locate an
# available registry, fetch charts/nginx:22.1.1 values-yaml, and assert the
# response is the top-level chart values.yaml rather than a dependency values file.
import json
import os
import sys
from urllib.error import HTTPError, URLError
from urllib.parse import quote
from urllib.request import Request, urlopen
# Base URL of the backend API; any trailing slash is stripped so request paths
# (which start with "/") can be appended directly.
BASE_URL = os.environ.get("BASE_URL", "http://localhost:18081/api/v1").rstrip("/")
# Admin credentials: ADMIN_* takes precedence, then BOOTSTRAP_ADMIN_*.
ADMIN_USER = os.environ.get("ADMIN_USER", os.environ.get("BOOTSTRAP_ADMIN_USER", "admin"))
# Defaults to an empty string when neither variable is set; main() skips in that case.
ADMIN_PASS = os.environ.get("ADMIN_PASS", os.environ.get("BOOTSTRAP_ADMIN_PASS", ""))
def request(method: str, path: str, token: str | None = None, payload=None):
    """Send an HTTP request to the backend API and return ``(status, parsed, body)``.

    Args:
        method: HTTP method name, e.g. ``"GET"``.
        path: Path appended verbatim to ``BASE_URL``.
        token: Optional bearer token; when truthy an ``Authorization`` header is sent.
        payload: Optional object serialized as the JSON request body.

    Returns:
        A tuple ``(status, parsed, body)`` where ``body`` is the decoded response
        text and ``parsed`` is the decoded JSON, or ``None`` when the body is
        empty or not valid JSON. HTTP error responses are returned the same way
        rather than raised. When the server cannot be reached, a FAIL line is
        printed to stderr and ``(1, None, str(exc))`` is returned.
    """
    headers = {"Accept": "application/json"}
    data = None
    if payload is not None:
        headers["Content-Type"] = "application/json"
        data = json.dumps(payload).encode("utf-8")
    if token:
        headers["Authorization"] = f"Bearer {token}"
    req = Request(BASE_URL + path, data=data, headers=headers, method=method)
    try:
        with urlopen(req, timeout=60) as res:
            status = res.status
            body = res.read().decode("utf-8", errors="replace")
    except HTTPError as exc:
        # HTTPError subclasses URLError, so it must be handled first.
        status = exc.code
        body = exc.read().decode("utf-8", errors="replace")
    except URLError as exc:
        print(f"FAIL: cannot reach {BASE_URL}: {exc}", file=sys.stderr)
        return 1, None, str(exc)
    # Parse success and error bodies the same way: a non-JSON body (for example
    # an HTML page from a proxy) yields parsed=None instead of an exception.
    try:
        parsed = json.loads(body) if body else None
    except json.JSONDecodeError:
        parsed = None
    return status, parsed, body
def main() -> int:
    """Run the values-yaml contract check and return a process exit code.

    Returns:
        0 when the check passes, 1 on failure, and 77 when the check is skipped
        (no admin password configured, or no registries available).
    """
    if not ADMIN_PASS:
        print("SKIP: ADMIN_PASS or BOOTSTRAP_ADMIN_PASS is required")
        return 77

    status, auth, body = request("POST", "/auth/login", payload={"username": ADMIN_USER, "password": ADMIN_PASS})
    if status != 200 or not isinstance(auth, dict):
        print(f"FAIL: admin login failed HTTP {status}: {body[:300]}", file=sys.stderr)
        return 1
    token = auth.get("accessToken")
    if not token:
        # Report a malformed login response instead of crashing with KeyError.
        print(f"FAIL: admin login response did not include accessToken: {body[:300]}", file=sys.stderr)
        return 1

    status, registries, body = request("GET", "/registries", token)
    if status != 200 or not isinstance(registries, list) or not registries:
        print(f"SKIP: no registries available for values-yaml contract: HTTP {status}")
        return 77
    first_registry = registries[0]
    registry_id = first_registry.get("id") if isinstance(first_registry, dict) else None
    if not registry_id:
        print(f"FAIL: first registry entry has no id: {body[:300]}", file=sys.stderr)
        return 1

    repository = "charts/nginx"
    reference = "22.1.1"
    # safe='' also percent-encodes "/" so the repository stays a single path segment.
    path = f"/registries/{registry_id}/repositories/{quote(repository, safe='')}/artifacts/{quote(reference, safe='')}/values-yaml"
    status, response, body = request("GET", path, token)
    if status != 200 or not isinstance(response, dict):
        print(f"FAIL: values-yaml returned HTTP {status}: {body[:300]}", file=sys.stderr)
        return 1

    values_yaml = response.get("valuesYaml")
    if not isinstance(values_yaml, str):
        # A null or non-string field is treated as empty so it fails the shape check below.
        values_yaml = ""
    required_markers = ("global:", "image:", "service:")
    if len(values_yaml) < 10_000 or any(marker not in values_yaml for marker in required_markers):
        print("FAIL: values-yaml did not look like the top-level nginx chart values.yaml", file=sys.stderr)
        return 1
    if "exampleValue: common-chart" in values_yaml[:1000]:
        print("FAIL: values-yaml returned dependency chart values instead of top-level chart values", file=sys.stderr)
        return 1

    print("PASS: chart values-yaml API contract")
    return 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())

View File

@ -0,0 +1,104 @@
#!/usr/bin/env bash
# Covers the current OCDP workflow: login, registry health, Harbor chart repository browsing,
# chart artifact listing, optional values schema fetch, and optional nginx deployment cleanup.
# Abort on command failure, on use of an unset variable, and on failure inside a pipeline.
set -euo pipefail
# Base URL of the backend API (overridable from the environment).
BASE_URL="${BASE_URL:-http://localhost:18081/api/v1}"
# Admin credentials: ADMIN_* wins, then BOOTSTRAP_ADMIN_*; the password has no default.
ADMIN_USER="${ADMIN_USER:-${BOOTSTRAP_ADMIN_USER:-admin}}"
ADMIN_PASS="${ADMIN_PASS:-${BOOTSTRAP_ADMIN_PASS:-}}"
# Set to "true" to also deploy, poll, and delete a test release.
RUN_DEPLOY_TEST="${RUN_DEPLOY_TEST:-false}"
# Namespace and release name used by the optional deployment test.
TEST_NAMESPACE="${TEST_NAMESPACE:-ocdp-smoke}"
TEST_RELEASE="${TEST_RELEASE:-ocdp-smoke-nginx}"
# Optional case-insensitive substring used to pick a chart repository; empty selects the first one.
TEST_REPOSITORY_CONTAINS="${TEST_REPOSITORY_CONTAINS:-}"
# json_get PATH
# Read a JSON document from stdin and print the value at the dotted PATH
# (e.g. "0.id" or "repositories.0"). Numeric parts index into lists, other
# parts are looked up as object keys. Prints an empty string when the value is
# null or when any part of the path does not exist, so callers can validate the
# result with `test -n` instead of getting a Python traceback.
json_get() {
  python3 -c 'import json, sys
cur = json.load(sys.stdin)
for part in sys.argv[1].split("."):
    try:
        cur = cur[int(part)] if isinstance(cur, list) else cur.get(part)
    except (AttributeError, IndexError, ValueError):
        # Not a container, index out of range, or non-numeric list index.
        cur = None
    if cur is None:
        break
print("" if cur is None else cur)' "$1"
}
# urlencode STRING
# Print STRING percent-encoded with no characters treated as safe, so "/" is
# encoded too and the result can be used as a single URL path segment.
urlencode() {
  local raw="$1"
  python3 -c 'import sys
from urllib.parse import quote
print(quote(sys.argv[1], safe=""))' "$raw"
}
echo "==> Health"
# The health endpoint is requested at the server root, so strip a trailing /api/v1.
curl -fsS "${BASE_URL%/api/v1}/health" >/dev/null

if [[ -z "$ADMIN_PASS" ]]; then
  echo "ADMIN_PASS or BOOTSTRAP_ADMIN_PASS must be set for smoke tests" >&2
  exit 1
fi

echo "==> Login"
# Build the payload with json.dumps so quotes and backslashes in the
# credentials are escaped; printf interpolation would emit invalid JSON.
login_payload=$(LOGIN_USER="$ADMIN_USER" LOGIN_PASS="$ADMIN_PASS" python3 -c 'import json, os
print(json.dumps({"username": os.environ["LOGIN_USER"], "password": os.environ["LOGIN_PASS"]}))')
login_response=$(curl -fsS -H 'Content-Type: application/json' -d "$login_payload" "$BASE_URL/auth/login")
token=$(printf '%s' "$login_response" | json_get "accessToken")
# Stop here with a clear message rather than sending an empty bearer token.
if [[ -z "$token" ]]; then
  echo "Login response did not include accessToken" >&2
  exit 1
fi
auth_header="Authorization: Bearer $token"
# Pick the first registry returned by the API and require its health endpoint to succeed.
echo "==> Registries"
registries=$(curl -fsS -H "$auth_header" "$BASE_URL/registries")
registry_id=$(printf '%s' "$registries" | json_get "0.id")
test -n "$registry_id"
curl -fsS -H "$auth_header" "$BASE_URL/registries/$registry_id/health" >/dev/null
# List chart repositories, then choose one: the first whose name contains
# TEST_REPOSITORY_CONTAINS (case-insensitive) when set, otherwise the first entry.
echo "==> Chart repositories"
repos=$(curl -fsS -H "$auth_header" "$BASE_URL/registries/$registry_id/repositories?artifact_type=chart")
if [[ -n "$TEST_REPOSITORY_CONTAINS" ]]; then
repo_name=$(printf '%s' "$repos" | python3 -c 'import json,os,sys
needle=os.environ["TEST_REPOSITORY_CONTAINS"].lower()
for repo in json.load(sys.stdin).get("repositories", []):
if needle in repo.lower():
print(repo)
break')
else
repo_name=$(printf '%s' "$repos" | json_get "repositories.0")
fi
# An empty repo_name (nothing matched or nothing listed) aborts the script here.
test -n "$repo_name"
encoded_repo=$(urlencode "$repo_name")
# List the repository's chart artifacts and take the tag of the first one.
echo "==> Chart artifacts"
artifacts=$(curl -fsS -H "$auth_header" "$BASE_URL/registries/$registry_id/repositories/$encoded_repo/artifacts?media_type=chart")
tag=$(printf '%s' "$artifacts" | json_get "0.tag")
test -n "$tag"
encoded_tag=$(urlencode "$tag")
# The values-schema fetch is best-effort: "|| true" keeps a failure from aborting the script.
curl -fsS -H "$auth_header" "$BASE_URL/registries/$registry_id/repositories/$encoded_repo/artifacts/$encoded_tag/values-schema" >/dev/null || true
# Without RUN_DEPLOY_TEST=true the smoke test ends successfully here.
if [[ "$RUN_DEPLOY_TEST" != "true" ]]; then
echo "==> Smoke passed without deployment. Set RUN_DEPLOY_TEST=true to create and cleanup a release."
exit 0
fi
echo "==> Clusters"
clusters=$(curl -fsS -H "$auth_header" "$BASE_URL/clusters")
cluster_id=$(printf '%s' "$clusters" | json_get "0.id")
test -n "$cluster_id"

echo "==> Deploy test release"
# Values are passed through the environment and serialized with json.dumps so
# they are always escaped correctly.
deploy_payload=$(TEST_RELEASE="$TEST_RELEASE" TEST_NAMESPACE="$TEST_NAMESPACE" REGISTRY_ID="$registry_id" REPOSITORY="$repo_name" TAG="$tag" python3 -c 'import json, os
print(json.dumps({
    "name": os.environ["TEST_RELEASE"],
    "namespace": os.environ["TEST_NAMESPACE"],
    "registryId": os.environ["REGISTRY_ID"],
    "repository": os.environ["REPOSITORY"],
    "tag": os.environ["TAG"],
    "valuesYaml": "replicaCount: 1\n",
}))')
instance=$(curl -fsS -H "$auth_header" -H 'Content-Type: application/json' -d "$deploy_payload" "$BASE_URL/clusters/$cluster_id/instances")
instance_id=$(printf '%s' "$instance" | json_get "id")
test -n "$instance_id"

# Best-effort deletion of the test release.
cleanup_instance() {
  echo "==> Cleanup"
  curl -fsS -X DELETE -H "$auth_header" "$BASE_URL/clusters/$cluster_id/instances/$instance_id" >/dev/null || true
}
# Delete the release even if a later command aborts the script under `set -e`.
trap cleanup_instance EXIT

echo "==> Poll instance status"
current=""
status=""
# Poll up to 60 times, 5 seconds apart, until a terminal status is reported.
for _ in $(seq 1 60); do
  current=$(curl -fsS -H "$auth_header" "$BASE_URL/clusters/$cluster_id/instances/$instance_id")
  status=$(printf '%s' "$current" | json_get "status")
  echo "status=$status"
  if [[ "$status" == "deployed" || "$status" == "failed" ]]; then
    break
  fi
  sleep 5
done

# Run the cleanup once, explicitly, and disarm the trap so it does not run twice.
trap - EXIT
cleanup_instance

# Both an explicit failure and a poll timeout fail the smoke test.
if [[ "$status" != "deployed" ]]; then
  printf '%s\n' "$current"
  echo "Test release did not reach status 'deployed' (last status: '$status')" >&2
  exit 1
fi
echo "==> Deploy smoke completed"

View File

@ -0,0 +1,61 @@
# Scope: isolated runtime verification of the backend API, PostgreSQL, and .env bootstrap, without reusing the project's fixed container names.
services:
# PostgreSQL database used by the verification backend.
postgres:
image: postgres:17-alpine
container_name: ocdp-verify-postgres
restart: "no"
environment:
POSTGRES_DB: ${POSTGRES_DB:-ocdp}
POSTGRES_USER: ${POSTGRES_USER:-postgres}
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres}
POSTGRES_INITDB_ARGS: "--encoding=UTF8 --lc-collate=C --lc-ctype=C"
ports:
# Host port is configurable; defaults to 15432.
- "${VERIFY_POSTGRES_PORT:-15432}:5432"
volumes:
- ocdp_verify_postgres_data:/var/lib/postgresql/data
# The backend waits on this check (see its depends_on: service_healthy).
healthcheck:
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-postgres} -d ${POSTGRES_DB:-ocdp}"]
interval: 5s
timeout: 5s
retries: 30
start_period: 180s
networks:
- ocdp-verify-network
# Backend API run from source inside a Go toolchain image.
backend:
image: golang:1.24-alpine
container_name: ocdp-verify-backend
restart: "no"
working_dir: /src
# Variables from ../.env are loaded when the file exists; it is not required.
env_file:
- path: ../.env
required: false
environment:
ADAPTER_MODE: production
PORT: 8080
# Verification-only fallbacks used when the variables are not provided.
JWT_SECRET: ${JWT_SECRET:-verify-jwt-secret}
ENCRYPTION_KEY: ${ENCRYPTION_KEY:-verify-encryption-key}
# Connects to the postgres service above by its service name.
DATABASE_URL: postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@postgres:5432/${POSTGRES_DB:-ocdp}?sslmode=disable
ports:
# Host port is configurable; defaults to 18081.
- "${VERIFY_BACKEND_PORT:-18081}:8080"
volumes:
# The backend source tree is mounted at the working directory.
- ../backend:/src
# Installs git, then runs the API entry point with `go run`.
command: >
sh -c "
apk add --no-cache git &&
go run cmd/api/main.go
"
depends_on:
postgres:
condition: service_healthy
networks:
- ocdp-verify-network
# Explicitly named network and volume dedicated to this verification stack.
networks:
ocdp-verify-network:
name: ocdp-verify-network
volumes:
ocdp_verify_postgres_data:
name: ocdp-verify-postgres-data

View File

@ -0,0 +1,176 @@
#!/usr/bin/env python3
# Covers frontend button/API interaction audit: auth, navigation, config modals,
# registry/cluster health buttons, chart launch modes, copy action, instances, and mobile overflow.
import os
from playwright.sync_api import expect, sync_playwright
# URL of the frontend under test.
BASE_URL = os.environ.get("FRONTEND_URL", "http://localhost:18080")
ADMIN_USER = os.environ.get("ADMIN_USER", "admin")
# Required: a missing ADMIN_PASS raises KeyError when the script starts.
ADMIN_PASS = os.environ["ADMIN_PASS"]
def login(page):
    """Open the frontend and sign in as the admin user if a login form is shown.

    Returns immediately after navigation when the page has no password input.
    Otherwise fills the first non-password input with ADMIN_USER and the first
    password input with ADMIN_PASS, clicks the last "Login" button, and waits
    for the ``/home`` URL and for network idle.
    """
    page.goto(BASE_URL, wait_until="networkidle")
    password_fields = page.locator("input[type='password']")
    if password_fields.count() == 0:
        return
    username_field = page.locator("input:not([type='password'])").first
    username_field.fill(ADMIN_USER)
    password_fields.first.fill(ADMIN_PASS)
    submit = page.get_by_role("button", name="Login").last
    submit.click()
    page.wait_for_url("**/home", timeout=15000)
    page.wait_for_load_state("networkidle")
def click_nav(page, name, index=0):
    """Click the ``index``-th button named ``name`` and wait for the page to settle.

    A regular click is tried first with a 5 second timeout; if it raises, the
    element's own ``click()`` is invoked through JavaScript instead. Afterwards
    the function waits for network idle plus a fixed 400 ms.
    """
    target = page.get_by_role("button", name=name).nth(index)
    try:
        target.click(timeout=5000)
    except Exception:
        # Fall back to a DOM-level click when the regular click fails.
        target.evaluate("element => element.click()")
    page.wait_for_load_state("networkidle")
    page.wait_for_timeout(400)
def click_if_present(locator, timeout=2500):
    """Click the first element matched by ``locator`` if there is one.

    Returns:
        True when an element was matched and clicked, False when the locator
        matched nothing.
    """
    found = locator.count() > 0
    if found:
        locator.first.click(timeout=timeout)
    return found
def collect_console_error(errors, msg):
    """Append the text of an error-level console message to ``errors``.

    Messages whose type is not ``"error"`` are ignored, as are messages whose
    text contains one of the known, tolerated fragments listed below.
    """
    text = msg.text
    if msg.type != "error":
        return
    tolerated = (
        "Failed to load resource: the server responded with a status of 404",
        "[LaunchModal] Failed to load values schema",
        "No chart repositories found in this registry",
    )
    for fragment in tolerated:
        if fragment in text:
            return
    errors.append(text)
def assert_no_overlay_overflow(page):
    """Assert that the document is not wider than its viewport.

    The check runs in the page and allows a 2 pixel tolerance between the
    document's scroll width and client width.
    """
    script = "document.documentElement.scrollWidth > document.documentElement.clientWidth + 2"
    has_overflow = page.evaluate(script)
    assert not has_overflow, "page has horizontal overflow"
# Main audit flow: drive a headless Chromium through the UI, clicking the
# buttons each page offers and collecting page/console errors along the way.
with sync_playwright() as p:
browser = p.chromium.launch(headless=True)
# Desktop-sized context; clipboard permissions are granted for the Copy action below.
context = browser.new_context(
viewport={"width": 1440, "height": 950},
permissions=["clipboard-read", "clipboard-write"],
)
page = context.new_page()
errors = []
# Record uncaught page exceptions and filtered console errors.
page.on("pageerror", lambda exc: errors.append(str(exc)))
page.on("console", lambda msg: collect_console_error(errors, msg))
# Dismiss every native dialog the page opens.
page.on("dialog", lambda dialog: dialog.dismiss())
login(page)
# Visit each navigation entry, check the expected text, and check for horizontal overflow.
for label, nav_index, assertion in [
("Home", 0, "Operations Workbench"),
("Clusters", 0, "Configuration - Clusters"),
("Registries", 0, "Configuration - Registries"),
("Cluster Monitoring", 0, "Cluster"),
("Launch Instance", 0, "Chart Browser"),
("Instances", 0, "Instance"),
]:
click_nav(page, label, nav_index)
expect(page.locator("body")).to_contain_text(assertion, timeout=15000)
assert_no_overlay_overflow(page)
# Registries page: refresh, open/cancel the add modal, then exercise edit/test/delete when present.
click_nav(page, "Registries", 0)
page.get_by_role("button", name="Refresh").first.click()
page.wait_for_load_state("networkidle")
page.get_by_role("button", name="Add Registry").click()
expect(page.get_by_text("Add Registry Configuration")).to_be_visible(timeout=5000)
page.get_by_role("button", name="Cancel").click()
if page.locator("button[title='Edit']").count() > 0:
page.locator("button[title='Edit']").first.click()
expect(page.get_by_text("Edit Registry Configuration")).to_be_visible(timeout=5000)
if page.get_by_role("button", name="Test Connection").count() > 0:
page.get_by_role("button", name="Test Connection").click()
page.wait_for_timeout(800)
page.get_by_role("button", name="Cancel").click()
# NOTE(review): presumably relies on the dialog handler above dismissing a
# confirmation so nothing is actually deleted — confirm against the UI.
if page.locator("button[title='Delete']").count() > 0:
page.locator("button[title='Delete']").first.click()
page.wait_for_timeout(300)
# Clusters page: same pattern as the registries page.
click_nav(page, "Clusters", 0)
page.get_by_role("button", name="Refresh").first.click()
page.wait_for_load_state("networkidle")
page.get_by_role("button", name="Add Cluster").click()
expect(page.get_by_text("Add Cluster Configuration")).to_be_visible(timeout=5000)
page.get_by_role("button", name="Cancel").click()
if page.locator("button[title='Test Connection']").count() > 0:
page.locator("button[title='Test Connection']").first.click()
page.wait_for_timeout(800)
if page.locator("button[title='Edit']").count() > 0:
page.locator("button[title='Edit']").first.click()
expect(page.get_by_text("Edit Cluster Configuration")).to_be_visible(timeout=5000)
page.get_by_role("button", name="Cancel").click()
if page.locator("button[title='Delete']").count() > 0:
page.locator("button[title='Delete']").first.click()
page.wait_for_timeout(300)
# Chart browser: refresh, toggle the tag/chart views, copy, and walk through the launch modal modes.
click_nav(page, "Launch Instance")
expect(page.get_by_text("Chart Browser")).to_be_visible(timeout=15000)
page.get_by_role("button", name="Refresh").first.click()
page.wait_for_load_state("networkidle")
if page.get_by_role("button", name="All tags").count() > 0:
page.get_by_role("button", name="All tags").click()
page.wait_for_load_state("networkidle")
page.get_by_role("button", name="Charts", exact=True).click()
page.wait_for_load_state("networkidle")
if page.get_by_role("button", name="Copy").count() > 0:
page.get_by_role("button", name="Copy").first.click()
page.wait_for_timeout(300)
if page.get_by_role("button", name="Launch").count() > 0:
page.get_by_role("button", name="Launch").first.click()
if page.get_by_role("button", name="Cancel").count() > 0:
expect(page.get_by_role("heading", name="Launch Instance")).to_be_visible(timeout=10000)
expect(page.get_by_role("button", name="Quick")).to_be_visible()
page.get_by_role("button", name="YAML").click()
page.locator("textarea").last.fill("replicaCount: 1\n")
if page.get_by_role("button", name="Guided").is_enabled():
page.get_by_role("button", name="Guided").click()
page.get_by_role("button", name="Cancel").click()
# Instances page: refresh, then exercise Entries, Modify, and Delete when present.
click_nav(page, "Instances", 0)
page.get_by_role("button", name="Refresh").first.click()
page.wait_for_load_state("networkidle")
if page.get_by_role("button", name="Entries").count() > 0:
page.get_by_role("button", name="Entries").first.click()
expect(page.locator("body")).to_contain_text("Entries", timeout=5000)
click_if_present(page.get_by_role("button", name="Close"))
if page.get_by_role("button", name="Modify").count() > 0:
page.get_by_role("button", name="Modify").first.click()
expect(page.locator("body")).to_contain_text("Modify Instance", timeout=5000)
if page.get_by_role("button", name="YAML").count() > 0:
page.get_by_role("button", name="YAML").click()
page.get_by_role("button", name="Cancel").click()
# The Delete button must lie horizontally inside the viewport before it is clicked.
if page.get_by_role("button", name="Delete", exact=True).count() > 0:
delete_button = page.get_by_role("button", name="Delete", exact=True).first
box = delete_button.bounding_box()
viewport = page.viewport_size or {"width": 1440, "height": 950}
assert box and box["x"] >= 0 and box["x"] + box["width"] <= viewport["width"], "Delete instance button overflows horizontally"
delete_button.click()
page.wait_for_timeout(300)
# Mobile pass: a 390x844 mobile page must show the chart browser without horizontal overflow.
mobile = browser.new_page(viewport={"width": 390, "height": 844}, is_mobile=True)
mobile.on("pageerror", lambda exc: errors.append(str(exc)))
mobile.on("console", lambda msg: collect_console_error(errors, msg))
login(mobile)
click_nav(mobile, "Launch Instance")
expect(mobile.get_by_text("Chart Browser")).to_be_visible(timeout=15000)
assert_no_overlay_overflow(mobile)
browser.close()
# Fail the audit with up to 12 of the collected errors, one per line.
if errors:
raise AssertionError("\n".join(errors[:12]))

View File

@ -0,0 +1,96 @@
#!/usr/bin/env python3
# Covers frontend smoke flows: login, chart browser rendering, launch modal,
# registry/cluster configuration pages, instances page, and mobile layout sanity.
import os
from pathlib import Path
from playwright.sync_api import expect, sync_playwright
# URL of the frontend under test.
BASE_URL = os.environ.get("FRONTEND_URL", "http://localhost:18080")
ADMIN_USER = os.environ.get("ADMIN_USER", "admin")
# Required: a missing ADMIN_PASS raises KeyError when the script starts.
ADMIN_PASS = os.environ["ADMIN_PASS"]
def login(page):
    """Open the frontend and sign in as the admin user if a login form is shown.

    Returns right after navigation when no password input exists. Otherwise
    submits ADMIN_USER / ADMIN_PASS through the form, waits for the ``/home``
    URL and network idle, and asserts the page does not show "Login failed".
    """
    page.goto(BASE_URL, wait_until="networkidle")
    password_fields = page.locator("input[type='password']")
    if password_fields.count() == 0:
        return
    page.locator("input:not([type='password'])").first.fill(ADMIN_USER)
    password_fields.first.fill(ADMIN_PASS)
    login_buttons = page.get_by_role("button").filter(has_text="Login")
    login_buttons.last.click()
    page.wait_for_url("**/home", timeout=15000)
    page.wait_for_load_state("networkidle")
    body = page.locator("body")
    expect(body).not_to_contain_text("Login failed")
def screenshot(page, name):
    """Save a full-page screenshot of ``page`` to ``/tmp/<name>.png``."""
    target = f"/tmp/{name}.png"
    page.screenshot(path=target, full_page=True)
def click_nav(page, name, index=0):
    """Click the ``index``-th button named ``name`` and wait for the page to settle.

    Tries a regular click with a 5 second timeout and falls back to invoking
    the element's ``click()`` through JavaScript if that raises. Then waits for
    network idle plus a fixed 500 ms.
    """
    nav_button = page.get_by_role("button", name=name).nth(index)
    try:
        nav_button.click(timeout=5000)
    except Exception:
        # Fall back to a DOM-level click when the regular click fails.
        nav_button.evaluate("element => element.click()")
    page.wait_for_load_state("networkidle")
    page.wait_for_timeout(500)
def record_console_error(errors, msg):
    """Append ``msg.text`` to ``errors`` unless it matches a tolerated fragment.

    The message type is not inspected here; the text is recorded whenever it
    contains none of the fragments listed below.
    """
    text = msg.text
    tolerated = (
        "Failed to load resource: the server responded with a status of 404",
        "[LaunchModal] Failed to load values schema",
    )
    if not any(fragment in text for fragment in tolerated):
        errors.append(text)
# Main smoke flow: drive a headless Chromium through the key pages, saving a
# screenshot of each and collecting page/console errors.
with sync_playwright() as p:
browser = p.chromium.launch(headless=True)
page = browser.new_page(viewport={"width": 1440, "height": 950})
errors = []
# Record uncaught page exceptions and error-level console messages.
page.on("pageerror", lambda exc: errors.append(str(exc)))
page.on("console", lambda msg: record_console_error(errors, msg) if msg.type == "error" else None)
login(page)
# Chart browser, plus the launch modal when a Launch button is available.
click_nav(page, "Launch Instance")
expect(page.get_by_text("Chart Browser")).to_be_visible(timeout=15000)
screenshot(page, "ocdp-chart-browser-desktop")
if page.get_by_role("button", name="Launch").count() > 0:
page.get_by_role("button", name="Launch").first.click()
if page.get_by_role("button", name="Cancel").count() > 0:
expect(page.get_by_role("heading", name="Launch Instance")).to_be_visible(timeout=10000)
screenshot(page, "ocdp-launch-modal")
page.get_by_role("button", name="Cancel").click()
# Configuration and instance pages: each must show its expected text.
click_nav(page, "Registries")
expect(page.locator("body")).to_contain_text("Registry")
screenshot(page, "ocdp-registry-config")
click_nav(page, "Clusters")
expect(page.locator("body")).to_contain_text("Cluster")
screenshot(page, "ocdp-cluster-config")
click_nav(page, "Instances")
expect(page.locator("body")).to_contain_text("Instance")
screenshot(page, "ocdp-instances")
# Mobile pass: no console/page error handlers are attached to this page.
mobile = browser.new_page(viewport={"width": 390, "height": 844}, is_mobile=True)
login(mobile)
click_nav(mobile, "Launch Instance")
expect(mobile.get_by_text("Chart Browser")).to_be_visible(timeout=15000)
screenshot(mobile, "ocdp-chart-browser-mobile")
# The document may exceed the viewport width by at most 2 pixels.
overflow = mobile.evaluate("document.documentElement.scrollWidth > document.documentElement.clientWidth + 2")
assert not overflow, "mobile page has horizontal overflow"
browser.close()
# Fail the run with up to 10 of the collected errors, one per line. The
# separator must be a real newline ("\n"); "\\n" would print a literal
# backslash-n between messages.
if errors:
    raise AssertionError("\n".join(errors[:10]))

View File

@ -0,0 +1,160 @@
#!/usr/bin/env python3
# Covers InstanceCard action layout: creates a harmless failed metadata instance
# with an invalid chart before Helm runs, opens the Instances page, verifies the
# Delete button remains inside the card and viewport, clicks it, and cleans up.
import json
import os
import time
import uuid
from dataclasses import dataclass
from typing import Any
from urllib.error import HTTPError, URLError
from urllib.parse import urljoin
from urllib.request import Request, urlopen
from playwright.sync_api import expect, sync_playwright
# Backend API base URL without a trailing slash.
RAW_BASE_URL = os.environ.get("BASE_URL", "http://localhost:18081/api/v1").rstrip("/")
# Same URL with a trailing slash, used as the base for urljoin in request().
BASE_URL = RAW_BASE_URL + "/"
# URL of the frontend under test.
FRONTEND_URL = os.environ.get("FRONTEND_URL", "http://localhost:18080")
ADMIN_USER = os.environ.get("ADMIN_USER", "admin")
# Required: a missing ADMIN_PASS raises KeyError when the script starts.
ADMIN_PASS = os.environ["ADMIN_PASS"]
@dataclass
class Response:
    """Result of one API call made by ``request``."""

    # HTTP status code.
    status: int
    # Response body decoded as text.
    body: str
    # Body parsed as JSON, or None when it is empty or not valid JSON.
    json: Any
def parse_json(body: str) -> Any:
    """Decode ``body`` as JSON, returning None for empty or invalid input."""
    if not body:
        return None
    try:
        parsed = json.loads(body)
    except json.JSONDecodeError:
        return None
    return parsed
def request(method: str, path: str, token: str | None = None, payload: Any = None) -> Response:
    """Call the backend API and return the outcome as a ``Response``.

    Args:
        method: HTTP method name.
        path: API path; leading slashes are removed and it is joined onto BASE_URL.
        token: Optional bearer token; sent when truthy.
        payload: Optional object serialized as the JSON request body.

    Returns:
        A ``Response`` for both successful and HTTP-error replies.

    Raises:
        AssertionError: when the server cannot be reached.
    """
    headers = {"Accept": "application/json"}
    encoded = None
    if payload is not None:
        encoded = json.dumps(payload).encode("utf-8")
        headers["Content-Type"] = "application/json"
    if token:
        headers["Authorization"] = f"Bearer {token}"
    try:
        url = urljoin(BASE_URL, path.lstrip("/"))
        req = Request(url, data=encoded, headers=headers, method=method)
        with urlopen(req, timeout=40) as res:
            status = res.status
            text = res.read().decode("utf-8", errors="replace")
    except HTTPError as exc:
        # HTTP error replies are reported to the caller, not raised.
        status = exc.code
        text = exc.read().decode("utf-8", errors="replace")
    except URLError as exc:
        raise AssertionError(f"Cannot reach {BASE_URL}: {exc}") from exc
    return Response(status, text, parse_json(text))
def login_api(username: str, password: str) -> str:
    """Log in through the API and return the access token.

    Raises:
        AssertionError: when the login does not return HTTP 200, or when the
            response is not a JSON object containing a non-empty ``accessToken``.
    """
    resp = request("POST", "/auth/login", payload={"username": username, "password": password})
    if resp.status != 200:
        raise AssertionError(f"login failed: HTTP {resp.status} {resp.body[:300]}")
    # Validate the shape so a malformed 200 reply fails with a clear message
    # instead of a TypeError/KeyError.
    if not isinstance(resp.json, dict) or not resp.json.get("accessToken"):
        raise AssertionError(f"login response must include accessToken: {resp.body[:300]}")
    return str(resp.json["accessToken"])
def first_id(items: Any, name: str | None = None) -> str:
if not isinstance(items, list):
raise AssertionError(f"expected list, got {items!r}")
for item in items:
if isinstance(item, dict) and item.get("id") and (name is None or item.get("name") == name):
return str(item["id"])
raise AssertionError(f"could not find id for {name or 'first item'}")
def list_instances(cluster_id: str, token: str) -> list[dict[str, Any]]:
    """Return the dict entries of the ``instances`` list for a cluster.

    Raises:
        AssertionError: when the request does not return HTTP 200 with a JSON object.
    """
    resp = request("GET", f"/clusters/{cluster_id}/instances", token)
    ok = resp.status == 200 and isinstance(resp.json, dict)
    if not ok:
        raise AssertionError(f"list instances failed: HTTP {resp.status} {resp.body[:300]}")
    instances = []
    for entry in resp.json.get("instances", []):
        if isinstance(entry, dict):
            instances.append(entry)
    return instances
def login_ui(page) -> None:
    """Open the frontend and sign in as the admin user if a login form is shown.

    Returns right after navigation when the page has no password input.
    """
    page.goto(FRONTEND_URL, wait_until="networkidle")
    password_selector = "input[type='password']"
    if page.locator(password_selector).count() == 0:
        return
    username_input = page.locator("input:not([type='password'])").first
    username_input.fill(ADMIN_USER)
    password_input = page.locator(password_selector).first
    password_input.fill(ADMIN_PASS)
    page.get_by_role("button", name="Login").last.click()
    page.wait_for_url("**/home", timeout=15000)
    page.wait_for_load_state("networkidle")
def main() -> int:
    """Create a throwaway instance, check its card's Delete button layout, and clean up.

    The instance references a chart that does not exist, and the create call
    is accepted with HTTP 201 or 400. The test then waits for the instance to
    appear in the list, opens the Instances page in a 920x760 browser, and
    asserts the Delete button lies inside both its card and the viewport.

    Returns:
        0 on success.

    Raises:
        AssertionError: on any failed precondition or layout check.
    """
    token = login_api(ADMIN_USER, ADMIN_PASS)
    clusters = request("GET", "/clusters", token)
    registries = request("GET", "/registries", token)
    if clusters.status != 200 or registries.status != 200:
        raise AssertionError("clusters/registries must be available")

    # Prefer a cluster named "k3s" when one exists. The probe tolerates a
    # payload that is not a list of dicts; first_id then reports that case
    # with an AssertionError instead of a TypeError/AttributeError here.
    cluster_items = clusters.json if isinstance(clusters.json, list) else []
    has_k3s = any(isinstance(item, dict) and item.get("name") == "k3s" for item in cluster_items)
    cluster_id = first_id(clusters.json, "k3s" if has_k3s else None)
    registry_id = first_id(registries.json)

    suffix = uuid.uuid4().hex[:8]
    release = f"ocdp-ui-overflow-{suffix}"
    namespace = f"ocdp-ui-overflow-{suffix}"
    instance_id = ""
    try:
        create = request(
            "POST",
            f"/clusters/{cluster_id}/instances",
            token,
            {
                "name": release,
                "namespace": namespace,
                "registryId": registry_id,
                "repository": f"charts/nonexistent-ui-overflow-{suffix}",
                "tag": "0.0.0",
                "valuesYaml": "replicaCount: 1\n",
            },
        )
        if create.status not in {201, 400}:
            raise AssertionError(f"expected create to succeed or fail after DB insert, got HTTP {create.status}: {create.body[:500]}")

        # Wait up to about 10 seconds for the instance record to show up in the list.
        for _ in range(20):
            matches = [item for item in list_instances(cluster_id, token) if item.get("name") == release]
            if matches:
                instance_id = str(matches[0]["id"])
                break
            time.sleep(0.5)
        if not instance_id:
            raise AssertionError("test instance was not visible after failed chart download")

        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            page = browser.new_page(viewport={"width": 920, "height": 760})
            # Accept any native dialog opened by the Delete click below.
            page.on("dialog", lambda dialog: dialog.accept())
            login_ui(page)
            page.get_by_role("button", name="Instances", exact=True).click()
            page.wait_for_load_state("networkidle")

            heading = page.get_by_role("heading", name=release, exact=True)
            expect(heading).to_be_visible(timeout=15000)
            # The card is taken to be the nearest ancestor div whose class contains "group".
            card = heading.locator("xpath=ancestor::div[contains(@class, 'group')][1]")
            delete_button = card.get_by_role("button", name="Delete", exact=True)
            expect(delete_button).to_be_visible()

            card_box = card.bounding_box()
            button_box = delete_button.bounding_box()
            viewport = page.viewport_size or {"width": 920, "height": 760}
            assert card_box and button_box, "card and delete button must have layout boxes"
            # A 1 pixel tolerance is allowed against the card edges.
            assert button_box["x"] >= card_box["x"] - 1, "Delete button overflows left edge of card"
            assert button_box["x"] + button_box["width"] <= card_box["x"] + card_box["width"] + 1, "Delete button overflows right edge of card"
            assert button_box["x"] + button_box["width"] <= viewport["width"], "Delete button overflows viewport"
            delete_button.click()
            page.wait_for_timeout(500)
            browser.close()
        print("PASS: instance card action layout")
        return 0
    finally:
        # Always try to remove the test instance through the API; the response is not checked.
        if instance_id:
            request("DELETE", f"/clusters/{cluster_id}/instances/{instance_id}", token)
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_code = main()
    raise SystemExit(exit_code)

View File

@ -0,0 +1,384 @@
#!/usr/bin/env python3
# Covers the multi-tenant API contract: auth denial for business APIs, admin/user
# RBAC differences, private cluster/registry resource isolation, user-owned
# cluster/registry CRUD, global_shared rejection for normal users, admin cleanup
# across tenants, optional namespace policy probes, and best-effort kubeconfig
# TTL/no-token persistence checks when DATABASE_URL and psql are available.
import json
import os
import shutil
import subprocess
import sys
import time
import uuid
from dataclasses import dataclass
from typing import Any
from urllib.error import HTTPError, URLError
from urllib.parse import urljoin
from urllib.request import Request, urlopen
# Backend API base URL without a trailing slash.
RAW_BASE_URL = os.environ.get("BASE_URL", "http://localhost:18081/api/v1").rstrip("/")
# Same URL with a trailing slash, used as the base for urljoin in request().
BASE_URL = RAW_BASE_URL + "/"
# Health endpoint URL: the 7-character "/api/v1" suffix is removed when present.
HEALTH_URL = (RAW_BASE_URL[:-7] if RAW_BASE_URL.endswith("/api/v1") else RAW_BASE_URL) + "/health"
# Admin credentials: ADMIN_* takes precedence, then BOOTSTRAP_ADMIN_*.
ADMIN_USER = os.environ.get("ADMIN_USER", os.environ.get("BOOTSTRAP_ADMIN_USER", "admin"))
ADMIN_PASS = os.environ.get("ADMIN_PASS", os.environ.get("BOOTSTRAP_ADMIN_PASS", ""))
# Two test users; unless overridden, each name gets a random 8-hex-digit suffix per run.
USER_A = os.environ.get("USER_A", f"rbac-a-{uuid.uuid4().hex[:8]}")
USER_A_PASS = os.environ.get("USER_A_PASS", "RbacUserA123!")
USER_B = os.environ.get("USER_B", f"rbac-b-{uuid.uuid4().hex[:8]}")
USER_B_PASS = os.environ.get("USER_B_PASS", "RbacUserB123!")
# True only when RUN_NAMESPACE_CONTRACT is "true" (case-insensitive).
RUN_NAMESPACE_CONTRACT = os.environ.get("RUN_NAMESPACE_CONTRACT", "").lower() == "true"
# Empty when DATABASE_URL is not set.
DATABASE_URL = os.environ.get("DATABASE_URL", "")
class ContractFailure(AssertionError):
    """Error raised by ``fail`` when a contract check does not hold."""
@dataclass
class Response:
    """Result of one API call made by ``request``."""

    # HTTP status code.
    status: int
    # Response headers copied into a plain dict.
    headers: dict[str, str]
    # Response body decoded as text.
    body: str
    # Body parsed as JSON, or None when it is empty or not valid JSON.
    json: Any
def fail(message: str) -> None:
    """Raise ``ContractFailure`` carrying ``message``; never returns normally."""
    failure = ContractFailure(message)
    raise failure
def parse_json(body: str) -> Any:
    """Decode ``body`` as JSON, returning None for empty or invalid input."""
    try:
        return json.loads(body) if body else None
    except json.JSONDecodeError:
        return None
def request(method: str, path: str, token: str | None = None, payload: Any = None) -> Response:
    """Call the API (or an absolute URL) and return the outcome as a ``Response``.

    Args:
        method: HTTP method name.
        path: Used as-is when it starts with "http"; otherwise leading slashes
            are removed and it is joined onto BASE_URL.
        token: Bearer token; the Authorization header is sent whenever this is
            not None, including for an empty string.
        payload: Optional object serialized as the JSON request body.

    Returns:
        A ``Response`` for both successful and HTTP-error replies.

    Raises:
        ContractFailure: via ``fail`` when the server cannot be reached.
    """
    if path.startswith("http"):
        url = path
    else:
        url = urljoin(BASE_URL, path.lstrip("/"))
    headers = {"Accept": "application/json"}
    encoded = None
    if payload is not None:
        encoded = json.dumps(payload).encode("utf-8")
        headers["Content-Type"] = "application/json"
    if token is not None:
        headers["Authorization"] = f"Bearer {token}"
    req = Request(url, data=encoded, headers=headers, method=method)
    try:
        with urlopen(req, timeout=20) as res:
            text = res.read().decode("utf-8", errors="replace")
            return Response(res.status, dict(res.headers), text, parse_json(text))
    except HTTPError as exc:
        # HTTP error replies are reported to the caller, not raised.
        text = exc.read().decode("utf-8", errors="replace")
        return Response(exc.code, dict(exc.headers), text, parse_json(text))
    except URLError as exc:
        fail(f"Cannot reach BASE_URL={BASE_URL}: {exc}")
def assert_status(resp: Response, expected: set[int], context: str) -> None:
    """Fail with a descriptive message unless ``resp.status`` is in ``expected``."""
    if resp.status in expected:
        return
    fail(f"{context}: expected HTTP {sorted(expected)}, got {resp.status}. Body: {resp.body[:500]}")
def assert_denied(resp: Response, context: str, allowed: set[int] | None = None) -> None:
    """Fail unless ``resp.status`` is one of the accepted denial codes.

    The accepted codes are ``allowed`` when it is non-empty, otherwise 401 and 403.
    """
    denied = allowed if allowed else {401, 403}
    if resp.status in denied:
        return
    fail(f"{context}: expected denial HTTP {sorted(denied)}, got {resp.status}. Body: {resp.body[:500]}")
def response_id(resp: Response, context: str) -> str:
    """Return the ``id`` of a JSON-object response as a string.

    Fails when the parsed body is not a dict or has no truthy ``id``.
    """
    payload = resp.json
    has_id = isinstance(payload, dict) and bool(payload.get("id"))
    if not has_id:
        fail(f"{context}: response must be a JSON object with id. Body: {resp.body[:500]}")
    return str(payload["id"])
def list_items(path: str, token: str, context: str) -> list[dict[str, Any]]:
    """GET ``path`` and return the dict entries of the list it contains.

    The list is either the JSON body itself or, for an object body, the first
    list found under ``items``, ``clusters``, ``registries`` or ``instances``
    (checked in that order). Fails when the status is not 200 or no list is found.
    """
    resp = request("GET", path, token)
    assert_status(resp, {200}, context)
    payload = resp.json
    if isinstance(payload, list):
        candidates = [payload]
    elif isinstance(payload, dict):
        candidates = (payload.get(key) for key in ("items", "clusters", "registries", "instances"))
    else:
        candidates = []
    for value in candidates:
        if isinstance(value, list):
            return [entry for entry in value if isinstance(entry, dict)]
    fail(f"{context}: expected a list response. Body: {resp.body[:500]}")
def login(username: str, password: str, context: str) -> str:
    """Log in through the API and return the access token as a string.

    Fails when the status is not 200 or the JSON body has no truthy ``accessToken``.
    """
    credentials = {"username": username, "password": password}
    resp = request("POST", "/auth/login", payload=credentials)
    assert_status(resp, {200}, context)
    payload = resp.json
    has_token = isinstance(payload, dict) and bool(payload.get("accessToken"))
    if not has_token:
        fail(f"{context}: login response must include accessToken. Body: {resp.body[:500]}")
    return str(payload["accessToken"])
def ensure_user(username: str, password: str, admin_token: str) -> str:
    """Create a ``user``-role account as admin, then log in as it and return its token.

    The create call is accepted with HTTP 201, 400 or 409; any other status fails.
    """
    new_user = {"username": username, "password": password, "role": "user"}
    register = request("POST", "/users", admin_token, new_user)
    accepted = {201, 400, 409}
    if register.status not in accepted:
        fail(f"Register test user {username}: expected 201/400/409, got {register.status}. Body: {register.body[:500]}")
    return login(username, password, f"Login test user {username}")
def create_cluster(token: str, name: str, global_shared: bool = False) -> str:
    """Create a metadata-only test cluster and return its id.

    The shared flag is sent under both ``globalShared`` and ``global_shared``.
    Fails when the status is not 201, when the response echoes the raw cluster
    token under ``token`` or ``accessToken``, or when the response has no id.
    """
    context = f"Create private cluster {name}"
    payload = {
        "name": name,
        "host": "https://127.0.0.1:65535",
        "token": f"test-only-{uuid.uuid4().hex}",
        "description": "RBAC API contract test metadata only",
        "visibility": "private",
        "globalShared": global_shared,
        "global_shared": global_shared,
    }
    resp = request("POST", "/clusters", token, payload)
    assert_status(resp, {201}, context)
    if isinstance(resp.json, dict):
        # The generated secret always starts with "test-only-".
        leaked = False
        for key in ("token", "accessToken"):
            if str(resp.json.get(key, "")).startswith("test-only-"):
                leaked = True
                break
        if leaked:
            fail(f"Create private cluster {name}: response leaked raw cluster token")
    return response_id(resp, context)
def create_registry(token: str, name: str, global_shared: bool = False) -> str:
    """Create a metadata-only test registry and return its id.

    The shared flag is sent under both ``globalShared`` and ``global_shared``.
    Fails when the status is not 201, when the response echoes the raw
    password, or when the response has no id.
    """
    context = f"Create private registry {name}"
    payload = {
        "name": name,
        "url": "https://registry.invalid",
        "username": "contract-user",
        "password": f"test-only-{uuid.uuid4().hex}",
        "description": "RBAC API contract test metadata only",
        "insecure": True,
        "visibility": "private",
        "globalShared": global_shared,
        "global_shared": global_shared,
    }
    resp = request("POST", "/registries", token, payload)
    assert_status(resp, {201}, context)
    if isinstance(resp.json, dict):
        # The generated secret always starts with "test-only-".
        echoed = str(resp.json.get("password", ""))
        if echoed.startswith("test-only-"):
            fail(f"Create private registry {name}: response leaked raw registry password")
    return response_id(resp, context)
def cleanup(path: str, resource_id: str, token: str, label: str) -> None:
    """Delete one resource, warning on stderr instead of failing.

    Does nothing when ``resource_id`` is empty. Statuses 204 and 404 are both
    treated as a clean outcome; any other status only produces a warning.
    """
    if not resource_id:
        return
    outcome = request("DELETE", f"{path}/{resource_id}", token)
    deleted_or_absent = outcome.status in {204, 404}
    if not deleted_or_absent:
        print(f"WARN: cleanup {label} returned HTTP {outcome.status}: {outcome.body[:300]}", file=sys.stderr)
def assert_not_visible(path: str, resource_id: str, token: str, label: str) -> None:
    """Fail if ``resource_id`` shows up when ``token``'s owner lists ``path``."""
    listed = list_items(path, token, f"List {label} resources as another user")
    visible_ids = set()
    for entry in listed:
        visible_ids.add(str(entry.get("id")))
    if resource_id in visible_ids:
        fail(f"{label} isolation: private resource {resource_id} is visible in another user's list")
def assert_global_shared_rejected(token: str, path: str, payload: dict[str, Any], label: str, admin_token: str) -> None:
    """Check that a normal user cannot create a globally shared resource.

    A 400/401/403/422 answer is the expected rejection. On any other status
    the contract fails. If the server answered 201 with an id, that resource
    is first removed with the admin token so it does not linger.
    """
    resp = request("POST", path, token, payload)
    if resp.status not in {400, 401, 403, 422}:
        created_anyway = resp.status == 201 and isinstance(resp.json, dict)
        leaked_id = str(resp.json.get("id", "")) if created_anyway else ""
        cleanup(path, leaked_id, admin_token, f"unexpected global_shared {label}")
        fail(f"{label} global_shared guard: normal user must not create global shared resources; got HTTP {resp.status}")
def check_kubeconfig_contract(user_token: str) -> None:
    """Validate the kubeconfig returned by GET /workspaces/credentials/kubeconfig.

    The endpoint must exist and answer 200. The body must contain
    ``apiVersion:``, ``kind: Config`` and ``token:``. Either the body or the
    response headers must mention one of the TTL/expiration hints, compared
    case-insensitively.
    """
    resp = request("GET", "/workspaces/credentials/kubeconfig", user_token)
    if resp.status == 404:
        fail("Kubeconfig contract: required endpoint GET /workspaces/credentials/kubeconfig is not implemented")
    assert_status(resp, {200}, "Kubeconfig contract")

    body = resp.body
    looks_like_kubeconfig = "apiVersion:" in body and "kind: Config" in body
    if not looks_like_kubeconfig:
        fail("Kubeconfig contract: response should contain kubeconfig YAML")
    if "token:" not in body:
        fail("Kubeconfig contract: response should include an ephemeral bearer token in the generated kubeconfig")

    header_lines = [f"{k}: {v}" for k, v in resp.headers.items()]
    searchable = (body + "\n".join(header_lines)).lower()
    mentions_ttl = False
    for hint in ("expiration", "expires", "ttl", "TokenRequest", "exp:"):
        if hint.lower() in searchable:
            mentions_ttl = True
            break
    if not mentions_ttl:
        fail("Kubeconfig contract: response should expose TTL/expiration information for the short-lived token")
def check_optional_db_no_kubeconfig_token() -> None:
    """Optionally scan the database schema for columns that look like stored tokens.

    The scan is skipped (never failed) when DATABASE_URL is unset, when
    ``psql`` is not on PATH, when ``psql`` cannot be started or times out, or
    when the query itself returns a non-zero exit code. The contract fails
    only when the query succeeds and reports matching column names in the
    ``public`` schema.
    """
    if not DATABASE_URL:
        print("SKIP: DATABASE_URL is not set; skipping optional kubeconfig token persistence scan")
        return
    if not shutil.which("psql"):
        print("SKIP: psql is not installed; skipping optional kubeconfig token persistence scan")
        return
    query = (
        "select table_name, column_name from information_schema.columns "
        "where table_schema='public' and column_name ~* '(kubeconfig|service_account_token|jwt|access_token|refresh_token|bearer_token)';"
    )
    try:
        proc = subprocess.run(
            ["psql", DATABASE_URL, "-v", "ON_ERROR_STOP=1", "-Atc", query],
            text=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            timeout=20,
            check=False,
        )
    except subprocess.TimeoutExpired:
        # Keep the scan optional. The exception text is deliberately not printed:
        # it embeds the full command line, which includes DATABASE_URL.
        print("SKIP: optional DB scan failed: psql timed out after 20 seconds", file=sys.stderr)
        return
    except OSError as exc:
        # psql vanished or is not executable between the which() check and the run.
        print(f"SKIP: optional DB scan failed: could not start psql ({exc.strerror or type(exc).__name__})", file=sys.stderr)
        return
    if proc.returncode != 0:
        print(f"SKIP: optional DB scan failed: {proc.stderr.strip()}", file=sys.stderr)
        return
    rows = [line.strip() for line in proc.stdout.splitlines() if line.strip()]
    if rows:
        fail("Optional DB scan: kubeconfig/JWT-looking token storage columns found: " + ", ".join(rows))
def check_namespace_contract(user_token: str, cluster_id: str, registry_id: str) -> None:
    """Probe that deployments into foreign namespaces are rejected.

    Only runs when RUN_NAMESPACE_CONTRACT is truthy; otherwise prints a SKIP
    line. For each probed namespace an instance-creation request is sent as
    the user, and anything other than 400/401/403/422 fails the contract.
    """
    if not RUN_NAMESPACE_CONTRACT:
        print("SKIP: namespace rejection probes require RUN_NAMESPACE_CONTRACT=true to avoid real cluster operations")
        return
    rejection_statuses = {400, 401, 403, 422}
    endpoint = f"/clusters/{cluster_id}/instances"
    for namespace in ("default", "kube-system", "other-workspace-contract"):
        probe = {
            "name": f"contract-ns-{uuid.uuid4().hex[:8]}",
            "namespace": namespace,
            "registryId": registry_id,
            "repository": "charts/nonexistent-contract",
            "tag": "0.0.0",
            "valuesYaml": "replicaCount: 1\n",
        }
        resp = request("POST", endpoint, user_token, probe)
        if resp.status in rejection_statuses:
            continue
        fail(f"Namespace policy: namespace {namespace!r} must be rejected before deployment, got HTTP {resp.status}")
def main() -> int:
    """Run the multi-tenant/RBAC API contract end to end.

    Returns 0 when every assertion passes and 1 when a ContractFailure is
    caught. Resources created along the way are recorded in ``created`` as
    (path, id, token) tuples and deleted in reverse creation order, both at
    the end of a successful run and in the ``finally`` block after a failure.
    """
    created: list[tuple[str, str, str]] = []
    suffix = uuid.uuid4().hex[:8]
    admin_token = ""
    try:
        if not ADMIN_PASS:
            fail("ADMIN_PASS or BOOTSTRAP_ADMIN_PASS is required for admin/user RBAC contract assertions")
        print("==> Health")
        assert_status(request("GET", HEALTH_URL), {200}, "Health check")
        print("==> Business API auth denial")
        assert_denied(request("GET", "/clusters"), "Unauthenticated GET /clusters")
        assert_denied(request("GET", "/registries", token="invalid-token"), "Invalid-token GET /registries")
        assert_denied(
            request("POST", "/auth/register", payload={"username": f"public-{suffix}", "password": "Public123!"}),
            "Public self-registration must be disabled",
            {401, 403, 404, 405},
        )
        print("==> Accounts")
        admin_token = login(ADMIN_USER, ADMIN_PASS, "Login admin")
        assert_status(request("GET", "/users", admin_token), {200}, "Admin lists users")
        user_a_token = ensure_user(USER_A, USER_A_PASS, admin_token)
        user_b_token = ensure_user(USER_B, USER_B_PASS, admin_token)
        print("==> User-owned private cluster/registry CRUD")
        # Each resource is queued for cleanup with the admin token right after creation.
        cluster_a = create_cluster(user_a_token, f"contract-a-cluster-{suffix}")
        created.append(("/clusters", cluster_a, admin_token))
        registry_a = create_registry(user_a_token, f"contract-a-registry-{suffix}")
        created.append(("/registries", registry_a, admin_token))
        assert_status(
            request("PUT", f"/clusters/{cluster_a}", user_a_token, {"description": "owner update"}),
            {200},
            "User updates own private cluster",
        )
        assert_status(
            request("PUT", f"/registries/{registry_a}", user_a_token, {"description": "owner update"}),
            {200},
            "User updates own private registry",
        )
        print("==> global_shared rejection for normal users")
        assert_global_shared_rejected(
            user_a_token,
            "/clusters",
            {
                "name": f"contract-shared-cluster-{suffix}",
                "host": "https://127.0.0.1:65535",
                "token": "test-only-global-shared",
                "globalShared": True,
                "global_shared": True,
            },
            "cluster",
            admin_token,
        )
        assert_global_shared_rejected(
            user_a_token,
            "/registries",
            {
                "name": f"contract-shared-registry-{suffix}",
                "url": "https://registry.invalid",
                "globalShared": True,
                "global_shared": True,
            },
            "registry",
            admin_token,
        )
        print("==> Cross-tenant isolation")
        # User B owns these; every access attempted by user A below must be denied.
        cluster_b = create_cluster(user_b_token, f"contract-b-cluster-{suffix}")
        created.append(("/clusters", cluster_b, admin_token))
        registry_b = create_registry(user_b_token, f"contract-b-registry-{suffix}")
        created.append(("/registries", registry_b, admin_token))
        assert_not_visible("/clusters", cluster_b, user_a_token, "cluster")
        assert_denied(request("GET", f"/clusters/{cluster_b}", user_a_token), "UserA GET UserB cluster", {403, 404})
        assert_denied(
            request("PUT", f"/clusters/{cluster_b}", user_a_token, {"description": "cross update"}),
            "UserA update UserB cluster",
            {403, 404},
        )
        assert_denied(request("DELETE", f"/clusters/{cluster_b}", user_a_token), "UserA delete UserB cluster", {403, 404})
        assert_not_visible("/registries", registry_b, user_a_token, "registry")
        assert_denied(request("GET", f"/registries/{registry_b}", user_a_token), "UserA GET UserB registry", {403, 404})
        assert_denied(
            request("PUT", f"/registries/{registry_b}", user_a_token, {"description": "cross update"}),
            "UserA update UserB registry",
            {403, 404},
        )
        assert_denied(request("DELETE", f"/registries/{registry_b}", user_a_token), "UserA delete UserB registry", {403, 404})
        assert_denied(
            request("GET", f"/clusters/{cluster_b}/instances", user_a_token),
            "UserA list UserB private cluster instances",
            {403, 404},
        )
        print("==> Admin can manage tenant resources")
        assert_status(request("GET", f"/clusters/{cluster_b}", admin_token), {200}, "Admin reads UserB cluster")
        assert_status(request("GET", f"/registries/{registry_b}", admin_token), {200}, "Admin reads UserB registry")
        assert_status(
            request("PUT", f"/clusters/{cluster_b}", admin_token, {"description": "admin update"}),
            {200},
            "Admin updates UserB cluster",
        )
        assert_status(
            request("PUT", f"/registries/{registry_b}", admin_token, {"description": "admin update"}),
            {200},
            "Admin updates UserB registry",
        )
        print("==> Namespace and kubeconfig contracts")
        check_namespace_contract(user_a_token, cluster_a, registry_a)
        check_kubeconfig_contract(user_a_token)
        check_optional_db_no_kubeconfig_token()
        print("==> Cleanup")
        # pop() takes from the end, so resources are deleted newest-first.
        while created:
            path, resource_id, token = created.pop()
            cleanup(path, resource_id, token, resource_id)
        print("PASS: multi-tenant/RBAC API contract")
        return 0
    except ContractFailure as exc:
        print(f"FAIL: {exc}", file=sys.stderr)
        return 1
    finally:
        # Runs after success and failure alike; on success `created` is already empty.
        if admin_token:
            time.sleep(0.1)
        while created:
            path, resource_id, token = created.pop()
            cleanup(path, resource_id, token or admin_token, resource_id)
# Script entry point: the process exit code is main()'s return value.
if __name__ == "__main__":
    sys.exit(main())

View File

@ -0,0 +1,117 @@
#!/usr/bin/env python3
# Covers frontend role UI behavior for the multi-tenant/RBAC plan: admin/user
# login, admin-only navigation affordances, user inability to access admin
# resource management routes, and absence of global-shared controls for users.
import os
import sys
try:
from playwright.sync_api import expect, sync_playwright
except ImportError:
print("SKIP: Playwright is not installed; run after installing frontend test dependencies")
sys.exit(77)
# Runtime configuration, read from the environment.
# FRONTEND_URL falls back to a localhost address when unset.
FRONTEND_URL = os.environ.get("FRONTEND_URL", "http://localhost:18080")
# Admin credentials fall back to the BOOTSTRAP_ADMIN_* variables; the password has no default.
ADMIN_USER = os.environ.get("ADMIN_USER", os.environ.get("BOOTSTRAP_ADMIN_USER", "admin"))
ADMIN_PASS = os.environ.get("ADMIN_PASS", os.environ.get("BOOTSTRAP_ADMIN_PASS", ""))
# Normal-user credentials have no default; the script skips when they are empty.
USER_A = os.environ.get("USER_A", "")
USER_A_PASS = os.environ.get("USER_A_PASS", "")
def require_env(name: str, value: str) -> None:
    """Exit the process with status 77 (after a SKIP line) when ``value`` is empty."""
    if value:
        return
    print(f"SKIP: {name} is required for role UI contract checks")
    sys.exit(77)
def login(page, username: str, password: str) -> None:
    """Sign in through the frontend login form and wait for the home route.

    Asserts that the landing page text does not contain "Register". If no
    password field is present after the first load, localStorage is cleared
    and the page is loaded again before the form is filled in.
    """
    password_selector = "input[type='password']"
    page.goto(FRONTEND_URL, wait_until="networkidle")
    landing_text = page.locator("body").inner_text(timeout=10000)
    assert "Register" not in landing_text, "login page must not expose public registration"
    if page.locator(password_selector).count() == 0:
        page.evaluate("localStorage.clear()")
        page.goto(FRONTEND_URL, wait_until="networkidle")
    # The first non-password input is used as the username field.
    username_field = page.locator("input:not([type='password'])").first
    expect(username_field).to_be_visible(timeout=10000)
    username_field.fill(username)
    page.locator(password_selector).first.fill(password)
    login_buttons = page.get_by_role("button").filter(has_text="Login")
    login_buttons.last.click()
    page.wait_for_url("**/home", timeout=15000)
    page.wait_for_load_state("networkidle")
    expect(page.locator("body")).not_to_contain_text("Login failed")
def visible_text(page) -> str:
    """Return the inner text of the page's ``body`` element (10 s timeout)."""
    body = page.locator("body")
    return body.inner_text(timeout=10000)
def assert_user_restrictions(page) -> None:
    """Assert that a normal user's UI shows no admin controls or admin routes.

    First the current page text is checked for a list of forbidden labels.
    Then each admin path is opened; the visit passes when the page text
    contains "forbidden" or "unauthorized", or when the browser ended up on a
    different URL than the one requested.
    """
    page_text = visible_text(page).lower()
    forbidden_labels = (
        "global shared",
        "global_shared",
        "make shared",
        "all workspaces",
        "admin console",
        "user management",
        "workspace management",
    )
    found = [label for label in forbidden_labels if label in page_text]
    assert not found, f"user UI exposes admin/global controls: {found}"

    base = FRONTEND_URL.rstrip("/")
    for path in (
        "/admin",
        "/admin/users",
        "/admin/workspaces",
        "/configuration/users",
        "/configuration/workspaces",
    ):
        target = base + path
        page.goto(target, wait_until="networkidle")
        page.wait_for_timeout(500)
        text = visible_text(page).lower()
        assert "forbidden" in text or "unauthorized" in text or page.url.rstrip("/") != target, (
            f"user can access admin route {path}"
        )
def assert_admin_affordances(page) -> None:
    """Assert that an admin session renders the management pages.

    Checks the clusters configuration page for any management keyword, then
    the users page for "create user" and "accounts". When USER_A is set, that
    user's table row must show a Delete button whose horizontal position lies
    inside the viewport.
    """
    base = FRONTEND_URL.rstrip("/")

    page.goto(base + "/configuration/clusters", wait_until="networkidle")
    page.wait_for_load_state("networkidle")
    clusters_text = visible_text(page).lower()
    keywords = ["cluster", "registry", "workspace", "user", "admin"]
    assert any(word in clusters_text for word in keywords), "admin UI did not render management affordances"

    page.goto(base + "/configuration/users", wait_until="networkidle")
    page.wait_for_load_state("networkidle")
    users_text = visible_text(page).lower()
    assert "create user" in users_text and "accounts" in users_text, "admin user management UI did not render"

    if not USER_A:
        return
    user_row = page.locator("tr").filter(has_text=USER_A).first
    expect(user_row).to_be_visible(timeout=10000)
    delete_button = user_row.get_by_role("button", name="Delete")
    expect(delete_button).to_be_visible(timeout=5000)
    box = delete_button.bounding_box()
    # Fall back to the size the script launches pages with if none is reported.
    viewport = page.viewport_size or {"width": 1440, "height": 950}
    assert box and 0 <= box["x"] <= viewport["width"] - box["width"], "Delete User button is outside the visible viewport"
# Skip (exit 77) unless the admin password and the normal-user credentials are provided.
require_env("ADMIN_PASS or BOOTSTRAP_ADMIN_PASS", ADMIN_PASS)
require_env("USER_A", USER_A)
require_env("USER_A_PASS", USER_A_PASS)
# Drive one headless Chromium instance: admin checks first, then normal-user checks,
# each on its own page.
with sync_playwright() as p:
    browser = p.chromium.launch(headless=True)
    admin_page = browser.new_page(viewport={"width": 1440, "height": 950})
    login(admin_page, ADMIN_USER, ADMIN_PASS)
    assert_admin_affordances(admin_page)
    user_page = browser.new_page(viewport={"width": 1440, "height": 950})
    login(user_page, USER_A, USER_A_PASS)
    assert_user_restrictions(user_page)
    browser.close()
print("PASS: multi-tenant/RBAC UI contract")

View File

@ -0,0 +1,17 @@
#!/usr/bin/env bash
# Scope: validates backend unit tests and frontend production build for the README deployment refresh.
# Abort on the first failing command, on unset variables, and on failures inside pipelines.
set -euo pipefail
# Resolve the repository root as the parent of the directory holding this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
# Each step runs in a subshell so its `cd` does not affect the next step.
(
  # Backend: run all Go tests.
  cd "$ROOT_DIR/backend"
  go test ./...
)
(
  # Frontend: clean install from the lockfile, then production build.
  cd "$ROOT_DIR/frontend"
  npm ci
  npm run build
)

View File

@ -0,0 +1,198 @@
#!/usr/bin/env python3
# Covers admin-created user tenant metadata: per-user namespace, CPU/memory/GPU/GPU
# memory limits, default cluster assignment, login and /auth/me propagation,
# normal-user business API access, and invalid quota rejection.
import json
import os
import sys
import uuid
from dataclasses import dataclass
from typing import Any
from urllib.error import HTTPError, URLError
from urllib.parse import urljoin
from urllib.request import Request, urlopen
# API base URL from the environment, with any trailing slash removed.
RAW_BASE_URL = os.environ.get("BASE_URL", "http://localhost:18081/api/v1").rstrip("/")
# Exactly one trailing slash, so urljoin() appends relative paths under the base path.
BASE_URL = RAW_BASE_URL + "/"
# Admin credentials fall back to the BOOTSTRAP_ADMIN_* variables; the password has no default.
ADMIN_USER = os.environ.get("ADMIN_USER", os.environ.get("BOOTSTRAP_ADMIN_USER", "admin"))
ADMIN_PASS = os.environ.get("ADMIN_PASS", os.environ.get("BOOTSTRAP_ADMIN_PASS", ""))
@dataclass
class Response:
    """Result of one HTTP call: status code, raw text body and parsed JSON."""

    # HTTP status code of the response (also set for error responses).
    status: int
    # Response body as text.
    body: str
    # Body parsed as JSON, or None when it is empty or not valid JSON.
    json: Any
def parse_json(body: str) -> Any:
    """Decode ``body`` as JSON; return None for an empty or undecodable body."""
    if not body:
        return None
    try:
        decoded = json.loads(body)
    except json.JSONDecodeError:
        return None
    return decoded
def request(method: str, path: str, token: str | None = None, payload: Any = None) -> Response:
    """Send one HTTP request to the API and wrap the outcome in a Response.

    ``path`` is resolved relative to BASE_URL. A non-None ``payload`` is sent
    as a JSON body, and a truthy ``token`` as a Bearer Authorization header.
    HTTP error statuses are returned as normal Response objects; only a
    connection-level URLError is raised, as an AssertionError.
    """
    headers = {"Accept": "application/json"}
    encoded = None
    if payload is not None:
        encoded = json.dumps(payload).encode("utf-8")
        headers["Content-Type"] = "application/json"
    if token:
        headers["Authorization"] = f"Bearer {token}"
    target = urljoin(BASE_URL, path.lstrip("/"))
    outgoing = Request(target, data=encoded, headers=headers, method=method)
    try:
        with urlopen(outgoing, timeout=20) as res:
            text = res.read().decode("utf-8", errors="replace")
            status = res.status
    except HTTPError as exc:
        # HTTPError carries the error response; surface it like any other reply.
        text = exc.read().decode("utf-8", errors="replace")
        status = exc.code
    except URLError as exc:
        raise AssertionError(f"Cannot reach {BASE_URL}: {exc}") from exc
    return Response(status, text, parse_json(text))
def assert_status(resp: Response, expected: set[int], context: str) -> None:
    """Raise AssertionError unless ``resp.status`` is one of ``expected``."""
    if resp.status in expected:
        return
    raise AssertionError(f"{context}: expected {sorted(expected)}, got {resp.status}. Body: {resp.body[:500]}")
def login(username: str, password: str) -> Response:
    """Log in via POST /auth/login and return the full response.

    Raises AssertionError when the status is not 200 or when the JSON body is
    not an object with a non-empty ``accessToken``.
    """
    credentials = {"username": username, "password": password}
    resp = request("POST", "/auth/login", payload=credentials)
    assert_status(resp, {200}, f"login {username}")
    has_token = isinstance(resp.json, dict) and bool(resp.json.get("accessToken"))
    if not has_token:
        raise AssertionError(f"login {username}: missing accessToken")
    return resp
def assert_field(obj: dict[str, Any], field: str, expected: str, context: str) -> None:
    """Raise AssertionError unless ``str(obj[field])`` equals ``expected``.

    A missing field is compared as the empty string.
    """
    actual = str(obj.get(field, ""))
    if actual == expected:
        return
    raise AssertionError(f"{context}: expected {field}={expected!r}, got {obj.get(field)!r}. Body: {obj}")
def _track_created_user(resp: Response, pending: list[str]) -> None:
    """Queue the id of a user the server just created so cleanup always deletes it."""
    if resp.status == 201 and isinstance(resp.json, dict) and resp.json.get("id"):
        pending.append(str(resp.json["id"]))


def _delete_leftover_users(pending: list[str], admin_token: str) -> None:
    """Best-effort deletion of users still queued after a failed run; never raises."""
    for user_id in pending:
        try:
            resp = request("DELETE", f"/users/{user_id}", admin_token)
        except AssertionError as exc:
            print(f"WARN: cleanup user {user_id} failed: {exc}", file=sys.stderr)
            continue
        if resp.status not in {204, 404}:
            print(f"WARN: cleanup user {user_id} returned HTTP {resp.status}", file=sys.stderr)


def main() -> int:
    """Run the user namespace/quota API contract and return 0 on success.

    Creates one user with a server-generated namespace and one with explicit
    namespace and quota values, checks that those values propagate to the
    login response, /auth/me and /workspaces, updates the quotas, and finally
    deletes both users. Any violated expectation raises AssertionError.

    Every user the server creates is tracked, so a failure part-way through
    still deletes the test accounts (best effort) before the error propagates.
    """
    if not ADMIN_PASS:
        raise AssertionError("ADMIN_PASS or BOOTSTRAP_ADMIN_PASS is required")
    suffix = uuid.uuid4().hex[:8]
    username = f"quota-user-{suffix}"
    auto_username = f"auto-ns-{suffix}"
    password = "QuotaUser123!"
    namespace = f"ocdp-u-quota-{suffix}"
    admin = login(ADMIN_USER, ADMIN_PASS)
    admin_token = admin.json["accessToken"]
    # Ids of users created by this run that have not been deleted yet.
    pending_user_ids: list[str] = []
    try:
        clusters = request("GET", "/clusters", admin_token)
        assert_status(clusters, {200}, "admin lists clusters")
        default_cluster_id = ""
        if isinstance(clusters.json, list) and clusters.json:
            # Prefer the cluster named "k3s"; otherwise take the first one listed.
            preferred = next((item for item in clusters.json if item.get("name") == "k3s"), clusters.json[0])
            default_cluster_id = str(preferred.get("id", ""))

        invalid = request("POST", "/users", admin_token, {
            "username": f"bad-quota-{suffix}",
            "password": password,
            "role": "user",
            "namespace": f"ocdp-u-bad-{suffix}",
            "quotaCpu": "not-a-quantity",
        })
        # If the server wrongly accepted the invalid quota, still remove that user.
        _track_created_user(invalid, pending_user_ids)
        assert_status(invalid, {400}, "invalid quota is rejected")

        auto_created = request("POST", "/users", admin_token, {
            "username": auto_username,
            "password": password,
            "role": "user",
            "defaultClusterId": default_cluster_id,
            "mustChangePassword": False,
            "isActive": True,
        })
        _track_created_user(auto_created, pending_user_ids)
        assert_status(auto_created, {201}, "create user with generated namespace")
        if not isinstance(auto_created.json, dict):
            raise AssertionError("create user with generated namespace: expected object response")
        assert_field(auto_created.json, "namespace", f"ocdp-u-auto-ns-{suffix}", "generated namespace user response")
        if str(auto_created.json.get("workspaceId", "")) == "00000000-0000-0000-0000-000000000010":
            raise AssertionError(f"generated namespace user must not use default workspace: {auto_created.json}")

        created = request("POST", "/users", admin_token, {
            "username": username,
            "password": password,
            "role": "user",
            "namespace": namespace,
            "defaultClusterId": default_cluster_id,
            "quotaCpu": "2500m",
            "quotaMemory": "12Gi",
            "quotaGpu": "1",
            "quotaGpuMemory": "24000",
            "mustChangePassword": False,
            "isActive": True,
        })
        _track_created_user(created, pending_user_ids)
        assert_status(created, {201}, "create quota user")
        if not isinstance(created.json, dict):
            raise AssertionError("create quota user: expected object response")
        for field, expected in {
            "namespace": namespace,
            "defaultClusterId": default_cluster_id,
            "quotaCpu": "2500m",
            "quotaMemory": "12Gi",
            "quotaGpu": "1",
            "quotaGpuMemory": "24000",
        }.items():
            assert_field(created.json, field, expected, "create quota user response")

        user_login = login(username, password)
        user_token = user_login.json["accessToken"]
        assert_field(user_login.json, "namespace", namespace, "login response")
        if default_cluster_id:
            assert_field(user_login.json, "defaultClusterId", default_cluster_id, "login response")
        # A null "permissions" value is treated like an empty list so it fails
        # with a readable assertion instead of a TypeError.
        permissions = user_login.json.get("permissions") or []
        if "home:view" not in permissions:
            raise AssertionError(f"login response: ordinary user permissions must include home:view, got {user_login.json.get('permissions')}")
        if "monitoring:clusters:view" not in permissions:
            raise AssertionError("login response: ordinary user permissions must include monitoring:clusters:view")

        me = request("GET", "/auth/me", user_token)
        assert_status(me, {200}, "/auth/me")
        assert_field(me.json, "namespace", namespace, "/auth/me")
        if default_cluster_id:
            assert_field(me.json, "defaultClusterId", default_cluster_id, "/auth/me")
        assert_field(me.json, "quotaMemory", "12Gi", "/auth/me")

        updated = request("PUT", f"/users/{created.json['id']}", admin_token, {
            "namespace": namespace,
            "defaultClusterId": default_cluster_id,
            "quotaCpu": "3",
            "quotaMemory": "10Gi",
            "quotaGpu": "1",
            "quotaGpuMemory": "10000",
        })
        assert_status(updated, {200}, "update quota user limits")
        assert_field(updated.json, "quotaCpu", "3", "update quota user response")
        assert_field(updated.json, "quotaGpuMemory", "10000", "update quota user response")

        # Log in again so the token and response reflect the updated quota.
        user_login_after_update = login(username, password)
        user_token = user_login_after_update.json["accessToken"]
        assert_field(user_login_after_update.json, "quotaGpuMemory", "10000", "login response after quota update")

        workspaces = request("GET", "/workspaces", user_token)
        assert_status(workspaces, {200}, "user lists own workspace")
        if not isinstance(workspaces.json, list) or len(workspaces.json) != 1:
            raise AssertionError(f"user lists own workspace: expected one workspace, got {workspaces.body[:500]}")
        assert_field(workspaces.json[0], "k8sNamespace", namespace, "workspace response")
        if default_cluster_id:
            assert_field(workspaces.json[0], "defaultClusterId", default_cluster_id, "workspace response")
        assert_field(workspaces.json[0], "quotaGpuMemory", "10000", "workspace response")

        assert_status(request("GET", "/clusters", user_token), {200}, "normal user can list clusters")
        assert_status(request("GET", "/registries", user_token), {200}, "normal user can list registries")

        # Asserted cleanup on the success path; deleted ids leave the pending list
        # so the finally block does not delete them a second time.
        quota_user_id = str(created.json["id"])
        delete = request("DELETE", f"/users/{created.json['id']}", admin_token)
        assert_status(delete, {204}, "cleanup quota user")
        pending_user_ids[:] = [uid for uid in pending_user_ids if uid != quota_user_id]

        auto_user_id = str(auto_created.json["id"])
        auto_delete = request("DELETE", f"/users/{auto_created.json['id']}", admin_token)
        assert_status(auto_delete, {204}, "cleanup generated namespace user")
        pending_user_ids[:] = [uid for uid in pending_user_ids if uid != auto_user_id]

        print("PASS: user namespace/quota API contract")
        return 0
    finally:
        _delete_leftover_users(pending_user_ids, admin_token)


# Script entry point: contract violations surface as AssertionError and exit with status 1.
if __name__ == "__main__":
    try:
        sys.exit(main())
    except AssertionError as exc:
        print(f"FAIL: {exc}", file=sys.stderr)
        sys.exit(1)

View File

@ -0,0 +1,283 @@
#!/usr/bin/env python3
# Covers a real k3s deployment smoke path for vllm-serve: admin-created ordinary
# user with integer GPU memory quota, tenant namespace/ResourceQuota creation,
# Harbor chart deployment with the requested vLLM image, diagnostics fetch, and
# cleanup of the instance and test user.
import json
import os
import subprocess
import sys
import tempfile
import time
import uuid
from dataclasses import dataclass
from typing import Any
from urllib.error import HTTPError, URLError
from urllib.parse import quote, urljoin
from urllib.request import Request, urlopen
# API base URL from the environment, with any trailing slash removed.
RAW_BASE_URL = os.environ.get("BASE_URL", "http://localhost:18081/api/v1").rstrip("/")
# Exactly one trailing slash, so urljoin() appends relative paths under the base path.
BASE_URL = RAW_BASE_URL + "/"
# Admin credentials fall back to the BOOTSTRAP_ADMIN_* variables; the password has no default.
ADMIN_USER = os.environ.get("ADMIN_USER", os.environ.get("BOOTSTRAP_ADMIN_USER", "admin"))
ADMIN_PASS = os.environ.get("ADMIN_PASS", os.environ.get("BOOTSTRAP_ADMIN_PASS", ""))
# Names used to look up the target cluster and registry in the API listings.
TARGET_CLUSTER_NAME = os.environ.get("TARGET_CLUSTER_NAME", "k3s")
TARGET_REGISTRY_NAME = os.environ.get("TARGET_REGISTRY_NAME", "harbor-bwgdi")
# Chart coordinates and the values interpolated into the Helm values override.
CHART_REPOSITORY = os.environ.get("VLLM_CHART_REPOSITORY", "charts/vllm-serve")
CHART_TAG = os.environ.get("VLLM_CHART_TAG", "0.6.0")
VLLM_IMAGE = os.environ.get("VLLM_IMAGE", "harbor.bwgdi.com/library/vllm-openai:v0.17.1")
MODEL_NAME = os.environ.get("VLLM_MODEL", "Qwen/Qwen2.5-0.5B-Instruct")
# Kept as a string: it is sent as quotaGpuMemory and compared against string values.
GPU_MEM_MB = os.environ.get("GPU_MEM_MB", "10000")
@dataclass
class Response:
    """Result of one HTTP call: status, headers, raw text body and parsed JSON."""

    # HTTP status code of the response (also set for error responses).
    status: int
    # Response headers copied into a plain dict.
    headers: dict[str, str]
    # Response body as text.
    body: str
    # Body parsed as JSON, or None when it is empty or not valid JSON.
    json: Any
def parse_json(body: str) -> Any:
    """Decode ``body`` as JSON; return None for an empty or undecodable body."""
    try:
        return json.loads(body) if body else None
    except json.JSONDecodeError:
        return None
def request(method: str, path: str, token: str | None = None, payload: Any = None, timeout: int = 30) -> Response:
    """Send one HTTP request and wrap the outcome in a Response.

    A ``path`` starting with "http" is used as the full URL; anything else is
    resolved relative to BASE_URL. A non-None ``payload`` is sent as a JSON
    body, and a truthy ``token`` as a Bearer Authorization header. HTTP error
    statuses are returned as normal Response objects; only a connection-level
    URLError is raised, as an AssertionError.
    """
    if path.startswith("http"):
        url = path
    else:
        url = urljoin(BASE_URL, path.lstrip("/"))
    headers = {"Accept": "application/json"}
    encoded = None
    if payload is not None:
        encoded = json.dumps(payload).encode("utf-8")
        headers["Content-Type"] = "application/json"
    if token:
        headers["Authorization"] = f"Bearer {token}"
    outgoing = Request(url, data=encoded, headers=headers, method=method)
    try:
        with urlopen(outgoing, timeout=timeout) as res:
            text = res.read().decode("utf-8", errors="replace")
            status, reply_headers = res.status, dict(res.headers)
    except HTTPError as exc:
        # HTTPError carries the error response; surface it like any other reply.
        text = exc.read().decode("utf-8", errors="replace")
        status, reply_headers = exc.code, dict(exc.headers)
    except URLError as exc:
        raise AssertionError(f"Cannot reach BASE_URL={BASE_URL}: {exc}") from exc
    return Response(status, reply_headers, text, parse_json(text))
def assert_status(resp: Response, expected: set[int], context: str) -> None:
    """Raise AssertionError unless ``resp.status`` is one of ``expected``."""
    if resp.status in expected:
        return
    raise AssertionError(f"{context}: expected HTTP {sorted(expected)}, got {resp.status}. Body: {resp.body[:800]}")
def login(username: str, password: str) -> str:
    """Log in via POST /auth/login and return the access token as a string.

    Raises AssertionError when the status is not 200, or when the JSON body is
    not an object with a non-empty ``accessToken``. The explicit check keeps
    such failures inside the AssertionError path the script reports as FAIL,
    rather than surfacing as a TypeError or KeyError traceback.
    """
    resp = request("POST", "/auth/login", payload={"username": username, "password": password})
    assert_status(resp, {200}, f"login {username}")
    if not isinstance(resp.json, dict) or not resp.json.get("accessToken"):
        raise AssertionError(f"login {username}: missing accessToken")
    return str(resp.json["accessToken"])
def list_items(path: str, token: str, context: str) -> list[dict[str, Any]]:
    """GET ``path`` and return the dict entries of the list it contains.

    Accepts either a bare JSON array or an object holding the array under
    "items", "clusters", "registries" or "instances" (first match wins).
    Non-dict entries are dropped. Raises AssertionError when the status is
    not 200 or no list is found.
    """
    resp = request("GET", path, token)
    assert_status(resp, {200}, context)
    data = resp.json
    entries = None
    if isinstance(data, list):
        entries = data
    elif isinstance(data, dict):
        for key in ("items", "clusters", "registries", "instances"):
            candidate = data.get(key)
            if isinstance(candidate, list):
                entries = candidate
                break
    if entries is None:
        raise AssertionError(f"{context}: expected list response. Body: {resp.body[:800]}")
    return [entry for entry in entries if isinstance(entry, dict)]
def find_by_name(items: list[dict[str, Any]], name: str, context: str) -> dict[str, Any]:
    """Return the first item whose "name" equals ``name``.

    Raises AssertionError listing the available names when there is no match.
    """
    match = next((candidate for candidate in items if candidate.get("name") == name), None)
    if match is None:
        raise AssertionError(f"{context}: could not find {name!r}. Available: {[item.get('name') for item in items]}")
    return match
def issue_kubeconfig(token: str, workspace_id: str, cluster_id: str) -> str:
    """Request a kubeconfig for a workspace/cluster pair with a 7200-second TTL.

    Returns the "kubeconfig" field of the JSON response as a string; raises
    AssertionError when the status is not 200.
    """
    body = {"clusterId": cluster_id, "ttlSeconds": 7200}
    resp = request("POST", f"/workspaces/{workspace_id}/kubeconfig", token, body)
    assert_status(resp, {200}, "issue tenant kubeconfig")
    return str(resp.json["kubeconfig"])
def issue_current_kubeconfig(token: str) -> str:
    """Fetch the caller's kubeconfig from GET /workspaces/credentials/kubeconfig.

    Returns the raw response body. Raises AssertionError when the status is
    not 200 or when the body lacks either "server:" or "token:".
    """
    resp = request("GET", "/workspaces/credentials/kubeconfig", token)
    assert_status(resp, {200}, "issue current default-cluster kubeconfig")
    text = resp.body
    looks_like_kubeconfig = "server:" in text and "token:" in text
    if not looks_like_kubeconfig:
        raise AssertionError(f"current kubeconfig response does not look like kubeconfig YAML: {resp.body[:300]}")
    return text
def kubectl_json(kubeconfig: str, args: list[str]) -> Any:
    """Run ``kubectl <args> -o json`` with the given kubeconfig and return the parsed output.

    The kubeconfig text is written to a temporary file that is always removed
    afterwards, including when writing it fails.

    Raises:
        AssertionError: kubectl cannot be started, times out after 60 seconds,
            exits non-zero, or prints output that is not valid JSON. Using one
            exception type keeps every failure on the path the script reports
            as a FAIL line.
    """
    label = f"kubectl {' '.join(args)}"
    handle = tempfile.NamedTemporaryFile("w", delete=False)
    kubeconfig_path = handle.name
    try:
        with handle:
            handle.write(kubeconfig)
        try:
            proc = subprocess.run(
                ["kubectl", "--kubeconfig", kubeconfig_path, *args, "-o", "json"],
                text=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                timeout=60,
                check=False,
            )
        except subprocess.TimeoutExpired as exc:
            raise AssertionError(f"{label} failed: timed out after 60 seconds") from exc
        except OSError as exc:
            # Typically FileNotFoundError when kubectl is not installed.
            raise AssertionError(f"{label} failed: could not start kubectl ({exc})") from exc
        if proc.returncode != 0:
            raise AssertionError(f"kubectl {' '.join(args)} failed: {proc.stderr.strip()}")
        try:
            return json.loads(proc.stdout)
        except json.JSONDecodeError as exc:
            raise AssertionError(f"{label} failed: output is not JSON: {proc.stdout[:300]}") from exc
    finally:
        os.unlink(kubeconfig_path)
def values_yaml() -> str:
    """Render the Helm values override sent with the vLLM instance request.

    Interpolates the module-level VLLM_IMAGE, MODEL_NAME and GPU_MEM_MB
    settings into a fixed YAML template.
    """
    # NOTE(review): the template lines below show no nested indentation in this
    # view; confirm the intended YAML nesting against the chart's values schema.
    return f"""vllm:
image: "{VLLM_IMAGE}"
model:
huggingfaceName: "{MODEL_NAME}"
resources:
gpuLimit: 1
gpuMem: {GPU_MEM_MB}
cpuRequest: 4
memoryLimit: "12Gi"
shmSize: "4Gi"
replicaCount: 1
workerSize: 1
initContainers:
enabled: false
"""
def main() -> int:
    """Deploy vllm-serve for a freshly created tenant user and verify the result.

    Steps: resolve the target cluster and registry by name, confirm the chart
    tag is listed, create a user with quotas, check the tenant ResourceQuota
    via kubectl, create the instance, poll until it reports "deployed", fetch
    diagnostics, and confirm the service and deployment exist in the tenant
    namespace. Returns 0 on success; any violated expectation raises
    AssertionError. The instance and the user are deleted in ``finally``.
    """
    if not ADMIN_PASS:
        raise AssertionError("ADMIN_PASS or BOOTSTRAP_ADMIN_PASS is required")
    suffix = uuid.uuid4().hex[:6]
    username = f"vllm-k3s-{suffix}"
    password = "VllmK3s123!"
    namespace = f"ocdp-u-vllm-{suffix}"
    release = f"ocdp-vllm-k3s-{suffix}"
    admin_token = login(ADMIN_USER, ADMIN_PASS)
    # Filled in as resources are created; empty values make the finally block skip them.
    user_id = ""
    instance_id = ""
    cluster_id = ""
    try:
        clusters = list_items("/clusters", admin_token, "list clusters")
        cluster = find_by_name(clusters, TARGET_CLUSTER_NAME, "select target cluster")
        cluster_id = str(cluster["id"])
        registries = list_items("/registries", admin_token, "list registries")
        registry = find_by_name(registries, TARGET_REGISTRY_NAME, "select target registry")
        registry_id = str(registry["id"])
        # The repository name is fully percent-encoded (including "/") for the URL path.
        artifacts = request(
            "GET",
            f"/registries/{registry_id}/repositories/{quote(CHART_REPOSITORY, safe='')}/artifacts?media_type=chart",
            admin_token,
        )
        assert_status(artifacts, {200}, "verify vllm chart artifacts")
        # Plain substring check on the raw response body.
        if CHART_TAG not in artifacts.body:
            raise AssertionError(f"{CHART_REPOSITORY}:{CHART_TAG} was not visible in Harbor artifacts")
        created = request(
            "POST",
            "/users",
            admin_token,
            {
                "username": username,
                "password": password,
                "role": "user",
                "namespace": namespace,
                "defaultClusterId": cluster_id,
                "quotaCpu": "6",
                "quotaMemory": "16Gi",
                "quotaGpu": "1",
                "quotaGpuMemory": GPU_MEM_MB,
                "isActive": True,
                "mustChangePassword": False,
            },
        )
        assert_status(created, {201}, "create vllm smoke user")
        user_id = str(created.json["id"])
        if str(created.json.get("quotaGpuMemory")) != GPU_MEM_MB:
            raise AssertionError(f"quotaGpuMemory should stay integer {GPU_MEM_MB}, got {created.json.get('quotaGpuMemory')}")
        user_token = login(username, password)
        workspaces = list_items("/workspaces", user_token, "user lists own workspace")
        # NOTE(review): an empty workspace list raises IndexError here, and
        # workspace_id is not used again in this function.
        workspace = workspaces[0]
        workspace_id = str(workspace["id"])
        kubeconfig = issue_current_kubeconfig(user_token)
        quota = kubectl_json(kubeconfig, ["get", "resourcequota", "tenant-quota", "-n", namespace])
        # Prefer status.hard, then spec.hard, then an empty mapping.
        hard = quota.get("status", {}).get("hard") or quota.get("spec", {}).get("hard") or {}
        gpumem_hard = str(hard.get("requests.nvidia.com/gpumem", ""))
        # NOTE(review): the literal "10k" only corresponds to the default GPU_MEM_MB of 10000.
        if gpumem_hard not in {GPU_MEM_MB, "10k"}:
            raise AssertionError(f"ResourceQuota gpumem should be {GPU_MEM_MB} or Kubernetes canonical 10k, got {gpumem_hard!r}")
        print(f"quota gpumem={gpumem_hard}")
        payload = {
            "name": release,
            "namespace": namespace,
            "registryId": registry_id,
            "repository": CHART_REPOSITORY,
            "tag": CHART_TAG,
            "description": f"smoke deploy {MODEL_NAME}",
            "valuesYaml": values_yaml(),
        }
        # The create call is given up to 1200 seconds.
        created_instance = request("POST", f"/clusters/{cluster_id}/instances", user_token, payload, timeout=1200)
        assert_status(created_instance, {201}, "create vllm instance")
        instance_id = str(created_instance.json["id"])
        print(f"instance={instance_id} release={release} cluster={TARGET_CLUSTER_NAME} namespace={namespace}")
        current = created_instance
        # Poll up to 60 times, sleeping 10 seconds between attempts.
        for attempt in range(1, 61):
            current = request("GET", f"/clusters/{cluster_id}/instances/{instance_id}", user_token)
            assert_status(current, {200}, "poll vllm instance")
            status = str(current.json.get("status"))
            print(f"poll={attempt} status={status}")
            if status == "deployed":
                break
            if status == "failed":
                raise AssertionError(f"vLLM instance failed: {current.body[:1200]}")
            time.sleep(10)
        else:
            # for-else: reached only when the loop finished without hitting `break`.
            raise AssertionError(f"vLLM instance did not reach deployed. Last: {current.body[:1200]}")
        diagnostics = request("GET", f"/clusters/{cluster_id}/instances/{instance_id}/diagnostics?tailLines=80", user_token, timeout=60)
        assert_status(diagnostics, {200}, "fetch diagnostics")
        # Diagnostics contents are only counted and printed, not asserted on.
        pods = diagnostics.json.get("pods", []) if isinstance(diagnostics.json, dict) else []
        services = diagnostics.json.get("services", []) if isinstance(diagnostics.json, dict) else []
        logs = diagnostics.json.get("logs", []) if isinstance(diagnostics.json, dict) else []
        print(f"diagnostics pods={len(pods)} services={len(services)} logs={len(logs)}")
        live_services = kubectl_json(kubeconfig, ["get", "svc", "-n", namespace])
        service_names = {item.get("metadata", {}).get("name") for item in live_services.get("items", [])}
        if f"{release}-svc" not in service_names:
            raise AssertionError(f"expected service {release}-svc in tenant namespace {namespace}, got {sorted(service_names)}")
        live_deployments = kubectl_json(kubeconfig, ["get", "deployments", "-n", namespace])
        deployment_names = {item.get("metadata", {}).get("name") for item in live_deployments.get("items", [])}
        if release not in deployment_names:
            raise AssertionError(f"expected deployment {release} in tenant namespace {namespace}, got {sorted(deployment_names)}")
        print(f"tenant namespace resources service={release}-svc deployment={release}")
        return 0
    finally:
        # Cleanup uses the admin token and only prints the resulting status codes.
        if instance_id and cluster_id:
            cleanup = request("DELETE", f"/clusters/{cluster_id}/instances/{instance_id}", admin_token, timeout=300)
            print(f"cleanup instance http={cleanup.status}")
        if user_id:
            cleanup_user = request("DELETE", f"/users/{user_id}", admin_token)
            print(f"cleanup user http={cleanup_user.status}")
# Script entry point: AssertionError is reported as a FAIL line with exit status 1.
if __name__ == "__main__":
    try:
        raise SystemExit(main())
    except AssertionError as exc:
        print(f"FAIL: {exc}", file=sys.stderr)
        raise SystemExit(1)