From cb6cf582290b85f506b2d64ba3017a73516708fb Mon Sep 17 00:00:00 2001 From: jackyliu Date: Tue, 2 Jun 2026 09:48:58 +0000 Subject: [PATCH] chore: update workload runtime integration --- AGENTS.md | 7 +++--- README.md | 23 +++++++++++-------- apps/code-server/README.md | 16 ++++++++++--- apps/code-server/base/deployment.yaml | 19 ++++++++------- apps/code-server/base/kustomization.yaml | 2 -- apps/code-server/base/pvc.yaml | 15 ------------ .../components/nfs/kustomization.yaml | 8 ------- .../components/nfs/pvc-rwx-patch.yaml | 4 ---- .../deployment-patch.yaml | 9 +++++--- .../code-server-nodeport/kustomization.yaml | 5 ---- .../code-server-nodeport/pvc-patch.yaml | 4 ---- 11 files changed, 45 insertions(+), 67 deletions(-) delete mode 100644 apps/code-server/base/pvc.yaml delete mode 100644 apps/code-server/components/nfs/kustomization.yaml delete mode 100644 apps/code-server/components/nfs/pvc-rwx-patch.yaml delete mode 100644 tests/kustomize/code-server-nodeport/pvc-patch.yaml diff --git a/AGENTS.md b/AGENTS.md index a20a3c6..a3f088c 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -26,14 +26,15 @@ This file is the execution guide for `ocdp-workload-manifests`. - Runtime source generation may create Secret, ConfigMap, and patch files, then run Kustomize. - Bases should stay template-free YAML. -- App bases should keep Services as `ClusterIP`; expose apps from - WorkloadTemplate values such as `exposure=nodeport` or `exposure=loadbalancer`. +- App bases should keep Services internally reachable; expose apps from + WorkloadClaim top-level intent such as `exposure=internal` or + `exposure=external`, rendered by the agent/template path. Components such as `service-nodeport`, `service-loadbalancer`, or `ingress` are implementation building blocks, not the user-facing contract. - Environment overlays are platform/admin choices for cluster- or site-level differences such as StorageClass, IngressClass, GPU runtime, registry prefix, pull secrets, node selectors, tolerations, and site labels. -- NodePort numbers belong in per-claim values handled by `ocdp-server`, not in +- NodePort numbers should be assigned per claim by Kubernetes, not hard-coded in reusable app bases or components. ## Validation diff --git a/README.md b/README.md index e92a1ed..063a107 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ ocdp-server PostgreSQL WorkloadTemplate metadata WorkloadTemplate source.repositoryUrl/ref/path user-facing values schema / parameters - environment overlay/profile metadata + environment overlay and policy metadata access bindings no WorkloadClaim instance storage @@ -56,6 +56,8 @@ user-created claim lives in Kubernetes, not in PostgreSQL. ```text apps/ + earth2studio-demo/ + base/ code-server/ base/ components/ @@ -87,10 +89,10 @@ Secrets, ConfigMaps, components, and patches. ## Exposure -Base services are `ClusterIP`. User-facing exposure choices such as -`clusterip`, `nodeport`, and `loadbalancer` belong in the WorkloadTemplate -values schema. `ocdp-server` can translate that value into runtime Service -configuration or a generated patch. +Base services should stay internally reachable. User-facing exposure choices +belong on the WorkloadClaim top-level intent such as `exposure=internal` or +`exposure=external`; the agent/template renderer turns that intent into runtime +Service resources. Reusable components are still useful implementation building blocks: @@ -99,19 +101,20 @@ Reusable components are still useful implementation building blocks: - `components/service-loadbalancer`: change the app Service to `LoadBalancer`. - `components/service-nodeport`: change the app Service to `NodePort`. -When `nodeport` is selected by the user, `ocdp-server` may generate an instance -patch or runtime Service field for an explicit port: +When `exposure=external` is selected by the user, the agent/template renderer +may generate a NodePort Service and leave the concrete nodePort for Kubernetes +to allocate: ```yaml - op: add - path: /spec/ports/0/nodePort - value: 30080 + path: /spec/type + value: NodePort ``` Do not hard-code shared NodePort values in app bases or reusable components. Environment overlays are different from user choices. They are selected by the -platform from cluster, workspace, or customer profile information and can carry +platform from cluster, workspace, or customer policy information and can carry things like StorageClass, IngressClass, GPU runtime class, registry prefix, pull-secret wiring, node selectors, tolerations, and site-specific labels. diff --git a/apps/code-server/README.md b/apps/code-server/README.md index 2a25773..cd235d4 100644 --- a/apps/code-server/README.md +++ b/apps/code-server/README.md @@ -1,7 +1,17 @@ # code-server -The base deploys code-server with a PVC, Service, and password Secret reference. +The base deploys code-server from `harbor.bwgdi.com/library/earth2studio-demo:v6` +with a Service and password Secret reference. -The Secret is generated by the instance overlay. Do not commit real passwords or -tokens to this catalog. +The Secret is generated by the instance overlay. Workspace storage is mounted +from top-level WorkloadClaim `storage`, and exposure is rendered from top-level +`exposure`. Do not commit real passwords or tokens to this catalog. +The WorkloadTemplate exposes storage intent (`temporary`, `retained`, +`existing`) and exposure intent (`internal`, `external`) as claim-time choices. +Workspace storage defaults to retained. The template also mounts a hidden +retained `weight` StorageClass PVC at `/models` for model weights; this +StorageClass detail is platform-owned and is not exposed as a user parameter. + +The Deployment keeps HAMi resource keys in `resources.limits` at all times: +`nvidia.com/gpu` for GPU count and `nvidia.com/gpumem` for GPU memory in MiB. diff --git a/apps/code-server/base/deployment.yaml b/apps/code-server/base/deployment.yaml index 8f75759..c5e6894 100644 --- a/apps/code-server/base/deployment.yaml +++ b/apps/code-server/base/deployment.yaml @@ -19,18 +19,23 @@ spec: app.kubernetes.io/component: ide app.kubernetes.io/part-of: ocdp-workload spec: + imagePullSecrets: + - name: regcred + schedulerName: hami-scheduler securityContext: fsGroup: 1000 containers: - name: code-server - image: codercom/code-server:latest + image: harbor.bwgdi.com/library/earth2studio-demo:v6 imagePullPolicy: IfNotPresent + command: + - code-server args: - --bind-addr - 0.0.0.0:8080 - --auth - password - - /home/coder/project + - /workspace env: - name: PASSWORD valueFrom: @@ -55,11 +60,5 @@ spec: limits: cpu: "2" memory: 4Gi - volumeMounts: - - name: workspace - mountPath: /home/coder/project - volumes: - - name: workspace - persistentVolumeClaim: - claimName: code-server-data - + nvidia.com/gpu: "1" + nvidia.com/gpumem: "8192" diff --git a/apps/code-server/base/kustomization.yaml b/apps/code-server/base/kustomization.yaml index 1df75ef..a33121c 100644 --- a/apps/code-server/base/kustomization.yaml +++ b/apps/code-server/base/kustomization.yaml @@ -1,7 +1,5 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - - pvc.yaml - deployment.yaml - service.yaml - diff --git a/apps/code-server/base/pvc.yaml b/apps/code-server/base/pvc.yaml deleted file mode 100644 index bb93d24..0000000 --- a/apps/code-server/base/pvc.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - name: code-server-data - labels: - app.kubernetes.io/name: code-server - app.kubernetes.io/component: workspace-storage - app.kubernetes.io/part-of: ocdp-workload -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 20Gi - diff --git a/apps/code-server/components/nfs/kustomization.yaml b/apps/code-server/components/nfs/kustomization.yaml deleted file mode 100644 index d5f4b11..0000000 --- a/apps/code-server/components/nfs/kustomization.yaml +++ /dev/null @@ -1,8 +0,0 @@ -apiVersion: kustomize.config.k8s.io/v1alpha1 -kind: Component -patches: - - path: pvc-rwx-patch.yaml - target: - kind: PersistentVolumeClaim - name: code-server-data - diff --git a/apps/code-server/components/nfs/pvc-rwx-patch.yaml b/apps/code-server/components/nfs/pvc-rwx-patch.yaml deleted file mode 100644 index df2a78a..0000000 --- a/apps/code-server/components/nfs/pvc-rwx-patch.yaml +++ /dev/null @@ -1,4 +0,0 @@ -- op: replace - path: /spec/accessModes/0 - value: ReadWriteMany - diff --git a/tests/kustomize/code-server-nodeport/deployment-patch.yaml b/tests/kustomize/code-server-nodeport/deployment-patch.yaml index e3b1d00..e518fe4 100644 --- a/tests/kustomize/code-server-nodeport/deployment-patch.yaml +++ b/tests/kustomize/code-server-nodeport/deployment-patch.yaml @@ -11,10 +11,13 @@ path: /spec/template/spec/containers/0/resources/limits/memory value: 4Gi - op: replace - path: /spec/template/spec/containers/0/args/4 - value: /home/coder/workspace + path: /spec/template/spec/containers/0/resources/limits/nvidia.com~1gpu + value: "1" - op: replace - path: /spec/template/spec/containers/0/volumeMounts/0/mountPath + path: /spec/template/spec/containers/0/resources/limits/nvidia.com~1gpumem + value: "8192" +- op: replace + path: /spec/template/spec/containers/0/args/4 value: /home/coder/workspace - op: add path: /spec/template/spec/containers/0/env/- diff --git a/tests/kustomize/code-server-nodeport/kustomization.yaml b/tests/kustomize/code-server-nodeport/kustomization.yaml index 26dedad..8e0b712 100644 --- a/tests/kustomize/code-server-nodeport/kustomization.yaml +++ b/tests/kustomize/code-server-nodeport/kustomization.yaml @@ -6,7 +6,6 @@ resources: - ../../../apps/code-server/base - generated-secret.yaml components: - - ../../../apps/code-server/components/nfs - ../../../apps/code-server/components/service-nodeport patches: - path: deployment-patch.yaml @@ -15,10 +14,6 @@ patches: version: v1 kind: Deployment name: code-server - - path: pvc-patch.yaml - target: - kind: PersistentVolumeClaim - name: code-server-data - path: service-nodeport-patch.yaml target: kind: Service diff --git a/tests/kustomize/code-server-nodeport/pvc-patch.yaml b/tests/kustomize/code-server-nodeport/pvc-patch.yaml deleted file mode 100644 index 2819ff3..0000000 --- a/tests/kustomize/code-server-nodeport/pvc-patch.yaml +++ /dev/null @@ -1,4 +0,0 @@ -- op: replace - path: /spec/resources/requests/storage - value: 10Gi -