fix: scale replicas in response, K8s metrics client, quota precheck, auth tests

- Add GetMetrics method to MetricsClient interface and implement cluster metrics API
- Add QuotaPrecheck service for validating resource quotas before deployment
- Add auth DTO with role/permission models and auth handler tests
- Add instance diagnostics: mounted NFS volumes, labels, annotations in pod diagnostics
- Update workspace handler with GetWorkspace endpoint and shared-user list
- Fix monitoring handler to use correct service method name
- Add tail_lines fallback in instance handler for snake_case query params
- Update nginx config for SSE log streaming support (no buffering)
- Add comprehensive test coverage: auth_service_test, auth_handler_test,
  auth_dto_test, metrics_client_test, quota_precheck_test
- Update error messages for quota validation and instance operations
- ModifyModal: fix YAML lineWidth:0, modified keys summary, delta-only submit
- InstanceCard: correctly disable scale-minus when replicas <= 0
- SidebarLayout: add hover transition for sidebar items
- Update todo.md and lessons.md with latest fixes
This commit is contained in:
Ivan087
2026-05-20 16:56:29 +08:00
parent 8f90cf0f0d
commit 33ddaf97db
59 changed files with 4805 additions and 457 deletions

View File

@ -3,7 +3,7 @@
* 显示单个集群的监控信息
*/
import React, { useState } from "react";
import { Activity, CheckCircle, AlertTriangle, XCircle, HelpCircle, Clock, Cpu, Database, Server as ServerIcon, ChevronDown, ChevronUp, TrendingUp } from "lucide-react";
import { Activity, CheckCircle, AlertTriangle, XCircle, HelpCircle, Clock, Cpu, Database, Server as ServerIcon, ChevronDown, ChevronUp, TrendingUp, Users } from "lucide-react";
import { Card, Badge } from "@/shared/components";
import type { ClusterMetrics } from "@/core/types";
import { NodeMetricCard } from "./NodeMetricCard";
@ -20,6 +20,9 @@ export const ClusterMonitorCard: React.FC<ClusterMonitorCardProps> = ({ cluster
const podCount = cluster.podCount ?? 0;
const totalGpu = cluster.totalGpu ?? 0;
const usedGpu = cluster.usedGpu ?? 0;
const allocatedGpu = firstNumber(cluster.gpuAllocated, cluster.allocatedGpu, cluster.gpuAllocation, usedGpu);
const usedGpuMemory = firstDisplayValue(cluster.allocatedGpuMemoryMb, cluster.allocatedGpuMemoryMB, cluster.gpuMemoryRequestsMb, cluster.usedGpuMemory, cluster.gpuMemoryUsed, cluster.usedGpuMem);
const totalGpuMemory = firstDisplayValue(cluster.totalGpuMemory, cluster.totalGpuMem);
const cpuUsage = cluster.cpuUsage ?? 0;
const memoryUsage = cluster.memoryUsage ?? 0;
const gpuUsage = cluster.gpuUsage ?? 0;
@ -27,7 +30,11 @@ export const ClusterMonitorCard: React.FC<ClusterMonitorCardProps> = ({ cluster
const totalCpu = cluster.totalCpu ?? "N/A";
const usedMemory = cluster.usedMemory ?? "N/A";
const totalMemory = cluster.totalMemory ?? "N/A";
const cpuRequestText = firstDisplayValue(cluster.cpuRequests, usedCpu);
const memoryRequestText = firstDisplayValue(cluster.memoryRequests, usedMemory);
const hasClusterTotals = Boolean(cluster.totalCpu || cluster.totalMemory || cluster.nodeCount);
const lastCheckedText = cluster.lastCheck ? new Date(cluster.lastCheck).toLocaleString() : "N/A";
const userResourceRows = getUserResourceRows(cluster);
const getStatusBadge = () => {
switch (status) {
@ -76,13 +83,13 @@ export const ClusterMonitorCard: React.FC<ClusterMonitorCardProps> = ({ cluster
</div>
{/* Metrics Grid */}
<div className="grid grid-cols-2 sm:grid-cols-4 gap-4 mb-3">
<div className="grid grid-cols-2 gap-4 mb-3 md:grid-cols-3 xl:grid-cols-5">
<div>
<p className="text-xs text-slate-500">Uptime</p>
<p className="text-sm text-slate-700 font-mono mt-1">{uptime}</p>
</div>
<div>
<p className="text-xs text-slate-500">Nodes</p>
<p className="text-xs text-slate-500">{hasClusterTotals ? "Nodes" : "Visible Nodes"}</p>
<div className="flex items-center gap-1 mt-1">
<ServerIcon className="w-3 h-3 text-blue-400" />
<p className="text-sm text-slate-700 font-mono">{nodeCount}</p>
@ -95,7 +102,13 @@ export const ClusterMonitorCard: React.FC<ClusterMonitorCardProps> = ({ cluster
<div>
<p className="text-xs text-slate-500">GPU</p>
<p className="text-sm text-slate-700 font-mono mt-1">
{usedGpu}/{totalGpu || "N/A"}
{hasClusterTotals ? `${usedGpu}/${totalGpu || "N/A"}` : `${allocatedGpu} allocated`}
</p>
</div>
<div>
<p className="text-xs text-slate-500">GPU Mem</p>
<p className="text-sm text-slate-700 font-mono mt-1">
{usedGpuMemory || "N/A"}{totalGpuMemory ? ` / ${totalGpuMemory}` : ""}
</p>
</div>
</div>
@ -105,16 +118,18 @@ export const ClusterMonitorCard: React.FC<ClusterMonitorCardProps> = ({ cluster
<div>
<div className="flex items-center gap-2 mb-1">
<Cpu className="w-3 h-3 text-blue-400" />
<p className="text-xs text-slate-500">CPU (Cluster Total)</p>
<p className="text-xs text-slate-500">{hasClusterTotals ? "CPU (Cluster Total)" : "CPU Requests"}</p>
</div>
<p className="text-sm text-slate-700 font-mono">{usedCpu} / {totalCpu}</p>
<p className="text-sm text-slate-700 font-mono">
{hasClusterTotals ? `${usedCpu} / ${totalCpu}` : cpuRequestText || "0 cores"}
</p>
<div className="mt-1 h-1.5 bg-slate-100 rounded-full overflow-hidden">
<div
className="h-full bg-blue-500 rounded-full transition-all"
style={{ width: `${Math.min(cpuUsage, 100)}%` }}
/>
</div>
<p className="text-xs text-slate-500 mt-1">{cpuUsage.toFixed(1)}%</p>
<p className="text-xs text-slate-500 mt-1">{hasClusterTotals ? `${cpuUsage.toFixed(1)}%` : "self-scoped allocation"}</p>
{cluster.maxNodeCpu && (
<div className="mt-1.5 pt-1.5 border-t border-slate-200">
<div className="flex items-center gap-1">
@ -132,16 +147,18 @@ export const ClusterMonitorCard: React.FC<ClusterMonitorCardProps> = ({ cluster
<div>
<div className="flex items-center gap-2 mb-1">
<Database className="w-3 h-3 text-green-400" />
<p className="text-xs text-slate-500">Memory (Cluster Total)</p>
<p className="text-xs text-slate-500">{hasClusterTotals ? "Memory (Cluster Total)" : "Memory Requests"}</p>
</div>
<p className="text-sm text-slate-700 font-mono">{usedMemory} / {totalMemory}</p>
<p className="text-sm text-slate-700 font-mono">
{hasClusterTotals ? `${usedMemory} / ${totalMemory}` : memoryRequestText || "0 B"}
</p>
<div className="mt-1 h-1.5 bg-slate-100 rounded-full overflow-hidden">
<div
className="h-full bg-green-500 rounded-full transition-all"
style={{ width: `${Math.min(memoryUsage, 100)}%` }}
/>
</div>
<p className="text-xs text-slate-500 mt-1">{memoryUsage.toFixed(1)}%</p>
<p className="text-xs text-slate-500 mt-1">{hasClusterTotals ? `${memoryUsage.toFixed(1)}%` : "self-scoped allocation"}</p>
{cluster.maxNodeMemory && (
<div className="mt-1.5 pt-1.5 border-t border-slate-200">
<div className="flex items-center gap-1">
@ -156,20 +173,20 @@ export const ClusterMonitorCard: React.FC<ClusterMonitorCardProps> = ({ cluster
)}
</div>
{totalGpu > 0 && (
{(totalGpu > 0 || allocatedGpu > 0) && (
<div>
<div className="flex items-center gap-2 mb-1">
<Activity className="w-3 h-3 text-purple-400" />
<p className="text-xs text-slate-500">GPU (Cluster Total)</p>
<p className="text-xs text-slate-500">GPU Allocation</p>
</div>
<p className="text-sm text-slate-700 font-mono">{usedGpu} / {totalGpu}</p>
<p className="text-sm text-slate-700 font-mono">{allocatedGpu} / {totalGpu || "N/A"}</p>
<div className="mt-1 h-1.5 bg-slate-100 rounded-full overflow-hidden">
<div
className="h-full bg-purple-500 rounded-full transition-all"
style={{ width: `${Math.min(gpuUsage, 100)}%` }}
/>
</div>
<p className="text-xs text-slate-500 mt-1">{gpuUsage.toFixed(1)}%</p>
<p className="text-xs text-slate-500 mt-1">{hasClusterTotals ? `${gpuUsage.toFixed(1)}%` : "self-scoped allocation"}</p>
{cluster.maxNodeGpu && cluster.maxNodeGpu > 0 && (
<div className="mt-1.5 pt-1.5 border-t border-slate-200">
<div className="flex items-center gap-1">
@ -184,8 +201,62 @@ export const ClusterMonitorCard: React.FC<ClusterMonitorCardProps> = ({ cluster
)}
</div>
)}
{(usedGpuMemory || totalGpuMemory) && (
<div>
<div className="flex items-center gap-2 mb-1">
<Database className="w-3 h-3 text-fuchsia-500" />
<p className="text-xs text-slate-500">GPU Mem</p>
</div>
<p className="text-sm text-slate-700 font-mono">
{usedGpuMemory || "0"}{totalGpuMemory ? ` / ${totalGpuMemory}` : ""}
</p>
<p className="text-xs text-slate-500 mt-1">requests.nvidia.com/gpumem</p>
</div>
)}
</div>
{userResourceRows.length > 0 && (
<div className="mt-3 overflow-hidden rounded-lg border border-slate-200">
<div className="flex items-center gap-2 border-b border-slate-200 bg-slate-50 px-3 py-2">
<Users className="h-4 w-4 text-slate-500" />
<h4 className="text-sm font-semibold text-slate-900">User Resources</h4>
</div>
<div className="overflow-x-auto">
<table className="min-w-[720px] w-full text-left text-xs">
<thead className="bg-white text-slate-500">
<tr>
<th className="px-3 py-2 font-medium">User</th>
<th className="px-3 py-2 font-medium">Namespace</th>
<th className="px-3 py-2 font-medium">CPU</th>
<th className="px-3 py-2 font-medium">Memory</th>
<th className="px-3 py-2 font-medium">GPU</th>
<th className="px-3 py-2 font-medium">GPU Mem</th>
<th className="px-3 py-2 font-medium">Pods</th>
</tr>
</thead>
<tbody className="divide-y divide-slate-100">
{userResourceRows.map((row, index) => (
<tr key={`${row.userId || row.username || row.userName || "user"}-${index}`} className="bg-white">
<td className="max-w-[180px] truncate px-3 py-2 font-medium text-slate-800">
{row.username || row.userName || shortId(row.userId) || "-"}
</td>
<td className="max-w-[180px] truncate px-3 py-2 font-mono text-slate-600">{row.namespace || "-"}</td>
<td className="px-3 py-2 font-mono text-slate-700">{firstDisplayValue(row.cpuRequests, row.usedCpu, row.cpuUsed, row.cpuRequest, row.cpuLimits, row.cpuLimit) || "-"}</td>
<td className="px-3 py-2 font-mono text-slate-700">{firstDisplayValue(row.memoryRequests, row.usedMemory, row.memoryUsed, row.memoryRequest, row.memoryLimits, row.memoryLimit) || "-"}</td>
<td className="px-3 py-2 font-mono text-slate-700">{firstNumber(row.gpuRequests, row.gpuAllocated, row.gpuAllocation, row.usedGpu, row.gpuUsed) ?? 0}</td>
<td className="px-3 py-2 font-mono text-slate-700">
{firstDisplayValue(row.gpuMemoryRequestsMb, row.gpuMemoryAllocated, row.gpuMemAllocated, row.usedGpuMemory, row.gpuMemoryUsed, row.gpuMemUsed) || "0"}
</td>
<td className="px-3 py-2 font-mono text-slate-700">{row.podCount ?? "-"}</td>
</tr>
))}
</tbody>
</table>
</div>
</div>
)}
<div className="mt-3 flex items-center gap-2 text-xs text-slate-500">
<Clock className="w-3 h-3" />
<span>Last checked: {lastCheckedText}</span>
@ -233,3 +304,34 @@ export const ClusterMonitorCard: React.FC<ClusterMonitorCardProps> = ({ cluster
</Card>
);
};
/**
 * Picks the first argument that is a finite number.
 *
 * `undefined`, `null`, `NaN` and `±Infinity` are skipped. A literal `0` is a
 * valid result and is returned as-is.
 *
 * @param values Candidate values, checked in the order given.
 * @returns The first finite number, or `0` when no candidate qualifies.
 */
const firstNumber = (...values: Array<number | undefined | null>): number => {
  const match = values.find(
    (candidate): candidate is number => typeof candidate === "number" && Number.isFinite(candidate),
  );
  // `??` (not `||`) so that a legitimate 0 match is not mistaken for "nothing found".
  return match ?? 0;
};
/**
 * Picks the first argument that can be shown to the user and returns it as text.
 *
 * A finite number is stringified with `String(...)`. A string qualifies only
 * if it is non-empty after trimming, and is returned trimmed. Everything else
 * (`undefined`, `null`, `NaN`, `±Infinity`, blank strings) is skipped.
 *
 * @param values Candidate values, checked in the order given.
 * @returns The display text of the first qualifying candidate, or `""` if none qualifies.
 */
const firstDisplayValue = (...values: Array<string | number | undefined | null>): string => {
  for (const candidate of values) {
    switch (typeof candidate) {
      case "number":
        if (Number.isFinite(candidate)) {
          return String(candidate);
        }
        break;
      case "string": {
        const trimmed = candidate.trim();
        if (trimmed) {
          return trimmed;
        }
        break;
      }
      default:
        // undefined / null: keep looking.
        break;
    }
  }
  return "";
};
/**
 * Resolves the per-user resource rows from a cluster metrics payload.
 *
 * Several alternative field names are probed in a fixed order and the first
 * truthy one wins. Note that an empty array is truthy, so an empty list under
 * an earlier field name stops the search.
 *
 * @param cluster Cluster metrics object to read from.
 * @returns The first truthy candidate field, or an empty array when none is set.
 */
const getUserResourceRows = (cluster: ClusterMetrics) => {
  if (cluster.resourceUsageByUser) {
    return cluster.resourceUsageByUser;
  }
  if (cluster.userResources) {
    return cluster.userResources;
  }
  if (cluster.userResourceUsage) {
    return cluster.userResourceUsage;
  }
  if (cluster.resourcesByUser) {
    return cluster.resourcesByUser;
  }
  if (cluster.userResourceRows) {
    return cluster.userResourceRows;
  }
  return [];
};
/**
 * Produces a compact form of an identifier for display.
 *
 * The input is trimmed first. Identifiers of 12 characters or fewer are
 * returned unchanged; longer ones are abbreviated to their first 8 and last 4
 * characters joined by `...`.
 *
 * @param value Identifier to shorten; may be omitted.
 * @returns The trimmed or abbreviated identifier, or `""` when the input is missing or blank.
 */
const shortId = (value?: string): string => {
  const trimmed = (value ?? "").trim();
  return trimmed.length > 12 ? `${trimmed.slice(0, 8)}...${trimmed.slice(-4)}` : trimmed;
};