This commit is contained in:
mangomqy
2025-11-13 02:54:06 +00:00
commit c5e51ed069
254 changed files with 54901 additions and 0 deletions

View File

@ -0,0 +1,235 @@
/**
* Cluster Monitor Card Component
* 显示单个集群的监控信息
*/
import React, { useState } from "react";
import { Activity, CheckCircle, AlertTriangle, XCircle, HelpCircle, Clock, Cpu, Database, Server as ServerIcon, ChevronDown, ChevronUp, TrendingUp } from "lucide-react";
import { Card, Badge } from "@/shared/components";
import type { ClusterMetrics } from "@/core/types";
import { NodeMetricCard } from "./NodeMetricCard";
/** Props accepted by ClusterMonitorCard. */
interface ClusterMonitorCardProps {
/** Metrics object for the single cluster rendered by the card. */
cluster: ClusterMetrics;
}
/**
 * Card showing the monitoring summary of a single cluster: status, headline
 * counters (uptime, nodes, pods, GPU), CPU / memory / GPU usage bars with
 * optional per-node peaks, and an expandable list of node cards.
 *
 * @param cluster Metrics for the cluster to display. Missing fields fall back
 *   to "N/A" (text) or 0 (numbers) so the card always renders.
 */
export const ClusterMonitorCard: React.FC<ClusterMonitorCardProps> = ({ cluster }) => {
  const [showNodes, setShowNodes] = useState(false);

  // Normalise optional fields once so the JSX below never touches undefined.
  const status = cluster.status ?? "unknown";
  const uptime = cluster.uptime ?? "N/A";
  const nodeCount = cluster.nodeCount ?? 0;
  const podCount = cluster.podCount ?? 0;
  const totalGpu = cluster.totalGpu ?? 0;
  const usedGpu = cluster.usedGpu ?? 0;
  const cpuUsage = cluster.cpuUsage ?? 0;
  const memoryUsage = cluster.memoryUsage ?? 0;
  const gpuUsage = cluster.gpuUsage ?? 0;
  const usedCpu = cluster.usedCpu ?? "N/A";
  const totalCpu = cluster.totalCpu ?? "N/A";
  const usedMemory = cluster.usedMemory ?? "N/A";
  const totalMemory = cluster.totalMemory ?? "N/A";
  const lastCheckedText = cluster.lastCheck ? new Date(cluster.lastCheck).toLocaleString() : "N/A";

  // Per-node peak metrics. They are defaulted to 0 so the `> 0` guards below
  // are real booleans: writing `value && value > 0 && <jsx>` would evaluate to
  // the number 0 when the metric is exactly 0, and React renders that as "0".
  const maxNodeCpuUsage = cluster.maxNodeCpuUsage ?? 0;
  const maxNodeMemUsage = cluster.maxNodeMemUsage ?? 0;
  const maxNodeGpu = cluster.maxNodeGpu ?? 0;
  const maxNodeGpuUsage = cluster.maxNodeGpuUsage ?? 0;

  const nodes = cluster.nodes ?? [];
  const hasNodes = nodes.length > 0;

  /** Badge for the cluster status; "unknown" is deliberately shown as a warning. */
  const getStatusBadge = () => {
    switch (status) {
      case "healthy":
        return <Badge variant="success">Healthy</Badge>;
      case "warning":
      case "unknown":
        return <Badge variant="warning">Warning</Badge>;
      case "error":
      case "unhealthy":
        return <Badge variant="danger">Error</Badge>;
      default:
        // Any status string not listed above.
        return <Badge variant="gray">Unknown</Badge>;
    }
  };

  /** Icon matching the grouping used by getStatusBadge. */
  const getStatusIcon = () => {
    switch (status) {
      case "healthy":
        return <CheckCircle className="w-5 h-5 text-green-400" />;
      case "warning":
      case "unknown":
        return <AlertTriangle className="w-5 h-5 text-yellow-400" />;
      case "error":
      case "unhealthy":
        return <XCircle className="w-5 h-5 text-red-400" />;
      default:
        return <HelpCircle className="w-5 h-5 text-gray-400" />;
    }
  };

  return (
    <Card className="p-5">
      <div className="flex items-start justify-between">
        <div className="flex items-start gap-4 flex-1">
          {/* Status Icon */}
          <div className="p-3 bg-gray-800 rounded-lg">
            {getStatusIcon()}
          </div>
          {/* Cluster Info */}
          <div className="flex-1 min-w-0">
            <div className="flex items-center gap-3 mb-2">
              <h3 className="text-lg font-semibold text-white truncate">{cluster.clusterName || "Unnamed Cluster"}</h3>
              {getStatusBadge()}
            </div>
            {/* Metrics Grid */}
            <div className="grid grid-cols-2 sm:grid-cols-4 gap-4 mb-3">
              <div>
                <p className="text-xs text-gray-500">Uptime</p>
                <p className="text-sm text-gray-300 font-mono mt-1">{uptime}</p>
              </div>
              <div>
                <p className="text-xs text-gray-500">Nodes</p>
                <div className="flex items-center gap-1 mt-1">
                  <ServerIcon className="w-3 h-3 text-blue-400" />
                  <p className="text-sm text-gray-300 font-mono">{nodeCount}</p>
                </div>
              </div>
              <div>
                <p className="text-xs text-gray-500">Pods</p>
                <p className="text-sm text-gray-300 font-mono mt-1">{podCount}</p>
              </div>
              <div>
                <p className="text-xs text-gray-500">GPU</p>
                <p className="text-sm text-gray-300 font-mono mt-1">
                  {usedGpu}/{totalGpu || "N/A"}
                </p>
              </div>
            </div>
            {/* Resource Usage */}
            <div className="grid grid-cols-1 sm:grid-cols-3 gap-3 mt-3 p-3 bg-gray-800/50 rounded-lg">
              <div>
                <div className="flex items-center gap-2 mb-1">
                  <Cpu className="w-3 h-3 text-blue-400" />
                  <p className="text-xs text-gray-500">CPU (Cluster Total)</p>
                </div>
                <p className="text-sm text-gray-300 font-mono">{usedCpu} / {totalCpu}</p>
                <div className="mt-1 h-1.5 bg-gray-700 rounded-full overflow-hidden">
                  <div
                    className="h-full bg-blue-500 rounded-full transition-all"
                    style={{ width: `${Math.min(cpuUsage, 100)}%` }}
                  />
                </div>
                <p className="text-xs text-gray-400 mt-1">{cpuUsage.toFixed(1)}%</p>
                {cluster.maxNodeCpu ? (
                  <div className="mt-1.5 pt-1.5 border-t border-gray-700/50">
                    <div className="flex items-center gap-1">
                      <TrendingUp className="w-3 h-3 text-blue-400/60" />
                      <p className="text-xs text-gray-500">Max per node</p>
                    </div>
                    <p className="text-xs text-gray-400 font-mono">{cluster.maxNodeCpu}</p>
                    {maxNodeCpuUsage > 0 && (
                      <p className="text-xs text-gray-500">Peak: {maxNodeCpuUsage.toFixed(1)}%</p>
                    )}
                  </div>
                ) : null}
              </div>
              <div>
                <div className="flex items-center gap-2 mb-1">
                  <Database className="w-3 h-3 text-green-400" />
                  <p className="text-xs text-gray-500">Memory (Cluster Total)</p>
                </div>
                <p className="text-sm text-gray-300 font-mono">{usedMemory} / {totalMemory}</p>
                <div className="mt-1 h-1.5 bg-gray-700 rounded-full overflow-hidden">
                  <div
                    className="h-full bg-green-500 rounded-full transition-all"
                    style={{ width: `${Math.min(memoryUsage, 100)}%` }}
                  />
                </div>
                <p className="text-xs text-gray-400 mt-1">{memoryUsage.toFixed(1)}%</p>
                {cluster.maxNodeMemory ? (
                  <div className="mt-1.5 pt-1.5 border-t border-gray-700/50">
                    <div className="flex items-center gap-1">
                      <TrendingUp className="w-3 h-3 text-green-400/60" />
                      <p className="text-xs text-gray-500">Max per node</p>
                    </div>
                    <p className="text-xs text-gray-400 font-mono">{cluster.maxNodeMemory}</p>
                    {maxNodeMemUsage > 0 && (
                      <p className="text-xs text-gray-500">Peak: {maxNodeMemUsage.toFixed(1)}%</p>
                    )}
                  </div>
                ) : null}
              </div>
              {totalGpu > 0 && (
                <div>
                  <div className="flex items-center gap-2 mb-1">
                    <Activity className="w-3 h-3 text-purple-400" />
                    <p className="text-xs text-gray-500">GPU (Cluster Total)</p>
                  </div>
                  <p className="text-sm text-gray-300 font-mono">{usedGpu} / {totalGpu}</p>
                  <div className="mt-1 h-1.5 bg-gray-700 rounded-full overflow-hidden">
                    <div
                      className="h-full bg-purple-500 rounded-full transition-all"
                      style={{ width: `${Math.min(gpuUsage, 100)}%` }}
                    />
                  </div>
                  <p className="text-xs text-gray-400 mt-1">{gpuUsage.toFixed(1)}%</p>
                  {maxNodeGpu > 0 && (
                    <div className="mt-1.5 pt-1.5 border-t border-gray-700/50">
                      <div className="flex items-center gap-1">
                        <TrendingUp className="w-3 h-3 text-purple-400/60" />
                        <p className="text-xs text-gray-500">Max per node</p>
                      </div>
                      <p className="text-xs text-gray-400 font-mono">{maxNodeGpu} GPUs</p>
                      {maxNodeGpuUsage > 0 && (
                        <p className="text-xs text-gray-500">Peak: {maxNodeGpuUsage.toFixed(1)}%</p>
                      )}
                    </div>
                  )}
                </div>
              )}
            </div>
            <div className="mt-3 flex items-center gap-2 text-xs text-gray-500">
              <Clock className="w-3 h-3" />
              <span>Last checked: {lastCheckedText}</span>
            </div>
          </div>
        </div>
        {/* Actions */}
        <div className="flex gap-2">
          {hasNodes && (
            <button
              type="button"
              aria-expanded={showNodes}
              onClick={() => setShowNodes((visible) => !visible)}
              className="px-3 py-1.5 text-sm text-blue-400 hover:text-blue-300 hover:bg-blue-400/10 rounded-lg transition flex items-center gap-2"
            >
              {showNodes ? (
                <>
                  <ChevronUp className="w-4 h-4" />
                  Hide Nodes
                </>
              ) : (
                <>
                  <ChevronDown className="w-4 h-4" />
                  Show Nodes ({nodes.length})
                </>
              )}
            </button>
          )}
        </div>
      </div>
      {/* Nodes List */}
      {showNodes && hasNodes && (
        <div className="mt-4 pt-4 border-t border-gray-700/50">
          <h4 className="text-sm font-semibold text-white mb-3 flex items-center gap-2">
            <ServerIcon className="w-4 h-4 text-blue-400" />
            Cluster Nodes ({nodes.length})
          </h4>
          <div className="grid grid-cols-1 lg:grid-cols-2 gap-3">
            {nodes.map((node) => (
              <NodeMetricCard key={node.nodeName} node={node} />
            ))}
          </div>
        </div>
      )}
    </Card>
  );
};

View File

@ -0,0 +1,147 @@
/**
* Node Metric Card Component
* 显示单个节点的监控信息
*/
import React from "react";
import { Server, Cpu, Database, CheckCircle, XCircle, Activity } from "lucide-react";
import { Badge } from "@/shared/components";
import type { NodeMetrics } from "@/core/types";
/** Props accepted by NodeMetricCard. */
interface NodeMetricCardProps {
/** Metrics object for the single node rendered by the card. */
node: NodeMetrics;
}
/**
 * Compact card for one cluster node: name, readiness and role badges, age,
 * CPU / memory / GPU usage bars, pod count and (when present) kubelet version.
 *
 * @param node Metrics for the node to display. Missing percentages and GPU
 *   capacity are treated as 0; missing usage strings are shown as "N/A".
 */
export const NodeMetricCard: React.FC<NodeMetricCardProps> = ({ node }) => {
  // Anything other than the exact status "Ready" is presented as NotReady.
  const ready = node.status === "Ready";
  const statusBadge = ready
    ? <Badge variant="success">Ready</Badge>
    : <Badge variant="danger">NotReady</Badge>;
  const statusIcon = ready
    ? <CheckCircle className="w-4 h-4 text-green-400" />
    : <XCircle className="w-4 h-4 text-red-400" />;

  // Only the exact role "control-plane" gets its own badge; the rest are workers.
  const roleBadge = node.role === "control-plane"
    ? <Badge variant="blue">Control Plane</Badge>
    : <Badge variant="gray">Worker</Badge>;

  const cpuPct = node.cpuPercent ?? 0;
  const memPct = node.memoryPercent ?? 0;
  const gpuPct = node.gpuPercent ?? 0;
  const gpuSlots = node.gpuCapacity ?? 0;

  // Inline style for a usage bar; the fill is capped at 100%.
  const barStyle = (pct: number) => ({ width: `${Math.min(pct, 100)}%` });

  // GPU column: usage bar when the node reports GPU capacity, placeholder otherwise.
  let gpuSection: React.ReactNode;
  if (gpuSlots > 0) {
    gpuSection = (
      <>
        <p className="text-xs text-gray-300 font-mono mb-1">
          {node.gpuUsage ?? "N/A"} / {gpuSlots}
        </p>
        <div className="h-1 bg-gray-700 rounded-full overflow-hidden">
          <div
            className="h-full bg-purple-500 rounded-full transition-all"
            style={barStyle(gpuPct)}
          />
        </div>
        <p className="text-xs text-gray-400 mt-0.5">
          {gpuPct.toFixed(1)}%
          {node.gpuType && <span className="ml-1 text-gray-500">({node.gpuType})</span>}
        </p>
      </>
    );
  } else {
    gpuSection = <p className="text-xs text-gray-500 mt-1">No GPU</p>;
  }

  return (
    <div className="p-4 bg-gray-800/30 rounded-lg border border-gray-700/50 hover:border-gray-600/50 transition">
      {/* Node Header */}
      <div className="flex items-center justify-between mb-3">
        <div className="flex items-center gap-3">
          <div className="p-2 bg-gray-700/50 rounded">
            <Server className="w-4 h-4 text-blue-400" />
          </div>
          <div>
            <div className="flex items-center gap-2 mb-1">
              <h4 className="text-sm font-semibold text-white">{node.nodeName}</h4>
              {statusIcon}
            </div>
            <div className="flex items-center gap-2">
              {statusBadge}
              {roleBadge}
            </div>
          </div>
        </div>
        <div className="text-right">
          <p className="text-xs text-gray-500">Age</p>
          <p className="text-xs text-gray-300 font-mono">{node.age}</p>
        </div>
      </div>
      {/* Node Metrics Grid */}
      <div className="grid grid-cols-3 gap-3">
        {/* CPU */}
        <div>
          <div className="flex items-center gap-1.5 mb-1">
            <Cpu className="w-3 h-3 text-blue-400" />
            <p className="text-xs text-gray-500">CPU</p>
          </div>
          <p className="text-xs text-gray-300 font-mono mb-1">
            {node.cpuUsage ?? "N/A"} / {node.cpuAllocatable ?? "N/A"}
          </p>
          <div className="h-1 bg-gray-700 rounded-full overflow-hidden">
            <div
              className="h-full bg-blue-500 rounded-full transition-all"
              style={barStyle(cpuPct)}
            />
          </div>
          <p className="text-xs text-gray-400 mt-0.5">{cpuPct.toFixed(1)}%</p>
        </div>
        {/* Memory */}
        <div>
          <div className="flex items-center gap-1.5 mb-1">
            <Database className="w-3 h-3 text-green-400" />
            <p className="text-xs text-gray-500">Memory</p>
          </div>
          <p className="text-xs text-gray-300 font-mono mb-1">
            {node.memoryUsage ?? "N/A"} / {node.memoryAllocatable ?? "N/A"}
          </p>
          <div className="h-1 bg-gray-700 rounded-full overflow-hidden">
            <div
              className="h-full bg-green-500 rounded-full transition-all"
              style={barStyle(memPct)}
            />
          </div>
          <p className="text-xs text-gray-400 mt-0.5">{memPct.toFixed(1)}%</p>
        </div>
        {/* GPU */}
        <div>
          <div className="flex items-center gap-1.5 mb-1">
            <Activity className="w-3 h-3 text-purple-400" />
            <p className="text-xs text-gray-500">GPU</p>
          </div>
          {gpuSection}
        </div>
      </div>
      {/* Additional Info */}
      <div className="mt-3 pt-3 border-t border-gray-700/50 grid grid-cols-2 gap-2">
        <div>
          <p className="text-xs text-gray-500">Pods</p>
          <p className="text-xs text-gray-300 font-mono">{node.podCount ?? 0}</p>
        </div>
        {node.kubeletVersion && (
          <div>
            <p className="text-xs text-gray-500">Kubelet</p>
            <p className="text-xs text-gray-300 font-mono">{node.kubeletVersion}</p>
          </div>
        )}
      </div>
    </div>
  );
};

View File

@ -0,0 +1,8 @@
/**
 * Monitoring Feature Module
 * Barrel file for the cluster monitoring feature: re-exports the clusters
 * page (a default export, renamed here) and the cluster card component.
 */
export { default as MonitoringClustersPage } from "./pages/MonitoringClustersPage";
export { ClusterMonitorCard } from "./components/ClusterMonitorCard";

View File

@ -0,0 +1,175 @@
/**
* Monitoring - Clusters Page
* 监控集群状态和健康信息
*/
import React, { useState, useEffect } from "react";
import { Activity, Server, RefreshCw } from "lucide-react";
import { PageHeader, StatsCard, Button, LoadingState, ErrorState, EmptyState } from "@/shared";
import { useToast } from "@/shared";
import { ClusterErrors, SuccessMessages, formatApiError } from "@/shared/utils";
import { listClusterMonitoring } from "@/api";
import type { ClusterMetrics } from "@/core/types";
import { ClusterMonitorCard } from "../components/ClusterMonitorCard";
/** Interval between automatic background refreshes of the cluster list. */
const AUTO_REFRESH_INTERVAL_MS = 30000;

/**
 * Cluster monitoring page.
 *
 * Loads cluster metrics on mount, re-fetches them every 30 seconds, and lets
 * the user trigger a manual refresh. Renders dedicated loading, error and
 * empty states; otherwise shows summary counters followed by one
 * ClusterMonitorCard per cluster.
 */
const MonitoringClustersPage: React.FC = () => {
  const { info: toastInfo, success: toastSuccess, error: toastError } = useToast();
  const [clusters, setClusters] = useState<ClusterMetrics[]>([]);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);
  const [refreshing, setRefreshing] = useState(false);

  // One mounted flag shared by every load path (initial load, auto-refresh,
  // manual refresh, retry). Previously the manual paths passed a fresh
  // `{ current: true }` object, so a request finishing after unmount still
  // updated state and fired toasts.
  const isMountedRef = React.useRef(true);

  /**
   * Fetches cluster monitoring data and stores it in component state.
   *
   * @param isRefresh When true the `refreshing` flag is raised instead of the
   *   full-page `loading` flag.
   * @returns true when the data was fetched and applied; false when the
   *   request failed or the component was unmounted before it completed.
   */
  const loadClusters = async (isRefresh = false): Promise<boolean> => {
    let succeeded = false;
    if (isRefresh) {
      setRefreshing(true);
    } else {
      setLoading(true);
    }
    setError(null);
    try {
      const data = await listClusterMonitoring();
      if (isMountedRef.current) {
        setClusters(data);
        succeeded = true;
      }
    } catch (err) {
      const errorMsg = formatApiError(err) || ClusterErrors.LOAD_FAILED;
      if (isMountedRef.current) {
        setError(errorMsg);
        toastError(errorMsg);
        console.error(err);
      }
    } finally {
      if (isMountedRef.current) {
        setLoading(false);
        setRefreshing(false);
      }
    }
    return succeeded;
  };

  useEffect(() => {
    // Re-arm on every effect setup so a setup -> cleanup -> setup cycle
    // leaves the flag set to true.
    isMountedRef.current = true;
    void loadClusters();
    // Auto-refresh every 30 seconds
    const interval = setInterval(() => {
      void loadClusters(true);
    }, AUTO_REFRESH_INTERVAL_MS);
    return () => {
      isMountedRef.current = false;
      clearInterval(interval);
    };
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, []);

  /** Manual refresh: announce it, reload, and confirm only on success. */
  const handleRefresh = async () => {
    toastInfo("Refreshing cluster metrics...", {
      title: "Monitoring Refresh",
      durationMs: 1800,
      mergeKey: "monitoring-refresh",
    });
    const refreshed = await loadClusters(true);
    if (refreshed) {
      toastSuccess(SuccessMessages.DATA_REFRESHED);
    }
  };

  if (loading) {
    return <LoadingState message="Loading cluster monitoring data..." />;
  }
  if (error) {
    return (
      <ErrorState
        title="Failed to Load Clusters"
        message={error}
        onRetry={() => loadClusters()}
      />
    );
  }
  if (clusters.length === 0) {
    return (
      <EmptyState
        icon={Server}
        title="No Clusters Available"
        description="No clusters configured for monitoring. Please add clusters in the configuration section."
      />
    );
  }

  // Summary buckets: "unknown" counts as a warning and "unhealthy" as an error.
  const healthyCount = clusters.filter(c => c.status === "healthy").length;
  const warningCount = clusters.filter(c => c.status === "warning" || c.status === "unknown").length;
  const errorCount = clusters.filter(c => c.status === "error" || c.status === "unhealthy").length;

  return (
    <div className="space-y-6">
      {/* Page Header */}
      <PageHeader
        title="Cluster Monitoring"
        description="Monitor cluster health and status"
        icon={Activity}
      >
        <Button
          variant="secondary"
          icon={RefreshCw}
          onClick={handleRefresh}
          loading={refreshing}
        >
          {refreshing ? "Refreshing..." : "Refresh"}
        </Button>
      </PageHeader>
      {/* Summary Stats */}
      <div className="grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-4 gap-4">
        <StatsCard
          title="Total Clusters"
          value={clusters.length}
          icon={Server}
          variant="blue"
        />
        <StatsCard
          title="Healthy"
          value={healthyCount}
          icon={Activity}
          variant="green"
        />
        <StatsCard
          title="Warning"
          value={warningCount}
          icon={Activity}
          variant="orange"
        />
        <StatsCard
          title="Error"
          value={errorCount}
          icon={Activity}
          variant="red"
        />
      </div>
      {/* Auto-refresh Info */}
      <div className="text-sm text-gray-400">
        Auto-refresh every 30 seconds {refreshing && "• Refreshing..."}
      </div>
      {/* Cluster List */}
      <div className="grid gap-4">
        {clusters.map((cluster, index) => (
          <ClusterMonitorCard
            // Prefer a stable identifier; fall back to name + position.
            key={cluster.clusterId || cluster.id || `${cluster.clusterName || 'cluster'}-${index}`}
            cluster={cluster}
          />
        ))}
      </div>
    </div>
  );
};
export default MonitoringClustersPage;

View File

@ -0,0 +1,9 @@
/**
 * Monitoring Module
 * Monitoring module - cluster monitoring. Barrel file that re-exports the
 * clusters page and everything exported by the cluster card component file.
 */
// Clusters
export { default as MonitoringClustersPage } from './clusters/pages/MonitoringClustersPage';
export * from './clusters/components/ClusterMonitorCard';