- Add GetMetrics method to MetricsClient interface and implement cluster metrics API - Add QuotaPrecheck service for validating resource quotas before deployment - Add auth DTO with role/permission models and auth handler tests - Add instance diagnostics: mounted NFS volumes, labels, annotations in pod diagnostics - Update workspace handler with GetWorkspace endpoint and shared-user list - Fix monitoring handler to use correct service method name - Add tail_lines fallback in instance handler for snake_case query params - Update nginx config for SSE log streaming support (no buffering) - Add comprehensive test coverage: auth_service_test, auth_handler_test, auth_dto_test, metrics_client_test, quota_precheck_test - Update error messages for quota validation and instance operations - ModifyModal: fix YAML lineWidth:0, modified keys summary, delta-only submit - InstanceCard: correctly disable scale-minus when replicas <= 0 - SidebarLayout: add hover transition for sidebar items - Update todo.md and lessons.md with latest fixes
144 lines
4.6 KiB
Go
144 lines
4.6 KiB
Go
package rest
|
|
|
|
import (
|
|
"net/http"
|
|
|
|
"github.com/gorilla/mux"
|
|
"github.com/ocdp/cluster-service/internal/adapter/input/http/dto"
|
|
"github.com/ocdp/cluster-service/internal/domain/service"
|
|
)
|
|
|
|
// MonitoringHandler 监控处理器
|
|
type MonitoringHandler struct {
|
|
monitoringService *service.MonitoringService
|
|
}
|
|
|
|
// NewMonitoringHandler 创建监控处理器
|
|
func NewMonitoringHandler(monitoringService *service.MonitoringService) *MonitoringHandler {
|
|
return &MonitoringHandler{
|
|
monitoringService: monitoringService,
|
|
}
|
|
}
|
|
|
|
// GetClusterMonitoring 获取单个集群的监控信息
|
|
// @Summary 获取集群监控
|
|
// @Tags Monitoring
|
|
// @Produce json
|
|
// @Security BearerAuth
|
|
// @Param cluster_id path string true "集群 ID"
|
|
// @Success 200 {object} dto.ClusterMetricsResponse
|
|
// @Failure 500 {object} dto.ErrorResponse
|
|
// @Router /monitoring/clusters/{cluster_id} [get]
|
|
func (h *MonitoringHandler) GetClusterMonitoring(w http.ResponseWriter, r *http.Request) {
|
|
vars := mux.Vars(r)
|
|
clusterID := vars["cluster_id"]
|
|
|
|
metrics, err := h.monitoringService.GetClusterMonitoring(r.Context(), clusterID)
|
|
if err != nil {
|
|
respondError(w, http.StatusInternalServerError, "MONITORING_ERROR", err.Error())
|
|
return
|
|
}
|
|
|
|
response := dto.ToClusterMetricsResponse(metrics)
|
|
respondJSON(w, http.StatusOK, response)
|
|
}
|
|
|
|
// GetClusterStats is a compatibility alias for cluster detail dashboards that
|
|
// historically read stats from /clusters/{id}/stats.
|
|
func (h *MonitoringHandler) GetClusterStats(w http.ResponseWriter, r *http.Request) {
|
|
h.GetClusterMonitoring(w, r)
|
|
}
|
|
|
|
// ListClusterMonitoring 获取所有集群的监控信息
|
|
// @Summary 列出集群监控
|
|
// @Tags Monitoring
|
|
// @Produce json
|
|
// @Security BearerAuth
|
|
// @Success 200 {array} dto.ClusterMetricsResponse
|
|
// @Failure 500 {object} dto.ErrorResponse
|
|
// @Router /monitoring/clusters [get]
|
|
func (h *MonitoringHandler) ListClusterMonitoring(w http.ResponseWriter, r *http.Request) {
|
|
monitoringList, err := h.monitoringService.ListClusterMonitoring(r.Context())
|
|
if err != nil {
|
|
respondError(w, http.StatusInternalServerError, "MONITORING_ERROR", err.Error())
|
|
return
|
|
}
|
|
|
|
// 转换为响应格式
|
|
response := make([]*dto.ClusterMetricsResponse, len(monitoringList))
|
|
for i, m := range monitoringList {
|
|
response[i] = dto.ToClusterMetricsResponse(m)
|
|
}
|
|
|
|
respondJSON(w, http.StatusOK, response)
|
|
}
|
|
|
|
// GetMonitoringSummary 获取监控汇总信息
|
|
// @Summary 获取监控汇总
|
|
// @Tags Monitoring
|
|
// @Produce json
|
|
// @Security BearerAuth
|
|
// @Success 200 {object} dto.MonitoringSummaryResponse
|
|
// @Failure 500 {object} dto.ErrorResponse
|
|
// @Router /monitoring/summary [get]
|
|
func (h *MonitoringHandler) GetMonitoringSummary(w http.ResponseWriter, r *http.Request) {
|
|
summary, err := h.monitoringService.GetMonitoringSummary(r.Context())
|
|
if err != nil {
|
|
respondError(w, http.StatusInternalServerError, "MONITORING_ERROR", err.Error())
|
|
return
|
|
}
|
|
|
|
response := dto.ToMonitoringSummaryResponse(summary)
|
|
respondJSON(w, http.StatusOK, response)
|
|
}
|
|
|
|
// GetNodeMetrics 获取集群的节点指标
|
|
// @Summary 获取节点指标
|
|
// @Tags Monitoring
|
|
// @Produce json
|
|
// @Security BearerAuth
|
|
// @Param cluster_id path string true "集群 ID"
|
|
// @Success 200 {array} dto.NodeMetricsResponse
|
|
// @Failure 500 {object} dto.ErrorResponse
|
|
// @Router /monitoring/clusters/{cluster_id}/nodes [get]
|
|
func (h *MonitoringHandler) GetNodeMetrics(w http.ResponseWriter, r *http.Request) {
|
|
vars := mux.Vars(r)
|
|
clusterID := vars["cluster_id"]
|
|
|
|
nodes, err := h.monitoringService.GetNodeMetrics(r.Context(), clusterID)
|
|
if err != nil {
|
|
respondError(w, http.StatusInternalServerError, "MONITORING_ERROR", err.Error())
|
|
return
|
|
}
|
|
|
|
// 转换为响应格式
|
|
response := make([]dto.NodeMetricsResponse, len(nodes))
|
|
for i, node := range nodes {
|
|
response[i] = dto.NodeMetricsResponse{
|
|
NodeName: node.NodeName,
|
|
Status: node.Status,
|
|
Role: node.Role,
|
|
Age: node.Age,
|
|
PodCount: node.PodCount,
|
|
CPUCapacity: node.CPUCapacity,
|
|
CPUAllocatable: node.CPUAllocatable,
|
|
CPUUsage: node.CPUUsage,
|
|
CPUPercent: node.CPUPercent,
|
|
MemoryCapacity: node.MemoryCapacity,
|
|
MemoryAllocatable: node.MemoryAllocatable,
|
|
MemoryUsage: node.MemoryUsage,
|
|
MemoryPercent: node.MemoryPercent,
|
|
GPUCapacity: node.GPUCapacity,
|
|
GPUUsage: node.GPUUsage,
|
|
GPUPercent: node.GPUPercent,
|
|
GPUType: node.GPUType,
|
|
OSImage: node.OSImage,
|
|
KernelVersion: node.KernelVersion,
|
|
ContainerRuntime: node.ContainerRuntime,
|
|
KubeletVersion: node.KubeletVersion,
|
|
}
|
|
}
|
|
|
|
respondJSON(w, http.StatusOK, response)
|
|
}
|