Files
ocdp-go/backend/internal/adapter/input/http/rest/monitoring_handler.go
Ivan087 33ddaf97db fix: scale replicas in response, K8s metrics client, quota precheck, auth tests
- Add GetMetrics method to MetricsClient interface and implement cluster metrics API
- Add QuotaPrecheck service for validating resource quotas before deployment
- Add auth DTO with role/permission models and auth handler tests
- Add instance diagnostics: mounted NFS volumes, labels, annotations in pod diagnostics
- Update workspace handler with GetWorkspace endpoint and shared-user list
- Fix monitoring handler to use correct service method name
- Add tail_lines fallback in instance handler for snake_case query params
- Update nginx config for SSE log streaming support (no buffering)
- Add comprehensive test coverage: auth_service_test, auth_handler_test,
  auth_dto_test, metrics_client_test, quota_precheck_test
- Update error messages for quota validation and instance operations
- ModifyModal: fix YAML lineWidth:0, modified keys summary, delta-only submit
- InstanceCard: correctly disable scale-minus when replicas <= 0
- SidebarLayout: add hover transition for sidebar items
- Update todo.md and lessons.md with latest fixes
2026-05-20 16:56:29 +08:00

144 lines
4.6 KiB
Go

package rest
import (
"net/http"
"github.com/gorilla/mux"
"github.com/ocdp/cluster-service/internal/adapter/input/http/dto"
"github.com/ocdp/cluster-service/internal/domain/service"
)
// MonitoringHandler is the REST handler for the monitoring endpoints. Each of
// its methods calls into the wrapped monitoring service and hands the outcome
// to respondJSON or respondError.
type MonitoringHandler struct {
// monitoringService is the domain service every handler method delegates to.
monitoringService *service.MonitoringService
}
// NewMonitoringHandler builds a MonitoringHandler that serves requests through
// the supplied monitoring service. The pointer is stored exactly as given; no
// nil check is performed here.
func NewMonitoringHandler(monitoringService *service.MonitoringService) *MonitoringHandler {
	handler := new(MonitoringHandler)
	handler.monitoringService = monitoringService
	return handler
}
// GetClusterMonitoring returns the monitoring metrics of a single cluster.
//
// The cluster ID is taken from the "cluster_id" route variable, falling back
// to "id" when that one is absent. Outcomes:
//   - 200 with the converted dto.ClusterMetricsResponse on success
//   - 400 when the route supplied no cluster ID at all
//   - 500 with the service error text when the monitoring service fails
//
// @Summary 获取集群监控
// @Tags Monitoring
// @Produce json
// @Security BearerAuth
// @Param cluster_id path string true "集群 ID"
// @Success 200 {object} dto.ClusterMetricsResponse
// @Failure 400 {object} dto.ErrorResponse
// @Failure 500 {object} dto.ErrorResponse
// @Router /monitoring/clusters/{cluster_id} [get]
func (h *MonitoringHandler) GetClusterMonitoring(w http.ResponseWriter, r *http.Request) {
	vars := mux.Vars(r)
	clusterID := vars["cluster_id"]
	if clusterID == "" {
		// GetClusterStats funnels the compatibility route, documented as
		// /clusters/{id}/stats, through this handler. Accept its "id"
		// variable so that route cannot silently query with an empty ID.
		// NOTE(review): confirm the variable name used in the router.
		clusterID = vars["id"]
	}
	if clusterID == "" {
		// A missing path parameter is a request/routing problem, so report it
		// as such instead of surfacing whatever the service does with "".
		respondError(w, http.StatusBadRequest, "INVALID_REQUEST", "cluster_id is required")
		return
	}

	metrics, err := h.monitoringService.GetClusterMonitoring(r.Context(), clusterID)
	if err != nil {
		respondError(w, http.StatusInternalServerError, "MONITORING_ERROR", err.Error())
		return
	}

	respondJSON(w, http.StatusOK, dto.ToClusterMetricsResponse(metrics))
}
// GetClusterStats is a compatibility alias for cluster detail dashboards that
// historically read stats from /clusters/{id}/stats. It adds no logic of its
// own: the response writer and request are handed unchanged to
// GetClusterMonitoring, so both endpoints produce the same response.
//
// NOTE(review): GetClusterMonitoring looks the cluster up through the
// "cluster_id" mux route variable; confirm the compatibility route registers
// its path parameter under a name that handler reads, otherwise the lookup
// runs with an empty ID.
func (h *MonitoringHandler) GetClusterStats(w http.ResponseWriter, r *http.Request) {
h.GetClusterMonitoring(w, r)
}
// ListClusterMonitoring returns the monitoring metrics of every cluster.
//
// The service result is converted element by element with
// dto.ToClusterMetricsResponse and passed to respondJSON with 200. A service
// failure is reported as 500 carrying the service error text.
//
// @Summary 列出集群监控
// @Tags Monitoring
// @Produce json
// @Security BearerAuth
// @Success 200 {array} dto.ClusterMetricsResponse
// @Failure 500 {object} dto.ErrorResponse
// @Router /monitoring/clusters [get]
func (h *MonitoringHandler) ListClusterMonitoring(w http.ResponseWriter, r *http.Request) {
	clusters, err := h.monitoringService.ListClusterMonitoring(r.Context())
	if err != nil {
		respondError(w, http.StatusInternalServerError, "MONITORING_ERROR", err.Error())
		return
	}

	// Allocated with make so the payload is a non-nil slice even when the
	// service returns no clusters.
	payload := make([]*dto.ClusterMetricsResponse, 0, len(clusters))
	for _, cluster := range clusters {
		payload = append(payload, dto.ToClusterMetricsResponse(cluster))
	}

	respondJSON(w, http.StatusOK, payload)
}
// GetMonitoringSummary returns the aggregated monitoring summary.
//
// The summary produced by the monitoring service is converted with
// dto.ToMonitoringSummaryResponse and passed to respondJSON with 200. A
// service failure is reported as 500 carrying the service error text.
//
// @Summary 获取监控汇总
// @Tags Monitoring
// @Produce json
// @Security BearerAuth
// @Success 200 {object} dto.MonitoringSummaryResponse
// @Failure 500 {object} dto.ErrorResponse
// @Router /monitoring/summary [get]
func (h *MonitoringHandler) GetMonitoringSummary(w http.ResponseWriter, r *http.Request) {
	ctx := r.Context()

	overview, svcErr := h.monitoringService.GetMonitoringSummary(ctx)
	if svcErr != nil {
		respondError(w, http.StatusInternalServerError, "MONITORING_ERROR", svcErr.Error())
		return
	}

	respondJSON(w, http.StatusOK, dto.ToMonitoringSummaryResponse(overview))
}
// GetNodeMetrics returns the per-node metrics of one cluster.
//
// The cluster ID is taken from the "cluster_id" route variable. Outcomes:
//   - 200 with one dto.NodeMetricsResponse per node returned by the service
//   - 400 when the route supplied no cluster ID
//   - 500 with the service error text when the monitoring service fails
//
// @Summary 获取节点指标
// @Tags Monitoring
// @Produce json
// @Security BearerAuth
// @Param cluster_id path string true "集群 ID"
// @Success 200 {array} dto.NodeMetricsResponse
// @Failure 400 {object} dto.ErrorResponse
// @Failure 500 {object} dto.ErrorResponse
// @Router /monitoring/clusters/{cluster_id}/nodes [get]
func (h *MonitoringHandler) GetNodeMetrics(w http.ResponseWriter, r *http.Request) {
	clusterID := mux.Vars(r)["cluster_id"]
	if clusterID == "" {
		// A missing path parameter is a request/routing problem; reject it
		// here rather than asking the service about a cluster named "".
		respondError(w, http.StatusBadRequest, "INVALID_REQUEST", "cluster_id is required")
		return
	}

	nodes, err := h.monitoringService.GetNodeMetrics(r.Context(), clusterID)
	if err != nil {
		respondError(w, http.StatusInternalServerError, "MONITORING_ERROR", err.Error())
		return
	}

	// Copy each node field by field into its response DTO. The slice is
	// allocated with make so it is non-nil even for a cluster with no nodes
	// (presumably so the body encodes as [] rather than null — depends on
	// respondJSON's encoder).
	response := make([]dto.NodeMetricsResponse, len(nodes))
	for i, node := range nodes {
		response[i] = dto.NodeMetricsResponse{
			NodeName:          node.NodeName,
			Status:            node.Status,
			Role:              node.Role,
			Age:               node.Age,
			PodCount:          node.PodCount,
			CPUCapacity:       node.CPUCapacity,
			CPUAllocatable:    node.CPUAllocatable,
			CPUUsage:          node.CPUUsage,
			CPUPercent:        node.CPUPercent,
			MemoryCapacity:    node.MemoryCapacity,
			MemoryAllocatable: node.MemoryAllocatable,
			MemoryUsage:       node.MemoryUsage,
			MemoryPercent:     node.MemoryPercent,
			GPUCapacity:       node.GPUCapacity,
			GPUUsage:          node.GPUUsage,
			GPUPercent:        node.GPUPercent,
			GPUType:           node.GPUType,
			OSImage:           node.OSImage,
			KernelVersion:     node.KernelVersion,
			ContainerRuntime:  node.ContainerRuntime,
			KubeletVersion:    node.KubeletVersion,
		}
	}

	respondJSON(w, http.StatusOK, response)
}