feat: fix YAML field conversion, admin namespace, streaming logs, and vllm-serve deploy

- Fix Axios keysToSnake converting user values map keys (gpuMem -> gpu_mem)
  - Add skipRecurseKeys to keysToSnake for values/valuesYaml fields
  - Add values_yaml alt json tag and Normalize() in DTOs
  - Check both camelCase/snake_case in enforceNamespaceValues
  - Read both tailLines/tail_lines query param for diagnostics
- Admin users can freely choose namespace in LaunchModal (free-text input)
  - Block only kube-system/kube-public/kube-node-lease for admin
  - Regular users keep existing namespace restrictions
- Add SSE streaming pod logs endpoint (backend + frontend)
  - New PodLogStreamer interface and K8s Follow:true implementation
  - SSE handler with text/event-stream output
  - Frontend DiagnosticsModal: Stream button, auto-scroll, live indicator
- Remove per-card Refresh button from InstanceCard (redundant with page refresh)
- Deploy bge-m3 on vllm-serve 0.6.0 (gpuMem=10000, status=deployed)
This commit is contained in:
Ivan087
2026-05-12 16:50:25 +08:00
parent 7f238a3168
commit 7d9545f827
13 changed files with 475 additions and 61 deletions

View File

@ -1,6 +1,7 @@
package k8s
import (
"bufio"
"context"
"fmt"
"io"
@ -36,6 +37,23 @@ func (*MockDiagnosticsClient) GetDiagnostics(ctx context.Context, cluster *entit
}, nil
}
// StreamPodLogs is the mock implementation of pod log streaming.
//
// It emits a fixed sequence of sample log lines, in order, on the returned
// lines channel and then closes both channels. The cluster, namespace,
// podName, containerName and tailLines arguments are ignored. Nothing is ever
// sent on the error channel, and the returned error is always nil. Emission
// stops early if ctx is cancelled.
func (*MockDiagnosticsClient) StreamPodLogs(ctx context.Context, cluster *entity.Cluster, namespace, podName, containerName string, tailLines int64) (<-chan string, <-chan error, error) {
	mockLines := []string{
		"[mock] Streaming pod logs...",
		"[mock] Container started successfully",
		"[mock] Listening on :8080",
	}
	lines := make(chan string, 10)
	errs := make(chan error, 1)
	go func() {
		defer close(lines)
		defer close(errs)
		// One select per line: a single select with several send cases
		// would pick just one ready case at random and drop the others.
		for _, line := range mockLines {
			select {
			case <-ctx.Done():
				return
			case lines <- line:
			}
		}
	}()
	return lines, errs, nil
}
func (c *DiagnosticsClient) GetDiagnostics(ctx context.Context, cluster *entity.Cluster, instance *entity.Instance, tailLines int64) (*entity.InstanceDiagnostics, error) {
clientset, err := diagnosticsClientset(cluster)
if err != nil {
@ -73,6 +91,68 @@ func (c *DiagnosticsClient) GetDiagnostics(ctx context.Context, cluster *entity.
}, nil
}
// StreamPodLogs opens a following (Follow: true) log stream for the given
// container of a pod and delivers it line by line.
//
// tailLines selects how many existing lines are requested before following;
// non-positive values default to 200 and values above 2000 are capped at 2000.
//
// On success it returns a lines channel and an error channel. A background
// goroutine reads the stream, forwards every non-empty line, and closes both
// channels (and the underlying stream) when the stream ends or ctx is
// cancelled. A read error is sent on the error channel unless ctx has been
// cancelled, in which case the goroutine exits silently. A non-nil third
// return value means the stream could not be opened and both channels are nil.
func (c *DiagnosticsClient) StreamPodLogs(ctx context.Context, cluster *entity.Cluster, namespace, podName, containerName string, tailLines int64) (<-chan string, <-chan error, error) {
	clientset, err := diagnosticsClientset(cluster)
	if err != nil {
		return nil, nil, err
	}

	// Clamp the requested backlog to the range [1, 2000], defaulting to 200.
	if tailLines <= 0 {
		tailLines = 200
	}
	if tailLines > 2000 {
		tailLines = 2000
	}

	req := clientset.CoreV1().Pods(namespace).GetLogs(podName, &corev1.PodLogOptions{
		Container: containerName,
		Follow:    true,
		TailLines: &tailLines,
	})
	stream, err := req.Stream(ctx)
	if err != nil {
		return nil, nil, fmt.Errorf("failed to open log stream for %s/%s: %w", podName, containerName, err)
	}

	lines := make(chan string, 64)
	errs := make(chan error, 1)
	go func() {
		defer close(lines)
		defer close(errs)
		defer func() { _ = stream.Close() }()

		scanner := bufio.NewScanner(stream)
		// Allow long lines; Kubernetes log entries can exceed the default 64 KiB.
		scanner.Buffer(make([]byte, 0, 64*1024), 2*1024*1024)
		for scanner.Scan() {
			if ctx.Err() != nil {
				return
			}
			line := scanner.Text()
			if line == "" {
				continue
			}
			select {
			case lines <- line:
			case <-ctx.Done():
				return
			}
		}

		// A cancelled context is a normal shutdown, not a stream failure.
		// Checking it explicitly avoids racing a always-ready buffered send
		// against ctx.Done() in a select, which would report the
		// cancellation-induced read error only some of the time.
		if ctx.Err() != nil {
			return
		}
		if err := scanner.Err(); err != nil {
			// errs has capacity 1 and this is the only send, so it never blocks.
			errs <- err
		}
	}()
	return lines, errs, nil
}
func diagnosticsClientset(cluster *entity.Cluster) (kubernetes.Interface, error) {
config, err := restConfigFromCluster(cluster)
if err != nil {