Skip to content

Commit

Permalink
Merge pull request #100905 from qiutongs/gpu_metrics_fix
Browse files Browse the repository at this point in the history
"Add AcceleratorStats to cri_stats_provider for 1.18"
  • Loading branch information
k8s-ci-robot committed Apr 9, 2021
2 parents 2dbd3c2 + 24ba1a1 commit d738446
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 1 deletion.
21 changes: 20 additions & 1 deletion pkg/kubelet/stats/cri_stats_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ func (p *criStatsProvider) ListPodCPUAndMemoryStats() ([]statsapi.PodStats, erro
if !caFound {
klog.V(4).Infof("Unable to find cadvisor stats for %q", containerID)
} else {
p.addCadvisorContainerStats(cs, &caStats)
p.addCadvisorContainerCPUAndMemoryStats(cs, &caStats)
}
ps.Containers = append(ps.Containers, *cs)
}
Expand Down Expand Up @@ -769,6 +769,25 @@ func (p *criStatsProvider) addCadvisorContainerStats(
cs.UserDefinedMetrics = cadvisorInfoToUserDefinedMetrics(caPodStats)
}

cpu, memory := cadvisorInfoToCPUandMemoryStats(caPodStats)
if cpu != nil {
cs.CPU = cpu
}
if memory != nil {
cs.Memory = memory
}
accelerators := cadvisorInfoToAcceleratorStats(caPodStats)
cs.Accelerators = accelerators
}

func (p *criStatsProvider) addCadvisorContainerCPUAndMemoryStats(
cs *statsapi.ContainerStats,
caPodStats *cadvisorapiv2.ContainerInfo,
) {
if caPodStats.Spec.HasCustomMetrics {
cs.UserDefinedMetrics = cadvisorInfoToUserDefinedMetrics(caPodStats)
}

cpu, memory := cadvisorInfoToCPUandMemoryStats(caPodStats)
if cpu != nil {
cs.CPU = cpu
Expand Down
17 changes: 17 additions & 0 deletions pkg/kubelet/stats/cri_stats_provider_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -259,11 +259,14 @@ func TestCRIListPodStats(t *testing.T) {
c0 := containerStatsMap[cName0]
assert.Equal(container0.CreatedAt, c0.StartTime.UnixNano())
checkCRICPUAndMemoryStats(assert, c0, infos[container0.ContainerStatus.Id].Stats[0])
checkCRIAcceleratorStats(assert, c0, infos[container0.ContainerStatus.Id].Stats[0])
checkCRIRootfsStats(assert, c0, containerStats0, &imageFsInfo)
checkCRILogsStats(assert, c0, &rootFsInfo, containerLogStats0)

c1 := containerStatsMap[cName1]
assert.Equal(container1.CreatedAt, c1.StartTime.UnixNano())
checkCRICPUAndMemoryStats(assert, c1, infos[container1.ContainerStatus.Id].Stats[0])
checkCRIAcceleratorStats(assert, c1, infos[container1.ContainerStatus.Id].Stats[0])
checkCRIRootfsStats(assert, c1, containerStats1, nil)
checkCRILogsStats(assert, c1, &rootFsInfo, containerLogStats1)
checkCRINetworkStats(assert, p0.Network, infos[sandbox0.PodSandboxStatus.Id].Stats[0].Network)
Expand All @@ -279,6 +282,7 @@ func TestCRIListPodStats(t *testing.T) {
assert.Equal(cName2, c2.Name)
assert.Equal(container2.CreatedAt, c2.StartTime.UnixNano())
checkCRICPUAndMemoryStats(assert, c2, infos[container2.ContainerStatus.Id].Stats[0])
checkCRIAcceleratorStats(assert, c2, infos[container2.ContainerStatus.Id].Stats[0])
checkCRIRootfsStats(assert, c2, containerStats2, &imageFsInfo)
checkCRILogsStats(assert, c2, &rootFsInfo, containerLogStats2)
checkCRINetworkStats(assert, p1.Network, infos[sandbox1.PodSandboxStatus.Id].Stats[0].Network)
Expand All @@ -295,6 +299,7 @@ func TestCRIListPodStats(t *testing.T) {
assert.Equal(cName3, c3.Name)
assert.Equal(container4.CreatedAt, c3.StartTime.UnixNano())
checkCRICPUAndMemoryStats(assert, c3, infos[container4.ContainerStatus.Id].Stats[0])
checkCRIAcceleratorStats(assert, c3, infos[container4.ContainerStatus.Id].Stats[0])
checkCRIRootfsStats(assert, c3, containerStats4, &imageFsInfo)

checkCRILogsStats(assert, c3, &rootFsInfo, containerLogStats4)
Expand Down Expand Up @@ -674,6 +679,18 @@ func checkCRICPUAndMemoryStats(assert *assert.Assertions, actual statsapi.Contai
assert.Equal(cs.Memory.ContainerData.Pgmajfault, *actual.Memory.MajorPageFaults)
}

// checkCRIAcceleratorStats asserts that actual.Accelerators mirrors
// cs.Accelerators: both lists must have the same length and, index by index,
// the same Make, Model, ID, MemoryTotal, MemoryUsed and DutyCycle.
func checkCRIAcceleratorStats(assert *assert.Assertions, actual statsapi.ContainerStats, cs *cadvisorapiv2.ContainerStats) {
	// A failed assert.Equal reports the failure but does not stop the test, so
	// bail out on a length mismatch: indexing actual.Accelerators below would
	// otherwise panic with an out-of-range index and hide the real failure.
	if !assert.Equal(len(cs.Accelerators), len(actual.Accelerators)) {
		return
	}
	for i := range cs.Accelerators {
		want, got := cs.Accelerators[i], actual.Accelerators[i]
		assert.Equal(want.Make, got.Make)
		assert.Equal(want.Model, got.Model)
		assert.Equal(want.ID, got.ID)
		assert.Equal(want.MemoryTotal, got.MemoryTotal)
		assert.Equal(want.MemoryUsed, got.MemoryUsed)
		assert.Equal(want.DutyCycle, got.DutyCycle)
	}
}

func checkCRIRootfsStats(assert *assert.Assertions, actual statsapi.ContainerStats, cs *runtimeapi.ContainerStats, imageFsInfo *cadvisorapiv2.FsInfo) {
assert.Equal(cs.WritableLayer.Timestamp, actual.Rootfs.Time.UnixNano())
if imageFsInfo != nil {
Expand Down
21 changes: 21 additions & 0 deletions pkg/kubelet/stats/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,27 @@ func cadvisorInfoToContainerCPUAndMemoryStats(name string, info *cadvisorapiv2.C
return result
}

// cadvisorInfoToAcceleratorStats converts the accelerator entries of the
// container stats selected by latestContainerStats(info) into
// statsapi.AcceleratorStats values, preserving their order.
//
// It returns nil when latestContainerStats reports nothing for info or when
// the selected stats carry no accelerator entries.
func cadvisorInfoToAcceleratorStats(info *cadvisorapiv2.ContainerInfo) []statsapi.AcceleratorStats {
	cstat, found := latestContainerStats(info)
	// An empty list is handled like a missing one so the result stays nil
	// instead of becoming an empty, non-nil slice.
	if !found || len(cstat.Accelerators) == 0 {
		return nil
	}
	// The final length is known up front, so allocate once rather than growing
	// the slice through repeated appends.
	result := make([]statsapi.AcceleratorStats, 0, len(cstat.Accelerators))
	for _, acc := range cstat.Accelerators {
		result = append(result, statsapi.AcceleratorStats{
			Make:        acc.Make,
			Model:       acc.Model,
			ID:          acc.ID,
			MemoryTotal: acc.MemoryTotal,
			MemoryUsed:  acc.MemoryUsed,
			DutyCycle:   acc.DutyCycle,
		})
	}
	return result
}

// cadvisorInfoToNetworkStats returns the statsapi.NetworkStats converted from
// the container info from cadvisor.
func cadvisorInfoToNetworkStats(name string, info *cadvisorapiv2.ContainerInfo) *statsapi.NetworkStats {
Expand Down
11 changes: 11 additions & 0 deletions pkg/kubelet/stats/stats_provider_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ const (
offsetFsTotalUsageBytes
offsetFsBaseUsageBytes
offsetFsInodeUsage
offsetAcceleratorDutyCycle
)

var (
Expand Down Expand Up @@ -499,6 +500,16 @@ func getTestContainerInfo(seed int, podName string, podNamespace string, contain
BaseUsageBytes: &baseUsageBytes,
InodeUsage: &inodeUsage,
},
Accelerators: []cadvisorapiv1.AcceleratorStats{
{
Make: "nvidia",
Model: "Tesla K80",
ID: "foobar",
MemoryTotal: uint64(seed + offsetMemUsageBytes),
MemoryUsed: uint64(seed + offsetMemUsageBytes),
DutyCycle: uint64(seed + offsetAcceleratorDutyCycle),
},
},
}
stats.Cpu.Usage.Total = uint64(seed + offsetCPUUsageCoreSeconds)
stats.CpuInst.Usage.Total = uint64(seed + offsetCPUUsageCores)
Expand Down

0 comments on commit d738446

Please sign in to comment.