cmd/telemetry/telemetry.go (58 changes: 36 additions & 22 deletions)
@@ -52,17 +52,19 @@ var (

flagAll bool

flagCPU bool
flagFrequency bool
flagIPC bool
flagC6 bool
flagIRQRate bool
flagMemory bool
flagNetwork bool
flagStorage bool
flagPower bool
flagTemperature bool
flagInstrMix bool
flagCPU bool
flagFrequency bool
flagIPC bool
flagC6 bool
flagIRQRate bool
flagMemory bool
flagNetwork bool
flagStorage bool
flagPower bool
flagTemperature bool
flagInstrMix bool
flagVirtualMemory bool
flagProcess bool

flagNoSystemSummary bool

@@ -76,17 +78,19 @@ const (

flagAllName = "all"

flagCPUName = "cpu"
flagFrequencyName = "frequency"
flagIPCName = "ipc"
flagC6Name = "c6"
flagIRQRateName = "irqrate"
flagMemoryName = "memory"
flagNetworkName = "network"
flagStorageName = "storage"
flagPowerName = "power"
flagTemperatureName = "temperature"
flagInstrMixName = "instrmix"
flagCPUName = "cpu"
flagFrequencyName = "frequency"
flagIPCName = "ipc"
flagC6Name = "c6"
flagIRQRateName = "irqrate"
flagMemoryName = "memory"
flagNetworkName = "network"
flagStorageName = "storage"
flagPowerName = "power"
flagTemperatureName = "temperature"
flagInstrMixName = "instrmix"
flagVirtualMemoryName = "virtual-memory"
flagProcessName = "process"

flagNoSystemSummaryName = "no-summary"

@@ -108,6 +112,8 @@ var categories = []app.Category{
{FlagName: flagStorageName, FlagVar: &flagStorage, DefaultValue: false, Help: "monitor storage", Tables: []table.TableDefinition{tableDefinitions[DriveTelemetryTableName]}},
{FlagName: flagIRQRateName, FlagVar: &flagIRQRate, DefaultValue: false, Help: "monitor IRQ rate", Tables: []table.TableDefinition{tableDefinitions[IRQRateTelemetryTableName]}},
{FlagName: flagInstrMixName, FlagVar: &flagInstrMix, DefaultValue: false, Help: "monitor instruction mix", Tables: []table.TableDefinition{tableDefinitions[InstructionTelemetryTableName]}},
{FlagName: flagVirtualMemoryName, FlagVar: &flagVirtualMemory, DefaultValue: false, Help: "monitor virtual memory", Tables: []table.TableDefinition{tableDefinitions[VirtualMemoryTelemetryTableName]}},
{FlagName: flagProcessName, FlagVar: &flagProcess, DefaultValue: false, Help: "monitor process telemetry", Tables: []table.TableDefinition{tableDefinitions[ProcessTelemetryTableName]}},
}

const (
@@ -338,6 +344,8 @@ func runCmd(cmd *cobra.Command, args []string) error {
report.RegisterHTMLRenderer(InstructionTelemetryTableName, instructionTelemetryTableHTMLRenderer)
report.RegisterHTMLRenderer(GaudiTelemetryTableName, gaudiTelemetryTableHTMLRenderer)
report.RegisterHTMLRenderer(PDUTelemetryTableName, pduTelemetryTableHTMLRenderer)
report.RegisterHTMLRenderer(VirtualMemoryTelemetryTableName, virtualMemoryTelemetryTableHTMLRenderer)
report.RegisterHTMLRenderer(ProcessTelemetryTableName, processTelemetryTableHTMLRenderer)

return reportingCommand.Run()
}
@@ -363,6 +371,9 @@ func summaryFromTableValues(allTableValues []table.TableValues, _ map[string]scr
networkReads := getMetricAverage(getTableValues(allTableValues, NetworkTelemetryTableName), []string{"rxkB/s"}, "Time")
networkWrites := getMetricAverage(getTableValues(allTableValues, NetworkTelemetryTableName), []string{"txkB/s"}, "Time")
memAvail := getMetricAverage(getTableValues(allTableValues, MemoryTelemetryTableName), []string{"avail"}, "Time")
minorFaults := getMetricAverage(getTableValues(allTableValues, VirtualMemoryTelemetryTableName), []string{"Minor Faults/s"}, "Time")
majorFaults := getMetricAverage(getTableValues(allTableValues, VirtualMemoryTelemetryTableName), []string{"Major Faults/s"}, "Time")
ctxSwitches := getMetricAverage(getTableValues(allTableValues, ProcessTelemetryTableName), []string{"Context Switches/s"}, "Time")
return table.TableValues{
TableDefinition: table.TableDefinition{
Name: telemetrySummaryTableName,
@@ -381,6 +392,9 @@
{Name: "Drive Writes (kB/s)", Values: []string{driveWrites}},
{Name: "Network RX (kB/s)", Values: []string{networkReads}},
{Name: "Network TX (kB/s)", Values: []string{networkWrites}},
{Name: "Minor Page Faults/s", Values: []string{minorFaults}},
{Name: "Major Page Faults/s", Values: []string{majorFaults}},
{Name: "Context Switches/s", Values: []string{ctxSwitches}},
},
}
}
cmd/telemetry/telemetry_renderers.go (70 changes: 70 additions & 0 deletions)
@@ -673,3 +673,73 @@ func pduTelemetryTableHTMLRenderer(tableValues table.TableValues, targetName string
}
return telemetryTableHTMLRenderer(tableValues, data, datasetNames, chartConfig, nil)
}

func virtualMemoryTelemetryTableHTMLRenderer(tableValues table.TableValues, targetName string) string {
data := [][]float64{}
datasetNames := []string{}
for _, field := range tableValues.Fields[1:] {
points := []float64{}
for _, val := range field.Values {
if val == "" {
break
}
stat, err := strconv.ParseFloat(val, 64)
if err != nil {
slog.Error("error parsing stat", slog.String("error", err.Error()))
return ""
}
points = append(points, stat)
}
if len(points) > 0 {
data = append(data, points)
datasetNames = append(datasetNames, field.Name)
}
}
chartConfig := report.ChartTemplateStruct{
ID: fmt.Sprintf("%s%d", tableValues.Name, util.RandUint(10000)),
XaxisText: "Time",
YaxisText: "count per second",
TitleText: "",
DisplayTitle: "false",
DisplayLegend: "true",
AspectRatio: "2",
SuggestedMin: "0",
SuggestedMax: "0",
}
return telemetryTableHTMLRenderer(tableValues, data, datasetNames, chartConfig, nil)
}

func processTelemetryTableHTMLRenderer(tableValues table.TableValues, targetName string) string {
data := [][]float64{}
datasetNames := []string{}
for _, field := range tableValues.Fields[1:] {
points := []float64{}
for _, val := range field.Values {
if val == "" {
break
}
stat, err := strconv.ParseFloat(val, 64)
if err != nil {
slog.Error("error parsing stat", slog.String("error", err.Error()))
return ""
}
points = append(points, stat)
}
if len(points) > 0 {
data = append(data, points)
datasetNames = append(datasetNames, field.Name)
}
}
chartConfig := report.ChartTemplateStruct{
ID: fmt.Sprintf("%s%d", tableValues.Name, util.RandUint(10000)),
XaxisText: "Time",
YaxisText: "count per second",
TitleText: "",
DisplayTitle: "false",
DisplayLegend: "true",
AspectRatio: "2",
SuggestedMin: "0",
SuggestedMax: "0",
}
return telemetryTableHTMLRenderer(tableValues, data, datasetNames, chartConfig, nil)
}
cmd/telemetry/telemetry_tables.go (112 changes: 112 additions & 0 deletions)
@@ -33,6 +33,8 @@ const (
TemperatureTelemetryTableName = "Temperature Telemetry"
GaudiTelemetryTableName = "Gaudi Telemetry"
PDUTelemetryTableName = "PDU Telemetry"
VirtualMemoryTelemetryTableName = "Virtual Memory Telemetry"
ProcessTelemetryTableName = "Process Telemetry"
)

// telemetry table menu labels
@@ -51,6 +53,8 @@ const (
TemperatureTelemetryMenuLabel = "Temperature"
GaudiTelemetryMenuLabel = "Gaudi"
PDUTelemetryMenuLabel = "PDU"
VirtualMemoryTelemetryMenuLabel = "Virtual Memory"
ProcessTelemetryMenuLabel = "Process"
)

var tableDefinitions = map[string]table.TableDefinition{
@@ -177,6 +181,22 @@ var tableDefinitions = map[string]table.TableDefinition{
script.PDUTelemetryScriptName,
},
FieldsFunc: pduTelemetryTableValues},
VirtualMemoryTelemetryTableName: {
Name: VirtualMemoryTelemetryTableName,
MenuLabel: VirtualMemoryTelemetryMenuLabel,
HasRows: true,
ScriptNames: []string{
script.KernelTelemetryScriptName,
},
FieldsFunc: virtualMemoryTelemetryTableValues},
ProcessTelemetryTableName: {
Name: ProcessTelemetryTableName,
MenuLabel: ProcessTelemetryMenuLabel,
HasRows: true,
ScriptNames: []string{
script.KernelTelemetryScriptName,
},
FieldsFunc: processTelemetryTableValues},
}

func cpuUtilizationTelemetryTableValues(outputs map[string]script.ScriptOutput) []table.Field {
@@ -707,3 +727,95 @@ func instructionTelemetryTableValues(outputs map[string]script.ScriptOutput) []t
}
return fields
}

func virtualMemoryTelemetryTableValues(outputs map[string]script.ScriptOutput) []table.Field {
fields := []table.Field{
{Name: "Time"},
{Name: "Minor Faults/s"},
{Name: "Major Faults/s"},
{Name: "Pgscan/s"},
{Name: "Pgsteal/s"},
{Name: "Swapin/s"},
{Name: "Swapout/s"},
}
// the output is in CSV format:
// timestamp,ctx_switches_per_sec,procs_running,procs_blocked,minor_faults_per_sec,major_faults_per_sec,pgscan_per_sec,pgsteal_per_sec,swapin_per_sec,swapout_per_sec
reader := csv.NewReader(strings.NewReader(outputs[script.KernelTelemetryScriptName].Stdout))
records, err := reader.ReadAll()
if err != nil {
slog.Error("failed to read virtual memory telemetry CSV output", slog.String("error", err.Error()))
return []table.Field{}
}
if len(records) == 0 {
return []table.Field{}
}
// first row is the header, find the indices of the fields we're interested in
header := records[0]
fieldIndices := make(map[string]int)
for i, fieldName := range header {
fieldIndices[fieldName] = i
}
requiredFields := []string{"timestamp", "minor_faults_per_sec", "major_faults_per_sec", "pgscan_per_sec", "pgsteal_per_sec", "swapin_per_sec", "swapout_per_sec"}
for _, field := range requiredFields {
if _, ok := fieldIndices[field]; !ok {
slog.Error("missing expected field in virtual memory telemetry output", slog.String("field", field))
return []table.Field{}
}
}
// subsequent rows are data
for _, record := range records[1:] {
if len(record) != len(header) {
slog.Error("unexpected number of fields in virtual memory telemetry output", slog.Int("expected", len(header)), slog.Int("got", len(record)))
continue
}
fields[0].Values = append(fields[0].Values, record[fieldIndices["timestamp"]])
fields[1].Values = append(fields[1].Values, record[fieldIndices["minor_faults_per_sec"]])
fields[2].Values = append(fields[2].Values, record[fieldIndices["major_faults_per_sec"]])
fields[3].Values = append(fields[3].Values, record[fieldIndices["pgscan_per_sec"]])
fields[4].Values = append(fields[4].Values, record[fieldIndices["pgsteal_per_sec"]])
fields[5].Values = append(fields[5].Values, record[fieldIndices["swapin_per_sec"]])
fields[6].Values = append(fields[6].Values, record[fieldIndices["swapout_per_sec"]])
}
return fields
}

func processTelemetryTableValues(outputs map[string]script.ScriptOutput) []table.Field {
fields := []table.Field{
{Name: "Time"},
{Name: "Context Switches/s"},
}
// the output is in CSV format:
// timestamp,ctx_switches_per_sec,procs_running,procs_blocked,minor_faults_per_sec,major_faults_per_sec,pgscan_per_sec,pgsteal_per_sec,swapin_per_sec,swapout_per_sec
reader := csv.NewReader(strings.NewReader(outputs[script.KernelTelemetryScriptName].Stdout))
records, err := reader.ReadAll()
if err != nil {
slog.Error("failed to read process telemetry CSV output", slog.String("error", err.Error()))
return []table.Field{}
}
if len(records) == 0 {
return []table.Field{}
}
// first row is the header, find the indices of the fields we're interested in
header := records[0]
fieldIndices := make(map[string]int)
for i, fieldName := range header {
fieldIndices[fieldName] = i
}
requiredFields := []string{"timestamp", "ctx_switches_per_sec"}
for _, field := range requiredFields {
if _, ok := fieldIndices[field]; !ok {
slog.Error("missing expected field in process telemetry output", slog.String("field", field))
return []table.Field{}
}
}
// subsequent rows are data
for _, record := range records[1:] {
if len(record) != len(header) {
slog.Error("unexpected number of fields in process telemetry output", slog.Int("expected", len(header)), slog.Int("got", len(record)))
continue
}
fields[0].Values = append(fields[0].Values, record[fieldIndices["timestamp"]])
fields[1].Values = append(fields[1].Values, record[fieldIndices["ctx_switches_per_sec"]])
}
return fields
}
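
For reference, a minimal, self-contained sketch (not part of the PR) of the header-indexed CSV parsing approach used by the two new table-values functions above. The column names come from the comment in the diff; the sample data row and its values are hypothetical, and real output would come from the kernel telemetry collection script.

package main

import (
	"encoding/csv"
	"fmt"
	"strings"
)

func main() {
	// hypothetical sample of the kernel telemetry CSV described in the PR
	sample := "timestamp,ctx_switches_per_sec,procs_running,procs_blocked,minor_faults_per_sec,major_faults_per_sec,pgscan_per_sec,pgsteal_per_sec,swapin_per_sec,swapout_per_sec\n" +
		"12:00:01,5321.4,3,0,1200.5,2.1,0.0,0.0,0.0,0.0\n"

	records, err := csv.NewReader(strings.NewReader(sample)).ReadAll()
	if err != nil {
		fmt.Println("parse error:", err)
		return
	}
	// map header names to column indices, as the table-values functions do,
	// so the code tolerates column reordering in the script output
	fieldIndices := map[string]int{}
	for i, name := range records[0] {
		fieldIndices[name] = i
	}
	// subsequent rows are data; pick out the columns of interest by name
	for _, record := range records[1:] {
		fmt.Println(
			"time:", record[fieldIndices["timestamp"]],
			"ctx switches/s:", record[fieldIndices["ctx_switches_per_sec"]],
			"minor faults/s:", record[fieldIndices["minor_faults_per_sec"]],
		)
	}
}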