From 382e53b099c76b0a16ac66fda2b1b34847c3e2f6 Mon Sep 17 00:00:00 2001
From: Fulvio Galeazzi <fulvio.galeazzi@garr.it>
Date: Mon, 13 May 2019 14:10:38 +0200
Subject: [PATCH] 2019-05-13: FG; Added GPU monitoring.

---
Review note (text between "---" and the first "diff --git" is ignored by
git am): in the two Linux UserParameter files every gpu.* item now calls
/usr/bin/nvidia-smi by absolute path. Previously only gpu.number did, so the
per-GPU items depended on the agent's PATH while gpu.number, get_gpus_info.sh
and the ZBXNVIDIA sudoers alias all hard-code /usr/bin/nvidia-smi.
The blob ids on the "index" lines are those of the original commit.

 Any/Sudo/zabbix                               |   4 +-
 GPU/Config/userparameter_nvidia-smi.conf      |   9 +
 .../userparameter_nvidia-smi.conf.linux       |   9 +
 .../userparameter_nvidia-smi.conf.windows     |   9 +
 GPU/README.md                                 |  44 ++
 GPU/Script/get_gpus_info.bat                  |  35 ++
 GPU/Script/get_gpus_info.sh                   |  27 +
 GPU/Template/zbx_nvidia-smi-multi-gpu.xml     | 587 ++++++++++++++++++
 8 files changed, 723 insertions(+), 1 deletion(-)
 create mode 100644 GPU/Config/userparameter_nvidia-smi.conf
 create mode 100644 GPU/Config/userparameter_nvidia-smi.conf.linux
 create mode 100644 GPU/Config/userparameter_nvidia-smi.conf.windows
 create mode 100644 GPU/README.md
 create mode 100644 GPU/Script/get_gpus_info.bat
 create mode 100755 GPU/Script/get_gpus_info.sh
 create mode 100644 GPU/Template/zbx_nvidia-smi-multi-gpu.xml

diff --git a/Any/Sudo/zabbix b/Any/Sudo/zabbix
index 2571ccd..464cb03 100644
--- a/Any/Sudo/zabbix
+++ b/Any/Sudo/zabbix
@@ -11,4 +11,6 @@ Cmnd_Alias ZBXSMCLI = /bin/SMcli, /usr/bin/SMcli, /opt/dell/mdstoragemanager/c
 
 Cmnd_Alias ZBXPDNS = /usr/bin/pdns_control, /usr/bin/rec_control
 
-zabbix ALL = (root) NOPASSWD: ZBXCEPH, ZBXMULTIPATH, ZBXCRMCMD, ZBXHAPROXY, ZBXIPSTATE, ZBXPS, ZBXSMCLI, ZBXPDNS
+Cmnd_Alias ZBXNVIDIA = /usr/bin/nvidia-smi
+
+zabbix ALL = (root) NOPASSWD: ZBXCEPH, ZBXMULTIPATH, ZBXCRMCMD, ZBXHAPROXY, ZBXIPSTATE, ZBXPS, ZBXSMCLI, ZBXPDNS, ZBXNVIDIA
diff --git a/GPU/Config/userparameter_nvidia-smi.conf b/GPU/Config/userparameter_nvidia-smi.conf
new file mode 100644
index 0000000..970f08a
--- /dev/null
+++ b/GPU/Config/userparameter_nvidia-smi.conf
@@ -0,0 +1,9 @@
+UserParameter=gpu.number,/usr/bin/nvidia-smi -L | /usr/bin/wc -l
+UserParameter=gpu.discovery,/etc/zabbix/scripts/get_gpus_info.sh
+UserParameter=gpu.fanspeed[*],/usr/bin/nvidia-smi --query-gpu=fan.speed --format=csv,noheader,nounits -i $1 | tr -d "\n"
+UserParameter=gpu.power[*],/usr/bin/nvidia-smi --query-gpu=power.draw --format=csv,noheader,nounits -i $1 | tr -d "\n"
+UserParameter=gpu.temp[*],/usr/bin/nvidia-smi --query-gpu=temperature.gpu --format=csv,noheader,nounits -i $1 | tr -d "\n"
+UserParameter=gpu.utilization[*],/usr/bin/nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i $1 | tr -d "\n"
+UserParameter=gpu.memfree[*],/usr/bin/nvidia-smi --query-gpu=memory.free --format=csv,noheader,nounits -i $1 | tr -d "\n"
+UserParameter=gpu.memused[*],/usr/bin/nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i $1 | tr -d "\n"
+UserParameter=gpu.memtotal[*],/usr/bin/nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits -i $1 | tr -d "\n"
diff --git a/GPU/Config/userparameter_nvidia-smi.conf.linux b/GPU/Config/userparameter_nvidia-smi.conf.linux
new file mode 100644
index 0000000..970f08a
--- /dev/null
+++ b/GPU/Config/userparameter_nvidia-smi.conf.linux
@@ -0,0 +1,9 @@
+UserParameter=gpu.number,/usr/bin/nvidia-smi -L | /usr/bin/wc -l
+UserParameter=gpu.discovery,/etc/zabbix/scripts/get_gpus_info.sh
+UserParameter=gpu.fanspeed[*],/usr/bin/nvidia-smi --query-gpu=fan.speed --format=csv,noheader,nounits -i $1 | tr -d "\n"
+UserParameter=gpu.power[*],/usr/bin/nvidia-smi --query-gpu=power.draw --format=csv,noheader,nounits -i $1 | tr -d "\n"
+UserParameter=gpu.temp[*],/usr/bin/nvidia-smi --query-gpu=temperature.gpu --format=csv,noheader,nounits -i $1 | tr -d "\n"
+UserParameter=gpu.utilization[*],/usr/bin/nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i $1 | tr -d "\n"
+UserParameter=gpu.memfree[*],/usr/bin/nvidia-smi --query-gpu=memory.free --format=csv,noheader,nounits -i $1 | tr -d "\n"
+UserParameter=gpu.memused[*],/usr/bin/nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i $1 | tr -d "\n"
+UserParameter=gpu.memtotal[*],/usr/bin/nvidia-smi --query-gpu=memory.total --format=csv,noheader,nounits -i $1 | tr -d "\n"
diff --git a/GPU/Config/userparameter_nvidia-smi.conf.windows b/GPU/Config/userparameter_nvidia-smi.conf.windows
new file mode 100644
index 0000000..f17cb19
--- /dev/null
+++ b/GPU/Config/userparameter_nvidia-smi.conf.windows
@@ -0,0 +1,9 @@
+UserParameter=gpu.number,"C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe" -L | find /c /v ""
+UserParameter=gpu.discovery,C:\scripts\get_gpus_info.bat
+UserParameter=gpu.fanspeed[*],"C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe" --query-gpu=fan.speed --format=csv,noheader,nounits -i $1
+UserParameter=gpu.power[*],"C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe" --query-gpu=power.draw --format=csv,noheader,nounits -i $1
+UserParameter=gpu.temp[*],"C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe" --query-gpu=temperature.gpu --format=csv,noheader,nounits -i $1
+UserParameter=gpu.utilization[*],"C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe" --query-gpu=utilization.gpu --format=csv,noheader,nounits -i $1
+UserParameter=gpu.memfree[*],"C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe" --query-gpu=memory.free --format=csv,noheader,nounits -i $1
+UserParameter=gpu.memused[*],"C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe" --query-gpu=memory.used --format=csv,noheader,nounits -i $1
+UserParameter=gpu.memtotal[*],"C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe" --query-gpu=memory.total --format=csv,noheader,nounits -i $1
diff --git a/GPU/README.md b/GPU/README.md
new file mode 100644
index 0000000..888af9a
--- /dev/null
+++ b/GPU/README.md
@@ -0,0 +1,44 @@
+#
+# This is copied from:
+# https://github.com/plambe/zabbix-nvidia-smi-multi-gpu
+########
+#
+# zabbix-nvidia-smi-multi-gpu
+A zabbix template using nvidia-smi. Works with multiple GPUs on Windows and Linux.
+
+## Features:
+
+- low-level discovery of all the graphics cards
+- item prototypes for:
+  - fan speed
+  - total, free and used memory
+  - power draw in decaWatts (tens of Watts, so that it fits nicely in the graphs)
+  - temperature
+  - utilization
+- a graph prototype showing the fan speed, power draw and temperature in one graph
+- trigger prototypes set at different GPU temperatures
+- a batch script for low-level discovery on Windows
+- a Bash script for low-level discovery on Linux
+
+Events caused by the triggers:
+
+
+And the graphs:
+
+
+This is essentially a multi-GPU rewrite of RichardKav's template, as found here: https://github.com/RichardKav/zabbix-nvidia-smi-integration/
+
+## On Windows:
+
+* Add the contents of the file `userparameter_nvidia-smi.conf.windows` to your zabbix_agentd.conf file.
+* The Windows script file get_gpus_info.bat should be put in C:\scripts\
+* It has no dependencies other than nvidia-smi.exe
+* Of course, it is possible to use other paths, but:
+  * If you change C:\scripts\, you need to update the "UserParameter=gpu.discovery" line in zabbix_agentd.conf
+  * If you have installed nvidia-smi.exe in an alternate location, you need to update both get_gpus_info.bat and zabbix_agentd.conf
+
+## On Linux:
+
+The following steps are needed on the host running the zabbix-agent:
+* Add the contents of the file `userparameter_nvidia-smi.conf.linux` to your zabbix_agentd.conf file.
+* The get_gpus_info.sh file should be put in /etc/zabbix/scripts/ and made executable by running ```chmod +x get_gpus_info.sh```
diff --git a/GPU/Script/get_gpus_info.bat b/GPU/Script/get_gpus_info.bat
new file mode 100644
index 0000000..abd2e45
--- /dev/null
+++ b/GPU/Script/get_gpus_info.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+SETLOCAL ENABLEDELAYEDEXPANSION
+
+echo {
+echo "data":[
+
+SET count=1
+FOR /F "tokens=* USEBACKQ" %%F IN (`"C:\Program Files\NVIDIA Corporation\NVSMI\nvidia-smi.exe" -L`) DO (
+  if !count! GTR 1 echo ,
+
+  SET line=%%F
+  SET var!count!=%%F
+  SET /a count=!count!+1
+  for /f "tokens=1 delims=:" %%A in ('echo %%F') do (
+    for /f "tokens=2 delims= " %%B in ('echo %%A') do (
+      echo|set /p={"{#GPUINDEX}":"
+      echo|set /p=%%B", "
+    )
+  )
+  for /f "tokens=3 delims=:" %%A in ('echo %%F') do (
+    echo|set /p={#GPUUUID}":"
+    for /f "tokens=1 delims= " %%B in ('echo %%A') do (
+      for /f "tokens=1 delims=)" %%C in ('echo %%B') do (
+        echo|set /p=%%C"}
+      )
+    )
+    rem echo|set /p=
+  )
+)
+echo.
+echo ]
+echo }
+
+ENDLOCAL
+
diff --git a/GPU/Script/get_gpus_info.sh b/GPU/Script/get_gpus_info.sh
new file mode 100755
index 0000000..8cc6104
--- /dev/null
+++ b/GPU/Script/get_gpus_info.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Zabbix low-level discovery: print {#GPUINDEX}/{#GPUUUID} JSON for every GPU
+# listed by "nvidia-smi -L" (rows like "GPU 0: <name> (UUID: GPU-...)").
+
+result=$(/usr/bin/nvidia-smi -L)
+row='^GPU ([0-9]+):.*\(UUID: *([^) ]+)\)'
+first=1
+
+echo "{"
+echo "\"data\":["
+
+while IFS= read -r line
+do
+    # Ignore anything that is not a GPU row (empty output, error text), so a
+    # failing nvidia-smi yields an empty list instead of a bogus blank entry.
+    [[ $line =~ $row ]] || continue
+    if (( first != 1 ))
+    then
+        echo ,
+    fi
+    first=0
+    echo -n "{\"{#GPUINDEX}\":\"${BASH_REMATCH[1]}\", \"{#GPUUUID}\":\"${BASH_REMATCH[2]}\"}"
+done < <(printf '%s\n' "$result")
+
+echo
+echo "]"
+echo "}"
diff --git a/GPU/Template/zbx_nvidia-smi-multi-gpu.xml b/GPU/Template/zbx_nvidia-smi-multi-gpu.xml new file mode 100644 index 0000000..7722fd3 --- /dev/null +++ b/GPU/Template/zbx_nvidia-smi-multi-gpu.xml @@ -0,0 +1,587 @@ +<?xml version="1.0" encoding="UTF-8"?> +<zabbix_export> + <version>3.0</version> + <date>2018-06-05T20:56:12Z</date> + <groups> + <group> + <name>Templates</name> + </group> + </groups> + <templates> + <template> + <template>Template Nvidia GPUs Performance</template> + <name>Template Nvidia GPUs Performance</name> + <description/> + <groups> + <group> + <name>Templates</name> + </group> + </groups> + <applications> + <application> + <name>Nvidia</name> + </application> + </applications> + <items> + <item> + <name>Number of GPUs</name> + <type>0</type> + <snmp_community/> + <multiplier>0</multiplier> + <snmp_oid/> + <key>gpu.number</key> + <delay>30</delay> + <history>90</history> + <trends>365</trends> + <status>0</status> + <value_type>0</value_type> + <allowed_hosts/> + <units/> + <delta>0</delta> + <snmpv3_contextname/> + <snmpv3_securityname/> + <snmpv3_securitylevel>0</snmpv3_securitylevel> + <snmpv3_authprotocol>0</snmpv3_authprotocol> + <snmpv3_authpassphrase/> + <snmpv3_privprotocol>0</snmpv3_privprotocol> + <snmpv3_privpassphrase/> + 
<formula>1</formula> + <delay_flex/> + <params/> + <ipmi_sensor/> + <data_type>0</data_type> + <authtype>0</authtype> + <username/> + <password/> + <publickey/> + <privatekey/> + <port/> + <description>The number of GPUs present on this system.</description> + <inventory_link>0</inventory_link> + <applications> + <application> + <name>Nvidia</name> + </application> + </applications> + <valuemap/> + <logtimefmt/> + </item> + </items> + <discovery_rules> + <discovery_rule> + <name>GPU discovery</name> + <type>0</type> + <snmp_community/> + <snmp_oid/> + <key>gpu.discovery</key> + <delay>600</delay> + <status>0</status> + <allowed_hosts/> + <snmpv3_contextname/> + <snmpv3_securityname/> + <snmpv3_securitylevel>0</snmpv3_securitylevel> + <snmpv3_authprotocol>0</snmpv3_authprotocol> + <snmpv3_authpassphrase/> + <snmpv3_privprotocol>0</snmpv3_privprotocol> + <snmpv3_privpassphrase/> + <delay_flex/> + <params/> + <ipmi_sensor/> + <authtype>0</authtype> + <username/> + <password/> + <publickey/> + <privatekey/> + <port/> + <filter> + <evaltype>0</evaltype> + <formula/> + <conditions/> + </filter> + <lifetime>30</lifetime> + <description>Discovery of graphics cards.</description> + <item_prototypes> + <item_prototype> + <name>GPU $1 Fan Speed</name> + <type>0</type> + <snmp_community/> + <multiplier>1</multiplier> + <snmp_oid/> + <key>gpu.fanspeed[{#GPUINDEX}]</key> + <delay>60</delay> + <history>7</history> + <trends>365</trends> + <status>0</status> + <value_type>3</value_type> + <allowed_hosts/> + <units>%</units> + <delta>0</delta> + <snmpv3_contextname/> + <snmpv3_securityname/> + <snmpv3_securitylevel>0</snmpv3_securitylevel> + <snmpv3_authprotocol>0</snmpv3_authprotocol> + <snmpv3_authpassphrase/> + <snmpv3_privprotocol>0</snmpv3_privprotocol> + <snmpv3_privpassphrase/> + <formula>1</formula> + <delay_flex/> + <params/> + <ipmi_sensor/> + <data_type>0</data_type> + <authtype>0</authtype> + <username/> + <password/> + <publickey/> + <privatekey/> + <port/> + 
<description/> + <inventory_link>0</inventory_link> + <applications> + <application> + <name>Nvidia</name> + </application> + </applications> + <valuemap/> + <logtimefmt/> + <application_prototypes/> + </item_prototype> + <item_prototype> + <name>GPU $1 Memory Free</name> + <type>0</type> + <snmp_community/> + <multiplier>0</multiplier> + <snmp_oid/> + <key>gpu.memfree[{#GPUINDEX}]</key> + <delay>60</delay> + <history>7</history> + <trends>365</trends> + <status>0</status> + <value_type>3</value_type> + <allowed_hosts/> + <units>MB</units> + <delta>0</delta> + <snmpv3_contextname/> + <snmpv3_securityname/> + <snmpv3_securitylevel>0</snmpv3_securitylevel> + <snmpv3_authprotocol>0</snmpv3_authprotocol> + <snmpv3_authpassphrase/> + <snmpv3_privprotocol>0</snmpv3_privprotocol> + <snmpv3_privpassphrase/> + <formula>1</formula> + <delay_flex/> + <params/> + <ipmi_sensor/> + <data_type>0</data_type> + <authtype>0</authtype> + <username/> + <password/> + <publickey/> + <privatekey/> + <port/> + <description/> + <inventory_link>0</inventory_link> + <applications> + <application> + <name>Nvidia</name> + </application> + </applications> + <valuemap/> + <logtimefmt/> + <application_prototypes/> + </item_prototype> + <item_prototype> + <name>GPU $1 Memory Total</name> + <type>0</type> + <snmp_community/> + <multiplier>0</multiplier> + <snmp_oid/> + <key>gpu.memtotal[{#GPUINDEX}]</key> + <delay>60</delay> + <history>7</history> + <trends>365</trends> + <status>0</status> + <value_type>3</value_type> + <allowed_hosts/> + <units>MB</units> + <delta>0</delta> + <snmpv3_contextname/> + <snmpv3_securityname/> + <snmpv3_securitylevel>0</snmpv3_securitylevel> + <snmpv3_authprotocol>0</snmpv3_authprotocol> + <snmpv3_authpassphrase/> + <snmpv3_privprotocol>0</snmpv3_privprotocol> + <snmpv3_privpassphrase/> + <formula>1</formula> + <delay_flex/> + <params/> + <ipmi_sensor/> + <data_type>0</data_type> + <authtype>0</authtype> + <username/> + <password/> + <publickey/> + <privatekey/> + 
<port/> + <description/> + <inventory_link>0</inventory_link> + <applications> + <application> + <name>Nvidia</name> + </application> + </applications> + <valuemap/> + <logtimefmt/> + <application_prototypes/> + </item_prototype> + <item_prototype> + <name>GPU $1 Memory Used</name> + <type>0</type> + <snmp_community/> + <multiplier>0</multiplier> + <snmp_oid/> + <key>gpu.memused[{#GPUINDEX}]</key> + <delay>60</delay> + <history>7</history> + <trends>365</trends> + <status>0</status> + <value_type>3</value_type> + <allowed_hosts/> + <units>MB</units> + <delta>0</delta> + <snmpv3_contextname/> + <snmpv3_securityname/> + <snmpv3_securitylevel>0</snmpv3_securitylevel> + <snmpv3_authprotocol>0</snmpv3_authprotocol> + <snmpv3_authpassphrase/> + <snmpv3_privprotocol>0</snmpv3_privprotocol> + <snmpv3_privpassphrase/> + <formula>1</formula> + <delay_flex/> + <params/> + <ipmi_sensor/> + <data_type>0</data_type> + <authtype>0</authtype> + <username/> + <password/> + <publickey/> + <privatekey/> + <port/> + <description/> + <inventory_link>0</inventory_link> + <applications> + <application> + <name>Nvidia</name> + </application> + </applications> + <valuemap/> + <logtimefmt/> + <application_prototypes/> + </item_prototype> + <item_prototype> + <name>GPU $1 Power in decaWatts</name> + <type>0</type> + <snmp_community/> + <multiplier>1</multiplier> + <snmp_oid/> + <key>gpu.power[{#GPUINDEX}]</key> + <delay>60</delay> + <history>7</history> + <trends>365</trends> + <status>0</status> + <value_type>0</value_type> + <allowed_hosts/> + <units>dW</units> + <delta>0</delta> + <snmpv3_contextname/> + <snmpv3_securityname/> + <snmpv3_securitylevel>0</snmpv3_securitylevel> + <snmpv3_authprotocol>0</snmpv3_authprotocol> + <snmpv3_authpassphrase/> + <snmpv3_privprotocol>0</snmpv3_privprotocol> + <snmpv3_privpassphrase/> + <formula>0.1</formula> + <delay_flex/> + <params/> + <ipmi_sensor/> + <data_type>0</data_type> + <authtype>0</authtype> + <username/> + <password/> + <publickey/> + 
<privatekey/> + <port/> + <description/> + <inventory_link>0</inventory_link> + <applications> + <application> + <name>Nvidia</name> + </application> + </applications> + <valuemap/> + <logtimefmt/> + <application_prototypes/> + </item_prototype> + <item_prototype> + <name>GPU $1 Temperature</name> + <type>0</type> + <snmp_community/> + <multiplier>0</multiplier> + <snmp_oid/> + <key>gpu.temp[{#GPUINDEX}]</key> + <delay>60</delay> + <history>7</history> + <trends>365</trends> + <status>0</status> + <value_type>0</value_type> + <allowed_hosts/> + <units>C</units> + <delta>0</delta> + <snmpv3_contextname/> + <snmpv3_securityname/> + <snmpv3_securitylevel>0</snmpv3_securitylevel> + <snmpv3_authprotocol>0</snmpv3_authprotocol> + <snmpv3_authpassphrase/> + <snmpv3_privprotocol>0</snmpv3_privprotocol> + <snmpv3_privpassphrase/> + <formula>1</formula> + <delay_flex/> + <params/> + <ipmi_sensor/> + <data_type>0</data_type> + <authtype>0</authtype> + <username/> + <password/> + <publickey/> + <privatekey/> + <port/> + <description/> + <inventory_link>0</inventory_link> + <applications> + <application> + <name>Nvidia</name> + </application> + </applications> + <valuemap/> + <logtimefmt/> + <application_prototypes/> + </item_prototype> + <item_prototype> + <name>GPU $1 Utilization</name> + <type>0</type> + <snmp_community/> + <multiplier>0</multiplier> + <snmp_oid/> + <key>gpu.utilization[{#GPUINDEX}]</key> + <delay>60</delay> + <history>7</history> + <trends>365</trends> + <status>0</status> + <value_type>3</value_type> + <allowed_hosts/> + <units>%</units> + <delta>0</delta> + <snmpv3_contextname/> + <snmpv3_securityname/> + <snmpv3_securitylevel>0</snmpv3_securitylevel> + <snmpv3_authprotocol>0</snmpv3_authprotocol> + <snmpv3_authpassphrase/> + <snmpv3_privprotocol>0</snmpv3_privprotocol> + <snmpv3_privpassphrase/> + <formula>1</formula> + <delay_flex/> + <params/> + <ipmi_sensor/> + <data_type>0</data_type> + <authtype>0</authtype> + <username/> + <password/> + 
<publickey/> + <privatekey/> + <port/> + <description/> + <inventory_link>0</inventory_link> + <applications> + <application> + <name>Nvidia</name> + </application> + </applications> + <valuemap/> + <logtimefmt/> + <application_prototypes/> + </item_prototype> + </item_prototypes> + <trigger_prototypes> + <trigger_prototype> + <expression>{Template Nvidia GPUs Performance:gpu.temp[{#GPUINDEX}].last()}>80</expression> + <name>GPU {#GPUINDEX} Temperature is extremely high</name> + <url/> + <status>0</status> + <priority>5</priority> + <description>A GPU's temperature is getting extremely high!</description> + <type>0</type> + <dependencies/> + </trigger_prototype> + <trigger_prototype> + <expression>{Template Nvidia GPUs Performance:gpu.temp[{#GPUINDEX}].last()}>70</expression> + <name>GPU {#GPUINDEX} Temperature is high</name> + <url/> + <status>0</status> + <priority>2</priority> + <description>A GPU's temperature is getting high!</description> + <type>0</type> + <dependencies/> + </trigger_prototype> + <trigger_prototype> + <expression>{Template Nvidia GPUs Performance:gpu.temp[{#GPUINDEX}].last()}>75</expression> + <name>GPU {#GPUINDEX} Temperature is very high</name> + <url/> + <status>0</status> + <priority>4</priority> + <description>A GPU's temperature is getting very high!</description> + <type>0</type> + <dependencies/> + </trigger_prototype> + </trigger_prototypes> + <graph_prototypes> + <graph_prototype> + <name>GPU {#GPUINDEX} Memory</name> + <width>900</width> + <height>200</height> + <yaxismin>0.0000</yaxismin> + <yaxismax>100.0000</yaxismax> + <show_work_period>1</show_work_period> + <show_triggers>1</show_triggers> + <type>0</type> + <show_legend>1</show_legend> + <show_3d>0</show_3d> + <percent_left>0.0000</percent_left> + <percent_right>0.0000</percent_right> + <ymin_type_1>0</ymin_type_1> + <ymax_type_1>0</ymax_type_1> + <ymin_item_1>0</ymin_item_1> + <ymax_item_1>0</ymax_item_1> + <graph_items> + <graph_item> + <sortorder>0</sortorder> + 
<drawtype>0</drawtype> + <color>00AA00</color> + <yaxisside>0</yaxisside> + <calc_fnc>2</calc_fnc> + <type>0</type> + <item> + <host>Template Nvidia GPUs Performance</host> + <key>gpu.memfree[{#GPUINDEX}]</key> + </item> + </graph_item> + <graph_item> + <sortorder>1</sortorder> + <drawtype>0</drawtype> + <color>0000DD</color> + <yaxisside>0</yaxisside> + <calc_fnc>2</calc_fnc> + <type>0</type> + <item> + <host>Template Nvidia GPUs Performance</host> + <key>gpu.memused[{#GPUINDEX}]</key> + </item> + </graph_item> + </graph_items> + </graph_prototype> + <graph_prototype> + <name>GPU {#GPUINDEX} Temperature, Fan Speed and Power</name> + <width>900</width> + <height>200</height> + <yaxismin>0.0000</yaxismin> + <yaxismax>100.0000</yaxismax> + <show_work_period>1</show_work_period> + <show_triggers>1</show_triggers> + <type>0</type> + <show_legend>1</show_legend> + <show_3d>0</show_3d> + <percent_left>0.0000</percent_left> + <percent_right>0.0000</percent_right> + <ymin_type_1>0</ymin_type_1> + <ymax_type_1>0</ymax_type_1> + <ymin_item_1>0</ymin_item_1> + <ymax_item_1>0</ymax_item_1> + <graph_items> + <graph_item> + <sortorder>0</sortorder> + <drawtype>0</drawtype> + <color>1A7C11</color> + <yaxisside>0</yaxisside> + <calc_fnc>2</calc_fnc> + <type>0</type> + <item> + <host>Template Nvidia GPUs Performance</host> + <key>gpu.power[{#GPUINDEX}]</key> + </item> + </graph_item> + <graph_item> + <sortorder>1</sortorder> + <drawtype>0</drawtype> + <color>2774A4</color> + <yaxisside>0</yaxisside> + <calc_fnc>2</calc_fnc> + <type>0</type> + <item> + <host>Template Nvidia GPUs Performance</host> + <key>gpu.fanspeed[{#GPUINDEX}]</key> + </item> + </graph_item> + <graph_item> + <sortorder>2</sortorder> + <drawtype>0</drawtype> + <color>F63100</color> + <yaxisside>0</yaxisside> + <calc_fnc>2</calc_fnc> + <type>0</type> + <item> + <host>Template Nvidia GPUs Performance</host> + <key>gpu.temp[{#GPUINDEX}]</key> + </item> + </graph_item> + </graph_items> + </graph_prototype> + 
<graph_prototype> + <name>GPU {#GPUINDEX} Utilization</name> + <width>900</width> + <height>200</height> + <yaxismin>0.0000</yaxismin> + <yaxismax>100.0000</yaxismax> + <show_work_period>1</show_work_period> + <show_triggers>1</show_triggers> + <type>0</type> + <show_legend>1</show_legend> + <show_3d>0</show_3d> + <percent_left>0.0000</percent_left> + <percent_right>0.0000</percent_right> + <ymin_type_1>0</ymin_type_1> + <ymax_type_1>0</ymax_type_1> + <ymin_item_1>0</ymin_item_1> + <ymax_item_1>0</ymax_item_1> + <graph_items> + <graph_item> + <sortorder>0</sortorder> + <drawtype>0</drawtype> + <color>2774A4</color> + <yaxisside>0</yaxisside> + <calc_fnc>2</calc_fnc> + <type>0</type> + <item> + <host>Template Nvidia GPUs Performance</host> + <key>gpu.utilization[{#GPUINDEX}]</key> + </item> + </graph_item> + </graph_items> + </graph_prototype> + </graph_prototypes> + <host_prototypes/> + </discovery_rule> + </discovery_rules> + <macros/> + <templates/> + <screens/> + </template> + </templates> +</zabbix_export> -- GitLab