From d4dd67e6ba4aaa8e45e4572272d6b9c762506bea Mon Sep 17 00:00:00 2001 From: Fulvio Galeazzi <fulvio.galeazzi@garr.it> Date: Wed, 21 Oct 2020 12:38:32 +0200 Subject: [PATCH] 2020-10-21: FG; Added some PG states, updated for Nautilus misplaced/degraded objects. --- Ceph/Config/userparameter_ceph.conf | 3 + Ceph/Script/cephHealth.pl | 16 ++ Ceph/Script/cephPg.pl | 10 +- Ceph/Template/zbx_tmpl_ceph-adm.xml | 260 +++++++++++++++++++++++++++- 4 files changed, 279 insertions(+), 10 deletions(-) diff --git a/Ceph/Config/userparameter_ceph.conf b/Ceph/Config/userparameter_ceph.conf index 94e1801..1fda748 100644 --- a/Ceph/Config/userparameter_ceph.conf +++ b/Ceph/Config/userparameter_ceph.conf @@ -45,6 +45,7 @@ UserParameter=ceph.pgstat.cln[*],/etc/zabbix/scripts/cephPg.pl $1 -f cln UserParameter=ceph.pgstat.dee[*],/etc/zabbix/scripts/cephPg.pl $1 -f dee UserParameter=ceph.pgstat.deg[*],/etc/zabbix/scripts/cephPg.pl $1 -f deg UserParameter=ceph.pgstat.dow[*],/etc/zabbix/scripts/cephPg.pl $1 -f dow +UserParameter=ceph.pgstat.ful[*],/etc/zabbix/scripts/cephPg.pl $1 -f ful UserParameter=ceph.pgstat.ncm[*],/etc/zabbix/scripts/cephPg.pl $1 -f ncm UserParameter=ceph.pgstat.ncs[*],/etc/zabbix/scripts/cephPg.pl $1 -f ncs UserParameter=ceph.pgstat.oth[*],/etc/zabbix/scripts/cephPg.pl $1 -f oth @@ -53,8 +54,10 @@ UserParameter=ceph.pgstat.rec[*],/etc/zabbix/scripts/cephPg.pl $1 -f rec UserParameter=ceph.pgstat.rem[*],/etc/zabbix/scripts/cephPg.pl $1 -f rem UserParameter=ceph.pgstat.rep[*],/etc/zabbix/scripts/cephPg.pl $1 -f rep UserParameter=ceph.pgstat.scr[*],/etc/zabbix/scripts/cephPg.pl $1 -f scr +UserParameter=ceph.pgstat.sna[*],/etc/zabbix/scripts/cephPg.pl $1 -f sna UserParameter=ceph.pgstat.sta[*],/etc/zabbix/scripts/cephPg.pl $1 -f sta UserParameter=ceph.pgstat.und[*],/etc/zabbix/scripts/cephPg.pl $1 -f und +UserParameter=ceph.pgstat.unf[*],/etc/zabbix/scripts/cephPg.pl $1 -f unf # Disks UserParameter=ceph.osdsize.all.discovery[*],/etc/zabbix/scripts/queryCephDisks.pl $1 $2 -j UserParameter=ceph.osdsize.all.count[*],/etc/zabbix/scripts/queryCephDisks.pl $1 $2 -s $3 diff --git a/Ceph/Script/cephHealth.pl b/Ceph/Script/cephHealth.pl index 3e3fc9b..036083e 100755 --- a/Ceph/Script/cephHealth.pl +++ b/Ceph/Script/cephHealth.pl @@ -281,6 +281,22 @@ foreach my $_line (@_data) } } +# Fetch the data and put it in an array +# Nautilus fix: misplaced no longer in health detail +my @_data2 = `$cephCmd pg stat`; +chomp @_data2; +foreach my $_line2 (@_data2) +{ + if ($_line2 =~ m/ (\d+)\/(\d+) objects misplaced \(([-+]?[0-9]*\.?[0-9]+)\%\)/) { + $objMisplaced = $1; + $objMisplacedFrac = $3; + } + if ($_line2 =~ m/ (\d+)\/(\d+) objects degraded \(([-+]?[0-9]*\.?[0-9]+)\%\)/) { + $objDegraded = $1; + $objDegradedFrac = $3; + } +} + if (defined $opt_status) { print $status."\n"; } elsif (defined $opt_object_degraded) { diff --git a/Ceph/Script/cephPg.pl b/Ceph/Script/cephPg.pl index 13ca5c2..fca7d0e 100755 --- a/Ceph/Script/cephPg.pl +++ b/Ceph/Script/cephPg.pl @@ -95,26 +95,34 @@ my %statesHash = ( 'activeclean' => 'acc', 'active' => 'act', 'backfilling' => 'bck', + 'forced_backfill' => 'bck', 'wait_backfill' => 'bck', - 'backfill_toofull' => 'bck', 'clean' => 'cln', 'deep' => 'dee', 'degraded' => 'deg', 'down' => 'dow', + 'backfill_toofull' => 'ful', + 'recovery_toofull' => 'ful', 'incomplete' => 'ncm', 'inconsistent' => 'ncs', + 'activating' => 'oth', 'creating' => 'oth', 'replay' => 'oth', 'splitting' => 'oth', 'peered' => 'pee', 'peering' => 'pee', + 'forced_recovery' => 'rec', 'recovering' => 'rec', 'recovery_wait' => 'rec', 'remapped' => 'rem', 'repair' => 'rep', 'scrubbing' => 'scr', + 'snaptrim' => 'sna', + 'snaptrim_wait' => 'sna', 'stale' => 'sta', 'undersized' => 'und', + 'backfill_unfound' => 'unf', + 'recovery_unfound' => 'unf', ); my %pgHash = ( 'tot' => 0, diff --git a/Ceph/Template/zbx_tmpl_ceph-adm.xml b/Ceph/Template/zbx_tmpl_ceph-adm.xml index df94ce4..c4b6329 100644 --- a/Ceph/Template/zbx_tmpl_ceph-adm.xml +++ b/Ceph/Template/zbx_tmpl_ceph-adm.xml @@ -1,7 +1,7 @@ <?xml version="1.0" encoding="UTF-8"?> <zabbix_export> <version>3.0</version> - <date>2018-05-08T06:28:09Z</date> + <date>2020-10-21T10:36:49Z</date> <groups> <group> <name>Templates</name> @@ -1276,6 +1276,49 @@ <valuemap/> <logtimefmt/> </item> + <item> + <name>Ceph PG full - toofull</name> + <type>0</type> + <snmp_community/> + <multiplier>0</multiplier> + <snmp_oid/> + <key>ceph.pgstat.ful[{$CEPH_CONNPAR}]</key> + <delay>60</delay> + <history>15</history> + <trends>365</trends> + <status>0</status> + <value_type>3</value_type> + <allowed_hosts/> + <units/> + <delta>0</delta> + <snmpv3_contextname/> + <snmpv3_securityname/> + <snmpv3_securitylevel>0</snmpv3_securitylevel> + <snmpv3_authprotocol>0</snmpv3_authprotocol> + <snmpv3_authpassphrase/> + <snmpv3_privprotocol>0</snmpv3_privprotocol> + <snmpv3_privpassphrase/> + <formula>1</formula> + <delay_flex/> + <params/> + <ipmi_sensor/> + <data_type>0</data_type> + <authtype>0</authtype> + <username/> + <password/> + <publickey/> + <privatekey/> + <port/> + <description>Ceph PG: backfill_toofull and recovery_toofull</description> + <inventory_link>0</inventory_link> + <applications> + <application> + <name>Ceph ADM</name> + </application> + </applications> + <valuemap/> + <logtimefmt/> + </item> <item> <name>Ceph PG incomplete</name> <type>0</type> @@ -1620,6 +1663,49 @@ <valuemap/> <logtimefmt/> </item> + <item> + <name>Ceph PG snaptrim</name> + <type>0</type> + <snmp_community/> + <multiplier>0</multiplier> + <snmp_oid/> + <key>ceph.pgstat.sna[{$CEPH_CONNPAR}]</key> + <delay>60</delay> + <history>15</history> + <trends>365</trends> + <status>0</status> + <value_type>3</value_type> + <allowed_hosts/> + <units/> + <delta>0</delta> + <snmpv3_contextname/> + <snmpv3_securityname/> + <snmpv3_securitylevel>0</snmpv3_securitylevel> + <snmpv3_authprotocol>0</snmpv3_authprotocol> + <snmpv3_authpassphrase/> + <snmpv3_privprotocol>0</snmpv3_privprotocol> + <snmpv3_privpassphrase/> + <formula>1</formula> + <delay_flex/> + <params/> + <ipmi_sensor/> + <data_type>0</data_type> + <authtype>0</authtype> + <username/> + <password/> + <publickey/> + <privatekey/> + <port/> + <description>Ceph PG: snaptrim</description> + <inventory_link>0</inventory_link> + <applications> + <application> + <name>Ceph ADM</name> + </application> + </applications> + <valuemap/> + <logtimefmt/> + </item> <item> <name>Ceph PG stale</name> <type>0</type> @@ -1749,6 +1835,49 @@ <valuemap/> <logtimefmt/> </item> + <item> + <name>Ceph PG unfound</name> + <type>0</type> + <snmp_community/> + <multiplier>0</multiplier> + <snmp_oid/> + <key>ceph.pgstat.unf[{$CEPH_CONNPAR}]</key> + <delay>60</delay> + <history>15</history> + <trends>365</trends> + <status>0</status> + <value_type>3</value_type> + <allowed_hosts/> + <units/> + <delta>0</delta> + <snmpv3_contextname/> + <snmpv3_securityname/> + <snmpv3_securitylevel>0</snmpv3_securitylevel> + <snmpv3_authprotocol>0</snmpv3_authprotocol> + <snmpv3_authpassphrase/> + <snmpv3_privprotocol>0</snmpv3_privprotocol> + <snmpv3_privpassphrase/> + <formula>1</formula> + <delay_flex/> + <params/> + <ipmi_sensor/> + <data_type>0</data_type> + <authtype>0</authtype> + <username/> + <password/> + <publickey/> + <privatekey/> + <port/> + <description>Ceph PG: backfill_unfound, recovery_unfound</description> + <inventory_link>0</inventory_link> + <applications> + <application> + <name>Ceph ADM</name> + </application> + </applications> + <valuemap/> + <logtimefmt/> + </item> <item> <name>Ceph Pool number</name> <type>0</type> @@ -2327,7 +2456,7 @@ <snmp_oid/> <key>ceph.pool.discovery[{$CEPH_CONNPAR}]</key> <delay>3600</delay> - <status>1</status> + <status>0</status> <allowed_hosts/> <snmpv3_contextname/> <snmpv3_securityname/> @@ -2360,7 +2489,7 @@ <multiplier>0</multiplier> <snmp_oid/> <key>ceph.pool.frac[{$CEPH_CONNPAR},{#POOL}]</key> - <delay>120</delay> + <delay>300</delay> <history>15</history> <trends>365</trends> <status>0</status> @@ -2404,7 +2533,7 @@ <multiplier>0</multiplier> <snmp_oid/> <key>ceph.pool.objs[{$CEPH_CONNPAR},{#POOL}]</key> - <delay>120</delay> + <delay>300</delay> <history>15</history> <trends>365</trends> <status>0</status> @@ -2448,7 +2577,7 @@ <multiplier>0</multiplier> <snmp_oid/> <key>ceph.pool.size[{$CEPH_CONNPAR},{#POOL}]</key> - <delay>120</delay> + <delay>300</delay> <history>15</history> <trends>365</trends> <status>0</status> @@ -2487,7 +2616,84 @@ </item_prototype> </item_prototypes> <trigger_prototypes/> - <graph_prototypes/> + <graph_prototypes> + <graph_prototype> + <name>Pool {#POOL}: object number</name> + <width>900</width> + <height>200</height> + <yaxismin>0.0000</yaxismin> + <yaxismax>100.0000</yaxismax> + <show_work_period>1</show_work_period> + <show_triggers>1</show_triggers> + <type>0</type> + <show_legend>1</show_legend> + <show_3d>0</show_3d> + <percent_left>0.0000</percent_left> + <percent_right>0.0000</percent_right> + <ymin_type_1>0</ymin_type_1> + <ymax_type_1>0</ymax_type_1> + <ymin_item_1>0</ymin_item_1> + <ymax_item_1>0</ymax_item_1> + <graph_items> + <graph_item> + <sortorder>0</sortorder> + <drawtype>0</drawtype> + <color>009900</color> + <yaxisside>0</yaxisside> + <calc_fnc>2</calc_fnc> + <type>0</type> + <item> + <host>Template App Ceph</host> + <key>ceph.pool.objs[{$CEPH_CONNPAR},{#POOL}]</key> + </item> + </graph_item> + </graph_items> + </graph_prototype> + <graph_prototype> + <name>Pool {#POOL}: size and used fraction</name> + <width>900</width> + <height>200</height> + <yaxismin>0.0000</yaxismin> + <yaxismax>100.0000</yaxismax> + <show_work_period>1</show_work_period> + <show_triggers>1</show_triggers> + <type>0</type> + <show_legend>1</show_legend> + <show_3d>0</show_3d> + <percent_left>0.0000</percent_left> + <percent_right>0.0000</percent_right> + <ymin_type_1>1</ymin_type_1> + <ymax_type_1>0</ymax_type_1> + <ymin_item_1>0</ymin_item_1> + <ymax_item_1>0</ymax_item_1> + <graph_items> + <graph_item> + <sortorder>0</sortorder> + <drawtype>0</drawtype> + <color>990000</color> + <yaxisside>1</yaxisside> + <calc_fnc>2</calc_fnc> + <type>0</type> + <item> + <host>Template App Ceph</host> + <key>ceph.pool.frac[{$CEPH_CONNPAR},{#POOL}]</key> + </item> + </graph_item> + <graph_item> + <sortorder>1</sortorder> + <drawtype>0</drawtype> + <color>000099</color> + <yaxisside>0</yaxisside> + <calc_fnc>2</calc_fnc> + <type>0</type> + <item> + <host>Template App Ceph</host> + <key>ceph.pool.size[{$CEPH_CONNPAR},{#POOL}]</key> + </item> + </graph_item> + </graph_items> + </graph_prototype> + </graph_prototypes> <host_prototypes/> </discovery_rule> <discovery_rule> @@ -2530,7 +2736,7 @@ <multiplier>0</multiplier> <snmp_oid/> <key>ceph.poolgrp.frac[{$CEPH_CONNPAR},{#POOLGRP}]</key> - <delay>120</delay> + <delay>300</delay> <history>15</history> <trends>365</trends> <status>0</status> @@ -2574,7 +2780,7 @@ <multiplier>0</multiplier> <snmp_oid/> <key>ceph.poolgrp.objs[{$CEPH_CONNPAR},{#POOLGRP}]</key> - <delay>120</delay> + <delay>300</delay> <history>15</history> <trends>365</trends> <status>0</status> @@ -2618,7 +2824,7 @@ <multiplier>0</multiplier> <snmp_oid/> <key>ceph.poolgrp.size[{$CEPH_CONNPAR},{#POOLGRP}]</key> - <delay>120</delay> + <delay>300</delay> <history>15</history> <trends>365</trends> <status>0</status> @@ -3490,6 +3696,42 @@ <key>ceph.pgstat.rep[{$CEPH_CONNPAR}]</key> </item> </graph_item> + <graph_item> + <sortorder>14</sortorder> + <drawtype>0</drawtype> + <color>274482</color> + <yaxisside>0</yaxisside> + <calc_fnc>2</calc_fnc> + <type>0</type> + <item> + <host>Template App Ceph</host> + <key>ceph.pgstat.ful[{$CEPH_CONNPAR}]</key> + </item> + </graph_item> + <graph_item> + <sortorder>15</sortorder> + <drawtype>0</drawtype> + <color>2B5429</color> + <yaxisside>0</yaxisside> + <calc_fnc>2</calc_fnc> + <type>0</type> + <item> + <host>Template App Ceph</host> + <key>ceph.pgstat.sna[{$CEPH_CONNPAR}]</key> + </item> + </graph_item> + <graph_item> + <sortorder>16</sortorder> + <drawtype>0</drawtype> + <color>8048B4</color> + <yaxisside>0</yaxisside> + <calc_fnc>2</calc_fnc> + <type>0</type> + <item> + <host>Template App Ceph</host> + <key>ceph.pgstat.unf[{$CEPH_CONNPAR}]</key> + </item> + </graph_item> </graph_items> </graph> <graph> -- GitLab