From d4dd67e6ba4aaa8e45e4572272d6b9c762506bea Mon Sep 17 00:00:00 2001
From: Fulvio Galeazzi <fulvio.galeazzi@garr.it>
Date: Wed, 21 Oct 2020 12:38:32 +0200
Subject: [PATCH] 2020-10-21:  FG;  Added some PG states, updated for Nautilus
 misplaced/degraded objects.

---
 Ceph/Config/userparameter_ceph.conf |   3 +
 Ceph/Script/cephHealth.pl           |  16 ++
 Ceph/Script/cephPg.pl               |  10 +-
 Ceph/Template/zbx_tmpl_ceph-adm.xml | 260 +++++++++++++++++++++++++++-
 4 files changed, 279 insertions(+), 10 deletions(-)

diff --git a/Ceph/Config/userparameter_ceph.conf b/Ceph/Config/userparameter_ceph.conf
index 94e1801..1fda748 100644
--- a/Ceph/Config/userparameter_ceph.conf
+++ b/Ceph/Config/userparameter_ceph.conf
@@ -45,6 +45,7 @@ UserParameter=ceph.pgstat.cln[*],/etc/zabbix/scripts/cephPg.pl $1 -f cln
 UserParameter=ceph.pgstat.dee[*],/etc/zabbix/scripts/cephPg.pl $1 -f dee
 UserParameter=ceph.pgstat.deg[*],/etc/zabbix/scripts/cephPg.pl $1 -f deg
 UserParameter=ceph.pgstat.dow[*],/etc/zabbix/scripts/cephPg.pl $1 -f dow
+UserParameter=ceph.pgstat.ful[*],/etc/zabbix/scripts/cephPg.pl $1 -f ful
 UserParameter=ceph.pgstat.ncm[*],/etc/zabbix/scripts/cephPg.pl $1 -f ncm
 UserParameter=ceph.pgstat.ncs[*],/etc/zabbix/scripts/cephPg.pl $1 -f ncs
 UserParameter=ceph.pgstat.oth[*],/etc/zabbix/scripts/cephPg.pl $1 -f oth
@@ -53,8 +54,10 @@ UserParameter=ceph.pgstat.rec[*],/etc/zabbix/scripts/cephPg.pl $1 -f rec
 UserParameter=ceph.pgstat.rem[*],/etc/zabbix/scripts/cephPg.pl $1 -f rem
 UserParameter=ceph.pgstat.rep[*],/etc/zabbix/scripts/cephPg.pl $1 -f rep
 UserParameter=ceph.pgstat.scr[*],/etc/zabbix/scripts/cephPg.pl $1 -f scr
+UserParameter=ceph.pgstat.sna[*],/etc/zabbix/scripts/cephPg.pl $1 -f sna
 UserParameter=ceph.pgstat.sta[*],/etc/zabbix/scripts/cephPg.pl $1 -f sta
 UserParameter=ceph.pgstat.und[*],/etc/zabbix/scripts/cephPg.pl $1 -f und
+UserParameter=ceph.pgstat.unf[*],/etc/zabbix/scripts/cephPg.pl $1 -f unf
 # Disks
 UserParameter=ceph.osdsize.all.discovery[*],/etc/zabbix/scripts/queryCephDisks.pl $1 $2 -j
 UserParameter=ceph.osdsize.all.count[*],/etc/zabbix/scripts/queryCephDisks.pl     $1 $2 -s $3
diff --git a/Ceph/Script/cephHealth.pl b/Ceph/Script/cephHealth.pl
index 3e3fc9b..036083e 100755
--- a/Ceph/Script/cephHealth.pl
+++ b/Ceph/Script/cephHealth.pl
@@ -281,6 +281,22 @@ foreach my $_line (@_data)
     }
 }
 
+# Run "$cephCmd pg stat" and collect its output lines in an array.
+# Nautilus fix: misplaced is no longer in health detail, so parse it (and degraded) here.
+my @_data2 = `$cephCmd pg stat`;
+chomp @_data2;
+foreach my $_line2 (@_data2)
+{
+	if ($_line2 =~ m/ (\d+)\/(\d+) objects misplaced \(([-+]?[0-9]*\.?[0-9]+)\%\)/) {
+	    $objMisplaced = $1;      # N from " N/M objects misplaced (P%)"
+	    $objMisplacedFrac = $3;  # P, the number before the percent sign
+	}
+	if ($_line2 =~ m/ (\d+)\/(\d+) objects degraded \(([-+]?[0-9]*\.?[0-9]+)\%\)/) {
+	    $objDegraded = $1;       # N from " N/M objects degraded (P%)"
+	    $objDegradedFrac = $3;   # P, the number before the percent sign
+	}
+}
+
 if (defined $opt_status) {
     print $status."\n";
 } elsif (defined $opt_object_degraded) {
diff --git a/Ceph/Script/cephPg.pl b/Ceph/Script/cephPg.pl
index 13ca5c2..fca7d0e 100755
--- a/Ceph/Script/cephPg.pl
+++ b/Ceph/Script/cephPg.pl
@@ -95,26 +95,34 @@ my %statesHash = (
     'activeclean' => 'acc',
     'active' => 'act',
     'backfilling' => 'bck',
+    'forced_backfill' => 'bck',
     'wait_backfill' => 'bck',
-    'backfill_toofull' => 'bck',
     'clean' => 'cln',
     'deep' => 'dee',
     'degraded' => 'deg',
     'down' => 'dow',
+    'backfill_toofull' => 'ful',
+    'recovery_toofull' => 'ful',
     'incomplete' => 'ncm',
     'inconsistent' => 'ncs',
+    'activating' => 'oth',
     'creating' => 'oth',
     'replay' => 'oth',
     'splitting' => 'oth',
     'peered' => 'pee',
     'peering' => 'pee',
+    'forced_recovery' => 'rec',
     'recovering' => 'rec',
     'recovery_wait' => 'rec',
     'remapped' => 'rem',
     'repair' => 'rep',
     'scrubbing' => 'scr',
+    'snaptrim' => 'sna',
+    'snaptrim_wait' => 'sna',
     'stale' => 'sta',
     'undersized' => 'und',
+    'backfill_unfound' => 'unf',
+    'recovery_unfound' => 'unf',
     );
 my %pgHash = (
     'tot' => 0,
diff --git a/Ceph/Template/zbx_tmpl_ceph-adm.xml b/Ceph/Template/zbx_tmpl_ceph-adm.xml
index df94ce4..c4b6329 100644
--- a/Ceph/Template/zbx_tmpl_ceph-adm.xml
+++ b/Ceph/Template/zbx_tmpl_ceph-adm.xml
@@ -1,7 +1,7 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <zabbix_export>
     <version>3.0</version>
-    <date>2018-05-08T06:28:09Z</date>
+    <date>2020-10-21T10:36:49Z</date>
     <groups>
         <group>
             <name>Templates</name>
@@ -1276,6 +1276,49 @@
                     <valuemap/>
                     <logtimefmt/>
                 </item>
+                <item>
+                    <name>Ceph PG full - toofull</name>
+                    <type>0</type>
+                    <snmp_community/>
+                    <multiplier>0</multiplier>
+                    <snmp_oid/>
+                    <key>ceph.pgstat.ful[{$CEPH_CONNPAR}]</key>
+                    <delay>60</delay>
+                    <history>15</history>
+                    <trends>365</trends>
+                    <status>0</status>
+                    <value_type>3</value_type>
+                    <allowed_hosts/>
+                    <units/>
+                    <delta>0</delta>
+                    <snmpv3_contextname/>
+                    <snmpv3_securityname/>
+                    <snmpv3_securitylevel>0</snmpv3_securitylevel>
+                    <snmpv3_authprotocol>0</snmpv3_authprotocol>
+                    <snmpv3_authpassphrase/>
+                    <snmpv3_privprotocol>0</snmpv3_privprotocol>
+                    <snmpv3_privpassphrase/>
+                    <formula>1</formula>
+                    <delay_flex/>
+                    <params/>
+                    <ipmi_sensor/>
+                    <data_type>0</data_type>
+                    <authtype>0</authtype>
+                    <username/>
+                    <password/>
+                    <publickey/>
+                    <privatekey/>
+                    <port/>
+                    <description>Ceph PG: backfill_toofull and recovery_toofull</description>
+                    <inventory_link>0</inventory_link>
+                    <applications>
+                        <application>
+                            <name>Ceph ADM</name>
+                        </application>
+                    </applications>
+                    <valuemap/>
+                    <logtimefmt/>
+                </item>
                 <item>
                     <name>Ceph PG incomplete</name>
                     <type>0</type>
@@ -1620,6 +1663,49 @@
                     <valuemap/>
                     <logtimefmt/>
                 </item>
+                <item>
+                    <name>Ceph PG snaptrim</name>
+                    <type>0</type>
+                    <snmp_community/>
+                    <multiplier>0</multiplier>
+                    <snmp_oid/>
+                    <key>ceph.pgstat.sna[{$CEPH_CONNPAR}]</key>
+                    <delay>60</delay>
+                    <history>15</history>
+                    <trends>365</trends>
+                    <status>0</status>
+                    <value_type>3</value_type>
+                    <allowed_hosts/>
+                    <units/>
+                    <delta>0</delta>
+                    <snmpv3_contextname/>
+                    <snmpv3_securityname/>
+                    <snmpv3_securitylevel>0</snmpv3_securitylevel>
+                    <snmpv3_authprotocol>0</snmpv3_authprotocol>
+                    <snmpv3_authpassphrase/>
+                    <snmpv3_privprotocol>0</snmpv3_privprotocol>
+                    <snmpv3_privpassphrase/>
+                    <formula>1</formula>
+                    <delay_flex/>
+                    <params/>
+                    <ipmi_sensor/>
+                    <data_type>0</data_type>
+                    <authtype>0</authtype>
+                    <username/>
+                    <password/>
+                    <publickey/>
+                    <privatekey/>
+                    <port/>
+                    <description>Ceph PG: snaptrim</description>
+                    <inventory_link>0</inventory_link>
+                    <applications>
+                        <application>
+                            <name>Ceph ADM</name>
+                        </application>
+                    </applications>
+                    <valuemap/>
+                    <logtimefmt/>
+                </item>
                 <item>
                     <name>Ceph PG stale</name>
                     <type>0</type>
@@ -1749,6 +1835,49 @@
                     <valuemap/>
                     <logtimefmt/>
                 </item>
+                <item>
+                    <name>Ceph PG unfound</name>
+                    <type>0</type>
+                    <snmp_community/>
+                    <multiplier>0</multiplier>
+                    <snmp_oid/>
+                    <key>ceph.pgstat.unf[{$CEPH_CONNPAR}]</key>
+                    <delay>60</delay>
+                    <history>15</history>
+                    <trends>365</trends>
+                    <status>0</status>
+                    <value_type>3</value_type>
+                    <allowed_hosts/>
+                    <units/>
+                    <delta>0</delta>
+                    <snmpv3_contextname/>
+                    <snmpv3_securityname/>
+                    <snmpv3_securitylevel>0</snmpv3_securitylevel>
+                    <snmpv3_authprotocol>0</snmpv3_authprotocol>
+                    <snmpv3_authpassphrase/>
+                    <snmpv3_privprotocol>0</snmpv3_privprotocol>
+                    <snmpv3_privpassphrase/>
+                    <formula>1</formula>
+                    <delay_flex/>
+                    <params/>
+                    <ipmi_sensor/>
+                    <data_type>0</data_type>
+                    <authtype>0</authtype>
+                    <username/>
+                    <password/>
+                    <publickey/>
+                    <privatekey/>
+                    <port/>
+                    <description>Ceph PG: backfill_unfound, recovery_unfound</description>
+                    <inventory_link>0</inventory_link>
+                    <applications>
+                        <application>
+                            <name>Ceph ADM</name>
+                        </application>
+                    </applications>
+                    <valuemap/>
+                    <logtimefmt/>
+                </item>
                 <item>
                     <name>Ceph Pool number</name>
                     <type>0</type>
@@ -2327,7 +2456,7 @@
                     <snmp_oid/>
                     <key>ceph.pool.discovery[{$CEPH_CONNPAR}]</key>
                     <delay>3600</delay>
-                    <status>1</status>
+                    <status>0</status>
                     <allowed_hosts/>
                     <snmpv3_contextname/>
                     <snmpv3_securityname/>
@@ -2360,7 +2489,7 @@
                             <multiplier>0</multiplier>
                             <snmp_oid/>
                             <key>ceph.pool.frac[{$CEPH_CONNPAR},{#POOL}]</key>
-                            <delay>120</delay>
+                            <delay>300</delay>
                             <history>15</history>
                             <trends>365</trends>
                             <status>0</status>
@@ -2404,7 +2533,7 @@
                             <multiplier>0</multiplier>
                             <snmp_oid/>
                             <key>ceph.pool.objs[{$CEPH_CONNPAR},{#POOL}]</key>
-                            <delay>120</delay>
+                            <delay>300</delay>
                             <history>15</history>
                             <trends>365</trends>
                             <status>0</status>
@@ -2448,7 +2577,7 @@
                             <multiplier>0</multiplier>
                             <snmp_oid/>
                             <key>ceph.pool.size[{$CEPH_CONNPAR},{#POOL}]</key>
-                            <delay>120</delay>
+                            <delay>300</delay>
                             <history>15</history>
                             <trends>365</trends>
                             <status>0</status>
@@ -2487,7 +2616,84 @@
                         </item_prototype>
                     </item_prototypes>
                     <trigger_prototypes/>
-                    <graph_prototypes/>
+                    <graph_prototypes>
+                        <graph_prototype>
+                            <name>Pool {#POOL}: object number</name>
+                            <width>900</width>
+                            <height>200</height>
+                            <yaxismin>0.0000</yaxismin>
+                            <yaxismax>100.0000</yaxismax>
+                            <show_work_period>1</show_work_period>
+                            <show_triggers>1</show_triggers>
+                            <type>0</type>
+                            <show_legend>1</show_legend>
+                            <show_3d>0</show_3d>
+                            <percent_left>0.0000</percent_left>
+                            <percent_right>0.0000</percent_right>
+                            <ymin_type_1>0</ymin_type_1>
+                            <ymax_type_1>0</ymax_type_1>
+                            <ymin_item_1>0</ymin_item_1>
+                            <ymax_item_1>0</ymax_item_1>
+                            <graph_items>
+                                <graph_item>
+                                    <sortorder>0</sortorder>
+                                    <drawtype>0</drawtype>
+                                    <color>009900</color>
+                                    <yaxisside>0</yaxisside>
+                                    <calc_fnc>2</calc_fnc>
+                                    <type>0</type>
+                                    <item>
+                                        <host>Template App Ceph</host>
+                                        <key>ceph.pool.objs[{$CEPH_CONNPAR},{#POOL}]</key>
+                                    </item>
+                                </graph_item>
+                            </graph_items>
+                        </graph_prototype>
+                        <graph_prototype>
+                            <name>Pool {#POOL}: size and used fraction</name>
+                            <width>900</width>
+                            <height>200</height>
+                            <yaxismin>0.0000</yaxismin>
+                            <yaxismax>100.0000</yaxismax>
+                            <show_work_period>1</show_work_period>
+                            <show_triggers>1</show_triggers>
+                            <type>0</type>
+                            <show_legend>1</show_legend>
+                            <show_3d>0</show_3d>
+                            <percent_left>0.0000</percent_left>
+                            <percent_right>0.0000</percent_right>
+                            <ymin_type_1>1</ymin_type_1>
+                            <ymax_type_1>0</ymax_type_1>
+                            <ymin_item_1>0</ymin_item_1>
+                            <ymax_item_1>0</ymax_item_1>
+                            <graph_items>
+                                <graph_item>
+                                    <sortorder>0</sortorder>
+                                    <drawtype>0</drawtype>
+                                    <color>990000</color>
+                                    <yaxisside>1</yaxisside>
+                                    <calc_fnc>2</calc_fnc>
+                                    <type>0</type>
+                                    <item>
+                                        <host>Template App Ceph</host>
+                                        <key>ceph.pool.frac[{$CEPH_CONNPAR},{#POOL}]</key>
+                                    </item>
+                                </graph_item>
+                                <graph_item>
+                                    <sortorder>1</sortorder>
+                                    <drawtype>0</drawtype>
+                                    <color>000099</color>
+                                    <yaxisside>0</yaxisside>
+                                    <calc_fnc>2</calc_fnc>
+                                    <type>0</type>
+                                    <item>
+                                        <host>Template App Ceph</host>
+                                        <key>ceph.pool.size[{$CEPH_CONNPAR},{#POOL}]</key>
+                                    </item>
+                                </graph_item>
+                            </graph_items>
+                        </graph_prototype>
+                    </graph_prototypes>
                     <host_prototypes/>
                 </discovery_rule>
                 <discovery_rule>
@@ -2530,7 +2736,7 @@
                             <multiplier>0</multiplier>
                             <snmp_oid/>
                             <key>ceph.poolgrp.frac[{$CEPH_CONNPAR},{#POOLGRP}]</key>
-                            <delay>120</delay>
+                            <delay>300</delay>
                             <history>15</history>
                             <trends>365</trends>
                             <status>0</status>
@@ -2574,7 +2780,7 @@
                             <multiplier>0</multiplier>
                             <snmp_oid/>
                             <key>ceph.poolgrp.objs[{$CEPH_CONNPAR},{#POOLGRP}]</key>
-                            <delay>120</delay>
+                            <delay>300</delay>
                             <history>15</history>
                             <trends>365</trends>
                             <status>0</status>
@@ -2618,7 +2824,7 @@
                             <multiplier>0</multiplier>
                             <snmp_oid/>
                             <key>ceph.poolgrp.size[{$CEPH_CONNPAR},{#POOLGRP}]</key>
-                            <delay>120</delay>
+                            <delay>300</delay>
                             <history>15</history>
                             <trends>365</trends>
                             <status>0</status>
@@ -3490,6 +3696,42 @@
                         <key>ceph.pgstat.rep[{$CEPH_CONNPAR}]</key>
                     </item>
                 </graph_item>
+                <graph_item>
+                    <sortorder>14</sortorder>
+                    <drawtype>0</drawtype>
+                    <color>274482</color>
+                    <yaxisside>0</yaxisside>
+                    <calc_fnc>2</calc_fnc>
+                    <type>0</type>
+                    <item>
+                        <host>Template App Ceph</host>
+                        <key>ceph.pgstat.ful[{$CEPH_CONNPAR}]</key>
+                    </item>
+                </graph_item>
+                <graph_item>
+                    <sortorder>15</sortorder>
+                    <drawtype>0</drawtype>
+                    <color>2B5429</color>
+                    <yaxisside>0</yaxisside>
+                    <calc_fnc>2</calc_fnc>
+                    <type>0</type>
+                    <item>
+                        <host>Template App Ceph</host>
+                        <key>ceph.pgstat.sna[{$CEPH_CONNPAR}]</key>
+                    </item>
+                </graph_item>
+                <graph_item>
+                    <sortorder>16</sortorder>
+                    <drawtype>0</drawtype>
+                    <color>8048B4</color>
+                    <yaxisside>0</yaxisside>
+                    <calc_fnc>2</calc_fnc>
+                    <type>0</type>
+                    <item>
+                        <host>Template App Ceph</host>
+                        <key>ceph.pgstat.unf[{$CEPH_CONNPAR}]</key>
+                    </item>
+                </graph_item>
             </graph_items>
         </graph>
         <graph>
-- 
GitLab