From 6c7240d06d83a45dfcf622c5ceb1eb4219873112 Mon Sep 17 00:00:00 2001 From: Jeremy Filizetti Date: Tue, 16 Oct 2012 09:41:27 -0400 Subject: [PATCH] LU-1460 snmp: Expose lnet stats through SNMP Add to the MIB and lustre functionality to monitor lnet stats via SNMP Signed-off-by: Jeremy Filizetti Change-Id: I9ae360d7e100af5aef34a6b645fce963376928d1 Reviewed-on: http://review.whamcloud.com/4823 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Doug Oucharek Reviewed-by: Andreas Dilger --- snmp/Lustre-MIB.txt | 96 ++++++++++++++++++++++++++++++++++++++++++- snmp/lustre-snmp-util.c | 9 +++++ snmp/lustre-snmp-util.h | 15 +++++++ snmp/lustre-snmp.c | 105 +++++++++++++++++++++++++++++++++++++++++++++++- snmp/lustre-snmp.h | 1 + 5 files changed, 224 insertions(+), 2 deletions(-) diff --git a/snmp/Lustre-MIB.txt b/snmp/Lustre-MIB.txt index 3780d08..f7c16b7 100644 --- a/snmp/Lustre-MIB.txt +++ b/snmp/Lustre-MIB.txt @@ -43,6 +43,7 @@ metaDataClients OBJECT IDENTIFIER ::= { lustreMIB 5 } lustreClients OBJECT IDENTIFIER ::= { lustreMIB 6 } logicalObjectVolume OBJECT IDENTIFIER ::= { lustreMIB 7 } lustreLDLM OBJECT IDENTIFIER ::= { lustreMIB 8 } +lnetInformation OBJECT IDENTIFIER ::= { lustreMIB 9 } --============================================================================ @@ -523,7 +524,7 @@ mdcMDSServerUUID OBJECT-TYPE DESCRIPTION "The Lustre Meta data server UUID accessed by Metadata Client. This string also contains a state name, and possibly a DEACTIVATED flag." - ::= { mdcEntry 4 } + ::= { mdcEntry 4 } mdcCapacity OBJECT-TYPE SYNTAX Counter64 @@ -919,6 +920,99 @@ ldlmResourceCount OBJECT-TYPE --============================================================================ -- +-- Lustre Lnet +-- +--============================================================================ +lnetMsgsAlloc OBJECT-TYPE + SYNTAX Unsigned32 + MAX-ACCESS read-only + STATUS current + DESCRIPTION + "LNet messages allocated." 
+    ::= { lnetInformation 1 }
+
+lnetMsgsMax OBJECT-TYPE
+    SYNTAX Unsigned32
+    MAX-ACCESS read-only
+    STATUS current
+    DESCRIPTION
+        "LNet messages maximum."
+    ::= { lnetInformation 2 }
+
+lnetErrors OBJECT-TYPE
+    SYNTAX Unsigned32
+    MAX-ACCESS read-only
+    STATUS current
+    DESCRIPTION
+        "LNet errors."
+    ::= { lnetInformation 3 }
+
+lnetSendCount OBJECT-TYPE
+    SYNTAX Unsigned32
+    MAX-ACCESS read-only
+    STATUS current
+    DESCRIPTION
+        "LNet send RPCs."
+    ::= { lnetInformation 4 }
+
+lnetRecvCount OBJECT-TYPE
+    SYNTAX Unsigned32
+    MAX-ACCESS read-only
+    STATUS current
+    DESCRIPTION
+        "LNet receive RPCs."
+    ::= { lnetInformation 5 }
+
+lnetRouteCount OBJECT-TYPE
+    SYNTAX Unsigned32
+    MAX-ACCESS read-only
+    STATUS current
+    DESCRIPTION
+        "LNet routed RPCs."
+    ::= { lnetInformation 6 }
+
+lnetDropCount OBJECT-TYPE
+    SYNTAX Unsigned32
+    MAX-ACCESS read-only
+    STATUS current
+    DESCRIPTION
+        "LNet dropped RPCs."
+    ::= { lnetInformation 7 }
+
+lnetSendBytes OBJECT-TYPE
+    SYNTAX Counter64
+    MAX-ACCESS read-only
+    STATUS current
+    DESCRIPTION
+        "LNet send bytes."
+    ::= { lnetInformation 8 }
+
+lnetRecvBytes OBJECT-TYPE
+    SYNTAX Counter64
+    MAX-ACCESS read-only
+    STATUS current
+    DESCRIPTION
+        "LNet receive bytes."
+    ::= { lnetInformation 9 }
+
+lnetRouteBytes OBJECT-TYPE
+    SYNTAX Counter64
+    MAX-ACCESS read-only
+    STATUS current
+    DESCRIPTION
+        "LNet routed bytes."
+    ::= { lnetInformation 10 }
+
+lnetDropBytes OBJECT-TYPE
+    SYNTAX Counter64
+    MAX-ACCESS read-only
+    STATUS current
+    DESCRIPTION
+        "LNet dropped bytes."
+    ::= { lnetInformation 11 }
+
+--============================================================================
+--
 -- Lustre Management Traps
 --
 --============================================================================
diff --git a/snmp/lustre-snmp-util.c b/snmp/lustre-snmp-util.c
index 36845ad..93908a1 100644
--- a/snmp/lustre-snmp-util.c
+++ b/snmp/lustre-snmp-util.c
@@ -763,3 +763,20 @@ extern int mds_stats_values(char * name_value, unsigned long long * nb_sample, u
 
     return SUCCESS;
 }
+
+/*
+ * convert_ull - split a 64-bit value into the two halves of a counter64.
+ *
+ * @c64:     destination; ->low receives bits 0-31 of @ull, ->high bits 32-63
+ * @ull:     value to convert
+ * @var_len: set to the size of the counter64 object that @c64 points to
+ */
+void convert_ull(counter64 *c64, unsigned long long ull, size_t *var_len)
+{
+	/* Size of the pointed-to object, not of the pointer itself. */
+	*var_len = sizeof(*c64);
+	c64->low = (unsigned long long) (0x0ffffffff & ull);
+	ull >>= 32;
+	c64->high = (unsigned long long) (0x0ffffffff & ull);
+}
+
diff --git a/snmp/lustre-snmp-util.h b/snmp/lustre-snmp-util.h
index 1710ef5..6e46d24 100644
--- a/snmp/lustre-snmp-util.h
+++ b/snmp/lustre-snmp-util.h
@@ -112,8 +112,22 @@
 #define LDLMUNUSEDLOCKCOUNT 103
 #define LDLMRESOURCECOUNT 104
 
+#define LNETNUMBER 110
+#define LNETMSGSALLOC 111
+#define LNETMSGSMAX 112
+#define LNETERRORS 113
+#define LNETSENDCOUNT 114
+#define LNETRECVCOUNT 115
+#define LNETROUTECOUNT 116
+#define LNETDROPCOUNT 117
+#define LNETSENDBYTES 118
+#define LNETRECVBYTES 119
+#define LNETROUTEBYTES 120
+#define LNETDROPBYTES 121
+
 /* Defining the proc paths for Lustre file system */
 #define LUSTRE_PATH "/proc/fs/lustre/"
+#define LNET_PATH "/proc/sys/lnet/"
 #define OSD_PATH LUSTRE_PATH "obdfilter/"
 #define OSC_PATH LUSTRE_PATH "osc/"
 #define MDS_PATH LUSTRE_PATH "mds/"
@@ -215,6 +229,7 @@ unsigned char *
                 const char *path,
                 struct oid_table *ptable);
 
+void convert_ull(counter64 *c64, unsigned long long ull, size_t *var_len);
 int stats_values(char * filepath,char * name_value, unsigned long long * nb_sample, unsigned long long * min, unsigned long long * max, unsigned long long * sum, unsigned long long * sum_square);
 extern int mds_stats_values(char * name_value, unsigned long long * nb_sample, unsigned long long * min, unsigned
long long * max, unsigned long long * sum, unsigned long long * sum_square); diff --git a/snmp/lustre-snmp.c b/snmp/lustre-snmp.c index 0473852..41070e0 100644 --- a/snmp/lustre-snmp.c +++ b/snmp/lustre-snmp.c @@ -45,6 +45,8 @@ #include #include "lustre-snmp.h" +#define LNET_CHECK_INTERVAL 500 + /* * clusterFileSystems_variables_oid: * this is the top level oid that we want to register under. This @@ -135,7 +137,7 @@ struct variable7 clusterFileSystems_variables[] = { /* logicalObjectVolume 2.1.7 */ { LOVNUMBER , ASN_UNSIGNED , RONLY , var_clusterFileSystems, 4, { 2,1,7,1 } }, - /* logicalObjectVolume.osdTable.lovTable 2.1.2.2.1 */ + /* logicalObjectVolume.osdTable.lovTable 2.1.7.2.1 */ { LOVUUID , ASN_OCTET_STR , RONLY , var_lovTable, 6, { 2,1,7,2,1,2 } }, { LOVCOMMONNAME , ASN_OCTET_STR , RONLY , var_lovTable, 6, { 2,1,7,2,1,3 } }, { LOVNUMOBD , ASN_UNSIGNED , RONLY , var_lovTable, 6, { 2,1,7,2,1,4 } }, @@ -158,6 +160,18 @@ struct variable7 clusterFileSystems_variables[] = { { LDLMUNUSEDLOCKCOUNT , ASN_UNSIGNED , RONLY , var_ldlmTable, 6, { 2,1,8,2,1,4 } }, { LDLMRESOURCECOUNT , ASN_UNSIGNED , RONLY , var_ldlmTable, 6, { 2,1,8,2,1,5 } }, + /* lnetInformation 2.1.9 */ + { LNETMSGSALLOC, ASN_UNSIGNED, RONLY, var_lnetInformation, 4, { 2,1,9,1 } }, + { LNETMSGSMAX, ASN_UNSIGNED, RONLY, var_lnetInformation, 4, { 2,1,9,2 } }, + { LNETERRORS, ASN_UNSIGNED, RONLY, var_lnetInformation, 4, { 2,1,9,3 } }, + { LNETSENDCOUNT, ASN_UNSIGNED, RONLY, var_lnetInformation, 4, { 2,1,9,4 } }, + { LNETRECVCOUNT, ASN_UNSIGNED, RONLY, var_lnetInformation, 4, { 2,1,9,5 } }, + { LNETROUTECOUNT, ASN_UNSIGNED, RONLY, var_lnetInformation, 4, { 2,1,9,6 } }, + { LNETDROPCOUNT, ASN_UNSIGNED, RONLY, var_lnetInformation, 4, { 2,1,9,7 } }, + { LNETSENDBYTES, ASN_COUNTER64, RONLY, var_lnetInformation, 4, { 2,1,9,8 } }, + { LNETRECVBYTES, ASN_COUNTER64, RONLY, var_lnetInformation, 4, { 2,1,9,9 } }, + { LNETROUTEBYTES, ASN_COUNTER64, RONLY, var_lnetInformation, 4, { 2,1,9,10 } }, + { LNETDROPBYTES, 
ASN_COUNTER64, RONLY, var_lnetInformation, 4, { 2,1,9,11 } },
 };
 
 /*****************************************************************************
@@ -528,6 +542,108 @@ var_ldlmTable(struct variable *vp,
                 LDLM_PATH,ldlm_table);
 }
 
+/*****************************************************************************
+ * Function: var_lnetInformation
+ *
+ * FindVarMethod handler for the lnetInformation (2.1.9) scalars.  The
+ * values are parsed from LNET_PATH "stats" into static storage, and the
+ * file is re-read at most once every LNET_CHECK_INTERVAL milliseconds.
+ *
+ * Returns a pointer to static storage holding the requested value with
+ * *var_len set to its size, or NULL when the stats file cannot be read or
+ * parsed, header_generic() reports MATCH_FAILED, or vp->magic is unknown.
+ ****************************************************************************/
+unsigned char *
+var_lnetInformation(struct variable *vp,
+		    oid *name,
+		    size_t *length,
+		    int exact,
+		    size_t *var_len,
+		    WriteMethod **write_method)
+{
+	/* Raw stats text and the values parsed from it, kept between calls */
+	static unsigned char string[SPRINT_MAX_LEN];
+	static unsigned int i[7];
+	static unsigned long long ull[4];
+	/* Earliest time of the next re-read, in microseconds */
+	static unsigned long long next_update;
+	static counter64 c64;
+	static unsigned int c32;
+	struct timeval current_tv;
+	unsigned long long current;
+	char file_path[MAX_PATH_SIZE];
+
+	/* Update at most every LNET_CHECK_INTERVAL milliseconds */
+	gettimeofday(&current_tv, NULL);
+	/* 64-bit math: tv_sec * 1000000 does not fit in a 32-bit long */
+	current = (unsigned long long)current_tv.tv_sec * 1000000ULL +
+		  (unsigned long long)current_tv.tv_usec;
+	if (current >= next_update) {
+		snprintf(file_path, sizeof(file_path), "%s%s",
+			 LNET_PATH, "stats");
+		if (read_string(file_path, (char *) string, sizeof(string))
+		    != SUCCESS)
+			return NULL;
+
+		/* Never hand out or cache a partially parsed stats line */
+		if (sscanf((char *) string,
+			   "%u %u %u %u %u %u %u %llu %llu %llu %llu",
+			   &i[0], &i[1], &i[2], &i[3], &i[4], &i[5], &i[6],
+			   &ull[0], &ull[1], &ull[2], &ull[3]) != 11)
+			return NULL;
+
+		next_update = current + (LNET_CHECK_INTERVAL * 1000);
+	}
+
+	if (header_generic(vp, name, length, exact, var_len, write_method)
+	    == MATCH_FAILED)
+		return NULL;
+
+	switch (vp->magic) {
+	case LNETMSGSALLOC:
+		*var_len = sizeof(c32);
+		c32 = i[0];
+		return (unsigned char *) &c32;
+	case LNETMSGSMAX:
+		*var_len = sizeof(c32);
+		c32 = i[1];
+		return (unsigned char *) &c32;
+	case LNETERRORS:
+		*var_len = sizeof(c32);
+		c32 = i[2];
+		return (unsigned char *) &c32;
+	case LNETSENDCOUNT:
+		*var_len = sizeof(c32);
+		c32 = i[3];
+		return (unsigned char *) &c32;
+	case LNETRECVCOUNT:
+		*var_len = sizeof(c32);
+		c32 = i[4];
+		return (unsigned char *) &c32;
+	case LNETROUTECOUNT:
+		*var_len = sizeof(c32);
+		c32 = i[5];
+		return (unsigned char *) &c32;
+	case LNETDROPCOUNT:
+		*var_len = sizeof(c32);
+		c32 = i[6];
+		return (unsigned char *) &c32;
+	case LNETSENDBYTES:
+		convert_ull(&c64, ull[0], var_len);
+		return (unsigned char *) &c64;
+	case LNETRECVBYTES:
+		convert_ull(&c64, ull[1], var_len);
+		return (unsigned char *) &c64;
+	case LNETROUTEBYTES:
+		convert_ull(&c64, ull[2], var_len);
+		return (unsigned char *) &c64;
+	case LNETDROPBYTES:
+		convert_ull(&c64, ull[3], var_len);
+		return (unsigned char *) &c64;
+	default:
+		return NULL;
+	}
+}
 
 /*****************************************************************************
  * Function: var_mdsNbSampledReq
diff --git a/snmp/lustre-snmp.h b/snmp/lustre-snmp.h
index 89eddae..27eaae3 100644
--- a/snmp/lustre-snmp.h
+++ b/snmp/lustre-snmp.h
@@ -57,6 +57,7 @@ FindVarMethod var_mdcTable;
 FindVarMethod var_cliTable;
 FindVarMethod var_ldlmTable;
 FindVarMethod var_lovTable;
+FindVarMethod var_lnetInformation;
 FindVarMethod var_mdsNbSampledReq;
 
 WriteMethod write_sysStatus;
-- 
1.8.3.1