Whamcloud - gitweb
b=1693
author pjkirner <pjkirner>
Tue, 12 Jul 2005 13:24:45 +0000 (13:24 +0000)
committer pjkirner <pjkirner>
Tue, 12 Jul 2005 13:24:45 +0000 (13:24 +0000)
r=adilger

Landing "Provide a health-check routine for MDS and OSTs"

12 files changed:
lustre/ChangeLog
lustre/include/linux/lustre_net.h
lustre/include/linux/obd.h
lustre/include/linux/obd_class.h
lustre/include/linux/obd_support.h
lustre/mds/handler.c
lustre/obdclass/class_obd.c
lustre/obdclass/lprocfs_status.c
lustre/obdfilter/filter.c
lustre/ost/ost_handler.c
lustre/ptlrpc/ptlrpc_module.c
lustre/ptlrpc/service.c

index 1f3145d..4200c89 100644 (file)
@@ -111,6 +111,12 @@ Description: Failover mode is now the default for OSTs.
 Details    : By default, OSTs will now run in failover mode.  To return to
             the old behaviour, add '--failout' to the lmc line for OSTs.
 
+Severity   : enhancement
+Bugzilla   : 1693
+Description: Health checks are now provided for MDS and OSTs
+Details    : Additional detailed health check information on MDS and OSTs
+            is now provided through the procfs health_check value.
+
 ------------------------------------------------------------------------------
 
 2005-06-20  Cluster File Systems, Inc. <info@clusterfs.com>
index 3e44c52..658e6e0 100644 (file)
@@ -726,6 +726,7 @@ int ptlrpc_start_thread(struct obd_device *dev, struct ptlrpc_service *svc,
 int ptlrpc_unregister_service(struct ptlrpc_service *service);
 int liblustre_check_services (void *arg);
 void ptlrpc_daemonize(void);
+int ptlrpc_service_health_check(struct ptlrpc_service *);
 
 
 struct ptlrpc_svc_data {
index 51f1910..823b24e 100644 (file)
@@ -368,6 +368,7 @@ struct mds_obd {
         struct lustre_quota_info         mds_quota_info;
         struct lustre_quota_ctxt         mds_quota_ctxt;
         atomic_t                         mds_quotachecking;
+        struct semaphore                 mds_health_sem;
 };
 
 struct echo_obd {
@@ -409,6 +410,7 @@ struct recovd_obd {
 struct ost_obd {
         struct ptlrpc_service *ost_service;
         struct ptlrpc_service *ost_create_service;
+        struct semaphore       ost_health_sem;
 };
 
 struct echo_client_obd {
@@ -737,6 +739,8 @@ struct obd_ops {
         int (*o_notify)(struct obd_device *obd, struct obd_device *watched,
                         int active);
 
+        int (*o_health_check)(struct obd_device *);
+
         /* quota methods */
         int (*o_quotacheck)(struct obd_export *, struct obd_quotactl *);
         int (*o_quotactl)(struct obd_export *, struct obd_quotactl *);
@@ -745,6 +749,10 @@ struct obd_ops {
          * NOTE: If adding ops, add another LPROCFS_OBD_OP_INIT() line
          * to lprocfs_alloc_obd_stats() in obdclass/lprocfs_status.c.
          * Also, add a wrapper function in include/linux/obd_class.h.
+         *
+         * Also note that if you add it to the END, you also have to change
+         * the num_stats calculation.
+         *
          */
 };
 
index c24c01b..fe2b2c0 100644 (file)
@@ -1046,6 +1046,31 @@ static inline int obd_quotactl(struct obd_export *exp,
         RETURN(rc);
 }
 
+static inline int obd_health_check(struct obd_device *obd)
+{
+        /* returns: 0 on healthy
+         *         >0 on unhealthy + reason code/flag
+         *            however the only supported reason == 1 right now
+         *            We'll need to define some better reasons
+         *            or flags in the future.
+         *         <0 on error
+         */
+        int rc;
+        ENTRY;
+
+        /* don't use EXP_CHECK_OP, because NULL method is normal here */
+        if (obd == NULL || !OBT(obd)) {
+                CERROR("cleaned up obd\n");
+                RETURN(-EOPNOTSUPP);
+        }
+        if (!obd->obd_set_up || obd->obd_stopping)
+                RETURN(0);
+        if (!OBP(obd, health_check))
+                RETURN(0);
+
+        rc = OBP(obd, health_check)(obd);
+        RETURN(rc);
+}
 
 static inline int obd_register_observer(struct obd_device *obd,
                                         struct obd_device *observer)
index 86c87bd..554461b 100644 (file)
@@ -40,6 +40,7 @@ extern unsigned int obd_dump_on_timeout;
 extern unsigned int obd_timeout;          /* seconds */
 #define PING_INTERVAL max(obd_timeout / 4, 1U)
 extern unsigned int ldlm_timeout;
+extern unsigned int obd_health_check_timeout;
 extern char obd_lustre_upcall[128];
 extern unsigned int obd_sync_filter;
 extern wait_queue_head_t obd_race_waitq;
index c1c72f3..5ee8956 100644 (file)
@@ -2047,6 +2047,8 @@ static int mdt_setup(struct obd_device *obd, obd_count len, void *buf)
 
         lprocfs_init_vars(mdt, &lvars);
         lprocfs_obd_setup(obd, lvars.obd_vars);
+        
+        sema_init(&mds->mds_health_sem, 1);
 
         mds->mds_service =
                 ptlrpc_init_svc(MDS_NBUFS, MDS_BUFSIZE, MDS_MAXREQSIZE,
@@ -2101,10 +2103,13 @@ static int mdt_setup(struct obd_device *obd, obd_count len, void *buf)
 
 err_thread3:
         ptlrpc_unregister_service(mds->mds_readpage_service);
+        mds->mds_readpage_service = NULL;
 err_thread2:
         ptlrpc_unregister_service(mds->mds_setattr_service);
+        mds->mds_setattr_service = NULL;
 err_thread:
         ptlrpc_unregister_service(mds->mds_service);
+        mds->mds_service = NULL;
 err_lprocfs:
         lprocfs_obd_cleanup(obd);
         return rc;
@@ -2115,15 +2120,42 @@ static int mdt_cleanup(struct obd_device *obd)
         struct mds_obd *mds = &obd->u.mds;
         ENTRY;
 
+        down(&mds->mds_health_sem);
         ptlrpc_unregister_service(mds->mds_readpage_service);
         ptlrpc_unregister_service(mds->mds_setattr_service);
         ptlrpc_unregister_service(mds->mds_service);
+        mds->mds_readpage_service = NULL;
+        mds->mds_setattr_service = NULL;
+        mds->mds_service = NULL;
+        up(&mds->mds_health_sem);
 
         lprocfs_obd_cleanup(obd);
 
         RETURN(0);
 }
 
+static int mdt_health_check(struct obd_device *obd)
+{
+        struct mds_obd *mds = &obd->u.mds;
+        int rc = 0;
+        
+        down(&mds->mds_health_sem);
+        rc |= ptlrpc_service_health_check(mds->mds_readpage_service);
+        rc |= ptlrpc_service_health_check(mds->mds_setattr_service);
+        rc |= ptlrpc_service_health_check(mds->mds_service);
+        up(&mds->mds_health_sem);
+
+        /*
+         * health_check to return 0 on healthy
+         * and 1 on unhealthy.
+         */
+        if(rc != 0)
+                rc = 1;
+        
+        return rc;
+}
+
+
 static struct dentry *mds_lvfs_fid2dentry(__u64 id, __u32 gen, __u64 gr,
                                           void *data)
 {
@@ -2162,6 +2194,7 @@ static struct obd_ops mdt_obd_ops = {
         .o_owner           = THIS_MODULE,
         .o_setup           = mdt_setup,
         .o_cleanup         = mdt_cleanup,
+        .o_health_check    = mdt_health_check,        
 };
 
 static int __init mds_init(void)
index 4f314e4..e8ceae7 100644 (file)
@@ -90,6 +90,7 @@ unsigned int obd_fail_loc;
 unsigned int obd_dump_on_timeout;
 unsigned int obd_timeout = 100; /* seconds */
 unsigned int ldlm_timeout = 20; /* seconds */
+unsigned int obd_health_check_timeout = 120; /* seconds */
 char obd_lustre_upcall[128] = "DEFAULT"; /* or NONE or /full/path/to/upcall  */
 unsigned int obd_sync_filter; /* = 0, don't sync by default */
 
@@ -379,6 +380,7 @@ EXPORT_SYMBOL(obd_race_waitq);
 EXPORT_SYMBOL(obd_dump_on_timeout);
 EXPORT_SYMBOL(obd_timeout);
 EXPORT_SYMBOL(ldlm_timeout);
+EXPORT_SYMBOL(obd_health_check_timeout);
 EXPORT_SYMBOL(obd_lustre_upcall);
 EXPORT_SYMBOL(obd_sync_filter);
 EXPORT_SYMBOL(ptlrpc_put_connection_superhack);
@@ -455,13 +457,12 @@ int obd_proc_read_pinger(char *page, char **start, off_t off, int count,
 static int obd_proc_read_health(char *page, char **start, off_t off,
                                 int count, int *eof, void *data)
 {
-        int rc = 0; //, i;
+        int rc = 0 , i;
         *eof = 1;
 
         if (portals_catastrophe)
                 rc += snprintf(page + rc, count - rc, "LBUG\n");
 
-#if 0
         spin_lock(&obd_dev_lock);
         for (i = 0; i < MAX_OBD_DEVICES; i++) {
                 struct obd_device *obd;
@@ -482,7 +483,6 @@ static int obd_proc_read_health(char *page, char **start, off_t off,
                 spin_lock(&obd_dev_lock);
         }
         spin_unlock(&obd_dev_lock);
-#endif
 
         if (rc == 0)
                 return snprintf(page, count, "healthy\n");
@@ -491,12 +491,35 @@ static int obd_proc_read_health(char *page, char **start, off_t off,
         return rc;
 }
 
+static int obd_proc_rd_health_timeout(char *page, char **start, off_t off,
+                                      int count, int *eof, void *data)
+{
+        *eof = 1;
+        return snprintf(page, count, "%d\n", obd_health_check_timeout);
+}
+
+static int obd_proc_wr_health_timeout(struct file *file, const char *buffer,
+                                      unsigned long count, void *data)
+{
+        int val, rc;
+
+        rc = lprocfs_write_helper(buffer, count, &val);
+        if (rc)
+                return rc;
+
+        obd_health_check_timeout = val;
+
+        return count;
+}
+
 /* Root for /proc/fs/lustre */
 struct lprocfs_vars lprocfs_base[] = {
         { "version", obd_proc_read_version, NULL, NULL },
         { "kernel_version", obd_proc_read_kernel_version, NULL, NULL },
         { "pinger", obd_proc_read_pinger, NULL, NULL },
         { "health_check", obd_proc_read_health, NULL, NULL },
+        { "health_check_timeout", obd_proc_rd_health_timeout,
+          obd_proc_wr_health_timeout, NULL },        
         { 0 }
 };
 #else
index 8a0db22..a3fcfc0 100644 (file)
@@ -671,6 +671,7 @@ int lprocfs_alloc_obd_stats(struct obd_device *obd, unsigned num_private_stats)
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, notify);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, quotacheck);
         LPROCFS_OBD_OP_INIT(num_private_stats, stats, quotactl);
+        LPROCFS_OBD_OP_INIT(num_private_stats, stats, health_check);
 
         for (i = num_private_stats; i < num_stats; i++) {
                 /* If this LBUGs, it is likely that an obd
index 3f26239..fadf1bd 100644 (file)
@@ -2722,6 +2722,21 @@ int filter_iocontrol(unsigned int cmd, struct obd_export *exp,
         RETURN(0);
 }
 
+static int filter_health_check(struct obd_device *obd)
+{
+        struct filter_obd *filter = &obd->u.filter;
+        int rc = 0;
+       
+        /*
+         * health_check to return 0 on healthy
+         * and 1 on unhealthy.
+         */
+        if(filter->fo_sb->s_flags & MS_RDONLY)
+                rc = 1;
+
+        return rc;
+}
+
 static struct dentry *filter_lvfs_fid2dentry(__u64 id, __u32 gen, __u64 gr,
                                              void *data)
 {
@@ -2758,6 +2773,7 @@ static struct obd_ops filter_obd_ops = {
         .o_iocontrol      = filter_iocontrol,
         .o_quotacheck     = filter_quotacheck,
         .o_quotactl       = filter_quotactl,
+        .o_health_check   = filter_health_check,
 };
 
 static struct obd_ops filter_sanobd_ops = {
index 9ca7b6d..025beaa 100644 (file)
@@ -1294,6 +1294,8 @@ static int ost_setup(struct obd_device *obd, obd_count len, void *buf)
         lprocfs_init_vars(ost, &lvars);
         lprocfs_obd_setup(obd, lvars.obd_vars);
 
+        sema_init(&ost->ost_health_sem, 1);
+
         ost->ost_service =
                 ptlrpc_init_svc(OST_NBUFS, OST_BUFSIZE, OST_MAXREQSIZE,
                                 OST_REQUEST_PORTAL, OSC_REPLY_PORTAL,
@@ -1330,8 +1332,10 @@ static int ost_setup(struct obd_device *obd, obd_count len, void *buf)
 
 out_create:
         ptlrpc_unregister_service(ost->ost_create_service);
+        ost->ost_create_service = NULL;
 out_service:
         ptlrpc_unregister_service(ost->ost_service);
+        ost->ost_service = NULL;
 out_lprocfs:
         lprocfs_obd_cleanup(obd);
         RETURN(rc);
@@ -1350,14 +1354,38 @@ static int ost_cleanup(struct obd_device *obd)
         }
         spin_unlock_bh(&obd->obd_processing_task_lock);
 
+        down(&ost->ost_health_sem);
         ptlrpc_unregister_service(ost->ost_service);
         ptlrpc_unregister_service(ost->ost_create_service);
+        ost->ost_service = NULL;
+        ost->ost_create_service = NULL;
+        up(&ost->ost_health_sem);
 
         lprocfs_obd_cleanup(obd);
 
         RETURN(err);
 }
 
+static int ost_health_check(struct obd_device *obd)
+{
+        struct ost_obd *ost = &obd->u.ost;
+        int rc = 0;
+
+        down(&ost->ost_health_sem);
+        rc |= ptlrpc_service_health_check(ost->ost_service);
+        rc |= ptlrpc_service_health_check(ost->ost_create_service);
+        up(&ost->ost_health_sem);
+
+        /*
+         * health_check to return 0 on healthy
+         * and 1 on unhealthy.
+         */
+        if( rc != 0)
+                rc = 1;
+
+        return rc;
+}
+
 struct ost_thread_local_cache *ost_tls(struct ptlrpc_request *r)
 {
         return (struct ost_thread_local_cache *)(r->rq_svc_thread->t_data);
@@ -1368,6 +1396,7 @@ static struct obd_ops ost_obd_ops = {
         .o_owner        = THIS_MODULE,
         .o_setup        = ost_setup,
         .o_cleanup      = ost_cleanup,
+        .o_health_check = ost_health_check,
 };
 
 static int __init ost_init(void)
index f9c4bdb..3ca1cee 100644 (file)
@@ -135,6 +135,7 @@ EXPORT_SYMBOL(ptlrpc_start_n_threads);
 EXPORT_SYMBOL(ptlrpc_start_thread);
 EXPORT_SYMBOL(ptlrpc_unregister_service);
 EXPORT_SYMBOL(ptlrpc_daemonize);
+EXPORT_SYMBOL(ptlrpc_service_health_check);
 
 /* pack_generic.c */
 EXPORT_SYMBOL(lustre_msg_swabbed);
index e54449d..14d99b5 100644 (file)
@@ -1136,3 +1136,43 @@ int ptlrpc_unregister_service(struct ptlrpc_service *service)
                           srv_interfaces[ptlrpc_ninterfaces]));
         return 0;
 }
+
+/* Returns 0 if the service is healthy (or svc is NULL), -1 if the request at
+ * the head of the queue has waited more than obd_health_check_timeout seconds.
+ *
+ * Right now, it just checks to make sure that requests aren't languishing
+ * in the queue.  We'll use this health check to govern whether a node needs
+ * to be shot, so it's intentionally non-aggressive. */
+int ptlrpc_service_health_check(struct ptlrpc_service *svc)
+{
+        struct ptlrpc_request *request;
+        struct timeval         right_now;
+        long                   timediff, cutoff;
+        unsigned long          flags;
+        int                    rc = 0; /* healthy unless proven otherwise */
+
+        if (svc == NULL)
+                return 0;
+
+        spin_lock_irqsave(&svc->srv_lock, flags);
+        if (list_empty(&svc->srv_request_queue))
+                goto out;
+
+        /* only the request at the head of the queue is examined */
+        request = list_entry(svc->srv_request_queue.next,
+                             struct ptlrpc_request, rq_list);
+
+        do_gettimeofday(&right_now);
+        timediff = timeval_sub(&right_now, &request->rq_arrival_time);
+
+        cutoff = obd_health_check_timeout;
+
+        /* timediff is divided by 1000000 before comparing with the cutoff
+         * (seconds); presumably timeval_sub() returns microseconds. */
+        if (timediff / 1000000 > cutoff)
+                rc = -1;
+
+ out:
+        spin_unlock_irqrestore(&svc->srv_lock, flags);
+        return rc;
+}