Details : By default, OSTs will now run in failover mode. To return to
the old behaviour, add '--failout' to the lmc line for OSTs.
+Severity : enhancement
+Bugzilla : 1693
+Description: Health checks are now provided for MDS and OSTs
+Details : Additional detailed health check information on MDS and OSTs
+ is now provided through the procfs health_check value.
+
------------------------------------------------------------------------------
2005-06-20 Cluster File Systems, Inc. <info@clusterfs.com>
int ptlrpc_unregister_service(struct ptlrpc_service *service);
int liblustre_check_services (void *arg);
void ptlrpc_daemonize(void);
+int ptlrpc_service_health_check(struct ptlrpc_service *);
struct ptlrpc_svc_data {
struct lustre_quota_info mds_quota_info;
struct lustre_quota_ctxt mds_quota_ctxt;
atomic_t mds_quotachecking;
+ struct semaphore mds_health_sem;
};
struct echo_obd {
struct ost_obd {
struct ptlrpc_service *ost_service;
struct ptlrpc_service *ost_create_service;
+ struct semaphore ost_health_sem;
};
struct echo_client_obd {
int (*o_notify)(struct obd_device *obd, struct obd_device *watched,
int active);
+ int (*o_health_check)(struct obd_device *);
+
/* quota methods */
int (*o_quotacheck)(struct obd_export *, struct obd_quotactl *);
int (*o_quotactl)(struct obd_export *, struct obd_quotactl *);
* NOTE: If adding ops, add another LPROCFS_OBD_OP_INIT() line
* to lprocfs_alloc_obd_stats() in obdclass/lprocfs_status.c.
* Also, add a wrapper function in include/linux/obd_class.h.
+ *
+ * Also note that if you add it to the END, you also have to change
+ * the num_stats calculation.
+ *
*/
};
RETURN(rc);
}
+static inline int obd_health_check(struct obd_device *obd)
+{
+        /* Return value contract:
+         *    0 - healthy
+         *   >0 - unhealthy; the value is a reason code/flag.  Only
+         *        reason == 1 is supported right now; better reasons
+         *        or flags still need to be defined.
+         *   <0 - error
+         */
+        int status;
+        ENTRY;
+
+        /* don't use EXP_CHECK_OP, because NULL method is normal here */
+        if (obd == NULL || !OBT(obd)) {
+                CERROR("cleaned up obd\n");
+                RETURN(-EOPNOTSUPP);
+        }
+
+        /* A device that is not set up, is stopping, or provides no
+         * health_check method is reported as healthy. */
+        if (!obd->obd_set_up || obd->obd_stopping || !OBP(obd, health_check))
+                RETURN(0);
+
+        status = OBP(obd, health_check)(obd);
+        RETURN(status);
+}
static inline int obd_register_observer(struct obd_device *obd,
struct obd_device *observer)
extern unsigned int obd_timeout; /* seconds */
#define PING_INTERVAL max(obd_timeout / 4, 1U)
extern unsigned int ldlm_timeout;
+extern unsigned int obd_health_check_timeout;
extern char obd_lustre_upcall[128];
extern unsigned int obd_sync_filter;
extern wait_queue_head_t obd_race_waitq;
lprocfs_init_vars(mdt, &lvars);
lprocfs_obd_setup(obd, lvars.obd_vars);
+
+ sema_init(&mds->mds_health_sem, 1);
mds->mds_service =
ptlrpc_init_svc(MDS_NBUFS, MDS_BUFSIZE, MDS_MAXREQSIZE,
err_thread3:
ptlrpc_unregister_service(mds->mds_readpage_service);
+ mds->mds_readpage_service = NULL;
err_thread2:
ptlrpc_unregister_service(mds->mds_setattr_service);
+ mds->mds_setattr_service = NULL;
err_thread:
ptlrpc_unregister_service(mds->mds_service);
+ mds->mds_service = NULL;
err_lprocfs:
lprocfs_obd_cleanup(obd);
return rc;
struct mds_obd *mds = &obd->u.mds;
ENTRY;
+ down(&mds->mds_health_sem);
ptlrpc_unregister_service(mds->mds_readpage_service);
ptlrpc_unregister_service(mds->mds_setattr_service);
ptlrpc_unregister_service(mds->mds_service);
+ mds->mds_readpage_service = NULL;
+ mds->mds_setattr_service = NULL;
+ mds->mds_service = NULL;
+ up(&mds->mds_health_sem);
lprocfs_obd_cleanup(obd);
RETURN(0);
}
+/* Health check method for the MDT.
+ *
+ * Queries the readpage, setattr and main MDS ptlrpc services while holding
+ * mds_health_sem, the same semaphore that is held while those services are
+ * unregistered and their pointers cleared during cleanup.
+ *
+ * Returns 0 when every service reports healthy and 1 otherwise. */
+static int mdt_health_check(struct obd_device *obd)
+{
+        struct mds_obd *mds = &obd->u.mds;
+        int status;
+
+        down(&mds->mds_health_sem);
+        status  = ptlrpc_service_health_check(mds->mds_readpage_service);
+        status |= ptlrpc_service_health_check(mds->mds_setattr_service);
+        status |= ptlrpc_service_health_check(mds->mds_service);
+        up(&mds->mds_health_sem);
+
+        /* collapse any non-zero service result into the single
+         * "unhealthy" value 1 */
+        return status != 0;
+}
+
+
static struct dentry *mds_lvfs_fid2dentry(__u64 id, __u32 gen, __u64 gr,
void *data)
{
.o_owner = THIS_MODULE,
.o_setup = mdt_setup,
.o_cleanup = mdt_cleanup,
+ .o_health_check = mdt_health_check,
};
static int __init mds_init(void)
unsigned int obd_dump_on_timeout;
unsigned int obd_timeout = 100; /* seconds */
unsigned int ldlm_timeout = 20; /* seconds */
+unsigned int obd_health_check_timeout = 120; /* seconds */
char obd_lustre_upcall[128] = "DEFAULT"; /* or NONE or /full/path/to/upcall */
unsigned int obd_sync_filter; /* = 0, don't sync by default */
EXPORT_SYMBOL(obd_dump_on_timeout);
EXPORT_SYMBOL(obd_timeout);
EXPORT_SYMBOL(ldlm_timeout);
+EXPORT_SYMBOL(obd_health_check_timeout);
EXPORT_SYMBOL(obd_lustre_upcall);
EXPORT_SYMBOL(obd_sync_filter);
EXPORT_SYMBOL(ptlrpc_put_connection_superhack);
static int obd_proc_read_health(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
- int rc = 0; //, i;
+ int rc = 0 , i;
*eof = 1;
if (portals_catastrophe)
rc += snprintf(page + rc, count - rc, "LBUG\n");
-#if 0
spin_lock(&obd_dev_lock);
for (i = 0; i < MAX_OBD_DEVICES; i++) {
struct obd_device *obd;
spin_lock(&obd_dev_lock);
}
spin_unlock(&obd_dev_lock);
-#endif
if (rc == 0)
return snprintf(page, count, "healthy\n");
return rc;
}
+/* procfs read handler for "health_check_timeout".
+ *
+ * Writes the current obd_health_check_timeout (seconds) followed by a
+ * newline into @page, using at most @count bytes, and sets *@eof.
+ * Returns the snprintf() result. */
+static int obd_proc_rd_health_timeout(char *page, char **start, off_t off,
+                                      int count, int *eof, void *data)
+{
+        *eof = 1;
+        /* obd_health_check_timeout is an unsigned int, so it must be
+         * printed with %u; %d would show large values as negative. */
+        return snprintf(page, count, "%u\n", obd_health_check_timeout);
+}
+
+/* procfs write handler for "health_check_timeout".
+ *
+ * Parses @buffer with lprocfs_write_helper() and stores the result in
+ * obd_health_check_timeout.  Returns @count on success, or the helper's
+ * non-zero return code if parsing fails (the timeout is then untouched).
+ *
+ * NOTE(review): the parsed value is a signed int stored into an unsigned
+ * int, so a negative input becomes a very large timeout - confirm whether
+ * that is intended. */
+static int obd_proc_wr_health_timeout(struct file *file, const char *buffer,
+                                      unsigned long count, void *data)
+{
+        int timeout;
+        int err = lprocfs_write_helper(buffer, count, &timeout);
+
+        if (err == 0) {
+                obd_health_check_timeout = timeout;
+                return count;
+        }
+
+        return err;
+}
+
/* Root for /proc/fs/lustre */
struct lprocfs_vars lprocfs_base[] = {
{ "version", obd_proc_read_version, NULL, NULL },
{ "kernel_version", obd_proc_read_kernel_version, NULL, NULL },
{ "pinger", obd_proc_read_pinger, NULL, NULL },
{ "health_check", obd_proc_read_health, NULL, NULL },
+ { "health_check_timeout", obd_proc_rd_health_timeout,
+ obd_proc_wr_health_timeout, NULL },
{ 0 }
};
#else
LPROCFS_OBD_OP_INIT(num_private_stats, stats, notify);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, quotacheck);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, quotactl);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, health_check);
for (i = num_private_stats; i < num_stats; i++) {
/* If this LBUGs, it is likely that an obd
RETURN(0);
}
+/* Health check method for the filter device.
+ *
+ * Returns 1 (unhealthy) when the MS_RDONLY flag is set in the s_flags of
+ * the filter's fo_sb, and 0 (healthy) otherwise. */
+static int filter_health_check(struct obd_device *obd)
+{
+        struct filter_obd *filter = &obd->u.filter;
+
+        return (filter->fo_sb->s_flags & MS_RDONLY) ? 1 : 0;
+}
+
static struct dentry *filter_lvfs_fid2dentry(__u64 id, __u32 gen, __u64 gr,
void *data)
{
.o_iocontrol = filter_iocontrol,
.o_quotacheck = filter_quotacheck,
.o_quotactl = filter_quotactl,
+ .o_health_check = filter_health_check,
};
static struct obd_ops filter_sanobd_ops = {
lprocfs_init_vars(ost, &lvars);
lprocfs_obd_setup(obd, lvars.obd_vars);
+ sema_init(&ost->ost_health_sem, 1);
+
ost->ost_service =
ptlrpc_init_svc(OST_NBUFS, OST_BUFSIZE, OST_MAXREQSIZE,
OST_REQUEST_PORTAL, OSC_REPLY_PORTAL,
out_create:
ptlrpc_unregister_service(ost->ost_create_service);
+ ost->ost_create_service = NULL;
out_service:
ptlrpc_unregister_service(ost->ost_service);
+ ost->ost_service = NULL;
out_lprocfs:
lprocfs_obd_cleanup(obd);
RETURN(rc);
}
spin_unlock_bh(&obd->obd_processing_task_lock);
+ down(&ost->ost_health_sem);
ptlrpc_unregister_service(ost->ost_service);
ptlrpc_unregister_service(ost->ost_create_service);
+ ost->ost_service = NULL;
+ ost->ost_create_service = NULL;
+ up(&ost->ost_health_sem);
lprocfs_obd_cleanup(obd);
RETURN(err);
}
+/* Health check method for the OST.
+ *
+ * Queries the main and create OST ptlrpc services while holding
+ * ost_health_sem, the same semaphore that is held while those services are
+ * unregistered and their pointers cleared during cleanup.
+ *
+ * Returns 0 when both services report healthy and 1 otherwise. */
+static int ost_health_check(struct obd_device *obd)
+{
+        struct ost_obd *ost = &obd->u.ost;
+        int status;
+
+        down(&ost->ost_health_sem);
+        status  = ptlrpc_service_health_check(ost->ost_service);
+        status |= ptlrpc_service_health_check(ost->ost_create_service);
+        up(&ost->ost_health_sem);
+
+        /* collapse any non-zero service result into the single
+         * "unhealthy" value 1 */
+        return status != 0;
+}
+
struct ost_thread_local_cache *ost_tls(struct ptlrpc_request *r)
{
return (struct ost_thread_local_cache *)(r->rq_svc_thread->t_data);
.o_owner = THIS_MODULE,
.o_setup = ost_setup,
.o_cleanup = ost_cleanup,
+ .o_health_check = ost_health_check,
};
static int __init ost_init(void)
EXPORT_SYMBOL(ptlrpc_start_thread);
EXPORT_SYMBOL(ptlrpc_unregister_service);
EXPORT_SYMBOL(ptlrpc_daemonize);
+EXPORT_SYMBOL(ptlrpc_service_health_check);
/* pack_generic.c */
EXPORT_SYMBOL(lustre_msg_swabbed);
srv_interfaces[ptlrpc_ninterfaces]));
return 0;
}
+
+/* Returns 0 if the service is healthy, -1 if the first request on its
+ * queue has been waiting for more than obd_health_check_timeout seconds.
+ * A NULL service is reported as healthy.
+ *
+ * Right now, it just checks to make sure that requests aren't languishing
+ * in the queue.  We'll use this health check to govern whether a node needs
+ * to be shot, so it's intentionally non-aggressive. */
+int ptlrpc_service_health_check(struct ptlrpc_service *svc)
+{
+        struct ptlrpc_request *request;
+        struct timeval right_now;
+        long timediff, cutoff;
+        unsigned long flags;
+        /* Healthy unless proven otherwise.  rc must start out defined:
+         * the path where the first request is still within the cutoff
+         * assigns nothing before reaching "out". */
+        int rc = 0;
+
+        if (svc == NULL)
+                return 0;
+
+        spin_lock_irqsave(&svc->srv_lock, flags);
+        if (list_empty(&svc->srv_request_queue))
+                goto out;
+
+        /* only the request at the front of the queue is examined */
+        request = list_entry(svc->srv_request_queue.next,
+                             struct ptlrpc_request, rq_list);
+
+        do_gettimeofday(&right_now);
+        timediff = timeval_sub(&right_now, &request->rq_arrival_time);
+
+        cutoff = obd_health_check_timeout;
+
+        /* timediff is scaled down by 1000000 to compare it with the
+         * cutoff, which is expressed in seconds */
+        if (timediff / 1000000 > cutoff)
+                rc = -1;
+
+ out:
+        spin_unlock_irqrestore(&svc->srv_lock, flags);
+        return rc;
+}