From f3e49e66608a20ac1b006b5e4202ad8ad88c1979 Mon Sep 17 00:00:00 2001 From: Mikhail Pershin Date: Wed, 28 Jan 2015 02:25:04 +0300 Subject: [PATCH] LU-6175 ha: add health_check routine to the MDS, MGS and OSD The patch adds obd_health_check() methods to the MDS and MGS to check the health of their ptlrpc services, as the OST already does. It also adds a health_check() routine directly to the OSD to check that it is mounted and not read-only. The OST health check is extended to cover ost_seq_service as well. Signed-off-by: Mikhail Pershin Change-Id: Ib4af652b08e7e3616ebb3b99ce3e4ad03bdd5ab5 Reviewed-on: http://review.whamcloud.com/13558 Tested-by: Jenkins Reviewed-by: Jinshan Xiong Tested-by: Maloo Reviewed-by: John L. Hammond Reviewed-by: Niu Yawei Reviewed-by: Oleg Drokin --- lustre/mdt/mdt_mds.c | 43 ++++++++++++++++++++++++++++++++-------- lustre/mgs/mgs_handler.c | 16 +++++++++++++++ lustre/mgs/mgs_internal.h | 1 + lustre/osd-ldiskfs/osd_handler.c | 9 +++++++++ lustre/ost/ost_handler.c | 20 +++++++------------ 5 files changed, 68 insertions(+), 21 deletions(-) diff --git a/lustre/mdt/mdt_mds.c b/lustre/mdt/mdt_mds.c index 2f68264..0d6d416 100644 --- a/lustre/mdt/mdt_mds.c +++ b/lustre/mdt/mdt_mds.c @@ -56,14 +56,15 @@ struct mds_device { /* super-class */ - struct md_device mds_md_dev; - struct ptlrpc_service *mds_regular_service; - struct ptlrpc_service *mds_readpage_service; - struct ptlrpc_service *mds_out_service; - struct ptlrpc_service *mds_setattr_service; - struct ptlrpc_service *mds_mdsc_service; - struct ptlrpc_service *mds_mdss_service; - struct ptlrpc_service *mds_fld_service; + struct md_device mds_md_dev; + struct ptlrpc_service *mds_regular_service; + struct ptlrpc_service *mds_readpage_service; + struct ptlrpc_service *mds_out_service; + struct ptlrpc_service *mds_setattr_service; + struct ptlrpc_service *mds_mdsc_service; + struct ptlrpc_service *mds_mdss_service; + struct ptlrpc_service *mds_fld_service; + struct mutex mds_health_mutex; }; /* @@ -103,6 +104,8 @@ CFS_MODULE_PARM(mds_attr_num_cpts, "c", charp, 0444, static void
mds_stop_ptlrpc_service(struct mds_device *m) { ENTRY; + + mutex_lock(&m->mds_health_mutex); if (m->mds_regular_service != NULL) { ptlrpc_unregister_service(m->mds_regular_service); m->mds_regular_service = NULL; @@ -131,6 +134,8 @@ static void mds_stop_ptlrpc_service(struct mds_device *m) ptlrpc_unregister_service(m->mds_fld_service); m->mds_fld_service = NULL; } + mutex_unlock(&m->mds_health_mutex); + EXIT; } @@ -502,6 +507,8 @@ static struct lu_device *mds_device_alloc(const struct lu_env *env, return l; } + mutex_init(&m->mds_health_mutex); + rc = mds_start_ptlrpc_service(m); if (rc != 0) { @@ -534,8 +541,28 @@ static struct lu_device_type mds_device_type = { .ldt_ctx_tags = LCT_MD_THREAD }; +static int mds_health_check(const struct lu_env *env, struct obd_device *obd) +{ + struct mds_device *mds = mds_dev(obd->obd_lu_dev); + int rc = 0; + + + mutex_lock(&mds->mds_health_mutex); + rc |= ptlrpc_service_health_check(mds->mds_regular_service); + rc |= ptlrpc_service_health_check(mds->mds_readpage_service); + rc |= ptlrpc_service_health_check(mds->mds_out_service); + rc |= ptlrpc_service_health_check(mds->mds_setattr_service); + rc |= ptlrpc_service_health_check(mds->mds_mdsc_service); + rc |= ptlrpc_service_health_check(mds->mds_mdss_service); + rc |= ptlrpc_service_health_check(mds->mds_fld_service); + mutex_unlock(&mds->mds_health_mutex); + + return rc != 0 ? 
1 : 0; +} + static struct obd_ops mds_obd_device_ops = { .o_owner = THIS_MODULE, + .o_health_check = mds_health_check, }; int mds_mod_init(void) diff --git a/lustre/mgs/mgs_handler.c b/lustre/mgs/mgs_handler.c index 168ed8e..05c2aa5 100644 --- a/lustre/mgs/mgs_handler.c +++ b/lustre/mgs/mgs_handler.c @@ -1152,6 +1152,7 @@ static int mgs_init0(const struct lu_env *env, struct mgs_device *mgs, mutex_init(&mgs->mgs_mutex); mgs->mgs_start_time = cfs_time_current_sec(); spin_lock_init(&mgs->mgs_lock); + mutex_init(&mgs->mgs_health_mutex); rc = lproc_mgs_setup(mgs, lustre_cfg_string(lcfg, 3)); if (rc != 0) { @@ -1379,7 +1380,9 @@ static struct lu_device *mgs_device_fini(const struct lu_env *env, ping_evictor_stop(); + mutex_lock(&mgs->mgs_health_mutex); ptlrpc_unregister_service(mgs->mgs_service); + mutex_unlock(&mgs->mgs_health_mutex); obd_exports_barrier(obd); obd_zombie_barrier(); @@ -1517,6 +1520,18 @@ static int mgs_obd_disconnect(struct obd_export *exp) RETURN(rc); } +static int mgs_health_check(const struct lu_env *env, struct obd_device *obd) +{ + struct mgs_device *mgs = lu2mgs_dev(obd->obd_lu_dev); + int rc = 0; + + mutex_lock(&mgs->mgs_health_mutex); + rc |= ptlrpc_service_health_check(mgs->mgs_service); + mutex_unlock(&mgs->mgs_health_mutex); + + return rc != 0 ? 
1 : 0; +} + /* use obd ops to offer management infrastructure */ static struct obd_ops mgs_obd_device_ops = { .o_owner = THIS_MODULE, @@ -1526,6 +1541,7 @@ static struct obd_ops mgs_obd_device_ops = { .o_init_export = mgs_init_export, .o_destroy_export = mgs_destroy_export, .o_iocontrol = mgs_iocontrol, + .o_health_check = mgs_health_check, }; static int __init mgs_init(void) diff --git a/lustre/mgs/mgs_internal.h b/lustre/mgs/mgs_internal.h index 4fdaee4..781cd2c 100644 --- a/lustre/mgs/mgs_internal.h +++ b/lustre/mgs/mgs_internal.h @@ -175,6 +175,7 @@ struct mgs_device { struct obd_device *mgs_obd; struct local_oid_storage *mgs_los; struct mutex mgs_mutex; + struct mutex mgs_health_mutex; struct lu_target mgs_lut; }; diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index ef1799b..0465afb 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -6415,6 +6415,14 @@ static struct lu_device_type osd_device_type = { .ldt_ctx_tags = LCT_LOCAL, }; +static int osd_health_check(const struct lu_env *env, struct obd_device *obd) +{ + struct osd_device *osd = osd_dev(obd->obd_lu_dev); + struct super_block *sb = osd_sb(osd); + + return (osd->od_mnt == NULL || sb->s_flags & MS_RDONLY); +} + /* * lprocfs legacy support. 
*/ @@ -6423,6 +6431,7 @@ static struct obd_ops osd_obd_device_ops = { .o_connect = osd_obd_connect, .o_disconnect = osd_obd_disconnect, .o_fid_alloc = osd_fid_alloc, + .o_health_check = osd_health_check, }; static int __init osd_mod_init(void) diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index 99f86b4..e53b6ee 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -387,23 +387,17 @@ static int ost_cleanup(struct obd_device *obd) static int ost_health_check(const struct lu_env *env, struct obd_device *obd) { - struct ost_obd *ost = &obd->u.ost; - int rc = 0; + struct ost_obd *ost = &obd->u.ost; + int rc = 0; mutex_lock(&ost->ost_health_mutex); - rc |= ptlrpc_service_health_check(ost->ost_service); - rc |= ptlrpc_service_health_check(ost->ost_create_service); - rc |= ptlrpc_service_health_check(ost->ost_io_service); + rc |= ptlrpc_service_health_check(ost->ost_service); + rc |= ptlrpc_service_health_check(ost->ost_create_service); + rc |= ptlrpc_service_health_check(ost->ost_io_service); + rc |= ptlrpc_service_health_check(ost->ost_seq_service); mutex_unlock(&ost->ost_health_mutex); - /* - * health_check to return 0 on healthy - * and 1 on unhealthy. - */ - if( rc != 0) - rc = 1; - - return rc; + return rc != 0 ? 1 : 0; } /* use obd ops to offer management infrastructure */ -- 1.8.3.1