Whamcloud - gitweb
LU-6175 ha: add health_check routine to the MDS, MGS and OSD 58/13558/2
author: Mikhail Pershin <mike.pershin@intel.com>
Tue, 27 Jan 2015 23:25:04 +0000 (02:25 +0300)
committer: Oleg Drokin <oleg.drokin@intel.com>
Sun, 8 Feb 2015 02:44:00 +0000 (02:44 +0000)
This patch adds obd_health_check() methods to the MDS and MGS to check
the health of their ptlrpc services, as the OST already does. It also adds
a health_check() routine directly to the OSD to verify that it is mounted
and is not read-only.

Signed-off-by: Mikhail Pershin <mike.pershin@intel.com>
Change-Id: Ib4af652b08e7e3616ebb3b99ce3e4ad03bdd5ab5
Reviewed-on: http://review.whamcloud.com/13558
Tested-by: Jenkins
Reviewed-by: Jinshan Xiong <jinshan.xiong@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: John L. Hammond <john.hammond@intel.com>
Reviewed-by: Niu Yawei <yawei.niu@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/mdt/mdt_mds.c
lustre/mgs/mgs_handler.c
lustre/mgs/mgs_internal.h
lustre/osd-ldiskfs/osd_handler.c
lustre/ost/ost_handler.c

index 2f68264..0d6d416 100644 (file)
 
 struct mds_device {
        /* super-class */
 
 struct mds_device {
        /* super-class */
-       struct md_device           mds_md_dev;
-       struct ptlrpc_service     *mds_regular_service;
-       struct ptlrpc_service     *mds_readpage_service;
-       struct ptlrpc_service     *mds_out_service;
-       struct ptlrpc_service     *mds_setattr_service;
-       struct ptlrpc_service     *mds_mdsc_service;
-       struct ptlrpc_service     *mds_mdss_service;
-       struct ptlrpc_service     *mds_fld_service;
+       struct md_device         mds_md_dev;
+       struct ptlrpc_service   *mds_regular_service;
+       struct ptlrpc_service   *mds_readpage_service;
+       struct ptlrpc_service   *mds_out_service;
+       struct ptlrpc_service   *mds_setattr_service;
+       struct ptlrpc_service   *mds_mdsc_service;
+       struct ptlrpc_service   *mds_mdss_service;
+       struct ptlrpc_service   *mds_fld_service;
+       struct mutex             mds_health_mutex;
 };
 
 /*
 };
 
 /*
@@ -103,6 +104,8 @@ CFS_MODULE_PARM(mds_attr_num_cpts, "c", charp, 0444,
 static void mds_stop_ptlrpc_service(struct mds_device *m)
 {
        ENTRY;
 static void mds_stop_ptlrpc_service(struct mds_device *m)
 {
        ENTRY;
+
+       mutex_lock(&m->mds_health_mutex);
        if (m->mds_regular_service != NULL) {
                ptlrpc_unregister_service(m->mds_regular_service);
                m->mds_regular_service = NULL;
        if (m->mds_regular_service != NULL) {
                ptlrpc_unregister_service(m->mds_regular_service);
                m->mds_regular_service = NULL;
@@ -131,6 +134,8 @@ static void mds_stop_ptlrpc_service(struct mds_device *m)
                ptlrpc_unregister_service(m->mds_fld_service);
                m->mds_fld_service = NULL;
        }
                ptlrpc_unregister_service(m->mds_fld_service);
                m->mds_fld_service = NULL;
        }
+       mutex_unlock(&m->mds_health_mutex);
+
        EXIT;
 }
 
        EXIT;
 }
 
@@ -502,6 +507,8 @@ static struct lu_device *mds_device_alloc(const struct lu_env *env,
                return l;
        }
 
                return l;
        }
 
+       mutex_init(&m->mds_health_mutex);
+
        rc = mds_start_ptlrpc_service(m);
 
        if (rc != 0) {
        rc = mds_start_ptlrpc_service(m);
 
        if (rc != 0) {
@@ -534,8 +541,28 @@ static struct lu_device_type mds_device_type = {
        .ldt_ctx_tags = LCT_MD_THREAD
 };
 
        .ldt_ctx_tags = LCT_MD_THREAD
 };
 
+/*
+ * Health check for the MDS obd device.
+ *
+ * Runs ptlrpc_service_health_check() on each ptlrpc service pointer kept
+ * in the mds_device and ORs the results together. The checks are done
+ * under mds_health_mutex, which mds_stop_ptlrpc_service() also holds
+ * while it unregisters the services.
+ *
+ * \retval 0 if every service check returned zero
+ * \retval 1 if at least one service check returned non-zero
+ */
+static int mds_health_check(const struct lu_env *env, struct obd_device *obd)
+{
+       struct mds_device *m = mds_dev(obd->obd_lu_dev);
+       int unhealthy;
+
+       mutex_lock(&m->mds_health_mutex);
+       unhealthy  = ptlrpc_service_health_check(m->mds_regular_service);
+       unhealthy |= ptlrpc_service_health_check(m->mds_readpage_service);
+       unhealthy |= ptlrpc_service_health_check(m->mds_out_service);
+       unhealthy |= ptlrpc_service_health_check(m->mds_setattr_service);
+       unhealthy |= ptlrpc_service_health_check(m->mds_mdsc_service);
+       unhealthy |= ptlrpc_service_health_check(m->mds_mdss_service);
+       unhealthy |= ptlrpc_service_health_check(m->mds_fld_service);
+       mutex_unlock(&m->mds_health_mutex);
+
+       /* collapse any non-zero accumulated value to exactly 1 */
+       return !!unhealthy;
+}
+
 static struct obd_ops mds_obd_device_ops = {
        .o_owner           = THIS_MODULE,
 static struct obd_ops mds_obd_device_ops = {
        .o_owner           = THIS_MODULE,
+       .o_health_check    = mds_health_check,
 };
 
 int mds_mod_init(void)
 };
 
 int mds_mod_init(void)
index 168ed8e..05c2aa5 100644 (file)
@@ -1152,6 +1152,7 @@ static int mgs_init0(const struct lu_env *env, struct mgs_device *mgs,
        mutex_init(&mgs->mgs_mutex);
        mgs->mgs_start_time = cfs_time_current_sec();
        spin_lock_init(&mgs->mgs_lock);
        mutex_init(&mgs->mgs_mutex);
        mgs->mgs_start_time = cfs_time_current_sec();
        spin_lock_init(&mgs->mgs_lock);
+       mutex_init(&mgs->mgs_health_mutex);
 
        rc = lproc_mgs_setup(mgs, lustre_cfg_string(lcfg, 3));
        if (rc != 0) {
 
        rc = lproc_mgs_setup(mgs, lustre_cfg_string(lcfg, 3));
        if (rc != 0) {
@@ -1379,7 +1380,9 @@ static struct lu_device *mgs_device_fini(const struct lu_env *env,
 
        ping_evictor_stop();
 
 
        ping_evictor_stop();
 
+       mutex_lock(&mgs->mgs_health_mutex);
        ptlrpc_unregister_service(mgs->mgs_service);
        ptlrpc_unregister_service(mgs->mgs_service);
+       mutex_unlock(&mgs->mgs_health_mutex);
 
        obd_exports_barrier(obd);
        obd_zombie_barrier();
 
        obd_exports_barrier(obd);
        obd_zombie_barrier();
@@ -1517,6 +1520,18 @@ static int mgs_obd_disconnect(struct obd_export *exp)
        RETURN(rc);
 }
 
        RETURN(rc);
 }
 
+/*
+ * Health check for the MGS obd device.
+ *
+ * Runs ptlrpc_service_health_check() on the single MGS ptlrpc service
+ * while holding mgs_health_mutex; mgs_device_fini() takes the same mutex
+ * around ptlrpc_unregister_service().
+ *
+ * NOTE(review): the visible part of mgs_device_fini() does not show
+ * mgs_service being reset after it is unregistered - confirm this
+ * routine cannot be reached with a stale pointer.
+ *
+ * \retval 0 if the service check returned zero
+ * \retval 1 if the service check returned non-zero
+ */
+static int mgs_health_check(const struct lu_env *env, struct obd_device *obd)
+{
+       struct mgs_device *dev = lu2mgs_dev(obd->obd_lu_dev);
+       int unhealthy;
+
+       mutex_lock(&dev->mgs_health_mutex);
+       unhealthy = ptlrpc_service_health_check(dev->mgs_service);
+       mutex_unlock(&dev->mgs_health_mutex);
+
+       /* collapse any non-zero value to exactly 1 */
+       return !!unhealthy;
+}
+
 /* use obd ops to offer management infrastructure */
 static struct obd_ops mgs_obd_device_ops = {
        .o_owner                = THIS_MODULE,
 /* use obd ops to offer management infrastructure */
 static struct obd_ops mgs_obd_device_ops = {
        .o_owner                = THIS_MODULE,
@@ -1526,6 +1541,7 @@ static struct obd_ops mgs_obd_device_ops = {
        .o_init_export          = mgs_init_export,
        .o_destroy_export       = mgs_destroy_export,
        .o_iocontrol            = mgs_iocontrol,
        .o_init_export          = mgs_init_export,
        .o_destroy_export       = mgs_destroy_export,
        .o_iocontrol            = mgs_iocontrol,
+       .o_health_check         = mgs_health_check,
 };
 
 static int __init mgs_init(void)
 };
 
 static int __init mgs_init(void)
index 4fdaee4..781cd2c 100644 (file)
@@ -175,6 +175,7 @@ struct mgs_device {
        struct obd_device               *mgs_obd;
        struct local_oid_storage        *mgs_los;
        struct mutex                     mgs_mutex;
        struct obd_device               *mgs_obd;
        struct local_oid_storage        *mgs_los;
        struct mutex                     mgs_mutex;
+       struct mutex                     mgs_health_mutex;
        struct lu_target                 mgs_lut;
 };
 
        struct lu_target                 mgs_lut;
 };
 
index ef1799b..0465afb 100644 (file)
@@ -6415,6 +6415,14 @@ static struct lu_device_type osd_device_type = {
         .ldt_ctx_tags = LCT_LOCAL,
 };
 
         .ldt_ctx_tags = LCT_LOCAL,
 };
 
+/*
+ * Health check for the OSD obd device.
+ *
+ * The device is reported unhealthy when it has no mount (od_mnt is NULL)
+ * or when its super block carries the MS_RDONLY flag.
+ *
+ * \retval 0 if the device is mounted and not read-only
+ * \retval 1 if the device is not mounted or is read-only
+ */
+static int osd_health_check(const struct lu_env *env, struct obd_device *obd)
+{
+       struct osd_device *osd = osd_dev(obd->obd_lu_dev);
+       struct super_block *sb;
+
+       /*
+        * Test od_mnt before calling osd_sb(): the helper is expected to
+        * reach the super block through od_mnt (TODO confirm), so calling
+        * it first would make this NULL check come too late.
+        */
+       if (osd->od_mnt == NULL)
+               return 1;
+
+       sb = osd_sb(osd);
+
+       return (sb->s_flags & MS_RDONLY) != 0;
+}
+
 /*
  * lprocfs legacy support.
  */
 /*
  * lprocfs legacy support.
  */
@@ -6423,6 +6431,7 @@ static struct obd_ops osd_obd_device_ops = {
        .o_connect      = osd_obd_connect,
        .o_disconnect   = osd_obd_disconnect,
        .o_fid_alloc    = osd_fid_alloc,
        .o_connect      = osd_obd_connect,
        .o_disconnect   = osd_obd_disconnect,
        .o_fid_alloc    = osd_fid_alloc,
+       .o_health_check = osd_health_check,
 };
 
 static int __init osd_mod_init(void)
 };
 
 static int __init osd_mod_init(void)
index 99f86b4..e53b6ee 100644 (file)
@@ -387,23 +387,17 @@ static int ost_cleanup(struct obd_device *obd)
 
 static int ost_health_check(const struct lu_env *env, struct obd_device *obd)
 {
 
 static int ost_health_check(const struct lu_env *env, struct obd_device *obd)
 {
-        struct ost_obd *ost = &obd->u.ost;
-        int rc = 0;
+       struct ost_obd *ost = &obd->u.ost;
+       int rc = 0;
 
        mutex_lock(&ost->ost_health_mutex);
 
        mutex_lock(&ost->ost_health_mutex);
-        rc |= ptlrpc_service_health_check(ost->ost_service);
-        rc |= ptlrpc_service_health_check(ost->ost_create_service);
-        rc |= ptlrpc_service_health_check(ost->ost_io_service);
+       rc |= ptlrpc_service_health_check(ost->ost_service);
+       rc |= ptlrpc_service_health_check(ost->ost_create_service);
+       rc |= ptlrpc_service_health_check(ost->ost_io_service);
+       rc |= ptlrpc_service_health_check(ost->ost_seq_service);
        mutex_unlock(&ost->ost_health_mutex);
 
        mutex_unlock(&ost->ost_health_mutex);
 
-        /*
-         * health_check to return 0 on healthy
-         * and 1 on unhealthy.
-         */
-        if( rc != 0)
-                rc = 1;
-
-        return rc;
+       return rc != 0 ? 1 : 0;
 }
 
 /* use obd ops to offer management infrastructure */
 }
 
 /* use obd ops to offer management infrastructure */