Whamcloud - gitweb
LU-6175 ha: add health_check routine to the MDS, MGS and OSD
[fs/lustre-release.git] / lustre / mdt / mdt_mds.c
index 64231f7..0d6d416 100644 (file)
 
 struct mds_device {
        /* super-class */
-       struct md_device           mds_md_dev;
-       struct ptlrpc_service     *mds_regular_service;
-       struct ptlrpc_service     *mds_readpage_service;
-       struct ptlrpc_service     *mds_out_service;
-       struct ptlrpc_service     *mds_setattr_service;
-       struct ptlrpc_service     *mds_mdsc_service;
-       struct ptlrpc_service     *mds_mdss_service;
-       struct ptlrpc_service     *mds_fld_service;
+       struct md_device         mds_md_dev;
+       struct ptlrpc_service   *mds_regular_service;
+       struct ptlrpc_service   *mds_readpage_service;
+       struct ptlrpc_service   *mds_out_service;
+       struct ptlrpc_service   *mds_setattr_service;
+       struct ptlrpc_service   *mds_mdsc_service;
+       struct ptlrpc_service   *mds_mdss_service;
+       struct ptlrpc_service   *mds_fld_service;
+       struct mutex             mds_health_mutex; /* taken by service stop and by health check */
 };
 
 /*
@@ -103,6 +104,8 @@ CFS_MODULE_PARM(mds_attr_num_cpts, "c", charp, 0444,
 static void mds_stop_ptlrpc_service(struct mds_device *m)
 {
        ENTRY;
+
+       mutex_lock(&m->mds_health_mutex);
        if (m->mds_regular_service != NULL) {
                ptlrpc_unregister_service(m->mds_regular_service);
                m->mds_regular_service = NULL;
@@ -131,6 +134,8 @@ static void mds_stop_ptlrpc_service(struct mds_device *m)
                ptlrpc_unregister_service(m->mds_fld_service);
                m->mds_fld_service = NULL;
        }
+       mutex_unlock(&m->mds_health_mutex);
+
        EXIT;
 }
 
@@ -138,7 +143,7 @@ static int mds_start_ptlrpc_service(struct mds_device *m)
 {
        static struct ptlrpc_service_conf conf;
        struct obd_device *obd = m->mds_md_dev.md_lu_dev.ld_obd;
-       cfs_proc_dir_entry_t *procfs_entry;
+       struct proc_dir_entry *procfs_entry;
        int rc = 0;
        ENTRY;
 
@@ -288,7 +293,7 @@ static int mds_start_ptlrpc_service(struct mds_device *m)
                        .bc_req_max_size        = OUT_MAXREQSIZE,
                        .bc_rep_max_size        = OUT_MAXREPSIZE,
                        .bc_req_portal          = OUT_PORTAL,
-                       .bc_rep_portal          = MDC_REPLY_PORTAL,
+                       .bc_rep_portal          = OSC_REPLY_PORTAL,
                },
                /*
                 * We'd like to have a mechanism to set this on a per-device
@@ -302,7 +307,8 @@ static int mds_start_ptlrpc_service(struct mds_device *m)
                        .tc_nthrs_max           = MDS_NTHRS_MAX,
                        .tc_nthrs_user          = mds_num_threads,
                        .tc_cpu_affinity        = 1,
-                       .tc_ctx_tags            = LCT_MD_THREAD,
+                       .tc_ctx_tags            = LCT_MD_THREAD |
+                                                 LCT_DT_THREAD,
                },
                .psc_cpt                = {
                        .cc_pattern             = mds_num_cpts,
@@ -463,6 +469,13 @@ static struct lu_device *mds_device_free(const struct lu_env *env,
        RETURN(NULL);
 }
 
+LPROC_SEQ_FOPS_RO_TYPE(mds, uuid);
+/* Table assigned to obd->obd_vars in mds_device_alloc(); mds_uuid_fops presumably comes from the macro above - confirm. */
+static struct lprocfs_vars lprocfs_mds_obd_vars[] = {
+       { "uuid",       &mds_uuid_fops  },
+       { NULL }
+};
+
 static struct lu_device *mds_device_alloc(const struct lu_env *env,
                                          struct lu_device_type *t,
                                          struct lustre_cfg *cfg)
@@ -486,13 +499,16 @@ static struct lu_device *mds_device_alloc(const struct lu_env *env,
        /* set this lu_device to obd, because error handling need it */
        obd->obd_lu_dev = l;
 
-       rc = lprocfs_obd_setup(obd, lprocfs_mds_obd_vars);
+       obd->obd_vars = lprocfs_mds_obd_vars;
+       rc = lprocfs_obd_setup(obd);
        if (rc != 0) {
                mds_device_free(env, l);
                l = ERR_PTR(rc);
                return l;
        }
 
+       mutex_init(&m->mds_health_mutex);
+
        rc = mds_start_ptlrpc_service(m);
 
        if (rc != 0) {
@@ -525,8 +541,28 @@ static struct lu_device_type mds_device_type = {
        .ldt_ctx_tags = LCT_MD_THREAD
 };
 
+static int mds_health_check(const struct lu_env *env, struct obd_device *obd)
+{
+       struct mds_device *mds = mds_dev(obd->obd_lu_dev);
+       int rc = 0;
+
+       /* Same mutex mds_stop_ptlrpc_service() holds while clearing these pointers. */
+       mutex_lock(&mds->mds_health_mutex);
+       rc |= ptlrpc_service_health_check(mds->mds_regular_service);
+       rc |= ptlrpc_service_health_check(mds->mds_readpage_service);
+       rc |= ptlrpc_service_health_check(mds->mds_out_service);
+       rc |= ptlrpc_service_health_check(mds->mds_setattr_service);
+       rc |= ptlrpc_service_health_check(mds->mds_mdsc_service);
+       rc |= ptlrpc_service_health_check(mds->mds_mdss_service);
+       rc |= ptlrpc_service_health_check(mds->mds_fld_service);
+       mutex_unlock(&mds->mds_health_mutex);
+
+       return rc != 0 ? 1 : 0; /* collapse the OR-ed per-service results to 0 or 1 */
+}
+
 static struct obd_ops mds_obd_device_ops = {
        .o_owner           = THIS_MODULE,
+       .o_health_check    = mds_health_check, /* returns 1 if any ptlrpc service check is nonzero */
 };
 
 int mds_mod_init(void)
@@ -538,11 +574,8 @@ int mds_mod_init(void)
                mds_num_threads = mdt_num_threads;
        }
 
-       return class_register_type(&mds_obd_device_ops, NULL, NULL,
-#ifndef HAVE_ONLY_PROCFS_SEQ
-                                       lprocfs_mds_module_vars,
-#endif
-                                       LUSTRE_MDS_NAME, &mds_device_type);
+       return class_register_type(&mds_obd_device_ops, NULL, true, NULL,
+                                  LUSTRE_MDS_NAME, &mds_device_type);
 }
 
 void mds_mod_exit(void)