Whamcloud - gitweb
Branch: HEAD
author wangdi <wangdi>
Thu, 3 Jul 2008 22:37:08 +0000 (22:37 +0000)
committer wangdi <wangdi>
Thu, 3 Jul 2008 22:37:08 +0000 (22:37 +0000)
sync procfs/llog_thread with import destroy
b=15684
i=Robert, Jay

lustre/ChangeLog
lustre/include/lprocfs_status.h
lustre/include/obd.h
lustre/ldlm/ldlm_lib.c
lustre/mdc/mdc_request.c
lustre/mgc/mgc_request.c
lustre/obdclass/llog_obd.c
lustre/osc/osc_request.c
lustre/ptlrpc/llog_client.c
lustre/ptlrpc/llog_net.c
lustre/ptlrpc/recov_thread.c

index 2f2bda4..d281ffd 100644 (file)
@@ -1149,6 +1149,12 @@ Description: filter threads hungs on waiting journal commit
 Details    : Cleanup filter group llog code, then only filter group llog will
             be only created in the MDS/OST syncing process.
 
+Severity   : normal
+Bugzilla   : 15684 
+Description: Procfs and llog threads sometimes access a destroyed import.
+Details    : Sync the import destroy process with procfs and llog threads by
+            the import refcount and semaphore.
+
 --------------------------------------------------------------------------------
 
 2007-08-10         Cluster File Systems, Inc. <info@clusterfs.com>
index 0f4d9b3..f6f0b40 100644 (file)
@@ -505,14 +505,14 @@ extern struct rw_semaphore _lprocfs_lock;
  * the import in a client obd_device for a lprocfs entry */
 #define LPROCFS_CLIMP_CHECK(obd) do {           \
         typecheck(struct obd_device *, obd);    \
-        mutex_down(&(obd)->u.cli.cl_sem);       \
+        down_read(&(obd)->u.cli.cl_sem);        \
         if ((obd)->u.cli.cl_import == NULL) {   \
-             mutex_up(&(obd)->u.cli.cl_sem);    \
+             up_read(&(obd)->u.cli.cl_sem);     \
              return -ENODEV;                    \
         }                                       \
 } while(0)
 #define LPROCFS_CLIMP_EXIT(obd)                 \
-        mutex_up(&(obd)->u.cli.cl_sem);
+        up_read(&(obd)->u.cli.cl_sem);
 
 
 /* write the name##_seq_show function, call LPROC_SEQ_FOPS_RO for read-only 
index 1995330..5836004 100644 (file)
@@ -386,7 +386,7 @@ struct mdc_rpc_lock;
 struct obd_import;
 struct lustre_cache;
 struct client_obd {
-        struct semaphore         cl_sem;
+        struct rw_semaphore      cl_sem;
         struct obd_uuid          cl_target_uuid;
         struct obd_import       *cl_import; /* ptlrpc connection state */
         int                      cl_conn_count;
index 8b0a493..2bb5e64 100644 (file)
@@ -244,7 +244,7 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
                 RETURN(-EINVAL);
         }
 
-        sema_init(&cli->cl_sem, 1);
+        init_rwsem(&cli->cl_sem);
         sema_init(&cli->cl_mgc_sem, 1);
         sptlrpc_rule_set_init(&cli->cl_sptlrpc_rset);
         cli->cl_sec_part = LUSTRE_SP_ANY;
@@ -382,7 +382,7 @@ int client_connect_import(const struct lu_env *env,
         int rc;
         ENTRY;
 
-        mutex_down(&cli->cl_sem);
+        down_write(&cli->cl_sem);
         rc = class_connect(dlm_handle, obd, cluuid);
         if (rc)
                 GOTO(out_sem, rc);
@@ -441,7 +441,7 @@ out_disco:
                 class_export_put(exp);
         }
 out_sem:
-        mutex_up(&cli->cl_sem);
+        up_write(&cli->cl_sem);
         if (to_be_freed)
                 ldlm_namespace_free_post(to_be_freed);
 
@@ -466,7 +466,7 @@ int client_disconnect_export(struct obd_export *exp)
         cli = &obd->u.cli;
         imp = cli->cl_import;
 
-        mutex_down(&cli->cl_sem);
+        down_write(&cli->cl_sem);
         if (!cli->cl_conn_count) {
                 CERROR("disconnecting disconnected device (%s)\n",
                        obd->obd_name);
@@ -516,7 +516,7 @@ int client_disconnect_export(struct obd_export *exp)
         if (!rc && err)
                 rc = err;
  out_sem:
-        mutex_up(&cli->cl_sem);
+        up_write(&cli->cl_sem);
         if (to_be_freed)
                 ldlm_namespace_free_post(to_be_freed);
 
index cf6d57a..14ddcc9 100644 (file)
@@ -1212,13 +1212,24 @@ static int mdc_statfs(struct obd_device *obd, struct obd_statfs *osfs,
 {
         struct ptlrpc_request *req;
         struct obd_statfs     *msfs;
+        struct obd_import     *imp = NULL;
         int                    rc;
         ENTRY;
 
-        req = ptlrpc_request_alloc_pack(obd->u.cli.cl_import, &RQF_MDS_STATFS,
+                        
+        /*Since the request might also come from lprocfs, so we need 
+         *sync this with client_disconnect_export Bug15684*/
+        down_read(&obd->u.cli.cl_sem);
+        if (obd->u.cli.cl_import)
+                imp = class_import_get(obd->u.cli.cl_import);
+        up_read(&obd->u.cli.cl_sem);
+        if (!imp)
+                RETURN(-ENODEV);
+        
+        req = ptlrpc_request_alloc_pack(imp, &RQF_MDS_STATFS,
                                         LUSTRE_MDS_VERSION, MDS_STATFS);
         if (req == NULL)
-                RETURN(-ENOMEM);
+                GOTO(output, rc = -ENOMEM);
 
         ptlrpc_request_set_replen(req);
 
@@ -1231,8 +1242,8 @@ static int mdc_statfs(struct obd_device *obd, struct obd_statfs *osfs,
         rc = ptlrpc_queue_wait(req);
         if (rc) {
                 /* check connection error first */
-                if (obd->u.cli.cl_import->imp_connect_error)
-                        rc = obd->u.cli.cl_import->imp_connect_error;
+                if (imp->imp_connect_error)
+                        rc = imp->imp_connect_error;
                 GOTO(out, rc);
         }
 
@@ -1244,6 +1255,8 @@ static int mdc_statfs(struct obd_device *obd, struct obd_statfs *osfs,
         EXIT;
 out:
         ptlrpc_req_finished(req);
+output:
+        class_import_put(imp);
         return rc;
 }
 
@@ -1584,11 +1597,12 @@ static int mdc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
                         class_destroy_import(imp);
                         obd->u.cli.cl_import = NULL;
                 }
-                break;
-        case OBD_CLEANUP_SELF_EXP:
                 rc = obd_llog_finish(obd, 0);
                 if (rc != 0)
                         CERROR("failed to cleanup llogging subsystems\n");
+                break;
+        case OBD_CLEANUP_SELF_EXP:
+                break;
         case OBD_CLEANUP_OBD:
                 break;
         }
index e8ddf27..1f0411f 100644 (file)
@@ -467,12 +467,12 @@ static int mgc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
                         spin_unlock(&config_list_lock);
                         cfs_waitq_signal(&rq_waitq);
                 }
-                break;
-        case OBD_CLEANUP_SELF_EXP:
                 rc = obd_llog_finish(obd, 0);
                 if (rc != 0)
                         CERROR("failed to cleanup llogging subsystems\n");
                 break;
+        case OBD_CLEANUP_SELF_EXP:
+                break;
         case OBD_CLEANUP_OBD:
                 break;
         }
index a26e336..cc27ba9 100644 (file)
@@ -56,6 +56,11 @@ static void llog_ctxt_destroy(struct llog_ctxt *ctxt)
 {
         if (ctxt->loc_exp)
                 class_export_put(ctxt->loc_exp);
+        if (ctxt->loc_imp) {
+                class_import_put(ctxt->loc_imp);
+                ctxt->loc_imp = NULL;
+        }
+
         OBD_FREE_PTR(ctxt);
         return;
 }
index 38bd034..f82849b 100644 (file)
@@ -3347,16 +3347,29 @@ static int osc_statfs(struct obd_device *obd, struct obd_statfs *osfs,
 {
         struct obd_statfs     *msfs;
         struct ptlrpc_request *req;
+        struct obd_import     *imp = NULL;
         int rc;
         ENTRY;
 
+        /*Since the request might also come from lprocfs, so we need 
+         *sync this with client_disconnect_export Bug15684*/
+        down_read(&obd->u.cli.cl_sem);
+        if (obd->u.cli.cl_import)
+                imp = class_import_get(obd->u.cli.cl_import);
+        up_read(&obd->u.cli.cl_sem);
+        if (!imp)
+                RETURN(-ENODEV);
+        
         /* We could possibly pass max_age in the request (as an absolute
          * timestamp or a "seconds.usec ago") so the target can avoid doing
          * extra calls into the filesystem if that isn't necessary (e.g.
          * during mount that would help a bit).  Having relative timestamps
          * is not so great if request processing is slow, while absolute
          * timestamps are not ideal because they need time synchronization. */
-        req = ptlrpc_request_alloc(obd->u.cli.cl_import, &RQF_OST_STATFS);
+        req = ptlrpc_request_alloc(imp, &RQF_OST_STATFS);
+        
+        class_import_put(imp);
+        
         if (req == NULL)
                 RETURN(-ENOMEM);
 
@@ -3966,13 +3979,13 @@ static int osc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
                         class_destroy_import(imp);
                         obd->u.cli.cl_import = NULL;
                 }
-                break;
-        }
-        case OBD_CLEANUP_SELF_EXP:
                 rc = obd_llog_finish(obd, 0);
                 if (rc != 0)
                         CERROR("failed to cleanup llogging subsystems\n");
                 break;
+        }
+        case OBD_CLEANUP_SELF_EXP:
+                break;
         case OBD_CLEANUP_OBD:
                 break;
         }
index 395d756..7e4b12d 100644 (file)
 #include <lustre_net.h>
 #include <libcfs/list.h>
 
+/* Take ctxt->loc_sem, store a reference to ctxt->loc_imp (obtained with
+ * class_import_get) in imp, then release the semaphore.  If the context has
+ * no import, imp is set to NULL, an error is logged and the macro executes
+ * "return -EINVAL" in the *calling* function.  A successful use is paired
+ * with LLOG_CLIENT_EXIT, which drops the reference.
+ * Note: ctxt is evaluated more than once; pass a side-effect-free expression. */
+#define  LLOG_CLIENT_ENTRY(ctxt, imp) do {                            \
+        mutex_down(&(ctxt)->loc_sem);                                 \
+        if ((ctxt)->loc_imp) {                                        \
+                (imp) = class_import_get((ctxt)->loc_imp);            \
+        } else {                                                      \
+                CERROR("ctxt->loc_imp == NULL for context idx %d.  "  \
+                       "Unable to complete MDS/OSS recovery, "        \
+                       "but I'll try again next time.  Not fatal.\n", \
+                       (ctxt)->loc_idx);                              \
+                (imp) = NULL;                                         \
+                mutex_up(&(ctxt)->loc_sem);                           \
+                return (-EINVAL);                                     \
+        }                                                             \
+        mutex_up(&(ctxt)->loc_sem);                                   \
+} while(0)
+
+/* Counterpart of LLOG_CLIENT_ENTRY: under ctxt->loc_sem, warn if the
+ * context's import no longer matches imp (the import the caller has been
+ * using), then drop the caller's reference with class_import_put.
+ * Note: ctxt and imp are evaluated more than once. */
+#define  LLOG_CLIENT_EXIT(ctxt, imp) do {                    \
+        mutex_down(&(ctxt)->loc_sem);                        \
+        if ((ctxt)->loc_imp != (imp))                        \
+                /* imp is the old value, loc_imp the new */  \
+                CWARN("loc_imp has changed from %p to %p\n", \
+                       (imp), (ctxt)->loc_imp);              \
+        class_import_put(imp);                               \
+        mutex_up(&(ctxt)->loc_sem);                          \
+} while(0)
+
 /* This is a callback from the llog_* functions.
  * Assumes caller has already pushed us into the kernel context. */
 static int llog_client_create(struct llog_ctxt *ctxt, struct llog_handle **res,
@@ -55,14 +80,7 @@ static int llog_client_create(struct llog_ctxt *ctxt, struct llog_handle **res,
         int                    rc;
         ENTRY;
 
-        if (ctxt->loc_imp == NULL) {
-                /* This used to be an assert; bug 6200 */
-                CERROR("ctxt->loc_imp == NULL for context idx %d.  Unable to "
-                       "complete MDS/OSS recovery, but I'll try again next "
-                       "time.  Not fatal.\n", ctxt->loc_idx);
-                RETURN(-EINVAL);
-        }
-        imp = ctxt->loc_imp;
+        LLOG_CLIENT_ENTRY(ctxt, imp);
 
         handle = llog_alloc_handle();
         if (handle == NULL)
@@ -110,6 +128,7 @@ static int llog_client_create(struct llog_ctxt *ctxt, struct llog_handle **res,
         handle->lgh_ctxt = ctxt;
         EXIT;
 out:
+        LLOG_CLIENT_EXIT(ctxt, imp);
         ptlrpc_req_finished(req);
         return rc;
 err_free:
@@ -119,17 +138,18 @@ err_free:
 
 static int llog_client_destroy(struct llog_handle *loghandle)
 {
+        struct obd_import     *imp;
         struct ptlrpc_request *req = NULL;
         struct llogd_body     *body;
         int                    rc;
         ENTRY;
 
-        req = ptlrpc_request_alloc_pack(loghandle->lgh_ctxt->loc_imp,
-                                        &RQF_LLOG_ORIGIN_HANDLE_DESTROY,
+        LLOG_CLIENT_ENTRY(loghandle->lgh_ctxt, imp);
+        req = ptlrpc_request_alloc_pack(imp, &RQF_LLOG_ORIGIN_HANDLE_DESTROY,
                                         LUSTRE_LOG_VERSION,
                                         LLOG_ORIGIN_HANDLE_DESTROY);
         if (req == NULL)
-                RETURN(-ENOMEM);
+                GOTO(err_exit, rc =-ENOMEM);
 
         body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY);
         body->lgd_logid = loghandle->lgh_id;
@@ -139,6 +159,8 @@ static int llog_client_destroy(struct llog_handle *loghandle)
         rc = ptlrpc_queue_wait(req);
         
         ptlrpc_req_finished(req);
+err_exit:
+        LLOG_CLIENT_EXIT(loghandle->lgh_ctxt, imp);
         RETURN(rc);
 }
 
@@ -147,19 +169,20 @@ static int llog_client_next_block(struct llog_handle *loghandle,
                                   int *cur_idx, int next_idx,
                                   __u64 *cur_offset, void *buf, int len)
 {
+        struct obd_import     *imp;
         struct ptlrpc_request *req = NULL;
         struct llogd_body     *body;
         void                  *ptr;
         int                    rc;
         ENTRY;
 
-        req = ptlrpc_request_alloc_pack(loghandle->lgh_ctxt->loc_imp,
-                                        &RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK,
+        LLOG_CLIENT_ENTRY(loghandle->lgh_ctxt, imp);
+        req = ptlrpc_request_alloc_pack(imp, &RQF_LLOG_ORIGIN_HANDLE_NEXT_BLOCK,
                                         LUSTRE_LOG_VERSION,
                                         LLOG_ORIGIN_HANDLE_NEXT_BLOCK);
         if (req == NULL)
-                RETURN(-ENOMEM);
-
+                GOTO(err_exit, rc =-ENOMEM);
+                
         body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY);
         body->lgd_logid = loghandle->lgh_id;
         body->lgd_ctxt_idx = loghandle->lgh_ctxt->loc_idx - 1;
@@ -191,24 +214,27 @@ static int llog_client_next_block(struct llog_handle *loghandle,
         EXIT;
 out:
         ptlrpc_req_finished(req);
+err_exit:
+        LLOG_CLIENT_EXIT(loghandle->lgh_ctxt, imp);
         return rc;
 }
 
 static int llog_client_prev_block(struct llog_handle *loghandle,
                                   int prev_idx, void *buf, int len)
 {
+        struct obd_import     *imp;
         struct ptlrpc_request *req = NULL;
         struct llogd_body     *body;
         void                  *ptr;
         int                    rc;
         ENTRY;
 
-        req = ptlrpc_request_alloc_pack(loghandle->lgh_ctxt->loc_imp,
-                                        &RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK,
+        LLOG_CLIENT_ENTRY(loghandle->lgh_ctxt, imp);
+        req = ptlrpc_request_alloc_pack(imp, &RQF_LLOG_ORIGIN_HANDLE_PREV_BLOCK,
                                         LUSTRE_LOG_VERSION,
                                         LLOG_ORIGIN_HANDLE_PREV_BLOCK);
         if (req == NULL)
-                RETURN(-ENOMEM);
+                GOTO(err_exit, rc = -ENOMEM);
 
         body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY);
         body->lgd_logid = loghandle->lgh_id;
@@ -236,11 +262,14 @@ static int llog_client_prev_block(struct llog_handle *loghandle,
         EXIT;
 out:
         ptlrpc_req_finished(req);
+err_exit:
+        LLOG_CLIENT_EXIT(loghandle->lgh_ctxt, imp);
         return rc;
 }
 
 static int llog_client_read_header(struct llog_handle *handle)
 {
+        struct obd_import     *imp;
         struct ptlrpc_request *req = NULL;
         struct llogd_body     *body;
         struct llog_log_hdr   *hdr;
@@ -248,12 +277,12 @@ static int llog_client_read_header(struct llog_handle *handle)
         int                    rc;
         ENTRY;
 
-        req = ptlrpc_request_alloc_pack(handle->lgh_ctxt->loc_imp,
-                                        &RQF_LLOG_ORIGIN_HANDLE_READ_HEADER,
+        LLOG_CLIENT_ENTRY(handle->lgh_ctxt, imp);
+        req = ptlrpc_request_alloc_pack(imp,&RQF_LLOG_ORIGIN_HANDLE_READ_HEADER,
                                         LUSTRE_LOG_VERSION,
                                         LLOG_ORIGIN_HANDLE_READ_HEADER);
         if (req == NULL)
-                RETURN(-ENOMEM);
+                GOTO(err_exit, rc = -ENOMEM);
 
         body = req_capsule_client_get(&req->rq_pill, &RMF_LLOGD_BODY);
         body->lgd_logid = handle->lgh_id;
@@ -288,6 +317,8 @@ static int llog_client_read_header(struct llog_handle *handle)
         EXIT;
 out:
         ptlrpc_req_finished(req);
+err_exit:
+        LLOG_CLIENT_EXIT(handle->lgh_ctxt, imp);
         return rc;
 }
 
index 772f937..ccf615f 100644 (file)
@@ -122,10 +122,16 @@ EXPORT_SYMBOL(llog_handle_connect);
 int llog_receptor_accept(struct llog_ctxt *ctxt, struct obd_import *imp)
 {
         ENTRY;
+
         LASSERT(ctxt);
-        LASSERTF(ctxt->loc_imp == NULL || ctxt->loc_imp == imp,
-                 "%p - %p\n", ctxt->loc_imp, imp);
-        ctxt->loc_imp = imp;
+        mutex_down(&ctxt->loc_sem);
+        if (ctxt->loc_imp != imp) {
+                CWARN("changing the import %p - %p\n", ctxt->loc_imp, imp);
+                if (ctxt->loc_imp)
+                        class_import_put(ctxt->loc_imp);
+                ctxt->loc_imp = class_import_get(imp);
+        }
+        mutex_up(&ctxt->loc_sem);
         RETURN(0);
 }
 EXPORT_SYMBOL(llog_receptor_accept);
@@ -149,7 +155,13 @@ int llog_initiator_connect(struct llog_ctxt *ctxt)
         new_imp = ctxt->loc_obd->u.cli.cl_import;
         LASSERTF(ctxt->loc_imp == NULL || ctxt->loc_imp == new_imp,
                  "%p - %p\n", ctxt->loc_imp, new_imp);
-        ctxt->loc_imp = new_imp;
+        mutex_down(&ctxt->loc_sem);
+        if (ctxt->loc_imp != new_imp) {
+                if (ctxt->loc_imp)
+                        class_import_put(ctxt->loc_imp);
+                ctxt->loc_imp = class_import_get(new_imp);
+        }
+        mutex_up(&ctxt->loc_sem);
         RETURN(0);
 }
 EXPORT_SYMBOL(llog_initiator_connect);
index 1f21db5..5d002c8 100644 (file)
@@ -167,6 +167,7 @@ static void ctxt_llcd_put(struct llog_ctxt *ctxt)
                 llcd_put(ctxt->loc_llcd);
                 ctxt->loc_llcd = NULL;
         }
+        class_import_put(ctxt->loc_imp);
         ctxt->loc_imp = NULL;
         mutex_up(&ctxt->loc_sem);
 }