Whamcloud - gitweb
Branch: b1_8
authorwangdi <wangdi>
Fri, 4 Jul 2008 21:11:18 +0000 (21:11 +0000)
committerwangdi <wangdi>
Fri, 4 Jul 2008 21:11:18 +0000 (21:11 +0000)
sync import destory with llog_thread and lprocfs access
b=15684
i=Robert, Jay

12 files changed:
lustre/ChangeLog
lustre/include/lprocfs_status.h
lustre/include/obd.h
lustre/ldlm/ldlm_lib.c
lustre/mdc/mdc_request.c
lustre/mgc/libmgc.c
lustre/mgc/mgc_request.c
lustre/obdclass/llog_obd.c
lustre/osc/osc_request.c
lustre/ptlrpc/llog_client.c
lustre/ptlrpc/llog_net.c
lustre/ptlrpc/recov_thread.c

index 732402a..5003421 100644 (file)
@@ -244,6 +244,12 @@ Details    : The direct IO path doesn't call check_rpcs to submit a new RPC once
             one is completed. As a result, some RPCs are stuck in the queue
             and are never sent.
 
+Severity   : normal
+Bugzilla   : 15684
+Description: Procfs and llog threads access destoryed import sometimes. 
+Details    : Sync the import destoryed process with procfs and llog threads by
+            the import refcount and semaphore.
+
 -------------------------------------------------------------------------------
 
 
index 842576f..688fbdc 100644 (file)
@@ -493,14 +493,14 @@ extern struct rw_semaphore _lprocfs_lock;
  * the import in a client obd_device for a lprocfs entry */
 #define LPROCFS_CLIMP_CHECK(obd) do {           \
         typecheck(struct obd_device *, obd);    \
-        mutex_down(&(obd)->u.cli.cl_sem);       \
+        down_read(&(obd)->u.cli.cl_sem);        \
         if ((obd)->u.cli.cl_import == NULL) {   \
-             mutex_up(&(obd)->u.cli.cl_sem);    \
+             up_read(&(obd)->u.cli.cl_sem);     \
              return -ENODEV;                    \
         }                                       \
 } while(0)
 #define LPROCFS_CLIMP_EXIT(obd)                 \
-        mutex_up(&(obd)->u.cli.cl_sem);
+        up_read(&(obd)->u.cli.cl_sem);
 
 
 /* write the name##_seq_show function, call LPROC_SEQ_FOPS_RO for read-only 
index 5f8cf4d..7267fba 100644 (file)
@@ -337,7 +337,7 @@ struct mdc_rpc_lock;
 struct obd_import;
 struct lustre_cache;
 struct client_obd {
-        struct semaphore         cl_sem;
+        struct rw_semaphore      cl_sem;
         struct obd_uuid          cl_target_uuid;
         struct obd_import       *cl_import; /* ptlrpc connection state */
         int                      cl_conn_count;
index 648cd0e..c5293b3 100644 (file)
@@ -239,7 +239,7 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf)
                 RETURN(-EINVAL);
         }
 
-        sema_init(&cli->cl_sem, 1);
+        init_rwsem(&cli->cl_sem);
         sema_init(&cli->cl_mgc_sem, 1);
         cli->cl_conn_count = 0;
         memcpy(server_uuid.uuid, lustre_cfg_buf(lcfg, 2),
@@ -371,7 +371,7 @@ int client_connect_import(struct lustre_handle *dlm_handle,
         int rc;
         ENTRY;
 
-        mutex_down(&cli->cl_sem);
+        down_write(&cli->cl_sem);
         rc = class_connect(dlm_handle, obd, cluuid);
         if (rc)
                 GOTO(out_sem, rc);
@@ -428,7 +428,7 @@ out_disco:
                 class_export_put(exp);
         }
 out_sem:
-        mutex_up(&cli->cl_sem);
+        up_write(&cli->cl_sem);
         if (to_be_freed)
                 ldlm_namespace_free_post(to_be_freed);
         return rc;
@@ -452,7 +452,7 @@ int client_disconnect_export(struct obd_export *exp)
         cli = &obd->u.cli;
         imp = cli->cl_import;
 
-        mutex_down(&cli->cl_sem);
+        down_write(&cli->cl_sem);
         if (!cli->cl_conn_count) {
                 CERROR("disconnecting disconnected device (%s)\n",
                        obd->obd_name);
@@ -503,7 +503,7 @@ int client_disconnect_export(struct obd_export *exp)
         if (!rc && err)
                 rc = err;
  out_sem:
-        mutex_up(&cli->cl_sem);
+        up_write(&cli->cl_sem);
         if (to_be_freed)
                 ldlm_namespace_free_post(to_be_freed);
         RETURN(rc);
index 0073231..51cc8bc 100644 (file)
@@ -982,19 +982,30 @@ static int mdc_statfs(struct obd_device *obd, struct obd_statfs *osfs,
 {
         struct ptlrpc_request *req;
         struct obd_statfs *msfs;
+        struct obd_import     *imp = NULL;
         int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*msfs) };
         ENTRY;
 
+        /*Since the request might also come from lprocfs, so we need 
+         *sync this with client_disconnect_export Bug15684*/
+        down_read(&obd->u.cli.cl_sem);
+        if (obd->u.cli.cl_import)
+                imp = class_import_get(obd->u.cli.cl_import);
+        up_read(&obd->u.cli.cl_sem);
+        if (!imp)
+                RETURN(-ENODEV);
+        
+
         /* We could possibly pass max_age in the request (as an absolute
          * timestamp or a "seconds.usec ago") so the target can avoid doing
          * extra calls into the filesystem if that isn't necessary (e.g.
          * during mount that would help a bit).  Having relative timestamps
          * is not so great if request processing is slow, while absolute
          * timestamps are not ideal because they need time synchronization. */
-        req = ptlrpc_prep_req(obd->u.cli.cl_import, LUSTRE_MDS_VERSION,
-                              MDS_STATFS, 1, NULL, NULL);
+        req = ptlrpc_prep_req(imp, LUSTRE_MDS_VERSION, MDS_STATFS, 1, NULL, 
+                              NULL);
         if (!req)
-                RETURN(-ENOMEM);
+                GOTO(output, rc = -ENOMEM);
 
         ptlrpc_req_set_repsize(req, 2, size);
 
@@ -1020,7 +1031,8 @@ static int mdc_statfs(struct obd_device *obd, struct obd_statfs *osfs,
         EXIT;
 out:
         ptlrpc_req_finished(req);
-
+output:
+        class_import_put(imp);
         return rc;
 }
 
@@ -1281,11 +1293,12 @@ static int mdc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
                         class_destroy_import(imp);
                         obd->u.cli.cl_import = NULL;
                 }
-                break;
-        case OBD_CLEANUP_SELF_EXP:
                 rc = obd_llog_finish(obd, 0);
                 if (rc != 0)
                         CERROR("failed to cleanup llogging subsystems\n");
+                break;
+        case OBD_CLEANUP_SELF_EXP:
+                break;
         case OBD_CLEANUP_OBD:
                 break;
         }
@@ -1320,7 +1333,7 @@ static int mdc_llog_init(struct obd_device *obd, struct obd_device *tgt,
                         &llog_client_ops);
         if (rc == 0) {
                 ctxt = llog_get_context(obd, LLOG_CONFIG_REPL_CTXT);
-                ctxt->loc_imp = obd->u.cli.cl_import;
+                llog_initiator_connect(ctxt);
                 llog_ctxt_put(ctxt);
         }
 
@@ -1328,7 +1341,7 @@ static int mdc_llog_init(struct obd_device *obd, struct obd_device *tgt,
                        &llog_client_ops);
         if (rc == 0) {
                 ctxt = llog_get_context(obd, LLOG_LOVEA_REPL_CTXT);
-                ctxt->loc_imp = obd->u.cli.cl_import;
+                llog_initiator_connect(ctxt);
                 llog_ctxt_put(ctxt);
         }
 
index b72e8bb..c8fd0d5 100644 (file)
@@ -111,7 +111,7 @@ static int mgc_llog_init(struct obd_device *obd, struct obd_device *tgt,
                         &llog_client_ops);
         if (rc == 0) {
                 ctxt = llog_get_context(obd, LLOG_CONFIG_REPL_CTXT);
-                ctxt->loc_imp = obd->u.cli.cl_import;
+                llog_initiator_connect(ctxt);
                 llog_ctxt_put(ctxt);
         }
 
index 9b06f72..660229d 100644 (file)
@@ -469,12 +469,12 @@ static int mgc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
                         spin_unlock(&config_list_lock);
                         cfs_waitq_signal(&rq_waitq);
                 }
-                break;
-        case OBD_CLEANUP_SELF_EXP:
                 rc = obd_llog_finish(obd, 0);
                 if (rc != 0)
                         CERROR("failed to cleanup llogging subsystems\n");
                 break;
+        case OBD_CLEANUP_SELF_EXP:
+                break;
         case OBD_CLEANUP_OBD:
                 break;
         }
@@ -921,7 +921,7 @@ static int mgc_llog_init(struct obd_device *obd, struct obd_device *tgt,
                         &llog_client_ops);
         if (rc == 0) {
                 ctxt = llog_get_context(obd, LLOG_CONFIG_REPL_CTXT);
-                ctxt->loc_imp = obd->u.cli.cl_import;
+                llog_initiator_connect(ctxt);
                 llog_ctxt_put(ctxt);
         }
 
index c9e9a95..a49c621 100644 (file)
@@ -56,6 +56,10 @@ static void llog_ctxt_destroy(struct llog_ctxt *ctxt)
 {
         if (ctxt->loc_exp)
                 class_export_put(ctxt->loc_exp);
+        if (ctxt->loc_imp) {
+                class_import_put(ctxt->loc_imp);
+                ctxt->loc_imp = NULL;
+        }
         OBD_FREE(ctxt, sizeof(*ctxt));
         return;
 }
index 9c5a2db..a4f7394 100644 (file)
@@ -3236,17 +3236,29 @@ static int osc_statfs(struct obd_device *obd, struct obd_statfs *osfs,
 {
         struct obd_statfs *msfs;
         struct ptlrpc_request *req;
+        struct obd_import     *imp = NULL;
         int rc, size[2] = { sizeof(struct ptlrpc_body), sizeof(*osfs) };
         ENTRY;
 
+        /*Since the request might also come from lprocfs, so we need 
+         *sync this with client_disconnect_export Bug15684*/
+        down_read(&obd->u.cli.cl_sem);
+        if (obd->u.cli.cl_import)
+                imp = class_import_get(obd->u.cli.cl_import);
+        up_read(&obd->u.cli.cl_sem);
+        if (!imp)
+                RETURN(-ENODEV);
+  
         /* We could possibly pass max_age in the request (as an absolute
          * timestamp or a "seconds.usec ago") so the target can avoid doing
          * extra calls into the filesystem if that isn't necessary (e.g.
          * during mount that would help a bit).  Having relative timestamps
          * is not so great if request processing is slow, while absolute
          * timestamps are not ideal because they need time synchronization. */
-        req = ptlrpc_prep_req(obd->u.cli.cl_import, LUSTRE_OST_VERSION,
+        req = ptlrpc_prep_req(imp, LUSTRE_OST_VERSION,
                               OST_STATFS, 1, NULL, NULL);
+
+        class_import_put(imp);
         if (!req)
                 RETURN(-ENOMEM);
 
@@ -3742,6 +3754,7 @@ static int osc_import_event(struct obd_device *obd,
                         oscc->oscc_flags &= ~OSCC_FLAG_NOSPC;
                         spin_unlock(&oscc->oscc_lock);
                 }
+                CDEBUG(D_INFO, "notify server \n");
                 rc = obd_notify_observer(obd, obd, OBD_NOTIFY_ACTIVE, NULL);
                 break;
         }
@@ -3836,13 +3849,13 @@ static int osc_precleanup(struct obd_device *obd, enum obd_cleanup_stage stage)
                         class_destroy_import(imp);
                         obd->u.cli.cl_import = NULL;
                 }
-                break;
-        }
-        case OBD_CLEANUP_SELF_EXP:
                 rc = obd_llog_finish(obd, 0);
                 if (rc != 0)
                         CERROR("failed to cleanup llogging subsystems\n");
                 break;
+        }
+        case OBD_CLEANUP_SELF_EXP:
+                break;
         case OBD_CLEANUP_OBD:
                 break;
         }
index 27578a4..ff5fddd 100644 (file)
 #include <lustre_net.h>
 #include <libcfs/list.h>
 
+#define  LLOG_CLIENT_ENTRY(ctxt, imp) do {                            \
+        mutex_down(&ctxt->loc_sem);                                   \
+        if (ctxt->loc_imp) {                                          \
+                imp = class_import_get(ctxt->loc_imp);                \
+        } else {                                                      \
+                CERROR("ctxt->loc_imp == NULL for context idx %d."    \
+                       "Unable to complete MDS/OSS recovery,"         \
+                       "but I'll try again next time.  Not fatal.\n", \
+                       ctxt->loc_idx);                                \
+                imp = NULL;                                           \
+                mutex_up(&ctxt->loc_sem);                             \
+                return (-EINVAL);                                     \
+        }                                                             \
+        mutex_up(&ctxt->loc_sem);                                     \
+} while(0)
+
+#define  LLOG_CLIENT_EXIT(ctxt, imp) do {                  \
+        mutex_down(&ctxt->loc_sem);                        \
+        if (ctxt->loc_imp != imp)                          \
+                CWARN("loc_imp has changed from %p to %p", \
+                       ctxt->loc_imp, imp);                \
+        class_import_put(imp);                             \
+        mutex_up(&ctxt->loc_sem);                          \
+} while(0)
+
 /* This is a callback from the llog_* functions.
  * Assumes caller has already pushed us into the kernel context. */
 static int llog_client_create(struct llog_ctxt *ctxt, struct llog_handle **res,
@@ -59,18 +84,11 @@ static int llog_client_create(struct llog_ctxt *ctxt, struct llog_handle **res,
         int rc;
         ENTRY;
 
-        if (ctxt->loc_imp == NULL) {
-                /* This used to be an assert; bug 6200 */
-                CERROR("ctxt->loc_imp == NULL for context idx %d.  Unable to "
-                       "complete MDS/OSS recovery, but I'll try again next "
-                       "time.  Not fatal.\n", ctxt->loc_idx);
-                RETURN(-EINVAL);
-        }
-        imp = ctxt->loc_imp;
+        LLOG_CLIENT_ENTRY(ctxt, imp);
 
         handle = llog_alloc_handle();
         if (handle == NULL)
-                RETURN(-ENOMEM);
+                GOTO(out, rc = -ENOMEM);
         *res = handle;
 
         memset(&req_body, 0, sizeof(req_body));
@@ -107,6 +125,7 @@ static int llog_client_create(struct llog_ctxt *ctxt, struct llog_handle **res,
 out:
         if (req)
                 ptlrpc_req_finished(req);
+        LLOG_CLIENT_EXIT(ctxt, imp);
         RETURN(rc);
 
 err_free:
@@ -116,17 +135,18 @@ err_free:
 
 static int llog_client_destroy(struct llog_handle *loghandle)
 {
-        struct obd_import *imp = loghandle->lgh_ctxt->loc_imp;
+        struct obd_import     *imp;
         struct ptlrpc_request *req = NULL;
         struct llogd_body *body;
         int size[] = { sizeof(struct ptlrpc_body), sizeof(*body) };
         int rc;
         ENTRY;
 
+        LLOG_CLIENT_ENTRY(loghandle->lgh_ctxt, imp);
         req = ptlrpc_prep_req(imp, LUSTRE_LOG_VERSION, 
                               LLOG_ORIGIN_HANDLE_DESTROY, 2, size, NULL);
         if (!req)
-                RETURN(-ENOMEM);
+                GOTO(out, rc = -ENOMEM);
 
         body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
         body->lgd_logid = loghandle->lgh_id;
@@ -136,6 +156,8 @@ static int llog_client_destroy(struct llog_handle *loghandle)
         rc = ptlrpc_queue_wait(req);
         
         ptlrpc_req_finished(req);
+out:
+        LLOG_CLIENT_EXIT(loghandle->lgh_ctxt, imp);
         RETURN(rc);
 }
 
@@ -144,7 +166,7 @@ static int llog_client_next_block(struct llog_handle *loghandle,
                                   int *cur_idx, int next_idx,
                                   __u64 *cur_offset, void *buf, int len)
 {
-        struct obd_import *imp = loghandle->lgh_ctxt->loc_imp;
+        struct obd_import *imp;
         struct ptlrpc_request *req = NULL;
         struct llogd_body *body;
         void * ptr;
@@ -152,10 +174,11 @@ static int llog_client_next_block(struct llog_handle *loghandle,
         int rc;
         ENTRY;
 
+        LLOG_CLIENT_ENTRY(loghandle->lgh_ctxt, imp);
         req = ptlrpc_prep_req(imp, LUSTRE_LOG_VERSION,
                               LLOG_ORIGIN_HANDLE_NEXT_BLOCK, 2, size, NULL);
         if (!req)
-                GOTO(out, rc = -ENOMEM);
+                GOTO(out, rc =-ENOMEM);
 
         body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
         body->lgd_logid = loghandle->lgh_id;
@@ -194,13 +217,14 @@ static int llog_client_next_block(struct llog_handle *loghandle,
 out:
         if (req)
                 ptlrpc_req_finished(req);
+        LLOG_CLIENT_EXIT(loghandle->lgh_ctxt, imp);
         RETURN(rc);
 }
 
 static int llog_client_prev_block(struct llog_handle *loghandle,
                                   int prev_idx, void *buf, int len)
 {
-        struct obd_import *imp = loghandle->lgh_ctxt->loc_imp;
+        struct obd_import *imp;
         struct ptlrpc_request *req = NULL;
         struct llogd_body *body;
         void * ptr;
@@ -208,6 +232,7 @@ static int llog_client_prev_block(struct llog_handle *loghandle,
         int rc;
         ENTRY;
 
+        LLOG_CLIENT_ENTRY(loghandle->lgh_ctxt, imp);
         req = ptlrpc_prep_req(imp, LUSTRE_LOG_VERSION,
                               LLOG_ORIGIN_HANDLE_PREV_BLOCK, 2, size, NULL);
         if (!req)
@@ -244,12 +269,13 @@ static int llog_client_prev_block(struct llog_handle *loghandle,
 out:
         if (req)
                 ptlrpc_req_finished(req);
+        LLOG_CLIENT_EXIT(loghandle->lgh_ctxt, imp);
         RETURN(rc);
 }
 
 static int llog_client_read_header(struct llog_handle *handle)
 {
-        struct obd_import *imp = handle->lgh_ctxt->loc_imp;
+        struct obd_import *imp;
         struct ptlrpc_request *req = NULL;
         struct llogd_body *body;
         struct llog_log_hdr *hdr;
@@ -259,6 +285,7 @@ static int llog_client_read_header(struct llog_handle *handle)
         int rc;
         ENTRY;
 
+        LLOG_CLIENT_ENTRY(handle->lgh_ctxt, imp);
         req = ptlrpc_prep_req(imp, LUSTRE_LOG_VERSION,
                               LLOG_ORIGIN_HANDLE_READ_HEADER, 2, size, NULL);
         if (!req)
@@ -301,6 +328,7 @@ static int llog_client_read_header(struct llog_handle *handle)
 out:
         if (req)
                 ptlrpc_req_finished(req);
+        LLOG_CLIENT_EXIT(handle->lgh_ctxt, imp);
         RETURN(rc);
 }
 
index 3779ed5..92b1418 100644 (file)
@@ -147,16 +147,31 @@ int llog_receptor_accept(struct llog_ctxt *ctxt, struct obd_import *imp)
 {
         ENTRY;
         LASSERT(ctxt);
-        ctxt->loc_imp = imp;
+        mutex_down(&ctxt->loc_sem);
+        if (ctxt->loc_imp != imp) {
+                CWARN("changing the import %p - %p\n", ctxt->loc_imp, imp);
+                if (ctxt->loc_imp)
+                        class_import_put(ctxt->loc_imp);
+                ctxt->loc_imp = class_import_get(imp);
+        }
+        mutex_up(&ctxt->loc_sem);
         RETURN(0);
 }
 EXPORT_SYMBOL(llog_receptor_accept);
 
 int llog_initiator_connect(struct llog_ctxt *ctxt)
 {
+        struct obd_import *new_imp;
         ENTRY;
         LASSERT(ctxt);
-        ctxt->loc_imp = ctxt->loc_obd->u.cli.cl_import;
+        new_imp = ctxt->loc_obd->u.cli.cl_import;
+        mutex_down(&ctxt->loc_sem);
+        if (ctxt->loc_imp != new_imp) {
+                if (ctxt->loc_imp)
+                        class_import_put(ctxt->loc_imp);
+                ctxt->loc_imp = class_import_get(new_imp);
+        }
+        mutex_up(&ctxt->loc_sem);
         RETURN(0);
 }
 EXPORT_SYMBOL(llog_initiator_connect);
index 35383e7..b930aaa 100644 (file)
@@ -168,7 +168,10 @@ static void ctxt_llcd_put(struct llog_ctxt *ctxt)
                 llcd_put(ctxt->loc_llcd);
                 ctxt->loc_llcd = NULL;
         }
-        ctxt->loc_imp = NULL;
+        if (ctxt->loc_imp) {
+                class_import_put(ctxt->loc_imp);
+                ctxt->loc_imp = NULL;
+        }
         mutex_up(&ctxt->loc_sem);
 }