Whamcloud - gitweb
lock ordering violation between &cli->cl_sem and _lprocfs_lock
authorshadow <shadow>
Wed, 8 Jul 2009 03:45:44 +0000 (03:45 +0000)
committershadow <shadow>
Wed, 8 Jul 2009 03:45:44 +0000 (03:45 +0000)
Branch b1_8
b=18380
i=jay
i=deen

lustre/ChangeLog
lustre/ldlm/ldlm_lib.c
lustre/ldlm/ldlm_resource.c
lustre/lov/lov_obd.c

index 90a1a3a..1cbc24f 100644 (file)
@@ -14,6 +14,13 @@ tbd Sun Microsystems, Inc.
          of Lustre filesystem with 4K stack may cause a stack overflow. For
          more information, please refer to bugzilla 17630.
 
+Severity   : normal
+Frequency  : rare
+Bugzilla   : 18380
+Descriptoin: lock ordering violation between &cli->cl_sem and _lprocfs_lock
+Details    : move ldlm namespace creation in setup phase to avoid grab
+            _lprocfs_lock with cli_sem held.
+
 Severity   : enhancement
 Bugzilla   : 19846
 Description: Update kernel to RHEL5 2.6.18-128.1.14.el5.
index 18601e6..f6061c9 100644 (file)
@@ -341,11 +341,20 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf)
                                name, obddev->obd_name,
                                cli->cl_target_uuid.uuid);
                         spin_lock(&imp->imp_lock);
-                        imp->imp_invalid = 1;
+                        imp->imp_deactive = 1;
                         spin_unlock(&imp->imp_lock);
                 }
         }
 
+        obddev->obd_namespace = ldlm_namespace_new(obddev, obddev->obd_name,
+                                                   LDLM_NAMESPACE_CLIENT,
+                                                   LDLM_NAMESPACE_GREEDY);
+        if (obddev->obd_namespace == NULL) {
+                CERROR("Unable to create client namespace - %s\n",
+                       obddev->obd_name);
+                GOTO(err_import, rc = -ENOMEM);
+        }
+
         cli->cl_qchk_stat = CL_NOT_QUOTACHECKED;
 
         RETURN(rc);
@@ -362,6 +371,10 @@ err:
 int client_obd_cleanup(struct obd_device *obddev)
 {
         ENTRY;
+
+        ldlm_namespace_free_post(obddev->obd_namespace);
+        obddev->obd_namespace = NULL;
+
         ldlm_put_ref();
         RETURN(0);
 }
@@ -375,11 +388,13 @@ int client_connect_import(struct lustre_handle *dlm_handle,
         struct obd_import *imp = cli->cl_import;
         struct obd_export **exp = localdata;
         struct obd_connect_data *ocd;
-        struct ldlm_namespace *to_be_freed = NULL;
         int rc;
         ENTRY;
 
         down_write(&cli->cl_sem);
+        CDEBUG(D_INFO, "connect %s - %d\n", obd->obd_name, 
+               cli->cl_conn_count);
+
         if (cli->cl_conn_count > 0)
                 GOTO(out_sem, rc = -EALREADY);
 
@@ -390,13 +405,7 @@ int client_connect_import(struct lustre_handle *dlm_handle,
         cli->cl_conn_count++;
         *exp = class_conn2export(dlm_handle);
 
-        if (obd->obd_namespace != NULL)
-                CERROR("already have namespace!\n");
-        obd->obd_namespace = ldlm_namespace_new(obd, obd->obd_name,
-                                                LDLM_NAMESPACE_CLIENT,
-                                                LDLM_NAMESPACE_GREEDY);
-        if (obd->obd_namespace == NULL)
-                GOTO(out_disco, rc = -ENOMEM);
+        LASSERT(obd->obd_namespace);
 
         imp->imp_dlm_handle = *dlm_handle;
         rc = ptlrpc_init_import(imp);
@@ -427,18 +436,12 @@ int client_connect_import(struct lustre_handle *dlm_handle,
 
         if (rc) {
 out_ldlm:
-                ldlm_namespace_free_prior(obd->obd_namespace, imp, 0);
-                to_be_freed = obd->obd_namespace;
-                obd->obd_namespace = NULL;
-out_disco:
                 cli->cl_conn_count--;
                 class_disconnect(*exp);
                 *exp = NULL;
-        } 
+        }
 out_sem:
         up_write(&cli->cl_sem);
-        if (to_be_freed)
-                ldlm_namespace_free_post(to_be_freed);
         return rc;
 }
 
@@ -447,7 +450,6 @@ int client_disconnect_export(struct obd_export *exp)
         struct obd_device *obd = class_exp2obd(exp);
         struct client_obd *cli;
         struct obd_import *imp;
-        struct ldlm_namespace *to_be_freed = NULL;
         int rc = 0, err;
         ENTRY;
 
@@ -461,6 +463,9 @@ int client_disconnect_export(struct obd_export *exp)
         imp = cli->cl_import;
 
         down_write(&cli->cl_sem);
+        CDEBUG(D_INFO, "disconnect %s - %d\n", obd->obd_name,
+               cli->cl_conn_count);
+
         if (!cli->cl_conn_count) {
                 CERROR("disconnecting disconnected device (%s)\n",
                        obd->obd_name);
@@ -490,16 +495,11 @@ int client_disconnect_export(struct obd_export *exp)
                                        NULL);
                 ldlm_namespace_free_prior(obd->obd_namespace, imp,
                                           obd->obd_force);
-                to_be_freed = obd->obd_namespace;
         }
 
         rc = ptlrpc_disconnect_import(imp, 0);
 
         ptlrpc_invalidate_import(imp);
-        /* set obd_namespace to NULL only after invalidate, because we can have
-         * some connect requests in flight, and his need store a connect flags
-         * in obd_namespace. bug 14260 */
-        obd->obd_namespace = NULL;
 
         if (imp->imp_rq_pool) {
                 ptlrpc_free_rq_pool(imp->imp_rq_pool);
@@ -516,10 +516,8 @@ int client_disconnect_export(struct obd_export *exp)
         err = class_disconnect(exp);
         if (!rc && err)
                 rc = err;
-
         up_write(&cli->cl_sem);
-        if (to_be_freed)
-                ldlm_namespace_free_post(to_be_freed);
+
         RETURN(rc);
 }
 
index 9857485..7d03ea8 100644 (file)
@@ -573,8 +573,6 @@ void ldlm_namespace_free_prior(struct ldlm_namespace *ns,
                 return;
         }
 
-        /* Make sure that nobody can find this ns in its list. */
-        ldlm_namespace_unregister(ns, ns->ns_client);
 
         /* Can fail with -EINTR when force == 0 in which case try harder */
         rc = __ldlm_namespace_free(ns, force);
@@ -600,6 +598,9 @@ void ldlm_namespace_free_post(struct ldlm_namespace *ns)
                 return;
         }
 
+        /* Make sure that nobody can find this ns in its list. */
+        ldlm_namespace_unregister(ns, ns->ns_client);
+
         /* Fini pool _before_ parent proc dir is removed. This is important
          * as ldlm_pool_fini() removes own proc dir which is child to @dir.
          * Removing it after @dir may cause oops. */
index 14097fe..69d6b6e 100644 (file)
@@ -468,13 +468,13 @@ static int lov_disconnect_obd(struct obd_device *obd, struct lov_tgt_desc *tgt)
                 }
         }
 
-        if (obd->obd_no_recov) {
+        if (obd->obd_force) {
                 /* Pass it on to our clients.
                  * XXX This should be an argument to disconnect,
                  * XXX not a back-door flag on the OBD.  Ah well.
                  */
                 if (osc_obd)
-                        osc_obd->obd_no_recov = 1;
+                        osc_obd->obd_force = 1;
         }
 
         obd_register_observer(osc_obd, NULL);