Whamcloud - gitweb
b=20321
author dzogin <dzogin>
Sun, 2 Aug 2009 15:54:54 +0000 (15:54 +0000)
committer dzogin <dzogin>
Sun, 2 Aug 2009 15:54:54 +0000 (15:54 +0000)
i=dmitry.zogin
i=adilger

Severity   : normal
Bugzilla   : 20321
Description: Deadlock between filter_destroy() and filter_commitrw_write().
Details    : filter_destroy() does not hold the DLM lock over the whole
             operation. If the DLM lock is dropped, filter_commitrw() can go
             through, causing the deadlock between page lock and i_mutex.

lustre/ChangeLog
lustre/obdfilter/filter.c

index fb6be26..f641c6e 100644 (file)
        * ext4 support for RHEL5 is experimental and thus should not be
          used in production.
 
+Severity   : normal
+Bugzilla   : 20321
+Description: Deadlock between filter_destroy() and filter_commitrw_write().
+Details    : filter_destroy() does not hold the DLM lock over the whole
+            operation. If the DLM lock is dropped, filter_commitrw() can go
+            through, causing the deadlock between page lock and i_mutex.
+
 Severity   : enhancement
 Bugzilla   : 19847
 Description: Update kernel to SLES10 SP2 2.6.16.60-0.39.3.
index 936adef..ed7b2f9 100644 (file)
@@ -1435,9 +1435,9 @@ struct dentry *filter_fid2dentry(struct obd_device *obd,
         RETURN(dchild);
 }
 
-static int filter_prepare_destroy(struct obd_device *obd, obd_id objid)
+static int filter_prepare_destroy(struct obd_device *obd, obd_id objid,
+                                  struct lustre_handle *lockh)
 {
-        struct lustre_handle lockh;
         int flags = LDLM_AST_DISCARD_DATA, rc;
         struct ldlm_res_id res_id = { .name = { objid } };
         ldlm_policy_data_t policy = { .l_extent = { 0, OBD_OBJECT_EOF } };
@@ -1448,15 +1448,23 @@ static int filter_prepare_destroy(struct obd_device *obd, obd_id objid)
         rc = ldlm_cli_enqueue_local(obd->obd_namespace, &res_id, LDLM_EXTENT,
                                     &policy, LCK_PW, &flags, ldlm_blocking_ast,
                                     ldlm_completion_ast, NULL, NULL, 0, NULL,
-                                    &lockh);
-
-        /* We only care about the side-effects, just drop the lock. */
-        if (rc == ELDLM_OK)
-                ldlm_lock_decref(&lockh, LCK_PW);
+                                    lockh);
 
+        if (rc != ELDLM_OK) {
+                lockh->cookie = 0;
+                CERROR("%s: failed to get lock to destroy objid "LPU64" (%d)\n",
+                       obd->obd_name, objid, rc);
+        }
         RETURN(rc);
 }
 
+static void filter_fini_destroy(struct obd_device *obd,
+                                struct lustre_handle *lockh)
+{
+        if (lustre_handle_is_used(lockh))
+                ldlm_lock_decref(lockh, LCK_PW);
+}
+
 /* This is vfs_unlink() without down(i_sem).  If we call regular vfs_unlink()
  * we have 2.6 lock ordering issues with filter_commitrw_write() as it takes
  * i_sem before starting a handle, while filter_destroy() + vfs_unlink do the
@@ -3504,6 +3512,7 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa,
         struct obd_device *obd;
         struct filter_obd *filter;
         struct dentry *dchild = NULL, *dparent = NULL;
+        struct lustre_handle lockh = { 0 };
         struct lvfs_run_ctxt saved;
         void *handle = NULL;
         struct llog_cookie *fcc = NULL;
@@ -3543,7 +3552,9 @@ int filter_destroy(struct obd_export *exp, struct obdo *oa,
                 GOTO(cleanup, rc = -ENOENT);
         }
 
-        filter_prepare_destroy(obd, oa->o_id);
+        rc = filter_prepare_destroy(obd, oa->o_id, &lockh);
+        if (rc)
+                GOTO(cleanup, rc);
 
         /* Our MDC connection is established by the MDS to us */
         if (oa->o_valid & OBD_MD_FLCOOKIE) {
@@ -3639,6 +3650,8 @@ cleanup:
         case 3:
                 filter_parent_unlock(dparent);
         case 2:
+                filter_fini_destroy(obd, &lockh);
+
                 f_dput(dchild);
                 if (fcc != NULL)
                         OBD_FREE(fcc, sizeof(*fcc));