Whamcloud - gitweb
LU-2193 ofd: look up FID to destroy before locking
author: Andreas Dilger <adilger@whamcloud.com>
Tue, 16 Oct 2012 05:15:17 +0000 (23:15 -0600)
committer: Oleg Drokin <green@whamcloud.com>
Wed, 17 Oct 2012 01:26:04 +0000 (21:26 -0400)
If the MDS is replaying object destroys after recovery, then it may
be trying to destroy non-existent objects.  This can provoke spurious
errors in lvbo_init() due to the inability to populate the lock LVB.
Rather than quieting the useful error message from lvbo_init(),
do the object lookup on the to-be-destroyed FID first.  If lookup
fails to find an object, skip the object locking entirely since it
isn't needed and would just flood the console after recovery.

During destroy RPCs from the MDS, the ELC buffer is always empty, so
short-circuit the initial lock cancellation attempt, which would be useless.

Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: Id6197f23773ea271e0cb0912b19585b3df500c1e
Reviewed-on: http://review.whamcloud.com/4276
Reviewed-by: Mike Pershin <tappro@whamcloud.com>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/ldlm/ldlm_lockd.c
lustre/ofd/ofd_obd.c

index b38d854..8dcde3e 100644 (file)
@@ -1506,6 +1506,9 @@ int ldlm_request_cancel(struct ptlrpc_request *req,
         if (first >= count)
                 RETURN(0);
 
+       if (count == 1 && dlm_req->lock_handle[0].cookie == 0)
+               RETURN(0);
+
         /* There is no lock on the server at the replay time,
          * skip lock cancelling to make replay tests to pass. */
         if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY)
index 80fe705..2a05f1d 100644 (file)
@@ -951,6 +951,10 @@ static int ofd_destroy_by_fid(const struct lu_env *env,
 
        ENTRY;
 
+       fo = ofd_object_find(env, ofd, fid);
+       if (IS_ERR(fo))
+               RETURN(PTR_ERR(fo));
+
        /* Tell the clients that the object is gone now and that they should
         * throw away any cached pages. */
        ofd_build_resid(fid, &info->fti_resid);
@@ -963,9 +967,6 @@ static int ofd_destroy_by_fid(const struct lu_env *env,
        if (rc == ELDLM_OK)
                ldlm_lock_decref(&lockh, LCK_PW);
 
-       fo = ofd_object_find(env, ofd, fid);
-       if (IS_ERR(fo))
-               RETURN(PTR_ERR(fo));
        LASSERT(fo != NULL);
 
        rc = ofd_object_destroy(env, fo, orphan);
@@ -1013,14 +1014,15 @@ int ofd_destroy(const struct lu_env *env, struct obd_export *exp,
                lrc = ofd_destroy_by_fid(env, ofd, &info->fti_fid, 0);
                if (lrc == -ENOENT) {
                        CDEBUG(D_INODE,
-                              "destroying non-existent object "LPU64"\n",
-                              oa->o_id);
+                              "%s: destroying non-existent object "DFID"\n",
+                              ofd_obd(ofd)->obd_name, PFID(&info->fti_fid));
                        /* rewrite rc with -ENOENT only if it is 0 */
                        if (rc == 0)
                                rc = lrc;
                } else if (lrc != 0) {
-                       CEMERG("error destroying object "LPU64": %d\n",
-                              oa->o_id, rc);
+                       CERROR("%s: error destroying object "DFID": %d\n",
+                              ofd_obd(ofd)->obd_name, PFID(&info->fti_fid),
+                              rc);
                        rc = lrc;
                }
                count--;