Whamcloud - gitweb
LU-7173 mdt: intent vs unlink race 01/17501/4
authorVitaly Fertman <vitaly.fertman@seagate.com>
Mon, 7 Dec 2015 18:37:46 +0000 (21:37 +0300)
committerOleg Drokin <oleg.drokin@intel.com>
Mon, 21 Dec 2015 12:41:32 +0000 (12:41 +0000)
a race between intent and unlink results in working on stale object,
the intent resend finds the lock and !existent object and LBUG:
    ASSERTION( ((o)->lo_header->loh_attr & LOHA_EXISTS) != 0 )
in mdt_getattr_name_lock()->lu_object_attr(). a check for getattr is
added for !existent object in RESENT case.

another case is when resend finds inode cached on thread,
osd_iget_check() checking the inode returns ESTALE as nlinks==0.
ldlm_lock_enqueue() gets an error from ns_policy() and tries to
destroy the found lock, which is granted, getting "lock still on
resource" LBUG.

It is unclear if the lock reached the client in the original reply,
just leave the lock on server, not returning it again to the client.
Due to LU-6529, the server will not OOM in case the 2nd reply will
be handled on the client.

Signed-off-by: Vitaly Fertman <vitaly.fertman@seagate.com>
Change-Id: I128cd6eeda579c6477bf4564db5e551a46a74d71
Reviewed-on: http://es-gerrit.xyus.xyratex.com:8080/8849
Tested-by: Jenkins
Seagate-bug-id: MRP-3042
Reviewed-by: Alexander Nikolaevich Boyko <alexander.boyko@seagate.com>
Reviewed-by: Andriy Skulysh <andriy.skulysh@seagate.com>
Reviewed-by: Alexey Leonidovich Lyashkov <alexey.lyashkov@seagate.com>
Tested-by: Elena V. Gryaznova <elena.gryaznova@seagate.com>
Reviewed-on: http://review.whamcloud.com/17501
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Alexander Boyko <alexander.boyko@seagate.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/obd_support.h
lustre/ldlm/ldlm_lock.c
lustre/ldlm/ldlm_lockd.c
lustre/mdt/mdt_handler.c
lustre/ofd/ofd_io.c
lustre/osd-ldiskfs/osd_handler.c
lustre/osd-zfs/osd_oi.c
lustre/tests/recovery-small.sh
lustre/tests/sanity.sh

index 88e4a07..5f74f80 100644 (file)
@@ -248,6 +248,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_MDS_REINT_MULTI_NET     0x159
 #define OBD_FAIL_MDS_REINT_MULTI_NET_REP 0x15a
 #define OBD_FAIL_MDS_LLOG_CREATE_FAILED2 0x15b
+#define OBD_FAIL_MDS_INTENT_DELAY              0x160
 
 /* layout lock */
 #define OBD_FAIL_MDS_NO_LL_GETATTR      0x170
@@ -297,7 +298,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_OST_BRW_PAUSE_BULK      0x214
 #define OBD_FAIL_OST_ENOSPC              0x215
 #define OBD_FAIL_OST_EROFS               0x216
-#define OBD_FAIL_OST_ENOENT              0x217
+#define OBD_FAIL_SRV_ENOENT              0x217
 /*     OBD_FAIL_OST_QUOTACHECK_NET      0x218 obsolete since 2.4 */
 #define OBD_FAIL_OST_QUOTACTL_NET        0x219
 #define OBD_FAIL_OST_CHECKSUM_RECEIVE    0x21a
index 486e7b3..b402a2d 100644 (file)
@@ -1732,6 +1732,16 @@ enum ldlm_error ldlm_lock_enqueue(struct ldlm_namespace *ns,
                         }
                         *flags |= LDLM_FL_LOCK_CHANGED;
                         RETURN(0);
+               } else if (rc != ELDLM_OK &&
+                          lock->l_req_mode == lock->l_granted_mode) {
+                       LASSERT(*flags & LDLM_FL_RESENT);
+                       /* It may happen that ns_policy returns an error in
+                        * resend case, object may be unlinked or just some
+                        * error occurs. It is unclear if lock reached the
+                        * client in the original reply, just leave the lock on
+                        * server, not returning it again to client. Due to
+                        * LU-6529, the server will not OOM. */
+                       RETURN(rc);
                 } else if (rc != ELDLM_OK ||
                            (rc == ELDLM_OK && (*flags & LDLM_FL_INTENT_ONLY))) {
                         ldlm_lock_destroy(lock);
index 72a6fb2..5060265 100644 (file)
@@ -1515,7 +1515,7 @@ existing_lock:
                        }
                }
 
-               if (rc != 0) {
+               if (rc != 0 && !(flags & LDLM_FL_RESENT)) {
                        if (lock->l_export) {
                                ldlm_lock_cancel(lock);
                        } else {
index e27fee3..216cb4d 100644 (file)
@@ -1436,6 +1436,13 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info,
                GOTO(out_parent, rc = PTR_ERR(child));
 
        OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_RESEND, obd_timeout * 2);
+       if (!mdt_object_exists(child)) {
+               LU_OBJECT_DEBUG(D_INODE, info->mti_env,
+                               &child->mot_obj,
+                               "Object doesn't exist!\n");
+               GOTO(out_child, rc = -ENOENT);
+       }
+
        rc = mdt_check_resent_lock(info, child, lhc);
        if (rc < 0) {
                GOTO(out_child, rc);
@@ -1444,13 +1451,6 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info,
                mdt_lock_reg_init(lhc, LCK_PR);
                try_layout = false;
 
-               if (!mdt_object_exists(child)) {
-                       LU_OBJECT_DEBUG(D_INODE, info->mti_env,
-                                       &child->mot_obj,
-                                       "Object doesn't exist!\n");
-                       GOTO(out_child, rc = -ENOENT);
-               }
-
                if (!(child_bits & MDS_INODELOCK_UPDATE) &&
                      mdt_object_exists(child) && !mdt_object_remote(child)) {
                         struct md_attr *ma = &info->mti_attr;
@@ -3390,6 +3390,8 @@ static int mdt_intent_opc(enum ldlm_intent_flags itopc,
        if (flv->it_act != NULL) {
                struct ldlm_reply *rep;
 
+               OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_INTENT_DELAY, 10);
+
                /* execute policy */
                rc = flv->it_act(opc, info, lockp, flags);
 
index 04445af..0608cd9 100644 (file)
@@ -711,7 +711,7 @@ int ofd_preprw(const struct lu_env *env, int cmd, struct obd_export *exp,
 
        LASSERT(oa != NULL);
 
-       if (OBD_FAIL_CHECK(OBD_FAIL_OST_ENOENT)) {
+       if (OBD_FAIL_CHECK(OBD_FAIL_SRV_ENOENT)) {
                struct ofd_seq          *oseq;
 
                oseq = ofd_seq_load(env, ofd, ostid_seq(&oa->o_oi));
index 619662a..2bf04b4 100644 (file)
@@ -747,7 +747,7 @@ static int osd_fid_lookup(const struct lu_env *env, struct osd_object *obj,
        LASSERT(info);
        oic = &info->oti_cache;
 
-       if (OBD_FAIL_CHECK(OBD_FAIL_OST_ENOENT))
+       if (OBD_FAIL_CHECK(OBD_FAIL_SRV_ENOENT))
                RETURN(-ENOENT);
 
        /* For the object is created as locking anchor, or for the object to
index 2333685..ef9aefc 100644 (file)
@@ -467,7 +467,7 @@ int osd_fid_lookup(const struct lu_env *env, struct osd_device *dev,
        int                     rc = 0;
        ENTRY;
 
-       if (OBD_FAIL_CHECK(OBD_FAIL_OST_ENOENT))
+       if (OBD_FAIL_CHECK(OBD_FAIL_SRV_ENOENT))
                RETURN(-ENOENT);
 
        if (unlikely(fid_is_acct(fid))) {
index 86d824f..976ecf6 100755 (executable)
@@ -2252,6 +2252,56 @@ test_113() {
 }
 run_test 113 "ldlm enqueue dropped reply should not cause deadlocks"
 
+T130_PID=0
+test_130_base() {
+       test_mkdir -p $DIR/$tdir
+
+       # get only LOOKUP lock on $tdir
+       cancel_lru_locks mdc
+       ls $DIR/$tdir/$tfile 2>/dev/null
+
+       # get getattr by fid on $tdir
+       #
+       # we need to race with unlink, unlink must complete before we will
+       # take a DLM lock, otherwise unlink will wait until getattr will
+       # complete; but later than getattr starts so that getattr found
+       # the object
+#define OBD_FAIL_MDS_INTENT_DELAY              0x160
+       set_nodes_failloc "$(mdts_nodes)" 0x80000160
+       stat $DIR/$tdir &
+       T130_PID=$!
+       sleep 2
+
+       rm -rf $DIR/$tdir
+
+       # drop the reply so that resend happens on an unlinked file.
+#define OBD_FAIL_MDS_LDLM_REPLY_NET     0x157
+       set_nodes_failloc "$(mdts_nodes)" 0x80000157
+}
+
+test_130a() {
+       remote_mds_nodsh && skip "remote MDS with nodsh" && return
+       test_130_base
+
+       wait $T130_PID || [ $? -eq 0 ] && error "stat should fail"
+       return 0
+}
+run_test 130a "enqueue resend on not existing file"
+
+test_130b() {
+       remote_mds_nodsh && skip "remote MDS with nodsh" && return
+       test_130_base
+       # let the reply to be dropped
+       sleep 10
+
+#define OBD_FAIL_SRV_ENOENT              0x217
+       set_nodes_failloc "$(mdts_nodes)" 0x80000217
+
+       wait $T130_PID || [ $? -eq 0 ] && error "stat should fail"
+       return 0
+}
+run_test 130b "enqueue resend on a stale inode"
+
 complete $SECONDS
 check_and_cleanup_lustre
 exit_status
index 7a25e4b..a66f35f 100755 (executable)
@@ -5649,7 +5649,7 @@ test_68b() {  # was test_68
 run_test 68b "support swapping to Lustre ========================"
 
 # bug5265, obdfilter oa2dentry return -ENOENT
-# #define OBD_FAIL_OST_ENOENT 0x217
+# #define OBD_FAIL_SRV_ENOENT 0x217
 test_69() {
        [ $PARALLEL == "yes" ] && skip "skip parallel run" && return
        remote_ost_nodsh && skip "remote OST with nodsh" && return
@@ -7597,7 +7597,7 @@ test_118b()
 
        reset_async
 
-       #define OBD_FAIL_OST_ENOENT 0x217
+       #define OBD_FAIL_SRV_ENOENT 0x217
        set_nodes_failloc "$(osts_nodes)" 0x217
        $MULTIOP $DIR/$tfile oO_CREAT:O_RDWR:O_SYNC:w4096c
        RC=$?