Whamcloud - gitweb
LU-5344 llite: lookup master inode by ilookup5_nowait 66/16066/8
authorwang di <di.wang@intel.com>
Sat, 22 Aug 2015 20:54:52 +0000 (13:54 -0700)
committerOleg Drokin <oleg.drokin@intel.com>
Sat, 19 Sep 2015 03:22:32 +0000 (03:22 +0000)
Do not look up the master inode with ilookup5(); use
ilookup5_nowait() instead, otherwise it can cause a deadlock:

1. Client1 sends a chmod request to MDT0. MDT0 then enqueues
the locks of the master and all of its slaves (mdt_attr_set()
->mdt_lock_slaves()). After getting the master and stripe0 locks,
it sends the enqueue request (for stripe1) to MDT1, and
MDT1 finds that the lock has been granted to client2. MDT1 then
sends a blocking AST to client2.

2. At the same time, client2 tries to unlink the striped
dir (rm -rf striped_dir). During lookup, it holds
the master inode of the striped directory, whose inode state
is NEW, and then tries to revalidate all of its slaves
(ll_prep_inode()->ll_iget()->ll_read_inode2()->
ll_update_inode()). This is blocked on the server
side because of 1.

3. Client2 then gets the blocking_ast request and tries to cancel
the lock, but is blocked by ilookup5 in ll_md_blocking_ast(),
because the inode state is still NEW.

Add test_90/91 to sanityn.sh to exercise the deadlock scenarios.

Signed-off-by: wang di <di.wang@intel.com>
Change-Id: I8ce88595998dc35b6165951873192a65674bf3a7
Reviewed-on: http://review.whamcloud.com/16066
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: John L. Hammond <john.hammond@intel.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/llite/namei.c
lustre/tests/sanityn.sh

index d7c28c3..29e379e 100644 (file)
@@ -288,8 +288,34 @@ int ll_md_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
                                hash = cl_fid_build_ino(&lli->lli_pfid,
                                        ll_need_32bit_api(ll_i2sbi(inode)));
 
                                hash = cl_fid_build_ino(&lli->lli_pfid,
                                        ll_need_32bit_api(ll_i2sbi(inode)));
 
-                               master_inode = ilookup5(inode->i_sb, hash,
-                                                       ll_test_inode_by_fid,
+                               /* Do not lookup the inode with ilookup5,
+                                * otherwise it will cause dead lock,
+                                *
+                                * 1. Client1 send chmod req to the MDT0, then
+                                * on MDT0, it enqueues master and all of its
+                                * slaves lock, (mdt_attr_set() ->
+                                * mdt_lock_slaves()), after gets master and
+                                * stripe0 lock, it will send the enqueue req
+                                * (for stripe1) to MDT1, then MDT1 finds the
+                                * lock has been granted to client2. Then MDT1
+                                * sends blocking ast to client2.
+                                *
+                                * 2. At the same time, client2 tries to unlink
+                                * the striped dir (rm -rf striped_dir), and
+                                * during lookup, it will hold the master inode
+                                * of the striped directory, whose inode state
+                                * is NEW, then tries to revalidate all of its
+                                * slaves, (ll_prep_inode()->ll_iget()->
+                                * ll_read_inode2()-> ll_update_inode().). And
+                                * it will be blocked on the server side because
+                                * of 1.
+                                *
+                                * 3. Then the client get the blocking_ast req,
+                                * cancel the lock, but being blocked if using
+                                * ->ilookup5()), because master inode state is
+                                *  NEW. */
+                               master_inode = ilookup5_nowait(inode->i_sb,
+                                                   hash, ll_test_inode_by_fid,
                                                        (void *)&lli->lli_pfid);
                                if (master_inode != NULL &&
                                        !IS_ERR(master_inode)) {
                                                        (void *)&lli->lli_pfid);
                                if (master_inode != NULL &&
                                        !IS_ERR(master_inode)) {
index e29bdc2..b906b15 100644 (file)
@@ -3361,6 +3361,80 @@ test_83() {
 }
 run_test 83 "access striped directory while it is being created/unlinked"
 
 }
 run_test 83 "access striped directory while it is being created/unlinked"
 
+test_90() {
+       [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
+       local pid1
+       local pid2
+       local duration=180
+
+       [ "$SLOW" = "yes" ] && duration=600
+       # Open/Create under striped directory
+       (
+               cd $DIR1
+               while true; do
+                       $LFS mkdir -c$MDSCOUNT $tdir > /dev/null 2>&1
+                       touch $tdir/f{0..3} > /dev/null 2>&1
+               done
+       ) &
+       pid1=$!
+       echo "start pid $pid1 to open/create under striped directory"
+
+       # unlink the striped directory at the same time
+       (
+               cd $DIR2
+               while true; do
+                       rm -rf $tdir > /dev/null 2>&1
+               done
+       ) &
+       pid2=$!
+       echo "start pid $pid2 to unlink striped directory"
+
+       sleep $duration
+
+       kill $pid1 $pid2
+       wait $pid1 $pid2
+
+       return 0
+}
+run_test 90 "open/create and unlink striped directory"
+
+test_91() {
+       [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
+       local pid1
+       local pid2
+       local duration=180
+
+       [ "$SLOW" = "yes" ] && duration=600
+       # chmod striped directory
+       (
+               cd $DIR1
+               while true; do
+                       $LFS mkdir -c$MDSCOUNT $tdir > /dev/null 2>&1
+                       chmod go+w $tdir > /dev/null 2>&1
+               done
+       ) &
+       pid1=$!
+       echo "start pid $pid1 to chmod striped directory"
+
+       # unlink the striped directory at the same time
+       (
+               cd $DIR2
+               while true; do
+                       rm -rf $tdir > /dev/null 2>&1
+               done
+       ) &
+       pid2=$!
+       echo "start pid $pid2 to unlink striped directory"
+
+       sleep $duration
+
+       kill $pid1 $pid2
+       wait $pid1 $pid2
+
+       return 0
+}
+run_test 91 "chmod and unlink striped directory"
+
 log "cleanup: ======================================================"
 
 [ "$(mount | grep $MOUNT2)" ] && umount $MOUNT2
 log "cleanup: ======================================================"
 
 [ "$(mount | grep $MOUNT2)" ] && umount $MOUNT2