Whamcloud - gitweb
LU-8907 llite: handle client racy case during create 96/25296/5
authorBruno Faccini <bruno.faccini@intel.com>
Tue, 7 Feb 2017 11:30:35 +0000 (12:30 +0100)
committerOleg Drokin <oleg.drokin@intel.com>
Sun, 26 Mar 2017 06:50:20 +0000 (06:50 +0000)
Some very infrequent situations exist on the client side
that can cause a race during create when concurrent access
by fid occurs. As a result of the race, a d_alias can
already be present, which was not expected when the original
code/LBUG was written.

One of the identified scenarios is a concurrent access to the
inode through the .lustre/fid/<[FID]> method.

New sanity/test_161d has been added to reproduce this scenario.

Final fix is to remove inaccurate
LASSERT(ll_d_hlist_empty(&inode->i_dentry));
in ll_create_node().

Signed-off-by: Bruno Faccini <bruno.faccini@intel.com>
Change-Id: I1ff19883cc5b53831f1d5c577f4152225fa9e0fb
Reviewed-on: https://review.whamcloud.com/25296
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: Niu Yawei <yawei.niu@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/obd_support.h
lustre/llite/namei.c
lustre/tests/sanity.sh

index 0c47add..7322802 100644 (file)
@@ -530,6 +530,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_LLITE_CREATE_FILE_PAUSE           0x1409
 #define OBD_FAIL_LLITE_NEWNODE_PAUSE               0x140a
 #define OBD_FAIL_LLITE_SETDIRSTRIPE_PAUSE          0x140b
+#define OBD_FAIL_LLITE_CREATE_NODE_PAUSE           0x140c
 
 
 #define OBD_FAIL_FID_INDIR     0x1501
index 1d9e9ce..3b8a889 100644 (file)
@@ -904,7 +904,8 @@ static struct inode *ll_create_node(struct inode *dir, struct lookup_intent *it)
         if (rc)
                 GOTO(out, inode = ERR_PTR(rc));
 
-       LASSERT(ll_d_hlist_empty(&inode->i_dentry));
+       /* Pause to allow for a race with concurrent access by fid */
+       OBD_FAIL_TIMEOUT(OBD_FAIL_LLITE_CREATE_NODE_PAUSE, cfs_fail_val);
 
         /* We asked for a lock on the directory, but were granted a
          * lock on the inode.  Since we finally have an inode pointer,
index 75bb587..c7dde17 100755 (executable)
@@ -11206,6 +11206,55 @@ test_161c() {
 }
 run_test 161c "check CL_RENME[UNLINK] changelog record flags"
 
+test_161d() {
+       local user
+       local pid
+       local fid
+
+       # remove any file left over from a previous run
+       rm -rf $DIR/$tdir/$tfile
+
+       user=$(do_facet $SINGLEMDS $LCTL --device $MDT0 \
+               changelog_register -n)
+       [[ $? -eq 0 ]] || error "changelog_register failed"
+
+       # work in a standalone dir so that locking on $DIR/$MOUNT does not
+       # interfere with $MOUNT/.lustre/fid/ access
+       mkdir $DIR/$tdir
+       [[ $? -eq 0 ]] || error "mkdir failed"
+
+       #define OBD_FAIL_LLITE_CREATE_NODE_PAUSE 0x140c | OBD_FAIL_ONCE
+       $LCTL set_param fail_loc=0x8000140c
+       # fail_val is the pause duration: 5s
+       $LCTL set_param fail_val=5
+
+       # create the file in the background so the test can race against it
+       echo foofoo > $DIR/$tdir/$tfile &
+       pid=$!
+
+       # give the create time to reach the injected pause
+       sleep 2
+
+       ps -q $pid
+       [[ $? -eq 0 ]] || error "create should be blocked"
+
+       local tempfile=$(mktemp)
+       fid=$(changelog_extract_field $MDT0 "CREAT" "$tfile" "t=")
+       cat $MOUNT/.lustre/fid/$fid 2>/dev/null >$tempfile || error "cat failed"
+       # ChangeLog publishing and the file read just above may take long
+       # enough for the write to complete, so non-empty is only reported
+       [[ -s $tempfile ]] && echo "file should be empty"
+
+       $LCTL set_param fail_loc=0
+
+       wait $pid
+       [[ $? -eq 0 ]] || error "create failed"
+
+       $LFS changelog_clear $MDT0 $user 0
+       do_facet $SINGLEMDS $LCTL --device $MDT0 changelog_deregister $user
+}
+run_test 161d "create with concurrent .lustre/fid access"
+
 check_path() {
     local expected=$1
     shift