+test_135() {
+ [ "$MDS1_VERSION" -lt $(version_code 2.12.51) ] &&
+ skip "Need MDS version at least 2.12.51"
+
+ mkdir -p $DIR/$tdir
+ $LFS setstripe -E 1M -L mdt $DIR/$tdir
+ # to have parent dir write lock before open/resend
+ touch $DIR/$tdir/$tfile
+ #define OBD_FAIL_MDS_LDLM_REPLY_NET 0x157
+ do_nodes $(comma_list $(mdts_nodes)) $LCTL set_param fail_loc=0x80000157
+ openfile -f O_RDWR:O_CREAT -m 0755 $DIR/$tdir/$tfile ||
+ error "Failed to open DOM file"
+}
+run_test 135 "DOM: open/create resend to return size"
+
+test_136() {
+ remote_mds_nodsh && skip "remote MDS with nodsh"
+ [[ "$MDS1_VERSION" -ge $(version_code 2.12.52) ]] ||
+ skip "Need MDS version at least 2.12.52"
+
+ local mdts=$(comma_list $(mdts_nodes))
+ local MDT0=$(facet_svc $SINGLEMDS)
+
+ local clog=$(do_facet mds1 $LCTL --device $MDT0 changelog_register -n)
+ [ -n "$clog" ] || error "changelog_register failed"
+ cl_mask=$(do_facet mds1 $LCTL get_param \
+ mdd.$MDT0.changelog_mask -n)
+ changelog_chmask "ALL"
+
+ # generate some changelog records to accumulate
+ test_mkdir -i 0 -c 0 $DIR/$tdir || error "mkdir $tdir failed"
+ createmany -m $DIR/$tdir/$tfile 10000 ||
+ error "create $DIR/$tdir/$tfile failed"
+
+ local size1=$(do_facet $SINGLEMDS \
+ $LCTL get_param -n mdd.$MDT0.changelog_size)
+ echo "Changelog size $size1"
+
+ #define OBD_FAIL_LLOG_PURGE_DELAY 0x1318
+ do_nodes $mdts $LCTL set_param fail_loc=0x1318 fail_val=30
+
+ # launch changelog_deregister in background on MDS
+ do_facet mds1 "nohup $LCTL --device $MDT0 changelog_deregister $clog \
+ > foo.out 2> foo.err < /dev/null &"
+ # give time to reach fail_loc
+ sleep 15
+
+ # fail_loc will make MDS sleep in the middle of changelog_deregister
+ # take this opportunity to abruptly kill MDS
+ FAILURE_MODE_save=$FAILURE_MODE
+ FAILURE_MODE=HARD
+ fail mds1
+ FAILURE_MODE=$FAILURE_MODE_save
+
+ do_nodes $mdts $LCTL set_param fail_loc=0x0 fail_val=0
+
+ local size2=$(do_facet $SINGLEMDS \
+ $LCTL get_param -n mdd.$MDT0.changelog_size)
+ echo "Changelog size $size2"
+ local clog2=$(do_facet $SINGLEMDS "$LCTL get_param -n \
+ mdd.$MDT0.changelog_users | grep $clog")
+ echo "After crash, changelog user $clog2"
+
+ [ -n "$clog2" -o $size2 -lt $size1 ] ||
+ error "changelog record count unchanged"
+
+ do_facet mds1 $LCTL set_param mdd.$MDT0.changelog_mask=\'$cl_mask\' -n
+}
+run_test 136 "changelog_deregister leaving pending records"
+
+test_137() {
+ df $DIR
+ mkdir -p $DIR/d1
+ mkdir -p $DIR/d2
+ dd if=/dev/zero of=$DIR/d1/$tfile bs=4096 count=1
+ dd if=/dev/zero of=$DIR/d2/$tfile bs=4096 count=1
+ cancel_lru_locks osc
+
+ #define OBD_FAIL_PTLRPC_RESEND_RACE 0x525
+ do_facet $SINGLEMDS "lctl set_param fail_loc=0x80000525"
+
+ # RPC1: any reply is to be delayed to disable last_xid logic
+ ln $DIR/d1/$tfile $DIR/d1/f2 &
+ sleep 1
+
+ # RPC2: setattr1 reply is delayed & resent
+ # original reply comes to client; the resend get asleep
+ chmod 666 $DIR/d2/$tfile
+
+ # RPC3: setattr2 on the same file; run ahead of RPC2 resend
+ chmod 777 $DIR/d2/$tfile
+
+ # RPC2 resend wakes up
+ sleep 5
+ [ $(stat -c "%a" $DIR/d2/$tfile) == 777 ] || error "resend got applied"
+}
+run_test 137 "late resend must be skipped if already applied"
+
+test_138() {
+ remote_mds_nodsh && skip "remote MDS with nodsh"
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ [[ "$MDS1_VERSION" -ge $(version_code 2.12.59) ]] ||
+ skip "Need server version newer than 2.12.59"
+
+ zconf_umount_clients $CLIENTS $MOUNT
+
+#define OBD_FAIL_TGT_RECOVERY_CONNECT 0x724
+ #delay a first step of recovey when MDS waiting clients
+ #and failing to get osp logs
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0x724 fail_val=5
+
+ facet_failover $SINGLEMDS
+
+ #waiting failover and recovery timer
+ #the valuse is based on target_recovery_overseer() wait_event timeout
+ sleep 55
+ stop $SINGLEMDS || error "stop MDS failed"
+ do_facet $SINGLEMDS $LCTL set_param fail_loc=0
+ start $SINGLEMDS $(mdsdevname ${SINGLEMDS//mds/}) ||
+ error "start MDS failed"
+ zconf_mount_clients $CLIENTS $MOUNT
+}
+run_test 138 "Umount MDT during recovery"
+
+test_139() {
+ [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+ [ $MDS1_VERSION -lt $(version_code 2.13.50) ] &&
+ skip "Need MDS version at least 2.13.50"
+
+ mdt_dev=$(mdsdevname 1)
+
+ stop $SINGLEMDS || error "stop $SINGLEMDS failed"
+
+#define OBD_FAIL_OSP_INVALID_LOGID 0x2106
+ do_facet $SINGLEMDS $LCTL set_param fail_val=0x68 fail_loc=0x80002106
+ start $SINGLEMDS $mdt_dev $MDS_MOUNT_OPTS || error "Fail to start MDT"
+}
+run_test 139 "corrupted catid won't cause crash"
+
+# test_140a: check the no_recovery export flag of a client mounted on the
+# MDS node.
+#
+# With mdt.*.local_recovery=0 at least one export must carry the
+# no_recovery flag; with local_recovery=1 none may.  The original
+# local_recovery value of MDT0000 is restored on exit.
+test_140a() {
+	# the message reports the version that is actually checked
+	[ $MDS1_VERSION -lt $(version_code 2.12.58) ] &&
+		skip "Need MDS version at least 2.12.58"
+
+	[ "$SHARED_KEY" = true ] &&
+		skip "server local client incompatible with SSK keys installed"
+
+	local slr
+	local cnt
+
+	slr=$(do_facet mds1 \
+		$LCTL get_param -n mdt.$FSNAME-MDT0000.local_recovery)
+	stack_trap "do_facet mds1 $LCTL set_param \
+		mdt.*.local_recovery=$slr" EXIT
+
+	# disable recovery for local clients
+	# so local clients should be marked with no_recovery flag
+	do_facet mds1 $LCTL set_param mdt.*.local_recovery=0
+	mount_mds_client
+
+	# the pattern is quoted so the shell cannot glob-expand it
+	cnt=$(do_facet mds1 $LCTL get_param "mdt.*.exports.*.export" |
+		grep -c 'export_flags.*no_recovery')
+	echo "$cnt clients with recovery disabled"
+	umount_mds_client
+	[ $cnt -eq 0 ] && error "no clients with recovery disabled"
+
+	# enable recovery for local clients
+	# so no local clients should be marked with no_recovery flag
+	do_facet mds1 $LCTL set_param mdt.*.local_recovery=1
+	mount_mds_client
+
+	cnt=$(do_facet mds1 $LCTL get_param "mdt.*.exports.*.export" |
+		grep -c 'export_flags.*no_recovery')
+	echo "$cnt clients with recovery disabled"
+	umount_mds_client
+	[ $cnt -eq 0 ] || error "$cnt clients with recovery disabled"
+}
+run_test 140a "local mount is flagged properly"
+
+# test_140b: with mdt.*.local_recovery=0, mount a client on the MDS node,
+# set a replay barrier, unmount that client and fail mds1 over.  The value
+# taken from the last "Recovery over after" line in the mds1 dmesg must be
+# below TIMEOUT * 2.  The original local_recovery value of MDT0000 is
+# restored on exit.
+test_140b() {
+	# the message reports the version that is actually checked
+	[ $MDS1_VERSION -lt $(version_code 2.12.58) ] &&
+		skip "Need MDS version at least 2.12.58"
+
+	[ "$SHARED_KEY" = true ] &&
+		skip "server local client incompatible with SSK keys installed"
+
+	local slr
+	local recovery
+
+	slr=$(do_facet mds1 \
+		$LCTL get_param -n mdt.$FSNAME-MDT0000.local_recovery)
+	stack_trap "do_facet mds1 $LCTL set_param \
+		mdt.*.local_recovery=$slr" EXIT
+
+	# disable recovery for local clients
+	do_facet mds1 $LCTL set_param mdt.*.local_recovery=0
+
+	mount_mds_client
+	replay_barrier mds1
+	umount_mds_client
+	fail mds1
+
+	# NOTE(review): only the 4th ':'-separated field of the message is
+	# compared; if the message is formatted as minutes:seconds this
+	# ignores the minutes -- confirm against the server log format.
+	recovery=$(do_facet mds1 dmesg |
+		awk -F: '/Recovery over after/ { print $4 }' |
+		cut -d, -f1 | tail -1)
+	recovery=${recovery//[[:space:]]/}
+	[[ -n "$recovery" ]] ||
+		error "no 'Recovery over after' message in mds1 dmesg"
+
+	# force base 10: a zero-padded value such as 08 would otherwise be
+	# rejected as an invalid octal number
+	(( 10#$recovery < $TIMEOUT * 2 )) ||
+		error "recovery took too long $recovery > $((TIMEOUT * 2))"
+}
+run_test 140b "local mount is excluded from recovery"
+
+test_141() {
+ local oldc
+ local newc
+
+ [ $PARALLEL == "yes" ] && skip "skip parallel run"
+ combined_mgs_mds || skip "needs combined MGS/MDT"
+ ( local_mode || from_build_tree ) &&
+ skip "cannot run in local mode or from build tree"
+
+ # some get_param have a bug to handle dot in param name
+ do_rpc_nodes $(facet_active_host $SINGLEMDS) cancel_lru_locks MGC
+ oldc=$(do_facet $SINGLEMDS $LCTL get_param -n \
+ 'ldlm.namespaces.MGC*.lock_count')
+ fail $SINGLEMDS
+ do_rpc_nodes $(facet_active_host $SINGLEMDS) cancel_lru_locks MGC
+ newc=$(do_facet $SINGLEMDS $LCTL get_param -n \
+ 'ldlm.namespaces.MGC*.lock_count')
+
+ [ $oldc -eq $newc ] || error "mgc lost locks ($oldc != $newc)"
+ return 0
+}
+run_test 141 "do not lose locks on MGS restart"
+
+test_142() {
+ [ $MDS1_VERSION -lt $(version_code 2.11.56) ] &&
+ skip "Need MDS version at least 2.11.56"
+
+ #define OBD_FAIL_MDS_ORPHAN_DELETE 0x165
+ do_facet mds1 $LCTL set_param fail_loc=0x165
+ $MULTIOP $DIR/$tfile Ouc || error "multiop failed"
+
+ stop mds1
+ start mds1 $(mdsdevname 1) $MDS_MOUNT_OPTS
+
+ wait_update_facet mds1 "pgrep orph_.*-MDD | wc -l" "0" ||
+ error "MDD orphan cleanup thread not quit"
+}
+run_test 142 "orphan name stub can be cleaned up in startup"
+
+# test_143: stop mds1, mount its backing filesystem directly, create a
+# file named $tfile under PENDING/ on it, unmount, and start mds1 again.
+# After recovery completes, no process matching "orph_.*-MDD" may remain
+# on mds1.
+test_143() {
+	[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.13.00) ] &&
+		skip "Need MDS version at least 2.13.00"
+	[ $PARALLEL == "yes" ] && skip "skip parallel run"
+
+	local mntpt=$(facet_mntpt $SINGLEMDS)
+
+	stop mds1 || error "mds1 stop fail"
+	mount_fstype $SINGLEMDS || error "mount as fstype $SINGLEMDS failed"
+	# Without this entry in PENDING the test has nothing to exercise,
+	# so a failed touch must not go unnoticed.
+	do_facet $SINGLEMDS touch $mntpt/PENDING/$tfile ||
+		error "create $mntpt/PENDING/$tfile failed"
+	unmount_fstype $SINGLEMDS
+	start mds1 $(mdsdevname 1) $MDS_MOUNT_OPTS || error "mds1 start fail"
+
+	wait_recovery_complete $SINGLEMDS || error "MDS recovery not done"
+	wait_update_facet mds1 "pgrep orph_.*-MDD | wc -l" "0" ||
+		error "MDD orphan cleanup thread not quit"
+}
+run_test 143 "orphan cleanup thread shouldn't be blocked even delete failed"
+