obd->obd_replayed_requests++;
} else if (type == UPDATE_RECOVERY && transno != 0) {
struct distribute_txn_replay_req *dtrq;
+ bool update_transno = false;
spin_unlock(&obd->obd_recovery_task_lock);
dtrq->dtrq_master_transno);
list_add(&dtrq->dtrq_list,
&tdtd->tdtd_replay_finish_list);
+ update_transno = true;
} else {
dtrq_destroy(dtrq);
}
spin_unlock(&tdtd->tdtd_replay_list_lock);
- spin_lock(&obd->obd_recovery_task_lock);
- if (transno == obd->obd_next_recovery_transno)
- obd->obd_next_recovery_transno++;
- else if (transno > obd->obd_next_recovery_transno)
- obd->obd_next_recovery_transno = transno + 1;
- spin_unlock(&obd->obd_recovery_task_lock);
-
+ if (update_transno) {
+ spin_lock(&obd->obd_recovery_task_lock);
+ if (transno == obd->obd_next_recovery_transno)
+ obd->obd_next_recovery_transno++;
+ else if (transno >
+ obd->obd_next_recovery_transno)
+ obd->obd_next_recovery_transno =
+ transno + 1;
+ spin_unlock(&obd->obd_recovery_task_lock);
+ }
} else {
spin_unlock(&obd->obd_recovery_task_lock);
LASSERT(list_empty(&obd->obd_req_replay_queue));
}
run_test 70e "rename cross-MDT with random fails"
+cleanup_71a() {
+ trap 0
+ kill -9 $mkdir_71a_pid
+}
+
+random_double_fail_mdt() {
+ local max_index=$1
+ local duration=$2
+ local monitor_pid=$3
+ local elapsed
+ local start_ts=$(date +%s)
+ local num_failovers=0
+ local fail_index
+ local second_index
+
+ elapsed=$(($(date +%s) - start_ts))
+ while [ $elapsed -lt $duration ]; do
+ fail_index=$((RANDOM%max_index + 1))
+ if [ $fail_index -eq $max_index ]; then
+ second_index=1
+ else
+ second_index=$((fail_index + 1))
+ fi
+ kill -0 $monitor_pid ||
+ error "$monitor_pid stopped"
+ sleep 120
+ replay_barrier mds$fail_index
+ replay_barrier mds$second_index
+ sleep 10
+ # Increment the number of failovers
+ num_failovers=$((num_failovers+1))
+ log "fail mds$fail_index mds$second_index $num_failovers times"
+ fail mds${fail_index},mds${second_index}
+ elapsed=$(($(date +%s) - start_ts))
+ done
+}
+
+# Create and remove a directory with "$LFS mkdir -i0 -c2" in a background
+# loop while random_double_fail_mdt fails pairs of MDTs among the first two.
+# Skipped when fewer than 2 MDTs are configured.
+#
+# The failover phase lasts 300 seconds by default, 180 when SLOW is "no",
+# and 900 when FAILURE_MODE is HARD.  The test errors out if the
+# background loop is no longer running once the failover phase is over.
+test_71a () {
+	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+	local clients=${CLIENTS:-$HOSTNAME}
+
+	zconf_mount_clients $clients $MOUNT
+
+	local duration=300
+	[ "$SLOW" = "no" ] && duration=180
+	# set duration to 900 because it takes some time to boot node
+	[ "$FAILURE_MODE" = HARD ] && duration=900
+
+	# Without the parent directory the background loop would only spin on
+	# failing commands and the test would pass without testing anything.
+	mkdir -p $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+
+	# Make sure the background loop is killed however the test exits.
+	trap cleanup_71a EXIT
+	(
+		while true; do
+			$LFS mkdir -i0 -c2 $DIR/$tdir/test
+			rmdir $DIR/$tdir/test
+		done
+	)&
+	# Global on purpose: cleanup_71a reads it from the EXIT trap.
+	mkdir_71a_pid=$!
+	echo "Started $mkdir_71a_pid"
+
+	random_double_fail_mdt 2 $duration $mkdir_71a_pid
+	kill -0 $mkdir_71a_pid || error "mkdir/rmdir $mkdir_71a_pid stopped"
+
+	cleanup_71a
+	true
+}
+run_test 71a "mkdir/rmdir striped dir with 2 mdts recovery"
test_73a() {
multiop_bg_pause $DIR/$tfile O_tSc ||