As the OST-MDT resync may not be finished by the end of recovery,
a new enqueue for a write op may fail due to an absent object.
Return EINPROGRESS so that the enqueue is resent until the OST
gets resynced.
To avoid getting stuck forever in case of a disappeared MDT or a double
failure, return EINPROGRESS only during the hard failover timeout.
Also, clean up replay-ost-single test 12:
- eliminate the need for a hard failover
- no need for a special obd_fail_loc, just use replay_barrier
- createmany is able to create files with unique names, so
no special steps are needed
Lustre-change: https://review.whamcloud.com/45459
Lustre-commit:
28769c65987cb1546918fe12d6f34b95ab9c5507
HPE-bug-id: LUS-10267
Signed-off-by: Vitaly Fertman <vitaly.fertman@hpe.com>
Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
Change-Id: I5f16b63454c51ad8d112770c15c7e6e7f41f3c40
Reviewed-by: Sergey Cheremencev <c17829@cray.com>
Reviewed-by: Alexander Boyko <alexander.boyko@hpe.com>
Reviewed-by: Andriy Skulysh <c17819@cray.com>
Reviewed-by: Andriy Skulysh <andriy.skulysh@hpe.com>
Reviewed-by: Sergey Cheremencev <sergey.cheremencev@hpe.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/47686
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
#define OBD_FAIL_MDS_COMMITRW_DELAY 0x16b
#define OBD_FAIL_MDS_CHANGELOG_DEL 0x16c
#define OBD_FAIL_MDS_CHANGELOG_IDX_PUMP 0x16d
+#define OBD_FAIL_MDS_DELAY_DELORPHAN 0x16e
#define OBD_FAIL_MDS_DIR_PAGE_WALK 0x16f
/* layout lock */
#define OBD_FAIL_OST_INTEGRITY_FAULT 0x243
#define OBD_FAIL_OST_INTEGRITY_CMP 0x244
#define OBD_FAIL_OST_DISCONNECT_DELAY 0x245
-#define OBD_FAIL_OST_DELAY_TRANS 0x246
#define OBD_FAIL_OST_PREPARE_DELAY 0x247
#define OBD_FAIL_OST_2BIG_NIOBUF 0x248
#define OBD_FAIL_OST_FALLOCATE_NET 0x249
return 0;
}
+/*
+ * Tell whether the hard recovery window of this OFD is still open.
+ *
+ * Returns false when no recovery start time is recorded
+ * (obd_recovery_start == 0) or when obd_recovery_start +
+ * obd_recovery_time_hard already lies before the current
+ * ktime_get_seconds() value; returns true otherwise.
+ */
+static bool ofd_resync_allowed(struct ofd_device *ofd)
+{
+	struct obd_device *obd = ofd_obd(ofd);
+
+	/* the clock is only consulted once a recovery start time exists */
+	return obd->obd_recovery_start != 0 &&
+	       obd->obd_recovery_start + obd->obd_recovery_time_hard >=
+	       ktime_get_seconds();
+}
+
/**
* Implementation of ldlm_valblock_ops::lvbo_init for OFD.
*
oseq = ofd_seq_load(env, ofd, fid_seq_is_idif(seq) ?
FID_SEQ_OST_MDT0 : seq);
if (!IS_ERR_OR_NULL(oseq)) {
- if (!oseq->os_last_id_synced)
- rc = -EAGAIN;
+ if (!oseq->os_last_id_synced &&
+ ofd_resync_allowed(ofd))
+ rc = -EINPROGRESS;
ofd_seq_put(env, oseq);
}
}
if (error)
CERROR("transaction @0x%p commit error: %d\n", th, error);
- OBD_FAIL_TIMEOUT(OBD_FAIL_OST_DELAY_TRANS, 40);
/* call per-transaction callbacks if any */
list_for_each_entry_safe(dcb, tmp, &oh->ot_commit_dcb_list,
dcb_linkage) {
CDEBUG(D_HA, "%s: going to cleanup orphans since "DFID"\n",
d->opd_obd->obd_name, PFID(&d->opd_last_used_fid));
+ OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_DELAY_DELORPHAN, cfs_fail_val);
+
*last_fid = d->opd_last_used_fid;
/* The OSP should already get the valid seq now */
LASSERT(!fid_is_zero(last_fid));
}
run_test 10 "conflicting PW & PR locks on a client"
-test_12() {
- [ $FAILURE_MODE != "HARD" ] &&
- skip "Test needs FAILURE_MODE HARD" && return 0
+test_12a() {
remote_ost || { skip "need remote OST" && return 0; }
local tmp=$TMP/$tdir
local dir=$DIR/$tdir
- declare -a pids
-
mkdir -p $tmp || error "can't create $tmp"
mkdir -p $dir || error "can't create $dir"
$LFS setstripe -c 1 -i 0 $dir
- for i in `seq 1 10`; do mkdir $dir/d$i; done
+ for i in $(seq 1 10); do mkdir $dir/d$i; done
- #define OBD_FAIL_OST_DELAY_TRANS 0x245
- do_facet ost1 "$LCTL set_param fail_loc=0x245" ||
- error "can't set fail_loc"
+ # get client connected if was idle
+ touch $dir/file1
+ sync
- for i in `seq 1 10`;
- do
- createmany -o $dir/d$i/$(openssl rand -base64 12) 500 &
- pids+=($!)
+ replay_barrier ost1
+
+ for i in $(seq 1 10); do
+ createmany -o $dir/d$i/file 500
done
- echo "Waiting createmany pids"
- wait ${pids[@]}
ls -lR $dir > $tmp/ls_r_out 2>&1&
local ls_pid=$!
rm -rf $tmp
rm -rf $dir
}
-run_test 12 "check stat after OST failover"
+run_test 12a "glimpse after OST failover to a missing object"
+
+test_12b() {
+ remote_ost || { skip "need remote OST" && return 0; }
+
+ local dir=$DIR/$tdir
+ local rc
+
+ test_mkdir -p -i 0 $dir || error "can't create $dir"
+
+ $LFS setstripe -c 1 -i 0 $dir
+
+ for i in $(seq 1 10); do mkdir $dir/d$i; done
+ replay_barrier ost1
+
+ for i in $(seq 1 10); do
+ createmany -o $dir/d$i/file 500
+ done
+
+ #define OBD_FAIL_MDS_DELAY_DELORPHAN 0x16e
+ do_facet mds1 "$LCTL set_param fail_loc=0x16e fail_val=10" ||
+ error "can't set fail_loc"
+ facet_failover ost1
+
+ dd if=/dev/zero of=$dir/d10/file499 count=1 bs=4K > /dev/null
+ rc=$?
+ [[ $rc -eq 0 ]] || error "dd failed: $rc"
+
+ rm -rf $dir
+}
+run_test 12b "write after OST failover to a missing object"
complete $SECONDS
check_and_cleanup_lustre