summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1:
fdda2ad)
It is a known issue that a FID-based operation will fail with
-EINPROGRESS or -EREMCHG if the related OI mapping is invalid (in most
cases because of a file-level backup/restore).
On the other hand, the recovery of cross-MDT modifications will
trigger FID-based operation(s) before OI scrub has rebuilt the related
OI mappings.
So during the sanity-scrub tests, the scripts should avoid cross-MDT
recovery by syncing all transactions before the file-level backup.
Also print an additional warning message when the recovery failure is
caused by bad OI mappings.
Another fix sets the LOC_F_NEW flag for the object that is to be
created via out_create().
Test-Parameters: mdtfilesystemtype=ldiskfs mdsfilesystemtype=ldiskfs ostfilesystemtype=ldiskfs mdscount=2 mdtcount=4 testlist=sanity-scrub,sanity-scrub,sanity-scrub
Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: I6e8bc9c5d587be72ecd7e33fa7e9959fe5b34006
Reviewed-on: http://review.whamcloud.com/21918
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Jian Yu <jian.yu@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
reply->ourp_count = updates;
tti->tti_u.update.tti_update_reply = reply;
tti->tti_mult_trans = !req_is_replay(tgt_ses_req(tsi));
reply->ourp_count = updates;
tti->tti_u.update.tti_update_reply = reply;
tti->tti_mult_trans = !req_is_replay(tgt_ses_req(tsi));
/* Walk through updates in the request to execute them */
for (i = 0; i < update_buf_count; i++) {
struct tgt_handler *h;
/* Walk through updates in the request to execute them */
for (i = 0; i < update_buf_count; i++) {
struct tgt_handler *h;
our = update_bufs[i];
update_count = our->ourq_count;
for (j = 0; j < update_count; j++) {
our = update_bufs[i];
update_count = our->ourq_count;
for (j = 0; j < update_count; j++) {
+ struct lu_object_conf conf;
+
update = object_update_request_get(our, j, NULL);
update = object_update_request_get(our, j, NULL);
+ if (update->ou_type == OUT_CREATE)
+ conf.loc_flags = LOC_F_NEW;
+ else
+ conf.loc_flags = 0;
- dt_obj = dt_locate(env, dt, &update->ou_fid);
+ dt_obj = dt_locate_at(env, dt, &update->ou_fid,
+ dt->dd_lu_dev.ld_site->ls_top_dev, &conf);
if (IS_ERR(dt_obj))
GOTO(out, rc = PTR_ERR(dt_obj));
if (IS_ERR(dt_obj))
GOTO(out, rc = PTR_ERR(dt_obj));
dt_obj = dt_locate(env, tdtd->tdtd_dt, fid);
if (IS_ERR(dt_obj)) {
rc = PTR_ERR(dt_obj);
dt_obj = dt_locate(env, tdtd->tdtd_dt, fid);
if (IS_ERR(dt_obj)) {
rc = PTR_ERR(dt_obj);
+ if (rc == -EREMCHG)
+ LCONSOLE_WARN("%.16s: hit invalid OI mapping "
+ "for "DFID" during recovering, "
+ "that may because auto scrub is "
+ "disabled on related MDT, and "
+ "will cause recovery failure. "
+ "Please enable auto scrub and "
+ "retry the recovery.\n",
+ tdtd->tdtd_lut->lut_obd->obd_name,
+ PFID(fid));
+
break;
}
sub_dt_obj = dt_object_child(dt_obj);
break;
}
sub_dt_obj = dt_object_child(dt_obj);
done
echo "prepared $(date)."
cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!"
done
echo "prepared $(date)."
cleanup_mount $MOUNT > /dev/null || error "Fail to stop client!"
+
+ # sync local transactions on every MDT
+ do_nodes $(comma_list $(mdts_nodes)) \
+ "$LCTL set_param -n osd*.*MDT*.force_sync=1"
+
+ # wait for a while to cancel update logs after transactions committed.
+ sleep 3
+
+ # sync again to guarantee all things done.
+ do_nodes $(comma_list $(mdts_nodes)) \
+ "$LCTL set_param -n osd*.*MDT*.force_sync=1"
+
for n in $(seq $MDSCOUNT); do
echo "stop mds$n"
stop mds$n > /dev/null || error "Fail to stop MDS$n!"
for n in $(seq $MDSCOUNT); do
echo "stop mds$n"
stop mds$n > /dev/null || error "Fail to stop MDS$n!"
$LCTL set_param fail_val=3 fail_loc=0x190
local n
$LCTL set_param fail_val=3 fail_loc=0x190
local n
for n in $(seq $MDSCOUNT); do
for n in $(seq $MDSCOUNT); do
- stat $DIR/$tdir/mds$n/${tfile}800 ||
- error "(17) Failed to stat mds$n/${tfile}800"
+ stat $DIR/$tdir/mds$n/${tfile}800 &
+ pids[$n]=$!
- scrub_check_status 18 scanning
+ sleep 3
+
+ scrub_check_status 17 scanning
do_nodes $(comma_list $(mdts_nodes)) \
$LCTL set_param fail_loc=0 fail_val=0
do_nodes $(comma_list $(mdts_nodes)) \
$LCTL set_param fail_loc=0 fail_val=0
+ for n in $(seq $MDSCOUNT); do
+ wait ${pids[$n]} || error "(18) Fail to stat mds$n/${tfile}800"
+ done
+
scrub_check_status 19 completed
scrub_check_flags 20 ""
}
scrub_check_status 19 completed
scrub_check_flags 20 ""
}