From dc52a88cde1e7cea093b25fc9a15509fe0ac527a Mon Sep 17 00:00:00 2001 From: Sergey Cheremencev Date: Wed, 24 Oct 2018 13:23:43 +0300 Subject: [PATCH] LU-11765 ofd: return EAGAIN during 1st CLEANUP_ORPHAN During the 1st CLEANUP_ORPHAN after failover some objects could be absent - they haven't been recreated yet. The issue exists when the MDS last_id is much greater than the OST last_id and ofd should recreate thousands of objects. Some of these objects could be assigned to a FID and requested by a client through a glimpse RPC. Thus, if an object is not found, return EAGAIN instead of ENOENT during the 1st CLEANUP_ORPHAN. The patch also adds a test to reproduce the issue. The test adds a delay to osd_trans_commit_cb(), causing a large number of OST objects not to be written to the disk after failover. It then checks that all objects have been successfully recreated after failover. The test works only with the FAILURE_MODE=HARD option. Cray-bug-id: LUS-6414 Change-Id: Ia6899b4c1c35e1681f49faf1cb93a501ad159ec2 Signed-off-by: Sergey Cheremencev Reviewed-on: https://es-gerrit.dev.cray.com/154151 Reviewed-by: Alexander Boyko Reviewed-by: Andriy Skulysh Tested-by: Alexander Lezhoev Reviewed-on: https://review.whamcloud.com/33836 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Alexandr Boyko Reviewed-by: Oleg Drokin --- lustre/include/obd_support.h | 1 + lustre/ofd/ofd_dev.c | 2 ++ lustre/ofd/ofd_fs.c | 1 + lustre/ofd/ofd_internal.h | 3 ++- lustre/ofd/ofd_lvb.c | 18 +++++++++++++++- lustre/osd-ldiskfs/osd_handler.c | 1 + lustre/tests/replay-ost-single.sh | 43 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 67 insertions(+), 2 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index d958bae..ce83162 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -341,6 +341,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_OST_INTEGRITY_FAULT 0x243 #define OBD_FAIL_OST_INTEGRITY_CMP 0x244 #define OBD_FAIL_OST_DISCONNECT_DELAY 0x245 +#define 
OBD_FAIL_OST_DELAY_TRANS 0x246 #define OBD_FAIL_LDLM 0x300 #define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301 diff --git a/lustre/ofd/ofd_dev.c b/lustre/ofd/ofd_dev.c index 03f8893..af33796 100644 --- a/lustre/ofd/ofd_dev.c +++ b/lustre/ofd/ofd_dev.c @@ -1715,6 +1715,8 @@ static int ofd_create_hdl(struct tgt_session_info *tsi) EXIT; ofd_counter_incr(exp, LPROC_OFD_STATS_CREATE, tsi->tsi_jobid, 1); + if (unlikely(!oseq->os_last_id_synced)) + oseq->os_last_id_synced = 1; out: mutex_unlock(&oseq->os_create_lock); out_nolock: diff --git a/lustre/ofd/ofd_fs.c b/lustre/ofd/ofd_fs.c index 8be08a3..f8c3e2d 100644 --- a/lustre/ofd/ofd_fs.c +++ b/lustre/ofd/ofd_fs.c @@ -407,6 +407,7 @@ struct ofd_seq *ofd_seq_load(const struct lu_env *env, struct ofd_device *ofd, mutex_init(&oseq->os_create_lock); spin_lock_init(&oseq->os_last_oid_lock); ostid_set_seq(&oseq->os_oi, seq); + oseq->os_last_id_synced = 0; atomic_set(&oseq->os_refc, 1); diff --git a/lustre/ofd/ofd_internal.h b/lustre/ofd/ofd_internal.h index c76124b..afc210b9 100644 --- a/lustre/ofd/ofd_internal.h +++ b/lustre/ofd/ofd_internal.h @@ -106,7 +106,8 @@ struct ofd_seq { struct mutex os_create_lock; atomic_t os_refc; struct dt_object *os_lastid_obj; - unsigned long os_destroys_in_progress:1; + unsigned long os_destroys_in_progress:1, + os_last_id_synced:1; }; struct ofd_device { diff --git a/lustre/ofd/ofd_lvb.c b/lustre/ofd/ofd_lvb.c index 7b46388..7c21683 100644 --- a/lustre/ofd/ofd_lvb.c +++ b/lustre/ofd/ofd_lvb.c @@ -126,8 +126,24 @@ static int ofd_lvbo_init(const struct lu_env *env, struct ldlm_resource *res) GOTO(out_lvb, rc = PTR_ERR(fo)); rc = ofd_attr_get(env, fo, &info->fti_attr); - if (rc) + if (rc) { + struct ofd_seq *oseq; + __u64 seq; + + /* Object could be recreated during the first + * CLEANUP_ORPHAN request. */ + if (rc == -ENOENT) { + seq = fid_seq(&info->fti_fid); + oseq = ofd_seq_load(env, ofd, fid_seq_is_idif(seq) ? 
+ FID_SEQ_OST_MDT0 : seq); + if (!IS_ERR_OR_NULL(oseq)) { + if (!oseq->os_last_id_synced) + rc = -EAGAIN; + ofd_seq_put(env, oseq); + } + } GOTO(out_obj, rc); + } lvb->lvb_size = info->fti_attr.la_size; lvb->lvb_blocks = info->fti_attr.la_blocks; diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index f1f50d7..adc7b4a 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -1707,6 +1707,7 @@ static void osd_trans_commit_cb(struct super_block *sb, if (error) CERROR("transaction @0x%p commit error: %d\n", th, error); + OBD_FAIL_TIMEOUT(OBD_FAIL_OST_DELAY_TRANS, 40); /* call per-transaction callbacks if any */ list_for_each_entry_safe(dcb, tmp, &oh->ot_commit_dcb_list, dcb_linkage) { diff --git a/lustre/tests/replay-ost-single.sh b/lustre/tests/replay-ost-single.sh index f7d4d4a..9764e6b 100755 --- a/lustre/tests/replay-ost-single.sh +++ b/lustre/tests/replay-ost-single.sh @@ -439,6 +439,49 @@ test_10() { } run_test 10 "conflicting PW & PR locks on a client" +test_12() { + [ $FAILURE_MODE != "HARD" ] && + skip "Test needs FAILURE_MODE HARD" && return 0 + remote_ost || { skip "need remote OST" && return 0; } + + local tmp=$TMP/$tdir + local dir=$DIR/$tdir + declare -a pids + + + mkdir -p $tmp || error "can't create $tmp" + mkdir -p $dir || error "can't create $dir" + + $LFS setstripe -c 1 -i 0 $dir + + for i in `seq 1 10`; do mkdir $dir/d$i; done + + #define OBD_FAIL_OST_DELAY_TRANS 0x246 + do_facet ost1 "$LCTL set_param fail_loc=0x246" || + error "can't set fail_loc" + + for i in `seq 1 10`; + do + createmany -o $dir/d$i/$(openssl rand -base64 12) 500 & + pids+=($!) + done + echo "Waiting createmany pids" + wait ${pids[@]} + + ls -lR $dir > $tmp/ls_r_out 2>&1& + local ls_pid=$! 
+ + facet_failover ost1 + + echo "starting wait for ls -l" + wait $ls_pid + grep "?\|No such file or directory" $tmp/ls_r_out && + error "Found file without object on OST" + rm -rf $tmp + rm -rf $dir +} +run_test 12 "check stat after OST failover" + complete $SECONDS check_and_cleanup_lustre exit_status -- 1.8.3.1