From 9230561f268f9c3d7f84ac7824d7a1d3769a3dfe Mon Sep 17 00:00:00 2001 From: Alex Zhuravlev Date: Tue, 6 Jun 2017 12:05:57 +0400 Subject: [PATCH] LU-9498 osp: revert till a fix Revert "LU-8367 osp: orphan cleanup do not wait for reserved" LU-9498 was discovered with soak testing, the root cause is unknown at the moment. This reverts commit 1b3028ab142a1f605e6274a6019bb39d89ae8d25. Change-Id: I65016876f3e42345a06194771c9731e28a850adc Signed-off-by: Alex Zhuravlev Reviewed-on: https://review.whamcloud.com/27447 Tested-by: Jenkins Reviewed-by: Lai Siyao Reviewed-by: Niu Yawei Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/include/obd_support.h | 1 - lustre/osp/osp_dev.c | 8 ++- lustre/osp/osp_internal.h | 22 -------- lustre/osp/osp_precreate.c | 118 ++++++++++++++++++++--------------------- lustre/tests/recovery-small.sh | 47 ---------------- 5 files changed, 64 insertions(+), 132 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 0a3813b..433c2f8 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -249,7 +249,6 @@ extern char obd_jobid_var[]; #define OBD_FAIL_MDS_XATTR_REP 0x161 #define OBD_FAIL_MDS_TRACK_OVERFLOW 0x162 #define OBD_FAIL_MDS_LOV_CREATE_RACE 0x163 -#define OBD_FAIL_MDS_OSP_PRECREATE_WAIT 0x164 /* layout lock */ #define OBD_FAIL_MDS_NO_LL_GETATTR 0x170 diff --git a/lustre/osp/osp_dev.c b/lustre/osp/osp_dev.c index 546b6ba..7e3819f 100644 --- a/lustre/osp/osp_dev.c +++ b/lustre/osp/osp_dev.c @@ -747,7 +747,13 @@ static int osp_statfs(const struct lu_env *env, struct dt_device *dev, * how many objects are available for immediate creation */ spin_lock(&d->opd_pre_lock); - sfs->os_fprecreated = osp_objs_precreated(env, d); + LASSERTF(fid_seq(&d->opd_pre_last_created_fid) == + fid_seq(&d->opd_pre_used_fid), + "last_created "DFID", next_fid "DFID"\n", + PFID(&d->opd_pre_last_created_fid), + PFID(&d->opd_pre_used_fid)); + sfs->os_fprecreated = fid_oid(&d->opd_pre_last_created_fid) - + fid_oid(&d->opd_pre_used_fid); sfs->os_fprecreated -= d->opd_pre_reserved; LASSERTF(sfs->os_fprecreated <= OST_MAX_PRECREATE * 2, "last_created "DFID", next_fid "DFID", reserved %llu\n", diff --git a/lustre/osp/osp_internal.h b/lustre/osp/osp_internal.h index 774f37e..49ea455 100644 --- a/lustre/osp/osp_internal.h +++ b/lustre/osp/osp_internal.h @@ -602,28 +602,6 @@ static inline int osp_is_fid_client(struct osp_device *osp) return imp->imp_connect_data.ocd_connect_flags & OBD_CONNECT_FID; } -/** - * Return number of precreated objects - * - * A simple helper to calculate the number of precreated objects on the device. - * - * \param[in] env LU environment provided by the caller - * \param[in] osp OSP device - * - * \retval the number of the precreated objects - */ -static inline int osp_objs_precreated(const struct lu_env *env, - struct osp_device *osp) -{ - /* if pre_used_fid isn't initialized yet, - * then precreated hasn't started and no - * objects have been precreated */ - if (fid_seq(&osp->opd_pre_used_fid) == 0) - return 0; - return osp_fid_diff(&osp->opd_pre_last_created_fid, - &osp->opd_pre_used_fid); -} - struct object_update * update_buffer_get_update(struct object_update_request *request, unsigned int index); diff --git a/lustre/osp/osp_precreate.c b/lustre/osp/osp_precreate.c index c88eeb1..5e0c8c7 100644 --- a/lustre/osp/osp_precreate.c +++ b/lustre/osp/osp_precreate.c @@ -239,6 +239,23 @@ void osp_statfs_need_now(struct osp_device *d) } /** + * Return number of precreated objects + * + * A simple helper to calculate the number of precreated objects on the device. + * + * \param[in] env LU environment provided by the caller + * \param[in] osp OSP device + * + * \retval the number of the precreated objects + */ +static inline int osp_objs_precreated(const struct lu_env *env, + struct osp_device *osp) +{ + return osp_fid_diff(&osp->opd_pre_last_created_fid, + &osp->opd_pre_used_fid); +} + +/** * Check pool of precreated objects is nearly empty * * We should not wait till the pool of the precreated objects is exhausted, @@ -758,7 +775,6 @@ static int osp_precreate_cleanup_orphans(struct lu_env *env, { struct osp_thread_info *osi = osp_env_info(env); struct lu_fid *last_fid = &osi->osi_fid; - struct lu_fid tmp; struct ptlrpc_request *req = NULL; struct obd_import *imp; struct ost_body *body; @@ -771,15 +787,24 @@ static int osp_precreate_cleanup_orphans(struct lu_env *env, /* * wait for local recovery to finish, so we can cleanup orphans - * orphans are all objects since "last used" (assigned). we do not - * block waiting for all reservations as this can lead to a deadlock - * see LU-8972 for the details. + * orphans are all objects since "last used" (assigned), but + * there might be objects reserved and in some cases they won't + * be used. we can't cleanup them till we're sure they won't be + * used. also can't we allow new reservations because they may + * end up getting orphans being cleaned up below. so we block + * new reservations and wait till all reserved objects either + * user or released. */ spin_lock(&d->opd_pre_lock); d->opd_pre_recovering = 1; spin_unlock(&d->opd_pre_lock); - - l_wait_event(d->opd_pre_waitq, d->opd_recovery_completed || + /* + * The locking above makes sure the opd_pre_reserved check below will + * catch all osp_precreate_reserve() calls who find + * "!opd_pre_recovering". + */ + l_wait_event(d->opd_pre_waitq, + (!d->opd_pre_reserved && d->opd_recovery_completed) || !osp_precreate_running(d) || d->opd_got_disconnected, &lwi); if (!osp_precreate_running(d) || d->opd_got_disconnected) @@ -793,7 +818,6 @@ static int osp_precreate_cleanup_orphans(struct lu_env *env, LASSERT(!fid_is_zero(last_fid)); if (fid_oid(&d->opd_last_used_fid) < 2) { /* lastfid looks strange... ask OST */ - LCONSOLE_WARN("%s: refresh last id\n", d->opd_obd->obd_name); rc = osp_get_lastfid_from_ost(env, d); if (rc) GOTO(out, rc); @@ -820,21 +844,7 @@ static int osp_precreate_cleanup_orphans(struct lu_env *env, body->oa.o_flags = OBD_FL_DELORPHAN; body->oa.o_valid = OBD_MD_FLFLAGS | OBD_MD_FLGROUP; - /* cleanup objects upto used+reserved as we do not - * want to block the orphan cleanup procedure */ - spin_lock(&d->opd_pre_lock); - if (fid_seq(&d->opd_pre_used_fid) != 0) { - tmp = d->opd_pre_used_fid; - tmp.f_oid += d->opd_pre_reserved; - /* shrink current precreate window to let reserved - * already objects be created and block new - * precreations */ - d->opd_pre_last_created_fid = tmp; - } else { - tmp = d->opd_last_used_fid; - } - fid_to_ostid(&tmp, &body->oa.o_oi); - spin_unlock(&d->opd_pre_lock); + fid_to_ostid(&d->opd_last_used_fid, &body->oa.o_oi); ptlrpc_request_set_replen(req); @@ -851,9 +861,30 @@ static int osp_precreate_cleanup_orphans(struct lu_env *env, if (body == NULL) GOTO(out, rc = -EPROTO); - /* OST provides us with id new pool starts from in body->oa.o_id */ + /* + * OST provides us with id new pool starts from in body->oa.o_id + */ ostid_to_fid(last_fid, &body->oa.o_oi, d->opd_index); + spin_lock(&d->opd_pre_lock); + diff = osp_fid_diff(&d->opd_last_used_fid, last_fid); + if (diff > 0) { + d->opd_pre_create_count = OST_MIN_PRECREATE + diff; + d->opd_pre_last_created_fid = d->opd_last_used_fid; + } else { + d->opd_pre_create_count = OST_MIN_PRECREATE; + d->opd_pre_last_created_fid = *last_fid; + } + /* + * This empties the pre-creation pool and effectively blocks any new + * reservations. + */ + LASSERT(fid_oid(&d->opd_pre_last_created_fid) <= + LUSTRE_DATA_SEQ_MAX_WIDTH); + d->opd_pre_used_fid = d->opd_pre_last_created_fid; + d->opd_pre_create_slow = 0; + spin_unlock(&d->opd_pre_lock); + CDEBUG(D_HA, "%s: Got last_id "DFID" from OST, last_created "DFID "last_used is "DFID"\n", d->opd_obd->obd_name, PFID(last_fid), PFID(&d->opd_pre_last_created_fid), PFID(&d->opd_last_used_fid)); @@ -879,41 +910,12 @@ out: } else { wake_up(&d->opd_pre_user_waitq); } - GOTO(ret, rc); - } - - spin_lock(&d->opd_pre_lock); - d->opd_pre_recovering = 0; - spin_unlock(&d->opd_pre_lock); - - /* now we wait until all reserved objects are consumed or released, - * so that the window doesn't change. otherwise we can get objects - * with wrong FIDs */ - l_wait_event(d->opd_pre_waitq, d->opd_pre_reserved == 0 || - !osp_precreate_running(d) || d->opd_got_disconnected, &lwi); - if (!osp_precreate_running(d)) - GOTO(ret, rc = 0); - - spin_lock(&d->opd_pre_lock); - diff = osp_fid_diff(&d->opd_last_used_fid, last_fid); - if (diff > 0) { - d->opd_pre_create_count = OST_MIN_PRECREATE + diff; - d->opd_pre_last_created_fid = d->opd_last_used_fid; } else { - d->opd_pre_create_count = OST_MIN_PRECREATE; - d->opd_pre_last_created_fid = *last_fid; + spin_lock(&d->opd_pre_lock); + d->opd_pre_recovering = 0; + spin_unlock(&d->opd_pre_lock); } - /* - * This empties the pre-creation pool and effectively blocks any new - * reservations. - */ - LASSERT(fid_oid(&d->opd_pre_last_created_fid) <= - LUSTRE_DATA_SEQ_MAX_WIDTH); - d->opd_pre_used_fid = d->opd_pre_last_created_fid; - d->opd_pre_create_slow = 0; - spin_unlock(&d->opd_pre_lock); -ret: RETURN(rc); } @@ -1369,12 +1371,6 @@ int osp_precreate_reserve(const struct lu_env *env, struct osp_device *d) if (d->opd_pre_max_create_count == 0) RETURN(-ENOBUFS); - if (OBD_FAIL_PRECHECK(OBD_FAIL_MDS_OSP_PRECREATE_WAIT)) { - if (d->opd_index == cfs_fail_val) - OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_OSP_PRECREATE_WAIT, - obd_timeout); - } - /* * wait till: * - preallocation is done diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 4ae5c42..e8cf128 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -2685,53 +2685,6 @@ test_133() { } run_test 133 "don't fail on flock resend" -test_134() { - local file1 - local pid1 - local pid2 - local i - - [ $OSTCOUNT -lt 2 ] && skip "needs >= 2 OSTs" && return 0 - [[ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.8.59) ]] && - skip "Need MDS version at least 2.8.59" && return - - test_mkdir -p $DIR/$tdir - file1="$DIR/$tdir/file1" - file2="$DIR/$tdir/file2" - -#define OBD_FAIL_MDS_OSP_PRECREATE_WAIT 0x164 - # reserve stripe on ost1, block on ost2 - do_facet $SINGLEMDS \ - "lctl set_param fail_loc=0x80000164 fail_val=1" - $SETSTRIPE -c 2 -o 0,1 $file1 & - pid1=$! - sleep 1 - - # initiate recovery with orphan cleanup on ost1 - facet_failover ost1 - - # when OST1 recovery is over, the first setstripe should still - # have the object reserved, but that should not block new creates - # on OST1 - $SETSTRIPE -c 1 -o 0 $file2 & - pid2=$! - for ((i=0;i<$((TIMEOUT/2));i++)); do - if ! stat /proc/$pid2 >&/dev/null; then - echo "DONE!" - break - fi - echo "WAITING ..." - sleep 1 - done - if let "i >= (TIMEOUT/2)"; then - error "create seem to get blocked by recovery" - fi - wait $pid1 - wait $pid2 - return 0 -} -run_test 134 "MDT<>OST recovery don't block multistripe file creation" - complete $SECONDS check_and_cleanup_lustre exit_status -- 1.8.3.1