From 6aee406c84b6b8fddf08b560acfcdf7c13c97e63 Mon Sep 17 00:00:00 2001
From: Lai Siyao
Date: Wed, 30 Mar 2022 17:50:22 -0400
Subject: [PATCH] LU-14719 lod: distributed transaction check space

Distributed transaction failure may cause missing files or
disconnected directories. To avoid failure on disk full, check
remote MDT free space before transaction start.

The block/inode watermarks in obd_statfs_info are used to check
whether MDT has enough free blocks/inodes.

Add sanity 230x.

Signed-off-by: Lai Siyao
Change-Id: I0922e9c8668e8b842d313576bd68b52fa5d434ac
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/47039
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Qian Yingjin
Reviewed-by: Andreas Dilger
Reviewed-by: Oleg Drokin
---
Note: in replay-single test_111g the MDS version check uses ">=" inside
(( )). "-ge" is a test/[[ ]] operator, not an arithmetic one; inside (( ))
it is a syntax error, so the check always failed and the test was always
skipped.

 lustre/lod/lod_dev.c          | 59 ++++++++++++++++++++++++++++++++++++++++++-
 lustre/lod/lod_internal.h     |  3 ++-
 lustre/lod/lproc_lod.c        | 32 +++++++++++++++++++++++
 lustre/osp/osp_dev.c          |  6 +----
 lustre/osp/osp_precreate.c    | 25 ++++++++++--------
 lustre/tests/replay-single.sh | 16 +++++++-----
 lustre/tests/sanity.sh        | 42 ++++++++++++++++++++++++++++++
 7 files changed, 159 insertions(+), 24 deletions(-)

diff --git a/lustre/lod/lod_dev.c b/lustre/lod/lod_dev.c
index 7a69520..1c72ce4 100644
--- a/lustre/lod/lod_dev.c
+++ b/lustre/lod/lod_dev.c
@@ -1444,6 +1444,52 @@ static struct thandle *lod_trans_create(const struct lu_env *env,
 	return th;
 }
 
+/* A distributed transaction failure may leave objects missing or directories
+ * disconnected, so check remote target free space before transaction start.
+ */
+static int lod_trans_space_check(const struct lu_env *env,
+				 struct thandle *th)
+{
+	struct lod_thread_info *info = lod_env_info(env);
+	struct obd_statfs *sfs = &info->lti_osfs;
+	struct top_thandle *top_th = container_of(th, struct top_thandle,
+						  tt_super);
+	struct top_multiple_thandle *tmt = top_th->tt_multiple_thandle;
+	struct sub_thandle *st;
+	int rc;
+
+	if (likely(!tmt))
+		return 0;
+
+	list_for_each_entry(st, &tmt->tmt_sub_thandle_list, st_sub_list) {
+		struct dt_device *sub_dt;
+
+		if (st->st_sub_th == NULL)
+			continue;
+
+		if (st->st_sub_th == top_th->tt_master_sub_thandle)
+			continue;
+
+		sub_dt = st->st_sub_th->th_dev;
+		rc = dt_statfs(env, sub_dt, sfs);
+		if (rc) {
+			CDEBUG(D_INFO, "%s: fail - statfs error: rc = %d\n",
+			       sub_dt->dd_lu_dev.ld_obd->obd_name, rc);
+			return rc;
+		}
+
+		if (unlikely(sfs->os_state &
+			     (OS_STATFS_ENOINO | OS_STATFS_ENOSPC))) {
+			CDEBUG(D_INFO, "%s: fail - target state %x: rc = %d\n",
+			       sub_dt->dd_lu_dev.ld_obd->obd_name,
+			       sfs->os_state, -ENOSPC);
+			return -ENOSPC;
+		}
+	}
+
+	return 0;
+}
+
 /**
  * Implementation of dt_device_operations::dt_trans_start() for LOD
  *
@@ -1455,7 +1501,17 @@ static struct thandle *lod_trans_create(const struct lu_env *env,
 static int lod_trans_start(const struct lu_env *env, struct dt_device *dt,
 			   struct thandle *th)
 {
-	return top_trans_start(env, dt2lod_dev(dt)->lod_child, th);
+	struct lod_device *lod = dt2lod_dev(dt);
+
+	if (lod->lod_dist_txn_check_space) {
+		int rc;
+
+		rc = lod_trans_space_check(env, th);
+		if (rc)
+			return rc;
+	}
+
+	return top_trans_start(env, lod->lod_child, th);
 }
 
 static int lod_trans_cb_add(struct thandle *th,
@@ -1827,6 +1883,7 @@ static int lod_init0(const struct lu_env *env, struct lod_device *lod,
 	lu_tgt_descs_init(&lod->lod_ost_descs, false);
 	lu_qos_rr_init(&lod->lod_mdt_descs.ltd_qos.lq_rr);
 	lu_qos_rr_init(&lod->lod_ost_descs.ltd_qos.lq_rr);
+	lod->lod_dist_txn_check_space = 1;
 
 	RETURN(0);
 
diff --git a/lustre/lod/lod_internal.h b/lustre/lod/lod_internal.h
index 43a1c09..09863d9 100644
--- a/lustre/lod/lod_internal.h
+++ b/lustre/lod/lod_internal.h
@@ -115,7 +115,8 @@ struct lod_device {
 	unsigned int	      lod_recovery_completed:1,
 			      lod_initialized:1,
 			      lod_lmv_failout:1,
-			      lod_child_got_update_log:1;
+			      lod_child_got_update_log:1,
+			      lod_dist_txn_check_space:1;
 
 	/* protect ld_active_tgt_count, ltd_active and lod_md_root */
 	spinlock_t	      lod_lock;
diff --git a/lustre/lod/lproc_lod.c b/lustre/lod/lproc_lod.c
index 363b3a7..3130a72 100644
--- a/lustre/lod/lproc_lod.c
+++ b/lustre/lod/lproc_lod.c
@@ -1098,6 +1098,37 @@ out:
 }
 LUSTRE_RW_ATTR(mdt_hash);
 
+static ssize_t dist_txn_check_space_show(struct kobject *kobj,
+					 struct attribute *attr,
+					 char *buf)
+{
+	struct dt_device *dt = container_of(kobj, struct dt_device,
+					    dd_kobj);
+	struct lod_device *lod = dt2lod_dev(dt);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", lod->lod_dist_txn_check_space);
+}
+
+static ssize_t dist_txn_check_space_store(struct kobject *kobj,
+					  struct attribute *attr,
+					  const char *buffer, size_t count)
+{
+	struct dt_device *dt = container_of(kobj, struct dt_device,
+					    dd_kobj);
+	struct lod_device *lod = dt2lod_dev(dt);
+	bool val = 0;
+	int rc;
+
+	rc = kstrtobool(buffer, &val);
+	if (rc)
+		return rc;
+
+	lod->lod_dist_txn_check_space = val;
+
+	return count;
+}
+LUSTRE_RW_ATTR(dist_txn_check_space);
+
 static const struct proc_ops lod_proc_mdt_fops = {
 	PROC_OWNER(THIS_MODULE)
 	.proc_open = lod_mdts_seq_open,
@@ -1319,6 +1350,7 @@ static struct attribute *lod_attrs[] = {
 	&lustre_attr_mdt_qos_prio_free.attr,
 	&lustre_attr_mdt_qos_threshold_rr.attr,
 	&lustre_attr_mdt_hash.attr,
+	&lustre_attr_dist_txn_check_space.attr,
 	NULL,
 };
 
diff --git a/lustre/osp/osp_dev.c b/lustre/osp/osp_dev.c
index a8cd2e5..1fae9ab 100644
--- a/lustre/osp/osp_dev.c
+++ b/lustre/osp/osp_dev.c
@@ -795,9 +795,6 @@ static int osp_statfs(const struct lu_env *env, struct dt_device *dev,
 		info->os_reserved_mb_high = d->opd_reserved_mb_high;
 	}
 
-	if (d->opd_pre == NULL)
-		RETURN(0);
-
 	CDEBUG(D_OTHER, "%s: %llu blocks, %llu free, %llu avail, "
 	       "%u bsize, %u reserved mb low, %u reserved mb high, "
 	       "%llu files, %llu free files\n", d->opd_obd->obd_name,
@@ -805,8 +802,7 @@ static int osp_statfs(const struct lu_env *env, struct dt_device *dev,
 	       d->opd_reserved_mb_low, d->opd_reserved_mb_high,
 	       sfs->os_files, sfs->os_ffree);
 
-
-	if (info && !info->os_enable_pre)
+	if (d->opd_pre == NULL || (info && !info->os_enable_pre))
 		RETURN(0);
 
 	/*
diff --git a/lustre/osp/osp_precreate.c b/lustre/osp/osp_precreate.c
index 672cd82..94c315b 100644
--- a/lustre/osp/osp_precreate.c
+++ b/lustre/osp/osp_precreate.c
@@ -161,7 +161,7 @@ static int osp_statfs_interpret(const struct lu_env *env,
 	if (d->opd_pre)
 		osp_pre_update_status_msfs(d, msfs, 0);
 	else
-		d->opd_statfs = *msfs;
+		osp_pre_update_msfs(d, msfs);
 
 	/* schedule next update */
 	maxage_ns = d->opd_statfs_maxage * NSEC_PER_SEC;
@@ -171,12 +171,13 @@ static int osp_statfs_interpret(const struct lu_env *env,
 	d->opd_statfs_update_in_progress = 0;
 
 	sfs = &d->opd_statfs;
-	CDEBUG(D_CACHE, "%s (%p): %llu blocks, %llu free, %llu avail, "
-	       "%u bsize, %u reserved mb low, %u reserved mb high,"
-	       "%llu files, %llu free files\n", d->opd_obd->obd_name, d,
-	       sfs->os_blocks, sfs->os_bfree, sfs->os_bavail, sfs->os_bsize,
-	       d->opd_reserved_mb_low, d->opd_reserved_mb_high,
-	       sfs->os_files, sfs->os_ffree);
+	CDEBUG(D_CACHE,
+	       "%s (%p): %llu blocks, %llu free, %llu avail, %u bsize, %u reserved mb low, %u reserved mb high, %u reserved ino low, %u reserved ino high, %llu files, %llu free files %#x\n",
+	       d->opd_obd->obd_name, d, sfs->os_blocks, sfs->os_bfree,
+	       sfs->os_bavail, sfs->os_bsize, d->opd_reserved_mb_low,
+	       d->opd_reserved_mb_high, d->opd_reserved_ino_low,
+	       d->opd_reserved_ino_high, sfs->os_files, sfs->os_ffree,
+	       sfs->os_state);
 
 	RETURN(0);
 out:
@@ -1062,8 +1063,8 @@ static void osp_pre_update_msfs(struct osp_device *d, struct obd_statfs *msfs)
 
 	if (unlikely(d->opd_reserved_ino_high == 0 &&
 		     d->opd_reserved_ino_low == 0)) {
-		/* Use ~0.1% by default to disallow distributed transactions,
-		 * and ~0.2% to allow, set both watermark
+		/* Use ~0.0001% by default to disallow distributed transactions,
+		 * and ~0.0002% to allow, set both watermark
 		 */
 		spin_lock(&d->opd_pre_lock);
 		if (d->opd_reserved_ino_high == 0 &&
@@ -1095,7 +1096,10 @@ static void osp_pre_update_msfs(struct osp_device *d, struct obd_statfs *msfs)
 	       d->opd_obd->obd_name, msfs->os_blocks, msfs->os_bfree,
 	       msfs->os_bavail, available_mb, d->opd_reserved_mb_high,
 	       msfs->os_files, msfs->os_ffree, msfs->os_state,
-	       d->opd_pre_status);
+	       d->opd_pre ? d->opd_pre_status : 0);
+
+	if (!d->opd_pre)
+		goto update;
 
 	if (msfs->os_state & (OS_STATFS_ENOINO | OS_STATFS_ENOSPC)) {
 		d->opd_pre_status = -ENOSPC;
@@ -1128,6 +1132,7 @@ static void osp_pre_update_msfs(struct osp_device *d, struct obd_statfs *msfs)
 		msfs->os_state |= OS_STATFS_NOPRECREATE;
 	/* else don't clear flags in new msfs->os_state sent from OST */
 
+update:
 	/* copy only new statfs state to make it visible to MDS threads */
 	if (&d->opd_statfs != msfs)
 		d->opd_statfs = *msfs;
diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh
index a0ac037ea..47989b8 100755
--- a/lustre/tests/replay-single.sh
+++ b/lustre/tests/replay-single.sh
@@ -4159,23 +4159,25 @@ test_111f() {
 run_test 111f "DNE: unlink striped dir, uncommit on MDT1, fail MDT1/MDT2"
 
 test_111g() {
-	[ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
-	[[ "$MDS1_VERSION" -ge $(version_code 2.7.56) ]] ||
+	(( $MDSCOUNT >= 2 )) || skip "needs >= 2 MDTs"
+	(( $MDS1_VERSION >= $(version_code 2.7.56) )) ||
 		skip "Need MDS version at least 2.7.56"
 
-	([ $FAILURE_MODE == "HARD" ] &&
-		[ "$(facet_host mds1)" == "$(facet_host mds2)" ]) &&
-		skip "MDTs needs to be on diff hosts for HARD fail mode" &&
-		return 0
+	([ $FAILURE_MODE != "HARD" ] ||
+		[ "$(facet_host mds1)" != "$(facet_host mds2)" ]) ||
+		skip "MDTs needs to be on diff hosts for HARD fail mode"
 
+	start_full_debug_logging
 	mkdir -p $DIR/$tdir
 	$LFS mkdir -i1 -c2 $DIR/$tdir/striped_dir
+	$LFS df -i
 	replay_barrier mds1
 	replay_barrier mds2
 	rm -rf $DIR/$tdir/striped_dir
 	fail mds1,mds2
 	$CHECKSTAT -t dir $DIR/$tdir/striped_dir &&
-			error "striped dir still exists"
+		error "striped dir still exists"
+	stop_full_debug_logging
 	return 0
 }
 run_test 111g "DNE: unlink striped dir, fail MDT1/MDT2"
diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh
index 119e91c..ff60160 100755
--- a/lustre/tests/sanity.sh
+++ b/lustre/tests/sanity.sh
@@ -21027,6 +21027,48 @@ test_230w() {
 }
 run_test 230w "non-recursive mode dir migration"
 
+test_230x() {
+	(( MDSCOUNT > 1 )) || skip "needs >= 2 MDTs"
+	(( MDS1_VERSION >= $(version_code 2.15.0) )) ||
+		skip "Need MDS version at least 2.15.0"
+
+	mkdir -p $DIR/$tdir || error "mkdir failed"
+	createmany -d $DIR/$tdir/sub 100 || error "createmany failed"
+
+	local mdt_name=$(mdtname_from_index 0)
+	local low=$(do_facet mds2 $LCTL get_param -n \
+		osp.*$mdt_name-osp-MDT0001.reserved_ino_low)
+	local high=$(do_facet mds2 $LCTL get_param -n \
+		osp.*$mdt_name-osp-MDT0001.reserved_ino_high)
+	local ffree=$($LFS df -i $MOUNT | awk "/$mdt_name/ { print \$4 }")
+	local maxage=$(do_facet mds2 $LCTL get_param -n \
+		osp.*$mdt_name-osp-MDT0001.maxage)
+
+	stack_trap "do_facet mds2 $LCTL set_param -n \
+		osp.*$mdt_name-osp-MDT0001.reserved_ino_low=$low \
+		osp.*$mdt_name-osp-MDT0001.reserved_ino_high=$high" EXIT
+	stack_trap "do_facet mds2 $LCTL set_param -n \
+		osp.*$mdt_name-osp-MDT0001.maxage=$maxage" EXIT
+
+	do_facet mds2 $LCTL set_param -n \
+		osp.*$mdt_name-osp-MDT0001.reserved_ino_low=$((ffree + 1))
+	do_facet mds2 $LCTL set_param -n osp.*$mdt_name-osp-MDT0001.maxage=1
+	sleep 4
+	$LFS migrate -m 1 -c $MDSCOUNT $DIR/$tdir &&
+		error "migrate $tdir should fail"
+
+	do_facet mds2 $LCTL set_param -n \
+		osp.*$mdt_name-osp-MDT0001.reserved_ino_low=$low
+	do_facet mds2 $LCTL set_param -n \
+		osp.*$mdt_name-osp-MDT0001.reserved_ino_high=$high
+	sleep 4
+	$LFS migrate -m 1 -c $MDSCOUNT $DIR/$tdir ||
+		error "migrate failed"
+	(( $($LFS getdirstripe -c $DIR/$tdir) == $MDSCOUNT )) ||
+		error "$tdir stripe count mismatch"
+}
+run_test 230x "dir migration check space"
+
 test_231a()
 {
 	# For simplicity this test assumes that max_pages_per_rpc
-- 
1.8.3.1