From bb5d81ea95502fb5709e176b561b70aa5280ee07 Mon Sep 17 00:00:00 2001 From: Vladimir Saveliev Date: Fri, 19 Mar 2021 15:08:47 +0300 Subject: [PATCH] LU-14543 target: prevent overflowing of tgd->tgd_tot_granted If tgd->tgd_tot_granted < ted->ted_grant then there should not be: tgd->tgd_tot_granted -= ted->ted_grant; which breaks tgd->tgd_tot_granted. In case of obvious ted->ted_grant damage, recalculate tgd->tgd_tot_granted using list of exports. The same change is made for tgd->tgd_tot_dirty. This patch also adds sanity check for exp->exp_target_data.ted_grant increase in tgt_grant_alloc() to catch grant counting corruption as soon as it happened. By default, the detected corruption is CERROR()-ed, if needed that can be switched to LBUG() using lctl set_param *.*.lbug_on_grant_miscount. test-framework.sh:init_param_vars() enables LBUG(). Fixes: af2d3ac30e ("LU-11939 tgt: Do not assert during grant cleanup") Change-Id: I36ba7496f7b72b4881e98c06ec254a8eefd4c13f Signed-off-by: Vladimir Saveliev Cray-bug-id: LUS-9875 Reviewed-on: https://review.whamcloud.com/42129 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Alexander Boyko Reviewed-by: Oleg Drokin --- lustre/include/lu_target.h | 9 +++- lustre/mdt/mdt_lproc.c | 2 + lustre/ofd/lproc_ofd.c | 2 + lustre/target/tgt_grant.c | 102 +++++++++++++++++++++++++++++++++++------ lustre/target/tgt_main.c | 1 + lustre/tests/test-framework.sh | 3 +- 6 files changed, 103 insertions(+), 16 deletions(-) diff --git a/lustre/include/lu_target.h b/lustre/include/lu_target.h index 880bbb1..0711a2f 100644 --- a/lustre/include/lu_target.h +++ b/lustre/include/lu_target.h @@ -134,7 +134,9 @@ struct tg_grants_data { int tgd_tot_granted_clients; /* shall we grant space to clients not * supporting OBD_CONNECT_GRANT_PARAM? */ - int tgd_grant_compat_disable; + unsigned int tgd_grant_compat_disable:1, + /* if 1 then LBUG on grant miscount, CERROR otherwise */ + tgd_lbug_on_grant_miscount:1; /* protect all statfs-related counters */ spinlock_t tgd_osfs_lock; time64_t tgd_osfs_age; @@ -550,6 +552,11 @@ ssize_t grant_compat_disable_show(struct kobject *kobj, struct attribute *attr, ssize_t grant_compat_disable_store(struct kobject *kobj, struct attribute *attr, const char *buffer, size_t count); +ssize_t lbug_on_grant_miscount_show(struct kobject *kobj, + struct attribute *attr, char *buf); +ssize_t lbug_on_grant_miscount_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, size_t count); #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 16, 53, 0) ssize_t sync_lock_cancel_show(struct kobject *kobj, struct attribute *attr, char *buf); diff --git a/lustre/mdt/mdt_lproc.c b/lustre/mdt/mdt_lproc.c index 1d09ad4..6798d8a 100644 --- a/lustre/mdt/mdt_lproc.c +++ b/lustre/mdt/mdt_lproc.c @@ -1378,6 +1378,7 @@ LUSTRE_RO_ATTR(tot_dirty); LUSTRE_RO_ATTR(tot_granted); LUSTRE_RO_ATTR(tot_pending); LUSTRE_RW_ATTR(grant_compat_disable); +LUSTRE_RW_ATTR(lbug_on_grant_miscount); LUSTRE_RO_ATTR(instance); LUSTRE_RO_ATTR(num_exports); @@ -1387,6 +1388,7 @@ static struct attribute *mdt_attrs[] = { &lustre_attr_tot_granted.attr, &lustre_attr_tot_pending.attr, &lustre_attr_grant_compat_disable.attr, + &lustre_attr_lbug_on_grant_miscount.attr, &lustre_attr_instance.attr, &lustre_attr_recovery_time_hard.attr, &lustre_attr_recovery_time_soft.attr, diff --git a/lustre/ofd/lproc_ofd.c b/lustre/ofd/lproc_ofd.c index e0bdfed..3d6eb4b 100644 --- a/lustre/ofd/lproc_ofd.c +++ b/lustre/ofd/lproc_ofd.c @@ -1035,6 +1035,7 @@ LUSTRE_RO_ATTR(tot_dirty); LUSTRE_RO_ATTR(tot_granted); LUSTRE_RO_ATTR(tot_pending); LUSTRE_RW_ATTR(grant_compat_disable); +LUSTRE_RW_ATTR(lbug_on_grant_miscount); LUSTRE_RO_ATTR(instance); LUSTRE_RO_ATTR(num_exports); @@ -1109,6 +1110,7 @@ static struct attribute *ofd_attrs[] = { &lustre_attr_tot_granted.attr, &lustre_attr_tot_pending.attr, &lustre_attr_grant_compat_disable.attr, + &lustre_attr_lbug_on_grant_miscount.attr, &lustre_attr_instance.attr, &lustre_attr_recovery_time_hard.attr, &lustre_attr_recovery_time_soft.attr, diff --git a/lustre/target/tgt_grant.c b/lustre/target/tgt_grant.c index 72416e0..2e1df78 100644 --- a/lustre/target/tgt_grant.c +++ b/lustre/target/tgt_grant.c @@ -884,6 +884,7 @@ static void tgt_grant_check(const struct lu_env *env, struct obd_export *exp, * have * \param[in] left remaining free space with granted space taken * out + * \param[in] chunk grant allocation unit * \param[in] conservative if set to true, the server should be cautious * and limit how much space is granted back to the * client. Otherwise, the server should try hard to @@ -964,12 +965,13 @@ static long tgt_grant_alloc(struct obd_export *exp, u64 curgrant, tgd->tgd_tot_granted += grant; ted->ted_grant += grant; - if (ted->ted_grant < 0) { + if (unlikely(ted->ted_grant < 0 || ted->ted_grant > want + chunk)) { CERROR("%s: cli %s/%p grant %ld want %llu current %llu\n", obd->obd_name, exp->exp_client_uuid.uuid, exp, ted->ted_grant, want, curgrant); spin_unlock(&tgd->tgd_grant_lock); - LBUG(); + if (tgd->tgd_lbug_on_grant_miscount) + LBUG(); } CDEBUG(D_CACHE, @@ -1092,13 +1094,35 @@ void tgt_grant_discard(struct obd_export *exp) tgd = &lut->lut_tgd; spin_lock(&tgd->tgd_grant_lock); - if (tgd->tgd_tot_granted < ted->ted_grant) { - CERROR("%s: tot_granted %llu < cli %s/%p ted_grant %ld\n", - obd->obd_name, tgd->tgd_tot_granted, - exp->exp_client_uuid.uuid, exp, ted->ted_grant); + if (unlikely(tgd->tgd_tot_granted < ted->ted_grant || + tgd->tgd_tot_dirty < ted->ted_dirty)) { + struct obd_export *e; + u64 ttg = 0; + u64 ttd = 0; + + list_for_each_entry(e, &obd->obd_exports, exp_obd_chain) { + LASSERT(exp != e); + ttg += e->exp_target_data.ted_grant; + ttg += e->exp_target_data.ted_pending; + ttd += e->exp_target_data.ted_dirty; + } + if (tgd->tgd_tot_granted < ted->ted_grant) + CERROR("%s: cli %s/%p: tot_granted %llu < ted_grant %ld, corrected to %llu", + obd->obd_name, exp->exp_client_uuid.uuid, exp, + tgd->tgd_tot_granted, ted->ted_grant, ttg); + if (tgd->tgd_tot_dirty < ted->ted_dirty) + CERROR("%s: cli %s/%p: tot_dirty %llu < ted_dirty %ld, corrected to %llu", + obd->obd_name, exp->exp_client_uuid.uuid, exp, + tgd->tgd_tot_dirty, ted->ted_dirty, ttd); + tgd->tgd_tot_granted = ttg; + tgd->tgd_tot_dirty = ttd; + } else { + tgd->tgd_tot_granted -= ted->ted_grant; + tgd->tgd_tot_dirty -= ted->ted_dirty; } - tgd->tgd_tot_granted -= ted->ted_grant; ted->ted_grant = 0; + ted->ted_dirty = 0; + if (tgd->tgd_tot_pending < ted->ted_pending) { CERROR("%s: tot_pending %llu < cli %s/%p ted_pending %ld\n", obd->obd_name, tgd->tgd_tot_pending, @@ -1106,13 +1130,6 @@ void tgt_grant_discard(struct obd_export *exp) } /* tgd_tot_pending is handled in tgt_grant_commit as bulk * commmits */ - if (tgd->tgd_tot_dirty < ted->ted_dirty) { - CERROR("%s: tot_dirty %llu < cli %s/%p ted_dirty %ld\n", - obd->obd_name, tgd->tgd_tot_dirty, - exp->exp_client_uuid.uuid, exp, ted->ted_dirty); - } - tgd->tgd_tot_dirty -= ted->ted_dirty; - ted->ted_dirty = 0; spin_unlock(&tgd->tgd_grant_lock); } EXPORT_SYMBOL(tgt_grant_discard); @@ -1675,3 +1692,60 @@ ssize_t grant_compat_disable_store(struct kobject *kobj, return count; } EXPORT_SYMBOL(grant_compat_disable_store); + +/** + * Show lbug_on_grant_miscount mode. + * + * @kobj kobject embedded in obd_device + * @attr unused + * @buf buf used by sysfs to print out data + * + * Return: string length of @buf output on success + */ +ssize_t lbug_on_grant_miscount_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct obd_device *obd = container_of(kobj, struct obd_device, + obd_kset.kobj); + struct tg_grants_data *tgd = &obd->u.obt.obt_lut->lut_tgd; + + return scnprintf(buf, PAGE_SIZE, "%u\n", + tgd->tgd_lbug_on_grant_miscount); +} +EXPORT_SYMBOL(lbug_on_grant_miscount_show); + +/** + * Change lbug on grant miscount mode. + * + * Setting tgd_lbug_on_grant_miscount to 1 makes tgt_alloc_grant() to + * LBUG on apparently wrong ted->ted_grant + * + * @kobj kobject embedded in obd_device + * @attr unused + * @buffer string which represents mode + * 1: use LBUG on grant miscount + * 0: use CERROR on grant miscount + * @count @buffer length + * + * Return: @count on success + * negative number on error + */ +ssize_t lbug_on_grant_miscount_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, size_t count) +{ + struct obd_device *obd = container_of(kobj, struct obd_device, + obd_kset.kobj); + struct tg_grants_data *tgd = &obd->u.obt.obt_lut->lut_tgd; + bool val; + int rc; + + rc = kstrtobool(buffer, &val); + if (rc) + return rc; + + tgd->tgd_lbug_on_grant_miscount = val; + + return count; +} +EXPORT_SYMBOL(lbug_on_grant_miscount_store); diff --git a/lustre/target/tgt_main.c b/lustre/target/tgt_main.c index 76ccece..88e6f94 100644 --- a/lustre/target/tgt_main.c +++ b/lustre/target/tgt_main.c @@ -533,6 +533,7 @@ int tgt_init(const struct lu_env *env, struct lu_target *lut, tgd->tgd_tot_granted = 0; tgd->tgd_tot_pending = 0; tgd->tgd_grant_compat_disable = 0; + tgd->tgd_lbug_on_grant_miscount = 0; /* populate cached statfs data */ osfs = &tgt_th_info(env)->tti_u.osfs; diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index c507425..2bc5b70 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -5343,6 +5343,7 @@ init_param_vars () { (( MDS1_VERSION <= $(version_code 2.13.52) )) || do_nodes $(comma_list $(mdts_nodes)) \ "$LCTL set_param lod.*.mdt_hash=crush" + do_node $(mgs_node) "$LCTL set_param -P *.*.lbug_on_grant_miscount=1" return 0 } @@ -6758,7 +6759,7 @@ check_grant() { # sync all the data and make sure no pending data on server do_nodes $clients sync - clients_up # initiate all idling connections + do_nodes $clients $LFS df # initiate all idling connections # get client grant cli_grant=$(grant_from_clients $clients) -- 1.8.3.1