From 5cbc184fbc10fd3d6c23ec3b8b687ffb34a64138 Mon Sep 17 00:00:00 2001 From: Vladimir Saveliev Date: Fri, 19 Mar 2021 15:08:47 +0300 Subject: [PATCH] LU-14543 target: prevent overflowing of tgd->tgd_tot_granted If tgd->tgd_tot_granted < ted->ted_grant then there should not be: tgd->tgd_tot_granted -= ted->ted_grant; which breaks tgd->tgd_tot_granted. In case of obvious ted->ted_grant damage, recalculate tgd->tgd_tot_granted using list of exports. The same change is made for tgd->tgd_tot_dirty. This patch also adds sanity check for exp->exp_target_data.ted_grant increase in tgt_grant_alloc() to catch grant counting corruption as soon as it happened. Lustre-change: https://review.whamcloud.com/45474 Lustre-commit: bb5d81ea95502fb5709e176b561b70aa5280ee07 Fixes: af2d3ac30e ("LU-11939 tgt: Do not assert during grant cleanup") Change-Id: I36ba7496f7b72b4881e98c06ec254a8eefd4c13f Signed-off-by: Vladimir Saveliev Reviewed-by: Andreas Dilger Reviewed-by: Alexander Boyko Reviewed-by: Oleg Drokin Signed-off-by: Mikhail Pershin Reviewed-on: https://review.whamcloud.com/45490 Tested-by: jenkins Tested-by: Maloo --- lustre/include/lu_target.h | 2 +- lustre/target/tgt_grant.c | 43 ++++++++++++++++++++++++++++-------------- lustre/tests/test-framework.sh | 2 +- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/lustre/include/lu_target.h b/lustre/include/lu_target.h index ddd18fc..eb0aa75 100644 --- a/lustre/include/lu_target.h +++ b/lustre/include/lu_target.h @@ -133,7 +133,7 @@ struct tg_grants_data { int tgd_tot_granted_clients; /* shall we grant space to clients not * supporting OBD_CONNECT_GRANT_PARAM? */ - int tgd_grant_compat_disable; + unsigned int tgd_grant_compat_disable:1; /* protect all statfs-related counters */ spinlock_t tgd_osfs_lock; time64_t tgd_osfs_age; diff --git a/lustre/target/tgt_grant.c b/lustre/target/tgt_grant.c index e522c0e..e25f735 100644 --- a/lustre/target/tgt_grant.c +++ b/lustre/target/tgt_grant.c @@ -884,6 +884,7 @@ static void tgt_grant_check(const struct lu_env *env, struct obd_export *exp, * have * \param[in] left remaining free space with granted space taken * out + * \param[in] chunk grant allocation unit * \param[in] conservative if set to true, the server should be cautious * and limit how much space is granted back to the * client. Otherwise, the server should try hard to @@ -964,12 +965,11 @@ static long tgt_grant_alloc(struct obd_export *exp, u64 curgrant, tgd->tgd_tot_granted += grant; ted->ted_grant += grant; - if (ted->ted_grant < 0) { + if (unlikely(ted->ted_grant < 0 || ted->ted_grant > want + chunk)) { CERROR("%s: cli %s/%p grant %ld want %llu current %llu\n", obd->obd_name, exp->exp_client_uuid.uuid, exp, ted->ted_grant, want, curgrant); spin_unlock(&tgd->tgd_grant_lock); - LBUG(); } CDEBUG(D_CACHE, @@ -1092,13 +1092,35 @@ void tgt_grant_discard(struct obd_export *exp) tgd = &lut->lut_tgd; spin_lock(&tgd->tgd_grant_lock); - if (tgd->tgd_tot_granted < ted->ted_grant) { - CERROR("%s: tot_granted %llu < cli %s/%p ted_grant %ld\n", - obd->obd_name, tgd->tgd_tot_granted, - exp->exp_client_uuid.uuid, exp, ted->ted_grant); + if (unlikely(tgd->tgd_tot_granted < ted->ted_grant || + tgd->tgd_tot_dirty < ted->ted_dirty)) { + struct obd_export *e; + u64 ttg = 0; + u64 ttd = 0; + + list_for_each_entry(e, &obd->obd_exports, exp_obd_chain) { + LASSERT(exp != e); + ttg += e->exp_target_data.ted_grant; + ttg += e->exp_target_data.ted_pending; + ttd += e->exp_target_data.ted_dirty; + } + if (tgd->tgd_tot_granted < ted->ted_grant) + CERROR("%s: cli %s/%p: tot_granted %llu < ted_grant %ld, corrected to %llu", + obd->obd_name, exp->exp_client_uuid.uuid, exp, + tgd->tgd_tot_granted, ted->ted_grant, ttg); + if (tgd->tgd_tot_dirty < ted->ted_dirty) + CERROR("%s: cli %s/%p: tot_dirty %llu < ted_dirty %ld, corrected to %llu", + obd->obd_name, exp->exp_client_uuid.uuid, exp, + tgd->tgd_tot_dirty, ted->ted_dirty, ttd); + tgd->tgd_tot_granted = ttg; + tgd->tgd_tot_dirty = ttd; + } else { + tgd->tgd_tot_granted -= ted->ted_grant; + tgd->tgd_tot_dirty -= ted->ted_dirty; } - tgd->tgd_tot_granted -= ted->ted_grant; ted->ted_grant = 0; + ted->ted_dirty = 0; + if (tgd->tgd_tot_pending < ted->ted_pending) { CERROR("%s: tot_pending %llu < cli %s/%p ted_pending %ld\n", obd->obd_name, tgd->tgd_tot_pending, @@ -1106,13 +1128,6 @@ void tgt_grant_discard(struct obd_export *exp) } /* tgd_tot_pending is handled in tgt_grant_commit as bulk * commmits */ - if (tgd->tgd_tot_dirty < ted->ted_dirty) { - CERROR("%s: tot_dirty %llu < cli %s/%p ted_dirty %ld\n", - obd->obd_name, tgd->tgd_tot_dirty, - exp->exp_client_uuid.uuid, exp, ted->ted_dirty); - } - tgd->tgd_tot_dirty -= ted->ted_dirty; - ted->ted_dirty = 0; spin_unlock(&tgd->tgd_grant_lock); } EXPORT_SYMBOL(tgt_grant_discard); diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 66fce64..ef292ad 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -6341,7 +6341,7 @@ check_grant() { # sync all the data and make sure no pending data on server do_nodes $clients sync - clients_up # initiate all idling connections + do_nodes $clients $LFS df # initiate all idling connections # get client grant client_grant=$(do_nodes $clients \ -- 1.8.3.1