Whamcloud - gitweb
LU-14543 target: prevent overflowing of tgd->tgd_tot_granted 29/42129/14
authorVladimir Saveliev <vlaidimir.saveliev@hpe.com>
Fri, 19 Mar 2021 12:08:47 +0000 (15:08 +0300)
committerOleg Drokin <green@whamcloud.com>
Thu, 30 Sep 2021 13:27:01 +0000 (13:27 +0000)
If tgd->tgd_tot_granted < ted->ted_grant then there should not be:
   tgd->tgd_tot_granted -= ted->ted_grant;
which breaks tgd->tgd_tot_granted.
In case of obvious ted->ted_grant damage, recalculate
tgd->tgd_tot_granted using list of exports.

The same change is made for tgd->tgd_tot_dirty.

This patch also adds sanity check for exp->exp_target_data.ted_grant
increase in tgt_grant_alloc() to catch grant counting corruption as
soon as it happened. By default, the detected corruption is
CERROR()-ed, if needed that can be switched to LBUG() using lctl
set_param *.*.lbug_on_grant_miscount.
test-framework.sh:init_param_vars() enables LBUG().

Fixes: af2d3ac30e ("LU-11939 tgt: Do not assert during grant cleanup")
Change-Id: I36ba7496f7b72b4881e98c06ec254a8eefd4c13f
Signed-off-by: Vladimir Saveliev <vlaidimir.saveliev@hpe.com>
Cray-bug-id: LUS-9875
Reviewed-on: https://review.whamcloud.com/42129
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Alexander Boyko <alexander.boyko@hpe.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/lu_target.h
lustre/mdt/mdt_lproc.c
lustre/ofd/lproc_ofd.c
lustre/target/tgt_grant.c
lustre/target/tgt_main.c
lustre/tests/test-framework.sh

index 880bbb1..0711a2f 100644 (file)
@@ -134,7 +134,9 @@ struct tg_grants_data {
        int                      tgd_tot_granted_clients;
        /* shall we grant space to clients not
         * supporting OBD_CONNECT_GRANT_PARAM? */
        int                      tgd_tot_granted_clients;
        /* shall we grant space to clients not
         * supporting OBD_CONNECT_GRANT_PARAM? */
-       int                      tgd_grant_compat_disable;
+       unsigned int             tgd_grant_compat_disable:1,
+       /* if 1 then LBUG on grant miscount, CERROR otherwise */
+                                tgd_lbug_on_grant_miscount:1;
        /* protect all statfs-related counters */
        spinlock_t               tgd_osfs_lock;
        time64_t                 tgd_osfs_age;
        /* protect all statfs-related counters */
        spinlock_t               tgd_osfs_lock;
        time64_t                 tgd_osfs_age;
@@ -550,6 +552,11 @@ ssize_t grant_compat_disable_show(struct kobject *kobj, struct attribute *attr,
 ssize_t grant_compat_disable_store(struct kobject *kobj,
                                   struct attribute *attr,
                                   const char *buffer, size_t count);
 ssize_t grant_compat_disable_store(struct kobject *kobj,
                                   struct attribute *attr,
                                   const char *buffer, size_t count);
+ssize_t lbug_on_grant_miscount_show(struct kobject *kobj,
+                                   struct attribute *attr, char *buf);
+ssize_t lbug_on_grant_miscount_store(struct kobject *kobj,
+                                    struct attribute *attr,
+                                    const char *buffer, size_t count);
 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 16, 53, 0)
 ssize_t sync_lock_cancel_show(struct kobject *kobj,
                              struct attribute *attr, char *buf);
 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 16, 53, 0)
 ssize_t sync_lock_cancel_show(struct kobject *kobj,
                              struct attribute *attr, char *buf);
index 1d09ad4..6798d8a 100644 (file)
@@ -1378,6 +1378,7 @@ LUSTRE_RO_ATTR(tot_dirty);
 LUSTRE_RO_ATTR(tot_granted);
 LUSTRE_RO_ATTR(tot_pending);
 LUSTRE_RW_ATTR(grant_compat_disable);
 LUSTRE_RO_ATTR(tot_granted);
 LUSTRE_RO_ATTR(tot_pending);
 LUSTRE_RW_ATTR(grant_compat_disable);
+LUSTRE_RW_ATTR(lbug_on_grant_miscount);
 LUSTRE_RO_ATTR(instance);
 
 LUSTRE_RO_ATTR(num_exports);
 LUSTRE_RO_ATTR(instance);
 
 LUSTRE_RO_ATTR(num_exports);
@@ -1387,6 +1388,7 @@ static struct attribute *mdt_attrs[] = {
        &lustre_attr_tot_granted.attr,
        &lustre_attr_tot_pending.attr,
        &lustre_attr_grant_compat_disable.attr,
        &lustre_attr_tot_granted.attr,
        &lustre_attr_tot_pending.attr,
        &lustre_attr_grant_compat_disable.attr,
+       &lustre_attr_lbug_on_grant_miscount.attr,
        &lustre_attr_instance.attr,
        &lustre_attr_recovery_time_hard.attr,
        &lustre_attr_recovery_time_soft.attr,
        &lustre_attr_instance.attr,
        &lustre_attr_recovery_time_hard.attr,
        &lustre_attr_recovery_time_soft.attr,
index e0bdfed..3d6eb4b 100644 (file)
@@ -1035,6 +1035,7 @@ LUSTRE_RO_ATTR(tot_dirty);
 LUSTRE_RO_ATTR(tot_granted);
 LUSTRE_RO_ATTR(tot_pending);
 LUSTRE_RW_ATTR(grant_compat_disable);
 LUSTRE_RO_ATTR(tot_granted);
 LUSTRE_RO_ATTR(tot_pending);
 LUSTRE_RW_ATTR(grant_compat_disable);
+LUSTRE_RW_ATTR(lbug_on_grant_miscount);
 LUSTRE_RO_ATTR(instance);
 
 LUSTRE_RO_ATTR(num_exports);
 LUSTRE_RO_ATTR(instance);
 
 LUSTRE_RO_ATTR(num_exports);
@@ -1109,6 +1110,7 @@ static struct attribute *ofd_attrs[] = {
        &lustre_attr_tot_granted.attr,
        &lustre_attr_tot_pending.attr,
        &lustre_attr_grant_compat_disable.attr,
        &lustre_attr_tot_granted.attr,
        &lustre_attr_tot_pending.attr,
        &lustre_attr_grant_compat_disable.attr,
+       &lustre_attr_lbug_on_grant_miscount.attr,
        &lustre_attr_instance.attr,
        &lustre_attr_recovery_time_hard.attr,
        &lustre_attr_recovery_time_soft.attr,
        &lustre_attr_instance.attr,
        &lustre_attr_recovery_time_hard.attr,
        &lustre_attr_recovery_time_soft.attr,
index 72416e0..2e1df78 100644 (file)
@@ -884,6 +884,7 @@ static void tgt_grant_check(const struct lu_env *env, struct obd_export *exp,
  *                             have
  * \param[in] left             remaining free space with granted space taken
  *                             out
  *                             have
  * \param[in] left             remaining free space with granted space taken
  *                             out
+ * \param[in] chunk            grant allocation unit
  * \param[in] conservative     if set to true, the server should be cautious
  *                             and limit how much space is granted back to the
  *                             client. Otherwise, the server should try hard to
  * \param[in] conservative     if set to true, the server should be cautious
  *                             and limit how much space is granted back to the
  *                             client. Otherwise, the server should try hard to
@@ -964,12 +965,13 @@ static long tgt_grant_alloc(struct obd_export *exp, u64 curgrant,
        tgd->tgd_tot_granted += grant;
        ted->ted_grant += grant;
 
        tgd->tgd_tot_granted += grant;
        ted->ted_grant += grant;
 
-       if (ted->ted_grant < 0) {
+       if (unlikely(ted->ted_grant < 0 || ted->ted_grant > want + chunk)) {
                CERROR("%s: cli %s/%p grant %ld want %llu current %llu\n",
                       obd->obd_name, exp->exp_client_uuid.uuid, exp,
                       ted->ted_grant, want, curgrant);
                spin_unlock(&tgd->tgd_grant_lock);
                CERROR("%s: cli %s/%p grant %ld want %llu current %llu\n",
                       obd->obd_name, exp->exp_client_uuid.uuid, exp,
                       ted->ted_grant, want, curgrant);
                spin_unlock(&tgd->tgd_grant_lock);
-               LBUG();
+               if (tgd->tgd_lbug_on_grant_miscount)
+                       LBUG();
        }
 
        CDEBUG(D_CACHE,
        }
 
        CDEBUG(D_CACHE,
@@ -1092,13 +1094,35 @@ void tgt_grant_discard(struct obd_export *exp)
 
        tgd = &lut->lut_tgd;
        spin_lock(&tgd->tgd_grant_lock);
 
        tgd = &lut->lut_tgd;
        spin_lock(&tgd->tgd_grant_lock);
-       if (tgd->tgd_tot_granted < ted->ted_grant) {
-               CERROR("%s: tot_granted %llu < cli %s/%p ted_grant %ld\n",
-                      obd->obd_name, tgd->tgd_tot_granted,
-                      exp->exp_client_uuid.uuid, exp, ted->ted_grant);
+       if (unlikely(tgd->tgd_tot_granted < ted->ted_grant ||
+                    tgd->tgd_tot_dirty < ted->ted_dirty)) {
+               struct obd_export *e;
+               u64 ttg = 0;
+               u64 ttd = 0;
+
+               list_for_each_entry(e, &obd->obd_exports, exp_obd_chain) {
+                       LASSERT(exp != e);
+                       ttg += e->exp_target_data.ted_grant;
+                       ttg += e->exp_target_data.ted_pending;
+                       ttd += e->exp_target_data.ted_dirty;
+               }
+               if (tgd->tgd_tot_granted < ted->ted_grant)
+                       CERROR("%s: cli %s/%p: tot_granted %llu < ted_grant %ld, corrected to %llu",
+                              obd->obd_name,  exp->exp_client_uuid.uuid, exp,
+                              tgd->tgd_tot_granted, ted->ted_grant, ttg);
+               if (tgd->tgd_tot_dirty < ted->ted_dirty)
+                       CERROR("%s: cli %s/%p: tot_dirty %llu < ted_dirty %ld, corrected to %llu",
+                              obd->obd_name, exp->exp_client_uuid.uuid, exp,
+                              tgd->tgd_tot_dirty, ted->ted_dirty, ttd);
+               tgd->tgd_tot_granted = ttg;
+               tgd->tgd_tot_dirty = ttd;
+       } else {
+               tgd->tgd_tot_granted -= ted->ted_grant;
+               tgd->tgd_tot_dirty -= ted->ted_dirty;
        }
        }
-       tgd->tgd_tot_granted -= ted->ted_grant;
        ted->ted_grant = 0;
        ted->ted_grant = 0;
+       ted->ted_dirty = 0;
+
        if (tgd->tgd_tot_pending < ted->ted_pending) {
                CERROR("%s: tot_pending %llu < cli %s/%p ted_pending %ld\n",
                       obd->obd_name, tgd->tgd_tot_pending,
        if (tgd->tgd_tot_pending < ted->ted_pending) {
                CERROR("%s: tot_pending %llu < cli %s/%p ted_pending %ld\n",
                       obd->obd_name, tgd->tgd_tot_pending,
@@ -1106,13 +1130,6 @@ void tgt_grant_discard(struct obd_export *exp)
        }
        /* tgd_tot_pending is handled in tgt_grant_commit as bulk
         * commmits */
        }
        /* tgd_tot_pending is handled in tgt_grant_commit as bulk
         * commmits */
-       if (tgd->tgd_tot_dirty < ted->ted_dirty) {
-               CERROR("%s: tot_dirty %llu < cli %s/%p ted_dirty %ld\n",
-                      obd->obd_name, tgd->tgd_tot_dirty,
-                      exp->exp_client_uuid.uuid, exp, ted->ted_dirty);
-       }
-       tgd->tgd_tot_dirty -= ted->ted_dirty;
-       ted->ted_dirty = 0;
        spin_unlock(&tgd->tgd_grant_lock);
 }
 EXPORT_SYMBOL(tgt_grant_discard);
        spin_unlock(&tgd->tgd_grant_lock);
 }
 EXPORT_SYMBOL(tgt_grant_discard);
@@ -1675,3 +1692,60 @@ ssize_t grant_compat_disable_store(struct kobject *kobj,
        return count;
 }
 EXPORT_SYMBOL(grant_compat_disable_store);
        return count;
 }
 EXPORT_SYMBOL(grant_compat_disable_store);
+
+/**
+ * Show lbug_on_grant_miscount mode.
+ *
+ * @kobj               kobject embedded in obd_device
+ * @attr               unused
+ * @buf                        buf used by sysfs to print out data
+ *
+ * Return:             string length of @buf output on success
+ */
+ssize_t lbug_on_grant_miscount_show(struct kobject *kobj,
+                                   struct attribute *attr, char *buf)
+{
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
+       struct tg_grants_data *tgd = &obd->u.obt.obt_lut->lut_tgd;
+
+       return scnprintf(buf, PAGE_SIZE, "%u\n",
+                        tgd->tgd_lbug_on_grant_miscount);
+}
+EXPORT_SYMBOL(lbug_on_grant_miscount_show);
+
+/**
+ * Change lbug on grant miscount mode.
+ *
+ * Setting tgd_lbug_on_grant_miscount to 1 makes tgt_alloc_grant() to
+ * LBUG on apparently wrong ted->ted_grant
+ *
+ * @kobj       kobject embedded in obd_device
+ * @attr       unused
+ * @buffer     string which represents mode
+ *             1: use LBUG on grant miscount
+ *             0: use CERROR on grant miscount
+ * @count      @buffer length
+ *
+ * Return:     @count on success
+ *             negative number on error
+ */
+ssize_t lbug_on_grant_miscount_store(struct kobject *kobj,
+                                    struct attribute *attr,
+                                    const char *buffer, size_t count)
+{
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
+       struct tg_grants_data *tgd = &obd->u.obt.obt_lut->lut_tgd;
+       bool val;
+       int rc;
+
+       rc = kstrtobool(buffer, &val);
+       if (rc)
+               return rc;
+
+       tgd->tgd_lbug_on_grant_miscount = val;
+
+       return count;
+}
+EXPORT_SYMBOL(lbug_on_grant_miscount_store);
index 76ccece..88e6f94 100644 (file)
@@ -533,6 +533,7 @@ int tgt_init(const struct lu_env *env, struct lu_target *lut,
        tgd->tgd_tot_granted = 0;
        tgd->tgd_tot_pending = 0;
        tgd->tgd_grant_compat_disable = 0;
        tgd->tgd_tot_granted = 0;
        tgd->tgd_tot_pending = 0;
        tgd->tgd_grant_compat_disable = 0;
+       tgd->tgd_lbug_on_grant_miscount = 0;
 
        /* populate cached statfs data */
        osfs = &tgt_th_info(env)->tti_u.osfs;
 
        /* populate cached statfs data */
        osfs = &tgt_th_info(env)->tti_u.osfs;
index c507425..2bc5b70 100755 (executable)
@@ -5343,6 +5343,7 @@ init_param_vars () {
        (( MDS1_VERSION <= $(version_code 2.13.52) )) ||
                do_nodes $(comma_list $(mdts_nodes)) \
                        "$LCTL set_param lod.*.mdt_hash=crush"
        (( MDS1_VERSION <= $(version_code 2.13.52) )) ||
                do_nodes $(comma_list $(mdts_nodes)) \
                        "$LCTL set_param lod.*.mdt_hash=crush"
+       do_node $(mgs_node) "$LCTL set_param -P *.*.lbug_on_grant_miscount=1"
        return 0
 }
 
        return 0
 }
 
@@ -6758,7 +6759,7 @@ check_grant() {
 
        # sync all the data and make sure no pending data on server
        do_nodes $clients sync
 
        # sync all the data and make sure no pending data on server
        do_nodes $clients sync
-       clients_up # initiate all idling connections
+       do_nodes $clients $LFS df # initiate all idling connections
 
        # get client grant
        cli_grant=$(grant_from_clients $clients)
 
        # get client grant
        cli_grant=$(grant_from_clients $clients)