Whamcloud - gitweb
LU-15095 target: lbug_on_grant_miscount module parameter 21/45521/5
author Vladimir Saveliev <c17830@cray.com>
Wed, 10 Nov 2021 08:40:50 +0000 (11:40 +0300)
committer Oleg Drokin <green@whamcloud.com>
Thu, 23 Dec 2021 07:19:22 +0000 (07:19 +0000)
Some tests have hit "lctl: error invoking upcall" when setting the
lbug_on_grant_miscount tunable parameter.  Instead of a per-target
tunable, define the lbug_on_grant_miscount flag as a ptlrpc module
parameter, similar to how it is done for ldiskfs_track_declares_assert.

Change-Id: I9cd0f9fa75b37539b23443bbcbb3445c87318ab1
Fixes: bb5d81ea95 ("LU-14543 target: prevent overflowing of tgd->tgd_tot_granted")
Test-Parameters: trivial
Signed-off-by: Vladimir Saveliev <vlaidimir.saveliev@hpe.com>
Reviewed-on: https://review.whamcloud.com/45521
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andrew Perepechko <andrew.perepechko@hpe.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/lu_target.h
lustre/mdt/mdt_lproc.c
lustre/ofd/lproc_ofd.c
lustre/target/tgt_grant.c
lustre/target/tgt_main.c
lustre/tests/test-framework.sh

index c86ba79..2a361ac 100644 (file)
@@ -134,9 +134,7 @@ struct tg_grants_data {
        int                      tgd_tot_granted_clients;
        /* shall we grant space to clients not
         * supporting OBD_CONNECT_GRANT_PARAM? */
        int                      tgd_tot_granted_clients;
        /* shall we grant space to clients not
         * supporting OBD_CONNECT_GRANT_PARAM? */
-       unsigned int             tgd_grant_compat_disable:1,
-       /* if 1 then LBUG on grant miscount, CERROR otherwise */
-                                tgd_lbug_on_grant_miscount:1;
+       int                      tgd_grant_compat_disable;
        /* protect all statfs-related counters */
        spinlock_t               tgd_osfs_lock;
        time64_t                 tgd_osfs_age;
        /* protect all statfs-related counters */
        spinlock_t               tgd_osfs_lock;
        time64_t                 tgd_osfs_age;
@@ -567,11 +565,6 @@ ssize_t grant_compat_disable_show(struct kobject *kobj, struct attribute *attr,
 ssize_t grant_compat_disable_store(struct kobject *kobj,
                                   struct attribute *attr,
                                   const char *buffer, size_t count);
 ssize_t grant_compat_disable_store(struct kobject *kobj,
                                   struct attribute *attr,
                                   const char *buffer, size_t count);
-ssize_t lbug_on_grant_miscount_show(struct kobject *kobj,
-                                   struct attribute *attr, char *buf);
-ssize_t lbug_on_grant_miscount_store(struct kobject *kobj,
-                                    struct attribute *attr,
-                                    const char *buffer, size_t count);
 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 16, 53, 0)
 ssize_t sync_lock_cancel_show(struct kobject *kobj,
                              struct attribute *attr, char *buf);
 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 16, 53, 0)
 ssize_t sync_lock_cancel_show(struct kobject *kobj,
                              struct attribute *attr, char *buf);
index 5bf933a..c6445fb 100644 (file)
@@ -1378,7 +1378,6 @@ LUSTRE_RO_ATTR(tot_dirty);
 LUSTRE_RO_ATTR(tot_granted);
 LUSTRE_RO_ATTR(tot_pending);
 LUSTRE_RW_ATTR(grant_compat_disable);
 LUSTRE_RO_ATTR(tot_granted);
 LUSTRE_RO_ATTR(tot_pending);
 LUSTRE_RW_ATTR(grant_compat_disable);
-LUSTRE_RW_ATTR(lbug_on_grant_miscount);
 LUSTRE_RO_ATTR(instance);
 
 LUSTRE_RO_ATTR(num_exports);
 LUSTRE_RO_ATTR(instance);
 
 LUSTRE_RO_ATTR(num_exports);
@@ -1388,7 +1387,6 @@ static struct attribute *mdt_attrs[] = {
        &lustre_attr_tot_granted.attr,
        &lustre_attr_tot_pending.attr,
        &lustre_attr_grant_compat_disable.attr,
        &lustre_attr_tot_granted.attr,
        &lustre_attr_tot_pending.attr,
        &lustre_attr_grant_compat_disable.attr,
-       &lustre_attr_lbug_on_grant_miscount.attr,
        &lustre_attr_instance.attr,
        &lustre_attr_recovery_time_hard.attr,
        &lustre_attr_recovery_time_soft.attr,
        &lustre_attr_instance.attr,
        &lustre_attr_recovery_time_hard.attr,
        &lustre_attr_recovery_time_soft.attr,
index f9015cd..acff6b9 100644 (file)
@@ -1021,7 +1021,6 @@ LUSTRE_RO_ATTR(tot_dirty);
 LUSTRE_RO_ATTR(tot_granted);
 LUSTRE_RO_ATTR(tot_pending);
 LUSTRE_RW_ATTR(grant_compat_disable);
 LUSTRE_RO_ATTR(tot_granted);
 LUSTRE_RO_ATTR(tot_pending);
 LUSTRE_RW_ATTR(grant_compat_disable);
-LUSTRE_RW_ATTR(lbug_on_grant_miscount);
 LUSTRE_RO_ATTR(instance);
 
 LUSTRE_RO_ATTR(num_exports);
 LUSTRE_RO_ATTR(instance);
 
 LUSTRE_RO_ATTR(num_exports);
@@ -1096,7 +1095,6 @@ static struct attribute *ofd_attrs[] = {
        &lustre_attr_tot_granted.attr,
        &lustre_attr_tot_pending.attr,
        &lustre_attr_grant_compat_disable.attr,
        &lustre_attr_tot_granted.attr,
        &lustre_attr_tot_pending.attr,
        &lustre_attr_grant_compat_disable.attr,
-       &lustre_attr_lbug_on_grant_miscount.attr,
        &lustre_attr_instance.attr,
        &lustre_attr_recovery_time_hard.attr,
        &lustre_attr_recovery_time_soft.attr,
        &lustre_attr_instance.attr,
        &lustre_attr_recovery_time_hard.attr,
        &lustre_attr_recovery_time_soft.attr,
index 2e1df78..37d1ba8 100644 (file)
 
 #include "tgt_internal.h"
 
 
 #include "tgt_internal.h"
 
+int lbug_on_grant_miscount;
+module_param(lbug_on_grant_miscount, int, 0644);
+MODULE_PARM_DESC(lbug_on_grant_miscount, "LBUG on grant miscount");
+
 /* Clients typically hold 2x their max_rpcs_in_flight of grant space */
 #define TGT_GRANT_SHRINK_LIMIT(exp)    (2ULL * 8 * exp_max_brw_size(exp))
 
 /* Clients typically hold 2x their max_rpcs_in_flight of grant space */
 #define TGT_GRANT_SHRINK_LIMIT(exp)    (2ULL * 8 * exp_max_brw_size(exp))
 
@@ -969,9 +973,10 @@ static long tgt_grant_alloc(struct obd_export *exp, u64 curgrant,
                CERROR("%s: cli %s/%p grant %ld want %llu current %llu\n",
                       obd->obd_name, exp->exp_client_uuid.uuid, exp,
                       ted->ted_grant, want, curgrant);
                CERROR("%s: cli %s/%p grant %ld want %llu current %llu\n",
                       obd->obd_name, exp->exp_client_uuid.uuid, exp,
                       ted->ted_grant, want, curgrant);
-               spin_unlock(&tgd->tgd_grant_lock);
-               if (tgd->tgd_lbug_on_grant_miscount)
+               if (lbug_on_grant_miscount) {
+                       spin_unlock(&tgd->tgd_grant_lock);
                        LBUG();
                        LBUG();
+               }
        }
 
        CDEBUG(D_CACHE,
        }
 
        CDEBUG(D_CACHE,
@@ -1692,60 +1697,3 @@ ssize_t grant_compat_disable_store(struct kobject *kobj,
        return count;
 }
 EXPORT_SYMBOL(grant_compat_disable_store);
        return count;
 }
 EXPORT_SYMBOL(grant_compat_disable_store);
-
-/**
- * Show lbug_on_grant_miscount mode.
- *
- * @kobj               kobject embedded in obd_device
- * @attr               unused
- * @buf                        buf used by sysfs to print out data
- *
- * Return:             string length of @buf output on success
- */
-ssize_t lbug_on_grant_miscount_show(struct kobject *kobj,
-                                   struct attribute *attr, char *buf)
-{
-       struct obd_device *obd = container_of(kobj, struct obd_device,
-                                             obd_kset.kobj);
-       struct tg_grants_data *tgd = &obd->u.obt.obt_lut->lut_tgd;
-
-       return scnprintf(buf, PAGE_SIZE, "%u\n",
-                        tgd->tgd_lbug_on_grant_miscount);
-}
-EXPORT_SYMBOL(lbug_on_grant_miscount_show);
-
-/**
- * Change lbug on grant miscount mode.
- *
- * Setting tgd_lbug_on_grant_miscount to 1 makes tgt_alloc_grant() to
- * LBUG on apparently wrong ted->ted_grant
- *
- * @kobj       kobject embedded in obd_device
- * @attr       unused
- * @buffer     string which represents mode
- *             1: use LBUG on grant miscount
- *             0: use CERROR on grant miscount
- * @count      @buffer length
- *
- * Return:     @count on success
- *             negative number on error
- */
-ssize_t lbug_on_grant_miscount_store(struct kobject *kobj,
-                                    struct attribute *attr,
-                                    const char *buffer, size_t count)
-{
-       struct obd_device *obd = container_of(kobj, struct obd_device,
-                                             obd_kset.kobj);
-       struct tg_grants_data *tgd = &obd->u.obt.obt_lut->lut_tgd;
-       bool val;
-       int rc;
-
-       rc = kstrtobool(buffer, &val);
-       if (rc)
-               return rc;
-
-       tgd->tgd_lbug_on_grant_miscount = val;
-
-       return count;
-}
-EXPORT_SYMBOL(lbug_on_grant_miscount_store);
index 88e6f94..76ccece 100644 (file)
@@ -533,7 +533,6 @@ int tgt_init(const struct lu_env *env, struct lu_target *lut,
        tgd->tgd_tot_granted = 0;
        tgd->tgd_tot_pending = 0;
        tgd->tgd_grant_compat_disable = 0;
        tgd->tgd_tot_granted = 0;
        tgd->tgd_tot_pending = 0;
        tgd->tgd_grant_compat_disable = 0;
-       tgd->tgd_lbug_on_grant_miscount = 0;
 
        /* populate cached statfs data */
        osfs = &tgt_th_info(env)->tti_u.osfs;
 
        /* populate cached statfs data */
        osfs = &tgt_th_info(env)->tti_u.osfs;
index b5b87ec..e1af875 100755 (executable)
@@ -753,6 +753,7 @@ load_modules_local() {
        fi
        load_module ../lnet/klnds/$LNETLND
        load_module obdclass/obdclass
        fi
        load_module ../lnet/klnds/$LNETLND
        load_module obdclass/obdclass
+       MODOPTS_PTLRPC=${MODOPTS_PTLRPC:-"lbug_on_grant_miscount=1"}
        load_module ptlrpc/ptlrpc
        load_module ptlrpc/gss/ptlrpc_gss
        load_module fld/fld
        load_module ptlrpc/ptlrpc
        load_module ptlrpc/gss/ptlrpc_gss
        load_module fld/fld
@@ -5359,7 +5360,6 @@ init_param_vars () {
        (( MDS1_VERSION <= $(version_code 2.13.52) )) ||
                do_nodes $(comma_list $(mdts_nodes)) \
                        "$LCTL set_param lod.*.mdt_hash=crush"
        (( MDS1_VERSION <= $(version_code 2.13.52) )) ||
                do_nodes $(comma_list $(mdts_nodes)) \
                        "$LCTL set_param lod.*.mdt_hash=crush"
-       do_node $(mgs_node) "$LCTL set_param -P *.*.lbug_on_grant_miscount=1"
        return 0
 }
 
        return 0
 }