From: Vladimir Saveliev Date: Wed, 10 Nov 2021 08:40:50 +0000 (+0300) Subject: LU-15095 target: lbug_on_grant_miscount module parameter X-Git-Tag: 2.14.57~95 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=2c787065441ee60c6c163dc77851d0964f81a89c LU-15095 target: lbug_on_grant_miscount module parameter Some tests have hit "lctl: error invoking upcall" when setting the lbug_on_grant_miscount tunable parameter. Instead, define lbug_on_grant_miscount as a ptlrpc module parameter, similar to how it is done for ldiskfs_track_declares_assert. Change-Id: I9cd0f9fa75b37539b23443bbcbb3445c87318ab1 Fixes: bb5d81ea95 ("LU-14543 target: prevent overflowing of tgd->tgd_tot_granted") Test-Parameters: trivial Signed-off-by: Vladimir Saveliev Reviewed-on: https://review.whamcloud.com/45521 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andrew Perepechko Reviewed-by: James Simmons Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/lu_target.h b/lustre/include/lu_target.h index c86ba79..2a361ac 100644 --- a/lustre/include/lu_target.h +++ b/lustre/include/lu_target.h @@ -134,9 +134,7 @@ struct tg_grants_data { int tgd_tot_granted_clients; /* shall we grant space to clients not * supporting OBD_CONNECT_GRANT_PARAM? 
*/ - unsigned int tgd_grant_compat_disable:1, - /* if 1 then LBUG on grant miscount, CERROR otherwise */ - tgd_lbug_on_grant_miscount:1; + int tgd_grant_compat_disable; /* protect all statfs-related counters */ spinlock_t tgd_osfs_lock; time64_t tgd_osfs_age; @@ -567,11 +565,6 @@ ssize_t grant_compat_disable_show(struct kobject *kobj, struct attribute *attr, ssize_t grant_compat_disable_store(struct kobject *kobj, struct attribute *attr, const char *buffer, size_t count); -ssize_t lbug_on_grant_miscount_show(struct kobject *kobj, - struct attribute *attr, char *buf); -ssize_t lbug_on_grant_miscount_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, size_t count); #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 16, 53, 0) ssize_t sync_lock_cancel_show(struct kobject *kobj, struct attribute *attr, char *buf); diff --git a/lustre/mdt/mdt_lproc.c b/lustre/mdt/mdt_lproc.c index 5bf933a..c6445fb 100644 --- a/lustre/mdt/mdt_lproc.c +++ b/lustre/mdt/mdt_lproc.c @@ -1378,7 +1378,6 @@ LUSTRE_RO_ATTR(tot_dirty); LUSTRE_RO_ATTR(tot_granted); LUSTRE_RO_ATTR(tot_pending); LUSTRE_RW_ATTR(grant_compat_disable); -LUSTRE_RW_ATTR(lbug_on_grant_miscount); LUSTRE_RO_ATTR(instance); LUSTRE_RO_ATTR(num_exports); @@ -1388,7 +1387,6 @@ static struct attribute *mdt_attrs[] = { &lustre_attr_tot_granted.attr, &lustre_attr_tot_pending.attr, &lustre_attr_grant_compat_disable.attr, - &lustre_attr_lbug_on_grant_miscount.attr, &lustre_attr_instance.attr, &lustre_attr_recovery_time_hard.attr, &lustre_attr_recovery_time_soft.attr, diff --git a/lustre/ofd/lproc_ofd.c b/lustre/ofd/lproc_ofd.c index f9015cd..acff6b9 100644 --- a/lustre/ofd/lproc_ofd.c +++ b/lustre/ofd/lproc_ofd.c @@ -1021,7 +1021,6 @@ LUSTRE_RO_ATTR(tot_dirty); LUSTRE_RO_ATTR(tot_granted); LUSTRE_RO_ATTR(tot_pending); LUSTRE_RW_ATTR(grant_compat_disable); -LUSTRE_RW_ATTR(lbug_on_grant_miscount); LUSTRE_RO_ATTR(instance); LUSTRE_RO_ATTR(num_exports); @@ -1096,7 +1095,6 @@ static struct attribute *ofd_attrs[] = { 
&lustre_attr_tot_granted.attr, &lustre_attr_tot_pending.attr, &lustre_attr_grant_compat_disable.attr, - &lustre_attr_lbug_on_grant_miscount.attr, &lustre_attr_instance.attr, &lustre_attr_recovery_time_hard.attr, &lustre_attr_recovery_time_soft.attr, diff --git a/lustre/target/tgt_grant.c b/lustre/target/tgt_grant.c index 2e1df78..37d1ba8 100644 --- a/lustre/target/tgt_grant.c +++ b/lustre/target/tgt_grant.c @@ -78,6 +78,10 @@ #include "tgt_internal.h" +int lbug_on_grant_miscount; +module_param(lbug_on_grant_miscount, int, 0644); +MODULE_PARM_DESC(lbug_on_grant_miscount, "LBUG on grant miscount"); + /* Clients typically hold 2x their max_rpcs_in_flight of grant space */ #define TGT_GRANT_SHRINK_LIMIT(exp) (2ULL * 8 * exp_max_brw_size(exp)) @@ -969,9 +973,10 @@ static long tgt_grant_alloc(struct obd_export *exp, u64 curgrant, CERROR("%s: cli %s/%p grant %ld want %llu current %llu\n", obd->obd_name, exp->exp_client_uuid.uuid, exp, ted->ted_grant, want, curgrant); - spin_unlock(&tgd->tgd_grant_lock); - if (tgd->tgd_lbug_on_grant_miscount) + if (lbug_on_grant_miscount) { + spin_unlock(&tgd->tgd_grant_lock); LBUG(); + } } CDEBUG(D_CACHE, @@ -1692,60 +1697,3 @@ ssize_t grant_compat_disable_store(struct kobject *kobj, return count; } EXPORT_SYMBOL(grant_compat_disable_store); - -/** - * Show lbug_on_grant_miscount mode. - * - * @kobj kobject embedded in obd_device - * @attr unused - * @buf buf used by sysfs to print out data - * - * Return: string length of @buf output on success - */ -ssize_t lbug_on_grant_miscount_show(struct kobject *kobj, - struct attribute *attr, char *buf) -{ - struct obd_device *obd = container_of(kobj, struct obd_device, - obd_kset.kobj); - struct tg_grants_data *tgd = &obd->u.obt.obt_lut->lut_tgd; - - return scnprintf(buf, PAGE_SIZE, "%u\n", - tgd->tgd_lbug_on_grant_miscount); -} -EXPORT_SYMBOL(lbug_on_grant_miscount_show); - -/** - * Change lbug on grant miscount mode. 
- * - * Setting tgd_lbug_on_grant_miscount to 1 makes tgt_alloc_grant() to - * LBUG on apparently wrong ted->ted_grant - * - * @kobj kobject embedded in obd_device - * @attr unused - * @buffer string which represents mode - * 1: use LBUG on grant miscount - * 0: use CERROR on grant miscount - * @count @buffer length - * - * Return: @count on success - * negative number on error - */ -ssize_t lbug_on_grant_miscount_store(struct kobject *kobj, - struct attribute *attr, - const char *buffer, size_t count) -{ - struct obd_device *obd = container_of(kobj, struct obd_device, - obd_kset.kobj); - struct tg_grants_data *tgd = &obd->u.obt.obt_lut->lut_tgd; - bool val; - int rc; - - rc = kstrtobool(buffer, &val); - if (rc) - return rc; - - tgd->tgd_lbug_on_grant_miscount = val; - - return count; -} -EXPORT_SYMBOL(lbug_on_grant_miscount_store); diff --git a/lustre/target/tgt_main.c b/lustre/target/tgt_main.c index 88e6f94..76ccece 100644 --- a/lustre/target/tgt_main.c +++ b/lustre/target/tgt_main.c @@ -533,7 +533,6 @@ int tgt_init(const struct lu_env *env, struct lu_target *lut, tgd->tgd_tot_granted = 0; tgd->tgd_tot_pending = 0; tgd->tgd_grant_compat_disable = 0; - tgd->tgd_lbug_on_grant_miscount = 0; /* populate cached statfs data */ osfs = &tgt_th_info(env)->tti_u.osfs; diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index b5b87ec..e1af875 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -753,6 +753,7 @@ load_modules_local() { fi load_module ../lnet/klnds/$LNETLND load_module obdclass/obdclass + MODOPTS_PTLRPC=${MODOPTS_PTLRPC:-"lbug_on_grant_miscount=1"} load_module ptlrpc/ptlrpc load_module ptlrpc/gss/ptlrpc_gss load_module fld/fld @@ -5359,7 +5360,6 @@ init_param_vars () { (( MDS1_VERSION <= $(version_code 2.13.52) )) || do_nodes $(comma_list $(mdts_nodes)) \ "$LCTL set_param lod.*.mdt_hash=crush" - do_node $(mgs_node) "$LCTL set_param -P *.*.lbug_on_grant_miscount=1" return 0 }