From 6e116213e3fd7d72686a6004a3e457c3120e6b52 Mon Sep 17 00:00:00 2001
From: Alex Zhuravlev
Date: Thu, 16 Sep 2021 11:20:18 +0300
Subject: [PATCH] LU-15010 mdc: add support for grant shrink

just re-use existing mechanism used in OSC

Signed-off-by: Alex Zhuravlev
Change-Id: I4cdca057d35eaff6493d047127f1fe5eee9e9620
Reviewed-on: https://review.whamcloud.com/44956
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Mike Pershin
Reviewed-by: Andreas Dilger
Reviewed-by: Oleg Drokin
---
 lustre/include/lustre_osc.h                   |   2 +
 lustre/include/uapi/linux/lustre/lustre_idl.h |   1 +
 lustre/llite/llite_lib.c                      |   1 +
 lustre/mdc/lproc_mdc.c                        | 120 ++++++++++++++++++++++++++
 lustre/mdt/mdt_handler.c                      |  66 ++++++++++++++
 lustre/mdt/mdt_internal.h                     |   1 +
 lustre/osc/osc_request.c                      |   2 +
 lustre/tests/sanity.sh                        |  45 ++++++++++
 8 files changed, 238 insertions(+)

diff --git a/lustre/include/lustre_osc.h b/lustre/include/lustre_osc.h
index e7c290d..3c2514a 100644
--- a/lustre/include/lustre_osc.h
+++ b/lustre/include/lustre_osc.h
@@ -671,6 +671,8 @@ int osc_punch_send(struct obd_export *exp, struct obdo *oa,
 		   obd_enqueue_update_f upcall, void *cookie);
 int osc_fallocate_base(struct obd_export *exp, struct obdo *oa,
 		       obd_enqueue_update_f upcall, void *cookie, int mode);
+void osc_update_next_shrink(struct client_obd *cli);
+void osc_schedule_grant_work(void);
 
 /* osc_io.c */
 int osc_io_submit(const struct lu_env *env, const struct cl_io_slice *ios,
diff --git a/lustre/include/uapi/linux/lustre/lustre_idl.h b/lustre/include/uapi/linux/lustre/lustre_idl.h
index 0e28b08..71a20f4 100644
--- a/lustre/include/uapi/linux/lustre/lustre_idl.h
+++ b/lustre/include/uapi/linux/lustre/lustre_idl.h
@@ -892,6 +892,7 @@ struct ptlrpc_body_v2 {
 				OBD_CONNECT_MULTIMODRPCS |\
 				OBD_CONNECT_SUBTREE | OBD_CONNECT_LARGE_ACL |\
 				OBD_CONNECT_GRANT_PARAM | \
+				OBD_CONNECT_GRANT_SHRINK | \
 				OBD_CONNECT_SHORTIO | OBD_CONNECT_FLAGS2)
 
 #define MDT_CONNECT_SUPPORTED2 (OBD_CONNECT2_FILE_SECCTX | \
diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c
index 0fb7d9a..e493791 100644
--- a/lustre/llite/llite_lib.c
+++ b/lustre/llite/llite_lib.c
@@ -324,6 +324,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
 				  OBD_CONNECT_SUBTREE |
 				  OBD_CONNECT_MULTIMODRPCS |
 				  OBD_CONNECT_GRANT_PARAM |
+				  OBD_CONNECT_GRANT_SHRINK |
 				  OBD_CONNECT_SHORTIO | OBD_CONNECT_FLAGS2;
 
 	data->ocd_connect_flags2 = OBD_CONNECT2_DIR_MIGRATE |
diff --git a/lustre/mdc/lproc_mdc.c b/lustre/mdc/lproc_mdc.c
index 3b1bbbc..796a68d 100644
--- a/lustre/mdc/lproc_mdc.c
+++ b/lustre/mdc/lproc_mdc.c
@@ -613,6 +613,122 @@ struct lprocfs_vars lprocfs_mdc_obd_vars[] = {
 	{ NULL }
 };
 
+/* Report cl_lost_grant of this MDC's client_obd. */
+static ssize_t cur_lost_grant_bytes_show(struct kobject *kobj,
+					 struct attribute *attr,
+					 char *buf)
+{
+	struct obd_device *obd = container_of(kobj, struct obd_device,
+					      obd_kset.kobj);
+	struct client_obd *cli = &obd->u.cli;
+
+	return scnprintf(buf, PAGE_SIZE, "%lu\n", cli->cl_lost_grant);
+}
+LUSTRE_RO_ATTR(cur_lost_grant_bytes);
+
+/* Report cl_dirty_grant of this MDC's client_obd. */
+static ssize_t cur_dirty_grant_bytes_show(struct kobject *kobj,
+					  struct attribute *attr,
+					  char *buf)
+{
+	struct obd_device *obd = container_of(kobj, struct obd_device,
+					      obd_kset.kobj);
+	struct client_obd *cli = &obd->u.cli;
+
+	return scnprintf(buf, PAGE_SIZE, "%lu\n", cli->cl_dirty_grant);
+}
+LUSTRE_RO_ATTR(cur_dirty_grant_bytes);
+
+/*
+ * Print 1 when grant shrinking is not disabled on the import and the
+ * GRANT_SHRINK connect flag is set in its connect data, otherwise 0.
+ */
+static ssize_t grant_shrink_show(struct kobject *kobj, struct attribute *attr,
+				 char *buf)
+{
+	struct obd_device *obd = container_of(kobj, struct obd_device,
+					      obd_kset.kobj);
+	struct obd_import *imp;
+	ssize_t len;
+
+	with_imp_locked(obd, imp, len)
+		len = scnprintf(buf, PAGE_SIZE, "%d\n",
+				!imp->imp_grant_shrink_disabled &&
+				OCD_HAS_FLAG(&imp->imp_connect_data,
+					     GRANT_SHRINK));
+
+	return len;
+}
+
+/*
+ * Parse a boolean from @buffer; a true value enables grant shrinking on the
+ * import, a false value disables it.  Returns @count or a negative errno.
+ */
+static ssize_t grant_shrink_store(struct kobject *kobj, struct attribute *attr,
+				  const char *buffer, size_t count)
+{
+	struct obd_device *obd = container_of(kobj, struct obd_device,
+					      obd_kset.kobj);
+	struct obd_import *imp;
+	bool val;
+	int rc;
+
+	rc = kstrtobool(buffer, &val);
+	if (rc)
+		return rc;
+
+	with_imp_locked(obd, imp, rc) {
+		spin_lock(&imp->imp_lock);
+		imp->imp_grant_shrink_disabled = !val;
+		spin_unlock(&imp->imp_lock);
+	}
+
+	return rc ?: count;
+}
+LUSTRE_RW_ATTR(grant_shrink);
+
+/* Report cl_grant_shrink_interval of this MDC's client_obd. */
+static ssize_t grant_shrink_interval_show(struct kobject *kobj,
+					  struct attribute *attr,
+					  char *buf)
+{
+	struct obd_device *obd = container_of(kobj, struct obd_device,
+					      obd_kset.kobj);
+
+	return scnprintf(buf, PAGE_SIZE, "%lld\n",
+			 obd->u.cli.cl_grant_shrink_interval);
+}
+
+/*
+ * Set cl_grant_shrink_interval from an unsigned integer; zero is rejected
+ * with -ERANGE.  osc_update_next_shrink() and osc_schedule_grant_work() are
+ * then called for this client.  Returns @count or a negative errno.
+ */
+static ssize_t grant_shrink_interval_store(struct kobject *kobj,
+					   struct attribute *attr,
+					   const char *buffer,
+					   size_t count)
+{
+	struct obd_device *obd = container_of(kobj, struct obd_device,
+					      obd_kset.kobj);
+	unsigned int val;
+	int rc;
+
+	rc = kstrtouint(buffer, 0, &val);
+	if (rc)
+		return rc;
+
+	if (val == 0)
+		return -ERANGE;
+
+	obd->u.cli.cl_grant_shrink_interval = val;
+	osc_update_next_shrink(&obd->u.cli);
+	osc_schedule_grant_work();
+
+	return count;
+}
+LUSTRE_RW_ATTR(grant_shrink_interval);
+
 static struct attribute *mdc_attrs[] = {
 	&lustre_attr_active.attr,
 	&lustre_attr_checksums.attr,
@@ -622,6 +738,10 @@ static struct attribute *mdc_attrs[] = {
 	&lustre_attr_mds_conn_uuid.attr,
 	&lustre_attr_conn_uuid.attr,
 	&lustre_attr_ping.attr,
+	&lustre_attr_grant_shrink.attr,
+	&lustre_attr_grant_shrink_interval.attr,
+	&lustre_attr_cur_lost_grant_bytes.attr,
+	&lustre_attr_cur_dirty_grant_bytes.attr,
 	NULL,
 };
 
diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c
index 82c7bee..a11a9bf 100644
--- a/lustre/mdt/mdt_handler.c
+++ b/lustre/mdt/mdt_handler.c
@@ -2621,6 +2621,69 @@ out:
 static int mdt_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
 			 void *karg, void __user *uarg);
 
+/*
+ * Handle OST_SET_INFO sent to the MDT.  Only KEY_GRANT_SHRINK is supported:
+ * the request is extended to RQF_OST_SET_GRANT_INFO, the client's ost_body is
+ * copied into the reply and passed to tgt_grant_prepare_read().  Any other
+ * key is rejected with -EOPNOTSUPP.
+ */
+int mdt_io_set_info(struct tgt_session_info *tsi)
+{
+	struct ptlrpc_request *req = tgt_ses_req(tsi);
+	struct ost_body *body = NULL, *repbody;
+	void *key, *val = NULL;
+	int keylen, vallen, rc = 0;
+	bool is_grant_shrink;
+
+	ENTRY;
+
+	key = req_capsule_client_get(tsi->tsi_pill, &RMF_SETINFO_KEY);
+	if (key == NULL) {
+		DEBUG_REQ(D_HA, req, "no set_info key");
+		RETURN(err_serious(-EFAULT));
+	}
+	keylen = req_capsule_get_size(tsi->tsi_pill, &RMF_SETINFO_KEY,
+				      RCL_CLIENT);
+
+	val = req_capsule_client_get(tsi->tsi_pill, &RMF_SETINFO_VAL);
+	if (val == NULL) {
+		DEBUG_REQ(D_HA, req, "no set_info val");
+		RETURN(err_serious(-EFAULT));
+	}
+	vallen = req_capsule_get_size(tsi->tsi_pill, &RMF_SETINFO_VAL,
+				      RCL_CLIENT);
+
+	is_grant_shrink = KEY_IS(KEY_GRANT_SHRINK);
+	if (is_grant_shrink)
+		/* In this case the value is actually an RMF_OST_BODY, so we
+		 * transmute the type of this PTLRPC */
+		req_capsule_extend(tsi->tsi_pill, &RQF_OST_SET_GRANT_INFO);
+
+	rc = req_capsule_server_pack(tsi->tsi_pill);
+	if (rc < 0)
+		RETURN(rc);
+
+	if (is_grant_shrink) {
+		body = req_capsule_client_get(tsi->tsi_pill, &RMF_OST_BODY);
+		repbody = req_capsule_server_get(tsi->tsi_pill, &RMF_OST_BODY);
+		/* either lookup may fail; never dereference a NULL body */
+		if (body == NULL || repbody == NULL)
+			RETURN(err_serious(-EFAULT));
+
+		*repbody = *body;
+
+		/** handle grant shrink, similar to a read request */
+		tgt_grant_prepare_read(tsi->tsi_env, tsi->tsi_exp,
+				       &repbody->oa);
+	} else {
+		CERROR("%s: Unsupported key %s\n",
+		       tgt_name(tsi->tsi_tgt), (char *)key);
+		rc = -EOPNOTSUPP;
+	}
+
+	RETURN(rc);
+}
+
 static int mdt_set_info(struct tgt_session_info *tsi)
 {
 	struct ptlrpc_request *req = tgt_ses_req(tsi);
@@ -5665,6 +5728,9 @@ TGT_OST_HDL(HAS_BODY | HAS_REPLY, OST_SYNC, mdt_data_sync),
 TGT_OST_HDL(HAS_BODY | HAS_REPLY | IS_MUTABLE, OST_FALLOCATE,
 							mdt_fallocate_hdl),
 TGT_OST_HDL(HAS_BODY | HAS_REPLY, OST_SEEK, tgt_lseek),
+TGT_RPC_HANDLER(OST_FIRST_OPC,
+		0, OST_SET_INFO, mdt_io_set_info,
+		&RQF_OBD_SET_INFO, LUSTRE_OST_VERSION),
 };
 
 static struct tgt_handler mdt_sec_ctx_ops[] = {
diff --git a/lustre/mdt/mdt_internal.h b/lustre/mdt/mdt_internal.h
index 7b3e302..e05d7ca 100644
--- a/lustre/mdt/mdt_internal.h
+++ b/lustre/mdt/mdt_internal.h
@@ -1388,6 +1388,7 @@ bool mdt_dom_client_has_lock(struct mdt_thread_info *info,
 void mdt_hp_brw(struct tgt_session_info *tsi);
 void mdt_hp_punch(struct tgt_session_info *tsi);
 int mdt_data_version_get(struct tgt_session_info *tsi);
+int mdt_io_set_info(struct tgt_session_info *tsi);
 
 /* grants */
 long mdt_grant_connect(const struct lu_env *env, struct obd_export *exp,
diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c
index 68da594..933fc8f 100644
--- a/lustre/osc/osc_request.c
+++ b/lustre/osc/osc_request.c
@@ -763,6 +763,7 @@ void osc_update_next_shrink(struct client_obd *cli)
 	CDEBUG(D_CACHE, "next time %lld to shrink grant\n",
 	       cli->cl_next_shrink_grant);
 }
+EXPORT_SYMBOL(osc_update_next_shrink);
 
 static void __osc_update_grant(struct client_obd *cli, u64 grant)
 {
@@ -971,6 +972,7 @@ void osc_schedule_grant_work(void)
 	cancel_delayed_work_sync(&work);
 	schedule_work(&work.work);
 }
+EXPORT_SYMBOL(osc_schedule_grant_work);
 
 /**
  * Start grant thread for returing grant to server for idle clients.
diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh
index bb32167..dbb5cc1 100755
--- a/lustre/tests/sanity.sh
+++ b/lustre/tests/sanity.sh
@@ -8829,6 +8829,51 @@ test_64f() {
 }
 run_test 64f "check grant consumption (with grant allocation)"
 
+test_64g() {
+	#[ $MDS1_VERSION -lt $(version_code 2.14.54) ] &&
+	#	skip "Need MDS version at least 2.14.54"
+
+	local mdts=$(comma_list $(mdts_nodes))
+	local i grant_dirty vm_dirty
+
+	local old=$($LCTL get_param mdc.$FSNAME-*.grant_shrink_interval |
+			tr '\n' ' ')
+	stack_trap "$LCTL set_param $old"
+
+	# generate dirty pages and increase dirty granted on MDT
+	stack_trap "rm -f $DIR/$tfile-*"
+	for (( i = 0; i < 10; i++)); do
+		$LFS setstripe -E 1M -L mdt $DIR/$tfile-$i ||
+			error "can't set stripe"
+		dd if=/dev/zero of=$DIR/$tfile-$i bs=128k count=1 ||
+			error "can't dd"
+		$LFS getstripe $DIR/$tfile-$i | grep -q "pattern.*mdt" || {
+			$LFS getstripe $DIR/$tfile-$i
+			error "not DoM file"
+		}
+	done
+
+	# flush dirty pages
+	sync
+
+	# wait until grant shrink reset grant dirty on MDTs
+	for ((i = 0; i < 120; i++)); do
+		grant_dirty=$(do_nodes $mdts $LCTL get_param -n mdt.*.tot_dirty |
+			awk '{sum=sum+$1} END {print sum}')
+		vm_dirty=$(awk '/Dirty:/{print $2}' /proc/meminfo)
+		echo "$grant_dirty grants, $vm_dirty pages"
+		(( grant_dirty + vm_dirty == 0 )) && break
+		(( i == 3 )) && sync &&
+			$LCTL set_param mdc.$FSNAME-*.grant_shrink_interval=5
+		sleep 1
+	done
+
+	grant_dirty=$(do_nodes $mdts $LCTL get_param -n mdt.*.tot_dirty |
+		awk '{sum=sum+$1} END {print sum}')
+	(( grant_dirty == 0 )) || error "$grant_dirty on MDT"
+}
+run_test 64g "grant shrink on MDT"
+
 # bug 1414 - set/get directories' stripe info
 test_65a() {
 	[ $PARALLEL == "yes" ] && skip "skip parallel run"
-- 
1.8.3.1