Whamcloud - gitweb
LU-15010 mdc: add support for grant shrink 56/44956/5
author Alex Zhuravlev <bzzz@whamcloud.com>
Thu, 16 Sep 2021 08:20:18 +0000 (11:20 +0300)
committer Oleg Drokin <green@whamcloud.com>
Sun, 17 Oct 2021 18:11:38 +0000 (18:11 +0000)
Just re-use the existing grant shrink mechanism used in OSC.

Signed-off-by: Alex Zhuravlev <bzzz@whamcloud.com>
Change-Id: I4cdca057d35eaff6493d047127f1fe5eee9e9620
Reviewed-on: https://review.whamcloud.com/44956
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Mike Pershin <mpershin@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/lustre_osc.h
lustre/include/uapi/linux/lustre/lustre_idl.h
lustre/llite/llite_lib.c
lustre/mdc/lproc_mdc.c
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_internal.h
lustre/osc/osc_request.c
lustre/tests/sanity.sh

index e7c290d..3c2514a 100644 (file)
@@ -671,6 +671,8 @@ int osc_punch_send(struct obd_export *exp, struct obdo *oa,
                   obd_enqueue_update_f upcall, void *cookie);
 int osc_fallocate_base(struct obd_export *exp, struct obdo *oa,
                       obd_enqueue_update_f upcall, void *cookie, int mode);
+void osc_update_next_shrink(struct client_obd *cli);
+void osc_schedule_grant_work(void);
 
 /* osc_io.c */
 int osc_io_submit(const struct lu_env *env, const struct cl_io_slice *ios,
index 0e28b08..71a20f4 100644 (file)
@@ -892,6 +892,7 @@ struct ptlrpc_body_v2 {
                                OBD_CONNECT_MULTIMODRPCS |\
                                OBD_CONNECT_SUBTREE | OBD_CONNECT_LARGE_ACL |\
                                OBD_CONNECT_GRANT_PARAM | \
+                               OBD_CONNECT_GRANT_SHRINK | \
                                OBD_CONNECT_SHORTIO | OBD_CONNECT_FLAGS2)
 
 #define MDT_CONNECT_SUPPORTED2 (OBD_CONNECT2_FILE_SECCTX | \
index 0fb7d9a..e493791 100644 (file)
@@ -324,6 +324,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt)
                                  OBD_CONNECT_SUBTREE |
                                  OBD_CONNECT_MULTIMODRPCS |
                                  OBD_CONNECT_GRANT_PARAM |
+                                 OBD_CONNECT_GRANT_SHRINK |
                                  OBD_CONNECT_SHORTIO | OBD_CONNECT_FLAGS2;
 
        data->ocd_connect_flags2 = OBD_CONNECT2_DIR_MIGRATE |
index 3b1bbbc..796a68d 100644 (file)
@@ -613,6 +613,108 @@ struct lprocfs_vars lprocfs_mdc_obd_vars[] = {
        { NULL }
 };
 
+/* Attribute show handler: print the client's cl_lost_grant counter. */
+static ssize_t cur_lost_grant_bytes_show(struct kobject *kobj,
+                                        struct attribute *attr,
+                                        char *buf)
+{
+       struct obd_device *dev;
+
+       dev = container_of(kobj, struct obd_device, obd_kset.kobj);
+
+       return scnprintf(buf, PAGE_SIZE, "%lu\n", dev->u.cli.cl_lost_grant);
+}
+LUSTRE_RO_ATTR(cur_lost_grant_bytes);
+
+/* Attribute show handler: print the client's cl_dirty_grant counter. */
+static ssize_t cur_dirty_grant_bytes_show(struct kobject *kobj,
+                                         struct attribute *attr,
+                                         char *buf)
+{
+       struct obd_device *dev;
+
+       dev = container_of(kobj, struct obd_device, obd_kset.kobj);
+
+       return scnprintf(buf, PAGE_SIZE, "%lu\n", dev->u.cli.cl_dirty_grant);
+}
+LUSTRE_RO_ATTR(cur_dirty_grant_bytes);
+
+/*
+ * Show handler for the grant_shrink attribute.
+ *
+ * Prints 1 when imp_grant_shrink_disabled is clear and the import's connect
+ * data carries the GRANT_SHRINK flag, 0 otherwise.
+ * NOTE(review): with_imp_locked() is defined elsewhere; it presumably leaves
+ * an error code in its third argument when there is no import -- confirm.
+ */
+static ssize_t grant_shrink_show(struct kobject *kobj, struct attribute *attr,
+                                char *buf)
+{
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
+       struct obd_import *import;
+       ssize_t count;
+
+       with_imp_locked(obd, import, count) {
+               int enabled;
+
+               enabled = !import->imp_grant_shrink_disabled &&
+                         OCD_HAS_FLAG(&import->imp_connect_data,
+                                      GRANT_SHRINK);
+               count = scnprintf(buf, PAGE_SIZE, "%d\n", enabled);
+       }
+
+       return count;
+}
+
+/*
+ * Store handler for the grant_shrink attribute.
+ *
+ * Parses @buffer as a boolean and stores its inverse in the import's
+ * imp_grant_shrink_disabled flag while holding imp_lock.
+ *
+ * Returns @count on success or the kstrtobool() error for unparsable input.
+ * NOTE(review): with_imp_locked() is defined elsewhere; it presumably leaves
+ * an error code in rc when there is no import -- confirm.
+ */
+static ssize_t grant_shrink_store(struct kobject *kobj, struct attribute *attr,
+                                 const char *buffer, size_t count)
+{
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
+       struct obd_import *imp;
+       bool val;
+       int rc;
+
+       /* obd is derived with container_of() from the attribute's kobject,
+        * so it is not NULL-checked here, like in the sibling handlers. */
+       rc = kstrtobool(buffer, &val);
+       if (rc)
+               return rc;
+
+       with_imp_locked(obd, imp, rc) {
+               spin_lock(&imp->imp_lock);
+               imp->imp_grant_shrink_disabled = !val;
+               spin_unlock(&imp->imp_lock);
+       }
+
+       return rc ?: count;
+}
+LUSTRE_RW_ATTR(grant_shrink);
+
+/*
+ * Show handler for the grant_shrink_interval attribute: print the client's
+ * cl_grant_shrink_interval value.
+ *
+ * Output is bounded to PAGE_SIZE with scnprintf(), like the other show
+ * handlers in this file.
+ */
+static ssize_t grant_shrink_interval_show(struct kobject *kobj,
+                                         struct attribute *attr,
+                                         char *buf)
+{
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
+
+       return scnprintf(buf, PAGE_SIZE, "%lld\n",
+                        obd->u.cli.cl_grant_shrink_interval);
+}
+
+/*
+ * Store handler for the grant_shrink_interval attribute.
+ *
+ * Parses @buffer as an unsigned integer, rejects zero with -ERANGE, stores
+ * the value in cl_grant_shrink_interval, then calls osc_update_next_shrink()
+ * and osc_schedule_grant_work(). Returns @count on success.
+ */
+static ssize_t grant_shrink_interval_store(struct kobject *kobj,
+                                          struct attribute *attr,
+                                          const char *buffer,
+                                          size_t count)
+{
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
+       struct client_obd *cli = &obd->u.cli;
+       unsigned int interval;
+       int rc;
+
+       rc = kstrtouint(buffer, 0, &interval);
+       if (rc != 0)
+               return rc;
+
+       if (!interval)
+               return -ERANGE;
+
+       cli->cl_grant_shrink_interval = interval;
+       osc_update_next_shrink(cli);
+       osc_schedule_grant_work();
+
+       return count;
+}
+LUSTRE_RW_ATTR(grant_shrink_interval);
+
 static struct attribute *mdc_attrs[] = {
        &lustre_attr_active.attr,
        &lustre_attr_checksums.attr,
@@ -622,6 +724,10 @@ static struct attribute *mdc_attrs[] = {
        &lustre_attr_mds_conn_uuid.attr,
        &lustre_attr_conn_uuid.attr,
        &lustre_attr_ping.attr,
+       &lustre_attr_grant_shrink.attr,
+       &lustre_attr_grant_shrink_interval.attr,
+       &lustre_attr_cur_lost_grant_bytes.attr,
+       &lustre_attr_cur_dirty_grant_bytes.attr,
        NULL,
 };
 
index 82c7bee..a11a9bf 100644 (file)
@@ -2621,6 +2621,61 @@ out:
 static int mdt_iocontrol(unsigned int cmd, struct obd_export *exp, int len,
                         void *karg, void __user *uarg);
 
+/*
+ * Handler for OST_SET_INFO requests arriving at the MDT.
+ *
+ * Only KEY_GRANT_SHRINK is supported: the request's ost_body is copied into
+ * the reply and passed to tgt_grant_prepare_read(). Any other key is logged
+ * and rejected with -EOPNOTSUPP.
+ *
+ * Returns 0 on success, err_serious(-EFAULT) when the key, the value or the
+ * request body is missing, -ENOMEM when the reply body is unavailable, or
+ * the error from req_capsule_server_pack().
+ */
+int mdt_io_set_info(struct tgt_session_info *tsi)
+{
+       struct ptlrpc_request   *req = tgt_ses_req(tsi);
+       struct ost_body         *body = NULL, *repbody;
+       void                    *key, *val = NULL;
+       int                      keylen, rc = 0;
+       bool                     is_grant_shrink;
+
+       ENTRY;
+
+       key = req_capsule_client_get(tsi->tsi_pill, &RMF_SETINFO_KEY);
+       if (key == NULL) {
+               DEBUG_REQ(D_HA, req, "no set_info key");
+               RETURN(err_serious(-EFAULT));
+       }
+       keylen = req_capsule_get_size(tsi->tsi_pill, &RMF_SETINFO_KEY,
+                                     RCL_CLIENT);
+
+       val = req_capsule_client_get(tsi->tsi_pill, &RMF_SETINFO_VAL);
+       if (val == NULL) {
+               DEBUG_REQ(D_HA, req, "no set_info val");
+               RETURN(err_serious(-EFAULT));
+       }
+
+       /* NOTE(review): KEY_IS() is defined elsewhere; it presumably matches
+        * against the local key/keylen pair -- confirm. */
+       is_grant_shrink = KEY_IS(KEY_GRANT_SHRINK);
+       if (is_grant_shrink)
+               /* In this case the value is actually an RMF_OST_BODY, so we
+                * change the format of this request accordingly */
+               req_capsule_extend(tsi->tsi_pill, &RQF_OST_SET_GRANT_INFO);
+
+       rc = req_capsule_server_pack(tsi->tsi_pill);
+       if (rc < 0)
+               RETURN(rc);
+
+       if (is_grant_shrink) {
+               body = req_capsule_client_get(tsi->tsi_pill, &RMF_OST_BODY);
+               if (body == NULL)
+                       RETURN(err_serious(-EFAULT));
+
+               repbody = req_capsule_server_get(tsi->tsi_pill, &RMF_OST_BODY);
+               if (repbody == NULL)
+                       RETURN(-ENOMEM);
+
+               *repbody = *body;
+
+               /** handle grant shrink, similar to a read request */
+               tgt_grant_prepare_read(tsi->tsi_env, tsi->tsi_exp,
+                                      &repbody->oa);
+       } else {
+               /* the key comes from the client and is not known to be
+                * NUL-terminated, so bound the print by its length */
+               CERROR("%s: Unsupported key %.*s\n",
+                      tgt_name(tsi->tsi_tgt), keylen, (char *)key);
+               rc = -EOPNOTSUPP;
+       }
+
+       RETURN(rc);
+}
+
+
 static int mdt_set_info(struct tgt_session_info *tsi)
 {
        struct ptlrpc_request   *req = tgt_ses_req(tsi);
@@ -5665,6 +5720,9 @@ TGT_OST_HDL(HAS_BODY | HAS_REPLY, OST_SYNC,       mdt_data_sync),
 TGT_OST_HDL(HAS_BODY | HAS_REPLY | IS_MUTABLE, OST_FALLOCATE,
                                                        mdt_fallocate_hdl),
 TGT_OST_HDL(HAS_BODY | HAS_REPLY, OST_SEEK, tgt_lseek),
+TGT_RPC_HANDLER(OST_FIRST_OPC,
+               0,                      OST_SET_INFO,   mdt_io_set_info,
+               &RQF_OBD_SET_INFO, LUSTRE_OST_VERSION),
 };
 
 static struct tgt_handler mdt_sec_ctx_ops[] = {
index 7b3e302..e05d7ca 100644 (file)
@@ -1388,6 +1388,7 @@ bool mdt_dom_client_has_lock(struct mdt_thread_info *info,
 void mdt_hp_brw(struct tgt_session_info *tsi);
 void mdt_hp_punch(struct tgt_session_info *tsi);
 int mdt_data_version_get(struct tgt_session_info *tsi);
+int mdt_io_set_info(struct tgt_session_info *tsi);
 
 /* grants */
 long mdt_grant_connect(const struct lu_env *env, struct obd_export *exp,
index 68da594..933fc8f 100644 (file)
@@ -763,6 +763,7 @@ void osc_update_next_shrink(struct client_obd *cli)
        CDEBUG(D_CACHE, "next time %lld to shrink grant\n",
               cli->cl_next_shrink_grant);
 }
+EXPORT_SYMBOL(osc_update_next_shrink);
 
 static void __osc_update_grant(struct client_obd *cli, u64 grant)
 {
@@ -971,6 +972,7 @@ void osc_schedule_grant_work(void)
        cancel_delayed_work_sync(&work);
        schedule_work(&work.work);
 }
+EXPORT_SYMBOL(osc_schedule_grant_work);
 
 /**
  * Start grant thread for returing grant to server for idle clients.
index bb32167..dbb5cc1 100755 (executable)
@@ -8829,6 +8829,50 @@ test_64f() {
 }
 run_test 64f "check grant consumption (with grant allocation)"
 
+# Write DoM files, flush them, lower grant_shrink_interval on the MDCs and
+# check that the summed tot_dirty on the MDTs goes back to zero.
+test_64g() {
+       #[ $MDS1_VERSION -lt $(version_code 2.14.54) ] &&
+       #       skip "Need MDS version at least 2.14.54"
+
+       local mdts=$(comma_list $(mdts_nodes))
+       local grant_dirty
+       local vm_dirty
+       local i
+
+       local old=$($LCTL get_param mdc.$FSNAME-*.grant_shrink_interval |
+                       tr '\n' ' ')
+       stack_trap "$LCTL set_param $old"
+
+       # generate dirty pages and increase dirty granted on MDT
+       stack_trap "rm -f $DIR/$tfile-*"
+       for (( i = 0; i < 10; i++)); do
+               $LFS setstripe -E 1M -L mdt $DIR/$tfile-$i ||
+                       error "can't set stripe"
+               dd if=/dev/zero of=$DIR/$tfile-$i bs=128k count=1 ||
+                       error "can't dd"
+               $LFS getstripe $DIR/$tfile-$i | grep -q pattern.*mdt || {
+                       $LFS getstripe $DIR/$tfile-$i
+                       error "not DoM file"
+               }
+       done
+
+       # flush dirty pages
+       sync
+
+       # wait until grant shrink reset grant dirty on MDTs
+       for ((i = 0; i < 120; i++)); do
+               grant_dirty=$(do_nodes $mdts $LCTL get_param -n  mdt.*.tot_dirty |
+                       awk '{sum=sum+$1} END {print sum}')
+               vm_dirty=$(awk '/Dirty:/{print $2}' /proc/meminfo)
+               echo "$grant_dirty grants, $vm_dirty pages"
+               (( grant_dirty + vm_dirty == 0 )) && break
+               # after a few polls, sync again and shorten the shrink interval
+               (( i == 3 )) && sync &&
+                       $LCTL set_param mdc.$FSNAME-*.grant_shrink_interval=5
+               sleep 1
+       done
+
+       grant_dirty=$(do_nodes $mdts $LCTL get_param -n  mdt.*.tot_dirty |
+               awk '{sum=sum+$1} END {print sum}')
+       (( grant_dirty == 0 )) || error "$grant_dirty on MDT"
+}
+run_test 64g "grant shrink on MDT"
+
 # bug 1414 - set/get directories' stripe info
 test_65a() {
        [ $PARALLEL == "yes" ] && skip "skip parallel run"