Whamcloud - gitweb
LU-14711 osc: Notify server if cache discard takes a long time
author: Oleg Drokin <green@whamcloud.com>
Fri, 28 May 2021 02:34:44 +0000 (22:34 -0400)
committer: Andreas Dilger <adilger@whamcloud.com>
Thu, 15 Jul 2021 08:43:48 +0000 (08:43 +0000)
Discarding a large number of pages from a mapping under a
single lock can take a really long time (750GB is over 170s).
Since there is no stream of RPCs sent to the server as with
read or write to prolong the DLM lock timeout, the server
may evict the client as it does not see progress is being made.

As such send periodic "empty" RPCs to the server to show the
client is still alive and working on the pages under the lock.

For compatibility reasons the RPC is formed as a one-byte
OST_READ request with a special flag (OBD_FL_NORPC) set to avoid
doing actual IO; older servers that do not know the flag still
perform the one-byte read.

Lustre-change: https://review.whamcloud.com/43857
Lustre-commit: 564070343ac4ccf4f97843009e1c36f5130ac19c

Change-Id: I4603c83e92c328d93e29adce8cbfac3d561b25d5
Signed-off-by: Oleg Drokin <green@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Patrick Farrell <farr0186@gmail.com>
Reviewed-on: https://review.whamcloud.com/44285
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/include/cl_object.h
lustre/osc/osc_cache.c
lustre/osc/osc_internal.h
lustre/osc/osc_request.c
lustre/target/tgt_handler.c

index 36ae325..8001fbb 100644 (file)
@@ -1911,6 +1911,9 @@ struct cl_io {
                        loff_t                   ls_result;
                        int                      ls_whence;
                } ci_lseek;
+               struct cl_misc_io {
+                       time64_t                 lm_next_rpc_time; /* compared with
+                                                 * ktime_get_seconds() in
+                                                 * osc_page_gang_lookup() for
+                                                 * CIT_MISC io: once passed, an
+                                                 * empty RPC is sent and the
+                                                 * value is re-armed; 0 means no
+                                                 * empty RPCs are sent
+                                                 */
+               } ci_misc;
         } u;
         struct cl_2queue     ci_queue;
         size_t               ci_nob;
index 9c29955..c3db5f1 100644 (file)
@@ -3125,6 +3125,15 @@ bool osc_page_gang_lookup(const struct lu_env *env, struct cl_io *io,
 
                if (!res)
                        break;
+
+               if (io->ci_type == CIT_MISC &&
+                   io->u.ci_misc.lm_next_rpc_time &&
+                   ktime_get_seconds() > io->u.ci_misc.lm_next_rpc_time) {
+                       osc_send_empty_rpc(osc, idx << PAGE_SHIFT);
+                       io->u.ci_misc.lm_next_rpc_time = ktime_get_seconds() +
+                                                        5 * obd_timeout / 16;
+               }
+
                if (need_resched())
                        cond_resched();
 
@@ -3246,6 +3255,8 @@ int osc_lock_discard_pages(const struct lu_env *env, struct osc_object *osc,
 
        io->ci_obj = cl_object_top(osc2cl(osc));
        io->ci_ignore_layout = 1;
+       io->u.ci_misc.lm_next_rpc_time = ktime_get_seconds() +
+                                        5 * obd_timeout / 16;
        result = cl_io_init(env, io, CIT_MISC, io->ci_obj);
        if (result != 0)
                GOTO(out, result);
index 8b54535..85b0976 100644 (file)
@@ -86,6 +86,7 @@ int osc_ladvise_base(struct obd_export *exp, struct obdo *oa,
 int osc_process_config_base(struct obd_device *obd, struct lustre_cfg *cfg);
 int osc_build_rpc(const struct lu_env *env, struct client_obd *cli,
                  struct list_head *ext_list, int cmd);
+void osc_send_empty_rpc(struct osc_object *osc, pgoff_t start);
 unsigned long osc_lru_reserve(struct client_obd *cli, unsigned long npages);
 void osc_lru_unreserve(struct client_obd *cli, unsigned long npages);
 
index bf343f2..db2d96d 100644 (file)
@@ -1397,23 +1397,25 @@ osc_brw_prep_request(int cmd, struct client_obd *cli, struct obdo *oa,
        struct brw_page *pg_prev;
        void *short_io_buf;
        const char *obd_name = cli->cl_import->imp_obd->obd_name;
-       struct inode *inode;
+       struct inode *inode = NULL;
        bool directio = false;
        bool enable_checksum = true;
 
        ENTRY;
-       inode = page2inode(pga[0]->pg);
-       if (inode == NULL) {
-               /* Try to get reference to inode from cl_page if we are
-                * dealing with direct IO, as handled pages are not
-                * actual page cache pages.
-                */
-               struct osc_async_page *oap = brw_page2oap(pga[0]);
-               struct cl_page *clpage = oap2cl_page(oap);
+       if (pga[0]->pg) {
+               inode = page2inode(pga[0]->pg);
+               if (inode == NULL) {
+                       /* Try to get reference to inode from cl_page if we are
+                        * dealing with direct IO, as handled pages are not
+                        * actual page cache pages.
+                        */
+                       struct osc_async_page *oap = brw_page2oap(pga[0]);
+                       struct cl_page *clpage = oap2cl_page(oap);
 
-               inode = clpage->cp_inode;
-               if (inode)
-                       directio = true;
+                       inode = clpage->cp_inode;
+                       if (inode)
+                               directio = true;
+               }
        }
        if (OBD_FAIL_CHECK(OBD_FAIL_OSC_BRW_PREP_REQ))
                RETURN(-ENOMEM); /* Recoverable */
@@ -2653,6 +2655,34 @@ out:
        RETURN(rc);
 }
 
+/* This is to refresh our lock in face of no RPCs.
+ *
+ * Build a one-page, one-byte OBD_BRW_READ request for the object behind
+ * \a osc, flag it with OBD_FL_NORPC and hand it to ptlrpcd_add_req().
+ * Nothing is returned: if osc_brw_prep_request() fails, no request is
+ * sent and the error is dropped.
+ *
+ * \param[in] osc    object whose oo_oinfo->loi_oi is copied into the obdo
+ * \param[in] start  stored in the .off field of the single brw_page.
+ *                   NOTE(review): the caller in osc_page_gang_lookup()
+ *                   passes idx << PAGE_SHIFT, which looks like a byte
+ *                   offset despite the pgoff_t type - confirm.
+ *
+ * NOTE(review): bpg lives on this function's stack while the request is
+ * handed to ptlrpcd_add_req() - confirm nothing keeps a pointer to the
+ * brw_page after this function returns.
+ */
+void osc_send_empty_rpc(struct osc_object *osc, pgoff_t start)
+{
+       struct ptlrpc_request *req;
+       struct obdo oa;
+       struct brw_page bpg = { .off = start, .count = 1};
+       struct brw_page *pga = &bpg;
+       int rc;
+
+       memset(&oa, 0, sizeof(oa));
+       oa.o_oi = osc->oo_oinfo->loi_oi;
+       oa.o_valid = OBD_MD_FLID | OBD_MD_FLGROUP | OBD_MD_FLFLAGS;
+       /* For updated servers - don't do a read: tgt_brw_read() and
+        * tgt_brw_write() return 0 early when OBD_MD_FLFLAGS is valid and
+        * OBD_FL_NORPC is set in o_flags.
+        */
+       oa.o_flags = OBD_FL_NORPC;
+
+       rc = osc_brw_prep_request(OBD_BRW_READ, osc_cli(osc), &oa, 1, &pga,
+                                 &req, 0);
+
+       /* If we succeeded we ship it off, if not there's no point in doing
+        * anything. Also no resends.
+        * No interpret callback, no commit callback.
+        * A non-zero rc is not reported anywhere; the caller only re-arms
+        * its timer and tries again later.
+        */
+       if (!rc) {
+               req->rq_no_resend = 1;
+               ptlrpcd_add_req(req);
+       }
+}
+
 static int osc_set_lock_data(struct ldlm_lock *lock, void *data)
 {
         int set = 0;
index 76b265b..c600d85 100644 (file)
@@ -2241,6 +2241,10 @@ int tgt_brw_read(struct tgt_session_info *tsi)
        body = tsi->tsi_ost_body;
        LASSERT(body != NULL);
 
+       if (body->oa.o_valid & OBD_MD_FLFLAGS &&
+           body->oa.o_flags & OBD_FL_NORPC)
+               RETURN(0);
+
        ioo = req_capsule_client_get(tsi->tsi_pill, &RMF_OBD_IOOBJ);
        LASSERT(ioo != NULL); /* must exists after tgt_ost_body_unpack */
 
@@ -2554,6 +2558,11 @@ int tgt_brw_write(struct tgt_session_info *tsi)
        body = tsi->tsi_ost_body;
        LASSERT(body != NULL);
 
+       if (body->oa.o_valid & OBD_MD_FLFLAGS &&
+           body->oa.o_flags & OBD_FL_NORPC)
+               RETURN(0);
+
+
        ioo = req_capsule_client_get(&req->rq_pill, &RMF_OBD_IOOBJ);
        LASSERT(ioo != NULL); /* must exists after tgt_ost_body_unpack */