Whamcloud - gitweb
LU-16973 ptlrpc: flush delayed file desc if idle 05/51805/6
author Andreas Dilger <adilger@whamcloud.com>
Sat, 29 Jul 2023 08:59:28 +0000 (02:59 -0600)
committer Oleg Drokin <green@whamcloud.com>
Sat, 19 Aug 2023 05:34:59 +0000 (05:34 +0000)
The use of alloc_file_pseudo() allocates a real file descriptor,
so fput() will use a deferred cleanup for the descriptor, either
when the thread "finishes the syscall" (which never happens for
kernel threads), or at unmount time.  This accumulates too many
file descriptors (millions) on a busy system.

Instead of waiting to clean up these file descriptors at unmount
time, call flush_delayed_fput() to clean them up when a ptlrpc
service thread becomes idle, before it goes back to sleep.

For kernels from 3.6, when flush_delayed_fput() was first added,
up to but not including 5.4, when it was EXPORT_SYMBOL'd, grab a
pointer to the function with kallsyms_lookup_name() so it can be called.

Delete the LN_CONFIG_STRSCPY_EXISTS references that generate configure
warnings, since this check was renamed and moved to libcfs.

Fixes: eed43b2a427b ("LU-13783 osd-ldiskfs: use alloc_file_pseudo to create fake files")
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: I24a08f9568d7d636a69672c5c3132ab25b292407
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/51805
Reviewed-by: Neil Brown <neilb@suse.de>
Reviewed-by: Andrew Perepechko <andrew.perepechko@hpe.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-by: Alexander Boyko <alexander.boyko@hpe.com>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lnet/autoconf/lustre-lnet.m4
lustre/autoconf/lustre-core.m4
lustre/ptlrpc/service.c

index acb1317..f36cae7 100644 (file)
@@ -1109,7 +1109,6 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([
 
 AC_DEFUN([LN_PROG_LINUX_SRC], [
        LN_CONFIG_O2IB_SRC
-       LN_SRC_CONFIG_STRSCPY_EXISTS
        # 3.15
        LN_SRC_CONFIG_SK_DATA_READY
        # 4.x
@@ -1125,7 +1124,6 @@ AC_DEFUN([LN_PROG_LINUX_SRC], [
 
 AC_DEFUN([LN_PROG_LINUX_RESULTS], [
        LN_CONFIG_O2IB_RESULTS
-       LN_CONFIG_STRSCPY_EXISTS
        # 3.15
        LN_CONFIG_SK_DATA_READY
        # 4.x
index b2b9ebc..1d824a5 100644 (file)
@@ -2793,6 +2793,18 @@ AC_DEFUN([LC_BIO_BI_PHYS_SEGMENTS], [
 ]) # LC_BIO_BI_PHYS_SEGMENTS
 
 #
+# LC_HAVE_FLUSH_DELAYED_FPUT
+#
+# kernel commit v3.5-rc6-284-g4a9d4b024a31 adds flush_delayed_fput()
+# kernel commit v5.3-rc2-13-g7239a40ca8bf exports flush_delayed_fput()
+#
+AC_DEFUN([LC_HAVE_FLUSH_DELAYED_FPUT], [
+LB_CHECK_EXPORT([flush_delayed_fput], [fs/file_table.c],
+       [AC_DEFINE(HAVE_FLUSH_DELAYED_FPUT, 1,
+                       [flush_delayed_fput() is exported by the kernel])])
+]) # LC_HAVE_FLUSH_DELAYED_FPUT
+
+#
 # LC_LM_COMPARE_OWNER_EXISTS
 #
 # kernel 5.3-rc3 commit f85d93385e9fe6886a751f647f6812a89bf6bee3
@@ -4306,6 +4318,7 @@ AC_DEFUN([LC_PROG_LINUX_RESULTS], [
 
        # 5.3
        LC_BIO_BI_PHYS_SEGMENTS
+       LC_HAVE_FLUSH_DELAYED_FPUT
        LC_LM_COMPARE_OWNER_EXISTS
 
        # 5.5
index 7e3268f..fac60b5 100644 (file)
@@ -2755,6 +2755,16 @@ ptlrpc_wait_event(struct ptlrpc_service_part *svcpt,
        return 0;
 }
 
+#ifdef HAVE_SERVER_SUPPORT
+# ifdef HAVE_FLUSH_DELAYED_FPUT
+#  define cfs_flush_delayed_fput() flush_delayed_fput() /* call directly */
+# else /* !HAVE_FLUSH_DELAYED_FPUT: call through a pointer looked up by name */
+void (*cfs_flush_delayed_fput)(void); /* set in ptlrpc_main(); NOTE(review): called without a NULL check - confirm lookup cannot fail */
+# endif /* HAVE_FLUSH_DELAYED_FPUT */
+#else /* !HAVE_SERVER_SUPPORT */
+#define cfs_flush_delayed_fput() do {} while (0) /* expands to a no-op */
+#endif /* HAVE_SERVER_SUPPORT */
+
 /**
  * Main thread body for service threads.
  * Waits in a loop waiting for new requests to process to appear.
@@ -2862,8 +2872,17 @@ static int ptlrpc_main(void *arg)
        CDEBUG(D_NET, "service thread %d (#%d) started\n", thread->t_id,
               svcpt->scp_nthrs_running);
 
+#ifdef HAVE_SERVER_SUPPORT
+#ifndef HAVE_FLUSH_DELAYED_FPUT
+       if (unlikely(cfs_flush_delayed_fput == NULL))
+               cfs_flush_delayed_fput =
+                       cfs_kallsyms_lookup_name("flush_delayed_fput");
+#endif
+#endif
        /* XXX maintain a list of all managed devices: insert here */
        while (!ptlrpc_thread_stopping(thread)) {
+               bool idle = true;
+
                if (ptlrpc_wait_event(svcpt, thread))
                        break;
 
@@ -2872,6 +2891,7 @@ static int ptlrpc_main(void *arg)
                if (ptlrpc_threads_need_create(svcpt)) {
                        /* Ignore return code - we tried... */
                        ptlrpc_start_thread(svcpt, 0);
+                       idle = false;
                }
 
                /* reset le_ses to initial state */
@@ -2889,6 +2909,7 @@ static int ptlrpc_main(void *arg)
                        if (counter++ < 100)
                                continue;
                        counter = 0;
+                       idle = false;
                }
 
                if (ptlrpc_at_check(svcpt))
@@ -2898,6 +2919,7 @@ static int ptlrpc_main(void *arg)
                        lu_context_enter(&env->le_ctx);
                        ptlrpc_server_handle_request(svcpt, thread);
                        lu_context_exit(&env->le_ctx);
+                       idle = false;
                }
 
                if (ptlrpc_rqbd_pending(svcpt) &&
@@ -2910,7 +2932,16 @@ static int ptlrpc_main(void *arg)
                        svcpt->scp_rqbd_timeout = cfs_time_seconds(1) / 10;
                        CDEBUG(D_RPCTRACE, "Posted buffers: %d\n",
                               svcpt->scp_nrqbds_posted);
+                       idle = false;
                }
+
+               /* If nothing to do, flush old alloc_file_pseudo() descriptors.
+                * This has internal atomicity so it is OK to call often.
+                * We could also do other idle tasks at this time.
+                */
+               if (idle)
+                       cfs_flush_delayed_fput();
+
                /*
                 * If the number of threads has been tuned downward and this
                 * thread should be stopped, then stop in reverse order so the