Whamcloud - gitweb
LU-18826: obdclass: fix panic from shrink_slab 01/58501/17
authorLijing Chen <lijinc@amazon.com>
Fri, 21 Mar 2025 17:46:28 +0000 (17:46 +0000)
committerOleg Drokin <green@whamcloud.com>
Wed, 21 May 2025 05:15:59 +0000 (05:15 +0000)
Grant slot when obd_get_mod_rpc_slot called from kthreadd.
This change will prevent kernel panic from null pointer.

When shrink_slab triggered from kthreadd (system under
memory pressure and try to free up memory) it will invoke
ll_delete_node to free up inode cache. If in flight rpcs is
full, it will try to sleep the thread, but this will trigger
null pointer given kthreadd have null set_child_tid field.
We anyway don't want kthreadd get put into sleep.

Signed-off-by: Lijing Chen <lijinc@amazon.com>
Change-Id: Ie7dc1e4956bf508d9e2f1272cdfc377e78971ff8
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/58501
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Shaun Tancheff <shaun.tancheff@hpe.com>
Reviewed-by: Timothy Day <timday@amazon.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre.spec.in
lustre/kunit/Makefile.in
lustre/kunit/autoMakefile.am
lustre/kunit/obd_mod_rpcs_test.c [new file with mode: 0644]
lustre/obdclass/genops.c
lustre/tests/sanity.sh
rpm/kmp-lustre-tests.files

index 69e8967..26aa264 100644 (file)
@@ -861,6 +861,7 @@ mv $basemodpath/fs/osd_wbcfs.ko $basemodpath-osd-wbcfs/fs/osd_wbcfs.ko
 %if %{with lustre_tests}
 mkdir -p $basemodpath-tests/fs
 mv $basemodpath/fs/obd_test.ko $basemodpath-tests/fs/obd_test.ko
+mv $basemodpath/fs/obd_mod_rpcs_test.ko $basemodpath-tests/fs/obd_mod_rpcs_test.ko
 mv $basemodpath/fs/kinode.ko $basemodpath-tests/fs/kinode.ko
 %if %{with servers}
 mv $basemodpath/fs/ldlm_extent.ko $basemodpath-tests/fs/ldlm_extent.ko
index 8c1586d..ff69e8c 100644 (file)
@@ -8,7 +8,7 @@
 # Makefile template for kunit
 #
 
-MODULES := kinode obd_test
+MODULES := kinode obd_test obd_mod_rpcs_test
 @TESTS_TRUE@@SERVER_TRUE@MODULES += ldlm_extent
 @TESTS_TRUE@@SERVER_TRUE@MODULES += llog_test
 
@@ -16,5 +16,5 @@ EXTRA_DIST = kinode.c
 EXTRA_DIST += ldlm_extent.c
 EXTRA_DIST += llog_test.c
 EXTRA_DIST += obd_test.c
-
+EXTRA_DIST += obd_mod_rpcs_test.c
 @INCLUDE_RULES@
index dfb9a54..95ecc5f 100644 (file)
@@ -11,6 +11,7 @@
 if MODULES
 modulefs_DATA = kinode.ko
 modulefs_DATA += obd_test.ko
+modulefs_DATA += obd_mod_rpcs_test.ko
 if SERVER
 modulefs_DATA += ldlm_extent.ko
 modulefs_DATA += llog_test.ko
diff --git a/lustre/kunit/obd_mod_rpcs_test.c b/lustre/kunit/obd_mod_rpcs_test.c
new file mode 100644 (file)
index 0000000..ed60af2
--- /dev/null
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (c) 2025, Amazon and/or its affiliates. All rights reserved.
+ * Use is subject to license terms.
+ *
+ */
+
+/*
+ * This file is part of Lustre, http://www.lustre.org/
+ *
+ * Simple OBD device mock test for LU-18826.
+ *
+ * Author: Lijing Chen <lijinc@amazon.com>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/pid.h>
+#include <linux/sched/signal.h>
+#include <linux/delay.h>
+#include <linux/wait.h>
+#include <linux/bitmap.h>
+
+#include <obd_class.h>
+#include <lustre_net.h>
+
+struct test_ctx {
+       struct client_obd *cli;
+       struct obd_import *imp;
+       struct obd_device *obd;
+       unsigned int old_flags;
+       int __user *old_set_child_tid;
+};
+
+static void setup_test_context(struct test_ctx *ctx)
+{
+       int i;
+
+       OBD_ALLOC_PTR(ctx->cli);
+       OBD_ALLOC_PTR(ctx->imp);
+       OBD_ALLOC_PTR(ctx->obd);
+
+       init_waitqueue_head(&ctx->cli->cl_mod_rpcs_waitq);
+       ctx->cli->cl_max_mod_rpcs_in_flight = 0;
+       ctx->cli->cl_mod_rpcs_in_flight = 0;
+
+       /* Initialize cl_mod_rpcs_hist */
+       spin_lock_init(&ctx->cli->cl_mod_rpcs_hist.oh_lock);
+       for (i = 0; i < OBD_HIST_MAX; i++)
+               ctx->cli->cl_mod_rpcs_hist.oh_buckets[i] = 0;
+
+       /* Initialize bitmap */
+       OBD_ALLOC(ctx->cli->cl_mod_tag_bitmap,
+                 BITS_TO_LONGS(OBD_MAX_RIF_MAX) * sizeof(long));
+
+       /* Set up import and obd structures */
+       ctx->imp->imp_obd = ctx->obd;
+       ctx->cli->cl_import = ctx->imp;
+       snprintf(ctx->obd->obd_name, sizeof(ctx->obd->obd_name), "mock_obd");
+
+       /* Save current state */
+       ctx->old_flags = current->flags;
+       ctx->old_set_child_tid = current->set_child_tid;
+
+       /* Set set_child_tid to NULL for testing */
+       current->set_child_tid = NULL;
+}
+
+static void cleanup_test_context(struct test_ctx *ctx)
+{
+       OBD_FREE(ctx->cli->cl_mod_tag_bitmap,
+                BITS_TO_LONGS(OBD_MAX_RIF_MAX) * sizeof(long));
+       OBD_FREE_PTR(ctx->obd);
+       OBD_FREE_PTR(ctx->imp);
+       OBD_FREE_PTR(ctx->cli);
+
+       /* Restore original state */
+       current->flags = ctx->old_flags;
+       current->set_child_tid = ctx->old_set_child_tid;
+}
+
+static void test_kthread_flags(void)
+{
+       struct test_ctx ctx;
+       __u32 opc = MDS_STATFS;
+       __u16 ret;
+
+       pr_info("\n=== Starting test case 1: kthread flags ===\n");
+
+       setup_test_context(&ctx);
+
+       /* Set kthread flags */
+       current->flags = PF_KTHREAD|PF_MEMALLOC|PF_NOFREEZE|PF_FORKNOEXEC;
+
+       pr_info("Test 1: Current flags: 0x%x\n", current->flags);
+       pr_info("Test 1: set_child_tid: %p\n", current->set_child_tid);
+
+       /* Simulate cl_mod_rpcs_in_flight > cl_max_mod_rpcs_in_flight */
+       test_and_set_bit(0, ctx.cli->cl_mod_tag_bitmap);
+       test_and_set_bit(1, ctx.cli->cl_mod_tag_bitmap);
+       ctx.cli->cl_mod_rpcs_in_flight = 1;
+       ctx.cli->cl_close_rpcs_in_flight = 1;
+
+       ret = obd_get_mod_rpc_slot(ctx.cli, opc);
+
+       pr_info("Test 1: Return value (slot): %u\n", ret);
+       pr_info("Test 1: RPCs in flight: %d\n", ctx.cli->cl_mod_rpcs_in_flight);
+
+       cleanup_test_context(&ctx);
+
+       pr_info("=== Finished test case 1 ===\n");
+}
+
+static void test_non_mem_alloc_flags(void)
+{
+       struct test_ctx ctx;
+       __u32 opc = MDS_CLOSE;
+       __u16 ret;
+
+       pr_info("\n=== Starting test case 2: non mem_alloc flags ===\n");
+
+       setup_test_context(&ctx);
+
+       /* Set minimal flags */
+       current->flags = PF_KTHREAD|PF_NOFREEZE|PF_FORKNOEXEC;
+
+       pr_info("Test 2: Current flags: 0x%x\n", current->flags);
+       pr_info("Test 2: set_child_tid: %p\n", current->set_child_tid);
+
+       ret = obd_get_mod_rpc_slot(ctx.cli, opc);
+
+       pr_info("Test 2: Return value (slot): %u\n", ret);
+       pr_info("Test 2: RPCs in flight: %d\n", ctx.cli->cl_mod_rpcs_in_flight);
+
+       cleanup_test_context(&ctx);
+
+       pr_info("=== Finished test case 2 ===\n");
+}
+
+static int __init obd_mod_rpcs_test_init(void)
+{
+       /* Run test cases */
+       test_kthread_flags();
+       test_non_mem_alloc_flags();
+       return 0;
+}
+
+static void __exit obd_mod_rpcs_test_exit(void)
+{
+       pr_info("Task Module: Unloading module\n");
+}
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Lijing Chen <lijinc@amazon.com>");
+MODULE_DESCRIPTION("Lustre OBD test module");
+MODULE_VERSION(LUSTRE_VERSION_STRING);
+
+module_init(obd_mod_rpcs_test_init);
+module_exit(obd_mod_rpcs_test_exit);
index 2f751df..adf20de 100644 (file)
@@ -2384,27 +2384,44 @@ __u16 obd_get_mod_rpc_slot(struct client_obd *cli, __u32 opc)
 
        init_wait(&wait.wqe);
        wait.wqe.func = claim_mod_rpc_function;
-
        spin_lock_irq(&cli->cl_mod_rpcs_waitq.lock);
-       __add_wait_queue_entry_tail(&cli->cl_mod_rpcs_waitq, &wait.wqe);
-       /* This wakeup will only succeed if the maximums haven't
-        * been reached.  If that happens, wait.woken will be set
-        * and there will be no need to wait.
-        * If a close_req was enqueue, ensure we search all the way to the
-        * end of the waitqueue for a close request.
-        */
-       __wake_up_locked_key(&cli->cl_mod_rpcs_waitq, TASK_NORMAL,
-                            (void*)wait.close_req);
-
-       while (wait.woken == false) {
-               spin_unlock_irq(&cli->cl_mod_rpcs_waitq.lock);
-               wait_woken(&wait.wqe, TASK_UNINTERRUPTIBLE,
-                          MAX_SCHEDULE_TIMEOUT);
-               spin_lock_irq(&cli->cl_mod_rpcs_waitq.lock);
+       /* If it's kthread and don't have set_child_tid */
+       if ((current->flags & PF_KTHREAD) && (current->flags & PF_MEMALLOC) &&
+           !current->set_child_tid) {
+               /* Skip wait_woken as it will cause kernel panic (LU-18826).
+                * Also confirm it's on the mem alloc path by PF_MEMALLOC.
+                * In this dedicated case, grant a slot.
+                */
+               cli->cl_mod_rpcs_in_flight++;
+               if (wait.close_req)
+                       cli->cl_close_rpcs_in_flight++;
+               LCONSOLE_INFO("%s: Force grant RPC slot (%u current) to proc with flag: %x.\n",
+                       cli->cl_import->imp_obd->obd_name,
+                       cli->cl_mod_rpcs_in_flight, current->flags);
+       } else {
+               __add_wait_queue_entry_tail(&cli->cl_mod_rpcs_waitq, &wait.wqe);
+               /* This wakeup will only succeed if the maximums haven't
+                * been reached.  If that happens, wait.woken will be set
+                * and there will be no need to wait.
+                * If a close_req was enqueue, ensure we search all the way to
+                * the end of the waitqueue for a close request.
+                */
+               __wake_up_locked_key(&cli->cl_mod_rpcs_waitq, TASK_NORMAL,
+                                    (void *)wait.close_req);
+
+               while (wait.woken == false) {
+                       spin_unlock_irq(&cli->cl_mod_rpcs_waitq.lock);
+                       wait_woken(&wait.wqe, TASK_UNINTERRUPTIBLE,
+                               MAX_SCHEDULE_TIMEOUT);
+                       spin_lock_irq(&cli->cl_mod_rpcs_waitq.lock);
+               }
+               __remove_wait_queue(&cli->cl_mod_rpcs_waitq, &wait.wqe);
        }
-       __remove_wait_queue(&cli->cl_mod_rpcs_waitq, &wait.wqe);
-
-       max = cli->cl_max_mod_rpcs_in_flight;
+       /* In extreme situation like (LU-18826), cl_mod_rpcs_in_flight
+        * can go above cl_max_mod_rpcs_in_flight, use greater value here
+        * to make sure the slot can be found in cl_mod_tag_bitmap
+        */
+       max = max(cli->cl_max_mod_rpcs_in_flight, cli->cl_mod_rpcs_in_flight);
        lprocfs_oh_tally(&cli->cl_mod_rpcs_hist,
                         cli->cl_mod_rpcs_in_flight);
        /* find a free tag */
index 2d6238f..6a2e277 100755 (executable)
@@ -6829,6 +6829,21 @@ test_55b() {
 }
 run_test 55b "Load and unload max OBD devices"
 
+test_55c()
+{
+       load_module kunit/obd_mod_rpcs_test ||
+               error "load_module obd_mod_rpcs_test failed"
+       dmesg | tail -n 25 | grep "obd_mod_rpcs_test"
+       if [ $(dmesg | tail -n 25 | grep "Force grant RPC"| wc -l) -gt 1 ]; then
+               error "Force granted more than once, failing."
+       else
+               echo "Force granted exactly once. Passed."
+       fi
+       rmmod -v obd_mod_rpcs_test ||
+               error "rmmod failed (may trigger a failure in a later test)"
+}
+run_test 55c "obd_mod_rpcs_test"
+
 test_56a() {
        local numfiles=3
        local numdirs=2
index e7fb3f5..c8da2d6 100644 (file)
@@ -2,6 +2,7 @@
 %dir %{modules_fs_path}/%{lustre_name}-tests/fs
 %{modules_fs_path}/%{lustre_name}-tests/fs/kinode.ko
 %{modules_fs_path}/%{lustre_name}-tests/fs/obd_test.ko
+%{modules_fs_path}/%{lustre_name}-tests/fs/obd_mod_rpcs_test.ko
 %if %{with servers}
 %{modules_fs_path}/%{lustre_name}-tests/fs/ldlm_extent.ko
 %{modules_fs_path}/%{lustre_name}-tests/fs/llog_test.ko