From 5ee6ce1bc7df2d87b828de8293e3e84b668d1687 Mon Sep 17 00:00:00 2001 From: Lijing Chen Date: Fri, 21 Mar 2025 17:46:28 +0000 Subject: [PATCH] LU-18826: obdclass: fix panic from shrink_slab Grant slot when obd_get_mod_rpc_slot called from kthreadd. This change will prevent kernel panic from null pointer. When shrink_slab triggered from kthreadd (system under memory pressure and try to free up memory) it will invoke ll_delete_node to free up inode cache. If in flight rpcs is full, it will try to sleep the thread, but this will trigger null pointer given kthreadd have null set_child_tid field. We anyway don't want kthreadd get put into sleep. Signed-off-by: Lijing Chen Change-Id: Ie7dc1e4956bf508d9e2f1272cdfc377e78971ff8 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/58501 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Shaun Tancheff Reviewed-by: Timothy Day Reviewed-by: Oleg Drokin --- lustre.spec.in | 1 + lustre/kunit/Makefile.in | 4 +- lustre/kunit/autoMakefile.am | 1 + lustre/kunit/obd_mod_rpcs_test.c | 163 +++++++++++++++++++++++++++++++++++++++ lustre/obdclass/genops.c | 55 ++++++++----- lustre/tests/sanity.sh | 15 ++++ rpm/kmp-lustre-tests.files | 1 + 7 files changed, 219 insertions(+), 21 deletions(-) create mode 100644 lustre/kunit/obd_mod_rpcs_test.c diff --git a/lustre.spec.in b/lustre.spec.in index 69e8967..26aa264 100644 --- a/lustre.spec.in +++ b/lustre.spec.in @@ -861,6 +861,7 @@ mv $basemodpath/fs/osd_wbcfs.ko $basemodpath-osd-wbcfs/fs/osd_wbcfs.ko %if %{with lustre_tests} mkdir -p $basemodpath-tests/fs mv $basemodpath/fs/obd_test.ko $basemodpath-tests/fs/obd_test.ko +mv $basemodpath/fs/obd_mod_rpcs_test.ko $basemodpath-tests/fs/obd_mod_rpcs_test.ko mv $basemodpath/fs/kinode.ko $basemodpath-tests/fs/kinode.ko %if %{with servers} mv $basemodpath/fs/ldlm_extent.ko $basemodpath-tests/fs/ldlm_extent.ko diff --git a/lustre/kunit/Makefile.in b/lustre/kunit/Makefile.in index 8c1586d..ff69e8c 100644 --- a/lustre/kunit/Makefile.in +++ b/lustre/kunit/Makefile.in @@ -8,7 +8,7 @@ # Makefile template for kunit # -MODULES := kinode obd_test +MODULES := kinode obd_test obd_mod_rpcs_test @TESTS_TRUE@@SERVER_TRUE@MODULES += ldlm_extent @TESTS_TRUE@@SERVER_TRUE@MODULES += llog_test @@ -16,5 +16,5 @@ EXTRA_DIST = kinode.c EXTRA_DIST += ldlm_extent.c EXTRA_DIST += llog_test.c EXTRA_DIST += obd_test.c - +EXTRA_DIST += obd_mod_rpcs_test.c @INCLUDE_RULES@ diff --git a/lustre/kunit/autoMakefile.am b/lustre/kunit/autoMakefile.am index dfb9a54..95ecc5f 100644 --- a/lustre/kunit/autoMakefile.am +++ b/lustre/kunit/autoMakefile.am @@ -11,6 +11,7 @@ if MODULES modulefs_DATA = kinode.ko modulefs_DATA += obd_test.ko +modulefs_DATA += obd_mod_rpcs_test.ko if SERVER modulefs_DATA += ldlm_extent.ko modulefs_DATA += llog_test.ko diff --git a/lustre/kunit/obd_mod_rpcs_test.c b/lustre/kunit/obd_mod_rpcs_test.c new file mode 100644 index 0000000..ed60af2 --- /dev/null +++ b/lustre/kunit/obd_mod_rpcs_test.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (c) 2025, Amazon and/or its affiliates. All rights reserved. + * Use is subject to license terms. + * + */ + +/* + * This file is part of Lustre, http://www.lustre.org/ + * + * Simple OBD device mock test for LU-18826. + * + * Author: Lijing Chen + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +struct test_ctx { + struct client_obd *cli; + struct obd_import *imp; + struct obd_device *obd; + unsigned int old_flags; + int __user *old_set_child_tid; +}; + +static void setup_test_context(struct test_ctx *ctx) +{ + int i; + + OBD_ALLOC_PTR(ctx->cli); + OBD_ALLOC_PTR(ctx->imp); + OBD_ALLOC_PTR(ctx->obd); + + init_waitqueue_head(&ctx->cli->cl_mod_rpcs_waitq); + ctx->cli->cl_max_mod_rpcs_in_flight = 0; + ctx->cli->cl_mod_rpcs_in_flight = 0; + + /* Initialize cl_mod_rpcs_hist */ + spin_lock_init(&ctx->cli->cl_mod_rpcs_hist.oh_lock); + for (i = 0; i < OBD_HIST_MAX; i++) + ctx->cli->cl_mod_rpcs_hist.oh_buckets[i] = 0; + + /* Initialize bitmap */ + OBD_ALLOC(ctx->cli->cl_mod_tag_bitmap, + BITS_TO_LONGS(OBD_MAX_RIF_MAX) * sizeof(long)); + + /* Set up import and obd structures */ + ctx->imp->imp_obd = ctx->obd; + ctx->cli->cl_import = ctx->imp; + snprintf(ctx->obd->obd_name, sizeof(ctx->obd->obd_name), "mock_obd"); + + /* Save current state */ + ctx->old_flags = current->flags; + ctx->old_set_child_tid = current->set_child_tid; + + /* Set set_child_tid to NULL for testing */ + current->set_child_tid = NULL; +} + +static void cleanup_test_context(struct test_ctx *ctx) +{ + OBD_FREE(ctx->cli->cl_mod_tag_bitmap, + BITS_TO_LONGS(OBD_MAX_RIF_MAX) * sizeof(long)); + OBD_FREE_PTR(ctx->obd); + OBD_FREE_PTR(ctx->imp); + OBD_FREE_PTR(ctx->cli); + + /* Restore original state */ + current->flags = ctx->old_flags; + current->set_child_tid = ctx->old_set_child_tid; +} + +static void test_kthread_flags(void) +{ + struct test_ctx ctx; + __u32 opc = MDS_STATFS; + __u16 ret; + + pr_info("\n=== Starting test case 1: kthread flags ===\n"); + + setup_test_context(&ctx); + + /* Set kthread flags */ + current->flags = PF_KTHREAD|PF_MEMALLOC|PF_NOFREEZE|PF_FORKNOEXEC; + + pr_info("Test 1: Current flags: 0x%x\n", current->flags); + pr_info("Test 1: set_child_tid: %p\n", current->set_child_tid); + + /* Simulate cl_mod_rpcs_in_flight > cl_max_mod_rpcs_in_flight */ + test_and_set_bit(0, ctx.cli->cl_mod_tag_bitmap); + test_and_set_bit(1, ctx.cli->cl_mod_tag_bitmap); + ctx.cli->cl_mod_rpcs_in_flight = 1; + ctx.cli->cl_close_rpcs_in_flight = 1; + + ret = obd_get_mod_rpc_slot(ctx.cli, opc); + + pr_info("Test 1: Return value (slot): %u\n", ret); + pr_info("Test 1: RPCs in flight: %d\n", ctx.cli->cl_mod_rpcs_in_flight); + + cleanup_test_context(&ctx); + + pr_info("=== Finished test case 1 ===\n"); +} + +static void test_non_mem_alloc_flags(void) +{ + struct test_ctx ctx; + __u32 opc = MDS_CLOSE; + __u16 ret; + + pr_info("\n=== Starting test case 2: non mem_alloc flags ===\n"); + + setup_test_context(&ctx); + + /* Set minimal flags */ + current->flags = PF_KTHREAD|PF_NOFREEZE|PF_FORKNOEXEC; + + pr_info("Test 2: Current flags: 0x%x\n", current->flags); + pr_info("Test 2: set_child_tid: %p\n", current->set_child_tid); + + ret = obd_get_mod_rpc_slot(ctx.cli, opc); + + pr_info("Test 2: Return value (slot): %u\n", ret); + pr_info("Test 2: RPCs in flight: %d\n", ctx.cli->cl_mod_rpcs_in_flight); + + cleanup_test_context(&ctx); + + pr_info("=== Finished test case 2 ===\n"); +} + +static int __init obd_mod_rpcs_test_init(void) +{ + /* Run test cases */ + test_kthread_flags(); + test_non_mem_alloc_flags(); + return 0; +} + +static void __exit obd_mod_rpcs_test_exit(void) +{ + pr_info("Task Module: Unloading module\n"); +} + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Lijing Chen "); +MODULE_DESCRIPTION("Lustre OBD test module"); +MODULE_VERSION(LUSTRE_VERSION_STRING); + +module_init(obd_mod_rpcs_test_init); +module_exit(obd_mod_rpcs_test_exit); diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 2f751df..adf20de 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -2384,27 +2384,44 @@ __u16 obd_get_mod_rpc_slot(struct client_obd *cli, __u32 opc) init_wait(&wait.wqe); wait.wqe.func = claim_mod_rpc_function; - spin_lock_irq(&cli->cl_mod_rpcs_waitq.lock); - __add_wait_queue_entry_tail(&cli->cl_mod_rpcs_waitq, &wait.wqe); - /* This wakeup will only succeed if the maximums haven't - * been reached. If that happens, wait.woken will be set - * and there will be no need to wait. - * If a close_req was enqueue, ensure we search all the way to the - * end of the waitqueue for a close request. - */ - __wake_up_locked_key(&cli->cl_mod_rpcs_waitq, TASK_NORMAL, - (void*)wait.close_req); - - while (wait.woken == false) { - spin_unlock_irq(&cli->cl_mod_rpcs_waitq.lock); - wait_woken(&wait.wqe, TASK_UNINTERRUPTIBLE, - MAX_SCHEDULE_TIMEOUT); - spin_lock_irq(&cli->cl_mod_rpcs_waitq.lock); + /* If it's kthread and don't have set_child_tid */ + if ((current->flags & PF_KTHREAD) && (current->flags & PF_MEMALLOC) && + !current->set_child_tid) { + /* Skip wait_woken as it will cause kernel panic (LU-18826). + * Also confirm it's on the mem alloc path by PF_MEMALLOC. + * In this dedicated case, grant a slot. + */ + cli->cl_mod_rpcs_in_flight++; + if (wait.close_req) + cli->cl_close_rpcs_in_flight++; + LCONSOLE_INFO("%s: Force grant RPC slot (%u current) to proc with flag: %x.\n", + cli->cl_import->imp_obd->obd_name, + cli->cl_mod_rpcs_in_flight, current->flags); + } else { + __add_wait_queue_entry_tail(&cli->cl_mod_rpcs_waitq, &wait.wqe); + /* This wakeup will only succeed if the maximums haven't + * been reached. If that happens, wait.woken will be set + * and there will be no need to wait. + * If a close_req was enqueue, ensure we search all the way to + * the end of the waitqueue for a close request. + */ + __wake_up_locked_key(&cli->cl_mod_rpcs_waitq, TASK_NORMAL, + (void *)wait.close_req); + + while (wait.woken == false) { + spin_unlock_irq(&cli->cl_mod_rpcs_waitq.lock); + wait_woken(&wait.wqe, TASK_UNINTERRUPTIBLE, + MAX_SCHEDULE_TIMEOUT); + spin_lock_irq(&cli->cl_mod_rpcs_waitq.lock); + } + __remove_wait_queue(&cli->cl_mod_rpcs_waitq, &wait.wqe); } - __remove_wait_queue(&cli->cl_mod_rpcs_waitq, &wait.wqe); - - max = cli->cl_max_mod_rpcs_in_flight; + /* In extreme situation like (LU-18826), cl_mod_rpcs_in_flight + * can go above cl_max_mod_rpcs_in_flight, use greater value here + * to make sure the slot can be found in cl_mod_tag_bitmap + */ + max = max(cli->cl_max_mod_rpcs_in_flight, cli->cl_mod_rpcs_in_flight); lprocfs_oh_tally(&cli->cl_mod_rpcs_hist, cli->cl_mod_rpcs_in_flight); /* find a free tag */ diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 2d6238f..6a2e277 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -6829,6 +6829,21 @@ test_55b() { } run_test 55b "Load and unload max OBD devices" +test_55c() +{ + load_module kunit/obd_mod_rpcs_test || + error "load_module obd_mod_rpcs_test failed" + dmesg | tail -n 25 | grep "obd_mod_rpcs_test" + if [ $(dmesg | tail -n 25 | grep "Force grant RPC"| wc -l) -gt 1 ]; then + error "Force granted more than once, failing." + else + echo "Force granted exactly once. Passed." + fi + rmmod -v obd_mod_rpcs_test || + error "rmmod failed (may trigger a failure in a later test)" +} +run_test 55c "obd_mod_rpcs_test" + test_56a() { local numfiles=3 local numdirs=2 diff --git a/rpm/kmp-lustre-tests.files b/rpm/kmp-lustre-tests.files index e7fb3f5..c8da2d6 100644 --- a/rpm/kmp-lustre-tests.files +++ b/rpm/kmp-lustre-tests.files @@ -2,6 +2,7 @@ %dir %{modules_fs_path}/%{lustre_name}-tests/fs %{modules_fs_path}/%{lustre_name}-tests/fs/kinode.ko %{modules_fs_path}/%{lustre_name}-tests/fs/obd_test.ko +%{modules_fs_path}/%{lustre_name}-tests/fs/obd_mod_rpcs_test.ko %if %{with servers} %{modules_fs_path}/%{lustre_name}-tests/fs/ldlm_extent.ko %{modules_fs_path}/%{lustre_name}-tests/fs/llog_test.ko -- 1.8.3.1