From: Mikhail Pershin Date: Wed, 5 Mar 2025 14:47:37 +0000 (+0300) Subject: LU-18776 mdt: prevent multiple data discard calls X-Git-Tag: 2.16.53~24 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=799b533c34f468b3d094bf10453307beed409214;p=fs%2Flustre-release.git LU-18776 mdt: prevent multiple data discard calls mdt_dom_discard_data() might be called multiple times for the same object. That creates cyclical locks for no reason and, moreover, their callbacks are executed recursively in the same thread, causing a stack overflow. The patch introduces the mdt_object flag mot_discard_done to indicate that data discard has already been initiated and there is no need for another one. Additionally, the patch does not allow the same thread to be used for the lock callback if ldlm_is_ast_discard_data() is true. Fixes: 291ac6e692 ("LU-17078 ldlm: do not spin up thread for local cancels") Signed-off-by: Mikhail Pershin Change-Id: I7dc5d0da93a38e04267e007f5132ddb20788f18f Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/58302 Reviewed-by: Oleg Drokin Reviewed-by: Alex Zhuravlev Reviewed-by: Andreas Dilger Tested-by: jenkins Tested-by: Maloo --- diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index d9625e4..d82015a 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -889,7 +889,8 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, enum ldlm_mode mode) if (ldlm_is_fail_loc(lock)) CFS_RACE(OBD_FAIL_LDLM_CP_BL_RACE); - if (ldlm_is_atomic_cb(lock) || ldlm_is_local(lock) || + if (ldlm_is_atomic_cb(lock) || + (ldlm_is_local(lock) && !ldlm_is_ast_discard_data(lock)) || ldlm_bl_to_thread_lock(ns, NULL, lock) != 0) ldlm_handle_bl_callback(ns, NULL, lock); } else if (ns_is_client(ns) && diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index 624ee9b..91f7acb 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -6894,6 +6894,7 @@ static struct lu_object *mdt_object_alloc(const struct lu_env *env, mo->mot_lsom_size = 0; 
mo->mot_lsom_blocks = 0; mo->mot_lsom_inited = false; + mo->mot_discard_done = false; RETURN(o); } RETURN(NULL); diff --git a/lustre/mdt/mdt_internal.h b/lustre/mdt/mdt_internal.h index 62bba01..7da4efb 100644 --- a/lustre/mdt/mdt_internal.h +++ b/lustre/mdt/mdt_internal.h @@ -360,7 +360,8 @@ struct mdt_object { mot_restriping:1, /* dir restriping */ /* dir auto-split disabled */ mot_auto_split_disabled:1, - mot_lsom_inited:1; /* lsom was inited */ + mot_lsom_inited:1, /* lsom was inited */ + mot_discard_done:1; /* discard lock was sent */ int mot_write_count; spinlock_t mot_write_lock; /* Lock to protect create_data */ @@ -1396,7 +1397,8 @@ static inline bool mdt_dom_check_for_discard(struct mdt_thread_info *mti, struct mdt_object *mo) { return lu_object_is_dying(&mo->mot_header) && - S_ISREG(lu_object_attr(&mo->mot_obj)); + S_ISREG(lu_object_attr(&mo->mot_obj)) && + !mo->mot_discard_done; } int mdt_dom_object_size(const struct lu_env *env, struct mdt_device *mdt, diff --git a/lustre/mdt/mdt_io.c b/lustre/mdt/mdt_io.c index c2594bb..3735b5f 100644 --- a/lustre/mdt/mdt_io.c +++ b/lustre/mdt/mdt_io.c @@ -2055,6 +2055,8 @@ void mdt_dom_discard_data(struct mdt_thread_info *info, RETURN_EXIT; } + mo->mot_discard_done = true; + lock = ldlm_handle2lock(&dom_lh); lock_res_and_lock(lock); /* if lock is not granted then there are BL ASTs in progress and