Whamcloud - gitweb
LU-18776 mdt: prevent multiple data discard calls 02/58302/3
author Mikhail Pershin <mpershin@whamcloud.com>
Wed, 5 Mar 2025 14:47:37 +0000 (17:47 +0300)
committer Oleg Drokin <green@whamcloud.com>
Wed, 19 Mar 2025 23:31:37 +0000 (23:31 +0000)
mdt_dom_discard_data() might be called multiple times
for the same object. That creates cyclical locks for no
reason and, moreover, their callbacks are executed
recursively in the same thread, causing a stack overflow.

The patch introduces the mdt_object flag mot_discard_done to
indicate that data discard has already been initiated once and
that there is no need for another one.
Additionally, the patch does not allow the same thread to be used
for the lock callback if ldlm_is_ast_discard_data() is true.

Fixes: 291ac6e692 ("LU-17078 ldlm: do not spin up thread for local cancels")
Signed-off-by: Mikhail Pershin <mpershin@whamcloud.com>
Change-Id: I7dc5d0da93a38e04267e007f5132ddb20788f18f
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/58302
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/ldlm/ldlm_lock.c
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_internal.h
lustre/mdt/mdt_io.c

index d9625e4..d82015a 100644 (file)
@@ -889,7 +889,8 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, enum ldlm_mode mode)
                if (ldlm_is_fail_loc(lock))
                        CFS_RACE(OBD_FAIL_LDLM_CP_BL_RACE);
 
-               if (ldlm_is_atomic_cb(lock) || ldlm_is_local(lock) ||
+               if (ldlm_is_atomic_cb(lock) ||
+                   (ldlm_is_local(lock) && !ldlm_is_ast_discard_data(lock)) ||
                    ldlm_bl_to_thread_lock(ns, NULL, lock) != 0)
                        ldlm_handle_bl_callback(ns, NULL, lock);
        } else if (ns_is_client(ns) &&
index 624ee9b..91f7acb 100644 (file)
@@ -6894,6 +6894,7 @@ static struct lu_object *mdt_object_alloc(const struct lu_env *env,
                mo->mot_lsom_size = 0;
                mo->mot_lsom_blocks = 0;
                mo->mot_lsom_inited = false;
+               mo->mot_discard_done = false;
                RETURN(o);
        }
        RETURN(NULL);
index 62bba01..7da4efb 100644 (file)
@@ -360,7 +360,8 @@ struct mdt_object {
                                mot_restriping:1,   /* dir restriping */
                                /* dir auto-split disabled */
                                mot_auto_split_disabled:1,
-                               mot_lsom_inited:1; /* lsom was inited */
+                               mot_lsom_inited:1, /* lsom was inited */
+                               mot_discard_done:1; /* discard lock was sent */
        int                     mot_write_count;
        spinlock_t              mot_write_lock;
        /* Lock to protect create_data */
@@ -1396,7 +1397,8 @@ static inline bool mdt_dom_check_for_discard(struct mdt_thread_info *mti,
                                             struct mdt_object *mo)
 {
        return lu_object_is_dying(&mo->mot_header) &&
-              S_ISREG(lu_object_attr(&mo->mot_obj));
+              S_ISREG(lu_object_attr(&mo->mot_obj)) &&
+              !mo->mot_discard_done;
 }
 
 int mdt_dom_object_size(const struct lu_env *env, struct mdt_device *mdt,
index c2594bb..3735b5f 100644 (file)
@@ -2055,6 +2055,8 @@ void mdt_dom_discard_data(struct mdt_thread_info *info,
                RETURN_EXIT;
        }
 
+       mo->mot_discard_done = true;
+
        lock = ldlm_handle2lock(&dom_lh);
        lock_res_and_lock(lock);
        /* if lock is not granted then there are BL ASTs in progress and