Whamcloud - gitweb
LU-17003 dne: remove REP-ACK support in DNE system 51/51851/3
authorLai Siyao <lai.siyao@whamcloud.com>
Mon, 10 Jul 2023 22:49:50 +0000 (18:49 -0400)
committerOleg Drokin <green@whamcloud.com>
Thu, 24 Aug 2023 04:35:31 +0000 (04:35 +0000)
DNE system doesn't need to support REP-ACK. In the old implementation,
write locks are kept in PW|EX mode after transaction stop, and will
be downgraded to TXN mode till REP-ACK, and then not released until
transaction commit.

While in the period between transaction stop and REP-ACK, any read
lock request will be on hold till downgrade, with this change, this
read lock request will succeed immediately.  During this period, any
write lock request may involve extra commit, since mdt_blocking_ast()
does not know whether transaction has stopped, so it needs to trigger
commit-on-sharing immediately, and also set 'sync' flag in the lock.
If transaction is not stopped yet, later when it's stopped, it will
trigger another commit-on-sharing since the 'sync' flag is set.

With this change, mdt_blocking_ast() only needs to set 'sync' flag if
its mode is PW|EX, and trigger commit-on-sharing once upon unlock.
This refuces the number of transaction commits and may improve
performance in some corner cases.

Signed-off-by: Lai Siyao <lai.siyao@whamcloud.com>
Change-Id: I159a0ad619afd10e97be3dc175a6b4ed77b31142
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/51851
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Mikhail Pershin <mpershin@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/include/lustre_net.h
lustre/ldlm/ldlm_lib.c
lustre/ldlm/ldlm_lock.c
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_internal.h
lustre/mdt/mdt_recovery.c
lustre/ptlrpc/layout.c
lustre/ptlrpc/service.c

index 71cd242..f1ec613 100644 (file)
@@ -668,8 +668,6 @@ struct ptlrpc_reply_state {
        unsigned long           rs_committed:1;/* the transaction was committed
                                                  and the rs was dispatched
                                                  by ptlrpc_commit_replies */
-       unsigned long           rs_convert_lock:1; /* need to convert saved
-                                                   * locks to COS mode */
        atomic_t                rs_refcount;    /* number of users */
        /** Number of locks awaiting client ACK */
        int                     rs_nlocks;
@@ -2236,7 +2234,7 @@ struct ptlrpc_service_conf {
  * @{
  */
 void ptlrpc_save_lock(struct ptlrpc_request *req, struct lustre_handle *lock,
-                     int mode, bool no_ack, bool convert_lock);
+                     int mode, bool no_ack);
 void ptlrpc_commit_replies(struct obd_export *exp);
 void ptlrpc_dispatch_difficult_reply(struct ptlrpc_reply_state *rs);
 void ptlrpc_schedule_difficult_reply(struct ptlrpc_reply_state *rs);
index ee248fe..4c0d616 100644 (file)
@@ -815,8 +815,6 @@ int server_disconnect_export(struct obd_export *exp)
                list_del_init(&rs->rs_exp_list);
 
                spin_lock(&rs->rs_lock);
-               /* clear rs_convert_lock to make sure rs is handled and put */
-               rs->rs_convert_lock = 0;
                ptlrpc_schedule_difficult_reply(rs);
                spin_unlock(&rs->rs_lock);
 
index b17fa40..8a8b4fe 100644 (file)
@@ -2178,13 +2178,13 @@ ldlm_work_bl_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
        bld.bl_same_client = lock->l_client_cookie ==
                             lock->l_blocking_lock->l_client_cookie;
        /* if two locks are initiated from the same MDT, transactions are
-        * independent, or the request lock mode is CR|PR|CW, no need to trigger
+        * independent, or the request lock mode isn't EX|PW, no need to trigger
         * CoS because current lock will be downgraded to TXN mode soon, then
         * the blocking lock can be granted.
         */
        if (lock->l_blocking_lock->l_policy_data.l_inodebits.li_initiator_id ==
                lock->l_policy_data.l_inodebits.li_initiator_id ||
-           lock->l_blocking_lock->l_req_mode & (LCK_CR | LCK_PR | LCK_CW))
+           !(lock->l_blocking_lock->l_req_mode & (LCK_EX | LCK_PW)))
                bld.bl_txn_dependent = false;
        else
                bld.bl_txn_dependent = true;
@@ -2898,8 +2898,26 @@ void _ldlm_lock_debug(struct ldlm_lock *lock,
                break;
 
        case LDLM_IBITS:
-               libcfs_debug_msg(msgdata,
-                                "%pV ns: %s lock: %p/%#llx lrc: %d/%d,%d mode: %s/%s res: " DLDLMRES " bits %#llx/%#llx rrc: %d type: %s gid %llu flags: %#llx nid: %s remote: %#llx expref: %d pid: %u timeout: %lld lvb_type: %d initiator: MDT%d\n",
+               if (!lock->l_remote_handle.cookie)
+                       libcfs_debug_msg(msgdata,
+                                "%pV ns: %s lock: %p/%#llx lrc: %d/%d,%d mode: %s/%s res: " DLDLMRES " bits %#llx/%#llx rrc: %d type: %s flags: %#llx pid: %u initiator: MDT%d\n",
+                                &vaf,
+                                ldlm_lock_to_ns_name(lock),
+                                lock, lock->l_handle.h_cookie,
+                                refcount_read(&lock->l_handle.h_ref),
+                                lock->l_readers, lock->l_writers,
+                                ldlm_lockname[lock->l_granted_mode],
+                                ldlm_lockname[lock->l_req_mode],
+                                PLDLMRES(resource),
+                                lock->l_policy_data.l_inodebits.bits,
+                                lock->l_policy_data.l_inodebits.try_bits,
+                                atomic_read(&resource->lr_refcount),
+                                ldlm_typename[resource->lr_type],
+                                lock->l_flags, lock->l_pid,
+                                lock->l_policy_data.l_inodebits.li_initiator_id);
+               else
+                       libcfs_debug_msg(msgdata,
+                                "%pV ns: %s lock: %p/%#llx lrc: %d/%d,%d mode: %s/%s res: " DLDLMRES " bits %#llx/%#llx rrc: %d type: %s gid %llu flags: %#llx nid: %s remote: %#llx expref: %d pid: %u timeout: %lld lvb_type: %d\n",
                                 &vaf,
                                 ldlm_lock_to_ns_name(lock),
                                 lock, lock->l_handle.h_cookie,
@@ -2917,8 +2935,7 @@ void _ldlm_lock_debug(struct ldlm_lock *lock,
                                 lock->l_remote_handle.cookie,
                                 exp ? refcount_read(&exp->exp_handle.h_ref) : -99,
                                 lock->l_pid, lock->l_callback_timestamp,
-                                lock->l_lvb_type,
-                                lock->l_policy_data.l_inodebits.li_initiator_id);
+                                lock->l_lvb_type);
                break;
 
        default:
index e0841f9..f059339 100644 (file)
@@ -3600,34 +3600,17 @@ int mdt_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
         * The 'data' parameter is l_ast_data in the first case and
         * callback arguments in the second one. Distinguish them by that.
         */
-       if (!data || data == lock->l_ast_data || !arg->bl_desc)
-               goto skip_cos_checks;
-
-       if (lock->l_req_mode & (LCK_PW | LCK_EX)) {
-               if (mdt_cos_is_enabled(mdt) &&
-                   !arg->bl_desc->bl_same_client) {
-                       mdt_set_lock_sync(lock);
-               } else if (mdt_slc_is_enabled(mdt) &&
-                          arg->bl_desc->bl_txn_dependent) {
-                       mdt_set_lock_sync(lock);
-                       /* we may do extra commit here, because there is a small
-                        * window to miss a commit:
-                        * 1. lock was unlocked (saved), but not downgraded to
-                        * TXN mode yet (REP-ACK not received).
-                        * 2. a conflict lock enqueued and we come herej if we
-                        * don't trigger commit now, the enqueued lock will wait
-                        * untill system periodic commit.
-                        *
-                        * Fortunately this window is quite small, not to say
-                        * distributed operation is rare too.
-                        */
+       if (data && data != lock->l_ast_data && arg->bl_desc) {
+               if (lock->l_req_mode & (LCK_COS | LCK_TXN))
                        commit_async = true;
-               }
-       } else if (lock->l_req_mode == LCK_COS || lock->l_req_mode == LCK_TXN) {
-               commit_async = true;
+               else if ((lock->l_req_mode & (LCK_PW | LCK_EX)) &&
+                        ((mdt_cos_is_enabled(mdt) &&
+                            !arg->bl_desc->bl_same_client) ||
+                         (mdt_slc_is_enabled(mdt) &&
+                            arg->bl_desc->bl_txn_dependent)))
+                       mdt_set_lock_sync(lock);
        }
 
-skip_cos_checks:
        rc = ldlm_blocking_ast_nocheck(lock);
 
        if (commit_async) {
@@ -4163,8 +4146,7 @@ static void mdt_save_lock(struct mdt_thread_info *info, struct lustre_handle *h,
                        struct mdt_device *mdt = info->mti_mdt;
                        struct ldlm_lock *lock = ldlm_handle2lock(h);
                        struct ptlrpc_request *req = mdt_info_req(info);
-                       bool cos = mdt_cos_is_enabled(mdt);
-                       bool convert_lock = !cos && mdt_slc_is_enabled(mdt);
+                       bool no_ack = false;
 
                        LASSERTF(lock != NULL, "no lock for cookie %#llx\n",
                                 h->cookie);
@@ -4175,15 +4157,22 @@ static void mdt_save_lock(struct mdt_thread_info *info, struct lustre_handle *h,
                                LDLM_DEBUG(lock, "save lock request %p reply "
                                        "state %p transno %lld\n", req,
                                        req->rq_reply_state, req->rq_transno);
-                               if (cos) {
-                                       ldlm_lock_mode_downgrade(lock, LCK_COS);
+                               if (mdt_cos_is_enabled(mdt)) {
                                        mode = LCK_COS;
+                                       no_ack = true;
+                                       ldlm_lock_mode_downgrade(lock, mode);
+                               } else if (mdt_slc_is_enabled(mdt)) {
+                                       no_ack = true;
+                                       if (mode != LCK_TXN) {
+                                               mode = LCK_TXN;
+                                               ldlm_lock_mode_downgrade(lock,
+                                                                        mode);
+                                       }
                                }
                                if (req->rq_export->exp_disconnected)
                                        mdt_fid_unlock(h, mode);
                                else
-                                       ptlrpc_save_lock(req, h, mode, cos,
-                                                        convert_lock);
+                                       ptlrpc_save_lock(req, h, mode, no_ack);
                        } else {
                                mdt_fid_unlock(h, mode);
                        }
index 58995ee..71fef0d 100644 (file)
@@ -1279,6 +1279,7 @@ static inline void mdt_fid_unlock(struct lustre_handle *lh, enum ldlm_mode mode)
        ldlm_lock_decref(lh, mode);
 }
 
+/* this is identical to say this is a DNE system */
 static inline bool mdt_slc_is_enabled(struct mdt_device *mdt)
 {
        return mdt->mdt_lut.lut_sync_lock_cancel == SYNC_LOCK_CANCEL_BLOCKING;
index 760e029..2c1e2ac 100644 (file)
@@ -103,8 +103,7 @@ static void mdt_steal_ack_locks(struct ptlrpc_request *req)
                spin_lock(&rs->rs_lock);
                for (i = 0; i < rs->rs_nlocks; i++)
                        ptlrpc_save_lock(req, &rs->rs_locks[i],
-                                        rs->rs_modes[i], rs->rs_no_ack,
-                                        rs->rs_convert_lock);
+                                        rs->rs_modes[i], rs->rs_no_ack);
                rs->rs_nlocks = 0;
 
                DEBUG_REQ(D_HA, req, "stole locks for");
index d471fc3..506317d 100644 (file)
@@ -2849,7 +2849,6 @@ int req_capsule_server_grow(struct req_capsule *pill,
 
                 nrs->rs_difficult = 1;
                 nrs->rs_no_ack = rs->rs_no_ack;
-               nrs->rs_convert_lock = rs->rs_convert_lock;
                 for (i = 0; i < rs->rs_nlocks; i++) {
                         nrs->rs_locks[i] = rs->rs_locks[i];
                         nrs->rs_modes[i] = rs->rs_modes[i];
index fac60b5..cc33cb9 100644 (file)
@@ -184,7 +184,7 @@ static int ptlrpc_grow_req_bufs(struct ptlrpc_service_part *svcpt, int post)
  * Puts a lock and its mode into reply state assotiated to request reply.
  */
 void ptlrpc_save_lock(struct ptlrpc_request *req, struct lustre_handle *lock,
-                     int mode, bool no_ack, bool convert_lock)
+                     int mode, bool no_ack)
 {
        struct ptlrpc_reply_state *rs = req->rq_reply_state;
        int idx;
@@ -197,7 +197,6 @@ void ptlrpc_save_lock(struct ptlrpc_request *req, struct lustre_handle *lock,
        rs->rs_modes[idx] = mode;
        rs->rs_difficult = 1;
        rs->rs_no_ack = no_ack;
-       rs->rs_convert_lock = convert_lock;
 }
 EXPORT_SYMBOL(ptlrpc_save_lock);
 
@@ -2437,56 +2436,6 @@ static int ptlrpc_handle_rs(struct ptlrpc_reply_state *rs)
         * rs_lock, which we do right next.
         */
        if (!rs->rs_committed) {
-               /*
-                * if rs was commited, no need to convert locks, don't check
-                * rs_committed here because rs may never be added into
-                * exp_uncommitted_replies and this flag never be set, see
-                * target_send_reply()
-                */
-               if (rs->rs_convert_lock &&
-                   rs->rs_transno > exp->exp_last_committed) {
-                       struct ldlm_lock *lock;
-                       struct ldlm_lock *ack_locks[RS_MAX_LOCKS] = { NULL };
-
-                       spin_lock(&rs->rs_lock);
-                       if (rs->rs_convert_lock &&
-                           rs->rs_transno > exp->exp_last_committed) {
-                               nlocks = rs->rs_nlocks;
-                               while (nlocks-- > 0) {
-                                       /*
-                                        * NB don't assume rs is always handled
-                                        * by the same service thread (see
-                                        * ptlrpc_hr_select, so REP-ACK hr may
-                                        * race with trans commit, while the
-                                        * latter will release locks, get locks
-                                        * here early to downgrade to TXN mode
-                                        * safely.
-                                        */
-                                       lock = ldlm_handle2lock(
-                                                       &rs->rs_locks[nlocks]);
-                                       LASSERT(lock);
-                                       ack_locks[nlocks] = lock;
-                                       rs->rs_modes[nlocks] = LCK_TXN;
-                               }
-                               nlocks = rs->rs_nlocks;
-                               rs->rs_convert_lock = 0;
-                               /*
-                                * clear rs_scheduled so that commit callback
-                                * can schedule again
-                                */
-                               rs->rs_scheduled = 0;
-                               spin_unlock(&rs->rs_lock);
-
-                               while (nlocks-- > 0) {
-                                       lock = ack_locks[nlocks];
-                                       ldlm_lock_mode_downgrade(lock, LCK_TXN);
-                                       LDLM_LOCK_PUT(lock);
-                               }
-                               RETURN(0);
-                       }
-                       spin_unlock(&rs->rs_lock);
-               }
-
                spin_lock(&exp->exp_uncommitted_replies_lock);
                list_del_init(&rs->rs_obd_list);
                spin_unlock(&exp->exp_uncommitted_replies_lock);
@@ -2536,7 +2485,6 @@ static int ptlrpc_handle_rs(struct ptlrpc_reply_state *rs)
        }
 
        rs->rs_scheduled = 0;
-       rs->rs_convert_lock = 0;
 
        if (rs->rs_unlinked) {
                /* Off the net */