Whamcloud - gitweb
LU-15527 dne: refactor commit-on-sharing for DNE 41/46641/11
authorLai Siyao <lai.siyao@whamcloud.com>
Sat, 13 May 2023 08:25:53 +0000 (04:25 -0400)
committerOleg Drokin <green@whamcloud.com>
Thu, 27 Jul 2023 07:19:14 +0000 (07:19 +0000)
Commit-on-sharing for DNE is different from the original
commit-on-sharing:
* the original commit-on-sharing is to eliminate dependency between
  operations from different clients.
* while commit-on-sharing for DNE is to eliminate dependency between
  operations handled by different MDTs, so that upon multiple MDT
  failures, an operation replay won't fail because its dependent
  operation is not replayed by another MDT yet.

The current CoS implementation for DNE checks dependency in the MDT
layer: it checks whether the current operation is a distributed
transaction and, if so, triggers CoS upon conflicting locks.
Actually this may miss some cases that should trigger CoS (even local
transaction should trigger CoS if it depends on a distributed
transaction), and on the other hand it may trigger extra CoS because
if two operations are handled by the same MDT, the dependency is
ensured because they will always be replayed by transaction number.
And to avoid mixing the code of two different CoS, the following
changes are made:
* add new ldlm lock mode LCK_TXN. On DNE system, downgrade PW/EX locks
  to this mode after transaction stop.
* add li_initiator_id in struct ldlm_inodebits, which is the index of
  MDT where the lock is enqueued, i.e. where operation is handled. If
  another operation handled by a different MDT requests a conflicting
  PW|EX mode lock against this TXN mode lock, it will trigger commit
  to ensure the dependent operation is committed to disk (NB, it
  doesn't trigger commit on all involved MDTs, but only the MDT where
  the conflict happens, which is enough to allow replay to succeed).
* remove LDLM_FL_COS_INCOMPAT and LDLM_FL_COS_ENABLED.
* MDT layer doesn't need to check such dependency any more, since lock
  itself knows.
* updated sanityn 33c, 33d and 33e since fewer CoS are triggered now.

Signed-off-by: Lai Siyao <lai.siyao@whamcloud.com>
Change-Id: Ib0149fcdc0178afd2c6894d211480f3c6c9284a0
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/46641
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Mikhail Pershin <mpershin@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
24 files changed:
lustre/include/lustre_dlm.h
lustre/include/lustre_dlm_flags.h
lustre/include/uapi/linux/lustre/lustre_idl.h
lustre/ldlm/ldlm_inodebits.c
lustre/ldlm/ldlm_lib.c
lustre/ldlm/ldlm_lock.c
lustre/lfsck/lfsck_lib.c
lustre/lod/lod_object.c
lustre/mdt/mdt_coordinator.c
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_hsm.c
lustre/mdt/mdt_internal.h
lustre/mdt/mdt_io.c
lustre/mdt/mdt_open.c
lustre/mdt/mdt_reint.c
lustre/mdt/mdt_restripe.c
lustre/mdt/mdt_xattr.c
lustre/osp/osp_md_object.c
lustre/ptlrpc/pack_generic.c
lustre/ptlrpc/service.c
lustre/ptlrpc/wiretest.c
lustre/tests/sanityn.sh
lustre/utils/wirecheck.c
lustre/utils/wiretest.c

index 3029ce5..e2f63cf 100644 (file)
@@ -113,7 +113,7 @@ enum ldlm_side {
  * Lock types are described in their respective implementation files:
  * ldlm_{extent,flock,inodebits,plain}.c.
  *
- * There are six lock modes along with a compatibility matrix to indicate if
+ * There are nine lock modes along with a compatibility matrix to indicate if
  * two locks are compatible.
  *
  * - EX: Exclusive mode. Before a new file is created, MDS requests EX lock
@@ -128,26 +128,37 @@ enum ldlm_side {
  * - CR Concurrent Read mode. When a client performs a path lookup, MDS grants
  *   an inodebit lock with the CR mode on the intermediate path component.
  * - NL Null mode.
+ * - GROUP: Group mode. Locks with the same group ID are compatible with each
+ *   other.
+ * - COS: Commit-on-Sharing mode. If Commit-on-Sharing is enabled, PW/EX locks
+ *   held in transactions are downgraded to COS mode after transaction stop.
+ * - TXN: Transaction mode. If Commit-on-Sharing is disabled on a DNE system,
+ *   PW/EX locks held in transactions are downgraded to TXN mode after
+ *   transaction stop.
  *
  * <PRE>
- *       NL  CR  CW  PR  PW  EX
- *  NL    1   1   1   1   1   1
- *  CR    1   1   1   1   1   0
- *  CW    1   1   1   0   0   0
- *  PR    1   1   0   1   0   0
- *  PW    1   1   0   0   0   0
- *  EX    1   0   0   0   0   0
+ *       NL  CR  CW  PR  PW  EX GROUP COS TXN
+ *  NL    1   1   1   1   1   1   1   1   1
+ *  CR    1   1   1   1   1   0   0   0   1
+ *  CW    1   1   1   0   0   0   0   0   1
+ *  PR    1   1   0   1   0   0   0   0   1
+ *  PW    1   1   0   0   0   0   0   0   0
+ *  EX    1   0   0   0   0   0   0   0   0
+ *  GROUP 1   0   0   0   0   0   1   0   0
+ *  COS   1   0   0   0   0   0   0   1   0
+ *  TXN   1   1   1   1   0   0   0   0   1
  * </PRE>
  */
 /** @{ */
-#define LCK_COMPAT_EX  LCK_NL
-#define LCK_COMPAT_PW  (LCK_COMPAT_EX | LCK_CR)
-#define LCK_COMPAT_PR  (LCK_COMPAT_PW | LCK_PR)
-#define LCK_COMPAT_CW  (LCK_COMPAT_PW | LCK_CW)
-#define LCK_COMPAT_CR  (LCK_COMPAT_CW | LCK_PR | LCK_PW)
-#define LCK_COMPAT_NL  (LCK_COMPAT_CR | LCK_EX | LCK_GROUP)
-#define LCK_COMPAT_GROUP  (LCK_GROUP | LCK_NL)
-#define LCK_COMPAT_COS (LCK_COS)
+#define LCK_COMPAT_EX    LCK_NL
+#define LCK_COMPAT_PW    (LCK_COMPAT_EX | LCK_CR)
+#define LCK_COMPAT_PR    (LCK_COMPAT_PW | LCK_PR | LCK_TXN)
+#define LCK_COMPAT_CW    (LCK_COMPAT_PW | LCK_CW | LCK_TXN)
+#define LCK_COMPAT_CR    (LCK_COMPAT_CW | LCK_PR | LCK_PW | LCK_TXN)
+#define LCK_COMPAT_NL    (LCK_COMPAT_CR | LCK_EX | LCK_GROUP | LCK_COS)
+#define LCK_COMPAT_GROUP (LCK_NL | LCK_GROUP)
+#define LCK_COMPAT_COS   (LCK_NL | LCK_COS)
+#define LCK_COMPAT_TXN   (LCK_COMPAT_PR | LCK_CW)
 /** @} Lock Compatibility Matrix */
 
 extern enum ldlm_mode lck_compat_array[];
@@ -657,8 +668,10 @@ struct ldlm_glimpse_work {
 };
 
 struct ldlm_bl_desc {
-       unsigned int bl_same_client:1,
-                    bl_cos_incompat:1;
+       unsigned int bl_same_client:1,  /* both ops are from the same client. */
+                    bl_txn_dependent:1;/* the op that enqueues lock depends on
+                                        * the op that holds lock.
+                                        */
 };
 
 struct ldlm_cb_set_arg {
index 1af4284..924b383 100644 (file)
 #define ldlm_set_cancel_on_block(_l)    LDLM_SET_FLAG((  _l), 1ULL << 23)
 #define ldlm_clear_cancel_on_block(_l)  LDLM_CLEAR_FLAG((_l), 1ULL << 23)
 
-/** Flag whether a lock is enqueued from a distributed transaction, and the
- *  requesting lock mode is PW/EX, if so, it will check compatibility with COS
- *  locks, and different from original COS semantic, transactions from the same
- *  client is also treated as lock conflict. */
-#define LDLM_FL_COS_INCOMPAT           0x0000000001000000ULL /* bit  24 */
-#define ldlm_is_cos_incompat(_l)       LDLM_TEST_FLAG((_l), 1ULL << 24)
-#define ldlm_set_cos_incompat(_l)      LDLM_SET_FLAG((_l), 1ULL << 24)
-#define ldlm_clear_cos_incompat(_l)    LDLM_CLEAR_FLAG((_l), 1ULL << 24)
-
 /*
  * Flag indicates that lock is being converted (downgraded) during the blocking
  * AST instead of cancelling. Used for IBITS locks now and drops conflicting
 /** Flag whether a lock is found on server for re-sent RPC. */
 #define LDLM_FL_RESENT                   0x0100000000000000ULL // bit  56
 
-/** Flag whether Commit-on-Sharing is enabled, if LDLM_FL_COS_INCOMPAT is set
- *  this flag may not be set because once the former is set this flag won't be
- *  checked, and for cross-MDT lock COS_INCOMPAT is always set but ast handle is
- *  in ldlm context which doesn't know whether COS is enabled or not. */
-#define LDLM_FL_COS_ENABLED              0x0200000000000000ULL /* bit  57 */
-#define ldlm_is_cos_enabled(_l)          LDLM_TEST_FLAG((_l), 1ULL << 57)
-#define ldlm_set_cos_enabled(_l)         LDLM_SET_FLAG((_l), 1ULL << 57)
-
 /**
  * This flags means to use non-delay RPC to send dlm request RPC.
  */
index 727cc5c..4770e7c 100644 (file)
@@ -2446,10 +2446,11 @@ enum ldlm_mode {
        LCK_NL          = 32,
        LCK_GROUP       = 64,
        LCK_COS         = 128,
+       LCK_TXN         = 256,
        LCK_MAXMODE
 };
 
-#define LCK_MODE_NUM    8
+#define LCK_MODE_NUM    9
 
 enum ldlm_type {
        LDLM_PLAIN      = 10,
@@ -2480,6 +2481,8 @@ struct ldlm_inodebits {
                __u64 cancel_bits; /* for lock convert */
        };
        __u64 li_gid;
+       __u32 li_padding;
+       __u32 li_initiator_id; /* index of MDT that initiated this lock */
 };
 
 struct ldlm_flock_wire {
index 42640f1..0fc4c1b 100644 (file)
@@ -143,6 +143,23 @@ restart:
        RETURN(rc);
 }
 
+/* lock of COS mode is compatible with locks from the same client. */
+static inline bool ldlm_cos_same_client(const struct ldlm_lock *req,
+                                       const struct ldlm_lock *lock)
+{
+       return lock->l_req_mode == LCK_COS &&
+              lock->l_client_cookie == req->l_client_cookie;
+}
+
+/* lock of TXN mode is compatible with locks from the same MDT. */
+static inline bool ldlm_txn_same_server(const struct ldlm_lock *req,
+                                       const struct ldlm_lock *lock)
+{
+       return lock->l_req_mode == LCK_TXN &&
+              lock->l_policy_data.l_inodebits.li_initiator_id ==
+                       req->l_policy_data.l_inodebits.li_initiator_id;
+}
+
 /**
  * Determine if the lock is compatible with all locks on the queue.
  *
@@ -202,16 +219,6 @@ ldlm_inodebits_compat_queue(struct list_head *queue, struct ldlm_lock *req,
                mode_tail = &list_entry(lock->l_sl_mode.prev, struct ldlm_lock,
                                        l_sl_mode)->l_res_link;
 
-               /* if request lock is not COS_INCOMPAT and COS is disabled,
-                * they are compatible, IOW this request is from a local
-                * transaction on a DNE system. */
-               if (lock->l_req_mode == LCK_COS && !ldlm_is_cos_incompat(req) &&
-                   !ldlm_is_cos_enabled(req)) {
-                       /* jump to last lock in mode group */
-                       tmp = mode_tail;
-                       continue;
-               }
-
                if (lockmode_compat(lock->l_req_mode, req_mode)) {
                        /* non group locks are compatible, bits don't matter */
                        if (likely(req_mode != LCK_GROUP)) {
@@ -232,8 +239,16 @@ ldlm_inodebits_compat_queue(struct list_head *queue, struct ldlm_lock *req,
                                ldlm_resource_insert_lock_after(lock, req);
                                RETURN(0);
                        }
+               } else if (ldlm_cos_same_client(req, lock) ||
+                          ldlm_txn_same_server(req, lock)) {
+                       /* COS/TXN locks need to be checked one by one,
+                        * because client cookie or initiator id may be
+                        * different for locks in mode/policy skiplist.
+                        */
+                       continue;
                }
 
+
                /* GROUP locks are placed to a head of the waiting list, but
                 * grouped by gid. */
                if (unlikely(req_mode == LCK_GROUP && !ldlm_is_granted(lock))) {
@@ -276,15 +291,6 @@ ldlm_inodebits_compat_queue(struct list_head *queue, struct ldlm_lock *req,
 
                        /* Locks with overlapping bits conflict. */
                        if (lock->l_policy_data.l_inodebits.bits & req_bits) {
-                               /* COS lock mode has a special compatibility
-                                * requirement: it is only compatible with
-                                * locks from the same client. */
-                               if (lock->l_req_mode == LCK_COS &&
-                                   !ldlm_is_cos_incompat(req) &&
-                                   ldlm_is_cos_enabled(req) &&
-                                   lock->l_client_cookie == req->l_client_cookie)
-                                       goto skip_work_list;
-
                                compat = 0;
 
                                if (unlikely(lock->l_req_mode == LCK_GROUP)) {
@@ -452,6 +458,8 @@ void ldlm_ibits_policy_wire_to_local(const union ldlm_wire_policy_data *wpolicy,
                                     union ldlm_policy_data *lpolicy)
 {
        lpolicy->l_inodebits.bits = wpolicy->l_inodebits.bits;
+       lpolicy->l_inodebits.li_initiator_id =
+               wpolicy->l_inodebits.li_initiator_id;
        /**
         * try_bits and li_gid are to be handled outside of generic
         * write_to_local due to different behavior on a server and client.
@@ -465,6 +473,8 @@ void ldlm_ibits_policy_local_to_wire(const union ldlm_policy_data *lpolicy,
        wpolicy->l_inodebits.bits = lpolicy->l_inodebits.bits;
        wpolicy->l_inodebits.try_bits = lpolicy->l_inodebits.try_bits;
        wpolicy->l_inodebits.li_gid = lpolicy->l_inodebits.li_gid;
+       wpolicy->l_inodebits.li_initiator_id =
+               lpolicy->l_inodebits.li_initiator_id;
 }
 
 /**
index c20d102..371ae8e 100644 (file)
@@ -3399,6 +3399,7 @@ enum ldlm_mode lck_compat_array[] = {
        [LCK_NL]    = LCK_COMPAT_NL,
        [LCK_GROUP] = LCK_COMPAT_GROUP,
        [LCK_COS]   = LCK_COMPAT_COS,
+       [LCK_TXN]   = LCK_COMPAT_TXN,
 };
 
 /**
index df40beb..b17fa40 100644 (file)
@@ -56,7 +56,8 @@ char *ldlm_lockname[] = {
        [LCK_CR] = "CR",
        [LCK_NL] = "NL",
        [LCK_GROUP] = "GROUP",
-       [LCK_COS] = "COS"
+       [LCK_COS] = "COS",
+       [LCK_TXN] = "TXN"
 };
 EXPORT_SYMBOL(ldlm_lockname);
 
@@ -753,18 +754,18 @@ EXPORT_SYMBOL(ldlm_lock_addref);
 void ldlm_lock_addref_internal_nolock(struct ldlm_lock *lock,
                                      enum ldlm_mode mode)
 {
-        ldlm_lock_remove_from_lru(lock);
-        if (mode & (LCK_NL | LCK_CR | LCK_PR)) {
-                lock->l_readers++;
-                lu_ref_add_atomic(&lock->l_reference, "reader", lock);
-        }
-        if (mode & (LCK_EX | LCK_CW | LCK_PW | LCK_GROUP | LCK_COS)) {
-                lock->l_writers++;
-                lu_ref_add_atomic(&lock->l_reference, "writer", lock);
-        }
-        LDLM_LOCK_GET(lock);
-        lu_ref_add_atomic(&lock->l_reference, "user", lock);
-        LDLM_DEBUG(lock, "ldlm_lock_addref(%s)", ldlm_lockname[mode]);
+       ldlm_lock_remove_from_lru(lock);
+       if (mode & (LCK_NL | LCK_CR | LCK_PR)) {
+               lock->l_readers++;
+               lu_ref_add_atomic(&lock->l_reference, "reader", lock);
+       }
+       if (mode & (LCK_EX | LCK_CW | LCK_PW | LCK_GROUP | LCK_COS | LCK_TXN)) {
+               lock->l_writers++;
+               lu_ref_add_atomic(&lock->l_reference, "writer", lock);
+       }
+       LDLM_LOCK_GET(lock);
+       lu_ref_add_atomic(&lock->l_reference, "user", lock);
+       LDLM_DEBUG(lock, "ldlm_lock_addref(%s)", ldlm_lockname[mode]);
 }
 
 /**
@@ -818,20 +819,20 @@ void ldlm_lock_addref_internal(struct ldlm_lock *lock, enum ldlm_mode mode)
 void ldlm_lock_decref_internal_nolock(struct ldlm_lock *lock,
                                      enum ldlm_mode mode)
 {
-        LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
-        if (mode & (LCK_NL | LCK_CR | LCK_PR)) {
-                LASSERT(lock->l_readers > 0);
-                lu_ref_del(&lock->l_reference, "reader", lock);
-                lock->l_readers--;
-        }
-        if (mode & (LCK_EX | LCK_CW | LCK_PW | LCK_GROUP | LCK_COS)) {
-                LASSERT(lock->l_writers > 0);
-                lu_ref_del(&lock->l_reference, "writer", lock);
-                lock->l_writers--;
-        }
+       LDLM_DEBUG(lock, "ldlm_lock_decref(%s)", ldlm_lockname[mode]);
+       if (mode & (LCK_NL | LCK_CR | LCK_PR)) {
+               LASSERT(lock->l_readers > 0);
+               lu_ref_del(&lock->l_reference, "reader", lock);
+               lock->l_readers--;
+       }
+       if (mode & (LCK_EX | LCK_CW | LCK_PW | LCK_GROUP | LCK_COS | LCK_TXN)) {
+               LASSERT(lock->l_writers > 0);
+               lu_ref_del(&lock->l_reference, "writer", lock);
+               lock->l_writers--;
+       }
 
-        lu_ref_del(&lock->l_reference, "user", lock);
-        LDLM_LOCK_RELEASE(lock);    /* matches the LDLM_LOCK_GET() in addref */
+       lu_ref_del(&lock->l_reference, "user", lock);
+       LDLM_LOCK_RELEASE(lock);    /* matches the LDLM_LOCK_GET() in addref */
 }
 
 /**
@@ -1786,22 +1787,23 @@ enum ldlm_error ldlm_lock_enqueue(const struct lu_env *env,
 #endif
        ENTRY;
 
-        /* policies are not executed on the client or during replay */
-        if ((*flags & (LDLM_FL_HAS_INTENT|LDLM_FL_REPLAY)) == LDLM_FL_HAS_INTENT
-            && !local && ns->ns_policy) {
+       /* policies are not executed on the client or during replay */
+       if ((*flags & (LDLM_FL_HAS_INTENT|LDLM_FL_REPLAY)) == LDLM_FL_HAS_INTENT
+           && !local && ns->ns_policy) {
                rc = ns->ns_policy(env, ns, lockp, cookie, lock->l_req_mode,
                                   *flags, NULL);
-                if (rc == ELDLM_LOCK_REPLACED) {
-                        /* The lock that was returned has already been granted,
-                         * and placed into lockp.  If it's not the same as the
-                         * one we passed in, then destroy the old one and our
-                         * work here is done. */
-                        if (lock != *lockp) {
-                                ldlm_lock_destroy(lock);
-                                LDLM_LOCK_RELEASE(lock);
-                        }
-                        *flags |= LDLM_FL_LOCK_CHANGED;
-                        RETURN(0);
+               if (rc == ELDLM_LOCK_REPLACED) {
+                       /* The lock that was returned has already been granted,
+                        * and placed into lockp.  If it's not the same as the
+                        * one we passed in, then destroy the old one and our
+                        * work here is done.
+                        */
+                       if (lock != *lockp) {
+                               ldlm_lock_destroy(lock);
+                               LDLM_LOCK_RELEASE(lock);
+                       }
+                       *flags |= LDLM_FL_LOCK_CHANGED;
+                       RETURN(0);
                } else if (rc != ELDLM_OK &&
                           ldlm_is_granted(lock)) {
                        LASSERT(*flags & LDLM_FL_RESENT);
@@ -1810,14 +1812,15 @@ enum ldlm_error ldlm_lock_enqueue(const struct lu_env *env,
                         * error occurs. It is unclear if lock reached the
                         * client in the original reply, just leave the lock on
                         * server, not returning it again to client. Due to
-                        * LU-6529, the server will not OOM. */
+                        * LU-6529, the server will not OOM.
+                        */
                        RETURN(rc);
-                } else if (rc != ELDLM_OK ||
-                           (rc == ELDLM_OK && (*flags & LDLM_FL_INTENT_ONLY))) {
-                        ldlm_lock_destroy(lock);
-                        RETURN(rc);
-                }
-        }
+               } else if (rc != ELDLM_OK ||
+                          (rc == ELDLM_OK && (*flags & LDLM_FL_INTENT_ONLY))) {
+                       ldlm_lock_destroy(lock);
+                       RETURN(rc);
+               }
+       }
 
        if (*flags & LDLM_FL_RESENT) {
                /* Reconstruct LDLM_FL_SRV_ENQ_MASK @flags for reply.
@@ -1878,24 +1881,24 @@ enum ldlm_error ldlm_lock_enqueue(const struct lu_env *env,
                res = lock_res_and_lock(lock);
        }
        if (local && ldlm_is_granted(lock)) {
-                /* The server returned a blocked lock, but it was granted
-                 * before we got a chance to actually enqueue it.  We don't
-                 * need to do anything else. */
-                *flags &= ~LDLM_FL_BLOCKED_MASK;
+               /* The server returned a blocked lock, but it was granted
+                * before we got a chance to actually enqueue it.  We don't
+                * need to do anything else. */
+               *flags &= ~LDLM_FL_BLOCKED_MASK;
                GOTO(out, rc = ELDLM_OK);
-        }
+       }
 
-        ldlm_resource_unlink_lock(lock);
-        if (res->lr_type == LDLM_EXTENT && lock->l_tree_node == NULL) {
-                if (node == NULL) {
-                        ldlm_lock_destroy_nolock(lock);
-                        GOTO(out, rc = -ENOMEM);
-                }
+       ldlm_resource_unlink_lock(lock);
+       if (res->lr_type == LDLM_EXTENT && lock->l_tree_node == NULL) {
+               if (node == NULL) {
+                       ldlm_lock_destroy_nolock(lock);
+                       GOTO(out, rc = -ENOMEM);
+               }
 
                INIT_LIST_HEAD(&node->li_group);
-                ldlm_interval_attach(node, lock);
-                node = NULL;
-        }
+               ldlm_interval_attach(node, lock);
+               node = NULL;
+       }
 
        /* Some flags from the enqueue want to make it into the AST, via the
         * lock's l_flags. */
@@ -1903,10 +1906,6 @@ enum ldlm_error ldlm_lock_enqueue(const struct lu_env *env,
                ldlm_set_ast_discard_data(lock);
        if (*flags & LDLM_FL_TEST_LOCK)
                ldlm_set_test_lock(lock);
-       if (*flags & LDLM_FL_COS_INCOMPAT)
-               ldlm_set_cos_incompat(lock);
-       if (*flags & LDLM_FL_COS_ENABLED)
-               ldlm_set_cos_enabled(lock);
 
        /* This distinction between local lock trees is very important; a client
         * namespace only has information about locks taken by that client, and
@@ -1919,7 +1918,7 @@ enum ldlm_error ldlm_lock_enqueue(const struct lu_env *env,
         *
         * FIXME (bug 268): Detect obvious lies by checking compatibility in
         * granted queue. */
-        if (local) {
+       if (local) {
                if (*flags & (LDLM_FL_BLOCK_WAIT | LDLM_FL_BLOCK_GRANTED))
                        ldlm_resource_add_lock(res, &res->lr_waiting, lock);
                else
@@ -1940,15 +1939,15 @@ enum ldlm_error ldlm_lock_enqueue(const struct lu_env *env,
        rc = ldlm_lock_enqueue_helper(lock, flags);
        GOTO(out, rc);
 #else
-        } else {
-                CERROR("This is client-side-only module, cannot handle "
-                       "LDLM_NAMESPACE_SERVER resource type lock.\n");
-                LBUG();
-        }
+       } else {
+               CERROR("This is client-side-only module, cannot handle "
+                      "LDLM_NAMESPACE_SERVER resource type lock.\n");
+               LBUG();
+       }
 #endif
 
 out:
-        unlock_res_and_lock(lock);
+       unlock_res_and_lock(lock);
 
 #ifdef HAVE_SERVER_SUPPORT
        if (reconstruct) {
@@ -1959,9 +1958,9 @@ out:
                                  req, 0, NULL, false, 0);
        }
 #endif
-        if (node)
-                OBD_SLAB_FREE(node, ldlm_interval_slab, sizeof(*node));
-        return rc;
+       if (node)
+               OBD_SLAB_FREE(node, ldlm_interval_slab, sizeof(*node));
+       return rc;
 }
 
 #ifdef HAVE_SERVER_SUPPORT
@@ -2178,7 +2177,17 @@ ldlm_work_bl_ast_lock(struct ptlrpc_request_set *rqset, void *opaq)
         */
        bld.bl_same_client = lock->l_client_cookie ==
                             lock->l_blocking_lock->l_client_cookie;
-       bld.bl_cos_incompat = ldlm_is_cos_incompat(lock->l_blocking_lock);
+       /* if two locks are initiated from the same MDT, the transactions are
+        * independent; and if the request lock mode is CR|PR|CW, there is no
+        * need to trigger CoS because current lock will be downgraded to TXN
+        * mode soon, then the blocking lock can be granted.
+        */
+       if (lock->l_blocking_lock->l_policy_data.l_inodebits.li_initiator_id ==
+               lock->l_policy_data.l_inodebits.li_initiator_id ||
+           lock->l_blocking_lock->l_req_mode & (LCK_CR | LCK_PR | LCK_CW))
+               bld.bl_txn_dependent = false;
+       else
+               bld.bl_txn_dependent = true;
        arg->bl_desc = &bld;
 
        LASSERT(ldlm_is_ast_sent(lock));
@@ -2708,7 +2717,7 @@ int ldlm_export_cancel_locks(struct obd_export *exp)
 }
 
 /**
- * Downgrade an PW/EX lock to COS | CR mode.
+ * Downgrade a PW/EX lock to COS, TXN or CR mode.
  *
  * A lock mode convertion from PW/EX mode to less conflict mode. The
  * convertion may fail if lock was canceled before downgrade, but it doesn't
@@ -2720,6 +2729,8 @@ int ldlm_export_cancel_locks(struct obd_export *exp)
  * things are cleared, so any pending or new blocked lock on that lock will
  * cause new call to blocking_ast and force resource object commit.
  *
+ * Used by DNE to force commit upon operation dependency.
+ *
  * Also used by layout_change to replace EX lock to CR lock.
  *
  * \param lock A lock to convert
@@ -2730,7 +2741,8 @@ void ldlm_lock_mode_downgrade(struct ldlm_lock *lock, enum ldlm_mode new_mode)
 #ifdef HAVE_SERVER_SUPPORT
        ENTRY;
 
-       LASSERT(new_mode == LCK_COS || new_mode == LCK_CR);
+       LASSERT(new_mode == LCK_COS || new_mode == LCK_TXN ||
+               new_mode == LCK_CR);
 
        lock_res_and_lock(lock);
 
@@ -2887,7 +2899,7 @@ void _ldlm_lock_debug(struct ldlm_lock *lock,
 
        case LDLM_IBITS:
                libcfs_debug_msg(msgdata,
-                                "%pV ns: %s lock: %p/%#llx lrc: %d/%d,%d mode: %s/%s res: " DLDLMRES " bits %#llx/%#llx rrc: %d type: %s gid %llu flags: %#llx nid: %s remote: %#llx expref: %d pid: %u timeout: %lld lvb_type: %d\n",
+                                "%pV ns: %s lock: %p/%#llx lrc: %d/%d,%d mode: %s/%s res: " DLDLMRES " bits %#llx/%#llx rrc: %d type: %s gid %llu flags: %#llx nid: %s remote: %#llx expref: %d pid: %u timeout: %lld lvb_type: %d initiator: MDT%d\n",
                                 &vaf,
                                 ldlm_lock_to_ns_name(lock),
                                 lock, lock->l_handle.h_cookie,
@@ -2905,7 +2917,8 @@ void _ldlm_lock_debug(struct ldlm_lock *lock,
                                 lock->l_remote_handle.cookie,
                                 exp ? refcount_read(&exp->exp_handle.h_ref) : -99,
                                 lock->l_pid, lock->l_callback_timestamp,
-                                lock->l_lvb_type);
+                                lock->l_lvb_type,
+                                lock->l_policy_data.l_inodebits.li_initiator_id);
                break;
 
        default:
index 8703cb0..d9b53ad 100644 (file)
@@ -373,6 +373,7 @@ static int __lfsck_ibits_lock(const struct lu_env *env,
 
        memset(policy, 0, sizeof(*policy));
        policy->l_inodebits.bits = bits;
+       policy->l_inodebits.li_initiator_id = lfsck_dev_idx(lfsck);
        if (dt_object_remote(obj)) {
                struct ldlm_enqueue_info *einfo = &info->lti_einfo;
 
index ccf3b7a..ae0499f 100644 (file)
@@ -6841,10 +6841,6 @@ static int lod_object_lock(const struct lu_env *env,
                        ldlm_completion_callback completion = einfo->ei_cb_cp;
                        __u64 dlmflags = LDLM_FL_ATOMIC_CB;
 
-                       if (einfo->ei_mode == LCK_PW ||
-                           einfo->ei_mode == LCK_EX)
-                               dlmflags |= LDLM_FL_COS_INCOMPAT;
-
                        LASSERT(ns != NULL);
                        rc = ldlm_cli_enqueue_local(env, ns, res_id, LDLM_IBITS,
                                                    policy, einfo->ei_mode,
index 3149d37..d49e65c 100644 (file)
@@ -1614,7 +1614,7 @@ static int hsm_cdt_request_completed(struct mdt_thread_info *mti,
                        /* flush UPDATE lock so attributes are upadated */
                        lh = &mti->mti_lh[MDT_LH_OLD];
                        mdt_object_lock(mti, obj, lh, MDS_INODELOCK_UPDATE,
-                                       LCK_EX, false);
+                                       LCK_EX);
                        mdt_object_unlock(mti, obj, lh, 1);
                }
        }
index a8f86b9..ee5a3e5 100644 (file)
@@ -1773,7 +1773,7 @@ int mdt_layout_change(struct mdt_thread_info *info, struct mdt_object *obj,
                if (layout->mlc_opc == MD_LAYOUT_WRITE)
                        lockpart |= MDS_INODELOCK_UPDATE;
 
-               rc = mdt_object_lock(info, obj, lhc, lockpart, LCK_EX, false);
+               rc = mdt_object_lock(info, obj, lhc, lockpart, LCK_EX);
                if (rc)
                        RETURN(rc);
        }
@@ -1873,12 +1873,12 @@ static int mdt_swap_layouts(struct tgt_session_info *tsi)
        lh1 = &info->mti_lh[MDT_LH_NEW];
        lh2 = &info->mti_lh[MDT_LH_OLD];
        rc = mdt_object_lock(info, o1, lh1, MDS_INODELOCK_LAYOUT |
-                            MDS_INODELOCK_XATTR, LCK_EX, false);
+                            MDS_INODELOCK_XATTR, LCK_EX);
        if (rc < 0)
                GOTO(put, rc);
 
        rc = mdt_object_lock(info, o2, lh2, MDS_INODELOCK_LAYOUT |
-                            MDS_INODELOCK_XATTR, LCK_EX, false);
+                            MDS_INODELOCK_XATTR, LCK_EX);
        if (rc < 0)
                GOTO(unlock1, rc);
 
@@ -2102,7 +2102,7 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info,
                                        MDS_INODELOCK_LAYOUT);
                        child_bits |= MDS_INODELOCK_PERM;
                        rc = mdt_object_lock(info, child, lhc, child_bits,
-                                            LCK_PR, false);
+                                            LCK_PR);
                        if (rc < 0)
                                RETURN(rc);
                }
@@ -2257,8 +2257,7 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info,
                /* step 1: lock parent only if parent is a directory */
                if (S_ISDIR(lu_object_attr(&parent->mot_obj))) {
                        lhp = &info->mti_lh[MDT_LH_PARENT];
-                       rc = mdt_parent_lock(info, parent, lhp, lname, LCK_PR,
-                                             false);
+                       rc = mdt_parent_lock(info, parent, lhp, lname, LCK_PR);
                        if (unlikely(rc != 0))
                                RETURN(rc);
                }
@@ -2373,7 +2372,7 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info,
                        /* try layout lock, it may fail to be granted due to
                         * contention at LOOKUP or UPDATE */
                        rc = mdt_object_lock_try(info, child, lhc, &child_bits,
-                                                try_bits, LCK_PR, false);
+                                                try_bits, LCK_PR);
                        if (child_bits & MDS_INODELOCK_LAYOUT)
                                ma_need |= MA_LOV;
                } else {
@@ -2381,10 +2380,10 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info,
                         * client will enqueue the lock to the remote MDT */
                        if (mdt_object_remote(child))
                                rc = mdt_object_lookup_lock(info, NULL, child,
-                                                           lhc, LCK_PR, false);
+                                                           lhc, LCK_PR);
                        else
                                rc = mdt_object_lock(info, child, lhc,
-                                                    child_bits, LCK_PR, false);
+                                                    child_bits, LCK_PR);
                }
                if (unlikely(rc != 0))
                        GOTO(out_child, rc);
@@ -2530,7 +2529,6 @@ static int mdt_rmfid_unlink(struct mdt_thread_info *info,
        struct mdt_lock_handle *parent_lh;
        struct mdt_lock_handle *child_lh;
        struct mdt_object *pobj;
-       bool cos_incompat = false;
        int rc;
        ENTRY;
 
@@ -2538,11 +2536,8 @@ static int mdt_rmfid_unlink(struct mdt_thread_info *info,
        if (IS_ERR(pobj))
                GOTO(out, rc = PTR_ERR(pobj));
 
-       if (mdt_object_remote(pobj))
-               cos_incompat = true;
-
        parent_lh = &info->mti_lh[MDT_LH_PARENT];
-       rc = mdt_parent_lock(info, pobj, parent_lh, name, LCK_PW, cos_incompat);
+       rc = mdt_parent_lock(info, pobj, parent_lh, name, LCK_PW);
        if (rc != 0)
                GOTO(put_parent, rc);
 
@@ -2557,8 +2552,7 @@ static int mdt_rmfid_unlink(struct mdt_thread_info *info,
        child_lh = &info->mti_lh[MDT_LH_CHILD];
        rc = mdt_object_stripes_lock(info, pobj, obj, child_lh, einfo,
                                     MDS_INODELOCK_LOOKUP |
-                                    MDS_INODELOCK_UPDATE,
-                                    LCK_EX, cos_incompat);
+                                    MDS_INODELOCK_UPDATE, LCK_EX);
        if (rc != 0)
                GOTO(unlock_parent, rc);
 
@@ -3589,8 +3583,8 @@ int mdt_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
        struct ldlm_cb_set_arg *arg = data;
        bool commit_async = false;
        int rc;
-       ENTRY;
 
+       ENTRY;
        if (flag == LDLM_CB_CANCELING)
                RETURN(0);
 
@@ -3610,25 +3604,26 @@ int mdt_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
                goto skip_cos_checks;
 
        if (lock->l_req_mode & (LCK_PW | LCK_EX)) {
-               if (mdt_cos_is_enabled(mdt)) {
-                       if (!arg->bl_desc->bl_same_client)
-                               mdt_set_lock_sync(lock);
+               if (mdt_cos_is_enabled(mdt) &&
+                   !arg->bl_desc->bl_same_client) {
+                       mdt_set_lock_sync(lock);
                } else if (mdt_slc_is_enabled(mdt) &&
-                          arg->bl_desc->bl_cos_incompat) {
+                          arg->bl_desc->bl_txn_dependent) {
                        mdt_set_lock_sync(lock);
-                       /*
-                        * we may do extra commit here, but there is a small
-                        * window to miss a commit: lock was unlocked (saved),
-                        * then a conflict lock queued and we come here, but
-                        * REP-ACK not received, so lock was not converted to
-                        * COS mode yet.
-                        * Fortunately this window is quite small, so the
-                        * extra commit should be rare (not to say distributed
-                        * operation is rare too).
+                       /* we may do extra commit here, because there is a small
+                        * window to miss a commit:
+                        * 1. lock was unlocked (saved), but not downgraded to
+                        * TXN mode yet (REP-ACK not received).
+                        * 2. a conflict lock enqueued and we come here; if we
+                        * don't trigger commit now, the enqueued lock will wait
+                        * until the system periodic commit.
+                        *
+                        * Fortunately this window is quite small, not to say
+                        * distributed operation is rare too.
                         */
                        commit_async = true;
                }
-       } else if (lock->l_req_mode == LCK_COS) {
+       } else if (lock->l_req_mode == LCK_COS || lock->l_req_mode == LCK_TXN) {
                commit_async = true;
        }
 
@@ -3816,7 +3811,7 @@ static int mdt_remote_object_lock_try(struct mdt_thread_info *mti,
  */
 int mdt_object_pdo_lock(struct mdt_thread_info *info, struct mdt_object *obj,
                        struct mdt_lock_handle *lh, const struct lu_name *name,
-                       enum ldlm_mode mode, bool pdo_lock, bool cos_incompat)
+                       enum ldlm_mode mode, bool pdo_lock)
 {
        struct ldlm_namespace *ns = info->mti_mdt->mdt_namespace;
        union ldlm_policy_data *policy = &info->mti_policy;
@@ -3841,16 +3836,10 @@ int mdt_object_pdo_lock(struct mdt_thread_info *info, struct mdt_object *obj,
        if (!S_ISDIR(lu_object_attr(&obj->mot_obj)))
                return -ENOTDIR;
 
-       if (cos_incompat) {
-               LASSERT(mode == LCK_PW || mode == LCK_EX);
-               dlmflags |= LDLM_FL_COS_INCOMPAT;
-       } else if (mdt_cos_is_enabled(info->mti_mdt)) {
-               dlmflags |= LDLM_FL_COS_ENABLED;
-       }
-
        policy->l_inodebits.bits = MDS_INODELOCK_UPDATE;
        policy->l_inodebits.try_bits = 0;
        policy->l_inodebits.li_gid = 0;
+       policy->l_inodebits.li_initiator_id = mdt_node_id(info->mti_mdt);
        fid_build_reg_res_name(mdt_object_fid(obj), res_id);
        if (info->mti_exp)
                cookie = &info->mti_exp->exp_handle.h_cookie;
@@ -3885,7 +3874,7 @@ int mdt_object_pdo_lock(struct mdt_thread_info *info, struct mdt_object *obj,
 int mdt_object_lock_internal(struct mdt_thread_info *info,
                             struct mdt_object *obj, const struct lu_fid *fid,
                             struct mdt_lock_handle *lh, __u64 *ibits,
-                            __u64 trybits, bool cache, bool cos_incompat)
+                            __u64 trybits, bool cache)
 {
        union ldlm_policy_data *policy = &info->mti_policy;
        struct ldlm_res_id *res_id = &info->mti_res_id;
@@ -3895,6 +3884,7 @@ int mdt_object_lock_internal(struct mdt_thread_info *info,
        policy->l_inodebits.bits = *ibits;
        policy->l_inodebits.try_bits = trybits;
        policy->l_inodebits.li_gid = lh->mlh_gid;
+       policy->l_inodebits.li_initiator_id = mdt_node_id(info->mti_mdt);
        fid_build_reg_res_name(fid, res_id);
 
        if (obj && mdt_object_remote(obj)) {
@@ -3921,14 +3911,6 @@ int mdt_object_lock_internal(struct mdt_thread_info *info,
                LASSERT(lh->mlh_reg_mode != LCK_MINMODE);
                LASSERT(lh->mlh_type != MDT_NUL_LOCK);
 
-               if (cos_incompat) {
-                       LASSERT(lh->mlh_reg_mode == LCK_PW ||
-                               lh->mlh_reg_mode == LCK_EX);
-                       dlmflags |= LDLM_FL_COS_INCOMPAT;
-               } else if (mdt_cos_is_enabled(info->mti_mdt)) {
-                       dlmflags |= LDLM_FL_COS_ENABLED;
-               }
-
                /* Lease lock are granted with LDLM_FL_CANCEL_ON_BLOCK */
                if (lh->mlh_type == MDT_REG_LOCK &&
                    lh->mlh_reg_mode == LCK_EX && *ibits == MDS_INODELOCK_OPEN)
@@ -3988,20 +3970,19 @@ int mdt_object_lock_internal(struct mdt_thread_info *info,
  * \param lh           lock handle
  * \param ibits                MDS inode lock bits
  * \param mode         lock mode
- * \param cos_incompat DNE COS incompatible
  *
  * \retval             0 on success, -ev on error.
  */
 int mdt_object_lock(struct mdt_thread_info *info, struct mdt_object *obj,
                    struct mdt_lock_handle *lh, __u64 ibits,
-                   enum ldlm_mode mode, bool cos_incompat)
+                   enum ldlm_mode mode)
 {
        int rc;
 
        ENTRY;
        mdt_lock_reg_init(lh, mode);
        rc = mdt_object_lock_internal(info, obj, mdt_object_fid(obj), lh,
-                                     &ibits, 0, false, cos_incompat);
+                                     &ibits, 0, false);
        RETURN(rc);
 }
 
@@ -4020,14 +4001,13 @@ int mdt_object_lock(struct mdt_thread_info *info, struct mdt_object *obj,
  * \param lh           lock handle
  * \param ibits                MDS inode lock bits
  * \param mode         lock mode
- * \param cos_incompat DNE COS incompatible
  *
  * \retval             0 on success, -ev on error.
  */
 int mdt_object_check_lock(struct mdt_thread_info *info,
                          struct mdt_object *parent, struct mdt_object *child,
                          struct mdt_lock_handle *lh, __u64 ibits,
-                         enum ldlm_mode mode, bool cos_incompat)
+                         enum ldlm_mode mode)
 {
        int rc;
 
@@ -4046,8 +4026,7 @@ int mdt_object_check_lock(struct mdt_thread_info *info,
 
                rc = mdt_object_lock_internal(info, parent,
                                              mdt_object_fid(child), lh,
-                                             &lookup_ibits, 0, false,
-                                             cos_incompat);
+                                             &lookup_ibits, 0, false);
                if (rc)
                        RETURN(rc);
 
@@ -4055,7 +4034,7 @@ int mdt_object_check_lock(struct mdt_thread_info *info,
        }
 
        rc = mdt_object_lock_internal(info, child, mdt_object_fid(child), lh,
-                                     &ibits, 0, false, cos_incompat);
+                                     &ibits, 0, false);
        if (rc && !(ibits & MDS_INODELOCK_LOOKUP))
                mdt_object_unlock(info, NULL, lh, 1);
 
@@ -4072,13 +4051,12 @@ int mdt_object_check_lock(struct mdt_thread_info *info,
  * \param lh   lock handle
  * \param lname        child name
  * \param mode lock mode
- * \param cos_incompat DNE COS incompatible
  *
  * \retval     0 on success, -ev on error.
  */
 int mdt_parent_lock(struct mdt_thread_info *info, struct mdt_object *obj,
                    struct mdt_lock_handle *lh, const struct lu_name *lname,
-                   enum ldlm_mode mode, bool cos_incompat)
+                   enum ldlm_mode mode)
 {
        int rc;
 
@@ -4089,11 +4067,9 @@ int mdt_parent_lock(struct mdt_thread_info *info, struct mdt_object *obj,
 
                mdt_lock_reg_init(lh, mode);
                rc = mdt_object_lock_internal(info, obj, mdt_object_fid(obj),
-                                             lh, &ibits, 0, false,
-                                             cos_incompat);
+                                             lh, &ibits, 0, false);
        } else {
-               rc = mdt_object_pdo_lock(info, obj, lh, lname, mode, true,
-                                        cos_incompat);
+               rc = mdt_object_pdo_lock(info, obj, lh, lname, mode, true);
        }
        RETURN(rc);
 }
@@ -4112,13 +4088,12 @@ int mdt_parent_lock(struct mdt_thread_info *info, struct mdt_object *obj,
  * \param ibits                MDS inode lock bits
  * \param trybits      optional inode lock bits
  * \param mode         lock mode
- * \param cos_incompat DNE COS incompatible
  *
  * \retval             0 on success, -ev on error.
  */
 int mdt_object_lock_try(struct mdt_thread_info *info, struct mdt_object *obj,
                        struct mdt_lock_handle *lh, __u64 *ibits,
-                       __u64 trybits, enum ldlm_mode mode, bool cos_incompat)
+                       __u64 trybits, enum ldlm_mode mode)
 {
        bool trylock_only = *ibits == 0;
        int rc;
@@ -4127,7 +4102,7 @@ int mdt_object_lock_try(struct mdt_thread_info *info, struct mdt_object *obj,
        LASSERT(!(*ibits & trybits));
        mdt_lock_reg_init(lh, mode);
        rc = mdt_object_lock_internal(info, obj, mdt_object_fid(obj), lh, ibits,
-                                     trybits, false, cos_incompat);
+                                     trybits, false);
        if (rc && trylock_only) { /* clear error for try ibits lock only */
                LASSERT(*ibits == 0);
                rc = 0;
@@ -4142,8 +4117,7 @@ int mdt_object_lock_try(struct mdt_thread_info *info, struct mdt_object *obj,
  */
 int mdt_object_lookup_lock(struct mdt_thread_info *info,
                           struct mdt_object *pobj, struct mdt_object *obj,
-                          struct mdt_lock_handle *lh, enum ldlm_mode mode,
-                          bool cos_incompat)
+                          struct mdt_lock_handle *lh, enum ldlm_mode mode)
 {
        __u64 ibits = MDS_INODELOCK_LOOKUP;
        int rc;
@@ -4155,7 +4129,7 @@ int mdt_object_lookup_lock(struct mdt_thread_info *info,
        LASSERT(ergo(!pobj, mdt_object_remote(obj)));
        mdt_lock_reg_init(lh, mode);
        rc = mdt_object_lock_internal(info, pobj, mdt_object_fid(obj), lh,
-                                     &ibits, 0, false, cos_incompat);
+                                     &ibits, 0, false);
        RETURN(rc);
 }
 
@@ -4305,7 +4279,7 @@ struct mdt_object *mdt_object_find_lock(struct mdt_thread_info *info,
        if (!IS_ERR(o)) {
                int rc;
 
-               rc = mdt_object_lock(info, o, lh, ibits, mode, false);
+               rc = mdt_object_lock(info, o, lh, ibits, mode);
                if (rc != 0) {
                        mdt_object_put(info->mti_env, o);
                        o = ERR_PTR(rc);
@@ -4690,8 +4664,7 @@ static int mdt_intent_getxattr(enum ldlm_intent_flags it_opc,
        mdt_intent_fixup_resent(info, *lockp, lhc, flags);
        if (!lustre_handle_is_used(&lhc->mlh_reg_lh)) {
                rc = mdt_object_lock(info, info->mti_object, lhc,
-                                    MDS_INODELOCK_XATTR, (*lockp)->l_req_mode,
-                                    false);
+                                    MDS_INODELOCK_XATTR, (*lockp)->l_req_mode);
                if (rc)
                        return rc;
        }
index 234679d..63b7c30 100644 (file)
@@ -241,8 +241,7 @@ int mdt_hsm_state_get(struct tgt_session_info *tsi)
                GOTO(out, rc = err_serious(rc));
 
        lh = &info->mti_lh[MDT_LH_CHILD];
-       rc = mdt_object_lock(info, obj, lh, MDS_INODELOCK_LOOKUP, LCK_PR,
-                            false);
+       rc = mdt_object_lock(info, obj, lh, MDS_INODELOCK_LOOKUP, LCK_PR);
        if (rc < 0)
                GOTO(out_ucred, rc);
 
@@ -302,7 +301,7 @@ int mdt_hsm_state_set(struct tgt_session_info *tsi)
 
        lh = &info->mti_lh[MDT_LH_CHILD];
        rc = mdt_object_lock(info, obj, lh, MDS_INODELOCK_LOOKUP |
-                            MDS_INODELOCK_XATTR, LCK_PW, false);
+                            MDS_INODELOCK_XATTR, LCK_PW);
        if (rc < 0)
                GOTO(out_ucred, rc);
 
index 342f8d3..26e9a19 100644 (file)
@@ -721,6 +721,11 @@ static inline struct seq_server_site *mdt_seq_site(struct mdt_device *mdt)
        return &mdt->mdt_seq_site;
 }
 
+static inline u32 mdt_node_id(const struct mdt_device *mdt)
+{
+       return mdt->mdt_seq_site.ss_node_id;
+}
+
 static inline void mdt_export_evict(struct obd_export *exp)
 {
         class_fail_export(exp);
@@ -824,35 +829,34 @@ int mdt_check_resent_lock(struct mdt_thread_info *info, struct mdt_object *mo,
 
 int mdt_object_lock(struct mdt_thread_info *info, struct mdt_object *obj,
                    struct mdt_lock_handle *lh, __u64 ibits,
-                   enum ldlm_mode mode, bool cos_incompat);
+                   enum ldlm_mode mode);
 int mdt_parent_lock(struct mdt_thread_info *info, struct mdt_object *o,
                    struct mdt_lock_handle *lh, const struct lu_name *lname,
-                   enum ldlm_mode mode, bool cos_incompat);
+                   enum ldlm_mode mode);
 int mdt_object_stripes_lock(struct mdt_thread_info *info,
                            struct mdt_object *pobj, struct mdt_object *o,
                            struct mdt_lock_handle *lh,
                            struct ldlm_enqueue_info *einfo, __u64 ibits,
-                           enum ldlm_mode mode, bool cos_incompat);
+                           enum ldlm_mode mode);
 int mdt_object_check_lock(struct mdt_thread_info *info,
                          struct mdt_object *parent, struct mdt_object *child,
                          struct mdt_lock_handle *lh, __u64 ibits,
-                         enum ldlm_mode mode, bool cos_incompat);
+                         enum ldlm_mode mode);
 int mdt_object_lock_try(struct mdt_thread_info *info, struct mdt_object *mo,
                        struct mdt_lock_handle *lh, __u64 *ibits,
-                       __u64 trybits, enum ldlm_mode mode, bool cos_incompat);
+                       __u64 trybits, enum ldlm_mode mode);
 
 /* below three lock functions are used internally */
 int mdt_object_lock_internal(struct mdt_thread_info *info,
                             struct mdt_object *obj, const struct lu_fid *fid,
                             struct mdt_lock_handle *lh, __u64 *ibits,
-                            __u64 trybits, bool cache, bool cos_incompat);
+                            __u64 trybits, bool cache);
 int mdt_object_pdo_lock(struct mdt_thread_info *info, struct mdt_object *obj,
                        struct mdt_lock_handle *lh, const struct lu_name *name,
-                       enum ldlm_mode mode, bool pdo_lock, bool cos_incompat);
+                       enum ldlm_mode mode, bool pdo_lock);
 int mdt_object_lookup_lock(struct mdt_thread_info *info,
                           struct mdt_object *pobj, struct mdt_object *obj,
-                          struct mdt_lock_handle *lh, enum ldlm_mode mode,
-                          bool cos_incompat);
+                          struct mdt_lock_handle *lh, enum ldlm_mode mode);
 
 void mdt_object_unlock(struct mdt_thread_info *info, struct mdt_object *mo,
                       struct mdt_lock_handle *lh, int decref);
index 8194235..dedb58e 100644 (file)
@@ -1533,7 +1533,7 @@ int mdt_brw_enqueue(struct mdt_thread_info *mti, struct ldlm_namespace *ns,
                 * process.
                 */
                rc = mdt_object_lock_internal(mti, mo, mdt_object_fid(mo), lhc,
-                                             &ibits, 0, false, false);
+                                             &ibits, 0, false);
                if (rc)
                        GOTO(out, rc);
        }
@@ -1969,6 +1969,7 @@ void mdt_dom_discard_data(struct mdt_thread_info *info,
 
        policy.l_inodebits.bits = MDS_INODELOCK_DOM;
        policy.l_inodebits.try_bits = 0;
+       policy.l_inodebits.li_initiator_id = mdt_node_id(mdt);
        fid_build_reg_res_name(mdt_object_fid(mo), &res_id);
 
        /* Keep blocking version of discard for an old client to avoid
index a544550..59bc55a 100644 (file)
@@ -917,8 +917,7 @@ static int mdt_object_open_lock(struct mdt_thread_info *info,
        }
 
        if (*ibits | trybits)
-               rc = mdt_object_lock_try(info, obj, lhc, ibits, trybits, lm,
-                                        false);
+               rc = mdt_object_lock_try(info, obj, lhc, ibits, trybits, lm);
 
        CDEBUG(D_INODE, "%s: Requested bits lock:"DFID ", ibits = %#llx/%#llx"
               ", open_flags = %#llo, try_layout = %d : rc = %d\n",
@@ -944,7 +943,7 @@ static int mdt_object_open_lock(struct mdt_thread_info *info,
 
                LASSERT(!try_layout);
                rc = mdt_object_lock(info, obj, ll, MDS_INODELOCK_LAYOUT,
-                                    LCK_EX, false);
+                                    LCK_EX);
 
                CFS_FAIL_TIMEOUT(OBD_FAIL_MDS_LL_BLOCK, 2);
        }
@@ -1331,7 +1330,7 @@ static int mdt_lock_root_xattr(struct mdt_thread_info *info,
 
        mdt_lock_reg_init(lh, LCK_PR);
        rc = mdt_object_lock_internal(info, md_root, mdt_object_fid(md_root),
-                                     lh, &ibits, 0, true, false);
+                                     lh, &ibits, 0, true);
        if (rc < 0)
                return rc;
 
@@ -1498,7 +1497,7 @@ again_pw:
        if (lock_mode != LCK_NL) {
                lh = &info->mti_lh[MDT_LH_PARENT];
                result = mdt_parent_lock(info, parent, lh, &rr->rr_name,
-                                        lock_mode, false);
+                                        lock_mode);
                if (result != 0)
                        GOTO(out_parent, result);
 
@@ -1613,7 +1612,7 @@ again_pw:
                                GOTO(out_child, result = rc);
                        else if (rc > 0)
                                rc = mdt_object_lookup_lock(info, NULL, child,
-                                                           lhc, LCK_PR, false);
+                                                           lhc, LCK_PR);
                        repbody->mbo_fid1 = *mdt_object_fid(child);
                        repbody->mbo_valid |= (OBD_MD_FLID | OBD_MD_MDS);
                        if (rc != 0)
@@ -2052,7 +2051,7 @@ static int mdt_hsm_release(struct mdt_thread_info *info, struct mdt_object *o,
        ma->ma_hsm.mh_flags &= ~HS_RELEASED;
 
        rc = mdt_object_lock(info, o, lh, MDS_INODELOCK_LAYOUT |
-                            MDS_INODELOCK_XATTR, LCK_EX, false);
+                            MDS_INODELOCK_XATTR, LCK_EX);
        if (rc != 0)
                GOTO(out_close, rc);
 
@@ -2219,13 +2218,13 @@ static int mdt_close_handle_layouts(struct mdt_thread_info *info,
                GOTO(out_unlock_sem, rc = -ESTALE);
 
        rc = mdt_object_lock(info, o1, lh1, MDS_INODELOCK_LAYOUT |
-                            MDS_INODELOCK_XATTR, LCK_EX, false);
+                            MDS_INODELOCK_XATTR, LCK_EX);
        if (rc < 0)
                GOTO(out_unlock_sem, rc);
 
        if (o2) {
                rc = mdt_object_lock(info, o2, lh2, MDS_INODELOCK_LAYOUT |
-                                    MDS_INODELOCK_XATTR, LCK_EX, false);
+                                    MDS_INODELOCK_XATTR, LCK_EX);
                if (rc < 0)
                        GOTO(out_unlock1, rc);
        }
index 97c9e83..4e05c07 100644 (file)
@@ -247,30 +247,6 @@ static int mdt_stripes_unlock(struct mdt_thread_info *mti,
                                policy);
 }
 
-static inline int mdt_object_striped(struct mdt_thread_info *mti,
-                                    struct mdt_object *obj)
-{
-       struct lu_device *bottom_dev;
-       struct lu_object *bottom_obj;
-       int rc;
-
-       if (!S_ISDIR(obj->mot_header.loh_attr))
-               return 0;
-
-       /* getxattr from bottom obj to avoid reading in shard FIDs */
-       bottom_dev = dt2lu_dev(mti->mti_mdt->mdt_bottom);
-       bottom_obj = lu_object_find_slice(mti->mti_env, bottom_dev,
-                                         mdt_object_fid(obj), NULL);
-       if (IS_ERR(bottom_obj))
-               return PTR_ERR(bottom_obj);
-
-       rc = dt_xattr_get(mti->mti_env, lu2dt(bottom_obj), &LU_BUF_NULL,
-                         XATTR_NAME_LMV);
-       lu_object_put(mti->mti_env, bottom_obj);
-
-       return (rc > 0) ? 1 : (rc == -ENODATA) ? 0 : rc;
-}
-
 /**
  * Lock slave stripes if necessary, the lock handles of slave stripes
  * will be stored in einfo->ei_cbdata.
@@ -293,6 +269,7 @@ static int mdt_stripes_lock(struct mdt_thread_info *mti, struct mdt_object *obj,
        einfo->ei_req_slot = 1;
        memset(policy, 0, sizeof(*policy));
        policy->l_inodebits.bits = ibits;
+       policy->l_inodebits.li_initiator_id = mdt_node_id(mti->mti_mdt);
 
        return mo_object_lock(mti->mti_env, mdt_object_child(obj), NULL, einfo,
                              policy);
@@ -310,7 +287,6 @@ static int mdt_stripes_lock(struct mdt_thread_info *mti, struct mdt_object *obj,
  * \param einfo                struct ldlm_enqueue_info
  * \param ibits                MDS inode lock bits
  * \param mode         lock mode
- * \param cos_incompat DNE COS incompatible
  *
  * \retval             0 on success, -ev on error.
  */
@@ -319,7 +295,7 @@ int mdt_object_stripes_lock(struct mdt_thread_info *info,
                            struct mdt_object *child,
                            struct mdt_lock_handle *lh,
                            struct ldlm_enqueue_info *einfo, __u64 ibits,
-                           enum ldlm_mode mode, bool cos_incompat)
+                           enum ldlm_mode mode)
 {
        int rc;
 
@@ -338,31 +314,24 @@ int mdt_object_stripes_lock(struct mdt_thread_info *info,
        if (ibits & MDS_INODELOCK_LOOKUP) {
                LASSERT(parent);
                rc = mdt_object_check_lock(info, parent, child, lh, ibits,
-                                          mode, cos_incompat);
+                                          mode);
        } else {
-               rc = mdt_object_lock(info, child, lh, ibits, mode,
-                                    cos_incompat);
+               rc = mdt_object_lock(info, child, lh, ibits, mode);
        }
        if (rc)
                RETURN(rc);
 
-       rc = mdt_object_striped(info, child);
-       if (rc == 0)
-               return 0;
-
-       if (rc < 0)
-               goto unlock;
+       if (!S_ISDIR(child->mot_header.loh_attr))
+               RETURN(0);
 
        /* lock stripes for striped directory */
        rc = mdt_stripes_lock(info, child, lh->mlh_reg_mode, ibits, einfo);
        if (rc == -EIO && CFS_FAIL_CHECK(OBD_FAIL_LFSCK_BAD_SLAVE_NAME))
                rc = 0;
-
-unlock:
        if (rc)
                mdt_object_unlock(info, child, lh, rc);
 
-       return rc;
+       RETURN(rc);
 }
 
 void mdt_object_stripes_unlock(struct mdt_thread_info *info,
@@ -410,7 +379,7 @@ static int mdt_restripe(struct mdt_thread_info *info,
                RETURN(rc);
 
        lhp = &info->mti_lh[MDT_LH_PARENT];
-       rc = mdt_parent_lock(info, parent, lhp, lname, LCK_PW, true);
+       rc = mdt_parent_lock(info, parent, lhp, lname, LCK_PW);
        if (rc)
                RETURN(rc);
 
@@ -467,7 +436,7 @@ static int mdt_restripe(struct mdt_thread_info *info,
        /* lock object */
        lhc = &info->mti_lh[MDT_LH_CHILD];
        rc = mdt_object_stripes_lock(info, parent, child, lhc, einfo,
-                                    MDS_INODELOCK_FULL, LCK_PW, true);
+                                    MDS_INODELOCK_FULL, LCK_PW);
        if (rc)
                GOTO(unlock_child, rc);
 
@@ -630,7 +599,7 @@ static int mdt_create(struct mdt_thread_info *info)
        CFS_RACE(OBD_FAIL_MDS_CREATE_RACE);
 
        lh = &info->mti_lh[MDT_LH_PARENT];
-       rc = mdt_parent_lock(info, parent, lh, &rr->rr_name, LCK_PW, false);
+       rc = mdt_parent_lock(info, parent, lh, &rr->rr_name, LCK_PW);
        if (rc)
                GOTO(put_parent, rc);
 
@@ -690,40 +659,16 @@ static int mdt_create(struct mdt_thread_info *info)
        if (rc < 0)
                GOTO(put_child, rc);
 
-       /*
-        * On DNE, we need to eliminate dependey between 'mkdir a' and
-        * 'mkdir a/b' if b is a striped directory, to achieve this, two
-        * things are done below:
-        * 1. save child and slaves lock.
-        * 2. if the child is a striped directory, relock parent so to
-        *    compare against with COS locks to ensure parent was
-        *    committed to disk.
+       /* save child locks to eliminate dependency between 'mkdir a' and
+        * 'mkdir a/b' if b is a remote directory
         */
        if (mdt_slc_is_enabled(mdt) && S_ISDIR(ma->ma_attr.la_mode)) {
                struct mdt_lock_handle *lhc;
                struct ldlm_enqueue_info *einfo = &info->mti_einfo;
-               bool cos_incompat;
-
-               rc = mdt_object_striped(info, child);
-               if (rc < 0)
-                       GOTO(put_child, rc);
-
-               cos_incompat = rc;
-               if (cos_incompat) {
-                       if (!mdt_object_remote(parent)) {
-                               mdt_object_unlock(info, parent, lh, 1);
-                               rc = mdt_parent_lock(info, parent, lh,
-                                                    &rr->rr_name, LCK_PW,
-                                                    true);
-                               if (rc)
-                                       GOTO(put_child, rc);
-                       }
-               }
 
                lhc = &info->mti_lh[MDT_LH_CHILD];
                rc = mdt_object_stripes_lock(info, parent, child, lhc, einfo,
-                                            MDS_INODELOCK_UPDATE, LCK_PW,
-                                            cos_incompat);
+                                            MDS_INODELOCK_UPDATE, LCK_PW);
                if (rc)
                        GOTO(put_child, rc);
 
@@ -752,15 +697,9 @@ static int mdt_attr_set(struct mdt_thread_info *info, struct mdt_object *mo,
                        (LA_MODE | LA_UID | LA_GID | LA_PROJID | LA_FLAGS);
        __u64 lockpart = MDS_INODELOCK_UPDATE;
        struct ldlm_enqueue_info *einfo = &info->mti_einfo;
-       bool cos_incompat;
        int rc;
 
        ENTRY;
-       rc = mdt_object_striped(info, mo);
-       if (rc < 0)
-               RETURN(rc);
-       cos_incompat = rc;
-
        if (ma->ma_attr.la_valid & (LA_MODE|LA_UID|LA_GID))
                lockpart |= MDS_INODELOCK_PERM;
        /* Clear xattr cache on clients, so the virtual project ID xattr
@@ -771,7 +710,7 @@ static int mdt_attr_set(struct mdt_thread_info *info, struct mdt_object *mo,
 
        lh = &info->mti_lh[MDT_LH_PARENT];
        rc = mdt_object_stripes_lock(info, NULL, mo, lh, einfo, lockpart,
-                                    LCK_PW, cos_incompat);
+                                    LCK_PW);
        if (rc != 0)
                RETURN(rc);
 
@@ -892,7 +831,7 @@ static int mdt_reint_setattr(struct mdt_thread_info *info,
                if (atomic_read(&mo->mot_lease_count) > 0) { /* lease exists */
                        lhc = &info->mti_lh[MDT_LH_LOCAL];
                        rc = mdt_object_lock(info, mo, lhc, MDS_INODELOCK_OPEN,
-                                            LCK_CW, false);
+                                            LCK_CW);
                        if (rc != 0) {
                                up_read(&mo->mot_open_sem);
                                GOTO(out_put, rc);
@@ -996,7 +935,7 @@ static int mdt_reint_setattr(struct mdt_thread_info *info,
                        buf->lb_len = ma->ma_lmm_size;
                        name = XATTR_NAME_LOV;
                        rc = mdt_object_lock(info, mo, lh, MDS_INODELOCK_XATTR,
-                                            LCK_PW, false);
+                                            LCK_PW);
                } else {
                        buf->lb_buf = &ma->ma_lmv->lmv_user_md;
                        buf->lb_len = ma->ma_lmv_size;
@@ -1006,7 +945,7 @@ static int mdt_reint_setattr(struct mdt_thread_info *info,
                                rc = mdt_object_lock(info, mo, lh,
                                                     MDS_INODELOCK_XATTR |
                                                     MDS_INODELOCK_LOOKUP,
-                                                    LCK_PW, false);
+                                                    LCK_PW);
                        } else {
                                struct lu_fid *pfid = &info->mti_tmp_fid1;
                                struct lu_name *pname = &info->mti_name;
@@ -1030,7 +969,7 @@ static int mdt_reint_setattr(struct mdt_thread_info *info,
                                rc = mdt_object_check_lock(info, pobj, mo, lh,
                                                           MDS_INODELOCK_XATTR |
                                                           MDS_INODELOCK_LOOKUP,
-                                                          LCK_PW, false);
+                                                          LCK_PW);
                                mdt_object_put(info->mti_env, pobj);
                        }
                }
@@ -1137,7 +1076,6 @@ static int mdt_reint_unlink(struct mdt_thread_info *info,
        struct mdt_lock_handle *child_lh;
        struct ldlm_enqueue_info *einfo = &info->mti_einfo;
        struct lu_ucred *uc  = mdt_ucred(info);
-       bool cos_incompat = false;
        int no_name = 0;
        ktime_t kstart = ktime_get();
        int rc;
@@ -1159,9 +1097,7 @@ static int mdt_reint_unlink(struct mdt_thread_info *info,
        if (IS_ERR(mp))
                RETURN(PTR_ERR(mp));
 
-       if (mdt_object_remote(mp)) {
-               cos_incompat = true;
-       } else {
+       if (!mdt_object_remote(mp)) {
                rc = mdt_version_get_check_save(info, mp, 0);
                if (rc)
                        GOTO(put_parent, rc);
@@ -1173,10 +1109,8 @@ static int mdt_reint_unlink(struct mdt_thread_info *info,
 
        CFS_RACE(OBD_FAIL_MDS_REINT_OPEN);
        CFS_RACE(OBD_FAIL_MDS_REINT_OPEN2);
-relock:
        parent_lh = &info->mti_lh[MDT_LH_PARENT];
-       rc = mdt_parent_lock(info, mp, parent_lh, &rr->rr_name, LCK_PW,
-                            cos_incompat);
+       rc = mdt_parent_lock(info, mp, parent_lh, &rr->rr_name, LCK_PW);
        if (rc != 0)
                GOTO(put_parent, rc);
 
@@ -1246,19 +1180,6 @@ relock:
                        GOTO(put_child, rc = -ENOENT);
        }
 
-       if (!cos_incompat) {
-               rc = mdt_object_striped(info, mc);
-               if (rc < 0)
-                       GOTO(put_child, rc);
-
-               cos_incompat = rc;
-               if (cos_incompat) {
-                       mdt_object_put(info->mti_env, mc);
-                       mdt_object_unlock(info, mp, parent_lh, -EAGAIN);
-                       goto relock;
-               }
-       }
-
        child_lh = &info->mti_lh[MDT_LH_CHILD];
        if (mdt_object_remote(mc)) {
                struct mdt_body  *repbody;
@@ -1283,8 +1204,7 @@ relock:
                 * would happen if another client try to grab the LOOKUP
                 * lock at the same time with unlink XXX
                 */
-               rc = mdt_object_lookup_lock(info, NULL, mc, child_lh, LCK_EX,
-                                           false);
+               rc = mdt_object_lookup_lock(info, NULL, mc, child_lh, LCK_EX);
                if (rc)
                        GOTO(put_child, rc);
 
@@ -1300,8 +1220,7 @@ relock:
         */
        rc = mdt_object_stripes_lock(info, mp, mc, child_lh, einfo,
                                     MDS_INODELOCK_LOOKUP |
-                                    MDS_INODELOCK_UPDATE,
-                                    LCK_EX, cos_incompat);
+                                    MDS_INODELOCK_UPDATE, LCK_EX);
        if (rc != 0)
                GOTO(put_child, rc);
 
@@ -1360,7 +1279,8 @@ out_stat:
        EXIT;
 
 unlock_child:
-       mdt_object_stripes_unlock(info, mc, child_lh, einfo, rc);
+       /* after unlink the object is gone, no need to keep lock */
+       mdt_object_stripes_unlock(info, mc, child_lh, einfo, 1);
 put_child:
        if (info->mti_spec.sp_cr_flags & MDS_OP_WITH_FID &&
            info->mti_big_buf.lb_buf)
@@ -1389,7 +1309,6 @@ static int mdt_reint_link(struct mdt_thread_info *info,
        struct mdt_lock_handle  *lhs;
        struct mdt_lock_handle  *lhp;
        ktime_t kstart = ktime_get();
-       bool cos_incompat;
        int rc;
 
        ENTRY;
@@ -1442,12 +1361,10 @@ static int mdt_reint_link(struct mdt_thread_info *info,
                GOTO(put_source, rc = -ENOENT);
        }
 
-       cos_incompat = (mdt_object_remote(mp) || mdt_object_remote(ms));
-
        CFS_RACE(OBD_FAIL_MDS_LINK_RENAME_RACE);
 
        lhp = &info->mti_lh[MDT_LH_PARENT];
-       rc = mdt_parent_lock(info, mp, lhp, &rr->rr_name, LCK_PW, cos_incompat);
+       rc = mdt_parent_lock(info, mp, lhp, &rr->rr_name, LCK_PW);
        if (rc != 0)
                GOTO(put_source, rc);
 
@@ -1455,8 +1372,8 @@ static int mdt_reint_link(struct mdt_thread_info *info,
 
        lhs = &info->mti_lh[MDT_LH_CHILD];
        rc = mdt_object_lock(info, ms, lhs,
-                            MDS_INODELOCK_UPDATE | MDS_INODELOCK_XATTR, LCK_EX,
-                            cos_incompat);
+                            MDS_INODELOCK_UPDATE | MDS_INODELOCK_XATTR,
+                            LCK_EX);
        if (rc != 0)
                GOTO(unlock_parent, rc);
 
@@ -1523,7 +1440,7 @@ static int mdt_rename_lock(struct mdt_thread_info *info,
 
        mdt_lock_reg_init(lh, LCK_EX);
        rc = mdt_object_lock_internal(info, obj, &LUSTRE_BFL_FID, lh,
-                                     &ibits, 0, false, false);
+                                     &ibits, 0, false);
        mdt_object_put(info->mti_env, obj);
        RETURN(rc);
 }
@@ -1580,7 +1497,7 @@ static int mdt_rename_source_lock(struct mdt_thread_info *info,
                                  struct mdt_object *child,
                                  struct mdt_lock_handle *lh,
                                  struct mdt_lock_handle *lh_lookup,
-                                 __u64 ibits, bool cos_incompat)
+                                 __u64 ibits)
 {
        int rc;
 
@@ -1594,14 +1511,14 @@ static int mdt_rename_source_lock(struct mdt_thread_info *info,
 
        if (rc == 1) {
                rc = mdt_object_lookup_lock(info, parent, child, lh_lookup,
-                                           LCK_EX, cos_incompat);
+                                           LCK_EX);
                if (rc)
                        return rc;
 
                ibits &= ~MDS_INODELOCK_LOOKUP;
        }
 
-       rc = mdt_object_lock(info, child, lh, ibits, LCK_EX, cos_incompat);
+       rc = mdt_object_lock(info, child, lh, ibits, LCK_EX);
        if (unlikely(rc && !(ibits & MDS_INODELOCK_LOOKUP)))
                mdt_object_unlock(info, NULL, lh_lookup, rc);
 
@@ -1698,7 +1615,7 @@ static int mdt_migrate_link_parent_lock(struct mdt_thread_info *info,
        if (*blocked) {
                /* revoke lock instead of take in *blocked* mode */
                rc = mdt_object_lock(info, lnkp, lhl, MDS_INODELOCK_UPDATE,
-                                    LCK_PW, true);
+                                    LCK_PW);
                if (rc)
                        RETURN(rc);
 
@@ -1733,7 +1650,7 @@ static int mdt_migrate_link_parent_lock(struct mdt_thread_info *info,
         * parent and their hash values are different.
         */
        rc = mdt_object_lock_try(info, lnkp, lhl, &ibits, MDS_INODELOCK_UPDATE,
-                                LCK_PW, true);
+                                LCK_PW);
        if (rc < 0)
                RETURN(rc);
 
@@ -1789,7 +1706,7 @@ static int mdt_migrate_link_lock(struct mdt_thread_info *info,
                }
        }
 
-       rc = mdt_object_lookup_lock(info, lnkp, obj, lhl, LCK_EX, true);
+       rc = mdt_object_lookup_lock(info, lnkp, obj, lhl, LCK_EX);
        if (rc)
                RETURN(rc);
 
@@ -2289,25 +2206,22 @@ lock_parent:
        lhtp = &info->mti_lh[MDT_LH_CHILD];
        /* lock spobj and tpobj in stripe index order */
        if (reverse) {
-               rc = mdt_parent_lock(info, tpobj, lhtp, &rr->rr_name, LCK_PW,
-                                    true);
+               rc = mdt_parent_lock(info, tpobj, lhtp, &rr->rr_name, LCK_PW);
                if (rc)
                        GOTO(put_source, rc);
 
                LASSERT(spobj != tpobj);
-               rc = mdt_parent_lock(info, spobj, lhsp, &rr->rr_name, LCK_PW,
-                                    true);
+               rc = mdt_parent_lock(info, spobj, lhsp, &rr->rr_name, LCK_PW);
                if (rc)
                        GOTO(unlock_parent, rc);
        } else {
-               rc = mdt_parent_lock(info, spobj, lhsp, &rr->rr_name, LCK_PW,
-                                    true);
+               rc = mdt_parent_lock(info, spobj, lhsp, &rr->rr_name, LCK_PW);
                if (rc)
                        GOTO(put_source, rc);
 
                if (tpobj != spobj) {
                        rc = mdt_parent_lock(info, tpobj, lhtp, &rr->rr_name,
-                                            LCK_PW, true);
+                                            LCK_PW);
                        if (rc)
                                GOTO(unlock_parent, rc);
                }
@@ -2341,7 +2255,7 @@ lock_parent:
        lhl = &info->mti_lh[MDT_LH_LOOKUP];
        rc = mdt_rename_source_lock(info, spobj, sobj, lhs, lhl,
                                    MDS_INODELOCK_LOOKUP | MDS_INODELOCK_XATTR |
-                                   MDS_INODELOCK_OPEN, true);
+                                   MDS_INODELOCK_OPEN);
        if (rc)
                GOTO(unlock_links, rc);
 
@@ -2441,8 +2355,7 @@ lock_parent:
                        mdt_rename_source_unlock(info, sobj, lhs, lhl, 1);
 
                rc = mdt_object_stripes_lock(info, tpobj, tobj, lht, einfo,
-                                            MDS_INODELOCK_UPDATE, LCK_PW,
-                                            true);
+                                            MDS_INODELOCK_UPDATE, LCK_PW);
                if (rc)
                        GOTO(put_target, rc);
 
@@ -2585,13 +2498,11 @@ static int mdt_lock_two_dirs(struct mdt_thread_info *info,
                             const struct lu_name *firstname,
                             struct mdt_object *mseconddir,
                             struct mdt_lock_handle *lh_seconddirp,
-                            const struct lu_name *secondname,
-                            bool cos_incompat)
+                            const struct lu_name *secondname)
 {
        int rc;
 
-       rc = mdt_parent_lock(info, mfirstdir, lh_firstdirp, firstname, LCK_PW,
-                            cos_incompat);
+       rc = mdt_parent_lock(info, mfirstdir, lh_firstdirp, firstname, LCK_PW);
        if (rc)
                return rc;
 
@@ -2600,13 +2511,13 @@ static int mdt_lock_two_dirs(struct mdt_thread_info *info,
 
        if (mfirstdir != mseconddir) {
                rc = mdt_parent_lock(info, mseconddir, lh_seconddirp,
-                                    secondname, LCK_PW, cos_incompat);
+                                    secondname, LCK_PW);
        } else if (!mdt_object_remote(mseconddir)) {
                if (lh_firstdirp->mlh_pdo_hash !=
                    lh_seconddirp->mlh_pdo_hash) {
                        rc = mdt_object_pdo_lock(info, mseconddir,
                                                 lh_seconddirp, secondname,
-                                                LCK_PW, false, cos_incompat);
+                                                LCK_PW, false);
                        CFS_FAIL_TIMEOUT(OBD_FAIL_MDS_PDO_LOCK2, 10);
                }
        }
@@ -2644,7 +2555,6 @@ static int mdt_reint_rename(struct mdt_thread_info *info,
        struct lu_fid *new_fid = &info->mti_tmp_fid2;
        struct lu_ucred *uc = mdt_ucred(info);
        bool reverse = false, discard = false;
-       bool cos_incompat;
        ktime_t kstart = ktime_get();
        enum mdt_stat_idx msi = 0;
        int rc;
@@ -2745,23 +2655,13 @@ static int mdt_reint_rename(struct mdt_thread_info *info,
                GOTO(out_unlock_rename, rc);
        reverse = rc;
 
-       /* source needs to be looked up after locking source parent, otherwise
-        * this rename may race with unlink source, and cause rename hang, see
-        * sanityn.sh 55b, so check parents first, if later we found source is
-        * remote, relock parents.
-        */
-       cos_incompat = (mdt_object_remote(msrcdir) ||
-                       mdt_object_remote(mtgtdir));
-
        CFS_FAIL_TIMEOUT(OBD_FAIL_MDS_RENAME4, 5);
+       CFS_RACE(OBD_FAIL_MDS_REINT_OPEN);
+       CFS_RACE(OBD_FAIL_MDS_REINT_OPEN2);
 
        /* lock parents in the proper order. */
        lh_srcdirp = &info->mti_lh[MDT_LH_PARENT];
        lh_tgtdirp = &info->mti_lh[MDT_LH_CHILD];
-
-       CFS_RACE(OBD_FAIL_MDS_REINT_OPEN);
-       CFS_RACE(OBD_FAIL_MDS_REINT_OPEN2);
-relock:
        mdt_lock_pdo_init(lh_srcdirp, LCK_PW, &rr->rr_name);
        mdt_lock_pdo_init(lh_tgtdirp, LCK_PW, &rr->rr_tgt_name);
 
@@ -2777,11 +2677,10 @@ relock:
        if (reverse)
                rc = mdt_lock_two_dirs(info, mtgtdir, lh_tgtdirp,
                                       &rr->rr_tgt_name, msrcdir, lh_srcdirp,
-                                      &rr->rr_name, cos_incompat);
+                                      &rr->rr_name);
        else
                rc = mdt_lock_two_dirs(info, msrcdir, lh_srcdirp, &rr->rr_name,
-                                      mtgtdir, lh_tgtdirp, &rr->rr_tgt_name,
-                                      cos_incompat);
+                                      mtgtdir, lh_tgtdirp, &rr->rr_tgt_name);
 
        if (rc != 0)
                GOTO(out_unlock_rename, rc);
@@ -2832,14 +2731,6 @@ relock:
        /* save version after locking */
        mdt_version_get_save(info, mold, 2);
 
-       if (!cos_incompat && mdt_object_remote(mold)) {
-               cos_incompat = true;
-               mdt_object_put(info->mti_env, mold);
-               mdt_object_unlock(info, mtgtdir, lh_tgtdirp, -EAGAIN);
-               mdt_object_unlock(info, msrcdir, lh_srcdirp, -EAGAIN);
-               goto relock;
-       }
-
        /* find mnew object:
         * mnew target object may not exist now
         * lookup with version checking
@@ -2895,7 +2786,7 @@ relock:
                rc = mdt_rename_source_lock(info, msrcdir, mold, lh_oldp,
                                            lh_lookup,
                                            MDS_INODELOCK_LOOKUP |
-                                           MDS_INODELOCK_XATTR, cos_incompat);
+                                           MDS_INODELOCK_XATTR);
                if (rc < 0)
                        GOTO(out_put_new, rc);
 
@@ -2921,8 +2812,7 @@ relock:
                lh_newp = &info->mti_lh[MDT_LH_NEW];
                rc = mdt_object_check_lock(info, mtgtdir, mnew, lh_newp,
                                           MDS_INODELOCK_LOOKUP |
-                                          MDS_INODELOCK_UPDATE, LCK_EX,
-                                          cos_incompat);
+                                          MDS_INODELOCK_UPDATE, LCK_EX);
                if (rc != 0)
                        GOTO(out_unlock_new, rc);
 
@@ -2936,7 +2826,7 @@ relock:
                rc = mdt_rename_source_lock(info, msrcdir, mold, lh_oldp,
                                            lh_lookup,
                                            MDS_INODELOCK_LOOKUP |
-                                           MDS_INODELOCK_XATTR, cos_incompat);
+                                           MDS_INODELOCK_XATTR);
                if (rc != 0)
                        GOTO(out_put_old, rc);
 
@@ -2974,7 +2864,8 @@ relock:
        EXIT;
 out_unlock_new:
        if (mnew != NULL)
-               mdt_object_unlock(info, mnew, lh_newp, rc);
+               /* mnew is gone, no need to keep lock */
+               mdt_object_unlock(info, mnew, lh_newp, 1);
 out_unlock_old:
        mdt_object_unlock(info, NULL, lh_lookup, rc);
        mdt_object_unlock(info, mold, lh_oldp, rc);
index bd843f7..e09645e 100644 (file)
@@ -389,13 +389,13 @@ static int mdt_auto_split(struct mdt_thread_info *info)
                GOTO(restriping_clear, rc);
 
        lhp = &info->mti_lh[MDT_LH_PARENT];
-       rc = mdt_parent_lock(info, parent, lhp, lname, LCK_PW, true);
+       rc = mdt_parent_lock(info, parent, lhp, lname, LCK_PW);
        if (rc)
                GOTO(restriping_clear, rc);
 
        lhc = &info->mti_lh[MDT_LH_CHILD];
        rc = mdt_object_stripes_lock(info, parent, child, lhc, einfo,
-                                    MDS_INODELOCK_FULL, LCK_EX, true);
+                                    MDS_INODELOCK_FULL, LCK_EX);
        if (rc)
                GOTO(unlock_parent, rc);
 
@@ -458,8 +458,7 @@ static int mdt_restripe_migrate_finish(struct mdt_thread_info *info,
        buf.lb_len = sizeof(*lmv);
 
        lh = &info->mti_lh[MDT_LH_PARENT];
-       rc = mdt_object_lock(info, stripe, lh, MDS_INODELOCK_XATTR, LCK_EX,
-                            false);
+       rc = mdt_object_lock(info, stripe, lh, MDS_INODELOCK_XATTR, LCK_EX);
        if (!rc)
                rc = mo_xattr_set(info->mti_env, mdt_object_child(stripe), &buf,
                                  XATTR_NAME_LMV, LU_XATTR_REPLACE);
index 5f13185..66b9eea 100644 (file)
@@ -376,7 +376,7 @@ int mdt_dir_layout_update(struct mdt_thread_info *info)
        lhp = &info->mti_lh[MDT_LH_PARENT];
        if (le32_to_cpu(lmu->lum_stripe_count) < 2) {
                rc = mdt_object_lock(info, pobj, lhp, MDS_INODELOCK_UPDATE,
-                                    LCK_PW, true);
+                                    LCK_PW);
                if (rc)
                        GOTO(put_pobj, rc);
        }
@@ -384,7 +384,7 @@ int mdt_dir_layout_update(struct mdt_thread_info *info)
        /* lock object */
        lhc = &info->mti_lh[MDT_LH_CHILD];
        rc = mdt_object_stripes_lock(info, pobj, obj, lhc, einfo,
-                                    MDS_INODELOCK_FULL, LCK_EX, true);
+                                    MDS_INODELOCK_FULL, LCK_EX);
        if (rc)
                GOTO(unlock_pobj, rc);
 
index 107a386..453d797 100644 (file)
@@ -979,17 +979,14 @@ static int osp_md_object_lock(const struct lu_env *env,
        res_id = einfo->ei_res_id;
        LASSERT(res_id != NULL);
 
-       if (einfo->ei_mode & (LCK_EX | LCK_PW))
-               flags |= LDLM_FL_COS_INCOMPAT;
-
        req = ldlm_enqueue_pack(osp->opd_exp, 0);
        if (IS_ERR(req))
                RETURN(PTR_ERR(req));
 
        osp_set_req_replay(osp, req);
        rc = ldlm_cli_enqueue(osp->opd_exp, &req, einfo, res_id,
-                             (const union ldlm_policy_data *)policy,
-                             &flags, NULL, 0, LVB_T_NONE, lh, 0);
+                             (const union ldlm_policy_data *)policy, &flags,
+                             NULL, 0, LVB_T_NONE, lh, 0);
 
        ptlrpc_req_finished(req);
 
index c46ad30..717a511 100644 (file)
@@ -2671,7 +2671,7 @@ void lustre_swab_ldlm_policy_data(union ldlm_wire_policy_data *d)
        __swab64s(&d->l_extent.start);
        __swab64s(&d->l_extent.end);
        __swab64s(&d->l_extent.gid);
-       __swab64s(&d->l_flock.lfw_owner);
+       __swab32s(&d->l_flock.lfw_padding);
        __swab32s(&d->l_flock.lfw_pid);
 }
 
index 3f30014..6c88753 100644 (file)
@@ -2455,14 +2455,14 @@ static int ptlrpc_handle_rs(struct ptlrpc_reply_state *rs)
                                         * ptlrpc_hr_select, so REP-ACK hr may
                                         * race with trans commit, while the
                                         * latter will release locks, get locks
-                                        * here early to convert to COS mode
+                                        * here early to downgrade to TXN mode
                                         * safely.
                                         */
                                        lock = ldlm_handle2lock(
                                                        &rs->rs_locks[nlocks]);
                                        LASSERT(lock);
                                        ack_locks[nlocks] = lock;
-                                       rs->rs_modes[nlocks] = LCK_COS;
+                                       rs->rs_modes[nlocks] = LCK_TXN;
                                }
                                nlocks = rs->rs_nlocks;
                                rs->rs_convert_lock = 0;
@@ -2475,7 +2475,7 @@ static int ptlrpc_handle_rs(struct ptlrpc_reply_state *rs)
 
                                while (nlocks-- > 0) {
                                        lock = ack_locks[nlocks];
-                                       ldlm_lock_mode_downgrade(lock, LCK_COS);
+                                       ldlm_lock_mode_downgrade(lock, LCK_TXN);
                                        LDLM_LOCK_PUT(lock);
                                }
                                RETURN(0);
index 2b7ba62..951be38 100644 (file)
@@ -332,9 +332,11 @@ void lustre_assert_wire_constants(void)
                 (long long)LCK_GROUP);
        LASSERTF(LCK_COS == 128, "found %lld\n",
                 (long long)LCK_COS);
-       LASSERTF(LCK_MAXMODE == 129, "found %lld\n",
+       LASSERTF(LCK_TXN == 256, "found %lld\n",
+                (long long)LCK_TXN);
+       LASSERTF(LCK_MAXMODE == 257, "found %lld\n",
                 (long long)LCK_MAXMODE);
-       LASSERTF(LCK_MODE_NUM == 8, "found %lld\n",
+       LASSERTF(LCK_MODE_NUM == 9, "found %lld\n",
                 (long long)LCK_MODE_NUM);
        BUILD_BUG_ON(LDLM_PLAIN != 10);
        BUILD_BUG_ON(LDLM_EXTENT != 11);
@@ -3634,7 +3636,7 @@ void lustre_assert_wire_constants(void)
                 (long long)(int)sizeof(((struct ldlm_extent *)0)->gid));
 
        /* Checks for struct ldlm_inodebits */
-       LASSERTF((int)sizeof(struct ldlm_inodebits) == 24, "found %lld\n",
+       LASSERTF((int)sizeof(struct ldlm_inodebits) == 32, "found %lld\n",
                 (long long)(int)sizeof(struct ldlm_inodebits));
        LASSERTF((int)offsetof(struct ldlm_inodebits, bits) == 0, "found %lld\n",
                 (long long)(int)offsetof(struct ldlm_inodebits, bits));
@@ -3655,6 +3657,14 @@ void lustre_assert_wire_constants(void)
                 (long long)(int)offsetof(struct ldlm_inodebits, li_gid));
        LASSERTF((int)sizeof(((struct ldlm_inodebits *)0)->li_gid) == 8, "found %lld\n",
                 (long long)(int)sizeof(((struct ldlm_inodebits *)0)->li_gid));
+       LASSERTF((int)offsetof(struct ldlm_inodebits, li_padding) == 24, "found %lld\n",
+                (long long)(int)offsetof(struct ldlm_inodebits, li_padding));
+       LASSERTF((int)sizeof(((struct ldlm_inodebits *)0)->li_padding) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct ldlm_inodebits *)0)->li_padding));
+       LASSERTF((int)offsetof(struct ldlm_inodebits, li_initiator_id) == 28, "found %lld\n",
+                (long long)(int)offsetof(struct ldlm_inodebits, li_initiator_id));
+       LASSERTF((int)sizeof(((struct ldlm_inodebits *)0)->li_initiator_id) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct ldlm_inodebits *)0)->li_initiator_id));
 
        /* Checks for struct ldlm_flock_wire */
        LASSERTF((int)sizeof(struct ldlm_flock_wire) == 32, "found %lld\n",
index 0dc849f..135b241 100755 (executable)
@@ -1215,45 +1215,86 @@ test_33b() {
 }
 run_test 33b "COS: cross create/delete, 2 clients, benchmark under remote dir"
 
+# arg1 is description, arg2 is operations before Sync-on-Lock-Cancel, arg3 is
+# the operation that triggers SoLC
+op_trigger_solc() {
+       local sync_count
+       local total=0
+       local nodes=$(comma_list $(mdts_nodes))
+
+       sync_all_data
+
+       # trigger SoLC twice in case transaction is committed before unlock
+       for i in 1 2; do
+               bash -c "$2"
+               do_nodes $nodes "$LCTL set_param -n mdt.*.sync_count=0"
+               bash -c "$3"
+               sync_count=$(do_nodes $nodes \
+                       "lctl get_param -n mdt.*MDT*.sync_count" | calc_sum)
+               total=$((total + sync_count));
+               rm -rf $DIR/$tdir/*
+               sync_all_data
+       done
+
+       echo $1
+       echo "  $2"
+       echo "  $3"
+       echo "  SoLC count $total"
+       (( total > 0 )) || error "$3 didn't trigger SoLC"
+}
+
+test_33_run() {
+       echo $1
+       echo "  $2"
+       eval $2
+       # eval instead of 'bash -c "$2"', presumably so $2 can call do_facet etc.
+}
+
 test_33c() {
-       [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs"
-       [ "$MDS1_VERSION" -lt $(version_code 2.7.63) ] &&
+       (( MDSCOUNT >= 2 )) || skip "needs >= 2 MDTs"
+       (( MDS1_VERSION >= $(version_code 2.7.63) )) ||
                skip "DNE CoS not supported"
 
        # LU-13522
        stop mds1
        start mds1 $(mdsdevname 1) $MDS_MOUNT_OPTS || error "start mds1 failed"
 
-       local sync_count
-
        mkdir_on_mdt0 $DIR/$tdir
        sync_all_data
-       do_facet mds1 "lctl set_param -n mdt.*.sync_count=0"
-       # do twice in case transaction is committed before unlock, see LU-8200
-       for i in 1 2; do
-               # remote dir is created on MDT1, which enqueued lock of $tdir on
-               # MDT0
-               $LFS mkdir -i 1 $DIR/$tdir/remote.$i
-               mkdir $DIR/$tdir/local.$i
-       done
-       sync_count=$(do_facet mds1 "lctl get_param -n mdt.*MDT0000.sync_count")
-       echo "sync_count $sync_count"
-       [ $sync_count -eq 0 ] && error "Sync-Lock-Cancel not triggered"
 
+       op_trigger_solc "create remote dir and local dir" \
+               "$LFS mkdir -i 1 $DIR/$tdir/remote" \
+               "$LFS mkdir -i 0 $DIR/$tdir/local"
+       (( MDSCOUNT > 2 )) &&
+       op_trigger_solc "create remote dirs on different MDTs" \
+               "$LFS mkdir -i 1 $DIR/$tdir/remote.1" \
+               "$LFS mkdir -i 2 $DIR/$tdir/remote.2"
+       op_trigger_solc "create file on 2nd stripe under striped directory" \
+               "$LFS mkdir -i 0 -c 2 $DIR/$tdir/striped" \
+               "touch $DIR2/$tdir/striped/subfile"
+
+       echo
+       echo "Below operations shouldn't trigger Solc:"
+       $LFS mkdir -i 0 -c 2 $DIR/$tdir/striped
        sync_all_data
        do_facet mds1 "lctl set_param -n mdt.*.sync_count=0"
-       $LFS mkdir -i 1 $DIR/$tdir/remote.3
-       # during sleep remote mkdir should have been committed and canceled
-       # remote lock spontaneously, which shouldn't trigger sync
-       sleep 6
-       mkdir $DIR/$tdir/local.3
+       if (( MDS1_VERSION >= $(version_code 2.15.55.133) )); then
+               test_33_run "create file on 2nd stripe after setattr" \
+                       "chmod 777 $DIR/$tdir/striped; \
+                        touch $DIR2/$tdir/striped/subfile"
+       fi
+       test_33_run "create local dir after remote dir creation transaction commit" \
+               "$LFS mkdir -i 1 $DIR/$tdir/remote.3; \
+                do_facet mds2 $LCTL set_param -n osd*.*MDT0001.force_sync 1;
+                mkdir $DIR/$tdir/local.3"
        sync_count=$(do_facet mds1 "lctl get_param -n mdt.*MDT0000.sync_count")
-       echo "sync_count $sync_count"
+       echo "Solc count $sync_count"
        [ $sync_count -eq 0 ] || error "Sync-Lock-Cancel triggered"
 }
-run_test 33c "Cancel cross-MDT lock should trigger Sync-Lock-Cancel"
+run_test 33c "Cancel cross-MDT lock should trigger Sync-on-Lock-Cancel"
 
-# arg1 is operations done before CoS, arg2 is the operation that triggers CoS
+# arg1 is description, arg2 is operations done before CoS, arg3 is the operation
+# that triggers CoS
 op_trigger_cos() {
        local commit_nr
        local total=0
@@ -1263,9 +1304,9 @@ op_trigger_cos() {
 
        # trigger CoS twice in case transaction commit before unlock
        for i in 1 2; do
-               bash -c "$1"
-               do_nodes $nodes "lctl set_param -n mdt.*.async_commit_count=0"
                bash -c "$2"
+               do_nodes $nodes "lctl set_param -n mdt.*.async_commit_count=0"
+               bash -c "$3"
                commit_nr=$(do_nodes $nodes \
                        "lctl get_param -n mdt.*.async_commit_count" | calc_sum)
                total=$((total + commit_nr));
@@ -1273,74 +1314,101 @@ op_trigger_cos() {
                sync_all_data
        done
 
-       echo "CoS count $total"
-       [ $total -gt 0 ] || error "$2 didn't trigger CoS"
+       echo $1
+       echo "  $2"
+       echo "  $3"
+       echo "  CoS count $total"
+       (( total > 0 )) || error "$3 didn't trigger CoS"
 }
 
 test_33d() {
-       [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs"
-       [ "$MDS1_VERSION" -lt $(version_code 2.7.63) ] &&
+       (( MDSCOUNT > 1 )) || skip "needs >= 2 MDTs"
+       (( MDS1_VERSION >= $(version_code 2.7.63) )) ||
                skip "DNE CoS not supported"
 
-       # remote directory create
-       op_trigger_cos "$LFS mkdir -i 0 $DIR/$tdir" "$LFS mkdir -i 1 $DIR/$tdir/subdir"
-       # remote directory unlink
-       op_trigger_cos "$LFS mkdir -i 1 $DIR/$tdir" "rmdir $DIR/$tdir"
-       # striped directory create
-       op_trigger_cos "mkdir $DIR/$tdir" "$LFS mkdir -c 2 $DIR/$tdir/subdir"
-       # striped directory setattr
-       op_trigger_cos "$LFS mkdir -c 2 $DIR/$tdir; touch $DIR/$tdir" \
-               "chmod 713 $DIR/$tdir"
-       # striped directory unlink
-       op_trigger_cos "$LFS mkdir -c 2 $DIR/$tdir; touch $DIR/$tdir" \
-               "rmdir $DIR/$tdir"
-       # cross-MDT link
-       op_trigger_cos "$LFS mkdir -c 2 $DIR/$tdir; \
+       if (( $MDS1_VERSION < $(version_code 2.15.55.133) )); then
+               op_trigger_cos "remote directory unlink" \
+                       "$LFS mkdir -i 1 $DIR/$tdir" "rmdir $DIR2/$tdir"
+               op_trigger_cos "striped directory create" "mkdir $DIR/$tdir" \
+                       "$LFS mkdir -c 2 $DIR2/$tdir/subdir"
+               op_trigger_cos "striped directory setattr" \
+                       "$LFS mkdir -c 2 $DIR/$tdir" "chmod 713 $DIR2/$tdir"
+               op_trigger_cos "striped directory unlink" \
+                       "$LFS mkdir -c 2 $DIR/$tdir" "rmdir $DIR2/$tdir"
+               op_trigger_cos "cross-MDT link" \
+                       "mkdir $DIR/$tdir; \
                        $LFS mkdir -i 0 $DIR/$tdir/d1; \
                        $LFS mkdir -i 1 $DIR/$tdir/d2; \
                        touch $DIR/$tdir/d1/tgt" \
-               "ln $DIR/$tdir/d1/tgt $DIR/$tdir/d2/src"
-       # cross-MDT rename
-       op_trigger_cos "$LFS mkdir -c 2 $DIR/$tdir; \
-                       $LFS mkdir -i 0 $DIR/$tdir/d1; \
-                       $LFS mkdir -i 1 $DIR/$tdir/d2; \
-                       touch $DIR/$tdir/d1/src" \
-               "mv $DIR/$tdir/d1/src $DIR/$tdir/d2/tgt"
-       # migrate
-       op_trigger_cos "$LFS mkdir -i 0 $DIR/$tdir" \
-               "$LFS migrate -m 1 $DIR/$tdir"
+                       "ln $DIR2/$tdir/d1/tgt $DIR2/$tdir/d2/src"
+       fi
+       
+       op_trigger_cos "remote directory create" "$LFS mkdir -i 0 $DIR/$tdir" \
+               "$LFS mkdir -i 1 $DIR2/$tdir/subdir"
+       op_trigger_cos "cross-MDT rename" \
+               "mkdir $DIR/$tdir; \
+               $LFS mkdir -i 0 $DIR/$tdir/d1; \
+               $LFS mkdir -i 1 $DIR/$tdir/d2; \
+               touch $DIR/$tdir/d1/src" \
+               "mv $DIR2/$tdir/d1/src $DIR2/$tdir/d2/tgt"
+       op_trigger_cos "migrate" \
+               "$LFS mkdir -i 0 $DIR/$tdir" \
+               "$LFS migrate -m 1 $DIR2/$tdir"
 
        return 0
 }
-run_test 33d "DNE distributed operation should trigger COS"
+run_test 33d "dependent transactions should trigger COS"
 
 test_33e() {
-       [ $CLIENTCOUNT -ge 2 ] ||
-               skip "Need two or more clients, have $CLIENTCOUNT"
-       [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs"
-       [ "$MDS1_VERSION" -lt $(version_code 2.7.63) ] &&
+       (( MDSCOUNT > 1 )) || skip "needs >= 2 MDTs"
+       (( MDS1_VERSION >= $(version_code 2.7.63) )) ||
                skip "DNE CoS not supported"
 
-       local client2=${CLIENT2:-$(hostname)}
-
-       sync
+       $LFS mkdir -i 0 $DIR/$tdir
+       $LFS mkdir -i 0 $DIR/$tdir/d1
+       $LFS mkdir -i 1 $DIR/$tdir/d2
 
        local nodes=$(comma_list $(mdts_nodes))
        do_nodes $nodes "lctl set_param -n mdt.*.async_commit_count=0"
 
-       $LFS mkdir -c 2 $DIR/$tdir
-       mkdir $DIR/$tdir/subdir
-       echo abc > $DIR/$tdir/$tfile
-       do_node $client2 echo dfg >> $DIR/$tdir/$tfile
-       do_node $client2 touch $DIR/$tdir/subdir
+       test_33_run "plain dir creation" "mkdir $DIR2/$tdir/plain"
+       test_33_run "open file and write" "echo abc > $DIR2/$tdir/$tfile"
+       test_33_run "append write" "echo dfg >> $DIR2/$tdir/$tfile"
+       test_33_run "setattr" "touch $DIR2/$tdir/$tfile"
+       test_33_run "file unlink" "rm $DIR2/$tdir/$tfile"
+       test_33_run "plain dir unlink" "rmdir $DIR2/$tdir/plain"
+       if (( MDS1_VERSION >= $(version_code 2.15.55.133) )); then
+               test_33_run "striped directory creation" \
+                       "$LFS mkdir -i 0 -c 2 $DIR2/$tdir/striped"
+               test_33_run "set default LMV to create striped subdir" \
+                       "$LFS setdirstripe -D -c 2 $DIR/$tdir"
+               test_33_run "striped subdir creation" \
+                       "createmany -d $DIR/$tdir/subdir 100"
+               test_33_run "sub file creation and write" \
+                       "createmany -o $DIR/$tdir/subfile 100; \
+                       echo abc > $DIR/$tdir/subfile1"
+               test_33_run "sub file append write" \
+                       "echo dfg >> $DIR2/$tdir/subfile2"
+               test_33_run "subdir setatttr" "touch $DIR2/$tdir/subdir1"
+               test_33_run "subdir unlink" \
+                       "unlinkmany -d $DIR/$tdir/subdir 100"
+               test_33_run "sub file unlink" \
+                       "unlinkmany $DIR2/$tdir/subfile 100"
+               test_33_run "sub file creation follows striped dir chmod" \
+                       "chmod 777 $DIR/$tdir/striped; \
+                        touch $DIR/$tdir/striped/subfile"
+               test_33_run "striped directory unlink" \
+                       "rm -rf $DIR2/$tdir/striped"
+       fi
+
+       test_33_run "directory unlink" "rm -rf $DIR2/$tdir"
 
        local async_commit_count=$(do_nodes $nodes \
                "lctl get_param -n mdt.*.async_commit_count" | calc_sum)
-       [ $async_commit_count -gt 0 ] && error "CoS triggerred"
-
-       return 0
+       echo "CoS count $async_commit_count"
+       (( async_commit_count == 0 )) || error "CoS triggerred"
 }
-run_test 33e "DNE local operation shouldn't trigger COS"
+run_test 33e "independent transactions shouldn't trigger COS"
 
 # End commit on sharing tests
 
index dcdfd0f..5b8adbf 100644 (file)
@@ -1631,6 +1631,8 @@ check_ldlm_inodebits(void)
        printf("#endif /* HAVE_SERVER_SUPPORT */\n");
 #endif
        CHECK_MEMBER(ldlm_inodebits, li_gid);
+       CHECK_MEMBER(ldlm_inodebits, li_padding);
+       CHECK_MEMBER(ldlm_inodebits, li_initiator_id);
 }
 
 static void
@@ -3377,6 +3379,7 @@ main(int argc, char **argv)
        CHECK_VALUE(LCK_NL);
        CHECK_VALUE(LCK_GROUP);
        CHECK_VALUE(LCK_COS);
+       CHECK_VALUE(LCK_TXN);
        CHECK_VALUE(LCK_MAXMODE);
        CHECK_VALUE(LCK_MODE_NUM);
 
index 0fea530..00ec09f 100644 (file)
@@ -357,9 +357,11 @@ void lustre_assert_wire_constants(void)
                 (long long)LCK_GROUP);
        LASSERTF(LCK_COS == 128, "found %lld\n",
                 (long long)LCK_COS);
-       LASSERTF(LCK_MAXMODE == 129, "found %lld\n",
+       LASSERTF(LCK_TXN == 256, "found %lld\n",
+                (long long)LCK_TXN);
+       LASSERTF(LCK_MAXMODE == 257, "found %lld\n",
                 (long long)LCK_MAXMODE);
-       LASSERTF(LCK_MODE_NUM == 8, "found %lld\n",
+       LASSERTF(LCK_MODE_NUM == 9, "found %lld\n",
                 (long long)LCK_MODE_NUM);
        BUILD_BUG_ON(LDLM_PLAIN != 10);
        BUILD_BUG_ON(LDLM_EXTENT != 11);
@@ -3695,7 +3697,7 @@ void lustre_assert_wire_constants(void)
                 (long long)(int)sizeof(((struct ldlm_extent *)0)->gid));
 
        /* Checks for struct ldlm_inodebits */
-       LASSERTF((int)sizeof(struct ldlm_inodebits) == 24, "found %lld\n",
+       LASSERTF((int)sizeof(struct ldlm_inodebits) == 32, "found %lld\n",
                 (long long)(int)sizeof(struct ldlm_inodebits));
        LASSERTF((int)offsetof(struct ldlm_inodebits, bits) == 0, "found %lld\n",
                 (long long)(int)offsetof(struct ldlm_inodebits, bits));
@@ -3716,6 +3718,14 @@ void lustre_assert_wire_constants(void)
                 (long long)(int)offsetof(struct ldlm_inodebits, li_gid));
        LASSERTF((int)sizeof(((struct ldlm_inodebits *)0)->li_gid) == 8, "found %lld\n",
                 (long long)(int)sizeof(((struct ldlm_inodebits *)0)->li_gid));
+       LASSERTF((int)offsetof(struct ldlm_inodebits, li_padding) == 24, "found %lld\n",
+                (long long)(int)offsetof(struct ldlm_inodebits, li_padding));
+       LASSERTF((int)sizeof(((struct ldlm_inodebits *)0)->li_padding) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct ldlm_inodebits *)0)->li_padding));
+       LASSERTF((int)offsetof(struct ldlm_inodebits, li_initiator_id) == 28, "found %lld\n",
+                (long long)(int)offsetof(struct ldlm_inodebits, li_initiator_id));
+       LASSERTF((int)sizeof(((struct ldlm_inodebits *)0)->li_initiator_id) == 4, "found %lld\n",
+                (long long)(int)sizeof(((struct ldlm_inodebits *)0)->li_initiator_id));
 
        /* Checks for struct ldlm_flock_wire */
        LASSERTF((int)sizeof(struct ldlm_flock_wire) == 32, "found %lld\n",