Whamcloud - gitweb
b=24375 Fix lvb updating race in enqueue vs completion case
[fs/lustre-release.git] / lustre / ldlm / ldlm_request.c
index 0f30f16..5d718c9 100644 (file)
@@ -337,7 +337,7 @@ int ldlm_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
         lock_res_and_lock(lock);
         /* Get this: if ldlm_blocking_ast is racing with intent_policy, such
          * that ldlm_blocking_ast is called just before intent_policy method
-         * takes the ns_lock, then by the time we get the lock, we might not
+         * takes the lr_lock, then by the time we get the lock, we might not
          * be the correct blocking function anymore.  So check, and return
          * early, if so. */
         if (lock->l_blocking_ast != ldlm_blocking_ast) {
@@ -488,6 +488,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
         int is_replay = *flags & LDLM_FL_REPLAY;
         struct ldlm_lock *lock;
         struct ldlm_reply *reply;
+        struct ost_lvb *tmplvb;
         int cleanup_phase = 1;
         ENTRY;
 
@@ -509,12 +510,11 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
                         if (reply == NULL)
                                 rc = -EPROTO;
                         if (lvb_len) {
-                                struct ost_lvb *tmplvb;
 
                                 req_capsule_set_size(&req->rq_pill,
                                                      &RMF_DLM_LVB, RCL_SERVER,
                                                      lvb_len);
-                            tmplvb = req_capsule_server_get(&req->rq_pill,
+                                tmplvb = req_capsule_server_get(&req->rq_pill,
                                                                  &RMF_DLM_LVB);
                                 if (tmplvb == NULL)
                                         GOTO(cleanup, rc = -EPROTO);
@@ -606,16 +606,25 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
 
         /* If the lock has already been granted by a completion AST, don't
          * clobber the LVB with an older one. */
-        if (lvb_len && (lock->l_req_mode != lock->l_granted_mode)) {
-                void *tmplvb;
+        if (lvb_len) {
+                /* We must lock or a racing completion might update lvb
+                   without letting us know and we'll clobber the correct value.
+                   Cannot unlock after the check either, as that still leaves
+                   a tiny window for completion to get in */
+                lock_res_and_lock(lock);
+                if (lock->l_req_mode != lock->l_granted_mode) {
 
-                req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER,
-                                     lvb_len);
-                tmplvb = req_capsule_server_get(&req->rq_pill,
-                                                     &RMF_DLM_LVB);
-                if (tmplvb == NULL)
-                        GOTO(cleanup, rc = -EPROTO);
-                memcpy(lock->l_lvb_data, tmplvb, lvb_len);
+                        req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB,
+                                             RCL_SERVER, lvb_len);
+                        tmplvb = req_capsule_server_get(&req->rq_pill,
+                                                             &RMF_DLM_LVB);
+                        if (tmplvb == NULL) {
+                                unlock_res_and_lock(lock);
+                                GOTO(cleanup, rc = -EPROTO);
+                        }
+                        memcpy(lock->l_lvb_data, tmplvb, lvb_len);
+                }
+                unlock_res_and_lock(lock);
         }
 
         if (!is_replay) {
@@ -1525,7 +1534,7 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns, cfs_list_t *cancels,
         int added = 0, unused, remained;
         ENTRY;
 
-        cfs_spin_lock(&ns->ns_unused_lock);
+        cfs_spin_lock(&ns->ns_lock);
         unused = ns->ns_nr_unused;
         remained = unused;
 
@@ -1567,7 +1576,7 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns, cfs_list_t *cancels,
                         break;
 
                 LDLM_LOCK_GET(lock);
-                cfs_spin_unlock(&ns->ns_unused_lock);
+                cfs_spin_unlock(&ns->ns_lock);
                 lu_ref_add(&lock->l_reference, __FUNCTION__, cfs_current());
 
                 /* Pass the lock through the policy filter and see if it
@@ -1588,14 +1597,14 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns, cfs_list_t *cancels,
                         lu_ref_del(&lock->l_reference,
                                    __FUNCTION__, cfs_current());
                         LDLM_LOCK_RELEASE(lock);
-                        cfs_spin_lock(&ns->ns_unused_lock);
+                        cfs_spin_lock(&ns->ns_lock);
                         break;
                 }
                 if (result == LDLM_POLICY_SKIP_LOCK) {
                         lu_ref_del(&lock->l_reference,
                                    __FUNCTION__, cfs_current());
                         LDLM_LOCK_RELEASE(lock);
-                        cfs_spin_lock(&ns->ns_unused_lock);
+                        cfs_spin_lock(&ns->ns_lock);
                         continue;
                 }
 
@@ -1612,7 +1621,7 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns, cfs_list_t *cancels,
                         lu_ref_del(&lock->l_reference,
                                    __FUNCTION__, cfs_current());
                         LDLM_LOCK_RELEASE(lock);
-                        cfs_spin_lock(&ns->ns_unused_lock);
+                        cfs_spin_lock(&ns->ns_lock);
                         continue;
                 }
                 LASSERT(!lock->l_readers && !lock->l_writers);
@@ -1634,7 +1643,7 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns, cfs_list_t *cancels,
 
                 /* We can't re-add to l_lru as it confuses the
                  * refcounting in ldlm_lock_remove_from_lru() if an AST
-                 * arrives after we drop ns_lock below. We use l_bl_ast
+                 * arrives after we drop lr_lock below. We use l_bl_ast
                  * and can't use l_pending_chain as it is used both on
                  * server and client nevertheless bug 5666 says it is
                  * used only on server */
@@ -1642,11 +1651,11 @@ static int ldlm_prepare_lru_list(struct ldlm_namespace *ns, cfs_list_t *cancels,
                 cfs_list_add(&lock->l_bl_ast, cancels);
                 unlock_res_and_lock(lock);
                 lu_ref_del(&lock->l_reference, __FUNCTION__, cfs_current());
-                cfs_spin_lock(&ns->ns_unused_lock);
+                cfs_spin_lock(&ns->ns_lock);
                 added++;
                 unused--;
         }
-        cfs_spin_unlock(&ns->ns_unused_lock);
+        cfs_spin_unlock(&ns->ns_lock);
         RETURN(added);
 }
 
@@ -1824,16 +1833,27 @@ int ldlm_cli_cancel_unused_resource(struct ldlm_namespace *ns,
         RETURN(0);
 }
 
-static inline int have_no_nsresource(struct ldlm_namespace *ns)
-{
-        int no_resource = 0;
-
-        cfs_spin_lock(&ns->ns_hash_lock);
-        if (ns->ns_resources == 0)
-                no_resource = 1;
-        cfs_spin_unlock(&ns->ns_hash_lock);
+struct ldlm_cli_cancel_arg {
+        int     lc_flags;
+        void   *lc_opaque;
+};
 
-        RETURN(no_resource);
+static int ldlm_cli_hash_cancel_unused(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+                                       cfs_hlist_node_t *hnode, void *arg)
+{
+        struct ldlm_resource           *res = cfs_hash_object(hs, hnode);
+        struct ldlm_cli_cancel_arg     *lc = arg;
+        int                             rc;
+
+        rc = ldlm_cli_cancel_unused_resource(ldlm_res_to_ns(res), &res->lr_name,
+                                             NULL, LCK_MINMODE,
+                                             lc->lc_flags, lc->lc_opaque);
+        if (rc != 0) {
+                CERROR("ldlm_cli_cancel_unused ("LPU64"): %d\n",
+                       res->lr_name.name[0], rc);
+        }
+        /* must return 0 for hash iteration */
+        return 0;
 }
 
 /* Cancel all locks on a namespace (or a specific resource, if given)
@@ -1845,48 +1865,25 @@ int ldlm_cli_cancel_unused(struct ldlm_namespace *ns,
                            const struct ldlm_res_id *res_id,
                            ldlm_cancel_flags_t flags, void *opaque)
 {
-        int i;
+        struct ldlm_cli_cancel_arg arg = {
+                .lc_flags       = flags,
+                .lc_opaque      = opaque,
+        };
+
         ENTRY;
 
         if (ns == NULL)
                 RETURN(ELDLM_OK);
 
-        if (res_id)
+        if (res_id != NULL) {
                 RETURN(ldlm_cli_cancel_unused_resource(ns, res_id, NULL,
                                                        LCK_MINMODE, flags,
                                                        opaque));
-
-        cfs_spin_lock(&ns->ns_hash_lock);
-        for (i = 0; i < RES_HASH_SIZE; i++) {
-                cfs_list_t *tmp;
-                tmp = ns->ns_hash[i].next;
-                while (tmp != &(ns->ns_hash[i])) {
-                        struct ldlm_resource *res;
-                        int rc;
-
-                        res = cfs_list_entry(tmp, struct ldlm_resource,
-                                             lr_hash);
-                        ldlm_resource_getref(res);
-                        cfs_spin_unlock(&ns->ns_hash_lock);
-
-                        LDLM_RESOURCE_ADDREF(res);
-                        rc = ldlm_cli_cancel_unused_resource(ns, &res->lr_name,
-                                                             NULL, LCK_MINMODE,
-                                                             flags, opaque);
-
-                        if (rc)
-                                CERROR("ldlm_cli_cancel_unused ("LPU64"): %d\n",
-                                       res->lr_name.name[0], rc);
-
-                        LDLM_RESOURCE_DELREF(res);
-                        cfs_spin_lock(&ns->ns_hash_lock);
-                        tmp = tmp->next;
-                        ldlm_resource_putref_locked(res);
-                }
+        } else {
+                cfs_hash_for_each_nolock(ns->ns_rs_hash,
+                                         ldlm_cli_hash_cancel_unused, &arg);
+                RETURN(ELDLM_OK);
         }
-        cfs_spin_unlock(&ns->ns_hash_lock);
-
-        RETURN(ELDLM_OK);
 }
 
 /* Lock iterators. */
@@ -1940,49 +1937,25 @@ static int ldlm_iter_helper(struct ldlm_lock *lock, void *closure)
         return helper->iter(lock, helper->closure);
 }
 
-static int ldlm_res_iter_helper(struct ldlm_resource *res, void *closure)
+static int ldlm_res_iter_helper(cfs_hash_t *hs, cfs_hash_bd_t *bd,
+                                cfs_hlist_node_t *hnode, void *arg)
+
 {
-        return ldlm_resource_foreach(res, ldlm_iter_helper, closure);
+        struct ldlm_resource *res = cfs_hash_object(hs, hnode);
+
+        return ldlm_resource_foreach(res, ldlm_iter_helper, arg) ==
+               LDLM_ITER_STOP;
 }
 
-int ldlm_namespace_foreach(struct ldlm_namespace *ns, ldlm_iterator_t iter,
-                           void *closure)
+void ldlm_namespace_foreach(struct ldlm_namespace *ns,
+                            ldlm_iterator_t iter, void *closure)
+
 {
         struct iter_helper_data helper = { iter: iter, closure: closure };
-        return ldlm_namespace_foreach_res(ns, ldlm_res_iter_helper, &helper);
-}
 
-int ldlm_namespace_foreach_res(struct ldlm_namespace *ns,
-                               ldlm_res_iterator_t iter, void *closure)
-{
-        int i, rc = LDLM_ITER_CONTINUE;
-        struct ldlm_resource *res;
-        cfs_list_t *tmp;
+        cfs_hash_for_each_nolock(ns->ns_rs_hash,
+                                 ldlm_res_iter_helper, &helper);
 
-        ENTRY;
-        cfs_spin_lock(&ns->ns_hash_lock);
-        for (i = 0; i < RES_HASH_SIZE; i++) {
-                tmp = ns->ns_hash[i].next;
-                while (tmp != &(ns->ns_hash[i])) {
-                        res = cfs_list_entry(tmp, struct ldlm_resource,
-                                             lr_hash);
-                        ldlm_resource_getref(res);
-                        cfs_spin_unlock(&ns->ns_hash_lock);
-                        LDLM_RESOURCE_ADDREF(res);
-
-                        rc = iter(res, closure);
-
-                        LDLM_RESOURCE_DELREF(res);
-                        cfs_spin_lock(&ns->ns_hash_lock);
-                        tmp = tmp->next;
-                        ldlm_resource_putref_locked(res);
-                        if (rc == LDLM_ITER_STOP)
-                                GOTO(out, rc);
-                }
-        }
- out:
-        cfs_spin_unlock(&ns->ns_hash_lock);
-        RETURN(rc);
 }
 
 /* non-blocking function to manipulate a lock whose cb_data is being put away.
@@ -2184,8 +2157,8 @@ static void ldlm_cancel_unused_locks_for_replay(struct ldlm_namespace *ns)
         CFS_LIST_HEAD(cancels);
 
         CDEBUG(D_DLMTRACE, "Dropping as many unused locks as possible before"
-                           "replay for namespace %s (%d)\n", ns->ns_name,
-                           ns->ns_nr_unused);
+                           "replay for namespace %s (%d)\n",
+                           ldlm_ns_name(ns), ns->ns_nr_unused);
 
         /* We don't need to care whether or not LRU resize is enabled
          * because the LDLM_CANCEL_NO_WAIT policy doesn't use the
@@ -2194,7 +2167,7 @@ static void ldlm_cancel_unused_locks_for_replay(struct ldlm_namespace *ns)
                                          LCF_LOCAL, LDLM_CANCEL_NO_WAIT);
 
         CDEBUG(D_DLMTRACE, "Canceled %d unused locks from namespace %s\n",
-                           canceled, ns->ns_name);
+                           canceled, ldlm_ns_name(ns));
 }
 
 int ldlm_replay_locks(struct obd_import *imp)
@@ -2218,7 +2191,7 @@ int ldlm_replay_locks(struct obd_import *imp)
         if (ldlm_cancel_unused_locks_before_replay)
                 ldlm_cancel_unused_locks_for_replay(ns);
 
-        (void)ldlm_namespace_foreach(ns, ldlm_chain_lock_for_replay, &list);
+        ldlm_namespace_foreach(ns, ldlm_chain_lock_for_replay, &list);
 
         cfs_list_for_each_entry_safe(lock, next, &list, l_pending_chain) {
                 cfs_list_del_init(&lock->l_pending_chain);