LU-11568 ldlm: Remove use of SLAB_DESTROY_BY_RCU for ldlm lock slab
[fs/lustre-release.git] / lustre / ldlm / ldlm_lockd.c
index 6edef77..dd293e2 100644
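The change named in the subject line appears at the bottom of the diff: the ldlm lock slab is now created without SLAB_DESTROY_BY_RCU, so the slab pages backing freed ldlm_lock objects are no longer held back for an RCU grace period. A minimal standalone sketch of the slab setup as it reads after this patch (ldlm_slab_setup() is a hypothetical helper added for illustration, not code from the patch; error handling is reduced to a return code):

	static struct kmem_cache *ldlm_lock_slab;

	static int ldlm_slab_setup(void)
	{
		/* cache-line alignment only; no RCU-delayed freeing of slab pages */
		ldlm_lock_slab = kmem_cache_create("ldlm_locks",
						   sizeof(struct ldlm_lock), 0,
						   SLAB_HWCACHE_ALIGN, NULL);
		return ldlm_lock_slab != NULL ? 0 : -ENOMEM;
	}
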
@@ -49,6 +49,11 @@ static int ldlm_num_threads;
 module_param(ldlm_num_threads, int, 0444);
 MODULE_PARM_DESC(ldlm_num_threads, "number of DLM service threads to start");
 
+static unsigned int ldlm_cpu_bind = 1;
+module_param(ldlm_cpu_bind, uint, 0444);
+MODULE_PARM_DESC(ldlm_cpu_bind,
+                "bind DLM service threads to particular CPU partitions");
+
 static char *ldlm_cpts;
 module_param(ldlm_cpts, charp, 0444);
 MODULE_PARM_DESC(ldlm_cpts, "CPU partitions ldlm threads should run on");
@@ -704,14 +709,15 @@ static int ldlm_handle_ast_error(const struct lu_env *env,
 }
 
 static int ldlm_cb_interpret(const struct lu_env *env,
-                             struct ptlrpc_request *req, void *data, int rc)
+                            struct ptlrpc_request *req, void *args, int rc)
 {
-        struct ldlm_cb_async_args *ca   = data;
-        struct ldlm_lock          *lock = ca->ca_lock;
-        struct ldlm_cb_set_arg    *arg  = ca->ca_set_arg;
-        ENTRY;
+       struct ldlm_cb_async_args *ca = args;
+       struct ldlm_lock *lock = ca->ca_lock;
+       struct ldlm_cb_set_arg *arg = ca->ca_set_arg;
 
-        LASSERT(lock != NULL);
+       ENTRY;
+
+       LASSERT(lock != NULL);
 
        switch (arg->type) {
        case LDLM_GL_CALLBACK:
@@ -724,7 +730,7 @@ static int ldlm_cb_interpret(const struct lu_env *env,
                 *   -ELDLM_NO_LOCK_DATA when inode is cleared. LU-274
                 */
                if (unlikely(arg->gl_interpret_reply)) {
-                       rc = arg->gl_interpret_reply(env, req, data, rc);
+                       rc = arg->gl_interpret_reply(env, req, args, rc);
                } else if (rc == -ELDLM_NO_LOCK_DATA) {
                        LDLM_DEBUG(lock, "lost race - client has a lock but no "
                                   "inode");
@@ -998,7 +1004,7 @@ int ldlm_server_completion_ast(struct ldlm_lock *lock, __u64 flags, void *data)
                if (req->rq_svc_thread)
                        env = req->rq_svc_thread->t_env;
 
-               lvb_len = ldlm_lvbo_fill(env, lock, lvb, lvb_len);
+               lvb_len = ldlm_lvbo_fill(env, lock, lvb, &lvb_len);
                if (lvb_len < 0) {
                        /* We still need to send the RPC to wake up the blocked
                         * enqueue thread on the client.
@@ -1113,22 +1119,24 @@ int ldlm_server_glimpse_ast(struct ldlm_lock *lock, void *data)
        ca->ca_set_arg = arg;
        ca->ca_lock = lock;
 
-        /* server namespace, doesn't need lock */
-        req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER,
-                             ldlm_lvbo_size(lock));
-        ptlrpc_request_set_replen(req);
+       /* server namespace, doesn't need lock */
+       req_capsule_set_size(&req->rq_pill, &RMF_DLM_LVB, RCL_SERVER,
+                            ldlm_lvbo_size(lock));
+       ptlrpc_request_set_replen(req);
 
-        req->rq_send_state = LUSTRE_IMP_FULL;
-        /* ptlrpc_request_alloc_pack already set timeout */
-        if (AT_OFF)
-                req->rq_timeout = ldlm_get_rq_timeout();
+       req->rq_send_state = LUSTRE_IMP_FULL;
+       /* ptlrpc_request_alloc_pack already set timeout */
+       if (AT_OFF)
+               req->rq_timeout = ldlm_get_rq_timeout();
 
        req->rq_interpret_reply = ldlm_cb_interpret;
 
-        if (lock->l_export && lock->l_export->exp_nid_stats &&
-            lock->l_export->exp_nid_stats->nid_ldlm_stats)
-                lprocfs_counter_incr(lock->l_export->exp_nid_stats->nid_ldlm_stats,
-                                     LDLM_GL_CALLBACK - LDLM_FIRST_OPC);
+       if (lock->l_export && lock->l_export->exp_nid_stats) {
+               struct nid_stat *nid_stats = lock->l_export->exp_nid_stats;
+
+               lprocfs_counter_incr(nid_stats->nid_ldlm_stats,
+                                    LDLM_GL_CALLBACK - LDLM_FIRST_OPC);
+       }
 
        rc = ldlm_ast_fini(req, arg, lock, 0);
 
@@ -1404,43 +1412,59 @@ existing_lock:
                LDLM_DEBUG(lock, "server-side enqueue handler, sending reply"
                           "(err=%d, rc=%d)", err, rc);
 
-               if (rc == 0) {
-                       if (req_capsule_has_field(&req->rq_pill, &RMF_DLM_LVB,
-                                                 RCL_SERVER) &&
-                           ldlm_lvbo_size(lock) > 0) {
-                               void *buf;
-                               int buflen;
-
-                               buf = req_capsule_server_get(&req->rq_pill,
-                                                            &RMF_DLM_LVB);
-                               LASSERTF(buf != NULL, "req %p, lock %p\n",
-                                        req, lock);
-                               buflen = req_capsule_get_size(&req->rq_pill,
-                                               &RMF_DLM_LVB, RCL_SERVER);
-                               /* non-replayed lock, delayed lvb init may
-                                * need to be occur now */
-                               if ((buflen > 0) && !(flags & LDLM_FL_REPLAY)) {
-                                       buflen = ldlm_lvbo_fill(env, lock, buf,
-                                                               buflen);
-                                       if (buflen >= 0)
-                                               req_capsule_shrink(
+               if (rc == 0 &&
+                   req_capsule_has_field(&req->rq_pill, &RMF_DLM_LVB,
+                                         RCL_SERVER) &&
+                   ldlm_lvbo_size(lock) > 0) {
+                       void *buf;
+                       int buflen;
+
+retry:
+                       buf = req_capsule_server_get(&req->rq_pill,
+                                                    &RMF_DLM_LVB);
+                       LASSERTF(buf != NULL, "req %p, lock %p\n", req, lock);
+                       buflen = req_capsule_get_size(&req->rq_pill,
+                                       &RMF_DLM_LVB, RCL_SERVER);
+                       /* non-replayed lock, delayed lvb init may
+                        * need to occur now
+                        */
+                       if ((buflen > 0) && !(flags & LDLM_FL_REPLAY)) {
+                               int rc2;
+
+                               rc2 = ldlm_lvbo_fill(env, lock, buf, &buflen);
+                               if (rc2 >= 0) {
+                                       req_capsule_shrink(&req->rq_pill,
+                                                          &RMF_DLM_LVB,
+                                                          rc2, RCL_SERVER);
+                               } else if (rc2 == -ERANGE) {
+                                       rc2 = req_capsule_server_grow(
                                                        &req->rq_pill,
-                                                       &RMF_DLM_LVB,
-                                                       buflen, RCL_SERVER);
-                                       else
-                                               rc = buflen;
-                               } else if (flags & LDLM_FL_REPLAY) {
-                                       /* no LVB resend upon replay */
-                                       if (buflen > 0)
+                                                       &RMF_DLM_LVB, buflen);
+                                       if (!rc2) {
+                                               goto retry;
+                                       } else {
+                                               /* if we can't grow the buffer,
+                                                * it's ok to return empty lvb
+                                                * to client.
+                                                */
                                                req_capsule_shrink(
                                                        &req->rq_pill,
-                                                       &RMF_DLM_LVB,
-                                                       0, RCL_SERVER);
-                                       else
-                                               rc = buflen;
+                                                       &RMF_DLM_LVB, 0,
+                                                       RCL_SERVER);
+                                       }
                                } else {
-                                       rc = buflen;
+                                       rc = rc2;
                                }
+                       } else if (flags & LDLM_FL_REPLAY) {
+                               /* no LVB resend upon replay */
+                               if (buflen > 0)
+                                       req_capsule_shrink(&req->rq_pill,
+                                                          &RMF_DLM_LVB,
+                                                          0, RCL_SERVER);
+                               else
+                                       rc = buflen;
+                       } else {
+                               rc = buflen;
                        }
                }
 
@@ -1469,6 +1493,31 @@ existing_lock:
         return rc;
 }
 
+/* Clear the blocking lock; a race is possible between ldlm_handle_convert0()
+ * and ldlm_work_bl_ast_lock(), so this is done under the lock with a NULL check.
+ */
+void ldlm_clear_blocking_lock(struct ldlm_lock *lock)
+{
+       if (lock->l_blocking_lock) {
+               LDLM_LOCK_RELEASE(lock->l_blocking_lock);
+               lock->l_blocking_lock = NULL;
+       }
+}
+
+/* A lock that is converted to new ibits or mode should be considered
+ * a new lock. Clear all state related to previous blocking AST
+ * processing so that new conflicts will cause new blocking ASTs.
+ *
+ * This is used by the lock convert path below and by the downgrade to
+ * COS mode in ldlm_lock_mode_downgrade().
+ */
+void ldlm_clear_blocking_data(struct ldlm_lock *lock)
+{
+       ldlm_clear_ast_sent(lock);
+       lock->l_bl_ast_run = 0;
+       ldlm_clear_blocking_lock(lock);
+}
+
 /**
  * Main LDLM entry point for server code to process lock conversion requests.
  */
@@ -1522,20 +1571,8 @@ int ldlm_handle_convert0(struct ptlrpc_request *req,
                        ldlm_clear_cbpending(lock);
                        lock->l_policy_data.l_inodebits.cancel_bits = 0;
                        ldlm_inodebits_drop(lock, bits & ~new);
-                       /* if lock is in a bl_ast list, remove it from the list
-                        * here before reprocessing.
-                        */
-                       if (!list_empty(&lock->l_bl_ast)) {
-                               ldlm_discard_bl_lock(lock);
-                       } else {
-                               /* in this case lock was taken from bl_ast list
-                                * already by ldlm_work_bl_ast_lock() and lock
-                                * must clear only some remaining states.
-                                */
-                               ldlm_clear_ast_sent(lock);
-                               lock->l_bl_ast_run = 0;
-                               ldlm_clear_blocking_lock(lock);
-                       }
+
+                       ldlm_clear_blocking_data(lock);
                        unlock_res_and_lock(lock);
 
                        ldlm_reprocess_all(lock->l_resource);
@@ -1785,6 +1822,21 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req,
        }
 
        lock_res_and_lock(lock);
+
+       if (!ldlm_res_eq(&dlm_req->lock_desc.l_resource.lr_name,
+                        &lock->l_resource->lr_name)) {
+               ldlm_resource_unlink_lock(lock);
+               unlock_res_and_lock(lock);
+               rc = ldlm_lock_change_resource(ns, lock,
+                               &dlm_req->lock_desc.l_resource.lr_name);
+               if (rc < 0) {
+                       LDLM_ERROR(lock, "Failed to allocate resource");
+                       GOTO(out, rc);
+               }
+               LDLM_DEBUG(lock, "completion AST, new resource");
+               lock_res_and_lock(lock);
+       }
+
        if (ldlm_is_destroyed(lock) ||
            lock->l_granted_mode == lock->l_req_mode) {
                /* bug 11300: the lock has already been granted */
@@ -1808,21 +1860,7 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req,
                LDLM_DEBUG(lock, "completion AST, new policy data");
        }
 
-        ldlm_resource_unlink_lock(lock);
-        if (memcmp(&dlm_req->lock_desc.l_resource.lr_name,
-                   &lock->l_resource->lr_name,
-                   sizeof(lock->l_resource->lr_name)) != 0) {
-                unlock_res_and_lock(lock);
-               rc = ldlm_lock_change_resource(ns, lock,
-                               &dlm_req->lock_desc.l_resource.lr_name);
-               if (rc < 0) {
-                       LDLM_ERROR(lock, "Failed to allocate resource");
-                       GOTO(out, rc);
-               }
-                LDLM_DEBUG(lock, "completion AST, new resource");
-                CERROR("change resource!\n");
-                lock_res_and_lock(lock);
-        }
+       ldlm_resource_unlink_lock(lock);
 
         if (dlm_req->lock_flags & LDLM_FL_AST_SENT) {
                /* BL_AST locks are not needed in LRU.
@@ -2954,11 +2992,12 @@ static int ldlm_setup(void)
                        .tc_nthrs_base          = LDLM_NTHRS_BASE,
                        .tc_nthrs_max           = LDLM_NTHRS_MAX,
                        .tc_nthrs_user          = ldlm_num_threads,
-                       .tc_cpu_affinity        = 1,
+                       .tc_cpu_bind            = ldlm_cpu_bind,
                        .tc_ctx_tags            = LCT_MD_THREAD | LCT_DT_THREAD,
                },
                .psc_cpt                = {
                        .cc_pattern             = ldlm_cpts,
+                       .cc_affinity            = true,
                },
                .psc_ops                = {
                        .so_req_handler         = ldlm_callback_handler,
@@ -2995,13 +3034,14 @@ static int ldlm_setup(void)
                        .tc_nthrs_base          = LDLM_NTHRS_BASE,
                        .tc_nthrs_max           = LDLM_NTHRS_MAX,
                        .tc_nthrs_user          = ldlm_num_threads,
-                       .tc_cpu_affinity        = 1,
+                       .tc_cpu_bind            = ldlm_cpu_bind,
                        .tc_ctx_tags            = LCT_MD_THREAD | \
                                                  LCT_DT_THREAD | \
                                                  LCT_CL_THREAD,
                },
                .psc_cpt                = {
                        .cc_pattern             = ldlm_cpts,
+                       .cc_affinity            = true,
                },
                .psc_ops                = {
                        .so_req_handler         = ldlm_cancel_handler,
@@ -3153,7 +3193,7 @@ int ldlm_init(void)
 
        ldlm_lock_slab = kmem_cache_create("ldlm_locks",
                              sizeof(struct ldlm_lock), 0,
-                             SLAB_HWCACHE_ALIGN | SLAB_DESTROY_BY_RCU, NULL);
+                             SLAB_HWCACHE_ALIGN, NULL);
        if (ldlm_lock_slab == NULL)
                goto out_resource;