Whamcloud - gitweb
LU-1517 ptlrpc: throw net error to ptlrpc for bulk
[fs/lustre-release.git] / lustre / ptlrpc / ptlrpcd.c
index e4c5e73..861e3cd 100644 (file)
@@ -1,6 +1,4 @@
-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
  * GPL HEADER START
  *
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@@ -28,9 +26,8 @@
 /*
  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
- */
-/*
- * Copyright (c) 2011 Whamcloud, Inc.
+ *
+ * Copyright (c) 2011, 2012, Whamcloud, Inc.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -95,7 +92,7 @@ CFS_MODULE_PARM(ptlrpcd_bind_policy, "i", int, 0644,
 #endif
 static struct ptlrpcd *ptlrpcds;
 
-cfs_semaphore_t ptlrpcd_sem;
+cfs_mutex_t ptlrpcd_mutex;
 static int ptlrpcd_users = 0;
 
 void ptlrpcd_wake(struct ptlrpc_request *req)
@@ -106,6 +103,7 @@ void ptlrpcd_wake(struct ptlrpc_request *req)
 
         cfs_waitq_signal(&rq_set->set_waitq);
 }
+EXPORT_SYMBOL(ptlrpcd_wake);
 
 static struct ptlrpcd_ctl *
 ptlrpcd_select_pc(struct ptlrpc_request *req, pdl_policy_t policy, int index)
@@ -241,6 +239,9 @@ void ptlrpcd_add_req(struct ptlrpc_request *req, pdl_policy_t policy, int idx)
 {
         struct ptlrpcd_ctl *pc;
 
+       if (req->rq_reqmsg)
+               lustre_msg_set_jobid(req->rq_reqmsg, NULL);
+
         cfs_spin_lock(&req->rq_lock);
         if (req->rq_invalid_rqset) {
                 struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(5),
@@ -271,6 +272,7 @@ void ptlrpcd_add_req(struct ptlrpc_request *req, pdl_policy_t policy, int idx)
 
         ptlrpc_set_add_new_req(pc, req);
 }
+EXPORT_SYMBOL(ptlrpcd_add_req);
 
 static inline void ptlrpc_reqset_get(struct ptlrpc_request_set *set)
 {
@@ -281,12 +283,13 @@ static inline void ptlrpc_reqset_get(struct ptlrpc_request_set *set)
  * Check if there is more work to do on ptlrpcd set.
  * Returns 1 if yes.
  */
-static int ptlrpcd_check(const struct lu_env *env, struct ptlrpcd_ctl *pc)
+static int ptlrpcd_check(struct lu_env *env, struct ptlrpcd_ctl *pc)
 {
         cfs_list_t *tmp, *pos;
         struct ptlrpc_request *req;
         struct ptlrpc_request_set *set = pc->pc_set;
         int rc = 0;
+        int rc2;
         ENTRY;
 
         if (cfs_atomic_read(&set->set_new_count)) {
@@ -305,6 +308,25 @@ static int ptlrpcd_check(const struct lu_env *env, struct ptlrpcd_ctl *pc)
                 cfs_spin_unlock(&set->set_new_req_lock);
         }
 
+        /* We should call lu_env_refill() before handling new requests to make
+         * sure that the env keys the requests depend on really exist.
+         */
+        rc2 = lu_env_refill(env);
+        if (rc2 != 0) {
+                /*
+                 * XXX This is a very awkward situation, because
+                 * execution can neither continue (request
+                 * interpreters assume that env is set up), nor repeat
+                 * the loop (as this potentially results in a tight
+                 * loop of -ENOMEM's).
+                 *
+                 * Fortunately, refill only ever does something when
+                 * new modules are loaded, i.e., early during boot up.
+                 */
+                CERROR("Failure to refill session: %d\n", rc2);
+                RETURN(rc);
+        }
+
         if (cfs_atomic_read(&set->set_remaining))
                 rc |= ptlrpc_check_set(env, set);
 
@@ -361,8 +383,8 @@ static int ptlrpcd_check(const struct lu_env *env, struct ptlrpcd_ctl *pc)
                                         if (rc > 0)
                                                 CDEBUG(D_RPCTRACE, "transfer %d"
                                                        " async RPCs [%d->%d]\n",
-                                                        rc, pc->pc_index,
-                                                        partner->pc_index);
+                                                        rc, partner->pc_index,
+                                                        pc->pc_index);
                                 }
                                 ptlrpc_reqset_put(ps);
                         } while (rc == 0 && pc->pc_cursor != first);
@@ -394,7 +416,7 @@ static int ptlrpcd(void *arg)
                 int index = pc->pc_index;
 
                 if (index >= 0 && index < cfs_num_possible_cpus()) {
-                        while (!cfs_cpu_online(index)) {
+                        while (!cpu_online(index)) {
                                 if (++index >= cfs_num_possible_cpus())
                                         index = 0;
                         }
@@ -425,22 +447,6 @@ static int ptlrpcd(void *arg)
                 struct l_wait_info lwi;
                 int timeout;
 
-                rc = lu_env_refill(&env);
-                if (rc != 0) {
-                        /*
-                         * XXX This is very awkward situation, because
-                         * execution can neither continue (request
-                         * interpreters assume that env is set up), nor repeat
-                         * the loop (as this potentially results in a tight
-                         * loop of -ENOMEM's).
-                         *
-                         * Fortunately, refill only ever does something when
-                         * new modules are loaded, i.e., early during boot up.
-                         */
-                        CERROR("Failure to refill session: %d\n", rc);
-                        continue;
-                }
-
                 timeout = ptlrpc_set_next_timeout(set);
                 lwi = LWI_TIMEOUT(cfs_time_seconds(timeout ? timeout : 1),
                                   ptlrpc_expired_set, set);
@@ -471,12 +477,14 @@ static int ptlrpcd(void *arg)
         if (!cfs_list_empty(&set->set_requests))
                 ptlrpc_set_wait(set);
         lu_context_fini(&env.le_ctx);
-        cfs_complete(&pc->pc_finishing);
 
         cfs_clear_bit(LIOD_START, &pc->pc_flags);
         cfs_clear_bit(LIOD_STOP, &pc->pc_flags);
         cfs_clear_bit(LIOD_FORCE, &pc->pc_flags);
         cfs_clear_bit(LIOD_BIND, &pc->pc_flags);
+
+        cfs_complete(&pc->pc_finishing);
+
         return 0;
 }
 
@@ -520,6 +528,11 @@ static int ptlrpcd_bind(int index, int max)
 {
         struct ptlrpcd_ctl *pc;
         int rc = 0;
+#if defined(CONFIG_NUMA) && defined(HAVE_NODE_TO_CPUMASK)
+        struct ptlrpcd_ctl *ppc;
+        int node, i, pidx;
+        cpumask_t mask;
+#endif
         ENTRY;
 
         LASSERT(index <= max - 1);
@@ -537,8 +550,17 @@ static int ptlrpcd_bind(int index, int max)
                 pc->pc_npartners = 1;
                 break;
         case PDB_POLICY_NEIGHBOR:
+#if defined(CONFIG_NUMA) && defined(HAVE_NODE_TO_CPUMASK)
+                node = cpu_to_node(index);
+                mask = node_to_cpumask(node);
+                for (i = max; i < cfs_num_online_cpus(); i++)
+                        cpu_clear(i, mask);
+                pc->pc_npartners = cpus_weight(mask) - 1;
+                cfs_set_bit(LIOD_BIND, &pc->pc_flags);
+#else
                 LASSERT(max >= 3);
                 pc->pc_npartners = 2;
+#endif
                 break;
         default:
                 CERROR("unknown ptlrpcd bind policy %d\n", ptlrpcd_bind_policy);
@@ -552,12 +574,10 @@ static int ptlrpcd_bind(int index, int max)
                         pc->pc_npartners = 0;
                         rc = -ENOMEM;
                 } else {
-                        if (index & 0x1)
-                                cfs_set_bit(LIOD_BIND, &pc->pc_flags);
-
                         switch (ptlrpcd_bind_policy) {
                         case PDB_POLICY_PAIR:
                                 if (index & 0x1) {
+                                        cfs_set_bit(LIOD_BIND, &pc->pc_flags);
                                         pc->pc_partners[0] = &ptlrpcds->
                                                 pd_threads[index - 1];
                                         ptlrpcds->pd_threads[index - 1].
@@ -565,6 +585,25 @@ static int ptlrpcd_bind(int index, int max)
                                 }
                                 break;
                         case PDB_POLICY_NEIGHBOR:
+#if defined(CONFIG_NUMA) && defined(HAVE_NODE_TO_CPUMASK)
+                                /* partners are cores in the same NUMA node.
+                                 * set up partnership only with ptlrpcd threads
+                                 * that are already initialized
+                                 */
+                                for (pidx = 0, i = 0; i < index; i++) {
+                                        if (cpu_isset(i, mask)) {
+                                                ppc = &ptlrpcds->pd_threads[i];
+                                                pc->pc_partners[pidx++] = ppc;
+                                                ppc->pc_partners[ppc->
+                                                          pc_npartners++] = pc;
+                                        }
+                                }
+                                /* adjust number of partners to the number
+                                 * of partnerships actually set up */
+                                pc->pc_npartners = pidx;
+#else
+                                if (index & 0x1)
+                                        cfs_set_bit(LIOD_BIND, &pc->pc_flags);
                                 if (index > 0) {
                                         pc->pc_partners[0] = &ptlrpcds->
                                                 pd_threads[index - 1];
@@ -577,6 +616,7 @@ static int ptlrpcd_bind(int index, int max)
                                                 pc_partners[0] = pc;
                                         }
                                 }
+#endif
                                 break;
                         }
                 }
@@ -700,6 +740,8 @@ out:
                 if (env != 0)
                         lu_context_fini(&pc->pc_env.le_ctx);
                 cfs_clear_bit(LIOD_BIND, &pc->pc_flags);
+#else
+                SET_BUT_UNUSED(env);
 #endif
                 cfs_clear_bit(LIOD_START, &pc->pc_flags);
         }
@@ -834,18 +876,20 @@ int ptlrpcd_addref(void)
         int rc = 0;
         ENTRY;
 
-        cfs_mutex_down(&ptlrpcd_sem);
+        cfs_mutex_lock(&ptlrpcd_mutex);
         if (++ptlrpcd_users == 1)
                 rc = ptlrpcd_init();
-        cfs_mutex_up(&ptlrpcd_sem);
+        cfs_mutex_unlock(&ptlrpcd_mutex);
         RETURN(rc);
 }
+EXPORT_SYMBOL(ptlrpcd_addref);
 
 void ptlrpcd_decref(void)
 {
-        cfs_mutex_down(&ptlrpcd_sem);
+        cfs_mutex_lock(&ptlrpcd_mutex);
         if (--ptlrpcd_users == 0)
                 ptlrpcd_fini();
-        cfs_mutex_up(&ptlrpcd_sem);
+        cfs_mutex_unlock(&ptlrpcd_mutex);
 }
+EXPORT_SYMBOL(ptlrpcd_decref);
 /** @} ptlrpcd */