* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2011, 2012, Intel Corporation.
+ * Copyright (c) 2011, 2014, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#define DEBUG_SUBSYSTEM S_RPC
-#ifdef __KERNEL__
-# include <libcfs/libcfs.h>
-#else /* __KERNEL__ */
-# include <liblustre.h>
-# include <ctype.h>
-#endif
-
+#include <libcfs/libcfs.h>
#include <lustre_net.h>
-# include <lustre_lib.h>
-
+#include <lustre_lib.h>
#include <lustre_ha.h>
#include <obd_class.h> /* for obd_zombie */
#include <obd_support.h> /* for OBD_FAIL_CHECK */
struct ptlrpcd_ctl pd_threads[0];
};
-#ifdef __KERNEL__
static int max_ptlrpcds;
CFS_MODULE_PARM(max_ptlrpcds, "i", int, 0644,
"Max ptlrpcd thread count to be started.");
static int ptlrpcd_bind_policy = PDB_POLICY_PAIR;
CFS_MODULE_PARM(ptlrpcd_bind_policy, "i", int, 0644,
"Ptlrpcd threads binding mode.");
-#endif
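
With the liblustre #ifdef dropped, both knobs are ordinary kernel module
parameters; mode 0644 additionally makes them root-writable at runtime
under /sys/module/<module>/parameters/. A hedged usage example, assuming
ptlrpcd.c is built into the in-tree ptlrpc module:

	modprobe ptlrpc max_ptlrpcds=8

caps the worker-thread count below the default of one per online CPU.
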
static struct ptlrpcd *ptlrpcds;
struct mutex ptlrpcd_mutex;
void ptlrpcd_wake(struct ptlrpc_request *req)
{
- struct ptlrpc_request_set *rq_set = req->rq_set;
-
- LASSERT(rq_set != NULL);
+ struct ptlrpc_request_set *set = req->rq_set;
- cfs_waitq_signal(&rq_set->set_waitq);
+ LASSERT(set != NULL);
+ wake_up(&set->set_waitq);
}
EXPORT_SYMBOL(ptlrpcd_wake);
if (req != NULL && req->rq_send_state != LUSTRE_IMP_FULL)
return &ptlrpcds->pd_thread_rcv;
-#ifdef __KERNEL__
- switch (policy) {
- case PDL_POLICY_SAME:
- idx = cfs_smp_processor_id() % ptlrpcds->pd_nthreads;
- break;
+ switch (policy) {
+ case PDL_POLICY_SAME:
+ idx = smp_processor_id() % ptlrpcds->pd_nthreads;
+ break;
case PDL_POLICY_LOCAL:
/* Before CPU partition patches available, process it the same
* as "PDL_POLICY_ROUND". */
* CPU partition patches are available. */
index = -1;
case PDL_POLICY_PREFERRED:
- if (index >= 0 && index < cfs_num_online_cpus()) {
+ if (index >= 0 && index < num_online_cpus()) {
idx = index % ptlrpcds->pd_nthreads;
break;
}
case PDL_POLICY_ROUND:
/* We do not care whether it is strict load balance. */
idx = ptlrpcds->pd_index + 1;
- if (idx == cfs_smp_processor_id())
+ if (idx == smp_processor_id())
idx++;
idx %= ptlrpcds->pd_nthreads;
ptlrpcds->pd_index = idx;
break;
}
-#endif /* __KERNEL__ */
return &ptlrpcds->pd_threads[idx];
}
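
The PDL_POLICY_ROUND arm above advances a shared cursor and bumps past the
slot whose index equals the caller's CPU id, which is why the comment says
strict load balance is not a goal. A standalone toy illustration of just
that arithmetic (plain userspace C with hypothetical values, not Lustre
code):

	#include <stdio.h>

	int main(void)
	{
		int nthreads = 4, cur_cpu = 1, pd_index = 0, i;

		for (i = 0; i < 6; i++) {
			int idx = pd_index + 1;	/* advance cursor */

			if (idx == cur_cpu)	/* skip caller's CPU slot */
				idx++;
			idx %= nthreads;
			pd_index = idx;
			printf("pick ptlrpcd thread %d\n", idx);
		}
		return 0;	/* prints 2 3 0 2 3 0: slot 1 is never picked */
	}
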
*/
void ptlrpcd_add_rqset(struct ptlrpc_request_set *set)
{
- cfs_list_t *tmp, *pos;
-#ifdef __KERNEL__
+ struct list_head *tmp, *pos;
struct ptlrpcd_ctl *pc;
struct ptlrpc_request_set *new;
int count, i;
pc = ptlrpcd_select_pc(NULL, PDL_POLICY_LOCAL, -1);
new = pc->pc_set;
-#endif
- cfs_list_for_each_safe(pos, tmp, &set->set_requests) {
- struct ptlrpc_request *req =
- cfs_list_entry(pos, struct ptlrpc_request,
- rq_set_chain);
+ list_for_each_safe(pos, tmp, &set->set_requests) {
+ struct ptlrpc_request *req =
+ list_entry(pos, struct ptlrpc_request,
+ rq_set_chain);
- LASSERT(req->rq_phase == RQ_PHASE_NEW);
-#ifdef __KERNEL__
- req->rq_set = new;
- req->rq_queued_time = cfs_time_current();
-#else
- cfs_list_del_init(&req->rq_set_chain);
- req->rq_set = NULL;
- ptlrpcd_add_req(req, PDL_POLICY_LOCAL, -1);
- cfs_atomic_dec(&set->set_remaining);
-#endif
- }
+ LASSERT(req->rq_phase == RQ_PHASE_NEW);
+ req->rq_set = new;
+ req->rq_queued_time = cfs_time_current();
+ }
-#ifdef __KERNEL__
spin_lock(&new->set_new_req_lock);
- cfs_list_splice_init(&set->set_requests, &new->set_new_requests);
- i = cfs_atomic_read(&set->set_remaining);
- count = cfs_atomic_add_return(i, &new->set_new_count);
- cfs_atomic_set(&set->set_remaining, 0);
+ list_splice_init(&set->set_requests, &new->set_new_requests);
+ i = atomic_read(&set->set_remaining);
+ count = atomic_add_return(i, &new->set_new_count);
+ atomic_set(&set->set_remaining, 0);
spin_unlock(&new->set_new_req_lock);
- if (count == i) {
- cfs_waitq_signal(&new->set_waitq);
-
- /* XXX: It maybe unnecessary to wakeup all the partners. But to
- * guarantee the async RPC can be processed ASAP, we have
- * no other better choice. It maybe fixed in future. */
- for (i = 0; i < pc->pc_npartners; i++)
- cfs_waitq_signal(&pc->pc_partners[i]->pc_set->set_waitq);
- }
-#endif
+ if (count == i) {
+ wake_up(&new->set_waitq);
+
+ /* XXX: It may be unnecessary to wake up all the partners. But to
+ * guarantee the async RPC can be processed ASAP, we have
+ * no better choice. It may be fixed in the future. */
+ for (i = 0; i < pc->pc_npartners; i++)
+ wake_up(&pc->pc_partners[i]->pc_set->set_waitq);
+ }
}
-EXPORT_SYMBOL(ptlrpcd_add_rqset);
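
Since the patch also drops this EXPORT_SYMBOL, ptlrpcd_add_rqset() remains
callable only from inside the ptlrpc module. A hedged in-module sketch of
the hand-off pattern (example_batch and reqs[] are hypothetical; the
ptlrpc_prep_set/ptlrpc_set_add_req/ptlrpc_set_destroy helpers are the
existing request-set API):

	/* Batch already-prepared requests into a private set, then splice
	 * the whole batch onto a ptlrpcd worker in one shot. */
	static int example_batch(struct ptlrpc_request **reqs, int n)
	{
		struct ptlrpc_request_set *set;
		int i;

		set = ptlrpc_prep_set();
		if (set == NULL)
			return -ENOMEM;

		for (i = 0; i < n; i++)
			ptlrpc_set_add_req(set, reqs[i]);

		ptlrpcd_add_rqset(set);	 /* splices out set_requests */
		ptlrpc_set_destroy(set); /* set is empty again by now */
		return 0;
	}
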
-#ifdef __KERNEL__
/**
* Return transferred RPCs count.
*/
static int ptlrpcd_steal_rqset(struct ptlrpc_request_set *des,
struct ptlrpc_request_set *src)
{
- cfs_list_t *tmp, *pos;
- struct ptlrpc_request *req;
- int rc = 0;
+ struct list_head *tmp, *pos;
+ struct ptlrpc_request *req;
+ int rc = 0;
spin_lock(&src->set_new_req_lock);
- if (likely(!cfs_list_empty(&src->set_new_requests))) {
- cfs_list_for_each_safe(pos, tmp, &src->set_new_requests) {
- req = cfs_list_entry(pos, struct ptlrpc_request,
- rq_set_chain);
- req->rq_set = des;
- }
- cfs_list_splice_init(&src->set_new_requests,
- &des->set_requests);
- rc = cfs_atomic_read(&src->set_new_count);
- cfs_atomic_add(rc, &des->set_remaining);
- cfs_atomic_set(&src->set_new_count, 0);
- }
+ if (likely(!list_empty(&src->set_new_requests))) {
+ list_for_each_safe(pos, tmp, &src->set_new_requests) {
+ req = list_entry(pos, struct ptlrpc_request,
+ rq_set_chain);
+ req->rq_set = des;
+ }
+ list_splice_init(&src->set_new_requests,
+ &des->set_requests);
+ rc = atomic_read(&src->set_new_count);
+ atomic_add(rc, &des->set_remaining);
+ atomic_set(&src->set_new_count, 0);
+ }
spin_unlock(&src->set_new_req_lock);
return rc;
}
-#endif
/**
* Requests that are added to the ptlrpcd queue are sent via
*/
void ptlrpcd_add_req(struct ptlrpc_request *req, pdl_policy_t policy, int idx)
{
- struct ptlrpcd_ctl *pc;
+ struct ptlrpcd_ctl *pc;
if (req->rq_reqmsg)
lustre_msg_set_jobid(req->rq_reqmsg, NULL);
spin_lock(&req->rq_lock);
- if (req->rq_invalid_rqset) {
- struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(5),
- back_to_sleep, NULL);
+ if (req->rq_invalid_rqset) {
+ struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(5),
+ back_to_sleep, NULL);
- req->rq_invalid_rqset = 0;
+ req->rq_invalid_rqset = 0;
spin_unlock(&req->rq_lock);
- l_wait_event(req->rq_set_waitq, (req->rq_set == NULL), &lwi);
- } else if (req->rq_set) {
- /* If we have a vaid "rq_set", just reuse it to avoid double
- * linked. */
- LASSERT(req->rq_phase == RQ_PHASE_NEW);
- LASSERT(req->rq_send_state == LUSTRE_IMP_REPLAY);
-
- /* ptlrpc_check_set will decrease the count */
- cfs_atomic_inc(&req->rq_set->set_remaining);
+ l_wait_event(req->rq_set_waitq, (req->rq_set == NULL), &lwi);
+ } else if (req->rq_set) {
+ /* If we have a valid "rq_set", just reuse it to avoid double
+ * linking. */
+ LASSERT(req->rq_phase == RQ_PHASE_NEW);
+ LASSERT(req->rq_send_state == LUSTRE_IMP_REPLAY);
+
+ /* ptlrpc_check_set will decrease the count */
+ atomic_inc(&req->rq_set->set_remaining);
spin_unlock(&req->rq_lock);
- cfs_waitq_signal(&req->rq_set->set_waitq);
+ wake_up(&req->rq_set->set_waitq);
return;
} else {
spin_unlock(&req->rq_lock);
- }
+ }
- pc = ptlrpcd_select_pc(req, policy, idx);
+ pc = ptlrpcd_select_pc(req, policy, idx);
- DEBUG_REQ(D_INFO, req, "add req [%p] to pc [%s:%d]",
- req, pc->pc_name, pc->pc_index);
+ DEBUG_REQ(D_INFO, req, "add req [%p] to pc [%s:%d]",
+ req, pc->pc_name, pc->pc_index);
- ptlrpc_set_add_new_req(pc, req);
+ ptlrpc_set_add_new_req(pc, req);
}
EXPORT_SYMBOL(ptlrpcd_add_req);
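
ptlrpcd_add_req() is the exported single-request entry point: callers hand
over a packed request and let ptlrpcd drive it to completion. A hedged
call-site sketch (example_send_async is hypothetical):

	/* Queue one prepared request on a ptlrpcd worker. PDL_POLICY_ROUND
	 * with index -1 expresses no CPU preference; PDL_POLICY_PREFERRED
	 * plus a valid CPU index would route it to the worker derived from
	 * that index instead. */
	static void example_send_async(struct ptlrpc_request *req)
	{
		ptlrpcd_add_req(req, PDL_POLICY_ROUND, -1);
	}
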
static inline void ptlrpc_reqset_get(struct ptlrpc_request_set *set)
{
- cfs_atomic_inc(&set->set_refcount);
+ atomic_inc(&set->set_refcount);
}
/**
*/
static int ptlrpcd_check(struct lu_env *env, struct ptlrpcd_ctl *pc)
{
- cfs_list_t *tmp, *pos;
+ struct list_head *tmp, *pos;
struct ptlrpc_request *req;
struct ptlrpc_request_set *set = pc->pc_set;
int rc = 0;
int rc2;
ENTRY;
- if (cfs_atomic_read(&set->set_new_count)) {
+ if (atomic_read(&set->set_new_count)) {
spin_lock(&set->set_new_req_lock);
- if (likely(!cfs_list_empty(&set->set_new_requests))) {
- cfs_list_splice_init(&set->set_new_requests,
- &set->set_requests);
- cfs_atomic_add(cfs_atomic_read(&set->set_new_count),
- &set->set_remaining);
- cfs_atomic_set(&set->set_new_count, 0);
- /*
- * Need to calculate its timeout.
- */
- rc = 1;
- }
+ if (likely(!list_empty(&set->set_new_requests))) {
+ list_splice_init(&set->set_new_requests,
+ &set->set_requests);
+ atomic_add(atomic_read(&set->set_new_count),
+ &set->set_remaining);
+ atomic_set(&set->set_new_count, 0);
+ /*
+ * Need to calculate its timeout.
+ */
+ rc = 1;
+ }
spin_unlock(&set->set_new_req_lock);
- }
+ }
- /* We should call lu_env_refill() before handling new requests to make
- * sure that env key the requests depending on really exists.
- */
- rc2 = lu_env_refill(env);
- if (rc2 != 0) {
- /*
- * XXX This is very awkward situation, because
- * execution can neither continue (request
- * interpreters assume that env is set up), nor repeat
- * the loop (as this potentially results in a tight
- * loop of -ENOMEM's).
- *
- * Fortunately, refill only ever does something when
- * new modules are loaded, i.e., early during boot up.
- */
- CERROR("Failure to refill session: %d\n", rc2);
- RETURN(rc);
- }
+ /* We should call lu_env_refill() before handling new requests to make
+ * sure that the env keys the requests depend on really exist.
+ */
+ rc2 = lu_env_refill(env);
+ if (rc2 != 0) {
+ /*
+ * XXX This is a very awkward situation, because
+ * execution can neither continue (request
+ * interpreters assume that env is set up), nor repeat
+ * the loop (as this potentially results in a tight
+ * loop of -ENOMEM's).
+ *
+ * Fortunately, refill only ever does something when
+ * new modules are loaded, i.e., early during boot up.
+ */
+ CERROR("Failure to refill session: %d\n", rc2);
+ RETURN(rc);
+ }
- if (cfs_atomic_read(&set->set_remaining))
- rc |= ptlrpc_check_set(env, set);
+ if (atomic_read(&set->set_remaining))
+ rc |= ptlrpc_check_set(env, set);
- if (!cfs_list_empty(&set->set_requests)) {
- /*
- * XXX: our set never completes, so we prune the completed
- * reqs after each iteration. boy could this be smarter.
- */
- cfs_list_for_each_safe(pos, tmp, &set->set_requests) {
- req = cfs_list_entry(pos, struct ptlrpc_request,
- rq_set_chain);
- if (req->rq_phase != RQ_PHASE_COMPLETE)
- continue;
-
- cfs_list_del_init(&req->rq_set_chain);
- req->rq_set = NULL;
- ptlrpc_req_finished(req);
- }
- }
+ /* NB: ptlrpc_check_set() has already moved completed requests to the
+ * head of set::set_requests */
+ list_for_each_safe(pos, tmp, &set->set_requests) {
+ req = list_entry(pos, struct ptlrpc_request, rq_set_chain);
+ if (req->rq_phase != RQ_PHASE_COMPLETE)
+ break;
- if (rc == 0) {
- /*
- * If new requests have been added, make sure to wake up.
- */
- rc = cfs_atomic_read(&set->set_new_count);
+ list_del_init(&req->rq_set_chain);
+ req->rq_set = NULL;
+ ptlrpc_req_finished(req);
+ }
+
+ if (rc == 0) {
+ /*
+ * If new requests have been added, make sure to wake up.
+ */
+ rc = atomic_read(&set->set_new_count);
-#ifdef __KERNEL__
/* If we have nothing to do, check whether we can take some
* work from our partner threads. */
if (rc == 0 && pc->pc_npartners > 0) {
ptlrpc_reqset_get(ps);
spin_unlock(&partner->pc_lock);
- if (cfs_atomic_read(&ps->set_new_count)) {
- rc = ptlrpcd_steal_rqset(set, ps);
- if (rc > 0)
- CDEBUG(D_RPCTRACE, "transfer %d"
- " async RPCs [%d->%d]\n",
- rc, partner->pc_index,
- pc->pc_index);
- }
- ptlrpc_reqset_put(ps);
- } while (rc == 0 && pc->pc_cursor != first);
- }
-#endif
- }
+ if (atomic_read(&ps->set_new_count)) {
+ rc = ptlrpcd_steal_rqset(set, ps);
+ if (rc > 0)
+ CDEBUG(D_RPCTRACE, "transfer %d"
+ " async RPCs [%d->%d]\n",
+ rc, partner->pc_index,
+ pc->pc_index);
+ }
+ ptlrpc_reqset_put(ps);
+ } while (rc == 0 && pc->pc_cursor != first);
+ }
+ }
- RETURN(rc);
+ RETURN(rc);
}
-#ifdef __KERNEL__
/**
* Main ptlrpcd thread.
* ptlrpc's code paths like to execute in process context, so we have this
*/
static int ptlrpcd(void *arg)
{
- struct ptlrpcd_ctl *pc = arg;
- struct ptlrpc_request_set *set = pc->pc_set;
- struct lu_env env = { .le_ses = NULL };
- int rc, exit = 0;
- ENTRY;
-
- cfs_daemonize_ctxt(pc->pc_name);
-#if defined(CONFIG_SMP) && defined(HAVE_NODE_TO_CPUMASK)
+ struct ptlrpcd_ctl *pc = arg;
+ struct ptlrpc_request_set *set = pc->pc_set;
+ struct lu_context ses = { 0 };
+ struct lu_env env = { .le_ses = &ses };
+ int rc, exit = 0;
+ ENTRY;
+
+ unshare_fs_struct();
+#if defined(CONFIG_SMP)
if (test_bit(LIOD_BIND, &pc->pc_flags)) {
- int index = pc->pc_index;
-
- if (index >= 0 && index < cfs_num_possible_cpus()) {
- while (!cpu_online(index)) {
- if (++index >= cfs_num_possible_cpus())
- index = 0;
- }
- cfs_set_cpus_allowed(cfs_current(),
- node_to_cpumask(cpu_to_node(index)));
- }
- }
+ int index = pc->pc_index;
+
+ if (index >= 0 && index < num_possible_cpus()) {
+ while (!cpu_online(index)) {
+ if (++index >= num_possible_cpus())
+ index = 0;
+ }
+ set_cpus_allowed_ptr(current,
+ cpumask_of_node(cpu_to_node(index)));
+ }
+ }
#endif
- /*
- * XXX So far only "client" ptlrpcd uses an environment. In
- * the future, ptlrpcd thread (or a thread-set) has to given
- * an argument, describing its "scope".
- */
- rc = lu_context_init(&env.le_ctx,
- LCT_CL_THREAD|LCT_REMEMBER|LCT_NOREF);
+ /* Both client and server (MDT/OST) may use the environment. */
+ rc = lu_context_init(&env.le_ctx, LCT_MD_THREAD | LCT_DT_THREAD |
+ LCT_CL_THREAD | LCT_REMEMBER |
+ LCT_NOREF);
+ if (rc == 0) {
+ rc = lu_context_init(env.le_ses,
+ LCT_SESSION|LCT_REMEMBER|LCT_NOREF);
+ if (rc != 0)
+ lu_context_fini(&env.le_ctx);
+ }
complete(&pc->pc_starting);
if (rc != 0)
lwi = LWI_TIMEOUT(cfs_time_seconds(timeout ? timeout : 1),
ptlrpc_expired_set, set);
- lu_context_enter(&env.le_ctx);
- l_wait_event(set->set_waitq,
- ptlrpcd_check(&env, pc), &lwi);
- lu_context_exit(&env.le_ctx);
+ lu_context_enter(&env.le_ctx);
+ lu_context_enter(env.le_ses);
+ l_wait_event(set->set_waitq, ptlrpcd_check(&env, pc), &lwi);
+ lu_context_exit(&env.le_ctx);
+ lu_context_exit(env.le_ses);
- /*
- * Abort inflight rpcs for forced stop case.
- */
+ /*
+ * Abort inflight rpcs for forced stop case.
+ */
if (test_bit(LIOD_STOP, &pc->pc_flags)) {
if (test_bit(LIOD_FORCE, &pc->pc_flags))
ptlrpc_abort_set(set);
/*
* Wait for inflight requests to drain.
*/
- if (!cfs_list_empty(&set->set_requests))
+ if (!list_empty(&set->set_requests))
ptlrpc_set_wait(set);
- lu_context_fini(&env.le_ctx);
-
- clear_bit(LIOD_START, &pc->pc_flags);
- clear_bit(LIOD_STOP, &pc->pc_flags);
- clear_bit(LIOD_FORCE, &pc->pc_flags);
- clear_bit(LIOD_BIND, &pc->pc_flags);
+ lu_context_fini(&env.le_ctx);
+ lu_context_fini(env.le_ses);
complete(&pc->pc_finishing);
- return 0;
+ return 0;
}
/* XXX: We want multiple CPU cores to share the async RPC load. So we start many
# endif
static int ptlrpcd_bind(int index, int max)
{
- struct ptlrpcd_ctl *pc;
- int rc = 0;
-#if defined(CONFIG_NUMA) && defined(HAVE_NODE_TO_CPUMASK)
- struct ptlrpcd_ctl *ppc;
- int node, i, pidx;
- cpumask_t mask;
+ struct ptlrpcd_ctl *pc;
+ int rc = 0;
+#if defined(CONFIG_NUMA)
+ cpumask_t mask;
#endif
- ENTRY;
+ ENTRY;
LASSERT(index <= max - 1);
pc = &ptlrpcds->pd_threads[index];
LASSERT(max % 2 == 0);
pc->pc_npartners = 1;
break;
- case PDB_POLICY_NEIGHBOR:
-#if defined(CONFIG_NUMA) && defined(HAVE_NODE_TO_CPUMASK)
- node = cpu_to_node(index);
- mask = node_to_cpumask(node);
- for (i = max; i < cfs_num_online_cpus(); i++)
- cpu_clear(i, mask);
- pc->pc_npartners = cpus_weight(mask) - 1;
+ case PDB_POLICY_NEIGHBOR:
+#if defined(CONFIG_NUMA)
+ {
+ int i;
+ cpumask_copy(&mask, cpumask_of_node(cpu_to_node(index)));
+ for (i = max; i < num_online_cpus(); i++)
+ cpumask_clear_cpu(i, &mask);
+ pc->pc_npartners = cpumask_weight(&mask) - 1;
set_bit(LIOD_BIND, &pc->pc_flags);
+ }
#else
LASSERT(max >= 3);
pc->pc_npartners = 2;
pc_partners[0] = pc;
}
break;
- case PDB_POLICY_NEIGHBOR:
-#if defined(CONFIG_NUMA) && defined(HAVE_NODE_TO_CPUMASK)
- /* partners are cores in the same NUMA node.
- * setup partnership only with ptlrpcd threads
- * that are already initialized
- */
- for (pidx = 0, i = 0; i < index; i++) {
- if (cpu_isset(i, mask)) {
- ppc = &ptlrpcds->pd_threads[i];
- pc->pc_partners[pidx++] = ppc;
- ppc->pc_partners[ppc->
- pc_npartners++] = pc;
- }
- }
+ case PDB_POLICY_NEIGHBOR:
+#if defined(CONFIG_NUMA)
+ {
+ struct ptlrpcd_ctl *ppc;
+ int i, pidx;
+ /* Partners are cores in the same NUMA node.
+ * Set up partnerships only with ptlrpcd threads
+ * that are already initialized.
+ */
+ for (pidx = 0, i = 0; i < index; i++) {
+ if (cpumask_test_cpu(i, &mask)) {
+ ppc = &ptlrpcds->pd_threads[i];
+ pc->pc_partners[pidx++] = ppc;
+ ppc->pc_partners[ppc->pc_npartners++] = pc;
+ }
+ }
/* adjust number of partners to the number
* of partnership really setup */
pc->pc_npartners = pidx;
+ }
#else
if (index & 0x1)
set_bit(LIOD_BIND, &pc->pc_flags);
RETURN(rc);
}
-#else /* !__KERNEL__ */
-
-/**
- * In liblustre we do not have separate threads, so this function
- * is called from time to time all across common code to see
- * if something needs to be processed on ptlrpcd set.
- */
-int ptlrpcd_check_async_rpcs(void *arg)
-{
- struct ptlrpcd_ctl *pc = arg;
- int rc = 0;
-
- /*
- * Single threaded!!
- */
- pc->pc_recurred++;
-
- if (pc->pc_recurred == 1) {
- rc = lu_env_refill(&pc->pc_env);
- if (rc == 0) {
- lu_context_enter(&pc->pc_env.le_ctx);
- rc = ptlrpcd_check(&pc->pc_env, pc);
- if (!rc)
- ptlrpc_expired_set(pc->pc_set);
- /*
- * XXX: send replay requests.
- */
- if (test_bit(LIOD_RECOVERY, &pc->pc_flags))
- rc = ptlrpcd_check(&pc->pc_env, pc);
- lu_context_exit(&pc->pc_env.le_ctx);
- }
- }
-
- pc->pc_recurred--;
- return rc;
-}
-
-int ptlrpcd_idle(void *arg)
-{
- struct ptlrpcd_ctl *pc = arg;
-
- return (cfs_atomic_read(&pc->pc_set->set_new_count) == 0 &&
- cfs_atomic_read(&pc->pc_set->set_remaining) == 0);
-}
-
-#endif
int ptlrpcd_start(int index, int max, const char *name, struct ptlrpcd_ctl *pc)
{
int rc;
- int env = 0;
ENTRY;
/*
init_completion(&pc->pc_starting);
init_completion(&pc->pc_finishing);
spin_lock_init(&pc->pc_lock);
- strncpy(pc->pc_name, name, sizeof(pc->pc_name) - 1);
+ strlcpy(pc->pc_name, name, sizeof(pc->pc_name));
pc->pc_set = ptlrpc_prep_set();
if (pc->pc_set == NULL)
GOTO(out, rc = -ENOMEM);
+
/*
* So far only "client" ptlrpcd uses an environment. In the future,
* ptlrpcd thread (or a thread-set) has to be given an argument,
*/
rc = lu_context_init(&pc->pc_env.le_ctx, LCT_CL_THREAD|LCT_REMEMBER);
if (rc != 0)
- GOTO(out, rc);
+ GOTO(out_set, rc);
+
+ {
+ struct task_struct *task;
+ if (index >= 0) {
+ rc = ptlrpcd_bind(index, max);
+ if (rc < 0)
+ GOTO(out_env, rc);
+ }
- env = 1;
-#ifdef __KERNEL__
- if (index >= 0) {
- rc = ptlrpcd_bind(index, max);
- if (rc < 0)
- GOTO(out, rc);
- }
+ task = kthread_run(ptlrpcd, pc, pc->pc_name);
+ if (IS_ERR(task))
+ GOTO(out_env, rc = PTR_ERR(task));
- rc = cfs_create_thread(ptlrpcd, pc, 0);
- if (rc < 0)
- GOTO(out, rc);
+ wait_for_completion(&pc->pc_starting);
+ }
+ RETURN(0);
- rc = 0;
- wait_for_completion(&pc->pc_starting);
-#else
- pc->pc_wait_callback =
- liblustre_register_wait_callback("ptlrpcd_check_async_rpcs",
- &ptlrpcd_check_async_rpcs, pc);
- pc->pc_idle_callback =
- liblustre_register_idle_callback("ptlrpcd_check_idle_rpcs",
- &ptlrpcd_idle, pc);
-#endif
-out:
- if (rc) {
-#ifdef __KERNEL__
- if (pc->pc_set != NULL) {
- struct ptlrpc_request_set *set = pc->pc_set;
-
- spin_lock(&pc->pc_lock);
- pc->pc_set = NULL;
- spin_unlock(&pc->pc_lock);
- ptlrpc_set_destroy(set);
- }
- if (env != 0)
- lu_context_fini(&pc->pc_env.le_ctx);
- clear_bit(LIOD_BIND, &pc->pc_flags);
-#else
- SET_BUT_UNUSED(env);
-#endif
- clear_bit(LIOD_START, &pc->pc_flags);
+out_env:
+ lu_context_fini(&pc->pc_env.le_ctx);
+
+out_set:
+ if (pc->pc_set != NULL) {
+ struct ptlrpc_request_set *set = pc->pc_set;
+
+ spin_lock(&pc->pc_lock);
+ pc->pc_set = NULL;
+ spin_unlock(&pc->pc_lock);
+ ptlrpc_set_destroy(set);
}
+ clear_bit(LIOD_BIND, &pc->pc_flags);
+out:
+ clear_bit(LIOD_START, &pc->pc_flags);
RETURN(rc);
}
void ptlrpcd_stop(struct ptlrpcd_ctl *pc, int force)
{
- struct ptlrpc_request_set *set = pc->pc_set;
- ENTRY;
+ ENTRY;
if (!test_bit(LIOD_START, &pc->pc_flags)) {
- CWARN("Thread for pc %p was not started\n", pc);
- goto out;
- }
+ CWARN("Thread for pc %p was not started\n", pc);
+ goto out;
+ }
set_bit(LIOD_STOP, &pc->pc_flags);
if (force)
set_bit(LIOD_FORCE, &pc->pc_flags);
- cfs_waitq_signal(&pc->pc_set->set_waitq);
-#ifdef __KERNEL__
+ wake_up(&pc->pc_set->set_waitq);
+
+out:
+ EXIT;
+}
+
+void ptlrpcd_free(struct ptlrpcd_ctl *pc)
+{
+ struct ptlrpc_request_set *set = pc->pc_set;
+ ENTRY;
+
+ if (!test_bit(LIOD_START, &pc->pc_flags)) {
+ CWARN("Thread for pc %p was not started\n", pc);
+ goto out;
+ }
+
wait_for_completion(&pc->pc_finishing);
-#else
- liblustre_deregister_wait_callback(pc->pc_wait_callback);
- liblustre_deregister_idle_callback(pc->pc_idle_callback);
-#endif
lu_context_fini(&pc->pc_env.le_ctx);
spin_lock(&pc->pc_lock);
spin_unlock(&pc->pc_lock);
ptlrpc_set_destroy(set);
+ clear_bit(LIOD_START, &pc->pc_flags);
+ clear_bit(LIOD_STOP, &pc->pc_flags);
+ clear_bit(LIOD_FORCE, &pc->pc_flags);
+ clear_bit(LIOD_BIND, &pc->pc_flags);
+
out:
-#ifdef __KERNEL__
if (pc->pc_npartners > 0) {
LASSERT(pc->pc_partners != NULL);
pc->pc_partners = NULL;
}
pc->pc_npartners = 0;
-#endif
EXIT;
}
static void ptlrpcd_fini(void)
{
- int i;
- ENTRY;
-
- if (ptlrpcds != NULL) {
- for (i = 0; i < ptlrpcds->pd_nthreads; i++)
- ptlrpcd_stop(&ptlrpcds->pd_threads[i], 0);
- ptlrpcd_stop(&ptlrpcds->pd_thread_rcv, 0);
- OBD_FREE(ptlrpcds, ptlrpcds->pd_size);
- ptlrpcds = NULL;
- }
+ int i;
+ ENTRY;
+
+ if (ptlrpcds != NULL) {
+ for (i = 0; i < ptlrpcds->pd_nthreads; i++)
+ ptlrpcd_stop(&ptlrpcds->pd_threads[i], 0);
+ for (i = 0; i < ptlrpcds->pd_nthreads; i++)
+ ptlrpcd_free(&ptlrpcds->pd_threads[i]);
+ ptlrpcd_stop(&ptlrpcds->pd_thread_rcv, 0);
+ ptlrpcd_free(&ptlrpcds->pd_thread_rcv);
+ OBD_FREE(ptlrpcds, ptlrpcds->pd_size);
+ ptlrpcds = NULL;
+ }
- EXIT;
+ EXIT;
}
static int ptlrpcd_init(void)
{
- int nthreads = cfs_num_online_cpus();
- char name[16];
- int size, i = -1, j, rc = 0;
- ENTRY;
+ int nthreads = num_online_cpus();
+ char name[16];
+ int size, i = -1, j, rc = 0;
+ ENTRY;
-#ifdef __KERNEL__
if (max_ptlrpcds > 0 && max_ptlrpcds < nthreads)
nthreads = max_ptlrpcds;
if (nthreads < 2)
ptlrpcd_bind_policy = PDB_POLICY_PAIR;
else if (nthreads % 2 != 0 && ptlrpcd_bind_policy == PDB_POLICY_PAIR)
nthreads &= ~1; /* make sure it is even */
-#else
- nthreads = 1;
-#endif
size = offsetof(struct ptlrpcd, pd_threads[nthreads]);
OBD_ALLOC(ptlrpcds, size);
if (rc != 0 && ptlrpcds != NULL) {
for (j = 0; j <= i; j++)
ptlrpcd_stop(&ptlrpcds->pd_threads[j], 0);
- ptlrpcd_stop(&ptlrpcds->pd_thread_rcv, 0);
+ for (j = 0; j <= i; j++)
+ ptlrpcd_free(&ptlrpcds->pd_threads[j]);
+ ptlrpcd_stop(&ptlrpcds->pd_thread_rcv, 0);
+ ptlrpcd_free(&ptlrpcds->pd_thread_rcv);
OBD_FREE(ptlrpcds, size);
ptlrpcds = NULL;
}
- RETURN(0);
+ RETURN(rc);
}
int ptlrpcd_addref(void)
ENTRY;
mutex_lock(&ptlrpcd_mutex);
- if (++ptlrpcd_users == 1)
- rc = ptlrpcd_init();
+ if (++ptlrpcd_users == 1) {
+ rc = ptlrpcd_init();
+ if (rc < 0)
+ ptlrpcd_users--;
+ }
mutex_unlock(&ptlrpcd_mutex);
RETURN(rc);
}
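
The addref/init pairing above implies a symmetric release path in which
the last user tears the threads down. A sketch of the matching
ptlrpcd_decref(), assuming it simply mirrors the ptlrpcd_addref() shown
here:

	void ptlrpcd_decref(void)
	{
		mutex_lock(&ptlrpcd_mutex);
		if (--ptlrpcd_users == 0)
			ptlrpcd_fini();
		mutex_unlock(&ptlrpcd_mutex);
	}
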