* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2011, 2012, Intel Corporation.
+ * Copyright (c) 2011, 2014, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#define DEBUG_SUBSYSTEM S_RPC
-#ifdef __KERNEL__
-# include <libcfs/libcfs.h>
-#else /* __KERNEL__ */
-# include <liblustre.h>
-# include <ctype.h>
-#endif
-
+#include <libcfs/libcfs.h>
#include <lustre_net.h>
-# include <lustre_lib.h>
-
+#include <lustre_lib.h>
#include <lustre_ha.h>
#include <obd_class.h> /* for obd_zombie */
#include <obd_support.h> /* for OBD_FAIL_CHECK */
struct ptlrpcd_ctl pd_threads[0];
};
-#ifdef __KERNEL__
static int max_ptlrpcds;
CFS_MODULE_PARM(max_ptlrpcds, "i", int, 0644,
"Max ptlrpcd thread count to be started.");
static int ptlrpcd_bind_policy = PDB_POLICY_PAIR;
CFS_MODULE_PARM(ptlrpcd_bind_policy, "i", int, 0644,
"Ptlrpcd threads binding mode.");
-#endif
static struct ptlrpcd *ptlrpcds;
struct mutex ptlrpcd_mutex;
void ptlrpcd_wake(struct ptlrpc_request *req)
{
- struct ptlrpc_request_set *rq_set = req->rq_set;
-
- LASSERT(rq_set != NULL);
+ struct ptlrpc_request_set *set = req->rq_set;
- wake_up(&rq_set->set_waitq);
+ LASSERT(set != NULL);
+ wake_up(&set->set_waitq);
}
EXPORT_SYMBOL(ptlrpcd_wake);
if (req != NULL && req->rq_send_state != LUSTRE_IMP_FULL)
return &ptlrpcds->pd_thread_rcv;
-#ifdef __KERNEL__
switch (policy) {
case PDL_POLICY_SAME:
idx = smp_processor_id() % ptlrpcds->pd_nthreads;
ptlrpcds->pd_index = idx;
break;
}
-#endif /* __KERNEL__ */
return &ptlrpcds->pd_threads[idx];
}
*/
void ptlrpcd_add_rqset(struct ptlrpc_request_set *set)
{
- cfs_list_t *tmp, *pos;
-#ifdef __KERNEL__
+ struct list_head *tmp, *pos;
struct ptlrpcd_ctl *pc;
struct ptlrpc_request_set *new;
int count, i;
pc = ptlrpcd_select_pc(NULL, PDL_POLICY_LOCAL, -1);
new = pc->pc_set;
-#endif
- cfs_list_for_each_safe(pos, tmp, &set->set_requests) {
- struct ptlrpc_request *req =
- cfs_list_entry(pos, struct ptlrpc_request,
- rq_set_chain);
+ list_for_each_safe(pos, tmp, &set->set_requests) {
+ struct ptlrpc_request *req =
+ list_entry(pos, struct ptlrpc_request,
+ rq_set_chain);
- LASSERT(req->rq_phase == RQ_PHASE_NEW);
-#ifdef __KERNEL__
- req->rq_set = new;
- req->rq_queued_time = cfs_time_current();
-#else
- cfs_list_del_init(&req->rq_set_chain);
- req->rq_set = NULL;
- ptlrpcd_add_req(req, PDL_POLICY_LOCAL, -1);
- cfs_atomic_dec(&set->set_remaining);
-#endif
- }
+ LASSERT(req->rq_phase == RQ_PHASE_NEW);
+ req->rq_set = new;
+ req->rq_queued_time = cfs_time_current();
+ }
-#ifdef __KERNEL__
spin_lock(&new->set_new_req_lock);
- cfs_list_splice_init(&set->set_requests, &new->set_new_requests);
- i = cfs_atomic_read(&set->set_remaining);
- count = cfs_atomic_add_return(i, &new->set_new_count);
- cfs_atomic_set(&set->set_remaining, 0);
+ list_splice_init(&set->set_requests, &new->set_new_requests);
+ i = atomic_read(&set->set_remaining);
+ count = atomic_add_return(i, &new->set_new_count);
+ atomic_set(&set->set_remaining, 0);
spin_unlock(&new->set_new_req_lock);
if (count == i) {
wake_up(&new->set_waitq);
for (i = 0; i < pc->pc_npartners; i++)
wake_up(&pc->pc_partners[i]->pc_set->set_waitq);
}
-#endif
}
-EXPORT_SYMBOL(ptlrpcd_add_rqset);
-#ifdef __KERNEL__
/**
* Return transferred RPCs count.
*/
static int ptlrpcd_steal_rqset(struct ptlrpc_request_set *des,
struct ptlrpc_request_set *src)
{
- cfs_list_t *tmp, *pos;
- struct ptlrpc_request *req;
- int rc = 0;
+ struct list_head *tmp, *pos;
+ struct ptlrpc_request *req;
+ int rc = 0;
spin_lock(&src->set_new_req_lock);
- if (likely(!cfs_list_empty(&src->set_new_requests))) {
- cfs_list_for_each_safe(pos, tmp, &src->set_new_requests) {
- req = cfs_list_entry(pos, struct ptlrpc_request,
- rq_set_chain);
- req->rq_set = des;
- }
- cfs_list_splice_init(&src->set_new_requests,
- &des->set_requests);
- rc = cfs_atomic_read(&src->set_new_count);
- cfs_atomic_add(rc, &des->set_remaining);
- cfs_atomic_set(&src->set_new_count, 0);
- }
+ if (likely(!list_empty(&src->set_new_requests))) {
+ list_for_each_safe(pos, tmp, &src->set_new_requests) {
+ req = list_entry(pos, struct ptlrpc_request,
+ rq_set_chain);
+ req->rq_set = des;
+ }
+ list_splice_init(&src->set_new_requests,
+ &des->set_requests);
+ rc = atomic_read(&src->set_new_count);
+ atomic_add(rc, &des->set_remaining);
+ atomic_set(&src->set_new_count, 0);
+ }
spin_unlock(&src->set_new_req_lock);
return rc;
}
-#endif
/**
* Requests that are added to the ptlrpcd queue are sent via
*/
void ptlrpcd_add_req(struct ptlrpc_request *req, pdl_policy_t policy, int idx)
{
- struct ptlrpcd_ctl *pc;
+ struct ptlrpcd_ctl *pc;
if (req->rq_reqmsg)
lustre_msg_set_jobid(req->rq_reqmsg, NULL);
spin_lock(&req->rq_lock);
- if (req->rq_invalid_rqset) {
- struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(5),
- back_to_sleep, NULL);
+ if (req->rq_invalid_rqset) {
+ struct l_wait_info lwi = LWI_TIMEOUT(cfs_time_seconds(5),
+ back_to_sleep, NULL);
- req->rq_invalid_rqset = 0;
+ req->rq_invalid_rqset = 0;
spin_unlock(&req->rq_lock);
- l_wait_event(req->rq_set_waitq, (req->rq_set == NULL), &lwi);
- } else if (req->rq_set) {
- /* If we have a vaid "rq_set", just reuse it to avoid double
- * linked. */
- LASSERT(req->rq_phase == RQ_PHASE_NEW);
- LASSERT(req->rq_send_state == LUSTRE_IMP_REPLAY);
-
- /* ptlrpc_check_set will decrease the count */
- cfs_atomic_inc(&req->rq_set->set_remaining);
+ l_wait_event(req->rq_set_waitq, (req->rq_set == NULL), &lwi);
+ } else if (req->rq_set) {
+ /* If we have a valid "rq_set", just reuse it to avoid double
+ * linking. */
+ LASSERT(req->rq_phase == RQ_PHASE_NEW);
+ LASSERT(req->rq_send_state == LUSTRE_IMP_REPLAY);
+
+ /* ptlrpc_check_set will decrease the count */
+ atomic_inc(&req->rq_set->set_remaining);
spin_unlock(&req->rq_lock);
wake_up(&req->rq_set->set_waitq);
return;
} else {
spin_unlock(&req->rq_lock);
- }
+ }
- pc = ptlrpcd_select_pc(req, policy, idx);
+ pc = ptlrpcd_select_pc(req, policy, idx);
- DEBUG_REQ(D_INFO, req, "add req [%p] to pc [%s:%d]",
- req, pc->pc_name, pc->pc_index);
+ DEBUG_REQ(D_INFO, req, "add req [%p] to pc [%s:%d]",
+ req, pc->pc_name, pc->pc_index);
- ptlrpc_set_add_new_req(pc, req);
+ ptlrpc_set_add_new_req(pc, req);
}
EXPORT_SYMBOL(ptlrpcd_add_req);
static inline void ptlrpc_reqset_get(struct ptlrpc_request_set *set)
{
- cfs_atomic_inc(&set->set_refcount);
+ atomic_inc(&set->set_refcount);
}
/**
*/
static int ptlrpcd_check(struct lu_env *env, struct ptlrpcd_ctl *pc)
{
- cfs_list_t *tmp, *pos;
+ struct list_head *tmp, *pos;
struct ptlrpc_request *req;
struct ptlrpc_request_set *set = pc->pc_set;
int rc = 0;
int rc2;
ENTRY;
- if (cfs_atomic_read(&set->set_new_count)) {
+ if (atomic_read(&set->set_new_count)) {
spin_lock(&set->set_new_req_lock);
- if (likely(!cfs_list_empty(&set->set_new_requests))) {
- cfs_list_splice_init(&set->set_new_requests,
- &set->set_requests);
- cfs_atomic_add(cfs_atomic_read(&set->set_new_count),
- &set->set_remaining);
- cfs_atomic_set(&set->set_new_count, 0);
- /*
- * Need to calculate its timeout.
- */
- rc = 1;
- }
+ if (likely(!list_empty(&set->set_new_requests))) {
+ list_splice_init(&set->set_new_requests,
+ &set->set_requests);
+ atomic_add(atomic_read(&set->set_new_count),
+ &set->set_remaining);
+ atomic_set(&set->set_new_count, 0);
+ /*
+ * Need to calculate its timeout.
+ */
+ rc = 1;
+ }
spin_unlock(&set->set_new_req_lock);
- }
+ }
- /* We should call lu_env_refill() before handling new requests to make
- * sure that env key the requests depending on really exists.
- */
- rc2 = lu_env_refill(env);
- if (rc2 != 0) {
- /*
- * XXX This is very awkward situation, because
- * execution can neither continue (request
- * interpreters assume that env is set up), nor repeat
- * the loop (as this potentially results in a tight
- * loop of -ENOMEM's).
- *
- * Fortunately, refill only ever does something when
- * new modules are loaded, i.e., early during boot up.
- */
- CERROR("Failure to refill session: %d\n", rc2);
- RETURN(rc);
- }
+ /* We should call lu_env_refill() before handling new requests to make
+ * sure that the env key the requests depend on really exists.
+ */
+ rc2 = lu_env_refill(env);
+ if (rc2 != 0) {
+ /*
+ * XXX This is a very awkward situation, because
+ * execution can neither continue (request
+ * interpreters assume that env is set up), nor repeat
+ * the loop (as this potentially results in a tight
+ * loop of -ENOMEM's).
+ *
+ * Fortunately, refill only ever does something when
+ * new modules are loaded, i.e., early during boot up.
+ */
+ CERROR("Failure to refill session: %d\n", rc2);
+ RETURN(rc);
+ }
- if (cfs_atomic_read(&set->set_remaining))
- rc |= ptlrpc_check_set(env, set);
+ if (atomic_read(&set->set_remaining))
+ rc |= ptlrpc_check_set(env, set);
- if (!cfs_list_empty(&set->set_requests)) {
- /*
- * XXX: our set never completes, so we prune the completed
- * reqs after each iteration. boy could this be smarter.
- */
- cfs_list_for_each_safe(pos, tmp, &set->set_requests) {
- req = cfs_list_entry(pos, struct ptlrpc_request,
- rq_set_chain);
- if (req->rq_phase != RQ_PHASE_COMPLETE)
- continue;
-
- cfs_list_del_init(&req->rq_set_chain);
- req->rq_set = NULL;
- ptlrpc_req_finished(req);
- }
- }
+ /* NB: ptlrpc_check_set has already moved completed requests to the
+ * head of set::set_requests */
+ list_for_each_safe(pos, tmp, &set->set_requests) {
+ req = list_entry(pos, struct ptlrpc_request, rq_set_chain);
+ if (req->rq_phase != RQ_PHASE_COMPLETE)
+ break;
- if (rc == 0) {
- /*
- * If new requests have been added, make sure to wake up.
- */
- rc = cfs_atomic_read(&set->set_new_count);
+ list_del_init(&req->rq_set_chain);
+ req->rq_set = NULL;
+ ptlrpc_req_finished(req);
+ }
+
+ if (rc == 0) {
+ /*
+ * If new requests have been added, make sure to wake up.
+ */
+ rc = atomic_read(&set->set_new_count);
-#ifdef __KERNEL__
/* If we have nothing to do, check whether we can take some
* work from our partner threads. */
if (rc == 0 && pc->pc_npartners > 0) {
ptlrpc_reqset_get(ps);
spin_unlock(&partner->pc_lock);
- if (cfs_atomic_read(&ps->set_new_count)) {
- rc = ptlrpcd_steal_rqset(set, ps);
- if (rc > 0)
- CDEBUG(D_RPCTRACE, "transfer %d"
- " async RPCs [%d->%d]\n",
- rc, partner->pc_index,
- pc->pc_index);
- }
- ptlrpc_reqset_put(ps);
- } while (rc == 0 && pc->pc_cursor != first);
- }
-#endif
- }
+ if (atomic_read(&ps->set_new_count)) {
+ rc = ptlrpcd_steal_rqset(set, ps);
+ if (rc > 0)
+ CDEBUG(D_RPCTRACE, "transfer %d"
+ " async RPCs [%d->%d]\n",
+ rc, partner->pc_index,
+ pc->pc_index);
+ }
+ ptlrpc_reqset_put(ps);
+ } while (rc == 0 && pc->pc_cursor != first);
+ }
+ }
- RETURN(rc);
+ RETURN(rc);
}
-#ifdef __KERNEL__
/**
* Main ptlrpcd thread.
* ptlrpc's code paths like to execute in process context, so we have this
*/
static int ptlrpcd(void *arg)
{
- struct ptlrpcd_ctl *pc = arg;
- struct ptlrpc_request_set *set = pc->pc_set;
- struct lu_env env = { .le_ses = NULL };
- int rc, exit = 0;
- ENTRY;
+ struct ptlrpcd_ctl *pc = arg;
+ struct ptlrpc_request_set *set = pc->pc_set;
+ struct lu_context ses = { 0 };
+ struct lu_env env = { .le_ses = &ses };
+ int rc, exit = 0;
+ ENTRY;
unshare_fs_struct();
#if defined(CONFIG_SMP)
if (++index >= num_possible_cpus())
index = 0;
}
- set_cpus_allowed_ptr(cfs_current(),
+ set_cpus_allowed_ptr(current,
cpumask_of_node(cpu_to_node(index)));
}
}
#endif
- /*
- * XXX So far only "client" ptlrpcd uses an environment. In
- * the future, ptlrpcd thread (or a thread-set) has to given
- * an argument, describing its "scope".
- */
- rc = lu_context_init(&env.le_ctx,
- LCT_CL_THREAD|LCT_REMEMBER|LCT_NOREF);
+ /* Both client and server (MDT/OST) may use the environment. */
+ rc = lu_context_init(&env.le_ctx, LCT_MD_THREAD | LCT_DT_THREAD |
+ LCT_CL_THREAD | LCT_REMEMBER |
+ LCT_NOREF);
+ if (rc == 0) {
+ rc = lu_context_init(env.le_ses,
+ LCT_SESSION|LCT_REMEMBER|LCT_NOREF);
+ if (rc != 0)
+ lu_context_fini(&env.le_ctx);
+ }
complete(&pc->pc_starting);
if (rc != 0)
lwi = LWI_TIMEOUT(cfs_time_seconds(timeout ? timeout : 1),
ptlrpc_expired_set, set);
- lu_context_enter(&env.le_ctx);
- l_wait_event(set->set_waitq,
- ptlrpcd_check(&env, pc), &lwi);
- lu_context_exit(&env.le_ctx);
+ lu_context_enter(&env.le_ctx);
+ lu_context_enter(env.le_ses);
+ l_wait_event(set->set_waitq, ptlrpcd_check(&env, pc), &lwi);
+ lu_context_exit(&env.le_ctx);
+ lu_context_exit(env.le_ses);
- /*
- * Abort inflight rpcs for forced stop case.
- */
+ /*
+ * Abort inflight rpcs for forced stop case.
+ */
if (test_bit(LIOD_STOP, &pc->pc_flags)) {
if (test_bit(LIOD_FORCE, &pc->pc_flags))
ptlrpc_abort_set(set);
/*
* Wait for inflight requests to drain.
*/
- if (!cfs_list_empty(&set->set_requests))
+ if (!list_empty(&set->set_requests))
ptlrpc_set_wait(set);
- lu_context_fini(&env.le_ctx);
+ lu_context_fini(&env.le_ctx);
+ lu_context_fini(env.le_ses);
complete(&pc->pc_finishing);
- return 0;
+ return 0;
}
/* XXX: We want multiple CPU cores to share the async RPC load. So we start many
#if defined(CONFIG_NUMA)
{
int i;
- mask = *cpumask_of_node(cpu_to_node(index));
+ cpumask_copy(&mask, cpumask_of_node(cpu_to_node(index)));
for (i = max; i < num_online_cpus(); i++)
- cpu_clear(i, mask);
- pc->pc_npartners = cpus_weight(mask) - 1;
+ cpumask_clear_cpu(i, &mask);
+ pc->pc_npartners = cpumask_weight(&mask) - 1;
set_bit(LIOD_BIND, &pc->pc_flags);
}
#else
* that are already initialized
*/
for (pidx = 0, i = 0; i < index; i++) {
- if (cpu_isset(i, mask)) {
+ if (cpumask_test_cpu(i, &mask)) {
ppc = &ptlrpcds->pd_threads[i];
pc->pc_partners[pidx++] = ppc;
ppc->pc_partners[ppc->
RETURN(rc);
}
-#else /* !__KERNEL__ */
-
-/**
- * In liblustre we do not have separate threads, so this function
- * is called from time to time all across common code to see
- * if something needs to be processed on ptlrpcd set.
- */
-int ptlrpcd_check_async_rpcs(void *arg)
-{
- struct ptlrpcd_ctl *pc = arg;
- int rc = 0;
-
- /*
- * Single threaded!!
- */
- pc->pc_recurred++;
-
- if (pc->pc_recurred == 1) {
- rc = lu_env_refill(&pc->pc_env);
- if (rc == 0) {
- lu_context_enter(&pc->pc_env.le_ctx);
- rc = ptlrpcd_check(&pc->pc_env, pc);
- if (!rc)
- ptlrpc_expired_set(pc->pc_set);
- /*
- * XXX: send replay requests.
- */
- if (test_bit(LIOD_RECOVERY, &pc->pc_flags))
- rc = ptlrpcd_check(&pc->pc_env, pc);
- lu_context_exit(&pc->pc_env.le_ctx);
- }
- }
-
- pc->pc_recurred--;
- return rc;
-}
-
-int ptlrpcd_idle(void *arg)
-{
- struct ptlrpcd_ctl *pc = arg;
-
- return (cfs_atomic_read(&pc->pc_set->set_new_count) == 0 &&
- cfs_atomic_read(&pc->pc_set->set_remaining) == 0);
-}
-
-#endif
int ptlrpcd_start(int index, int max, const char *name, struct ptlrpcd_ctl *pc)
{
init_completion(&pc->pc_starting);
init_completion(&pc->pc_finishing);
spin_lock_init(&pc->pc_lock);
- strncpy(pc->pc_name, name, sizeof(pc->pc_name) - 1);
+ strlcpy(pc->pc_name, name, sizeof(pc->pc_name));
pc->pc_set = ptlrpc_prep_set();
if (pc->pc_set == NULL)
GOTO(out, rc = -ENOMEM);
-#ifndef __KERNEL__
- pc->pc_wait_callback =
- liblustre_register_wait_callback("ptlrpcd_check_async_rpcs",
- &ptlrpcd_check_async_rpcs, pc);
- pc->pc_idle_callback =
- liblustre_register_idle_callback("ptlrpcd_check_idle_rpcs",
- &ptlrpcd_idle, pc);
- RETURN(0);
-#else
/*
* So far only "client" ptlrpcd uses an environment. In the future,
* ptlrpcd thread (or a thread-set) has to be given an argument,
GOTO(out_set, rc);
{
- cfs_task_t *task;
+ struct task_struct *task;
if (index >= 0) {
rc = ptlrpcd_bind(index, max);
if (rc < 0)
ptlrpc_set_destroy(set);
}
clear_bit(LIOD_BIND, &pc->pc_flags);
-#endif
out:
clear_bit(LIOD_START, &pc->pc_flags);
RETURN(rc);
goto out;
}
-#ifdef __KERNEL__
wait_for_completion(&pc->pc_finishing);
-#else
- liblustre_deregister_wait_callback(pc->pc_wait_callback);
- liblustre_deregister_idle_callback(pc->pc_idle_callback);
-#endif
lu_context_fini(&pc->pc_env.le_ctx);
spin_lock(&pc->pc_lock);
clear_bit(LIOD_BIND, &pc->pc_flags);
out:
-#ifdef __KERNEL__
if (pc->pc_npartners > 0) {
LASSERT(pc->pc_partners != NULL);
pc->pc_partners = NULL;
}
pc->pc_npartners = 0;
-#endif
EXIT;
}
int size, i = -1, j, rc = 0;
ENTRY;
-#ifdef __KERNEL__
if (max_ptlrpcds > 0 && max_ptlrpcds < nthreads)
nthreads = max_ptlrpcds;
if (nthreads < 2)
ptlrpcd_bind_policy = PDB_POLICY_PAIR;
else if (nthreads % 2 != 0 && ptlrpcd_bind_policy == PDB_POLICY_PAIR)
nthreads &= ~1; /* make sure it is even */
-#else
- nthreads = 1;
-#endif
size = offsetof(struct ptlrpcd, pd_threads[nthreads]);
OBD_ALLOC(ptlrpcds, size);