*
* You should have received a copy of the GNU General Public License
* version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
*
* GPL HEADER END
*/
* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*
- * Copyright (c) 2010, 2014, Intel Corporation.
+ * Copyright (c) 2010, 2016, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
*/
#define DEBUG_SUBSYSTEM S_RPC
+#include <linux/kthread.h>
#include <obd_support.h>
#include <obd_class.h>
#include <lustre_net.h>
/* The following are visible and mutable through /sys/module/ptlrpc */
int test_req_buffer_pressure = 0;
-CFS_MODULE_PARM(test_req_buffer_pressure, "i", int, 0444,
- "set non-zero to put pressure on request buffer pools");
-CFS_MODULE_PARM(at_min, "i", int, 0644,
- "Adaptive timeout minimum (sec)");
-CFS_MODULE_PARM(at_max, "i", int, 0644,
- "Adaptive timeout maximum (sec)");
-CFS_MODULE_PARM(at_history, "i", int, 0644,
- "Adaptive timeouts remember the slowest event that took place "
- "within this period (sec)");
-CFS_MODULE_PARM(at_early_margin, "i", int, 0644,
- "How soon before an RPC deadline to send an early reply");
-CFS_MODULE_PARM(at_extra, "i", int, 0644,
- "How much extra time to give with each early reply");
-
+module_param(test_req_buffer_pressure, int, 0444);
+MODULE_PARM_DESC(test_req_buffer_pressure, "set non-zero to put pressure on request buffer pools");
+module_param(at_min, int, 0644);
+MODULE_PARM_DESC(at_min, "Adaptive timeout minimum (sec)");
+module_param(at_max, int, 0644);
+MODULE_PARM_DESC(at_max, "Adaptive timeout maximum (sec)");
+module_param(at_history, int, 0644);
+MODULE_PARM_DESC(at_history,
+ "Adaptive timeouts remember the slowest event that took place within this period (sec)");
+module_param(at_early_margin, int, 0644);
+MODULE_PARM_DESC(at_early_margin, "How soon before an RPC deadline to send an early reply");
+module_param(at_extra, int, 0644);
+MODULE_PARM_DESC(at_extra, "How much extra time to give with each early reply");
/* forward ref */
static int ptlrpc_server_post_idle_rqbds(struct ptlrpc_service_part *svcpt);
/*
* User wants to increase number of threads with for
- * each CPU core/HT, most likely the factor is larger then
+ * each CPU core/HT, most likely the factor is larger than
* one thread/core because service threads are supposed to
* be blocked by lock or wait for IO.
*/
* have too many threads no matter how many cores/HTs
* there are.
*/
- if (cfs_cpu_ht_nsiblings(0) > 1) { /* weight is # of HTs */
+ if (cfs_cpu_ht_nsiblings(smp_processor_id()) > 1) {
+ /* weight is # of HTs */
/* depress thread factor for hyper-thread */
factor = factor - (factor >> 1) + (factor >> 3);
}
if (array->paa_reqs_count == NULL)
goto failed;
- cfs_timer_init(&svcpt->scp_at_timer, ptlrpc_at_timer, svcpt);
+ setup_timer(&svcpt->scp_at_timer, ptlrpc_at_timer,
+ (unsigned long)svcpt);
+
/* At SOW, service time should be quick; 10s seems generous. If client
* timeout is less than this, we'll be sending an early reply. */
at_init(&svcpt->scp_at_estimate, 10, 0);
}
/**
- * to finish a active request: stop sending more early replies, and release
+ * to finish an active request: stop sending more early replies, and release
* the request. should be called after we finished handling the request.
*/
static void ptlrpc_server_finish_active_request(
} else if (lustre_msg_get_transno(req->rq_reqmsg) != 0 &&
!obd->obd_recovering) {
DEBUG_REQ(D_ERROR, req, "Invalid req with transno "
- LPU64" without recovery",
+ "%llu without recovery",
lustre_msg_get_transno(req->rq_reqmsg));
class_fail_export(req->rq_export);
rc = -ENODEV;
__s32 next;
if (array->paa_count == 0) {
- cfs_timer_disarm(&svcpt->scp_at_timer);
+ del_timer(&svcpt->scp_at_timer);
return;
}
if (next <= 0) {
ptlrpc_at_timer((unsigned long)svcpt);
} else {
- cfs_timer_arm(&svcpt->scp_at_timer, cfs_time_shift(next));
+ mod_timer(&svcpt->scp_at_timer, cfs_time_shift(next));
CDEBUG(D_INFO, "armed %s at %+ds\n",
svcpt->scp_service->srv_name, next);
}
static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req)
{
struct ptlrpc_service_part *svcpt = req->rq_rqbd->rqbd_svcpt;
- struct ptlrpc_request *reqcopy;
- struct lustre_msg *reqmsg;
- cfs_duration_t olddl = req->rq_deadline - cfs_time_current_sec();
- int rc;
- ENTRY;
+ struct ptlrpc_request *reqcopy;
+ struct lustre_msg *reqmsg;
+ cfs_duration_t olddl = req->rq_deadline - cfs_time_current_sec();
+ time_t newdl;
+ int rc;
+
+ ENTRY;
if (CFS_FAIL_CHECK(OBD_FAIL_TGT_REPLAY_RECONNECT)) {
/* don't send early reply */
* during the recovery period send at least 4 early replies,
* spacing them every at_extra if we can. at_estimate should
* always equal this fixed value during recovery. */
- at_measured(&svcpt->scp_at_estimate,
- cfs_time_current_sec() -
- req->rq_arrival_time.tv_sec + min(at_extra,
- req->rq_export->exp_obd->obd_recovery_timeout / 4));
+ /* Don't account request processing time into AT history
+ * during recovery, it is not service time we need but
+ * includes also waiting time for recovering clients */
+ newdl = cfs_time_current_sec() + min(at_extra,
+ req->rq_export->exp_obd->obd_recovery_timeout / 4);
} else {
/* We want to extend the request deadline by at_extra seconds,
* so we set our service estimate to reflect how much time has
at_measured(&svcpt->scp_at_estimate, at_extra +
cfs_time_current_sec() -
req->rq_arrival_time.tv_sec);
-
+ newdl = req->rq_arrival_time.tv_sec +
+ at_get(&svcpt->scp_at_estimate);
}
+
/* Check to see if we've actually increased the deadline -
* we may be past adaptive_max */
- if (req->rq_deadline >= req->rq_arrival_time.tv_sec +
- at_get(&svcpt->scp_at_estimate)) {
+ if (req->rq_deadline >= newdl) {
DEBUG_REQ(D_WARNING, req, "Couldn't add any time "
"(%ld/%ld), not sending early reply\n",
- olddl, req->rq_arrival_time.tv_sec +
- at_get(&svcpt->scp_at_estimate) -
- cfs_time_current_sec());
+ olddl, newdl - cfs_time_current_sec());
RETURN(-ETIMEDOUT);
}
if (!rc) {
/* Adjust our own deadline to what we told the client */
- req->rq_deadline = req->rq_arrival_time.tv_sec +
- at_get(&svcpt->scp_at_estimate);
+ req->rq_deadline = newdl;
req->rq_early_count++; /* number sent, server side */
} else {
DEBUG_REQ(D_ERROR, req, "Early reply send failed %d", rc);
}
/**
- * Put the request to the export list if the request may become
- * a high priority one.
+ * Check if a request should be assigned with a high priority.
+ *
+ * \retval < 0: error occurred
+ * 0: normal RPC request
+ * +1: high priority request
*/
static int ptlrpc_server_hpreq_init(struct ptlrpc_service_part *svcpt,
struct ptlrpc_request *req)
{
- struct list_head *list;
- int rc, hp = 0;
-
+ int rc = 0;
ENTRY;
- if (svcpt->scp_service->srv_ops.so_hpreq_handler) {
+ if (svcpt->scp_service->srv_ops.so_hpreq_handler != NULL) {
rc = svcpt->scp_service->srv_ops.so_hpreq_handler(req);
if (rc < 0)
RETURN(rc);
+
LASSERT(rc == 0);
}
- if (req->rq_export) {
- if (req->rq_ops) {
- /* Perform request specific check. We should do this
- * check before the request is added into exp_hp_rpcs
- * list otherwise it may hit swab race at LU-1044. */
- if (req->rq_ops->hpreq_check) {
- rc = req->rq_ops->hpreq_check(req);
- /**
- * XXX: Out of all current
- * ptlrpc_hpreq_ops::hpreq_check(), only
- * ldlm_cancel_hpreq_check() can return an
- * error code; other functions assert in
- * similar places, which seems odd.
- * What also does not seem right is that
- * handlers for those RPCs do not assert
- * on the same checks, but rather handle the
- * error cases. e.g. see ost_rw_hpreq_check(),
- * and ost_brw_read(), ost_brw_write().
- */
- if (rc < 0)
- RETURN(rc);
- LASSERT(rc == 0 || rc == 1);
- hp = rc;
- }
- list = &req->rq_export->exp_hp_rpcs;
- } else {
- list = &req->rq_export->exp_reg_rpcs;
- }
- /* do search for duplicated xid and the adding to the list
- * atomically */
- spin_lock_bh(&req->rq_export->exp_rpc_lock);
- rc = ptlrpc_server_check_resend_in_progress(req);
- if (rc < 0) {
- spin_unlock_bh(&req->rq_export->exp_rpc_lock);
- RETURN(rc);
+ if (req->rq_export != NULL && req->rq_ops != NULL) {
+ /* Perform request specific check. We should do this
+ * check before the request is added into exp_hp_rpcs
+ * list otherwise it may hit swab race at LU-1044. */
+ if (req->rq_ops->hpreq_check != NULL) {
+ rc = req->rq_ops->hpreq_check(req);
+ if (rc == -ESTALE) {
+ req->rq_status = rc;
+ ptlrpc_error(req);
+ }
+ /** can only return error,
+ * 0 for normal request,
+ * or 1 for high priority request */
+ LASSERT(rc <= 1);
}
- list_add(&req->rq_exp_list, list);
- spin_unlock_bh(&req->rq_export->exp_rpc_lock);
}
- ptlrpc_nrs_req_initialize(svcpt, req, !!hp);
-
- RETURN(hp);
+ RETURN(rc);
}
/** Remove the request from the export list. */
static int ptlrpc_server_request_add(struct ptlrpc_service_part *svcpt,
struct ptlrpc_request *req)
{
- int rc;
+ int rc;
+ bool hp;
ENTRY;
rc = ptlrpc_server_hpreq_init(svcpt, req);
if (rc < 0)
RETURN(rc);
+ hp = rc > 0;
+ ptlrpc_nrs_req_initialize(svcpt, req, hp);
+
+ if (req->rq_export != NULL) {
+ struct obd_export *exp = req->rq_export;
+
+ /* do search for duplicated xid and the adding to the list
+ * atomically */
+ spin_lock_bh(&exp->exp_rpc_lock);
+ rc = ptlrpc_server_check_resend_in_progress(req);
+ if (rc < 0) {
+ spin_unlock_bh(&exp->exp_rpc_lock);
+
+ ptlrpc_nrs_req_finalize(req);
+ RETURN(rc);
+ }
+
+ if (hp || req->rq_ops != NULL)
+ list_add(&req->rq_exp_list, &exp->exp_hp_rpcs);
+ else
+ list_add(&req->rq_exp_list, &exp->exp_reg_rpcs);
+ spin_unlock_bh(&exp->exp_rpc_lock);
+ }
+
/* the current thread is not the processing thread for this request
* since that, but request is in exp_hp_list and can be find there.
* Remove all relations between request and old thread. */
req->rq_svc_thread = NULL;
req->rq_session.lc_thread = NULL;
- ptlrpc_nrs_req_add(svcpt, req, !!rc);
+ ptlrpc_nrs_req_add(svcpt, req, hp);
RETURN(0);
}
rc = ptlrpc_unpack_req_msg(req, req->rq_reqlen);
if (rc != 0) {
CERROR("error unpacking request: ptl %d from %s "
- "x"LPU64"\n", svc->srv_req_portal,
+ "x%llu\n", svc->srv_req_portal,
libcfs_id2str(req->rq_peer), req->rq_xid);
goto err_req;
}
rc = lustre_unpack_req_ptlrpc_body(req, MSG_PTLRPC_BODY_OFF);
if (rc) {
CERROR ("error unpacking ptlrpc body: ptl %d from %s x"
- LPU64"\n", svc->srv_req_portal,
+ "%llu\n", svc->srv_req_portal,
libcfs_id2str(req->rq_peer), req->rq_xid);
goto err_req;
}
if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_REQ_OPC) &&
lustre_msg_get_opc(req->rq_reqmsg) == cfs_fail_val) {
- CERROR("drop incoming rpc opc %u, x"LPU64"\n",
+ CERROR("drop incoming rpc opc %u, x%llu\n",
cfs_fail_val, req->rq_xid);
goto err_req;
}
goto err_req;
}
- switch(lustre_msg_get_opc(req->rq_reqmsg)) {
- case MDS_WRITEPAGE:
- case OST_WRITE:
- req->rq_bulk_write = 1;
- break;
- case MDS_READPAGE:
- case OST_READ:
- case MGS_CONFIG_READ:
- req->rq_bulk_read = 1;
- break;
- }
+ switch (lustre_msg_get_opc(req->rq_reqmsg)) {
+ case MDS_WRITEPAGE:
+ case OST_WRITE:
+ case OUT_UPDATE:
+ req->rq_bulk_write = 1;
+ break;
+ case MDS_READPAGE:
+ case OST_READ:
+ case MGS_CONFIG_READ:
+ req->rq_bulk_read = 1;
+ break;
+ }
- CDEBUG(D_RPCTRACE, "got req x"LPU64"\n", req->rq_xid);
+ CDEBUG(D_RPCTRACE, "got req x%llu\n", req->rq_xid);
req->rq_export = class_conn2export(
lustre_msg_get_handle(req->rq_reqmsg));
MSGHDR_AT_SUPPORT) ?
/* The max time the client expects us to take */
lustre_msg_get_timeout(req->rq_reqmsg) : obd_timeout;
+
req->rq_deadline = req->rq_arrival_time.tv_sec + deadline;
if (unlikely(deadline == 0)) {
DEBUG_REQ(D_ERROR, req, "Dropping request with 0 timeout");
}
CDEBUG(D_RPCTRACE, "Handling RPC pname:cluuid+ref:pid:xid:nid:opc "
- "%s:%s+%d:%d:x"LPU64":%s:%d\n", current_comm(),
+ "%s:%s+%d:%d:x%llu:%s:%d\n", current_comm(),
(request->rq_export ?
(char *)request->rq_export->exp_client_uuid.uuid : "0"),
(request->rq_export ?
if (lustre_msg_get_opc(request->rq_reqmsg) != OBD_PING)
CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_PAUSE_REQ, cfs_fail_val);
- CDEBUG(D_NET, "got req "LPU64"\n", request->rq_xid);
+ CDEBUG(D_NET, "got req %llu\n", request->rq_xid);
/* re-assign request and sesson thread to the current one */
request->rq_svc_thread = thread;
do_gettimeofday(&work_end);
timediff = cfs_timeval_sub(&work_end, &work_start, NULL);
CDEBUG(D_RPCTRACE, "Handled RPC pname:cluuid+ref:pid:xid:nid:opc "
- "%s:%s+%d:%d:x"LPU64":%s:%d Request procesed in "
- "%ldus (%ldus total) trans "LPU64" rc %d/%d\n",
+ "%s:%s+%d:%d:x%llu:%s:%d Request procesed in "
+ "%ldus (%ldus total) trans %llu rc %d/%d\n",
current_comm(),
(request->rq_export ?
(char *)request->rq_export->exp_client_uuid.uuid : "0"),
if (nlocks == 0 && !been_handled) {
/* If we see this, we should already have seen the warning
* in mds_steal_ack_locks() */
- CDEBUG(D_HA, "All locks stolen from rs %p x"LPD64".t"LPD64
+ CDEBUG(D_HA, "All locks stolen from rs %p x%lld.t%lld"
" o%d NID %s\n",
rs,
rs->rs_xid, rs->rs_transno, rs->rs_opc,
static inline int
ptlrpc_threads_enough(struct ptlrpc_service_part *svcpt)
{
- return (svcpt->scp_nthrs_running >=
- svcpt->scp_service->srv_nthrs_cpt_init) &&
- (svcpt->scp_nreqs_active <
- svcpt->scp_nthrs_running - 1 -
- (svcpt->scp_service->srv_ops.so_hpreq_handler != NULL));
+ return svcpt->scp_nreqs_active <
+ svcpt->scp_nthrs_running - 1 -
+ (svcpt->scp_service->srv_ops.so_hpreq_handler != NULL);
}
/**
goto out_srv_fini;
}
- /* Alloc reply state structure for this one */
- OBD_ALLOC_LARGE(rs, svc->srv_max_reply_size);
- if (!rs) {
- rc = -ENOMEM;
- goto out_srv_fini;
- }
+ /* Alloc reply state structure for this one */
+ OBD_ALLOC_LARGE(rs, svc->srv_max_reply_size);
+ if (!rs) {
+ rc = -ENOMEM;
+ goto out_srv_fini;
+ }
spin_lock(&svcpt->scp_lock);
struct ptlrpc_hr_thread *hrt = (struct ptlrpc_hr_thread *)arg;
struct ptlrpc_hr_partition *hrp = hrt->hrt_partition;
struct list_head replies;
- char threadname[20];
int rc;
INIT_LIST_HEAD(&replies);
- snprintf(threadname, sizeof(threadname), "ptlrpc_hr%02d_%03d",
- hrp->hrp_cpt, hrt->hrt_id);
unshare_fs_struct();
rc = cfs_cpt_bind(ptlrpc_hr.hr_cpt_table, hrp->hrp_cpt);
if (rc != 0) {
+ char threadname[20];
+
+ snprintf(threadname, sizeof(threadname), "ptlrpc_hr%02d_%03d",
+ hrp->hrp_cpt, hrt->hrt_id);
CWARN("Failed to bind %s on CPT %d of CPT table %p: rc = %d\n",
threadname, hrp->hrp_cpt, ptlrpc_hr.hr_cpt_table, rc);
}
int rc;
int i;
int j;
+ int weight;
ENTRY;
memset(&ptlrpc_hr, 0, sizeof(ptlrpc_hr));
init_waitqueue_head(&ptlrpc_hr.hr_waitq);
+ weight = cfs_cpu_ht_nsiblings(smp_processor_id());
+
cfs_percpt_for_each(hrp, i, ptlrpc_hr.hr_partitions) {
hrp->hrp_cpt = i;
atomic_set(&hrp->hrp_nstopped, 0);
hrp->hrp_nthrs = cfs_cpt_weight(ptlrpc_hr.hr_cpt_table, i);
- hrp->hrp_nthrs /= cfs_cpu_ht_nsiblings(0);
- LASSERT(hrp->hrp_nthrs > 0);
+ hrp->hrp_nthrs /= weight;
+ if (hrp->hrp_nthrs == 0)
+ hrp->hrp_nthrs = 1;
+
OBD_CPT_ALLOC(hrp->hrp_thrs, ptlrpc_hr.hr_cpt_table, i,
hrp->hrp_nthrs * sizeof(*hrt));
if (hrp->hrp_thrs == NULL)
/* early disarm AT timer... */
ptlrpc_service_for_each_part(svcpt, i, svc) {
if (svcpt->scp_service != NULL)
- cfs_timer_disarm(&svcpt->scp_at_timer);
+ del_timer(&svcpt->scp_at_timer);
}
}
break;
/* In case somebody rearmed this in the meantime */
- cfs_timer_disarm(&svcpt->scp_at_timer);
+ del_timer(&svcpt->scp_at_timer);
array = &svcpt->scp_at_array;
if (array->paa_reqs_array != NULL) {