+/* Send early replies to everybody expiring within at_early_margin
+ asking for at_extra time */
+static int ptlrpc_at_check_timed(struct ptlrpc_service *svc)
+{
+ struct ptlrpc_request *rq, *n;
+ cfs_list_t work_list;
+ struct ptlrpc_at_array *array = &svc->srv_at_array;
+ __u32 index, count;
+ time_t deadline;
+ time_t now = cfs_time_current_sec();
+ cfs_duration_t delay;
+ int first, counter = 0;
+ ENTRY;
+
+ cfs_spin_lock(&svc->srv_at_lock);
+ if (svc->srv_at_check == 0) {
+ cfs_spin_unlock(&svc->srv_at_lock);
+ RETURN(0);
+ }
+ delay = cfs_time_sub(cfs_time_current(), svc->srv_at_checktime);
+ svc->srv_at_check = 0;
+
+ if (array->paa_count == 0) {
+ cfs_spin_unlock(&svc->srv_at_lock);
+ RETURN(0);
+ }
+
+ /* The timer went off, but maybe the nearest rpc already completed. */
+ first = array->paa_deadline - now;
+ if (first > at_early_margin) {
+ /* We've still got plenty of time. Reset the timer. */
+ cfs_spin_unlock(&svc->srv_at_lock);
+ ptlrpc_at_set_timer(svc);
+ RETURN(0);
+ }
+
+ /* We're close to a timeout, and we don't know how much longer the
+ server will take. Send early replies to everyone expiring soon. */
+ CFS_INIT_LIST_HEAD(&work_list);
+ deadline = -1;
+ index = (unsigned long)array->paa_deadline % array->paa_size;
+ count = array->paa_count;
+ while (count > 0) {
+ count -= array->paa_reqs_count[index];
+ cfs_list_for_each_entry_safe(rq, n,
+ &array->paa_reqs_array[index],
+ rq_timed_list) {
+ if (rq->rq_deadline <= now + at_early_margin) {
+ cfs_list_del_init(&rq->rq_timed_list);
+ /**
+ * ptlrpc_server_drop_request() may drop
+ * refcount to 0 already. Let's check this and
+ * don't add entry to work_list
+ */
+ if (likely(cfs_atomic_inc_not_zero(&rq->rq_refcount)))
+ cfs_list_add(&rq->rq_timed_list, &work_list);
+ counter++;
+ array->paa_reqs_count[index]--;
+ array->paa_count--;
+ cfs_spin_lock(&rq->rq_lock);
+ rq->rq_at_linked = 0;
+ cfs_spin_unlock(&rq->rq_lock);
+ continue;
+ }
+
+ /* update the earliest deadline */
+ if (deadline == -1 || rq->rq_deadline < deadline)
+ deadline = rq->rq_deadline;
+
+ break;
+ }
+
+ if (++index >= array->paa_size)
+ index = 0;
+ }
+ array->paa_deadline = deadline;
+ cfs_spin_unlock(&svc->srv_at_lock);
+
+ /* we have a new earliest deadline, restart the timer */
+ ptlrpc_at_set_timer(svc);
+
+ CDEBUG(D_ADAPTTO, "timeout in %+ds, asking for %d secs on %d early "
+ "replies\n", first, at_extra, counter);
+ if (first < 0) {
+ /* We're already past request deadlines before we even get a
+ chance to send early replies */
+ LCONSOLE_WARN("%s: This server is not able to keep up with "
+ "request traffic (cpu-bound).\n", svc->srv_name);
+ CWARN("earlyQ=%d reqQ=%d recA=%d, svcEst=%d, "
+ "delay="CFS_DURATION_T"(jiff)\n",
+ counter, svc->srv_n_queued_reqs, svc->srv_n_active_reqs,
+ at_get(&svc->srv_at_estimate), delay);
+ }
+
+ /* we took additional refcount so entries can't be deleted from list, no
+ * locking is needed */
+ while (!cfs_list_empty(&work_list)) {
+ rq = cfs_list_entry(work_list.next, struct ptlrpc_request,
+ rq_timed_list);
+ cfs_list_del_init(&rq->rq_timed_list);
+
+ if (ptlrpc_at_send_early_reply(rq) == 0)
+ ptlrpc_at_add_timed(rq);
+
+ ptlrpc_server_drop_request(rq);
+ }
+
+ RETURN(0);
+}
+
+/**
+ * Put the request to the export list if the request may become
+ * a high priority one.
+ */
+static int ptlrpc_hpreq_init(struct ptlrpc_service *svc,
+ struct ptlrpc_request *req)
+{
+ int rc;
+ ENTRY;
+
+ if (svc->srv_hpreq_handler) {
+ rc = svc->srv_hpreq_handler(req);
+ if (rc)
+ RETURN(rc);
+ }
+ if (req->rq_export && req->rq_ops) {
+ cfs_spin_lock(&req->rq_export->exp_lock);
+ cfs_list_add(&req->rq_exp_list,
+ &req->rq_export->exp_queued_rpc);
+ cfs_spin_unlock(&req->rq_export->exp_lock);
+ }
+
+ RETURN(0);
+}
+
+/** Remove the request from the export list. */
+static void ptlrpc_hpreq_fini(struct ptlrpc_request *req)
+{
+ ENTRY;
+ if (req->rq_export && req->rq_ops) {
+ cfs_spin_lock(&req->rq_export->exp_lock);
+ cfs_list_del_init(&req->rq_exp_list);
+ cfs_spin_unlock(&req->rq_export->exp_lock);
+ }
+ EXIT;
+}
+
+/**
+ * Make the request a high priority one.
+ *
+ * All the high priority requests are queued in a separate FIFO
+ * ptlrpc_service::srv_request_hpq list which is parallel to
+ * ptlrpc_service::srv_request_queue list but has a higher priority
+ * for handling.
+ *
+ * \see ptlrpc_server_handle_request().
+ */
+static void ptlrpc_hpreq_reorder_nolock(struct ptlrpc_service *svc,
+ struct ptlrpc_request *req)
+{
+ ENTRY;
+ LASSERT(svc != NULL);
+ cfs_spin_lock(&req->rq_lock);
+ if (req->rq_hp == 0) {
+ int opc = lustre_msg_get_opc(req->rq_reqmsg);
+
+ /* Add to the high priority queue. */
+ cfs_list_move_tail(&req->rq_list, &svc->srv_request_hpq);
+ req->rq_hp = 1;
+ if (opc != OBD_PING)
+ DEBUG_REQ(D_NET, req, "high priority req");
+ }
+ cfs_spin_unlock(&req->rq_lock);
+ EXIT;
+}
+
+void ptlrpc_hpreq_reorder(struct ptlrpc_request *req)
+{
+ struct ptlrpc_service *svc = req->rq_rqbd->rqbd_service;
+ ENTRY;
+
+ cfs_spin_lock(&svc->srv_lock);
+ /* It may happen that the request is already taken for the processing
+ * but still in the export list, do not re-add it into the HP list. */
+ if (req->rq_phase == RQ_PHASE_NEW)
+ ptlrpc_hpreq_reorder_nolock(svc, req);
+ cfs_spin_unlock(&svc->srv_lock);
+ EXIT;
+}
+
+/** Check if the request is a high priority one. */
+static int ptlrpc_server_hpreq_check(struct ptlrpc_request *req)
+{
+ int opc, rc = 0;
+ ENTRY;
+
+ /* Check by request opc. */
+ opc = lustre_msg_get_opc(req->rq_reqmsg);
+ if (opc == OBD_PING)
+ RETURN(1);
+
+ /* Perform request specific check. */
+ if (req->rq_ops && req->rq_ops->hpreq_check)
+ rc = req->rq_ops->hpreq_check(req);
+ RETURN(rc);
+}
+
+/** Check if a request is a high priority one. */
+static int ptlrpc_server_request_add(struct ptlrpc_service *svc,
+ struct ptlrpc_request *req)
+{
+ int rc;
+ ENTRY;
+
+ rc = ptlrpc_server_hpreq_check(req);
+ if (rc < 0)
+ RETURN(rc);
+
+ cfs_spin_lock(&svc->srv_lock);
+ /* Before inserting the request into the queue, check if it is not
+ * inserted yet, or even already handled -- it may happen due to
+ * a racing ldlm_server_blocking_ast(). */
+ if (req->rq_phase == RQ_PHASE_NEW && cfs_list_empty(&req->rq_list)) {
+ if (rc)
+ ptlrpc_hpreq_reorder_nolock(svc, req);
+ else
+ cfs_list_add_tail(&req->rq_list,
+ &svc->srv_request_queue);
+ }
+ cfs_spin_unlock(&svc->srv_lock);
+
+ RETURN(0);
+}
+
+/* Only allow normal priority requests on a service that has a high-priority
+ * queue if forced (i.e. cleanup), if there are other high priority requests
+ * already being processed (i.e. those threads can service more high-priority
+ * requests), or if there are enough idle threads that a later thread can do
+ * a high priority request. */
+static int ptlrpc_server_allow_normal(struct ptlrpc_service *svc, int force)
+{
+ return force || !svc->srv_hpreq_handler || svc->srv_n_hpreq > 0 ||
+ svc->srv_threads_running < svc->srv_threads_started - 2;
+}
+
+static struct ptlrpc_request *
+ptlrpc_server_request_get(struct ptlrpc_service *svc)
+{
+ struct ptlrpc_request *req = NULL;
+ ENTRY;
+
+ if (!cfs_list_empty(&svc->srv_request_queue) &&
+ (cfs_list_empty(&svc->srv_request_hpq) ||
+ svc->srv_hpreq_count >= svc->srv_hpreq_ratio)) {
+ req = cfs_list_entry(svc->srv_request_queue.next,
+ struct ptlrpc_request, rq_list);
+ svc->srv_hpreq_count = 0;
+ } else if (!cfs_list_empty(&svc->srv_request_hpq)) {
+ req = cfs_list_entry(svc->srv_request_hpq.next,
+ struct ptlrpc_request, rq_list);
+ svc->srv_hpreq_count++;
+ }
+ RETURN(req);
+}
+
+static int ptlrpc_server_request_pending(struct ptlrpc_service *svc, int force)
+{
+ return ((ptlrpc_server_allow_normal(svc, force) &&
+ !cfs_list_empty(&svc->srv_request_queue)) ||
+ !cfs_list_empty(&svc->srv_request_hpq));
+}
+
+/* Handle freshly incoming reqs, add to timed early reply list,
+ pass on to regular request queue */
+static int
+ptlrpc_server_handle_req_in(struct ptlrpc_service *svc)
+{
+ struct ptlrpc_request *req;
+ __u32 deadline;
+ int rc;
+ ENTRY;
+
+ LASSERT(svc);
+
+ cfs_spin_lock(&svc->srv_lock);
+ if (cfs_list_empty(&svc->srv_req_in_queue)) {
+ cfs_spin_unlock(&svc->srv_lock);
+ RETURN(0);
+ }
+
+ req = cfs_list_entry(svc->srv_req_in_queue.next,
+ struct ptlrpc_request, rq_list);
+ cfs_list_del_init (&req->rq_list);
+ /* Consider this still a "queued" request as far as stats are
+ concerned */
+ cfs_spin_unlock(&svc->srv_lock);
+
+ /* go through security check/transform */
+ rc = sptlrpc_svc_unwrap_request(req);
+ switch (rc) {
+ case SECSVC_OK:
+ break;
+ case SECSVC_COMPLETE:
+ target_send_reply(req, 0, OBD_FAIL_MDS_ALL_REPLY_NET);
+ goto err_req;
+ case SECSVC_DROP:
+ goto err_req;
+ default:
+ LBUG();
+ }
+
+ /*
+ * for null-flavored rpc, msg has been unpacked by sptlrpc, although
+ * redo it wouldn't be harmful.
+ */
+ if (SPTLRPC_FLVR_POLICY(req->rq_flvr.sf_rpc) != SPTLRPC_POLICY_NULL) {
+ rc = ptlrpc_unpack_req_msg(req, req->rq_reqlen);
+ if (rc != 0) {
+ CERROR("error unpacking request: ptl %d from %s "
+ "x"LPU64"\n", svc->srv_req_portal,
+ libcfs_id2str(req->rq_peer), req->rq_xid);
+ goto err_req;
+ }
+ }
+
+ rc = lustre_unpack_req_ptlrpc_body(req, MSG_PTLRPC_BODY_OFF);
+ if (rc) {
+ CERROR ("error unpacking ptlrpc body: ptl %d from %s x"
+ LPU64"\n", svc->srv_req_portal,
+ libcfs_id2str(req->rq_peer), req->rq_xid);
+ goto err_req;
+ }
+
+ if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_REQ_OPC) &&
+ lustre_msg_get_opc(req->rq_reqmsg) == obd_fail_val) {
+ CERROR("drop incoming rpc opc %u, x"LPU64"\n",
+ obd_fail_val, req->rq_xid);
+ goto err_req;
+ }
+
+ rc = -EINVAL;
+ if (lustre_msg_get_type(req->rq_reqmsg) != PTL_RPC_MSG_REQUEST) {
+ CERROR("wrong packet type received (type=%u) from %s\n",
+ lustre_msg_get_type(req->rq_reqmsg),
+ libcfs_id2str(req->rq_peer));
+ goto err_req;
+ }
+
+ switch(lustre_msg_get_opc(req->rq_reqmsg)) {
+ case MDS_WRITEPAGE:
+ case OST_WRITE:
+ req->rq_bulk_write = 1;
+ break;
+ case MDS_READPAGE:
+ case OST_READ:
+ req->rq_bulk_read = 1;
+ break;
+ }
+
+ CDEBUG(D_NET, "got req "LPU64"\n", req->rq_xid);
+
+ req->rq_export = class_conn2export(
+ lustre_msg_get_handle(req->rq_reqmsg));
+ if (req->rq_export) {
+ rc = ptlrpc_check_req(req);
+ if (rc == 0) {
+ rc = sptlrpc_target_export_check(req->rq_export, req);
+ if (rc)
+ DEBUG_REQ(D_ERROR, req, "DROPPING req with "
+ "illegal security flavor,");
+ }
+
+ if (rc)
+ goto err_req;
+ ptlrpc_update_export_timer(req->rq_export, 0);
+ }
+
+ /* req_in handling should/must be fast */
+ if (cfs_time_current_sec() - req->rq_arrival_time.tv_sec > 5)
+ DEBUG_REQ(D_WARNING, req, "Slow req_in handling "CFS_DURATION_T"s",
+ cfs_time_sub(cfs_time_current_sec(),
+ req->rq_arrival_time.tv_sec));
+
+ /* Set rpc server deadline and add it to the timed list */
+ deadline = (lustre_msghdr_get_flags(req->rq_reqmsg) &
+ MSGHDR_AT_SUPPORT) ?
+ /* The max time the client expects us to take */
+ lustre_msg_get_timeout(req->rq_reqmsg) : obd_timeout;
+ req->rq_deadline = req->rq_arrival_time.tv_sec + deadline;
+ if (unlikely(deadline == 0)) {
+ DEBUG_REQ(D_ERROR, req, "Dropping request with 0 timeout");
+ goto err_req;
+ }
+
+ ptlrpc_at_add_timed(req);
+ rc = ptlrpc_hpreq_init(svc, req);
+ if (rc)
+ GOTO(err_req, rc);
+
+ /* Move it over to the request processing queue */
+ rc = ptlrpc_server_request_add(svc, req);
+ if (rc)
+ GOTO(err_req, rc);
+ cfs_waitq_signal(&svc->srv_waitq);
+ RETURN(1);
+
+err_req:
+ cfs_spin_lock(&svc->srv_lock);
+ svc->srv_n_queued_reqs--;
+ svc->srv_n_active_reqs++;
+ cfs_spin_unlock(&svc->srv_lock);
+ ptlrpc_server_finish_request(req);
+
+ RETURN(1);
+}
+
+static int
+ptlrpc_server_handle_request(struct ptlrpc_service *svc,
+ struct ptlrpc_thread *thread)