LU-10391 lnet: switch to large lnet_processid for matching

[fs/lustre-release.git] / lustre / ptlrpc / niobuf.c
diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c

index 9fde3ac..bd8f0d4 100644 (file)
--- a/lustre/ptlrpc/niobuf.c
+++ b/lustre/ptlrpc/niobuf.c
@@ -27,7 +27,6 @@
   */
  /*
   * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
   */
  
  #define DEBUG_SUBSYSTEM S_RPC
@@ -104,12 +103,15 @@ static int ptl_send_buf(struct lnet_handle_md *mdh, void *base, int len,
         RETURN (0);
  }
  
-static void mdunlink_iterate_helper(struct lnet_handle_md *bd_mds, int count)
+#define mdunlink_iterate_helper(mds, count) \
+               __mdunlink_iterate_helper(mds, count, false) 
+static void __mdunlink_iterate_helper(struct lnet_handle_md *bd_mds,
+                                     int count, bool discard)
  {
         int i;
  
         for (i = 0; i < count; i++)
-               LNetMDUnlink(bd_mds[i]);
+               __LNetMDUnlink(bd_mds[i], discard);
  }
  
  #ifdef HAVE_SERVER_SUPPORT
@@ -285,7 +287,7 @@ void ptlrpc_abort_bulk(struct ptlrpc_bulk_desc *desc)
          * but we must still wait_event_idle_timeout() in this case, to give
          * us a chance to run server_bulk_callback()
          */
-       mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw);
+       __mdunlink_iterate_helper(desc->bd_mds, desc->bd_md_max_brw, true);
  
         for (;;) {
                 /* Network access will complete in finite time but the HUGE
@@ -312,7 +314,7 @@ void ptlrpc_abort_bulk(struct ptlrpc_bulk_desc *desc)
  int ptlrpc_register_bulk(struct ptlrpc_request *req)
  {
         struct ptlrpc_bulk_desc *desc = req->rq_bulk;
-       struct lnet_process_id peer;
+       struct lnet_processid peer;
         int rc = 0;
         int posted_md;
         int total_md;
@@ -342,7 +344,9 @@ int ptlrpc_register_bulk(struct ptlrpc_request *req)
  
         desc->bd_failure = 0;
  
-       peer = desc->bd_import->imp_connection->c_peer;
+       peer.pid = desc->bd_import->imp_connection->c_peer.pid;
+       lnet_nid4_to_nid(desc->bd_import->imp_connection->c_peer.nid,
+                     &peer.nid);
  
         LASSERT(desc->bd_cbid.cbid_fn == client_bulk_callback);
         LASSERT(desc->bd_cbid.cbid_arg == desc);
@@ -378,7 +382,7 @@ int ptlrpc_register_bulk(struct ptlrpc_request *req)
                     OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_ATTACH)) {
                         rc = -ENOMEM;
                 } else {
-                       me = LNetMEAttach(desc->bd_portal, peer, mbits, 0,
+                       me = LNetMEAttach(desc->bd_portal, &peer, mbits, 0,
                                   LNET_UNLINK, LNET_INS_AFTER);
                         rc = PTR_ERR_OR_ZERO(me);
                 }
@@ -397,7 +401,6 @@ int ptlrpc_register_bulk(struct ptlrpc_request *req)
                         CERROR("%s: LNetMDAttach failed x%llu/%d: rc = %d\n",
                                desc->bd_import->imp_obd->obd_name, mbits,
                                posted_md, rc);
-                       LNetMEUnlink(me);
                         break;
                 }
         }
@@ -418,7 +421,7 @@ int ptlrpc_register_bulk(struct ptlrpc_request *req)
         /* Holler if peer manages to touch buffers before he knows the mbits */
         if (desc->bd_refs != total_md)
                 CWARN("%s: Peer %s touched %d buffers while I registered\n",
-                     desc->bd_import->imp_obd->obd_name, libcfs_id2str(peer),
+                     desc->bd_import->imp_obd->obd_name, libcfs_idstr(&peer),
                       total_md - desc->bd_refs);
         spin_unlock(&desc->bd_lock);
  
@@ -643,7 +646,8 @@ int ptlrpc_send_reply(struct ptlrpc_request *req, int flags)
                           LNET_ACK_REQ : LNET_NOACK_REQ,
                           &rs->rs_cb_id, req->rq_self, req->rq_source,
                           ptlrpc_req2svc(req)->srv_rep_portal,
-                         req->rq_xid, req->rq_reply_off, NULL);
+                         req->rq_rep_mbits ? req->rq_rep_mbits : req->rq_xid,
+                         req->rq_reply_off, NULL);
  out:
          if (unlikely(rc != 0))
                  ptlrpc_req_drop_rs(req);
@@ -700,8 +704,11 @@ int ptlrpc_error(struct ptlrpc_request *req)
  int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
  {
         int rc;
+       __u32 opc;
         int mpflag = 0;
+       bool rep_mbits = false;
         struct lnet_handle_md bulk_cookie;
+       struct lnet_processid peer;
         struct ptlrpc_connection *connection;
         struct lnet_me *reply_me = NULL;
         struct lnet_md reply_md;
@@ -711,28 +718,34 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
  
         LNetInvalidateMDHandle(&bulk_cookie);
  
-        if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_RPC))
-                RETURN(0);
+       if (OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DROP_RPC))
+               RETURN(0);
  
-        LASSERT(request->rq_type == PTL_RPC_MSG_REQUEST);
-        LASSERT(request->rq_wait_ctx == 0);
+       if (unlikely(OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_DELAY_RECOV) &&
+                    lustre_msg_get_opc(request->rq_reqmsg) == MDS_CONNECT &&
+                    strcmp(obd->obd_type->typ_name, LUSTRE_OSP_NAME) == 0)) {
+               RETURN(0);
+       }
  
-        /* If this is a re-transmit, we're required to have disengaged
-         * cleanly from the previous attempt */
-        LASSERT(!request->rq_receiving_reply);
+       LASSERT(request->rq_type == PTL_RPC_MSG_REQUEST);
+       LASSERT(request->rq_wait_ctx == 0);
+
+       /* If this is a re-transmit, we're required to have disengaged
+        * cleanly from the previous attempt */
+       LASSERT(!request->rq_receiving_reply);
         LASSERT(!((lustre_msg_get_flags(request->rq_reqmsg) & MSG_REPLAY) &&
-               (imp->imp_state == LUSTRE_IMP_FULL)));
+                 (imp->imp_state == LUSTRE_IMP_FULL)));
  
         if (unlikely(obd != NULL && obd->obd_fail)) {
                 CDEBUG(D_HA, "muting rpc for failed imp obd %s\n",
-                       obd->obd_name);
+                      obd->obd_name);
                 /* this prevents us from waiting in ptlrpc_queue_wait */
                 spin_lock(&request->rq_lock);
                 request->rq_err = 1;
                 spin_unlock(&request->rq_lock);
-                request->rq_status = -ENODEV;
-                RETURN(-ENODEV);
-        }
+               request->rq_status = -ENODEV;
+               RETURN(-ENODEV);
+       }
  
         connection = imp->imp_connection;
  
@@ -762,8 +775,14 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
                           "Allocating new XID for resend on EINPROGRESS");
         }
  
-       if (request->rq_bulk != NULL) {
-               ptlrpc_set_bulk_mbits(request);
+       opc = lustre_msg_get_opc(request->rq_reqmsg);
+       if (opc != OST_CONNECT && opc != MDS_CONNECT &&
+           opc != MGS_CONNECT && OCD_HAS_FLAG(&imp->imp_connect_data, FLAGS2))
+               rep_mbits = imp->imp_connect_data.ocd_connect_flags2 &
+                       OBD_CONNECT2_REP_MBITS;
+
+       if ((request->rq_bulk != NULL) || rep_mbits) {
+               ptlrpc_set_mbits(request);
                 lustre_msg_set_mbits(request->rq_reqmsg, request->rq_mbits);
         }
  
@@ -781,7 +800,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
         LASSERT(AT_OFF || imp->imp_state != LUSTRE_IMP_FULL ||
                 (imp->imp_msghdr_flags & MSGHDR_AT_SUPPORT) ||
                 !(imp->imp_connect_data.ocd_connect_flags &
-               OBD_CONNECT_AT));
+                 OBD_CONNECT_AT));
  
         if (request->rq_resend) {
                 lustre_msg_add_flags(request->rq_reqmsg, MSG_RESENT);
@@ -808,16 +827,16 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
                 bulk_cookie = request->rq_bulk->bd_mds[0];
         }
  
-        if (!noreply) {
-                LASSERT (request->rq_replen != 0);
-                if (request->rq_repbuf == NULL) {
-                        LASSERT(request->rq_repdata == NULL);
-                        LASSERT(request->rq_repmsg == NULL);
-                        rc = sptlrpc_cli_alloc_repbuf(request,
-                                                      request->rq_replen);
-                        if (rc) {
-                                /* this prevents us from looping in
-                                 * ptlrpc_queue_wait */
+       if (!noreply) {
+               LASSERT (request->rq_replen != 0);
+               if (request->rq_repbuf == NULL) {
+                       LASSERT(request->rq_repdata == NULL);
+                       LASSERT(request->rq_repmsg == NULL);
+                       rc = sptlrpc_cli_alloc_repbuf(request,
+                                                     request->rq_replen);
+                       if (rc) {
+                               /* this prevents us from looping in
+                                * ptlrpc_queue_wait */
                                 spin_lock(&request->rq_lock);
                                 request->rq_err = 1;
                                 spin_unlock(&request->rq_lock);
@@ -829,14 +848,17 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
                         request->rq_repmsg = NULL;
                 }
  
+               peer.pid = connection->c_peer.pid;
+               lnet_nid4_to_nid(connection->c_peer.nid, &peer.nid);
                 if (request->rq_bulk &&
                     OBD_FAIL_CHECK(OBD_FAIL_PTLRPC_BULK_REPLY_ATTACH)) {
                         reply_me = ERR_PTR(-ENOMEM);
                 } else {
                         reply_me = LNetMEAttach(request->rq_reply_portal,
-                                               connection->c_peer,
-                                               request->rq_xid, 0,
-                                               LNET_UNLINK, LNET_INS_AFTER);
+                                               &peer,
+                                               rep_mbits ? request->rq_mbits :
+                                               request->rq_xid,
+                                               0, LNET_UNLINK, LNET_INS_AFTER);
                 }
  
                 if (IS_ERR(reply_me)) {
@@ -853,24 +875,24 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
         request->rq_receiving_reply = !noreply;
         /* Clear any flags that may be present from previous sends. */
         request->rq_req_unlinked = 0;
-        request->rq_replied = 0;
-        request->rq_err = 0;
-        request->rq_timedout = 0;
-        request->rq_net_err = 0;
-        request->rq_resend = 0;
-        request->rq_restart = 0;
+       request->rq_replied = 0;
+       request->rq_err = 0;
+       request->rq_timedout = 0;
+       request->rq_net_err = 0;
+       request->rq_resend = 0;
+       request->rq_restart = 0;
         request->rq_reply_truncated = 0;
         spin_unlock(&request->rq_lock);
  
-        if (!noreply) {
-                reply_md.start     = request->rq_repbuf;
-                reply_md.length    = request->rq_repbuf_len;
-                /* Allow multiple early replies */
-                reply_md.threshold = LNET_MD_THRESH_INF;
-                /* Manage remote for early replies */
-                reply_md.options   = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT |
-                        LNET_MD_MANAGE_REMOTE |
-                        LNET_MD_TRUNCATE; /* allow to make EOVERFLOW error */;
+       if (!noreply) {
+               reply_md.start     = request->rq_repbuf;
+               reply_md.length    = request->rq_repbuf_len;
+               /* Allow multiple early replies */
+               reply_md.threshold = LNET_MD_THRESH_INF;
+               /* Manage remote for early replies */
+               reply_md.options   = PTLRPC_MD_OPTIONS | LNET_MD_OP_PUT |
+                       LNET_MD_MANAGE_REMOTE |
+                       LNET_MD_TRUNCATE; /* allow to make EOVERFLOW error */;
                 reply_md.user_ptr  = &request->rq_reply_cbid;
                 reply_md.handler = ptlrpc_handler;
  
@@ -885,7 +907,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
                         /* ...but the MD attach didn't succeed... */
                         request->rq_receiving_reply = 0;
                         spin_unlock(&request->rq_lock);
-                       GOTO(cleanup_me, rc = -ENOMEM);
+                       GOTO(cleanup_bulk, rc = -ENOMEM);
                 }
                 percpu_ref_get(&ptlrpc_pending);
  
@@ -895,22 +917,21 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
                        request->rq_reply_portal);
         }
  
-        /* add references on request for request_out_callback */
-        ptlrpc_request_addref(request);
+       /* add references on request for request_out_callback */
+       ptlrpc_request_addref(request);
         if (obd != NULL && obd->obd_svc_stats != NULL)
                 lprocfs_counter_add(obd->obd_svc_stats, PTLRPC_REQACTIVE_CNTR,
-                       atomic_read(&imp->imp_inflight));
+                                   atomic_read(&imp->imp_inflight));
  
         OBD_FAIL_TIMEOUT(OBD_FAIL_PTLRPC_DELAY_SEND, request->rq_timeout + 5);
  
         request->rq_sent_ns = ktime_get_real();
         request->rq_sent = ktime_get_real_seconds();
         /* We give the server rq_timeout secs to process the req, and
-          add the network latency for our local timeout. */
-        request->rq_deadline = request->rq_sent + request->rq_timeout +
-                ptlrpc_at_get_net_latency(request);
-
-       ptlrpc_pinger_sending_on_import(imp);
+        * add the network latency for our local timeout.
+        */
+       request->rq_deadline = request->rq_sent + request->rq_timeout +
+               ptlrpc_at_get_net_latency(request);
  
         DEBUG_REQ(D_INFO, request, "send flags=%x",
                   lustre_msg_get_flags(request->rq_reqmsg));
@@ -924,16 +945,12 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
                 GOTO(out, rc);
  
         request->rq_req_unlinked = 1;
-        ptlrpc_req_finished(request);
-        if (noreply)
-                GOTO(out, rc);
-
- cleanup_me:
-       /* MEUnlink is safe; the PUT didn't even get off the ground, and
-        * nobody apart from the PUT's target has the right nid+XID to
-        * access the reply buffer.
-        */
-       LNetMEUnlink(reply_me);
+       ptlrpc_req_finished(request);
+       if (noreply)
+               GOTO(out, rc);
+
+       LNetMDUnlink(request->rq_reply_md_h);
+
         /* UNLINKED callback called synchronously */
         LASSERT(!request->rq_receiving_reply);
  
@@ -961,8 +978,8 @@ EXPORT_SYMBOL(ptl_send_rpc);
  int ptlrpc_register_rqbd(struct ptlrpc_request_buffer_desc *rqbd)
  {
         struct ptlrpc_service *service = rqbd->rqbd_svcpt->scp_service;
-       static struct lnet_process_id match_id = {
-               .nid = LNET_NID_ANY,
+       static struct lnet_processid match_id = {
+               .nid = LNET_ANY_NID,
                 .pid = LNET_PID_ANY
         };
         int rc;
@@ -979,7 +996,7 @@ int ptlrpc_register_rqbd(struct ptlrpc_request_buffer_desc *rqbd)
          * which means buffer can only be attached on local CPT, and LND
          * threads can find it by grabbing a local lock */
         me = LNetMEAttach(service->srv_req_portal,
-                         match_id, 0, ~0, LNET_UNLINK,
+                         &match_id, 0, ~0, LNET_UNLINK,
                           rqbd->rqbd_svcpt->scp_cpt >= 0 ?
                           LNET_INS_LOCAL : LNET_INS_AFTER);
         if (IS_ERR(me)) {
@@ -1006,7 +1023,6 @@ int ptlrpc_register_rqbd(struct ptlrpc_request_buffer_desc *rqbd)
  
         CERROR("ptlrpc: LNetMDAttach failed: rc = %d\n", rc);
         LASSERT(rc == -ENOMEM);
-       LNetMEUnlink(me);
         LASSERT(rc == 0);
         rqbd->rqbd_refcount = 0;