Whamcloud - gitweb
LU-12523 ptlrpc: Don't get jobid in body_v2 84/35584/10
author Patrick Farrell <pfarrell@whamcloud.com>
Mon, 22 Jul 2019 18:16:12 +0000 (14:16 -0400)
committer Oleg Drokin <green@whamcloud.com>
Fri, 9 Aug 2019 04:41:27 +0000 (04:41 +0000)
Some Lustre messages are still sent with ptlrpc_body_v2,
which does not have space for the jobid.

This results in errors like the following when getting the jobid
from these messages, which we now do because the jobid is included
in all RPC debug messages:
LustreError: 6817:0:(pack_generic.c:425:lustre_msg_buf_v2()) msg
000000005c83b7a2 buffer[0] size 152 too small (required 184, opc=-1)

While we should stop sending ptlrpc_body_v2 messages, we
still have to support these messages from older servers.
So put a check in lustre_msg_get_jobid so it won't try to
get the jobid if it's the old, smaller RPC body.

Fixes: 9ae40e4c5ecb ("LU-12523 ptlrpc: Add jobid to rpctrace debug messages")
Signed-off-by: Patrick Farrell <pfarrell@whamcloud.com>
Change-Id: I8af3c7cd921c7448ed12fdd30d1e48b2ccc89fdc
Reviewed-on: https://review.whamcloud.com/35584
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Ann Koehler <amk@cray.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Shaun Tancheff <stancheff@cray.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/ptlrpc/client.c
lustre/ptlrpc/pack_generic.c
lustre/ptlrpc/service.c

index 7338cd6..0d01b0f 100644 (file)
@@ -1700,7 +1700,7 @@ static int ptlrpc_send_new_req(struct ptlrpc_request *req)
               imp->imp_obd->obd_uuid.uuid,
               lustre_msg_get_status(req->rq_reqmsg), req->rq_xid,
               obd_import_nid2str(imp), lustre_msg_get_opc(req->rq_reqmsg),
               imp->imp_obd->obd_uuid.uuid,
               lustre_msg_get_status(req->rq_reqmsg), req->rq_xid,
               obd_import_nid2str(imp), lustre_msg_get_opc(req->rq_reqmsg),
-              lustre_msg_get_jobid(req->rq_reqmsg));
+              lustre_msg_get_jobid(req->rq_reqmsg) ?: "");
 
        rc = ptl_send_rpc(req, 0);
        if (rc == -ENOMEM) {
 
        rc = ptl_send_rpc(req, 0);
        if (rc == -ENOMEM) {
@@ -2152,7 +2152,7 @@ interpret:
                               req->rq_xid,
                               obd_import_nid2str(imp),
                               lustre_msg_get_opc(req->rq_reqmsg),
                               req->rq_xid,
                               obd_import_nid2str(imp),
                               lustre_msg_get_opc(req->rq_reqmsg),
-                              lustre_msg_get_jobid(req->rq_reqmsg));
+                              lustre_msg_get_jobid(req->rq_reqmsg) ?: "");
 
                spin_lock(&imp->imp_lock);
                /*
 
                spin_lock(&imp->imp_lock);
                /*
index 7a05a90..9ff1ce9 100644 (file)
@@ -1253,8 +1253,14 @@ char *lustre_msg_get_jobid(struct lustre_msg *msg)
 {
        switch (msg->lm_magic) {
        case LUSTRE_MSG_MAGIC_V2: {
 {
        switch (msg->lm_magic) {
        case LUSTRE_MSG_MAGIC_V2: {
-               struct ptlrpc_body *pb =
-                       lustre_msg_buf_v2(msg, MSG_PTLRPC_BODY_OFF,
+               struct ptlrpc_body *pb;
+
+               /* the old ptlrpc_body_v2 is smaller; doesn't include jobid */
+               if (msg->lm_buflens[MSG_PTLRPC_BODY_OFF] <
+                   sizeof(struct ptlrpc_body))
+                       return NULL;
+
+               pb = lustre_msg_buf_v2(msg, MSG_PTLRPC_BODY_OFF,
                                          sizeof(struct ptlrpc_body));
                if (!pb)
                        return NULL;
                                          sizeof(struct ptlrpc_body));
                if (!pb)
                        return NULL;
@@ -2663,7 +2669,8 @@ void _debug_req(struct ptlrpc_request *req,
                         DEBUG_REQ_FLAGS(req),
                         req_ok ? lustre_msg_get_flags(req->rq_reqmsg) : -1,
                         rep_flags, req->rq_status, rep_status,
                         DEBUG_REQ_FLAGS(req),
                         req_ok ? lustre_msg_get_flags(req->rq_reqmsg) : -1,
                         rep_flags, req->rq_status, rep_status,
-                        req_ok ? lustre_msg_get_jobid(req->rq_reqmsg) : "");
+                        req_ok ? lustre_msg_get_jobid(req->rq_reqmsg) ?: ""
+                               : "");
        va_end(args);
 }
 EXPORT_SYMBOL(_debug_req);
        va_end(args);
 }
 EXPORT_SYMBOL(_debug_req);
index faa8ad0..90d278d 100644 (file)
@@ -2225,7 +2225,7 @@ static int ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt,
               lustre_msg_get_status(request->rq_reqmsg), request->rq_xid,
               libcfs_id2str(request->rq_peer),
               lustre_msg_get_opc(request->rq_reqmsg),
               lustre_msg_get_status(request->rq_reqmsg), request->rq_xid,
               libcfs_id2str(request->rq_peer),
               lustre_msg_get_opc(request->rq_reqmsg),
-              lustre_msg_get_jobid(request->rq_reqmsg));
+              lustre_msg_get_jobid(request->rq_reqmsg) ?: "");
 
        if (lustre_msg_get_opc(request->rq_reqmsg) != OBD_PING)
                CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_PAUSE_REQ, cfs_fail_val);
 
        if (lustre_msg_get_opc(request->rq_reqmsg) != OBD_PING)
                CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_PAUSE_REQ, cfs_fail_val);
@@ -2267,7 +2267,7 @@ put_conn:
               request->rq_xid,
               libcfs_id2str(request->rq_peer),
               lustre_msg_get_opc(request->rq_reqmsg),
               request->rq_xid,
               libcfs_id2str(request->rq_peer),
               lustre_msg_get_opc(request->rq_reqmsg),
-              lustre_msg_get_jobid(request->rq_reqmsg),
+              lustre_msg_get_jobid(request->rq_reqmsg) ?: "",
               timediff_usecs,
               arrived_usecs,
               (request->rq_repmsg ?
               timediff_usecs,
               arrived_usecs,
               (request->rq_repmsg ?