From: Patrick Farrell Date: Mon, 22 Jul 2019 18:16:12 +0000 (-0400) Subject: LU-12523 ptlrpc: Don't get jobid in body_v2 X-Git-Tag: 2.12.57~43 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=544701a782fba39f669e5d764be36755073a1a9a LU-12523 ptlrpc: Don't get jobid in body_v2 Some Lustre messages are still sent with ptlrpc_body_v2, which does not have space for the jobid. This results in errors like this when getting the jobid from these messages, which we do now that the jobid is in all RPC debug: LustreError: 6817:0:(pack_generic.c:425:lustre_msg_buf_v2()) msg 000000005c83b7a2 buffer[0] size 152 too small (required 184, opc=-1) While we should stop sending ptlrpc_body_v2 messages, we still have to support these messages from older servers. So put a check in lustre_msg_get_jobid so it won't try to get the jobid if it's the old, smaller RPC body. Fixes: 9ae40e4c5ecb ("LU-12523 ptlrpc: Add jobid to rpctrace debug messages") Signed-off-by: Patrick Farrell Change-Id: I8af3c7cd921c7448ed12fdd30d1e48b2ccc89fdc Reviewed-on: https://review.whamcloud.com/35584 Tested-by: jenkins Reviewed-by: Ann Koehler Reviewed-by: Andreas Dilger Reviewed-by: Shaun Tancheff Tested-by: Maloo --- diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 7338cd6..0d01b0f 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -1700,7 +1700,7 @@ static int ptlrpc_send_new_req(struct ptlrpc_request *req) imp->imp_obd->obd_uuid.uuid, lustre_msg_get_status(req->rq_reqmsg), req->rq_xid, obd_import_nid2str(imp), lustre_msg_get_opc(req->rq_reqmsg), - lustre_msg_get_jobid(req->rq_reqmsg)); + lustre_msg_get_jobid(req->rq_reqmsg) ?: ""); rc = ptl_send_rpc(req, 0); if (rc == -ENOMEM) { @@ -2152,7 +2152,7 @@ interpret: req->rq_xid, obd_import_nid2str(imp), lustre_msg_get_opc(req->rq_reqmsg), - lustre_msg_get_jobid(req->rq_reqmsg)); + lustre_msg_get_jobid(req->rq_reqmsg) ?: ""); spin_lock(&imp->imp_lock); /* diff --git 
a/lustre/ptlrpc/pack_generic.c b/lustre/ptlrpc/pack_generic.c index 7a05a90..9ff1ce9 100644 --- a/lustre/ptlrpc/pack_generic.c +++ b/lustre/ptlrpc/pack_generic.c @@ -1253,8 +1253,14 @@ char *lustre_msg_get_jobid(struct lustre_msg *msg) { switch (msg->lm_magic) { case LUSTRE_MSG_MAGIC_V2: { - struct ptlrpc_body *pb = - lustre_msg_buf_v2(msg, MSG_PTLRPC_BODY_OFF, + struct ptlrpc_body *pb; + + /* the old pltrpc_body_v2 is smaller; doesn't include jobid */ + if (msg->lm_buflens[MSG_PTLRPC_BODY_OFF] < + sizeof(struct ptlrpc_body)) + return NULL; + + pb = lustre_msg_buf_v2(msg, MSG_PTLRPC_BODY_OFF, sizeof(struct ptlrpc_body)); if (!pb) return NULL; @@ -2663,7 +2669,8 @@ void _debug_req(struct ptlrpc_request *req, DEBUG_REQ_FLAGS(req), req_ok ? lustre_msg_get_flags(req->rq_reqmsg) : -1, rep_flags, req->rq_status, rep_status, - req_ok ? lustre_msg_get_jobid(req->rq_reqmsg) : ""); + req_ok ? lustre_msg_get_jobid(req->rq_reqmsg) ?: "" + : ""); va_end(args); } EXPORT_SYMBOL(_debug_req); diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index faa8ad0..90d278d 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -2225,7 +2225,7 @@ static int ptlrpc_server_handle_request(struct ptlrpc_service_part *svcpt, lustre_msg_get_status(request->rq_reqmsg), request->rq_xid, libcfs_id2str(request->rq_peer), lustre_msg_get_opc(request->rq_reqmsg), - lustre_msg_get_jobid(request->rq_reqmsg)); + lustre_msg_get_jobid(request->rq_reqmsg) ?: ""); if (lustre_msg_get_opc(request->rq_reqmsg) != OBD_PING) CFS_FAIL_TIMEOUT_MS(OBD_FAIL_PTLRPC_PAUSE_REQ, cfs_fail_val); @@ -2267,7 +2267,7 @@ put_conn: request->rq_xid, libcfs_id2str(request->rq_peer), lustre_msg_get_opc(request->rq_reqmsg), - lustre_msg_get_jobid(request->rq_reqmsg), + lustre_msg_get_jobid(request->rq_reqmsg) ?: "", timediff_usecs, arrived_usecs, (request->rq_repmsg ?