From 566edb8c43e65ed4cb99614c7c753965bb201366 Mon Sep 17 00:00:00 2001 From: Li Xi Date: Tue, 21 Jun 2022 20:06:22 +0800 Subject: [PATCH] LU-8582 target: send error reply on wrong opcode Unknown opcode does not necessarily means insane client. A new client might send RPCs with new opcodes to an old server. The client might desperately stuck there waiting for a reply. So, send an error back when RPC has a wrong opcode. This patch returns the EOPNOTSUPP to client instead of block. ENOTSUPP is not used here since strerror() does not understand ENOTSUPP. OBD_FAIL_OST_OPCODE=0x253 is added for fault injection test of opcode. To test whether an invalid opcode is handled properly on OST, use the following command: lctl set_param fail_val=${opcode} fail_loc=0x253 Change-Id: I46ca62bc532b92368e06a4f883b102c7186c453c Signed-off-by: Li Xi Reviewed-on: https://review.whamcloud.com/47761 Tested-by: jenkins Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Bobi Jam Reviewed-by: Oleg Drokin --- lustre/include/obd_support.h | 1 + lustre/target/tgt_handler.c | 33 +++++++++++++++++++++++++++------ lustre/tests/sanity.sh | 21 +++++++++++++++++++++ 3 files changed, 49 insertions(+), 6 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 1199860..4da4e56 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -353,6 +353,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_OST_WR_ATTR_DELAY 0x250 #define OBD_FAIL_OST_RESTART_IO 0x251 #define OBD_FAIL_OST_GET_LAST_FID 0x252 +#define OBD_FAIL_OST_OPCODE 0x253 #define OBD_FAIL_LDLM 0x300 #define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301 diff --git a/lustre/target/tgt_handler.c b/lustre/target/tgt_handler.c index 998b1ee..83a8ddb 100644 --- a/lustre/target/tgt_handler.c +++ b/lustre/target/tgt_handler.c @@ -613,6 +613,8 @@ static struct tgt_handler *tgt_handler_find_check(struct ptlrpc_request *req) struct tgt_opc_slice *s; struct lu_target *tgt; __u32 opc = lustre_msg_get_opc(req->rq_reqmsg); + /* don't spew error messages for unhandled RPCs */ + static bool printed; ENTRY; @@ -629,25 +631,44 @@ static struct tgt_handler *tgt_handler_find_check(struct ptlrpc_request *req) /* opcode was not found in slice */ if (unlikely(s->tos_hs == NULL)) { - static bool printed; - - /* don't spew error messages for unhandled RPCs */ if (!printed) { CERROR("%s: no handler for opcode 0x%x from %s\n", tgt_name(tgt), opc, libcfs_id2str(req->rq_peer)); printed = true; } - RETURN(ERR_PTR(-ENOTSUPP)); + goto err_unsupported; } LASSERT(opc >= s->tos_opc_start && opc < s->tos_opc_end); h = s->tos_hs + (opc - s->tos_opc_start); if (unlikely(h->th_opc == 0)) { - CERROR("%s: unsupported opcode 0x%x\n", tgt_name(tgt), opc); - RETURN(ERR_PTR(-ENOTSUPP)); + if (!printed) { + CERROR("%s: unsupported opcode 0x%x\n", + tgt_name(tgt), opc); + printed = true; + } + goto err_unsupported; } + if (OBD_FAIL_CHECK(OBD_FAIL_OST_OPCODE) && opc == cfs_fail_val) + goto err_unsupported; + RETURN(h); +err_unsupported: + /* + * Unknown opcode does not necessarily means insane client. A new + * client might send RPCs with new opcodes to an old server. The + * client might desperately stuck there waiting for a reply. So, + * send an error back here. + * + * An old client might also send RPCs with deprecated opcodes (e.g. + * OST_QUOTACHECK). + * + * Error in ptlrpc_send_error() is ignored. + */ + req->rq_status = -EOPNOTSUPP; + ptlrpc_send_error(req, PTLRPC_REPLY_MAYBE_DIFFICULT); + RETURN(ERR_PTR(-EOPNOTSUPP)); } static int process_req_last_xid(struct ptlrpc_request *req) diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index b906786..a63333d 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -28578,6 +28578,27 @@ test_904() { } run_test 904 "virtual project ID xattr" +# LU-8582 +test_905() { + (( $OST1_VERSION >= $(version_code 2.8.54) )) || + skip "lustre < 2.8.54 does not support ladvise" + + remote_ost_nodsh && skip "remote OST with nodsh" + $LFS setstripe -c -1 -i 0 $DIR/$tfile || error "setstripe failed" + + $LFS ladvise -a willread $DIR/$tfile || error "ladvise does not work" + + #define OBD_FAIL_OST_OPCODE 0x253 + # OST_LADVISE = 21 + do_facet ost1 "$LCTL set_param fail_val=21 fail_loc=0x0253" + $LFS ladvise -a willread $DIR/$tfile && + error "unexpected success of ladvise with fault injection" + $LFS ladvise -a willread $DIR/$tfile |& + grep -q "Operation not supported" + (( $? == 0 )) || error "unexpected stderr of ladvise with fault injection" +} +run_test 905 "bad or new opcode should not stuck client" + complete $SECONDS [ -f $EXT2_DEV ] && rm $EXT2_DEV || true check_and_cleanup_lustre -- 1.8.3.1