Whamcloud - gitweb
LU-8582 target: send error reply on wrong opcode
authorLi Xi <lixi@ddn.com>
Tue, 21 Jun 2022 12:06:22 +0000 (20:06 +0800)
committerAndreas Dilger <adilger@whamcloud.com>
Sat, 1 Jul 2023 10:03:08 +0000 (10:03 +0000)
Unknown opcode does not necessarily means insane client. A new client
might send RPCs with new opcodes to an old server. The client might
desperately stuck there waiting for a reply. So, send an error back
when RPC has a wrong opcode.

This patch returns the EOPNOTSUPP to client instead of block. ENOTSUPP
is not used here since strerror() does not understand ENOTSUPP.

OBD_FAIL_OST_OPCODE=0x253 is added for fault injection test of opcode.
To test whether an invalid opcode is handled properly on OST, use the
following command:

    lctl set_param fail_val=${opcode} fail_loc=0x253

Lustre-change: https://review.whamcloud.com/47761
Lustre-commit: 03c1ddf19c83891683e1726f240a2449941e8b22

Change-Id: I46ca62bc532b92368e06a4f883b102c7186c453c
Signed-off-by: Li Xi <lixi@ddn.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Bobi Jam <bobijam@hotmail.com>
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/51513
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/include/obd_support.h
lustre/target/tgt_handler.c
lustre/tests/sanity.sh

index d22629b..c3bc3f7 100644 (file)
@@ -357,6 +357,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_OST_SEEK_NET           0x24a
 #define OBD_FAIL_OST_WR_ATTR_DELAY      0x250
 #define OBD_FAIL_OST_RESTART_IO                 0x251
+#define OBD_FAIL_OST_OPCODE             0x253
 
 #define OBD_FAIL_LDLM                    0x300
 #define OBD_FAIL_LDLM_NAMESPACE_NEW      0x301
index 2e48e45..4853415 100644 (file)
@@ -610,6 +610,8 @@ static struct tgt_handler *tgt_handler_find_check(struct ptlrpc_request *req)
        struct tgt_opc_slice    *s;
        struct lu_target        *tgt;
        __u32                    opc = lustre_msg_get_opc(req->rq_reqmsg);
+       /* don't spew error messages for unhandled RPCs */
+       static bool              printed;
 
        ENTRY;
 
@@ -626,25 +628,44 @@ static struct tgt_handler *tgt_handler_find_check(struct ptlrpc_request *req)
 
        /* opcode was not found in slice */
        if (unlikely(s->tos_hs == NULL)) {
-               static bool printed;
-
-               /* don't spew error messages for unhandled RPCs */
                if (!printed) {
                        CERROR("%s: no handler for opcode 0x%x from %s\n",
                               tgt_name(tgt), opc, libcfs_id2str(req->rq_peer));
                        printed = true;
                }
-               RETURN(ERR_PTR(-ENOTSUPP));
+               goto err_unsupported;
        }
 
        LASSERT(opc >= s->tos_opc_start && opc < s->tos_opc_end);
        h = s->tos_hs + (opc - s->tos_opc_start);
        if (unlikely(h->th_opc == 0)) {
-               CERROR("%s: unsupported opcode 0x%x\n", tgt_name(tgt), opc);
-               RETURN(ERR_PTR(-ENOTSUPP));
+               if (!printed) {
+                       CERROR("%s: unsupported opcode 0x%x\n",
+                              tgt_name(tgt), opc);
+                       printed = true;
+               }
+               goto err_unsupported;
        }
 
+       if (OBD_FAIL_CHECK(OBD_FAIL_OST_OPCODE) && opc == cfs_fail_val)
+               goto err_unsupported;
+
        RETURN(h);
+err_unsupported:
+       /*
+        * Unknown opcode does not necessarily means insane client. A new
+        * client might send RPCs with new opcodes to an old server. The
+        * client might desperately stuck there waiting for a reply. So,
+        * send an error back here.
+        *
+        * An old client might also send RPCs with deprecated opcodes (e.g.
+        * OST_QUOTACHECK).
+        *
+        * Error in ptlrpc_send_error() is ignored.
+        */
+       req->rq_status = -EOPNOTSUPP;
+       ptlrpc_send_error(req, PTLRPC_REPLY_MAYBE_DIFFICULT);
+       RETURN(ERR_PTR(-EOPNOTSUPP));
 }
 
 static int process_req_last_xid(struct ptlrpc_request *req)
index c0fdbc7..7044b9f 100755 (executable)
@@ -29168,6 +29168,27 @@ test_904() {
 }
 run_test 904 "virtual project ID xattr"
 
+# LU-8582
+test_905() {
+       (( $OST1_VERSION >= $(version_code 2.8.54) )) ||
+               skip "lustre < 2.8.54 does not support ladvise"
+
+       remote_ost_nodsh && skip "remote OST with nodsh"
+       $LFS setstripe -c -1 -i 0 $DIR/$tfile || error "setstripe failed"
+
+       $LFS ladvise -a willread $DIR/$tfile || error "ladvise does not work"
+
+       #define OBD_FAIL_OST_OPCODE 0x253
+       # OST_LADVISE = 21
+       do_facet ost1 "$LCTL set_param fail_val=21 fail_loc=0x0253"
+       $LFS ladvise -a willread $DIR/$tfile &&
+               error "unexpected success of ladvise with fault injection"
+       $LFS ladvise -a willread $DIR/$tfile |&
+               grep -q "Operation not supported"
+       (( $? == 0 )) || error "unexpected stderr of ladvise with fault injection"
+}
+run_test 905 "bad or new opcode should not stuck client"
+
 test_906() {
        grep -q io_uring_setup /proc/kallsyms ||
                skip "Client OS does not support io_uring I/O engine"