Whamcloud - gitweb
LU-8582 target: send error reply on wrong opcode 61/47761/6
authorLi Xi <lixi@ddn.com>
Tue, 21 Jun 2022 12:06:22 +0000 (20:06 +0800)
committerOleg Drokin <green@whamcloud.com>
Mon, 18 Jul 2022 05:36:06 +0000 (05:36 +0000)
Unknown opcode does not necessarily means insane client. A new client
might send RPCs with new opcodes to an old server. The client might
desperately stuck there waiting for a reply. So, send an error back
when RPC has a wrong opcode.

This patch returns the EOPNOTSUPP to client instead of block. ENOTSUPP
is not used here since strerror() does not understand ENOTSUPP.

OBD_FAIL_OST_OPCODE=0x253 is added for fault injection test of opcode.
To test whether an invalid opcode is handled properly on OST, use the
following command:

lctl set_param fail_val=${opcode} fail_loc=0x253

Change-Id: I46ca62bc532b92368e06a4f883b102c7186c453c
Signed-off-by: Li Xi <lixi@ddn.com>
Reviewed-on: https://review.whamcloud.com/47761
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Bobi Jam <bobijam@hotmail.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/obd_support.h
lustre/target/tgt_handler.c
lustre/tests/sanity.sh

index 1199860..4da4e56 100644 (file)
@@ -353,6 +353,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_OST_WR_ATTR_DELAY      0x250
 #define OBD_FAIL_OST_RESTART_IO                 0x251
 #define OBD_FAIL_OST_GET_LAST_FID       0x252
 #define OBD_FAIL_OST_WR_ATTR_DELAY      0x250
 #define OBD_FAIL_OST_RESTART_IO                 0x251
 #define OBD_FAIL_OST_GET_LAST_FID       0x252
+#define OBD_FAIL_OST_OPCODE             0x253
 
 #define OBD_FAIL_LDLM                    0x300
 #define OBD_FAIL_LDLM_NAMESPACE_NEW      0x301
 
 #define OBD_FAIL_LDLM                    0x300
 #define OBD_FAIL_LDLM_NAMESPACE_NEW      0x301
index 998b1ee..83a8ddb 100644 (file)
@@ -613,6 +613,8 @@ static struct tgt_handler *tgt_handler_find_check(struct ptlrpc_request *req)
        struct tgt_opc_slice    *s;
        struct lu_target        *tgt;
        __u32                    opc = lustre_msg_get_opc(req->rq_reqmsg);
        struct tgt_opc_slice    *s;
        struct lu_target        *tgt;
        __u32                    opc = lustre_msg_get_opc(req->rq_reqmsg);
+       /* don't spew error messages for unhandled RPCs */
+       static bool              printed;
 
        ENTRY;
 
 
        ENTRY;
 
@@ -629,25 +631,44 @@ static struct tgt_handler *tgt_handler_find_check(struct ptlrpc_request *req)
 
        /* opcode was not found in slice */
        if (unlikely(s->tos_hs == NULL)) {
 
        /* opcode was not found in slice */
        if (unlikely(s->tos_hs == NULL)) {
-               static bool printed;
-
-               /* don't spew error messages for unhandled RPCs */
                if (!printed) {
                        CERROR("%s: no handler for opcode 0x%x from %s\n",
                               tgt_name(tgt), opc, libcfs_id2str(req->rq_peer));
                        printed = true;
                }
                if (!printed) {
                        CERROR("%s: no handler for opcode 0x%x from %s\n",
                               tgt_name(tgt), opc, libcfs_id2str(req->rq_peer));
                        printed = true;
                }
-               RETURN(ERR_PTR(-ENOTSUPP));
+               goto err_unsupported;
        }
 
        LASSERT(opc >= s->tos_opc_start && opc < s->tos_opc_end);
        h = s->tos_hs + (opc - s->tos_opc_start);
        if (unlikely(h->th_opc == 0)) {
        }
 
        LASSERT(opc >= s->tos_opc_start && opc < s->tos_opc_end);
        h = s->tos_hs + (opc - s->tos_opc_start);
        if (unlikely(h->th_opc == 0)) {
-               CERROR("%s: unsupported opcode 0x%x\n", tgt_name(tgt), opc);
-               RETURN(ERR_PTR(-ENOTSUPP));
+               if (!printed) {
+                       CERROR("%s: unsupported opcode 0x%x\n",
+                              tgt_name(tgt), opc);
+                       printed = true;
+               }
+               goto err_unsupported;
        }
 
        }
 
+       if (OBD_FAIL_CHECK(OBD_FAIL_OST_OPCODE) && opc == cfs_fail_val)
+               goto err_unsupported;
+
        RETURN(h);
        RETURN(h);
+err_unsupported:
+       /*
+        * Unknown opcode does not necessarily means insane client. A new
+        * client might send RPCs with new opcodes to an old server. The
+        * client might desperately stuck there waiting for a reply. So,
+        * send an error back here.
+        *
+        * An old client might also send RPCs with deprecated opcodes (e.g.
+        * OST_QUOTACHECK).
+        *
+        * Error in ptlrpc_send_error() is ignored.
+        */
+       req->rq_status = -EOPNOTSUPP;
+       ptlrpc_send_error(req, PTLRPC_REPLY_MAYBE_DIFFICULT);
+       RETURN(ERR_PTR(-EOPNOTSUPP));
 }
 
 static int process_req_last_xid(struct ptlrpc_request *req)
 }
 
 static int process_req_last_xid(struct ptlrpc_request *req)
index b906786..a63333d 100755 (executable)
@@ -28578,6 +28578,27 @@ test_904() {
 }
 run_test 904 "virtual project ID xattr"
 
 }
 run_test 904 "virtual project ID xattr"
 
+# LU-8582
+test_905() {
+       (( $OST1_VERSION >= $(version_code 2.8.54) )) ||
+               skip "lustre < 2.8.54 does not support ladvise"
+
+       remote_ost_nodsh && skip "remote OST with nodsh"
+       $LFS setstripe -c -1 -i 0 $DIR/$tfile || error "setstripe failed"
+
+       $LFS ladvise -a willread $DIR/$tfile || error "ladvise does not work"
+
+       #define OBD_FAIL_OST_OPCODE 0x253
+       # OST_LADVISE = 21
+       do_facet ost1 "$LCTL set_param fail_val=21 fail_loc=0x0253"
+       $LFS ladvise -a willread $DIR/$tfile &&
+               error "unexpected success of ladvise with fault injection"
+       $LFS ladvise -a willread $DIR/$tfile |&
+               grep -q "Operation not supported"
+       (( $? == 0 )) || error "unexpected stderr of ladvise with fault injection"
+}
+run_test 905 "bad or new opcode should not stuck client"
+
 complete $SECONDS
 [ -f $EXT2_DEV ] && rm $EXT2_DEV || true
 check_and_cleanup_lustre
 complete $SECONDS
 [ -f $EXT2_DEV ] && rm $EXT2_DEV || true
 check_and_cleanup_lustre