From 5c8b1e87a97bbe7b05f0b8325e98c16a0de1ff4c Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Thu, 7 Nov 2019 10:18:15 +0800 Subject: [PATCH] LU-12946 kernel: fix to handle BLK_MQ_RQ_QUEUE_DEV_BUSY event It looks like what's happening is when dm_dispatch_clone_request dispatches the "clone" I/O request to the underlying (real) device from the multipath device, the scsi driver can (often under load) return BLK_MQ_RQ_QUEUE_DEV_BUSY. dm_dispatch_clone_request doesn't have that as an exception the way it does BLK_MQ_RQ_QUEUE_BUSY and so it calls dm_complete_request which propagates the BLK_MQ_RQ_QUEUE_DEV_BUSY error code up the stack resulting in multipath_end_io calling fail_path and failing the path because there is an error value set. Signed-off-by: Wang Shilong Change-Id: If17ea5b3ab33a89a17d49e5dfb2e9f9f19371564 Reviewed-on: https://review.whamcloud.com/36699 Tested-by: jenkins Reviewed-by: Yang Sheng Reviewed-by: Andreas Dilger Reviewed-by: Li Dongyang Tested-by: Maloo Reviewed-by: Oleg Drokin --- ...x-handle-BLK_MQ_RQ_QUEUE_DEV_BUSY-rhel7.6.patch | 33 ++++++++++++++++++++++ lustre/kernel_patches/series/3.10-rhel7.6.series | 1 + lustre/kernel_patches/series/3.10-rhel7.7.series | 1 + 3 files changed, 35 insertions(+) create mode 100644 lustre/kernel_patches/patches/dm-fix-handle-BLK_MQ_RQ_QUEUE_DEV_BUSY-rhel7.6.patch diff --git a/lustre/kernel_patches/patches/dm-fix-handle-BLK_MQ_RQ_QUEUE_DEV_BUSY-rhel7.6.patch b/lustre/kernel_patches/patches/dm-fix-handle-BLK_MQ_RQ_QUEUE_DEV_BUSY-rhel7.6.patch new file mode 100644 index 0000000..b6b4e61 --- /dev/null +++ b/lustre/kernel_patches/patches/dm-fix-handle-BLK_MQ_RQ_QUEUE_DEV_BUSY-rhel7.6.patch @@ -0,0 +1,33 @@ +It looks like what's happening is when dm_dispatch_clone_request +dispatches the "clone" I/O request to the underlying (real) device +from the multipath device, the scsi driver can (often under load) +return BLK_MQ_RQ_QUEUE_DEV_BUSY. dm_dispatch_clone_request doesn't +have that as an exception the way it does BLK_MQ_RQ_QUEUE_BUSY and +so it calls dm_complete_request which propagates +the BLK_MQ_RQ_QUEUE_DEV_BUSY error code up the stack resulting +in multipath_end_io calling fail_path and failing the path because +there is an error value set. + +diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c +index 02da1e65..e4f58472 100644 +--- a/drivers/md/dm-rq.c ++++ b/drivers/md/dm-rq.c +@@ -477,7 +477,8 @@ static int dm_dispatch_clone_request(struct request *clone, struct request *rq) + + clone->start_time = jiffies; + r = blk_insert_cloned_request(clone->q, clone); +- if (r != BLK_MQ_RQ_QUEUE_OK && r != BLK_MQ_RQ_QUEUE_BUSY) ++ if (r != BLK_MQ_RQ_QUEUE_OK && r != BLK_MQ_RQ_QUEUE_BUSY && ++ r != BLK_MQ_RQ_QUEUE_DEV_BUSY) + /* must complete clone in terms of original request */ + dm_complete_request(rq, r); + return r; +@@ -661,7 +662,7 @@ check_again: + trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)), + blk_rq_pos(rq)); + ret = dm_dispatch_clone_request(clone, rq); +- if (ret == BLK_MQ_RQ_QUEUE_BUSY) { ++ if (ret == BLK_MQ_RQ_QUEUE_BUSY || ret == BLK_MQ_RQ_QUEUE_DEV_BUSY) { + blk_rq_unprep_clone(clone); + tio->ti->type->release_clone_rq(clone); + tio->clone = NULL; diff --git a/lustre/kernel_patches/series/3.10-rhel7.6.series b/lustre/kernel_patches/series/3.10-rhel7.6.series index a368aad..92b320b 100644 --- a/lustre/kernel_patches/series/3.10-rhel7.6.series +++ b/lustre/kernel_patches/series/3.10-rhel7.6.series @@ -6,3 +6,4 @@ fix-integrity-verify-rhel7.patch fix-sd-dif-complete-rhel7.patch block-integrity-allow-optional-integrity-functions-rhel7.patch block-pass-bio-into-integrity_processing_fn-rhel7.patch +dm-fix-handle-BLK_MQ_RQ_QUEUE_DEV_BUSY-rhel7.6.patch diff --git a/lustre/kernel_patches/series/3.10-rhel7.7.series b/lustre/kernel_patches/series/3.10-rhel7.7.series index a368aad..92b320b 100644 --- a/lustre/kernel_patches/series/3.10-rhel7.7.series +++ b/lustre/kernel_patches/series/3.10-rhel7.7.series @@ -6,3 +6,4 @@ fix-integrity-verify-rhel7.patch fix-sd-dif-complete-rhel7.patch block-integrity-allow-optional-integrity-functions-rhel7.patch block-pass-bio-into-integrity_processing_fn-rhel7.patch +dm-fix-handle-BLK_MQ_RQ_QUEUE_DEV_BUSY-rhel7.6.patch -- 1.8.3.1