From 1b5abf625462a2b66820b2d07e25619afba504c6 Mon Sep 17 00:00:00 2001 From: Sergey Cheremencev Date: Tue, 26 Mar 2019 01:06:00 +0300 Subject: [PATCH] LU-12397 osp: always set opd_new_connection MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Flag opd_got_disconnected could be set back to 0 due to a race between osp_precreate_thread and osp_import_event. The next ACTIVE event then doesn't set opd_new_connection, because opd_got_disconnected is also 0 (i.e. the import appears not to have disconnected). This race causes osp_precreate_thread to sleep indefinitely in its wait even though the connection state is FULL. The patch always sets the opd_new_connection flag on an ACTIVE event, regardless of the value of opd_got_disconnected. The patch also adds the conf-sanity_101b test to race the DISCON and ACTIVE events. Without the fix, the test causes osp_precreate_thread to hang and, as a result, the osp_precreate_reserve threads as well. Change-Id: Iff41a2743f108679d5f70aca8e1c2108e979ac09 Cray-bug-id: LUS-7178 Signed-off-by: Sergey Cheremencev Reviewed-on: https://es-gerrit.dev.cray.com/154883 Reviewed-by: Andriy Skulysh Reviewed-by: Alexander Boyko Tested-by: Elena Gryaznova Reviewed-on: https://review.whamcloud.com/35078 Reviewed-by: Andreas Dilger Tested-by: jenkins Reviewed-by: Alexander Boyko Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/include/obd_support.h | 1 + lustre/osp/osp_dev.c | 3 +-- lustre/osp/osp_precreate.c | 2 ++ lustre/tests/conf-sanity.sh | 26 ++++++++++++++++++++++++-- 4 files changed, 28 insertions(+), 4 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index bca90b0..2548973 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -694,6 +694,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_OSP_RPCS_SEM 0x2104 #define OBD_FAIL_OSP_CANT_PROCESS_LLOG 0x2105 #define OBD_FAIL_OSP_INVALID_LOGID 0x2106 +#define OBD_FAIL_OSP_CON_EVENT_DELAY 0x2107 /* barrier */ #define OBD_FAIL_MGS_BARRIER_READ_NET 0x2200 diff --git a/lustre/osp/osp_dev.c 
b/lustre/osp/osp_dev.c index 04ad620..c5b5401 100644 --- a/lustre/osp/osp_dev.c +++ b/lustre/osp/osp_dev.c @@ -1635,8 +1635,7 @@ static int osp_import_event(struct obd_device *obd, struct obd_import *imp, case IMP_EVENT_ACTIVE: d->opd_imp_active = 1; - if (d->opd_got_disconnected) - d->opd_new_connection = 1; + d->opd_new_connection = 1; d->opd_imp_connected = 1; d->opd_imp_seen_connected = 1; d->opd_obd->obd_inactive = 0; diff --git a/lustre/osp/osp_precreate.c b/lustre/osp/osp_precreate.c index 6e3e9e9..bbbf7c4 100644 --- a/lustre/osp/osp_precreate.c +++ b/lustre/osp/osp_precreate.c @@ -1224,6 +1224,8 @@ static int osp_precreate_thread(void *_args) if (!d->opd_new_connection) continue; + OBD_FAIL_TIMEOUT(OBD_FAIL_OSP_CON_EVENT_DELAY, + cfs_fail_val); d->opd_new_connection = 0; d->opd_got_disconnected = 0; break; diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 438a08f..2de8aa6 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -7437,7 +7437,7 @@ test_100() { } run_test 100 "check lshowmount lists MGS, MDT, OST and 0@lo" -test_101() { +test_101a() { local createmany_pid local dev=$FSNAME-OST0000-osc-MDT0000 setup @@ -7464,7 +7464,29 @@ test_101() { unlinkmany $DIR1/$tdir/$tfile-%d 50000 cleanup } -run_test 101 "Race MDT->OST reconnection with create" +run_test 101a "Race MDT->OST reconnection with create" + +test_101b () { + local dev=$FSNAME-OST0000-osc-MDT0000 + local dir=$DIR1/$tdir + setup + + mkdir $dir + $LFS setstripe -c 1 -i 0 $dir + do_facet $SINGLEMDS "$LCTL --device $dev deactivate;" +#define OBD_FAIL_OSP_CON_EVENT_DELAY 0x2107 + do_facet mds1 "$LCTL set_param fail_loc=0x80002107 fail_val=20" + do_facet $SINGLEMDS "$LCTL --device $dev activate;" + stop_ost + sleep 25 + start_ost + + wait_osc_import_state client ost1 FULL + touch $dir/$tfile || error "Can't create file" + + cleanup +} +run_test 101b "Race events DISCONNECT and ACTIVE in osp" test_102() { [[ "$MDS1_VERSION" -gt $(version_code 2.9.53) 
]] || -- 1.8.3.1