Whamcloud - gitweb
LU-12397 osp: always set opd_new_connection 78/35078/6
author: Sergey Cheremencev <c17829@cray.com>
Mon, 25 Mar 2019 22:06:00 +0000 (01:06 +0300)
committer: Oleg Drokin <green@whamcloud.com>
Fri, 25 Sep 2020 03:12:43 +0000 (03:12 +0000)
The opd_got_disconnected flag could be set back to 0
due to a race between osp_precreate_thread and osp_import_event.
The next ACTIVE event then doesn't set opd_new_connection, because
opd_got_disconnected is also 0 (i.e. the import hasn't disconnected).
Such a race causes osp_precreate_thread to sleep indefinitely
in wait even though the connection state is FULL.

The patch always sets the opd_new_connection flag on an ACTIVE event,
regardless of the value of opd_got_disconnected.

The patch adds the conf-sanity_101b test to race the DISCON and ACTIVE
events. Without the fix, the test causes osp_precreate_thread to hang
and, as a result, the osp_precreate_reserve threads as well.

Change-Id: Iff41a2743f108679d5f70aca8e1c2108e979ac09
Cray-bug-id: LUS-7178
Signed-off-by: Sergey Cheremencev <c17829@cray.com>
Reviewed-on: https://es-gerrit.dev.cray.com/154883
Reviewed-by: Andriy Skulysh <c17819@cray.com>
Reviewed-by: Alexander Boyko <c17825@cray.com>
Tested-by: Elena Gryaznova <c17455@cray.com>
Reviewed-on: https://review.whamcloud.com/35078
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Alexander Boyko <alexander.boyko@hpe.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/obd_support.h
lustre/osp/osp_dev.c
lustre/osp/osp_precreate.c
lustre/tests/conf-sanity.sh

index bca90b0..2548973 100644 (file)
@@ -694,6 +694,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_OSP_RPCS_SEM                  0x2104
 #define OBD_FAIL_OSP_CANT_PROCESS_LLOG         0x2105
 #define OBD_FAIL_OSP_INVALID_LOGID             0x2106
+#define OBD_FAIL_OSP_CON_EVENT_DELAY           0x2107
 
 /* barrier */
 #define OBD_FAIL_MGS_BARRIER_READ_NET          0x2200
index 04ad620..c5b5401 100644 (file)
@@ -1635,8 +1635,7 @@ static int osp_import_event(struct obd_device *obd, struct obd_import *imp,
        case IMP_EVENT_ACTIVE:
                d->opd_imp_active = 1;
 
-               if (d->opd_got_disconnected)
-                       d->opd_new_connection = 1;
+               d->opd_new_connection = 1;
                d->opd_imp_connected = 1;
                d->opd_imp_seen_connected = 1;
                d->opd_obd->obd_inactive = 0;
index 6e3e9e9..bbbf7c4 100644 (file)
@@ -1224,6 +1224,8 @@ static int osp_precreate_thread(void *_args)
                        if (!d->opd_new_connection)
                                continue;
 
+                       OBD_FAIL_TIMEOUT(OBD_FAIL_OSP_CON_EVENT_DELAY,
+                                        cfs_fail_val);
                        d->opd_new_connection = 0;
                        d->opd_got_disconnected = 0;
                        break;
index 438a08f..2de8aa6 100644 (file)
@@ -7437,7 +7437,7 @@ test_100() {
 }
 run_test 100 "check lshowmount lists MGS, MDT, OST and 0@lo"
 
-test_101() {
+test_101a() {
        local createmany_pid
        local dev=$FSNAME-OST0000-osc-MDT0000
        setup
@@ -7464,7 +7464,29 @@ test_101() {
        unlinkmany $DIR1/$tdir/$tfile-%d 50000
        cleanup
 }
-run_test 101 "Race MDT->OST reconnection with create"
+run_test 101a "Race MDT->OST reconnection with create"
+
+test_101b () {
+       local dev=$FSNAME-OST0000-osc-MDT0000
+       local dir=$DIR1/$tdir
+       setup
+
+       mkdir $dir
+       $LFS setstripe -c 1 -i 0 $dir
+       do_facet $SINGLEMDS "$LCTL --device $dev deactivate;"
+#define OBD_FAIL_OSP_CON_EVENT_DELAY 0x2107
+       do_facet mds1 "$LCTL set_param fail_loc=0x80002107 fail_val=20"
+       do_facet $SINGLEMDS "$LCTL --device $dev activate;"
+       stop_ost
+       sleep 25
+       start_ost
+
+       wait_osc_import_state client ost1 FULL
+       touch $dir/$tfile || error "Can't create file"
+
+       cleanup
+}
+run_test 101b "Race events DISCONNECT and ACTIVE in osp"
 
 test_102() {
        [[ "$MDS1_VERSION" -gt $(version_code 2.9.53) ]] ||