From: Bruno Faccini Date: Fri, 18 Jul 2014 11:34:33 +0000 (+0200) Subject: LU-5299 obdclass: avoid race during Server device start X-Git-Tag: 2.6.52~46 X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=166c5ba95cb2a4771317e030a3649e4480c8cbad;p=fs%2Flustre-release.git LU-5299 obdclass: avoid race during Server device start Handle concurrent starts for the same device (multiple mounts, ...), but still allow for the separate nosvc and nomgs case. Also add a specific test of concurrent MDT/OST start, with an artificial delay, to verify. Signed-off-by: Bruno Faccini Change-Id: I442819a5b865ed3e98477f9d2602efc4d09d7860 Reviewed-on: http://review.whamcloud.com/11139 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Bobi Jam Reviewed-by: Mike Pershin Reviewed-by: Oleg Drokin --- diff --git a/lustre/obdclass/obd_mount_server.c b/lustre/obdclass/obd_mount_server.c index fba448d..f0b169e 100644 --- a/lustre/obdclass/obd_mount_server.c +++ b/lustre/obdclass/obd_mount_server.c @@ -1668,6 +1668,7 @@ static int osd_start(struct lustre_sb_info *lsi, unsigned long mflags) struct dt_device_param p; char flagstr[16]; int rc; + bool already_started = 0; ENTRY; CDEBUG(D_MOUNT, @@ -1690,15 +1691,27 @@ static int osd_start(struct lustre_sb_info *lsi, unsigned long mflags) GOTO(out, rc); obd = class_name2obd(lsi->lsi_osd_obdname); LASSERT(obd); + } else { + CDEBUG(D_MOUNT, "%s already started\n", lsi->lsi_osd_obdname); + already_started = 1; + /* but continue setup to allow special case of MDT and internal + * MGT being started separately, that will be identified in + * caller server_fill_super(). 
+ */ } rc = obd_connect(NULL, &lsi->lsi_osd_exp, obd, &obd->obd_uuid, NULL, NULL); + + OBD_FAIL_TIMEOUT(OBD_FAIL_TGT_DELAY_CONNECT, 10); + if (rc) { - obd->obd_force = 1; - class_manual_cleanup(obd); - lsi->lsi_dt_dev = NULL; - RETURN(rc); + if (!already_started) { + obd->obd_force = 1; + class_manual_cleanup(obd); + lsi->lsi_dt_dev = NULL; + } + GOTO(out, rc); } LASSERT(obd->obd_lu_dev); @@ -1712,7 +1725,7 @@ static int osd_start(struct lustre_sb_info *lsi, unsigned long mflags) dt_conf_get(NULL, lsi->lsi_dt_dev, &p); out: - RETURN(rc); + RETURN(already_started ? -EALREADY : rc); } /** Fill in the superblock info for a Lustre server. @@ -1732,7 +1745,9 @@ int server_fill_super(struct super_block *sb) /* Start low level OSD */ rc = osd_start(lsi, sb->s_flags); - if (rc) { + /* Handle separate nosvc and nomgs case */ + if (rc && ((rc != -EALREADY) || !(lsi->lsi_lmd->lmd_flags & + (LMD_FLG_NOSVC|LMD_FLG_NOMGS)))) { CERROR("Unable to start osd on %s: %d\n", lsi->lsi_lmd->lmd_dev, rc); lustre_put_lsi(sb); diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 95cc423..c4d2136 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -2564,6 +2564,94 @@ test_41b() { } run_test 41b "mount mds with --nosvc and --nomgs on first mount" +test_41c() { + cleanup + # MDT concurent start + #define OBD_FAIL_TGT_DELAY_CONNECT 0x703 + do_facet $SINGLEMDS "lctl set_param fail_loc=0x703" + start mds1 $(mdsdevname 1) $MDS_MOUNT_OPTS & + local pid=$! + sleep 2 + do_facet $SINGLEMDS "lctl set_param fail_loc=0x0" + start mds1 $(mdsdevname 1) $MDS_MOUNT_OPTS & + local pid2=$! + wait $pid2 + local rc2=$? + wait $pid + local rc=$? 
+ if [ $rc == 0 ] && [ $rc2 == 114 ]; then + echo "1st MDT start succeed" + echo "2nd MDT start failed with EALREADY" + elif [ $rc2 == 0 ] && [ $rc == 114 ]; then + echo "1st MDT start failed with EALREADY" + echo "2nd MDT start succeed" + else + stop mds1 -f + error "unexpected concurent MDT mounts result, rc=$rc rc2=$rc2" + fi + + # OST concurent start + #define OBD_FAIL_TGT_DELAY_CONNECT 0x703 + do_facet ost1 "lctl set_param fail_loc=0x703" + start ost1 $(ostdevname 1) $OST_MOUNT_OPTS & + pid=$! + sleep 2 + do_facet ost1 "lctl set_param fail_loc=0x0" + start ost1 $(ostdevname 1) $OST_MOUNT_OPTS & + pid2=$! + wait $pid2 + rc2=$? + wait $pid + rc=$? + if [ $rc == 0 ] && [ $rc2 == 114 ]; then + echo "1st OST start succeed" + echo "2nd OST start failed with EALREADY" + elif [ $rc2 == 0 ] && [ $rc == 114 ]; then + echo "1st OST start failed with EALREADY" + echo "2nd OST start succeed" + else + stop mds1 -f + stop ost1 -f + error "unexpected concurent OST mounts result, rc=$rc rc2=$rc2" + fi + # cleanup + stop mds1 -f + stop ost1 -f + + # verify everything ok + start_mds + if [ $? != 0 ] + then + stop mds1 -f + error "MDT(s) start failed" + fi + + start_ost + if [ $? != 0 ] + then + stop mds1 -f + stop ost1 -f + error "OST(s) start failed" + fi + + mount_client $MOUNT + if [ $? != 0 ] + then + stop mds1 -f + stop ost1 -f + error "client start failed" + fi + check_mount + if [ $? != 0 ] + then + stop mds1 -f + stop ost1 -f + error "client mount failed" + fi + cleanup +} +run_test 41c "concurent mounts of MDT/OST should all fail but one" + test_42() { #bug 14693 setup check_mount || error "client was not mounted"