From 54721a3902c853dea73bde97a2c93a9618a67018 Mon Sep 17 00:00:00 2001 From: fanyong Date: Thu, 4 Jun 2009 16:27:17 +0000 Subject: [PATCH] Branch HEAD b=19264 i=jay i=tianzy 1) replace "OBD_NOTIFY_ACTIVE" with the real event for mds_notify() when it calls onu_upcall(). 2) drop repeated onu_upcall() in mds_notify(). 3) clean up master quota recovery (trigger it when MDS recovery finishes or connection to OST completes) 4) sanity check for connection between MDS and OSS when a client mounts. --- lustre/mds/mds_internal.h | 3 --- lustre/mds/mds_lov.c | 40 ++++++++++++++++++++-------------------- lustre/quota/quota_ctl.c | 2 +- lustre/quota/quota_master.c | 2 +- lustre/tests/test-framework.sh | 20 ++++++++++++++++++++ lustre/utils/lfs.c | 16 +++++++++++----- 6 files changed, 53 insertions(+), 30 deletions(-) diff --git a/lustre/mds/mds_internal.h b/lustre/mds/mds_internal.h index 333597b..4c3ec94 100644 --- a/lustre/mds/mds_internal.h +++ b/lustre/mds/mds_internal.h @@ -58,9 +58,6 @@ int mds_lov_clear_orphans(struct mds_obd *mds, struct obd_uuid *ost_uuid); void mds_lov_update_objids(struct obd_device *obd, struct lov_mds_md *lmm); int mds_lov_set_nextid(struct obd_device *obd); -int mds_lov_start_synchronize(struct obd_device *obd, - struct obd_device *watched, - void *data, int nonblock); int mds_post_mds_lovconf(struct obd_device *obd); int mds_notify(struct obd_device *obd, struct obd_device *watched, enum obd_notify_event ev, void *data); diff --git a/lustre/mds/mds_lov.c b/lustre/mds/mds_lov.c index 596d52e..a423c0b 100644 --- a/lustre/mds/mds_lov.c +++ b/lustre/mds/mds_lov.c @@ -505,7 +505,7 @@ static int mds_lov_set_one_nextid(struct obd_device *obd, __u32 idx, obd_id *id) /* Update the lov desc for a new size lov. 
*/ static int mds_lov_update_desc(struct obd_device *obd, int idx, - struct obd_uuid *uuid) + struct obd_uuid *uuid, enum obd_notify_event ev) { struct mds_obd *mds = &obd->u.mds; struct lov_desc *ld; @@ -544,7 +544,7 @@ static int mds_lov_update_desc(struct obd_device *obd, int idx, /*XXX this notifies the MDD until lov handling use old mds code */ if (obd->obd_upcall.onu_owner) { LASSERT(obd->obd_upcall.onu_upcall != NULL); - rc = obd->obd_upcall.onu_upcall(obd, NULL, OBD_NOTIFY_ACTIVE, + rc = obd->obd_upcall.onu_upcall(obd, NULL, ev, obd->obd_upcall.onu_owner); } out: @@ -555,7 +555,7 @@ out: /* Inform MDS about new/updated target */ static int mds_lov_update_mds(struct obd_device *obd, struct obd_device *watched, - __u32 idx) + __u32 idx, enum obd_notify_event ev) { struct mds_obd *mds = &obd->u.mds; int rc = 0; @@ -566,7 +566,7 @@ static int mds_lov_update_mds(struct obd_device *obd, ENTRY; /* Don't let anyone else mess with mds_lov_objids now */ - rc = mds_lov_update_desc(obd, idx, &watched->u.cli.cl_target_uuid); + rc = mds_lov_update_desc(obd, idx, &watched->u.cli.cl_target_uuid, ev); if (rc) GOTO(out, rc); @@ -711,9 +711,10 @@ int mds_lov_disconnect(struct obd_device *obd) } struct mds_lov_sync_info { - struct obd_device *mlsi_obd; /* the lov device to sync */ - struct obd_device *mlsi_watched; /* target osc */ - __u32 mlsi_index; /* index of target */ + struct obd_device *mlsi_obd; /* the lov device to sync */ + struct obd_device *mlsi_watched; /* target osc */ + __u32 mlsi_index; /* index of target */ + enum obd_notify_event mlsi_ev; /* event type */ }; static int mds_propagate_capa_keys(struct mds_obd *mds, struct obd_uuid *uuid) @@ -757,6 +758,7 @@ static int __mds_lov_synchronize(void *data) struct mds_obd *mds = &obd->u.mds; struct obd_uuid *uuid; __u32 idx = mlsi->mlsi_index; + enum obd_notify_event ev = mlsi->mlsi_ev; struct mds_group_info mgi; struct llog_ctxt *ctxt; int rc = 0; @@ -774,7 +776,7 @@ static int __mds_lov_synchronize(void *data) 
GOTO(out, rc = -ENODEV); OBD_RACE(OBD_FAIL_MDS_LOV_SYNC_RACE); - rc = mds_lov_update_mds(obd, watched, idx); + rc = mds_lov_update_mds(obd, watched, idx, ev); if (rc != 0) { CERROR("%s failed at update_mds: %d\n", obd_uuid2str(uuid), rc); GOTO(out, rc); @@ -814,14 +816,14 @@ static int __mds_lov_synchronize(void *data) } #ifdef HAVE_QUOTA_SUPPORT - if (obd->obd_upcall.onu_owner) { + if (obd->obd_upcall.onu_owner) { /* * This is a hack for mds_notify->mdd_notify. When the mds obd * in mdd is removed, This hack should be removed. */ - LASSERT(obd->obd_upcall.onu_upcall != NULL); - rc = obd->obd_upcall.onu_upcall(obd, NULL, OBD_NOTIFY_QUOTA, - obd->obd_upcall.onu_owner); + LASSERT(obd->obd_upcall.onu_upcall != NULL); + rc = obd->obd_upcall.onu_upcall(obd, NULL, OBD_NOTIFY_QUOTA, + obd->obd_upcall.onu_owner); } #endif EXIT; @@ -854,7 +856,7 @@ int mds_lov_synchronize(void *data) int mds_lov_start_synchronize(struct obd_device *obd, struct obd_device *watched, - void *data, int nonblock) + void *data, enum obd_notify_event ev) { struct mds_lov_sync_info *mlsi; int rc; @@ -872,6 +874,7 @@ int mds_lov_start_synchronize(struct obd_device *obd, mlsi->mlsi_obd = obd; mlsi->mlsi_watched = watched; mlsi->mlsi_index = *(__u32 *)data; + mlsi->mlsi_ev = ev; /* Although class_export_get(obd->obd_self_export) would lock the MDS in place, since it's only a self-export @@ -883,7 +886,7 @@ int mds_lov_start_synchronize(struct obd_device *obd, finish for as long as the sync is blocking. */ class_incref(obd, "mds_lov_synchronize", obd); - if (nonblock) { + if (ev != OBD_NOTIFY_SYNC) { /* Synchronize in the background */ rc = cfs_kernel_thread(mds_lov_synchronize, mlsi, CLONE_VM | CLONE_FILES); @@ -939,12 +942,9 @@ int mds_notify(struct obd_device *obd, struct obd_device *watched, after the mdt in the config log. They didn't make it into mds_lov_connect. 
*/ rc = mds_lov_update_desc(obd, *(__u32 *)data, - &watched->u.cli.cl_target_uuid); - RETURN(rc); + &watched->u.cli.cl_target_uuid, ev); + } else { + rc = mds_lov_start_synchronize(obd, watched, data, ev); } - - rc = mds_lov_start_synchronize(obd, watched, data, - !(ev == OBD_NOTIFY_SYNC)); - RETURN(rc); } diff --git a/lustre/quota/quota_ctl.c b/lustre/quota/quota_ctl.c index a1961cf..6af3712 100644 --- a/lustre/quota/quota_ctl.c +++ b/lustre/quota/quota_ctl.c @@ -361,8 +361,8 @@ int lov_quota_ctl(struct obd_device *unused, struct obd_export *exp, tgt = lov->lov_tgts[i]; if (!tgt || !tgt->ltd_active || tgt->ltd_reap) { if (oqctl->qc_cmd == Q_GETOQUOTA) { + rc = -EREMOTEIO; CERROR("ost %d is inactive\n", i); - rc = -EIO; } else { CDEBUG(D_HA, "ost %d is inactive\n", i); } diff --git a/lustre/quota/quota_master.c b/lustre/quota/quota_master.c index beb55df..f884d9d 100644 --- a/lustre/quota/quota_master.c +++ b/lustre/quota/quota_master.c @@ -1407,7 +1407,7 @@ static int mds_get_space(struct obd_device *obd, struct obd_quotactl *oqctl) /* get block usage from OSS */ soqc->qc_dqblk.dqb_curspace = 0; rc = obd_quotactl(obd->u.mds.mds_osc_exp, soqc); - if (!rc) { + if (!rc || rc == -EREMOTEIO) { oqctl->qc_dqblk.dqb_curspace = soqc->qc_dqblk.dqb_curspace; oqctl->qc_dqblk.dqb_valid |= QIF_SPACE; } diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 3e3f074..06d70cc 100644 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -1669,6 +1669,24 @@ init_facets_vars () { done } +mds_sanity_check () { + local timeout=$1 + local period=0 + + while [ $period -lt $timeout ]; do + count=$(do_facet $SINGLEMDS "lctl dl | grep 'osc.*mdtlov_UUID' | grep ' IN ' 2>/dev/null | wc -l") + if [ $count -eq 0 ]; then + break + fi + + echo "There are $count OST are inactive, wait $period seconds, and try again" + sleep 3 + period=$((period+3)) + done + + [ $period -lt $timeout ] || log "$count OST are inactive after $timeout seconds, give up" 
+} + init_param_vars () { if ! remote_ost_nodsh && ! remote_mds_nodsh; then export MDSVER=$(do_facet $SINGLEMDS "lctl get_param version" | cut -d. -f1,2) @@ -1681,6 +1699,8 @@ init_param_vars () { log "Using TIMEOUT=$TIMEOUT" + mds_sanity_check $TIMEOUT + if [ "$ENABLE_QUOTA" ]; then setup_quota $MOUNT || return 2 fi diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c index bd4d94f..c3c8ba6 100644 --- a/lustre/utils/lfs.c +++ b/lustre/utils/lfs.c @@ -1949,7 +1949,7 @@ static void print_quota_title(char *name, struct if_quotactl *qctl) "files", "quota", "limit", "grace"); } -static void print_quota(char *mnt, struct if_quotactl *qctl, int type) +static void print_quota(char *mnt, struct if_quotactl *qctl, int type, int rc) { time_t now; @@ -1997,8 +1997,14 @@ static void print_quota(char *mnt, struct if_quotactl *qctl, int type) if (bover) diff2str(dqb->dqb_btime, timebuf, now); - sprintf(numbuf[0], (dqb->dqb_valid & QIF_SPACE) ? - LPU64 : "["LPU64"]", toqb(dqb->dqb_curspace)); + if (rc == -1 && errno == EREMOTEIO) + sprintf(numbuf[0], LPU64"*", + toqb(dqb->dqb_curspace)); + else + sprintf(numbuf[0], + (dqb->dqb_valid & QIF_SPACE) ? + LPU64 : "["LPU64"]", + toqb(dqb->dqb_curspace)); if (type == QC_GENERAL) sprintf(numbuf[1], (dqb->dqb_valid & QIF_BLIMITS) ? LPU64 : "["LPU64"]", @@ -2071,7 +2077,7 @@ static int print_obd_quota(char *mnt, struct if_quotactl *qctl, int is_mdt) continue; } - print_quota(obd_uuid2str(&qctl->obd_uuid), qctl, qctl->qc_valid); + print_quota(obd_uuid2str(&qctl->obd_uuid), qctl, qctl->qc_valid, 0); } out: @@ -2189,7 +2195,7 @@ ug_output: if (qctl.qc_valid != QC_GENERAL) mnt = ""; - print_quota(mnt, &qctl, QC_GENERAL); + print_quota(mnt, &qctl, QC_GENERAL, rc1); if (qctl.qc_valid == QC_GENERAL && qctl.qc_cmd != LUSTRE_Q_GETINFO && verbose) { rc2 = print_obd_quota(mnt, &qctl, 1); -- 1.8.3.1