From 737f2ea683ba8dc2989c463d4a208f8ac7715008 Mon Sep 17 00:00:00 2001 From: nathan Date: Tue, 21 Mar 2006 20:34:04 +0000 Subject: [PATCH] Branch b1_4_mountconf b=4482 Fix some recovery problems with late-added OSTs. A "late-added" OST is one which is added to the FS after the MDT is added. Since the config logs are not re-written, these OSTs get added to the LOV after the MDT starts up, every time the MDT starts up. We have to fix the mdsize, and re-setup the llogs whenever we add a late OST. This fix in particular: during replay on an MDT, we still have to set the right mdsize, but don't re-setup the llogs until recovery finishes. (replay-single test 13) Also check recovery w/o replay: create file, failover MDT, create another file. --- lustre/mds/handler.c | 2 +- lustre/mds/mds_lov.c | 32 ++++++++++++++++++-------------- lustre/mds/mds_open.c | 22 ++++++++++++---------- lustre/mgc/mgc_request.c | 5 +++-- lustre/mgs/mgs_handler.c | 2 +- lustre/mgs/mgs_llog.c | 2 +- lustre/obdclass/llog_lvfs.c | 8 +++++--- lustre/obdclass/obd_mount.c | 8 +++----- 8 files changed, 44 insertions(+), 37 deletions(-) diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 59f59dd..4e95a45 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -1911,9 +1911,9 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) /* Don't wait for mds_postrecov trying to clear orphans */ obd->obd_async_recov = 1; rc = mds_postsetup(obd); + obd->obd_async_recov = 0; if (rc) GOTO(err_qctxt, rc); - obd->obd_async_recov = 0; lprocfs_init_vars(mds, &lvars); lprocfs_obd_setup(obd, lvars.obd_vars); diff --git a/lustre/mds/mds_lov.c b/lustre/mds/mds_lov.c index 42a0259..94d3076 100644 --- a/lustre/mds/mds_lov.c +++ b/lustre/mds/mds_lov.c @@ -231,7 +231,7 @@ static int mds_lov_update_desc(struct obd_device *obd, struct obd_export *lov) mds->mds_max_mdsize = lov_mds_md_size(stripes); mds->mds_max_cookiesize = stripes * sizeof(struct llog_cookie); - CDEBUG(D_HA, "updated max_mdsize/max_cookiesize: %d/%d\n", + CDEBUG(D_HA|D_WARNING, "updated max_mdsize/max_cookiesize: %d/%d\n", mds->mds_max_mdsize, mds->mds_max_cookiesize); out: @@ -257,9 +257,12 @@ static int mds_lov_update_mds(struct obd_device *obd, if (rc) RETURN(rc); - /* idx is set as data from lov_notify. In the recovery case, this - is not set. */ - if (idx != MDSLOV_NO_INDEX) { + CDEBUG(D_ERROR, "idx=%d, recov=%d/%d, cnt=%d/%d\n", + idx, obd->obd_recovering, obd->obd_async_recov, old_count, + mds->mds_lov_desc.ld_tgt_count); + + /* idx is set as data from lov_notify. */ + if (idx != MDSLOV_NO_INDEX && !obd->obd_recovering) { if (idx >= mds->mds_lov_desc.ld_tgt_count) { CERROR("index %d > count %d!\n", idx, mds->mds_lov_desc.ld_tgt_count); @@ -289,9 +292,9 @@ static int mds_lov_update_mds(struct obd_device *obd, } /* If we added a target we have to reconnect the llogs */ - if (idx != MDSLOV_NO_INDEX || - mds->mds_lov_desc.ld_tgt_count > old_count) { - CDEBUG(D_CONFIG, "reset llogs idx=%d\n", idx); + /* Only do this at first add (idx), or the first time after recovery */ + if (idx != MDSLOV_NO_INDEX || 1/*FIXME*/) { + CDEBUG(D_CONFIG|D_WARNING, "reset llogs idx=%d\n", idx); /* These two must be atomic */ down(&mds->mds_orphan_recovery_sem); obd_llog_finish(obd, old_count); @@ -678,10 +681,9 @@ static int __mds_lov_synchronize(void *data) GOTO(out, rc); } - EXIT; out: class_decref(obd); - return rc; + RETURN(rc); } int mds_lov_synchronize(void *data) @@ -769,7 +771,7 @@ int mds_notify(struct obd_device *obd, struct obd_device *watched, RETURN(0); } - CDEBUG(D_WARNING, "notify %s ev=%d\n", watched->obd_name, ev); + CDEBUG(D_CONFIG, "notify %s ev=%d\n", watched->obd_name, ev); if (strcmp(watched->obd_type->typ_name, LUSTRE_OSC_NAME) != 0) { CERROR("unexpected notification of %s %s!\n", @@ -778,18 +780,20 @@ int mds_notify(struct obd_device *obd, struct obd_device *watched, } if (obd->obd_recovering) { - /* if MDT is in recovery we do not reinit desc and - easize, as that will be done in mds_postrecov() after - recovery is finished. */ CWARN("MDS %s: in recovery, not resetting orphans on %s\n", obd->obd_name, watched->u.cli.cl_import->imp_target_uuid.uuid); + /* We still have to fix the lov descriptor for ost's added + after the mdt in the config log. They didn't make it into + mds_lov_connect. */ + rc = mds_lov_update_desc(obd, obd->u.mds.mds_osc_exp); RETURN(rc); - } + } LASSERT(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT) != NULL); rc = mds_lov_start_synchronize(obd, watched, data, !(ev == OBD_NOTIFY_SYNC)); + lquota_recovery(quota_interface, obd); RETURN(rc); diff --git a/lustre/mds/mds_open.c b/lustre/mds/mds_open.c index 4bf67bc..f78d348 100644 --- a/lustre/mds/mds_open.c +++ b/lustre/mds/mds_open.c @@ -314,7 +314,7 @@ static int mds_create_objects(struct ptlrpc_request *req, int offset, struct obd_trans_info oti = { 0 }; struct lov_stripe_md *lsm = NULL; struct lov_mds_md *lmm = NULL; - int rc, lmm_bufsize, lmm_size; + int rc, lmm_size; struct mds_body *body; struct obdo *oa; void *lmm_buf; @@ -359,15 +359,16 @@ static int mds_create_objects(struct ptlrpc_request *req, int offset, mds_objids_from_lmm(*ids, lmm, &mds->mds_lov_desc); - lmm_buf = lustre_msg_buf(req->rq_repmsg, offset, 0); - lmm_bufsize = req->rq_repmsg->buflens[offset]; - LASSERT(lmm_buf); - LASSERT(lmm_bufsize >= lmm_size); - memcpy(lmm_buf, lmm, lmm_size); rc = fsfilt_set_md(obd, inode, *handle, lmm, lmm_size, "lov"); + lmm_buf = lustre_msg_buf(req->rq_repmsg, offset, lmm_size); + if (!lmm_buf) { + if (!rc) rc = -ENOMEM; + } else { + memcpy(lmm_buf, lmm, lmm_size); + } if (rc) CERROR("open replay failed to set md:%d\n", rc); - RETURN(0); + RETURN(rc); } if (OBD_FAIL_CHECK_ONCE(OBD_FAIL_MDS_ALLOC_OBDO)) @@ -479,10 +480,11 @@ static int mds_create_objects(struct ptlrpc_request *req, int offset, rc = fsfilt_set_md(obd, inode, *handle, lmm, lmm_size, "lov"); lmm_buf = lustre_msg_buf(req->rq_repmsg, offset, lmm_size); - if (!lmm_buf) - rc = -ENOMEM; - else + if (!lmm_buf) { + if (!rc) rc = -ENOMEM; + } else { memcpy(lmm_buf, lmm, lmm_size); + } obd_free_diskmd(mds->mds_osc_exp, &lmm); out_oa: oti_free_cookies(&oti); diff --git a/lustre/mgc/mgc_request.c b/lustre/mgc/mgc_request.c index 4ff5e2c..0e5e2fc 100644 --- a/lustre/mgc/mgc_request.c +++ b/lustre/mgc/mgc_request.c @@ -28,7 +28,7 @@ # define EXPORT_SYMTAB #endif #define DEBUG_SUBSYSTEM S_MGC -#define D_MGC D_CONFIG|D_WARNING +#define D_MGC D_CONFIG/*|D_WARNING*/ #ifdef __KERNEL__ # include @@ -207,7 +207,8 @@ static int config_log_end(char *logname, struct config_llog_instance *cfg) spin_unlock(&config_list_lock); /* drop the start ref */ config_log_put(cld); - CDEBUG(D_MGC, "end config log %s (%d)\n", logname, rc); + CDEBUG(D_MGC, "end config log %s (%d)\n", logname ? logname : "client", + rc); RETURN(rc); } diff --git a/lustre/mgs/mgs_handler.c b/lustre/mgs/mgs_handler.c index 7570553..b25b42d 100644 --- a/lustre/mgs/mgs_handler.c +++ b/lustre/mgs/mgs_handler.c @@ -27,7 +27,7 @@ # define EXPORT_SYMTAB #endif #define DEBUG_SUBSYSTEM S_MGS -#define D_MGS D_CONFIG|D_WARNING +#define D_MGS D_CONFIG/*|D_WARNING*/ #ifdef __KERNEL__ # include diff --git a/lustre/mgs/mgs_llog.c b/lustre/mgs/mgs_llog.c index 14cc41f..703c949 100644 --- a/lustre/mgs/mgs_llog.c +++ b/lustre/mgs/mgs_llog.c @@ -27,7 +27,7 @@ #define EXPORT_SYMTAB #endif #define DEBUG_SUBSYSTEM S_MGS -#define D_MGS D_CONFIG|D_WARNING +#define D_MGS D_CONFIG/*|D_WARNING*/ #ifdef __KERNEL__ #include diff --git a/lustre/obdclass/llog_lvfs.c b/lustre/obdclass/llog_lvfs.c index 76e9ae2..0f04901 100644 --- a/lustre/obdclass/llog_lvfs.c +++ b/lustre/obdclass/llog_lvfs.c @@ -722,17 +722,19 @@ int llog_get_cat_list(struct obd_device *obd, struct obd_device *disk_obd, name, rc); GOTO(out, rc); } - + if (!S_ISREG(file->f_dentry->d_inode->i_mode)) { CERROR("%s is not a regular file!: mode = %o\n", name, file->f_dentry->d_inode->i_mode); GOTO(out, rc = -ENOENT); } + CERROR("cat list: disk size=%d, read=%d\n", + (int)file->f_dentry->d_inode->i_size, size); + rc = fsfilt_read_record(disk_obd, file, idarray, size, &off); if (rc) { - CDEBUG(D_INODE,"OBD filter: error reading %s: rc %d\n", - name, rc); + CERROR("OBD filter: error reading %s: rc %d\n", name, rc); GOTO(out, rc); } diff --git a/lustre/obdclass/obd_mount.c b/lustre/obdclass/obd_mount.c index 7e66ed7..e02673cd 100644 --- a/lustre/obdclass/obd_mount.c +++ b/lustre/obdclass/obd_mount.c @@ -25,7 +25,7 @@ #define DEBUG_SUBSYSTEM S_MGMT -#define D_MOUNT D_SUPER|D_CONFIG|D_WARNING +#define D_MOUNT D_SUPER|D_CONFIG/*|D_WARNING*/ #define PRINT_CMD LCONSOLE #define PRINT_MASK D_WARNING @@ -108,7 +108,7 @@ int class_parse_nid(char *buf, lnet_nid_t *nid, char **endh) if (endh) *endh = endp; - CDEBUG(D_WARNING, "Nid %s\n", libcfs_nid2str(*nid)); + CDEBUG(D_MOUNT, "Nid %s\n", libcfs_nid2str(*nid)); return 0; } @@ -498,10 +498,8 @@ static int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd, struct lustre_cfg_bufs bufs; struct lustre_cfg * lcfg = NULL; int rc; - - CDEBUG((cmd==LCFG_ADD_UUID)?D_WARNING:D_TRACE, - "lcfg %s %#x %s %s %s %s\n", cfgname, + CDEBUG(D_TRACE, "lcfg %s %#x %s %s %s %s\n", cfgname, cmd, s1, s2, s3, s4); lustre_cfg_bufs_reset(&bufs, cfgname); -- 1.8.3.1