From 502622a15525d2d6c37f36aad549d8e3a885580b Mon Sep 17 00:00:00 2001 From: shadow Date: Thu, 6 Dec 2007 07:35:37 +0000 Subject: [PATCH] remove last scalability issue from lov objid. set next id only for OST which actually finished recovery (mds_postrecov->mds_notify code path). Always set next id if we ask ost about last id, this fixes problems from bug 14222. b=12702 i=tappro i=yury.umanets --- lustre/include/obd.h | 2 + lustre/lov/lov_obd.c | 12 ++++- lustre/mds/handler.c | 13 +---- lustre/mds/mds_lov.c | 112 +++++++++++++++++--------------------------- lustre/tests/conf-sanity.sh | 60 ++++++++++++++++++++++++ 5 files changed, 115 insertions(+), 84 deletions(-) diff --git a/lustre/include/obd.h b/lustre/include/obd.h index ef4c7ff..64844a2 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -989,6 +989,8 @@ enum obd_cleanup_stage { #define KEY_CONN_DATA "conn_data" #define KEY_MAX_EASIZE "max_easize" #define KEY_REVIMP_UPD "revimp_update" +#define KEY_LOV_IDX "lov_idx" +#define KEY_LAST_ID "last_id" struct lu_context; diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index 96d6e88..4edb6be 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -2367,7 +2367,7 @@ static int lov_get_info(struct obd_export *exp, __u32 keylen, LDLM_ERROR(data->lock, "lock on inode without such object"); dump_lsm(D_ERROR, data->lsm); GOTO(out, rc = -ENXIO); - } else if (KEY_IS("last_id")) { + } else if (KEY_IS(KEY_LAST_ID)) { struct obd_id_info *info = val; int size = sizeof(obd_id); struct lov_tgt_desc *tgt; @@ -2385,6 +2385,14 @@ static int lov_get_info(struct obd_export *exp, __u32 keylen, *desc_ret = lov->desc; GOTO(out, rc = 0); + } else if (KEY_IS(KEY_LOV_IDX)) { + struct lov_tgt_desc *tgt; + + for(i = 0; i < lov->desc.ld_tgt_count; i++) { + tgt = lov->lov_tgts[i]; + if (obd_uuid_equals(val, &tgt->ltd_uuid)) + GOTO(out, rc = i); + } } rc = -EINVAL; @@ -2420,7 +2428,7 @@ static int lov_set_info_async(struct obd_export *exp, obd_count keylen, if
(KEY_IS(KEY_NEXT_ID)) { count = vallen / sizeof(struct obd_id_info); - vallen = sizeof(struct obd_id_info); + vallen = sizeof(obd_id); incr = sizeof(struct obd_id_info); do_inactive = 1; next_id = 1; diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index e6d2a96..bf008b0 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -2137,7 +2137,7 @@ err_cleanup: int mds_postrecov(struct obd_device *obd) { - int rc; + int rc = 0; ENTRY; if (obd->obd_fail) @@ -2145,17 +2145,6 @@ int mds_postrecov(struct obd_device *obd) LASSERT(!obd->obd_recovering); LASSERT(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT) != NULL); - - /* set nextid first, so we are sure it happens */ - mutex_down(&obd->obd_dev_sem); - rc = mds_lov_set_nextid(obd); - mutex_up(&obd->obd_dev_sem); - if (rc) { - CERROR("%s: mds_lov_set_nextid failed %d\n", - obd->obd_name, rc); - GOTO(out, rc); - } - /* clean PENDING dir */ if (strncmp(obd->obd_name, MDD_OBD_NAME, strlen(MDD_OBD_NAME))) rc = mds_cleanup_pending(obd); diff --git a/lustre/mds/mds_lov.c b/lustre/mds/mds_lov.c index db3f531..ee98565 100644 --- a/lustre/mds/mds_lov.c +++ b/lustre/mds/mds_lov.c @@ -69,7 +69,7 @@ skip_bitmap: for(j=0; j < OBJID_PER_PAGE(); j++) { if (data[j] == 0) continue; - CDEBUG(D_INFO,"objid page %u idx %u - %llu ",i,j,data[j]); + CDEBUG(D_INFO,"objid page %u idx %u - %llu \n", i,j,data[j]); } } skip_array: @@ -194,7 +194,7 @@ static int mds_lov_read_objids(struct obd_device *obd) RETURN(0); page = (size/(OBJID_PER_PAGE()*sizeof(obd_id)))+1; - CDEBUG(D_INFO, "file size %d pages %d\n", size, page); + CDEBUG(D_INFO, "file size %d pages %d\n", size, page); for(i=0; i < page; i++) { obd_id *data = mds->mds_lov_page_array[i]; loff_t off_old = off; @@ -215,12 +215,16 @@ static int mds_lov_read_objids(struct obd_device *obd) if (off == off_old) break; // eof - count += (off-off_old+sizeof(obd_id)-1)/sizeof(obd_id); + count += (off-off_old)/sizeof(obd_id); } mds->mds_lov_objid_count = count; - mds->mds_lov_objid_lastpage 
= count / OBJID_PER_PAGE(); - mds->mds_lov_objid_lastidx = count % OBJID_PER_PAGE(); - CDEBUG(D_INFO, "Read %u objid\n", count); + if (count) { + count --; + mds->mds_lov_objid_lastpage = count / OBJID_PER_PAGE(); + mds->mds_lov_objid_lastidx = count % OBJID_PER_PAGE(); + } + CDEBUG(D_INFO, "Read %u - %u %u objid\n", count, + mds->mds_lov_objid_lastpage, mds->mds_lov_objid_lastidx); out: mds_lov_dump_objids("read",obd); @@ -246,9 +250,8 @@ int mds_lov_write_objids(struct obd_device *obd) LASSERT(data != NULL); /* check for particaly filled last page */ - if (i == mds->mds_lov_objid_lastpage) { - size = mds->mds_lov_objid_lastidx * sizeof(obd_id); - } + if (i == mds->mds_lov_objid_lastpage) + size = (mds->mds_lov_objid_lastidx+1) * sizeof(obd_id); rc = fsfilt_write_record(obd, mds->mds_lov_objid_filp, data, size, &off, 0); @@ -291,8 +294,8 @@ static int mds_lov_get_objid(struct obd_device * obd, struct obd_export *export, lastid.idx = idx; lastid.data = &data[off]; - rc = obd_get_info(export, sizeof("last_id"), - "last_id", &size, &lastid); + rc = obd_get_info(export, sizeof(KEY_LAST_ID), + KEY_LAST_ID, &size, &lastid); if (rc) GOTO(out, rc); @@ -332,54 +335,6 @@ int mds_lov_clear_orphans(struct mds_obd *mds, struct obd_uuid *ost_uuid) RETURN(rc); } -/* update the LOV-OSC knowledge of the last used object id's */ -/* for all targets */ -/* is we realy need this ? 
all osc's should be pass via __mds_lov_synchronize - * and call */ -#define MDS_LOV_SETID_COUNT (CFS_PAGE_SIZE / sizeof(struct obd_id_info)) - -int mds_lov_set_nextid(struct obd_device *obd) -{ - struct mds_obd *mds = &obd->u.mds; - int i = 0, j, rc = 0; - struct obd_id_info *info; - ENTRY; - - LASSERT(!obd->obd_recovering); - - /* obd->obd_dev_sem must be held so mds_lov_objids doesn't change */ - LASSERT_SEM_LOCKED(&obd->obd_dev_sem); - - OBD_ALLOC(info, CFS_PAGE_SIZE); - if (info == NULL) - RETURN(-ENOMEM); - - while(i < mds->mds_lov_desc.ld_tgt_count) { - for(j=0; j < MDS_LOV_SETID_COUNT; i++, j++) { - int page = i / OBJID_PER_PAGE(); - int idx = i % OBJID_PER_PAGE(); - obd_id *data = mds->mds_lov_page_array[page]; - - if (i == mds->mds_lov_desc.ld_tgt_count) - break; - - info[j].idx = i; - info[j].data = &data[idx]; - } - - rc = obd_set_info_async(mds->mds_osc_exp, sizeof(KEY_NEXT_ID), - KEY_NEXT_ID, sizeof(info), &info, NULL); - if (rc) { - CERROR ("%s: mds_lov_set_nextid failed (%d)\n", - obd->obd_name, rc); - break; - } - } - OBD_FREE(info, CFS_PAGE_SIZE); - - RETURN(rc); - -} /* for one target */ static int mds_lov_set_one_nextid(struct obd_device *obd, __u32 idx, obd_id *id) @@ -406,6 +361,19 @@ static int mds_lov_set_one_nextid(struct obd_device *obd, __u32 idx, obd_id *id) RETURN(rc); } +static __u32 mds_lov_get_idx(struct obd_export *lov, + struct obd_uuid *ost_uuid) +{ + int rc; + int valsize = sizeof(ost_uuid); + + rc = obd_get_info(lov, sizeof(KEY_LOV_IDX), KEY_LOV_IDX, + &valsize, ost_uuid); + LASSERT(rc >= 0); + + RETURN(rc); +} + /* Update the lov desc for a new size lov. */ static int mds_lov_update_desc(struct obd_device *obd, struct obd_export *lov) { @@ -497,9 +465,13 @@ static int mds_lov_update_mds(struct obd_device *obd, mds->mds_lov_desc.ld_tgt_count); /* idx is set as data from lov_notify. 
*/ - if (idx == MDSLOV_NO_INDEX || obd->obd_recovering) + if (obd->obd_recovering) GOTO(out, rc); + /* mds post recov not know about ost index - ask lov for it */ + if (idx == MDSLOV_NO_INDEX) + idx = mds_lov_get_idx(mds->mds_osc_exp, uuid); + if (idx >= mds->mds_lov_desc.ld_tgt_count) { CERROR("index %d > count %d!\n", idx, mds->mds_lov_desc.ld_tgt_count); @@ -510,17 +482,17 @@ static int mds_lov_update_mds(struct obd_device *obd, off = idx % OBJID_PER_PAGE(); data = mds->mds_lov_page_array[page]; CDEBUG(D_CONFIG, "idx %d - %p - %d/%d\n", idx, data, page, off); + if (data[off] == 0) { rc = mds_lov_get_objid(obd, watched->obd_self_export, idx); - } else { - /* We have read this lastid from disk; tell the osc. - Don't call this during recovery. */ - rc = mds_lov_set_one_nextid(obd, idx, &data[off]); - if (rc) { - CERROR("Failed to set next id, idx=%d rc=%d\n", idx,rc); - /* Don't abort the rest of the sync */ - rc = 0; - } + } + /* We have read this lastid from disk; tell the osc. + Don't call this during recovery. 
*/ + rc = mds_lov_set_one_nextid(obd, idx, &data[off]); + if (rc) { + CERROR("Failed to set next id, idx=%d rc=%d\n", idx,rc); + /* Don't abort the rest of the sync */ + rc = 0; } CDEBUG(D_CONFIG, "last object "LPU64" from OST %d rc=%d\n", @@ -981,7 +953,7 @@ out: CERROR("%s sync failed %d, deactivating\n", obd_uuid2str(uuid), rc); if (!obd->obd_stopping && mds->mds_osc_obd && - !mds->mds_osc_obd->obd_stopping && !watched->obd_stopping) + !mds->mds_osc_obd->obd_stopping && !watched->obd_stopping) obd_notify(mds->mds_osc_obd, watched, OBD_NOTIFY_INACTIVE, NULL); } diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 75b3644..c601994 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -1424,6 +1424,66 @@ test_37() { } run_test 37 "verify set tunables works for symlink device" +test_38() { # bug 14222 + setup + # like runtests + COUNT=10 + SRC="/etc /bin" + FILES=`find $SRC -type f -mtime +1 | head -n $COUNT` + log "copying $(echo $FILES | wc -w) files to $DIR/$tdir" + mkdir -p $DIR/$tdir + tar cf - $FILES | tar xf - -C $DIR/$tdir || \ + error "copying $SRC to $DIR/$tdir" + sync + umount_client $MOUNT + stop_mds + log "rename lov_objid file on MDS" + rm -f $TMP/lov_objid.orig + do_facet mds "debugfs -w $MDSDEV" <<-EOF + dump lov_objid $TMP/lov_objid.orig + rm lov_objid + EOF + do_facet mds "od -Ax -td8 $TMP/lov_objid.orig" + # check create in mds_lov_connect + start_mds + mount_client $MOUNT + for f in $FILES; do + [ $V ] && log "verifying $DIR/$tdir/$f" + diff $f $DIR/$tdir/$f || ERROR=y + done + do_facet mds "debugfs -c $MDSDEV" <<-EOF + dump lov_objid $TMP/lov_objid.new + EOF + do_facet mds "od -Ax -td8 $TMP/lov_objid.new" + [ "$ERROR" = "y" ] && error "old and new files are different after connect" || true + + + # check it's updates in sync + umount_client $MOUNT + stop_mds + multiop $TMP/lov_objid.clear Ow4096c + do_facet mds "debugfs -w $MDSDEV" <<-EOF + write $TMP/lov_objid.clear lov_objid + EOF + start_mds + 
mount_client $MOUNT + for f in $FILES; do + [ $V ] && log "verifying $DIR/$tdir/$f" + diff $f $DIR/$tdir/$f || ERROR=y + done + do_facet mds "debugfs -c $MDSDEV" <<-EOF + dump lov_objid $TMP/lov_objid.new1 + EOF + do_facet mds "od -Ax -td8 $TMP/lov_objid.new1" + umount_client $MOUNT + stop_mds + [ "$ERROR" = "y" ] && error "old and new files are different after sync" || true + + log "files compared the same" + #cleanup +} +run_test 38 "MDS recreates missing lov_objid file from OST data" + umount_client $MOUNT cleanup_nocli cleanup_krb5_env -- 1.8.3.1