Whamcloud - gitweb
remove last scalability issue from lov objid. set next id only for OST which
authorshadow <shadow>
Thu, 6 Dec 2007 07:35:37 +0000 (07:35 +0000)
committershadow <shadow>
Thu, 6 Dec 2007 07:35:37 +0000 (07:35 +0000)
actually finished recovery (mds_postrecov->mds_notify code path).
Always set next id if we ask ost about last id, this fixes problems from
bug 14222.

b=12702
i=tappro
i=yury.umanets

lustre/include/obd.h
lustre/lov/lov_obd.c
lustre/mds/handler.c
lustre/mds/mds_lov.c
lustre/tests/conf-sanity.sh

index ef4c7ff..64844a2 100644 (file)
@@ -989,6 +989,8 @@ enum obd_cleanup_stage {
 #define KEY_CONN_DATA           "conn_data"
 #define KEY_MAX_EASIZE          "max_easize"
 #define KEY_REVIMP_UPD          "revimp_update"
+#define KEY_LOV_IDX             "lov_idx"
+#define KEY_LAST_ID             "last_id"
 
 struct lu_context;
 
index 96d6e88..4edb6be 100644 (file)
@@ -2367,7 +2367,7 @@ static int lov_get_info(struct obd_export *exp, __u32 keylen,
                 LDLM_ERROR(data->lock, "lock on inode without such object");
                 dump_lsm(D_ERROR, data->lsm);
                 GOTO(out, rc = -ENXIO);
-        } else if (KEY_IS("last_id")) {
+        } else if (KEY_IS(KEY_LAST_ID)) {
                 struct obd_id_info *info = val;
                 int size = sizeof(obd_id);
                 struct lov_tgt_desc *tgt;
@@ -2385,6 +2385,14 @@ static int lov_get_info(struct obd_export *exp, __u32 keylen,
                 *desc_ret = lov->desc;
 
                 GOTO(out, rc = 0);
+        } else if (KEY_IS(KEY_LOV_IDX)) {
+                struct lov_tgt_desc *tgt;
+
+                for(i = 0; i < lov->desc.ld_tgt_count; i++) {
+                        tgt = lov->lov_tgts[i];
+                        if (obd_uuid_equals(val, &tgt->ltd_uuid))
+                                GOTO(out, rc = i);
+                }
         }
 
         rc = -EINVAL;
@@ -2420,7 +2428,7 @@ static int lov_set_info_async(struct obd_export *exp, obd_count keylen,
 
         if (KEY_IS(KEY_NEXT_ID)) {
                 count = vallen / sizeof(struct obd_id_info);
-                vallen = sizeof(struct obd_id_info);
+                vallen = sizeof(obd_id);
                 incr = sizeof(struct obd_id_info);
                 do_inactive = 1;
                 next_id = 1;
index e6d2a96..bf008b0 100644 (file)
@@ -2137,7 +2137,7 @@ err_cleanup:
 
 int mds_postrecov(struct obd_device *obd)
 {
-        int rc;
+        int rc = 0;
         ENTRY;
 
         if (obd->obd_fail)
@@ -2145,17 +2145,6 @@ int mds_postrecov(struct obd_device *obd)
 
         LASSERT(!obd->obd_recovering);
         LASSERT(llog_get_context(obd, LLOG_MDS_OST_ORIG_CTXT) != NULL);
-
-        /* set nextid first, so we are sure it happens */
-        mutex_down(&obd->obd_dev_sem);
-        rc = mds_lov_set_nextid(obd);
-        mutex_up(&obd->obd_dev_sem);
-        if (rc) {
-                CERROR("%s: mds_lov_set_nextid failed %d\n",
-                       obd->obd_name, rc);
-                GOTO(out, rc);
-        }
-
         /* clean PENDING dir */
         if (strncmp(obd->obd_name, MDD_OBD_NAME, strlen(MDD_OBD_NAME)))
                 rc = mds_cleanup_pending(obd);
index db3f531..ee98565 100644 (file)
@@ -69,7 +69,7 @@ skip_bitmap:
                 for(j=0; j < OBJID_PER_PAGE(); j++) {
                         if (data[j] == 0)
                                 continue;
-                        CDEBUG(D_INFO,"objid page %u idx %u - %llu ",i,j,data[j]);
+                        CDEBUG(D_INFO,"objid page %u idx %u - %llu \n", i,j,data[j]);
                 }
         }
 skip_array:
@@ -194,7 +194,7 @@ static int mds_lov_read_objids(struct obd_device *obd)
                 RETURN(0);
 
         page = (size/(OBJID_PER_PAGE()*sizeof(obd_id)))+1;
-        CDEBUG(D_INFO, "file size %d pages %d\n", size, page); 
+        CDEBUG(D_INFO, "file size %d pages %d\n", size, page);
         for(i=0; i < page; i++) {
                 obd_id *data =  mds->mds_lov_page_array[i];
                 loff_t off_old = off;
@@ -215,12 +215,16 @@ static int mds_lov_read_objids(struct obd_device *obd)
                 if (off == off_old)
                         break; // eof
 
-                count += (off-off_old+sizeof(obd_id)-1)/sizeof(obd_id);
+                count += (off-off_old)/sizeof(obd_id);
         }
         mds->mds_lov_objid_count = count;
-        mds->mds_lov_objid_lastpage = count / OBJID_PER_PAGE();
-        mds->mds_lov_objid_lastidx = count % OBJID_PER_PAGE();
-        CDEBUG(D_INFO, "Read %u objid\n", count);
+       if (count) {
+                count --;
+                mds->mds_lov_objid_lastpage = count / OBJID_PER_PAGE();
+                mds->mds_lov_objid_lastidx = count % OBJID_PER_PAGE();
+       }
+        CDEBUG(D_INFO, "Read %u - %u %u objid\n", count,
+               mds->mds_lov_objid_lastpage, mds->mds_lov_objid_lastidx);
 out:
         mds_lov_dump_objids("read",obd);
 
@@ -246,9 +250,8 @@ int mds_lov_write_objids(struct obd_device *obd)
                 LASSERT(data != NULL);
 
                 /* check for particaly filled last page */
-                if (i == mds->mds_lov_objid_lastpage) {
-                        size = mds->mds_lov_objid_lastidx * sizeof(obd_id);
-                }
+                if (i == mds->mds_lov_objid_lastpage)
+                        size = (mds->mds_lov_objid_lastidx+1) * sizeof(obd_id);
 
                 rc = fsfilt_write_record(obd, mds->mds_lov_objid_filp, data,
                                          size, &off, 0);
@@ -291,8 +294,8 @@ static int mds_lov_get_objid(struct obd_device * obd, struct obd_export *export,
 
                 lastid.idx = idx;
                 lastid.data = &data[off];
-                rc = obd_get_info(export, sizeof("last_id"),
-                                  "last_id", &size, &lastid);
+                rc = obd_get_info(export, sizeof(KEY_LAST_ID),
+                                  KEY_LAST_ID, &size, &lastid);
                 if (rc)
                         GOTO(out, rc);
 
@@ -332,54 +335,6 @@ int mds_lov_clear_orphans(struct mds_obd *mds, struct obd_uuid *ost_uuid)
 
         RETURN(rc);
 }
-/* update the LOV-OSC knowledge of the last used object id's */
-/* for all targets */
-/* is we realy need this ? all osc's should be pass via __mds_lov_synchronize
- * and call */
-#define MDS_LOV_SETID_COUNT (CFS_PAGE_SIZE / sizeof(struct obd_id_info))
-
-int mds_lov_set_nextid(struct obd_device *obd)
-{
-        struct mds_obd *mds = &obd->u.mds;
-        int i = 0, j, rc = 0;
-        struct obd_id_info *info;
-        ENTRY;
-
-        LASSERT(!obd->obd_recovering);
-
-        /* obd->obd_dev_sem must be held so mds_lov_objids doesn't change */
-        LASSERT_SEM_LOCKED(&obd->obd_dev_sem);
-
-        OBD_ALLOC(info, CFS_PAGE_SIZE);
-        if (info == NULL)
-                RETURN(-ENOMEM);
-
-        while(i < mds->mds_lov_desc.ld_tgt_count) {
-                for(j=0; j < MDS_LOV_SETID_COUNT; i++, j++) {
-                        int page = i / OBJID_PER_PAGE();
-                        int idx = i % OBJID_PER_PAGE();
-                        obd_id *data = mds->mds_lov_page_array[page];
-
-                        if (i == mds->mds_lov_desc.ld_tgt_count)
-                                break;
-
-                        info[j].idx = i;
-                        info[j].data = &data[idx];
-                }
-
-                rc = obd_set_info_async(mds->mds_osc_exp, sizeof(KEY_NEXT_ID),
-                                KEY_NEXT_ID, sizeof(info), &info, NULL);
-                if (rc) {
-                        CERROR ("%s: mds_lov_set_nextid failed (%d)\n",
-                                 obd->obd_name, rc);
-                        break;
-                }
-        }
-        OBD_FREE(info, CFS_PAGE_SIZE);
-
-        RETURN(rc);
-
-}
 
 /* for one target */
 static int mds_lov_set_one_nextid(struct obd_device *obd, __u32 idx, obd_id *id)
@@ -406,6 +361,19 @@ static int mds_lov_set_one_nextid(struct obd_device *obd, __u32 idx, obd_id *id)
         RETURN(rc);
 }
 
+/* Ask the LOV (KEY_LOV_IDX) for the target index of the OST @ost_uuid. */
+static __u32 mds_lov_get_idx(struct obd_export *lov,
+                             struct obd_uuid *ost_uuid)
+{
+        int valsize = sizeof(*ost_uuid); /* the uuid itself, not the pointer */
+        int rc = obd_get_info(lov, sizeof(KEY_LOV_IDX), KEY_LOV_IDX,
+                              &valsize, ost_uuid);
+        /* rc is the index on success; a negative errno must not be used */
+        LASSERT(rc >= 0);
+        RETURN(rc);
+}
+}
+
 /* Update the lov desc for a new size lov. */
 static int mds_lov_update_desc(struct obd_device *obd, struct obd_export *lov)
 {
@@ -497,9 +465,13 @@ static int mds_lov_update_mds(struct obd_device *obd,
                mds->mds_lov_desc.ld_tgt_count);
 
         /* idx is set as data from lov_notify. */
-        if (idx == MDSLOV_NO_INDEX || obd->obd_recovering)
+        if (obd->obd_recovering)
                 GOTO(out, rc);
 
+        /* mds_postrecov does not know the OST index - ask the lov for it */
+        if (idx == MDSLOV_NO_INDEX)
+                idx = mds_lov_get_idx(mds->mds_osc_exp, uuid);
+
         if (idx >= mds->mds_lov_desc.ld_tgt_count) {
                 CERROR("index %d > count %d!\n", idx,
                        mds->mds_lov_desc.ld_tgt_count);
@@ -510,17 +482,17 @@ static int mds_lov_update_mds(struct obd_device *obd,
         off = idx % OBJID_PER_PAGE();
         data = mds->mds_lov_page_array[page];
         CDEBUG(D_CONFIG, "idx %d - %p - %d/%d\n", idx, data, page, off);
+
         if (data[off] == 0) {
                 rc = mds_lov_get_objid(obd, watched->obd_self_export, idx);
-        } else {
-                /* We have read this lastid from disk; tell the osc.
-                   Don't call this during recovery. */
-                rc = mds_lov_set_one_nextid(obd, idx, &data[off]);
-                if (rc) {
-                        CERROR("Failed to set next id, idx=%d rc=%d\n", idx,rc);
-                        /* Don't abort the rest of the sync */
-                        rc = 0;
-                }
+        }
+        /* Always tell the osc the next id, whether it was read from disk or
+           just fetched from the OST.  Don't call this during recovery. */
+        rc = mds_lov_set_one_nextid(obd, idx, &data[off]);
+        if (rc) {
+                CERROR("Failed to set next id, idx=%d rc=%d\n", idx,rc);
+                /* Don't abort the rest of the sync */
+                rc = 0;
         }
 
         CDEBUG(D_CONFIG, "last object "LPU64" from OST %d rc=%d\n",
@@ -981,7 +953,7 @@ out:
                 CERROR("%s sync failed %d, deactivating\n", obd_uuid2str(uuid),
                        rc);
                 if (!obd->obd_stopping && mds->mds_osc_obd &&
-                    !mds->mds_osc_obd->obd_stopping && !watched->obd_stopping) 
+                    !mds->mds_osc_obd->obd_stopping && !watched->obd_stopping)
                         obd_notify(mds->mds_osc_obd, watched,
                                    OBD_NOTIFY_INACTIVE, NULL);
         }
index 75b3644..c601994 100644 (file)
@@ -1424,6 +1424,66 @@ test_37() {
 }
 run_test 37 "verify set tunables works for symlink device"
 
+test_38() { # bug 14222: MDS must rebuild lov_objid from OST data
+       setup
+       # like runtests: copy up to $COUNT files (-mtime +1) from $SRC to $DIR/$tdir
+       COUNT=10
+       SRC="/etc /bin"
+       FILES=`find $SRC -type f -mtime +1 | head -n $COUNT`
+       log "copying $(echo $FILES | wc -w) files to $DIR/$tdir"
+       mkdir -p $DIR/$tdir
+       tar cf - $FILES | tar xf - -C $DIR/$tdir || \
+               error "copying $SRC to $DIR/$tdir"
+       sync
+       umount_client $MOUNT
+       stop_mds
+       log "rename lov_objid file on MDS"
+       rm -f $TMP/lov_objid.orig
+       do_facet mds "debugfs -w $MDSDEV" <<-EOF
+               dump lov_objid $TMP/lov_objid.orig
+               rm lov_objid
+       EOF
+       do_facet mds "od -Ax -td8 $TMP/lov_objid.orig"
+       # check create in mds_lov_connect: restart the MDS with lov_objid removed
+       start_mds
+       mount_client $MOUNT
+       for f in $FILES; do
+               [ $V ] && log "verifying $DIR/$tdir/$f"
+               diff $f $DIR/$tdir/$f || ERROR=y
+       done
+       do_facet mds "debugfs -c $MDSDEV" <<-EOF
+               dump lov_objid $TMP/lov_objid.new
+       EOF
+       do_facet mds "od -Ax -td8 $TMP/lov_objid.new"
+       [ "$ERROR" = "y" ] && error "old and new files are different after connect" || true
+       # Second pass: replace lov_objid with the multiop-generated
+       # lov_objid.clear, restart the MDS, and verify the copied files
+       # again, i.e. check that lov_objid is updated on sync.
+       umount_client $MOUNT
+       stop_mds        
+       multiop $TMP/lov_objid.clear Ow4096c
+       do_facet mds "debugfs -w $MDSDEV" <<-EOF
+               write $TMP/lov_objid.clear lov_objid
+       EOF
+       start_mds
+       mount_client $MOUNT
+       for f in $FILES; do
+               [ $V ] && log "verifying $DIR/$tdir/$f"
+               diff $f $DIR/$tdir/$f || ERROR=y
+       done
+       do_facet mds "debugfs -c $MDSDEV" <<-EOF
+               dump lov_objid $TMP/lov_objid.new1
+       EOF
+       do_facet mds "od -Ax -td8 $TMP/lov_objid.new1"
+       umount_client $MOUNT
+       stop_mds
+       [ "$ERROR" = "y" ] && error "old and new files are different after sync" || true
+       
+       log "files compared the same"
+       #cleanup
+}
+run_test 38 "MDS recreates missing lov_objid file from OST data"
+
 umount_client $MOUNT
 cleanup_nocli
 cleanup_krb5_env