Whamcloud - gitweb
Don't LBUG the MDS if an orphan cleanup request times out; instead allow
author rread <rread>
Tue, 9 Mar 2004 22:47:17 +0000 (22:47 +0000)
committer rread <rread>
Tue, 9 Mar 2004 22:47:17 +0000 (22:47 +0000)
the individual OSC to recover on its own.

b=1530

lustre/ChangeLog
lustre/lov/lov_obd.c
lustre/mds/mds_lov.c
lustre/tests/replay-single.sh

index 23f9ea5..3420d0d 100644 (file)
@@ -5,6 +5,7 @@ tbd  Cluster File Systems, Inc. <info@clusterfs.com>
        - don't overwrite extent policy data in reply if lock was blocked (2901)
        - drop filter export grants atomically with removal from device (2663)
        - del obd_self_export from work_list in class_disconnect_exports (2908)
+       - don't LBUG if MDS recovery times out during orphan cleanup (2530)
 
 2004-03-04  Cluster File Systems, Inc. <info@clusterfs.com>
        * version 1.2.0
index 9c9fbb1..b0be68f 100644 (file)
@@ -568,15 +568,14 @@ static int lov_clear_orphans(struct obd_export *export, struct obdo *src_oa,
                         continue;
 
                 memcpy(tmp_oa, src_oa, sizeof(*tmp_oa));
-
+                
                 /* XXX: LOV STACKING: use real "obj_mdp" sub-data */
                 err = obd_create(lov->tgts[i].ltd_exp, tmp_oa, &obj_mdp, oti);
-                if (err) {
+                if (err)
+                        /* This export will be disabled until it is recovered,
+                           and then orphan recovery will be completed. */
                         CERROR("error in orphan recovery on OST idx %d/%d: "
                                "rc = %d\n", i, lov->desc.ld_tgt_count, err);
-                        if (!rc)
-                                rc = err;
-                }
 
                 if (ost_uuid)
                         break;
@@ -2624,8 +2623,7 @@ static int lov_set_info(struct obd_export *exp, obd_count keylen,
                 for (i = 0; i < lov->desc.ld_tgt_count; i++) {
                         int er;
 
-                        if (!lov->tgts[i].active)
-                                continue;
+                        /* initialize all OSCs, even inactive ones */
 
                         er = obd_set_info(lov->tgts[i].ltd_exp, keylen, key,
                                           sizeof(obd_id), ((obd_id*)val) + i);
index 0e9d2f0..3520849 100644 (file)
@@ -159,20 +159,8 @@ int mds_lov_set_nextid(struct obd_device *obd)
                 GOTO(out, rc);
 
         rc = mds_lov_clearorphans(mds, NULL /* all OSTs */);
-        if (rc < 0)
-                GOTO(out, rc);
 
 out:
-        if (rc && mds->mds_lov_objids) {
-                /* Might as well crash here, until we figure out what to do.
-                 * If we OBD_FREE, we'll just LASSERT the next time through this
-                 * function. */
-                LBUG();
-                OBD_FREE(mds->mds_lov_objids,
-                         mds->mds_lov_desc.ld_tgt_count * sizeof(obd_id));
-                mds->mds_lov_objids = NULL;
-        }
-
         RETURN(rc);
 }
 
index 67595fc..ef241b2 100755 (executable)
@@ -838,6 +838,22 @@ test_42() {
 }
 run_test 42 "recoery after ost failure"
 
+# b=2530
+# directory orphans can't be unlinked from PENDING directory
+test_43() {
+    replay_barrier mds
+
+    # OBD_FAIL_OST_CREATE_NET 0x204
+    do_facet ost "sysctl -w lustre.fail_loc=0x80000204"
+    facet_failover mds
+    df $MOUNT || return 1
+    sleep 10
+    do_facet ost "sysctl -w lustre.fail_loc=0"
+
+    return 0
+}
+run_test 43 "mds osc import failure during recovery; don't LBUG"
+
 equals_msg test complete, cleaning up
 $CLEANUP