Whamcloud - gitweb
b=17485
authorzhanghc <zhanghc>
Thu, 16 Apr 2009 15:15:58 +0000 (15:15 +0000)
committerzhanghc <zhanghc>
Thu, 16 Apr 2009 15:15:58 +0000 (15:15 +0000)
Don't reuse orphan objects on the OST while
the MDS is establishing its connection

i=shadow
i=bobijam

lustre/include/lustre/lustre_idl.h
lustre/mds/mds_lov.c
lustre/obdfilter/filter.c
lustre/osc/osc_create.c
lustre/tests/recovery-small.sh

index 1a608e4..6be9cef 100644 (file)
@@ -356,6 +356,7 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb, int msgsize);
 #define OBD_CONNECT_VBR        0x80000000ULL /*version based recovery */
 #define OBD_CONNECT_LOV_V3    0x100000000ULL /*client supports LOV v3 EA */
 #define OBD_CONNECT_GRANT_SHRINK  0x200000000ULL /* support grant shrink */
+#define OBD_CONNECT_SKIP_ORPHAN   0x400000000ULL /* don't reuse orphan objids */
 /* also update obd_connect_names[] for lprocfs_rd_connect_flags()
  * and lustre/utils/wirecheck.c */
 
@@ -379,7 +380,8 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb, int msgsize);
                                 OBD_CONNECT_CANCELSET | OBD_CONNECT_AT | \
                                 LRU_RESIZE_CONNECT_FLAG | OBD_CONNECT_CKSUM | \
                                 OBD_CONNECT_VBR | OBD_CONNECT_CHANGE_QS | \
-                                OBD_CONNECT_MDS | OBD_CONNECT_GRANT_SHRINK)
+                                OBD_CONNECT_MDS | OBD_CONNECT_GRANT_SHRINK | \
+                                OBD_CONNECT_SKIP_ORPHAN)
 #define ECHO_CONNECT_SUPPORTED (0)
 #define MGS_CONNECT_SUPPORTED  (OBD_CONNECT_VERSION | OBD_CONNECT_AT)
 
index 212e358..b945eb5 100644 (file)
@@ -458,17 +458,21 @@ static int mds_lov_get_objid(struct obd_device * obd,
                              obd_id idx)
 {
         struct mds_obd *mds = &obd->u.mds;
+        struct obd_export *osc_exp = mds->mds_osc_exp;
         unsigned int page;
         unsigned int off;
         obd_id *data;
         int rc = 0;
         ENTRY;
 
+        LASSERT(osc_exp != NULL);
+
         page = idx / OBJID_PER_PAGE();
         off = idx % OBJID_PER_PAGE();
 
         data = mds->mds_lov_page_array[page];
-        if (data[off] < 2) {
+        if (data[off] < 2 || 
+            !(osc_exp->exp_connect_flags & OBD_CONNECT_SKIP_ORPHAN)) {
                 /* We never read this lastid; ask the osc */
                 struct obd_id_info lastid;
                 __u32 size = sizeof(lastid);
@@ -681,7 +685,8 @@ int mds_lov_connect(struct obd_device *obd, char * lov_name)
                 RETURN(-ENOMEM);
         data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_INDEX |
                 OBD_CONNECT_REQPORTAL | OBD_CONNECT_QUOTA64 | OBD_CONNECT_AT |
-                OBD_CONNECT_CHANGE_QS | OBD_CONNECT_MDS;
+                OBD_CONNECT_CHANGE_QS | OBD_CONNECT_MDS |
+                OBD_CONNECT_SKIP_ORPHAN;
 #ifdef HAVE_LRU_RESIZE_SUPPORT
         data->ocd_connect_flags |= OBD_CONNECT_LRU_RESIZE;
 #endif
index 565e927..80a5d1c 100644 (file)
@@ -2274,6 +2274,11 @@ static int filter_connect_internal(struct obd_export *exp,
         exp->exp_connect_flags = data->ocd_connect_flags;
         data->ocd_version = LUSTRE_VERSION_CODE;
 
+        /* Kindly make sure the SKIP_ORPHAN flag is from MDS. */
+        if (!ergo(data->ocd_connect_flags & OBD_CONNECT_SKIP_ORPHAN,
+                  data->ocd_connect_flags & OBD_CONNECT_MDS))
+                RETURN(-EPROTO);
+
         if (exp->exp_connect_flags & OBD_CONNECT_GRANT) {
                 struct filter_obd *filter = &exp->exp_obd->u.filter;
                 struct filter_export_data *fed = &exp->exp_filter_data;
@@ -2994,7 +2999,8 @@ static int filter_destroy_precreated(struct obd_export *exp, struct obdo *oa,
 {
         struct obdo doa = { 0 }; /* XXX obdo on stack */
         obd_id last, id;
-        int rc;
+        int rc = 0;
+        int skip_orphan;
         ENTRY;
 
         LASSERT(oa);
@@ -3016,25 +3022,37 @@ static int filter_destroy_precreated(struct obd_export *exp, struct obdo *oa,
         }
 
         last = filter_last_id(filter, doa.o_gr);
-        CWARN("%s: deleting orphan objects from "LPU64" to "LPU64"\n",
-               exp->exp_obd->obd_name, oa->o_id + 1, last);
+        skip_orphan = !!(exp->exp_connect_flags & OBD_CONNECT_SKIP_ORPHAN);
+
+        CWARN("%s: deleting orphan objects from "LPU64" to "LPU64"%s\n",
+               exp->exp_obd->obd_name, oa->o_id + 1, last,
+               skip_orphan ? ", orphan objids won't be reused any more." : ".");
+
         for (id = last; id > oa->o_id; id--) {
                 doa.o_id = id;
                 rc = filter_destroy(exp, &doa, NULL, NULL, NULL);
                 if (rc && rc != -ENOENT) /* this is pretty fatal... */
                         CEMERG("error destroying precreate objid "LPU64": %d\n",
                                id, rc);
-                filter_set_last_id(filter, id - 1, doa.o_gr);
                 /* update last_id on disk periodically so that if we restart
                  * we don't need to re-scan all of the just-deleted objects. */
-                if ((id & 511) == 0)
+                if ((id & 511) == 0 && !skip_orphan) {
+                        filter_set_last_id(filter, id - 1, doa.o_gr);
                         filter_update_last_objid(exp->exp_obd, doa.o_gr, 0);
+                }
         }
 
         CDEBUG(D_HA, "%s: after destroy: set last_objids["LPU64"] = "LPU64"\n",
                exp->exp_obd->obd_name, doa.o_gr, oa->o_id);
 
-        rc = filter_update_last_objid(exp->exp_obd, doa.o_gr, 1);
+        if (!skip_orphan) {
+                filter_set_last_id(filter, id, doa.o_gr);
+                rc = filter_update_last_objid(exp->exp_obd, doa.o_gr, 1);
+        } else {
+                /* don't reuse orphan object, return last used objid */
+                oa->o_id = last;
+                rc = 0;
+        }
         filter->fo_destroy_in_progress = 0;
 
         RETURN(rc);
index 3909c75..a5ce2f2 100644 (file)
@@ -324,6 +324,7 @@ int osc_create(struct obd_export *exp, struct obdo *oa,
 {
         struct lov_stripe_md *lsm;
         struct osc_creator *oscc = &exp->exp_obd->u.cli.cl_oscc;
+        struct obd_import  *imp  = exp->exp_obd->u.cli.cl_import;
         int try_again = 1, rc = 0;
         ENTRY;
         LASSERT(oa);
@@ -367,10 +368,18 @@ int osc_create(struct obd_export *exp, struct obdo *oa,
                 spin_lock(&oscc->oscc_lock);
                 oscc->oscc_flags &= ~OSCC_FLAG_SYNC_IN_PROGRESS;
                 if (rc == 0 || rc == -ENOSPC) {
+                        struct obd_connect_data *ocd;
+
                         if (rc == -ENOSPC)
                                 oscc->oscc_flags |= OSCC_FLAG_NOSPC;
                         oscc->oscc_flags &= ~OSCC_FLAG_RECOVERING;
                         oscc->oscc_last_id = oa->o_id;
+                        ocd = &imp->imp_connect_data;
+                        if (ocd->ocd_connect_flags & OBD_CONNECT_SKIP_ORPHAN) {
+                                CWARN("Skip orphan set, reset last objid\n");
+                                oscc->oscc_next_id = oa->o_id + 1;
+                        }
+
                         CDEBUG(D_HA, "%s: oscc recovery finished, last_id: "
                                LPU64", rc: %d\n", oscc->oscc_obd->obd_name,
                                oscc->oscc_last_id, rc);
index 715990c..f9b5acc 100755 (executable)
@@ -994,6 +994,31 @@ test_59() { # bug 10589
 }
 run_test 59 "Read cancel race on client eviction"
 
+test_61()
+{
+       # bug 17485: a file created after "fail_abort mds" must get an objid
+       # above the last_id read from ost1 beforehand (no orphan objid reuse).
+       local cflags='osc.*-OST0000-osc.connect_flags'
+       do_facet mds "lctl get_param -n $cflags |grep -q skip_orphan"
+       [ $? -ne 0 ] && skip "don't have skip orphan feature" && return
+       mkdir -p $DIR/d61 || error "mkdir dir $DIR/d61 failed"
+       # Set the default stripe of $DIR/d61 to put the files to ost1
+       $LFS setstripe -c 1 --index 0 $DIR/d61
+
+       replay_barrier mds
+       createmany -o $DIR/d61/$tfile-%d 10
+       local oid=`do_facet ost1 "lctl get_param -n obdfilter.*OST0000.last_id"`
+       fail_abort mds
+
+       touch $DIR/d61/$tfile
+       local id=`$LFS getstripe $DIR/d61/$tfile | awk '$2 ~ /^[1-9]+/ {print $2}'`
+       # An empty operand makes "[ -le ]" error out, which would skip the
+       # check below and let the test pass silently; fail explicitly instead.
+       [ -n "$id" ] && [ -n "$oid" ] || error "can't get objid or last_id"
+       [ "$id" -le "$oid" ] && error "the orphan objid was reused, failed"
+       rm -rf $DIR/d61
+}
+run_test 61 "Verify to not reuse orphan objects - bug 17485"
+
 equals_msg `basename $0`: test complete, cleaning up
 check_and_cleanup_lustre
 [ -f "$TESTSUITELOG" ] && cat $TESTSUITELOG && grep -q FAIL $TESTSUITELOG && exit 1 || true