From 54b044966dda10e86d6d2a72b681421b1f570906 Mon Sep 17 00:00:00 2001 From: Johann Lombardi Date: Fri, 25 Jun 2010 16:06:07 +0200 Subject: [PATCH] b=17485 Don't reset the lov_objid to last_id sent by OST i=andrew i=hongchao Unconditionally resetting the lov_objid values to the last_id reported by the OST in mds_lov_get_objid() is wrong and can cause the same object to be allocated twice by the MDS. The SKIP_ORPHAN feature is supposed to work as follows: * during orphan recovery, the MDS must supply the lov_objid value to the OST; * on the OST side: - if MDS's lov_objid > OST's last_id, the OST recreates the missing objects up to lov_objid and packs this value in the reply; - if OST's last_id > MDS's lov_objid, the OST destroys orphan objects from lov_objid + 1 up to last_id and does *not* change last_id. The OST packs the value of last_id in the reply; the MDS must restart allocations after this id if it does not want to reuse orphans. * the MDS sets next_id to the value reported by the OST in the orphan recovery reply, plus one. If we always discard the lov_objid values, as mds_lov_get_objid() currently does, orphan recovery is a no-op: it neither clears orphans nor recreates missing objects. 
--- lustre/mds/mds_lov.c | 9 +-------- lustre/obdfilter/filter.c | 9 ++++++++- lustre/osc/osc_create.c | 5 +++++ 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/lustre/mds/mds_lov.c b/lustre/mds/mds_lov.c index 29419b3..b478bd9 100644 --- a/lustre/mds/mds_lov.c +++ b/lustre/mds/mds_lov.c @@ -474,7 +474,6 @@ static int mds_lov_get_objid(struct obd_device * obd, unsigned int page; unsigned int off; obd_id *data; - __u64 connect_flags; __u32 size; int rc = 0; ENTRY; @@ -485,14 +484,8 @@ static int mds_lov_get_objid(struct obd_device * obd, off = idx % OBJID_PER_PAGE(); data = mds->mds_lov_page_array[page]; - size = sizeof(__u64); - connect_flags = idx; - rc = obd_get_info(lov_exp, sizeof(KEY_CONNECT_FLAG), KEY_CONNECT_FLAG, - &size, &connect_flags, NULL); - if (rc) - GOTO(out, rc); - if (data[off] < 2 || connect_flags & OBD_CONNECT_SKIP_ORPHAN) { + if (data[off] < 2) { /* We never read this lastid; ask the osc */ struct obd_id_info lastid; diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 228f59c..f11472f 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -3124,7 +3124,14 @@ static int filter_destroy_precreated(struct obd_export *exp, struct obdo *oa, filter_set_last_id(filter, id, doa.o_gr); rc = filter_update_last_objid(exp->exp_obd, doa.o_gr, 1); } else { - /* don't reuse orphan object, return last used objid */ + /* + * We have destroyed orphan objects, but don't want to reuse + * them. Therefore we don't reset last_id to the last created + * objects. 
Instead, we report back to the MDS the object id + * of the last orphan, so that the MDS can restart allocating + * objects from this id + 1 and thus skip the whole orphan + * object id range + */ oa->o_id = last; rc = 0; } diff --git a/lustre/osc/osc_create.c b/lustre/osc/osc_create.c index 42fe4ef..535e5bf 100644 --- a/lustre/osc/osc_create.c +++ b/lustre/osc/osc_create.c @@ -571,6 +571,11 @@ int osc_create(struct obd_export *exp, struct obdo *oa, ocd = &imp->imp_connect_data; if (ocd->ocd_connect_flags & OBD_CONNECT_SKIP_ORPHAN) { + /* + * The OST reports back in oa->o_id from where + * we should restart in order to skip orphan + * objects + */ CDEBUG(D_HA, "%s: Skip orphan set, reset last " "objid\n", oscc->oscc_obd->obd_name); oscc->oscc_next_id = oa->o_id + 1; -- 1.8.3.1