From: wang di Date: Sun, 11 Jan 2015 20:19:30 +0000 (-0800) Subject: LU-6154 zfs: striped directory and migration on ZFS X-Git-Tag: 2.6.94~14 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=0c69c941cdae8cc41a3daaa9184ef2468a24aa09 LU-6154 zfs: striped directory and migration on ZFS 1. Increase/decrease the refcount of each sub_stripe object, because the refcount has to be increased/decreased explicitly for a ZFS directory. 2. Set up/clean up the sequence service for osd-zfs, so it can create FIDs for the local OSD. 3. Do not zero dah_eadata in the OSD layer; set it in the MDD layer instead, so that the striping create process is not interfered with. 4. Put a terminating 0 at the end of the link data during migration, since osd-zfs does not do it when reading the link. 5. Create the orphan object with linkEA data, so that if migration is interrupted, other threads are still able to read entries from the half-migrated directory, because osd-zfs needs to retrieve the parent FID from the linkEA data when reading directory entries (see osd_dir_it_rec()). 
Signed-off-by: wang di Change-Id: I67cbd0b09d2716b163277425066dcf155df68039 Reviewed-on: http://review.whamcloud.com/13518 Reviewed-by: Andreas Dilger Reviewed-by: Fan Yong Tested-by: Jenkins Reviewed-by: Alex Zhuravlev Tested-by: Oleg Drokin Tested-by: Maloo Reviewed-by: Oleg Drokin --- diff --git a/lustre/lod/lod_object.c b/lustre/lod/lod_object.c index 5dbd0b9..a2d2256 100644 --- a/lustre/lod/lod_object.c +++ b/lustre/lod/lod_object.c @@ -1821,6 +1821,10 @@ next: if (!dt_try_as_dir(env, dto)) GOTO(out_put, rc = -EINVAL); + rc = dt_declare_ref_add(env, dto, th); + if (rc != 0) + GOTO(out_put, rc); + rec->rec_fid = lu_object_fid(&dto->do_lu); rc = dt_declare_insert(env, dto, (const struct dt_rec *)rec, (const struct dt_key *)dot, th); @@ -2445,6 +2449,12 @@ static int lod_xattr_set_lmv(const struct lu_env *env, struct dt_object *dt, dto = lo->ldo_stripe[i]; dt_write_lock(env, dto, MOR_TGT_CHILD); rc = dt_create(env, dto, attr, NULL, dof, th); + if (rc != 0) { + dt_write_unlock(env, dto); + RETURN(rc); + } + + rc = dt_ref_add(env, dto, th); dt_write_unlock(env, dto); if (rc != 0) RETURN(rc); @@ -3570,6 +3580,13 @@ static int lod_declare_object_destroy(const struct lu_env *env, /* declare destroy all striped objects */ for (i = 0; i < lo->ldo_stripenr; i++) { if (likely(lo->ldo_stripe[i] != NULL)) { + if (S_ISDIR(dt->do_lu.lo_header->loh_attr)) { + rc = dt_declare_ref_del(env, lo->ldo_stripe[i], + th); + if (rc != 0) + RETURN(rc); + } + rc = dt_declare_destroy(env, lo->ldo_stripe[i], th); if (rc != 0) break; @@ -3638,6 +3655,15 @@ static int lod_object_destroy(const struct lu_env *env, if (likely(lo->ldo_stripe[i] != NULL) && (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LOST_SPEOBJ) || i == cfs_fail_val)) { + if (S_ISDIR(dt->do_lu.lo_header->loh_attr)) { + dt_write_lock(env, lo->ldo_stripe[i], + MOR_TGT_CHILD); + rc = dt_ref_del(env, lo->ldo_stripe[i], th); + dt_write_unlock(env, lo->ldo_stripe[i]); + if (rc != 0) + break; + } + rc = dt_destroy(env, lo->ldo_stripe[i], th); if 
(rc != 0) break; diff --git a/lustre/mdd/mdd_dir.c b/lustre/mdd/mdd_dir.c index 6344e40..be38034 100644 --- a/lustre/mdd/mdd_dir.c +++ b/lustre/mdd/mdd_dir.c @@ -3072,15 +3072,6 @@ static int mdd_update_linkea_internal(const struct lu_env *env, RETURN(rc); } - if (declare) - rc = mdd_declare_links_add(env, mdd_tobj, handle, ldata, - MLAO_IGNORE); - else - rc = mdd_links_write(env, mdd_tobj, ldata, handle); - - if (rc != 0) - RETURN(rc); - /* If it is mulitple links file, we need update the name entry for * all parent */ LASSERT(ldata->ld_leh != NULL); @@ -3292,6 +3283,7 @@ static int mdd_declare_migrate_create(const struct lu_env *env, struct md_op_spec *spec, struct lu_attr *la, union lmv_mds_md *mgr_ea, + struct linkea_data *ldata, struct thandle *handle) { struct lu_attr *la_flag = MDD_ENV_VAR(env, la_for_fix); @@ -3319,6 +3311,11 @@ static int mdd_declare_migrate_create(const struct lu_env *env, buf, 0, handle); if (rc != 0) return rc; + } else if (S_ISDIR(la->la_mode)) { + rc = mdd_declare_links_add(env, mdd_tobj, handle, ldata, + MLAO_IGNORE); + if (rc != 0) + return rc; } if (spec->u.sp_ea.eadata != NULL && spec->u.sp_ea.eadatalen != 0) { @@ -3348,6 +3345,7 @@ static int mdd_migrate_create(const struct lu_env *env, struct mdd_object *mdd_pobj, struct mdd_object *mdd_sobj, struct mdd_object *mdd_tobj, + const struct lu_name *lname, struct lu_attr *la) { struct mdd_thread_info *info = mdd_env_info(env); @@ -3361,6 +3359,7 @@ static int mdd_migrate_create(const struct lu_env *env, struct lu_attr *la_flag = MDD_ENV_VAR(env, la_for_fix); struct dt_allocation_hint *hint = &mdd_env_info(env)->mti_hint; int mgr_easize; + struct linkea_data *ldata = &mdd_env_info(env)->mti_link_data; int rc; ENTRY; @@ -3374,6 +3373,7 @@ static int mdd_migrate_create(const struct lu_env *env, la->la_size + 1); link_buf = *buf; link_buf.lb_len = la->la_size + 1; + memset(link_buf.lb_buf, 0, link_buf.lb_len); rc = mdd_readlink(env, &mdd_sobj->mod_obj, &link_buf); if (rc <= 0) { rc = rc 
!= 0 ? rc : -EFAULT; @@ -3393,6 +3393,10 @@ static int mdd_migrate_create(const struct lu_env *env, spec->u.sp_ea.eadatalen = lmm_buf.lb_len; spec->sp_cr_flags |= MDS_OPEN_HAS_EA; } + } else if (S_ISDIR(la->la_mode)) { + rc = mdd_links_read(env, mdd_sobj, ldata); + if (rc < 0 && rc != -ENODATA) + RETURN(rc); } mgr_ea = (struct lmv_mds_md_v1 *)info->mti_xattr_buf; @@ -3417,7 +3421,7 @@ static int mdd_migrate_create(const struct lu_env *env, rc = mdd_declare_migrate_create(env, mdd_pobj, mdd_sobj, mdd_tobj, spec, la, (union lmv_mds_md *)info->mti_xattr_buf, - handle); + ldata, handle); if (rc != 0) GOTO(stop_trans, rc); @@ -3431,6 +3435,12 @@ static int mdd_migrate_create(const struct lu_env *env, if (rc != 0) GOTO(stop_trans, rc); + if (S_ISDIR(la->la_mode)) { + rc = mdd_links_write(env, mdd_tobj, ldata, handle); + if (rc != 0) + GOTO(stop_trans, rc); + } + /* Set MIGRATE EA on the source inode, so once the migration needs * to be re-done during failover, the re-do process can locate the * target object which is already being created. 
*/ @@ -4067,7 +4077,7 @@ static int mdd_migrate(const struct lu_env *env, struct md_object *pobj, mdd_tobj = md2mdd_obj(tobj); if (!mdd_object_exists(mdd_tobj)) { rc = mdd_migrate_create(env, mdd_pobj, mdd_sobj, mdd_tobj, - so_attr); + lname, so_attr); if (rc != 0) GOTO(put, rc); } diff --git a/lustre/osd-zfs/osd_handler.c b/lustre/osd-zfs/osd_handler.c index 2ea2f1f..5d77809 100644 --- a/lustre/osd-zfs/osd_handler.c +++ b/lustre/osd-zfs/osd_handler.c @@ -664,6 +664,16 @@ struct lu_context_key osd_key = { .lct_exit = osd_key_exit }; +static void osd_fid_fini(const struct lu_env *env, struct osd_device *osd) +{ + if (osd->od_cl_seq == NULL) + return; + + seq_client_fini(osd->od_cl_seq); + OBD_FREE_PTR(osd->od_cl_seq); + osd->od_cl_seq = NULL; +} + static int osd_shutdown(const struct lu_env *env, struct osd_device *o) { ENTRY; @@ -674,6 +684,8 @@ static int osd_shutdown(const struct lu_env *env, struct osd_device *o) o->od_quota_slave = NULL; } + osd_fid_fini(env, o); + RETURN(0); } @@ -1096,6 +1108,33 @@ static int osd_obd_disconnect(struct obd_export *exp) RETURN(rc); } +static int osd_fid_init(const struct lu_env *env, struct osd_device *osd) +{ + struct seq_server_site *ss = osd_seq_site(osd); + int rc; + ENTRY; + + if (osd->od_is_ost || osd->od_cl_seq != NULL) + RETURN(0); + + if (unlikely(ss == NULL)) + RETURN(-ENODEV); + + OBD_ALLOC_PTR(osd->od_cl_seq); + if (osd->od_cl_seq == NULL) + RETURN(-ENOMEM); + + rc = seq_client_init(osd->od_cl_seq, NULL, LUSTRE_SEQ_METADATA, + osd->od_svname, ss->ss_server_seq); + + if (rc != 0) { + OBD_FREE_PTR(osd->od_cl_seq); + osd->od_cl_seq = NULL; + } + + RETURN(rc); +} + static int osd_prepare(const struct lu_env *env, struct lu_device *pdev, struct lu_device *dev) { @@ -1103,9 +1142,14 @@ static int osd_prepare(const struct lu_env *env, struct lu_device *pdev, int rc = 0; ENTRY; - if (osd->od_quota_slave != NULL) + if (osd->od_quota_slave != NULL) { /* set up quota slave objects */ rc = qsd_prepare(env, osd->od_quota_slave); 
+ if (rc != 0) + RETURN(rc); + } + + rc = osd_fid_init(env, osd); RETURN(rc); } diff --git a/lustre/osd-zfs/osd_index.c b/lustre/osd-zfs/osd_index.c index b28f65f..8f5905e 100644 --- a/lustre/osd-zfs/osd_index.c +++ b/lustre/osd-zfs/osd_index.c @@ -525,8 +525,9 @@ static int osd_seq_exists(const struct lu_env *env, struct osd_device *osd, rc = osd_fld_lookup(env, osd, seq, range); if (rc != 0) { - CERROR("%s: Can not lookup fld for "LPX64"\n", - osd_name(osd), seq); + if (rc != -ENOENT) + CERROR("%s: Can not lookup fld for "LPX64"\n", + osd_name(osd), seq); RETURN(0); } diff --git a/lustre/osd-zfs/osd_object.c b/lustre/osd-zfs/osd_object.c index 1767769..9f1fbc4 100644 --- a/lustre/osd-zfs/osd_object.c +++ b/lustre/osd-zfs/osd_object.c @@ -1050,7 +1050,6 @@ static void osd_ah_init(const struct lu_env *env, struct dt_allocation_hint *ah, { LASSERT(ah); - memset(ah, 0, sizeof(*ah)); ah->dah_parent = parent; ah->dah_mode = child_mode; }