From: Fan Yong Date: Mon, 10 Nov 2014 20:48:24 +0000 (+0800) Subject: LU-6050 target: control OST-index in IDIF via ROCOMPAT flag X-Git-Tag: 2.6.94~12 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=ecd28d9b6cb691bda8184a7e07f1acc1ccded391 LU-6050 target: control OST-index in IDIF via ROCOMPAT flag Introduce a new flag, OBD_ROCOMPAT_IDX_IN_IDIF, that is stored in the last_rcvd file. For a newly formatted OST device, it is set automatically; when upgrading from an old OST device, you can enable it via the lproc interface osd-ldiskfs.index_in_idif. With this flag enabled, the IDIF-in-LMA of a newly created OST-object will contain the OST-index; for an existing OST-object, the OSD will convert the old-format IDIF to the new-format IDIF, with the OST-index stored in the LMA EA, when the OST-object is accessed or via OI scrub. Once the flag is enabled, it cannot be reverted, so the system cannot be downgraded to the original incompatible version. Signed-off-by: Fan Yong Change-Id: I9e6e089d54fdb3970bb201eedac8dc09be2cc1c1 Reviewed-on: http://review.whamcloud.com/13516 Tested-by: Jenkins Reviewed-by: Andreas Dilger Reviewed-by: wangdi Tested-by: Maloo Reviewed-by: Oleg Drokin --- diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 685b785..b1abe71 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -18,6 +18,20 @@ TBD Intel Corporation * NFSv4 reexport to 32-bit NFS client nodes requires Lustre client on the re-exporting nodes to be mounted with "32bitapi" mount option +Severity : enhancement +Jira : LU-6050 +Description: control OST-index in IDIF via ROCOMPAT flag. +Details : Introduce new flag OBD_ROCOMPAT_IDX_IN_IDIF that is stored in the + last_rcvd file. For new formatted OST device, it will be auto set; + for the case of upgrading from old OST device, you can enable it + via the lproc interface osd-ldiskfs.index_in_idif.
With such flag + enabled, for new created OST-object, its IDIF-in-LMA will contain + the OST-index; for the existing OST-object, the OSD will convert + old format IDIF as new format IDIF with OST-index stored in the + LMA EA when accessing such OST-object or via OI scrub. Once such + flag is enabled, it cannot be reverted back, so the system cannot + be downgraded to the original incompatible version. + -------------------------------------------------------------------------------- 07-30-2014 Intel Corporation diff --git a/lustre/include/lu_object.h b/lustre/include/lu_object.h index 8937a20..2ac4144 100644 --- a/lustre/include/lu_object.h +++ b/lustre/include/lu_object.h @@ -642,6 +642,10 @@ struct lu_site { * XXX: a hack! fld has to find md_site via site, remove when possible */ struct seq_server_site *ld_seq_site; + /** + * Pointer to the lu_target for this site. + */ + struct lu_target *ls_tgt; }; static inline struct lu_site_bkt_data * diff --git a/lustre/include/lustre_disk.h b/lustre/include/lustre_disk.h index 0c1f665..b0dec84 100644 --- a/lustre/include/lustre_disk.h +++ b/lustre/include/lustre_disk.h @@ -289,7 +289,9 @@ struct lustre_mount_data { #define OBD_COMPAT_20 0x00000008 /** MDS handles LOV_OBJID file */ -#define OBD_ROCOMPAT_LOVOBJID 0x00000001 +#define OBD_ROCOMPAT_LOVOBJID 0x00000001 +/** store OST index in the IDIF */ +#define OBD_ROCOMPAT_IDX_IN_IDIF 0x00000002 /** OST handles group subdirs */ #define OBD_INCOMPAT_GROUPS 0x00000001 diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index dddbba3..086a812 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -495,15 +495,14 @@ static int osd_check_lma(const struct lu_env *env, struct osd_object *obj) if (rc == -ENODATA && !fid_is_igif(rfid) && osd->od_check_ff) { fid = &lma->lma_self_fid; rc = osd_get_idif(info, inode, dentry, fid); - if ((rc > 0) || (rc == -ENODATA && osd->od_lma_self_repair)) { + if ((rc > 0) || (rc == -ENODATA
&& osd->od_index_in_idif)) { /* For the given OST-object, if it has neither LMA nor * FID in XATTR_NAME_FID, then the given FID (which is * contained in the @obj, from client RPC for locating * the OST-object) is trusted. We use it to generate * the LMA. */ osd_lma_self_repair(info, osd, inode, rfid, - fid_is_on_ost(info, osd, fid, OI_CHECK_FLD) ? - LMAC_FID_ON_OST : 0); + LMAC_FID_ON_OST); RETURN(0); } } @@ -546,7 +545,7 @@ static int osd_check_lma(const struct lu_env *env, struct osd_object *obj) fid_to_ostid(fid, oi); ostid_to_fid(fid1, oi, idx); if (lu_fid_eq(fid1, rfid)) { - if (osd->od_lma_self_repair) + if (osd->od_index_in_idif) osd_lma_self_repair(info, osd, inode, rfid, LMAC_FID_ON_OST); @@ -2796,11 +2795,23 @@ static int osd_object_ea_create(const struct lu_env *env, struct dt_object *dt, osd_trans_declare_rb(env, th, OSD_OT_REF_ADD); result = __osd_object_create(info, obj, attr, hint, dof, th); - if (result == 0) - result = osd_ea_fid_set(info, obj->oo_inode, fid, + if (result == 0) { + if (fid_is_idif(fid) && + !osd_dev(dt->do_lu.lo_dev)->od_index_in_idif) { + struct lu_fid *tfid = &info->oti_fid; + struct ost_id *oi = &info->oti_ostid; + + fid_to_ostid(fid, oi); + ostid_to_fid(tfid, oi, 0); + result = osd_ea_fid_set(info, obj->oo_inode, tfid, + LMAC_FID_ON_OST, 0); + } else { + result = osd_ea_fid_set(info, obj->oo_inode, fid, fid_is_on_ost(info, osd_obj2dev(obj), fid, OI_CHECK_FLD) ? 
LMAC_FID_ON_OST : 0, 0); + } + } if (result == 0) result = __osd_oi_insert(env, obj, fid, th); @@ -6129,9 +6140,6 @@ static int osd_device_init0(const struct lu_env *env, if (rc != 0) GOTO(out_site, rc); - /* self-repair LMA by default */ - o->od_lma_self_repair = 1; - INIT_LIST_HEAD(&o->od_ios_list); /* setup scrub, including OI files initialization */ rc = osd_scrub_setup(env, o); @@ -6322,10 +6330,12 @@ static int osd_obd_disconnect(struct obd_export *exp) } static int osd_prepare(const struct lu_env *env, struct lu_device *pdev, - struct lu_device *dev) + struct lu_device *dev) { - struct osd_device *osd = osd_dev(dev); - int result = 0; + struct osd_device *osd = osd_dev(dev); + struct lr_server_data *lsd = + &osd->od_dt_dev.dd_lu_dev.ld_site->ls_tgt->lut_lsd; + int result = 0; ENTRY; if (osd->od_quota_slave != NULL) { @@ -6335,6 +6345,21 @@ static int osd_prepare(const struct lu_env *env, struct lu_device *pdev, RETURN(result); } + if (lsd->lsd_feature_incompat & OBD_COMPAT_OST) { +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 52, 0) + if (lsd->lsd_feature_rocompat & OBD_ROCOMPAT_IDX_IN_IDIF) { + osd->od_index_in_idif = 1; + } else { + osd->od_index_in_idif = 0; + result = osd_register_proc_index_in_idif(osd); + if (result != 0) + RETURN(result); + } +#else + osd->od_index_in_idif = 1; +#endif + } + result = osd_fid_init(env, osd); RETURN(result); diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index 97337ead..8ee1865 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -237,7 +237,7 @@ struct osd_device { od_igif_inoi:1, od_check_ff:1, od_is_ost:1, - od_lma_self_repair:1; + od_index_in_idif:1; unsigned long od_capa_timeout; __u32 od_capa_alg; @@ -641,6 +641,9 @@ extern struct lprocfs_vars lprocfs_osd_module_vars[]; int osd_procfs_init(struct osd_device *osd, const char *name); int osd_procfs_fini(struct osd_device *osd); void osd_brw_stats_update(struct osd_device *osd, struct 
osd_iobuf *iobuf); +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 52, 0) +int osd_register_proc_index_in_idif(struct osd_device *osd); +#endif #endif int osd_statfs(const struct lu_env *env, struct dt_device *dev, diff --git a/lustre/osd-ldiskfs/osd_lproc.c b/lustre/osd-ldiskfs/osd_lproc.c index 2ec7529..91fd2ae 100644 --- a/lustre/osd-ldiskfs/osd_lproc.c +++ b/lustre/osd-ldiskfs/osd_lproc.c @@ -549,7 +549,8 @@ ldiskfs_osd_readcache_seq_write(struct file *file, const char *buffer, } LPROC_SEQ_FOPS(ldiskfs_osd_readcache); -static int ldiskfs_osd_lma_self_repair_seq_show(struct seq_file *m, void *data) +#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 52, 0) +static int ldiskfs_osd_index_in_idif_seq_show(struct seq_file *m, void *data) { struct osd_device *dev = osd_dt_dev((struct dt_device *)m->private); @@ -557,31 +558,73 @@ static int ldiskfs_osd_lma_self_repair_seq_show(struct seq_file *m, void *data) if (unlikely(dev->od_mnt == NULL)) return -EINPROGRESS; - return seq_printf(m, "%d\n", !!dev->od_lma_self_repair); + return seq_printf(m, "%d\n", (int)(dev->od_index_in_idif)); } static ssize_t -ldiskfs_osd_lma_self_repair_seq_write(struct file *file, const char *buffer, - size_t count, loff_t *off) +ldiskfs_osd_index_in_idif_seq_write(struct file *file, const char *buffer, + size_t count, loff_t *off) { - struct seq_file *m = file->private_data; - struct dt_device *dt = m->private; - struct osd_device *dev = osd_dt_dev(dt); - int val; - int rc; + struct lu_env env; + struct seq_file *m = file->private_data; + struct dt_device *dt = m->private; + struct osd_device *dev = osd_dt_dev(dt); + struct lu_target *tgt; + int val; + int rc; LASSERT(dev != NULL); if (unlikely(dev->od_mnt == NULL)) return -EINPROGRESS; rc = lprocfs_write_helper(buffer, count, &val); - if (rc) + if (rc != 0) + return rc; + + if (dev->od_index_in_idif) { + if (val != 0) + return count; + + LCONSOLE_WARN("%s: OST-index in IDIF has been enabled, " + "it cannot be reverted back.\n", 
osd_name(dev)); + return -EPERM; + } + + if (val == 0) + return count; + + rc = lu_env_init(&env, LCT_DT_THREAD); + if (rc != 0) + return rc; + + tgt = dev->od_dt_dev.dd_lu_dev.ld_site->ls_tgt; + tgt->lut_lsd.lsd_feature_rocompat |= OBD_ROCOMPAT_IDX_IN_IDIF; + rc = tgt_server_data_update(&env, tgt, 1); + lu_env_fini(&env); + if (rc < 0) return rc; - dev->od_lma_self_repair = !!val; + LCONSOLE_INFO("%s: enable OST-index in IDIF successfully, " + "it cannot be reverted back.\n", osd_name(dev)); + + dev->od_index_in_idif = 1; return count; } -LPROC_SEQ_FOPS(ldiskfs_osd_lma_self_repair); +LPROC_SEQ_FOPS(ldiskfs_osd_index_in_idif); + +int osd_register_proc_index_in_idif(struct osd_device *osd) +{ + struct proc_dir_entry *proc; + + proc = proc_create_data("index_in_idif", 0, osd->od_proc_entry, + &ldiskfs_osd_index_in_idif_fops, + &osd->od_dt_dev); + if (proc == NULL) + return -ENOMEM; + + return 0; +} +#endif LPROC_SEQ_FOPS_RO_TYPE(ldiskfs, dt_blksize); LPROC_SEQ_FOPS_RO_TYPE(ldiskfs, dt_kbytestotal); @@ -625,8 +668,6 @@ struct lprocfs_vars lprocfs_osd_obd_vars[] = { .fops = &ldiskfs_osd_wcache_fops }, { .name = "readcache_max_filesize", .fops = &ldiskfs_osd_readcache_fops }, - { .name = "lma_self_repair", - .fops = &ldiskfs_osd_lma_self_repair_fops }, { NULL } }; diff --git a/lustre/osd-ldiskfs/osd_scrub.c b/lustre/osd-ldiskfs/osd_scrub.c index 411f69b..a92db02 100644 --- a/lustre/osd-ldiskfs/osd_scrub.c +++ b/lustre/osd-ldiskfs/osd_scrub.c @@ -370,6 +370,7 @@ osd_scrub_convert_ff(struct osd_thread_info *info, struct osd_device *dev, { struct filter_fid_old *ff = &info->oti_ff; struct dentry *dentry = &info->oti_obj_dentry; + struct lu_fid *tfid = &info->oti_fid; handle_t *jh; int size = 0; int rc; @@ -380,6 +381,15 @@ osd_scrub_convert_ff(struct osd_thread_info *info, struct osd_device *dev, if (dev->od_scrub.os_file.sf_param & SP_DRYRUN) RETURN(0); + if (fid_is_idif(fid) && dev->od_index_in_idif == 0) { + struct ost_id *oi = &info->oti_ostid; + + fid_to_ostid(fid, 
oi); + ostid_to_fid(tfid, oi, 0); + } else { + *tfid = *fid; + } + /* We want the LMA to fit into the 256-byte OST inode, so operate * as following: * 1) read old XATTR_NAME_FID and save the parent FID; @@ -395,7 +405,7 @@ osd_scrub_convert_ff(struct osd_thread_info *info, struct osd_device *dev, if (IS_ERR(jh)) { rc = PTR_ERR(jh); CDEBUG(D_LFSCK, "%s: fail to start trans for convert ff " - DFID": rc = %d\n", osd_name(dev), PFID(fid), rc); + DFID": rc = %d\n", osd_name(dev), PFID(tfid), rc); RETURN(rc); } @@ -416,7 +426,7 @@ osd_scrub_convert_ff(struct osd_thread_info *info, struct osd_device *dev, } /* 3) make new LMA and add it */ - rc = osd_ea_fid_set(info, inode, fid, LMAC_FID_ON_OST, 0); + rc = osd_ea_fid_set(info, inode, tfid, LMAC_FID_ON_OST, 0); if (rc == 0 && reset) size = sizeof(struct filter_fid); else if (rc != 0 && removed) @@ -439,7 +449,7 @@ stop: ldiskfs_journal_stop(jh); if (rc < 0) CDEBUG(D_LFSCK, "%s: fail to convert ff "DFID": rc = %d\n", - osd_name(dev), PFID(fid), rc); + osd_name(dev), PFID(tfid), rc); return rc; } diff --git a/lustre/target/tgt_lastrcvd.c b/lustre/target/tgt_lastrcvd.c index 50147d3..2457993 100644 --- a/lustre/target/tgt_lastrcvd.c +++ b/lustre/target/tgt_lastrcvd.c @@ -981,10 +981,10 @@ static struct server_compat_data tgt_scd[] = { OBD_INCOMPAT_MULTI_OI, }, [LDD_F_SV_TYPE_OST] = { - .rocompat = 0, + .rocompat = OBD_ROCOMPAT_IDX_IN_IDIF, .incompat = OBD_INCOMPAT_OST | OBD_INCOMPAT_COMMON_LR | OBD_INCOMPAT_FID, - .rocinit = 0, + .rocinit = OBD_ROCOMPAT_IDX_IN_IDIF, .incinit = OBD_INCOMPAT_OST | OBD_INCOMPAT_COMMON_LR, } }; diff --git a/lustre/target/tgt_main.c b/lustre/target/tgt_main.c index cdc60b8..a1ad974 100644 --- a/lustre/target/tgt_main.c +++ b/lustre/target/tgt_main.c @@ -112,6 +112,7 @@ int tgt_init(const struct lu_env *env, struct lu_target *lut, INIT_LIST_HEAD(&lut->lut_txn_cb.dtc_linkage); dt_txn_callback_add(lut->lut_bottom, &lut->lut_txn_cb); + lut->lut_bottom->dd_lu_dev.ld_site->ls_tgt = lut; RETURN(0); 
out_obj: diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 60475ea..3043cf5 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -1152,7 +1152,7 @@ test_28a() { # LU-4221 # Check 3. # prepare a non-symlink parameter in the OSD - name="lma_self_repair" + name="auto_scrub" param="$device.osd.$name" cmd="$LCTL get_param -n osd-*.$device.$name"