Whamcloud - gitweb
LU-6050 target: control OST-index in IDIF via ROCOMPAT flag 16/13516/5
author Fan Yong <fan.yong@intel.com>
Mon, 10 Nov 2014 20:48:24 +0000 (04:48 +0800)
committer Oleg Drokin <oleg.drokin@intel.com>
Sun, 8 Feb 2015 02:15:34 +0000 (02:15 +0000)
Introduce a new flag, OBD_ROCOMPAT_IDX_IN_IDIF, that is stored in the
last_rcvd file. For a newly formatted OST device, it is set automatically;
when upgrading from an old OST device, you can enable it via the lproc
interface osd-ldiskfs.index_in_idif. With this flag enabled, the
IDIF-in-LMA of each newly created OST-object will contain the OST-index;
for an existing OST-object, the OSD will convert the old format IDIF to
the new format IDIF, with the OST-index stored in the LMA EA, when
accessing that OST-object or via OI scrub. Once this flag is enabled, it
cannot be reverted, so the system cannot be downgraded to the original
incompatible version.

Signed-off-by: Fan Yong <fan.yong@intel.com>
Change-Id: I9e6e089d54fdb3970bb201eedac8dc09be2cc1c1
Reviewed-on: http://review.whamcloud.com/13516
Tested-by: Jenkins
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: wangdi <di.wang@intel.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/ChangeLog
lustre/include/lu_object.h
lustre/include/lustre_disk.h
lustre/osd-ldiskfs/osd_handler.c
lustre/osd-ldiskfs/osd_internal.h
lustre/osd-ldiskfs/osd_lproc.c
lustre/osd-ldiskfs/osd_scrub.c
lustre/target/tgt_lastrcvd.c
lustre/target/tgt_main.c
lustre/tests/conf-sanity.sh

index 685b785..b1abe71 100644 (file)
@@ -18,6 +18,20 @@ TBD Intel Corporation
        * NFSv4 reexport to 32-bit NFS client nodes requires Lustre client on
          the re-exporting nodes to be mounted with "32bitapi" mount option
 
        * NFSv4 reexport to 32-bit NFS client nodes requires Lustre client on
          the re-exporting nodes to be mounted with "32bitapi" mount option
 
+Severity   : enhancement
+Jira       : LU-6050
+Description: control OST-index in IDIF via ROCOMPAT flag.
+Details    : Introduce new flag OBD_ROCOMPAT_IDX_IN_IDIF that is stored in the
+            last_rcvd file. For new formatted OST device, it will be auto set;
+            for the case of upgrading from old OST device, you can enable it
+            via the lproc interface osd-ldiskfs.index_in_idif. With such flag
+            enabled, for new created OST-object, its IDIF-in-LMA will contain
+            the OST-index; for the existing OST-object, the OSD will convert
+            old format IDIF as new format IDIF with OST-index stored in the
+            LMA EA when accessing such OST-object or via OI scrub. Once such
+            flag is enabled, it cannot be reverted back, so the system cannot
+            be downgraded to the original incompatible version.
+
 --------------------------------------------------------------------------------
 
 07-30-2014 Intel Corporation
 --------------------------------------------------------------------------------
 
 07-30-2014 Intel Corporation
index 8937a20..2ac4144 100644 (file)
@@ -642,6 +642,10 @@ struct lu_site {
         * XXX: a hack! fld has to find md_site via site, remove when possible
         */
        struct seq_server_site  *ld_seq_site;
         * XXX: a hack! fld has to find md_site via site, remove when possible
         */
        struct seq_server_site  *ld_seq_site;
+       /**
+        * Pointer to the lu_target for this site.
+        */
+       struct lu_target        *ls_tgt;
 };
 
 static inline struct lu_site_bkt_data *
 };
 
 static inline struct lu_site_bkt_data *
index 0c1f665..b0dec84 100644 (file)
@@ -289,7 +289,9 @@ struct lustre_mount_data {
 #define OBD_COMPAT_20           0x00000008
 
 /** MDS handles LOV_OBJID file */
 #define OBD_COMPAT_20           0x00000008
 
 /** MDS handles LOV_OBJID file */
-#define OBD_ROCOMPAT_LOVOBJID   0x00000001
+#define OBD_ROCOMPAT_LOVOBJID          0x00000001
+/** store OST index in the IDIF */
+#define OBD_ROCOMPAT_IDX_IN_IDIF       0x00000002
 
 /** OST handles group subdirs */
 #define OBD_INCOMPAT_GROUPS     0x00000001
 
 /** OST handles group subdirs */
 #define OBD_INCOMPAT_GROUPS     0x00000001
index dddbba3..086a812 100644 (file)
@@ -495,15 +495,14 @@ static int osd_check_lma(const struct lu_env *env, struct osd_object *obj)
        if (rc == -ENODATA && !fid_is_igif(rfid) && osd->od_check_ff) {
                fid = &lma->lma_self_fid;
                rc = osd_get_idif(info, inode, dentry, fid);
        if (rc == -ENODATA && !fid_is_igif(rfid) && osd->od_check_ff) {
                fid = &lma->lma_self_fid;
                rc = osd_get_idif(info, inode, dentry, fid);
-               if ((rc > 0) || (rc == -ENODATA && osd->od_lma_self_repair)) {
+               if ((rc > 0) || (rc == -ENODATA && osd->od_index_in_idif)) {
                        /* For the given OST-object, if it has neither LMA nor
                         * FID in XATTR_NAME_FID, then the given FID (which is
                         * contained in the @obj, from client RPC for locating
                         * the OST-object) is trusted. We use it to generate
                         * the LMA. */
                        osd_lma_self_repair(info, osd, inode, rfid,
                        /* For the given OST-object, if it has neither LMA nor
                         * FID in XATTR_NAME_FID, then the given FID (which is
                         * contained in the @obj, from client RPC for locating
                         * the OST-object) is trusted. We use it to generate
                         * the LMA. */
                        osd_lma_self_repair(info, osd, inode, rfid,
-                               fid_is_on_ost(info, osd, fid, OI_CHECK_FLD) ?
-                               LMAC_FID_ON_OST : 0);
+                                           LMAC_FID_ON_OST);
                        RETURN(0);
                }
        }
                        RETURN(0);
                }
        }
@@ -546,7 +545,7 @@ static int osd_check_lma(const struct lu_env *env, struct osd_object *obj)
                                fid_to_ostid(fid, oi);
                                ostid_to_fid(fid1, oi, idx);
                                if (lu_fid_eq(fid1, rfid)) {
                                fid_to_ostid(fid, oi);
                                ostid_to_fid(fid1, oi, idx);
                                if (lu_fid_eq(fid1, rfid)) {
-                                       if (osd->od_lma_self_repair)
+                                       if (osd->od_index_in_idif)
                                                osd_lma_self_repair(info, osd,
                                                        inode, rfid,
                                                        LMAC_FID_ON_OST);
                                                osd_lma_self_repair(info, osd,
                                                        inode, rfid,
                                                        LMAC_FID_ON_OST);
@@ -2796,11 +2795,23 @@ static int osd_object_ea_create(const struct lu_env *env, struct dt_object *dt,
        osd_trans_declare_rb(env, th, OSD_OT_REF_ADD);
 
        result = __osd_object_create(info, obj, attr, hint, dof, th);
        osd_trans_declare_rb(env, th, OSD_OT_REF_ADD);
 
        result = __osd_object_create(info, obj, attr, hint, dof, th);
-       if (result == 0)
-               result = osd_ea_fid_set(info, obj->oo_inode, fid,
+       if (result == 0) {
+               if (fid_is_idif(fid) &&
+                   !osd_dev(dt->do_lu.lo_dev)->od_index_in_idif) {
+                       struct lu_fid *tfid = &info->oti_fid;
+                       struct ost_id *oi   = &info->oti_ostid;
+
+                       fid_to_ostid(fid, oi);
+                       ostid_to_fid(tfid, oi, 0);
+                       result = osd_ea_fid_set(info, obj->oo_inode, tfid,
+                                               LMAC_FID_ON_OST, 0);
+               } else {
+                       result = osd_ea_fid_set(info, obj->oo_inode, fid,
                                fid_is_on_ost(info, osd_obj2dev(obj),
                                              fid, OI_CHECK_FLD) ?
                                LMAC_FID_ON_OST : 0, 0);
                                fid_is_on_ost(info, osd_obj2dev(obj),
                                              fid, OI_CHECK_FLD) ?
                                LMAC_FID_ON_OST : 0, 0);
+               }
+       }
 
        if (result == 0)
                result = __osd_oi_insert(env, obj, fid, th);
 
        if (result == 0)
                result = __osd_oi_insert(env, obj, fid, th);
@@ -6129,9 +6140,6 @@ static int osd_device_init0(const struct lu_env *env,
        if (rc != 0)
                GOTO(out_site, rc);
 
        if (rc != 0)
                GOTO(out_site, rc);
 
-       /* self-repair LMA by default */
-       o->od_lma_self_repair = 1;
-
        INIT_LIST_HEAD(&o->od_ios_list);
        /* setup scrub, including OI files initialization */
        rc = osd_scrub_setup(env, o);
        INIT_LIST_HEAD(&o->od_ios_list);
        /* setup scrub, including OI files initialization */
        rc = osd_scrub_setup(env, o);
@@ -6322,10 +6330,12 @@ static int osd_obd_disconnect(struct obd_export *exp)
 }
 
 static int osd_prepare(const struct lu_env *env, struct lu_device *pdev,
 }
 
 static int osd_prepare(const struct lu_env *env, struct lu_device *pdev,
-                       struct lu_device *dev)
+                      struct lu_device *dev)
 {
 {
-       struct osd_device *osd = osd_dev(dev);
-       int                result = 0;
+       struct osd_device       *osd    = osd_dev(dev);
+       struct lr_server_data   *lsd    =
+                       &osd->od_dt_dev.dd_lu_dev.ld_site->ls_tgt->lut_lsd;
+       int                      result = 0;
        ENTRY;
 
        if (osd->od_quota_slave != NULL) {
        ENTRY;
 
        if (osd->od_quota_slave != NULL) {
@@ -6335,6 +6345,21 @@ static int osd_prepare(const struct lu_env *env, struct lu_device *pdev,
                        RETURN(result);
        }
 
                        RETURN(result);
        }
 
+       if (lsd->lsd_feature_incompat & OBD_COMPAT_OST) {
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 52, 0)
+               if (lsd->lsd_feature_rocompat & OBD_ROCOMPAT_IDX_IN_IDIF) {
+                       osd->od_index_in_idif = 1;
+               } else {
+                       osd->od_index_in_idif = 0;
+                       result = osd_register_proc_index_in_idif(osd);
+                       if (result != 0)
+                               RETURN(result);
+               }
+#else
+               osd->od_index_in_idif = 1;
+#endif
+       }
+
        result = osd_fid_init(env, osd);
 
        RETURN(result);
        result = osd_fid_init(env, osd);
 
        RETURN(result);
index 97337ea..8ee1865 100644 (file)
@@ -237,7 +237,7 @@ struct osd_device {
                                  od_igif_inoi:1,
                                  od_check_ff:1,
                                  od_is_ost:1,
                                  od_igif_inoi:1,
                                  od_check_ff:1,
                                  od_is_ost:1,
-                                 od_lma_self_repair:1;
+                                 od_index_in_idif:1;
 
        unsigned long             od_capa_timeout;
        __u32                     od_capa_alg;
 
        unsigned long             od_capa_timeout;
        __u32                     od_capa_alg;
@@ -641,6 +641,9 @@ extern struct lprocfs_vars lprocfs_osd_module_vars[];
 int osd_procfs_init(struct osd_device *osd, const char *name);
 int osd_procfs_fini(struct osd_device *osd);
 void osd_brw_stats_update(struct osd_device *osd, struct osd_iobuf *iobuf);
 int osd_procfs_init(struct osd_device *osd, const char *name);
 int osd_procfs_fini(struct osd_device *osd);
 void osd_brw_stats_update(struct osd_device *osd, struct osd_iobuf *iobuf);
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 52, 0)
+int osd_register_proc_index_in_idif(struct osd_device *osd);
+#endif
 
 #endif
 int osd_statfs(const struct lu_env *env, struct dt_device *dev,
 
 #endif
 int osd_statfs(const struct lu_env *env, struct dt_device *dev,
index 2ec7529..91fd2ae 100644 (file)
@@ -549,7 +549,8 @@ ldiskfs_osd_readcache_seq_write(struct file *file, const char *buffer,
 }
 LPROC_SEQ_FOPS(ldiskfs_osd_readcache);
 
 }
 LPROC_SEQ_FOPS(ldiskfs_osd_readcache);
 
-static int ldiskfs_osd_lma_self_repair_seq_show(struct seq_file *m, void *data)
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 52, 0)
+static int ldiskfs_osd_index_in_idif_seq_show(struct seq_file *m, void *data)
 {
        struct osd_device *dev = osd_dt_dev((struct dt_device *)m->private);
 
 {
        struct osd_device *dev = osd_dt_dev((struct dt_device *)m->private);
 
@@ -557,31 +558,73 @@ static int ldiskfs_osd_lma_self_repair_seq_show(struct seq_file *m, void *data)
        if (unlikely(dev->od_mnt == NULL))
                return -EINPROGRESS;
 
        if (unlikely(dev->od_mnt == NULL))
                return -EINPROGRESS;
 
-       return seq_printf(m, "%d\n", !!dev->od_lma_self_repair);
+       return seq_printf(m, "%d\n", (int)(dev->od_index_in_idif));
 }
 
 static ssize_t
 }
 
 static ssize_t
-ldiskfs_osd_lma_self_repair_seq_write(struct file *file, const char *buffer,
-                                       size_t count, loff_t *off)
+ldiskfs_osd_index_in_idif_seq_write(struct file *file, const char *buffer,
+                                   size_t count, loff_t *off)
 {
 {
-       struct seq_file   *m = file->private_data;
-       struct dt_device  *dt = m->private;
-       struct osd_device *dev = osd_dt_dev(dt);
-       int                val;
-       int                rc;
+       struct lu_env            env;
+       struct seq_file         *m      = file->private_data;
+       struct dt_device        *dt     = m->private;
+       struct osd_device       *dev    = osd_dt_dev(dt);
+       struct lu_target        *tgt;
+       int                      val;
+       int                      rc;
 
        LASSERT(dev != NULL);
        if (unlikely(dev->od_mnt == NULL))
                return -EINPROGRESS;
 
        rc = lprocfs_write_helper(buffer, count, &val);
 
        LASSERT(dev != NULL);
        if (unlikely(dev->od_mnt == NULL))
                return -EINPROGRESS;
 
        rc = lprocfs_write_helper(buffer, count, &val);
-       if (rc)
+       if (rc != 0)
+               return rc;
+
+       if (dev->od_index_in_idif) {
+               if (val != 0)
+                       return count;
+
+               LCONSOLE_WARN("%s: OST-index in IDIF has been enabled, "
+                             "it cannot be reverted back.\n", osd_name(dev));
+               return -EPERM;
+       }
+
+       if (val == 0)
+               return count;
+
+       rc = lu_env_init(&env, LCT_DT_THREAD);
+       if (rc != 0)
+               return rc;
+
+       tgt = dev->od_dt_dev.dd_lu_dev.ld_site->ls_tgt;
+       tgt->lut_lsd.lsd_feature_rocompat |= OBD_ROCOMPAT_IDX_IN_IDIF;
+       rc = tgt_server_data_update(&env, tgt, 1);
+       lu_env_fini(&env);
+       if (rc < 0)
                return rc;
 
                return rc;
 
-       dev->od_lma_self_repair = !!val;
+       LCONSOLE_INFO("%s: enable OST-index in IDIF successfully, "
+                     "it cannot be reverted back.\n", osd_name(dev));
+
+       dev->od_index_in_idif = 1;
        return count;
 }
        return count;
 }
-LPROC_SEQ_FOPS(ldiskfs_osd_lma_self_repair);
+LPROC_SEQ_FOPS(ldiskfs_osd_index_in_idif);
+
+int osd_register_proc_index_in_idif(struct osd_device *osd)
+{
+       struct proc_dir_entry *proc;
+
+       proc = proc_create_data("index_in_idif", 0, osd->od_proc_entry,
+                               &ldiskfs_osd_index_in_idif_fops,
+                               &osd->od_dt_dev);
+       if (proc == NULL)
+               return -ENOMEM;
+
+       return 0;
+}
+#endif
 
 LPROC_SEQ_FOPS_RO_TYPE(ldiskfs, dt_blksize);
 LPROC_SEQ_FOPS_RO_TYPE(ldiskfs, dt_kbytestotal);
 
 LPROC_SEQ_FOPS_RO_TYPE(ldiskfs, dt_blksize);
 LPROC_SEQ_FOPS_RO_TYPE(ldiskfs, dt_kbytestotal);
@@ -625,8 +668,6 @@ struct lprocfs_vars lprocfs_osd_obd_vars[] = {
          .fops =       &ldiskfs_osd_wcache_fops        },
        { .name =       "readcache_max_filesize",
          .fops =       &ldiskfs_osd_readcache_fops     },
          .fops =       &ldiskfs_osd_wcache_fops        },
        { .name =       "readcache_max_filesize",
          .fops =       &ldiskfs_osd_readcache_fops     },
-       { .name =       "lma_self_repair",
-         .fops =       &ldiskfs_osd_lma_self_repair_fops       },
        { NULL }
 };
 
        { NULL }
 };
 
index 411f69b..a92db02 100644 (file)
@@ -370,6 +370,7 @@ osd_scrub_convert_ff(struct osd_thread_info *info, struct osd_device *dev,
 {
        struct filter_fid_old   *ff      = &info->oti_ff;
        struct dentry           *dentry  = &info->oti_obj_dentry;
 {
        struct filter_fid_old   *ff      = &info->oti_ff;
        struct dentry           *dentry  = &info->oti_obj_dentry;
+       struct lu_fid           *tfid    = &info->oti_fid;
        handle_t                *jh;
        int                      size    = 0;
        int                      rc;
        handle_t                *jh;
        int                      size    = 0;
        int                      rc;
@@ -380,6 +381,15 @@ osd_scrub_convert_ff(struct osd_thread_info *info, struct osd_device *dev,
        if (dev->od_scrub.os_file.sf_param & SP_DRYRUN)
                RETURN(0);
 
        if (dev->od_scrub.os_file.sf_param & SP_DRYRUN)
                RETURN(0);
 
+       if (fid_is_idif(fid) && dev->od_index_in_idif == 0) {
+               struct ost_id *oi = &info->oti_ostid;
+
+               fid_to_ostid(fid, oi);
+               ostid_to_fid(tfid, oi, 0);
+       } else {
+               *tfid = *fid;
+       }
+
        /* We want the LMA to fit into the 256-byte OST inode, so operate
         * as following:
         * 1) read old XATTR_NAME_FID and save the parent FID;
        /* We want the LMA to fit into the 256-byte OST inode, so operate
         * as following:
         * 1) read old XATTR_NAME_FID and save the parent FID;
@@ -395,7 +405,7 @@ osd_scrub_convert_ff(struct osd_thread_info *info, struct osd_device *dev,
        if (IS_ERR(jh)) {
                rc = PTR_ERR(jh);
                CDEBUG(D_LFSCK, "%s: fail to start trans for convert ff "
        if (IS_ERR(jh)) {
                rc = PTR_ERR(jh);
                CDEBUG(D_LFSCK, "%s: fail to start trans for convert ff "
-                      DFID": rc = %d\n", osd_name(dev), PFID(fid), rc);
+                      DFID": rc = %d\n", osd_name(dev), PFID(tfid), rc);
                RETURN(rc);
        }
 
                RETURN(rc);
        }
 
@@ -416,7 +426,7 @@ osd_scrub_convert_ff(struct osd_thread_info *info, struct osd_device *dev,
        }
 
        /* 3) make new LMA and add it */
        }
 
        /* 3) make new LMA and add it */
-       rc = osd_ea_fid_set(info, inode, fid, LMAC_FID_ON_OST, 0);
+       rc = osd_ea_fid_set(info, inode, tfid, LMAC_FID_ON_OST, 0);
        if (rc == 0 && reset)
                size = sizeof(struct filter_fid);
        else if (rc != 0 && removed)
        if (rc == 0 && reset)
                size = sizeof(struct filter_fid);
        else if (rc != 0 && removed)
@@ -439,7 +449,7 @@ stop:
        ldiskfs_journal_stop(jh);
        if (rc < 0)
                CDEBUG(D_LFSCK, "%s: fail to convert ff "DFID": rc = %d\n",
        ldiskfs_journal_stop(jh);
        if (rc < 0)
                CDEBUG(D_LFSCK, "%s: fail to convert ff "DFID": rc = %d\n",
-                      osd_name(dev), PFID(fid), rc);
+                      osd_name(dev), PFID(tfid), rc);
        return rc;
 }
 
        return rc;
 }
 
index 50147d3..2457993 100644 (file)
@@ -981,10 +981,10 @@ static struct server_compat_data tgt_scd[] = {
                           OBD_INCOMPAT_MULTI_OI,
        },
        [LDD_F_SV_TYPE_OST] = {
                           OBD_INCOMPAT_MULTI_OI,
        },
        [LDD_F_SV_TYPE_OST] = {
-               .rocompat = 0,
+               .rocompat = OBD_ROCOMPAT_IDX_IN_IDIF,
                .incompat = OBD_INCOMPAT_OST | OBD_INCOMPAT_COMMON_LR |
                            OBD_INCOMPAT_FID,
                .incompat = OBD_INCOMPAT_OST | OBD_INCOMPAT_COMMON_LR |
                            OBD_INCOMPAT_FID,
-               .rocinit = 0,
+               .rocinit = OBD_ROCOMPAT_IDX_IN_IDIF,
                .incinit = OBD_INCOMPAT_OST | OBD_INCOMPAT_COMMON_LR,
        }
 };
                .incinit = OBD_INCOMPAT_OST | OBD_INCOMPAT_COMMON_LR,
        }
 };
index cdc60b8..a1ad974 100644 (file)
@@ -112,6 +112,7 @@ int tgt_init(const struct lu_env *env, struct lu_target *lut,
        INIT_LIST_HEAD(&lut->lut_txn_cb.dtc_linkage);
 
        dt_txn_callback_add(lut->lut_bottom, &lut->lut_txn_cb);
        INIT_LIST_HEAD(&lut->lut_txn_cb.dtc_linkage);
 
        dt_txn_callback_add(lut->lut_bottom, &lut->lut_txn_cb);
+       lut->lut_bottom->dd_lu_dev.ld_site->ls_tgt = lut;
 
        RETURN(0);
 out_obj:
 
        RETURN(0);
 out_obj:
index 60475ea..3043cf5 100644 (file)
@@ -1152,7 +1152,7 @@ test_28a() { # LU-4221
 
        # Check 3.
        # prepare a non-symlink parameter in the OSD
 
        # Check 3.
        # prepare a non-symlink parameter in the OSD
-       name="lma_self_repair"
+       name="auto_scrub"
        param="$device.osd.$name"
        cmd="$LCTL get_param -n osd-*.$device.$name"
 
        param="$device.osd.$name"
        cmd="$LCTL get_param -n osd-*.$device.$name"