Whamcloud - gitweb
LU-11963 osd: Add nonrotational flag to statfs
[fs/lustre-release.git] / lustre / osd-ldiskfs / osd_internal.h
index a7f10aa..bb3bec1 100644 (file)
@@ -23,7 +23,7 @@
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2011, 2016, Intel Corporation.
+ * Copyright (c) 2011, 2017, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -49,6 +49,7 @@
 /* struct dirent64 */
 #include <linux/dirent.h>
 #include <linux/statfs.h>
+#include <linux/bio.h>
 #include <ldiskfs/ldiskfs.h>
 #include <ldiskfs/ldiskfs_jbd2.h>
 
@@ -95,6 +96,8 @@ extern struct kmem_cache *dynlock_cachep;
 #undef HAVE_PROJECT_QUOTA
 #endif
 
+#define OBD_BRW_MAPPED OBD_BRW_LOCAL1
+
 struct osd_directory {
         struct iam_container od_container;
         struct iam_descr     od_descr;
@@ -133,24 +136,23 @@ struct osd_object {
        /** protects inode attributes. */
        spinlock_t              oo_guard;
 
-       __u32                   oo_destroyed:1;
+       /**
+        * Following two members *compat_dot* are used to indicate
+        * the presence of dot and dotdot in the given directory.
+        * This is required for interop mode (b11826).
+        */
+       __u32                   oo_destroyed:1,
+                               oo_pfid_in_lma:1,
+                               oo_compat_dot_created:1,
+                               oo_compat_dotdot_created:1;
 
        /* the i_flags in LMA */
-       __u32                   oo_lma_flags;
-        /**
-         * Following two members are used to indicate the presence of dot and
-         * dotdot in the given directory. This is required for interop mode
-         * (b11826).
-         */
-        int                     oo_compat_dot_created;
-        int                     oo_compat_dotdot_created;
+       __u32                   oo_lma_flags;
 
         const struct lu_env    *oo_owner;
-#ifdef CONFIG_LOCKDEP
-        struct lockdep_map      oo_dep_map;
-#endif
 
        struct list_head        oo_xattr_list;
+       struct lu_object_header *oo_header;
 };
 
 struct osd_obj_seq {
@@ -187,8 +189,8 @@ int osd_ldiskfs_add_entry(struct osd_thread_info *info, struct osd_device *osd,
 #define OSD_OTABLE_IT_CACHE_MASK       (~(OSD_OTABLE_IT_CACHE_SIZE - 1))
 
 struct osd_inconsistent_item {
-       /* link into osd_scrub::os_inconsistent_items,
-        * protected by osd_scrub::os_lock. */
+       /* link into lustre_scrub::os_inconsistent_items,
+        * protected by lustre_scrub::os_lock. */
        struct list_head       oii_list;
 
        /* The right FID <=> ino#/gen mapping. */
@@ -207,15 +209,16 @@ struct osd_otable_cache {
        int                    ooc_consumer_idx;
 
        /* How many items in ooc_cache. */
-       int                    ooc_cached_items;
+       __u64                  ooc_cached_items;
 
        /* Position for up layer LFSCK iteration pre-loading. */
-       __u32                  ooc_pos_preload;
+       __u64                  ooc_pos_preload;
 };
 
 struct osd_otable_it {
        struct osd_device       *ooi_dev;
        struct osd_otable_cache  ooi_cache;
+       struct osd_iit_param     ooi_iit_param;
 
        /* The following bits can be updated/checked w/o lock protection.
         * If more bits will be introduced in the future and need lock to
@@ -235,6 +238,14 @@ struct osd_obj_orphan {
        __u32 oor_ino;
 };
 
+enum osd_t10_type {            /* T10PI type of a device; stored in osd_device::od_t10_type */
+       OSD_T10_TYPE_UNKNOWN = 0,       /* zero value: T10PI not supported */
+       OSD_T10_TYPE1_CRC,      /* NOTE(review): presumably T10 PI Type 1 with CRC guard -- confirm */
+       OSD_T10_TYPE3_CRC,      /* NOTE(review): presumably T10 PI Type 3 with CRC guard -- confirm */
+       OSD_T10_TYPE1_IP,       /* NOTE(review): presumably T10 PI Type 1 with IP-checksum guard -- confirm */
+       OSD_T10_TYPE3_IP        /* NOTE(review): presumably T10 PI Type 3 with IP-checksum guard -- confirm */
+};
+
 /*
  * osd device.
  */
@@ -252,12 +263,15 @@ struct osd_device {
          */
        unsigned int              od_fl_capa:1,
                                  od_maybe_new:1,
-                                 od_noscrub:1,
                                  od_igif_inoi:1,
                                  od_check_ff:1,
                                  od_is_ost:1,
-                                 od_index_in_idif:1;
+                                 od_in_init:1,
+                                 od_index_in_idif:1,
+       /* Other flags */
+                                 od_nonrotational:1;
 
+       __s64                     od_auto_scrub_interval;
        __u32                     od_dirent_journal;
        int                       od_index;
        struct proc_dir_entry    *od_proc_entry;
@@ -288,8 +302,11 @@ struct osd_device {
        char                      od_svname[MAX_OBD_NAME];
        char                      od_mntdev[MAX_OBD_NAME];
 
-       /* quota slave instance */
-       struct qsd_instance      *od_quota_slave;
+       /* quota slave instance for inode */
+       struct qsd_instance      *od_quota_slave_md;
+
+       /* quota slave instance for block */
+       struct qsd_instance      *od_quota_slave_dt;
 
        /* osd seq instance */
        struct lu_client_seq    *od_cl_seq;
@@ -305,8 +322,24 @@ struct osd_device {
 
        /* a list of orphaned agent inodes, protected with od_osfs_lock */
        struct list_head         od_orphan_list;
+       struct list_head         od_index_backup_list;
+       struct list_head         od_index_restore_list;
+       spinlock_t               od_lock;
+       struct inode            *od_index_backup_inode;
+       enum lustre_index_backup_policy od_index_backup_policy;
+       int                      od_index_backup_stop;
+       /* T10PI type, zero if not supported  */
+       enum osd_t10_type        od_t10_type;
 };
 
+static inline struct qsd_instance *osd_def_qsd(struct osd_device *osd)
+{      /* Select one of the device's two quota slave instances by its od_is_ost flag. */
+       if (osd->od_is_ost)
+               return osd->od_quota_slave_dt;  /* OST: the quota slave instance for blocks */
+       else
+               return osd->od_quota_slave_md;  /* otherwise: the quota slave instance for inodes */
+}
+
 enum osd_full_scrub_ratio {
        /* Trigger OI scrub to scan the whole device directly. */
        OFSR_DIRECTLY   = 0,
@@ -348,6 +381,13 @@ enum osd_op_type {
        OSD_OT_MAX              = 11
 };
 
+struct osd_access_lock {       /* NOTE(review): presumably one entry of osd_thandle::ot_trunc_locks -- confirm */
+       struct list_head         tl_list;       /* list linkage for this record */
+       struct osd_object       *tl_obj;        /* object this record refers to */
+       bool                     tl_shared;     /* NOTE(review): presumably the "shared" argument of osd_trunc_lock() -- confirm */
+       bool                     tl_truncate;   /* NOTE(review): presumably marks a truncate still to be processed -- confirm */
+};
+
 struct osd_thandle {
         struct thandle          ot_super;
         handle_t               *ot_handle;
@@ -357,11 +397,15 @@ struct osd_thandle {
        /* Link to the device, for debugging. */
        struct lu_ref_link      ot_dev_link;
        unsigned int            ot_credits;
+
+       /* quota IDs related to the transaction */
        unsigned short          ot_id_cnt;
-       __u8                    ot_id_types[OSD_MAX_UGID_CNT];
-       unsigned int            ot_remove_agents:1;
+       __u8                    ot_id_res[OSD_MAX_UGID_CNT];
+       __u8                    ot_id_types[OSD_MAX_UGID_CNT];
        uid_t                   ot_id_array[OSD_MAX_UGID_CNT];
        struct lquota_trans    *ot_quota_trans;
+
+       unsigned int            ot_remove_agents:1;
 #if OSD_THANDLE_STATS
         /** time when this handle was allocated */
        ktime_t oth_alloced;
@@ -369,6 +413,7 @@ struct osd_thandle {
         /** time when this handle was started */
        ktime_t oth_started;
 #endif
+       struct list_head        ot_trunc_locks;
 };
 
 /**
@@ -499,15 +544,16 @@ struct osd_iobuf {
        int                dr_npages;
        int                dr_error;
        int                dr_frags;
-       unsigned int       dr_ignore_quota:1;
        unsigned int       dr_elapsed_valid:1; /* we really did count time */
        unsigned int       dr_rw:1;
        struct lu_buf      dr_pg_buf;
        struct page      **dr_pages;
+       struct niobuf_local     **dr_lnbs;
        struct lu_buf      dr_bl_buf;
+       struct lu_buf      dr_lnb_buf;
        sector_t          *dr_blocks;
-       unsigned long      dr_start_time;
-       unsigned long      dr_elapsed;  /* how long io took */
+       ktime_t            dr_start_time;
+       ktime_t            dr_elapsed;  /* how long io took */
        struct osd_device *dr_dev;
        unsigned int       dr_init_at;  /* the line iobuf was initialized */
 };
@@ -575,29 +621,31 @@ struct osd_thread_info {
        struct osd_idmap_cache oti_cache;
 
        /* dedicated OI cache for insert (which needs inum) */
-       struct osd_idmap_cache *oti_ins_cache;
-       int                    oti_ins_cache_size;
-       int                    oti_ins_cache_used;
-
-        int                    oti_r_locks;
-        int                    oti_w_locks;
-        int                    oti_txns;
-        /** used in osd_fid_set() to put xattr */
-        struct lu_buf          oti_buf;
-       struct lu_buf          oti_big_buf;
-        /** used in osd_ea_fid_set() to set fid into common ea */
+       struct osd_idmap_cache          *oti_ins_cache;
+       int                             oti_ins_cache_size;
+       int                             oti_ins_cache_used;
+       /* inc by osd_trans_create and dec by osd_trans_stop */
+       int                             oti_ins_cache_depth;
+
+       int                             oti_r_locks;
+       int                             oti_w_locks;
+       int                             oti_txns;
+       /** used in osd_fid_set() to put xattr */
+       struct lu_buf                   oti_buf;
+       struct lu_buf                   oti_big_buf;
+       /** used in osd_ea_fid_set() to set fid into common ea */
        union {
                struct lustre_ost_attrs oti_ost_attrs;
-               struct filter_fid_old   oti_ff;
-               struct filter_fid       oti_ff_new;
+               struct filter_fid_18_23 oti_ff_old;
+               struct filter_fid       oti_ff;
        };
        /** 0-copy IO */
-       struct osd_iobuf       oti_iobuf;
+       struct osd_iobuf                oti_iobuf;
        /* used to access objects in /O */
-       struct inode          *oti_inode;
+       struct inode                    *oti_inode;
 #define OSD_FID_REC_SZ 32
-       char                   oti_ldp[OSD_FID_REC_SZ];
-       char                   oti_ldp2[OSD_FID_REC_SZ];
+       char                            oti_ldp[OSD_FID_REC_SZ];
+       char                            oti_ldp2[OSD_FID_REC_SZ];
 
        /* used by quota code */
        union {
@@ -623,17 +671,30 @@ struct osd_thread_info {
        unsigned int            oti_declare_ops[OSD_OT_MAX];
        unsigned int            oti_declare_ops_cred[OSD_OT_MAX];
        unsigned int            oti_declare_ops_used[OSD_OT_MAX];
-};
+       struct osd_directory    oti_iam;
 
-/* flags for inode/block quota accounting */
-enum osd_qid_declare_flags {
-       OSD_QID_INODE   = 1 << 0,
-       OSD_QID_BLK     = 1 << 1,
-       OSD_QID_FORCE   = 1 << 2,
+       struct page             **oti_dio_pages;
+       int                     oti_dio_pages_used;
 };
 
 extern int ldiskfs_pdo;
 
+#ifndef HAVE_VFS_SETXATTR      /* xattr wrappers: call the inode_operations methods directly */
+#define osd_setxattr(dentry, inode, name, buf, len, flag) \
+               ((inode)->i_op->setxattr(dentry, name, buf, len, flag))
+#define osd_getxattr(dentry, inode, name, buf, len) \
+               ((inode)->i_op->getxattr(dentry, name, buf, len))
+#define osd_removexattr(dentry, inode, name) \
+               ((inode)->i_op->removexattr(dentry, name))
+#else /* HAVE_VFS_SETXATTR: same wrapper names, forwarding to the __vfs_*xattr() helpers */
+#define osd_setxattr(dentry, inode, name, buf, len, flag) \
+               __vfs_setxattr(dentry, inode, name, buf, len, flag)
+#define osd_getxattr(dentry, inode, name, buf, len) \
+               __vfs_getxattr(dentry, inode, name, buf, len)
+#define osd_removexattr(dentry, inode, name) \
+               __vfs_removexattr(dentry, name) /* the "inode" argument is unused in this variant */
+#endif /* !HAVE_VFS_SETXATTR */
+
 static inline int __osd_xattr_get(struct inode *inode, struct dentry *dentry,
                                  const char *name, void *buf, int len)
 {
@@ -642,7 +703,7 @@ static inline int __osd_xattr_get(struct inode *inode, struct dentry *dentry,
 
        dentry->d_inode = inode;
        dentry->d_sb = inode->i_sb;
-       return inode->i_op->getxattr(dentry, name, buf, len);
+       return osd_getxattr(dentry, inode, name, buf, len);
 }
 
 static inline int __osd_xattr_set(struct osd_thread_info *info,
@@ -654,13 +715,12 @@ static inline int __osd_xattr_set(struct osd_thread_info *info,
        ll_vfs_dq_init(inode);
        dentry->d_inode = inode;
        dentry->d_sb = inode->i_sb;
-       return inode->i_op->setxattr(dentry, name, buf, buflen, fl);
+       return osd_setxattr(dentry, inode, name, buf, buflen, fl);
 }
 
 #ifdef CONFIG_PROC_FS
 /* osd_lproc.c */
 extern struct lprocfs_vars lprocfs_osd_obd_vars[];
-extern struct lprocfs_vars lprocfs_osd_module_vars[];
 int osd_procfs_init(struct osd_device *osd, const char *name);
 int osd_procfs_fini(struct osd_device *osd);
 void osd_brw_stats_update(struct osd_device *osd, struct osd_iobuf *iobuf);
@@ -673,6 +733,9 @@ int osd_statfs(const struct lu_env *env, struct dt_device *dev,
                struct obd_statfs *sfs);
 struct inode *osd_iget(struct osd_thread_info *info, struct osd_device *dev,
                       struct osd_inode_id *id);
+struct inode *
+osd_iget_fid(struct osd_thread_info *info, struct osd_device *dev,
+            struct osd_inode_id *id, struct lu_fid *fid);
 int osd_ea_fid_set(struct osd_thread_info *info, struct inode *inode,
                   const struct lu_fid *fid, __u32 compat, __u32 incompat);
 int osd_get_lma(struct osd_thread_info *info, struct inode *inode,
@@ -706,17 +769,17 @@ int osd_obj_spec_update(struct osd_thread_info *info, struct osd_device *osd,
                        const struct lu_fid *fid, const struct osd_inode_id *id,
                        handle_t *th);
 
-void osd_scrub_file_reset(struct osd_scrub *scrub, __u8 *uuid, __u64 flags);
-int osd_scrub_file_store(struct osd_scrub *scrub);
 char *osd_lf_fid2name(const struct lu_fid *fid);
-int osd_scrub_start(struct osd_device *dev, __u32 flags);
+int osd_scrub_start(const struct lu_env *env, struct osd_device *dev,
+                   __u32 flags);
+void osd_scrub_stop(struct osd_device *dev);
 int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev);
 void osd_scrub_cleanup(const struct lu_env *env, struct osd_device *dev);
 int osd_oii_insert(struct osd_device *dev, struct osd_idmap_cache *oic,
                   int insert);
 int osd_oii_lookup(struct osd_device *dev, const struct lu_fid *fid,
                   struct osd_inode_id *id);
-int osd_scrub_dump(struct seq_file *m, struct osd_device *dev);
+void osd_scrub_dump(struct seq_file *m, struct osd_device *dev);
 
 int osd_fld_lookup(const struct lu_env *env, struct osd_device *osd,
                   u64 seq, struct lu_seq_range *range);
@@ -724,7 +787,7 @@ int osd_fld_lookup(const struct lu_env *env, struct osd_device *osd,
 int osd_delete_from_remote_parent(const struct lu_env *env,
                                  struct osd_device *osd,
                                  struct osd_object *obj,
-                                 struct osd_thandle *oh);
+                                 struct osd_thandle *oh, bool destroy);
 int osd_add_to_remote_parent(const struct lu_env *env, struct osd_device *osd,
                             struct osd_object *obj, struct osd_thandle *oh);
 int osd_lookup_in_remote_parent(struct osd_thread_info *oti,
@@ -1265,9 +1328,14 @@ static inline int fid_is_internal(const struct lu_fid *fid)
        return (!fid_is_namespace_visible(fid) && !fid_is_idif(fid));
 }
 
-static inline unsigned long osd_remote_parent_ino(struct osd_device *dev)
+static inline bool is_remote_parent_ino(struct osd_device *o, unsigned long ino)
 {
-       return dev->od_mdt_map->omm_remote_parent->d_inode->i_ino;
+       if (o->od_is_ost)       /* a device flagged as OST never matches */
+               return false;
+
+       LASSERT(o->od_mdt_map != NULL); /* the map is dereferenced in the return below */
+
+       return ino == o->od_mdt_map->omm_remote_parent->d_inode->i_ino; /* true when ino is the inode number of omm_remote_parent */
 }
 
 /**
@@ -1299,10 +1367,142 @@ static inline struct buffer_head *__ldiskfs_bread(handle_t *handle,
        return bh;
 }
 
+#ifndef HAVE_BIO_INTEGRITY_ENABLED     /* NOTE(review): presumably the kernel no longer declares it; definition is not in this header -- confirm */
+bool bio_integrity_enabled(struct bio *bio);
+#endif
+
+#ifdef HAVE_BI_BDEV    /* accessors built on the bio's bi_bdev member */
+# define bio_get_dev(bio)      ((bio)->bi_bdev)
+# define bio_get_disk(bio)     (bio_get_dev(bio)->bd_disk)
+# define bio_get_queue(bio)    bdev_get_queue(bio_get_dev(bio))
+# define bio_set_dev(bio, bdev) (bio_get_dev(bio) = (bdev))
+#else                  /* accessors built on the bio's bi_disk member; bio_get_dev/bio_set_dev are not defined here */
+# define bio_get_disk(bio)     ((bio)->bi_disk)
+# define bio_get_queue(bio)    (bio_get_disk(bio)->queue)
+#endif
+
 void ldiskfs_inc_count(handle_t *handle, struct inode *inode);
 void ldiskfs_dec_count(handle_t *handle, struct inode *inode);
 
 void osd_fini_iobuf(struct osd_device *d, struct osd_iobuf *iobuf);
 
+static inline int
+osd_index_register(struct osd_device *osd, const struct lu_fid *fid,
+                  __u32 keysize, __u32 recsize)
+{      /* Thin wrapper: forwards fid/keysize/recsize to lustre_index_register() and returns its result. */
+       return lustre_index_register(&osd->od_dt_dev, osd_name(osd),
+                                    &osd->od_index_backup_list, &osd->od_lock,      /* this device's backup list and its spinlock */
+                                    &osd->od_index_backup_stop,
+                                    fid, keysize, recsize);
+}
+
+static inline void
+osd_index_backup(const struct lu_env *env, struct osd_device *osd, bool backup)
+{      /* Adds the index-backup inode's id/fid pair to the OI cache, then calls lustre_index_backup(). */
+       struct osd_thread_info *info = osd_oti_get(env);
+       struct lu_fid *fid = &info->oti_fid3;   /* scratch FID taken from the thread info */
+       struct osd_inode_id *id = &info->oti_id3;       /* scratch inode id taken from the thread info */
+
+       lu_local_obj_fid(fid, INDEX_BACKUP_OID);        /* fill fid for INDEX_BACKUP_OID */
+       osd_id_gen(id, osd->od_index_backup_inode->i_ino,
+                  osd->od_index_backup_inode->i_generation);   /* NOTE(review): od_index_backup_inode is dereferenced without a NULL check -- confirm callers guarantee it */
+       osd_add_oi_cache(info, osd, id, fid);
+
+       lustre_index_backup(env, &osd->od_dt_dev, osd_name(osd),
+                           &osd->od_index_backup_list, &osd->od_lock,
+                           &osd->od_index_backup_stop, backup);        /* "backup" is passed through unchanged */
+}
+
+#ifdef LDISKFS_HAS_INCOMPAT_FEATURE    /* define ldiskfs_has_feature_*(sb) on top of the LDISKFS_HAS_*_FEATURE() macros */
+
+# ifdef LDISKFS_FEATURE_INCOMPAT_EXTENTS
+# define ldiskfs_has_feature_extents(sb) \
+       LDISKFS_HAS_INCOMPAT_FEATURE(sb, LDISKFS_FEATURE_INCOMPAT_EXTENTS)
+# endif
+# ifdef LDISKFS_FEATURE_INCOMPAT_EA_INODE
+# define ldiskfs_has_feature_ea_inode(sb) \
+       LDISKFS_HAS_INCOMPAT_FEATURE(sb, LDISKFS_FEATURE_INCOMPAT_EA_INODE)
+# endif
+# ifdef LDISKFS_FEATURE_INCOMPAT_DIRDATA
+# define ldiskfs_has_feature_dirdata(sb) \
+       LDISKFS_HAS_INCOMPAT_FEATURE(sb, LDISKFS_FEATURE_INCOMPAT_DIRDATA)
+# endif
+# ifdef LDISKFS_FEATURE_COMPAT_HAS_JOURNAL     /* NOTE(review): this and the two wrappers below use the COMPAT/RO_COMPAT macros but are guarded only by the outer INCOMPAT check */
+# define ldiskfs_has_feature_journal(sb) \
+       LDISKFS_HAS_COMPAT_FEATURE(sb, LDISKFS_FEATURE_COMPAT_HAS_JOURNAL)
+# endif
+# ifdef LDISKFS_FEATURE_RO_COMPAT_QUOTA
+# define ldiskfs_has_feature_quota(sb) \
+       LDISKFS_HAS_RO_COMPAT_FEATURE(sb, LDISKFS_FEATURE_RO_COMPAT_QUOTA)
+# endif
+# ifdef LDISKFS_FEATURE_RO_COMPAT_PROJECT
+# define ldiskfs_has_feature_project(sb) \
+       LDISKFS_HAS_RO_COMPAT_FEATURE(sb, LDISKFS_FEATURE_RO_COMPAT_PROJECT)
+# endif
+
+#endif /* LDISKFS_HAS_INCOMPAT_FEATURE */
+
+int osd_trunc_lock(struct osd_object *obj, struct osd_thandle *oh,
+                  bool shared);
+void osd_trunc_unlock_all(struct list_head *list);
+void osd_process_truncates(struct list_head *list);
+void osd_execute_truncate(struct osd_object *obj);
+
+#ifdef HAVE_BIO_ENDIO_USES_ONE_ARG     /* one-argument form: the "error" argument is dropped */
+#define osd_dio_complete_routine(bio, error) dio_complete_routine(bio)
+#else                                  /* two-argument form: both arguments are forwarded */
+#define osd_dio_complete_routine(bio, error) dio_complete_routine(bio, error)
+#endif
+
+#ifndef HAVE___BI_CNT  /* spell __bi_cnt as bi_cnt */
+#define __bi_cnt bi_cnt
+#endif
+
+#ifndef HAVE_BI_OPF    /* spell bi_opf as bi_rw */
+#define bi_opf bi_rw
+#endif
+
+#ifndef HAVE_CLEAN_BDEV_ALIASES        /* fall back to unmap_underlying_metadata(); "len" is ignored */
+#define clean_bdev_aliases(bdev, block, len)   \
+       unmap_underlying_metadata((bdev), (block))
+#endif
+
+#ifndef HAVE_BI_STATUS /* spell bi_status as bi_error */
+#define bi_status bi_error
+#endif
+
+/*
+ * Maximum size of an xattr for FEATURE_INCOMPAT_EA_INODE: 1 MiB.
+ * This limit is arbitrary, but is reasonable for the xattr API.
+ */
+#define LDISKFS_XATTR_MAX_LARGE_EA_SIZE    (1024 * 1024)
+
+struct osd_bio_private {       /* NOTE(review): presumably hung off a bio as its private data -- confirm */
+       struct osd_iobuf        *obp_iobuf;     /* iobuf that obp_start_page_idx indexes into */
+       /* Start page index in the obp_iobuf for the bio */
+       int                      obp_start_page_idx;
+};
+
+#ifdef HAVE_BIO_INTEGRITY_PREP_FN      /* only declared here; the definition is not in this header */
+int osd_get_integrity_profile(struct osd_device *osd,
+                             integrity_gen_fn **generate_fn,
+                             integrity_vrfy_fn **verify_fn);
+#else  /* !HAVE_BIO_INTEGRITY_PREP_FN: placeholder types and stub helpers */
+#define integrity_gen_fn void  /* placeholder type so the signatures below still compile */
+#define integrity_vrfy_fn int  /* placeholder type so the signatures below still compile */
+static inline int osd_get_integrity_profile(struct osd_device *osd,
+                                           integrity_gen_fn **generate_fn,
+                                           integrity_vrfy_fn **verify_fn)
+{
+       return 0;       /* stub: always 0; *generate_fn and *verify_fn are not written */
+}
+
+static inline bool bio_integrity_prep_fn(struct bio *bio,
+                                        integrity_gen_fn *generate_fn,
+                                        integrity_vrfy_fn *verify_fn)
+{
+       return bio_integrity_prep(bio); /* generate_fn and verify_fn are ignored */
+}
+#endif /* HAVE_BIO_INTEGRITY_PREP_FN */
 
 #endif /* _OSD_INTERNAL_H */