Whamcloud - gitweb
LU-1866 osd: ancillary work for initial OI scrub
[fs/lustre-release.git] / lustre / osd-ldiskfs / osd_internal.h
index 1bdd60e..f28af74 100644 (file)
@@ -27,7 +27,7 @@
  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  * Use is subject to license terms.
  *
- * Copyright (c) 2011, 2012, Whamcloud, Inc.
+ * Copyright (c) 2011, 2012, Intel Corporation.
  */
 /*
  * This file is part of Lustre, http://www.lustre.org/
@@ -83,12 +83,24 @@ struct inode;
 
 #define OSD_COUNTERS (0)
 
-/* Lustre special inode::i_state to indicate OI scrub skip this inode. */
-#define I_LUSTRE_NOSCRUB       (1 << 31)
+/* The ldiskfs-specific inode::i_state_flags bits below must be accessed
+ * only through ldiskfs_{set,clear,test}_inode_state(). */
+
+/* OI scrub should skip this inode. */
+#define LDISKFS_STATE_LUSTRE_NOSCRUB   31
+
+/* Do not add OI mapping for this inode. */
+#define LDISKFS_STATE_LUSTRE_NO_OI     30
 
 /** Enable thandle usage statistics */
 #define OSD_THANDLE_STATS (0)
 
+#define MAX_OBJID_GROUP (FID_SEQ_ECHO + 1)
+
+#define OBJECTS        "OBJECTS"
+#define ADMIN_USR      "admin_quotafile_v2.usr"
+#define ADMIN_GRP      "admin_quotafile_v2.grp"
+
 struct osd_directory {
         struct iam_container od_container;
         struct iam_descr     od_descr;
@@ -121,11 +133,11 @@ struct osd_object {
          * to protect index ops.
          */
         struct htree_lock_head *oo_hl_head;
-        cfs_rw_semaphore_t      oo_ext_idx_sem;
-        cfs_rw_semaphore_t      oo_sem;
-        struct osd_directory   *oo_dir;
-        /** protects inode attributes. */
-        cfs_spinlock_t          oo_guard;
+       struct rw_semaphore     oo_ext_idx_sem;
+       struct rw_semaphore     oo_sem;
+       struct osd_directory    *oo_dir;
+       /** protects inode attributes. */
+       spinlock_t              oo_guard;
         /**
          * Following two members are used to indicate the presence of dot and
          * dotdot in the given directory. This is required for interop mode
@@ -140,54 +152,27 @@ struct osd_object {
 #endif
 };
 
-#ifdef HAVE_LDISKFS_PDO
-
-#define osd_ldiskfs_find_entry(dir, dentry, de, lock)   \
-        ll_ldiskfs_find_entry(dir, dentry, de, lock)
-#define osd_ldiskfs_add_entry(handle, child, cinode, hlock) \
-        ldiskfs_add_entry(handle, child, cinode, hlock)
-
-#else /* HAVE_LDISKFS_PDO */
-
-struct htree_lock {
-        int     dummy;
+struct osd_obj_seq { /* bookkeeping for one sequence directory, O/<seq> */
+       /* NOTE(review): this comment used to read "protects on-fly
+        * initialization", but no lock member is declared in this struct;
+        * confirm which lock was meant and move or drop the remark. */
+       int              oos_subdir_count; /* number of subdirs for this seq */
+       struct dentry    *oos_root;        /* dentry of O/<seq> */
+       struct dentry    **oos_dirs;       /* dentries of O/<seq>/d0-dXX */
+       obd_seq          oos_seq;          /* sequence number */
+       cfs_list_t       oos_seq_list;     /* link in the seq list (cf. osd_obj_map::om_seq_list) */
 };
 
-struct htree_lock_head {
-        int     dummy;
+struct osd_obj_map { /* top-level object map rooted at /O */
+       struct dentry    *om_root;        /* dentry for /O */
+       rwlock_t         om_seq_list_lock; /* protects om_seq_list */
+       cfs_list_t       om_seq_list;      /* list head of per-seq entries */
+       int              om_subdir_count; /* NOTE(review): presumably the default subdir count for a seq -- confirm */
+       struct semaphore om_dir_init_sem; /* NOTE(review): presumably serializes on-the-fly dir init -- confirm */
 };
 
-#define ldiskfs_htree_lock(lock, head, inode, op)  do { LBUG(); } while (0)
-#define ldiskfs_htree_unlock(lock)                 do { LBUG(); } while (0)
-
-static inline struct htree_lock_head *ldiskfs_htree_lock_head_alloc(int dep)
-{
-        LBUG();
-        return NULL;
-}
-
-#define ldiskfs_htree_lock_head_free(lh)           do { LBUG(); } while (0)
-
-#define LDISKFS_DUMMY_HTREE_LOCK        0xbabecafe
-
-static inline struct htree_lock *ldiskfs_htree_lock_alloc(void)
-{
-        return (struct htree_lock *)LDISKFS_DUMMY_HTREE_LOCK;
-}
-
-static inline void ldiskfs_htree_lock_free(struct htree_lock *lk)
-{
-        LASSERT((unsigned long)lk == LDISKFS_DUMMY_HTREE_LOCK);
-}
-
-#define HTREE_HBITS_DEF         0
-
 #define osd_ldiskfs_find_entry(dir, dentry, de, lock)   \
-        ll_ldiskfs_find_entry(dir, dentry, de)
-#define osd_ldiskfs_add_entry(handle, child, cinode, lock) \
-        ldiskfs_add_entry(handle, child, cinode)
-
-#endif /* HAVE_LDISKFS_PDO */
+        ll_ldiskfs_find_entry(dir, dentry, de, lock)
+#define osd_ldiskfs_add_entry(handle, child, cinode, hlock) \
+        ldiskfs_add_entry(handle, child, cinode, hlock)
 
 #define OSD_OTABLE_IT_CACHE_SIZE       128
 #define OSD_OTABLE_IT_CACHE_MASK       (~(OSD_OTABLE_IT_CACHE_SIZE - 1))
@@ -269,18 +254,16 @@ struct osd_device {
          */
         cfs_time_t                od_osfs_age;
         struct obd_statfs         od_statfs;
-        cfs_spinlock_t            od_osfs_lock;
+       spinlock_t                od_osfs_lock;
 
-       unsigned int              od_noscrub:1;
+       unsigned int              od_noscrub:1,
+                                 od_handle_nolma:1;
 
-        struct fsfilt_operations *od_fsops;
+       struct fsfilt_operations *od_fsops;
        int                       od_connects;
        struct lu_site            od_site;
 
-        /*
-         * mapping for legacy OST objids
-         */
-        struct osd_compat_objid  *od_ost_map;
+       struct osd_obj_map      *od_ost_map;
 
         unsigned long long        od_readcache_max_filesize;
         int                       od_read_cache;
@@ -290,37 +273,18 @@ struct osd_device {
         cfs_atomic_t              od_r_in_flight;
         cfs_atomic_t              od_w_in_flight;
 
-       cfs_mutex_t               od_otable_mutex;
+       struct mutex              od_otable_mutex;
        struct osd_otable_it     *od_otable_it;
        struct osd_scrub          od_scrub;
 
        /* service name associated with the osd device */
        char                      od_svname[MAX_OBD_NAME];
+       char                      od_mntdev[MAX_OBD_NAME];
 
        /* quota slave instance */
        struct qsd_instance      *od_quota_slave;
 };
 
-#define OSD_TRACK_DECLARES
-#ifdef OSD_TRACK_DECLARES
-#define OSD_DECLARE_OP(oh, op, credits)                                        \
-do {                                                                   \
-       LASSERT((oh)->ot_handle == NULL);                               \
-       ((oh)->ot_declare_ ##op)++;                                     \
-       ((oh)->ot_declare_ ##op ##_cred) += (credits);                  \
-       (oh)->ot_credits += (credits);                                  \
-} while (0)
-#define OSD_EXEC_OP(handle, op)                                                \
-do {                                                                   \
-       struct osd_thandle *oh = container_of(handle, typeof(*oh), ot_super); \
-       LASSERT((oh)->ot_declare_ ##op > 0);                            \
-       ((oh)->ot_declare_ ##op)--;                                     \
-} while (0)
-#else
-#define OSD_DECLARE_OP(oh, op, credits) (oh)->ot_credits += (credits)
-#define OSD_EXEC_OP(oh, op)
-#endif
-
 /* At most 10 uid/gids are affected in a single transaction; the worst
  * case is rename:
  * - 2 for source parent uid & gid;
@@ -334,6 +298,23 @@ do {                                                                       \
  */
 #define OSD_MAX_UGID_CNT        10
 
+enum { /* operation codes; every code is below OSD_OT_MAX */
+       OSD_OT_ATTR_SET         = 0,
+       OSD_OT_PUNCH            = 1,
+       OSD_OT_XATTR_SET        = 2,
+       OSD_OT_CREATE           = 3,
+       OSD_OT_DESTROY          = 4,
+       OSD_OT_REF_ADD          = 5,
+       OSD_OT_REF_DEL          = 6,
+       OSD_OT_WRITE            = 7,
+       OSD_OT_INSERT           = 8,
+       OSD_OT_DELETE           = 9,
+       OSD_OT_QUOTA            = 10,
+       OSD_OT_MAX              = 11 /* one past the last code; sizes the oti_declare_ops*[] arrays */
+};
+
+#define OSD_TRACK_DECLARES
+
 struct osd_thandle {
         struct thandle          ot_super;
         handle_t               *ot_handle;
@@ -346,36 +327,6 @@ struct osd_thandle {
         unsigned short          ot_id_type;
         uid_t                   ot_id_array[OSD_MAX_UGID_CNT];
        struct lquota_trans    *ot_quota_trans;
-
-#ifdef OSD_TRACK_DECLARES
-       /* Tracking for transaction credits, to allow debugging and optimizing
-        * cases where a large number of credits are being allocated for
-        * single transaction. */
-       unsigned char           ot_declare_attr_set;
-       unsigned char           ot_declare_punch;
-       unsigned char           ot_declare_xattr_set;
-       unsigned char           ot_declare_create;
-       unsigned char           ot_declare_destroy;
-       unsigned char           ot_declare_ref_add;
-       unsigned char           ot_declare_ref_del;
-       unsigned char           ot_declare_write;
-       unsigned char           ot_declare_insert;
-       unsigned char           ot_declare_delete;
-       unsigned char           ot_declare_quota;
-
-       unsigned short          ot_declare_attr_set_cred;
-       unsigned short          ot_declare_punch_cred;
-       unsigned short          ot_declare_xattr_set_cred;
-       unsigned short          ot_declare_create_cred;
-       unsigned short          ot_declare_destroy_cred;
-       unsigned short          ot_declare_ref_add_cred;
-       unsigned short          ot_declare_ref_del_cred;
-       unsigned short          ot_declare_write_cred;
-       unsigned short          ot_declare_insert_cred;
-       unsigned short          ot_declare_delete_cred;
-       unsigned short          ot_declare_quota_cred;
-#endif
-
 #if OSD_THANDLE_STATS
         /** time when this handle was allocated */
         cfs_time_t oth_alloced;
@@ -603,7 +554,11 @@ struct osd_thread_info {
         /** used in osd_fid_set() to put xattr */
         struct lu_buf          oti_buf;
         /** used in osd_ea_fid_set() to set fid into common ea */
-        struct lustre_mdt_attrs oti_mdt_attrs;
+       union {
+               struct lustre_mdt_attrs oti_mdt_attrs;
+               /* old LMA for compatibility */
+               char                    oti_mdt_attrs_old[LMA_OLD_SIZE];
+       };
         /** 0-copy IO */
         struct osd_iobuf       oti_iobuf;
         struct inode           oti_inode;
@@ -622,6 +577,18 @@ struct osd_thread_info {
        struct lquota_trans     oti_quota_trans;
        union lquota_rec        oti_quota_rec;
        __u64                   oti_quota_id;
+       struct lu_seq_range     oti_seq_range;
+
+#ifdef OSD_TRACK_DECLARES
+       /* Tracking for transaction credits, to allow debugging and optimizing
+        * cases where a large number of credits are being allocated for
+        * single transaction. */
+       unsigned char           oti_declare_ops[OSD_OT_MAX];
+       unsigned char           oti_declare_ops_rb[OSD_OT_MAX];
+       unsigned short          oti_declare_ops_cred[OSD_OT_MAX];
+       bool                    oti_rollback;
+#endif
+
 };
 
 extern int ldiskfs_pdo;
@@ -631,9 +598,6 @@ extern int ldiskfs_pdo;
 void lprocfs_osd_init_vars(struct lprocfs_static_vars *lvars);
 int osd_procfs_init(struct osd_device *osd, const char *name);
 int osd_procfs_fini(struct osd_device *osd);
-void osd_lprocfs_time_start(const struct lu_env *env);
-void osd_lprocfs_time_end(const struct lu_env *env,
-                          struct osd_device *osd, int op);
 void osd_brw_stats_update(struct osd_device *osd, struct osd_iobuf *iobuf);
 
 #endif
@@ -643,28 +607,23 @@ int osd_object_auth(const struct lu_env *env, struct dt_object *dt,
                     struct lustre_capa *capa, __u64 opc);
 struct inode *osd_iget(struct osd_thread_info *info, struct osd_device *dev,
                       struct osd_inode_id *id);
-struct inode *osd_iget_fid(struct osd_thread_info *info, struct osd_device *dev,
-                          struct osd_inode_id *id, struct lu_fid *fid);
-
-int osd_compat_init(struct osd_device *dev);
-void osd_compat_fini(struct osd_device *dev);
-int osd_compat_objid_lookup(struct osd_thread_info *info,
-                            struct osd_device *osd,
-                            const struct lu_fid *fid, struct osd_inode_id *id);
-int osd_compat_objid_insert(struct osd_thread_info *info,
-                            struct osd_device *osd,
-                            const struct lu_fid *fid,
-                            const struct osd_inode_id *id, struct thandle *th);
-int osd_compat_objid_delete(struct osd_thread_info *info,
-                            struct osd_device *osd,
-                            const struct lu_fid *fid, struct thandle *th);
-int osd_compat_spec_lookup(struct osd_thread_info *info,
-                           struct osd_device *osd,
-                           const struct lu_fid *fid, struct osd_inode_id *id);
-int osd_compat_spec_insert(struct osd_thread_info *info,
-                           struct osd_device *osd,
-                           const struct lu_fid *fid,
-                           const struct osd_inode_id *id, struct thandle *th);
+int osd_get_lma(struct osd_thread_info *info, struct inode *inode,
+               struct dentry *dentry, struct lustre_mdt_attrs *lma);
+
+int osd_obj_map_init(struct osd_device *osd);
+void osd_obj_map_fini(struct osd_device *dev);
+int osd_obj_map_lookup(struct osd_thread_info *info, struct osd_device *osd,
+                       const struct lu_fid *fid, struct osd_inode_id *id);
+int osd_obj_map_insert(struct osd_thread_info *info, struct osd_device *osd,
+                      const struct lu_fid *fid, const struct osd_inode_id *id,
+                      struct thandle *th);
+int osd_obj_map_delete(struct osd_thread_info *info, struct osd_device *osd,
+                       const struct lu_fid *fid, struct thandle *th);
+int osd_obj_spec_lookup(struct osd_thread_info *info, struct osd_device *osd,
+                       const struct lu_fid *fid, struct osd_inode_id *id);
+int osd_obj_spec_insert(struct osd_thread_info *info, struct osd_device *osd,
+                       const struct lu_fid *fid, const struct osd_inode_id *id,
+                       struct thandle *th);
 
 void osd_scrub_file_reset(struct osd_scrub *scrub, __u8 *uuid, __u64 flags);
 int osd_scrub_file_store(struct osd_scrub *scrub);
@@ -677,6 +636,8 @@ int osd_oii_lookup(struct osd_device *dev, const struct lu_fid *fid,
                   struct osd_inode_id *id);
 int osd_scrub_dump(struct osd_device *dev, char *buf, int len);
 
+int osd_fld_lookup(const struct lu_env *env, struct osd_device *osd,
+                  const struct lu_fid *fid, struct lu_seq_range *range);
 /* osd_quota_fmt.c */
 int walk_tree_dqentry(const struct lu_env *env, struct osd_object *obj,
                       int type, uint blk, int depth, uint index,
@@ -749,7 +710,7 @@ static inline struct osd_oi *osd_fid2oi(struct osd_device *osd,
                                         const struct lu_fid *fid)
 {
        LASSERTF(!fid_is_idif(fid), DFID"\n", PFID(fid));
-       LASSERTF(!fid_is_igif(fid), DFID"\n", PFID(fid));
+       LASSERTF(!fid_is_last_id(fid), DFID"\n", PFID(fid));
        LASSERT(osd->od_oi_table != NULL && osd->od_oi_count >= 1);
        /* It works even when od_oi_count equals 1, although that is unexpected;
         * the only reason to set it to 1 is for performance measurement */
@@ -811,6 +772,17 @@ static inline journal_t *osd_journal(const struct osd_device *dev)
         return LDISKFS_SB(osd_sb(dev))->s_journal;
 }
 
+/**
+ * Return the sequence-server site attached to the lu_site that the OSD's
+ * embedded lu_device (od_dt_dev.dd_lu_dev) points to.
+ */
+static inline struct seq_server_site *osd_seq_site(struct osd_device *osd)
+{
+       struct lu_site *site = osd->od_dt_dev.dd_lu_dev.ld_site;
+
+       return site->ld_seq_site;
+}
+
+/**
+ * Return the obd_name of the obd device referenced by the OSD's embedded
+ * lu_device (od_dt_dev.dd_lu_dev.ld_obd).
+ */
+static inline char *osd_name(struct osd_device *osd)
+{
+       struct obd_device *obd = osd->od_dt_dev.dd_lu_dev.ld_obd;
+
+       return obd->obd_name;
+}
+
+
 extern const struct dt_body_operations osd_body_ops;
 extern struct lu_context_key osd_key;
 
@@ -871,6 +843,78 @@ struct dentry *osd_child_dentry_by_inode(const struct lu_env *env,
         return child_dentry;
 }
 
+#ifdef OSD_TRACK_DECLARES
+extern int osd_trans_declare_op2rb[];
+
+/**
+ * Account for one declaration of operation \a op costing \a credits.
+ *
+ * Asserts that oh->ot_handle is still NULL and that \a op is below
+ * OSD_OT_MAX, then bumps the per-thread declare counter and credit total
+ * for \a op and adds \a credits to the handle's overall ot_credits.
+ */
+static inline void osd_trans_declare_op(const struct lu_env *env,
+                                       struct osd_thandle *oh,
+                                       unsigned int op, int credits)
+{
+       struct osd_thread_info *oti;
+
+       oti = osd_oti_get(env);
+       LASSERT(oh->ot_handle == NULL);
+       LASSERT(op < OSD_OT_MAX);
+
+       /* the three updates are independent of each other */
+       oh->ot_credits += credits;
+       oti->oti_declare_ops_cred[op] += credits;
+       ++oti->oti_declare_ops[op];
+}
+
+/**
+ * Record the execution of operation \a op against its declarations.
+ *
+ * Normal path: while the thread is not in rollback mode and a declaration
+ * for \a op is outstanding, consume one declaration and grant one rollback
+ * slot for \a op.
+ *
+ * Rollback path: otherwise set oti_rollback (this function never clears it)
+ * and charge the call to the rollback slot that osd_trans_declare_op2rb[]
+ * maps \a op to; that slot must be in range and non-empty.
+ */
+static inline void osd_trans_exec_op(const struct lu_env *env,
+                                    struct thandle *th, unsigned int op)
+{
+       struct osd_thread_info *oti = osd_oti_get(env);
+       struct osd_thandle     *oh;
+       unsigned int            rb;
+
+       oh = container_of(th, struct osd_thandle, ot_super);
+       LASSERT(oh->ot_handle != NULL);
+       LASSERT(op < OSD_OT_MAX);
+
+       if (likely(!oti->oti_rollback && oti->oti_declare_ops[op] > 0)) {
+               oti->oti_declare_ops[op]--;
+               oti->oti_declare_ops_rb[op]++;
+               return;
+       }
+
+       /* all future updates are considered rollback */
+       oti->oti_rollback = true;
+       rb = osd_trans_declare_op2rb[op];
+       LASSERTF(rb < OSD_OT_MAX, "op = %u\n", op);
+       LASSERTF(oti->oti_declare_ops_rb[rb] > 0, "rb = %u\n", rb);
+       oti->oti_declare_ops_rb[rb]--;
+}
+
+/**
+ * Grant one additional rollback slot for operation \a op.
+ *
+ * Asserts that the osd_thandle wrapping \a th has a non-NULL ot_handle and
+ * that \a op is below OSD_OT_MAX.
+ */
+static inline void osd_trans_declare_rb(const struct lu_env *env,
+                                       struct thandle *th, unsigned int op)
+{
+       struct osd_thread_info *oti = osd_oti_get(env);
+       struct osd_thandle     *oh;
+
+       oh = container_of(th, struct osd_thandle, ot_super);
+       LASSERT(oh->ot_handle != NULL);
+       LASSERT(op < OSD_OT_MAX);
+
+       oti->oti_declare_ops_rb[op] += 1;
+}
+#else
+/**
+ * Variant used when OSD_TRACK_DECLARES is not defined: no per-op
+ * accounting, only add \a credits to the handle's ot_credits.
+ */
+static inline void osd_trans_declare_op(const struct lu_env *env,
+                                       struct osd_thandle *oh,
+                                       unsigned int op, int credits)
+{
+       oh->ot_credits = oh->ot_credits + credits;
+}
+
+/** Variant used when OSD_TRACK_DECLARES is not defined: does nothing. */
+static inline void osd_trans_exec_op(const struct lu_env *env,
+                                    struct thandle *th, unsigned int op)
+{
+       /* nothing to track */
+}
+
+/** Variant used when OSD_TRACK_DECLARES is not defined: does nothing. */
+static inline void osd_trans_declare_rb(const struct lu_env *env,
+                                       struct thandle *th, unsigned int op)
+{
+       /* nothing to track */
+}
+#endif
+
 /**
  * Helper function to pack the fid, ldiskfs stores fid in packed format.
  */