X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fosd-ldiskfs%2Fosd_internal.h;h=ab5fc62afbd55b4145f8b00de9bfb4e1bf32e8f4;hp=068c9af662ab531c1e2c029034c66799afb0ad67;hb=3ff4a0744c229e0199bc7d93db9221c3bfb1f846;hpb=67076c3c7e2b11023b943db2f5031d9b9a11329c diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index 068c9af..ab5fc62a 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -1,6 +1,4 @@ -/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- - * vim:expandtab:shiftwidth=8:tabstop=8: - * +/* * GPL HEADER START * * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. @@ -74,15 +72,20 @@ #include #include #include +#include #include "osd_oi.h" #include "osd_iam.h" +#include "osd_scrub.h" +#include "osd_quota_fmt.h" struct inode; -#define OSD_OII_NOGEN (0) #define OSD_COUNTERS (0) +/* Lustre special inode::i_state to indicate OI scrub skip this inode. */ +#define I_LUSTRE_NOSCRUB (1 << 31) + /** Enable thandle usage statistics */ #define OSD_THANDLE_STATS (0) @@ -99,6 +102,19 @@ struct osd_directory { struct iam_descr od_descr; }; +/* + * Object Index (oi) instance. + */ +struct osd_oi { + /* + * underlying index object, where fid->id mapping in stored. + */ + struct inode *oi_inode; + struct osd_directory oi_dir; +}; + +extern const int osd_dto_credits_noquota[]; + struct osd_object { struct dt_object oo_dt; /** @@ -181,6 +197,55 @@ static inline void ldiskfs_htree_lock_free(struct htree_lock *lk) #endif /* HAVE_LDISKFS_PDO */ +#define OSD_OTABLE_IT_CACHE_SIZE 128 +#define OSD_OTABLE_IT_CACHE_MASK (~(OSD_OTABLE_IT_CACHE_SIZE - 1)) + +struct osd_inconsistent_item { + /* link into osd_scrub::os_inconsistent_items, + * protected by osd_scrub::os_lock. */ + cfs_list_t oii_list; + + /* The right FID <=> ino#/gen mapping. */ + struct osd_idmap_cache oii_cache; + + unsigned int oii_insert:1; /* insert or update mapping. */ +}; + +struct osd_otable_cache { + struct osd_idmap_cache ooc_cache[OSD_OTABLE_IT_CACHE_SIZE]; + + /* Index for next cache slot to be filled. */ + int ooc_producer_idx; + + /* Index for next cache slot to be returned by it::next(). */ + int ooc_consumer_idx; + + /* How many items in ooc_cache. */ + int ooc_cached_items; + + /* Position for up layer LFSCK iteration pre-loading. */ + __u32 ooc_pos_preload; +}; + +struct osd_otable_it { + struct osd_device *ooi_dev; + struct osd_otable_cache ooi_cache; + + /* For osd_otable_it_key. */ + __u8 ooi_key[16]; + + /* The following bits can be updated/checked w/o lock protection. + * If more bits will be introduced in the future and need lock to + * protect, please add comment. */ + unsigned long ooi_used_outside:1, /* Some user out of OSD + * uses the iteration. */ + ooi_all_cached:1, /* No more entries can be + * filled into cache. */ + ooi_user_ready:1, /* The user out of OSD is + * ready to iterate. */ + ooi_waiting:1; /* it::next is waiting. */ +}; + extern const int osd_dto_credits_noquota[]; /* @@ -190,20 +255,16 @@ struct osd_device { /* super-class */ struct dt_device od_dt_dev; /* information about underlying file system */ - struct lustre_mount_info *od_mount; - /* - * XXX temporary stuff for object index: directory where every object - * is named by its fid. - */ - struct dt_object *od_obj_area; + struct vfsmount *od_mnt; /* object index */ - struct osd_oi *od_oi_table; + struct osd_oi **od_oi_table; /* total number of OI containers */ int od_oi_count; /* * Fid Capability */ - unsigned int od_fl_capa:1; + unsigned int od_fl_capa:1, + od_is_md:1; /* set in ->ldo_prepare */ unsigned long od_capa_timeout; __u32 od_capa_alg; struct lustre_capa_key *od_capa_keys; @@ -215,7 +276,7 @@ struct osd_device { * statfs optimization: we cache a bit. */ cfs_time_t od_osfs_age; - cfs_kstatfs_t od_kstatfs; + struct obd_statfs od_statfs; cfs_spinlock_t od_osfs_lock; /** @@ -225,6 +286,13 @@ struct osd_device { __u32 od_iop_mode; struct fsfilt_operations *od_fsops; + int od_connects; + struct lu_site od_site; + + /* + * mapping for legacy OST objids + */ + struct osd_compat_objid *od_ost_map; unsigned long long od_readcache_max_filesize; int od_read_cache; @@ -233,6 +301,16 @@ struct osd_device { struct brw_stats od_brw_stats; cfs_atomic_t od_r_in_flight; cfs_atomic_t od_w_in_flight; + + cfs_mutex_t od_otable_mutex; + struct osd_otable_it *od_otable_it; + struct osd_scrub od_scrub; + + /* service name associated with the osd device */ + char od_svname[MAX_OBD_NAME]; + + /* quota slave instance */ + struct qsd_instance *od_quota_slave; }; #define OSD_TRACK_DECLARES @@ -240,11 +318,13 @@ struct osd_device { #define OSD_DECLARE_OP(oh, op) { \ LASSERT(oh->ot_handle == NULL); \ ((oh)->ot_declare_ ##op)++; } -#define OSD_EXEC_OP(handle, op) { \ +#define OSD_EXEC_OP(handle,op) { \ struct osd_thandle *oh; \ oh = container_of0(handle, struct osd_thandle, ot_super);\ - LASSERT((oh)->ot_declare_ ##op > 0); \ - ((oh)->ot_declare_ ##op)--; } + if (((oh)->ot_declare_ ##op) > 0) { \ + ((oh)->ot_declare_ ##op)--; \ + } \ + } #else #define OSD_DECLARE_OP(oh, op) #define OSD_EXEC_OP(oh, op) @@ -275,6 +355,7 @@ struct osd_thandle { unsigned short ot_id_cnt; unsigned short ot_id_type; uid_t ot_id_array[OSD_MAX_UGID_CNT]; + struct lquota_trans *ot_quota_trans; #ifdef OSD_TRACK_DECLARES unsigned char ot_declare_attr_set; @@ -395,6 +476,28 @@ struct osd_it_iam { struct iam_iterator oi_it; }; +struct osd_quota_leaf { + cfs_list_t oql_link; + uint oql_blk; +}; + +/** + * Iterator's in-memory data structure for quota file. + */ +struct osd_it_quota { + struct osd_object *oiq_obj; + /** tree blocks path to where the entry is stored */ + uint oiq_blk[LUSTRE_DQTREEDEPTH + 1]; + /** on-disk offset for current key where quota record can be found */ + loff_t oiq_offset; + /** identifier for current quota record */ + __u64 oiq_id; + /** the record index in the leaf/index block */ + uint oiq_index[LUSTRE_DQTREEDEPTH + 1]; + /** list of already processed leaf blocks */ + cfs_list_t oiq_list; +}; + #define MAX_BLOCKS_PER_PAGE (CFS_PAGE_SIZE / 512) struct osd_iobuf { @@ -427,7 +530,11 @@ struct osd_thread_info { struct htree_lock *oti_hlock; struct lu_fid oti_fid; - struct osd_inode_id oti_id; + struct lu_fid oti_fid2; + struct osd_inode_id oti_id; + struct osd_inode_id oti_id2; + struct ost_id oti_ostid; + /* * XXX temporary: for ->i_op calls. */ @@ -454,15 +561,19 @@ struct osd_thread_info { /** osd iterator context used for iterator session */ - union { - struct osd_it_iam oti_it; - /** ldiskfs iterator data structure, see osd_it_ea_{init, fini} */ - struct osd_it_ea oti_it_ea; - }; + union { + struct osd_it_iam oti_it; + /* ldiskfs iterator data structure, + * see osd_it_ea_{init, fini} */ + struct osd_it_ea oti_it_ea; + struct osd_it_quota oti_it_quota; + }; /** pre-allocated buffer used by oti_it_ea, size OSD_IT_EA_BUFSIZE */ void *oti_it_ea_buf; + cfs_kstatfs_t oti_ksfs; + /** IAM iterator for index operation. */ struct iam_iterator oti_idx_it; @@ -477,6 +588,7 @@ struct osd_thread_info { long long oti_alignment_lieutenant_colonel; }; + struct osd_idmap_cache oti_cache; int oti_r_locks; int oti_w_locks; @@ -496,6 +608,16 @@ struct osd_thread_info { #define OSD_FID_REC_SZ 32 char oti_ldp[OSD_FID_REC_SZ]; char oti_ldp2[OSD_FID_REC_SZ]; + + /* used by quota code */ + union { + struct if_dqblk oti_dqblk; + struct if_dqinfo oti_dqinfo; + }; + struct lquota_id_info oti_qi; + struct lquota_trans oti_quota_trans; + union lquota_rec oti_quota_rec; + __u64 oti_quota_id; }; extern int ldiskfs_pdo; @@ -512,13 +634,76 @@ void osd_brw_stats_update(struct osd_device *osd, struct osd_iobuf *iobuf); #endif int osd_statfs(const struct lu_env *env, struct dt_device *dev, - cfs_kstatfs_t *sfs); + struct obd_statfs *sfs); int osd_object_auth(const struct lu_env *env, struct dt_object *dt, struct lustre_capa *capa, __u64 opc); -void osd_declare_qid(struct dt_object *dt, struct osd_thandle *oh, - int type, uid_t id, struct inode *inode); -int generic_error_remove_page(struct address_space *mapping, - struct page *page); +struct inode *osd_iget(struct osd_thread_info *info, struct osd_device *dev, + struct osd_inode_id *id); +struct inode *osd_iget_fid(struct osd_thread_info *info, struct osd_device *dev, + struct osd_inode_id *id, struct lu_fid *fid); + +int osd_compat_init(struct osd_device *dev); +void osd_compat_fini(struct osd_device *dev); +int osd_compat_objid_lookup(struct osd_thread_info *info, + struct osd_device *osd, + const struct lu_fid *fid, struct osd_inode_id *id); +int osd_compat_objid_insert(struct osd_thread_info *info, + struct osd_device *osd, + const struct lu_fid *fid, + const struct osd_inode_id *id, struct thandle *th); +int osd_compat_objid_delete(struct osd_thread_info *info, + struct osd_device *osd, + const struct lu_fid *fid, struct thandle *th); +int osd_compat_spec_lookup(struct osd_thread_info *info, + struct osd_device *osd, + const struct lu_fid *fid, struct osd_inode_id *id); +int osd_compat_spec_insert(struct osd_thread_info *info, + struct osd_device *osd, + const struct lu_fid *fid, + const struct osd_inode_id *id, struct thandle *th); + +void osd_scrub_file_reset(struct osd_scrub *scrub, __u8 *uuid, __u64 flags); +int osd_scrub_file_store(struct osd_scrub *scrub); +int osd_scrub_start(struct osd_device *dev); +int osd_scrub_setup(const struct lu_env *env, struct osd_device *dev); +void osd_scrub_cleanup(const struct lu_env *env, struct osd_device *dev); +int osd_oii_insert(struct osd_device *dev, struct osd_idmap_cache *oic, + int insert); +int osd_oii_lookup(struct osd_device *dev, const struct lu_fid *fid, + struct osd_inode_id *id); +int osd_scrub_dump(struct osd_device *dev, char *buf, int len); + +/* osd_quota_fmt.c */ +int walk_tree_dqentry(const struct lu_env *env, struct osd_object *obj, + int type, uint blk, int depth, uint index, + struct osd_it_quota *it); +int walk_block_dqentry(const struct lu_env *env, struct osd_object *obj, + int type, uint blk, uint index, + struct osd_it_quota *it); +loff_t find_tree_dqentry(const struct lu_env *env, + struct osd_object *obj, int type, + qid_t dqid, uint blk, int depth, + struct osd_it_quota *it); +/* osd_quota.c */ +int osd_declare_qid(const struct lu_env *env, struct osd_thandle *oh, + struct lquota_id_info *qi, bool allocated, int *flags); +int osd_declare_inode_qid(const struct lu_env *env, qid_t uid, qid_t gid, + long long space, struct osd_thandle *oh, + bool is_blk, bool allocated, int *flags, bool force); +const struct dt_rec *osd_quota_pack(struct osd_object *obj, + const struct dt_rec *rec, + union lquota_rec *quota_rec); +void osd_quota_unpack(struct osd_object *obj, const struct dt_rec *rec); +int osd_quota_migration(const struct lu_env *env, struct dt_object *dt, + const struct dt_index_features *feat); + +static inline bool is_quota_glb_feat(const struct dt_index_features *feat) +{ + return (feat == &dt_quota_iusr_features || + feat == &dt_quota_busr_features || + feat == &dt_quota_igrp_features || + feat == &dt_quota_bgrp_features) ? true : false; +} /* * Invariants, assertions. @@ -546,37 +731,25 @@ static inline int osd_invariant(const struct osd_object *obj) #define osd_invariant(obj) (1) #endif -/* The on-disk extN format reserves inodes 0-11 for internal filesystem - * use, and these inodes will be invisible on client side, so the valid - * sequence for IGIF fid is 12-0xffffffff. But root inode (2#) will be seen - * on server side (osd), and it should be valid too here. - */ -#define OSD_ROOT_SEQ 2 -static inline int osd_fid_is_root(const struct lu_fid *fid) -{ - return fid_seq(fid) == OSD_ROOT_SEQ; -} +extern const struct dt_index_operations osd_otable_ops; -static inline int osd_fid_is_igif(const struct lu_fid *fid) +static inline int osd_oi_fid2idx(struct osd_device *dev, + const struct lu_fid *fid) { - return fid_is_igif(fid) || osd_fid_is_root(fid); + return fid->f_seq & (dev->od_oi_count - 1); } -static inline struct osd_oi * -osd_fid2oi(struct osd_device *osd, const struct lu_fid *fid) +static inline struct osd_oi *osd_fid2oi(struct osd_device *osd, + const struct lu_fid *fid) { - if (!fid_is_norm(fid)) - return NULL; - - LASSERT(osd->od_oi_table != NULL && osd->od_oi_count >= 1); - /* It can work even od_oi_count equals to 1 although it's unexpected, - * the only reason we set it to 1 is for performance measurement */ - return &osd->od_oi_table[fid->f_seq & (osd->od_oi_count - 1)]; + LASSERTF(!fid_is_idif(fid), DFID"\n", PFID(fid)); + LASSERTF(!fid_is_igif(fid), DFID"\n", PFID(fid)); + LASSERT(osd->od_oi_table != NULL && osd->od_oi_count >= 1); + /* It can work even od_oi_count equals to 1 although it's unexpected, + * the only reason we set it to 1 is for performance measurement */ + return osd->od_oi_table[osd_oi_fid2idx(osd, fid)]; } -/* - * Helpers. - */ extern const struct lu_device_operations osd_lu_ops; static inline int lu_device_is_osd(const struct lu_device *d) @@ -603,7 +776,7 @@ static inline struct osd_device *osd_obj2dev(const struct osd_object *o) static inline struct super_block *osd_sb(const struct osd_device *dev) { - return dev->od_mount->lmi_mnt->mnt_sb; + return dev->od_mnt->mnt_sb; } static inline int osd_object_is_root(const struct osd_object *obj) @@ -642,5 +815,111 @@ static inline struct osd_thread_info *osd_oti_get(const struct lu_env *env) extern const struct dt_body_operations osd_body_ops_new; +/** + * IAM Iterator + */ +static inline +struct iam_path_descr *osd_it_ipd_get(const struct lu_env *env, + const struct iam_container *bag) +{ + return bag->ic_descr->id_ops->id_ipd_alloc(bag, + osd_oti_get(env)->oti_it_ipd); +} + +static inline +struct iam_path_descr *osd_idx_ipd_get(const struct lu_env *env, + const struct iam_container *bag) +{ + return bag->ic_descr->id_ops->id_ipd_alloc(bag, + osd_oti_get(env)->oti_idx_ipd); +} + +static inline void osd_ipd_put(const struct lu_env *env, + const struct iam_container *bag, + struct iam_path_descr *ipd) +{ + bag->ic_descr->id_ops->id_ipd_free(ipd); +} + +int osd_ldiskfs_read(struct inode *inode, void *buf, int size, loff_t *offs); +int osd_ldiskfs_write_record(struct inode *inode, void *buf, int bufsize, + int write_NUL, loff_t *offs, handle_t *handle); + +static inline +struct dentry *osd_child_dentry_by_inode(const struct lu_env *env, + struct inode *inode, + const char *name, const int namelen) +{ + struct osd_thread_info *info = osd_oti_get(env); + struct dentry *child_dentry = &info->oti_child_dentry; + struct dentry *obj_dentry = &info->oti_obj_dentry; + + obj_dentry->d_inode = inode; + obj_dentry->d_sb = inode->i_sb; + obj_dentry->d_name.hash = 0; + + child_dentry->d_name.hash = 0; + child_dentry->d_parent = obj_dentry; + child_dentry->d_name.name = name; + child_dentry->d_name.len = namelen; + return child_dentry; +} + +/** + * Helper function to pack the fid, ldiskfs stores fid in packed format. + */ +static inline +void osd_fid_pack(struct osd_fid_pack *pack, const struct dt_rec *fid, + struct lu_fid *befider) +{ + fid_cpu_to_be(befider, (struct lu_fid *)fid); + memcpy(pack->fp_area, befider, sizeof(*befider)); + pack->fp_len = sizeof(*befider) + 1; +} + +static inline +int osd_fid_unpack(struct lu_fid *fid, const struct osd_fid_pack *pack) +{ + int result; + + result = 0; + switch (pack->fp_len) { + case sizeof *fid + 1: + memcpy(fid, pack->fp_area, sizeof *fid); + fid_be_to_cpu(fid, fid); + break; + default: + CERROR("Unexpected packed fid size: %d\n", pack->fp_len); + result = -EIO; + } + return result; +} + +/** + * Quota/Accounting handling + */ +extern const struct dt_index_operations osd_acct_index_ops; +int osd_acct_obj_lookup(struct osd_thread_info *info, struct osd_device *osd, + const struct lu_fid *fid, struct osd_inode_id *id); + +/* copy from fs/ext4/dir.c */ +static inline int is_32bit_api(void) +{ +#ifdef CONFIG_COMPAT + return is_compat_task(); +#else + return (BITS_PER_LONG == 32); +#endif +} + +static inline loff_t ldiskfs_get_htree_eof(struct file *filp) +{ + if ((filp->f_mode & FMODE_32BITHASH) || + (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api())) + return LDISKFS_HTREE_EOF_32BIT; + else + return LDISKFS_HTREE_EOF_64BIT; +} + #endif /* __KERNEL__ */ #endif /* _OSD_INTERNAL_H */