X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fosd%2Fosd_internal.h;h=0b3aad3c53cd275f89365673815b41bdc2b658f3;hb=76691e6500bebf20276913ecd9e6f5775a7e62ae;hp=4a476db679f458d4f945cf06e473681bfeccc8e2;hpb=ee78987810b09e294e89b42321139dd06af03441;p=fs%2Flustre-release.git diff --git a/lustre/osd/osd_internal.h b/lustre/osd/osd_internal.h index 4a476db..0b3aad3 100644 --- a/lustre/osd/osd_internal.h +++ b/lustre/osd/osd_internal.h @@ -47,13 +47,22 @@ /* struct rw_semaphore */ #include -/* handle_t, journal_start(), journal_stop() */ -#include -/* struct dx_hash_info */ -#include /* struct dentry */ #include -#include +/* struct dirent64 */ +#include + +#ifdef HAVE_EXT4_LDISKFS +#include +#include +#define osd_journal_callback_set(handle, func, jcb) jbd2_journal_callback_set(handle, func, jcb) +#else +#include +#include +#include +#define osd_journal_callback_set(handle, func, jcb) journal_callback_set(handle, func, jcb) +#endif + /* LUSTRE_OSD_NAME */ #include @@ -63,9 +72,21 @@ #include #include "osd_oi.h" +#include "osd_iam.h" struct inode; +#define OSD_OII_NOGEN (0) +#define OSD_COUNTERS (0) + +#ifdef HAVE_QUOTA_SUPPORT +struct osd_ctxt { + __u32 oc_uid; + __u32 oc_gid; + __u32 oc_cap; +}; +#endif + /* * osd device. */ @@ -80,7 +101,7 @@ struct osd_device { * XXX temporary stuff for object index: directory where every object * is named by its fid. */ - struct dentry *od_obj_area; + struct dt_object *od_obj_area; /* Environment for transaction commit callback. * Currently, OSD is based on ext3/JBD. Transaction commit in ext3/JBD @@ -97,34 +118,98 @@ struct osd_device { unsigned long od_capa_timeout; __u32 od_capa_alg; struct lustre_capa_key *od_capa_keys; - struct hlist_head *od_capa_hash; - + cfs_hlist_head_t *od_capa_hash; + cfs_proc_dir_entry_t *od_proc_entry; struct lprocfs_stats *od_stats; /* * statfs optimization: we cache a bit. */ cfs_time_t od_osfs_age; - struct kstatfs od_kstatfs; - spinlock_t od_osfs_lock; + cfs_kstatfs_t od_kstatfs; + cfs_spinlock_t od_osfs_lock; + + /** + * The following flag indicates, if it is interop mode or not. + * It will be initialized, using mount param. + */ + __u32 od_iop_mode; }; +/** + * Storage representation for fids. + * + * Variable size, first byte contains the length of the whole record. + */ +struct osd_fid_pack { + unsigned char fp_len; + char fp_area[sizeof(struct lu_fid)]; +}; + +struct osd_it_ea_dirent { + struct lu_fid oied_fid; + __u64 oied_ino; + __u64 oied_off; + unsigned short oied_namelen; + unsigned int oied_type; + char oied_name[0]; +} __attribute__((packed)); + +/** + * as osd_it_ea_dirent (in memory dirent struct for osd) is greater + * than lu_dirent struct. osd readdir reads less number of dirent than + * required for mdd dir page. so buffer size need to be increased so that + * there would be one ext3 readdir for every mdd readdir page. + */ + +#define OSD_IT_EA_BUFSIZE (CFS_PAGE_SIZE + CFS_PAGE_SIZE/4) + +/** + * This is iterator's in-memory data structure in interoperability + * mode (i.e. iterator over ldiskfs style directory) + */ +struct osd_it_ea { + struct osd_object *oie_obj; + /** used in ldiskfs iterator, to stored file pointer */ + struct file oie_file; + /** how many entries have been read-cached from storage */ + int oie_rd_dirent; + /** current entry is being iterated by caller */ + int oie_it_dirent; + /** current processing entry */ + struct osd_it_ea_dirent *oie_dirent; + /** buffer to hold entries, size == OSD_IT_EA_BUFSIZE */ + void *oie_buf; +}; + +/** + * Iterator's in-memory data structure for IAM mode. + */ +struct osd_it_iam { + struct osd_object *oi_obj; + struct iam_path_descr *oi_ipd; + struct iam_iterator oi_it; +}; struct osd_thread_info { const struct lu_env *oti_env; + /** + * used for index operations. + */ + struct dentry oti_obj_dentry; + struct dentry oti_child_dentry; + + /** dentry for Iterator context. */ + struct dentry oti_it_dentry; struct lu_fid oti_fid; struct osd_inode_id oti_id; /* * XXX temporary: for ->i_op calls. */ - struct qstr oti_str; struct txn_param oti_txn; - /* - * XXX temporary: fake dentry used by xattr calls. - */ - struct dentry oti_dentry; struct timespec oti_time; + struct timespec oti_time2; /* * XXX temporary: fake struct file for osd_object_sync */ @@ -135,16 +220,52 @@ struct osd_thread_info { struct lustre_capa_key oti_capa_key; struct lustre_capa oti_capa; - struct lu_fid_pack oti_pack; + /** + * following ipd and it structures are used for osd_index_iam_lookup() + * these are defined separately as we might do index operation + * in open iterator session. + */ + + /** osd iterator context used for iterator session */ + + union { + struct osd_it_iam oti_it; + /** ldiskfs iterator data structure, see osd_it_ea_{init, fini} */ + struct osd_it_ea oti_it_ea; + }; + + /** pre-allocated buffer used by oti_it_ea, size OSD_IT_EA_BUFSIZE */ + void *oti_it_ea_buf; - /* union to guarantee that ->oti_ipd[] has proper alignment. */ + /** IAM iterator for index operation. */ + struct iam_iterator oti_idx_it; + + /** union to guarantee that ->oti_ipd[] has proper alignment. */ union { - char oti_ipd[DX_IPD_MAX_SIZE]; + char oti_it_ipd[DX_IPD_MAX_SIZE]; long long oti_alignment_lieutenant; }; + + union { + char oti_idx_ipd[DX_IPD_MAX_SIZE]; + long long oti_alignment_lieutenant_colonel; + }; + + int oti_r_locks; int oti_w_locks; int oti_txns; + /** used in osd_fid_set() to put xattr */ + struct lu_buf oti_buf; + /** used in osd_ea_fid_set() to set fid into common ea */ + struct lustre_mdt_attrs oti_mdt_attrs; +#ifdef HAVE_QUOTA_SUPPORT + struct osd_ctxt oti_ctxt; +#endif + struct lu_env oti_obj_delete_tx_env; +#define OSD_FID_REC_SZ 32 + char oti_ldp[OSD_FID_REC_SZ]; + char oti_ldp2[OSD_FID_REC_SZ]; }; #ifdef LPROCFS @@ -157,7 +278,49 @@ void osd_lprocfs_time_end(const struct lu_env *env, struct osd_device *osd, int op); #endif int osd_statfs(const struct lu_env *env, struct dt_device *dev, - struct kstatfs *sfs); + cfs_kstatfs_t *sfs); + +/* + * Invariants, assertions. + */ + +/* + * XXX: do not enable this, until invariant checking code is made thread safe + * in the face of pdirops locking. + */ +#define OSD_INVARIANT_CHECKS (0) + +#if OSD_INVARIANT_CHECKS +static inline int osd_invariant(const struct osd_object *obj) +{ + return + obj != NULL && + ergo(obj->oo_inode != NULL, + obj->oo_inode->i_sb == osd_sb(osd_obj2dev(obj)) && + atomic_read(&obj->oo_inode->i_count) > 0) && + ergo(obj->oo_dir != NULL && + obj->oo_dir->od_conationer.ic_object != NULL, + obj->oo_dir->od_conationer.ic_object == obj->oo_inode); +} +#else +#define osd_invariant(obj) (1) +#endif + +/* The on-disk extN format reserves inodes 0-11 for internal filesystem + * use, and these inodes will be invisible on client side, so the valid + * sequence for IGIF fid is 12-0xffffffff. But root inode (2#) will be seen + * on server side (osd), and it should be valid too here. + */ +#define OSD_ROOT_SEQ 2 +static inline int osd_fid_is_root(const struct lu_fid *fid) +{ + return fid_seq(fid) == OSD_ROOT_SEQ; +} + +static inline int osd_fid_is_igif(const struct lu_fid *fid) +{ + return fid_is_igif(fid) || osd_fid_is_root(fid); +} #endif /* __KERNEL__ */ #endif /* _OSD_INTERNAL_H */