X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lustre%2Fllite%2Fllite_internal.h;h=aaa55b7755e0d44f91bb9b74613ee112d4be6864;hp=bcc6c2e96e6d166613d30447bcbd903d31951547;hb=0140f50c12879076090beb5a369691ab28835c33;hpb=adb5aca3d67380078c4bd35c74651a7b9f848f4d diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index bcc6c2e..aaa55b7 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -44,10 +44,11 @@ #include #include #include - #include + #include "vvp_internal.h" #include "range_lock.h" +#include "pcc.h" #ifndef FMODE_EXEC #define FMODE_EXEC 0 @@ -135,17 +136,13 @@ struct ll_inode_info { /* update atime from MDS no matter if it's older than * local inode atime. */ - unsigned int lli_update_atime:1, - lli_inode_locked:1; + unsigned int lli_update_atime:1; /* Try to make the d::member and f::member are aligned. Before using * these members, make clear whether it is directory or not. */ union { /* for directory */ struct { - /* serialize normal readdir and statahead-readdir. */ - struct mutex lli_readdir_mutex; - /* metadata statahead */ /* since parent-child threads can share the same @file * struct, "opendir_key" is the token when dir close for @@ -170,12 +167,8 @@ struct ll_inode_info { struct rw_semaphore lli_lsm_sem; /* directory stripe information */ struct lmv_stripe_md *lli_lsm_md; - /* default directory stripe offset. This is extracted - * from the "dmv" xattr in order to decide which MDT to - * create a subdirectory on. The MDS itself fetches - * "dmv" and gets the rest of the default layout itself - * (count, hash, etc). */ - __u32 lli_def_stripe_offset; + /* directory default LMV */ + struct lmv_stripe_md *lli_default_lsm_md; }; /* for non-directory */ @@ -200,6 +193,11 @@ struct ll_inode_info { /* for writepage() only to communicate to fsync */ int lli_async_rc; + /* protect the file heat fields */ + spinlock_t lli_heat_lock; + __u32 lli_heat_flags; + struct obd_heat_instance lli_heat_instances[OBD_HEAT_COUNT]; + /* * Whenever a process try to read/write the file, the * jobid of the process will be saved here, and it'll @@ -209,6 +207,13 @@ struct ll_inode_info { * accurate if the file is shared by different jobs. */ char lli_jobid[LUSTRE_JOBID_SIZE]; + + struct mutex lli_pcc_lock; + enum lu_pcc_state_flags lli_pcc_state; + struct pcc_inode *lli_pcc_inode; + struct mutex lli_group_mutex; + __u64 lli_group_users; + unsigned long lli_group_gid; }; }; @@ -321,6 +326,9 @@ int ll_dentry_init_security(struct dentry *dentry, int mode, struct qstr *name, int ll_inode_init_security(struct dentry *dentry, struct inode *inode, struct inode *dir); +int ll_listsecurity(struct inode *inode, char *secctx_name, + size_t secctx_name_size); + /* * Locking to guarantee consistency of non-atomic updates to long long i_size, * consistency between file size and KMS. @@ -331,18 +339,24 @@ int ll_inode_init_security(struct dentry *dentry, struct inode *inode, void ll_inode_size_lock(struct inode *inode); void ll_inode_size_unlock(struct inode *inode); -// FIXME: replace the name of this with LL_I to conform to kernel stuff -// static inline struct ll_inode_info *LL_I(struct inode *inode) static inline struct ll_inode_info *ll_i2info(struct inode *inode) { - return container_of(inode, struct ll_inode_info, lli_vfs_inode); + return container_of(inode, struct ll_inode_info, lli_vfs_inode); +} + +static inline struct pcc_inode *ll_i2pcci(struct inode *inode) +{ + return ll_i2info(inode)->lli_pcc_inode; } +/* default to use at least 16M for fast read if possible */ +#define RA_REMAIN_WINDOW_MIN MiB_TO_PAGES(16UL) + /* default to about 64M of readahead on a given system. */ -#define SBI_DEFAULT_READAHEAD_MAX (64UL << (20 - PAGE_SHIFT)) +#define SBI_DEFAULT_READAHEAD_MAX MiB_TO_PAGES(64UL) /* default to read-ahead full files smaller than 2MB on the second read */ -#define SBI_DEFAULT_READAHEAD_WHOLE_MAX (2UL << (20 - PAGE_SHIFT)) +#define SBI_DEFAULT_READAHEAD_WHOLE_MAX MiB_TO_PAGES(2UL) enum ra_stat { RA_STAT_HIT = 0, @@ -358,6 +372,8 @@ enum ra_stat { RA_STAT_MAX_IN_FLIGHT, RA_STAT_WRONG_GRAB_PAGE, RA_STAT_FAILED_REACH_END, + RA_STAT_ASYNC, + RA_STAT_FAILED_FAST_READ, _NR_RA_STAT, }; @@ -366,6 +382,16 @@ struct ll_ra_info { unsigned long ra_max_pages; unsigned long ra_max_pages_per_file; unsigned long ra_max_read_ahead_whole_pages; + struct workqueue_struct *ll_readahead_wq; + /* + * Max number of active works for readahead workqueue, + * default is 0 which make workqueue init number itself, + * unless there is a specific need for throttling the + * number of active work items, specifying '0' is recommended. + */ + unsigned int ra_async_max_active; + /* Threshold to control when to trigger async readahead */ + unsigned long ra_async_pages_per_file_threshold; }; /* ra_io_arg will be filled in the beginning of ll_readahead with @@ -451,9 +477,10 @@ enum stats_track_type { * suppress_pings */ #define LL_SBI_FAST_READ 0x400000 /* fast read support */ #define LL_SBI_FILE_SECCTX 0x800000 /* set file security context at create */ -#define LL_SBI_PIO 0x1000000 /* parallel IO support */ +/* LL_SBI_PIO 0x1000000 parallel IO support, introduced in + 2.10, abandoned */ #define LL_SBI_TINY_WRITE 0x2000000 /* tiny write support */ - +#define LL_SBI_FILE_HEAT 0x4000000 /* file heat support */ #define LL_SBI_FLAGS { \ "nolck", \ "checksum", \ @@ -480,7 +507,8 @@ enum stats_track_type { "fast_read", \ "file_secctx", \ "pio", \ - "tiny_write", \ + "tiny_write", \ + "file_heat", \ } /* This is embedded into llite super-blocks to keep track of connect @@ -511,8 +539,7 @@ struct ll_sb_info { struct lu_fid ll_root_fid; /* root object fid */ int ll_flags; - unsigned int ll_umounting:1, - ll_xattr_cache_enabled:1, + unsigned int ll_xattr_cache_enabled:1, ll_xattr_cache_set:1, /* already set to 0/1 */ ll_client_common_fill_super_succeeded:1, ll_checksum_set:1; @@ -567,10 +594,25 @@ struct ll_sb_info { /* st_blksize returned by stat(2), when non-zero */ unsigned int ll_stat_blksize; + /* maximum relative age of cached statfs results */ + unsigned int ll_statfs_max_age; + struct kset ll_kset; /* sysfs object */ struct completion ll_kobj_unregister; + + /* File heat */ + unsigned int ll_heat_decay_weight; + unsigned int ll_heat_period_second; + + /* filesystem fsname */ + char ll_fsname[LUSTRE_MAXFSNAME + 1]; + + /* Persistent Client Cache */ + struct pcc_super ll_pcc_super; }; +#define SBI_DEFAULT_HEAT_DECAY_WEIGHT ((80 * 256 + 50) / 100) +#define SBI_DEFAULT_HEAT_PERIOD_SECOND (60) /* * per file-descriptor read-ahead data. */ @@ -654,6 +696,20 @@ struct ll_readahead_state { * stride read-ahead will be enable */ unsigned long ras_consecutive_stride_requests; + /* index of the last page that async readahead starts */ + unsigned long ras_async_last_readpage; +}; + +struct ll_readahead_work { + /** File to readahead */ + struct file *lrw_file; + /** Start bytes */ + unsigned long lrw_start; + /** End bytes */ + unsigned long lrw_end; + + /* async worker to handler read */ + struct work_struct lrw_readahead_work; }; extern struct kmem_cache *ll_file_data_slab; @@ -682,6 +738,7 @@ struct ll_file_data { /* The layout version when resync starts. Resync I/O should carry this * layout version for verification to OST objects */ __u32 fd_layout_version; + struct pcc_file fd_pcc_file; }; void llite_tunables_unregister(void); @@ -728,6 +785,11 @@ static inline bool ll_sbi_has_tiny_write(struct ll_sb_info *sbi) return !!(sbi->ll_flags & LL_SBI_TINY_WRITE); } +static inline bool ll_sbi_has_file_heat(struct ll_sb_info *sbi) +{ + return !!(sbi->ll_flags & LL_SBI_FILE_HEAT); +} + void ll_ras_enter(struct file *f); /* llite/lcommon_misc.c */ @@ -743,12 +805,8 @@ void ll_debugfs_unregister_super(struct super_block *sb); void ll_stats_ops_tally(struct ll_sb_info *sbi, int op, int count); enum { - LPROC_LL_DIRTY_HITS, - LPROC_LL_DIRTY_MISSES, LPROC_LL_READ_BYTES, LPROC_LL_WRITE_BYTES, - LPROC_LL_BRW_READ, - LPROC_LL_BRW_WRITE, LPROC_LL_IOCTL, LPROC_LL_OPEN, LPROC_LL_RELEASE, @@ -770,7 +828,7 @@ enum { LPROC_LL_RMDIR, LPROC_LL_MKNOD, LPROC_LL_RENAME, - LPROC_LL_STAFS, + LPROC_LL_STATFS, LPROC_LL_ALLOC_INODE, LPROC_LL_SETXATTR, LPROC_LL_GETXATTR, @@ -861,6 +919,7 @@ int ll_getattr(const struct path *path, struct kstat *stat, #else int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat); #endif +int ll_getattr_dentry(struct dentry *de, struct kstat *stat); struct posix_acl *ll_get_acl(struct inode *inode, int type); #ifdef HAVE_IOP_SET_ACL #ifdef CONFIG_FS_POSIX_ACL @@ -874,15 +933,7 @@ int ll_migrate(struct inode *parent, struct file *file, struct lmv_user_md *lum, const char *name); int ll_get_fid_by_name(struct inode *parent, const char *name, int namelen, struct lu_fid *fid, struct inode **inode); -#ifdef HAVE_GENERIC_PERMISSION_4ARGS -int ll_inode_permission(struct inode *inode, int mask, unsigned int flags); -#else -# ifndef HAVE_INODE_PERMISION_2ARGS -int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd); -# else int ll_inode_permission(struct inode *inode, int mask); -# endif -#endif int ll_ioctl_check_project(struct inode *inode, struct fsxattr *fa); int ll_ioctl_fsgetxattr(struct inode *inode, unsigned int cmd, unsigned long arg); @@ -900,13 +951,7 @@ int ll_dir_setstripe(struct inode *inode, struct lov_user_md *lump, int ll_dir_getstripe(struct inode *inode, void **lmmp, int *lmm_size, struct ptlrpc_request **request, u64 valid); -#ifdef HAVE_FILE_FSYNC_4ARGS int ll_fsync(struct file *file, loff_t start, loff_t end, int data); -#elif defined(HAVE_FILE_FSYNC_2ARGS) -int ll_fsync(struct file *file, int data); -#else -int ll_fsync(struct file *file, struct dentry *dentry, int data); -#endif int ll_merge_attr(const struct lu_env *env, struct inode *inode); int ll_fid2path(struct inode *inode, void __user *arg); int ll_data_version(struct inode *inode, __u64 *data_version, int flags); @@ -963,22 +1008,14 @@ int ll_get_max_mdsize(struct ll_sb_info *sbi, int *max_mdsize); int ll_get_default_mdsize(struct ll_sb_info *sbi, int *default_mdsize); int ll_set_default_mdsize(struct ll_sb_info *sbi, int default_mdsize); -enum { - LUSTRE_OPC_MKDIR = 0, - LUSTRE_OPC_SYMLINK = 1, - LUSTRE_OPC_MKNOD = 2, - LUSTRE_OPC_CREATE = 3, - LUSTRE_OPC_ANY = 5, -}; - void ll_unlock_md_op_lsm(struct md_op_data *op_data); struct md_op_data *ll_prep_md_op_data(struct md_op_data *op_data, struct inode *i1, struct inode *i2, const char *name, size_t namelen, - __u32 mode, __u32 opc, void *data); + __u32 mode, enum md_op_code opc, + void *data); void ll_finish_md_op_data(struct md_op_data *op_data); int ll_get_obd_name(struct inode *inode, unsigned int cmd, unsigned long arg); -char *ll_get_fsname(struct super_block *sb, char *buf, int buflen); void ll_compute_rootsquash_state(struct ll_sb_info *sbi); ssize_t ll_copy_user_md(const struct lov_user_md __user *md, struct lov_user_md **kbuf); @@ -1003,6 +1040,8 @@ static inline ssize_t ll_lov_user_md_size(const struct lov_user_md *lum) LOV_USER_MAGIC_SPECIFIC); case LOV_USER_MAGIC_COMP_V1: return ((struct lov_comp_md_v1 *)lum)->lcm_size; + case LOV_USER_MAGIC_FOREIGN: + return foreign_size(lum); } return -EINVAL; @@ -1052,7 +1091,6 @@ struct ll_cl_context { }; struct ll_thread_info { - struct iov_iter lti_iter; struct vvp_io_args lti_args; struct ra_io_arg lti_ria; struct ll_cl_context lti_io_ctx; @@ -1080,6 +1118,9 @@ static inline struct vvp_io_args *ll_env_args(const struct lu_env *env, return via; } +void ll_io_init(struct cl_io *io, struct file *file, enum cl_io_type iot, + struct vvp_io_args *args); + /* llite/llite_mmap.c */ int ll_teardown_mmaps(struct address_space *mapping, __u64 first, __u64 last); @@ -1089,23 +1130,6 @@ void policy_from_vma(union ldlm_policy_data *policy, struct vm_area_struct *vma, struct vm_area_struct *our_vma(struct mm_struct *mm, unsigned long addr, size_t count); -static inline void ll_invalidate_page(struct page *vmpage) -{ - struct address_space *mapping = vmpage->mapping; - loff_t offset = vmpage->index << PAGE_SHIFT; - - LASSERT(PageLocked(vmpage)); - if (mapping == NULL) - return; - - /* - * truncate_complete_page() calls - * a_ops->invalidatepage()->cl_page_delete()->vvp_page_delete(). - */ - ll_teardown_mmaps(mapping, offset, offset + PAGE_SIZE); - truncate_complete_page(mapping, vmpage); -} - #define ll_s2sbi(sb) (s2lsi(sb)->lsi_llsbi) /* don't need an addref as the sb_info should be holding one */ @@ -1154,6 +1178,13 @@ static inline struct lu_fid *ll_inode2fid(struct inode *inode) return fid; } +static inline bool ll_dir_striped(struct inode *inode) +{ + LASSERT(inode); + return S_ISDIR(inode->i_mode) && + lmv_dir_striped(ll_i2info(inode)->lli_lsm_md); +} + static inline loff_t ll_file_maxbytes(struct inode *inode) { struct cl_object *obj = ll_i2info(inode)->lli_clob; @@ -1387,24 +1418,6 @@ static inline void ll_set_lock_data(struct obd_export *exp, struct inode *inode, *bits = it->it_lock_bits; } -static inline void ll_lock_dcache(struct inode *inode) -{ -#ifdef HAVE_DCACHE_LOCK - spin_lock(&dcache_lock); -#else - spin_lock(&inode->i_lock); -#endif -} - -static inline void ll_unlock_dcache(struct inode *inode) -{ -#ifdef HAVE_DCACHE_LOCK - spin_unlock(&dcache_lock); -#else - spin_unlock(&inode->i_lock); -#endif -} - static inline int d_lustre_invalid(const struct dentry *dentry) { struct ll_dentry_data *lld = ll_d2d(dentry); @@ -1455,6 +1468,18 @@ static inline void d_lustre_revalidate(struct dentry *dentry) spin_unlock(&dentry->d_lock); } +static inline dev_t ll_compat_encode_dev(dev_t dev) +{ + /* The compat_sys_*stat*() syscalls will fail unless the + * device majors and minors are both less than 256. Note that + * the value returned here will be passed through + * old_encode_dev() in cp_compat_stat(). And so we are not + * trying to return a valid compat (u16) device number, just + * one that will pass the old_valid_dev() check. */ + + return MKDEV(MAJOR(dev) & 0xff, MINOR(dev) & 0xff); +} + int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf); int ll_layout_refresh(struct inode *inode, __u32 *gen); int ll_layout_restore(struct inode *inode, loff_t start, __u64 length);