-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2012, Whamcloud, Inc.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
/* for struct cl_lock_descr and struct cl_io */
#include <cl_object.h>
#include <lclient.h>
+#include <lustre_mdc.h>
+#include <linux/lustre_intent.h>
#ifndef FMODE_EXEC
#define FMODE_EXEC 0
#endif
-#ifndef DCACHE_LUSTRE_INVALID
-#define DCACHE_LUSTRE_INVALID 0x4000
+#ifndef VM_FAULT_RETRY
+#define VM_FAULT_RETRY 0
+#endif
+
+/* Kernel 3.1 kills LOOKUP_CONTINUE, LOOKUP_PARENT is equivalent to it.
+ * seem kernel commit 49084c3bb2055c401f3493c13edae14d49128ca0 */
+#ifndef LOOKUP_CONTINUE
+#define LOOKUP_CONTINUE LOOKUP_PARENT
#endif
+/** Only used on client-side for indicating the tail of dir hash/offset. */
+#define LL_DIR_END_OFF 0x7fffffffffffffffULL
+#define LL_DIR_END_OFF_32BIT 0x7fffffffUL
+
#define LL_IT2STR(it) ((it) ? ldlm_it2str((it)->it_op) : "0")
#define LUSTRE_FPRIVATE(file) ((file)->private_data)
-#ifdef HAVE_VFS_INTENT_PATCHES
-static inline struct lookup_intent *ll_nd2it(struct nameidata *nd)
-{
- return &nd->intent;
-}
-#endif
-
struct ll_dentry_data {
- int lld_cwd_count;
- int lld_mnt_count;
- struct obd_client_handle lld_cwd_och;
- struct obd_client_handle lld_mnt_och;
-#ifndef HAVE_VFS_INTENT_PATCHES
- struct lookup_intent *lld_it;
-#endif
- unsigned int lld_sa_generation;
+ int lld_cwd_count;
+ int lld_mnt_count;
+ struct obd_client_handle lld_cwd_och;
+ struct obd_client_handle lld_mnt_och;
+ struct lookup_intent *lld_it;
+ unsigned int lld_sa_generation;
+ unsigned int lld_invalid:1;
+ struct rcu_head lld_rcu_head;
};
#define ll_d2d(de) ((struct ll_dentry_data*)((de)->d_fsdata))
/* remote client permission cache */
#define REMOTE_PERM_HASHSIZE 16
+struct ll_getname_data {
+ char *lgd_name; /* points to a buffer with NAME_MAX+1 size */
+ struct lu_fid lgd_fid; /* target fid we are looking for */
+ int lgd_found; /* inode matched? */
+};
+
/* llite setxid/access permission for user on remote client */
struct ll_remote_perm {
cfs_hlist_node_t lrp_list;
* be sent to MDS. */
LLIF_SOM_DIRTY = (1 << 3),
/* File is contented */
- LLIF_CONTENDED = (1 << 4),
+ LLIF_CONTENDED = (1 << 4),
/* Truncate uses server lock for this file */
- LLIF_SRVLOCK = (1 << 5)
+ LLIF_SRVLOCK = (1 << 5),
};
struct ll_inode_info {
- int lli_inode_magic;
- cfs_semaphore_t lli_size_sem; /* protect open and change size */
- void *lli_size_sem_owner;
- cfs_semaphore_t lli_write_sem;
- cfs_rw_semaphore_t lli_trunc_sem;
- char *lli_symlink_name;
- __u64 lli_maxbytes;
- __u64 lli_ioepoch;
- unsigned long lli_flags;
- cfs_time_t lli_contention_time;
-
- /* this lock protects posix_acl, pending_write_llaps, mmap_cnt */
- cfs_spinlock_t lli_lock;
- cfs_list_t lli_close_list;
- /* handle is to be sent to MDS later on done_writing and setattr.
- * Open handle data are needed for the recovery to reconstruct
- * the inode state on the MDS. XXX: recovery is not ready yet. */
- struct obd_client_handle *lli_pending_och;
+ __u32 lli_inode_magic;
+ __u32 lli_flags;
+ __u64 lli_ioepoch;
- /* for writepage() only to communicate to fsync */
- int lli_async_rc;
- int lli_write_rc;
+ spinlock_t lli_lock;
+ struct posix_acl *lli_posix_acl;
- struct posix_acl *lli_posix_acl;
-
- /* remote permission hash */
- cfs_hlist_head_t *lli_remote_perms;
- unsigned long lli_rmtperm_utime;
- cfs_semaphore_t lli_rmtperm_sem;
-
- cfs_list_t lli_dead_list;
-
- cfs_semaphore_t lli_och_sem; /* Protects access to och pointers
- and their usage counters, also
- atomicity of check-update of
- lli_smd */
- /* We need all three because every inode may be opened in different
- modes */
- struct obd_client_handle *lli_mds_read_och;
- __u64 lli_open_fd_read_count;
- struct obd_client_handle *lli_mds_write_och;
- __u64 lli_open_fd_write_count;
- struct obd_client_handle *lli_mds_exec_och;
- __u64 lli_open_fd_exec_count;
-
- struct inode lli_vfs_inode;
+ cfs_hlist_head_t *lli_remote_perms;
+ struct mutex lli_rmtperm_mutex;
/* identifying fields for both metadata and data stacks. */
- struct lu_fid lli_fid;
- struct lov_stripe_md *lli_smd;
+ struct lu_fid lli_fid;
+ /* Parent fid for accessing default stripe data on parent directory
+ * for allocating OST objects after a mknod() and later open-by-FID. */
+ struct lu_fid lli_pfid;
- /* fid capability */
+ cfs_list_t lli_close_list;
+ cfs_list_t lli_oss_capas;
/* open count currently used by capability only, indicate whether
* capability needs renewal */
- cfs_atomic_t lli_open_count;
- struct obd_capa *lli_mds_capa;
- cfs_list_t lli_oss_capas;
-
- /* metadata statahead */
- /* protect statahead stuff: lli_opendir_pid, lli_opendir_key, lli_sai,
- * and so on. */
- cfs_spinlock_t lli_sa_lock;
- /*
- * "opendir_pid" is the token when lookup/revalid -- I am the owner of
- * dir statahead.
- */
- pid_t lli_opendir_pid;
- /*
- * since parent-child threads can share the same @file struct,
- * "opendir_key" is the token when dir close for case of parent exit
- * before child -- it is me should cleanup the dir readahead. */
- void *lli_opendir_key;
- struct ll_statahead_info *lli_sai;
- __u64 lli_sa_pos;
- struct cl_object *lli_clob;
- /* the most recent timestamps obtained from mds */
- struct ost_lvb lli_lvb;
- /**
- * serialize normal readdir and statahead-readdir
+ cfs_atomic_t lli_open_count;
+ struct obd_capa *lli_mds_capa;
+ cfs_time_t lli_rmtperm_time;
+
+ /* handle is to be sent to MDS later on done_writing and setattr.
+ * Open handle data are needed for the recovery to reconstruct
+ * the inode state on the MDS. XXX: recovery is not ready yet. */
+ struct obd_client_handle *lli_pending_och;
+
+ /* We need all three because every inode may be opened in different
+ * modes */
+ struct obd_client_handle *lli_mds_read_och;
+ struct obd_client_handle *lli_mds_write_och;
+ struct obd_client_handle *lli_mds_exec_och;
+ __u64 lli_open_fd_read_count;
+ __u64 lli_open_fd_write_count;
+ __u64 lli_open_fd_exec_count;
+ /* Protects access to och pointers and their usage counters */
+ struct mutex lli_och_mutex;
+
+ struct inode lli_vfs_inode;
+
+ /* the most recent timestamps obtained from mds */
+ struct ost_lvb lli_lvb;
+ spinlock_t lli_agl_lock;
+
+ /* Try to make the d::member and f::member are aligned. Before using
+ * these members, make clear whether it is directory or not. */
+ union {
+ /* for directory */
+ struct {
+ /* serialize normal readdir and statahead-readdir. */
+ struct mutex d_readdir_mutex;
+
+ /* metadata statahead */
+ /* since parent-child threads can share the same @file
+ * struct, "opendir_key" is the token when dir close for
+ * case of parent exit before child -- it is me should
+ * cleanup the dir readahead. */
+ void *d_opendir_key;
+ struct ll_statahead_info *d_sai;
+ struct posix_acl *d_def_acl;
+ /* protect statahead stuff. */
+ spinlock_t d_sa_lock;
+ /* "opendir_pid" is the token when lookup/revalid
+ * -- I am the owner of dir statahead. */
+ pid_t d_opendir_pid;
+ } d;
+
+#define lli_readdir_mutex u.d.d_readdir_mutex
+#define lli_opendir_key u.d.d_opendir_key
+#define lli_sai u.d.d_sai
+#define lli_def_acl u.d.d_def_acl
+#define lli_sa_lock u.d.d_sa_lock
+#define lli_opendir_pid u.d.d_opendir_pid
+
+ /* for non-directory */
+ struct {
+ struct semaphore f_size_sem;
+ void *f_size_sem_owner;
+ char *f_symlink_name;
+ __u64 f_maxbytes;
+ /*
+ * struct rw_semaphore {
+ * signed long count; // align d.d_def_acl
+ * spinlock_t wait_lock; // align d.d_sa_lock
+ * struct list_head wait_list;
+ * }
+ */
+ struct rw_semaphore f_trunc_sem;
+ struct mutex f_write_mutex;
+
+ struct rw_semaphore f_glimpse_sem;
+ cfs_time_t f_glimpse_time;
+ cfs_list_t f_agl_list;
+ __u64 f_agl_index;
+
+ /* for writepage() only to communicate to fsync */
+ int f_async_rc;
+
+ /*
+ * whenever a process try to read/write the file, the
+ * jobid of the process will be saved here, and it'll
+ * be packed into the write PRC when flush later.
+ *
+ * so the read/write statistics for jobid will not be
+ * accurate if the file is shared by different jobs.
+ */
+ char f_jobid[JOBSTATS_JOBID_SIZE];
+ } f;
+
+#define lli_size_sem u.f.f_size_sem
+#define lli_size_sem_owner u.f.f_size_sem_owner
+#define lli_symlink_name u.f.f_symlink_name
+#define lli_maxbytes u.f.f_maxbytes
+#define lli_trunc_sem u.f.f_trunc_sem
+#define lli_write_mutex u.f.f_write_mutex
+#define lli_glimpse_sem u.f.f_glimpse_sem
+#define lli_glimpse_time u.f.f_glimpse_time
+#define lli_agl_list u.f.f_agl_list
+#define lli_agl_index u.f.f_agl_index
+#define lli_async_rc u.f.f_async_rc
+#define lli_jobid u.f.f_jobid
+
+ } u;
+
+ /* XXX: For following frequent used members, although they maybe special
+ * used for non-directory object, it is some time-wasting to check
+ * whether the object is directory or not before using them. On the
+ * other hand, currently, sizeof(f) > sizeof(d), it cannot reduce
+ * the "ll_inode_info" size even if moving those members into u.f.
+ * So keep them out side.
+ *
+ * In the future, if more members are added only for directory,
+ * some of the following members can be moved into u.f.
*/
- cfs_semaphore_t lli_readdir_sem;
+ bool lli_has_smd;
+ struct cl_object *lli_clob;
+
+ /* mutex to request for layout lock exclusively. */
+ struct mutex lli_layout_mutex;
+ /* valid only inside LAYOUT ibits lock, protected by lli_layout_mutex */
+ __u32 lli_layout_gen;
};
/*
* Locking to guarantee consistency of non-atomic updates to long long i_size,
- * consistency between file size and KMS, and consistency within
- * ->lli_smd->lsm_oinfo[]'s.
+ * consistency between file size and KMS.
*
- * Implemented by ->lli_size_sem and ->lsm_sem, nested in that order.
+ * Implemented by ->lli_size_sem and ->lsm_lock, nested in that order.
*/
-void ll_inode_size_lock(struct inode *inode, int lock_lsm);
-void ll_inode_size_unlock(struct inode *inode, int unlock_lsm);
+void ll_inode_size_lock(struct inode *inode);
+void ll_inode_size_unlock(struct inode *inode);
// FIXME: replace the name of this with LL_I to conform to kernel stuff
// static inline struct ll_inode_info *LL_I(struct inode *inode)
#define LL_SBI_LOCALFLOCK 0x200 /* Local flocks support by kernel */
#define LL_SBI_LRU_RESIZE 0x400 /* lru resize support */
#define LL_SBI_LAZYSTATFS 0x800 /* lazystatfs mount option */
-#define LL_SBI_SOM_PREVIEW 0x1000 /* SOM preview mount option */
-#define LL_SBI_32BIT_API 0x2000 /* generate 32 bit inodes. */
+#define LL_SBI_SOM_PREVIEW 0x1000 /* SOM preview mount option */
+#define LL_SBI_32BIT_API 0x2000 /* generate 32 bit inodes. */
+#define LL_SBI_64BIT_HASH 0x4000 /* support 64-bits dir hash/offset */
+#define LL_SBI_AGL_ENABLED 0x8000 /* enable agl */
+#define LL_SBI_VERBOSE 0x10000 /* verbose mount/umount */
+#define LL_SBI_LAYOUT_LOCK 0x20000 /* layout lock support */
+#define LL_SBI_USER_FID2PATH 0x40000 /* allow fid2path by unprivileged users */
/* default value for ll_sb_info->contention_time */
#define SBI_DEFAULT_CONTENTION_SECONDS 60
};
struct rmtacl_ctl_table {
- cfs_spinlock_t rct_lock;
- cfs_list_t rct_entries[RCE_HASHES];
+ spinlock_t rct_lock;
+ cfs_list_t rct_entries[RCE_HASHES];
};
#define EE_HASHES 32
};
struct eacl_table {
- cfs_spinlock_t et_lock;
- cfs_list_t et_entries[EE_HASHES];
+ spinlock_t et_lock;
+ cfs_list_t et_entries[EE_HASHES];
};
struct ll_sb_info {
- cfs_list_t ll_list;
- /* this protects pglist and ra_info. It isn't safe to
- * grab from interrupt contexts */
- cfs_spinlock_t ll_lock;
- cfs_spinlock_t ll_pp_extent_lock; /* Lock for pp_extent entries */
- cfs_spinlock_t ll_process_lock; /* Lock for ll_rw_process_info */
+ cfs_list_t ll_list;
+ /* this protects pglist and ra_info. It isn't safe to
+ * grab from interrupt contexts */
+ spinlock_t ll_lock;
+ spinlock_t ll_pp_extent_lock; /* pp_extent entry*/
+ spinlock_t ll_process_lock; /* ll_rw_process_info */
struct obd_uuid ll_sb_uuid;
struct obd_export *ll_md_exp;
struct obd_export *ll_dt_exp;
struct lprocfs_stats *ll_stats; /* lprocfs stats counter */
- unsigned long ll_async_page_max;
- unsigned long ll_async_page_count;
+ struct cl_client_cache ll_cache;
struct lprocfs_stats *ll_ra_stats;
/* =0 - hold lock over whole read/write
* >0 - max. chunk to be read/written w/o lock re-acquiring */
unsigned long ll_max_rw_chunk;
+ unsigned int ll_md_brw_size; /* used by readdir */
struct lu_site *ll_site;
struct cl_device *ll_cl;
unsigned int ll_offset_process_count;
struct ll_rw_process_info ll_rw_offset_info[LL_OFFSET_HIST_MAX];
unsigned int ll_rw_offset_entry_count;
- enum stats_track_type ll_stats_track_type;
int ll_stats_track_id;
+ enum stats_track_type ll_stats_track_type;
int ll_rw_stats_on;
/* metadata stat-ahead */
* count */
atomic_t ll_sa_wrong; /* statahead thread stopped for
* low hit ratio */
+ atomic_t ll_agl_total; /* AGL thread started count */
dev_t ll_sdev_orig; /* save s_dev before assign for
* clustred nfs */
* per file-descriptor read-ahead data.
*/
struct ll_readahead_state {
- cfs_spinlock_t ras_lock;
+ spinlock_t ras_lock;
/*
* index of the last page that read(2) needed and that wasn't in the
* cache. Used by ras_update() to detect seeks.
unsigned long ras_consecutive_stride_requests;
};
-struct ll_file_dir {
- __u64 lfd_pos;
- __u64 lfd_next;
-};
-
extern cfs_mem_cache_t *ll_file_data_slab;
struct lustre_handle;
struct ll_file_data {
struct ll_readahead_state fd_ras;
int fd_omode;
struct ccc_grouplock fd_grouplock;
- struct ll_file_dir fd_dir;
+ __u64 lfd_pos;
__u32 fd_flags;
struct file *fd_file;
+ /* Indicate whether need to report failure when close.
+ * true: failure is known, not report again.
+ * false: unknown failure, should report. */
+ bool fd_write_failed;
};
struct lov_stripe_md;
-extern cfs_spinlock_t inode_lock;
+extern spinlock_t inode_lock;
extern struct proc_dir_entry *proc_lustre_fs_root;
/* llite/dir.c */
-static inline void ll_put_page(struct page *page)
-{
- kunmap(page);
- page_cache_release(page);
-}
-
+void ll_release_page(struct page *page, int remove);
extern struct file_operations ll_dir_operations;
extern struct inode_operations ll_dir_inode_operations;
-struct page *ll_get_dir_page(struct file *filp, struct inode *dir, __u64 hash,
- int exact, struct ll_dir_chain *chain);
+struct page *ll_get_dir_page(struct inode *dir, __u64 hash,
+ struct ll_dir_chain *chain);
+int ll_dir_read(struct inode *inode, __u64 *_pos, void *cookie,
+ filldir_t filldir);
int ll_get_mdt_idx(struct inode *inode);
/* llite/namei.c */
struct lustre_md *lic);
int ll_md_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
void *data, int flag);
-#ifndef HAVE_VFS_INTENT_PATCHES
+#ifndef HAVE_IOP_ATOMIC_OPEN
struct lookup_intent *ll_convert_intent(struct open_intent *oit,
int lookup_flags);
#endif
-int ll_lookup_it_finish(struct ptlrpc_request *request,
- struct lookup_intent *it, void *data);
+struct dentry *ll_splice_alias(struct inode *inode, struct dentry *de);
/* llite/rw.c */
int ll_prepare_write(struct file *, struct page *, unsigned from, unsigned to);
int ll_commit_write(struct file *, struct page *, unsigned from, unsigned to);
int ll_writepage(struct page *page, struct writeback_control *wbc);
+int ll_writepages(struct address_space *, struct writeback_control *wbc);
void ll_removepage(struct page *page);
int ll_readpage(struct file *file, struct page *page);
void ll_readahead_init(struct inode *inode, struct ll_readahead_state *ras);
-void ll_truncate(struct inode *inode);
int ll_file_punch(struct inode *, loff_t, int);
ssize_t ll_file_lockless_io(struct file *, char *, size_t, loff_t *, int);
void ll_clear_file_contended(struct inode*);
int ll_sync_page_range(struct inode *, struct address_space *, loff_t, size_t);
-int ll_readahead(const struct lu_env *env, struct cl_io *io, struct ll_readahead_state *ras,
- struct address_space *mapping, struct cl_page_list *queue, int flags);
+int ll_readahead(const struct lu_env *env, struct cl_io *io,
+ struct ll_readahead_state *ras, struct address_space *mapping,
+ struct cl_page_list *queue, int flags);
/* llite/file.c */
extern struct file_operations ll_file_operations;
extern struct file_operations ll_file_operations_flock;
extern struct file_operations ll_file_operations_noflock;
extern struct inode_operations ll_file_inode_operations;
-extern int ll_inode_revalidate_it(struct dentry *, struct lookup_intent *);
-extern int ll_have_md_lock(struct inode *inode, __u64 bits, ldlm_mode_t l_req_mode);
+extern int ll_inode_revalidate_it(struct dentry *, struct lookup_intent *,
+ __u64);
+extern int ll_have_md_lock(struct inode *inode, __u64 *bits,
+ ldlm_mode_t l_req_mode);
extern ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
- struct lustre_handle *lockh);
-int __ll_inode_revalidate_it(struct dentry *, struct lookup_intent *, __u64 bits);
+ struct lustre_handle *lockh, __u64 flags);
+int __ll_inode_revalidate_it(struct dentry *, struct lookup_intent *,
+ __u64 bits);
+#ifdef HAVE_IOP_ATOMIC_OPEN
+int ll_revalidate_nd(struct dentry *dentry, unsigned int flags);
+#else
int ll_revalidate_nd(struct dentry *dentry, struct nameidata *nd);
+#endif
int ll_file_open(struct inode *inode, struct file *file);
int ll_file_release(struct inode *inode, struct file *file);
int ll_glimpse_ioctl(struct ll_sb_info *sbi,
int ll_som_update(struct inode *inode, struct md_op_data *op_data);
int ll_inode_getattr(struct inode *inode, struct obdo *obdo,
__u64 ioepoch, int sync);
-int ll_md_setattr(struct inode *inode, struct md_op_data *op_data,
+int ll_md_setattr(struct dentry *dentry, struct md_op_data *op_data,
struct md_open_data **mod);
void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
struct lustre_handle *fh);
struct lookup_intent *it, struct kstat *stat);
int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat);
struct ll_file_data *ll_file_data_get(void);
-#ifndef HAVE_INODE_PERMISION_2ARGS
-int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd);
+struct posix_acl * ll_get_acl(struct inode *inode, int type);
+
+#ifdef HAVE_GENERIC_PERMISSION_4ARGS
+int ll_inode_permission(struct inode *inode, int mask, unsigned int flags);
#else
+# ifndef HAVE_INODE_PERMISION_2ARGS
+int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd);
+# else
int ll_inode_permission(struct inode *inode, int mask);
+# endif
#endif
+
int ll_lov_setstripe_ea_info(struct inode *inode, struct file *file,
int flags, struct lov_user_md *lum,
int lum_size);
int set_default);
int ll_dir_getstripe(struct inode *inode, struct lov_mds_md **lmm,
int *lmm_size, struct ptlrpc_request **request);
+#ifdef HAVE_FILE_FSYNC_4ARGS
+int ll_fsync(struct file *file, loff_t start, loff_t end, int data);
+#elif defined(HAVE_FILE_FSYNC_2ARGS)
+int ll_fsync(struct file *file, int data);
+#else
int ll_fsync(struct file *file, struct dentry *dentry, int data);
+#endif
int ll_do_fiemap(struct inode *inode, struct ll_user_fiemap *fiemap,
int num_bytes);
int ll_merge_lvb(struct inode *inode);
int ll_get_grouplock(struct inode *inode, struct file *file, unsigned long arg);
int ll_put_grouplock(struct inode *inode, struct file *file, unsigned long arg);
-int ll_fid2path(struct obd_export *exp, void *arg);
+int ll_fid2path(struct inode *inode, void *arg);
/* llite/dcache.c */
-/* llite/namei.c */
-/**
- * protect race ll_find_aliases vs ll_revalidate_it vs ll_unhash_aliases
- */
-int ll_dops_init(struct dentry *de, int block);
-extern cfs_spinlock_t ll_lookup_lock;
+
+int ll_dops_init(struct dentry *de, int block, int init_sa);
extern struct dentry_operations ll_d_ops;
void ll_intent_drop_lock(struct lookup_intent *);
void ll_intent_release(struct lookup_intent *);
-int ll_drop_dentry(struct dentry *dentry);
-int ll_drop_dentry(struct dentry *dentry);
-void ll_unhash_aliases(struct inode *);
+void ll_invalidate_aliases(struct inode *);
void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft);
void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry);
+#ifdef HAVE_D_COMPARE_7ARGS
+int ll_dcompare(const struct dentry *parent, const struct inode *pinode,
+ const struct dentry *dentry, const struct inode *inode,
+ unsigned int len, const char *str, const struct qstr *d_name);
+#else
int ll_dcompare(struct dentry *parent, struct qstr *d_name, struct qstr *name);
+#endif
int ll_revalidate_it_finish(struct ptlrpc_request *request,
struct lookup_intent *it, struct dentry *de);
char *ll_read_opt(const char *opt, char *data);
void ll_lli_init(struct ll_inode_info *lli);
-int ll_fill_super(struct super_block *sb);
+int ll_fill_super(struct super_block *sb, struct vfsmount *mnt);
void ll_put_super(struct super_block *sb);
void ll_kill_super(struct super_block *sb);
-int ll_shrink_cache(int nr_to_scan, gfp_t gfp_mask);
struct inode *ll_inode_from_lock(struct ldlm_lock *lock);
void ll_clear_inode(struct inode *inode);
-int ll_setattr_raw(struct inode *inode, struct iattr *attr);
+int ll_setattr_raw(struct dentry *dentry, struct iattr *attr);
int ll_setattr(struct dentry *de, struct iattr *attr);
-#ifndef HAVE_STATFS_DENTRY_PARAM
-int ll_statfs(struct super_block *sb, struct kstatfs *sfs);
-#else
int ll_statfs(struct dentry *de, struct kstatfs *sfs);
-#endif
int ll_statfs_internal(struct super_block *sb, struct obd_statfs *osfs,
__u64 max_age, __u32 flags);
void ll_update_inode(struct inode *inode, struct lustre_md *md);
void ll_umount_begin(struct super_block *sb);
#endif
int ll_remount_fs(struct super_block *sb, int *flags, char *data);
+#ifdef HAVE_SUPEROPS_USE_DENTRY
+int ll_show_options(struct seq_file *seq, struct dentry *dentry);
+#else
int ll_show_options(struct seq_file *seq, struct vfsmount *vfs);
+#endif
+void ll_dirty_page_discard_warn(cfs_page_t *page, int ioret);
int ll_prep_inode(struct inode **inode, struct ptlrpc_request *req,
- struct super_block *);
+ struct super_block *);
void lustre_dump_dentry(struct dentry *, int recur);
void lustre_dump_inode(struct inode *);
int ll_obd_statfs(struct inode *inode, void *arg);
const char *name, int namelen,
int mode, __u32 opc, void *data);
void ll_finish_md_op_data(struct md_op_data *op_data);
+int ll_get_obd_name(struct inode *inode, unsigned int cmd, unsigned long arg);
+char *ll_get_fsname(struct super_block *sb, char *buf, int buflen);
/* llite/llite_nfs.c */
extern struct export_operations lustre_export_operations;
/* llite/llite_close.c */
struct ll_close_queue {
- cfs_spinlock_t lcq_lock;
- cfs_list_t lcq_head;
- cfs_waitq_t lcq_waitq;
- cfs_completion_t lcq_comp;
- cfs_atomic_t lcq_stop;
+ spinlock_t lcq_lock;
+ cfs_list_t lcq_head;
+ cfs_waitq_t lcq_waitq;
+ struct completion lcq_comp;
+ cfs_atomic_t lcq_stop;
};
struct ccc_object *cl_inode2ccc(struct inode *inode);
__u64 end, ldlm_mode_t mode);
void policy_from_vma(ldlm_policy_data_t *policy,
struct vm_area_struct *vma, unsigned long addr, size_t count);
-struct vm_area_struct *our_vma(unsigned long addr, size_t count);
+struct vm_area_struct *our_vma(struct mm_struct *mm, unsigned long addr,
+ size_t count);
+
+static inline void ll_invalidate_page(struct page *vmpage)
+{
+ struct address_space *mapping = vmpage->mapping;
+ loff_t offset = vmpage->index << PAGE_CACHE_SHIFT;
+
+ LASSERT(PageLocked(vmpage));
+ if (mapping == NULL)
+ return;
+
+ ll_teardown_mmaps(mapping, offset, offset + CFS_PAGE_SIZE);
+ truncate_complete_page(mapping, vmpage);
+}
#define ll_s2sbi(sb) (s2lsi(sb)->lsi_llsbi)
static inline struct lu_fid *ll_inode2fid(struct inode *inode)
{
struct lu_fid *fid;
+
LASSERT(inode != NULL);
fid = &ll_i2info(inode)->lli_fid;
- LASSERT(fid_is_igif(fid) || fid_ver(fid) == 0);
+
return fid;
}
/**
* Common IO arguments for various VFS I/O interfaces.
*/
-
int cl_sb_init(struct super_block *sb);
int cl_sb_fini(struct super_block *sb);
-int cl_inode_init(struct inode *inode, struct lustre_md *md);
-void cl_inode_fini(struct inode *inode);
-
enum cl_lock_mode vvp_mode_from_vma(struct vm_area_struct *vma);
void ll_io_init(struct cl_io *io, const struct file *file, int write);
/* statahead.c */
-#define LL_SA_RPC_MIN 2
-#define LL_SA_RPC_DEF 32
-#define LL_SA_RPC_MAX 8192
+#define LL_SA_RPC_MIN 2
+#define LL_SA_RPC_DEF 32
+#define LL_SA_RPC_MAX 8192
+
+#define LL_SA_CACHE_BIT 5
+#define LL_SA_CACHE_SIZE (1 << LL_SA_CACHE_BIT)
+#define LL_SA_CACHE_MASK (LL_SA_CACHE_SIZE - 1)
/* per inode struct, for dir only */
struct ll_statahead_info {
struct inode *sai_inode;
- unsigned int sai_generation; /* generation for statahead */
cfs_atomic_t sai_refcount; /* when access this struct, hold
* refcount */
- unsigned int sai_sent; /* stat requests sent count */
- unsigned int sai_replied; /* stat requests which received
- * reply */
+ unsigned int sai_generation; /* generation for statahead */
unsigned int sai_max; /* max ahead of lookup */
- unsigned int sai_index; /* index of statahead entry */
- unsigned int sai_index_next; /* index for the next statahead
- * entry to be stated */
- unsigned int sai_hit; /* hit count */
- unsigned int sai_miss; /* miss count:
+ __u64 sai_sent; /* stat requests sent count */
+ __u64 sai_replied; /* stat requests which received
+ * reply */
+ __u64 sai_index; /* index of statahead entry */
+ __u64 sai_index_wait; /* index of entry which is the
+ * caller is waiting for */
+ __u64 sai_hit; /* hit count */
+ __u64 sai_miss; /* miss count:
* for "ls -al" case, it includes
* hidden dentry miss;
* for "ls -l" case, it does not
unsigned int sai_miss_hidden;/* "ls -al", but first dentry
* is not a hidden one */
unsigned int sai_skip_hidden;/* skipped hidden dentry count */
- unsigned int sai_ls_all:1; /* "ls -al", do stat-ahead for
+ unsigned int sai_ls_all:1, /* "ls -al", do stat-ahead for
* hidden entries */
+ sai_in_readpage:1,/* statahead is in readdir()*/
+ sai_agl_valid:1;/* AGL is valid for the dir */
cfs_waitq_t sai_waitq; /* stat-ahead wait queue */
struct ptlrpc_thread sai_thread; /* stat-ahead thread */
+ struct ptlrpc_thread sai_agl_thread; /* AGL thread */
cfs_list_t sai_entries_sent; /* entries sent out */
cfs_list_t sai_entries_received; /* entries returned */
cfs_list_t sai_entries_stated; /* entries stated */
- pid_t sai_pid; /* pid of statahead itself */
+ cfs_list_t sai_entries_agl; /* AGL entries to be sent */
+ cfs_list_t sai_cache[LL_SA_CACHE_SIZE];
+ spinlock_t sai_cache_lock[LL_SA_CACHE_SIZE];
+ cfs_atomic_t sai_cache_count; /* entry count in cache */
};
-int do_statahead_enter(struct inode *dir, struct dentry **dentry, int lookup);
-void ll_statahead_exit(struct inode *dir, struct dentry *dentry, int result);
+int do_statahead_enter(struct inode *dir, struct dentry **dentry,
+ int only_unplug);
void ll_stop_statahead(struct inode *dir, void *key);
-static inline
-void ll_statahead_mark(struct inode *dir, struct dentry *dentry)
+static inline int ll_glimpse_size(struct inode *inode)
{
- struct ll_inode_info *lli;
- struct ll_dentry_data *ldd = ll_d2d(dentry);
+ struct ll_inode_info *lli = ll_i2info(inode);
+ int rc;
+
+ down_read(&lli->lli_glimpse_sem);
+ rc = cl_glimpse_size(inode);
+ lli->lli_glimpse_time = cfs_time_current();
+ up_read(&lli->lli_glimpse_sem);
+ return rc;
+}
- /* dentry has been move to other directory, no need mark */
- if (unlikely(dir != dentry->d_parent->d_inode))
- return;
+static inline void
+ll_statahead_mark(struct inode *dir, struct dentry *dentry)
+{
+ struct ll_inode_info *lli = ll_i2info(dir);
+ struct ll_statahead_info *sai = lli->lli_sai;
+ struct ll_dentry_data *ldd = ll_d2d(dentry);
- lli = ll_i2info(dir);
/* not the same process, don't mark */
if (lli->lli_opendir_pid != cfs_curproc_pid())
return;
- cfs_spin_lock(&lli->lli_sa_lock);
- if (likely(lli->lli_sai != NULL && ldd != NULL))
- ldd->lld_sa_generation = lli->lli_sai->sai_generation;
- cfs_spin_unlock(&lli->lli_sa_lock);
+ if (sai != NULL && ldd != NULL)
+ ldd->lld_sa_generation = sai->sai_generation;
}
-static inline
-int ll_statahead_enter(struct inode *dir, struct dentry **dentryp, int lookup)
+static inline int
+ll_need_statahead(struct inode *dir, struct dentry *dentryp)
{
- struct ll_inode_info *lli;
- struct ll_dentry_data *ldd = ll_d2d(*dentryp);
-
- if (unlikely(dir == NULL))
- return -EAGAIN;
-
- if (ll_i2sbi(dir)->ll_sa_max == 0)
- return -ENOTSUPP;
+ struct ll_inode_info *lli;
+ struct ll_dentry_data *ldd;
+
+ if (ll_i2sbi(dir)->ll_sa_max == 0)
+ return -EAGAIN;
+
+ lli = ll_i2info(dir);
+ /* not the same process, don't statahead */
+ if (lli->lli_opendir_pid != cfs_curproc_pid())
+ return -EAGAIN;
+
+ /* statahead has been stopped */
+ if (lli->lli_opendir_key == NULL)
+ return -EAGAIN;
+
+ ldd = ll_d2d(dentryp);
+ /*
+ * When stats a dentry, the system trigger more than once "revalidate"
+ * or "lookup", for "getattr", for "getxattr", and maybe for others.
+ * Under patchless client mode, the operation intent is not accurate,
+ * which maybe misguide the statahead thread. For example:
+ * The "revalidate" call for "getattr" and "getxattr" of a dentry maybe
+ * have the same operation intent -- "IT_GETATTR".
+ * In fact, one dentry should has only one chance to interact with the
+ * statahead thread, otherwise the statahead windows will be confused.
+ * The solution is as following:
+ * Assign "lld_sa_generation" with "sai_generation" when a dentry
+ * "IT_GETATTR" for the first time, and the subsequent "IT_GETATTR"
+ * will bypass interacting with statahead thread for checking:
+ * "lld_sa_generation == lli_sai->sai_generation"
+ */
+ if (ldd && lli->lli_sai &&
+ ldd->lld_sa_generation == lli->lli_sai->sai_generation)
+ return -EAGAIN;
+
+ return 1;
+}
- lli = ll_i2info(dir);
- /* not the same process, don't statahead */
- if (lli->lli_opendir_pid != cfs_curproc_pid())
- return -EAGAIN;
+static inline int
+ll_statahead_enter(struct inode *dir, struct dentry **dentryp, int only_unplug)
+{
+ int ret;
- /*
- * When "ls" a dentry, the system trigger more than once "revalidate" or
- * "lookup", for "getattr", for "getxattr", and maybe for others.
- * Under patchless client mode, the operation intent is not accurate,
- * it maybe misguide the statahead thread. For example:
- * The "revalidate" call for "getattr" and "getxattr" of a dentry maybe
- * have the same operation intent -- "IT_GETATTR".
- * In fact, one dentry should has only one chance to interact with the
- * statahead thread, otherwise the statahead windows will be confused.
- * The solution is as following:
- * Assign "lld_sa_generation" with "sai_generation" when a dentry
- * "IT_GETATTR" for the first time, and the subsequent "IT_GETATTR"
- * will bypass interacting with statahead thread for checking:
- * "lld_sa_generation == lli_sai->sai_generation"
- */
- if (ldd && lli->lli_sai &&
- ldd->lld_sa_generation == lli->lli_sai->sai_generation)
- return -EAGAIN;
+ ret = ll_need_statahead(dir, *dentryp);
+ if (ret <= 0)
+ return ret;
- return do_statahead_enter(dir, dentryp, lookup);
+ return do_statahead_enter(dir, dentryp, only_unplug);
}
/* llite ioctl register support rountine */
#define cl_inode_mode(inode) ((inode)->i_mode)
#define cl_i2sbi ll_i2sbi
-static inline void cl_isize_lock(struct inode *inode, int lsmlock)
+static inline struct ll_file_data *cl_iattr2fd(struct inode *inode,
+ const struct iattr *attr)
{
- ll_inode_size_lock(inode, lsmlock);
+ LASSERT(attr->ia_valid & ATTR_FILE);
+ return LUSTRE_FPRIVATE(attr->ia_file);
}
-static inline void cl_isize_unlock(struct inode *inode, int lsmlock)
+static inline void cl_isize_lock(struct inode *inode)
{
- ll_inode_size_unlock(inode, lsmlock);
+ ll_inode_size_lock(inode);
+}
+
+static inline void cl_isize_unlock(struct inode *inode)
+{
+ ll_inode_size_unlock(inode);
}
static inline void cl_isize_write_nolock(struct inode *inode, loff_t kms)
static inline void cl_isize_write(struct inode *inode, loff_t kms)
{
- ll_inode_size_lock(inode, 0);
- i_size_write(inode, kms);
- ll_inode_size_unlock(inode, 0);
+ ll_inode_size_lock(inode);
+ i_size_write(inode, kms);
+ ll_inode_size_unlock(inode);
}
#define cl_isize_read(inode) i_size_read(inode)
struct obd_capa *cl_capa_lookup(struct inode *inode, enum cl_req_type crt);
+int cl_sync_file_range(struct inode *inode, loff_t start, loff_t end,
+ enum cl_fsync_mode mode);
+
/** direct write pages */
struct ll_dio_pages {
/** page array to be written. we don't support
int ldp_nr;
};
+static inline void cl_stats_tally(struct cl_device *dev, enum cl_req_type crt,
+ int rc)
+{
+ int opc = (crt == CRT_READ) ? LPROC_LL_OSC_READ :
+ LPROC_LL_OSC_WRITE;
+
+ ll_stats_ops_tally(ll_s2sbi(cl2ccc_dev(dev)->cdv_sb), opc, rc);
+}
+
extern ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
int rw, struct inode *inode,
struct ll_dio_pages *pv);
return ((fd->fd_flags & LL_FILE_IGNORE_LOCK) ||
(ll_i2sbi(inode)->ll_flags & LL_SBI_NOLCK));
}
+
+static inline void ll_set_lock_data(struct obd_export *exp, struct inode *inode,
+ struct lookup_intent *it, __u64 *bits)
+{
+ if (!it->d.lustre.it_lock_set) {
+ CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
+ inode, inode->i_ino, inode->i_generation);
+ md_set_lock_data(exp, &it->d.lustre.it_lock_handle,
+ inode, &it->d.lustre.it_lock_bits);
+ it->d.lustre.it_lock_set = 1;
+ }
+
+ if (bits != NULL)
+ *bits = it->d.lustre.it_lock_bits;
+}
+
+static inline void ll_lock_dcache(struct inode *inode)
+{
+#ifdef HAVE_DCACHE_LOCK
+ spin_lock(&dcache_lock);
+#else
+ spin_lock(&inode->i_lock);
+#endif
+}
+
+static inline void ll_unlock_dcache(struct inode *inode)
+{
+#ifdef HAVE_DCACHE_LOCK
+ spin_unlock(&dcache_lock);
+#else
+ spin_unlock(&inode->i_lock);
+#endif
+}
+
+static inline int d_lustre_invalid(const struct dentry *dentry)
+{
+ struct ll_dentry_data *lld = ll_d2d(dentry);
+
+ return (lld == NULL) || lld->lld_invalid;
+}
+
+static inline void __d_lustre_invalidate(struct dentry *dentry)
+{
+ struct ll_dentry_data *lld = ll_d2d(dentry);
+
+ if (lld != NULL)
+ lld->lld_invalid = 1;
+}
+
+/*
+ * Mark dentry INVALID, if dentry refcount is zero (this is normally case for
+ * ll_md_blocking_ast), unhash this dentry, and let dcache to reclaim it later;
+ * else dput() of the last refcount will unhash this dentry and kill it.
+ */
+static inline void d_lustre_invalidate(struct dentry *dentry)
+{
+ CDEBUG(D_DENTRY, "invalidate dentry %.*s (%p) parent %p inode %p "
+ "refc %d\n", dentry->d_name.len, dentry->d_name.name, dentry,
+ dentry->d_parent, dentry->d_inode, d_refcount(dentry));
+
+ spin_lock(&dentry->d_lock);
+ __d_lustre_invalidate(dentry);
+ if (d_refcount(dentry) == 0)
+ __d_drop(dentry);
+ spin_unlock(&dentry->d_lock);
+}
+
+static inline void d_lustre_revalidate(struct dentry *dentry)
+{
+ spin_lock(&dentry->d_lock);
+ LASSERT(ll_d2d(dentry) != NULL);
+ ll_d2d(dentry)->lld_invalid = 0;
+ spin_unlock(&dentry->d_lock);
+}
+
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 7, 50, 0)
+/* Compatibility for old (1.8) compiled userspace quota code */
+struct if_quotactl_18 {
+ __u32 qc_cmd;
+ __u32 qc_type;
+ __u32 qc_id;
+ __u32 qc_stat;
+ struct obd_dqinfo qc_dqinfo;
+ struct obd_dqblk qc_dqblk;
+ char obd_type[16];
+ struct obd_uuid obd_uuid;
+};
+#define LL_IOC_QUOTACTL_18 _IOWR('f', 162, struct if_quotactl_18 *)
+/* End compatibility for old (1.8) compiled userspace quota code */
+#else
+#warning "remove old LL_IOC_QUOTACTL_18 compatibility code"
+#endif /* LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 7, 50, 0) */
+
+int ll_layout_conf(struct inode *inode, const struct cl_object_conf *conf);
+int ll_layout_refresh(struct inode *inode, __u32 *gen);
+
#endif /* LLITE_INTERNAL_H */