* GPL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
*/
/*
+ * Copyright (c) 2011 Whamcloud, Inc.
+ */
+/*
* This file is part of Lustre, http://www.lustre.org/
* Lustre is a trademark of Sun Microsystems, Inc.
*/
/* for struct cl_lock_descr and struct cl_io */
#include <cl_object.h>
#include <lclient.h>
+#include <lustre_mdc.h>
+#include <linux/lustre_intent.h>
#ifndef FMODE_EXEC
#define FMODE_EXEC 0
#endif
+#ifndef VM_FAULT_RETRY
+#define VM_FAULT_RETRY 0
+#endif
+
+/** Only used on client-side for indicating the tail of dir hash/offset. */
+#define LL_DIR_END_OFF 0x7fffffffffffffffULL
+#define LL_DIR_END_OFF_32BIT 0x7fffffffUL
+
#ifndef DCACHE_LUSTRE_INVALID
-#define DCACHE_LUSTRE_INVALID 0x100
+#define DCACHE_LUSTRE_INVALID 0x4000000
#endif
#define LL_IT2STR(it) ((it) ? ldlm_it2str((it)->it_op) : "0")
#define LUSTRE_FPRIVATE(file) ((file)->private_data)
-#ifdef HAVE_VFS_INTENT_PATCHES
-static inline struct lookup_intent *ll_nd2it(struct nameidata *nd)
-{
- return &nd->intent;
-}
-#endif
-
struct ll_dentry_data {
int lld_cwd_count;
int lld_mnt_count;
struct obd_client_handle lld_cwd_och;
struct obd_client_handle lld_mnt_och;
-#ifndef HAVE_VFS_INTENT_PATCHES
struct lookup_intent *lld_it;
-#endif
unsigned int lld_sa_generation;
};
/* remote client permission cache */
#define REMOTE_PERM_HASHSIZE 16
+struct ll_getname_data { /* state for finding the name that belongs to a fid
+ * -- NOTE(review): purpose inferred from the field comments, confirm */
+ char *lgd_name; /* points to a buffer of NAME_MAX+1 bytes for the name */
+ struct lu_fid lgd_fid; /* target fid we are looking for */
+ int lgd_found; /* inode matched? (presumably non-zero once lgd_fid was found -- confirm) */
+};
+
/* llite setxid/access permission for user on remote client */
struct ll_remote_perm {
cfs_hlist_node_t lrp_list;
cfs_semaphore_t lli_size_sem; /* protect open and change size */
void *lli_size_sem_owner;
cfs_semaphore_t lli_write_sem;
- cfs_semaphore_t lli_trunc_sem;
+ cfs_rw_semaphore_t lli_trunc_sem;
char *lli_symlink_name;
__u64 lli_maxbytes;
__u64 lli_ioepoch;
unsigned long lli_flags;
- cfs_time_t lli_contention_time;
/* this lock protects posix_acl, pending_write_llaps, mmap_cnt */
cfs_spinlock_t lli_lock;
/* for writepage() only to communicate to fsync */
int lli_async_rc;
+ int lli_write_rc;
struct posix_acl *lli_posix_acl;
/* identifying fields for both metadata and data stacks. */
struct lu_fid lli_fid;
+ /* Parent fid for accessing default stripe data on parent directory
+ * for allocating OST objects after a mknod() and later open-by-FID. */
+ struct lu_fid lli_pfid;
struct lov_stripe_md *lli_smd;
/* fid capability */
struct obd_capa *lli_mds_capa;
cfs_list_t lli_oss_capas;
- /* metadata stat-ahead */
+ /* metadata statahead */
+ /* protect statahead stuff: lli_opendir_pid, lli_opendir_key, lli_sai,
+ * and so on. */
+ cfs_spinlock_t lli_sa_lock;
/*
* "opendir_pid" is the token when lookup/revalid -- I am the owner of
* dir statahead.
* before child -- it is me should cleanup the dir readahead. */
void *lli_opendir_key;
struct ll_statahead_info *lli_sai;
+ __u64 lli_sa_pos;
struct cl_object *lli_clob;
/* the most recent timestamps obtained from mds */
struct ost_lvb lli_lvb;
+ /**
+ * serialize normal readdir and statahead-readdir
+ */
+ cfs_semaphore_t lli_readdir_sem;
};
/*
#define LL_SBI_LOCALFLOCK 0x200 /* Local flocks support by kernel */
#define LL_SBI_LRU_RESIZE 0x400 /* lru resize support */
#define LL_SBI_LAZYSTATFS 0x800 /* lazystatfs mount option */
+#define LL_SBI_SOM_PREVIEW 0x1000 /* SOM preview mount option */
+#define LL_SBI_32BIT_API 0x2000 /* generate 32 bit inodes. */
+#define LL_SBI_64BIT_HASH 0x4000 /* support 64-bits dir hash/offset */
/* default value for ll_sb_info->contention_time */
#define SBI_DEFAULT_CONTENTION_SECONDS 60
/* =0 - hold lock over whole read/write
* >0 - max. chunk to be read/written w/o lock re-acquiring */
unsigned long ll_max_rw_chunk;
+ unsigned int ll_md_brw_size; /* used by readdir */
struct lu_site *ll_site;
struct cl_device *ll_cl;
* clustred nfs */
struct rmtacl_ctl_table ll_rct;
struct eacl_table ll_et;
+ struct vfsmount *ll_mnt;
};
#define LL_DEFAULT_MAX_RW_CHUNK (32 * 1024 * 1024)
};
struct ll_file_dir {
+ __u64 lfd_pos; /* NOTE(review): presumably the directory hash/offset this open file is positioned at -- confirm in llite/dir.c */
+ __u64 lfd_next; /* NOTE(review): presumably the hash/offset the next readdir continues from -- confirm in llite/dir.c */
};
extern cfs_mem_cache_t *ll_file_data_slab;
}
struct it_cb_data {
- struct inode *icbd_parent;
+ struct inode *icbd_parent;
struct dentry **icbd_childp;
- obd_id hash;
+ obd_id hash;
};
__u32 ll_i2suppgid(struct inode *i);
void ll_i2gids(__u32 *suppgids, struct inode *i1,struct inode *i2);
+/**
+ * Decide whether the 32-bit API has to be used for this mount.
+ *
+ * On a kernel built with BITS_PER_LONG == 32 the answer is always yes.
+ * Otherwise it is yes when the current process is a 32-bit one, or when
+ * LL_SBI_32BIT_API is set in \a sbi->ll_flags.
+ *
+ * \param sbi  super block info whose ll_flags are examined
+ *
+ * \retval 1 the 32-bit API is needed
+ * \retval 0 otherwise
+ */
+static inline int ll_need_32bit_api(struct ll_sb_info *sbi)
+{
+#if BITS_PER_LONG == 32
+        return 1;
+#else
+        /* the process check comes first; the flag is only consulted when
+         * the caller is not a 32-bit process */
+        if (unlikely(cfs_curproc_is_32bit()))
+                return 1;
+
+        return unlikely((sbi->ll_flags & LL_SBI_32BIT_API) != 0);
+#endif
+}
+
#define LLAP_MAGIC 98764321
extern cfs_mem_cache_t *ll_async_page_slab;
/* llite/dir.c */
-static inline void ll_put_page(struct page *page)
-{
- kunmap(page);
- page_cache_release(page);
-}
-
+void ll_release_page(struct page *page, int remove);
extern struct file_operations ll_dir_operations;
extern struct inode_operations ll_dir_inode_operations;
-struct page *ll_get_dir_page(struct inode *dir, __u64 hash, int exact,
+struct page *ll_get_dir_page(struct file *filp, struct inode *dir, __u64 hash,
struct ll_dir_chain *chain);
+int ll_readdir(struct file *filp, void *cookie, filldir_t filldir);
int ll_get_mdt_idx(struct inode *inode);
/* llite/namei.c */
struct lustre_md *lic);
int ll_md_blocking_ast(struct ldlm_lock *, struct ldlm_lock_desc *,
void *data, int flag);
-#ifndef HAVE_VFS_INTENT_PATCHES
struct lookup_intent *ll_convert_intent(struct open_intent *oit,
int lookup_flags);
-#endif
-void ll_lookup_it_alias(struct dentry **de, struct inode *inode, __u32 bits);
int ll_lookup_it_finish(struct ptlrpc_request *request,
- struct lookup_intent *it, void *data,
- struct inode **alias);
+ struct lookup_intent *it, void *data);
+struct dentry *ll_find_alias(struct inode *inode, struct dentry *de);
/* llite/rw.c */
int ll_prepare_write(struct file *, struct page *, unsigned from, unsigned to);
ssize_t ll_file_lockless_io(struct file *, char *, size_t, loff_t *, int);
void ll_clear_file_contended(struct inode*);
int ll_sync_page_range(struct inode *, struct address_space *, loff_t, size_t);
-int ll_readahead(const struct lu_env *env, struct cl_io *io, struct ll_readahead_state *ras,
- struct address_space *mapping, struct cl_page_list *queue, int flags);
+int ll_readahead(const struct lu_env *env, struct cl_io *io,
+ struct ll_readahead_state *ras, struct address_space *mapping,
+ struct cl_page_list *queue, int flags);
/* llite/file.c */
extern struct file_operations ll_file_operations;
extern struct file_operations ll_file_operations_flock;
extern struct file_operations ll_file_operations_noflock;
extern struct inode_operations ll_file_inode_operations;
-extern int ll_inode_revalidate_it(struct dentry *, struct lookup_intent *);
-extern int ll_have_md_lock(struct inode *inode, __u64 bits);
+extern int ll_inode_revalidate_it(struct dentry *, struct lookup_intent *,
+ __u64);
+extern int ll_have_md_lock(struct inode *inode, __u64 *bits,
+ ldlm_mode_t l_req_mode);
extern ldlm_mode_t ll_take_md_lock(struct inode *inode, __u64 bits,
struct lustre_handle *lockh);
-int __ll_inode_revalidate_it(struct dentry *, struct lookup_intent *, __u64 bits);
+int __ll_inode_revalidate_it(struct dentry *, struct lookup_intent *,
+ __u64 bits);
int ll_revalidate_nd(struct dentry *dentry, struct nameidata *nd);
int ll_file_open(struct inode *inode, struct file *file);
int ll_file_release(struct inode *inode, struct file *file);
int ll_fid2path(struct obd_export *exp, void *arg);
/* llite/dcache.c */
-/* llite/namei.c */
-/**
- * protect race ll_find_aliases vs ll_revalidate_it vs ll_unhash_aliases
- */
+int ll_dops_init(struct dentry *de, int block, int init_sa);
extern cfs_spinlock_t ll_lookup_lock;
extern struct dentry_operations ll_d_ops;
void ll_intent_drop_lock(struct lookup_intent *);
void ll_intent_release(struct lookup_intent *);
int ll_drop_dentry(struct dentry *dentry);
-extern void ll_set_dd(struct dentry *de);
-int ll_drop_dentry(struct dentry *dentry);
void ll_unhash_aliases(struct inode *);
void ll_frob_intent(struct lookup_intent **itp, struct lookup_intent *deft);
void ll_lookup_finish_locks(struct lookup_intent *it, struct dentry *dentry);
char *ll_read_opt(const char *opt, char *data);
void ll_lli_init(struct ll_inode_info *lli);
-int ll_fill_super(struct super_block *sb);
+int ll_fill_super(struct super_block *sb, struct vfsmount *mnt);
void ll_put_super(struct super_block *sb);
void ll_kill_super(struct super_block *sb);
-int ll_shrink_cache(int nr_to_scan, gfp_t gfp_mask);
struct inode *ll_inode_from_lock(struct ldlm_lock *lock);
void ll_clear_inode(struct inode *inode);
int ll_setattr_raw(struct inode *inode, struct iattr *attr);
#define ll_s2sbi(sb) (s2lsi(sb)->lsi_llsbi)
-static inline __u64 ll_ts2u64(struct timespec *time)
-{
- __u64 t = time->tv_sec;
- return t;
-}
-
/* don't need an addref as the sb_info should be holding one */
static inline struct obd_export *ll_s2dtexp(struct super_block *sb)
{
/* statahead.c */
-#define LL_SA_RPC_MIN 2
-#define LL_SA_RPC_DEF 32
-#define LL_SA_RPC_MAX 8192
+#define LL_SA_RPC_MIN 2
+#define LL_SA_RPC_DEF 32
+#define LL_SA_RPC_MAX 8192
+
+#define LL_SA_CACHE_BIT 5
+#define LL_SA_CACHE_SIZE (1 << LL_SA_CACHE_BIT)
+#define LL_SA_CACHE_MASK (LL_SA_CACHE_SIZE - 1)
/* per inode struct, for dir only */
struct ll_statahead_info {
struct inode *sai_inode;
- unsigned int sai_generation; /* generation for statahead */
cfs_atomic_t sai_refcount; /* when access this struct, hold
* refcount */
- unsigned int sai_sent; /* stat requests sent count */
- unsigned int sai_replied; /* stat requests which received
- * reply */
+ unsigned int sai_generation; /* generation for statahead */
unsigned int sai_max; /* max ahead of lookup */
- unsigned int sai_index; /* index of statahead entry */
- unsigned int sai_index_next; /* index for the next statahead
- * entry to be stated */
- unsigned int sai_hit; /* hit count */
- unsigned int sai_miss; /* miss count:
+ __u64 sai_sent; /* stat requests sent count */
+ __u64 sai_replied; /* stat requests which received
+ * reply */
+ __u64 sai_index; /* index of statahead entry */
+ __u64 sai_index_wait; /* index of entry which is the
+ * one the caller is waiting for */
+ __u64 sai_hit; /* hit count */
+ __u64 sai_miss; /* miss count:
* for "ls -al" case, it includes
* hidden dentry miss;
* for "ls -l" case, it does not
unsigned int sai_miss_hidden;/* "ls -al", but first dentry
* is not a hidden one */
unsigned int sai_skip_hidden;/* skipped hidden dentry count */
- unsigned int sai_ls_all:1; /* "ls -al", do stat-ahead for
+ unsigned int sai_ls_all:1, /* "ls -al", do stat-ahead for
* hidden entries */
+ sai_in_readpage:1;/* statahead is in readdir()*/
cfs_waitq_t sai_waitq; /* stat-ahead wait queue */
struct ptlrpc_thread sai_thread; /* stat-ahead thread */
cfs_list_t sai_entries_sent; /* entries sent out */
cfs_list_t sai_entries_received; /* entries returned */
cfs_list_t sai_entries_stated; /* entries stated */
+ cfs_list_t sai_cache[LL_SA_CACHE_SIZE];
+ cfs_spinlock_t sai_cache_lock[LL_SA_CACHE_SIZE];
+ cfs_atomic_t sai_cache_count; /* entry count in cache */
};
-int do_statahead_enter(struct inode *dir, struct dentry **dentry, int lookup);
-void ll_statahead_exit(struct inode *dir, struct dentry *dentry, int result);
-void ll_stop_statahead(struct inode *inode, void *key);
+int do_statahead_enter(struct inode *dir, struct dentry **dentry,
+ int only_unplug);
+void ll_stop_statahead(struct inode *dir, void *key);
-static inline
-void ll_statahead_mark(struct inode *dir, struct dentry *dentry)
+static inline void
+ll_statahead_mark(struct inode *dir, struct dentry *dentry) /* stamp @dentry with the statahead generation of @dir */
{
- struct ll_inode_info *lli;
- struct ll_dentry_data *ldd = ll_d2d(dentry);
-
- /* dentry has been move to other directory, no need mark */
- if (unlikely(dir != dentry->d_parent->d_inode))
- return;
+ struct ll_inode_info *lli = ll_i2info(dir);
+ struct ll_statahead_info *sai = lli->lli_sai; /* NOTE(review): read with no lock held, while the removed lines took lli_lock -- confirm this is safe */
+ struct ll_dentry_data *ldd = ll_d2d(dentry);
- lli = ll_i2info(dir);
/* not the same process, don't mark */
if (lli->lli_opendir_pid != cfs_curproc_pid())
return;
- cfs_spin_lock(&lli->lli_lock);
- if (likely(lli->lli_sai != NULL && ldd != NULL))
- ldd->lld_sa_generation = lli->lli_sai->sai_generation;
- cfs_spin_unlock(&lli->lli_lock);
+ if (sai != NULL && ldd != NULL) /* nothing to record without a statahead instance or dentry data */
+ ldd->lld_sa_generation = sai->sai_generation; /* remember which statahead generation saw this dentry */
}
-static inline
-int ll_statahead_enter(struct inode *dir, struct dentry **dentryp, int lookup)
+static inline int
+ll_statahead_enter(struct inode *dir, struct dentry **dentryp, int only_unplug)
{
struct ll_inode_info *lli;
- struct ll_dentry_data *ldd = ll_d2d(*dentryp);
-
- if (unlikely(dir == NULL))
- return -EAGAIN;
+ struct ll_dentry_data *ldd;
if (ll_i2sbi(dir)->ll_sa_max == 0)
return -ENOTSUPP;
if (lli->lli_opendir_pid != cfs_curproc_pid())
return -EAGAIN;
+ ldd = ll_d2d(*dentryp);
/*
- * When "ls" a dentry, the system trigger more than once "revalidate" or
- * "lookup", for "getattr", for "getxattr", and maybe for others.
+ * When stat-ing a dentry, the system triggers "revalidate" or "lookup"
+ * more than once: for "getattr", for "getxattr", and maybe for others.
* Under patchless client mode, the operation intent is not accurate,
- * it maybe misguide the statahead thread. For example:
+ * which may misguide the statahead thread. For example:
* The "revalidate" call for "getattr" and "getxattr" of a dentry maybe
* have the same operation intent -- "IT_GETATTR".
* In fact, one dentry should has only one chance to interact with the
ldd->lld_sa_generation == lli->lli_sai->sai_generation)
return -EAGAIN;
- return do_statahead_enter(dir, dentryp, lookup);
-}
-
-static void inline ll_dops_init(struct dentry *de, int block)
-{
- struct ll_dentry_data *lld = ll_d2d(de);
-
- if (lld == NULL && block != 0) {
- ll_set_dd(de);
- lld = ll_d2d(de);
- }
-
- if (lld != NULL)
- lld->lld_sa_generation = 0;
-
- de->d_op = &ll_d_ops;
+ return do_statahead_enter(dir, dentryp, only_unplug);
}
/* llite ioctl register support rountine */
int ldp_nr;
};
+/**
+ * Account the result of an OSC read or write request in the llite stats.
+ *
+ * \param dev  cl_device; the tally goes to the ll_sb_info of its super block
+ * \param crt  request type: CRT_READ is counted as LPROC_LL_OSC_READ, any
+ *             other value as LPROC_LL_OSC_WRITE
+ * \param rc   value handed unchanged to ll_stats_ops_tally()
+ */
+static inline void cl_stats_tally(struct cl_device *dev, enum cl_req_type crt,
+                                  int rc)
+{
+        /* \a crt is an enum cl_req_type, so it is tested against CRT_READ;
+         * the cl_io_type enumerator CIT_READ only matched because both
+         * enumerators happen to have the same value. */
+        int opc = (crt == CRT_READ) ? LPROC_LL_OSC_READ :
+                                      LPROC_LL_OSC_WRITE;
+
+        ll_stats_ops_tally(ll_s2sbi(cl2ccc_dev(dev)->cdv_sb), opc, rc);
+}
+
extern ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
int rw, struct inode *inode,
struct ll_dio_pages *pv);
return ((fd->fd_flags & LL_FILE_IGNORE_LOCK) ||
(ll_i2sbi(inode)->ll_flags & LL_SBI_NOLCK));
}
+
+/**
+ * Set \a inode as the data of the lock referenced by intent \a it, unless
+ * that was already done for this intent, and optionally hand back the
+ * intent's lock bits.
+ *
+ * \param exp    export passed to md_set_lock_data()
+ * \param inode  inode to set on the lock
+ * \param it     intent holding the lock handle, the lock bits and the
+ *               it_lock_set marker
+ * \param bits   if not NULL, receives it->d.lustre.it_lock_bits
+ */
+static inline void ll_set_lock_data(struct obd_export *exp, struct inode *inode,
+                                    struct lookup_intent *it, __u64 *bits)
+{
+        if (it->d.lustre.it_lock_set == 0) {
+                CDEBUG(D_DLMTRACE, "setting l_data to inode %p (%lu/%u)\n",
+                       inode, inode->i_ino, inode->i_generation);
+                md_set_lock_data(exp, &it->d.lustre.it_lock_handle, inode,
+                                 &it->d.lustre.it_lock_bits);
+                /* later calls with the same intent skip the step above */
+                it->d.lustre.it_lock_set = 1;
+        }
+
+        /* the caller is not interested in the lock bits */
+        if (bits == NULL)
+                return;
+
+        *bits = it->d.lustre.it_lock_bits;
+}
+
+/**
+ * Put \a dentry back into the dcache hash if it is currently unhashed.
+ *
+ * \param dentry  dentry to rehash
+ * \param locked  zero: ll_lookup_lock and dcache_lock are taken around the
+ *                rehash and dropped afterwards; non-zero: neither lock is
+ *                touched here (presumably the caller holds them)
+ */
+static inline void ll_dentry_rehash(struct dentry *dentry, int locked)
+{
+        const int need_lock = (locked == 0);
+
+        if (need_lock) {
+                cfs_spin_lock(&ll_lookup_lock);
+                spin_lock(&dcache_lock);
+        }
+
+        if (d_unhashed(dentry))
+                d_rehash_cond(dentry, 0);
+
+        if (need_lock) {
+                /* drop the locks in the reverse order of acquisition */
+                spin_unlock(&dcache_lock);
+                cfs_spin_unlock(&ll_lookup_lock);
+        }
+}
+
+/**
+ * Clear DCACHE_LUSTRE_INVALID on \a dentry when \a bits contains
+ * MDS_INODELOCK_LOOKUP.
+ *
+ * The flag is tested without the dentry lock first; the lock is taken only
+ * when the flag actually has to be cleared.
+ *
+ * \param dentry  dentry whose d_flags may be updated
+ * \param bits    inode lock bits to check for MDS_INODELOCK_LOOKUP
+ */
+static inline void ll_dentry_reset_flags(struct dentry *dentry, __u64 bits)
+{
+        if (!(bits & MDS_INODELOCK_LOOKUP))
+                return;
+
+        /* nothing to clear */
+        if (!(dentry->d_flags & DCACHE_LUSTRE_INVALID))
+                return;
+
+        lock_dentry(dentry);
+        dentry->d_flags &= ~DCACHE_LUSTRE_INVALID;
+        unlock_dentry(dentry);
+}
+
+#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2,7,50,0)
+/* Compatibility for old (1.8) compiled userspace quota code */
+struct if_quotactl_18 { /* argument type named by LL_IOC_QUOTACTL_18.
+ * NOTE(review): the layout presumably has to stay exactly as 1.8 userspace
+ * was compiled with -- confirm before changing any field */
+ __u32 qc_cmd;
+ __u32 qc_type;
+ __u32 qc_id;
+ __u32 qc_stat;
+ struct obd_dqinfo qc_dqinfo;
+ struct obd_dqblk qc_dqblk;
+ char obd_type[16];
+ struct obd_uuid obd_uuid;
+};
+#define LL_IOC_QUOTACTL_18 _IOWR('f', 162, struct if_quotactl_18 *)
+/* End compatibility for old (1.8) compiled userspace quota code */
+#else
+#warning "remove old LL_IOC_QUOTACTL_18 compatibility code"
+#endif /* LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2,7,50,0) */
+
#endif /* LLITE_INTERNAL_H */