goto out_unlock;
}
+#define READDIR_ROLLBACK(dp, cur_page, prev_page, cross, cur_ent, first_ent) \
+do { \
+ LASSERT(first_ent != NULL); \
+ cur_ent = first_ent; \
+ first_ent = NULL; \
+ if (cross != 0) { \
+ LASSERT(prev_page != NULL); \
+ ll_put_page(cur_page); \
+ cur_page = prev_page; \
+ prev_page = NULL; \
+ dp = page_address(cur_page); \
+ } \
+} while(0)
+
int ll_readdir(struct file *filp, void *cookie, filldir_t filldir)
{
struct inode *inode = filp->f_dentry->d_inode;
struct ll_inode_info *info = ll_i2info(inode);
__u64 pos = filp->f_pos;
+ int cross = 0;
+ struct ll_file_data *fd = LUSTRE_FPRIVATE(filp);
+ struct lu_dirent *first = NULL;
+ struct page *prev_page = NULL;
+ struct lu_dirent *prev_ent = NULL;
struct page *page;
struct ll_dir_chain chain;
int rc;
inode->i_ino, inode->i_generation, inode,
(unsigned long)pos, i_size_read(inode));
+ /* If someone changed f_pos out of readdir, ignore fd_dir */
+ if (unlikely(fd->fd_dir.lfd_valid && fd->fd_dir.lfd_hash != pos)) {
+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) someone changed "
+ "pos from %llu to %llu, maybe cause misposition\n",
+ inode->i_ino, inode->i_generation, inode,
+ fd->fd_dir.lfd_hash, pos);
+ fd->fd_dir.lfd_valid = 0;
+ }
+
if (pos == DIR_END_OFF)
/*
* end-of-file.
* use this value.
*/
__u64 hash = DIR_END_OFF;
- __u64 next;
dp = page_address(page);
for (ent = lu_dirent_start(dp); ent != NULL && !done;
* XXX: implement correct swabbing here.
*/
- hash = le64_to_cpu(ent->lde_hash);
- namelen = le16_to_cpu(ent->lde_namelen);
-
+restart:
+ hash = le64_to_cpu(ent->lde_hash);
if (hash < pos)
/*
* Skip until we find target hash
*/
continue;
+ namelen = le16_to_cpu(ent->lde_namelen);
if (namelen == 0)
/*
* Skip dummy record.
*/
continue;
- fid = ent->lde_fid;
name = ent->lde_name;
- fid_le_to_cpu(&fid, &fid);
+ fid_le_to_cpu(&fid, &ent->lde_fid);
+
+ if (unlikely(fd->fd_dir.lfd_valid &&
+ hash == pos)) {
+ if (fd->fd_dir.lfd_namelen != namelen ||
+ memcmp(fd->fd_dir.lfd_name, name,
+ namelen) != 0) {
+ if (first == NULL)
+ first = ent;
+ /*
+ * It is not the one we want.
+ * Coutinue.
+ */
+ continue;
+ }
+
+ fd->fd_dir.lfd_valid = 0;
+ if (!lu_fid_eq(&fid,
+ &fd->fd_dir.lfd_fid)) {
+ /*
+ * Someone re-add it.
+ */
+ CDEBUG(D_VFSTRACE,"VFS Op:inode"
+ "=%lu/%u(%p) someone "
+ "re-add at pos %llu\n",
+ inode->i_ino,
+ inode->i_generation,
+ inode, pos);
+ if (first != NULL) {
+ READDIR_ROLLBACK(dp,
+ page, prev_page,
+ cross, ent,
+ first);
+ goto restart;
+ }
+ }
+ }
+
+ if (unlikely(fd->fd_dir.lfd_valid &&
+ hash > pos)) {
+ CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u"
+ "(%p) miss to find pos %llu\n",
+ inode->i_ino,inode->i_generation,
+ inode, pos);
+ fd->fd_dir.lfd_valid = 0;
+ if (first != NULL) {
+ READDIR_ROLLBACK(dp, page,
+ prev_page, cross,
+ ent, first);
+ goto restart;
+ }
+ }
+
+
if (cfs_curproc_is_32bit())
ino = cl_fid_build_ino32(&fid);
else
ino = cl_fid_build_ino(&fid);
type = ll_dirent_type_get(ent);
+
+ if (prev_ent != NULL) {
+ if (unlikely(namelen ==
+ le16_to_cpu(prev_ent->lde_namelen)&&
+ memcmp(name, prev_ent->lde_name,
+ namelen) == 0)) {
+ struct lu_fid prev_fid;
+
+ fid_le_to_cpu(&prev_fid,
+ &prev_ent->lde_fid);
+ CERROR("Pack two entries with "
+ "the same name [%.*s] "
+ "under inode=%lu/%u"
+ "(%p): entry1 [%p], "
+ "hash1 ["LPU64"], "
+ "FID1 ["DFID"], "
+ "entry2 [%p], "
+ "hash2 ["LPU64"], "
+ "FID2 ["DFID"]. (C)\n",
+ namelen, name,
+ inode->i_ino,
+ inode->i_generation,
+ inode, prev_ent,
+ le64_to_cpu(
+ prev_ent->lde_hash),
+ PFID(&prev_fid),
+ ent, hash, PFID(&fid));
+ LASSERT(ent != prev_ent);
+ }
+ }
+
done = filldir(cookie, name, namelen,
(loff_t)hash, ino, type);
- }
- next = le64_to_cpu(dp->ldp_hash_end);
- ll_put_page(page);
+ if (!done)
+ prev_ent = ent;
+ } /* end for */
+
if (!done) {
- pos = next;
+ if (unlikely(fd->fd_dir.lfd_valid && first)) {
+ if (cross == 0) {
+ cross = 1;
+ } else {
+ /*
+ * At here, we fall into full
+ * page hash collision.
+ * Currently, we can not handle
+ * full page hash collision yet.
+ * Give up search, just pack
+ * from the first.
+ */
+ CWARN("Full page hash collision"
+ " for inode=%lu/%u(%p) "
+ "with value ["LPU64"]\n",
+ inode->i_ino,
+ inode->i_generation,
+ inode, hash);
+ fd->fd_dir.lfd_valid = 0;
+ READDIR_ROLLBACK(dp, page,
+ prev_page, cross,
+ ent, first);
+ goto restart;
+ }
+ }
+
+ if (prev_page != NULL)
+ ll_put_page(prev_page);
+ prev_page = page;
+
+ pos = le64_to_cpu(dp->ldp_hash_end);
if (pos == DIR_END_OFF)
/*
* End of directory reached.
* go into overflow page.
*/
}
- } else
+ } else {
pos = hash;
+ if (unlikely(prev_ent != NULL &&
+ le64_to_cpu(prev_ent->lde_hash) == hash)) {
+ struct lu_fid prev_fid;
+ struct lu_fid fid;
+ int namelen =
+ le16_to_cpu(ent->lde_namelen);
+
+ fid_le_to_cpu(&prev_fid,
+ &prev_ent->lde_fid);
+ fid_le_to_cpu(&fid,
+ &ent->lde_fid);
+ /* This should replace D_WARNING with
+ * D_VFSTRACE after bug 20581 fixed. */
+ CDEBUG(D_WARNING, "Hash collision for "
+ "inode=%lu/%u(%p) with the "
+ "same hash value ["LPU64"] to "
+ "be returned to user space "
+ "through multiple readdir: "
+ "entry1 [%p], name1 [%.*s], "
+ "FID1 ["DFID"], "
+ "entry2 [%p], name2 [%.*s], "
+ "FID2 ["DFID"]\n",
+ inode->i_ino,
+ inode->i_generation,
+ inode, hash, prev_ent,
+ le16_to_cpu(
+ prev_ent->lde_namelen),
+ prev_ent->lde_name,
+ PFID(&prev_fid),
+ ent, namelen,
+ ent->lde_name,
+ PFID(&fid));
+ LASSERT(ent != prev_ent);
+ fd->fd_dir.lfd_fid = fid;
+ fd->fd_dir.lfd_hash = hash;
+ fd->fd_dir.lfd_namelen = namelen;
+ memcpy(fd->fd_dir.lfd_name,
+ ent->lde_name, namelen);
+ fd->fd_dir.lfd_valid = 1;
+ }
+
+ if (prev_page != NULL)
+ ll_put_page(prev_page);
+ prev_page = page;
+ }
} else {
rc = PTR_ERR(page);
CERROR("error reading dir "DFID" at %lu: rc %d\n",
}
}
+ if (prev_page != NULL)
+ ll_put_page(prev_page);
+
filp->f_pos = (loff_t)pos;
filp->f_version = inode->i_version;
touch_atime(filp->f_vfsmnt, filp->f_dentry);
#include "cl_object.h"
-struct ll_file_data *ll_file_data_get(void)
+static struct ll_file_data *ll_file_data_get(int is_dir)
{
struct ll_file_data *fd;
OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO);
+ if (fd && is_dir) {
+ OBD_ALLOC(fd->fd_dir.lfd_name, LLITE_NAME_LEN);
+ if (unlikely(fd->fd_dir.lfd_name == NULL)) {
+ OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
+ fd = NULL;
+ }
+ }
return fd;
}
static void ll_file_data_put(struct ll_file_data *fd)
{
- if (fd != NULL)
+ if (fd != NULL) {
+ if (fd->fd_dir.lfd_name)
+ OBD_FREE(fd->fd_dir.lfd_name, LLITE_NAME_LEN);
OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
+ }
}
void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
LASSERT(fd != NULL);
/* The last ref on @file, maybe not the the owner pid of statahead.
- * Different processes can open the same dir, "ll_opendir_key" means:
+ * Different processes can open the same dir, "lli_opendir_key" means:
* it is me that should stop the statahead thread. */
if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
ll_stop_statahead(inode, lli->lli_opendir_key);
file->private_data = NULL; /* prevent ll_local_open assertion */
#endif
- fd = ll_file_data_get();
+ fd = ll_file_data_get(S_ISDIR(inode->i_mode));
if (fd == NULL)
RETURN(-ENOMEM);
unsigned long ras_consecutive_stride_requests;
};
+#define LLITE_NAME_LEN 255
+
struct ll_file_dir {
+ struct lu_fid lfd_fid; /* fid of next entry. it can not be
+ * used as offset clew independently
+ * because of hardlink */
+ __u64 lfd_hash; /* hash (offset) of next entry */
+ __u16 lfd_valid; /* If user buffer for readdir is full,
+ * and hash collision for next entry,
+ * then it is 1, otherwise it is 0. */
+ __u16 lfd_namelen; /* namelen of next entry */
+ char *lfd_name; /* name of next entry */
};
extern cfs_mem_cache_t *ll_file_data_slab;
struct ll_readahead_state fd_ras;
int fd_omode;
struct ccc_grouplock fd_grouplock;
+ /* We assign fd_dir only when user buffer for readdir is full, and hash
+ * collision for next entry is found. */
struct ll_file_dir fd_dir;
__u32 fd_flags;
struct file *fd_file;
int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
struct lookup_intent *it, struct kstat *stat);
int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat);
-struct ll_file_data *ll_file_data_get(void);
#ifndef HAVE_INODE_PERMISION_2ARGS
int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd);
#else