Whamcloud - gitweb
Revert "b=20581 Handle directory entry hash collisions" v2_0_0-rc1a v2_0_0_RC1
authorRobert Read <robert.read@oracle.com>
Thu, 1 Jul 2010 06:46:55 +0000 (23:46 -0700)
committerRobert Read <robert.read@oracle.com>
Thu, 1 Jul 2010 06:46:55 +0000 (23:46 -0700)
This reverts commit 04e67d056c015dbe3f8ce69092489fe9a10d771d.

This patch is no longer required (as we think we understand the problem now),
and it introduced a rare NULL pointer exception, so best to remove it.

lustre/llite/dir.c
lustre/llite/file.c
lustre/llite/llite_internal.h

index 61ca421..f145b8c 100644 (file)
@@ -394,30 +394,11 @@ fail:
         goto out_unlock;
 }
 
-#define READDIR_ROLLBACK(dp, cur_page, prev_page, cross, cur_ent, first_ent)   \
-do {                                                                           \
-        LASSERT(first_ent != NULL);                                            \
-        cur_ent = first_ent;                                                   \
-        first_ent = NULL;                                                      \
-        if (cross != 0) {                                                      \
-                LASSERT(prev_page != NULL);                                    \
-                ll_put_page(cur_page);                                         \
-                cur_page = prev_page;                                          \
-                prev_page = NULL;                                              \
-                dp = page_address(cur_page);                                   \
-        }                                                                      \
-} while(0)
-
 int ll_readdir(struct file *filp, void *cookie, filldir_t filldir)
 {
         struct inode         *inode = filp->f_dentry->d_inode;
         struct ll_inode_info *info  = ll_i2info(inode);
         __u64                 pos   = filp->f_pos;
-        int                   cross = 0;
-        struct ll_file_data  *fd    = LUSTRE_FPRIVATE(filp);
-        struct lu_dirent     *first = NULL;
-        struct page          *prev_page = NULL;
-        struct lu_dirent     *prev_ent  = NULL;
         struct page          *page;
         struct ll_dir_chain   chain;
         int rc;
@@ -430,15 +411,6 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir)
                inode->i_ino, inode->i_generation, inode,
                (unsigned long)pos, i_size_read(inode));
 
-        /* If someone changed f_pos out of readdir, ignore fd_dir */
-        if (unlikely(fd->fd_dir.lfd_valid && fd->fd_dir.lfd_hash != pos)) {
-                CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) someone changed "
-                       "pos from %llu to %llu, maybe cause misposition\n",
-                       inode->i_ino, inode->i_generation, inode,
-                       fd->fd_dir.lfd_hash, pos);
-                fd->fd_dir.lfd_valid = 0;
-        }
-
         if (pos == DIR_END_OFF)
                 /*
                  * end-of-file.
@@ -462,6 +434,7 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir)
                          * use this value.
                          */
                         __u64 hash = DIR_END_OFF;
+                        __u64 next;
 
                         dp = page_address(page);
                         for (ent = lu_dirent_start(dp); ent != NULL && !done;
@@ -475,8 +448,9 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir)
                                  * XXX: implement correct swabbing here.
                                  */
 
-restart:
-                                hash = le64_to_cpu(ent->lde_hash);
+                                hash    = le64_to_cpu(ent->lde_hash);
+                                namelen = le16_to_cpu(ent->lde_namelen);
+
                                 if (hash < pos)
                                         /*
                                          * Skip until we find target hash
@@ -484,142 +458,27 @@ restart:
                                          */
                                         continue;
 
-                                namelen = le16_to_cpu(ent->lde_namelen);
                                 if (namelen == 0)
                                         /*
                                          * Skip dummy record.
                                          */
                                         continue;
 
+                                fid  = ent->lde_fid;
                                 name = ent->lde_name;
-                                fid_le_to_cpu(&fid, &ent->lde_fid);
-
-                                if (unlikely(fd->fd_dir.lfd_valid &&
-                                             hash == pos)) {
-                                        if (fd->fd_dir.lfd_namelen != namelen ||
-                                            memcmp(fd->fd_dir.lfd_name, name,
-                                                   namelen) != 0) {
-                                                if (first == NULL)
-                                                        first = ent;
-                                                /*
-                                                 * It is not the one we want.
-                                                 * Coutinue.
-                                                 */
-                                                continue;
-                                        }
-
-                                        fd->fd_dir.lfd_valid = 0;
-                                        if (!lu_fid_eq(&fid,
-                                                       &fd->fd_dir.lfd_fid)) {
-                                                /*
-                                                 * Someone re-add it.
-                                                 */
-                                                CDEBUG(D_VFSTRACE,"VFS Op:inode"
-                                                       "=%lu/%u(%p) someone "
-                                                       "re-add at pos %llu\n",
-                                                       inode->i_ino,
-                                                       inode->i_generation,
-                                                       inode, pos);
-                                                if (first != NULL) {
-                                                        READDIR_ROLLBACK(dp,
-                                                                page, prev_page,
-                                                                cross, ent,
-                                                                first);
-                                                        goto restart;
-                                                }
-                                        }
-                                }
-
-                                if (unlikely(fd->fd_dir.lfd_valid &&
-                                             hash > pos)) {
-                                        CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u"
-                                               "(%p) miss to find pos %llu\n",
-                                               inode->i_ino,inode->i_generation,
-                                               inode, pos);
-                                        fd->fd_dir.lfd_valid = 0;
-                                        if (first != NULL) {
-                                                READDIR_ROLLBACK(dp, page,
-                                                        prev_page, cross,
-                                                        ent, first);
-                                                goto restart;
-                                        }
-                                }
-
-
+                                fid_le_to_cpu(&fid, &fid);
                                 if (cfs_curproc_is_32bit())
                                         ino = cl_fid_build_ino32(&fid);
                                 else
                                         ino = cl_fid_build_ino(&fid);
                                 type = ll_dirent_type_get(ent);
-
-                                if (prev_ent != NULL) {
-                                        if (unlikely(namelen ==
-                                            le16_to_cpu(prev_ent->lde_namelen)&&
-                                            memcmp(name, prev_ent->lde_name,
-                                                   namelen) == 0)) {
-                                                struct lu_fid prev_fid;
-
-                                                fid_le_to_cpu(&prev_fid,
-                                                        &prev_ent->lde_fid);
-                                                CERROR("Pack two entries with "
-                                                       "the same name [%.*s] "
-                                                       "under inode=%lu/%u"
-                                                       "(%p): entry1 [%p], "
-                                                       "hash1 ["LPU64"], "
-                                                       "FID1 ["DFID"], "
-                                                       "entry2 [%p], "
-                                                       "hash2 ["LPU64"], "
-                                                       "FID2 ["DFID"]. (C)\n",
-                                                       namelen, name,
-                                                       inode->i_ino,
-                                                       inode->i_generation,
-                                                       inode, prev_ent,
-                                                       le64_to_cpu(
-                                                         prev_ent->lde_hash),
-                                                       PFID(&prev_fid),
-                                                       ent, hash, PFID(&fid));
-                                                LASSERT(ent != prev_ent);
-                                        }
-                                }
-
                                 done = filldir(cookie, name, namelen,
                                                (loff_t)hash, ino, type);
-                                if (!done)
-                                        prev_ent = ent;
-                        } /* end for */
-
+                        }
+                        next = le64_to_cpu(dp->ldp_hash_end);
+                        ll_put_page(page);
                         if (!done) {
-                                if (unlikely(fd->fd_dir.lfd_valid && first)) {
-                                        if (cross == 0) {
-                                                cross = 1;
-                                        } else {
-                                                /*
-                                                 * At here, we fall into full
-                                                 * page hash collision.
-                                                 * Currently, we can not handle
-                                                 * full page hash collision yet.
-                                                 * Give up search, just pack
-                                                 * from the first.
-                                                 */
-                                                CWARN("Full page hash collision"
-                                                      " for inode=%lu/%u(%p) "
-                                                      "with value ["LPU64"]\n",
-                                                      inode->i_ino,
-                                                      inode->i_generation,
-                                                      inode, hash);
-                                                fd->fd_dir.lfd_valid = 0;
-                                                READDIR_ROLLBACK(dp, page,
-                                                        prev_page, cross,
-                                                        ent, first);
-                                                goto restart;
-                                        }
-                                }
-
-                                if (prev_page != NULL)
-                                        ll_put_page(prev_page);
-                                prev_page = page;
-
-                                pos = le64_to_cpu(dp->ldp_hash_end);
+                                pos = next;
                                 if (pos == DIR_END_OFF)
                                         /*
                                          * End of directory reached.
@@ -637,53 +496,8 @@ restart:
                                          * go into overflow page.
                                          */
                                 }
-                        } else {
+                        } else
                                 pos = hash;
-                                if (unlikely(prev_ent != NULL &&
-                                    le64_to_cpu(prev_ent->lde_hash) == hash)) {
-                                        struct lu_fid prev_fid;
-                                        struct lu_fid fid;
-                                        int namelen =
-                                                le16_to_cpu(ent->lde_namelen);
-
-                                        fid_le_to_cpu(&prev_fid,
-                                                      &prev_ent->lde_fid);
-                                        fid_le_to_cpu(&fid,
-                                                      &ent->lde_fid);
-                                        /* This should replace D_WARNING with
-                                         * D_VFSTRACE after bug 20581 fixed. */
-                                        CDEBUG(D_WARNING, "Hash collision for "
-                                               "inode=%lu/%u(%p) with the "
-                                               "same hash value ["LPU64"] to "
-                                               "be returned to user space "
-                                               "through multiple readdir: "
-                                               "entry1 [%p], name1 [%.*s], "
-                                               "FID1 ["DFID"], "
-                                               "entry2 [%p], name2 [%.*s], "
-                                               "FID2 ["DFID"]\n",
-                                               inode->i_ino,
-                                               inode->i_generation,
-                                               inode, hash, prev_ent,
-                                               le16_to_cpu(
-                                                 prev_ent->lde_namelen),
-                                               prev_ent->lde_name,
-                                               PFID(&prev_fid),
-                                               ent, namelen,
-                                               ent->lde_name,
-                                               PFID(&fid));
-                                        LASSERT(ent != prev_ent);
-                                        fd->fd_dir.lfd_fid = fid;
-                                        fd->fd_dir.lfd_hash = hash;
-                                        fd->fd_dir.lfd_namelen = namelen;
-                                        memcpy(fd->fd_dir.lfd_name,
-                                               ent->lde_name, namelen);
-                                        fd->fd_dir.lfd_valid = 1;
-                                }
-
-                                if (prev_page != NULL)
-                                        ll_put_page(prev_page);
-                                prev_page = page;
-                        }
                 } else {
                         rc = PTR_ERR(page);
                         CERROR("error reading dir "DFID" at %lu: rc %d\n",
@@ -691,9 +505,6 @@ restart:
                 }
         }
 
-        if (prev_page != NULL)
-                ll_put_page(prev_page);
-
         filp->f_pos = (loff_t)pos;
         filp->f_version = inode->i_version;
         touch_atime(filp->f_vfsmnt, filp->f_dentry);
index 90fb2e5..693dc35 100644 (file)
 
 #include "cl_object.h"
 
-static struct ll_file_data *ll_file_data_get(int is_dir)
+struct ll_file_data *ll_file_data_get(void)
 {
         struct ll_file_data *fd;
 
         OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO);
-        if (fd && is_dir) {
-                OBD_ALLOC(fd->fd_dir.lfd_name, LLITE_NAME_LEN);
-                if (unlikely(fd->fd_dir.lfd_name == NULL)) {
-                        OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
-                        fd = NULL;
-                }
-        }
         return fd;
 }
 
 static void ll_file_data_put(struct ll_file_data *fd)
 {
-        if (fd != NULL) {
-                if (fd->fd_dir.lfd_name)
-                        OBD_FREE(fd->fd_dir.lfd_name, LLITE_NAME_LEN);
+        if (fd != NULL)
                 OBD_SLAB_FREE_PTR(fd, ll_file_data_slab);
-        }
 }
 
 void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data,
@@ -324,7 +314,7 @@ int ll_file_release(struct inode *inode, struct file *file)
         LASSERT(fd != NULL);
 
         /* The last ref on @file, maybe not the the owner pid of statahead.
-         * Different processes can open the same dir, "lli_opendir_key" means:
+         * Different processes can open the same dir, "ll_opendir_key" means:
          * it is me that should stop the statahead thread. */
         if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0)
                 ll_stop_statahead(inode, lli->lli_opendir_key);
@@ -519,7 +509,7 @@ int ll_file_open(struct inode *inode, struct file *file)
         file->private_data = NULL; /* prevent ll_local_open assertion */
 #endif
 
-        fd = ll_file_data_get(S_ISDIR(inode->i_mode));
+        fd = ll_file_data_get();
         if (fd == NULL)
                 RETURN(-ENOMEM);
 
index 9774f55..b8788d2 100644 (file)
@@ -501,18 +501,7 @@ struct ll_readahead_state {
         unsigned long   ras_consecutive_stride_requests;
 };
 
-#define LLITE_NAME_LEN 255
-
 struct ll_file_dir {
-        struct lu_fid lfd_fid;          /* fid of next entry. it can not be
-                                         * used as offset clew independently
-                                         * because of hardlink */
-        __u64         lfd_hash;         /* hash (offset) of next entry */
-        __u16         lfd_valid;        /* If user buffer for readdir is full,
-                                         * and hash collision for next entry,
-                                         * then it is 1, otherwise it is 0. */
-        __u16         lfd_namelen;      /* namelen of next entry */
-        char         *lfd_name;         /* name of next entry */
 };
 
 extern cfs_mem_cache_t *ll_file_data_slab;
@@ -521,8 +510,6 @@ struct ll_file_data {
         struct ll_readahead_state fd_ras;
         int fd_omode;
         struct ccc_grouplock fd_grouplock;
-        /* We assign fd_dir only when user buffer for readdir is full, and hash
-         * collision for next entry is found. */
         struct ll_file_dir fd_dir;
         __u32 fd_flags;
         struct file *fd_file;
@@ -660,6 +647,7 @@ extern void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid,
 int ll_getattr_it(struct vfsmount *mnt, struct dentry *de,
                struct lookup_intent *it, struct kstat *stat);
 int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat);
+struct ll_file_data *ll_file_data_get(void);
 #ifndef HAVE_INODE_PERMISION_2ARGS
 int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd);
 #else