Whamcloud - gitweb
b=20581 MDS returns full hash for readdir to decrease hash collision
[fs/lustre-release.git] / lustre / llite / dir.c
index bb93357..6f26358 100644 (file)
@@ -155,7 +155,21 @@ static int ll_dir_readpage(struct file *file, struct page *page)
         int rc;
         ENTRY;
 
-        hash = (__u64)hash_x_index(page->index);
+        if (file) {
+                struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+
+                hash = fd->fd_dir.lfd_next;
+        } else {
+                struct ll_inode_info *lli = ll_i2info(inode);
+
+                cfs_spin_lock(&lli->lli_sa_lock);
+                if (lli->lli_sai)
+                        LASSERT(lli->lli_sai->sai_pid == cfs_curproc_pid());
+                else
+                        LASSERT(lli->lli_opendir_pid == cfs_curproc_pid());
+                hash = lli->lli_sa_pos;
+                cfs_spin_unlock(&lli->lli_sa_lock);
+        }
         CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) off %lu\n",
                inode->i_ino, inode->i_generation, inode, (unsigned long)hash);
 
@@ -209,7 +223,7 @@ static void ll_release_page(struct page *page, __u64 hash,
 /*
  * Find, kmap and return page that contains given hash.
  */
-static struct page *ll_dir_page_locate(struct inode *dir, __u64 hash,
+static struct page *ll_dir_page_locate(struct inode *dir, __u64 *hash,
                                        __u64 *start, __u64 *end)
 {
         struct address_space *mapping = dir->i_mapping;
@@ -218,7 +232,7 @@ static struct page *ll_dir_page_locate(struct inode *dir, __u64 hash,
          * radix_tree_gang_lookup() can be used to find a page with starting
          * hash _smaller_ than one we are looking for.
          */
-        unsigned long offset = hash_x_index((unsigned long)hash);
+        unsigned long offset = hash_x_index(*hash);
         struct page *page;
         int found;
 
@@ -241,11 +255,18 @@ static struct page *ll_dir_page_locate(struct inode *dir, __u64 hash,
                 wait_on_page(page);
                 if (PageUptodate(page)) {
                         dp = kmap(page);
+#if BITS_PER_LONG == 32
+                        *start = le64_to_cpu(dp->ldp_hash_start) >> 32;
+                        *end   = le64_to_cpu(dp->ldp_hash_end) >> 32;
+                        *hash  = *hash >> 32;
+#else
                         *start = le64_to_cpu(dp->ldp_hash_start);
                         *end   = le64_to_cpu(dp->ldp_hash_end);
-                        LASSERT(*start <= hash);
-                        if (hash > *end || (*end != *start && hash == *end)) {
-                                ll_release_page(page, hash, *start, *end);
+#endif
+                        LASSERTF(*start <= *hash, "start = "LPX64",end = "
+                                 LPX64",hash = "LPX64"\n", *start, *end, *hash);
+                        if (*hash > *end || (*end != *start && *hash == *end)) {
+                                ll_release_page(page, *hash, *start, *end);
                                 page = NULL;
                         }
                 } else {
@@ -260,8 +281,8 @@ static struct page *ll_dir_page_locate(struct inode *dir, __u64 hash,
         return page;
 }
 
-struct page *ll_get_dir_page(struct inode *dir, __u64 hash, int exact,
-                             struct ll_dir_chain *chain)
+struct page *ll_get_dir_page(struct file *filp, struct inode *dir, __u64 hash,
+                             int exact, struct ll_dir_chain *chain)
 {
         ldlm_policy_data_t policy = {.l_inodebits = {MDS_INODELOCK_UPDATE} };
         struct address_space *mapping = dir->i_mapping;
@@ -272,6 +293,8 @@ struct page *ll_get_dir_page(struct inode *dir, __u64 hash, int exact,
         int rc;
         __u64 start = 0;
         __u64 end = 0;
+        __u64 lhash = hash;
+        struct ll_inode_info *lli = ll_i2info(dir);
 
         mode = LCK_PR;
         rc = md_lock_match(ll_i2sbi(dir)->ll_md_exp, LDLM_FL_BLOCK_GRANTED,
@@ -310,10 +333,11 @@ struct page *ll_get_dir_page(struct inode *dir, __u64 hash, int exact,
         }
         ldlm_lock_dump_handle(D_OTHER, &lockh);
 
-        page = ll_dir_page_locate(dir, hash, &start, &end);
+        cfs_down(&lli->lli_readdir_sem);
+        page = ll_dir_page_locate(dir, &lhash, &start, &end);
         if (IS_ERR(page)) {
                 CERROR("dir page locate: "DFID" at "LPU64": rc %ld\n",
-                       PFID(ll_inode2fid(dir)), hash, PTR_ERR(page));
+                       PFID(ll_inode2fid(dir)), lhash, PTR_ERR(page));
                 GOTO(out_unlock, page);
         }
 
@@ -332,23 +356,24 @@ struct page *ll_get_dir_page(struct inode *dir, __u64 hash, int exact,
                  * it as an "overflow" page. 1. invalidate all pages at
                  * once. 2. use HASH|1 as an index for P1.
                  */
-                if (exact && hash != start) {
+                if (exact && lhash != start) {
                         /*
                          * readdir asked for a page starting _exactly_ from
                          * given hash, but cache contains stale page, with
                          * entries with smaller hash values. Stale page should
                          * be invalidated, and new one fetched.
                          */
-                        CDEBUG(D_OTHER, "Stale readpage page %p: "LPX64" != "LPX64"\n",
-                               page, hash, start);
-                        ll_release_page(page, hash, start, end);
+                        CDEBUG(D_OTHER, "Stale readpage page %p: "
+                               "start = "LPX64",end = "LPX64"hash ="LPX64"\n",
+                               page, start, end, lhash);
+                        ll_release_page(page, lhash, start, end);
                 } else {
                         GOTO(hash_collision, page);
                 }
         }
 
-        page = read_cache_page(mapping, hash_x_index((unsigned long)hash),
-                               (filler_t*)mapping->a_ops->readpage, NULL);
+        page = read_cache_page(mapping, hash_x_index(hash),
+                               (filler_t*)mapping->a_ops->readpage, filp);
         if (IS_ERR(page)) {
                 CERROR("read cache page: "DFID" at "LPU64": rc %ld\n",
                        PFID(ll_inode2fid(dir)), hash, PTR_ERR(page));
@@ -371,12 +396,23 @@ struct page *ll_get_dir_page(struct inode *dir, __u64 hash, int exact,
         }
 hash_collision:
         dp = page_address(page);
-
+#if BITS_PER_LONG == 32
+        start = le64_to_cpu(dp->ldp_hash_start) >> 32;
+        end   = le64_to_cpu(dp->ldp_hash_end) >> 32;
+        lhash = hash >> 32;
+#else
         start = le64_to_cpu(dp->ldp_hash_start);
         end   = le64_to_cpu(dp->ldp_hash_end);
+        lhash = hash;
+#endif
         if (end == start) {
-                LASSERT(start == hash);
-                CWARN("Page-wide hash collision: %#lx\n", (unsigned long)end);
+                LASSERT(start == lhash);
+                CWARN("Page-wide hash collision: "LPU64"\n", end);
+#if BITS_PER_LONG == 32
+                CWARN("Real page-wide hash collision at ["LPU64" "LPU64"] with "
+                      "hash "LPU64"\n", le64_to_cpu(dp->ldp_hash_start),
+                      le64_to_cpu(dp->ldp_hash_end), hash);
+#endif
                 /*
                  * Fetch whole overflow chain...
                  *
@@ -385,6 +421,7 @@ hash_collision:
                 goto fail;
         }
 out_unlock:
+        cfs_up(&lli->lli_readdir_sem);
         ldlm_lock_decref(&lockh, mode);
         return page;
 
@@ -398,8 +435,9 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir)
 {
         struct inode         *inode = filp->f_dentry->d_inode;
         struct ll_inode_info *info  = ll_i2info(inode);
-        __u64                 pos   = filp->f_pos;
-        struct ll_sb_info    *sbi  = ll_i2sbi(inode);
+        struct ll_sb_info    *sbi   = ll_i2sbi(inode);
+        struct ll_file_data  *fd    = LUSTRE_FPRIVATE(filp);
+        __u64                 pos   = fd->fd_dir.lfd_pos;
         struct page          *page;
         struct ll_dir_chain   chain;
         int rc, need_32bit;
@@ -424,7 +462,8 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir)
         shift = 0;
         ll_dir_chain_init(&chain);
 
-        page = ll_get_dir_page(inode, pos, 0, &chain);
+        fd->fd_dir.lfd_next = pos;
+        page = ll_get_dir_page(filp, inode, pos, 0, &chain);
 
         while (rc == 0 && !done) {
                 struct lu_dirpage *dp;
@@ -445,14 +484,13 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir)
                                 int            namelen;
                                 struct lu_fid  fid;
                                 __u64          ino;
+                                __u64          lhash;
 
                                 /*
                                  * XXX: implement correct swabbing here.
                                  */
 
-                                hash    = le64_to_cpu(ent->lde_hash);
-                                namelen = le16_to_cpu(ent->lde_namelen);
-
+                                hash = le64_to_cpu(ent->lde_hash);
                                 if (hash < pos)
                                         /*
                                          * Skip until we find target hash
@@ -460,46 +498,51 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir)
                                          */
                                         continue;
 
+                                namelen = le16_to_cpu(ent->lde_namelen);
                                 if (namelen == 0)
                                         /*
                                          * Skip dummy record.
                                          */
                                         continue;
 
-                                fid  = ent->lde_fid;
                                 name = ent->lde_name;
-                                fid_le_to_cpu(&fid, &fid);
-                                if (need_32bit)
+                                fid_le_to_cpu(&fid, &ent->lde_fid);
+                                if (need_32bit) {
+                                        lhash = hash >> 32;
                                         ino = cl_fid_build_ino32(&fid);
-                                else
+                                } else {
+                                        lhash = hash;
                                         ino = cl_fid_build_ino(&fid);
+                                }
                                 type = ll_dirent_type_get(ent);
                                 done = filldir(cookie, name, namelen,
-                                               (loff_t)hash, ino, type);
+                                               lhash, ino, type);
                         }
                         next = le64_to_cpu(dp->ldp_hash_end);
                         ll_put_page(page);
                         if (!done) {
                                 pos = next;
-                                if (pos == DIR_END_OFF)
+                                if (pos == DIR_END_OFF) {
                                         /*
                                          * End of directory reached.
                                          */
                                         done = 1;
-                                else if (1 /* chain is exhausted*/)
+                                } else if (1 /* chain is exhausted*/) {
                                         /*
                                          * Normal case: continue to the next
                                          * page.
                                          */
-                                        page = ll_get_dir_page(inode, pos, 1,
-                                                               &chain);
-                                else {
+                                        fd->fd_dir.lfd_next = pos;
+                                        page = ll_get_dir_page(filp, inode, pos,
+                                                               1, &chain);
+                                } else {
                                         /*
                                          * go into overflow page.
                                          */
                                 }
-                        } else
+                        } else {
                                 pos = hash;
+                        }
                 } else {
                         rc = PTR_ERR(page);
                         CERROR("error reading dir "DFID" at %lu: rc %d\n",
@@ -507,7 +550,11 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir)
                 }
         }
 
-        filp->f_pos = (loff_t)pos;
+        fd->fd_dir.lfd_pos = pos;
+        if (need_32bit)
+                filp->f_pos = pos >> 32;
+        else
+                filp->f_pos = pos;
         filp->f_version = inode->i_version;
         touch_atime(filp->f_vfsmnt, filp->f_dentry);
 
@@ -1316,6 +1363,37 @@ out_free:
         }
 }
 
+/*
+ * llseek method for Lustre directories.
+ *
+ * Delegates the actual seek to default_llseek() and then mirrors the
+ * resulting file->f_pos into the per-open readdir cursor
+ * fd->fd_dir.lfd_pos, which ll_readdir() uses as its starting hash.
+ *
+ * For clients using the 32-bit API, f_pos carries only the upper 32 bits
+ * of the directory hash (ll_readdir() stores pos >> 32 there), so the
+ * cursor is rebuilt as f_pos << 32.  A resulting f_pos that does not fit
+ * in 32 bits is rejected.
+ *
+ * \param file   open directory file
+ * \param offset seek offset, interpreted according to \a origin
+ * \param origin seek origin, passed through to default_llseek()
+ *
+ * \retval new offset (>= 0) on success
+ * \retval DIR_END_OFF when seeking forward relative to the current
+ *         position while already at the end of the directory
+ * \retval -EOVERFLOW when the new offset does not fit in 32 bits for a
+ *         32-bit API client; file->f_pos is restored in that case
+ * \retval negative error returned by default_llseek()
+ */
+static loff_t ll_dir_seek(struct file *file, loff_t offset, int origin)
+{
+        struct ll_file_data *fd = LUSTRE_FPRIVATE(file);
+        /* Saved so that f_pos can be restored on 32-bit overflow. */
+        loff_t pos = file->f_pos;
+        loff_t ret;
+        ENTRY;
+
+        /*
+         * origin == 1 is SEEK_CUR: a non-negative relative seek from the
+         * end-of-directory marker stays at the end-of-directory marker.
+         *
+         * NOTE(review): ll_readdir() stores pos >> 32 in f_pos for 32-bit
+         * API clients, so this comparison against the full DIR_END_OFF
+         * looks like it can only match for 64-bit clients -- confirm.
+         */
+        if (origin == 1 && offset >= 0 && file->f_pos == DIR_END_OFF) {
+                CWARN("end of dir hash, DIR_END_OFF(-2) is returned\n");
+                RETURN(DIR_END_OFF);
+        }
+
+        ret = default_llseek(file, offset, origin);
+        if (ret >= 0) {
+                struct ll_sb_info *sbi = ll_i2sbi(file->f_dentry->d_inode);
+
+                if (ll_need_32bit_api(sbi)) {
+                        if (file->f_pos >> 32) {
+                                /* hash overflow, simple revert */
+                                file->f_pos = pos;
+                                RETURN(-EOVERFLOW);
+                        } else {
+                                /*
+                                 * f_pos is in [0, 2^32) here.  Shift as
+                                 * unsigned: shifting the signed loff_t
+                                 * would overflow for f_pos >= 2^31.
+                                 */
+                                fd->fd_dir.lfd_pos = (__u64)file->f_pos << 32;
+                        }
+                } else {
+                        fd->fd_dir.lfd_pos = file->f_pos;
+                }
+        }
+        RETURN(ret);
+}
+
 int ll_dir_open(struct inode *inode, struct file *file)
 {
         ENTRY;
@@ -1329,6 +1407,7 @@ int ll_dir_release(struct inode *inode, struct file *file)
 }
 
 struct file_operations ll_dir_operations = {
+        .llseek   = ll_dir_seek,
         .open     = ll_dir_open,
         .release  = ll_dir_release,
         .read     = generic_read_dir,