From: Fan Yong Date: Fri, 25 Jun 2010 22:14:22 +0000 (-0700) Subject: b=20581 Handle directory entry hash collisions X-Git-Tag: v2_0_0-rc1~11 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=04e67d056c015dbe3f8ce69092489fe9a10d771d b=20581 Handle directory entry hash collisions i=rread i=pravin --- diff --git a/lustre/llite/dir.c b/lustre/llite/dir.c index f145b8c..61ca421 100644 --- a/lustre/llite/dir.c +++ b/lustre/llite/dir.c @@ -394,11 +394,30 @@ fail: goto out_unlock; } +#define READDIR_ROLLBACK(dp, cur_page, prev_page, cross, cur_ent, first_ent) \ +do { \ + LASSERT(first_ent != NULL); \ + cur_ent = first_ent; \ + first_ent = NULL; \ + if (cross != 0) { \ + LASSERT(prev_page != NULL); \ + ll_put_page(cur_page); \ + cur_page = prev_page; \ + prev_page = NULL; \ + dp = page_address(cur_page); \ + } \ +} while(0) + int ll_readdir(struct file *filp, void *cookie, filldir_t filldir) { struct inode *inode = filp->f_dentry->d_inode; struct ll_inode_info *info = ll_i2info(inode); __u64 pos = filp->f_pos; + int cross = 0; + struct ll_file_data *fd = LUSTRE_FPRIVATE(filp); + struct lu_dirent *first = NULL; + struct page *prev_page = NULL; + struct lu_dirent *prev_ent = NULL; struct page *page; struct ll_dir_chain chain; int rc; @@ -411,6 +430,15 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir) inode->i_ino, inode->i_generation, inode, (unsigned long)pos, i_size_read(inode)); + /* If someone changed f_pos out of readdir, ignore fd_dir */ + if (unlikely(fd->fd_dir.lfd_valid && fd->fd_dir.lfd_hash != pos)) { + CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u(%p) someone changed " + "pos from %llu to %llu, maybe cause misposition\n", + inode->i_ino, inode->i_generation, inode, + fd->fd_dir.lfd_hash, pos); + fd->fd_dir.lfd_valid = 0; + } + if (pos == DIR_END_OFF) /* * end-of-file. @@ -434,7 +462,6 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir) * use this value. */ __u64 hash = DIR_END_OFF; - __u64 next; dp = page_address(page); for (ent = lu_dirent_start(dp); ent != NULL && !done; @@ -448,9 +475,8 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir) * XXX: implement correct swabbing here. */ - hash = le64_to_cpu(ent->lde_hash); - namelen = le16_to_cpu(ent->lde_namelen); - +restart: + hash = le64_to_cpu(ent->lde_hash); if (hash < pos) /* * Skip until we find target hash @@ -458,27 +484,142 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir) */ continue; + namelen = le16_to_cpu(ent->lde_namelen); if (namelen == 0) /* * Skip dummy record. */ continue; - fid = ent->lde_fid; name = ent->lde_name; - fid_le_to_cpu(&fid, &fid); + fid_le_to_cpu(&fid, &ent->lde_fid); + + if (unlikely(fd->fd_dir.lfd_valid && + hash == pos)) { + if (fd->fd_dir.lfd_namelen != namelen || + memcmp(fd->fd_dir.lfd_name, name, + namelen) != 0) { + if (first == NULL) + first = ent; + /* + * It is not the one we want. + * Coutinue. + */ + continue; + } + + fd->fd_dir.lfd_valid = 0; + if (!lu_fid_eq(&fid, + &fd->fd_dir.lfd_fid)) { + /* + * Someone re-add it. + */ + CDEBUG(D_VFSTRACE,"VFS Op:inode" + "=%lu/%u(%p) someone " + "re-add at pos %llu\n", + inode->i_ino, + inode->i_generation, + inode, pos); + if (first != NULL) { + READDIR_ROLLBACK(dp, + page, prev_page, + cross, ent, + first); + goto restart; + } + } + } + + if (unlikely(fd->fd_dir.lfd_valid && + hash > pos)) { + CDEBUG(D_VFSTRACE, "VFS Op:inode=%lu/%u" + "(%p) miss to find pos %llu\n", + inode->i_ino,inode->i_generation, + inode, pos); + fd->fd_dir.lfd_valid = 0; + if (first != NULL) { + READDIR_ROLLBACK(dp, page, + prev_page, cross, + ent, first); + goto restart; + } + } + + if (cfs_curproc_is_32bit()) ino = cl_fid_build_ino32(&fid); else ino = cl_fid_build_ino(&fid); type = ll_dirent_type_get(ent); + + if (prev_ent != NULL) { + if (unlikely(namelen == + le16_to_cpu(prev_ent->lde_namelen)&& + memcmp(name, prev_ent->lde_name, + namelen) == 0)) { + struct lu_fid prev_fid; + + fid_le_to_cpu(&prev_fid, + &prev_ent->lde_fid); + CERROR("Pack two entries with " + "the same name [%.*s] " + "under inode=%lu/%u" + "(%p): entry1 [%p], " + "hash1 ["LPU64"], " + "FID1 ["DFID"], " + "entry2 [%p], " + "hash2 ["LPU64"], " + "FID2 ["DFID"]. (C)\n", + namelen, name, + inode->i_ino, + inode->i_generation, + inode, prev_ent, + le64_to_cpu( + prev_ent->lde_hash), + PFID(&prev_fid), + ent, hash, PFID(&fid)); + LASSERT(ent != prev_ent); + } + } + done = filldir(cookie, name, namelen, (loff_t)hash, ino, type); - } - next = le64_to_cpu(dp->ldp_hash_end); - ll_put_page(page); + if (!done) + prev_ent = ent; + } /* end for */ + if (!done) { - pos = next; + if (unlikely(fd->fd_dir.lfd_valid && first)) { + if (cross == 0) { + cross = 1; + } else { + /* + * At here, we fall into full + * page hash collision. + * Currently, we can not handle + * full page hash collision yet. + * Give up search, just pack + * from the first. + */ + CWARN("Full page hash collision" + " for inode=%lu/%u(%p) " + "with value ["LPU64"]\n", + inode->i_ino, + inode->i_generation, + inode, hash); + fd->fd_dir.lfd_valid = 0; + READDIR_ROLLBACK(dp, page, + prev_page, cross, + ent, first); + goto restart; + } + } + + if (prev_page != NULL) + ll_put_page(prev_page); + prev_page = page; + + pos = le64_to_cpu(dp->ldp_hash_end); if (pos == DIR_END_OFF) /* * End of directory reached. @@ -496,8 +637,53 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir) * go into overflow page. */ } - } else + } else { pos = hash; + if (unlikely(prev_ent != NULL && + le64_to_cpu(prev_ent->lde_hash) == hash)) { + struct lu_fid prev_fid; + struct lu_fid fid; + int namelen = + le16_to_cpu(ent->lde_namelen); + + fid_le_to_cpu(&prev_fid, + &prev_ent->lde_fid); + fid_le_to_cpu(&fid, + &ent->lde_fid); + /* This should replace D_WARNING with + * D_VFSTRACE after bug 20581 fixed. */ + CDEBUG(D_WARNING, "Hash collision for " + "inode=%lu/%u(%p) with the " + "same hash value ["LPU64"] to " + "be returned to user space " + "through multiple readdir: " + "entry1 [%p], name1 [%.*s], " + "FID1 ["DFID"], " + "entry2 [%p], name2 [%.*s], " + "FID2 ["DFID"]\n", + inode->i_ino, + inode->i_generation, + inode, hash, prev_ent, + le16_to_cpu( + prev_ent->lde_namelen), + prev_ent->lde_name, + PFID(&prev_fid), + ent, namelen, + ent->lde_name, + PFID(&fid)); + LASSERT(ent != prev_ent); + fd->fd_dir.lfd_fid = fid; + fd->fd_dir.lfd_hash = hash; + fd->fd_dir.lfd_namelen = namelen; + memcpy(fd->fd_dir.lfd_name, + ent->lde_name, namelen); + fd->fd_dir.lfd_valid = 1; + } + + if (prev_page != NULL) + ll_put_page(prev_page); + prev_page = page; + } } else { rc = PTR_ERR(page); CERROR("error reading dir "DFID" at %lu: rc %d\n", @@ -505,6 +691,9 @@ int ll_readdir(struct file *filp, void *cookie, filldir_t filldir) } } + if (prev_page != NULL) + ll_put_page(prev_page); + filp->f_pos = (loff_t)pos; filp->f_version = inode->i_version; touch_atime(filp->f_vfsmnt, filp->f_dentry); diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 693dc35..90fb2e5 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -51,18 +51,28 @@ #include "cl_object.h" -struct ll_file_data *ll_file_data_get(void) +static struct ll_file_data *ll_file_data_get(int is_dir) { struct ll_file_data *fd; OBD_SLAB_ALLOC_PTR_GFP(fd, ll_file_data_slab, CFS_ALLOC_IO); + if (fd && is_dir) { + OBD_ALLOC(fd->fd_dir.lfd_name, LLITE_NAME_LEN); + if (unlikely(fd->fd_dir.lfd_name == NULL)) { + OBD_SLAB_FREE_PTR(fd, ll_file_data_slab); + fd = NULL; + } + } return fd; } static void ll_file_data_put(struct ll_file_data *fd) { - if (fd != NULL) + if (fd != NULL) { + if (fd->fd_dir.lfd_name) + OBD_FREE(fd->fd_dir.lfd_name, LLITE_NAME_LEN); OBD_SLAB_FREE_PTR(fd, ll_file_data_slab); + } } void ll_pack_inode2opdata(struct inode *inode, struct md_op_data *op_data, @@ -314,7 +324,7 @@ int ll_file_release(struct inode *inode, struct file *file) LASSERT(fd != NULL); /* The last ref on @file, maybe not the the owner pid of statahead. - * Different processes can open the same dir, "ll_opendir_key" means: + * Different processes can open the same dir, "lli_opendir_key" means: * it is me that should stop the statahead thread. */ if (lli->lli_opendir_key == fd && lli->lli_opendir_pid != 0) ll_stop_statahead(inode, lli->lli_opendir_key); @@ -509,7 +519,7 @@ int ll_file_open(struct inode *inode, struct file *file) file->private_data = NULL; /* prevent ll_local_open assertion */ #endif - fd = ll_file_data_get(); + fd = ll_file_data_get(S_ISDIR(inode->i_mode)); if (fd == NULL) RETURN(-ENOMEM); diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index b8788d2..9774f55 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -501,7 +501,18 @@ struct ll_readahead_state { unsigned long ras_consecutive_stride_requests; }; +#define LLITE_NAME_LEN 255 + struct ll_file_dir { + struct lu_fid lfd_fid; /* fid of next entry. it can not be + * used as offset clew independently + * because of hardlink */ + __u64 lfd_hash; /* hash (offset) of next entry */ + __u16 lfd_valid; /* If user buffer for readdir is full, + * and hash collision for next entry, + * then it is 1, otherwise it is 0. */ + __u16 lfd_namelen; /* namelen of next entry */ + char *lfd_name; /* name of next entry */ }; extern cfs_mem_cache_t *ll_file_data_slab; @@ -510,6 +521,8 @@ struct ll_file_data { struct ll_readahead_state fd_ras; int fd_omode; struct ccc_grouplock fd_grouplock; + /* We assign fd_dir only when user buffer for readdir is full, and hash + * collision for next entry is found. */ struct ll_file_dir fd_dir; __u32 fd_flags; struct file *fd_file; @@ -647,7 +660,6 @@ extern void ll_rw_stats_tally(struct ll_sb_info *sbi, pid_t pid, int ll_getattr_it(struct vfsmount *mnt, struct dentry *de, struct lookup_intent *it, struct kstat *stat); int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat); -struct ll_file_data *ll_file_data_get(void); #ifndef HAVE_INODE_PERMISION_2ARGS int ll_inode_permission(struct inode *inode, int mask, struct nameidata *nd); #else