const struct dt_it *di);
struct dt_rec *(*rec)(const struct lu_env *env,
const struct dt_it *di);
- __u32 (*store)(const struct lu_env *env,
+ __u64 (*store)(const struct lu_env *env,
const struct dt_it *di);
int (*load)(const struct lu_env *env,
- const struct dt_it *di, __u32 hash);
+ const struct dt_it *di, __u64 hash);
} dio_it;
};
struct lu_rdpg {
/* input params, should be filled out by mdt */
- __u32 rp_hash; /* hash */
+ __u64 rp_hash; /* hash */
int rp_count; /* count in bytes */
int rp_npages; /* number of pages */
struct page **rp_pages; /* pointers to pages */
*/
struct lu_dirent {
struct lu_fid lde_fid;
- __u32 lde_hash;
+ __u64 lde_hash;
__u16 lde_reclen;
__u16 lde_namelen;
char lde_name[0];
};
struct lu_dirpage {
- __u32 ldp_hash_start;
- __u32 ldp_hash_end;
+ __u64 ldp_hash_start;
+ __u64 ldp_hash_end;
__u16 ldp_flags;
__u32 ldp_pad0;
struct lu_dirent ldp_entries[0];
return le16_to_cpu(ent->lde_reclen);
}
-#define DIR_END_OFF 0xfffffffeUL
+#define DIR_END_OFF 0xfffffffffffffffeULL
struct lustre_handle {
__u64 cookie;
#define MEA_MAGIC_ALL_CHARS 0xb222a11c
#define MEA_MAGIC_HASH_SEGMENT 0xb222a11b
-#define MAX_HASH_SIZE 0x7fffffffUL
-#define MAX_HASH_HIGHEST_BIT 0x10000000
+#define MAX_HASH_SIZE_32 0x7fffffffUL
+#define MAX_HASH_SIZE 0x7fffffffffffffffULL
+#define MAX_HASH_HIGHEST_BIT 0x1000000000000000ULL
struct lustre_md {
struct mdt_body *body;
* If page is empty (end of directoryis reached),
* use this value.
*/
- __u32 hash = DIR_END_OFF;
- __u32 next;
+ __u64 hash = DIR_END_OFF;
+ __u64 next;
dp = page->addr;
for (ent = lu_dirent_start(dp); ent != NULL && !done;
struct lu_fid fid;
ino_t ino;
- hash = le32_to_cpu(ent->lde_hash);
+ hash = le64_to_cpu(ent->lde_hash);
namelen = le16_to_cpu(ent->lde_namelen);
if (hash < pos)
(loff_t)hash, ino, DT_UNKNOWN,
&filled);
}
- next = le32_to_cpu(dp->ldp_hash_end);
+ next = le64_to_cpu(dp->ldp_hash_end);
OBD_PAGE_FREE(page);
if (!done) {
pos = next;
* Find, kmap and return page that contains given hash.
*/
-static struct page *ll_dir_page_locate(struct inode *dir, unsigned long hash,
+static struct page *ll_dir_page_locate(struct inode *dir, __u64 hash,
- __u32 *start, __u32 *end)
+ __u64 *start, __u64 *end)
{
struct address_space *mapping = dir->i_mapping;
/*
wait_on_page(page);
if (PageUptodate(page)) {
dp = kmap(page);
- *start = le32_to_cpu(dp->ldp_hash_start);
- *end = le32_to_cpu(dp->ldp_hash_end);
+ *start = le64_to_cpu(dp->ldp_hash_start);
+ *end = le64_to_cpu(dp->ldp_hash_end);
LASSERT(*start <= hash);
if (hash > *end || (*end != *start && hash == *end)) {
kunmap(page);
struct page *page;
ldlm_mode_t mode;
int rc;
- __u32 start;
- __u32 end;
+ __u64 start;
+ __u64 end;
mode = LCK_PR;
rc = md_lock_match(ll_i2sbi(dir)->ll_md_exp, LDLM_FL_BLOCK_GRANTED,
hash_collision:
dp = page_address(page);
- start = le32_to_cpu(dp->ldp_hash_start);
- end = le32_to_cpu(dp->ldp_hash_end);
+ start = le64_to_cpu(dp->ldp_hash_start);
+ end = le64_to_cpu(dp->ldp_hash_end);
if (end == start) {
LASSERT(start == hash);
- CWARN("Page-wide hash collision: %#lx\n", (unsigned long)end);
+ CWARN("Page-wide hash collision: "LPX64"\n", end);
* If page is empty (end of directoryis reached),
* use this value.
*/
- __u32 hash = DIR_END_OFF;
- __u32 next;
+ __u64 hash = DIR_END_OFF;
+ __u64 next;
dp = page_address(page);
for (ent = lu_dirent_start(dp); ent != NULL && !done;
* XXX: implement correct swabbing here.
*/
- hash = le32_to_cpu(ent->lde_hash);
+ hash = le64_to_cpu(ent->lde_hash);
namelen = le16_to_cpu(ent->lde_namelen);
if (hash < pos)
done = filldir(cookie, name, namelen,
(loff_t)hash, ino, DT_UNKNOWN);
}
- next = le32_to_cpu(dp->ldp_hash_end);
+ next = le64_to_cpu(dp->ldp_hash_end);
ll_put_page(page);
if (!done) {
pos = next;
RETURN(0);
}
-static void lmv_hash_adjust(__u32 *hash, __u32 hash_adj)
+static void lmv_hash_adjust(__u64 *hash, __u64 hash_adj)
{
- __u32 val;
+ __u64 val;
- val = le32_to_cpu(*hash);
+ val = le64_to_cpu(*hash);
if (val < hash_adj)
val += MAX_HASH_SIZE;
if (val != DIR_END_OFF)
- *hash = cpu_to_le32(val - hash_adj);
+ *hash = cpu_to_le64(val - hash_adj);
}
static __u32 lmv_node_rank(struct obd_export *exp, const struct lu_fid *fid)
struct obd_export *tgt_exp;
struct lu_fid rid = *fid;
struct lmv_obj *obj;
- __u32 offset0;
- __u32 offset;
- __u32 hash_adj = 0;
+ __u64 offset;
+ __u64 hash_adj = 0;
__u32 rank = 0;
- __u32 seg_size = 0;
+ __u64 seg_size = 0;
+ __u64 tgt_tmp = 0;
int tgt = 0;
int tgt0 = 0;
int rc;
int nr = 0;
ENTRY;
- offset0 = offset = offset64;
- /*
- * Check that offset is representable by 32bit number.
- */
- LASSERT((__u64)offset == offset64);
+ offset = offset64;
rc = lmv_check_connect(obd);
if (rc)
RETURN(rc);
- CDEBUG(D_INFO, "READPAGE at %x from "DFID"\n", offset, PFID(&rid));
+ CDEBUG(D_INFO, "READPAGE at %llx from "DFID"\n", offset, PFID(&rid));
obj = lmv_obj_grab(obd, fid);
if (obj) {
+
+ /*
+ * This case handle directory lookup in clustered metadata case (i.e.
+ * split directory is located on multiple md servers.)
+ * each server keeps directory entries for certain range of hashes.
+ * E.g. we have N server and suppose hash range is 0 to MAX_HASH.
+ * first server will keep records with hashes [ 0 ... MAX_HASH / N - 1],
+ * second one with hashes [MAX_HASH / N ... 2 * MAX_HASH / N] and
+ * so on....
+ * readdir can simply start reading entries from 0 - N server in
+ * order but that will not scale well as all client will request dir in
+ * to server in same order.
+ * Following algorithm does optimization:
+ * Instead of doing readdir in 1, 2, ...., N order, client with a
+ * rank R does readdir in R, R + 1, ..., N, 1, ... R - 1 order.
+ * (every client has rank R)
+ * But ll_readdir() expect offset range [0 to MAX_HASH/N) but
+ * since client ask dir from MDS{R} client has pages with offsets
+ * [R*MAX_HASH/N ... (R + 1)*MAX_HASH/N] there for we do hash_adj
+ * on hash values that we get.
+ */
+
struct lmv_inode *loi;
lmv_obj_lock(obj);
nr = obj->lo_objcount;
LASSERT(nr > 0);
- seg_size = MAX_HASH_SIZE / nr;
+ seg_size = MAX_HASH_SIZE;
+ do_div(seg_size, nr);
loi = obj->lo_inodes;
rank = lmv_node_rank(lmv_get_export(lmv, loi[0].li_mds),
fid) % nr;
- tgt0 = (offset / seg_size) % nr;
+ tgt_tmp = offset;
+ do_div(tgt_tmp, seg_size);
+ tgt0 = do_div(tgt_tmp, nr);
tgt = (tgt0 + rank) % nr;
if (tgt < tgt0)
hash_adj += rank * seg_size;
- CDEBUG(D_INFO, "hash_adj: %x %x %x/%x -> %x/%x\n",
+ CDEBUG(D_INFO, "hash_adj: %x %llx %llx/%x -> %llx/%x\n",
rank, hash_adj, offset, tgt0, offset + hash_adj, tgt);
- offset = (offset + hash_adj) % MAX_HASH_SIZE;
+ offset = (offset + hash_adj) & MAX_HASH_SIZE;
rid = obj->lo_inodes[tgt].li_fid;
tgt_exp = lmv_get_export(lmv, loi[tgt].li_mds);
lmv_hash_adjust(&dp->ldp_hash_start, hash_adj);
lmv_hash_adjust(&dp->ldp_hash_end, hash_adj);
- LASSERT(cpu_to_le32(dp->ldp_hash_start) <= offset0);
+ LASSERT(le64_to_cpu(dp->ldp_hash_start) <= offset64);
for (ent = lu_dirent_start(dp); ent != NULL;
ent = lu_dirent_next(ent))
if (end == DIR_END_OFF) {
- dp->ldp_hash_end = cpu_to_le32(seg_size *
-                                (tgt0 + 1));
+ dp->ldp_hash_end = cpu_to_le64(seg_size *
+                                (tgt0 + 1));
- CDEBUG(D_INFO, ""DFID" reset end %x tgt %d\n",
+ CDEBUG(D_INFO, ""DFID" reset end %llx tgt %d\n",
PFID(&rid),
- le32_to_cpu(dp->ldp_hash_end), tgt);
+ le64_to_cpu(dp->ldp_hash_end), tgt);
}
}
cfs_kunmap(page);
static int mdd_dir_page_build(const struct lu_env *env, int first,
void *area, int nob, struct dt_it_ops *iops,
- struct dt_it *it, __u32 *start, __u32 *end,
+ struct dt_it *it, __u64 *start, __u64 *end,
struct lu_dirent **last)
{
struct lu_fid *fid = &mdd_env_info(env)->mti_fid2;
char *name;
int len;
int recsize;
- __u32 hash;
+ __u64 hash;
name = (char *)iops->key(env, it);
len = iops->key_size(env, it);
hash = iops->store(env, it);
*end = hash;
- CDEBUG(D_INFO, "%p %p %d "DFID": %#8.8x (%d) \"%*.*s\"\n",
+ CDEBUG(D_INFO, "%p %p %d "DFID": "LPU64" (%d) \"%*.*s\"\n",
name, ent, nob, PFID(fid), hash, len, len, len, name);
if (nob >= recsize) {
int i;
int rc;
int nob;
- __u32 hash_start;
- __u32 hash_end;
+ __u64 hash_start;
+ __u64 hash_end;
LASSERT(rdpg->rp_pages != NULL);
LASSERT(next->do_index_ops != NULL);
{
__u32 hash;
- hash = (hash_build0(name, namelen) << 1) & MAX_HASH_SIZE;
- if (hash > MAX_HASH_SIZE - HASH_GRAY_AREA)
+ hash = (hash_build0(name, namelen) << 1) & MAX_HASH_SIZE_32;
+ if (hash > MAX_HASH_SIZE_32 - HASH_GRAY_AREA)
hash &= HASH_GRAY_AREA - 1;
return hash;
}
{
__u32 hash;
- LASSERT(IS_PO2(MAX_HASH_SIZE + 1));
+ LASSERT(IS_PO2(MAX_HASH_SIZE_32 + 1));
- hash = hash_build(name, namelen) / (MAX_HASH_SIZE / count);
+ hash = hash_build(name, namelen) / (MAX_HASH_SIZE_32 / count);
LASSERTF(hash < count, "hash %x count %d \n", hash, count);
return hash;
return (struct dt_rec *)iam_it_rec_get(&it->oi_it);
}
-static __u32 osd_it_store(const struct lu_env *env, const struct dt_it *di)
+static __u64 osd_it_store(const struct lu_env *env, const struct dt_it *di)
{
struct osd_it *it = (struct osd_it *)di;
}
static int osd_it_load(const struct lu_env *env,
- const struct dt_it *di, __u32 hash)
+ const struct dt_it *di, __u64 hash)
{
struct osd_it *it = (struct osd_it *)di;