const struct dt_it *di);
struct dt_rec *(*rec)(const struct lu_env *env,
const struct dt_it *di);
- __u32 (*store)(const struct lu_env *env,
+ __u64 (*store)(const struct lu_env *env,
const struct dt_it *di);
int (*load)(const struct lu_env *env,
- const struct dt_it *di, __u32 hash);
+ const struct dt_it *di, __u64 hash);
} dio_it;
};
struct lu_rdpg {
/* input params, should be filled out by mdt */
- __u32 rp_hash; /* hash */
+ __u64 rp_hash; /* hash */
int rp_count; /* count in bytes */
int rp_npages; /* number of pages */
struct page **rp_pages; /* pointers to pages */
*/
struct lu_dirent {
struct lu_fid lde_fid;
- __u32 lde_hash;
+ __u64 lde_hash;
__u16 lde_reclen;
__u16 lde_namelen;
char lde_name[0];
};
struct lu_dirpage {
- __u32 ldp_hash_start;
- __u32 ldp_hash_end;
+ __u64 ldp_hash_start;
+ __u64 ldp_hash_end;
__u16 ldp_flags;
__u32 ldp_pad0;
struct lu_dirent ldp_entries[0];
return le16_to_cpu(ent->lde_reclen);
}
-#define DIR_END_OFF 0xfffffffeUL
+#define DIR_END_OFF 0xfffffffffffffffeULL
struct lustre_handle {
__u64 cookie;
#define MEA_MAGIC_ALL_CHARS 0xb222a11c
#define MEA_MAGIC_HASH_SEGMENT 0xb222a11b
-#define MAX_HASH_SIZE 0x7fffffffUL
-#define MAX_HASH_HIGHEST_BIT 0x10000000
+#define MAX_HASH_SIZE_32 0x7fffffffUL
+#define MAX_HASH_SIZE 0x7fffffffffffffffULL
+#define MAX_HASH_HIGHEST_BIT 0x1000000000000000ULL
struct lustre_md {
struct mdt_body *body;
* If page is empty (end of directoryis reached),
* use this value.
*/
- __u32 hash = DIR_END_OFF;
- __u32 next;
+ __u64 hash = DIR_END_OFF;
+ __u64 next;
dp = page->addr;
for (ent = lu_dirent_start(dp); ent != NULL && !done;
struct lu_fid fid;
ino_t ino;
- hash = le32_to_cpu(ent->lde_hash);
+ hash = le64_to_cpu(ent->lde_hash);
namelen = le16_to_cpu(ent->lde_namelen);
if (hash < pos)
(loff_t)hash, ino, DT_UNKNOWN,
&filled);
}
- next = le32_to_cpu(dp->ldp_hash_end);
+ next = le64_to_cpu(dp->ldp_hash_end);
OBD_PAGE_FREE(page);
if (!done) {
pos = next;
* Find, kmap and return page that contains given hash.
*/
-static struct page *ll_dir_page_locate(struct inode *dir, unsigned long hash,
+static struct page *ll_dir_page_locate(struct inode *dir, __u64 hash,
- __u32 *start, __u32 *end)
+ __u64 *start, __u64 *end)
{
struct address_space *mapping = dir->i_mapping;
/*
wait_on_page(page);
if (PageUptodate(page)) {
dp = kmap(page);
- *start = le32_to_cpu(dp->ldp_hash_start);
- *end = le32_to_cpu(dp->ldp_hash_end);
+ *start = le64_to_cpu(dp->ldp_hash_start);
+ *end = le64_to_cpu(dp->ldp_hash_end);
LASSERT(*start <= hash);
if (hash > *end || (*end != *start && hash == *end)) {
kunmap(page);
struct page *page;
ldlm_mode_t mode;
int rc;
- __u32 start;
- __u32 end;
+ __u64 start;
+ __u64 end;
mode = LCK_PR;
rc = md_lock_match(ll_i2sbi(dir)->ll_md_exp, LDLM_FL_BLOCK_GRANTED,
hash_collision:
dp = page_address(page);
- start = le32_to_cpu(dp->ldp_hash_start);
- end = le32_to_cpu(dp->ldp_hash_end);
+ start = le64_to_cpu(dp->ldp_hash_start);
+ end = le64_to_cpu(dp->ldp_hash_end);
if (end == start) {
LASSERT(start == hash);
- CWARN("Page-wide hash collision: %#lx\n", (unsigned long)end);
+ CWARN("Page-wide hash collision: "LPX64"\n", end);
* If page is empty (end of directoryis reached),
* use this value.
*/
- __u32 hash = DIR_END_OFF;
- __u32 next;
+ __u64 hash = DIR_END_OFF;
+ __u64 next;
dp = page_address(page);
for (ent = lu_dirent_start(dp); ent != NULL && !done;
* XXX: implement correct swabbing here.
*/
- hash = le32_to_cpu(ent->lde_hash);
+ hash = le64_to_cpu(ent->lde_hash);
namelen = le16_to_cpu(ent->lde_namelen);
if (hash < pos)
done = filldir(cookie, name, namelen,
(loff_t)hash, ino, DT_UNKNOWN);
}
- next = le32_to_cpu(dp->ldp_hash_end);
+ next = le64_to_cpu(dp->ldp_hash_end);
ll_put_page(page);
if (!done) {
pos = next;
RETURN(0);
}
-static void lmv_hash_adjust(__u32 *hash, __u32 hash_adj)
+static void lmv_hash_adjust(__u64 *hash, __u64 hash_adj)
{
- __u32 val;
+ __u64 val;
- val = le32_to_cpu(*hash);
+ val = le64_to_cpu(*hash);
if (val < hash_adj)
val += MAX_HASH_SIZE;
if (val != DIR_END_OFF)
- *hash = cpu_to_le32(val - hash_adj);
+ *hash = cpu_to_le64(val - hash_adj);
}
static __u32 lmv_node_rank(struct obd_export *exp, const struct lu_fid *fid)
struct obd_export *tgt_exp;
struct lu_fid rid = *fid;
struct lmv_obj *obj;
- __u32 offset0;
- __u32 offset;
- __u32 hash_adj = 0;
+ __u64 offset;
+ __u64 hash_adj = 0;
__u32 rank = 0;
- __u32 seg_size = 0;
+ __u64 seg_size = 0;
+ __u64 tgt_tmp = 0;
int tgt = 0;
int tgt0 = 0;
int rc;
int nr = 0;
ENTRY;
- offset0 = offset = offset64;
- /*
- * Check that offset is representable by 32bit number.
- */
- LASSERT((__u64)offset == offset64);
+ offset = offset64;
rc = lmv_check_connect(obd);
if (rc)
RETURN(rc);
- CDEBUG(D_INFO, "READPAGE at %x from "DFID"\n", offset, PFID(&rid));
+ CDEBUG(D_INFO, "READPAGE at %llx from "DFID"\n", offset, PFID(&rid));
obj = lmv_obj_grab(obd, fid);
if (obj) {
+
+ /*
+ * This case handle directory lookup in clustered metadata case (i.e.
+ * split directory is located on multiple md servers.)
+ * each server keeps directory entries for certain range of hashes.
+ * E.g. we have N server and suppose hash range is 0 to MAX_HASH.
+ * first server will keep records with hashes [ 0 ... MAX_HASH / N - 1],
+ * second one with hashes [MAX_HASH / N ... 2 * MAX_HASH / N] and
+ * so on....
+ * readdir can simply start reading entries from 0 - N server in
+ * order but that will not scale well as all client will request dir in
+ * to server in same order.
+ * Following algorithm does optimization:
+ * Instead of doing readdir in 1, 2, ...., N order, client with a
+ * rank R does readdir in R, R + 1, ..., N, 1, ... R - 1 order.
+ * (every client has rank R)
+ * But ll_readdir() expect offset range [0 to MAX_HASH/N) but
+ * since client ask dir from MDS{R} client has pages with offsets
+ * [R*MAX_HASH/N ... (R + 1)*MAX_HASH/N] there for we do hash_adj
+ * on hash values that we get.
+ */
+
struct lmv_inode *loi;
lmv_obj_lock(obj);
nr = obj->lo_objcount;
LASSERT(nr > 0);
- seg_size = MAX_HASH_SIZE / nr;
+ seg_size = MAX_HASH_SIZE;
+ do_div(seg_size, nr);
loi = obj->lo_inodes;
rank = lmv_node_rank(lmv_get_export(lmv, loi[0].li_mds),
fid) % nr;
- tgt0 = (offset / seg_size) % nr;
+ tgt_tmp = offset;
+ do_div(tgt_tmp, seg_size);
+ tgt0 = do_div(tgt_tmp, nr);
tgt = (tgt0 + rank) % nr;
if (tgt < tgt0)
hash_adj += rank * seg_size;
- CDEBUG(D_INFO, "hash_adj: %x %x %x/%x -> %x/%x\n",
+ CDEBUG(D_INFO, "hash_adj: %x %llx %llx/%x -> %llx/%x\n",
rank, hash_adj, offset, tgt0, offset + hash_adj, tgt);
- offset = (offset + hash_adj) % MAX_HASH_SIZE;
+ offset = (offset + hash_adj) & MAX_HASH_SIZE;
rid = obj->lo_inodes[tgt].li_fid;
tgt_exp = lmv_get_export(lmv, loi[tgt].li_mds);
lmv_hash_adjust(&dp->ldp_hash_start, hash_adj);
lmv_hash_adjust(&dp->ldp_hash_end, hash_adj);
- LASSERT(cpu_to_le32(dp->ldp_hash_start) <= offset0);
+ LASSERT(le64_to_cpu(dp->ldp_hash_start) <= offset64);
for (ent = lu_dirent_start(dp); ent != NULL;
ent = lu_dirent_next(ent))
if (end == DIR_END_OFF) {
- dp->ldp_hash_end = cpu_to_le32(seg_size *
-                                (tgt0 + 1));
+ dp->ldp_hash_end = cpu_to_le64(seg_size *
+                                (tgt0 + 1));
- CDEBUG(D_INFO, ""DFID" reset end %x tgt %d\n",
+ CDEBUG(D_INFO, ""DFID" reset end %llx tgt %d\n",
PFID(&rid),
- le32_to_cpu(dp->ldp_hash_end), tgt);
+ le64_to_cpu(dp->ldp_hash_end), tgt);
}
}
cfs_kunmap(page);
static int mdd_dir_page_build(const struct lu_env *env, int first,
void *area, int nob, struct dt_it_ops *iops,
- struct dt_it *it, __u32 *start, __u32 *end,
+ struct dt_it *it, __u64 *start, __u64 *end,
struct lu_dirent **last)
{
struct lu_fid *fid = &mdd_env_info(env)->mti_fid2;
char *name;
int len;
int recsize;
- __u32 hash;
+ __u64 hash;
name = (char *)iops->key(env, it);
len = iops->key_size(env, it);
hash = iops->store(env, it);
*end = hash;
- CDEBUG(D_INFO, "%p %p %d "DFID": %#8.8x (%d) \"%*.*s\"\n",
+ CDEBUG(D_INFO, "%p %p %d "DFID": "LPU64" (%d) \"%*.*s\"\n",
name, ent, nob, PFID(fid), hash, len, len, len, name);
if (nob >= recsize) {
int i;
int rc;
int nob;
- __u32 hash_start;
- __u32 hash_end;
+ __u64 hash_start;
+ __u64 hash_end;
LASSERT(rdpg->rp_pages != NULL);
LASSERT(next->do_index_ops != NULL);
{
__u32 hash;
- hash = (hash_build0(name, namelen) << 1) & MAX_HASH_SIZE;
- if (hash > MAX_HASH_SIZE - HASH_GRAY_AREA)
+ hash = (hash_build0(name, namelen) << 1) & MAX_HASH_SIZE_32;
+ if (hash > MAX_HASH_SIZE_32 - HASH_GRAY_AREA)
hash &= HASH_GRAY_AREA - 1;
return hash;
}
{
__u32 hash;
- LASSERT(IS_PO2(MAX_HASH_SIZE + 1));
+ LASSERT(IS_PO2(MAX_HASH_SIZE_32 + 1));
- hash = hash_build(name, namelen) / (MAX_HASH_SIZE / count);
+ hash = hash_build(name, namelen) / (MAX_HASH_SIZE_32 / count);
LASSERTF(hash < count, "hash %x count %d \n", hash, count);
return hash;
return (struct dt_rec *)iam_it_rec_get(&it->oi_it);
}
-static __u32 osd_it_store(const struct lu_env *env, const struct dt_it *di)
+static __u64 osd_it_store(const struct lu_env *env, const struct dt_it *di)
{
struct osd_it *it = (struct osd_it *)di;
}
static int osd_it_load(const struct lu_env *env,
- const struct dt_it *di, __u32 hash)
+ const struct dt_it *di, __u64 hash)
{
struct osd_it *it = (struct osd_it *)di;