linux_headers := $(wildcard @LINUX@/include/linux/ext3*.h)
ext3_sources := $(filter-out %.mod.c,$(wildcard @LINUX@/fs/ext3/*.c))
-new_sources := iopen.c iopen.h extents.c mballoc.c iam.c iam_lfix.c iam_htree.c iam-uapi.c
+new_sources := iopen.c iopen.h extents.c mballoc.c \
+ iam.c iam_lfix.c iam_lvar.c iam_htree.c iam-uapi.c
new_headers := ext3_extents.h
ldiskfs_patched_sources := $(notdir $(ext3_sources) $(ext3_headers)) $(new_sources) $(new_headers)
ldiskfs_sources := $(ldiskfs_patched_sources)
Index: iam/fs/ext3/Makefile
===================================================================
--- iam.orig/fs/ext3/Makefile 2006-05-31 20:24:32.000000000 +0400
-+++ iam/fs/ext3/Makefile 2006-06-29 18:50:12.000000000 +0400
++++ iam/fs/ext3/Makefile 2006-07-03 01:03:10.000000000 +0400
@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o
ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
Index: iam/fs/ext3/iam.c
===================================================================
--- iam.orig/fs/ext3/iam.c 2004-04-06 17:27:52.000000000 +0400
-+++ iam/fs/ext3/iam.c 2006-06-28 22:46:13.000000000 +0400
-@@ -0,0 +1,1233 @@
++++ iam/fs/ext3/iam.c 2006-07-01 20:21:38.000000000 +0400
+@@ -0,0 +1,1262 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * so that iam_htree_guess() runs last.
+ */
+ iam_htree_format_init();
++ iam_lvar_format_init();
+ iam_lfix_format_init();
+ initialized = 1;
+ }
+ return iam_leaf_ops(leaf)->key(leaf);
+}
+
++static int iam_leaf_key_size(const struct iam_leaf *leaf)
++{
++        const struct iam_leaf_operations *ops = iam_leaf_ops(leaf);
++
++        /* size of the key is format specific: ask leaf operations */
++        return ops->key_size(leaf);
++}
++
+static struct iam_ikey *iam_leaf_ikey(const struct iam_leaf *leaf,
+ struct iam_ikey *key)
+{
+ int result;
+
+ result = iam_it_get(it, k);
-+ if (result == 0 &&
-+ (it_state(it) != IAM_IT_ATTACHED || it_keycmp(it, k) != 0))
++ if (result > 0)
++ result = 0;
++ else if (result == 0)
+ /*
+ * Return -ENOENT if cursor is located above record with a key
+ * different from one specified, or in the empty leaf.
+ *
-+ * XXX returning -ENOENT only works if iam_it_get never
++ * XXX returning -ENOENT only works if iam_it_get() never
+ * returns -ENOENT as a legitimate error.
+ */
+ result = -ENOENT;
+ * least key not larger than @k.
+ *
+ * Return value: 0: positioned on existing record,
++ * +ve: exact position found,
+ * -ve: error.
+ *
+ * precondition: it_state(it) == IAM_IT_DETACHED
+ result = iam_path_lookup(&it->ii_path);
+ if (result >= 0) {
+ switch (result) {
++ case IAM_LOOKUP_EXACT:
++ result = +1;
++ it->ii_state = IAM_IT_ATTACHED;
++ break;
+ case IAM_LOOKUP_OK:
++ result = 0;
+ it->ii_state = IAM_IT_ATTACHED;
+ break;
+ case IAM_LOOKUP_BEFORE:
+ case IAM_LOOKUP_EMPTY:
++ result = 0;
+ it->ii_state = IAM_IT_SKEWED;
+ break;
+ default:
+ assert(0);
+ }
-+ result = 0;
+ } else
+ iam_it_unlock(it);
++ assert(ergo(result > 0, it_keycmp(it, k) == 0));
+ assert(ergo(result == 0 && it_state(it) == IAM_IT_ATTACHED,
+ it_keycmp(it, k) <= 0));
+ /*
+ * Attach iterator, and assure it points to the record (not skewed).
+ *
+ * Return value: 0: positioned on existing record,
++ * +ve: exact position found,
+ * -ve: error.
+ *
+ * precondition: it_state(it) == IAM_IT_DETACHED &&
+ result = iam_it_next(it);
+ }
+ }
-+ assert(ergo(result == 0, it_state(it) == IAM_IT_ATTACHED));
++ assert(ergo(result >= 0, it_state(it) == IAM_IT_ATTACHED));
+ return result;
+}
+
+ *
+ * precondition: it_state(it) == IAM_IT_ATTACHED ||
+ * it_state(it) == IAM_IT_SKEWED
-+ * postcondition: it_state(it) == IAM_IT_ATTACHED
+ */
+struct iam_key *iam_it_key_get(const struct iam_iterator *it)
+{
-+ assert(it_state(it) == IAM_IT_ATTACHED);
++ assert(it_state(it) == IAM_IT_ATTACHED ||
++ it_state(it) == IAM_IT_SKEWED);
+ assert(it_at_rec(it));
+ return iam_leaf_key(&it->ii_path.ip_leaf);
+}
+
+/*
++ * Return size of key under iterator (in bytes)
++ *
++ * precondition: it_state(it) == IAM_IT_ATTACHED ||
++ * it_state(it) == IAM_IT_SKEWED
++ */
++int iam_it_key_size(const struct iam_iterator *it)
++{
++        const struct iam_leaf *leaf = &it->ii_path.ip_leaf;
++
++        assert(it_state(it) == IAM_IT_ATTACHED ||
++               it_state(it) == IAM_IT_SKEWED);
++        assert(it_at_rec(it));
++        /* delegate to the leaf the iterator is positioned in */
++        return iam_leaf_key_size(leaf);
++}
++
++/*
+ * Insertion of new record. Interaction with jbd during non-trivial case (when
+ * split happens) is as following:
+ *
+ * (it_state(it) == IAM_IT_ATTACHED ||
+ * it_state(it) == IAM_IT_SKEWED) &&
+ * ergo(it_state(it) == IAM_IT_ATTACHED,
-+ * it_keycmp(it, k) < 0) &&
++ * it_keycmp(it, k) <= 0) &&
+ * ergo(it_before(it), it_keycmp(it, k) > 0));
+ * postcondition: ergo(result == 0,
+ * it_state(it) == IAM_IT_ATTACHED &&
+ assert(it->ii_flags&IAM_IT_WRITE);
+ assert(it_state(it) == IAM_IT_ATTACHED ||
+ it_state(it) == IAM_IT_SKEWED);
-+ assert(ergo(it_state(it) == IAM_IT_ATTACHED, it_keycmp(it, k) < 0));
++ assert(ergo(it_state(it) == IAM_IT_ATTACHED, it_keycmp(it, k) <= 0));
+ assert(ergo(it_before(it), it_keycmp(it, k) > 0));
+ result = iam_add_rec(h, path, k, r);
+ if (result == 0)
Index: iam/fs/ext3/iam_htree.c
===================================================================
--- iam.orig/fs/ext3/iam_htree.c 2004-04-06 17:27:52.000000000 +0400
-+++ iam/fs/ext3/iam_htree.c 2006-06-28 22:18:53.000000000 +0400
-@@ -0,0 +1,645 @@
++++ iam/fs/ext3/iam_htree.c 2006-07-01 19:09:16.000000000 +0400
+@@ -0,0 +1,655 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Leaf operations.
+ */
+
-+struct iam_ikey *iam_htree_ikey(const struct iam_leaf *l, struct iam_ikey *key)
++static struct iam_ikey *iam_htree_ikey(const struct iam_leaf *l,
++ struct iam_ikey *key)
+{
+ __u32 *hash;
+ assert(iam_leaf_at_rec(l));
+ return key;
+}
+
-+struct iam_key *iam_htree_key(const struct iam_leaf *l)
++static struct iam_key *iam_htree_key(const struct iam_leaf *l)
+{
+ assert(iam_leaf_at_rec(l));
+
+ return (struct iam_key *)&getent(l)->name;
+}
+
++/*
++ * ->key_size() method for htree leaves: returns the name_len field of the
++ * entry the leaf is currently positioned at.
++ */
++static int iam_htree_key_size(const struct iam_leaf *l)
++{
++ assert(iam_leaf_at_rec(l));
++
++ return getent(l)->name_len;
++}
++
+static void iam_htree_start(struct iam_leaf *l)
+{
+ l->il_at = (void *)skipdead(getstart(l));
+ namelen = strlen(name);
+ hash = hashname(l, name, namelen);
+ found = NULL;
++ result = IAM_LOOKUP_OK;
+ for (scan = getstart(l); scan < getlast(l, namelen);
+ scan = entnext(scan)) {
+ if (match(namelen, name, scan)) {
+ found = scan;
++ result = IAM_LOOKUP_EXACT;
+ break;
+ } else if (ent_is_live(scan) && gethash(l, scan) <= hash)
+ found = scan;
+ result = IAM_LOOKUP_BEFORE;
+ } else {
+ l->il_at = (void *)found;
-+ result = IAM_LOOKUP_OK;
+ assert(iam_leaf_at_rec(l));
+ }
+ return result;
+ .rec = iam_htree_rec,
+ .key_set = iam_htree_key_set,
+ .key_cmp = iam_htree_key_cmp,
++ .key_size = iam_htree_key_size,
+ .rec_set = iam_htree_rec_set,
+ .lookup = iam_htree_lookup,
+ .at_end = iam_htree_at_end,
Index: iam/fs/ext3/iam_lfix.c
===================================================================
--- iam.orig/fs/ext3/iam_lfix.c 2004-04-06 17:27:52.000000000 +0400
-+++ iam/fs/ext3/iam_lfix.c 2006-06-28 21:18:56.000000000 +0400
-@@ -0,0 +1,649 @@
++++ iam/fs/ext3/iam_lfix.c 2006-07-01 19:19:48.000000000 +0400
+@@ -0,0 +1,660 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ folio->il_at < iam_lfix_get_end(folio);
+}
+
-+struct iam_ikey *iam_lfix_ikey(const struct iam_leaf *l, struct iam_ikey *key)
++static struct iam_ikey *iam_lfix_ikey(const struct iam_leaf *l,
++ struct iam_ikey *key)
+{
+ void *ie = l->il_at;
+ assert(iam_leaf_at_rec(l));
+ return (struct iam_ikey*)ie;
+}
+
-+struct iam_key *iam_lfix_key(const struct iam_leaf *l)
++static struct iam_key *iam_lfix_key(const struct iam_leaf *l)
+{
+ void *ie = l->il_at;
+ assert(iam_leaf_at_rec(l));
+ return (struct iam_key*)ie;
+}
+
++/*
++ * ->key_size() method for lfix leaves: all keys have the same size, recorded
++ * in the container description.
++ */
++static int iam_lfix_key_size(const struct iam_leaf *l)
++{
++        const struct iam_descr *descr = iam_leaf_descr(l);
++
++        return descr->id_key_size;
++}
++
+static void iam_lfix_start(struct iam_leaf *l)
+{
+ l->il_at = iam_get_lentries(l);
+ }
+ assert(iam_leaf_at_rec(l));
+
++ if (lfix_keycmp(c, iam_leaf_key_at(l->il_at), k) == 0)
++ result = IAM_LOOKUP_EXACT;
++
+ return result;
+}
+
+ .rec = iam_lfix_rec,
+ .key_set = iam_lfix_key_set,
+ .key_cmp = iam_lfix_key_cmp,
++ .key_size = iam_lfix_key_size,
+ .rec_set = iam_lfix_rec_set,
+ .lookup = iam_lfix_lookup,
+ .at_end = iam_lfix_at_end,
+ __le16 ilr_keysize;
+ __le16 ilr_recsize;
+ __le16 ilr_ptrsize;
-+ __le16 ilr_indirect_levels;
++ u8 ilr_indirect_levels;
++ u8 ilr_padding;
+};
+
+static __u32 iam_lfix_root_ptr(struct iam_container *c)
+ struct iam_lfix_root *root;
+
+ root = data;
-+ path->ip_indirect = le16_to_cpu(root->ilr_indirect_levels);
++ path->ip_indirect = root->ilr_indirect_levels;
+ path->ip_ikey_target = (struct iam_ikey *)path->ip_key_target;
+ }
+ frame->entries = frame->at = entries;
+ char rec[RECSIZE];
+ } ll_entry[LFIX_LEAF_RECNO];
+};
+Index: iam/fs/ext3/iam_lvar.c
+===================================================================
+--- iam.orig/fs/ext3/iam_lvar.c 2004-04-06 17:27:52.000000000 +0400
++++ iam/fs/ext3/iam_lvar.c 2006-07-01 20:10:13.000000000 +0400
+@@ -0,0 +1,732 @@
++/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
++ * vim:expandtab:shiftwidth=8:tabstop=8:
++ *
++ * iam_lvar.c
++ * implementation of iam format for fixed size records, variable sized keys.
++ *
++ * Copyright (c) 2006 Cluster File Systems, Inc.
++ * Author: Nikita Danilov <nikita@clusterfs.com>
++ *
++ * This file is part of the Lustre file system, http://www.lustre.org
++ * Lustre is a trademark of Cluster File Systems, Inc.
++ *
++ * You may have signed or agreed to another license before downloading
++ * this software. If so, you are bound by the terms and conditions
++ * of that agreement, and the following does not apply to you. See the
++ * LICENSE file included with this distribution for more information.
++ *
++ * If you did not agree to a different license, then this copy of Lustre
++ * is open source software; you can redistribute it and/or modify it
++ * under the terms of version 2 of the GNU General Public License as
++ * published by the Free Software Foundation.
++ *
++ * In either case, Lustre is distributed in the hope that it will be
++ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
++ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * license text for more details.
++ */
++
++#include <linux/types.h>
++#include <linux/jbd.h>
++/* ext3_error() */
++#include <linux/ext3_fs.h>
++
++#include <linux/lustre_iam.h>
++
++#include <libcfs/libcfs.h>
++#include <libcfs/kp30.h>
++
++/*
++ * Leaf operations.
++ */
++
++enum {
++ /*
++ * Magic number kept in vlh_magic of lvar leaf header: written by
++ * lvar_init_new(), checked by lvar_init().
++ */
++ IAM_LVAR_LEAF_MAGIC = 0x1973 /* This is duplicated in
++ * lustre/utils/create_iam.c */
++};
++
++/*
++ * Header located at the very beginning of lvar leaf block. Leaf entries
++ * start immediately after it (see n_start()).
++ *
++ * This is duplicated in lustre/utils/create_iam.c
++ */
++struct lvar_leaf_header {
++ __le16 vlh_magic; /* magic number IAM_LVAR_LEAF_MAGIC */
++ __le16 vlh_used; /* used bytes, including header */
++};
++
++/*
++ * Format of leaf entry:
++ *
++ * __le32 hash
++ * __le16 keysize
++ * u8 key[keysize]
++ * u8 record[rec_size]
++ *
++ * Each entry is padded to a multiple of LVAR_PAD bytes.
++ *
++ * Entries are ordered in hash order.
++ */
++
++/*
++ * Type of the key hash. Leaf entries and index keys of lvar format are
++ * compared by this hash.
++ *
++ * This is duplicated in lustre/utils/create_iam.c
++ */
++typedef __u32 lvar_hash_t;
++
++/* This is duplicated in lustre/utils/create_iam.c */
++struct lvar_leaf_entry {
++ __le32 vle_hash; /* hash of the key, see get_hash() */
++ __le16 vle_keysize; /* size of the key in bytes */
++ u8 vle_key[0]; /* key bytes; record follows them, see e_rec() */
++};
++
++/* Distance in bytes from @ptr1 to @ptr0. */
++#define PDIFF(ptr0, ptr1) (((char *)(ptr0)) - ((char *)(ptr1)))
++
++
++/*
++ * Block size, in bytes, of the file system holding the object that backs
++ * the container @leaf belongs to.
++ */
++static inline int blocksize(const struct iam_leaf *leaf)
++{
++        const struct inode *object = iam_leaf_container(leaf)->ic_object;
++
++        return object->i_sb->s_blocksize;
++}
++
++/*
++ * Reinterpret @key as a pointer to characters. Callers in this file apply
++ * strlen() to the result.
++ */
++static inline const char *kchar(const struct iam_key *key)
++{
++        const void *bytes = key;
++
++        return bytes;
++}
++
++/* Convert pointer to lvar leaf entry into generic leaf entry pointer. */
++static inline struct iam_lentry *lvar_lentry(const struct lvar_leaf_entry *ent)
++{
++        void *raw = (void *)ent;
++
++        return raw;
++}
++
++/* Convert generic leaf entry pointer into pointer to lvar leaf entry. */
++static inline struct lvar_leaf_entry *lentry_lvar(const struct iam_lentry *lent)
++{
++        void *raw = (void *)lent;
++
++        return raw;
++}
++
++
++/* Size of a record in bytes, as recorded in the container description. */
++static inline int recsize(const struct iam_leaf *leaf)
++{
++        const struct iam_descr *descr = iam_leaf_descr(leaf);
++
++        return descr->id_rec_size;
++}
++
++/* Size of the key stored in @ent, converted to host byte order. */
++static inline int e_keysize(const struct lvar_leaf_entry *ent)
++{
++        int keysize;
++
++        keysize = le16_to_cpu(ent->vle_keysize);
++        return keysize;
++}
++
++/* This is duplicated in lustre/utils/create_iam.c */
++enum {
++ LVAR_PAD = 4, /* entry sizes are rounded up to a multiple of this */
++ LVAR_ROUND = LVAR_PAD - 1 /* mask used by getsize() to do the rounding */
++};
++
++/*
++ * Number of bytes occupied in the leaf by an entry with a key of @namelen
++ * bytes: entry header, key and record, rounded up to a multiple of
++ * LVAR_PAD.
++ */
++static inline int getsize(const struct iam_leaf *leaf, int namelen)
++{
++        size_t payload;
++
++        /* mask-based rounding below requires LVAR_PAD to be a power of 2 */
++        CLASSERT(!(LVAR_PAD & (LVAR_PAD - 1)));
++
++        payload = offsetof(struct lvar_leaf_entry, vle_key) +
++                namelen + recsize(leaf);
++        return (payload + LVAR_ROUND) & ~LVAR_ROUND;
++}
++
++/* Number of bytes occupied in the leaf by the existing entry @ent. */
++static inline int e_size(const struct iam_leaf *leaf,
++                         const struct lvar_leaf_entry *ent)
++{
++        int keysize = e_keysize(ent);
++
++        return getsize(leaf, keysize);
++}
++
++/* Pointer to the first byte of the key stored in @ent. */
++static inline char *e_char(const struct lvar_leaf_entry *ent)
++{
++        return (char *)ent->vle_key;
++}
++
++/* Key stored in @ent, as generic iam key pointer. */
++static inline struct iam_key *e_key(const struct lvar_leaf_entry *ent)
++{
++        char *bytes = e_char(ent);
++
++        return (struct iam_key *)bytes;
++}
++
++/* Hash stored in @ent, converted to host byte order. */
++static inline lvar_hash_t e_hash(const struct lvar_leaf_entry *ent)
++{
++        lvar_hash_t hash;
++
++        hash = le32_to_cpu(ent->vle_hash);
++        return hash;
++}
++
++/* Record of @ent: it is located right after the key bytes. */
++static inline struct iam_rec *e_rec(const struct lvar_leaf_entry *ent)
++{
++        char *key = e_char(ent);
++
++        return (void *)(key + e_keysize(ent));
++}
++
++/*
++ * Sanity check of entry position: returns true iff @ent starts after the
++ * leaf header and ends within the block of @leaf.
++ *
++ * An entry is allowed to end exactly at the end of the block: this agrees
++ * with lvar_can_add() and h_used_adj(), both of which permit used space to
++ * be equal to the block size.
++ */
++static int e_check(const struct iam_leaf *leaf,
++                   const struct lvar_leaf_entry *ent)
++{
++        const void *point = ent;
++        const void *start = leaf->il_bh->b_data;
++        const void *end   = start + blocksize(leaf);
++
++        return
++                start + sizeof(struct lvar_leaf_header) <= point &&
++                point + e_size(leaf, ent) <= end;
++}
++
++/* Entry following @ent: it starts e_size() bytes after @ent. */
++static struct lvar_leaf_entry *e_next(const struct iam_leaf *leaf,
++                                      const struct lvar_leaf_entry *ent)
++{
++        char *here = (char *)ent;
++
++        return (void *)(here + e_size(leaf, ent));
++}
++
++/*
++ * Compute hash of @name, which is @namelen bytes long.
++ *
++ * Up to sizeof(lvar_hash_t) leading bytes of @name are copied, in memory
++ * order, into the zeroed hash, which is then shifted left by one bit, so
++ * that the lowest bit of returned value is always 0. @bag is not used.
++ *
++ * NOTE(review): as bytes are copied in memory order, numerical value of the
++ * hash looks host byte order dependent---confirm this is intended, given
++ * that hashes are stored on disk.
++ */
++static inline lvar_hash_t get_hash(const struct iam_container *bag,
++ const char *name, int namelen)
++{
++ lvar_hash_t result;
++
++ result = 0;
++ strncpy((void *)&result, name, min(namelen, (int)sizeof result));
++ return result << 1;
++}
++
++/*
++ * True iff key of @ent is exactly @name: the same length and the same
++ * bytes.
++ */
++static inline int e_eq(const struct lvar_leaf_entry *ent,
++                       const char *name, int namelen)
++{
++        if (namelen != e_keysize(ent))
++                return 0;
++        return !memcmp(e_char(ent), name, namelen);
++}
++
++/*
++ * Three-way comparison of hash of @ent against @hash: -1, 0, or +1 when
++ * entry hash is respectively less than, equal to, or greater than @hash.
++ */
++static inline int e_cmp(const struct iam_leaf *leaf,
++                        const struct lvar_leaf_entry *ent, lvar_hash_t hash)
++{
++        lvar_hash_t mine = e_hash(ent);
++
++        if (mine < hash)
++                return -1;
++        if (mine > hash)
++                return +1;
++        return 0;
++}
++
++/* Leaf header: it is located at the very start of the leaf block. */
++static struct lvar_leaf_header *n_head(const struct iam_leaf *l)
++{
++        void *block = l->il_bh->b_data;
++
++        return block;
++}
++
++/* Number of used bytes recorded in @hdr, converted to host byte order. */
++static int h_used(const struct lvar_leaf_header *hdr)
++{
++        int used;
++
++        used = le16_to_cpu(hdr->vlh_used);
++        return used;
++}
++
++/*
++ * Adjust number of used bytes recorded in @hdr by @adj (which may be
++ * negative). @leaf is only used to obtain the block size.
++ */
++static void h_used_adj(const struct iam_leaf *leaf,
++                       struct lvar_leaf_header *hdr, int adj)
++{
++        int used;
++
++        used = h_used(hdr) + adj;
++        /*
++         * Compare as signed integers: were sizeof left as size_t, negative
++         * @used would be converted to a huge unsigned value and slip
++         * through the lower bound check.
++         */
++        assert((int)sizeof *hdr <= used && used <= blocksize(leaf));
++        hdr->vlh_used = cpu_to_le16(used);
++}
++
++/* First entry of the leaf: it is located right after the leaf header. */
++static struct lvar_leaf_entry *n_start(const struct iam_leaf *leaf)
++{
++        return (void *)(n_head(leaf) + 1);
++}
++
++/*
++ * End of the used portion of the leaf: "used" bytes (header included) from
++ * the start of the block.
++ */
++static struct lvar_leaf_entry *n_end(const struct iam_leaf *l)
++{
++        char *block = l->il_bh->b_data;
++
++        return (void *)(block + h_used(n_head(l)));
++}
++
++/* Entry at the current position of the leaf. */
++static struct lvar_leaf_entry *n_cur(const struct iam_leaf *l)
++{
++        const struct iam_lentry *at = l->il_at;
++
++        return lentry_lvar(at);
++}
++
++/*
++ * True iff current position of the leaf is within [n_start(), n_end()),
++ * that is, points into the used portion of the leaf.
++ */
++static int n_at_rec(const struct iam_leaf *folio)
++{
++        const struct lvar_leaf_entry *cur = lentry_lvar(folio->il_at);
++
++        return n_start(folio) <= cur && cur < n_end(folio);
++}
++
++/*
++ * ->ikey() method: store hash of the current entry into the buffer @key and
++ * return @key.
++ *
++ * Note that BUG() below is executed on every call: this method is not
++ * expected to be invoked at the moment.
++ */
++static struct iam_ikey *lvar_ikey(const struct iam_leaf *l,
++ struct iam_ikey *key)
++{
++ lvar_hash_t *hash;
++
++ assert(n_at_rec(l));
++
++ hash = (void *)key;
++ *hash = e_hash(n_cur(l));
++ BUG(); /* shouldn't be called currently */
++ return key;
++}
++
++/* ->key() method: pointer to the key of the current entry. */
++static struct iam_key *lvar_key(const struct iam_leaf *l)
++{
++        const struct lvar_leaf_entry *ent = n_cur(l);
++
++        return e_key(ent);
++}
++
++/* ->key_size() method: size in bytes of the key of the current entry. */
++static int lvar_key_size(const struct iam_leaf *l)
++{
++        const struct lvar_leaf_entry *ent = n_cur(l);
++
++        return e_keysize(ent);
++}
++
++/* ->start() method: position leaf on its first entry. */
++static void lvar_start(struct iam_leaf *l)
++{
++        struct lvar_leaf_entry *first = n_start(l);
++
++        l->il_at = lvar_lentry(first);
++}
++
++/*
++ * ->init() method: validate header of the leaf block attached to @l, and
++ * position the leaf on its first entry.
++ *
++ * Returns 0 on success, or -EIO (after reporting through ext3_error()) when
++ * the magic is wrong or the used-bytes counter is outside of
++ * [sizeof header, block size].
++ */
++static int lvar_init(struct iam_leaf *l)
++{
++        int result;
++        int used;
++        int magic;
++        struct lvar_leaf_header *head;
++
++        assert(l->il_bh != NULL);
++
++        head = n_head(l);
++        used = h_used(head);
++        /*
++         * Convert on-disk value to host byte order once, so that both the
++         * check and the error message below deal with comparable values.
++         */
++        magic = le16_to_cpu(head->vlh_magic);
++        if (magic == IAM_LVAR_LEAF_MAGIC &&
++            (int)sizeof *head <= used && used <= blocksize(l)) {
++                l->il_at = l->il_entries = lvar_lentry(n_start(l));
++                result = 0;
++        } else {
++                struct inode *obj;
++
++                obj = iam_leaf_container(l)->ic_object;
++                ext3_error(obj->i_sb, __FUNCTION__,
++                           "Wrong magic in node %llu (#%lu): %#x != %#x or "
++                           "wrong used: %i",
++                           (unsigned long long)l->il_bh->b_blocknr, obj->i_ino,
++                           magic, IAM_LVAR_LEAF_MAGIC,
++                           used);
++                result = -EIO;
++                BREAKPOINT;
++        }
++        return result;
++}
++
++/* ->fini() method: forget positions within the leaf block. */
++static void lvar_fini(struct iam_leaf *l)
++{
++        l->il_at = NULL;
++        l->il_entries = NULL;
++}
++
++/* ->rec() method: pointer to the record of the current entry. */
++struct iam_rec *lvar_rec(const struct iam_leaf *l)
++{
++        const struct lvar_leaf_entry *ent;
++
++        assert(n_at_rec(l));
++        ent = n_cur(l);
++        return e_rec(ent);
++}
++
++/* ->next() method: advance leaf position to the following entry. */
++static void lvar_next(struct iam_leaf *l)
++{
++        struct lvar_leaf_entry *following;
++
++        assert(n_at_rec(l));
++        following = e_next(l, n_cur(l));
++        l->il_at = lvar_lentry(following);
++}
++
++/*
++ * ->lookup() method: search @leaf for the key @k, which is treated as a
++ * NUL-terminated name.
++ *
++ * Entries are scanned from the start of the leaf for as long as their hash
++ * does not exceed the hash of @k. Return values:
++ *
++ * IAM_LOOKUP_EXACT: an entry with the same hash and the same name was
++ * found, and leaf is positioned on it;
++ *
++ * IAM_LOOKUP_OK: no such entry; leaf is positioned on the first entry with
++ * the hash of @k if there is one, and on the last entry with a smaller hash
++ * otherwise;
++ *
++ * IAM_LOOKUP_BEFORE: no entry with a hash not larger than the hash of @k
++ * was seen (this includes an empty leaf); leaf is positioned at its start.
++ */
++static int lvar_lookup(struct iam_leaf *leaf, const struct iam_key *k)
++{
++ struct lvar_leaf_entry *found;
++ struct lvar_leaf_entry *scan;
++ struct lvar_leaf_entry *end;
++ int result;
++ const char *name;
++ int namelen;
++ int found_equal;
++ lvar_hash_t hash;
++
++ end = n_end(leaf);
++
++ name = kchar(k);
++ namelen = strlen(name);
++ hash = get_hash(iam_leaf_container(leaf), name, namelen);
++ found = NULL;
++ found_equal = 0;
++
++ for (scan = n_start(leaf); scan < end; scan = e_next(leaf, scan)) {
++ lvar_hash_t scan_hash;
++
++ scan_hash = e_hash(scan);
++ if (scan_hash < hash)
++ found = scan;
++ else if (scan_hash == hash) {
++ if (e_eq(scan, name, namelen)) {
++ /*
++ * perfect match
++ */
++ leaf->il_at = lvar_lentry(scan);
++ return IAM_LOOKUP_EXACT;
++ } else if (!found_equal) {
++ /*
++ * hash collision: remember only the first
++ * entry with this hash.
++ */
++ found = scan;
++ found_equal = 1;
++ }
++ } else
++ break;
++ }
++ if (found == NULL) {
++ /*
++ * @k is less than all hashes in the leaf.
++ */
++ lvar_start(leaf);
++ result = IAM_LOOKUP_BEFORE;
++ } else {
++ leaf->il_at = lvar_lentry(found);
++ result = IAM_LOOKUP_OK;
++ assert(n_at_rec(leaf));
++ }
++ return result;
++}
++
++/*
++ * ->key_set() method: overwrite key bytes of the current entry with @k.
++ * Length of @k must be equal to the key size already recorded in the entry.
++ */
++static void lvar_key_set(struct iam_leaf *l, const struct iam_key *k)
++{
++        struct lvar_leaf_entry *ent;
++
++        assert(n_at_rec(l));
++        assert(strlen(kchar(k)) == e_keysize(n_cur(l)));
++
++        ent = n_cur(l);
++        memcpy(e_key(ent), k, e_keysize(ent));
++}
++
++/*
++ * ->key_cmp() method: compare hash of the current entry with hash of @k.
++ * Only hashes are compared, not the key bytes. Result is as in e_cmp().
++ */
++static int lvar_key_cmp(const struct iam_leaf *l, const struct iam_key *k)
++{
++        const char *name    = kchar(k);
++        int         namelen = strlen(name);
++
++        return e_cmp(l, n_cur(l),
++                     get_hash(iam_leaf_container(l), name, namelen));
++}
++
++/* ->rec_set() method: copy record @r into the current entry. */
++static void lvar_rec_set(struct iam_leaf *l, const struct iam_rec *r)
++{
++        struct iam_rec *target;
++
++        assert(n_at_rec(l));
++        target = e_rec(n_cur(l));
++        iam_reccpy(iam_leaf_path(l), target, r);
++}
++
++/*
++ * ->can_add() method: true iff an entry for the key @k fits into the free
++ * space of the leaf. @r is not inspected.
++ */
++static int lvar_can_add(const struct iam_leaf *l,
++                        const struct iam_key *k, const struct iam_rec *r)
++{
++        int used = h_used(n_head(l));
++        int need = getsize(l, strlen(kchar(k)));
++
++        return used + need <= blocksize(l);
++}
++
++/*
++ * ->at_end() method: true iff leaf is positioned exactly at the end of its
++ * used portion.
++ */
++static int lvar_at_end(const struct iam_leaf *folio)
++{
++        const struct lvar_leaf_entry *limit = n_end(folio);
++
++        return n_cur(folio) == limit;
++}
++
++/*
++ * ->rec_add() method: insert record @r with the key @k into @leaf, which
++ * must have enough free space (see lvar_can_add()).
++ *
++ * Unless leaf is positioned at its end, new entry is placed right after the
++ * current entry when hash of the current entry is not larger than hash of
++ * @k, and at the current position otherwise; entries from the insertion
++ * point up to the end of the leaf are moved up to make room.
++ *
++ * On return leaf is positioned on the new entry.
++ */
++static void lvar_rec_add(struct iam_leaf *leaf,
++ const struct iam_key *k, const struct iam_rec *r)
++{
++ const char *key;
++ int ksize;
++ int shift;
++ void *end;
++ void *start;
++ ptrdiff_t diff;
++
++ assert(lvar_can_add(leaf, k, r));
++
++ key = kchar(k);
++ ksize = strlen(key);
++ shift = getsize(leaf, ksize);
++
++ if (!lvar_at_end(leaf)) {
++ end = n_end(leaf);
++ if (lvar_key_cmp(leaf, k) <= 0)
++ lvar_next(leaf);
++ else
++ /*
++ * Another exceptional case: insertion with the key
++ * less than least key in the leaf.
++ */
++ assert(leaf->il_at == leaf->il_entries);
++
++ start = leaf->il_at;
++ diff = PDIFF(end, start);
++ assert(diff >= 0);
++ memmove(start + shift, start, diff);
++ }
++ /* key size has to be set before lvar_key_set(), which asserts it */
++ h_used_adj(leaf, n_head(leaf), shift);
++ n_cur(leaf)->vle_keysize = cpu_to_le16(ksize);
++ n_cur(leaf)->vle_hash = cpu_to_le32(get_hash(iam_leaf_container(leaf),
++ key, ksize));
++ lvar_key_set(leaf, k);
++ lvar_rec_set(leaf, r);
++ assert(n_at_rec(leaf));
++}
++
++/*
++ * ->rec_del() method: remove the current entry, moving all following
++ * entries down over it. @shift is not used.
++ */
++static void lvar_rec_del(struct iam_leaf *leaf, int shift)
++{
++        struct lvar_leaf_entry *victim;
++        void *tail;
++        void *limit;
++        int   nob;
++
++        assert(n_at_rec(leaf));
++
++        victim = n_cur(leaf);
++        limit  = n_end(leaf);
++        tail   = e_next(leaf, victim);
++        nob    = e_size(leaf, victim);
++        /* close the gap left by the deleted entry */
++        memmove(leaf->il_at, tail, limit - tail);
++        h_used_adj(leaf, n_head(leaf), -nob);
++}
++
++/*
++ * ->init_new() method: format block @bh as an empty lvar leaf, that is, one
++ * containing nothing but the header. @c is not used.
++ */
++static void lvar_init_new(struct iam_container *c, struct buffer_head *bh)
++{
++        struct lvar_leaf_header *hdr;
++
++        hdr = (struct lvar_leaf_header *)bh->b_data;
++        hdr->vlh_magic = cpu_to_le16(IAM_LVAR_LEAF_MAGIC);
++        /*
++         * vlh_used is little-endian on disk (it is read back through
++         * le16_to_cpu() in h_used()), so it has to be converted on store.
++         */
++        hdr->vlh_used = cpu_to_le16(sizeof *hdr);
++}
++
++/*
++ * Find the split point of @leaf: the first entry that starts more than half
++ * a block after the first entry. Entry preceding the returned one is
++ * stored in *@prev (NULL, if returned entry is the first one).
++ */
++static struct lvar_leaf_entry *find_pivot(const struct iam_leaf *leaf,
++                                          struct lvar_leaf_entry **prev)
++{
++        void *first;
++        void *cursor;
++        int   half;
++
++        *prev = NULL;
++        half = blocksize(leaf) / 2;
++        first = n_start(leaf);
++        cursor = first;
++        while (cursor - first <= half) {
++                *prev = cursor;
++                cursor = e_next(leaf, cursor);
++        }
++        return cursor;
++}
++
++/*
++ * ->split() method: move entries of @leaf, starting from the pivot selected
++ * by find_pivot(), into the new leaf block *@bh, whose block number is
++ * @new_blknr, and insert hash of the first moved entry together with
++ * @new_blknr into the index node at path->ip_frame.
++ *
++ * When the first moved entry and the last entry left in place have the same
++ * hash, the lowest bit of the inserted hash is set.
++ *
++ * If current position of @leaf is among moved entries, @leaf is switched to
++ * the new block with position kept on the same entry, and the old block is
++ * returned in *@bh.
++ */
++static void lvar_split(struct iam_leaf *leaf, struct buffer_head **bh,
++ iam_ptr_t new_blknr)
++{
++ struct lvar_leaf_entry *first_to_move;
++ struct lvar_leaf_entry *last_to_stay;
++ struct iam_path *path;
++ struct lvar_leaf_header *hdr;
++ struct buffer_head *new_leaf;
++
++ ptrdiff_t tomove;
++ lvar_hash_t hash;
++
++ new_leaf = *bh;
++ path = iam_leaf_path(leaf);
++
++ hdr = (void *)new_leaf->b_data;
++
++ first_to_move = find_pivot(leaf, &last_to_stay);
++ assert(last_to_stay != NULL);
++ assert(e_next(leaf, last_to_stay) == first_to_move);
++
++ hash = e_hash(first_to_move);
++ if (hash == e_hash(last_to_stay))
++ /*
++ * Duplicate hash.
++ */
++ hash |= 1;
++
++ tomove = PDIFF(n_end(leaf), first_to_move);
++ memmove(hdr + 1, first_to_move, tomove);
++
++ /* account for moved bytes in both the new and the old leaf */
++ h_used_adj(leaf, hdr, tomove);
++ h_used_adj(leaf, n_head(leaf), -tomove);
++
++ assert(n_end(leaf) == first_to_move);
++
++ /*
++ * Insert pointer to the new node (together with the least key in
++ * the node) into index node.
++ */
++ iam_insert_key(path, path->ip_frame, (struct iam_ikey *)&hash,
++ new_blknr);
++ if (n_cur(leaf) >= first_to_move) {
++ /*
++ * insertion point moves into new leaf.
++ */
++ ptrdiff_t shift;
++ int result;
++
++ shift = PDIFF(leaf->il_at, first_to_move);
++ *bh = leaf->il_bh;
++ leaf->il_bh = new_leaf;
++ result = lvar_init(leaf);
++ /*
++ * init cannot fail, as node was just initialized.
++ */
++ assert(result == 0);
++ leaf->il_at = ((void *)leaf->il_at) + shift;
++ }
++}
++
++/*
++ * Leaf operations of lvar format. Installed into container description by
++ * lvar_guess().
++ */
++static struct iam_leaf_operations lvar_leaf_ops = {
++ .init = lvar_init,
++ .init_new = lvar_init_new,
++ .fini = lvar_fini,
++ .start = lvar_start,
++ .next = lvar_next,
++ .key = lvar_key,
++ .ikey = lvar_ikey,
++ .rec = lvar_rec,
++ .key_set = lvar_key_set,
++ .key_cmp = lvar_key_cmp,
++ .key_size = lvar_key_size,
++ .rec_set = lvar_rec_set,
++ .lookup = lvar_lookup,
++ .at_end = lvar_at_end,
++ .rec_add = lvar_rec_add,
++ .rec_del = lvar_rec_del,
++ .can_add = lvar_can_add,
++ .split = lvar_split
++};
++
++/*
++ * Index operations.
++ */
++
++enum {
++ /*
++ * Magic number expected in vr_magic of the root block of lvar
++ * container.
++ */
++ /* This is duplicated in lustre/utils/create_iam.c */
++ /* egrep -i '^o?x?[olabcdef]*$' /usr/share/dict/words */
++ IAM_LVAR_ROOT_MAGIC = 0xb01dface
++};
++
++/* This is duplicated in lustre/utils/create_iam.c */
++struct lvar_root {
++ __le32 vr_magic; /* magic number IAM_LVAR_ROOT_MAGIC */
++ __le16 vr_recsize; /* record size, see lvar_guess() */
++ __le16 vr_ptrsize; /* pointer size, see lvar_guess() */
++ u8 vr_indirect_levels; /* incremented by lvar_root_inc() */
++ u8 vr_padding0;
++ __le16 vr_padding1;
++};
++
++/* ->id_root_ptr() method: pointer to the root node, always 0 for lvar. */
++static __u32 lvar_root_ptr(struct iam_container *c)
++{
++ return 0;
++}
++
++/*
++ * ->id_node_init() method: nothing is done for lvar here, 0 is returned
++ * unconditionally.
++ */
++static int lvar_node_init(struct iam_container *c, struct buffer_head *bh,
++ int root)
++{
++ return 0;
++}
++
++/*
++ * ->id_root_inc() method: account for one more level of indirection in the
++ * root block of @frame.
++ *
++ * Entry count of the root is set to 2, vr_indirect_levels is incremented,
++ * and the result of iam_entry_shift(path, entries, 1) is returned.
++ */
++static struct iam_entry *lvar_root_inc(struct iam_container *c,
++                                       struct iam_path *path,
++                                       struct iam_frame *frame)
++{
++        struct lvar_root *root;
++        struct iam_entry *entries;
++
++        entries = frame->entries;
++
++        dx_set_count(entries, 2);
++        assert(dx_get_limit(entries) == dx_root_limit(path));
++
++        root = (void *)frame->bh->b_data;
++        /*
++         * vr_magic is a 32-bit little-endian field: 64-bit conversion
++         * would never match the magic on a big-endian host.
++         */
++        assert(le32_to_cpu(root->vr_magic) == IAM_LVAR_ROOT_MAGIC);
++        root->vr_indirect_levels ++;
++        return iam_entry_shift(path, entries, 1);
++}
++
++/*
++ * ->id_node_check() method: sanity check of the index node in @frame.
++ *
++ * For the root frame the magic is verified. For every node, entry count
++ * must not exceed the limit stored in the node, and that limit must be
++ * equal to the one calculated for this kind of node.
++ *
++ * Returns 0 when node looks valid, -EIO otherwise.
++ */
++static int lvar_node_check(struct iam_path *path, struct iam_frame *frame)
++{
++        unsigned count;
++        unsigned limit;
++        unsigned limit_correct;
++        struct iam_entry *entries;
++
++        entries = dx_node_get_entries(path, frame);
++
++        if (frame == path->ip_frames) {
++                struct lvar_root *root;
++
++                root = (void *)frame->bh->b_data;
++                /*
++                 * vr_magic is a 32-bit little-endian field: 64-bit
++                 * conversion would never match the magic on a big-endian
++                 * host.
++                 */
++                if (le32_to_cpu(root->vr_magic) != IAM_LVAR_ROOT_MAGIC) {
++                        BREAKPOINT;
++                        return -EIO;
++                }
++                limit_correct = dx_root_limit(path);
++        } else
++                limit_correct = dx_node_limit(path);
++        count = dx_get_count(entries);
++        limit = dx_get_limit(entries);
++        if (count > limit) {
++                BREAKPOINT;
++                return -EIO;
++        }
++        if (limit != limit_correct) {
++                BREAKPOINT;
++                return -EIO;
++        }
++        return 0;
++}
++
++/*
++ * ->id_node_load() method: set up @frame for traversal, pointing both
++ * frame->entries and frame->at to the entries of the node.
++ *
++ * For the root frame, additionally, number of indirect levels is read from
++ * the root block, and hash of the target key (treated as NUL-terminated
++ * name) is stored into the index key buffer obtained through
++ * iam_path_ikey(path, 4), which becomes path->ip_ikey_target.
++ *
++ * Always returns 0.
++ */
++static int lvar_node_load(struct iam_path *path, struct iam_frame *frame)
++{
++ struct iam_entry *entries;
++ void *data;
++ entries = dx_node_get_entries(path, frame);
++
++ data = frame->bh->b_data;
++
++ if (frame == path->ip_frames) {
++ struct lvar_root *root;
++ const char *name;
++
++ root = data;
++ name = kchar(path->ip_key_target);
++ path->ip_indirect = root->vr_indirect_levels;
++ path->ip_ikey_target = iam_path_ikey(path, 4);
++ *(lvar_hash_t *)path->ip_ikey_target =
++ get_hash(path->ip_container, name, strlen(name));
++ }
++ frame->entries = frame->at = entries;
++ return 0;
++}
++
++/*
++ * ->id_ikeycmp() method: three-way comparison of index keys @k1 and @k2,
++ * each read as a little-endian 32-bit hash. @c is not used.
++ */
++static int lvar_ikeycmp(const struct iam_container *c,
++                        const struct iam_ikey *k1, const struct iam_ikey *k2)
++{
++        lvar_hash_t left  = le32_to_cpu(*(lvar_hash_t *)k1);
++        lvar_hash_t right = le32_to_cpu(*(lvar_hash_t *)k2);
++
++        if (left > right)
++                return +1;
++        if (left < right)
++                return -1;
++        return 0;
++}
++
++/*
++ * ->id_ipd_alloc() method: allocate path descriptor through
++ * iam_ipd_alloc(), passing it index key size of the container.
++ */
++static struct iam_path_descr *lvar_ipd_alloc(const struct iam_container *c)
++{
++        const struct iam_descr *descr = c->ic_descr;
++
++        return iam_ipd_alloc(descr->id_ikey_size);
++}
++
++/*
++ * ->id_ipd_free() method: release path descriptor @ipd through
++ * iam_ipd_free(). @c is not used.
++ */
++static void lvar_ipd_free(const struct iam_container *c,
++ struct iam_path_descr *ipd)
++{
++ iam_ipd_free(ipd);
++}
++
++/*
++ * Index operations of lvar format. Installed into container description by
++ * lvar_guess().
++ */
++static struct iam_operations lvar_ops = {
++ .id_root_ptr = lvar_root_ptr,
++ .id_node_read = iam_node_read,
++ .id_node_init = lvar_node_init,
++ .id_node_check = lvar_node_check,
++ .id_node_load = lvar_node_load,
++ .id_ikeycmp = lvar_ikeycmp,
++ .id_root_inc = lvar_root_inc,
++ .id_ipd_alloc = lvar_ipd_alloc,
++ .id_ipd_free = lvar_ipd_free,
++ .id_name = "lvar"
++};
++
++/*
++ * ->if_guess() method: check whether container @c is in lvar format.
++ *
++ * Root block is read, and if it carries IAM_LVAR_ROOT_MAGIC, container
++ * description is filled in from it and lvar operations are installed.
++ *
++ * Returns 0 on success, -EBADF when magic doesn't match, or an error
++ * returned by iam_node_read().
++ */
++static int lvar_guess(struct iam_container *c)
++{
++        int result;
++        struct buffer_head *bh;
++        const struct lvar_root *root;
++
++        assert(c->ic_object != NULL);
++
++        result = iam_node_read(c, lvar_root_ptr(c), NULL, &bh);
++        if (result == 0) {
++                root = (void *)bh->b_data;
++                /*
++                 * vr_magic is a 32-bit little-endian field: 64-bit
++                 * conversion would never match the magic on a big-endian
++                 * host.
++                 */
++                if (le32_to_cpu(root->vr_magic) == IAM_LVAR_ROOT_MAGIC) {
++                        struct iam_descr *descr;
++
++                        descr = c->ic_descr;
++                        descr->id_key_size = EXT3_NAME_LEN;
++                        descr->id_ikey_size = sizeof (lvar_hash_t);
++                        descr->id_rec_size = le16_to_cpu(root->vr_recsize);
++                        descr->id_ptr_size = le16_to_cpu(root->vr_ptrsize);
++                        descr->id_root_gap = sizeof *root;
++                        descr->id_node_gap = 0;
++                        descr->id_ops = &lvar_ops;
++                        descr->id_leaf_ops = &lvar_leaf_ops;
++                } else
++                        result = -EBADF;
++                /*
++                 * Everything needed was copied out of the root block by
++                 * value: drop the buffer obtained from iam_node_read(),
++                 * which would be leaked otherwise.
++                 */
++                brelse(bh);
++        }
++        return result;
++}
++
++/* Format descriptor of lvar, registered by iam_lvar_format_init(). */
++static struct iam_format lvar_format = {
++ .if_guess = lvar_guess
++};
++
++/* Register lvar format through iam_format_register(). */
++void iam_lvar_format_init(void)
++{
++ iam_format_register(&lvar_format);
++}
++
Index: iam/fs/ext3/namei.c
===================================================================
--- iam.orig/fs/ext3/namei.c 2006-05-31 20:24:32.000000000 +0400
Index: iam/include/linux/lustre_iam.h
===================================================================
--- iam.orig/include/linux/lustre_iam.h 2006-05-31 20:24:32.000000000 +0400
-+++ iam/include/linux/lustre_iam.h 2006-06-29 18:50:12.000000000 +0400
++++ iam/include/linux/lustre_iam.h 2006-07-03 01:03:10.000000000 +0400
@@ -1,9 +1,68 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
Index: iam/fs/ext3/Makefile
===================================================================
---- iam.orig/fs/ext3/Makefile 2006-06-29 18:50:12.000000000 +0400
-+++ iam/fs/ext3/Makefile 2006-06-29 18:50:13.000000000 +0400
+--- iam.orig/fs/ext3/Makefile 2006-07-03 01:03:10.000000000 +0400
++++ iam/fs/ext3/Makefile 2006-07-03 01:03:11.000000000 +0400
@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o
ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
ioctl.o namei.o super.o symlink.o hash.o resize.o \
- extents.o mballoc.o iam.o iam_lfix.o
-+ extents.o mballoc.o iam.o iam_lfix.o iam_htree.o iam_uapi.o
++ extents.o mballoc.o iam.o iam_lfix.o iam_lvar.o iam_htree.o iam_uapi.o
ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
Index: iam/fs/ext3/dir.c
===================================================================
---- iam.orig/fs/ext3/dir.c 2006-06-29 18:50:12.000000000 +0400
-+++ iam/fs/ext3/dir.c 2006-06-29 18:50:13.000000000 +0400
+--- iam.orig/fs/ext3/dir.c 2006-07-03 01:03:10.000000000 +0400
++++ iam/fs/ext3/dir.c 2006-07-03 01:03:11.000000000 +0400
@@ -28,6 +28,7 @@
#include <linux/smp_lock.h>
#include <linux/slab.h>
(filp->f_version != inode->i_version)) {
Index: iam/fs/ext3/file.c
===================================================================
---- iam.orig/fs/ext3/file.c 2006-06-29 18:50:12.000000000 +0400
-+++ iam/fs/ext3/file.c 2006-06-29 18:50:13.000000000 +0400
+--- iam.orig/fs/ext3/file.c 2006-07-03 01:03:10.000000000 +0400
++++ iam/fs/ext3/file.c 2006-07-03 01:03:11.000000000 +0400
@@ -23,6 +23,7 @@
#include <linux/jbd.h>
#include <linux/ext3_fs.h>
Index: iam/fs/ext3/iam-uapi.c
===================================================================
--- iam.orig/fs/ext3/iam-uapi.c 2004-04-06 17:27:52.000000000 +0400
-+++ iam/fs/ext3/iam-uapi.c 2006-06-29 18:50:13.000000000 +0400
-@@ -0,0 +1,357 @@
++++ iam/fs/ext3/iam-uapi.c 2006-07-03 01:03:11.000000000 +0400
+@@ -0,0 +1,361 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+};
+
+enum {
-+ IAM_INSERT_CREDITS = 10
++ IAM_INSERT_CREDITS = 20
+};
+
+static struct iam_private_info *get_ipi(struct file *filp)
+ st = it->ii_state;
+ if (st == IAM_IT_ATTACHED || st == IAM_IT_SKEWED)
+ memcpy(itop->iui_op.iul_key, iam_it_key_get(it),
-+ ipi->ipi_bag.ic_descr->id_key_size);
++ iam_it_key_size(it));
+ if (st == IAM_IT_ATTACHED)
+ iam_reccpy(&it->ii_path,
+ itop->iui_op.iul_rec, iam_it_rec_get(it));
+ struct iam_uapi_it it;
+ enum outop_t opt;
+
-+ if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER))
++ if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) {
+ result = -EACCES;
-+ else if (cmd == IAM_IOC_INIT) {
++ } else if (cmd == IAM_IOC_POLYMORPH) {
++ inode->i_mode = (umode_t)arg;
++ mark_inode_dirty(inode);
++ result = 0;
++ } else if (cmd == IAM_IOC_INIT) {
+ if (filp->private_data == NULL) {
+ result = getua(&ua, arg);
+ if (result == 0)
+}
Index: iam/fs/ext3/ioctl.c
===================================================================
---- iam.orig/fs/ext3/ioctl.c 2006-06-29 18:50:12.000000000 +0400
-+++ iam/fs/ext3/ioctl.c 2006-06-29 18:50:13.000000000 +0400
+--- iam.orig/fs/ext3/ioctl.c 2006-07-03 01:03:10.000000000 +0400
++++ iam/fs/ext3/ioctl.c 2006-07-03 01:03:11.000000000 +0400
@@ -250,6 +250,6 @@ flags_err:
}
Index: iam/include/linux/lustre_iam.h
===================================================================
---- iam.orig/include/linux/lustre_iam.h 2006-06-29 18:50:12.000000000 +0400
-+++ iam/include/linux/lustre_iam.h 2006-06-29 18:50:13.000000000 +0400
+--- iam.orig/include/linux/lustre_iam.h 2006-07-03 01:03:10.000000000 +0400
++++ iam/include/linux/lustre_iam.h 2006-07-03 01:03:11.000000000 +0400
@@ -30,9 +30,6 @@
#ifndef __LINUX_LUSTRE_IAM_H__
#define __LINUX_LUSTRE_IAM_H__
typedef __u64 iam_ptr_t;
/*
-@@ -123,6 +134,27 @@ struct iam_leaf {
+@@ -123,6 +134,31 @@ struct iam_leaf {
void *il_descr_data;
};
+ */
+enum iam_lookup_t {
+ /*
++ * lookup found a record with the key requested
++ */
++ IAM_LOOKUP_EXACT,
++ /*
+ * lookup positioned leaf on some record
+ */
+ IAM_LOOKUP_OK,
struct iam_operations {
/*
* Returns pointer (in the same sense as pointer in index entry) to
-@@ -131,11 +163,15 @@ struct iam_operations {
+@@ -131,11 +167,15 @@ struct iam_operations {
__u32 (*id_root_ptr)(struct iam_container *c);
/*
* Initialize new node (stored in @bh) that is going to be added into
* tree.
*/
-@@ -144,23 +180,33 @@ struct iam_operations {
+@@ -144,23 +184,33 @@ struct iam_operations {
int (*id_node_read)(struct iam_container *c, iam_ptr_t ptr,
handle_t *h, struct buffer_head **bh);
/*
struct iam_leaf_operations {
/*
* leaf operations.
-@@ -186,7 +232,8 @@ struct iam_leaf_operations {
+@@ -186,7 +236,8 @@ struct iam_leaf_operations {
void (*start)(struct iam_leaf *l);
/* more leaf to the next entry. */
void (*next)(struct iam_leaf *l);
* either pointer to the key stored in node, or copy key into
* @k buffer supplied by caller and return pointer to this
* buffer. The latter approach is used when keys in nodes are
-@@ -194,8 +241,10 @@ struct iam_leaf_operations {
+@@ -194,8 +245,10 @@ struct iam_leaf_operations {
* all).
*
* Caller should assume that returned pointer is only valid
/* return pointer to entry body. Pointer is valid while
corresponding leaf node is locked and pinned. */
struct iam_rec *(*rec)(const struct iam_leaf *l);
-@@ -203,6 +252,8 @@ struct iam_leaf_operations {
+@@ -203,6 +256,9 @@ struct iam_leaf_operations {
void (*key_set)(struct iam_leaf *l, const struct iam_key *k);
void (*rec_set)(struct iam_leaf *l, const struct iam_rec *r);
+ int (*key_cmp)(const struct iam_leaf *l, const struct iam_key *k);
+
++ int (*key_size)(const struct iam_leaf *l);
/*
* Search leaf @l for a record with key @k or for a place
* where such record is to be inserted.
-@@ -221,12 +272,13 @@ struct iam_leaf_operations {
+@@ -221,12 +277,13 @@ struct iam_leaf_operations {
/*
* remove rec for a leaf
*/
};
struct iam_path *iam_leaf_path(const struct iam_leaf *leaf);
-@@ -241,6 +293,10 @@ struct iam_descr {
+@@ -241,6 +298,10 @@ struct iam_descr {
*/
size_t id_key_size;
/*
* Size of a pointer to the next level (stored in index nodes), in
* bytes.
*/
-@@ -264,6 +320,9 @@ struct iam_descr {
+@@ -264,6 +325,9 @@ struct iam_descr {
struct iam_leaf_operations *id_leaf_ops;
};
struct iam_container {
/*
* Underlying flat file. IO against this object is issued to
-@@ -284,7 +343,7 @@ struct iam_path_descr {
+@@ -284,7 +348,7 @@ struct iam_path_descr {
/*
* Scratch-pad area for temporary keys.
*/
};
/*
-@@ -316,6 +375,7 @@ struct iam_path {
+@@ -316,6 +380,7 @@ struct iam_path {
* Key searched for.
*/
const struct iam_key *ip_key_target;
/*
* Description-specific data.
*/
-@@ -334,6 +394,7 @@ struct iam_path_compat {
+@@ -334,6 +399,7 @@ struct iam_path_compat {
struct dx_hash_info *ipc_hinfo;
struct dentry *ipc_dentry;
struct iam_path_descr ipc_descr;
};
/*
-@@ -347,7 +408,9 @@ enum iam_it_state {
+@@ -347,7 +413,9 @@ enum iam_it_state {
/* initial state */
IAM_IT_DETACHED,
/* iterator is above particular record in the container */
};
/*
-@@ -355,7 +418,7 @@ enum iam_it_state {
+@@ -355,7 +423,7 @@ enum iam_it_state {
*/
enum iam_it_flags {
/*
*/
IAM_IT_MOVE = (1 << 0),
/*
-@@ -372,15 +435,26 @@ enum iam_it_flags {
+@@ -372,15 +440,26 @@ enum iam_it_flags {
* doesn't point to any particular record in this container.
*
* After successful call to iam_it_get() and until corresponding call to
*
*/
struct iam_iterator {
-@@ -390,7 +464,8 @@ struct iam_iterator {
+@@ -390,7 +469,8 @@ struct iam_iterator {
__u32 ii_flags;
enum iam_it_state ii_state;
/*
*/
struct iam_path ii_path;
};
-@@ -405,133 +480,24 @@ void iam_path_compat_fini(struct iam_pat
+@@ -405,133 +485,25 @@ void iam_path_compat_fini(struct iam_pat
struct iam_path_descr *iam_ipd_alloc(int keysize);
void iam_ipd_free(struct iam_path_descr *ipd);
- * !memcmp(iam_it_rec_get(it), r, ...))
- */
+struct iam_key *iam_it_key_get(const struct iam_iterator *it);
++int iam_it_key_size(const struct iam_iterator *it);
int iam_it_rec_insert(handle_t *h, struct iam_iterator *it,
const struct iam_key *k, const struct iam_rec *r);
-/*
int iam_it_load(struct iam_iterator *it, iam_pos_t pos);
int iam_lookup(struct iam_container *c, const struct iam_key *k,
-@@ -577,16 +543,65 @@ static inline struct inode *iam_path_obj
+@@ -577,16 +549,65 @@ static inline struct inode *iam_path_obj
return p->ip_container->ic_object;
}
+}
+
+static inline size_t iam_entry_size(struct iam_path *p)
- {
-- memcpy(k1, k2, c->ic_descr->id_key_size);
++{
+ return iam_path_descr(p)->id_ikey_size + iam_path_descr(p)->id_ptr_size;
- }
-
--static inline int iam_keycmp(const struct iam_container *c,
-- const struct iam_key *k1, const struct iam_key *k2)
++}
++
+static inline struct iam_entry *iam_entry_shift(struct iam_path *p,
+ struct iam_entry *entry,
+ int shift)
- {
-- return c->ic_descr->id_ops->id_keycmp(c, k1, k2);
++{
+ void *e = entry;
+ return e + shift * iam_entry_size(p);
+}
+ */
+static inline void iam_ikeycpy0(const struct iam_container *c,
+ struct iam_ikey *k1, const struct iam_ikey *k2)
-+{
+ {
+- memcpy(k1, k2, c->ic_descr->id_key_size);
+ if (k1 != k2)
+ iam_ikeycpy(c, k1, k2);
-+}
-+
+ }
+
+-static inline int iam_keycmp(const struct iam_container *c,
+- const struct iam_key *k1, const struct iam_key *k2)
+static inline int iam_ikeycmp(const struct iam_container *c,
+ const struct iam_ikey *k1,
+ const struct iam_ikey *k2)
-+{
+ {
+- return c->ic_descr->id_ops->id_keycmp(c, k1, k2);
+ return c->ic_descr->id_ops->id_ikeycmp(c, k1, k2);
}
static inline void iam_reccpy(const struct iam_path *p, struct iam_rec *rec_dst,
-@@ -604,7 +619,7 @@ static inline void *iam_entry_off(struct
+@@ -604,7 +625,7 @@ static inline void *iam_entry_off(struct
static inline unsigned dx_get_block(struct iam_path *p, struct iam_entry *entry)
{
return le32_to_cpu(*(u32*)iam_entry_off(entry,
& 0x00ffffff;
}
-@@ -612,21 +627,64 @@ static inline void dx_set_block(struct i
+@@ -612,21 +633,64 @@ static inline void dx_set_block(struct i
struct iam_entry *entry, unsigned value)
{
*(u32*)iam_entry_off(entry,
static inline unsigned dx_get_count(struct iam_entry *entries)
{
return le16_to_cpu(((struct dx_countlimit *) entries)->count);
-@@ -647,9 +705,18 @@ static inline unsigned dx_node_limit(str
+@@ -647,9 +711,18 @@ static inline unsigned dx_node_limit(str
struct iam_descr *param = iam_path_descr(p);
unsigned entry_space = iam_path_obj(p)->i_sb->s_blocksize -
param->id_node_gap;
static inline struct iam_entry *dx_get_entries(struct iam_path *path,
void *data, int root)
{
-@@ -665,7 +732,8 @@ static inline struct iam_entry *dx_node_
+@@ -665,7 +738,8 @@ static inline struct iam_entry *dx_node_
frame->bh->b_data, frame == path->ip_frames);
}
{
assert(0 <= nr && nr < ARRAY_SIZE(path->ip_data->ipd_key_scratch));
return path->ip_data->ipd_key_scratch[nr];
-@@ -674,6 +742,7 @@ static inline struct iam_key *iam_path_k
+@@ -674,6 +748,7 @@ static inline struct iam_key *iam_path_k
int dx_lookup(struct iam_path *path);
void dx_insert_block(struct iam_path *path, struct iam_frame *frame,
u32 hash, u32 block);
int ext3_htree_next_block(struct inode *dir, __u32 hash,
struct iam_path *path, __u32 *start_hash);
-@@ -681,6 +750,21 @@ int ext3_htree_next_block(struct inode *
+@@ -681,6 +756,21 @@ int ext3_htree_next_block(struct inode *
struct buffer_head *ext3_append(handle_t *handle, struct inode *inode,
u32 *block, int *err);
int split_index_node(handle_t *handle, struct iam_path *path);
/*
* external
-@@ -698,10 +782,12 @@ int iam_node_read(struct iam_container *
+@@ -698,10 +788,12 @@ int iam_node_read(struct iam_container *
handle_t *handle, struct buffer_head **bh);
void iam_insert_key(struct iam_path *path, struct iam_frame *frame,
struct iam_path *iam_leaf_path(const struct iam_leaf *leaf);
struct iam_container *iam_leaf_container(const struct iam_leaf *leaf);
-@@ -709,14 +795,76 @@ struct iam_descr *iam_leaf_descr(const s
+@@ -709,14 +801,79 @@ struct iam_descr *iam_leaf_descr(const s
struct iam_leaf_operations *iam_leaf_ops(const struct iam_leaf *leaf);
void iam_format_register(struct iam_format *fmt);
void iam_lfix_format_init(void);
++void iam_lvar_format_init(void);
+void iam_htree_format_init(void);
+
+struct iam_private_info;
+ IAM_IOC_DELETE = _IOR('i', 5, struct iam_uapi_op),
+ IAM_IOC_IT_START = _IOR('i', 6, struct iam_uapi_it),
+ IAM_IOC_IT_NEXT = _IOW('i', 7, struct iam_uapi_it),
-+ IAM_IOC_IT_STOP = _IOR('i', 8, struct iam_uapi_it)
++ IAM_IOC_IT_STOP = _IOR('i', 8, struct iam_uapi_it),
++
++ IAM_IOC_POLYMORPH = _IOR('i', 9, unsigned long)
+};
/* __LINUX_LUSTRE_IAM_H__ */
linux_headers := $(wildcard @LINUX@/include/linux/ext3*.h)
ext3_sources := $(filter-out %.mod.c,$(wildcard @LINUX@/fs/ext3/*.c))
-new_sources := iopen.c iopen.h extents.c mballoc.c iam.c iam_lfix.c iam_htree.c iam-uapi.c
+new_sources := iopen.c iopen.h extents.c mballoc.c \
+ iam.c iam_lfix.c iam_lvar.c iam_htree.c iam-uapi.c
new_headers := ext3_extents.h
ldiskfs_patched_sources := $(notdir $(ext3_sources) $(ext3_headers)) $(new_sources) $(new_headers)
ldiskfs_sources := $(ldiskfs_patched_sources)
CERROR("MD size %d larger than maximum possible %u\n",
rc, MAX_MD_SIZE);
} else {
- req_capsule_set_size(pill, &RMF_MDT_MD,
+ req_capsule_set_size(pill, &RMF_MDT_MD,
RCL_SERVER, rc);
}
} else if (S_ISLNK(la->la_mode) && (body->valid & OBD_MD_LINKNAME)) {
}
req_capsule_set_size(pill, &RMF_EADATA, RCL_SERVER, 0);
} else
- req_capsule_set_size(pill, &RMF_EADATA,
+ req_capsule_set_size(pill, &RMF_EADATA,
RCL_SERVER, rc);
}
#endif
/*step 3: find the child object by fid & lock it*/
lhc->mlh_mode = LCK_CR;
- child = mdt_object_find_lock(info->mti_ctxt, info->mti_mdt,
+ child = mdt_object_find_lock(info->mti_ctxt, info->mti_mdt,
&child_fid, lhc, child_bits);
if (IS_ERR(child))
GOTO(out_parent, result = PTR_ERR(child));
/* If we're DISCONNECTing, the mdt_export_data is already freed */
if (result == 0 && h->mh_opc != MDS_DISCONNECT) {
#ifdef MDT_CODE
- /* FIXME: fake untill journal callback & open handling is OK.*/
+ /* FIXME: fake until journal callback & open handling is OK.*/
__u64 last_transno;
__u64 last_committed;
struct mdt_device *mdt = info->mti_mdt;
last_transno = ++ (mdt->mdt_last_transno);
last_committed = ++ (mdt->mdt_last_committed);
spin_unlock(&mdt->mdt_transno_lock);
-
+
req->rq_repmsg->transno = req->rq_transno = last_transno;
req->rq_repmsg->last_xid = req->rq_xid;
req->rq_repmsg->last_committed = last_committed;
}
rc = mdt_getattr_name_lock(info, &lhc, child_bits);
- ldlm_rep = req_capsule_server_get(&info->mti_pill,
+ /*
+ * XXX nikita: if rc != 0, reply message is not necessarily packed.
+ */
+ ldlm_rep = req_capsule_server_get(&info->mti_pill,
&RMF_DLM_REP);
if (rc)
intent_set_disposition(ldlm_rep, DISP_LOOKUP_NEG);
new_lock = ldlm_handle2lock(&lhc.mlh_lh);
if (new_lock == NULL && (flags & LDLM_FL_INTENT_ONLY))
RETURN(0);
-
+
LASSERTF(new_lock != NULL, "op %d lockh "LPX64"\n",
opcode, lhc.mlh_lh.cookie);
}
static int mdt_seq_init(const struct lu_context *ctx,
- const char *uuid,
+ const char *uuid,
struct mdt_device *m)
{
struct lu_site *ls;
OBD_ALLOC_PTR(ls->ls_ctlr_seq);
if (ls->ls_ctlr_seq != NULL) {
- rc = seq_server_init(ls->ls_ctlr_seq,
+ rc = seq_server_init(ls->ls_ctlr_seq,
m->mdt_bottom, uuid,
LUSTRE_SEQ_CTLR,
ctx);
OBD_ALLOC_PTR(ls->ls_server_seq);
if (ls->ls_server_seq != NULL) {
- rc = seq_server_init(ls->ls_server_seq,
+ rc = seq_server_init(ls->ls_server_seq,
m->mdt_bottom, uuid,
LUSTRE_SEQ_SRV,
ctx);
if (ls && ls->ls_server_seq)
seq_server_fini_ctlr(ls->ls_server_seq);
-
+
if (ls && ls->ls_client_seq) {
seq_client_fini(ls->ls_client_seq);
OBD_FREE_PTR(ls->ls_client_seq);
ls->ls_client_seq = NULL;
}
-
+
if (ls && ls->ls_ctlr_exp) {
int rc = obd_disconnect(ls->ls_ctlr_exp);
ls->ls_ctlr_exp = NULL;
-
+
if (rc) {
CERROR("failure to disconnect "
"obd: %d\n", rc);
* FLD wrappers
*/
static int mdt_fld_init(const struct lu_context *ctx,
- const char *uuid,
+ const char *uuid,
struct mdt_device *m)
{
struct lu_site *ls;
rc = next->ld_ops->ldo_notify(&ctxt, next, watched, ev, data);
lu_context_exit(&ctxt);
out:
- lu_context_fini(&ctxt);
- RETURN(rc);
+ lu_context_fini(&ctxt);
+ RETURN(rc);
}
static struct obd_ops mdt_obd_device_ops = {
{
fprintf(out, "%s v"LUSTRE_VERSION_STRING"\n", progname);
fprintf(out, "usage: %s <target types> [options] <device>\n", progname);
- fprintf(out,
+ fprintf(out,
"\t<device>:block device or file (e.g /dev/sda or /tmp/ost1)\n"
"\ttarget types:\n"
"\t\t--ost: object storage, mutually exclusive with mdt\n"
/*================ utility functions =====================*/
-inline unsigned int
+inline unsigned int
dev_major (unsigned long long int __dev)
{
return ((__dev >> 8) & 0xfff) | ((unsigned int) (__dev >> 32) & ~0xfff);
char release[4] = "";
fd = open("/proc/sys/kernel/osrelease", O_RDONLY);
- if (fd < 0)
+ if (fd < 0)
fprintf(stderr, "%s: Warning: Can't resolve kernel "
"version, assuming 2.6\n", progname);
else {
read(fd, release, 4);
close(fd);
}
- if (strncmp(release, "2.4.", 4) == 0)
+ if (strncmp(release, "2.4.", 4) == 0)
version = 24;
- else
+ else
version = 26;
}
return version;
{
char log[] = "/tmp/mkfs_logXXXXXX";
int fd, rc;
-
+
if (verbose > 1)
printf("cmd: %s\n", cmd);
-
+
if ((fd = mkstemp(log)) >= 0) {
close(fd);
strcat(cmd, " >");
fp = fopen(log, "r");
if (fp) {
while (fgets(buf, sizeof(buf), fp) != NULL) {
- if (rc || verbose > 2)
+ if (rc || verbose > 2)
printf(" %s", buf);
}
fclose(fp);
}
}
- if (fd >= 0)
+ if (fd >= 0)
remove(log);
return rc;
-}
+}
static int check_mtab_entry(char *spec, char *type)
{
for (i = 0; i < MAX_LOOP_DEVICES; i++) {
char cmd[128];
sprintf(l_device, "%s%d", loop_base, i);
- if (access(l_device, F_OK | R_OK))
+ if (access(l_device, F_OK | R_OK))
break;
sprintf(cmd, "losetup %s > /dev/null 2>&1", l_device);
ret = system(cmd);
return ret;
}
}
-
+
fprintf(stderr, "%s: out of loop devices!\n", progname);
return EMFILE;
-}
+}
int loop_cleanup(struct mkfs_opts *mop)
{
int ret = 0;
ret = access(devname, F_OK);
- if (ret != 0)
+ if (ret != 0)
return 0;
ret = stat(devname, &st);
if (ret != 0) {
return S_ISBLK(st.st_mode);
}
-__u64 get_device_size(char* device)
+__u64 get_device_size(char* device)
{
int ret, fd;
__u64 size = 0;
fd = open(device, O_RDONLY);
if (fd < 0) {
- fprintf(stderr, "%s: cannot open %s: %s\n",
+ fprintf(stderr, "%s: cannot open %s: %s\n",
progname, device, strerror(errno));
return 0;
}
ret = ioctl(fd, BLKGETSIZE64, (void*)&size);
close(fd);
if (ret < 0) {
- fprintf(stderr, "%s: size ioctl failed: %s\n",
+ fprintf(stderr, "%s: size ioctl failed: %s\n",
progname, strerror(errno));
return 0;
}
-
+
vprint("device size = "LPU64"MB\n", size >> 20);
/* return value in KB */
return size >> 10;
int loop_format(struct mkfs_opts *mop)
{
int ret = 0;
-
+
if (mop->mo_device_sz == 0) {
fatal();
fprintf(stderr, "loop device requires a --device-size= "
ret = truncate(mop->mo_device, mop->mo_device_sz * 1024);
if (ret != 0) {
ret = errno;
- fprintf(stderr, "%s: Unable to create backing store: %d\n",
+ fprintf(stderr, "%s: Unable to create backing store: %d\n",
progname, ret);
}
/* Filesystem has unsupported feature */
vprint("%.*s", i, debugfs_cmd);
/* in all likelihood, the "unsupported feature" is
- 'extents', which older debugfs does not understand.
- Use e2fsprogs-1.38-cfs1 or later, available from
+ 'extents', which older debugfs does not understand.
+ Use e2fsprogs-1.38-cfs1 or later, available from
ftp://ftp.lustre.org/pub/lustre/other/e2fsprogs/ */
return -1;
}
{
int rc;
vprint("checking for existing Lustre data\n");
-
+
if ((rc = file_in_dev(MOUNT_DATA_FILE, mop->mo_device))
- || (rc = file_in_dev(LAST_RCVD, mop->mo_device))) {
+ || (rc = file_in_dev(LAST_RCVD, mop->mo_device))) {
vprint("found Lustre data\n");
/* in the -1 case, 'extents' means this really IS a lustre
target */
- return rc;
+ return rc;
}
return 0; /* The device is not a lustre target. */
return EINVAL;
}
block_count = mop->mo_device_sz / (L_BLOCK_SIZE >> 10);
- }
-
+ }
+
if ((mop->mo_ldd.ldd_mount_type == LDD_MT_EXT3) ||
- (mop->mo_ldd.ldd_mount_type == LDD_MT_LDISKFS)) {
+ (mop->mo_ldd.ldd_mount_type == LDD_MT_LDISKFS)) {
__u64 device_sz = mop->mo_device_sz;
/* we really need the size */
if (device_sz == 0) {
device_sz = get_device_size(mop->mo_device);
- if (device_sz == 0)
+ if (device_sz == 0)
return ENODEV;
}
/* Default bytes_per_inode is block size */
if (strstr(mop->mo_mkfsopts, "-i") == NULL) {
long bytes_per_inode = 0;
-
- if (IS_MDT(&mop->mo_ldd))
+
+ if (IS_MDT(&mop->mo_ldd))
bytes_per_inode = 4096;
/* Allocate fewer inodes on large OST devices. Most
- filesystems can be much more aggressive than even
+ filesystems can be much more aggressive than even
this. */
- if ((IS_OST(&mop->mo_ldd) && (device_sz > 1000000)))
+ if ((IS_OST(&mop->mo_ldd) && (device_sz > 1000000)))
bytes_per_inode = 16384;
-
+
if (bytes_per_inode > 0) {
sprintf(buf, " -i %ld", bytes_per_inode);
strcat(mop->mo_mkfsopts, buf);
}
}
-
+
/* This is an undocumented mke2fs option. Default is 128. */
if (strstr(mop->mo_mkfsopts, "-I") == NULL) {
long inode_size = 0;
inode_size = 2048;
else if (mop->mo_stripe_count > 13)
inode_size = 1024;
- else
+ else
inode_size = 512;
} else if (IS_OST(&mop->mo_ldd)) {
- /* now as we store fids in EA on OST we need
+ /* now as we store fids in EA on OST we need
to make inode bigger */
inode_size = 256;
}
sprintf(buf, " -I %ld", inode_size);
strcat(mop->mo_mkfsopts, buf);
}
-
+
}
if (verbose < 2) {
strcat(mop->mo_mkfsopts, " -O dir_index");
}
- /* Allow reformat of full devices (as opposed to
+ /* Allow reformat of full devices (as opposed to
partitions.) We already checked for mounted dev. */
strcat(mop->mo_mkfsopts, " -F");
} else if (mop->mo_ldd.ldd_mount_type == LDD_MT_REISERFS) {
long journal_sz = 0; /* FIXME default journal size */
- if (journal_sz > 0) {
+ if (journal_sz > 0) {
sprintf(buf, " --journal_size %ld", journal_sz);
strcat(mop->mo_mkfsopts, buf);
}
} else {
fprintf(stderr,"%s: unsupported fs type: %d (%s)\n",
- progname, mop->mo_ldd.ldd_mount_type,
+ progname, mop->mo_ldd.ldd_mount_type,
MT_STR(&mop->mo_ldd));
return EINVAL;
}
/* For loop device format the dev, not the filename */
dev = mop->mo_device;
- if (mop->mo_flags & MO_IS_LOOP)
+ if (mop->mo_flags & MO_IS_LOOP)
dev = mop->mo_loopdev;
-
+
vprint("formatting backing filesystem %s on %s\n",
MT_STR(&mop->mo_ldd), dev);
vprint("\ttarget name %s\n", mop->mo_ldd.ldd_svname);
{
printf("\n %s:\n", str);
printf("Target: %s\n", ldd->ldd_svname);
- if (ldd->ldd_svindex == INDEX_UNASSIGNED)
+ if (ldd->ldd_svindex == INDEX_UNASSIGNED)
printf("Index: unassigned\n");
else
printf("Index: %d\n", ldd->ldd_svindex);
printf("Mount type: %s\n", MT_STR(ldd));
printf("Flags: %#x\n", ldd->ldd_flags);
printf(" (%s%s%s%s%s%s%s%s)\n",
- IS_MDT(ldd) ? "MDT ":"",
+ IS_MDT(ldd) ? "MDT ":"",
IS_OST(ldd) ? "OST ":"",
IS_MGS(ldd) ? "MGS ":"",
ldd->ldd_flags & LDD_F_NEED_INDEX ? "needs_index ":"",
}
dev = mop->mo_device;
- if (mop->mo_flags & MO_IS_LOOP)
+ if (mop->mo_flags & MO_IS_LOOP)
dev = mop->mo_loopdev;
-
+
ret = mount(dev, mntpt, MT_STR(&mop->mo_ldd), 0, NULL);
if (ret) {
- fprintf(stderr, "%s: Unable to mount %s: %s\n",
+ fprintf(stderr, "%s: Unable to mount %s: %s\n",
progname, dev, strerror(errno));
if (errno == ENODEV) {
- fprintf(stderr, "Is the %s module available?\n",
+ fprintf(stderr, "Is the %s module available?\n",
MT_STR(&mop->mo_ldd));
}
goto out_rmdir;
sprintf(filepnm, "%s/%s", mntpt, MOUNT_CONFIGS_DIR);
ret = mkdir(filepnm, 0777);
if ((ret != 0) && (errno != EEXIST)) {
- fprintf(stderr, "%s: Can't make configs dir %s (%d)\n",
- progname, filepnm, ret);
- goto out_umnt;
- } else if (errno == EEXIST) {
- ret = 0;
- }
-
- sprintf(filepnm, "%s/%s", mntpt, "oi");
- ret = mkdir(filepnm, 0777);
- if ((ret != 0) && (errno != EEXIST)) {
- fprintf(stderr, "%s: Can't make oi dir %s (%d)\n",
+ fprintf(stderr, "%s: Can't make configs dir %s (%d)\n",
progname, filepnm, ret);
goto out_umnt;
} else if (errno == EEXIST) {