-/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
- * vim:expandtab:shiftwidth=8:tabstop=8:
- *
+/*
* GPL HEADER START
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright (c) 2011, 2014, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* Format of a leaf node is not specified. Generic iam code accesses leaf
* nodes through ->id_leaf methods in struct iam_descr.
*
+ * The IAM root block is a special node that contains the IAM descriptor.
+ * Its on-disk format is:
+ *
+ * +---------+-------+--------+---------+-------+------+-------+------------+
+ * |IAM desc | count | idle | | | | | |
+ * |(fix/var)| / | blocks | padding | entry | .... | entry | free space |
+ * | | limit | | | | | | |
+ * +---------+-------+--------+---------+-------+------+-------+------------+
+ *
+ * The padding length is calculated from the parameters in the IAM
+ * descriptor.
+ *
+ * The field "idle_blocks" is used to record empty leaf nodes, which have not
+ * been released but all contained entries in them have been removed. Usually,
+ * the idle blocks in the IAM should be reused when need to allocate new leaf
+ * nodes for new entries, it depends on the IAM hash functions to map the new
+ * entries to these idle blocks. Unfortunately, it is not easy to design some
+ * hash functions for such clever mapping, especially considering the insert/
+ * lookup performance.
+ *
+ * So the IAM recycles empty leaf nodes by putting them into a per-file
+ * idle blocks pool. When a new leaf node is needed, an idle block is taken
+ * from this pool first, regardless of where the IAM hash functions would
+ * map the entry.
+ *
+ * The idle blocks pool is organized as a series of tables, each of which
+ * can be described as follows (on-disk format):
+ *
+ * +---------+---------+---------+---------+------+---------+-------+
+ * | magic | count | next | logic | | logic | free |
+ * |(16 bits)|(16 bits)| table | blk # | .... | blk # | space |
+ * | | |(32 bits)|(32 bits)| |(32 bits)| |
+ * +---------+---------+---------+---------+------+---------+-------+
+ *
+ * The logical blk# for the first table is stored in the root node's
+ * "idle_blocks" field.
+ *
*/
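+
+/*
+ * Illustrative sketch (commentary assumption, not code introduced by this
+ * patch): the functions below access the idle-table header via
+ * struct iam_idle_head, presumably defined in osd_internal.h with a layout
+ * matching the diagram above:
+ *
+ *	struct iam_idle_head {
+ *		__le16 iih_magic;	(IAM_IDLE_HEADER_MAGIC)
+ *		__le16 iih_count;	(# of idle blk#s in this table)
+ *		__le32 iih_next;	(logical blk# of the next table, 0 at end)
+ *		__le32 iih_blks[0];	(logical blk#s of idle leaf nodes)
+ *	};
+ *
+ * The root node's "idle_blocks" field sits right after the dx_countlimit,
+ * i.e. at ic_root_bh->b_data + id_root_gap + sizeof(struct dx_countlimit),
+ * which is how the lookups below locate it.
+ */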
#include <linux/module.h>
#include <linux/string.h>
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
-#include <linux/smp_lock.h>
+
+#include <ldiskfs/ldiskfs.h>
+#include <ldiskfs/xattr.h>
+#undef ENTRY
+
#include "osd_internal.h"
-#include "xattr.h"
-#include "iopen.h"
-#include "acl.h"
+#include <ldiskfs/acl.h>
/*
* List of all registered formats.
*
* No locking. Callers synchronize.
*/
-static CFS_LIST_HEAD(iam_formats);
+static struct list_head iam_formats = LIST_HEAD_INIT(iam_formats);
void iam_format_register(struct iam_format *fmt)
{
- cfs_list_add(&fmt->if_linkage, &iam_formats);
+ list_add(&fmt->if_linkage, &iam_formats);
+}
+
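+/*
+ * Load the idle-blocks table starting at logical block @blk. Returns NULL
+ * if @blk is zero (no idle blocks recorded), an ERR_PTR() on read failure
+ * or magic mismatch (also marking the container's idle machinery as
+ * failed), or the table's buffer head on success. The caller must hold
+ * ic_idle_mutex.
+ */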
+static struct buffer_head *
+iam_load_idle_blocks(struct iam_container *c, iam_ptr_t blk)
+{
+ struct inode *inode = c->ic_object;
+ struct iam_idle_head *head;
+ struct buffer_head *bh;
+ int err;
+
+ LASSERT(mutex_is_locked(&c->ic_idle_mutex));
+
+ if (blk == 0)
+ return NULL;
+
+ bh = ldiskfs_bread(NULL, inode, blk, 0, &err);
+ if (bh == NULL) {
+ CERROR("%.16s: cannot load idle blocks, blk = %u, err = %d\n",
+ LDISKFS_SB(inode->i_sb)->s_es->s_volume_name, blk, err);
+ c->ic_idle_failed = 1;
+ return ERR_PTR(err);
+ }
+
+ head = (struct iam_idle_head *)(bh->b_data);
+ if (le16_to_cpu(head->iih_magic) != IAM_IDLE_HEADER_MAGIC) {
+ CERROR("%.16s: invalid idle block head, blk = %u, magic = %d\n",
+ LDISKFS_SB(inode->i_sb)->s_es->s_volume_name, blk,
+ le16_to_cpu(head->iih_magic));
+ brelse(bh);
+ c->ic_idle_failed = 1;
+ return ERR_PTR(-EBADF);
+ }
+
+ return bh;
}
-EXPORT_SYMBOL(iam_format_register);
/*
* Determine format of given container. This is done by scanning list of
}
result = -ENOENT;
- cfs_list_for_each_entry(fmt, &iam_formats, if_linkage) {
+ list_for_each_entry(fmt, &iam_formats, if_linkage) {
result = fmt->if_guess(c);
if (result == 0)
break;
}
- return result;
+
+ if (result == 0) {
+ struct buffer_head *bh;
+ __u32 *idle_blocks;
+
+ LASSERT(c->ic_root_bh != NULL);
+
+ idle_blocks = (__u32 *)(c->ic_root_bh->b_data +
+ c->ic_descr->id_root_gap +
+ sizeof(struct dx_countlimit));
+ mutex_lock(&c->ic_idle_mutex);
+ bh = iam_load_idle_blocks(c, le32_to_cpu(*idle_blocks));
+ if (bh != NULL && IS_ERR(bh))
+ result = PTR_ERR(bh);
+ else
+ c->ic_idle_bh = bh;
+ mutex_unlock(&c->ic_idle_mutex);
+ }
+
+ return result;
}
/*
* Initialize container @c.
*/
int iam_container_init(struct iam_container *c,
- struct iam_descr *descr, struct inode *inode)
+ struct iam_descr *descr, struct inode *inode)
{
- memset(c, 0, sizeof *c);
- c->ic_descr = descr;
- c->ic_object = inode;
- cfs_init_rwsem(&c->ic_sem);
- return 0;
+ memset(c, 0, sizeof *c);
+ c->ic_descr = descr;
+ c->ic_object = inode;
+ init_rwsem(&c->ic_sem);
+ dynlock_init(&c->ic_tree_lock);
+ mutex_init(&c->ic_idle_mutex);
+ return 0;
}
-EXPORT_SYMBOL(iam_container_init);
/*
* Determine container format.
{
return iam_format_guess(c);
}
-EXPORT_SYMBOL(iam_container_setup);
/*
* Finalize container @c, release all resources.
*/
void iam_container_fini(struct iam_container *c)
{
+ brelse(c->ic_idle_bh);
+ c->ic_idle_bh = NULL;
+ brelse(c->ic_root_bh);
+ c->ic_root_bh = NULL;
}
-EXPORT_SYMBOL(iam_container_fini);
void iam_path_init(struct iam_path *path, struct iam_container *c,
struct iam_path_descr *pd)
for (i = 0; i < ARRAY_SIZE(path->ip_frames); i++) {
if (path->ip_frames[i].bh != NULL) {
+ path->ip_frames[i].at_shifted = 0;
brelse(path->ip_frames[i].bh);
path->ip_frames[i].bh = NULL;
}
ipd->ipd_key_scratch[i] = karea;
return ipd;
}
-EXPORT_SYMBOL(iam_ipd_alloc);
void iam_ipd_free(struct iam_path_descr *ipd)
{
}
-EXPORT_SYMBOL(iam_ipd_free);
int iam_node_read(struct iam_container *c, iam_ptr_t ptr,
handle_t *h, struct buffer_head **bh)
{
int result = 0;
+	/* NB: this can be called by iam_lfix_guess(), which runs at a
+	 * very early stage when c->ic_root_bh and c->ic_descr->id_ops
+	 * have not been initialized yet.
+	 * Also, we don't have this for IAM dir.
+ */
+ if (c->ic_root_bh != NULL &&
+ c->ic_descr->id_ops->id_root_ptr(c) == ptr) {
+ get_bh(c->ic_root_bh);
+ *bh = c->ic_root_bh;
+ return 0;
+ }
+
*bh = ldiskfs_bread(h, c->ic_object, (int)ptr, 0, &result);
if (*bh == NULL)
result = -EIO;
static int iam_leaf_load(struct iam_path *path)
{
- iam_ptr_t block;
- int err;
- struct iam_container *c;
- struct buffer_head *bh;
- struct iam_leaf *leaf;
- struct iam_descr *descr;
-
- c = path->ip_container;
- leaf = &path->ip_leaf;
- descr = iam_path_descr(path);
- block = path->ip_frame->leaf;
- if (block == 0) {
- /* XXX bug 11027 */
- printk(CFS_KERN_EMERG "wrong leaf: %lu %d [%p %p %p]\n",
- (long unsigned)path->ip_frame->leaf,
- dx_get_count(dx_node_get_entries(path, path->ip_frame)),
- path->ip_frames[0].bh, path->ip_frames[1].bh,
- path->ip_frames[2].bh);
- }
- err = descr->id_ops->id_node_read(c, block, NULL, &bh);
- if (err == 0) {
- leaf->il_bh = bh;
- leaf->il_curidx = block;
- err = iam_leaf_ops(leaf)->init(leaf);
- assert_inv(ergo(err == 0, iam_leaf_check(leaf)));
- }
- return err;
-}
-
-static void iam_unlock_htree(struct inode *dir, struct dynlock_handle *lh)
-{
- if (lh != NULL)
- dynlock_unlock(&LDISKFS_I(dir)->i_htree_lock, lh);
+ iam_ptr_t block;
+ int err;
+ struct iam_container *c;
+ struct buffer_head *bh;
+ struct iam_leaf *leaf;
+ struct iam_descr *descr;
+
+ c = path->ip_container;
+ leaf = &path->ip_leaf;
+ descr = iam_path_descr(path);
+ block = path->ip_frame->leaf;
+ if (block == 0) {
+ /* XXX bug 11027 */
+ printk(KERN_EMERG "wrong leaf: %lu %d [%p %p %p]\n",
+ (long unsigned)path->ip_frame->leaf,
+ dx_get_count(dx_node_get_entries(path, path->ip_frame)),
+ path->ip_frames[0].bh, path->ip_frames[1].bh,
+ path->ip_frames[2].bh);
+ }
+ err = descr->id_ops->id_node_read(c, block, NULL, &bh);
+ if (err == 0) {
+ leaf->il_bh = bh;
+ leaf->il_curidx = block;
+ err = iam_leaf_ops(leaf)->init(leaf);
+ assert_inv(ergo(err == 0, iam_leaf_check(leaf)));
+ }
+ return err;
+}
+
+static void iam_unlock_htree(struct iam_container *ic,
+ struct dynlock_handle *lh)
+{
+ if (lh != NULL)
+ dynlock_unlock(&ic->ic_tree_lock, lh);
}
static void iam_leaf_unlock(struct iam_leaf *leaf)
{
if (leaf->il_lock != NULL) {
- iam_unlock_htree(iam_leaf_container(leaf)->ic_object,
- leaf->il_lock);
- do_corr(schedule());
- leaf->il_lock = NULL;
+ iam_unlock_htree(iam_leaf_container(leaf),
+ leaf->il_lock);
+ do_corr(schedule());
+ leaf->il_lock = NULL;
}
}
return iam_leaf_ops(leaf)->at_end(leaf);
}
-void iam_leaf_split(struct iam_leaf *l, struct buffer_head **bh, iam_ptr_t nr)
+static void iam_leaf_split(struct iam_leaf *l, struct buffer_head **bh,
+ iam_ptr_t nr)
{
iam_leaf_ops(l)->split(l, bh, nr);
}
+static inline int iam_leaf_empty(struct iam_leaf *l)
+{
+ return iam_leaf_ops(l)->leaf_empty(l);
+}
+
int iam_leaf_can_add(const struct iam_leaf *l,
const struct iam_key *k, const struct iam_rec *r)
{
{
int result;
- result = ldiskfs_journal_dirty_metadata(handle, bh);
+ result = ldiskfs_handle_dirty_metadata(handle, NULL, bh);
if (result != 0)
ldiskfs_std_error(iam_path_obj(path)->i_sb, result);
return result;
void iam_container_write_lock(struct iam_container *ic)
{
- cfs_down_write(&ic->ic_sem);
+ down_write(&ic->ic_sem);
}
void iam_container_write_unlock(struct iam_container *ic)
{
- cfs_up_write(&ic->ic_sem);
+ up_write(&ic->ic_sem);
}
void iam_container_read_lock(struct iam_container *ic)
{
- cfs_down_read(&ic->ic_sem);
+ down_read(&ic->ic_sem);
}
void iam_container_read_unlock(struct iam_container *ic)
{
- cfs_up_read(&ic->ic_sem);
+ up_read(&ic->ic_sem);
}
/*
iam_path_init(&it->ii_path, c, pd);
return 0;
}
-EXPORT_SYMBOL(iam_it_init);
/*
* Finalize iterator and release all resources.
assert_corr(it_state(it) == IAM_IT_DETACHED);
iam_path_fini(&it->ii_path);
}
-EXPORT_SYMBOL(iam_it_fini);
/*
* this locking primitives are used to protect parts
* of dir's htree. protection unit is block: leaf or index
*/
-struct dynlock_handle *iam_lock_htree(struct inode *dir, unsigned long value,
- enum dynlock_type lt)
+static struct dynlock_handle *iam_lock_htree(struct iam_container *ic,
+ unsigned long value,
+ enum dynlock_type lt)
{
- return dynlock_lock(&LDISKFS_I(dir)->i_htree_lock, value, lt, GFP_NOFS);
+ return dynlock_lock(&ic->ic_tree_lock, value, lt, GFP_NOFS);
}
-
-
-int iam_index_lock(struct iam_path *path, struct dynlock_handle **lh)
+static int iam_index_lock(struct iam_path *path, struct dynlock_handle **lh)
{
struct iam_frame *f;
- for (f = path->ip_frame; f >= path->ip_frames; --f, ++lh) {
- do_corr(schedule());
- *lh = iam_lock_htree(iam_path_obj(path), f->curidx, DLT_READ);
- if (*lh == NULL)
- return -ENOMEM;
- }
- return 0;
+ for (f = path->ip_frame; f >= path->ip_frames; --f, ++lh) {
+ do_corr(schedule());
+ *lh = iam_lock_htree(path->ip_container, f->curidx, DLT_READ);
+ if (*lh == NULL)
+ return -ENOMEM;
+ }
+ return 0;
}
/*
*
*/
-struct iam_entry *iam_find_position(struct iam_path *path,
- struct iam_frame *frame)
+static struct iam_entry *iam_find_position(struct iam_path *path,
+ struct iam_frame *frame)
{
int count;
struct iam_entry *p;
* Performs path lookup and returns with found leaf (if any) locked by htree
* lock.
*/
-int iam_lookup_lock(struct iam_path *path,
- struct dynlock_handle **dl, enum dynlock_type lt)
+static int iam_lookup_lock(struct iam_path *path,
+ struct dynlock_handle **dl, enum dynlock_type lt)
{
int result;
- struct inode *dir;
- dir = iam_path_obj(path);
while ((result = __iam_path_lookup(path)) == 0) {
do_corr(schedule());
- *dl = iam_lock_htree(dir, path->ip_frame->leaf, lt);
- if (*dl == NULL) {
- iam_path_fini(path);
- result = -ENOMEM;
- break;
- }
+ *dl = iam_lock_htree(path->ip_container, path->ip_frame->leaf,
+ lt);
+ if (*dl == NULL) {
+ iam_path_fini(path);
+ result = -ENOMEM;
+ break;
+ }
do_corr(schedule());
/*
* while locking leaf we just found may get split so we need
*/
if (iam_check_full_path(path, 1) == 0)
break;
- iam_unlock_htree(dir, *dl);
+ iam_unlock_htree(path->ip_container, *dl);
*dl = NULL;
iam_path_fini(path);
}
static int iam_path_lookup(struct iam_path *path, int index)
{
struct iam_container *c;
- struct iam_descr *descr;
struct iam_leaf *leaf;
int result;
c = path->ip_container;
leaf = &path->ip_leaf;
- descr = iam_path_descr(path);
result = iam_lookup_lock(path, &leaf->il_lock, DLT_WRITE);
assert_inv(iam_path_check(path));
do_corr(schedule());
it_keycmp(it, k) <= 0));
return result;
}
-EXPORT_SYMBOL(iam_it_get);
/*
* Attach iterator by index key.
assert_corr(ergo(result >= 0, it_state(it) == IAM_IT_ATTACHED));
return result;
}
-EXPORT_SYMBOL(iam_it_get_at);
/*
* Duplicates iterator.
iam_leaf_fini(&it->ii_path.ip_leaf);
}
}
-EXPORT_SYMBOL(iam_it_put);
static struct iam_ikey *iam_it_ikey_get(const struct iam_iterator *it,
struct iam_ikey *ikey);
while (1) {
do_corr(schedule());
iam_lock_bh(p->bh);
- p->at = iam_entry_shift(path, p->at, +1);
+ if (p->at_shifted)
+ p->at_shifted = 0;
+ else
+ p->at = iam_entry_shift(path, p->at, +1);
if (p->at < iam_entry_shift(path, p->entries,
dx_get_count(p->entries))) {
p->leaf = dx_get_block(path, p->at);
return iam_htree_advance(iam_path_obj(path), 0, path, NULL, 0);
}
-static void iam_unlock_array(struct inode *dir, struct dynlock_handle **lh)
+static void iam_unlock_array(struct iam_container *ic,
+ struct dynlock_handle **lh)
{
int i;
for (i = 0; i < DX_MAX_TREE_HEIGHT; ++i, ++lh) {
- if (*lh != NULL) {
- iam_unlock_htree(dir, *lh);
- *lh = NULL;
- }
+ if (*lh != NULL) {
+ iam_unlock_htree(ic, *lh);
+ *lh = NULL;
+ }
}
}
/*
int iam_index_next(struct iam_container *c, struct iam_path *path)
{
iam_ptr_t cursor;
- struct dynlock_handle *lh[DX_MAX_TREE_HEIGHT] = { 0, };
+ struct dynlock_handle *lh[DX_MAX_TREE_HEIGHT] = { NULL, };
int result;
struct inode *object;
break;
}
do {
- iam_unlock_array(object, lh);
+ iam_unlock_array(c, lh);
iam_path_release(path);
do_corr(schedule());
result = iam_check_full_path(path, 0);
if (result != 0)
break;
- iam_unlock_array(object, lh);
+ iam_unlock_array(c, lh);
}
} while (result == -EAGAIN);
if (result < 0)
break;
}
- iam_unlock_array(object, lh);
+ iam_unlock_array(c, lh);
return result;
}
int result;
struct iam_path *path;
struct iam_leaf *leaf;
- struct inode *obj;
do_corr(struct iam_ikey *ik_orig);
/* assert_corr(it->ii_flags&IAM_IT_MOVE); */
path = &it->ii_path;
leaf = &path->ip_leaf;
- obj = iam_path_obj(path);
assert_corr(iam_leaf_is_locked(leaf));
result = iam_index_next(iam_it_container(it), path);
assert_corr(iam_leaf_is_locked(leaf));
if (result == 1) {
- struct dynlock_handle *lh;
- lh = iam_lock_htree(obj, path->ip_frame->leaf,
- DLT_WRITE);
+ struct dynlock_handle *lh;
+ lh = iam_lock_htree(iam_it_container(it),
+ path->ip_frame->leaf,
+ DLT_WRITE);
if (lh != NULL) {
iam_leaf_fini(leaf);
leaf->il_lock = lh;
it_ikeycmp(it, ik_orig) >= 0));
return result;
}
-EXPORT_SYMBOL(iam_it_next);
/*
* Return pointer to the record under iterator.
assert_corr(it_at_rec(it));
return iam_leaf_rec(&it->ii_path.ip_leaf);
}
-EXPORT_SYMBOL(iam_it_rec_get);
static void iam_it_reccpy(struct iam_iterator *it, const struct iam_rec *r)
{
}
return result;
}
-EXPORT_SYMBOL(iam_it_rec_set);
/*
* Return pointer to the index key under iterator.
assert_corr(it_at_rec(it));
return iam_leaf_key(&it->ii_path.ip_leaf);
}
-EXPORT_SYMBOL(iam_it_key_get);
/*
* Return size of key under iterator (in bytes)
assert_corr(it_at_rec(it));
return iam_leaf_key_size(&it->ii_path.ip_leaf);
}
-EXPORT_SYMBOL(iam_it_key_size);
+
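+/*
+ * Allocate a block for a new IAM node, preferring the per-file idle pool:
+ * pop the last blk# from the current idle table if it is non-empty;
+ * otherwise consume the exhausted table block itself as the new node and
+ * advance ic_idle_bh to the next table in the chain; fall back to growing
+ * the file via osd_ldiskfs_append() only when no idle blocks are available.
+ */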
+static struct buffer_head *
+iam_new_node(handle_t *h, struct iam_container *c, iam_ptr_t *b, int *e)
+{
+ struct inode *inode = c->ic_object;
+ struct buffer_head *bh = NULL;
+ struct iam_idle_head *head;
+ struct buffer_head *idle;
+ __u32 *idle_blocks;
+ __u16 count;
+
+ if (c->ic_idle_bh == NULL)
+ goto newblock;
+
+ mutex_lock(&c->ic_idle_mutex);
+ if (unlikely(c->ic_idle_bh == NULL)) {
+ mutex_unlock(&c->ic_idle_mutex);
+ goto newblock;
+ }
+
+ head = (struct iam_idle_head *)(c->ic_idle_bh->b_data);
+ count = le16_to_cpu(head->iih_count);
+ if (count > 0) {
+ *e = ldiskfs_journal_get_write_access(h, c->ic_idle_bh);
+ if (*e != 0)
+ goto fail;
+
+ --count;
+ *b = le32_to_cpu(head->iih_blks[count]);
+ head->iih_count = cpu_to_le16(count);
+ *e = ldiskfs_handle_dirty_metadata(h, inode, c->ic_idle_bh);
+ if (*e != 0)
+ goto fail;
+
+ mutex_unlock(&c->ic_idle_mutex);
+ bh = ldiskfs_bread(NULL, inode, *b, 0, e);
+ if (bh == NULL)
+ return NULL;
+ goto got;
+ }
+
+	/* The block that contains the iam_idle_head itself is
+	 * also an idle block, and can be used as the new node. */
+ idle_blocks = (__u32 *)(c->ic_root_bh->b_data +
+ c->ic_descr->id_root_gap +
+ sizeof(struct dx_countlimit));
+ *e = ldiskfs_journal_get_write_access(h, c->ic_root_bh);
+ if (*e != 0)
+ goto fail;
+
+ *b = le32_to_cpu(*idle_blocks);
+ iam_lock_bh(c->ic_root_bh);
+ *idle_blocks = head->iih_next;
+ iam_unlock_bh(c->ic_root_bh);
+ *e = ldiskfs_handle_dirty_metadata(h, inode, c->ic_root_bh);
+ if (*e != 0) {
+ iam_lock_bh(c->ic_root_bh);
+ *idle_blocks = cpu_to_le32(*b);
+ iam_unlock_bh(c->ic_root_bh);
+ goto fail;
+ }
+
+ bh = c->ic_idle_bh;
+ idle = iam_load_idle_blocks(c, le32_to_cpu(*idle_blocks));
+ if (idle != NULL && IS_ERR(idle)) {
+ *e = PTR_ERR(idle);
+ c->ic_idle_bh = NULL;
+ brelse(bh);
+ goto fail;
+ }
+
+ c->ic_idle_bh = idle;
+ mutex_unlock(&c->ic_idle_mutex);
+
+got:
+ /* get write access for the found buffer head */
+ *e = ldiskfs_journal_get_write_access(h, bh);
+ if (*e != 0) {
+ brelse(bh);
+ bh = NULL;
+ ldiskfs_std_error(inode->i_sb, *e);
+ } else {
+		/* Clear the reused node so it looks like a newly-allocated one. */
+ memset(bh->b_data, 0, inode->i_sb->s_blocksize);
+ set_buffer_uptodate(bh);
+ }
+ return bh;
+
+newblock:
+ bh = osd_ldiskfs_append(h, inode, b, e);
+ return bh;
+
+fail:
+ mutex_unlock(&c->ic_idle_mutex);
+ ldiskfs_std_error(inode->i_sb, *e);
+ return NULL;
+}
/*
* Insertion of new record. Interaction with jbd during non-trivial case (when
* split happens) is as following:
*
- * - new leaf node is involved into transaction by ldiskfs_append();
+ * - new leaf node is involved into transaction by iam_new_node();
*
* - old leaf node is involved into transaction by iam_add_rec();
*
path = leaf->il_path;
obj = c->ic_object;
- new_leaf = ldiskfs_append(handle, obj, (__u32 *)&blknr, &err);
+ new_leaf = iam_new_node(handle, c, &blknr, &err);
do_corr(schedule());
if (new_leaf != NULL) {
struct dynlock_handle *lh;
- lh = iam_lock_htree(obj, blknr, DLT_WRITE);
+ lh = iam_lock_htree(c, blknr, DLT_WRITE);
do_corr(schedule());
if (lh != NULL) {
iam_leaf_ops(leaf)->init_new(c, new_leaf);
leaf->il_lock = lh;
path->ip_frame->leaf = blknr;
} else
- iam_unlock_htree(obj, lh);
+ iam_unlock_htree(path->ip_container, lh);
do_corr(schedule());
err = iam_txn_dirty(handle, path, new_leaf);
brelse(new_leaf);
struct iam_entry *entries; /* old block contents */
struct iam_entry *entries2; /* new block contents */
- struct iam_frame *frame, *safe;
- struct buffer_head *bh_new[DX_MAX_TREE_HEIGHT] = {0};
+ struct iam_frame *frame, *safe;
+ struct buffer_head *bh_new[DX_MAX_TREE_HEIGHT] = {NULL};
u32 newblock[DX_MAX_TREE_HEIGHT] = {0};
struct dynlock_handle *lock[DX_MAX_TREE_HEIGHT] = {NULL,};
struct dynlock_handle *new_lock[DX_MAX_TREE_HEIGHT] = {NULL,};
* Lock all nodes, bottom to top.
*/
for (frame = path->ip_frame, i = nr_splet; i >= 0; --i, --frame) {
- do_corr(schedule());
- lock[i] = iam_lock_htree(dir, frame->curidx, DLT_WRITE);
- if (lock[i] == NULL) {
- err = -ENOMEM;
- goto cleanup;
- }
+ do_corr(schedule());
+ lock[i] = iam_lock_htree(path->ip_container, frame->curidx,
+ DLT_WRITE);
+ if (lock[i] == NULL) {
+ err = -ENOMEM;
+ goto cleanup;
+ }
}
/*
/* Go back down, allocating blocks, locking them, and adding into
* transaction... */
for (frame = safe + 1, i = 0; i < nr_splet; ++i, ++frame) {
- bh_new[i] = ldiskfs_append (handle, dir, &newblock[i], &err);
+ bh_new[i] = iam_new_node(handle, path->ip_container,
+ &newblock[i], &err);
do_corr(schedule());
if (!bh_new[i] ||
descr->id_ops->id_node_init(path->ip_container,
bh_new[i], 0) != 0)
goto cleanup;
- new_lock[i] = iam_lock_htree(dir, newblock[i], DLT_WRITE);
- if (new_lock[i] == NULL) {
- err = -ENOMEM;
- goto cleanup;
- }
+ new_lock[i] = iam_lock_htree(path->ip_container, newblock[i],
+ DLT_WRITE);
+ if (new_lock[i] == NULL) {
+ err = -ENOMEM;
+ goto cleanup;
+ }
do_corr(schedule());
BUFFER_TRACE(frame->bh, "get_write_access");
err = ldiskfs_journal_get_write_access(handle, frame->bh);
++ frame;
assert_inv(dx_node_check(path, frame));
bh_new[0] = NULL; /* buffer head is "consumed" */
- err = ldiskfs_journal_get_write_access(handle, bh2);
+ err = ldiskfs_handle_dirty_metadata(handle, NULL, bh2);
if (err)
goto journal_error;
do_corr(schedule());
dxtrace(dx_show_index ("node", frame->entries));
dxtrace(dx_show_index ("node",
((struct dx_node *) bh2->b_data)->entries));
- err = ldiskfs_journal_dirty_metadata(handle, bh2);
+ err = ldiskfs_handle_dirty_metadata(handle, NULL, bh2);
if (err)
goto journal_error;
do_corr(schedule());
- err = ldiskfs_journal_dirty_metadata(handle, parent->bh);
+ err = ldiskfs_handle_dirty_metadata(handle, NULL,
+ parent->bh);
if (err)
goto journal_error;
}
do_corr(schedule());
- err = ldiskfs_journal_dirty_metadata(handle, bh);
+ err = ldiskfs_handle_dirty_metadata(handle, NULL, bh);
if (err)
goto journal_error;
}
ldiskfs_std_error(dir->i_sb, err);
cleanup:
- iam_unlock_array(dir, lock);
- iam_unlock_array(dir, new_lock);
+ iam_unlock_array(path->ip_container, lock);
+ iam_unlock_array(path->ip_container, new_lock);
assert_corr(err || iam_frame_is_locked(path, path->ip_frame));
err = iam_txn_dirty(handle, path,
path->ip_frame->bh);
}
- iam_unlock_htree(iam_path_obj(path), lh);
+ iam_unlock_htree(path->ip_container, lh);
do_corr(schedule());
}
if (err == 0) {
it_keycmp(it, k) == 0));
return result;
}
-EXPORT_SYMBOL(iam_it_rec_insert);
+
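+/*
+ * Number of idle blk#s one table can hold: e.g. with a 4KB block size and
+ * the 8-byte iam_idle_head sketched earlier (an assumed layout), this is
+ * (4096 - 8) >> 2 = 1022 entries.
+ */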
+static inline int iam_idle_blocks_limit(struct inode *inode)
+{
+ return (inode->i_sb->s_blocksize - sizeof(struct iam_idle_head)) >> 2;
+}
+
+/*
+ * If the leaf cannot be recycled, we lose one block that could have been
+ * reused. This is not a serious issue, because the result is almost the
+ * same as not recycling at all.
+ */
+static iam_ptr_t iam_index_shrink(handle_t *h, struct iam_path *p,
+ struct iam_leaf *l, struct buffer_head **bh)
+{
+ struct iam_container *c = p->ip_container;
+ struct inode *inode = c->ic_object;
+ struct iam_frame *frame = p->ip_frame;
+ struct iam_entry *entries;
+ struct iam_entry *pos;
+ struct dynlock_handle *lh;
+ int count;
+ int rc;
+
+ if (c->ic_idle_failed)
+ return 0;
+
+ if (unlikely(frame == NULL))
+ return 0;
+
+ if (!iam_leaf_empty(l))
+ return 0;
+
+ lh = iam_lock_htree(c, frame->curidx, DLT_WRITE);
+ if (lh == NULL) {
+ CWARN("%.16s: No memory to recycle idle blocks\n",
+ LDISKFS_SB(inode->i_sb)->s_es->s_volume_name);
+ return 0;
+ }
+
+ rc = iam_txn_add(h, p, frame->bh);
+ if (rc != 0) {
+ iam_unlock_htree(c, lh);
+ return 0;
+ }
+
+ iam_lock_bh(frame->bh);
+ entries = frame->entries;
+ count = dx_get_count(entries);
+	/* Do NOT shrink the last entry in the index node; it can be reused
+	 * directly by the next new node. */
+ if (count == 2) {
+ iam_unlock_bh(frame->bh);
+ iam_unlock_htree(c, lh);
+ return 0;
+ }
+
+ pos = iam_find_position(p, frame);
+	/* New leaf nodes may have been added, or empty leaf nodes shrunk,
+	 * while this delete operation was in progress.
+	 *
+	 * If the empty leaf is no longer under the current index node because
+	 * the index node has been split, just skip it; this case is rare. */
+ if (unlikely(frame->leaf != dx_get_block(p, pos))) {
+ iam_unlock_bh(frame->bh);
+ iam_unlock_htree(c, lh);
+ return 0;
+ }
+
+ frame->at = pos;
+ if (frame->at < iam_entry_shift(p, entries, count - 1)) {
+ struct iam_entry *n = iam_entry_shift(p, frame->at, 1);
+
+ memmove(frame->at, n,
+ (char *)iam_entry_shift(p, entries, count) - (char *)n);
+ frame->at_shifted = 1;
+ }
+ dx_set_count(entries, count - 1);
+ iam_unlock_bh(frame->bh);
+ rc = iam_txn_dirty(h, p, frame->bh);
+ iam_unlock_htree(c, lh);
+ if (rc != 0)
+ return 0;
+
+ get_bh(l->il_bh);
+ *bh = l->il_bh;
+ return frame->leaf;
+}
+
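+/*
+ * Turn the just-emptied leaf @bh (logical block @blk) into a new idle-blocks
+ * table: initialize its iam_idle_head, chain the previous first table via
+ * iih_next, and point the root's "idle_blocks" field at @blk. On failure,
+ * the root's pointer is rolled back.
+ */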
+static int
+iam_install_idle_blocks(handle_t *h, struct iam_path *p, struct buffer_head *bh,
+ __u32 *idle_blocks, iam_ptr_t blk)
+{
+ struct iam_container *c = p->ip_container;
+ struct buffer_head *old = c->ic_idle_bh;
+ struct iam_idle_head *head;
+ int rc;
+
+ head = (struct iam_idle_head *)(bh->b_data);
+ head->iih_magic = cpu_to_le16(IAM_IDLE_HEADER_MAGIC);
+ head->iih_count = 0;
+ head->iih_next = *idle_blocks;
+	/* The bh has already been journal write-accessed. */
+ rc = iam_txn_dirty(h, p, bh);
+ if (rc != 0)
+ return rc;
+
+ rc = iam_txn_add(h, p, c->ic_root_bh);
+ if (rc != 0)
+ return rc;
+
+ iam_lock_bh(c->ic_root_bh);
+ *idle_blocks = cpu_to_le32(blk);
+ iam_unlock_bh(c->ic_root_bh);
+ rc = iam_txn_dirty(h, p, c->ic_root_bh);
+ if (rc == 0) {
+		/* Do NOT release the old buffer before the new one is assigned. */
+ get_bh(bh);
+ c->ic_idle_bh = bh;
+ brelse(old);
+ } else {
+ iam_lock_bh(c->ic_root_bh);
+ *idle_blocks = head->iih_next;
+ iam_unlock_bh(c->ic_root_bh);
+ }
+ return rc;
+}
+
+/*
+ * If the leaf cannot be recycled, we lose one block that could have been
+ * reused. This is not a serious issue, because the result is almost the
+ * same as not recycling at all.
+ */
+static void iam_recycle_leaf(handle_t *h, struct iam_path *p,
+ struct buffer_head *bh, iam_ptr_t blk)
+{
+ struct iam_container *c = p->ip_container;
+ struct inode *inode = c->ic_object;
+ struct iam_idle_head *head;
+ __u32 *idle_blocks;
+ int count;
+ int rc;
+
+ mutex_lock(&c->ic_idle_mutex);
+ if (unlikely(c->ic_idle_failed)) {
+ rc = -EFAULT;
+ goto unlock;
+ }
+
+ idle_blocks = (__u32 *)(c->ic_root_bh->b_data +
+ c->ic_descr->id_root_gap +
+ sizeof(struct dx_countlimit));
+ /* It is the first idle block. */
+ if (c->ic_idle_bh == NULL) {
+ rc = iam_install_idle_blocks(h, p, bh, idle_blocks, blk);
+ goto unlock;
+ }
+
+ head = (struct iam_idle_head *)(c->ic_idle_bh->b_data);
+ count = le16_to_cpu(head->iih_count);
+ /* Current ic_idle_bh is full, to be replaced by the leaf. */
+ if (count == iam_idle_blocks_limit(inode)) {
+ rc = iam_install_idle_blocks(h, p, bh, idle_blocks, blk);
+ goto unlock;
+ }
+
+ /* Just add to ic_idle_bh. */
+ rc = iam_txn_add(h, p, c->ic_idle_bh);
+ if (rc != 0)
+ goto unlock;
+
+ head->iih_blks[count] = cpu_to_le32(blk);
+ head->iih_count = cpu_to_le16(count + 1);
+ rc = iam_txn_dirty(h, p, c->ic_idle_bh);
+
+unlock:
+ mutex_unlock(&c->ic_idle_mutex);
+ if (rc != 0)
+ CWARN("%.16s: idle blocks failed, will lose the blk %u\n",
+ LDISKFS_SB(inode->i_sb)->s_es->s_volume_name, blk);
+}
/*
* Delete record under iterator.
if (result == 0) {
iam_rec_del(leaf, it->ii_flags&IAM_IT_MOVE);
result = iam_txn_dirty(h, path, leaf->il_bh);
- if (result == 0 && iam_leaf_at_end(leaf) &&
- it->ii_flags&IAM_IT_MOVE) {
- result = iam_it_next(it);
- if (result > 0)
- result = 0;
- }
+ if (result == 0 && iam_leaf_at_end(leaf)) {
+ struct buffer_head *bh = NULL;
+ iam_ptr_t blk;
+
+ blk = iam_index_shrink(h, path, leaf, &bh);
+ if (it->ii_flags & IAM_IT_MOVE) {
+ result = iam_it_next(it);
+ if (result > 0)
+ result = 0;
+ }
+
+ if (bh != NULL) {
+ iam_recycle_leaf(h, path, bh, blk);
+ brelse(bh);
+ }
+ }
}
assert_inv(iam_leaf_check(leaf));
assert_inv(iam_path_check(path));
it_state(it) == IAM_IT_DETACHED);
return result;
}
-EXPORT_SYMBOL(iam_it_rec_delete);
/*
* Convert iterator to cookie.
result = 0;
return *(iam_pos_t *)iam_it_ikey_get(it, (void *)&result);
}
-EXPORT_SYMBOL(iam_it_store);
/*
* Restore iterator from cookie.
assert_corr(iam_it_container(it)->ic_descr->id_ikey_size <= sizeof pos);
return iam_it_iget(it, (struct iam_ikey *)&pos);
}
-EXPORT_SYMBOL(iam_it_load);
/***********************************************************************/
/* invariants */
return (base <= ptr) && (ptr < base + size);
}
-int iam_frame_invariant(struct iam_frame *f)
+static int iam_frame_invariant(struct iam_frame *f)
{
return
(f->bh != NULL &&
ptr_inside(f->bh->b_data, f->bh->b_size, f->at) &&
f->entries <= f->at);
}
-int iam_leaf_invariant(struct iam_leaf *l)
+
+static int iam_leaf_invariant(struct iam_leaf *l)
{
return
l->il_bh != NULL &&
l->il_entries <= l->il_at;
}
-int iam_path_invariant(struct iam_path *p)
+static int iam_path_invariant(struct iam_path *p)
{
int i;
iam_it_fini(&it);
return result;
}
-EXPORT_SYMBOL(iam_lookup);
/*
* Insert new record @r with key @k into container @c (within context of
iam_it_fini(&it);
return result;
}
-EXPORT_SYMBOL(iam_insert);
/*
* Update record with the key @k in container @c (within context of
* transaction @h), new record is given by @r.
*
- * Return values: 0: success, -ve: error, including -ENOENT if no record with
- * the given key found.
+ * Return values: +1: the update was skipped because the existing record
+ * already equals @r, 0: success, -ve: error, including -ENOENT if no record
+ * with the given key was found.
*/
int iam_update(handle_t *h, struct iam_container *c, const struct iam_key *k,
const struct iam_rec *r, struct iam_path_descr *pd)
{
struct iam_iterator it;
- int result;
-
- iam_it_init(&it, c, IAM_IT_WRITE, pd);
-
- result = iam_it_get_exact(&it, k);
- if (result == 0)
- iam_it_rec_set(h, &it, r);
+ struct iam_leaf *folio;
+ int result;
+
+ iam_it_init(&it, c, IAM_IT_WRITE, pd);
+
+ result = iam_it_get_exact(&it, k);
+ if (result == 0) {
+ folio = &it.ii_path.ip_leaf;
+ result = iam_leaf_ops(folio)->rec_eq(folio, r);
+ if (result == 0)
+ iam_it_rec_set(h, &it, r);
+ else
+ result = 1;
+ }
iam_it_put(&it);
iam_it_fini(&it);
return result;
}
-EXPORT_SYMBOL(iam_update);
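+
+/*
+ * Illustrative caller pattern (not part of this patch): a caller that only
+ * cares about the final on-disk state can fold the new +1 return into
+ * success:
+ *
+ *	rc = iam_update(h, c, k, r, pd);
+ *	if (rc == 1)
+ *		rc = 0;	(the record already held @r)
+ */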
/*
* Delete existing record with key @k.
iam_it_fini(&it);
return result;
}
-EXPORT_SYMBOL(iam_delete);
int iam_root_limit(int rootgap, int blocksize, int size)
{