Bugzilla : 21961/17914
Description: ignore trailing -mdc when determining index number
-Severity : normal
-Bugzilla : 20433
-Description: decrease the usage of memory on clients.
-Details : 1. On clients, recycle dentries and inodes unused.
- 2. Delete the code related to ll_deathrow(att 6215 in bug 1443). It
- is useless now.
-------------------------------------------------------------------------------
2010-01-29 Sun Microsystems, Inc.
])
#
+# LC_EXPORT___IGET
+# starting from 2.6.19, the Linux kernel exports __iget()
+#
+AC_DEFUN([LC_EXPORT___IGET],
+[LB_CHECK_SYMBOL_EXPORT([__iget],
+[fs/inode.c],[
+ AC_DEFINE(HAVE_EXPORT___IGET, 1, [kernel exports __iget])
+],[
+])
+])
+
+#
# only for Lustre-patched kernels
#
AC_DEFUN([LC_LUSTRE_VERSION_H],
ldlm_res_iterator_t iter, void *closure);
int ldlm_replay_locks(struct obd_import *imp);
-int ldlm_resource_iterate(struct ldlm_namespace *, struct ldlm_res_id *,
- ldlm_iterator_t iter, void *data);
+void ldlm_resource_iterate(struct ldlm_namespace *, struct ldlm_res_id *,
+ ldlm_iterator_t iter, void *data);
/* ldlm_flock.c */
int ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data);
void mdc_set_lock_data(__u64 *lockh, void *data, __u32 *lockbits);
int mdc_change_cbdata(struct obd_export *exp, struct ll_fid *fid,
ldlm_iterator_t it, void *data);
-int mdc_find_cbdata(struct obd_export *exp, struct ll_fid *fid,
- ldlm_iterator_t it, void *data);
int mdc_revalidate_lock(struct obd_export *exp,
struct lookup_intent *it,
struct ll_fid *fid);
struct lustre_handle *lockh, int *n_matches);
int (*o_change_cbdata)(struct obd_export *, struct lov_stripe_md *,
ldlm_iterator_t it, void *data);
- int (*o_find_cbdata)(struct obd_export *, struct lov_stripe_md *,
- ldlm_iterator_t it, void *data);
int (*o_cancel)(struct obd_export *, struct lov_stripe_md *md,
__u32 mode, struct lustre_handle *, int flags,
obd_off end);
RETURN(rc);
}
-static inline int obd_find_cbdata(struct obd_export *exp,
- struct lov_stripe_md *lsm,
- ldlm_iterator_t it, void *data)
-{
- int rc;
- ENTRY;
-
- EXP_CHECK_OP(exp, find_cbdata);
- EXP_COUNTER_INCREMENT(exp, find_cbdata);
-
- rc = OBP(exp->exp_obd, find_cbdata)(exp, lsm, it, data);
- RETURN(rc);
-}
-
static inline int obd_cancel(struct obd_export *exp, struct lov_stripe_md *ea,
__u32 mode, struct lustre_handle *lockh, int flags,
obd_off end)
RETURN(rc);
}
-/* non-blocking function to manipulate a lock whose cb_data is being put away.
- * return 0: find no resource
- * > 0: must be LDLM_ITER_STOP/LDLM_ITER_CONTINUE.
- * < 0: errors
- */
-int ldlm_resource_iterate(struct ldlm_namespace *ns, struct ldlm_res_id *res_id,
+/* non-blocking function to manipulate a lock whose cb_data is being put away.*/
+void ldlm_resource_iterate(struct ldlm_namespace *ns, struct ldlm_res_id *res_id,
ldlm_iterator_t iter, void *data)
{
struct ldlm_resource *res;
- int rc;
ENTRY;
if (ns == NULL) {
}
res = ldlm_resource_get(ns, NULL, *res_id, 0, 0);
- if (res == NULL)
- RETURN(0);
+ if (res == NULL) {
+ EXIT;
+ return;
+ }
- rc = ldlm_resource_foreach(res, iter, data);
+ ldlm_resource_foreach(res, iter, data);
ldlm_resource_putref(res);
- RETURN(rc);
+ EXIT;
}
/* Lock replay */
RETURN(0);
}
-static inline int return_if_equal(struct ldlm_lock *lock, void *data)
-{
- return LDLM_ITER_STOP;
-}
-
-/* find any ldlm lock of the inode in mdc and lov
- * return 0 not find
- * 1 find one
- * < 0 error */
-int find_cbdata(struct inode *inode)
-{
- struct ll_fid fid;
- struct ll_inode_info *lli = ll_i2info(inode);
- struct ll_sb_info *sbi = ll_i2sbi(inode);
- int rc = 0;
- ENTRY;
-
- LASSERT(inode);
- ll_inode2fid(&fid, inode);
- rc = mdc_find_cbdata(sbi->ll_mdc_exp, &fid, return_if_equal, NULL);
- if (rc != 0)
- RETURN(rc);
-
- if (lli->lli_smd)
- rc = obd_find_cbdata(sbi->ll_osc_exp, lli->lli_smd,
- return_if_equal, NULL);
-
- RETURN(rc);
-}
-
/* should NOT be called with the dcache lock, see fs/dcache.c */
static int ll_ddelete(struct dentry *de)
{
d_unhashed(de) ? "" : "hashed,",
list_empty(&de->d_subdirs) ? "" : "subdirs");
- /* if not ldlm lock for this inode, set i_nlink to 0 so that
- * this inode can be recycled later b=20433 */
- LASSERT(atomic_read(&de->d_count) == 0);
- if (de->d_inode && !find_cbdata(de->d_inode))
- de->d_inode->i_nlink = 0;
-
- if (de->d_flags & DCACHE_LUSTRE_INVALID)
- RETURN(1);
-
RETURN(0);
}
}
#endif
-void ll_d_iput(struct dentry *de, struct inode *inode)
-{
- LASSERT(inode);
- if (inode && !find_cbdata(inode))
- inode->i_nlink = 0;
- iput(inode);
-}
-
struct dentry_operations ll_d_ops = {
.d_revalidate = ll_revalidate_nd,
.d_release = ll_release,
.d_delete = ll_ddelete,
- .d_iput = ll_d_iput,
#ifdef DCACHE_LUSTRE_INVALID
.d_compare = ll_dcompare,
#endif
unsigned int ll_namelen;
struct file_operations *ll_fop;
+#ifdef HAVE_EXPORT___IGET
+ struct list_head ll_deathrow; /* inodes to be destroyed (b1443) */
+ spinlock_t ll_deathrow_lock;
+#endif
/* =0 - hold lock over whole read/write
* >0 - max. chunk to be read/written w/o lock re-acquiring */
unsigned long ll_max_rw_chunk;
sbi->ll_flags |= LL_SBI_LRU_RESIZE;
#endif
+#ifdef HAVE_EXPORT___IGET
+ INIT_LIST_HEAD(&sbi->ll_deathrow);
+ spin_lock_init(&sbi->ll_deathrow_lock);
+#endif
for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) {
spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].pp_r_hist.oh_lock);
spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].pp_w_hist.oh_lock);
}
}
+#ifdef HAVE_EXPORT___IGET
+static void prune_dir_dentries(struct inode *inode)
+{
+ struct dentry *dentry, *prev = NULL;
+
+ /* due to Lustre-specific logic, a directory can have
+ * several dentries - a bug from the VFS point of view */
+restart:
+ spin_lock(&dcache_lock);
+ if (!list_empty(&inode->i_dentry)) {
+ dentry = list_entry(inode->i_dentry.prev,
+ struct dentry, d_alias);
+ /* to prevent an infinite loop, stop when the same dentry
+ * shows up again, i.e. it is busy and was not pruned */
+ if (dentry != prev) {
+ prev = dentry;
+ dget_locked(dentry);
+ spin_unlock(&dcache_lock);
+
+ /* try to kill all child dentries */
+ shrink_dcache_parent(dentry);
+ dput(dentry);
+
+ /* now try to get rid of current dentry */
+ d_prune_aliases(inode);
+ goto restart;
+ }
+ }
+ spin_unlock(&dcache_lock);
+}
+
+static void prune_deathrow_one(struct ll_inode_info *lli)
+{
+ struct inode *inode = ll_info2i(lli);
+
+ /* first, try to drop any dentries - they hold a ref on the inode */
+ if (S_ISDIR(inode->i_mode))
+ prune_dir_dentries(inode);
+ else
+ d_prune_aliases(inode);
+
+
+ /* if somebody still uses it, leave it */
+ LASSERT(atomic_read(&inode->i_count) > 0);
+ if (atomic_read(&inode->i_count) > 1)
+ goto out;
+
+ CDEBUG(D_INODE, "inode %lu/%u(%d) looks a good candidate for prune\n",
+ inode->i_ino,inode->i_generation, atomic_read(&inode->i_count));
+
+ /* seems nobody uses it anymore */
+ inode->i_nlink = 0;
+
+out:
+ iput(inode);
+ return;
+}
+
+static void prune_deathrow(struct ll_sb_info *sbi, int try)
+{
+ struct ll_inode_info *lli;
+ int empty;
+
+ do {
+ if (need_resched() && try)
+ break;
+
+ if (try) {
+ if (!spin_trylock(&sbi->ll_deathrow_lock))
+ break;
+ } else {
+ spin_lock(&sbi->ll_deathrow_lock);
+ }
+
+ empty = 1;
+ lli = NULL;
+ if (!list_empty(&sbi->ll_deathrow)) {
+ lli = list_entry(sbi->ll_deathrow.next,
+ struct ll_inode_info,
+ lli_dead_list);
+ list_del_init(&lli->lli_dead_list);
+ if (!list_empty(&sbi->ll_deathrow))
+ empty = 0;
+ }
+ spin_unlock(&sbi->ll_deathrow_lock);
+
+ if (lli)
+ prune_deathrow_one(lli);
+
+ } while (empty == 0);
+}
+#else /* !HAVE_EXPORT___IGET */
+#define prune_deathrow(sbi, try) do {} while (0)
+#endif /* HAVE_EXPORT___IGET */
+
void client_common_put_super(struct super_block *sb)
{
struct ll_sb_info *sbi = ll_s2sbi(sb);
lprocfs_unregister_mountpoint(sbi);
+ /* destroy inodes in deathrow */
+ prune_deathrow(sbi, 0);
+
list_del(&sbi->ll_conn_chain);
/* callbacks is cleared after disconnect each target */
lli->lli_inode_magic = LLI_INODE_DEAD;
+#ifdef HAVE_EXPORT___IGET
+ spin_lock(&sbi->ll_deathrow_lock);
+ list_del_init(&lli->lli_dead_list);
+ spin_unlock(&sbi->ll_deathrow_lock);
+#endif
+
EXIT;
}
static int ll_setattr_do_truncate(struct inode *inode, loff_t new_size)
LASSERT(*inode || sb);
sbi = sb ? ll_s2sbi(sb) : ll_i2sbi(*inode);
+ prune_deathrow(sbi, 1);
rc = mdc_req2lustre_md(req, offset, exp, &md);
if (rc)
return rc;
}
-/* ll_unlink_generic() doesn't update the inode with the new link count.
- * Instead, ll_ddelete() and ll_d_iput() will update it based upon if there
- * is any lock existing. They will recycle dentries and inodes based upon locks
- * too. b=20433 */
static int ll_unlink_generic(struct inode * dir, struct qstr *name)
{
struct ptlrpc_request *request = NULL;
RETURN(rc);
}
-/* find any ldlm lock of the inode in lov
- * return 0 not find
- * 1 find one
- * < 0 error */
-static int lov_find_cbdata(struct obd_export *exp,
- struct lov_stripe_md *lsm, ldlm_iterator_t it,
- void *data)
-{
- struct lov_obd *lov;
- struct lov_oinfo *loi;
- int rc = 0, i;
- ENTRY;
-
- ASSERT_LSM_MAGIC(lsm);
-
- if (!exp || !exp->exp_obd)
- RETURN(-ENODEV);
-
- lov = &exp->exp_obd->u.lov;
- for (i = 0; i < lsm->lsm_stripe_count; i++) {
- struct lov_stripe_md submd;
-
- loi = lsm->lsm_oinfo[i];
- if (!lov->lov_tgts[loi->loi_ost_idx]) {
- CDEBUG(D_HA, "lov idx %d NULL \n", loi->loi_ost_idx);
- continue;
- }
- submd.lsm_object_id = loi->loi_id;
- submd.lsm_object_gr = loi->loi_gr;
- submd.lsm_stripe_count = 0;
- rc = obd_find_cbdata(lov->lov_tgts[loi->loi_ost_idx]->ltd_exp,
- &submd, it, data);
- if (rc != 0)
- RETURN(rc);
- }
- RETURN(rc);
-}
-
-
static int lov_cancel(struct obd_export *exp, struct lov_stripe_md *lsm,
__u32 mode, struct lustre_handle *lockh, int flags,
obd_off end)
.o_enqueue = lov_enqueue,
.o_match = lov_match,
.o_change_cbdata = lov_change_cbdata,
- .o_find_cbdata = lov_find_cbdata,
.o_cancel = lov_cancel,
.o_cancel_unused = lov_cancel_unused,
.o_join_lru = lov_join_lru,
return 0;
}
-/* find any ldlm lock of the inode in mdc
- * return 0 not find
- * 1 find one
- * < 0 error */
-int mdc_find_cbdata(struct obd_export *exp, struct ll_fid *fid,
- ldlm_iterator_t it, void *data)
-{
- struct ldlm_res_id res_id;
- int rc = 0;
- ENTRY;
-
- fid_build_reg_res_name((struct lu_fid*)fid, &res_id);
- rc = ldlm_resource_iterate(class_exp2obd(exp)->obd_namespace, &res_id,
- it, data);
- if (rc == LDLM_ITER_STOP)
- RETURN(1);
- else if (rc == LDLM_ITER_CONTINUE)
- RETURN(0);
- RETURN(rc);
-}
-
static inline void mdc_clear_replay_flag(struct ptlrpc_request *req, int rc)
{
/* Don't hold error requests for replay. */
EXPORT_SYMBOL(mdc_req2lustre_md);
EXPORT_SYMBOL(mdc_free_lustre_md);
EXPORT_SYMBOL(mdc_change_cbdata);
-EXPORT_SYMBOL(mdc_find_cbdata);
EXPORT_SYMBOL(mdc_getstatus);
EXPORT_SYMBOL(mdc_getattr);
EXPORT_SYMBOL(mdc_getattr_name);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, enqueue);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, match);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, change_cbdata);
- LPROCFS_OBD_OP_INIT(num_private_stats, stats, find_cbdata);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel_unused);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, join_lru);
return 0;
}
-/* find any ldlm lock of the inode in osc
- * return 0 not find
- * 1 find one
- * < 0 error */
-static int osc_find_cbdata(struct obd_export *exp, struct lov_stripe_md *lsm,
- ldlm_iterator_t replace, void *data)
-{
- struct ldlm_res_id res_id;
- struct obd_device *obd = class_exp2obd(exp);
- int rc = 0;
-
- osc_build_res_name(lsm->lsm_object_id, lsm->lsm_object_gr, &res_id);
- rc = ldlm_resource_iterate(obd->obd_namespace, &res_id, replace, data);
- if (rc == LDLM_ITER_STOP)
- return(1);
- if (rc == LDLM_ITER_CONTINUE)
- return(0);
- return(rc);
-}
-
static int osc_enqueue_fini(struct obd_device *obd, struct ptlrpc_request *req,
struct obd_info *oinfo, int intent, int rc)
{
.o_enqueue = osc_enqueue,
.o_match = osc_match,
.o_change_cbdata = osc_change_cbdata,
- .o_find_cbdata = osc_find_cbdata,
.o_cancel = osc_cancel,
.o_cancel_unused = osc_cancel_unused,
.o_join_lru = osc_join_lru,
ONLY=${ONLY:-"$*"}
# bug number for skipped test: 13297 2108 9789 3637 9789 3561 12622 15528/2330 5188 10764 16410
-ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"27u 42a 42b 42c 42d 45 51d 62 68 75 $SANITY_EXCEPT"}
+ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"27u 42a 42b 42c 42d 45 51d 62 68 75 76 $SANITY_EXCEPT"}
# UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT!
# Tests that fail on uml, maybe elsewhere, FIXME
awk '/lustre_inode_cache/ {print $2; exit}' /proc/slabinfo
}
-test_76() { # Now for bug 20433, added originally in bug 1443
+test_76() { # bug 1443
+ DETH=$(grep deathrow /proc/kallsyms /proc/ksyms 2> /dev/null | wc -l)
+ [ $DETH -eq 0 ] && skip "No _iget." && return 0
BEFORE_INODES=`num_inodes`
echo "before inodes: $BEFORE_INODES"
local COUNT=1000
error "inode slab grew from $BEFORE_INODES to $AFTER_INODES"
true
}
-run_test 76 "confirm clients recycle inodes properly ===="
+run_test 76 "destroy duplicate inodes in client inode cache ===="
export ORIG_CSUM=""
set_checksums()