From cf1dac6d4aacb93949c1768c50f324cd5aedb793 Mon Sep 17 00:00:00 2001 From: Johann Lombardi Date: Mon, 22 Mar 2010 23:14:45 +0100 Subject: [PATCH] Revert "b=20433 decrease the usage of memory on clients." Suspected to cause bug 22307, so revert temporarily. This reverts commit 841fbac6378df39e357342d86d9380e6676c1faf. --- lustre/ChangeLog | 6 --- lustre/autoconf/lustre-core.m4 | 12 +++++ lustre/include/lustre_dlm.h | 4 +- lustre/include/lustre_mds.h | 2 - lustre/include/obd.h | 2 - lustre/include/obd_class.h | 14 ----- lustre/ldlm/ldlm_request.c | 19 +++---- lustre/llite/dcache.c | 48 ----------------- lustre/llite/llite_internal.h | 4 ++ lustre/llite/llite_lib.c | 109 +++++++++++++++++++++++++++++++++++++++ lustre/llite/namei.c | 4 -- lustre/lov/lov_obd.c | 40 -------------- lustre/mdc/mdc_locks.c | 21 -------- lustre/mdc/mdc_request.c | 1 - lustre/obdclass/lprocfs_status.c | 1 - lustre/osc/osc_request.c | 21 -------- lustre/tests/sanity.sh | 8 +-- 17 files changed, 140 insertions(+), 176 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index b1ddb7d..5c74853 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -42,12 +42,6 @@ Severity : normal Bugzilla : 21961/17914 Description: ignore trailing -mdc when determining index number -Severity : normal -Bugzilla : 20433 -Description: decrease the usage of memory on clients. -Details : 1. On clients, recycle dentries and inodes unused. - 2. Delete the code related to ll_deathrow(att 6215 in bug 1443). It - is useless now. ------------------------------------------------------------------------------- 2010-01-29 Sun Microsystems, Inc. diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4 index 6fa304b..120942a 100644 --- a/lustre/autoconf/lustre-core.m4 +++ b/lustre/autoconf/lustre-core.m4 @@ -435,6 +435,18 @@ AC_MSG_RESULT([no]) ]) # +# LC_EXPORT___IGET +# starting from 2.6.19 linux kernel exports __iget() +# +AC_DEFUN([LC_EXPORT___IGET], +[LB_CHECK_SYMBOL_EXPORT([__iget], +[fs/inode.c],[ + AC_DEFINE(HAVE_EXPORT___IGET, 1, [kernel exports __iget]) +],[ +]) +]) + +# # only for Lustre-patched kernels # AC_DEFUN([LC_LUSTRE_VERSION_H], diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h index 3939a23..ed54da1 100644 --- a/lustre/include/lustre_dlm.h +++ b/lustre/include/lustre_dlm.h @@ -696,8 +696,8 @@ int ldlm_namespace_foreach_res(struct ldlm_namespace *ns, ldlm_res_iterator_t iter, void *closure); int ldlm_replay_locks(struct obd_import *imp); -int ldlm_resource_iterate(struct ldlm_namespace *, struct ldlm_res_id *, - ldlm_iterator_t iter, void *data); +void ldlm_resource_iterate(struct ldlm_namespace *, struct ldlm_res_id *, + ldlm_iterator_t iter, void *data); /* ldlm_flock.c */ int ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data); diff --git a/lustre/include/lustre_mds.h b/lustre/include/lustre_mds.h index 7a6a136..aa7d663 100644 --- a/lustre/include/lustre_mds.h +++ b/lustre/include/lustre_mds.h @@ -184,8 +184,6 @@ int it_open_error(int phase, struct lookup_intent *it); void mdc_set_lock_data(__u64 *lockh, void *data, __u32 *lockbits); int mdc_change_cbdata(struct obd_export *exp, struct ll_fid *fid, ldlm_iterator_t it, void *data); -int mdc_find_cbdata(struct obd_export *exp, struct ll_fid *fid, - ldlm_iterator_t it, void *data); int mdc_revalidate_lock(struct obd_export *exp, struct lookup_intent *it, struct ll_fid *fid); diff --git a/lustre/include/obd.h b/lustre/include/obd.h index b328292..46e826a 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -1259,8 +1259,6 @@ struct obd_ops { struct lustre_handle *lockh, int *n_matches); int (*o_change_cbdata)(struct obd_export *, struct lov_stripe_md *, ldlm_iterator_t it, void *data); - int (*o_find_cbdata)(struct obd_export *, struct lov_stripe_md *, - ldlm_iterator_t it, void *data); int (*o_cancel)(struct obd_export *, struct lov_stripe_md *md, __u32 mode, struct lustre_handle *, int flags, obd_off end); diff --git a/lustre/include/obd_class.h b/lustre/include/obd_class.h index fcd7070..c8db6e6 100644 --- a/lustre/include/obd_class.h +++ b/lustre/include/obd_class.h @@ -1425,20 +1425,6 @@ static inline int obd_change_cbdata(struct obd_export *exp, RETURN(rc); } -static inline int obd_find_cbdata(struct obd_export *exp, - struct lov_stripe_md *lsm, - ldlm_iterator_t it, void *data) -{ - int rc; - ENTRY; - - EXP_CHECK_OP(exp, find_cbdata); - EXP_COUNTER_INCREMENT(exp, find_cbdata); - - rc = OBP(exp->exp_obd, find_cbdata)(exp, lsm, it, data); - RETURN(rc); -} - static inline int obd_cancel(struct obd_export *exp, struct lov_stripe_md *ea, __u32 mode, struct lustre_handle *lockh, int flags, obd_off end) diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c index 773dbd6..66d94e1 100644 --- a/lustre/ldlm/ldlm_request.c +++ b/lustre/ldlm/ldlm_request.c @@ -1823,16 +1823,11 @@ int ldlm_namespace_foreach_res(struct ldlm_namespace *ns, RETURN(rc); } -/* non-blocking function to manipulate a lock whose cb_data is being put away. - * return 0: find no resource - * > 0: must be LDLM_ITER_STOP/LDLM_ITER_CONTINUE. - * < 0: errors - */ -int ldlm_resource_iterate(struct ldlm_namespace *ns, struct ldlm_res_id *res_id, +/* non-blocking function to manipulate a lock whose cb_data is being put away.*/ +void ldlm_resource_iterate(struct ldlm_namespace *ns, struct ldlm_res_id *res_id, ldlm_iterator_t iter, void *data) { struct ldlm_resource *res; - int rc; ENTRY; if (ns == NULL) { @@ -1841,12 +1836,14 @@ int ldlm_resource_iterate(struct ldlm_namespace *ns, struct ldlm_res_id *res_id, } res = ldlm_resource_get(ns, NULL, *res_id, 0, 0); - if (res == NULL) - RETURN(0); + if (res == NULL) { + EXIT; + return; + } - rc = ldlm_resource_foreach(res, iter, data); + ldlm_resource_foreach(res, iter, data); ldlm_resource_putref(res); - RETURN(rc); + EXIT; } /* Lock replay */ diff --git a/lustre/llite/dcache.c b/lustre/llite/dcache.c index 22a929d..add4dd6 100644 --- a/lustre/llite/dcache.c +++ b/lustre/llite/dcache.c @@ -111,36 +111,6 @@ int ll_dcompare(struct dentry *parent, struct qstr *d_name, struct qstr *name) RETURN(0); } -static inline int return_if_equal(struct ldlm_lock *lock, void *data) -{ - return LDLM_ITER_STOP; -} - -/* find any ldlm lock of the inode in mdc and lov - * return 0 not find - * 1 find one - * < 0 error */ -int find_cbdata(struct inode *inode) -{ - struct ll_fid fid; - struct ll_inode_info *lli = ll_i2info(inode); - struct ll_sb_info *sbi = ll_i2sbi(inode); - int rc = 0; - ENTRY; - - LASSERT(inode); - ll_inode2fid(&fid, inode); - rc = mdc_find_cbdata(sbi->ll_mdc_exp, &fid, return_if_equal, NULL); - if (rc != 0) - RETURN(rc); - - if (lli->lli_smd) - rc = obd_find_cbdata(sbi->ll_osc_exp, lli->lli_smd, - return_if_equal, NULL); - - RETURN(rc); -} - /* should NOT be called with the dcache lock, see fs/dcache.c */ static int ll_ddelete(struct dentry *de) { @@ -153,15 +123,6 @@ static int ll_ddelete(struct dentry *de) d_unhashed(de) ? "" : "hashed,", list_empty(&de->d_subdirs) ? "" : "subdirs"); - /* if not ldlm lock for this inode, set i_nlink to 0 so that - * this inode can be recycled later b=20433 */ - LASSERT(atomic_read(&de->d_count) == 0); - if (de->d_inode && !find_cbdata(de->d_inode)) - de->d_inode->i_nlink = 0; - - if (de->d_flags & DCACHE_LUSTRE_INVALID) - RETURN(1); - RETURN(0); } @@ -796,19 +757,10 @@ out_it: } #endif -void ll_d_iput(struct dentry *de, struct inode *inode) -{ - LASSERT(inode); - if (inode && !find_cbdata(inode)) - inode->i_nlink = 0; - iput(inode); -} - struct dentry_operations ll_d_ops = { .d_revalidate = ll_revalidate_nd, .d_release = ll_release, .d_delete = ll_ddelete, - .d_iput = ll_d_iput, #ifdef DCACHE_LUSTRE_INVALID .d_compare = ll_dcompare, #endif diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 6c79825..76a7931 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -375,6 +375,10 @@ struct ll_sb_info { unsigned int ll_namelen; struct file_operations *ll_fop; +#ifdef HAVE_EXPORT___IGET + struct list_head ll_deathrow; /* inodes to be destroyed (b1443) */ + spinlock_t ll_deathrow_lock; +#endif /* =0 - hold lock over whole read/write * >0 - max. chunk to be read/written w/o lock re-acquiring */ unsigned long ll_max_rw_chunk; diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index e6ced4f..a53bf2d 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -199,6 +199,10 @@ static struct ll_sb_info *ll_init_sbi(void) sbi->ll_flags |= LL_SBI_LRU_RESIZE; #endif +#ifdef HAVE_EXPORT___IGET + INIT_LIST_HEAD(&sbi->ll_deathrow); + spin_lock_init(&sbi->ll_deathrow_lock); +#endif for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) { spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].pp_r_hist.oh_lock); spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i].pp_w_hist.oh_lock); @@ -621,6 +625,101 @@ void lustre_dump_dentry(struct dentry *dentry, int recur) } } +#ifdef HAVE_EXPORT___IGET +static void prune_dir_dentries(struct inode *inode) +{ + struct dentry *dentry, *prev = NULL; + + /* due to lustre specific logic, a directory + * can have few dentries - a bug from VFS POV */ +restart: + spin_lock(&dcache_lock); + if (!list_empty(&inode->i_dentry)) { + dentry = list_entry(inode->i_dentry.prev, + struct dentry, d_alias); + /* in order to prevent infinite loops we + * break if previous dentry is busy */ + if (dentry != prev) { + prev = dentry; + dget_locked(dentry); + spin_unlock(&dcache_lock); + + /* try to kill all child dentries */ + shrink_dcache_parent(dentry); + dput(dentry); + + /* now try to get rid of current dentry */ + d_prune_aliases(inode); + goto restart; + } + } + spin_unlock(&dcache_lock); +} + +static void prune_deathrow_one(struct ll_inode_info *lli) +{ + struct inode *inode = ll_info2i(lli); + + /* first, try to drop any dentries - they hold a ref on the inode */ + if (S_ISDIR(inode->i_mode)) + prune_dir_dentries(inode); + else + d_prune_aliases(inode); + + + /* if somebody still uses it, leave it */ + LASSERT(atomic_read(&inode->i_count) > 0); + if (atomic_read(&inode->i_count) > 1) + goto out; + + CDEBUG(D_INODE, "inode %lu/%u(%d) looks a good candidate for prune\n", + inode->i_ino,inode->i_generation, atomic_read(&inode->i_count)); + + /* seems nobody uses it anymore */ + inode->i_nlink = 0; + +out: + iput(inode); + return; +} + +static void prune_deathrow(struct ll_sb_info *sbi, int try) +{ + struct ll_inode_info *lli; + int empty; + + do { + if (need_resched() && try) + break; + + if (try) { + if (!spin_trylock(&sbi->ll_deathrow_lock)) + break; + } else { + spin_lock(&sbi->ll_deathrow_lock); + } + + empty = 1; + lli = NULL; + if (!list_empty(&sbi->ll_deathrow)) { + lli = list_entry(sbi->ll_deathrow.next, + struct ll_inode_info, + lli_dead_list); + list_del_init(&lli->lli_dead_list); + if (!list_empty(&sbi->ll_deathrow)) + empty = 0; + } + spin_unlock(&sbi->ll_deathrow_lock); + + if (lli) + prune_deathrow_one(lli); + + } while (empty == 0); +} +#else /* !HAVE_EXPORT___IGET */ +#define prune_deathrow(sbi, try) do {} while (0) +#endif /* HAVE_EXPORT___IGET */ + void client_common_put_super(struct super_block *sb) { struct ll_sb_info *sbi = ll_s2sbi(sb); @@ -630,6 +729,9 @@ void client_common_put_super(struct super_block *sb) lprocfs_unregister_mountpoint(sbi); + /* destroy inodes in deathrow */ + prune_deathrow(sbi, 0); + list_del(&sbi->ll_conn_chain); /* callbacks is cleared after disconnect each target */ @@ -1310,6 +1412,12 @@ void ll_clear_inode(struct inode *inode) lli->lli_inode_magic = LLI_INODE_DEAD; +#ifdef HAVE_EXPORT___IGET + spin_lock(&sbi->ll_deathrow_lock); + list_del_init(&lli->lli_dead_list); + spin_unlock(&sbi->ll_deathrow_lock); +#endif + EXIT; } static int ll_setattr_do_truncate(struct inode *inode, loff_t new_size) @@ -2220,6 +2328,7 @@ int ll_prep_inode(struct obd_export *exp, struct inode **inode, LASSERT(*inode || sb); sbi = sb ? ll_s2sbi(sb) : ll_i2sbi(*inode); + prune_deathrow(sbi, 1); rc = mdc_req2lustre_md(req, offset, exp, &md); if (rc) diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index 3da1dfd..d88ae6c 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -1180,10 +1180,6 @@ int ll_objects_destroy(struct ptlrpc_request *request, struct inode *dir) return rc; } -/* ll_unlink_generic() doesn't update the inode with the new link count. - * Instead, ll_ddelete() and ll_d_iput() will update it based upon if there - * is any lock existing. They will recycle dentries and inodes based upon locks - * too. b=20433 */ static int ll_unlink_generic(struct inode * dir, struct qstr *name) { struct ptlrpc_request *request = NULL; diff --git a/lustre/lov/lov_obd.c b/lustre/lov/lov_obd.c index ee7bc30..37723c5 100644 --- a/lustre/lov/lov_obd.c +++ b/lustre/lov/lov_obd.c @@ -2226,45 +2226,6 @@ static int lov_change_cbdata(struct obd_export *exp, RETURN(rc); } -/* find any ldlm lock of the inode in lov - * return 0 not find - * 1 find one - * < 0 error */ -static int lov_find_cbdata(struct obd_export *exp, - struct lov_stripe_md *lsm, ldlm_iterator_t it, - void *data) -{ - struct lov_obd *lov; - struct lov_oinfo *loi; - int rc = 0, i; - ENTRY; - - ASSERT_LSM_MAGIC(lsm); - - if (!exp || !exp->exp_obd) - RETURN(-ENODEV); - - lov = &exp->exp_obd->u.lov; - for (i = 0; i < lsm->lsm_stripe_count; i++) { - struct lov_stripe_md submd; - - loi = lsm->lsm_oinfo[i]; - if (!lov->lov_tgts[loi->loi_ost_idx]) { - CDEBUG(D_HA, "lov idx %d NULL \n", loi->loi_ost_idx); - continue; - } - submd.lsm_object_id = loi->loi_id; - submd.lsm_object_gr = loi->loi_gr; - submd.lsm_stripe_count = 0; - rc = obd_find_cbdata(lov->lov_tgts[loi->loi_ost_idx]->ltd_exp, - &submd, it, data); - if (rc != 0) - RETURN(rc); - } - RETURN(rc); -} - - static int lov_cancel(struct obd_export *exp, struct lov_stripe_md *lsm, __u32 mode, struct lustre_handle *lockh, int flags, obd_off end) @@ -3349,7 +3310,6 @@ struct obd_ops lov_obd_ops = { .o_enqueue = lov_enqueue, .o_match = lov_match, .o_change_cbdata = lov_change_cbdata, - .o_find_cbdata = lov_find_cbdata, .o_cancel = lov_cancel, .o_cancel_unused = lov_cancel_unused, .o_join_lru = lov_join_lru, diff --git a/lustre/mdc/mdc_locks.c b/lustre/mdc/mdc_locks.c index 3a9db8b..b8f69e9 100644 --- a/lustre/mdc/mdc_locks.c +++ b/lustre/mdc/mdc_locks.c @@ -144,27 +144,6 @@ int mdc_change_cbdata(struct obd_export *exp, struct ll_fid *fid, return 0; } -/* find any ldlm lock of the inode in mdc - * return 0 not find - * 1 find one - * < 0 error */ -int mdc_find_cbdata(struct obd_export *exp, struct ll_fid *fid, - ldlm_iterator_t it, void *data) -{ - struct ldlm_res_id res_id; - int rc = 0; - ENTRY; - - fid_build_reg_res_name((struct lu_fid*)fid, &res_id); - rc = ldlm_resource_iterate(class_exp2obd(exp)->obd_namespace, &res_id, - it, data); - if (rc == LDLM_ITER_STOP) - RETURN(1); - else if (rc == LDLM_ITER_CONTINUE) - RETURN(0); - RETURN(rc); -} - static inline void mdc_clear_replay_flag(struct ptlrpc_request *req, int rc) { /* Don't hold error requests for replay. */ diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index f1d075d..bae823f 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -1691,7 +1691,6 @@ MODULE_LICENSE("GPL"); EXPORT_SYMBOL(mdc_req2lustre_md); EXPORT_SYMBOL(mdc_free_lustre_md); EXPORT_SYMBOL(mdc_change_cbdata); -EXPORT_SYMBOL(mdc_find_cbdata); EXPORT_SYMBOL(mdc_getstatus); EXPORT_SYMBOL(mdc_getattr); EXPORT_SYMBOL(mdc_getattr_name); diff --git a/lustre/obdclass/lprocfs_status.c b/lustre/obdclass/lprocfs_status.c index 03ad827..777a0a2 100644 --- a/lustre/obdclass/lprocfs_status.c +++ b/lustre/obdclass/lprocfs_status.c @@ -1396,7 +1396,6 @@ void lprocfs_init_ops_stats(int num_private_stats, struct lprocfs_stats *stats) LPROCFS_OBD_OP_INIT(num_private_stats, stats, enqueue); LPROCFS_OBD_OP_INIT(num_private_stats, stats, match); LPROCFS_OBD_OP_INIT(num_private_stats, stats, change_cbdata); - LPROCFS_OBD_OP_INIT(num_private_stats, stats, find_cbdata); LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel); LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel_unused); LPROCFS_OBD_OP_INIT(num_private_stats, stats, join_lru); diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 8183ef5..ea5737e 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -3272,26 +3272,6 @@ static int osc_change_cbdata(struct obd_export *exp, struct lov_stripe_md *lsm, return 0; } -/* find any ldlm lock of the inode in osc - * return 0 not find - * 1 find one - * < 0 error */ -static int osc_find_cbdata(struct obd_export *exp, struct lov_stripe_md *lsm, - ldlm_iterator_t replace, void *data) -{ - struct ldlm_res_id res_id; - struct obd_device *obd = class_exp2obd(exp); - int rc = 0; - - osc_build_res_name(lsm->lsm_object_id, lsm->lsm_object_gr, &res_id); - rc = ldlm_resource_iterate(obd->obd_namespace, &res_id, replace, data); - if (rc == LDLM_ITER_STOP) - return(1); - if (rc == LDLM_ITER_CONTINUE) - return(0); - return(rc); -} - static int osc_enqueue_fini(struct obd_device *obd, struct ptlrpc_request *req, struct obd_info *oinfo, int intent, int rc) { @@ -4599,7 +4579,6 @@ struct obd_ops osc_obd_ops = { .o_enqueue = osc_enqueue, .o_match = osc_match, .o_change_cbdata = osc_change_cbdata, - .o_find_cbdata = osc_find_cbdata, .o_cancel = osc_cancel, .o_cancel_unused = osc_cancel_unused, .o_join_lru = osc_join_lru, diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 43b81c1..2c89c86 100644 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -10,7 +10,7 @@ set -e ONLY=${ONLY:-"$*"} # bug number for skipped test: 13297 2108 9789 3637 9789 3561 12622 15528/2330 5188 10764 16410 -ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"27u 42a 42b 42c 42d 45 51d 62 68 75 $SANITY_EXCEPT"} +ALWAYS_EXCEPT=${ALWAYS_EXCEPT:-"27u 42a 42b 42c 42d 45 51d 62 68 75 76 $SANITY_EXCEPT"} # UPDATE THE COMMENT ABOVE WITH BUG NUMBERS WHEN CHANGING ALWAYS_EXCEPT! # Tests that fail on uml, maybe elsewhere, FIXME @@ -3441,7 +3441,9 @@ num_inodes() { awk '/lustre_inode_cache/ {print $2; exit}' /proc/slabinfo } -test_76() { # Now for bug 20433, added originally in bug 1443 +test_76() { # bug 1443 + DETH=$(grep deathrow /proc/kallsyms /proc/ksyms 2> /dev/null | wc -l) + [ $DETH -eq 0 ] && skip "No _iget." && return 0 BEFORE_INODES=`num_inodes` echo "before inodes: $BEFORE_INODES" local COUNT=1000 @@ -3456,7 +3458,7 @@ test_76() { # Now for bug 20433, added originally in bug 1443 error "inode slab grew from $BEFORE_INODES to $AFTER_INODES" true } -run_test 76 "confirm clients recycle inodes properly ====" +run_test 76 "destroy duplicate inodes in client inode cache ====" export ORIG_CSUM="" set_checksums() -- 1.8.3.1