1. On clients, recycle unused dentries and inodes.
2. Delete the code related to ll_deathrow (att 6215 in bug 1443). It
is no longer needed.
i=robert.read
i=vladimir.saveliev
request with tid smaller then one currently being added, add it
to the start, not end of the list.
+Severity : normal
+Bugzilla : 20433
+Description: decrease the usage of memory on clients.
+Details : 1. On clients, recycle unused dentries and inodes.
+ 2. Delete the code related to ll_deathrow (att 6215 in bug 1443). It
+ is no longer needed.
+
--------------------------------------------------------------------------------
2007-08-10 Cluster File Systems, Inc. <info@clusterfs.com>
])
#
-# LC_EXPORT___IGET
-# starting from 2.6.19 linux kernel exports __iget()
-#
-AC_DEFUN([LC_EXPORT___IGET],
-[LB_CHECK_SYMBOL_EXPORT([__iget],
-[fs/inode.c],[
- AC_DEFINE(HAVE_EXPORT___IGET, 1, [kernel exports __iget])
-],[
-])
-])
-
-#
# only for Lustre-patched kernels
#
AC_DEFUN([LC_LUSTRE_VERSION_H],
ldlm_res_iterator_t iter, void *closure);
int ldlm_replay_locks(struct obd_import *imp);
-void ldlm_resource_iterate(struct ldlm_namespace *, const struct ldlm_res_id *,
+int ldlm_resource_iterate(struct ldlm_namespace *, const struct ldlm_res_id *,
ldlm_iterator_t iter, void *data);
/* ldlm_flock.c */
struct ptlrpc_request_set *rqset);
int (*o_change_cbdata)(struct obd_export *, struct lov_stripe_md *,
ldlm_iterator_t it, void *data);
+ int (*o_find_cbdata)(struct obd_export *, struct lov_stripe_md *,
+ ldlm_iterator_t it, void *data);
int (*o_cancel)(struct obd_export *, struct lov_stripe_md *md,
__u32 mode, struct lustre_handle *);
int (*o_cancel_unused)(struct obd_export *, struct lov_stripe_md *,
struct obd_capa **);
int (*m_change_cbdata)(struct obd_export *, const struct lu_fid *,
ldlm_iterator_t, void *);
+ int (*m_find_cbdata)(struct obd_export *, const struct lu_fid *,
+ ldlm_iterator_t, void *);
int (*m_close)(struct obd_export *, struct md_op_data *,
struct md_open_data *, struct ptlrpc_request **);
int (*m_create)(struct obd_export *, struct md_op_data *,
RETURN(rc);
}
+/* Dispatch to the find_cbdata method of the obd device behind \a exp,
+ * passing \a lsm, \a it and \a data through unchanged, and return whatever
+ * that method returns. The per-export find_cbdata counter is bumped first.
+ * NOTE(review): EXP_CHECK_DT_OP presumably returns early when the export or
+ * the method is missing - confirm against its definition. */
+static inline int obd_find_cbdata(struct obd_export *exp,
+ struct lov_stripe_md *lsm,
+ ldlm_iterator_t it, void *data)
+{
+ int rc;
+ ENTRY;
+
+ EXP_CHECK_DT_OP(exp, find_cbdata);
+ EXP_COUNTER_INCREMENT(exp, find_cbdata);
+
+ rc = OBP(exp->exp_obd, find_cbdata)(exp, lsm, it, data);
+ RETURN(rc);
+}
+
static inline int obd_cancel(struct obd_export *exp,
struct lov_stripe_md *ea, __u32 mode,
struct lustre_handle *lockh)
RETURN(rc);
}
+/* Dispatch to the find_cbdata method of the metadata device behind \a exp,
+ * passing \a fid, \a it and \a data through unchanged, and return whatever
+ * that method returns. The per-export find_cbdata counter is bumped first.
+ * NOTE(review): EXP_CHECK_MD_OP presumably returns early when the export or
+ * the method is missing - confirm against its definition. */
+static inline int md_find_cbdata(struct obd_export *exp,
+ const struct lu_fid *fid,
+ ldlm_iterator_t it, void *data)
+{
+ int rc;
+ ENTRY;
+ EXP_CHECK_MD_OP(exp, find_cbdata);
+ EXP_MD_COUNTER_INCREMENT(exp, find_cbdata);
+ rc = MDP(exp->exp_obd, find_cbdata)(exp, fid, it, data);
+ RETURN(rc);
+}
+
static inline int md_close(struct obd_export *exp, struct md_op_data *op_data,
struct md_open_data *mod,
struct ptlrpc_request **request)
RETURN(rc);
}
-/* non-blocking function to manipulate a lock whose cb_data is being put away.*/
-void ldlm_resource_iterate(struct ldlm_namespace *ns,
- const struct ldlm_res_id *res_id,
- ldlm_iterator_t iter, void *data)
+/* non-blocking function to manipulate a lock whose cb_data is being put away.
+ * return 0: no resource was found
+ * > 0: the result of the iteration, LDLM_ITER_STOP or LDLM_ITER_CONTINUE
+ * < 0: error
+ */
+int ldlm_resource_iterate(struct ldlm_namespace *ns,
+ const struct ldlm_res_id *res_id,
+ ldlm_iterator_t iter, void *data)
{
struct ldlm_resource *res;
+ int rc;
ENTRY;
if (ns == NULL) {
}
res = ldlm_resource_get(ns, NULL, res_id, 0, 0);
- if (res == NULL) {
- EXIT;
- return;
- }
+ if (res == NULL)
+ RETURN(0);
LDLM_RESOURCE_ADDREF(res);
- ldlm_resource_foreach(res, iter, data);
+ rc = ldlm_resource_foreach(res, iter, data);
LDLM_RESOURCE_DELREF(res);
ldlm_resource_putref(res);
- EXIT;
+ RETURN(rc);
}
/* Lock replay */
RETURN(0);
}
+/* Lock iterator callback that stops the iteration at the first lock it is
+ * handed. Despite the name nothing is compared: \a lock and \a data are
+ * ignored, so a STOP result simply means "at least one lock exists". */
+static inline int return_if_equal(struct ldlm_lock *lock, void *data)
+{
+ return LDLM_ITER_STOP;
+}
+
+/* Find any ldlm lock of the inode in mdc and lov.
+ * The metadata export is asked first; the data export is consulted only
+ * when that returned 0 and the inode has striping information (lli_smd).
+ * return   0 no lock found
+ *          1 a lock was found
+ *        < 0 error
+ * Callers test the result with '!', so an error is handled like "lock
+ * found" and the inode is left untouched. */
+static int find_cbdata(struct inode *inode)
+{
+        struct ll_inode_info *lli;
+        struct ll_sb_info *sbi;
+        int rc;
+        ENTRY;
+
+        /* check the pointer before ll_i2info()/ll_i2sbi() get to use it */
+        LASSERT(inode);
+        lli = ll_i2info(inode);
+        sbi = ll_i2sbi(inode);
+
+        rc = md_find_cbdata(sbi->ll_md_exp, ll_inode2fid(inode),
+                            return_if_equal, NULL);
+        if (rc != 0)
+                RETURN(rc);
+
+        if (lli->lli_smd)
+                rc = obd_find_cbdata(sbi->ll_dt_exp, lli->lli_smd,
+                                     return_if_equal, NULL);
+
+        RETURN(rc);
+}
+
/* should NOT be called with the dcache lock, see fs/dcache.c */
static int ll_ddelete(struct dentry *de)
{
d_unhashed(de) ? "" : "hashed,",
list_empty(&de->d_subdirs) ? "" : "subdirs");
+ /* if there is no ldlm lock for this inode, set i_nlink to 0 so that
+ * this inode can be recycled later, b=20433 */
+ LASSERT(atomic_read(&de->d_count) == 0);
+ if (de->d_inode && !find_cbdata(de->d_inode))
+ de->d_inode->i_nlink = 0;
+
+ if (de->d_flags & DCACHE_LUSTRE_INVALID)
+ RETURN(1);
+
RETURN(0);
}
}
#endif
+/* d_iput method of ll_d_ops: drop the dentry's reference on \a inode.
+ * When find_cbdata() reports no ldlm lock for the inode (returns 0),
+ * i_nlink is cleared before iput() so that the inode can be recycled
+ * (b=20433). A non-zero result, including an error, leaves i_nlink alone.
+ * \a de is not used. */
+void ll_d_iput(struct dentry *de, struct inode *inode)
+{
+ LASSERT(inode);
+ if (!find_cbdata(inode))
+ inode->i_nlink = 0;
+ iput(inode);
+}
+
struct dentry_operations ll_d_ops = {
.d_revalidate = ll_revalidate_nd,
.d_release = ll_release,
- .d_delete = ll_ddelete,
+ .d_delete = ll_ddelete,
+ .d_iput = ll_d_iput,
.d_compare = ll_dcompare,
#if 0
.d_pin = ll_pin,
unsigned int ll_namelen;
struct file_operations *ll_fop;
-#ifdef HAVE_EXPORT___IGET
- cfs_list_t ll_deathrow;/*inodes to be destroyed (b1443)*/
- cfs_spinlock_t ll_deathrow_lock;
-#endif
/* =0 - hold lock over whole read/write
* >0 - max. chunk to be read/written w/o lock re-acquiring */
unsigned long ll_max_rw_chunk;
sbi->ll_flags |= LL_SBI_LRU_RESIZE;
#endif
-#ifdef HAVE_EXPORT___IGET
- CFS_INIT_LIST_HEAD(&sbi->ll_deathrow);
- cfs_spin_lock_init(&sbi->ll_deathrow_lock);
-#endif
for (i = 0; i <= LL_PROCESS_HIST_MAX; i++) {
cfs_spin_lock_init(&sbi->ll_rw_extents_info.pp_extents[i]. \
pp_r_hist.oh_lock);
}
}
-#ifdef HAVE_EXPORT___IGET
-static void prune_dir_dentries(struct inode *inode)
-{
- struct dentry *dentry, *prev = NULL;
-
- /* due to lustre specific logic, a directory
- * can have few dentries - a bug from VFS POV */
-restart:
- spin_lock(&dcache_lock);
- if (!list_empty(&inode->i_dentry)) {
- dentry = list_entry(inode->i_dentry.prev,
- struct dentry, d_alias);
- /* in order to prevent infinite loops we
- * break if previous dentry is busy */
- if (dentry != prev) {
- prev = dentry;
- dget_locked(dentry);
- spin_unlock(&dcache_lock);
-
- /* try to kill all child dentries */
- lock_dentry(dentry);
- shrink_dcache_parent(dentry);
- unlock_dentry(dentry);
- dput(dentry);
-
- /* now try to get rid of current dentry */
- d_prune_aliases(inode);
- goto restart;
- }
- }
- spin_unlock(&dcache_lock);
-}
-
-static void prune_deathrow_one(struct ll_inode_info *lli)
-{
- struct inode *inode = ll_info2i(lli);
-
- /* first, try to drop any dentries - they hold a ref on the inode */
- if (S_ISDIR(inode->i_mode))
- prune_dir_dentries(inode);
- else
- d_prune_aliases(inode);
-
-
- /* if somebody still uses it, leave it */
- LASSERT(atomic_read(&inode->i_count) > 0);
- if (atomic_read(&inode->i_count) > 1)
- goto out;
-
- CDEBUG(D_INODE, "inode %lu/%u(%d) looks a good candidate for prune\n",
- inode->i_ino,inode->i_generation,
- atomic_read(&inode->i_count));
-
- /* seems nobody uses it anymore */
- inode->i_nlink = 0;
-
-out:
- iput(inode);
- return;
-}
-
-static void prune_deathrow(struct ll_sb_info *sbi, int try)
-{
- struct ll_inode_info *lli;
- int empty;
-
- do {
- if (need_resched() && try)
- break;
-
- if (try) {
- if (!cfs_spin_trylock(&sbi->ll_deathrow_lock))
- break;
- } else {
- cfs_spin_lock(&sbi->ll_deathrow_lock);
- }
-
- empty = 1;
- lli = NULL;
- if (!cfs_list_empty(&sbi->ll_deathrow)) {
- lli = cfs_list_entry(sbi->ll_deathrow.next,
- struct ll_inode_info,
- lli_dead_list);
- cfs_list_del_init(&lli->lli_dead_list);
- if (!cfs_list_empty(&sbi->ll_deathrow))
- empty = 0;
- }
- cfs_spin_unlock(&sbi->ll_deathrow_lock);
-
- if (lli)
- prune_deathrow_one(lli);
-
- } while (empty == 0);
-}
-#else /* !HAVE_EXPORT___IGET */
-#define prune_deathrow(sbi, try) do {} while (0)
-#endif /* HAVE_EXPORT___IGET */
-
void client_common_put_super(struct super_block *sb)
{
struct ll_sb_info *sbi = ll_s2sbi(sb);
cl_sb_fini(sb);
- /* destroy inodes in deathrow */
- prune_deathrow(sbi, 0);
-
cfs_list_del(&sbi->ll_conn_chain);
obd_fid_fini(sbi->ll_dt_exp);
#endif
lli->lli_inode_magic = LLI_INODE_DEAD;
-#ifdef HAVE_EXPORT___IGET
- cfs_spin_lock(&sbi->ll_deathrow_lock);
- cfs_list_del_init(&lli->lli_dead_list);
- cfs_spin_unlock(&sbi->ll_deathrow_lock);
-#endif
ll_clear_inode_capas(inode);
/*
* XXX This has to be done before lsm is freed below, because
LASSERT(*inode || sb);
sbi = sb ? ll_s2sbi(sb) : ll_i2sbi(*inode);
- prune_deathrow(sbi, 1);
memset(&md, 0, sizeof(struct lustre_md));
rc = md_get_lustre_md(sbi->ll_md_exp, req, sbi->ll_dt_exp,
return rc;
}
+/* ll_unlink_generic() doesn't update the inode with the new link count.
+ * Instead, ll_ddelete() and ll_d_iput() update it depending on whether any
+ * lock still exists for the inode. Dentries and inodes are recycled based
+ * on the presence of locks as well. b=20433 */
static int ll_unlink_generic(struct inode *dir, struct dentry *dparent,
struct dentry *dchild, struct qstr *name)
{
RETURN(0);
}
+/* Run iterator \a it over the locks of \a fid on every lmv target.
+ * After lmv_check_connect() succeeds, md_find_cbdata() is called on each of
+ * the ld_tgt_count target exports in turn; the first non-zero result (lock
+ * found or error) ends the scan and is returned. Returns 0 when every
+ * target returned 0, or the lmv_check_connect() error. */
+static int lmv_find_cbdata(struct obd_export *exp, const struct lu_fid *fid,
+ ldlm_iterator_t it, void *data)
+{
+ struct obd_device *obd = exp->exp_obd;
+ struct lmv_obd *lmv = &obd->u.lmv;
+ int i;
+ int rc;
+ ENTRY;
+
+ rc = lmv_check_connect(obd);
+ if (rc)
+ RETURN(rc);
+
+ CDEBUG(D_INODE, "CBDATA for "DFID"\n", PFID(fid));
+
+ /*
+ * With CMD every object can have two locks in different namespaces:
+ * lookup lock in space of mds storing direntry and update/open lock in
+ * space of mds storing inode.
+ */
+ for (i = 0; i < lmv->desc.ld_tgt_count; i++) {
+ rc = md_find_cbdata(lmv->tgts[i].ltd_exp, fid, it, data);
+ if (rc)
+ RETURN(rc);
+ }
+
+ RETURN(rc);
+}
+
+
static int lmv_close(struct obd_export *exp, struct md_op_data *op_data,
struct md_open_data *mod, struct ptlrpc_request **request)
{
struct md_ops lmv_md_ops = {
.m_getstatus = lmv_getstatus,
.m_change_cbdata = lmv_change_cbdata,
+ .m_find_cbdata = lmv_find_cbdata,
.m_close = lmv_close,
.m_create = lmv_create,
.m_done_writing = lmv_done_writing,
RETURN(rc);
}
+/* Find any ldlm lock of the inode in lov.
+ * Calls obd_find_cbdata() with \a it for each stripe object of \a lsm whose
+ * target is present, and stops at the first non-zero result.
+ * return 0 no lock found on any stripe that was checked
+ * 1 a lock was found
+ * < 0 error (-ENODEV when the export or its obd device is missing) */
+static int lov_find_cbdata(struct obd_export *exp,
+ struct lov_stripe_md *lsm, ldlm_iterator_t it,
+ void *data)
+{
+ struct lov_obd *lov;
+ int rc = 0, i;
+ ENTRY;
+
+ ASSERT_LSM_MAGIC(lsm);
+
+ if (!exp || !exp->exp_obd)
+ RETURN(-ENODEV);
+
+ LASSERT_MDS_GROUP(lsm->lsm_object_gr);
+
+ lov = &exp->exp_obd->u.lov;
+ for (i = 0; i < lsm->lsm_stripe_count; i++) {
+ struct lov_stripe_md submd;
+ struct lov_oinfo *loi = lsm->lsm_oinfo[i];
+
+ /* stripes whose target slot is empty are skipped, not failed */
+ if (!lov->lov_tgts[loi->loi_ost_idx]) {
+ CDEBUG(D_HA, "lov idx %d NULL \n", loi->loi_ost_idx);
+ continue;
+ }
+
+ /* describe just this stripe object to its target; only the
+ * object id, group and a zero stripe count are filled in */
+ submd.lsm_object_id = loi->loi_id;
+ submd.lsm_object_gr = loi->loi_gr;
+ submd.lsm_stripe_count = 0;
+ rc = obd_find_cbdata(lov->lov_tgts[loi->loi_ost_idx]->ltd_exp,
+ &submd, it, data);
+ if (rc != 0)
+ RETURN(rc);
+ }
+ RETURN(rc);
+}
+
static int lov_cancel(struct obd_export *exp, struct lov_stripe_md *lsm,
__u32 mode, struct lustre_handle *lockh)
{
.o_sync = lov_sync,
.o_enqueue = lov_enqueue,
.o_change_cbdata = lov_change_cbdata,
+ .o_find_cbdata = lov_find_cbdata,
.o_cancel = lov_cancel,
.o_cancel_unused = lov_cancel_unused,
.o_iocontrol = lov_iocontrol,
int mdc_change_cbdata(struct obd_export *exp, const struct lu_fid *fid,
ldlm_iterator_t it, void *data);
+int mdc_find_cbdata(struct obd_export *exp, const struct lu_fid *fid,
+ ldlm_iterator_t it, void *data);
+
int mdc_intent_lock(struct obd_export *exp,
struct md_op_data *,
void *lmm, int lmmsize,
return 0;
}
+/* Find any ldlm lock of the inode in mdc.
+ * Builds the resource name from \a fid and runs \a it over that resource
+ * in the export's namespace via ldlm_resource_iterate().
+ * return 0 no lock found (no such resource, or LDLM_ITER_CONTINUE)
+ * 1 a lock was found (the iteration ended with LDLM_ITER_STOP)
+ * < 0 error */
+int mdc_find_cbdata(struct obd_export *exp,
+ const struct lu_fid *fid,
+ ldlm_iterator_t it, void *data)
+{
+ struct ldlm_res_id res_id;
+ int rc = 0;
+ ENTRY;
+
+ fid_build_reg_res_name((struct lu_fid*)fid, &res_id);
+ rc = ldlm_resource_iterate(class_exp2obd(exp)->obd_namespace, &res_id,
+ it, data);
+ if (rc == LDLM_ITER_STOP)
+ RETURN(1);
+ else if (rc == LDLM_ITER_CONTINUE)
+ RETURN(0);
+ RETURN(rc);
+}
+
static inline void mdc_clear_replay_flag(struct ptlrpc_request *req, int rc)
{
/* Don't hold error requests for replay. */
struct md_ops mdc_md_ops = {
.m_getstatus = mdc_getstatus,
.m_change_cbdata = mdc_change_cbdata,
+ .m_find_cbdata = mdc_find_cbdata,
.m_close = mdc_close,
.m_create = mdc_create,
.m_done_writing = mdc_done_writing,
LPROCFS_OBD_OP_INIT(num_private_stats, stats, commitrw);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, enqueue);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, change_cbdata);
+ LPROCFS_OBD_OP_INIT(num_private_stats, stats, find_cbdata);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, cancel_unused);
LPROCFS_OBD_OP_INIT(num_private_stats, stats, init_export);
LPROCFS_MD_OP_INIT(num_private_stats, stats, getstatus);
LPROCFS_MD_OP_INIT(num_private_stats, stats, change_cbdata);
+ LPROCFS_MD_OP_INIT(num_private_stats, stats, find_cbdata);
LPROCFS_MD_OP_INIT(num_private_stats, stats, close);
LPROCFS_MD_OP_INIT(num_private_stats, stats, create);
LPROCFS_MD_OP_INIT(num_private_stats, stats, done_writing);
return 0;
}
+/* Find any ldlm lock of the inode in osc.
+ * Builds the resource name from the object id and group in \a lsm and runs
+ * the iterator \a replace over that resource in the device's namespace via
+ * ldlm_resource_iterate().
+ * return 0 no lock found (no such resource, or LDLM_ITER_CONTINUE)
+ * 1 a lock was found (the iteration ended with LDLM_ITER_STOP)
+ * < 0 error */
+static int osc_find_cbdata(struct obd_export *exp, struct lov_stripe_md *lsm,
+ ldlm_iterator_t replace, void *data)
+{
+ struct ldlm_res_id res_id;
+ struct obd_device *obd = class_exp2obd(exp);
+ int rc = 0;
+
+ osc_build_res_name(lsm->lsm_object_id, lsm->lsm_object_gr, &res_id);
+ rc = ldlm_resource_iterate(obd->obd_namespace, &res_id, replace, data);
+ if (rc == LDLM_ITER_STOP)
+ return(1);
+ if (rc == LDLM_ITER_CONTINUE)
+ return(0);
+ return(rc);
+}
+
static int osc_enqueue_fini(struct ptlrpc_request *req, struct ost_lvb *lvb,
obd_enqueue_update_f upcall, void *cookie,
int *flags, int rc)
.o_sync = osc_sync,
.o_enqueue = osc_enqueue,
.o_change_cbdata = osc_change_cbdata,
+ .o_find_cbdata = osc_find_cbdata,
.o_cancel = osc_cancel,
.o_cancel_unused = osc_cancel_unused,
.o_iocontrol = osc_iocontrol,
# buffer i/o errs sock spc runas
[ "$CPU" = "UML" ] && EXCEPT="$EXCEPT 27m 27n 27o 27p 27q 27r 31d 54a 64b 99a 99b 99c 99d 99e 99f 101"
-# test76 is not valid with FIDs because inode numbers are not reused
-ALWAYS_EXCEPT="$ALWAYS_EXCEPT 76"
-
case `uname -r` in
2.4*) FSTYPE=${FSTYPE:-ext3} ;;
2.6*) FSTYPE=${FSTYPE:-ldiskfs} ;;
awk '/lustre_inode_cache/ {print $2; exit}' /proc/slabinfo
}
-test_76() { # bug 1443
- DETH=$(grep deathrow /proc/kallsyms /proc/ksyms 2> /dev/null | wc -l)
- [ $DETH -eq 0 ] && skip "No _iget." && return 0
- BEFORE_INODES=`num_inodes`
+test_76() { # Now for bug 20433, added originally in bug 1443
+ cancel_lru_locks osc
+ BEFORE_INODES=`num_inodes`
echo "before inodes: $BEFORE_INODES"
local COUNT=1000
[ "$SLOW" = "no" ] && COUNT=100
touch $DIR/$tfile
rm -f $DIR/$tfile
done
+ cancel_lru_locks osc
AFTER_INODES=`num_inodes`
echo "after inodes: $AFTER_INODES"
- [ $AFTER_INODES -gt $((BEFORE_INODES + 32)) ] && \
- error "inode slab grew from $BEFORE_INODES to $AFTER_INODES"
- true
+ local wait=0
+ while [ $AFTER_INODES -gt $BEFORE_INODES ]; do
+ sleep 2
+ AFTER_INODES=`num_inodes`
+ wait=$((wait+2))
+ echo "wait $wait seconds inodes: $AFTER_INODES"
+ if [ $wait -gt 30 ]; then
+ error "inode slab grew from $BEFORE_INODES to $AFTER_INODES"
+ fi
+ done
}
-run_test 76 "destroy duplicate inodes in client inode cache ===="
+run_test 76 "confirm clients recycle inodes properly ===="
+
export ORIG_CSUM=""
set_checksums()