From 1e70391f4e65b9d3e5002b1cc24033ba409b5476 Mon Sep 17 00:00:00 2001 From: adilger Date: Wed, 18 May 2005 17:11:14 +0000 Subject: [PATCH] Branch b1_4 It was possible to get multiple mfd references during close and client eviction, leading to either mds_close() or mds_mfd_close() referencing a freed mfd. Now we remove the mfd from the mfd_list and the handle hash under lock so that once it starts on the road to destruction it is not possible to get a new reference to it. Also add comments on usage of mfd-related functions! b=3819, b=4364, b=4397, b=6313 r=green --- lustre/ChangeLog | 13 ++++++++++--- lustre/mds/handler.c | 14 ++++++-------- lustre/mds/mds_internal.h | 1 + lustre/mds/mds_open.c | 45 ++++++++++++++++++++++++++++++++------------- 4 files changed, 49 insertions(+), 24 deletions(-) diff --git a/lustre/ChangeLog b/lustre/ChangeLog index ddaa9b3..3f69618 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -35,8 +35,8 @@ Severity : major Frequency : rare Bugzilla : 6200 Description: A bug in MDS/OSS recovery could cause the OSS to fail an assertion -Details : There's little harm in just aborting MDS/OSS recovery and letting - it try again next time, so I removed the LASSERT and return an error instead. +Details : There's little harm in aborting MDS/OSS recovery and letting it + try again, so I removed the LASSERT and return an error instead. Severity : enhancement Bugzilla : 5902 @@ -45,6 +45,13 @@ Details : The I/O checksum code was replaced to: (a) control it at runtime, (b) cover more of the client-side code path, and (c) try to narrow down where problems occurred +Severity : major +Frequency : rare +Bugzilla : 3819, 4364, 4397, 6313 +Description: Racing close and eviction MDS could cause assertion in mds_close +Details : It was possible to get multiple mfd references during close and + client eviction, leading to one thread referencing a freed mfd. + ------------------------------------------------------------------------------ 2005-05-05 Cluster File Systems, Inc. @@ -170,7 +177,7 @@ Details : The I/O checksum code was replaced to: (a) control it at runtime, - fix ppc64/x86_64 spec to use %{_libdir} instead of /usr/lib (5389) - remove ancient LOV_MAGIC_V0 EA support (5047) - add "disk I/Os in flight" and "I/O req time" stats in obdfilter - - align r/w RPCs to PTLRPC_MAX_BRW_SIZE boundaries for performance + - align r/w RPCs to PTLRPC_MAX_BRW_SIZE boundary for performance (3451) - allow readahead allocations to fail when low on memory (5383) - mmap locking landed again, after considerable improvement (2828) - add get_hostaddr() to lustreDB.py for LDAP support (5459) diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 6acf36b..1f8efb6 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -339,20 +339,18 @@ static int mds_destroy_export(struct obd_export *export) struct list_head *tmp = med->med_open_head.next; struct mds_file_data *mfd = list_entry(tmp, struct mds_file_data, mfd_list); - BDEVNAME_DECLARE_STORAGE(btmp); - - /* bug 1579: fix force-closing for 2.5 */ struct dentry *dentry = mfd->mfd_dentry; - list_del(&mfd->mfd_list); + /* Remove mfd handle so it can't be found again. + * We are consuming the mfd_list reference here. */ + mds_mfd_unlink(mfd, 0); spin_unlock(&med->med_open_lock); /* If you change this message, be sure to update * replay_single:test_46 */ - CDEBUG(D_INODE|D_IOCTL, "force closing file handle for %.*s (%s:%lu)\n", - dentry->d_name.len, dentry->d_name.name, - ll_bdevname(dentry->d_inode->i_sb, btmp), - dentry->d_inode->i_ino); + CDEBUG(D_INODE|D_IOCTL, "%s: force closing file handle for " + "%.*s (ino %lu)\n", obd->obd_name, dentry->d_name.len, + dentry->d_name.name, dentry->d_inode->i_ino); /* child orphan sem protects orphan_dec_test and * is_orphan race, mds_mfd_close drops it */ MDS_DOWN_WRITE_ORPHAN_SEM(dentry->d_inode); diff --git a/lustre/mds/mds_internal.h b/lustre/mds/mds_internal.h index 421fa75..c6540e2 100644 --- a/lustre/mds/mds_internal.h +++ b/lustre/mds/mds_internal.h @@ -162,6 +162,7 @@ int mds_query_write_access(struct inode *inode); int mds_open(struct mds_update_record *rec, int offset, struct ptlrpc_request *req, struct lustre_handle *); int mds_pin(struct ptlrpc_request *req); +void mds_mfd_unlink(struct mds_file_data *mfd, int decref); int mds_mfd_close(struct ptlrpc_request *req, struct obd_device *obd, struct mds_file_data *mfd, int unlink_orphan); int mds_close(struct ptlrpc_request *req); diff --git a/lustre/mds/mds_open.c b/lustre/mds/mds_open.c index cb09d80..54af699 100644 --- a/lustre/mds/mds_open.c +++ b/lustre/mds/mds_open.c @@ -68,6 +68,9 @@ static void mds_mfd_addref(void *mfdp) atomic_read(&mfd->mfd_refcount)); } +/* Create a new mds_file_data struct. + * One reference for handle+med_open_head list and dropped by mds_mfd_unlink(), + * one reference for the caller of this function. */ struct mds_file_data *mds_mfd_new(void) { struct mds_file_data *mfd; @@ -81,11 +84,14 @@ struct mds_file_data *mds_mfd_new(void) atomic_set(&mfd->mfd_refcount, 2); INIT_LIST_HEAD(&mfd->mfd_handle.h_link); + INIT_LIST_HEAD(&mfd->mfd_list); class_handle_hash(&mfd->mfd_handle, mds_mfd_addref); return mfd; } +/* Get a new reference on the mfd pointed to by handle, if handle is still + * valid. Caller must drop reference with mds_mfd_put(). */ static struct mds_file_data *mds_handle2mfd(struct lustre_handle *handle) { ENTRY; @@ -93,6 +99,7 @@ static struct mds_file_data *mds_handle2mfd(struct lustre_handle *handle) RETURN(class_handle2object(handle->cookie)); } +/* Drop mfd reference, freeing struct if this is the last one. */ static void mds_mfd_put(struct mds_file_data *mfd) { CDEBUG(D_INFO, "PUTting mfd %p : new refcount %d\n", mfd, @@ -105,13 +112,16 @@ static void mds_mfd_put(struct mds_file_data *mfd) } } -static void mds_mfd_destroy(struct mds_file_data *mfd) +/* Remove the mfd handle so that it cannot be found by open/close again. + * Caller must hold med_open_lock for mfd_list manipulation. */ +void mds_mfd_unlink(struct mds_file_data *mfd, int decref) { class_handle_unhash(&mfd->mfd_handle); - mds_mfd_put(mfd); + list_del_init(&mfd->mfd_list); + if (decref) + mds_mfd_put(mfd); } - /* Caller must hold mds->mds_epoch_sem */ static int mds_alloc_filterdata(struct inode *inode) { @@ -278,7 +288,6 @@ static struct mds_file_data *mds_dentry_open(struct dentry *dentry, spin_lock(&med->med_open_lock); list_add(&mfd->mfd_list, &med->med_open_head); spin_unlock(&med->med_open_lock); - mds_mfd_put(mfd); body->handle.cookie = mfd->mfd_handle.h_cookie; @@ -286,7 +295,7 @@ static struct mds_file_data *mds_dentry_open(struct dentry *dentry, cleanup_mfd: mds_mfd_put(mfd); - mds_mfd_destroy(mfd); + mds_mfd_unlink(mfd, 1); cleanup_dentry: return ERR_PTR(error); } @@ -586,12 +595,16 @@ static void reconstruct_open(struct mds_update_record *rec, int offset, GOTO(out_dput, 0); mfd = NULL; + spin_lock(&med->med_open_lock); list_for_each(t, &med->med_open_head) { mfd = list_entry(t, struct mds_file_data, mfd_list); - if (mfd->mfd_xid == req->rq_xid) + if (mfd->mfd_xid == req->rq_xid) { + mds_mfd_addref(mfd); break; + } mfd = NULL; } + spin_unlock(&med->med_open_lock); /* #warning "XXX fixme" bug 2991 */ /* Here it used to LASSERT(mfd) if exp_outstanding_reply != NULL. @@ -613,6 +626,8 @@ static void reconstruct_open(struct mds_update_record *rec, int offset, mfd->mfd_handle.h_cookie); } + mds_mfd_put(mfd); + out_dput: if (put_child) l_dput(dchild); @@ -701,12 +716,14 @@ static int mds_finish_open(struct ptlrpc_request *req, struct dentry *dchild, CDEBUG(D_INODE, "mfd %p, cookie "LPX64"\n", mfd, mfd->mfd_handle.h_cookie); + if (ids != NULL) { mds_lov_update_objids(obd, ids); OBD_FREE(ids, sizeof(*ids) * mds->mds_lov_desc.ld_tgt_count); } - //if (rc) - // mds_mfd_destroy(mfd); + if (rc) + mds_mfd_unlink(mfd, 1); + mds_mfd_put(mfd); RETURN(rc); } @@ -1289,7 +1306,7 @@ out: if (rc > 0) rc = 0; l_dput(mfd->mfd_dentry); - mds_mfd_destroy(mfd); + mds_mfd_put(mfd); cleanup: if (req != NULL && reply_body != NULL) { @@ -1343,13 +1360,19 @@ int mds_close(struct ptlrpc_request *req) if (body->flags & MDS_BFLAG_UNCOMMITTED_WRITES) /* do some stuff */ ; + spin_lock(&med->med_open_lock); mfd = mds_handle2mfd(&body->handle); if (mfd == NULL) { + spin_unlock(&med->med_open_lock); DEBUG_REQ(D_ERROR, req, "no handle for file close ino "LPD64 ": cookie "LPX64, body->fid1.id, body->handle.cookie); req->rq_status = -ESTALE; RETURN(-ESTALE); } + /* Remove mfd handle so it can't be found again. We consume mfd_list + * reference here, but still have mds_handle2mfd ref until mfd_close. */ + mds_mfd_unlink(mfd, 1); + spin_unlock(&med->med_open_lock); inode = mfd->mfd_dentry->d_inode; /* child orphan sem protects orphan_dec_test && is_orphan race */ @@ -1362,15 +1385,11 @@ int mds_close(struct ptlrpc_request *req) mds_pack_inode2body(body, inode); mds_pack_md(obd, req->rq_repmsg, 1,body,inode,MDS_PACK_MD_LOCK); } - spin_lock(&med->med_open_lock); - list_del(&mfd->mfd_list); - spin_unlock(&med->med_open_lock); push_ctxt(&saved, &obd->obd_ctxt, NULL); req->rq_status = mds_mfd_close(req, obd, mfd, 1); pop_ctxt(&saved, &obd->obd_ctxt, NULL); - mds_mfd_put(mfd); if (OBD_FAIL_CHECK(OBD_FAIL_MDS_CLOSE_PACK)) { CERROR("test case OBD_FAIL_MDS_CLOSE_PACK\n"); req->rq_status = -ENOMEM; -- 1.8.3.1