Frequency : rare
Bugzilla : 6200
Description: A bug in MDS/OSS recovery could cause the OSS to fail an assertion
-Details : There's little harm in just aborting MDS/OSS recovery and letting
- it try again next time, so I removed the LASSERT and return an error instead.
+Details : There's little harm in aborting MDS/OSS recovery and letting it
+ try again, so I removed the LASSERT and return an error instead.
Severity : enhancement
Bugzilla : 5902
(b) cover more of the client-side code path, and (c) try to narrow
down where problems occurred
+Severity : major
+Frequency : rare
+Bugzilla : 3819, 4364, 4397, 6313
+Description: Racing close and eviction MDS could cause assertion in mds_close
+Details : It was possible to get multiple mfd references during close and
+ client eviction, leading to one thread referencing a freed mfd.
+
------------------------------------------------------------------------------
2005-05-05 Cluster File Systems, Inc. <info@clusterfs.com>
- fix ppc64/x86_64 spec to use %{_libdir} instead of /usr/lib (5389)
- remove ancient LOV_MAGIC_V0 EA support (5047)
- add "disk I/Os in flight" and "I/O req time" stats in obdfilter
- - align r/w RPCs to PTLRPC_MAX_BRW_SIZE boundaries for performance
+ - align r/w RPCs to PTLRPC_MAX_BRW_SIZE boundary for performance (3451)
- allow readahead allocations to fail when low on memory (5383)
- mmap locking landed again, after considerable improvement (2828)
- add get_hostaddr() to lustreDB.py for LDAP support (5459)
struct list_head *tmp = med->med_open_head.next;
struct mds_file_data *mfd =
list_entry(tmp, struct mds_file_data, mfd_list);
- BDEVNAME_DECLARE_STORAGE(btmp);
-
- /* bug 1579: fix force-closing for 2.5 */
struct dentry *dentry = mfd->mfd_dentry;
- list_del(&mfd->mfd_list);
+ /* Remove mfd handle so it can't be found again.
+ * We are consuming the mfd_list reference here. */
+ mds_mfd_unlink(mfd, 0);
spin_unlock(&med->med_open_lock);
/* If you change this message, be sure to update
* replay_single:test_46 */
- CDEBUG(D_INODE|D_IOCTL, "force closing file handle for %.*s (%s:%lu)\n",
- dentry->d_name.len, dentry->d_name.name,
- ll_bdevname(dentry->d_inode->i_sb, btmp),
- dentry->d_inode->i_ino);
+ CDEBUG(D_INODE|D_IOCTL, "%s: force closing file handle for "
+ "%.*s (ino %lu)\n", obd->obd_name, dentry->d_name.len,
+ dentry->d_name.name, dentry->d_inode->i_ino);
/* child orphan sem protects orphan_dec_test and
* is_orphan race, mds_mfd_close drops it */
MDS_DOWN_WRITE_ORPHAN_SEM(dentry->d_inode);
atomic_read(&mfd->mfd_refcount));
}
+/* Create a new mds_file_data struct.
+ * One reference for handle+med_open_head list and dropped by mds_mfd_unlink(),
+ * one reference for the caller of this function. */
struct mds_file_data *mds_mfd_new(void)
{
struct mds_file_data *mfd;
atomic_set(&mfd->mfd_refcount, 2);
INIT_LIST_HEAD(&mfd->mfd_handle.h_link);
+ INIT_LIST_HEAD(&mfd->mfd_list);
class_handle_hash(&mfd->mfd_handle, mds_mfd_addref);
return mfd;
}
+/* Get a new reference on the mfd pointed to by handle, if handle is still
+ * valid. Caller must drop reference with mds_mfd_put(). */
static struct mds_file_data *mds_handle2mfd(struct lustre_handle *handle)
{
ENTRY;
RETURN(class_handle2object(handle->cookie));
}
+/* Drop mfd reference, freeing struct if this is the last one. */
static void mds_mfd_put(struct mds_file_data *mfd)
{
CDEBUG(D_INFO, "PUTting mfd %p : new refcount %d\n", mfd,
}
}
-static void mds_mfd_destroy(struct mds_file_data *mfd)
+/* Remove the mfd handle so that it cannot be found by open/close again.
+ * Caller must hold med_open_lock for mfd_list manipulation. */
+void mds_mfd_unlink(struct mds_file_data *mfd, int decref)
{
class_handle_unhash(&mfd->mfd_handle);
- mds_mfd_put(mfd);
+ list_del_init(&mfd->mfd_list);
+ if (decref)
+ mds_mfd_put(mfd);
}
-
/* Caller must hold mds->mds_epoch_sem */
static int mds_alloc_filterdata(struct inode *inode)
{
spin_lock(&med->med_open_lock);
list_add(&mfd->mfd_list, &med->med_open_head);
spin_unlock(&med->med_open_lock);
- mds_mfd_put(mfd);
body->handle.cookie = mfd->mfd_handle.h_cookie;
cleanup_mfd:
mds_mfd_put(mfd);
- mds_mfd_destroy(mfd);
+ mds_mfd_unlink(mfd, 1);
cleanup_dentry:
return ERR_PTR(error);
}
GOTO(out_dput, 0);
mfd = NULL;
+ spin_lock(&med->med_open_lock);
list_for_each(t, &med->med_open_head) {
mfd = list_entry(t, struct mds_file_data, mfd_list);
- if (mfd->mfd_xid == req->rq_xid)
+ if (mfd->mfd_xid == req->rq_xid) {
+ mds_mfd_addref(mfd);
break;
+ }
mfd = NULL;
}
+ spin_unlock(&med->med_open_lock);
/* #warning "XXX fixme" bug 2991 */
/* Here it used to LASSERT(mfd) if exp_outstanding_reply != NULL.
mfd->mfd_handle.h_cookie);
}
+ mds_mfd_put(mfd);
+
out_dput:
if (put_child)
l_dput(dchild);
CDEBUG(D_INODE, "mfd %p, cookie "LPX64"\n", mfd,
mfd->mfd_handle.h_cookie);
+
if (ids != NULL) {
mds_lov_update_objids(obd, ids);
OBD_FREE(ids, sizeof(*ids) * mds->mds_lov_desc.ld_tgt_count);
}
- //if (rc)
- // mds_mfd_destroy(mfd);
+ if (rc)
+ mds_mfd_unlink(mfd, 1);
+ mds_mfd_put(mfd);
RETURN(rc);
}
if (rc > 0)
rc = 0;
l_dput(mfd->mfd_dentry);
- mds_mfd_destroy(mfd);
+ mds_mfd_put(mfd);
cleanup:
if (req != NULL && reply_body != NULL) {
if (body->flags & MDS_BFLAG_UNCOMMITTED_WRITES)
/* do some stuff */ ;
+ spin_lock(&med->med_open_lock);
mfd = mds_handle2mfd(&body->handle);
if (mfd == NULL) {
+ spin_unlock(&med->med_open_lock);
DEBUG_REQ(D_ERROR, req, "no handle for file close ino "LPD64
": cookie "LPX64, body->fid1.id, body->handle.cookie);
req->rq_status = -ESTALE;
RETURN(-ESTALE);
}
+ /* Remove mfd handle so it can't be found again. We consume mfd_list
+ * reference here, but still have mds_handle2mfd ref until mfd_close. */
+ mds_mfd_unlink(mfd, 1);
+ spin_unlock(&med->med_open_lock);
inode = mfd->mfd_dentry->d_inode;
/* child orphan sem protects orphan_dec_test && is_orphan race */
mds_pack_inode2body(body, inode);
mds_pack_md(obd, req->rq_repmsg, 1,body,inode,MDS_PACK_MD_LOCK);
}
- spin_lock(&med->med_open_lock);
- list_del(&mfd->mfd_list);
- spin_unlock(&med->med_open_lock);
push_ctxt(&saved, &obd->obd_ctxt, NULL);
req->rq_status = mds_mfd_close(req, obd, mfd, 1);
pop_ctxt(&saved, &obd->obd_ctxt, NULL);
- mds_mfd_put(mfd);
if (OBD_FAIL_CHECK(OBD_FAIL_MDS_CLOSE_PACK)) {
CERROR("test case OBD_FAIL_MDS_CLOSE_PACK\n");
req->rq_status = -ENOMEM;