#include <linux/lustre_dlm.h>
#include <linux/init.h>
#include <linux/obd_lov.h>
+#include <linux/lprocfs_status.h>
#define REQUEST_MINOR 244
extern int mds_queue_req(struct ptlrpc_request *);
+extern lprocfs_vars_t status_var_nm_1[];
+extern lprocfs_vars_t status_class_var[];
-int mdc_getstatus(struct lustre_handle *conn, struct ll_fid *rootfid,
- __u64 *last_committed, __u64 *last_xid,
- struct ptlrpc_request **request)
+/* should become mdc_getinfo() */
+int mdc_getstatus(struct lustre_handle *conn, struct ll_fid *rootfid)
{
struct ptlrpc_request *req;
struct mds_body *body;
body = lustre_msg_buf(req->rq_repmsg, 0);
mds_unpack_body(body);
memcpy(rootfid, &body->fid1, sizeof(*rootfid));
- *last_committed = req->rq_repmsg->last_committed;
- *last_xid = req->rq_repmsg->last_xid;
- CDEBUG(D_NET,"root ino=%ld, last_committed=%Lu, last_xid=%Ld\n",
- (unsigned long)rootfid->id,
- (unsigned long long)*last_committed,
- (unsigned long long)*last_xid);
+ CDEBUG(D_NET, "root ino="LPU64", last_committed="LPU64
+ ", last_xid="LPU64"\n",
+ rootfid->id, req->rq_repmsg->last_committed,
+ req->rq_repmsg->last_xid);
}
EXIT;
return rc;
}
+/*
+ * Delete the dentry that aliases @inode, if there is one.
+ *
+ * The inode's i_dentry list is walked under dcache_lock to count aliases;
+ * more than one alias trips an assertion.  When exactly one alias exists a
+ * reference is taken on it before the lock is dropped, it is passed to
+ * d_delete(), and the reference is released again.
+ */
+static void d_delete_aliases(struct inode *inode)
+{
+        struct dentry *alias = NULL;
+        struct list_head *pos;
+        int nr_aliases = 0;
+        ENTRY;
+
+        spin_lock(&dcache_lock);
+        list_for_each(pos, &inode->i_dentry) {
+                alias = list_entry(pos, struct dentry, d_alias);
+                nr_aliases++;
+        }
+
+        /* XXX FIXME tell phil/peter that you see this -- unless you're playing
+         * with hard links, in which case, stop. */
+        LASSERT(nr_aliases <= 1);
+
+        if (nr_aliases != 0) {
+                CDEBUG(D_INODE, "d_deleting dentry %p\n", alias);
+                /* pin the dentry before letting go of dcache_lock */
+                dget_locked(alias);
+                spin_unlock(&dcache_lock);
+                d_delete(alias);
+                dput(alias);
+        } else {
+                /* nothing aliases this inode */
+                spin_unlock(&dcache_lock);
+        }
+
+        EXIT;
+}
+
+/*
+ * Lock callback for MDC locks.
+ *
+ * LDLM_CB_BLOCKING:  cancel the lock; a negative result from
+ *                    ldlm_cli_cancel() is logged and returned.
+ * LDLM_CB_CANCELING: @data is the inode the lock covers (@data_len must be
+ *                    sizeof(struct inode)); directory pages are invalidated
+ *                    and the inode's dentry alias is deleted.
+ * Any other @flag is a bug.  Returns 0 unless the cancel failed.
+ */
static int mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc,
void *data, __u32 data_len, int flag)
{
int rc;
- struct inode *inode = data;
struct lustre_handle lockh;
ENTRY;
- if (data_len != sizeof(*inode)) {
- CERROR("data_len should be %d, but is %d\n", sizeof(*inode),
- data_len);
- LBUG();
- RETURN(-EINVAL);
- }
-
switch (flag) {
case LDLM_CB_BLOCKING:
ldlm_lock2handle(lock, &lockh);
rc = ldlm_cli_cancel(&lockh);
if (rc < 0) {
- CERROR("ldlm_cli_cancel: %d\n", rc);
- LBUG();
+ CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc);
+ RETURN(rc);
}
break;
- case LDLM_CB_CANCELING:
- /* FIXME: do something better than throwing away everything */
- if (inode == NULL)
- LBUG();
+ case LDLM_CB_CANCELING: {
+ /* Invalidate all dentries associated with this inode */
+ struct inode *inode = data;
+ struct inode *held;
+
+ LASSERT(inode != NULL);
+ LASSERT(data_len == sizeof(*inode));
+
if (S_ISDIR(inode->i_mode)) {
CDEBUG(D_INODE, "invalidating inode %ld\n",
inode->i_ino);
- invalidate_inode_pages(inode);
+ ll_invalidate_inode_pages(inode);
}
+
+ /* Take the reference outside the assertion: the iput() below must
+ * stay balanced even if LASSERT() is ever compiled out. */
+ held = igrab(inode);
+ LASSERT(held == inode);
+ d_delete_aliases(inode);
+ iput(inode);
break;
+ }
default:
LBUG();
}
RETURN(0);
}
+struct create_replay_data { /* stashed in request buffer 5 of an intent create; read back by the create replay callback */
+ struct super_block *sb; /* superblock supplied by the caller of mdc_store_create_replay_data(); used for the inode lookup at replay */
+ u32 generation; /* generation taken from the create reply body; compared with inode->i_generation at replay */
+};
+
+/*
+ * Inode-match callback used by mdc_replay_create() when looking up the inode
+ * of a replayed create.  @opaque is the replayed request.  The pre-2.5
+ * prototype carries an extra @ino argument, which is not used.
+ *
+ * Returns 0 if @inode's generation differs from the one saved when the
+ * create was first sent.  Otherwise the inode's generation is overwritten
+ * with the generation from the replay reply and 1 is returned.
+ */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+static int create_replay_find_inode(struct inode *inode, unsigned long ino,
+                                    void *opaque)
+#else
+static int create_replay_find_inode(struct inode *inode, void *opaque)
+#endif
+{
+        struct ptlrpc_request *req = opaque;
+        struct create_replay_data *saved;
+        struct mds_body *body;
+
+        saved = lustre_msg_buf(req->rq_reqmsg, 5); /* lock with intent */
+
+        if (saved->generation != inode->i_generation) {
+                /* i_ino is long-sized (it is printed with %ld elsewhere in
+                 * this file), so it needs %lu rather than %u */
+                CDEBUG(D_HA,
+                       "generation mismatch for ino %lu: saved %u != inode %u\n",
+                       inode->i_ino, saved->generation, inode->i_generation);
+                return 0;
+        }
+
+        body = lustre_msg_buf(req->rq_repmsg, 1);
+
+        /* XXX do I need more out of ll_update_inode? */
+        CDEBUG(D_HA, "updating inode %lu generation %u to %u\n",
+               inode->i_ino, inode->i_generation, body->generation);
+
+        inode->i_generation = body->generation;
+
+        return 1;
+}
+
+/*
+ * Rewrite the FID generation(s) carried by a pending request so that they
+ * match @inode's current i_generation.
+ *
+ * @fixreq: request patched in place; skipped unless it uses @req's import
+ * @req:    the replayed create request (only its import is consulted)
+ * @inode:  inode whose i_ino selects matching FIDs and whose i_generation
+ *          is copied into them
+ *
+ * Only LDLM_ENQUEUE, MDS_REINT and MDS_OPEN requests are touched; anything
+ * else is logged and left alone.
+ */
+static void fixup_req_for_recreate(struct ptlrpc_request *fixreq,
+                                   struct ptlrpc_request *req,
+                                   struct inode *inode)
+{
+        struct ldlm_request *lockreq;
+        struct mds_rec_link *rec; /* representative, two-fid op structure */
+        int opc;
+
+        if (fixreq->rq_import != req->rq_import) {
+                DEBUG_REQ(D_HA, fixreq, "import mismatch, skipping");
+                return;
+        }
+
+        DEBUG_REQ(D_HA, fixreq, "fixing");
+
+        /* XXX check replay_state to see if we'll actually replay. */
+
+        /* We only care about LDLM_ENQUEUE, MDS_REINT and MDS_OPEN requests. */
+        if (fixreq->rq_reqmsg->opc == LDLM_ENQUEUE) {
+                lockreq = lustre_msg_buf(fixreq->rq_reqmsg, 0);
+
+                /* NOTE(review): this skips only locks that are both non-PLAIN
+                 * and intent-less; confirm that "&&" (not "||") is intended. */
+                if (lockreq->lock_desc.l_resource.lr_type != LDLM_PLAIN &&
+                    !(lockreq->lock_flags & LDLM_FL_HAS_INTENT)) {
+                        DEBUG_REQ(D_HA, fixreq, "non-intent lock, skipping");
+                        return;
+                }
+
+                /* The intent record is read from buffer 2 below, so at least
+                 * three buffers must be present. */
+                if (fixreq->rq_reqmsg->bufcount < 3) {
+                        DEBUG_REQ(D_HA, fixreq,
+                                  "short intent (probably readdir), skipping");
+                        return;
+                }
+
+                /* XXX endianness is probably very very wrong here.  Very. */
+                rec = lustre_msg_buf(fixreq->rq_reqmsg, 2);
+        } else if (fixreq->rq_reqmsg->opc == MDS_REINT) {
+                rec = lustre_msg_buf(fixreq->rq_reqmsg, 0);
+        } else if (fixreq->rq_reqmsg->opc == MDS_OPEN) {
+                struct mds_body *body = lustre_msg_buf(fixreq->rq_reqmsg, 0);
+
+                /* Only opens of this inode get the new generation, mirroring
+                 * the id checks applied to fid1/fid2 below. */
+                if (body->fid1.id == inode->i_ino) {
+                        DEBUG_REQ(D_HA, fixreq, "fixing fid1: %u -> %u",
+                                  body->fid1.generation, inode->i_generation);
+                        body->fid1.generation = inode->i_generation;
+                }
+                return;
+        } else {
+                DEBUG_REQ(D_HA, fixreq, "not a replayable request, skipping");
+                return;
+        }
+
+        if (rec->lk_fid1.id == inode->i_ino) {
+                DEBUG_REQ(D_HA, fixreq, "fixing fid1: %u -> %u",
+                          rec->lk_fid1.generation, inode->i_generation);
+                rec->lk_fid1.generation = inode->i_generation;
+        }
+
+        /* Some ops have two FIDs.  ZZZ We rely on the identical
+         * placement of that second FID in all such ops' messages.
+         */
+        opc = rec->lk_opcode & REINT_OPCODE_MASK;
+        if ((opc == REINT_LINK || opc == REINT_UNLINK ||
+             opc == REINT_RENAME) &&
+            rec->lk_fid2.id == inode->i_ino) {
+                DEBUG_REQ(D_HA, fixreq, "fixing fid2: %u -> %u",
+                          rec->lk_fid2.generation, inode->i_generation);
+                rec->lk_fid2.generation = inode->i_generation;
+        }
+}
+
+/*
+ * Replay callback for an intent create (installed by
+ * mdc_store_create_replay_data()).
+ *
+ * Looks up the inode named by the replay reply, letting
+ * create_replay_find_inode() move it to the new generation, then walks the
+ * connection's sending and delayed request lists so that every queued
+ * request referring to that inode is patched to the new generation.
+ * Non-intent (MDS_REINT) creates are not handled and hit LBUG().
+ */
+static void mdc_replay_create(struct ptlrpc_request *req)
+{
+        struct create_replay_data *saved;
+        struct mds_body *body;
+        struct inode *inode = NULL; /* stays NULL if no lookup is compiled in */
+        struct list_head *tmp;
+
+        if (req->rq_reqmsg->opc == MDS_REINT)
+                LBUG(); /* XXX don't handle the non-intent case yet */
+
+        body = lustre_msg_buf(req->rq_repmsg, 1);
+        saved = lustre_msg_buf(req->rq_reqmsg, 5); /* lock with intent */
+
+        /* fid ids are 64-bit (printed with LPU64 elsewhere in this file) */
+        CDEBUG(D_HA, "create of inode "LPU64" replayed; gen %u -> %u\n",
+               body->fid1.id, saved->generation, body->generation);
+        /* XXX cargo-culted right out of ll_iget */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+        inode = iget4(saved->sb, body->fid1.id, create_replay_find_inode, req);
+#endif
+#if 0
+        {
+                extern int ll_read_inode2(struct inode *inode, void *opaque);
+                inode = iget5_locked(saved->sb, body->fid1.id,
+                                     create_replay_find_inode,
+                                     ll_read_inode2, req);
+
+                if (!inode)
+                        LBUG(); /* XXX ick */
+
+                if (inode->i_state & I_NEW)
+                        unlock_new_inode(inode);
+        }
+#endif
+
+        /* Without an inode there is no generation to propagate; bail out
+         * rather than hand a NULL or garbage pointer to the fixup code. */
+        if (inode == NULL) {
+                CERROR("no inode found for replayed create, skipping fixup\n");
+                return;
+        }
+
+        /* Now that we've updated the generation, we need to go and find all
+         * the other requests that refer to this file and will be replayed,
+         * and teach them about our new generation.
+         */
+        list_for_each(tmp, &req->rq_connection->c_sending_head) {
+                struct ptlrpc_request *fixreq =
+                        list_entry(tmp, struct ptlrpc_request, rq_list);
+
+                fixup_req_for_recreate(fixreq, req, inode);
+        }
+
+        list_for_each(tmp, &req->rq_connection->c_delayed_head) {
+                struct ptlrpc_request *fixreq =
+                        list_entry(tmp, struct ptlrpc_request, rq_list);
+
+                fixup_req_for_recreate(fixreq, req, inode);
+        }
+
+        /* drop the reference obtained by the lookup above */
+        iput(inode);
+}
+
+/*
+ * Record what is needed to replay an intent create: the generation returned
+ * in the reply body and the caller's superblock are written into request
+ * buffer 5, and mdc_replay_create() is installed as the replay callback.
+ * Non-intent (MDS_REINT) creates are not handled and hit LBUG().
+ */
+void mdc_store_create_replay_data(struct ptlrpc_request *req,
+                                  struct super_block *sb)
+{
+        struct create_replay_data *replay;
+        struct mds_body *reply_body;
+
+        replay = lustre_msg_buf(req->rq_reqmsg, 5);
+        reply_body = lustre_msg_buf(req->rq_repmsg, 1);
+
+        /* XXX don't handle the non-intent case yet */
+        if (req->rq_reqmsg->opc == MDS_REINT)
+                LBUG();
+
+        replay->sb = sb; /* XXX is this safe? */
+        replay->generation = reply_body->generation;
+
+        req->rq_replay_cb = mdc_replay_create;
+}
+
int mdc_enqueue(struct lustre_handle *conn, int lock_type,
struct lookup_intent *it, int lock_mode, struct inode *dir,
struct dentry *de, struct lustre_handle *lockh,
{
struct ptlrpc_request *req;
struct obd_device *obddev = class_conn2obd(conn);
- __u64 res_id[RES_NAME_SIZE] = {dir->i_ino};
- int size[5] = {sizeof(struct ldlm_request), sizeof(struct ldlm_intent)};
- int rc, flags;
+ __u64 res_id[RES_NAME_SIZE] = {dir->i_ino, (__u64)dir->i_generation};
+ int size[6] = {sizeof(struct ldlm_request), sizeof(struct ldlm_intent)};
+ int rc, flags = LDLM_FL_HAS_INTENT;
int repsize[3] = {sizeof(struct ldlm_reply),
sizeof(struct mds_body),
obddev->u.cli.cl_max_mds_easize};
struct ldlm_reply *dlm_rep;
struct ldlm_intent *lit;
+ struct ldlm_request *lockreq;
ENTRY;
LDLM_DEBUG_NOLOCK("mdsintent %s dir %ld", ldlm_it2str(it->it_op),
dir->i_ino);
- switch (it->it_op) {
- case IT_MKDIR:
- it->it_mode = (it->it_mode | S_IFDIR) & ~current->fs->umask;
- break;
- case (IT_CREAT|IT_OPEN):
- case IT_CREAT:
- it->it_mode |= S_IFREG; /* no break */
- case IT_MKNOD:
+ if (it->it_op & (IT_MKDIR | IT_CREAT | IT_SYMLINK | IT_MKNOD)) {
+ switch (it->it_op) {
+ case IT_MKDIR:
+ it->it_mode |= S_IFDIR;
+ break;
+ case (IT_CREAT|IT_OPEN):
+ case IT_CREAT:
+ it->it_mode |= S_IFREG;
+ break;
+ case IT_SYMLINK:
+ it->it_mode |= S_IFLNK;
+ break;
+ }
it->it_mode &= ~current->fs->umask;
- break;
- case IT_SYMLINK:
- it->it_mode = (it->it_mode | S_IFLNK) & ~current->fs->umask;
- break;
- }
- if (it->it_op & (IT_MKDIR | IT_CREAT | IT_SYMLINK | IT_MKNOD)) {
size[2] = sizeof(struct mds_rec_create);
size[3] = de->d_name.len + 1;
size[4] = tgtlen + 1;
- req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 5,
+ size[5] = sizeof(struct create_replay_data);
+ req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 6,
size, NULL);
if (!req)
RETURN(-ENOMEM);
/* pack the intended request */
mds_link_pack(req, 2, old_de->d_inode, dir,
- de->d_name.name, de->d_name.len);
+ de->d_name.name, de->d_name.len);
req->rq_replen = lustre_msg_size(3, repsize);
} else if (it->it_op == IT_UNLINK || it->it_op == IT_RMDIR) {
size[2] = sizeof(struct mds_rec_unlink);
RETURN(rc);
}
+ /* On replay, we don't want the lock granted. */
+ lockreq = lustre_msg_buf(req->rq_reqmsg, 0);
+ lockreq->lock_flags |= LDLM_FL_INTENT_ONLY;
+
dlm_rep = lustre_msg_buf(req->rq_repmsg, 0);
it->it_disposition = (int) dlm_rep->lock_policy_res1;
it->it_status = (int) dlm_rep->lock_policy_res2;
RETURN(0);
}
-static void mdc_replay_open(struct ptlrpc_request *req,
- struct lustre_handle *data)
+/*
+ * Cancel unused locks on the resource named by @inode's number and
+ * generation, in the namespace of the obd device behind @conn.  @flags is
+ * passed through to ldlm_cli_cancel_unused(), whose result is returned.
+ */
+int mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode,
+                      int flags)
{
+        struct obd_device *obd = class_conn2obd(conn);
+        __u64 res_id[RES_NAME_SIZE] = {inode->i_ino, inode->i_generation};
+        int rc;
+        ENTRY;
+
+        rc = ldlm_cli_cancel_unused(obd->obd_namespace, res_id, flags);
+        RETURN(rc);
+}
+
+struct replay_open_data { /* carried in the last buffer of an open request for use by mdc_replay_open() */
+ struct lustre_handle *fh; /* caller's file handle; overwritten with the handle from the reply when the open is replayed */
+};
+
+static void mdc_replay_open(struct ptlrpc_request *req) /* replay callback for opens: copies the handle from the replay reply into the saved file handle */
+{
+ int offset;
+ struct replay_open_data *saved;
struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 0);
+ if (lustre_msg_get_op_flags(req->rq_reqmsg) & MDS_OPEN_HAS_EA) /* mdc_open() sets this flag when an EA buffer precedes the replay data */
+ offset = 2;
+ else
+ offset = 1;
+
+ saved = lustre_msg_buf(req->rq_reqmsg, offset); /* replay data stored by mdc_open() */
mds_unpack_body(body);
CDEBUG(D_HA, "updating from "LPD64"/"LPD64" to "LPD64"/"LPD64"\n",
- data->addr, data->cookie, body->handle.addr, body->handle.cookie);
- memcpy(data, &body->handle, sizeof(*data));
+ saved->fh->addr, saved->fh->cookie,
+ body->handle.addr, body->handle.cookie);
+ memcpy(saved->fh, &body->handle, sizeof(body->handle)); /* writes through the pointer saved at open time */
}
int mdc_open(struct lustre_handle *conn, obd_id ino, int type, int flags,
struct ptlrpc_request **request)
{
struct mds_body *body;
- int rc, size[2] = {sizeof(*body)}, bufcount = 1;
+ struct replay_open_data *replay_data;
+ int rc, size[3] = {sizeof(*body), sizeof(*replay_data)}, bufcount = 2;
struct ptlrpc_request *req;
ENTRY;
if (lsm) {
- bufcount = 2;
- // size[1] = mdc->cl_max_mds_easize; soon...
+ bufcount = 3;
+ size[2] = size[1]; /* shuffle the spare data along */
+
size[1] = lsm->lsm_mds_easize;
}
if (!req)
GOTO(out, rc = -ENOMEM);
+ if (lsm)
+ lustre_msg_set_op_flags(req->rq_reqmsg, MDS_OPEN_HAS_EA);
+
+
req->rq_flags |= PTL_RPC_FL_REPLAY;
body = lustre_msg_buf(req->rq_reqmsg, 0);
/* If open is replayed, we need to fix up the fh. */
req->rq_replay_cb = mdc_replay_open;
- memcpy(&req->rq_replay_cb_handle, fh, sizeof(req->rq_replay_cb_handle));
-
+ replay_data = lustre_msg_buf(req->rq_reqmsg, lsm ? 2 : 1);
+ replay_data->fh = fh;
+
EXIT;
out:
*request = req;
return rc;
}
-int mdc_statfs(struct lustre_handle *conn, struct obd_statfs *osfs,
- struct ptlrpc_request **request)
+static int mdc_statfs(struct lustre_handle *conn, struct obd_statfs *osfs)
{
struct ptlrpc_request *req;
int rc, size = sizeof(*osfs);
req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_STATFS, 0, NULL,
NULL);
if (!req)
- GOTO(out, rc = -ENOMEM);
+ RETURN(-ENOMEM);
+
req->rq_replen = lustre_msg_size(1, &size);
rc = ptlrpc_queue_wait(req);
EXIT;
out:
- *request = req;
+ ptlrpc_req_finished(req);
return rc;
}
+/*
+ * o_attach method: registers @dev with lprocfs using the status_var_nm_1
+ * variable table, passing the device itself as the callback data.  @len and
+ * @data are not used.  Returns the result of lprocfs_reg_obd().
+ */
+int mdc_attach(struct obd_device *dev,
+               obd_count len, void *data)
+{
+        return lprocfs_reg_obd(dev, (lprocfs_vars_t*)status_var_nm_1,
+                               (void*)dev);
+}
+
+/*
+ * o_detach method: removes @dev's lprocfs registration and returns the
+ * result of lprocfs_dereg_obd().
+ */
+int mdc_detach(struct obd_device *dev)
+{
+        return lprocfs_dereg_obd(dev);
+}
struct obd_ops mdc_obd_ops = { /* method table for the MDC obd type; handed to class_register_type() at module init */
+ o_attach: mdc_attach, /* lprocfs registration */
+ o_detach: mdc_detach, /* lprocfs deregistration */
o_setup: client_obd_setup,
o_cleanup: client_obd_cleanup,
o_connect: client_obd_connect,
o_disconnect: client_obd_disconnect,
+ o_statfs: mdc_statfs, /* mdc_statfs is static in this file and reached through this table */
};
static int __init ptlrpc_request_init(void)
{
- return class_register_type(&mdc_obd_ops, LUSTRE_MDC_NAME);
+ /* Module init: register the MDC obd type together with its class-level
+ * lprocfs variable table.  The result is returned directly; there is no
+ * ENTRY here, so RETURN() would leave an unmatched trace record. */
+ return class_register_type(&mdc_obd_ops,
+ (lprocfs_vars_t*)status_class_var,
+ LUSTRE_MDC_NAME);
}
static void __exit ptlrpc_request_exit(void) /* module exit: unregisters the MDC obd type registered by ptlrpc_request_init() */
{
+
class_unregister_type(LUSTRE_MDC_NAME);
+
}
MODULE_AUTHOR("Cluster File Systems <info@clusterfs.com>");
EXPORT_SYMBOL(mdc_getstatus);
EXPORT_SYMBOL(mdc_getlovinfo);
EXPORT_SYMBOL(mdc_enqueue);
+EXPORT_SYMBOL(mdc_cancel_unused);
EXPORT_SYMBOL(mdc_getattr);
-EXPORT_SYMBOL(mdc_statfs);
EXPORT_SYMBOL(mdc_create);
EXPORT_SYMBOL(mdc_unlink);
EXPORT_SYMBOL(mdc_rename);
EXPORT_SYMBOL(mdc_close);
EXPORT_SYMBOL(mdc_open);
+EXPORT_SYMBOL(mdc_store_create_replay_data);
+
module_init(ptlrpc_request_init);
module_exit(ptlrpc_request_exit);