X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fmdc%2Fmdc_request.c;h=4b5f6b2d2bd95426cf076a882e5e87c48dc0a50c;hb=400b0681017091fab9cef9bd00e0f536e1793dcc;hp=db2dbd61c8a4ad15ce816f8c06e35fd238368803;hpb=600b41c99c6ddf4ed0d3f02e656c693de671baee;p=fs%2Flustre-release.git diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index db2dbd6..4b5f6b2 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -28,24 +28,26 @@ #include #include #include +#include +#include +#include #define REQUEST_MINOR 244 extern int mds_queue_req(struct ptlrpc_request *); +extern lprocfs_vars_t status_var_nm_1[]; +extern lprocfs_vars_t status_class_var[]; - -int mdc_getstatus(struct obd_conn *conn, struct ll_fid *rootfid, - __u64 *last_committed, __u64 *last_rcvd, - __u32 *last_xid, struct ptlrpc_request **request) +/* should become mdc_getinfo() */ +int mdc_getstatus(struct lustre_handle *conn, struct ll_fid *rootfid) { struct ptlrpc_request *req; struct mds_body *body; - struct mdc_obd *mdc = mdc_conn2mdc(conn); int rc, size = sizeof(*body); ENTRY; - req = ptlrpc_prep_req(mdc->mdc_client, mdc->mdc_conn, - MDS_GETSTATUS, 1, &size, NULL); + req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_GETSTATUS, 1, &size, + NULL); if (!req) GOTO(out, rc = -ENOMEM); @@ -61,37 +63,62 @@ int mdc_getstatus(struct obd_conn *conn, struct ll_fid *rootfid, body = lustre_msg_buf(req->rq_repmsg, 0); mds_unpack_body(body); memcpy(rootfid, &body->fid1, sizeof(*rootfid)); - *last_committed = req->rq_repmsg->last_committed; - *last_rcvd = req->rq_repmsg->last_rcvd; - *last_xid = body->last_xid; - - CDEBUG(D_NET, "root ino=%ld, last_committed=%Lu, last_rcvd=%Lu," - " last_xid=%d\n", - (unsigned long)rootfid->id, - (unsigned long long)*last_committed, - (unsigned long long)*last_rcvd, - body->last_xid); + + CDEBUG(D_NET, "root ino="LPU64", last_committed="LPU64 + ", last_xid="LPU64"\n", + rootfid->id, req->rq_repmsg->last_committed, + req->rq_repmsg->last_xid); } EXIT; out: - ptlrpc_free_req(req); + ptlrpc_req_finished(req); return rc; } +int mdc_getlovinfo(struct obd_device *obd, struct lustre_handle *mdc_connh, + struct ptlrpc_request **request) +{ + struct ptlrpc_request *req; + struct mds_status_req *streq; + int rc, size[2] = {sizeof(*streq)}; + ENTRY; + + req = ptlrpc_prep_req(class_conn2cliimp(mdc_connh), MDS_GETLOVINFO, 1, + size, NULL); + if (!req) + GOTO(out, rc = -ENOMEM); + + *request = req; + streq = lustre_msg_buf(req->rq_reqmsg, 0); + streq->flags = HTON__u32(MDS_STATUS_LOV); + streq->repbuf = HTON__u32(8192); + + /* prepare for reply */ + req->rq_level = LUSTRE_CONN_CON; + size[0] = 512; + size[1] = 8192; + req->rq_replen = lustre_msg_size(2, size); + + rc = ptlrpc_queue_wait(req); + rc = ptlrpc_check_status(req, rc); -int mdc_getattr(struct obd_conn *conn, - ino_t ino, int type, unsigned long valid, size_t ea_size, + out: + RETURN(rc); +} + + +int mdc_getattr(struct lustre_handle *conn, + obd_id ino, int type, unsigned long valid, size_t ea_size, struct ptlrpc_request **request) { - struct mdc_obd *mdc = mdc_conn2mdc(conn); struct ptlrpc_request *req; struct mds_body *body; int rc, size[2] = {sizeof(*body), 0}, bufcount = 1; ENTRY; - req = ptlrpc_prep_req(mdc->mdc_client, mdc->mdc_conn, - MDS_GETATTR, 1, size, NULL); + req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_GETATTR, 1, size, + NULL); if (!req) GOTO(out, rc = -ENOMEM); @@ -100,14 +127,18 @@ int mdc_getattr(struct obd_conn *conn, body->valid = valid; if (S_ISREG(type)) { + struct client_obd *mdc = &class_conn2obd(conn)->u.cli; bufcount = 2; - size[1] = sizeof(struct obdo); + size[1] = mdc->cl_max_mds_easize; } else if (valid & OBD_MD_LINKNAME) { bufcount = 2; size[1] = ea_size; + body->size = ea_size; + CDEBUG(D_INODE, "allocating %d bytes for symlink in packet\n", + ea_size); } req->rq_replen = lustre_msg_size(bufcount, size); - req->rq_level = LUSTRE_CONN_FULL; + mds_pack_req_body(req); rc = ptlrpc_queue_wait(req); rc = ptlrpc_check_status(req, rc); @@ -124,83 +155,293 @@ int mdc_getattr(struct obd_conn *conn, return rc; } -static int mdc_lock_callback(struct ldlm_lock *lock, struct ldlm_lock *new, - void *data, int data_len, - struct ptlrpc_request **req) +static void d_delete_aliases(struct inode *inode) { - int rc; - struct inode *inode = data; + struct dentry *dentry = NULL; + struct list_head *tmp; + int dentry_count = 0; ENTRY; - if (new == NULL) { - /* Completion AST. Do nothing. */ - RETURN(0); + spin_lock(&dcache_lock); + list_for_each(tmp, &inode->i_dentry) { + dentry = list_entry(tmp, struct dentry, d_alias); + dentry_count++; } - if (data_len != sizeof(*inode)) { - CERROR("data_len should be %d, but is %d\n", sizeof(*inode), - data_len); - LBUG(); - } + /* XXX FIXME tell phil/peter that you see this -- unless you're playing + * with hard links, in which case, stop. */ + LASSERT(dentry_count <= 1); - /* FIXME: do something better than throwing away everything */ - if (inode == NULL) - LBUG(); - if (S_ISDIR(inode->i_mode)) { - CDEBUG(D_INODE, "invalidating inode %ld\n", inode->i_ino); - invalidate_inode_pages(inode); + if (dentry_count == 0) { + spin_unlock(&dcache_lock); + EXIT; + return; } - rc = ldlm_cli_cancel(lock->l_client, lock); - if (rc < 0) { - CERROR("ldlm_cli_cancel: %d\n", rc); + CDEBUG(D_INODE, "d_deleting dentry %p\n", dentry); + dget_locked(dentry); + spin_unlock(&dcache_lock); + d_delete(dentry); + dput(dentry); + EXIT; +} + +static int mdc_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, + void *data, __u32 data_len, int flag) +{ + int rc; + struct lustre_handle lockh; + ENTRY; + + switch (flag) { + case LDLM_CB_BLOCKING: + ldlm_lock2handle(lock, &lockh); + rc = ldlm_cli_cancel(&lockh); + if (rc < 0) { + CDEBUG(D_INODE, "ldlm_cli_cancel: %d\n", rc); + RETURN(rc); + } + break; + case LDLM_CB_CANCELING: { + /* Invalidate all dentries associated with this inode */ + struct inode *inode = data; + + LASSERT(inode != NULL); + LASSERT(data_len == sizeof(*inode)); + + if (S_ISDIR(inode->i_mode)) { + CDEBUG(D_INODE, "invalidating inode %ld\n", + inode->i_ino); + ll_invalidate_inode_pages(inode); + } + + LASSERT(igrab(inode) == inode); + d_delete_aliases(inode); + iput(inode); + break; + } + default: LBUG(); } + RETURN(0); } -int mdc_enqueue(struct obd_conn *conn, int lock_type, struct lookup_intent *it, - int lock_mode, struct inode *dir, struct dentry *de, - struct lustre_handle *lockh, __u64 id, char *tgt, int tgtlen, - void *data, int datalen) +struct create_replay_data { + struct super_block *sb; + u32 generation; +}; + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +static int create_replay_find_inode(struct inode *inode, unsigned long ino, + void *opaque) +#else +static int create_replay_find_inode(struct inode *inode, void *opaque) +#endif +{ + struct ptlrpc_request *req = opaque; + struct create_replay_data *saved; + struct mds_body *body; + + saved = lustre_msg_buf(req->rq_reqmsg, 5); /* lock with intent */ + + if (saved->generation != inode->i_generation) { + CDEBUG(D_HA, + "generation mismatch for ino %u: saved %u != inode %u\n", + inode->i_ino, saved->generation, inode->i_generation); + return 0; + } + + body = lustre_msg_buf(req->rq_repmsg, 1); + + /* XXX do I need more out of ll_update_inode? */ + CDEBUG(D_HA, "updating inode %u generation %u to %u\n", + inode->i_ino, inode->i_generation, body->generation); + + inode->i_generation = body->generation; + + return 1; +} + +static void fixup_req_for_recreate(struct ptlrpc_request *fixreq, + struct ptlrpc_request *req, + struct inode *inode) +{ + struct ldlm_request *lockreq; + struct mds_rec_link *rec; /* representative, two-fid op structure */ + int opc; + + if (fixreq->rq_import != req->rq_import) { + DEBUG_REQ(D_HA, fixreq, "import mismatch, skipping"); + return; + } + + DEBUG_REQ(D_HA, fixreq, "fixing"); + + /* XXX check replay_state to see if we'll actually replay. */ + + /* We only care about LDLM_ENQUEUE and MDS_REINT requests. */ + if (fixreq->rq_reqmsg->opc == LDLM_ENQUEUE) { + lockreq = lustre_msg_buf(fixreq->rq_reqmsg, 0); + + if (lockreq->lock_desc.l_resource.lr_type != LDLM_PLAIN && + !(lockreq->lock_flags & LDLM_FL_HAS_INTENT)) { + DEBUG_REQ(D_HA, fixreq, "non-intent lock, skipping"); + return; + } + + if (fixreq->rq_reqmsg->bufcount < 2) { + DEBUG_REQ(D_HA, fixreq, + "short intent (probably readdir), skipping"); + return; + } + + /* XXX endianness is probably very very wrong here. Very. */ + rec = lustre_msg_buf(fixreq->rq_reqmsg, 2); + } else if (fixreq->rq_reqmsg->opc == MDS_REINT) { + rec = lustre_msg_buf(fixreq->rq_reqmsg, 0); + } else if (fixreq->rq_reqmsg->opc == MDS_OPEN) { + struct mds_body *body = lustre_msg_buf(fixreq->rq_reqmsg, 0); + DEBUG_REQ(D_HA, fixreq, "fixing fid1: %u -> %u", + body->fid1.generation, inode->i_generation); + body->fid1.generation = inode->i_generation; + return; + } else { + DEBUG_REQ(D_HA, fixreq, "not a replayable request, skipping"); + return; + } + + if (rec->lk_fid1.id == inode->i_ino) { + DEBUG_REQ(D_HA, fixreq, "fixing fid1: %u -> %u", + rec->lk_fid1.generation, inode->i_generation); + rec->lk_fid1.generation = inode->i_generation; + } + + /* Some ops have two FIDs. ZZZ We rely on the identical + * placement of that second FID in all such ops' messages. + */ + opc = rec->lk_opcode & REINT_OPCODE_MASK; + if ((opc == REINT_LINK || opc == REINT_UNLINK || + opc == REINT_RENAME) && + rec->lk_fid2.id == inode->i_ino) { + DEBUG_REQ(D_HA, fixreq, "fixing fid2: %u -> %u", + rec->lk_fid2.generation, inode->i_generation); + rec->lk_fid2.generation = inode->i_generation; + } +} + +static void mdc_replay_create(struct ptlrpc_request *req) +{ + struct create_replay_data *saved; + struct mds_body *body; + struct inode *inode; + struct list_head *tmp; + + if (req->rq_reqmsg->opc == MDS_REINT) + LBUG(); /* XXX don't handle the non-intent case yet */ + + body = lustre_msg_buf(req->rq_repmsg, 1); + saved = lustre_msg_buf(req->rq_reqmsg, 5); /* lock with intent */ + + CDEBUG(D_HA, "create of inode %d replayed; gen %u -> %u\n", + body->fid1.id, saved->generation, body->generation); + /* XXX cargo-culted right out of ll_iget */ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) + inode = iget4(saved->sb, body->fid1.id, create_replay_find_inode, req); +#endif +#if 0 + { + extern int ll_read_inode2(struct inode *inode, void *opaque); + inode = iget5_locked(saved->sb, body->fid1.id, + create_replay_find_inode, + ll_read_inode2, req); + + if (!inode) + LBUG(); /* XXX ick */ + + if (inode->i_state & I_NEW) + unlock_new_inode(inode); + } +#endif + + /* Now that we've updated the generation, we need to go and find all + * the other requests that refer to this file and will be replayed, + * and teach them about our new generation. + */ + list_for_each(tmp, &req->rq_connection->c_sending_head) { + struct ptlrpc_request *fixreq = + list_entry(tmp, struct ptlrpc_request, rq_list); + + fixup_req_for_recreate(fixreq, req, inode); + } + + list_for_each(tmp, &req->rq_connection->c_delayed_head) { + struct ptlrpc_request *fixreq = + list_entry(tmp, struct ptlrpc_request, rq_list); + + fixup_req_for_recreate(fixreq, req, inode); + } +} + +void mdc_store_create_replay_data(struct ptlrpc_request *req, + struct super_block *sb) +{ + struct create_replay_data *saved = + lustre_msg_buf(req->rq_reqmsg, 5); + struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 1); + + + if (req->rq_reqmsg->opc == MDS_REINT) + LBUG(); /* XXX don't handle the non-intent case yet */ + + saved->generation = body->generation; + saved->sb = sb; /* XXX is this safe? */ + + req->rq_replay_cb = mdc_replay_create; +} + +int mdc_enqueue(struct lustre_handle *conn, int lock_type, + struct lookup_intent *it, int lock_mode, struct inode *dir, + struct dentry *de, struct lustre_handle *lockh, + char *tgt, int tgtlen, void *data, int datalen) { struct ptlrpc_request *req; - struct obd_device *obddev = conn->oc_dev; - struct mdc_obd *mdc = mdc_conn2mdc(conn); - __u64 res_id[RES_NAME_SIZE] = {dir->i_ino}; - int size[5] = {sizeof(struct ldlm_request), sizeof(struct ldlm_intent)}; - int rc, flags; + struct obd_device *obddev = class_conn2obd(conn); + __u64 res_id[RES_NAME_SIZE] = {dir->i_ino, (__u64)dir->i_generation}; + int size[6] = {sizeof(struct ldlm_request), sizeof(struct ldlm_intent)}; + int rc, flags = LDLM_FL_HAS_INTENT; + int repsize[3] = {sizeof(struct ldlm_reply), + sizeof(struct mds_body), + obddev->u.cli.cl_max_mds_easize}; struct ldlm_reply *dlm_rep; struct ldlm_intent *lit; + struct ldlm_request *lockreq; ENTRY; -#warning FIXME: Andreas, the sgid directory stuff also goes here, but check again on mds - - LDLM_DEBUG_NOLOCK("mdsintent %d dir %ld", it->it_op, dir->i_ino); - - switch (it->it_op) { - case IT_MKDIR: - it->it_mode = (it->it_mode | S_IFDIR) & ~current->fs->umask; - break; - case IT_SETATTR: - it->it_op = IT_GETATTR; - break; - case (IT_CREAT|IT_OPEN): - case IT_CREAT: - case IT_MKNOD: - it->it_mode = (it->it_mode | S_IFREG) & ~current->fs->umask; - break; - case IT_SYMLINK: - it->it_mode = (it->it_mode | S_IFLNK) & ~current->fs->umask; - break; - } + LDLM_DEBUG_NOLOCK("mdsintent %s dir %ld", ldlm_it2str(it->it_op), + dir->i_ino); if (it->it_op & (IT_MKDIR | IT_CREAT | IT_SYMLINK | IT_MKNOD)) { + switch (it->it_op) { + case IT_MKDIR: + it->it_mode |= S_IFDIR; + break; + case (IT_CREAT|IT_OPEN): + case IT_CREAT: + it->it_mode |= S_IFREG; + break; + case IT_SYMLINK: + it->it_mode |= S_IFLNK; + break; + } + it->it_mode &= ~current->fs->umask; + size[2] = sizeof(struct mds_rec_create); size[3] = de->d_name.len + 1; size[4] = tgtlen + 1; - req = ptlrpc_prep_req(mdc->mdc_ldlm_client, mdc->mdc_conn, - LDLM_ENQUEUE, 5, size, NULL); + size[5] = sizeof(struct create_replay_data); + req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 6, + size, NULL); if (!req) RETURN(-ENOMEM); @@ -209,22 +450,18 @@ int mdc_enqueue(struct obd_conn *conn, int lock_type, struct lookup_intent *it, lit->opc = NTOH__u64((__u64)it->it_op); /* pack the intended request */ - mds_create_pack(req, 2, dir, it->it_mode, id, current->fsuid, + mds_create_pack(req, 2, dir, it->it_mode, 0, current->fsuid, current->fsgid, CURRENT_TIME, de->d_name.name, de->d_name.len, tgt, tgtlen); - - size[0] = sizeof(struct ldlm_reply); - size[1] = sizeof(struct mds_body); - size[2] = sizeof(struct obdo); - req->rq_replen = lustre_msg_size(3, size); - } else if ( it->it_op == IT_RENAME2 ) { + req->rq_replen = lustre_msg_size(3, repsize); + } else if (it->it_op == IT_RENAME2) { struct dentry *old_de = it->it_data; size[2] = sizeof(struct mds_rec_rename); size[3] = old_de->d_name.len + 1; size[4] = de->d_name.len + 1; - req = ptlrpc_prep_req(mdc->mdc_ldlm_client, mdc->mdc_conn, - LDLM_ENQUEUE, 5, size, NULL); + req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 5, + size, NULL); if (!req) RETURN(-ENOMEM); @@ -233,19 +470,17 @@ int mdc_enqueue(struct obd_conn *conn, int lock_type, struct lookup_intent *it, lit->opc = NTOH__u64((__u64)it->it_op); /* pack the intended request */ - mds_rename_pack(req, 2, old_de->d_inode, dir, - old_de->d_parent->d_name.name, - old_de->d_parent->d_name.len, + mds_rename_pack(req, 2, old_de->d_parent->d_inode, dir, + old_de->d_name.name, old_de->d_name.len, de->d_name.name, de->d_name.len); + req->rq_replen = lustre_msg_size(3, repsize); + } else if (it->it_op == IT_LINK2) { + struct dentry *old_de = it->it_data; - size[0] = sizeof(struct ldlm_reply); - size[1] = sizeof(struct mds_body); - req->rq_replen = lustre_msg_size(2, size); - } else if ( it->it_op == IT_UNLINK ) { - size[2] = sizeof(struct mds_rec_unlink); + size[2] = sizeof(struct mds_rec_link); size[3] = de->d_name.len + 1; - req = ptlrpc_prep_req(mdc->mdc_ldlm_client, mdc->mdc_conn, - LDLM_ENQUEUE, 4, size, NULL); + req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4, + size, NULL); if (!req) RETURN(-ENOMEM); @@ -254,18 +489,14 @@ int mdc_enqueue(struct obd_conn *conn, int lock_type, struct lookup_intent *it, lit->opc = NTOH__u64((__u64)it->it_op); /* pack the intended request */ - mds_unlink_pack(req, 2, dir, NULL, de->d_name.name, - de->d_name.len); - size[0] = sizeof(struct ldlm_reply); - size[1] = sizeof(struct obdo); - req->rq_replen = lustre_msg_size(2, size); - } else if ( it->it_op == IT_GETATTR || it->it_op == IT_RENAME || - it->it_op == IT_OPEN ) { - size[2] = sizeof(struct mds_body); + mds_link_pack(req, 2, old_de->d_inode, dir, + de->d_name.name, de->d_name.len); + req->rq_replen = lustre_msg_size(3, repsize); + } else if (it->it_op == IT_UNLINK || it->it_op == IT_RMDIR) { + size[2] = sizeof(struct mds_rec_unlink); size[3] = de->d_name.len + 1; - - req = ptlrpc_prep_req(mdc->mdc_ldlm_client, mdc->mdc_conn, - LDLM_ENQUEUE, 4, size, NULL); + req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4, + size, NULL); if (!req) RETURN(-ENOMEM); @@ -274,59 +505,69 @@ int mdc_enqueue(struct obd_conn *conn, int lock_type, struct lookup_intent *it, lit->opc = NTOH__u64((__u64)it->it_op); /* pack the intended request */ - mds_getattr_pack(req, 2, dir, de->d_name.name, de->d_name.len); + mds_unlink_pack(req, 2, dir, NULL, + it->it_op == IT_UNLINK ? S_IFREG : S_IFDIR, + de->d_name.name, de->d_name.len); - /* get ready for the reply */ - size[0] = sizeof(struct ldlm_reply); - size[1] = sizeof(struct mds_body); - size[2] = sizeof(struct obdo); - req->rq_replen = lustre_msg_size(3, size); - } else if ( it->it_op == IT_SETATTR) { - size[2] = sizeof(struct mds_rec_setattr); + req->rq_replen = lustre_msg_size(3, repsize); + } else if (it->it_op & (IT_GETATTR | IT_RENAME | IT_LINK | + IT_OPEN | IT_SETATTR | IT_LOOKUP | IT_READLINK)) { + size[2] = sizeof(struct mds_body); size[3] = de->d_name.len + 1; - req = ptlrpc_prep_req(mdc->mdc_ldlm_client, mdc->mdc_conn, - LDLM_ENQUEUE, 5, size, NULL); + + req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 4, + size, NULL); if (!req) RETURN(-ENOMEM); + /* pack the intent */ lit = lustre_msg_buf(req->rq_reqmsg, 1); lit->opc = NTOH__u64((__u64)it->it_op); - - if (!it->it_iattr) - LBUG(); - mds_setattr_pack(req, 2, dir, it->it_iattr, - de->d_name.name, de->d_name.len); - size[0] = sizeof(struct ldlm_reply); - size[1] = sizeof(struct mds_body); - req->rq_replen = lustre_msg_size(2, size); - } else if ( it->it_op == IT_READDIR ) { - req = ptlrpc_prep_req(mdc->mdc_ldlm_client, mdc->mdc_conn, - LDLM_ENQUEUE, 1, size, NULL); + /* pack the intended request */ + mds_getattr_pack(req, 2, dir, de->d_name.name, de->d_name.len); + + /* we need to replay opens */ + if (it->it_op == IT_OPEN) + req->rq_flags |= PTL_RPC_FL_REPLAY; + + /* get ready for the reply */ + req->rq_replen = lustre_msg_size(3, repsize); + } else if (it->it_op == IT_READDIR) { + req = ptlrpc_prep_req(class_conn2cliimp(conn), LDLM_ENQUEUE, 1, + size, NULL); if (!req) RETURN(-ENOMEM); /* get ready for the reply */ - size[0] = sizeof(struct ldlm_reply); - req->rq_replen = lustre_msg_size(1, size); + req->rq_replen = lustre_msg_size(1, repsize); } else { LBUG(); - RETURN(-1); + RETURN(-EINVAL); } #warning FIXME: the data here needs to be different if a lock was granted for a different inode - rc = ldlm_cli_enqueue(mdc->mdc_ldlm_client, mdc->mdc_conn, req, - obddev->obd_namespace, NULL, res_id, lock_type, - NULL, 0, lock_mode, &flags, - (void *)mdc_lock_callback, data, datalen, lockh); + rc = ldlm_cli_enqueue(conn, req, obddev->obd_namespace, NULL, res_id, + lock_type, NULL, 0, lock_mode, &flags, + ldlm_completion_ast, mdc_blocking_ast, data, + datalen, lockh); if (rc == -ENOENT) { + /* This can go when we're sure that this can never happen */ + LBUG(); + } + if (rc == ELDLM_LOCK_ABORTED) { lock_mode = 0; memset(lockh, 0, sizeof(*lockh)); + /* rc = 0 */ } else if (rc != 0) { CERROR("ldlm_cli_enqueue: %d\n", rc); RETURN(rc); } - dlm_rep = lustre_msg_buf(req->rq_repmsg, 0); + /* On replay, we don't want the lock granted. */ + lockreq = lustre_msg_buf(req->rq_reqmsg, 0); + lockreq->lock_flags |= LDLM_FL_INTENT_ONLY; + + dlm_rep = lustre_msg_buf(req->rq_repmsg, 0); it->it_disposition = (int) dlm_rep->lock_policy_res1; it->it_status = (int) dlm_rep->lock_policy_res2; it->it_lock_mode = lock_mode; @@ -335,72 +576,111 @@ int mdc_enqueue(struct obd_conn *conn, int lock_type, struct lookup_intent *it, RETURN(0); } -int mdc_open(struct obd_conn *conn, ino_t ino, int type, int flags, - struct obdo *obdo, - __u64 cookie, __u64 *fh, struct ptlrpc_request **request) +int mdc_cancel_unused(struct lustre_handle *conn, struct inode *inode, + int flags) +{ + __u64 res_id[RES_NAME_SIZE] = {inode->i_ino, inode->i_generation}; + struct obd_device *obddev = class_conn2obd(conn); + ENTRY; + RETURN(ldlm_cli_cancel_unused(obddev->obd_namespace, res_id, flags)); +} + +struct replay_open_data { + struct lustre_handle *fh; +}; + +static void mdc_replay_open(struct ptlrpc_request *req) +{ + int offset; + struct replay_open_data *saved; + struct mds_body *body = lustre_msg_buf(req->rq_repmsg, 0); + + if (lustre_msg_get_op_flags(req->rq_reqmsg) & MDS_OPEN_HAS_EA) + offset = 2; + else + offset = 1; + + saved = lustre_msg_buf(req->rq_reqmsg, offset); + mds_unpack_body(body); + CDEBUG(D_HA, "updating from "LPD64"/"LPD64" to "LPD64"/"LPD64"\n", + saved->fh->addr, saved->fh->cookie, + body->handle.addr, body->handle.cookie); + memcpy(saved->fh, &body->handle, sizeof(body->handle)); +} + +int mdc_open(struct lustre_handle *conn, obd_id ino, int type, int flags, + struct lov_stripe_md *lsm, struct lustre_handle *fh, + struct ptlrpc_request **request) { - struct mdc_obd *mdc = mdc_conn2mdc(conn); struct mds_body *body; - int rc, size[2] = {sizeof(*body)}, bufcount = 1; + struct replay_open_data *replay_data; + int rc, size[3] = {sizeof(*body), sizeof(*replay_data)}, bufcount = 2; struct ptlrpc_request *req; ENTRY; - if (obdo != NULL) { - bufcount = 2; - size[1] = sizeof(*obdo); + if (lsm) { + bufcount = 3; + size[2] = size[1]; /* shuffle the spare data along */ + + size[1] = lsm->lsm_mds_easize; } - req = ptlrpc_prep_req(mdc->mdc_client, mdc->mdc_conn, - MDS_OPEN, bufcount, size, NULL); + req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_OPEN, bufcount, size, + NULL); if (!req) GOTO(out, rc = -ENOMEM); + if (lsm) + lustre_msg_set_op_flags(req->rq_reqmsg, MDS_OPEN_HAS_EA); + + req->rq_flags |= PTL_RPC_FL_REPLAY; - req->rq_level = LUSTRE_CONN_FULL; body = lustre_msg_buf(req->rq_reqmsg, 0); ll_ino2fid(&body->fid1, ino, 0, type); body->flags = HTON__u32(flags); - body->extra = cookie; + memcpy(&body->handle, fh, sizeof(body->handle)); - if (obdo != NULL) - memcpy(lustre_msg_buf(req->rq_reqmsg, 1), obdo, sizeof(*obdo)); + if (lsm) + lov_packmd(lustre_msg_buf(req->rq_reqmsg, 1), lsm); req->rq_replen = lustre_msg_size(1, size); rc = ptlrpc_queue_wait(req); rc = ptlrpc_check_status(req, rc); - if (!rc) { body = lustre_msg_buf(req->rq_repmsg, 0); mds_unpack_body(body); - *fh = body->extra; + memcpy(fh, &body->handle, sizeof(*fh)); } + /* If open is replayed, we need to fix up the fh. */ + req->rq_replay_cb = mdc_replay_open; + replay_data = lustre_msg_buf(req->rq_reqmsg, lsm ? 2 : 1); + replay_data->fh = fh; + EXIT; out: *request = req; return rc; } -int mdc_close(struct obd_conn *conn, - ino_t ino, int type, __u64 fh, struct ptlrpc_request **request) +int mdc_close(struct lustre_handle *conn, obd_id ino, int type, + struct lustre_handle *fh, struct ptlrpc_request **request) { - struct mdc_obd *mdc = mdc_conn2mdc(conn); struct mds_body *body; int rc, size = sizeof(*body); struct ptlrpc_request *req; - req = ptlrpc_prep_req(mdc->mdc_client, mdc->mdc_conn, - MDS_CLOSE, 1, &size, NULL); + req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_CLOSE, 1, &size, + NULL); if (!req) GOTO(out, rc = -ENOMEM); body = lustre_msg_buf(req->rq_reqmsg, 0); ll_ino2fid(&body->fid1, ino, 0, type); - body->extra = fh; + memcpy(&body->handle, fh, sizeof(body->handle)); - req->rq_level = LUSTRE_CONN_FULL; req->rq_replen = lustre_msg_size(0, NULL); rc = ptlrpc_queue_wait(req); @@ -412,10 +692,11 @@ int mdc_close(struct obd_conn *conn, return rc; } -int mdc_readpage(struct obd_conn *conn, ino_t ino, int type, __u64 offset, +int mdc_readpage(struct lustre_handle *conn, obd_id ino, int type, __u64 offset, char *addr, struct ptlrpc_request **request) { - struct mdc_obd *mdc = mdc_conn2mdc(conn); + struct ptlrpc_connection *connection = + client_conn2cli(conn)->cl_import.imp_connection; struct ptlrpc_request *req = NULL; struct ptlrpc_bulk_desc *desc = NULL; struct ptlrpc_bulk_page *bulk = NULL; @@ -425,20 +706,20 @@ int mdc_readpage(struct obd_conn *conn, ino_t ino, int type, __u64 offset, CDEBUG(D_INODE, "inode: %ld\n", (long)ino); - desc = ptlrpc_prep_bulk(mdc->mdc_conn); + desc = ptlrpc_prep_bulk(connection); if (desc == NULL) GOTO(out, rc = -ENOMEM); - req = ptlrpc_prep_req(mdc->mdc_client, mdc->mdc_conn, - MDS_READPAGE, 1, &size, NULL); + req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_READPAGE, 1, &size, + NULL); if (!req) GOTO(out2, rc = -ENOMEM); bulk = ptlrpc_prep_bulk_page(desc); - bulk->b_buflen = PAGE_SIZE; - bulk->b_buf = addr; - bulk->b_xid = req->rq_xid; - desc->b_portal = MDS_BULK_PORTAL; + bulk->bp_buflen = PAGE_SIZE; + bulk->bp_buf = addr; + bulk->bp_xid = req->rq_xid; + desc->bd_portal = MDS_BULK_PORTAL; rc = ptlrpc_register_bulk(desc); if (rc) { @@ -452,13 +733,12 @@ int mdc_readpage(struct obd_conn *conn, ino_t ino, int type, __u64 offset, body->size = offset; req->rq_replen = lustre_msg_size(1, &size); - req->rq_level = LUSTRE_CONN_FULL; rc = ptlrpc_queue_wait(req); rc = ptlrpc_check_status(req, rc); if (rc) { ptlrpc_abort_bulk(desc); GOTO(out2, rc); - } else { + } else { body = lustre_msg_buf(req->rq_repmsg, 0); mds_unpack_body(body); } @@ -471,21 +751,18 @@ int mdc_readpage(struct obd_conn *conn, ino_t ino, int type, __u64 offset, return rc; } -int mdc_statfs(struct obd_conn *conn, struct statfs *sfs, - struct ptlrpc_request **request) +static int mdc_statfs(struct lustre_handle *conn, struct obd_statfs *osfs) { - struct mdc_obd *mdc = mdc_conn2mdc(conn); - struct obd_statfs *osfs; struct ptlrpc_request *req; int rc, size = sizeof(*osfs); ENTRY; - req = ptlrpc_prep_req(mdc->mdc_client, mdc->mdc_conn, MDS_STATFS, - 0, NULL, NULL); + req = ptlrpc_prep_req(class_conn2cliimp(conn), MDS_STATFS, 0, NULL, + NULL); if (!req) - GOTO(out, rc = -ENOMEM); + RETURN(-ENOMEM); + req->rq_replen = lustre_msg_size(1, &size); - req->rq_level = LUSTRE_CONN_FULL; rc = ptlrpc_queue_wait(req); rc = ptlrpc_check_status(req, rc); @@ -493,298 +770,56 @@ int mdc_statfs(struct obd_conn *conn, struct statfs *sfs, if (rc) GOTO(out, rc); - osfs = lustre_msg_buf(req->rq_repmsg, 0); - obd_statfs_unpack(osfs, sfs); + obd_statfs_unpack(osfs, lustre_msg_buf(req->rq_repmsg, 0)); EXIT; out: - *request = req; + ptlrpc_req_finished(req); return rc; } - -static int mdc_ioctl(long cmd, struct obd_conn *conn, int len, void *karg, - void *uarg) +int mdc_attach(struct obd_device *dev, + obd_count len, void *data) { -#if 0 - /* FIXME XXX : This should use the new ioc_data to pass args in */ - int err = 0; - struct ptlrpc_client cl; - struct ptlrpc_connection *conn; - struct ptlrpc_request *request; - - ENTRY; - - if (_IOC_TYPE(cmd) != IOC_REQUEST_TYPE || - _IOC_NR(cmd) < IOC_REQUEST_MIN_NR || - _IOC_NR(cmd) > IOC_REQUEST_MAX_NR ) { - CDEBUG(D_IOCTL, "invalid ioctl ( type %d, nr %d, size %d )\n", - _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd)); - RETURN(-EINVAL); - } - - ptlrpc_init_client(NULL, NULL, - MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL, &cl); - connection = ptlrpc_uuid_to_connection("mds"); - if (!connection) { - CERROR("cannot create client\n"); - RETURN(-EINVAL); - } - - switch (cmd) { - case IOC_REQUEST_GETATTR: { - CERROR("-- getting attr for ino %lu\n", arg); - err = mdc_getattr(&cl, connection, arg, S_IFDIR, ~0, 0, - &request); - CERROR("-- done err %d\n", err); - - GOTO(out, err); - } - - case IOC_REQUEST_READPAGE: { - char *buf; - OBD_ALLOC(buf, PAGE_SIZE); - if (!buf) { - err = -ENOMEM; - GOTO(out, err); - } - CERROR("-- readpage 0 for ino %lu\n", arg); - err = mdc_readpage(&cl, connection, arg, S_IFDIR, 0, buf, - &request); - CERROR("-- done err %d\n", err); - OBD_FREE(buf, PAGE_SIZE); - - GOTO(out, err); - } - - case IOC_REQUEST_SETATTR: { - struct inode inode; - struct iattr iattr; - - inode.i_ino = arg; - inode.i_generation = 0; - iattr.ia_mode = 040777; - iattr.ia_atime = 0; - iattr.ia_valid = ATTR_MODE | ATTR_ATIME; - - err = mdc_setattr(&cl, connection, &inode, &iattr, &request); - CERROR("-- done err %d\n", err); - - GOTO(out, err); - } - - case IOC_REQUEST_CREATE: { - struct inode inode; - struct iattr iattr; - - inode.i_ino = arg; - inode.i_generation = 0; - iattr.ia_mode = 040777; - iattr.ia_atime = 0; - iattr.ia_valid = ATTR_MODE | ATTR_ATIME; - - err = mdc_create(&cl, connection, &inode, - "foofile", strlen("foofile"), - NULL, 0, 0100707, 47114711, - 11, 47, 0, NULL, &request); - CERROR("-- done err %d\n", err); - - GOTO(out, err); - } - - case IOC_REQUEST_OPEN: { - __u64 fh, ino; - copy_from_user(&ino, (__u64 *)arg, sizeof(ino)); - CERROR("-- opening ino %llu\n", (unsigned long long)ino); - err = mdc_open(&cl, connection, ino, S_IFDIR, O_RDONLY, 4711, - &fh, &request); - copy_to_user((__u64 *)arg, &fh, sizeof(fh)); - CERROR("-- done err %d (fh=%Lu)\n", err, - (unsigned long long)fh); - - GOTO(out, err); - } - - case IOC_REQUEST_CLOSE: { - CERROR("-- closing ino 2, filehandle %lu\n", arg); - err = mdc_close(&cl, connection, 2, S_IFDIR, arg, &request); - CERROR("-- done err %d\n", err); - - GOTO(out, err); - } - - default: - GOTO(out, err = -EINVAL); - } - - out: - ptlrpc_free_req(request); - ptlrpc_put_connection(connection); - ptlrpc_cleanup_client(&cl); - - RETURN(err); -#endif - return 0; -} - -static int mdc_setup(struct obd_device *obddev, obd_count len, void *buf) -{ - struct obd_ioctl_data* data = buf; - struct mdc_obd *mdc = &obddev->u.mdc; - char server_uuid[37]; int rc; - ENTRY; - - if (data->ioc_inllen1 < 1) { - CERROR("osc setup requires a TARGET UUID\n"); - RETURN(-EINVAL); - } - - if (data->ioc_inllen1 > 37) { - CERROR("mdc UUID must be less than 38 characters\n"); - RETURN(-EINVAL); - } - - if (data->ioc_inllen2 < 1) { - CERROR("mdc setup requires a SERVER UUID\n"); - RETURN(-EINVAL); - } - - if (data->ioc_inllen2 > 37) { - CERROR("mdc UUID must be less than 38 characters\n"); - RETURN(-EINVAL); - } - - memcpy(mdc->mdc_target_uuid, data->ioc_inlbuf1, data->ioc_inllen1); - memcpy(server_uuid, data->ioc_inlbuf2, MIN(data->ioc_inllen2, - sizeof(server_uuid))); - - mdc->mdc_conn = ptlrpc_uuid_to_connection(server_uuid); - if (!mdc->mdc_conn) - RETURN(-ENOENT); - - OBD_ALLOC(mdc->mdc_client, sizeof(*mdc->mdc_client)); - if (mdc->mdc_client == NULL) - GOTO(out_conn, rc = -ENOMEM); - - OBD_ALLOC(mdc->mdc_ldlm_client, sizeof(*mdc->mdc_ldlm_client)); - if (mdc->mdc_ldlm_client == NULL) - GOTO(out_client, rc = -ENOMEM); - - ptlrpc_init_client(NULL, NULL, MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL, - mdc->mdc_client); - ptlrpc_init_client(NULL, NULL, LDLM_REQUEST_PORTAL, LDLM_REPLY_PORTAL, - mdc->mdc_ldlm_client); - mdc->mdc_client->cli_name = "mdc"; - mdc->mdc_ldlm_client->cli_name = "ldlm"; - /* XXX get recovery hooked in here again */ - //ptlrpc_init_client(ptlrpc_connmgr, ll_recover,... - - ptlrpc_init_client(ptlrpc_connmgr, NULL, - MDS_REQUEST_PORTAL, MDC_REPLY_PORTAL, - mdc->mdc_client); - - MOD_INC_USE_COUNT; - RETURN(0); - - out_client: - OBD_FREE(mdc->mdc_client, sizeof(*mdc->mdc_client)); - out_conn: - ptlrpc_put_connection(mdc->mdc_conn); - return rc; + rc = lprocfs_reg_obd(dev, (lprocfs_vars_t*)status_var_nm_1, (void*)dev); + return rc; } -static int mdc_cleanup(struct obd_device * obddev) +int mdc_detach(struct obd_device *dev) { - struct mdc_obd *mdc = &obddev->u.mdc; - - ptlrpc_cleanup_client(mdc->mdc_client); - OBD_FREE(mdc->mdc_client, sizeof(*mdc->mdc_client)); - ptlrpc_cleanup_client(mdc->mdc_ldlm_client); - OBD_FREE(mdc->mdc_ldlm_client, sizeof(*mdc->mdc_ldlm_client)); - ptlrpc_put_connection(mdc->mdc_conn); - - MOD_DEC_USE_COUNT; - return 0; -} - -static int mdc_connect(struct obd_conn *conn) -{ - struct mdc_obd *mdc = mdc_conn2mdc(conn); - struct ptlrpc_request *request; - int rc, size = sizeof(mdc->mdc_target_uuid); - char *tmp = mdc->mdc_target_uuid; - - ENTRY; - - conn->oc_dev->obd_namespace = - ldlm_namespace_new("mdc", LDLM_NAMESPACE_CLIENT); - if (conn->oc_dev->obd_namespace == NULL) - RETURN(-ENOMEM); - - request = ptlrpc_prep_req(mdc->mdc_client, mdc->mdc_conn, - MDS_CONNECT, 1, &size, &tmp); - if (!request) - RETURN(-ENOMEM); - - request->rq_replen = lustre_msg_size(0, NULL); - - rc = ptlrpc_queue_wait(request); - if (rc) - GOTO(out, rc); - - mdc->mdc_client->cli_target_devno = request->rq_repmsg->target_id; - mdc->mdc_ldlm_client->cli_target_devno = - mdc->mdc_client->cli_target_devno; - EXIT; - out: - ptlrpc_free_req(request); + int rc; + rc = lprocfs_dereg_obd(dev); return rc; -} - -static int mdc_disconnect(struct obd_conn *conn) -{ - struct mdc_obd *mdc = mdc_conn2mdc(conn); - struct ptlrpc_request *request; - struct mds_body *body; - int rc, size = sizeof(*body); - ENTRY; - - ldlm_namespace_free(conn->oc_dev->obd_namespace); - request = ptlrpc_prep_req(mdc->mdc_client, mdc->mdc_conn, - MDS_DISCONNECT, 1, &size, - NULL); - if (!request) - RETURN(-ENOMEM); - - body = lustre_msg_buf(request->rq_reqmsg, 0); - body->valid = conn->oc_id; - - request->rq_replen = lustre_msg_size(0, NULL); - rc = ptlrpc_queue_wait(request); - GOTO(out, rc); - out: - ptlrpc_free_req(request); - return rc; } - struct obd_ops mdc_obd_ops = { - o_setup: mdc_setup, - o_cleanup: mdc_cleanup, - o_connect: mdc_connect, - o_disconnect: mdc_disconnect, - o_iocontrol: mdc_ioctl + o_attach: mdc_attach, + o_detach: mdc_detach, + o_setup: client_obd_setup, + o_cleanup: client_obd_cleanup, + o_connect: client_obd_connect, + o_disconnect: client_obd_disconnect, + o_statfs: mdc_statfs, }; static int __init ptlrpc_request_init(void) { - return obd_register_type(&mdc_obd_ops, LUSTRE_MDC_NAME); + int rc; + rc = class_register_type(&mdc_obd_ops, + (lprocfs_vars_t*)status_class_var, + LUSTRE_MDC_NAME); + if(rc) + RETURN(rc); + return 0; + } static void __exit ptlrpc_request_exit(void) { - obd_unregister_type(LUSTRE_MDC_NAME); + + class_unregister_type(LUSTRE_MDC_NAME); + } MODULE_AUTHOR("Cluster File Systems "); @@ -792,9 +827,10 @@ MODULE_DESCRIPTION("Lustre Metadata Client v1.0"); MODULE_LICENSE("GPL"); EXPORT_SYMBOL(mdc_getstatus); +EXPORT_SYMBOL(mdc_getlovinfo); EXPORT_SYMBOL(mdc_enqueue); +EXPORT_SYMBOL(mdc_cancel_unused); EXPORT_SYMBOL(mdc_getattr); -EXPORT_SYMBOL(mdc_statfs); EXPORT_SYMBOL(mdc_create); EXPORT_SYMBOL(mdc_unlink); EXPORT_SYMBOL(mdc_rename); @@ -804,5 +840,7 @@ EXPORT_SYMBOL(mdc_setattr); EXPORT_SYMBOL(mdc_close); EXPORT_SYMBOL(mdc_open); +EXPORT_SYMBOL(mdc_store_create_replay_data); + module_init(ptlrpc_request_init); module_exit(ptlrpc_request_exit);