From: shaver Date: Wed, 23 Oct 2002 18:57:33 +0000 (+0000) Subject: - Beginning of new REINT replay infrastructure. X-Git-Tag: 0.5.15~7 X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=ca6c1e0c659e2adddcac3be9706af569677ed6a5;p=fs%2Flustre-release.git - Beginning of new REINT replay infrastructure. - Poison imports for failed OSCs, so future requests get -EIO. - Remove the epoch checking for mounts and inodes, because we don't want to kill a whole filesystem because a single OSC died. - Close open files on MDS-disconnect. - If asked with a "force" parameter, disconnect all remaining exports during OBD disconnection. - Teach lconf to send that parameter when given -f. --- diff --git a/lustre/include/linux/lustre_idl.h b/lustre/include/linux/lustre_idl.h index caeffef..1ca158b 100644 --- a/lustre/include/linux/lustre_idl.h +++ b/lustre/include/linux/lustre_idl.h @@ -303,8 +303,10 @@ struct ost_body { #define REINT_LINK 3 #define REINT_UNLINK 4 #define REINT_RENAME 5 -#define REINT_RECREATE 6 -#define REINT_MAX 6 +#define REINT_MAX 5 + +#define REINT_OPCODE_MASK 0xff /* opcodes must fit into this mask */ +#define REINT_REPLAYING 0x1000 /* masked into the opcode to indicate replay */ struct ll_fid { __u64 id; diff --git a/lustre/include/linux/lustre_import.h b/lustre/include/linux/lustre_import.h index 3a183e4..255c238 100644 --- a/lustre/include/linux/lustre_import.h +++ b/lustre/include/linux/lustre_import.h @@ -12,6 +12,8 @@ #ifdef __KERNEL__ +#define IMP_INVALID 1 + #include struct obd_import { struct ptlrpc_connection *imp_connection; @@ -19,8 +21,8 @@ struct obd_import { struct lustre_handle imp_handle; struct list_head imp_chain; struct obd_device *imp_obd; - /* XXX need a UUID here, I think - */ + int imp_flags; + /* XXX need a UUID here, I think */ }; extern struct obd_import *class_conn2cliimp(struct lustre_handle *); diff --git a/lustre/include/linux/lustre_lite.h b/lustre/include/linux/lustre_lite.h index f877713..825b53d 100644 --- 
a/lustre/include/linux/lustre_lite.h +++ b/lustre/include/linux/lustre_lite.h @@ -49,7 +49,6 @@ struct ll_inode_info { char *lli_symlink_name; struct lustre_handle lli_intent_lock_handle; struct semaphore lli_open_sem; - __u32 lli_mount_epoch; #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) struct inode lli_vfs_inode; #endif @@ -86,17 +85,8 @@ struct ll_sb_info { time_t ll_commitcbd_timeout; spinlock_t ll_commitcbd_lock; struct list_head ll_conn_chain; /* per-conn chain of SBs */ - __u32 ll_mount_epoch; }; -#define CHECK_MOUNT_EPOCH(i) \ -do { \ - if (ll_i2info(i)->lli_mount_epoch != ll_i2sbi(i)->ll_mount_epoch) { \ - make_bad_inode(i); \ - RETURN(-EIO); \ - } \ -} while(0) - static inline struct ll_sb_info *ll_s2sbi(struct super_block *sb) { #if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) diff --git a/lustre/lib/mds_updates.c b/lustre/lib/mds_updates.c index 6d64cea..345ccba 100644 --- a/lustre/lib/mds_updates.c +++ b/lustre/lib/mds_updates.c @@ -424,24 +424,24 @@ static update_unpacker mds_unpackers[REINT_MAX + 1] = { [REINT_LINK] mds_link_unpack, [REINT_UNLINK] mds_unlink_unpack, [REINT_RENAME] mds_rename_unpack, - [REINT_RECREATE] mds_create_unpack, }; int mds_update_unpack(struct ptlrpc_request *req, int offset, struct mds_update_record *rec) { __u32 *opcode = lustre_msg_buf(req->rq_reqmsg, offset); - int rc; + int rc, realop; ENTRY; if (!opcode || req->rq_reqmsg->buflens[offset] < sizeof(*opcode)) RETURN(-EFAULT); - rec->ur_opcode = NTOH__u32(*opcode); + realop = rec->ur_opcode = NTOH__u32(*opcode); + realop &= REINT_OPCODE_MASK; - if (rec->ur_opcode < 0 || rec->ur_opcode > REINT_MAX) + if (realop < 0 || realop > REINT_MAX) RETURN(-EFAULT); - rc = mds_unpackers[rec->ur_opcode](req, offset, rec); + rc = mds_unpackers[realop](req, offset, rec); RETURN(rc); } diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 29d1d35..e5041a0 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -71,8 +71,6 @@ static int ll_file_open(struct inode *inode, 
struct file *file) LASSERT(!file->private_data); - CHECK_MOUNT_EPOCH(inode); - lsm = lli->lli_smd; /* delayed create of object (intent created inode) */ @@ -261,8 +259,6 @@ static int ll_file_release(struct inode *inode, struct file *file) ENTRY; - CHECK_MOUNT_EPOCH(inode); - fd = (struct ll_file_data *)file->private_data; if (!fd) { LBUG(); @@ -653,8 +649,6 @@ loff_t ll_file_seek(struct file *file, loff_t offset, int origin) long long retval; ENTRY; - CHECK_MOUNT_EPOCH(inode); - switch (origin) { case 2: { struct ll_inode_info *lli = ll_i2info(inode); diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index 54a9d7d..188b7e1 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -194,12 +194,6 @@ static struct dentry *ll_lookup2(struct inode *dir, struct dentry *dentry, ENTRY; - /* CHECK_MOUNT_EPOCH(dir); */ - if (ll_i2info(dir)->lli_mount_epoch != ll_i2sbi(dir)->ll_mount_epoch) { - make_bad_inode(dir); - RETURN(ERR_PTR(-EIO)); - } - if (it == NULL) it = &lookup_it; @@ -503,8 +497,6 @@ static int ll_create(struct inode *dir, struct dentry *dentry, int mode) int rc = 0; ENTRY; - CHECK_MOUNT_EPOCH(dir); - LL_GET_INTENT(dentry, it); inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len, @@ -560,8 +552,6 @@ static int ll_symlink(struct inode *dir, struct dentry *dentry, int err = 0; ENTRY; - CHECK_MOUNT_EPOCH(dir); - LL_GET_INTENT(dentry, it); inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len, diff --git a/lustre/llite/recover.c b/lustre/llite/recover.c index e88cedf..d21954c 100644 --- a/lustre/llite/recover.c +++ b/lustre/llite/recover.c @@ -24,9 +24,21 @@ static void abort_inflight_for_import(struct obd_import *imp) { struct list_head *tmp, *n; + /* Make sure that no new requests get processed for this import. + * ptlrpc_queue_wait must (and does) hold c_lock while testing this flags and + * then putting requests on sending_head or delayed_head. 
+ */ + spin_lock(&imp->imp_connection->c_lock); + imp->imp_flags |= IMP_INVALID; + spin_unlock(&imp->imp_connection->c_lock); + list_for_each_safe(tmp, n, &imp->imp_connection->c_sending_head) { struct ptlrpc_request *req = list_entry(tmp, struct ptlrpc_request, rq_list); + + if (req->rq_import != imp) + continue; + if (req->rq_flags & PTL_RPC_FL_REPLIED) { /* no need to replay, just discard */ CERROR("uncommitted req xid "LPD64" op %d to OST %s\n", @@ -89,13 +101,16 @@ static int ll_reconnect(struct ptlrpc_connection *conn) reconnect_ost(imp); } else { int rc = ptlrpc_reconnect_import(imp, MDS_CONNECT); - if (!rc) + if (!rc) { need_replay = 1; + /* XXX obd_cancel_unused */ + } /* make sure we don't try to replay for dead imps? * * else imp->imp_connection = NULL; * */ + } } diff --git a/lustre/llite/rw.c b/lustre/llite/rw.c index 3742f05..92b33a8 100644 --- a/lustre/llite/rw.c +++ b/lustre/llite/rw.c @@ -80,8 +80,6 @@ static int ll_brw(int cmd, struct inode *inode, struct page *page, int create) int err; ENTRY; - CHECK_MOUNT_EPOCH(inode); - if (!cbd) RETURN(-ENOMEM); @@ -252,8 +250,6 @@ static int ll_commit_write(struct file *file, struct page *page, struct io_cb_data *cbd = ll_init_cb(); ENTRY; - CHECK_MOUNT_EPOCH(inode); - pg.pg = page; pg.count = to; pg.off = (((obd_off)page->index) << PAGE_SHIFT); @@ -293,8 +289,6 @@ static int ll_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf, int i, rc = 0; struct io_cb_data *cbd; - CHECK_MOUNT_EPOCH(inode); - ENTRY; if (!lsm || !lsm->lsm_object_id) RETURN(-ENOMEM); diff --git a/lustre/llite/super.c b/lustre/llite/super.c index dbf93d8..2d28a69 100644 --- a/lustre/llite/super.c +++ b/lustre/llite/super.c @@ -121,7 +121,6 @@ static struct super_block * ll_read_super(struct super_block *sb, } INIT_LIST_HEAD(&sbi->ll_conn_chain); - sbi->ll_mount_epoch = 0; generate_random_uuid(uuid); class_uuid_unparse(uuid, sbi->ll_sb_uuid); @@ -470,7 +469,6 @@ static void ll_read_inode2(struct inode *inode, void *opaque) ENTRY; 
sema_init(&lli->lli_open_sem, 1); - lli->lli_mount_epoch = ll_i2sbi(inode)->ll_mount_epoch; /* core attributes first */ ll_update_inode(inode, body); @@ -556,6 +554,8 @@ void ll_umount_begin(struct super_block *sb) conn = list_entry(ctmp, struct ptlrpc_connection, c_sb_chain); spin_lock(&conn->c_lock); + /* XXX should just be dealing with imports, probably through + * XXX iocontrol, need next-gen recovery! */ conn->c_flags |= CONN_INVALID; invalidate_request_list(&conn->c_sending_head); invalidate_request_list(&conn->c_delayed_head); diff --git a/lustre/llite/symlink.c b/lustre/llite/symlink.c index d522485..9b230e7 100644 --- a/lustre/llite/symlink.c +++ b/lustre/llite/symlink.c @@ -37,8 +37,6 @@ static int ll_readlink_internal(struct inode *inode, *request = NULL; - CHECK_MOUNT_EPOCH(inode); - if (lli->lli_symlink_name) { *symname = lli->lli_symlink_name; CDEBUG(D_INODE, "using cached symlink %s\n", *symname); @@ -71,8 +69,6 @@ static int ll_readlink(struct dentry *dentry, char *buffer, int buflen) int rc; ENTRY; - CHECK_MOUNT_EPOCH(inode); - /* on symlinks lli_open_sem protects lli_symlink_name allocation/data */ down(&lli->lli_open_sem); rc = ll_readlink_internal(inode, &request, &symname); @@ -96,8 +92,6 @@ static int ll_follow_link(struct dentry *dentry, struct nameidata *nd) int rc; ENTRY; - CHECK_MOUNT_EPOCH(inode); - down(&lli->lli_open_sem); rc = ll_readlink_internal(inode, &request, &symname); if (rc) diff --git a/lustre/mdc/mdc_reint.c b/lustre/mdc/mdc_reint.c index 324de95..c2e716f 100644 --- a/lustre/mdc/mdc_reint.c +++ b/lustre/mdc/mdc_reint.c @@ -40,9 +40,13 @@ static int mdc_reint(struct ptlrpc_request *request, int level) rc = ptlrpc_queue_wait(request); rc = ptlrpc_check_status(request, rc); - if (rc) + if (rc) { CERROR("error in handling %d\n", rc); - + } else { + /* For future resend/replays. 
*/ + u32 *opcodeptr = lustre_msg_buf(request->rq_reqmsg, 0); + *opcodeptr |= REINT_REPLAYING; + } return rc; } @@ -105,13 +109,10 @@ int mdc_create(struct lustre_handle *conn, level = LUSTRE_CONN_FULL; resend: rc = mdc_reint(req, level); + /* Resend if we were told to. */ if (rc == -ERESTARTSYS) { - struct mds_rec_create *rec = lustre_msg_buf(req->rq_reqmsg, 0); - level = LUSTRE_CONN_RECOVD; - CERROR("Lost reply: re-create rep.\n"); req->rq_flags = 0; - rec->cr_opcode = NTOH__u32(REINT_RECREATE); goto resend; } diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 44021d6..a3f32c7 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -350,13 +350,42 @@ out_dec: return rc; } +/* Call with med->med_open_lock held, please. */ +inline int mds_close_mfd(struct mds_file_data *mfd, struct mds_export_data *med) +{ + struct file *file = mfd->mfd_file; + LASSERT(file->private_data == mfd); + + list_del(&mfd->mfd_list); + mfd->mfd_servercookie = DEAD_HANDLE_MAGIC; + kmem_cache_free(mds_file_cache, mfd); + + return filp_close(file, 0); +} + static int mds_disconnect(struct lustre_handle *conn) { struct obd_export *export = class_conn2export(conn); + struct list_head *tmp, *n; + struct mds_export_data *med = &export->exp_mds_data; int rc; ENTRY; -#warning "Mike: we need to close all files opened on med_open_head" + /* + * Close any open files. 
+ */ + spin_lock(&med->med_open_lock); + list_for_each_safe(tmp, n, &med->med_open_head) { + struct mds_file_data *mfd = + list_entry(tmp, struct mds_file_data, mfd_list); + rc = mds_close_mfd(mfd, med); + if (rc) { + /* XXX better diagnostics, with file path and stuff */ + CDEBUG(D_INODE, "Error %d closing mfd %p\n", rc, mfd); + } + } + spin_unlock(&med->med_open_lock); + ldlm_cancel_locks_for_export(export); mds_client_free(export); @@ -838,7 +867,6 @@ static int mds_close(struct ptlrpc_request *req) { struct mds_export_data *med = &req->rq_export->exp_mds_data; struct mds_body *body; - struct file *file; struct mds_file_data *mfd; int rc; ENTRY; @@ -853,16 +881,9 @@ static int mds_close(struct ptlrpc_request *req) RETURN(-ESTALE); } - file = mfd->mfd_file; - LASSERT(file->private_data == mfd); - spin_lock(&med->med_open_lock); - list_del(&mfd->mfd_list); + req->rq_status = mds_close_mfd(mfd, med); spin_unlock(&med->med_open_lock); - mfd->mfd_servercookie = DEAD_HANDLE_MAGIC; - kmem_cache_free(mds_file_cache, mfd); - - req->rq_status = filp_close(file, 0); if (OBD_FAIL_CHECK(OBD_FAIL_MDS_CLOSE_PACK)) { CERROR("test case OBD_FAIL_MDS_CLOSE_PACK\n"); diff --git a/lustre/mds/mds_reint.c b/lustre/mds/mds_reint.c index 245befb..9025fad 100644 --- a/lustre/mds/mds_reint.c +++ b/lustre/mds/mds_reint.c @@ -160,55 +160,6 @@ out_setattr: return 0; } -static int mds_reint_recreate(struct mds_update_record *rec, int offset, - struct ptlrpc_request *req) -{ - struct dentry *de = NULL; - struct mds_obd *mds = mds_req2mds(req); - struct dentry *dchild = NULL; - struct inode *dir; - int rc = 0; - ENTRY; - - de = mds_fid2dentry(mds, rec->ur_fid1, NULL); - if (IS_ERR(de) || OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_CREATE)) { - LBUG(); - GOTO(out_create_de, rc = -ESTALE); - } - dir = de->d_inode; - CDEBUG(D_INODE, "parent ino %ld\n", dir->i_ino); - - down(&dir->i_sem); - dchild = lookup_one_len(rec->ur_name, de, rec->ur_namelen - 1); - if (IS_ERR(dchild)) { - CERROR("child lookup error 
%ld\n", PTR_ERR(dchild)); - up(&dir->i_sem); - LBUG(); - GOTO(out_create_dchild, rc = -ESTALE); - } - - if (dchild->d_inode) { - struct mds_body *body; - rc = 0; - body = lustre_msg_buf(req->rq_repmsg, 0); - mds_pack_inode2fid(&body->fid1, dchild->d_inode); - mds_pack_inode2body(body, dchild->d_inode); - } else { - CERROR("child doesn't exist (dir %ld, name %s)\n", - dir->i_ino, rec->ur_name); - rc = -ENOENT; - LBUG(); - } - -out_create_dchild: - l_dput(dchild); - up(&dir->i_sem); -out_create_de: - l_dput(de); - req->rq_status = rc; - return 0; -} - static int mds_reint_create(struct mds_update_record *rec, int offset, struct ptlrpc_request *req) { @@ -261,6 +212,8 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, CDEBUG(D_INODE, "child exists (dir %ld, name %s, ino %ld)\n", dir->i_ino, rec->ur_name, dchild->d_inode->i_ino); + /* XXX check that mode is correct? */ + body = lustre_msg_buf(req->rq_repmsg, offset); mds_pack_inode2fid(&body->fid1, inode); mds_pack_inode2body(body, inode); @@ -277,8 +230,15 @@ static int mds_reint_create(struct mds_update_record *rec, int offset, } else body->valid |= OBD_MD_FLEASIZE; } - /* now a normal case for intent locking */ - GOTO(out_create_dchild, rc = -EEXIST); + + /* This isn't an error for RECREATE. 
*/ + if (rec->ur_opcode & REINT_REPLAYING) { + CDEBUG(D_INODE, "EEXIST suppressed for REPLAYING\n"); + rc = 0; + } else { + rc = -EEXIST; + } + GOTO(out_create_dchild, rc); } OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_CREATE_WRITE, @@ -625,9 +585,17 @@ static int mds_reint_link(struct mds_update_record *rec, int offset, body->valid |= OBD_MD_FLEASIZE; } } - CERROR("child exists (dir %ld, name %s\n", - de_tgt_dir->d_inode->i_ino, rec->ur_name); - GOTO(out_link_dchild, rc = -EEXIST); + if (rec->ur_opcode & REINT_REPLAYING) { + rc = 0; + CDEBUG(D_INODE, + "child exists (dir %ld, name %s) (REPLAYING)\n", + de_tgt_dir->d_inode->i_ino, rec->ur_name); + } else { + rc = -EEXIST; + CERROR("child exists (dir %ld, name %s)\n", + de_tgt_dir->d_inode->i_ino, rec->ur_name); + } + GOTO(out_link_dchild, rc); } OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_LINK_WRITE, @@ -845,7 +813,6 @@ static mds_reinter reinters[REINT_MAX + 1] = { [REINT_UNLINK] mds_reint_unlink, [REINT_LINK] mds_reint_link, [REINT_RENAME] mds_reint_rename, - [REINT_RECREATE] mds_reint_recreate, }; int mds_reint_rec(struct mds_update_record *rec, int offset, @@ -854,11 +821,12 @@ int mds_reint_rec(struct mds_update_record *rec, int offset, struct mds_obd *mds = mds_req2mds(req); struct obd_run_ctxt saved; struct obd_ucred uc; - + int realop = rec->ur_opcode & REINT_OPCODE_MASK; int rc; - if (rec->ur_opcode < 1 || rec->ur_opcode > REINT_MAX) { - CERROR("opcode %d not valid\n", rec->ur_opcode); + if (realop < 1 || realop > REINT_MAX) { + CERROR("opcode %d not valid (%sREPLAYING)\n", realop, + rec->ur_opcode & REINT_REPLAYING ? 
"" : "not "); rc = req->rq_status = -EINVAL; RETURN(rc); } @@ -867,7 +835,7 @@ int mds_reint_rec(struct mds_update_record *rec, int offset, uc.ouc_fsgid = rec->ur_fsgid; push_ctxt(&saved, &mds->mds_ctxt, &uc); - rc = reinters[rec->ur_opcode] (rec, offset, req); + rc = reinters[realop] (rec, offset, req); pop_ctxt(&saved); return rc; diff --git a/lustre/obdclass/class_obd.c b/lustre/obdclass/class_obd.c index 3eb4180..6d67c32 100644 --- a/lustre/obdclass/class_obd.c +++ b/lustre/obdclass/class_obd.c @@ -470,6 +470,28 @@ inline void obd_conn2data(struct obd_ioctl_data *data, struct lustre_handle *con data->ioc_cookie = conn->cookie; } +static void forcibly_detach_exports(struct obd_device *obd) +{ + int rc; + struct list_head *tmp, *n; + struct lustre_handle fake_conn; + + CDEBUG(D_IOCTL, "OBD device %d (%p) has exports, " + "disconnecting them", obd->obd_minor, obd); + list_for_each_safe(tmp, n, &obd->obd_exports) { + struct obd_export *exp = list_entry(tmp, struct obd_export, + exp_obd_chain); + fake_conn.addr = (__u64)(unsigned long)exp; + fake_conn.cookie = exp->exp_cookie; + rc = obd_disconnect(&fake_conn); + if (rc) { + CDEBUG(D_IOCTL, "disconnecting export %p failed: %d\n", + exp, rc); + } else { + CDEBUG(D_IOCTL, "export %p disconnected\n", exp); + } + } +} /* to control /dev/obd */ static int obd_class_ioctl (struct inode * inode, struct file * filp, @@ -755,11 +777,13 @@ static int obd_class_ioctl (struct inode * inode, struct file * filp, CERROR("OBD device %d not attached\n", obd->obd_minor); GOTO(out, err=-ENODEV); } -#warning FIXME: Mike, we probably need some sort of "force detach" here if (!list_empty(&obd->obd_exports) ) { - CERROR("OBD device %d (%p) has exports\n", - obd->obd_minor, obd); - GOTO(out, err=-EBUSY); + if (data->ioc_inlbuf1[0] != 'F') { + CERROR("OBD device %d (%p) has exports\n", + obd->obd_minor, obd); + GOTO(out, err=-EBUSY); + } + forcibly_detach_exports(obd); } if (lprocfs_dereg_dev(obd) != LPROCFS_SUCCESS) { diff --git 
a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index d86c807..d6df605 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -507,6 +507,27 @@ static int interrupted_request(void *data) RETURN(1); /* ignored, as of this writing */ } +/* If we're being torn down by umount -f, or the import has been + * invalidated (such as by an OST failure), the request must fail with + * -EIO. + * + * Must be called with conn->c_lock held, will drop it if it returns -EIO. + * + * XXX this should just be testing the import, and umount_begin shouldn't touch + * XXX the connection. + */ +#define EIO_IF_INVALID(conn, req) \ +if ((conn->c_flags & CONN_INVALID) || \ + (req->rq_import->imp_flags & IMP_INVALID)) { \ + CERROR("req xid "LPD64" op %d to %s:%d: %s_INVALID\n", \ + (unsigned long long)req->rq_xid, req->rq_reqmsg->opc, \ + req->rq_connection->c_remote_uuid, \ + req->rq_import->imp_client->cli_request_portal, \ + (conn->c_flags & CONN_INVALID) ? "CONN_" : "IMP_"); \ + spin_unlock(&conn->c_lock); \ + RETURN(-EIO); \ +} + int ptlrpc_queue_wait(struct ptlrpc_request *req) { int rc = 0; @@ -523,16 +544,7 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req) /* XXX probably both an import and connection level are needed */ if (req->rq_level > conn->c_level) { spin_lock(&conn->c_lock); - if (conn->c_flags & CONN_INVALID) { - /* being torn down by "umount -f" */ - CERROR("req xid "LPD64" op %d to %s:%d: CONN_INVALID\n", - (unsigned long long)req->rq_xid, - req->rq_reqmsg->opc, - req->rq_connection->c_remote_uuid, - req->rq_import->imp_client->cli_request_portal); - spin_unlock(&conn->c_lock); - RETURN(-EIO); - } + EIO_IF_INVALID(conn, req); list_del(&req->rq_list); list_add_tail(&req->rq_list, &conn->c_delayed_head); spin_unlock(&conn->c_lock); @@ -564,14 +576,7 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req) resend: req->rq_timeout = obd_timeout; spin_lock(&conn->c_lock); - if (conn->c_flags & CONN_INVALID) { - CERROR("req xid "LPD64" op %d to %s:%d: 
CONN_INVALID\n", - (unsigned long long)req->rq_xid, req->rq_reqmsg->opc, - req->rq_connection->c_remote_uuid, - req->rq_import->imp_client->cli_request_portal); - spin_unlock(&conn->c_lock); /* being torn down by "umount -f" */ - RETURN(-EIO); - } + EIO_IF_INVALID(conn, req); list_del(&req->rq_list); list_add_tail(&req->rq_list, &conn->c_sending_head); @@ -657,6 +662,8 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req) return rc; } +#undef EIO_IF_INVALID + int ptlrpc_replay_req(struct ptlrpc_request *req) { int rc = 0, old_level; @@ -705,12 +712,14 @@ int ptlrpc_replay_req(struct ptlrpc_request *req) } CDEBUG(D_NET, "got rep "LPD64"\n", req->rq_xid); + + /* let the callback do fixups, possibly including in the request */ + if (req->rq_replay_cb) + req->rq_replay_cb(req, req->rq_replay_cb_data); + if (req->rq_repmsg->status == 0) { CDEBUG(D_NET, "--> buf %p len %d status %d\n", req->rq_repmsg, req->rq_replen, req->rq_repmsg->status); - if (req->rq_replay_cb) - req->rq_replay_cb(req, req->rq_replay_cb_data); - } else { CERROR("recovery failed: "); CERROR("req "LPD64" opc %d level %d, conn level %d\n", diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c index a4fb6c7..97a9214 100644 --- a/lustre/ptlrpc/recover.c +++ b/lustre/ptlrpc/recover.c @@ -146,8 +146,10 @@ int ptlrpc_replay(struct ptlrpc_connection *conn) /* server has seen req, we have reply: skip */ if ((req->rq_flags & PTL_RPC_FL_REPLIED) && req->rq_xid <= conn->c_last_xid) { - CDEBUG(D_HA, "REPLIED SKIP: xid "LPD64" transno "LPD64" op %d @ %d\n", - req->rq_xid, req->rq_repmsg->transno, req->rq_reqmsg->opc, + CDEBUG(D_HA, "REPLIED SKIP: xid "LPD64" transno " + LPD64" op %d @ %d\n", + req->rq_xid, req->rq_repmsg->transno, + req->rq_reqmsg->opc, req->rq_import->imp_client->cli_request_portal); continue; } @@ -155,12 +157,14 @@ int ptlrpc_replay(struct ptlrpc_connection *conn) /* server has lost req, we have reply: resend, ign reply */ if ((req->rq_flags & PTL_RPC_FL_REPLIED) && req->rq_xid > 
conn->c_last_xid) { - CDEBUG(D_HA, "REPLIED RESEND: xid "LPD64" transno "LPD64" op %d @ %d\n", - req->rq_xid, req->rq_repmsg->transno, req->rq_reqmsg->opc, + CDEBUG(D_HA, "REPLIED RESEND: xid "LPD64" transno " + LPD64" op %d @ %d\n", + req->rq_xid, req->rq_repmsg->transno, + req->rq_reqmsg->opc, req->rq_import->imp_client->cli_request_portal); rc = ptlrpc_replay_req(req); if (rc) { - CERROR("request resend error %d for req %Ld\n", + CERROR("request resend error %d for req %Ld\n", rc, req->rq_xid); GOTO(out, rc); } diff --git a/lustre/utils/lconf b/lustre/utils/lconf index 04398ad..c37ee0f 100755 --- a/lustre/utils/lconf +++ b/lustre/utils/lconf @@ -52,7 +52,7 @@ config.xml Lustre configuration in xml format. --get URL to fetch a config file --node Load config for -d | --cleanup Cleans up config. (Shutdown) --f | --force Unmount with \"umount -f\" during shutdown +-f | --force Forced unmounting and/or obd detach during cleanup -v | --verbose Print system commands as they are run -h | --help Print this help --gdb Prints message after creating gdb module script @@ -391,8 +391,8 @@ class LCTLInterface: ignore_errors device $%s cleanup - detach - quit""" % (name) + detach %s + quit""" % (name, ('force', '')[config.force()]) self.run(cmds) # create an lov diff --git a/lustre/utils/obd.c b/lustre/utils/obd.c index 3c6d4ce..d3fdb58 100644 --- a/lustre/utils/obd.c +++ b/lustre/utils/obd.c @@ -574,13 +574,19 @@ int jt_opt_threads(int argc, char **argv) int jt_obd_detach(int argc, char **argv) { struct obd_ioctl_data data; + char force = 'F'; int rc; IOCINIT(data); - if (argc != 1) + if (argc != 1 && argc != 2) return CMD_HELP; + if (argc == 2) { + data.ioc_inllen1 = 1; + data.ioc_inlbuf1 = &force; + } + if (obd_ioctl_pack(&data, &buf, max)) { fprintf(stderr, "error: %s: invalid ioctl\n", cmdname(argv[0])); return -2;