- Poison imports for failed OSCs, so future requests get -EIO.
- Remove the epoch checking for mounts and inodes, because we don't want to
kill a whole filesystem because a single OSC died.
- Close open files on MDS-disconnect.
- If asked with a "force" parameter, disconnect all remaining exports during
OBD disconnection.
- Teach lconf to send that parameter when given -f.
#define REINT_LINK 3
#define REINT_UNLINK 4
#define REINT_RENAME 5
-#define REINT_RECREATE 6
-#define REINT_MAX 6
+#define REINT_MAX 5
+
+#define REINT_OPCODE_MASK 0xff /* opcodes must fit into this mask */
+#define REINT_REPLAYING 0x1000 /* masked into the opcode to indicate replay */
struct ll_fid {
__u64 id;
#ifdef __KERNEL__
+#define IMP_INVALID 1
+
#include <linux/lustre_idl.h>
struct obd_import {
struct ptlrpc_connection *imp_connection;
struct lustre_handle imp_handle;
struct list_head imp_chain;
struct obd_device *imp_obd;
- /* XXX need a UUID here, I think
- */
+ int imp_flags;
+ /* XXX need a UUID here, I think */
};
extern struct obd_import *class_conn2cliimp(struct lustre_handle *);
char *lli_symlink_name;
struct lustre_handle lli_intent_lock_handle;
struct semaphore lli_open_sem;
- __u32 lli_mount_epoch;
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
struct inode lli_vfs_inode;
#endif
time_t ll_commitcbd_timeout;
spinlock_t ll_commitcbd_lock;
struct list_head ll_conn_chain; /* per-conn chain of SBs */
- __u32 ll_mount_epoch;
};
-#define CHECK_MOUNT_EPOCH(i) \
-do { \
- if (ll_i2info(i)->lli_mount_epoch != ll_i2sbi(i)->ll_mount_epoch) { \
- make_bad_inode(i); \
- RETURN(-EIO); \
- } \
-} while(0)
-
static inline struct ll_sb_info *ll_s2sbi(struct super_block *sb)
{
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
[REINT_LINK] mds_link_unpack,
[REINT_UNLINK] mds_unlink_unpack,
[REINT_RENAME] mds_rename_unpack,
- [REINT_RECREATE] mds_create_unpack,
};
int mds_update_unpack(struct ptlrpc_request *req, int offset,
struct mds_update_record *rec)
{
__u32 *opcode = lustre_msg_buf(req->rq_reqmsg, offset);
- int rc;
+ int rc, realop;
ENTRY;
if (!opcode || req->rq_reqmsg->buflens[offset] < sizeof(*opcode))
RETURN(-EFAULT);
- rec->ur_opcode = NTOH__u32(*opcode);
+ realop = rec->ur_opcode = NTOH__u32(*opcode);
+ realop &= REINT_OPCODE_MASK;
- if (rec->ur_opcode < 0 || rec->ur_opcode > REINT_MAX)
+ if (realop < 0 || realop > REINT_MAX)
RETURN(-EFAULT);
- rc = mds_unpackers[rec->ur_opcode](req, offset, rec);
+ rc = mds_unpackers[realop](req, offset, rec);
RETURN(rc);
}
LASSERT(!file->private_data);
- CHECK_MOUNT_EPOCH(inode);
-
lsm = lli->lli_smd;
/* delayed create of object (intent created inode) */
ENTRY;
- CHECK_MOUNT_EPOCH(inode);
-
fd = (struct ll_file_data *)file->private_data;
if (!fd) {
LBUG();
long long retval;
ENTRY;
- CHECK_MOUNT_EPOCH(inode);
-
switch (origin) {
case 2: {
struct ll_inode_info *lli = ll_i2info(inode);
ENTRY;
- /* CHECK_MOUNT_EPOCH(dir); */
- if (ll_i2info(dir)->lli_mount_epoch != ll_i2sbi(dir)->ll_mount_epoch) {
- make_bad_inode(dir);
- RETURN(ERR_PTR(-EIO));
- }
-
if (it == NULL)
it = &lookup_it;
int rc = 0;
ENTRY;
- CHECK_MOUNT_EPOCH(dir);
-
LL_GET_INTENT(dentry, it);
inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len,
int err = 0;
ENTRY;
- CHECK_MOUNT_EPOCH(dir);
-
LL_GET_INTENT(dentry, it);
inode = ll_create_node(dir, dentry->d_name.name, dentry->d_name.len,
{
struct list_head *tmp, *n;
+ /* Make sure that no new requests get processed for this import.
+ * ptlrpc_queue_wait must (and does) hold c_lock while testing this flag and
+ * then putting requests on sending_head or delayed_head.
+ */
+ spin_lock(&imp->imp_connection->c_lock);
+ imp->imp_flags |= IMP_INVALID;
+ spin_unlock(&imp->imp_connection->c_lock);
+
list_for_each_safe(tmp, n, &imp->imp_connection->c_sending_head) {
struct ptlrpc_request *req =
list_entry(tmp, struct ptlrpc_request, rq_list);
+
+ if (req->rq_import != imp)
+ continue;
+
if (req->rq_flags & PTL_RPC_FL_REPLIED) {
/* no need to replay, just discard */
CERROR("uncommitted req xid "LPD64" op %d to OST %s\n",
reconnect_ost(imp);
} else {
int rc = ptlrpc_reconnect_import(imp, MDS_CONNECT);
- if (!rc)
+ if (!rc) {
need_replay = 1;
+ /* XXX obd_cancel_unused */
+ }
/* make sure we don't try to replay for dead imps?
*
* else imp->imp_connection = NULL;
*
*/
+
}
}
int err;
ENTRY;
- CHECK_MOUNT_EPOCH(inode);
-
if (!cbd)
RETURN(-ENOMEM);
struct io_cb_data *cbd = ll_init_cb();
ENTRY;
- CHECK_MOUNT_EPOCH(inode);
-
pg.pg = page;
pg.count = to;
pg.off = (((obd_off)page->index) << PAGE_SHIFT);
int i, rc = 0;
struct io_cb_data *cbd;
- CHECK_MOUNT_EPOCH(inode);
-
ENTRY;
if (!lsm || !lsm->lsm_object_id)
RETURN(-ENOMEM);
}
INIT_LIST_HEAD(&sbi->ll_conn_chain);
- sbi->ll_mount_epoch = 0;
generate_random_uuid(uuid);
class_uuid_unparse(uuid, sbi->ll_sb_uuid);
ENTRY;
sema_init(&lli->lli_open_sem, 1);
- lli->lli_mount_epoch = ll_i2sbi(inode)->ll_mount_epoch;
/* core attributes first */
ll_update_inode(inode, body);
conn = list_entry(ctmp, struct ptlrpc_connection, c_sb_chain);
spin_lock(&conn->c_lock);
+ /* XXX should just be dealing with imports, probably through
+ * XXX iocontrol, need next-gen recovery! */
conn->c_flags |= CONN_INVALID;
invalidate_request_list(&conn->c_sending_head);
invalidate_request_list(&conn->c_delayed_head);
*request = NULL;
- CHECK_MOUNT_EPOCH(inode);
-
if (lli->lli_symlink_name) {
*symname = lli->lli_symlink_name;
CDEBUG(D_INODE, "using cached symlink %s\n", *symname);
int rc;
ENTRY;
- CHECK_MOUNT_EPOCH(inode);
-
/* on symlinks lli_open_sem protects lli_symlink_name allocation/data */
down(&lli->lli_open_sem);
rc = ll_readlink_internal(inode, &request, &symname);
int rc;
ENTRY;
- CHECK_MOUNT_EPOCH(inode);
-
down(&lli->lli_open_sem);
rc = ll_readlink_internal(inode, &request, &symname);
if (rc)
rc = ptlrpc_queue_wait(request);
rc = ptlrpc_check_status(request, rc);
- if (rc)
+ if (rc) {
CERROR("error in handling %d\n", rc);
-
+ } else {
+ /* For future resend/replays. */
+ u32 *opcodeptr = lustre_msg_buf(request->rq_reqmsg, 0);
+ *opcodeptr |= REINT_REPLAYING;
+ }
return rc;
}
level = LUSTRE_CONN_FULL;
resend:
rc = mdc_reint(req, level);
+ /* Resend if we were told to. */
if (rc == -ERESTARTSYS) {
- struct mds_rec_create *rec = lustre_msg_buf(req->rq_reqmsg, 0);
-
level = LUSTRE_CONN_RECOVD;
- CERROR("Lost reply: re-create rep.\n");
req->rq_flags = 0;
- rec->cr_opcode = NTOH__u32(REINT_RECREATE);
goto resend;
}
return rc;
}
+/* Tear down one open-file record and close the file behind it.
+ *
+ * Asserts that file->private_data points back at mfd, unlinks mfd from the
+ * list it sits on (mfd_list), stamps mfd_servercookie with
+ * DEAD_HANDLE_MAGIC, hands mfd back to mds_file_cache, and returns the
+ * result of filp_close(file, 0).  mfd has been freed by the time this
+ * returns, so the caller must not touch it afterwards.
+ *
+ * med is not referenced in the body; it names the export whose
+ * med_open_lock the caller is holding.
+ *
+ * Call with med->med_open_lock held, please.
+ *
+ * NOTE(review): the visible callers take med_open_lock with spin_lock();
+ * filp_close() can presumably sleep -- confirm that calling it here, with
+ * the spinlock held, is safe.
+ */
+inline int mds_close_mfd(struct mds_file_data *mfd, struct mds_export_data *med)
+{
+ struct file *file = mfd->mfd_file;
+ LASSERT(file->private_data == mfd);
+
+ list_del(&mfd->mfd_list);
+ mfd->mfd_servercookie = DEAD_HANDLE_MAGIC;
+ kmem_cache_free(mds_file_cache, mfd);
+
+ return filp_close(file, 0);
+}
+
static int mds_disconnect(struct lustre_handle *conn)
{
struct obd_export *export = class_conn2export(conn);
+ struct list_head *tmp, *n;
+ struct mds_export_data *med = &export->exp_mds_data;
int rc;
ENTRY;
-#warning "Mike: we need to close all files opened on med_open_head"
+ /*
+ * Close any open files.
+ */
+ spin_lock(&med->med_open_lock);
+ list_for_each_safe(tmp, n, &med->med_open_head) {
+ struct mds_file_data *mfd =
+ list_entry(tmp, struct mds_file_data, mfd_list);
+ rc = mds_close_mfd(mfd, med);
+ if (rc) {
+ /* XXX better diagnostics, with file path and stuff */
+ CDEBUG(D_INODE, "Error %d closing mfd %p\n", rc, mfd);
+ }
+ }
+ spin_unlock(&med->med_open_lock);
+
ldlm_cancel_locks_for_export(export);
mds_client_free(export);
{
struct mds_export_data *med = &req->rq_export->exp_mds_data;
struct mds_body *body;
- struct file *file;
struct mds_file_data *mfd;
int rc;
ENTRY;
RETURN(-ESTALE);
}
- file = mfd->mfd_file;
- LASSERT(file->private_data == mfd);
-
spin_lock(&med->med_open_lock);
- list_del(&mfd->mfd_list);
+ req->rq_status = mds_close_mfd(mfd, med);
spin_unlock(&med->med_open_lock);
- mfd->mfd_servercookie = DEAD_HANDLE_MAGIC;
- kmem_cache_free(mds_file_cache, mfd);
-
- req->rq_status = filp_close(file, 0);
if (OBD_FAIL_CHECK(OBD_FAIL_MDS_CLOSE_PACK)) {
CERROR("test case OBD_FAIL_MDS_CLOSE_PACK\n");
return 0;
}
-static int mds_reint_recreate(struct mds_update_record *rec, int offset,
- struct ptlrpc_request *req)
-{
- struct dentry *de = NULL;
- struct mds_obd *mds = mds_req2mds(req);
- struct dentry *dchild = NULL;
- struct inode *dir;
- int rc = 0;
- ENTRY;
-
- de = mds_fid2dentry(mds, rec->ur_fid1, NULL);
- if (IS_ERR(de) || OBD_FAIL_CHECK(OBD_FAIL_MDS_REINT_CREATE)) {
- LBUG();
- GOTO(out_create_de, rc = -ESTALE);
- }
- dir = de->d_inode;
- CDEBUG(D_INODE, "parent ino %ld\n", dir->i_ino);
-
- down(&dir->i_sem);
- dchild = lookup_one_len(rec->ur_name, de, rec->ur_namelen - 1);
- if (IS_ERR(dchild)) {
- CERROR("child lookup error %ld\n", PTR_ERR(dchild));
- up(&dir->i_sem);
- LBUG();
- GOTO(out_create_dchild, rc = -ESTALE);
- }
-
- if (dchild->d_inode) {
- struct mds_body *body;
- rc = 0;
- body = lustre_msg_buf(req->rq_repmsg, 0);
- mds_pack_inode2fid(&body->fid1, dchild->d_inode);
- mds_pack_inode2body(body, dchild->d_inode);
- } else {
- CERROR("child doesn't exist (dir %ld, name %s)\n",
- dir->i_ino, rec->ur_name);
- rc = -ENOENT;
- LBUG();
- }
-
-out_create_dchild:
- l_dput(dchild);
- up(&dir->i_sem);
-out_create_de:
- l_dput(de);
- req->rq_status = rc;
- return 0;
-}
-
static int mds_reint_create(struct mds_update_record *rec, int offset,
struct ptlrpc_request *req)
{
CDEBUG(D_INODE, "child exists (dir %ld, name %s, ino %ld)\n",
dir->i_ino, rec->ur_name, dchild->d_inode->i_ino);
+ /* XXX check that mode is correct? */
+
body = lustre_msg_buf(req->rq_repmsg, offset);
mds_pack_inode2fid(&body->fid1, inode);
mds_pack_inode2body(body, inode);
} else
body->valid |= OBD_MD_FLEASIZE;
}
- /* now a normal case for intent locking */
- GOTO(out_create_dchild, rc = -EEXIST);
+
+ /* This isn't an error when the create is being replayed. */
+ if (rec->ur_opcode & REINT_REPLAYING) {
+ CDEBUG(D_INODE, "EEXIST suppressed for REPLAYING\n");
+ rc = 0;
+ } else {
+ rc = -EEXIST;
+ }
+ GOTO(out_create_dchild, rc);
}
OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_CREATE_WRITE,
body->valid |= OBD_MD_FLEASIZE;
}
}
- CERROR("child exists (dir %ld, name %s\n",
- de_tgt_dir->d_inode->i_ino, rec->ur_name);
- GOTO(out_link_dchild, rc = -EEXIST);
+ if (rec->ur_opcode & REINT_REPLAYING) {
+ rc = 0;
+ CDEBUG(D_INODE,
+ "child exists (dir %ld, name %s) (REPLAYING)\n",
+ de_tgt_dir->d_inode->i_ino, rec->ur_name);
+ } else {
+ rc = -EEXIST;
+ CERROR("child exists (dir %ld, name %s)\n",
+ de_tgt_dir->d_inode->i_ino, rec->ur_name);
+ }
+ GOTO(out_link_dchild, rc);
}
OBD_FAIL_WRITE(OBD_FAIL_MDS_REINT_LINK_WRITE,
[REINT_UNLINK] mds_reint_unlink,
[REINT_LINK] mds_reint_link,
[REINT_RENAME] mds_reint_rename,
- [REINT_RECREATE] mds_reint_recreate,
};
int mds_reint_rec(struct mds_update_record *rec, int offset,
struct mds_obd *mds = mds_req2mds(req);
struct obd_run_ctxt saved;
struct obd_ucred uc;
-
+ int realop = rec->ur_opcode & REINT_OPCODE_MASK;
int rc;
- if (rec->ur_opcode < 1 || rec->ur_opcode > REINT_MAX) {
- CERROR("opcode %d not valid\n", rec->ur_opcode);
+ if (realop < 1 || realop > REINT_MAX) {
+ CERROR("opcode %d not valid (%sREPLAYING)\n", realop,
+ rec->ur_opcode & REINT_REPLAYING ? "" : "not ");
rc = req->rq_status = -EINVAL;
RETURN(rc);
}
uc.ouc_fsgid = rec->ur_fsgid;
push_ctxt(&saved, &mds->mds_ctxt, &uc);
- rc = reinters[rec->ur_opcode] (rec, offset, req);
+ rc = reinters[realop] (rec, offset, req);
pop_ctxt(&saved);
return rc;
data->ioc_cookie = conn->cookie;
}
+/* Disconnect every export still attached to obd.
+ *
+ * For each entry on obd->obd_exports a temporary lustre_handle is built from
+ * the export's address and its exp_cookie and passed to obd_disconnect().
+ * A failed disconnect is only logged and the walk continues, so this is
+ * best-effort: exports may remain on the list afterwards.
+ *
+ * list_for_each_safe is used, presumably because obd_disconnect() unlinks
+ * the export from obd_exports -- TODO confirm.
+ */
+static void forcibly_detach_exports(struct obd_device *obd)
+{
+ int rc;
+ struct list_head *tmp, *n;
+ struct lustre_handle fake_conn;
+
+ /* Terminate the message with a newline like every other CDEBUG here, so
+ * it does not run into the next log line. */
+ CDEBUG(D_IOCTL, "OBD device %d (%p) has exports, "
+ "disconnecting them\n", obd->obd_minor, obd);
+ list_for_each_safe(tmp, n, &obd->obd_exports) {
+ struct obd_export *exp = list_entry(tmp, struct obd_export,
+ exp_obd_chain);
+ fake_conn.addr = (__u64)(unsigned long)exp;
+ fake_conn.cookie = exp->exp_cookie;
+ rc = obd_disconnect(&fake_conn);
+ if (rc) {
+ CDEBUG(D_IOCTL, "disconnecting export %p failed: %d\n",
+ exp, rc);
+ } else {
+ CDEBUG(D_IOCTL, "export %p disconnected\n", exp);
+ }
+ }
+}
/* to control /dev/obd */
static int obd_class_ioctl (struct inode * inode, struct file * filp,
CERROR("OBD device %d not attached\n", obd->obd_minor);
GOTO(out, err=-ENODEV);
}
-#warning FIXME: Mike, we probably need some sort of "force detach" here
if (!list_empty(&obd->obd_exports) ) {
- CERROR("OBD device %d (%p) has exports\n",
- obd->obd_minor, obd);
- GOTO(out, err=-EBUSY);
+ /* Only a detach that explicitly passed 'F' in inlbuf1 may tear
+ * down live exports.  The caller supplies inlbuf1 only for a
+ * forced detach, so an absent or empty buffer means "not forced"
+ * and must not be dereferenced. */
+ if (!data->ioc_inllen1 || !data->ioc_inlbuf1 ||
+ data->ioc_inlbuf1[0] != 'F') {
+ CERROR("OBD device %d (%p) has exports\n",
+ obd->obd_minor, obd);
+ GOTO(out, err=-EBUSY);
+ }
+ forcibly_detach_exports(obd);
}
if (lprocfs_dereg_dev(obd) != LPROCFS_SUCCESS) {
RETURN(1); /* ignored, as of this writing */
}
+/* If we're being torn down by umount -f, or the import has been
+ * invalidated (such as by an OST failure), the request must fail with
+ * -EIO.
+ *
+ * Must be called with conn->c_lock held, will drop it if it returns -EIO.
+ *
+ * XXX this should just be testing the import, and umount_begin shouldn't touch
+ * XXX the connection.
+ */
+#define EIO_IF_INVALID(conn, req) \
+if ((conn->c_flags & CONN_INVALID) || \
+ (req->rq_import->imp_flags & IMP_INVALID)) { \
+ CERROR("req xid "LPD64" op %d to %s:%d: %s_INVALID\n", \
+ (unsigned long long)req->rq_xid, req->rq_reqmsg->opc, \
+ req->rq_connection->c_remote_uuid, \
+ req->rq_import->imp_client->cli_request_portal, \
+ (conn->c_flags & CONN_INVALID) ? "CONN_" : "IMP_"); \
+ spin_unlock(&conn->c_lock); \
+ RETURN(-EIO); \
+}
+
int ptlrpc_queue_wait(struct ptlrpc_request *req)
{
int rc = 0;
/* XXX probably both an import and connection level are needed */
if (req->rq_level > conn->c_level) {
spin_lock(&conn->c_lock);
- if (conn->c_flags & CONN_INVALID) {
- /* being torn down by "umount -f" */
- CERROR("req xid "LPD64" op %d to %s:%d: CONN_INVALID\n",
- (unsigned long long)req->rq_xid,
- req->rq_reqmsg->opc,
- req->rq_connection->c_remote_uuid,
- req->rq_import->imp_client->cli_request_portal);
- spin_unlock(&conn->c_lock);
- RETURN(-EIO);
- }
+ EIO_IF_INVALID(conn, req);
list_del(&req->rq_list);
list_add_tail(&req->rq_list, &conn->c_delayed_head);
spin_unlock(&conn->c_lock);
resend:
req->rq_timeout = obd_timeout;
spin_lock(&conn->c_lock);
- if (conn->c_flags & CONN_INVALID) {
- CERROR("req xid "LPD64" op %d to %s:%d: CONN_INVALID\n",
- (unsigned long long)req->rq_xid, req->rq_reqmsg->opc,
- req->rq_connection->c_remote_uuid,
- req->rq_import->imp_client->cli_request_portal);
- spin_unlock(&conn->c_lock); /* being torn down by "umount -f" */
- RETURN(-EIO);
- }
+ EIO_IF_INVALID(conn, req);
list_del(&req->rq_list);
list_add_tail(&req->rq_list, &conn->c_sending_head);
return rc;
}
+#undef EIO_IF_INVALID
+
int ptlrpc_replay_req(struct ptlrpc_request *req)
{
int rc = 0, old_level;
}
CDEBUG(D_NET, "got rep "LPD64"\n", req->rq_xid);
+
+ /* let the callback do fixups, possibly including in the request */
+ if (req->rq_replay_cb)
+ req->rq_replay_cb(req, req->rq_replay_cb_data);
+
if (req->rq_repmsg->status == 0) {
CDEBUG(D_NET, "--> buf %p len %d status %d\n", req->rq_repmsg,
req->rq_replen, req->rq_repmsg->status);
- if (req->rq_replay_cb)
- req->rq_replay_cb(req, req->rq_replay_cb_data);
-
} else {
CERROR("recovery failed: ");
CERROR("req "LPD64" opc %d level %d, conn level %d\n",
/* server has seen req, we have reply: skip */
if ((req->rq_flags & PTL_RPC_FL_REPLIED) &&
req->rq_xid <= conn->c_last_xid) {
- CDEBUG(D_HA, "REPLIED SKIP: xid "LPD64" transno "LPD64" op %d @ %d\n",
- req->rq_xid, req->rq_repmsg->transno, req->rq_reqmsg->opc,
+ CDEBUG(D_HA, "REPLIED SKIP: xid "LPD64" transno "
+ LPD64" op %d @ %d\n",
+ req->rq_xid, req->rq_repmsg->transno,
+ req->rq_reqmsg->opc,
req->rq_import->imp_client->cli_request_portal);
continue;
}
/* server has lost req, we have reply: resend, ign reply */
if ((req->rq_flags & PTL_RPC_FL_REPLIED) &&
req->rq_xid > conn->c_last_xid) {
- CDEBUG(D_HA, "REPLIED RESEND: xid "LPD64" transno "LPD64" op %d @ %d\n",
- req->rq_xid, req->rq_repmsg->transno, req->rq_reqmsg->opc,
+ CDEBUG(D_HA, "REPLIED RESEND: xid "LPD64" transno "
+ LPD64" op %d @ %d\n",
+ req->rq_xid, req->rq_repmsg->transno,
+ req->rq_reqmsg->opc,
req->rq_import->imp_client->cli_request_portal);
rc = ptlrpc_replay_req(req);
if (rc) {
- CERROR("request resend error %d for req %Ld\n",
+ CERROR("request resend error %d for req %Ld\n",
rc, req->rq_xid);
GOTO(out, rc);
}
--get <url> URL to fetch a config file
--node <nodename> Load config for <nodename>
-d | --cleanup Cleans up config. (Shutdown)
--f | --force Unmount with \"umount -f\" during shutdown
+-f | --force Forced unmounting and/or obd detach during cleanup
-v | --verbose Print system commands as they are run
-h | --help Print this help
--gdb Prints message after creating gdb module script
ignore_errors
device $%s
cleanup
- detach
- quit""" % (name)
+ detach %s
+ quit""" % (name, ('force', '')[config.force()])
self.run(cmds)
# create an lov
int jt_obd_detach(int argc, char **argv)
{
struct obd_ioctl_data data;
+ char force = 'F';
int rc;
IOCINIT(data);
- if (argc != 1)
+ if (argc != 1 && argc != 2)
return CMD_HELP;
+ if (argc == 2) {
+ data.ioc_inllen1 = 1;
+ data.ioc_inlbuf1 = &force;
+ }
+
if (obd_ioctl_pack(&data, &buf, max)) {
fprintf(stderr, "error: %s: invalid ioctl\n", cmdname(argv[0]));
return -2;