- Add imp_obd (and imp_chain) to struct obd_import: we need to be able to
go import->obd during recovery.
- Rename mdc to cli in client_obd_connect so that I don't get confused as
often.
- Use ptlrpc_req_finished instead of ptlrpc_free_req in a few places where
we need to keep the request around for possible replay during recovery.
- Flag opens as needing replay.
- Rewrite request address and cookie during replay and resend, to handle
the recovery case (the connection handle changes when the server reboots,
for example).
- ll_reconnect now correctly reconnects with a new server (possibly with
a new server UUID). Tested for MDS, "obviously correct" for OST as well.
- No more references to mdc_getstatus or sbi2mdc in the (now fs-distinct)
ll_reconnect and ll_recover_reconnect code.
We can now successfully reconnect to a rebooted MDS and begin replay.
More work is needed (on the MDS or the client, not clear yet) to make the
replay succeed.
struct ptlrpc_connection *imp_connection;
struct ptlrpc_client *imp_client;
struct lustre_handle imp_handle;
+ struct list_head imp_chain;
+ struct obd_device *imp_obd;
+ /* XXX need a UUID here, I think
+ * XXX what about client_obd.cl_target_uuid?
+ */
};
extern struct obd_import *class_conn2cliimp(struct lustre_handle *);
struct list_head c_dying_head; /* protected by c_lock */
struct recovd_data c_recovd_data;
- struct list_head c_clients; /* XXXshaver will be c_imports */
+ struct list_head c_imports;
struct list_head c_exports;
-
};
struct ptlrpc_client {
struct obd_ioctl_data* data = buf;
int rq_portal, rp_portal;
char *name;
- struct client_obd *mdc = &obddev->u.cli;
+ struct client_obd *cli = &obddev->u.cli;
char server_uuid[37];
ENTRY;
RETURN(-EINVAL);
}
- sema_init(&mdc->cl_sem, 1);
- mdc->cl_conn_count = 0;
- memcpy(mdc->cl_target_uuid, data->ioc_inlbuf1, data->ioc_inllen1);
+ sema_init(&cli->cl_sem, 1);
+ cli->cl_conn_count = 0;
+ memcpy(cli->cl_target_uuid, data->ioc_inlbuf1, data->ioc_inllen1);
memcpy(server_uuid, data->ioc_inlbuf2, MIN(data->ioc_inllen2,
sizeof(server_uuid)));
- mdc->cl_import.imp_connection = ptlrpc_uuid_to_connection(server_uuid);
- if (!mdc->cl_import.imp_connection)
+ cli->cl_import.imp_connection = ptlrpc_uuid_to_connection(server_uuid);
+ if (!cli->cl_import.imp_connection)
RETURN(-ENOENT);
ptlrpc_init_client(rq_portal, rp_portal, name,
&obddev->obd_ldlm_client);
- mdc->cl_import.imp_client = &obddev->obd_ldlm_client;
+ cli->cl_import.imp_client = &obddev->obd_ldlm_client;
+ cli->cl_import.imp_obd = obddev;
- mdc->cl_max_mdsize = sizeof(struct lov_mds_md);
+ cli->cl_max_mdsize = sizeof(struct lov_mds_md);
MOD_INC_USE_COUNT;
RETURN(0);
sizeof(obd->obd_uuid) };
char *tmp[] = {cli->cl_target_uuid, obd->obd_uuid};
int rq_opc = (obd->obd_type->typ_ops->o_brw) ? OST_CONNECT :MDS_CONNECT;
+ struct ptlrpc_connection *c;
ENTRY;
down(&cli->cl_sem);
request->rq_level = LUSTRE_CONN_NEW;
request->rq_replen = lustre_msg_size(0, NULL);
- // This handle may be important if a callback needs
- // to find the mdc/osc
request->rq_reqmsg->addr = conn->addr;
request->rq_reqmsg->cookie = conn->cookie;
- class_conn2export(conn)->exp_connection = request->rq_connection;
+ c = class_conn2export(conn)->exp_connection = request->rq_connection;
rc = ptlrpc_queue_wait(request);
rc = ptlrpc_check_status(request, rc);
if (rc)
GOTO(out_req, rc);
- request->rq_connection->c_level = LUSTRE_CONN_FULL;
- cli->cl_import.imp_handle = *(struct lustre_handle *)request->rq_repmsg;
+ list_add(&cli->cl_import.imp_chain, &c->c_imports);
+ c->c_level = LUSTRE_CONN_FULL;
+ cli->cl_import.imp_handle.addr = request->rq_repmsg->addr;
+ cli->cl_import.imp_handle.cookie = request->rq_repmsg->cookie;
- recovd_conn_manage(cli->cl_import.imp_connection,
- ptlrpc_recovd, ll_recover);
+ recovd_conn_manage(c, ptlrpc_recovd, ll_recover);
EXIT;
err = class_disconnect(conn);
if (!rc && err)
rc = err;
+ list_del_init(&cli->cl_import.imp_chain);
MOD_DEC_USE_COUNT;
out_sem:
up(&cli->cl_sem);
dlmimp->imp_client = &export->exp_obd->obd_ldlm_client;
dlmimp->imp_handle.addr = req->rq_reqmsg->addr;
dlmimp->imp_handle.cookie = req->rq_reqmsg->cookie;
+ dlmimp->imp_obd = /* LDLM! */ NULL;
#warning Peter: is this the right place to upgrade the server connection level?
req->rq_connection->c_level = LUSTRE_CONN_FULL;
rc = -rc;
GOTO(out, rc);
}
- ptlrpc_free_req(fd->fd_req);
+ ptlrpc_req_finished(fd->fd_req);
//ldlm_cli_cancel_unused();
EXIT;
neg_req:
- ptlrpc_free_req(request);
+ ptlrpc_req_finished(request);
negative:
dentry->d_op = &ll_d_ops;
d_add(dentry, inode);
/* pack the intended request */
mds_getattr_pack(req, 2, dir, de->d_name.name, de->d_name.len);
+ /* we need to replay opens */
+ if (it->it_op == IT_OPEN)
+ req->rq_flags |= PTL_RPC_FL_REPLAY;
+
/* get ready for the reply */
req->rq_replen = lustre_msg_size(3, repsize);
} else if (it->it_op == IT_READDIR) {
ENTRY;
CDEBUG(D_INODE, "resend request %Ld, opc %d\n",
req->rq_xid, req->rq_reqmsg->opc);
+ req->rq_reqmsg->addr = req->rq_import->imp_handle.addr;
+ req->rq_reqmsg->cookie = req->rq_import->imp_handle.cookie;
req->rq_status = -EAGAIN;
req->rq_level = LUSTRE_CONN_RECOVD;
req->rq_flags |= PTL_RPC_FL_RESEND;
RETURN(-rc);
}
+#if 0 && REPLAY_DEBUGGED
+ if (req->rq_flags & PTL_RPC_FL_REPLAY) {
+ /* keep a reference so it's around for replaying.
+ * this is balanced in XXXXXX?
+ */
+ atomic_inc(&req->rq_refcount);
+ }
+#endif
+
spin_lock(&conn->c_lock);
list_del(&req->rq_list);
list_add_tail(&req->rq_list, &conn->c_sending_head);
req->rq_time = CURRENT_TIME;
req->rq_timeout = obd_timeout;
+ req->rq_reqmsg->addr = req->rq_import->imp_handle.addr;
+ req->rq_reqmsg->cookie = req->rq_import->imp_handle.cookie;
rc = ptl_send_rpc(req);
if (rc) {
CERROR("error %d, opcode %d\n", rc, req->rq_reqmsg->opc);
INIT_LIST_HEAD(&c->c_delayed_head);
INIT_LIST_HEAD(&c->c_sending_head);
INIT_LIST_HEAD(&c->c_dying_head);
- INIT_LIST_HEAD(&c->c_clients);
+ INIT_LIST_HEAD(&c->c_imports);
INIT_LIST_HEAD(&c->c_exports);
atomic_set(&c->c_refcount, 0);
ptlrpc_connection_addref(c);
#include <linux/lustre_lite.h>
#include <linux/lustre_ha.h>
-#if 0
-/* FIXME: reference to mdc_getstatus causes dependency problems */
-static int ll_reconnect(struct ll_sb_info *sbi)
+int ll_reconnect(struct ptlrpc_connection *conn)
{
- struct ll_fid rootfid;
- __u64 last_committed;
- __u64 last_xid;
- int err;
struct ptlrpc_request *request;
- struct ptlrpc_connection *conn = sbi2mdc(sbi)->cl_import.imp_connection;
-
- ptlrpc_readdress_connection(conn, "mds");
+ struct list_head *tmp;
+ int rc = -EINVAL;
+ /* XXX c_lock semantics! */
conn->c_level = LUSTRE_CONN_CON;
- /* XXX: need to store the last_* values somewhere */
- err = mdc_getstatus(&sbi->ll_mdc_conn, &rootfid, &last_committed,
- &last_xid, &request);
- if (err) {
- CERROR("cannot mds_connect: rc = %d\n", err);
- GOTO(out_disc, err = -ENOTCONN);
+ /* XXX this code MUST be shared with class_obd_connect! */
+ list_for_each(tmp, &conn->c_imports) {
+ struct obd_import *imp = list_entry(tmp, struct obd_import,
+ imp_chain);
+ struct obd_device *obd = imp->imp_obd;
+ struct client_obd *cli = &obd->u.cli;
+ int rq_opc = (obd->obd_type->typ_ops->o_brw)
+ ? OST_CONNECT : MDS_CONNECT;
+ int size[] = { sizeof(cli->cl_target_uuid),
+ sizeof(obd->obd_uuid) };
+ char *tmp[] = {cli->cl_target_uuid, obd->obd_uuid };
+ struct lustre_handle old_hdl;
+
+ LASSERT(imp->imp_connection == conn);
+ request = ptlrpc_prep_req(imp, rq_opc, 2, size, tmp);
+ request->rq_level = LUSTRE_CONN_NEW;
+ request->rq_replen = lustre_msg_size(0, NULL);
+ /* XXX are (addr, cookie) right? */
+ request->rq_reqmsg->addr = imp->imp_handle.addr;
+ request->rq_reqmsg->cookie = imp->imp_handle.cookie;
+ rc = ptlrpc_queue_wait(request);
+ rc = ptlrpc_check_status(request, rc);
+ if (rc) {
+ CERROR("cannot connect to %s@%s: rc = %d\n",
+ cli->cl_target_uuid, conn->c_remote_uuid, rc);
+ ptlrpc_free_req(request);
+ GOTO(out_disc, rc = -ENOTCONN);
+ }
+
+ old_hdl = imp->imp_handle;
+ imp->imp_handle.addr = request->rq_repmsg->addr;
+ imp->imp_handle.cookie = request->rq_repmsg->cookie;
+ CERROR("reconnected to %s@%s (%Lx/%Lx, was %Lx/%Lx)!\n",
+ cli->cl_target_uuid, conn->c_remote_uuid,
+ imp->imp_handle.addr, imp->imp_handle.cookie,
+ old_hdl.addr, old_hdl.cookie);
+ ptlrpc_free_req(request);
}
- conn->c_last_xid = last_xid;
conn->c_level = LUSTRE_CONN_RECOVD;
out_disc:
- return err;
+ return rc;
}
-#endif
static int ll_recover_upcall(struct ptlrpc_connection *conn)
{
static int ll_recover_reconnect(struct ptlrpc_connection *conn)
{
- RETURN(-ENOSYS);
-#if 0
- /* XXXshaver this code needs to know about connection-driven recovery! */
-
- struct ptlrpc_request *req;
- struct list_head *tmp, *pos;
- struct ll_sb_info *sbi = cli->cli_data;
- struct ptlrpc_connection *conn = cli->cli_connection;
int rc = 0;
+ struct list_head *tmp, *pos;
+ struct ptlrpc_request *req;
ENTRY;
/* 1. reconnect */
- ll_reconnect(sbi);
+ rc = ll_reconnect(conn);
+ if (rc)
+ RETURN(rc);
/* 2. walk the request list */
spin_lock(&conn->c_lock);
if (req->rq_flags & PTL_RPC_FL_REPLAY) {
CDEBUG(D_INODE, "req %Ld needs replay [last rcvd %Ld]\n",
req->rq_xid, conn->c_last_xid);
+ rc = ptlrpc_replay_req(req);
+#if 0
#error We should not hold a spinlock over such a lengthy operation.
#error If necessary, drop spinlock, do operation, re-get spinlock, restart loop.
#error If we need to avoid re-processint items, then delete them from the list
#error as they are replayed and re-add at the tail of this list, so the next
#error item to process will always be at the head of the list.
- rc = ptlrpc_replay_req(req);
+#endif
if (rc) {
CERROR("recovery replay error %d for req %Ld\n",
rc, req->rq_xid);
}
- sbi2mdc(sbi)->cl_conn->c_level = LUSTRE_CONN_FULL;
+ conn->c_level = LUSTRE_CONN_FULL;
recovd_conn_fixed(conn);
/* Finally, continue what we delayed since recovery started */
out:
spin_unlock(&conn->c_lock);
return rc;
-#endif
}
int ll_recover(struct recovd_data *rd, int phase)
struct obd_device *obd = class_conn2obd(hdl);
struct recovd_obd *recovd = &obd->u.recovd;
struct obd_ioctl_data *data = karg;
+#if 0 && PARALLEL_RECOVERY
struct list_head *tmp;
+#endif
ENTRY;