From a98704d407d822649e53fb02d677807099fe5cc8 Mon Sep 17 00:00:00 2001 From: shaver Date: Fri, 6 Sep 2002 15:11:19 +0000 Subject: [PATCH] - Chain imports on connection, like exports. - Need to be able to go import->obd during recovery. - Rename mdc to cli in client_obd_connect so that I don't get confused as often. - Use ptlrpc_req_finished instead of ptlrpc_free_req in a few places where we need to keep the request around for possible replay during recovery. - Flag opens as needing replay. - Rewrite request address and cookie during replay and resend, to handle the recovery case (the connection handle changes when the server reboots, for example). - ll_reconnect now correctly reconnects with a new server (possibly with a new server UUID). Tested for MDS, "obviously correct" for OST as well. - no more references to mdc_getstatus or sbi2mdc in the (now fs-distinct) ll_reconnect_recover code. We can now successfully reconnect to a rebooted MDS and begin replay. Need more work (on MDS or client, not clear yet) to make the replay succeed. --- lustre/include/linux/lustre_import.h | 5 +++ lustre/include/linux/lustre_net.h | 3 +- lustre/lib/l_net.c | 33 ++++++++------- lustre/llite/file.c | 2 +- lustre/llite/namei.c | 2 +- lustre/mdc/mdc_request.c | 4 ++ lustre/ptlrpc/client.c | 13 ++++++ lustre/ptlrpc/connection.c | 2 +- lustre/ptlrpc/recover.c | 82 ++++++++++++++++++++++-------------- lustre/ptlrpc/rpc.c | 2 + 10 files changed, 97 insertions(+), 51 deletions(-) diff --git a/lustre/include/linux/lustre_import.h b/lustre/include/linux/lustre_import.h index e6418cf..aa3e3d7 100644 --- a/lustre/include/linux/lustre_import.h +++ b/lustre/include/linux/lustre_import.h @@ -17,6 +17,11 @@ struct obd_import { struct ptlrpc_connection *imp_connection; struct ptlrpc_client *imp_client; struct lustre_handle imp_handle; + struct list_head imp_chain; + struct obd_device *imp_obd; + /* XXX need a UUID here, I think + * XXX what about client_obd.cl_target_uuid? + */ }; extern struct obd_import *class_conn2cliimp(struct lustre_handle *); diff --git a/lustre/include/linux/lustre_net.h b/lustre/include/linux/lustre_net.h index 79c7d02..08c67c6 100644 --- a/lustre/include/linux/lustre_net.h +++ b/lustre/include/linux/lustre_net.h @@ -62,9 +62,8 @@ struct ptlrpc_connection { struct list_head c_dying_head; /* protected by c_lock */ struct recovd_data c_recovd_data; - struct list_head c_clients; /* XXXshaver will be c_imports */ + struct list_head c_imports; struct list_head c_exports; - }; struct ptlrpc_client { diff --git a/lustre/lib/l_net.c b/lustre/lib/l_net.c index 09e66d9..844aa8d 100644 --- a/lustre/lib/l_net.c +++ b/lustre/lib/l_net.c @@ -51,7 +51,7 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf) struct obd_ioctl_data* data = buf; int rq_portal, rp_portal; char *name; - struct client_obd *mdc = &obddev->u.cli; + struct client_obd *cli = &obddev->u.cli; char server_uuid[37]; ENTRY; @@ -85,21 +85,22 @@ int client_obd_setup(struct obd_device *obddev, obd_count len, void *buf) RETURN(-EINVAL); } - sema_init(&mdc->cl_sem, 1); - mdc->cl_conn_count = 0; - memcpy(mdc->cl_target_uuid, data->ioc_inlbuf1, data->ioc_inllen1); + sema_init(&cli->cl_sem, 1); + cli->cl_conn_count = 0; + memcpy(cli->cl_target_uuid, data->ioc_inlbuf1, data->ioc_inllen1); memcpy(server_uuid, data->ioc_inlbuf2, MIN(data->ioc_inllen2, sizeof(server_uuid))); - mdc->cl_import.imp_connection = ptlrpc_uuid_to_connection(server_uuid); - if (!mdc->cl_import.imp_connection) + cli->cl_import.imp_connection = ptlrpc_uuid_to_connection(server_uuid); + if (!cli->cl_import.imp_connection) RETURN(-ENOENT); ptlrpc_init_client(rq_portal, rp_portal, name, &obddev->obd_ldlm_client); - mdc->cl_import.imp_client = &obddev->obd_ldlm_client; + cli->cl_import.imp_client = &obddev->obd_ldlm_client; + cli->cl_import.imp_obd = obddev; - mdc->cl_max_mdsize = sizeof(struct lov_mds_md); + cli->cl_max_mdsize = sizeof(struct lov_mds_md); MOD_INC_USE_COUNT; RETURN(0); @@ -125,6 +126,7 @@ int client_obd_connect(struct lustre_handle *conn, struct obd_device *obd, sizeof(obd->obd_uuid) }; char *tmp[] = {cli->cl_target_uuid, obd->obd_uuid}; int rq_opc = (obd->obd_type->typ_ops->o_brw) ? OST_CONNECT :MDS_CONNECT; + struct ptlrpc_connection *c; ENTRY; down(&cli->cl_sem); @@ -149,22 +151,21 @@ int client_obd_connect(struct lustre_handle *conn, struct obd_device *obd, request->rq_level = LUSTRE_CONN_NEW; request->rq_replen = lustre_msg_size(0, NULL); - // This handle may be important if a callback needs - // to find the mdc/osc request->rq_reqmsg->addr = conn->addr; request->rq_reqmsg->cookie = conn->cookie; - class_conn2export(conn)->exp_connection = request->rq_connection; + c = class_conn2export(conn)->exp_connection = request->rq_connection; rc = ptlrpc_queue_wait(request); rc = ptlrpc_check_status(request, rc); if (rc) GOTO(out_req, rc); - request->rq_connection->c_level = LUSTRE_CONN_FULL; - cli->cl_import.imp_handle = *(struct lustre_handle *)request->rq_repmsg; + list_add(&cli->cl_import.imp_chain, &c->c_imports); + c->c_level = LUSTRE_CONN_FULL; + cli->cl_import.imp_handle.addr = request->rq_repmsg->addr; + cli->cl_import.imp_handle.cookie = request->rq_repmsg->cookie; - recovd_conn_manage(cli->cl_import.imp_connection, - ptlrpc_recovd, ll_recover); + recovd_conn_manage(c, ptlrpc_recovd, ll_recover); EXIT; @@ -223,6 +224,7 @@ int client_obd_disconnect(struct lustre_handle *conn) err = class_disconnect(conn); if (!rc && err) rc = err; + list_del_init(&cli->cl_import.imp_chain); MOD_DEC_USE_COUNT; out_sem: up(&cli->cl_sem); @@ -292,6 +294,7 @@ int target_handle_connect(struct ptlrpc_request *req) dlmimp->imp_client = &export->exp_obd->obd_ldlm_client; dlmimp->imp_handle.addr = req->rq_reqmsg->addr; dlmimp->imp_handle.cookie = req->rq_reqmsg->cookie; + dlmimp->imp_obd = /* LDLM! */ NULL; #warning Peter: is this the right place to upgrade the server connection level? req->rq_connection->c_level = LUSTRE_CONN_FULL; diff --git a/lustre/llite/file.c b/lustre/llite/file.c index e4b69fe..359b592 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -270,7 +270,7 @@ static int ll_file_release(struct inode *inode, struct file *file) rc = -rc; GOTO(out, rc); } - ptlrpc_free_req(fd->fd_req); + ptlrpc_req_finished(fd->fd_req); //ldlm_cli_cancel_unused(); diff --git a/lustre/llite/namei.c b/lustre/llite/namei.c index b9bb523..9d6f227 100644 --- a/lustre/llite/namei.c +++ b/lustre/llite/namei.c @@ -269,7 +269,7 @@ static struct dentry *ll_lookup2(struct inode *dir, struct dentry *dentry, EXIT; neg_req: - ptlrpc_free_req(request); + ptlrpc_req_finished(request); negative: dentry->d_op = &ll_d_ops; d_add(dentry, inode); diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index 5bd950b..206dcfc 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -304,6 +304,10 @@ int mdc_enqueue(struct lustre_handle *conn, int lock_type, /* pack the intended request */ mds_getattr_pack(req, 2, dir, de->d_name.name, de->d_name.len); + /* we need to replay opens */ + if (it->it_op == IT_OPEN) + req->rq_flags |= PTL_RPC_FL_REPLAY; + /* get ready for the reply */ req->rq_replen = lustre_msg_size(3, repsize); } else if (it->it_op == IT_READDIR) { diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 1257adb..b348e2a 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -404,6 +404,8 @@ void ptlrpc_resend_req(struct ptlrpc_request *req) ENTRY; CDEBUG(D_INODE, "resend request %Ld, opc %d\n", req->rq_xid, req->rq_reqmsg->opc); + req->rq_reqmsg->addr = req->rq_import->imp_handle.addr; + req->rq_reqmsg->cookie = req->rq_import->imp_handle.cookie; req->rq_status = -EAGAIN; req->rq_level = LUSTRE_CONN_RECOVD; req->rq_flags |= PTL_RPC_FL_RESEND; @@ -502,6 +504,15 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req) RETURN(-rc); } +#if 0 && REPLAY_DEBUGGED + if (req->rq_flags & PTL_RPC_FL_REPLAY) { + /* keep a reference so it's around for replaying. + * this is balanced in XXXXXX? + */ + atomic_inc(&req->rq_refcount); + } +#endif + spin_lock(&conn->c_lock); list_del(&req->rq_list); list_add_tail(&req->rq_list, &conn->c_sending_head); @@ -580,6 +591,8 @@ int ptlrpc_replay_req(struct ptlrpc_request *req) req->rq_time = CURRENT_TIME; req->rq_timeout = obd_timeout; + req->rq_reqmsg->addr = req->rq_import->imp_handle.addr; + req->rq_reqmsg->cookie = req->rq_import->imp_handle.cookie; rc = ptl_send_rpc(req); if (rc) { CERROR("error %d, opcode %d\n", rc, req->rq_reqmsg->opc); diff --git a/lustre/ptlrpc/connection.c b/lustre/ptlrpc/connection.c index 31640d8..d95ee40 100644 --- a/lustre/ptlrpc/connection.c +++ b/lustre/ptlrpc/connection.c @@ -76,7 +76,7 @@ struct ptlrpc_connection *ptlrpc_get_connection(struct lustre_peer *peer, INIT_LIST_HEAD(&c->c_delayed_head); INIT_LIST_HEAD(&c->c_sending_head); INIT_LIST_HEAD(&c->c_dying_head); - INIT_LIST_HEAD(&c->c_clients); + INIT_LIST_HEAD(&c->c_imports); INIT_LIST_HEAD(&c->c_exports); atomic_set(&c->c_refcount, 0); ptlrpc_connection_addref(c); diff --git a/lustre/ptlrpc/recover.c b/lustre/ptlrpc/recover.c index 7e0cf5c..9809602 100644 --- a/lustre/ptlrpc/recover.c +++ b/lustre/ptlrpc/recover.c @@ -23,35 +23,58 @@ #include #include -#if 0 -/* FIXME: reference to mdc_getstatus causes dependency problems */ -static int ll_reconnect(struct ll_sb_info *sbi) +int ll_reconnect(struct ptlrpc_connection *conn) { - struct ll_fid rootfid; - __u64 last_committed; - __u64 last_xid; - int err; struct ptlrpc_request *request; - struct ptlrpc_connection *conn = sbi2mdc(sbi)->cl_import.imp_connection; - - ptlrpc_readdress_connection(conn, "mds"); + struct list_head *tmp; + int rc = -EINVAL; + /* XXX c_lock semantics! */ conn->c_level = LUSTRE_CONN_CON; - /* XXX: need to store the last_* values somewhere */ - err = mdc_getstatus(&sbi->ll_mdc_conn, &rootfid, &last_committed, - &last_xid, &request); - if (err) { - CERROR("cannot mds_connect: rc = %d\n", err); - GOTO(out_disc, err = -ENOTCONN); + /* XXX this code MUST be shared with class_obd_connect! */ + list_for_each(tmp, &conn->c_imports) { + struct obd_import *imp = list_entry(tmp, struct obd_import, + imp_chain); + struct obd_device *obd = imp->imp_obd; + struct client_obd *cli = &obd->u.cli; + int rq_opc = (obd->obd_type->typ_ops->o_brw) + ? OST_CONNECT : MDS_CONNECT; + int size[] = { sizeof(cli->cl_target_uuid), + sizeof(obd->obd_uuid) }; + char *tmp[] = {cli->cl_target_uuid, obd->obd_uuid }; + struct lustre_handle old_hdl; + + LASSERT(imp->imp_connection == conn); + request = ptlrpc_prep_req(imp, rq_opc, 2, size, tmp); + request->rq_level = LUSTRE_CONN_NEW; + request->rq_replen = lustre_msg_size(0, NULL); + /* XXX are (addr, cookie) right? */ + request->rq_reqmsg->addr = imp->imp_handle.addr; + request->rq_reqmsg->cookie = imp->imp_handle.cookie; + rc = ptlrpc_queue_wait(request); + rc = ptlrpc_check_status(request, rc); + if (rc) { + CERROR("cannot connect to %s@%s: rc = %d\n", + cli->cl_target_uuid, conn->c_remote_uuid, rc); + ptlrpc_free_req(request); + GOTO(out_disc, rc = -ENOTCONN); + } + + old_hdl = imp->imp_handle; + imp->imp_handle.addr = request->rq_repmsg->addr; + imp->imp_handle.cookie = request->rq_repmsg->cookie; + CERROR("reconnected to %s@%s (%Lx/%Lx, was %Lx/%Lx)!\n", + cli->cl_target_uuid, conn->c_remote_uuid, + imp->imp_handle.addr, imp->imp_handle.cookie, + old_hdl.addr, old_hdl.cookie); + ptlrpc_free_req(request); } - conn->c_last_xid = last_xid; conn->c_level = LUSTRE_CONN_RECOVD; out_disc: - return err; + return rc; } -#endif static int ll_recover_upcall(struct ptlrpc_connection *conn) { @@ -74,19 +97,15 @@ static int ll_recover_upcall(struct ptlrpc_connection *conn) static int ll_recover_reconnect(struct ptlrpc_connection *conn) { - RETURN(-ENOSYS); -#if 0 - /* XXXshaver this code needs to know about connection-driven recovery! */ - - struct ptlrpc_request *req; - struct list_head *tmp, *pos; - struct ll_sb_info *sbi = cli->cli_data; - struct ptlrpc_connection *conn = cli->cli_connection; int rc = 0; + struct list_head *tmp, *pos; + struct ptlrpc_request *req; ENTRY; /* 1. reconnect */ - ll_reconnect(sbi); + rc = ll_reconnect(conn); + if (rc) + RETURN(rc); /* 2. walk the request list */ spin_lock(&conn->c_lock); @@ -97,12 +116,14 @@ static int ll_recover_reconnect(struct ptlrpc_connection *conn) if (req->rq_flags & PTL_RPC_FL_REPLAY) { CDEBUG(D_INODE, "req %Ld needs replay [last rcvd %Ld]\n", req->rq_xid, conn->c_last_xid); + rc = ptlrpc_replay_req(req); +#if 0 #error We should not hold a spinlock over such a lengthy operation. #error If necessary, drop spinlock, do operation, re-get spinlock, restart loop. #error If we need to avoid re-processint items, then delete them from the list #error as they are replayed and re-add at the tail of this list, so the next #error item to process will always be at the head of the list. - rc = ptlrpc_replay_req(req); +#endif if (rc) { CERROR("recovery replay error %d for req %Ld\n", rc, req->rq_xid); @@ -152,7 +173,7 @@ static int ll_recover_reconnect(struct ptlrpc_connection *conn) } - sbi2mdc(sbi)->cl_conn->c_level = LUSTRE_CONN_FULL; + conn->c_level = LUSTRE_CONN_FULL; recovd_conn_fixed(conn); /* Finally, continue what we delayed since recovery started */ @@ -165,7 +186,6 @@ static int ll_recover_reconnect(struct ptlrpc_connection *conn) out: spin_unlock(&conn->c_lock); return rc; -#endif } int ll_recover(struct recovd_data *rd, int phase) diff --git a/lustre/ptlrpc/rpc.c b/lustre/ptlrpc/rpc.c index 40a30ca..9485861 100644 --- a/lustre/ptlrpc/rpc.c +++ b/lustre/ptlrpc/rpc.c @@ -73,7 +73,9 @@ int connmgr_iocontrol(long cmd, struct lustre_handle *hdl, int len, void *karg, struct obd_device *obd = class_conn2obd(hdl); struct recovd_obd *recovd = &obd->u.recovd; struct obd_ioctl_data *data = karg; +#if 0 && PARALLEL_RECOVERY struct list_head *tmp; +#endif ENTRY; -- 1.8.3.1