*
*/
-#define EXPORT_SYMTAB
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/list.h>
-
#define DEBUG_SUBSYSTEM S_RPC
#include <linux/obd_support.h>
#include <linux/obd_class.h>
-#include <linux/lustre_net.h>
+#include <linux/lustre_lib.h>
+#include <linux/lustre_ha.h>
-void ptlrpc_init_client(int dev, int req_portal, int rep_portal,
- struct ptlrpc_client *cl)
+/*
+ * Initialise a ptlrpc client structure.
+ *
+ * Zeroes *cl, records the recovery daemon and the recovery callback,
+ * registers the client with the recovery daemon when one is supplied,
+ * stores the request/reply portal numbers, and initialises the request
+ * lists, the client lock and the RPC semaphore (initial count 32).
+ *
+ * recovd:     recovery daemon to attach to; may be NULL.
+ * recover:    callback stored in cl->cli_recover.
+ * req_portal: stored in cl->cli_request_portal.
+ * rep_portal: stored in cl->cli_reply_portal.
+ * cl:         client to initialise; its previous contents are discarded.
+ */
+void ptlrpc_init_client(struct recovd_obd *recovd,
+ int (*recover)(struct ptlrpc_client *recover),
+ int req_portal,
+ int rep_portal, struct ptlrpc_client *cl)
{
memset(cl, 0, sizeof(*cl));
- spin_lock_init(&cl->cli_lock);
- cl->cli_xid = 1;
- cl->cli_generation = 1;
- cl->cli_epoch = 1;
- cl->cli_bootcount = 0;
+ cl->cli_recovd = recovd;
+ cl->cli_recover = recover;
+ /* NOTE(review): recovd_cli_manage() runs before the list heads and
+  * cli_lock below are initialised -- confirm it does not touch them. */
+ if (recovd)
+ recovd_cli_manage(recovd, cl);
cl->cli_obd = NULL;
cl->cli_request_portal = req_portal;
cl->cli_reply_portal = rep_portal;
+ INIT_LIST_HEAD(&cl->cli_delayed_head);
INIT_LIST_HEAD(&cl->cli_sending_head);
- INIT_LIST_HEAD(&cl->cli_sent_head);
+ INIT_LIST_HEAD(&cl->cli_dying_head);
+ spin_lock_init(&cl->cli_lock);
sema_init(&cl->cli_rpc_sem, 32);
}
-int ptlrpc_connect_client(char *uuid, struct ptlrpc_client *cl,
- struct lustre_peer *peer)
+/* Return the remote UUID recorded on the connection that req is bound to. */
+__u8 *ptlrpc_req_to_uuid(struct ptlrpc_request *req)
+{
+        struct ptlrpc_connection *conn = req->rq_connection;
+
+        return conn->c_remote_uuid;
+}
+
+/*
+ * Get the connection for the peer named by uuid.
+ *
+ * Resolves uuid to a lustre_peer with kportal_uuid_to_peer(), asks
+ * ptlrpc_get_connection() for the connection to that peer, then records the
+ * uuid on the connection and increments the connection's epoch.
+ *
+ * Returns the connection, or NULL when the uuid cannot be resolved or
+ * ptlrpc_get_connection() returns NULL.
+ */
+struct ptlrpc_connection *ptlrpc_uuid_to_connection(char *uuid)
{
+ struct ptlrpc_connection *c;
+ struct lustre_peer peer;
int err;
- cl->cli_epoch++;
- err = kportal_uuid_to_peer(uuid, peer);
- if (err != 0)
+ err = kportal_uuid_to_peer(uuid, &peer);
+ if (err != 0) {
+ CERROR("cannot find peer %s!\n", uuid);
+ return NULL;
+ }
+
+ c = ptlrpc_get_connection(&peer);
+ if (c) {
+ /* NOTE(review): copies sizeof(c->c_remote_uuid) bytes from uuid
+  * regardless of the string's length -- confirm every caller passes
+  * a buffer at least that large. */
+ memcpy(c->c_remote_uuid, uuid, sizeof(c->c_remote_uuid));
+ c->c_epoch++;
+ }
+
+ return c;
+}
+
+/*
+ * Point an existing connection at the peer currently named by uuid.
+ *
+ * Resolves uuid with kportal_uuid_to_peer() and overwrites conn->c_peer with
+ * the result.  If the lookup fails the error is logged and conn is left
+ * unchanged; the function returns void, so the caller gets no indication of
+ * the failure.
+ */
+void ptlrpc_readdress_connection(struct ptlrpc_connection *conn, char *uuid)
+{
+ struct lustre_peer peer;
+ int err;
+
+ err = kportal_uuid_to_peer(uuid, &peer);
+ if (err != 0) {
CERROR("cannot find peer %s!\n", uuid);
+ return;
+ }
+
+ memcpy(&conn->c_peer, &peer, sizeof(peer));
+ return;
+}
+
+/*
+ * Allocate and initialise a bulk descriptor bound to conn.
+ *
+ * On successful allocation the descriptor stores the value returned by
+ * ptlrpc_connection_addref(conn), starts with a reference count of 1, an
+ * initialised wait queue, an empty page list, and both Portals handles
+ * (b_md_h, b_me_h) set to the invalid handle.
+ *
+ * Returns the new descriptor, or NULL if the allocation failed.
+ */
+struct ptlrpc_bulk_desc *ptlrpc_prep_bulk(struct ptlrpc_connection *conn)
+{
+ struct ptlrpc_bulk_desc *desc;
+
+ OBD_ALLOC(desc, sizeof(*desc));
+ if (desc != NULL) {
+ desc->b_connection = ptlrpc_connection_addref(conn);
+ atomic_set(&desc->b_refcount, 1);
+ init_waitqueue_head(&desc->b_waitq);
+ INIT_LIST_HEAD(&desc->b_page_list);
+ ptl_set_inv_handle(&desc->b_md_h);
+ ptl_set_inv_handle(&desc->b_me_h);
+ }
- return err;
+ return desc;
}
-struct ptlrpc_bulk_desc *ptlrpc_prep_bulk(struct lustre_peer *peer)
+/*
+ * Allocate a bulk page and attach it to desc.
+ *
+ * The new page records desc as its owner, is appended to the tail of
+ * desc->b_page_list, and desc->b_page_count is incremented.
+ *
+ * Returns the new page, or NULL if the allocation failed (desc is then left
+ * untouched).
+ * NOTE(review): desc is dereferenced without a NULL check -- callers must
+ * pass a valid descriptor.
+ */
+struct ptlrpc_bulk_page *ptlrpc_prep_bulk_page(struct ptlrpc_bulk_desc *desc)
{
- struct ptlrpc_bulk_desc *bulk;
+ struct ptlrpc_bulk_page *bulk;
OBD_ALLOC(bulk, sizeof(*bulk));
if (bulk != NULL) {
- memcpy(&bulk->b_peer, peer, sizeof(*peer));
- init_waitqueue_head(&bulk->b_waitq);
+ bulk->b_desc = desc;
+ list_add_tail(&bulk->b_link, &desc->b_page_list);
+ desc->b_page_count++;
}
-
return bulk;
}
+/*
+ * Release a bulk descriptor together with every page still linked to it.
+ *
+ * Each page on desc->b_page_list is released through
+ * ptlrpc_free_bulk_page(), the descriptor's connection is handed to
+ * ptlrpc_put_connection(), and the descriptor itself is freed.
+ * A NULL desc is accepted and ignored.
+ */
+void ptlrpc_free_bulk(struct ptlrpc_bulk_desc *desc)
+{
+        struct list_head *pos, *nxt;
+        ENTRY;
+
+        if (desc != NULL) {
+                /* _safe walk: every page unlinks itself as it is freed */
+                list_for_each_safe(pos, nxt, &desc->b_page_list)
+                        ptlrpc_free_bulk_page(list_entry(pos,
+                                                         struct ptlrpc_bulk_page,
+                                                         b_link));
+
+                ptlrpc_put_connection(desc->b_connection);
+                OBD_FREE(desc, sizeof(*desc));
+        }
+
+        EXIT;
+}
+
+/*
+ * Unlink one bulk page from its descriptor and free it.
+ *
+ * The page is removed from the descriptor's page list and the descriptor's
+ * b_page_count is decremented before the page memory is released.
+ * A NULL page is accepted and ignored.
+ */
+void ptlrpc_free_bulk_page(struct ptlrpc_bulk_page *bulk)
+{
+        ENTRY;
+
+        if (bulk != NULL) {
+                struct ptlrpc_bulk_desc *owner = bulk->b_desc;
+
+                list_del(&bulk->b_link);
+                owner->b_page_count--;
+                OBD_FREE(bulk, sizeof(*bulk));
+        }
+
+        EXIT;
+}
+
+/*
+ * Build a request for opcode on connection conn, owned by client cl.
+ *
+ * The request message is packed from the count buffers described by
+ * lengths/bufs via lustre_pack_msg().  On success the request is bound to
+ * cl and to conn (through ptlrpc_connection_addref()), starts at level
+ * LUSTRE_CONN_FULL with a reference count of 2, and is given the next
+ * outgoing xid of the connection.
+ *
+ * Returns the new request, or NULL on failure.
+ */
struct ptlrpc_request *ptlrpc_prep_req(struct ptlrpc_client *cl,
- struct lustre_peer *peer, int opcode,
- int count, int *lengths, char **bufs)
+ struct ptlrpc_connection *conn,
+ int opcode, int count, int *lengths,
+ char **bufs)
{
struct ptlrpc_request *request;
int rc;
RETURN(NULL);
}
- spin_lock(&cl->cli_lock);
- request->rq_xid = cl->cli_xid++;
- spin_unlock(&cl->cli_lock);
-
rc = lustre_pack_msg(count, lengths, bufs,
- &request->rq_reqlen, &request->rq_reqbuf);
+ &request->rq_reqlen, &request->rq_reqmsg);
if (rc) {
CERROR("cannot pack request %d\n", rc);
+ /* packing failed: release the request structure before bailing out */
+ OBD_FREE(request, sizeof(*request));
RETURN(NULL);
}
- request->rq_type = PTL_RPC_REQUEST;
- memcpy(&request->rq_peer, peer, sizeof(*peer));
- request->rq_reqmsg = (struct lustre_msg *)request->rq_reqbuf;
+
+ request->rq_level = LUSTRE_CONN_FULL;
+ request->rq_type = PTL_RPC_TYPE_REQUEST;
+ request->rq_client = cl;
+ request->rq_connection = ptlrpc_connection_addref(conn);
+
+ INIT_LIST_HEAD(&request->rq_list);
+ INIT_LIST_HEAD(&request->rq_multi);
+ /* this will be dec()d once in req_finished, once in free_committed */
+ atomic_set(&request->rq_refcount, 2);
+
+ /* xids are allocated per connection, under the connection lock */
+ /* NOTE(review): rq_xid is stored already converted with HTON__u32(),
+  * yet it is printed elsewhere as a plain number -- confirm the
+  * intended byte order of this field. */
+ spin_lock(&conn->c_lock);
+ request->rq_xid = HTON__u32(++conn->c_xid_out);
+ spin_unlock(&conn->c_lock);
+
+ /* NOTE(review): magic and version are stored without HTON conversion,
+  * unlike opc and type below -- confirm this is intentional. */
+ request->rq_reqmsg->magic = PTLRPC_MSG_MAGIC;
+ request->rq_reqmsg->version = PTLRPC_MSG_VERSION;
request->rq_reqmsg->opc = HTON__u32(opcode);
- request->rq_reqmsg->xid = HTON__u32(request->rq_xid);
- request->rq_reqmsg->type = HTON__u32(request->rq_type);
+ request->rq_reqmsg->type = HTON__u32(PTL_RPC_MSG_REQUEST);
RETURN(request);
}
+/*
+ * Build a request for the client device identified by the handle conn.
+ *
+ * The handle is translated to its export with class_conn2export(); the
+ * export's client_obd supplies the ptlrpc client and connection passed to
+ * ptlrpc_prep_req().  On success the client's export handle (cl_exporth) is
+ * applied to the request with ptlrpc_hdl2req().
+ *
+ * opcode, count, lengths and bufs are forwarded to ptlrpc_prep_req()
+ * unchanged.
+ *
+ * Returns the new request, or NULL if the handle has no export or
+ * ptlrpc_prep_req() failed.
+ */
+struct ptlrpc_request *ptlrpc_prep_req2(struct lustre_handle *conn,
+                                        int opcode, int count, int *lengths,
+                                        char **bufs)
+{
+        struct client_obd *clobd;
+        struct ptlrpc_request *req;
+        struct obd_export *export;
+
+        export = class_conn2export(conn);
+        if (!export) {
+                /* Log before LBUG() so the message is emitted even if
+                 * LBUG() does not return (assumed -- confirm). */
+                CERROR("NOT connected\n");
+                LBUG();
+                return NULL;
+        }
-void ptlrpc_free_req(struct ptlrpc_request *request)
+        clobd = &export->exp_obd->u.cli;
+        req = ptlrpc_prep_req(clobd->cl_client, clobd->cl_conn,
+                              opcode, count, lengths, bufs);
+        /* ptlrpc_prep_req() returns NULL on failure; do not touch it then */
+        if (req != NULL)
+                ptlrpc_hdl2req(req, &clobd->cl_exporth);
+        return req;
+}
+
+/*
+ * Called when the caller is done with a request.
+ *
+ * Frees the reply buffer (if any), clears the pointers that referred to it,
+ * and drops one reference on the request.  The request itself is released
+ * through ptlrpc_free_req() only when that was the last reference.
+ * A NULL request is accepted and ignored.
+ */
+void ptlrpc_req_finished(struct ptlrpc_request *request)
{
if (request == NULL)
return;
- if (request->rq_repbuf != NULL)
- OBD_FREE(request->rq_repbuf, request->rq_replen);
+ if (request->rq_repmsg != NULL) {
+ OBD_FREE(request->rq_repmsg, request->rq_replen);
+ /* cleared so ptlrpc_free_req() will not free the reply again */
+ request->rq_repmsg = NULL;
+ request->rq_reply_md.start = NULL;
+ }
+
+ if (atomic_dec_and_test(&request->rq_refcount))
+ ptlrpc_free_req(request);
+}
+
+/*
+ * Unconditionally destroy a request (the reference count is not consulted).
+ *
+ * Frees the reply and request message buffers, unlinks the request from its
+ * client's list under cli_lock when rq_client is set, hands the connection
+ * to ptlrpc_put_connection(), unlinks rq_multi and frees the structure.
+ * A NULL request is accepted and ignored.
+ *
+ * Callers that already hold cli_lock must clear rq_client first, otherwise
+ * this function would try to take the same lock again.
+ */
+void ptlrpc_free_req(struct ptlrpc_request *request)
+{
+ ENTRY;
+ if (request == NULL) {
+ EXIT;
+ return;
+ }
+
+ if (request->rq_repmsg != NULL)
+ OBD_FREE(request->rq_repmsg, request->rq_replen);
+ if (request->rq_reqmsg != NULL)
+ OBD_FREE(request->rq_reqmsg, request->rq_reqlen);
+
+ if (request->rq_client) {
+ spin_lock(&request->rq_client->cli_lock);
+ list_del_init(&request->rq_list);
+ spin_unlock(&request->rq_client->cli_lock);
+ }
+
+ ptlrpc_put_connection(request->rq_connection);
+ list_del(&request->rq_multi);
OBD_FREE(request, sizeof(*request));
+ EXIT;
}
+/*
+ * Wake-up condition evaluated while a thread sleeps on rq_wait_for_rep.
+ *
+ * Returns 1 when the sleeper should stop waiting and 0 when it should keep
+ * sleeping.  As a side effect it updates req->rq_flags (REPLIED, INTR,
+ * TIMEOUT), and on timeout it downgrades the connection level and notifies
+ * the recovery daemon.
+ */
static int ptlrpc_check_reply(struct ptlrpc_request *req)
{
int rc = 0;
- if (req->rq_repbuf != NULL) {
- req->rq_flags = PTL_RPC_REPLY;
+ /* a reply has arrived: remember its transaction number and stop */
+ if (req->rq_repmsg != NULL) {
+ req->rq_transno = NTOH__u64(req->rq_repmsg->transno);
+ req->rq_flags |= PTL_RPC_FL_REPLIED;
+ GOTO(out, rc = 1);
+ }
+
+ /* a resend was requested; also flag INTR if a killing signal is pending */
+ if (req->rq_flags & PTL_RPC_FL_RESEND) {
+ if (l_killable_pending(current)) {
+ CERROR("-- INTR --\n");
+ req->rq_flags |= PTL_RPC_FL_INTR;
+ GOTO(out, rc = 1);
+ }
+ CERROR("-- RESEND --\n");
GOTO(out, rc = 1);
}
- if (sigismember(&(current->pending.signal), SIGKILL) ||
- sigismember(&(current->pending.signal), SIGTERM) ||
- sigismember(&(current->pending.signal), SIGINT)) {
- req->rq_flags = PTL_RPC_INTR;
+ /* the request was marked for restart */
+ if (req->rq_flags & PTL_RPC_FL_RECOVERY) {
+ CERROR("-- RESTART --\n");
GOTO(out, rc = 1);
}
+ /* already timed out earlier: only a pending killing signal ends the wait */
+ if (req->rq_flags & PTL_RPC_FL_TIMEOUT && l_killable_pending(current)) {
+ req->rq_flags |= PTL_RPC_FL_INTR;
+ GOTO(out, rc = 1);
+ }
+
+ /* first detection of the timeout expiring */
+ if (req->rq_timeout &&
+ (CURRENT_TIME - req->rq_time >= req->rq_timeout)) {
+ CERROR("-- REQ TIMEOUT ON CONNID %d XID %Ld --\n",
+ req->rq_connid, (unsigned long long)req->rq_xid);
+ /* clear the timeout */
+ req->rq_timeout = 0;
+ req->rq_connection->c_level = LUSTRE_CONN_RECOVD;
+ req->rq_flags |= PTL_RPC_FL_TIMEOUT;
+ if (req->rq_client && req->rq_client->cli_recovd)
+ recovd_cli_fail(req->rq_client);
+ /* requests below LUSTRE_CONN_FULL give up immediately; full-level
+  * requests keep sleeping unless a killing signal is pending */
+ if (req->rq_level < LUSTRE_CONN_FULL) {
+ rc = 1;
+ } else if (l_killable_pending(current)) {
+ req->rq_flags |= PTL_RPC_FL_INTR;
+ rc = 1;
+ } else {
+ rc = 0;
+ }
+ GOTO(out, rc);
+ }
+
out:
+ CDEBUG(D_NET, "req = %p, rc = %d\n", req, rc);
return rc;
}
RETURN(-ENOMEM);
}
+ if (req->rq_repmsg->type == NTOH__u32(PTL_RPC_MSG_ERR)) {
+ CERROR("req->rq_repmsg->type == PTL_RPC_MSG_ERR\n");
+ RETURN(-EINVAL);
+ }
+
if (req->rq_repmsg->status != 0) {
- CERROR("req->rq_repmsg->status is %d\n",
- req->rq_repmsg->status);
+ if (req->rq_repmsg->status < 0)
+ CERROR("req->rq_repmsg->status is %d\n",
+ req->rq_repmsg->status);
+ else
+ CDEBUG(D_INFO, "req->rq_repmsg->status is %d\n",
+ req->rq_repmsg->status);
/* XXX: translate this error from net to host */
RETURN(req->rq_repmsg->status);
}
+/*
+ * Free the request message buffer of request and reset the pointer and
+ * length so the buffer is not freed a second time.
+ */
static void ptlrpc_cleanup_request_buf(struct ptlrpc_request *request)
{
- OBD_FREE(request->rq_reqbuf, request->rq_reqlen);
- request->rq_reqbuf = NULL;
+ OBD_FREE(request->rq_reqmsg, request->rq_reqlen);
+ request->rq_reqmsg = NULL;
request->rq_reqlen = 0;
}
* that we can tear down the buffer safely. */
PtlMEUnlink(request->rq_reply_me_h);
OBD_FREE(request->rq_reply_md.start, request->rq_replen);
- request->rq_repbuf = NULL;
+ request->rq_repmsg = NULL;
request->rq_replen = 0;
return 0;
}
-int ptlrpc_queue_wait(struct ptlrpc_client *cl, struct ptlrpc_request *req)
+/*
+ * Drop the client's reference on requests that the server has committed.
+ *
+ * Walks cli->cli_sending_head.  Requests flagged PTL_RPC_FL_REPLAY are kept.
+ * The walk stops at the first request whose transno is greater than
+ * cli->cli_last_committed (this relies on the list being ordered by
+ * transno -- NOTE(review): confirm).  Every other request loses one
+ * reference: if that was the last one it is freed, otherwise it is moved to
+ * cli->cli_dying_head.
+ *
+ * caller must lock cli
+ */
+void ptlrpc_free_committed(struct ptlrpc_client *cli)
 {
-        int rc = 0;
+        struct list_head *tmp, *saved;
+        struct ptlrpc_request *req;
+        /* pairs with the EXIT below, like the other traced functions here */
+        ENTRY;
+
+        list_for_each_safe(tmp, saved, &cli->cli_sending_head) {
+                req = list_entry(tmp, struct ptlrpc_request, rq_list);
+
+                if ( (req->rq_flags & PTL_RPC_FL_REPLAY) ) {
+                        CDEBUG(D_INFO, "Retaining request %Ld for replay\n",
+                               req->rq_xid);
+                        continue;
+                }
+
+                /* not yet committed */
+                if (req->rq_transno > cli->cli_last_committed)
+                        break;
+
+                CDEBUG(D_INFO, "Marking request %Ld as committed ("
+                       "transno=%Lu, last_committed=%Lu\n",
+                       req->rq_xid, req->rq_transno,
+                       cli->cli_last_committed);
+                if (atomic_dec_and_test(&req->rq_refcount)) {
+                        /* we do this to prevent free_req deadlock */
+                        list_del_init(&req->rq_list);
+                        req->rq_client = NULL;
+                        ptlrpc_free_req(req);
+                } else {
+                        /* still referenced elsewhere: park it on the dying list */
+                        list_del_init(&req->rq_list);
+                        list_add(&req->rq_list, &cli->cli_dying_head);
+                }
+        }
+
+        EXIT;
+        return;
+}
+
+/*
+ * Free every request still queued on the client.
+ *
+ * Under cli_lock, empties cli_sending_head and then cli_dying_head, freeing
+ * each request with ptlrpc_free_req().  Finding a request on the dying list
+ * is reported as an error.
+ * NOTE(review): requests are freed without consulting rq_refcount -- confirm
+ * no other holder can still be using them at cleanup time.
+ */
+void ptlrpc_cleanup_client(struct ptlrpc_client *cli)
+{
+ struct list_head *tmp, *saved;
+ struct ptlrpc_request *req;
ENTRY;
- init_waitqueue_head(&req->rq_wait_for_rep);
+ spin_lock(&cli->cli_lock);
+ list_for_each_safe(tmp, saved, &cli->cli_sending_head) {
+ req = list_entry(tmp, struct ptlrpc_request, rq_list);
+ CDEBUG(D_INFO, "Cleaning req %p from sending list.\n", req);
+ list_del_init(&req->rq_list);
+ /* cli_lock is held here: clearing rq_client stops ptlrpc_free_req()
+  * from taking it again */
+ req->rq_client = NULL;
+ ptlrpc_free_req(req);
+ }
+ list_for_each_safe(tmp, saved, &cli->cli_dying_head) {
+ req = list_entry(tmp, struct ptlrpc_request, rq_list);
+ CERROR("Request %p is on the dying list at cleanup!\n", req);
+ list_del_init(&req->rq_list);
+ req->rq_client = NULL;
+ ptlrpc_free_req(req);
+ }
+ spin_unlock(&cli->cli_lock);
+
+ EXIT;
+ return;
+}
+
+/*
+ * Let a delayed request proceed: wake whoever is sleeping on the request's
+ * rq_wait_for_rep queue.  No request state is modified.
+ */
+void ptlrpc_continue_req(struct ptlrpc_request *req)
+{
+        ENTRY;
+
+        CDEBUG(D_INODE,
+               "continue delayed request %Ld opc %d\n",
+               req->rq_xid,
+               req->rq_reqmsg->opc);
+        wake_up(&req->rq_wait_for_rep);
+
+        EXIT;
+}
+
+/*
+ * Ask the sleeping sender of req to transmit it again.
+ *
+ * Sets the status to -EAGAIN, lowers the request level to
+ * LUSTRE_CONN_RECOVD, raises PTL_RPC_FL_RESEND, clears PTL_RPC_FL_TIMEOUT,
+ * and wakes the rq_wait_for_rep queue.
+ */
+void ptlrpc_resend_req(struct ptlrpc_request *req)
+{
+        ENTRY;
+
+        CDEBUG(D_INODE,
+               "resend request %Ld, opc %d\n",
+               req->rq_xid,
+               req->rq_reqmsg->opc);
+
+        /* status and level are independent of the flag updates below */
+        req->rq_level = LUSTRE_CONN_RECOVD;
+        req->rq_status = -EAGAIN;
+
+        req->rq_flags |= PTL_RPC_FL_RESEND;
+        req->rq_flags &= ~PTL_RPC_FL_TIMEOUT;
+
+        wake_up(&req->rq_wait_for_rep);
+
+        EXIT;
+}
- req->rq_client = cl;
- req->rq_req_portal = cl->cli_request_portal;
- req->rq_reply_portal = cl->cli_reply_portal;
- rc = ptl_send_rpc(req, cl);
+/*
+ * Mark req for restart and wake its sleeper.
+ *
+ * Sets the status to -ERESTARTSYS, raises PTL_RPC_FL_RECOVERY, clears
+ * PTL_RPC_FL_TIMEOUT, and wakes the rq_wait_for_rep queue.
+ */
+void ptlrpc_restart_req(struct ptlrpc_request *req)
+{
+        ENTRY;
+
+        CDEBUG(D_INODE,
+               "restart completed request %Ld, opc %d\n",
+               req->rq_xid,
+               req->rq_reqmsg->opc);
+
+        req->rq_status = -ERESTARTSYS;
+
+        req->rq_flags |= PTL_RPC_FL_RECOVERY;
+        req->rq_flags &= ~PTL_RPC_FL_TIMEOUT;
+
+        wake_up(&req->rq_wait_for_rep);
+
+        EXIT;
+}
+
+/*
+ * Send req and sleep until it is answered, interrupted, timed out, or
+ * woken for resend/restart.
+ *
+ * If the request's level is above the connection's current level, the
+ * request is first parked on the client's delayed list until the connection
+ * level catches up (or the killable wait ends).  The request is then sent
+ * with ptl_send_rpc(), queued on cli_sending_head and waited on with
+ * ptlrpc_check_reply() as the wake-up condition.  A wake-up carrying
+ * PTL_RPC_FL_RESEND sends the request again.
+ *
+ * On a successful reply the client's last_rcvd/last_committed are updated
+ * from the reply and committed requests are released.
+ *
+ * Returns 0 on success, otherwise a negative errno:
+ *   the (sign-normalised) ptl_send_rpc() error if sending failed,
+ *   -EINTR if the wait was interrupted,
+ *   -ETIMEDOUT if the request timed out,
+ *   req->rq_status if woken without a reply,
+ *   the lustre_unpack_msg() error if the reply could not be unpacked.
+ *
+ * NOTE(review): cli_rpc_sem is released here but not acquired in this
+ * function -- presumably ptl_send_rpc() takes it; confirm.
+ */
+int ptlrpc_queue_wait(struct ptlrpc_request *req)
+{
+        int rc = 0, timeout;
+        struct ptlrpc_client *cli = req->rq_client;
+        ENTRY;
+
+        init_waitqueue_head(&req->rq_wait_for_rep);
+        CDEBUG(D_NET, "subsys: %s req %Ld opc %d level %d, conn level %d\n",
+               cli->cli_name, req->rq_xid, req->rq_reqmsg->opc, req->rq_level,
+               req->rq_connection->c_level);
+
+        /* XXX probably both an import and connection level are needed */
+        if (req->rq_level > req->rq_connection->c_level) {
+                CERROR("process %d waiting for recovery (%d > %d)\n",
+                       current->pid, req->rq_level, req->rq_connection->c_level);
+                /* park the request at the tail of the delayed list */
+                spin_lock(&cli->cli_lock);
+                list_del_init(&req->rq_list);
+                list_add(&req->rq_list, cli->cli_delayed_head.prev);
+                spin_unlock(&cli->cli_lock);
+                l_wait_event_killable
+                        (req->rq_wait_for_rep,
+                         req->rq_level <= req->rq_connection->c_level);
+                spin_lock(&cli->cli_lock);
+                list_del_init(&req->rq_list);
+                spin_unlock(&cli->cli_lock);
+                CERROR("process %d resumed\n", current->pid);
+        }
+ resend:
+        req->rq_time = CURRENT_TIME;
+        req->rq_timeout = 100;
+        rc = ptl_send_rpc(req);
         if (rc) {
                 CERROR("error %d, opcode %d\n", rc, req->rq_reqmsg->opc);
+                /* normalise to a negative errno whatever sign we were given */
+                if ( rc > 0 )
+                        rc = -rc;
                 ptlrpc_cleanup_request_buf(req);
-                up(&cl->cli_rpc_sem);
+                up(&cli->cli_rpc_sem);
-                RETURN(-rc);
+                /* rc is already negative here; negating it again would hand
+                 * the caller a positive value */
+                RETURN(rc);
         }
+        spin_lock(&cli->cli_lock);
+        list_del_init(&req->rq_list);
+        list_add_tail(&req->rq_list, &cli->cli_sending_head);
+        spin_unlock(&cli->cli_lock);
+
         CDEBUG(D_OTHER, "-- sleeping\n");
-        wait_event_interruptible(req->rq_wait_for_rep, ptlrpc_check_reply(req));
+        /*
+         * req->rq_timeout gets reset in the timeout case, and
+         * l_wait_event_timeout is a macro, so save the timeout value here.
+         */
+        timeout = req->rq_timeout * HZ;
+        l_wait_event_timeout(req->rq_wait_for_rep, ptlrpc_check_reply(req),
+                             timeout);
         CDEBUG(D_OTHER, "-- done\n");
-        ptlrpc_cleanup_request_buf(req);
-        up(&cl->cli_rpc_sem);
-        if (req->rq_flags == PTL_RPC_INTR) {
+
+        /* NOTE(review): RESEND is tested before INTR, so a request flagged
+         * with both is sent again before the interruption is reported --
+         * confirm this ordering is intended. */
+        if (req->rq_flags & PTL_RPC_FL_RESEND) {
+                req->rq_flags &= ~PTL_RPC_FL_RESEND;
+                goto resend;
+        }
+
+        up(&cli->cli_rpc_sem);
+        if (req->rq_flags & PTL_RPC_FL_INTR) {
                 /* Clean up the dangling reply buffers */
                 ptlrpc_abort(req);
                 GOTO(out, rc = -EINTR);
         }
-        if (req->rq_flags != PTL_RPC_REPLY) {
+        if (req->rq_flags & PTL_RPC_FL_TIMEOUT)
+                GOTO(out, rc = -ETIMEDOUT);
+
+        if (!(req->rq_flags & PTL_RPC_FL_REPLIED))
+                GOTO(out, rc = req->rq_status);
+
+        rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen);
+        if (rc) {
+                CERROR("unpack_rep failed: %d\n", rc);
+                GOTO(out, rc);
+        }
+        CDEBUG(D_NET, "got rep %Ld\n", req->rq_xid);
+        if (req->rq_repmsg->status == 0)
+                CDEBUG(D_NET, "--> buf %p len %d status %d\n", req->rq_repmsg,
+                       req->rq_replen, req->rq_repmsg->status);
+
+        /* record the server's progress and release what it has committed */
+        spin_lock(&cli->cli_lock);
+        cli->cli_last_rcvd = req->rq_repmsg->last_rcvd;
+        cli->cli_last_committed = req->rq_repmsg->last_committed;
+        ptlrpc_free_committed(cli);
+        spin_unlock(&cli->cli_lock);
+
+        EXIT;
+ out:
+        return rc;
+}
+
+/*
+ * Re-send req during recovery and wait (killably) for its reply.
+ *
+ * Unlike ptlrpc_queue_wait() the request is not checked against the
+ * connection level and is not moved between the client's lists.  A reply
+ * with a non-zero status is treated as a fatal recovery failure (LBUG).
+ *
+ * Returns 0 on success, otherwise a negative errno:
+ *   the (sign-normalised) ptl_send_rpc() error if sending failed,
+ *   -EINTR if woken without a reply,
+ *   the lustre_unpack_msg() error if the reply could not be unpacked.
+ *
+ * NOTE(review): cli_rpc_sem is released here but not acquired in this
+ * function -- presumably ptl_send_rpc() takes it; confirm.
+ */
+int ptlrpc_replay_req(struct ptlrpc_request *req)
+{
+        int rc = 0;
+        struct ptlrpc_client *cli = req->rq_client;
+        ENTRY;
+
+        init_waitqueue_head(&req->rq_wait_for_rep);
+        CDEBUG(D_NET, "req %Ld opc %d level %d, conn level %d\n",
+               req->rq_xid, req->rq_reqmsg->opc, req->rq_level,
+               req->rq_connection->c_level);
+
+        req->rq_time = CURRENT_TIME;
+        req->rq_timeout = 100;
+        rc = ptl_send_rpc(req);
+        if (rc) {
+                CERROR("error %d, opcode %d\n", rc, req->rq_reqmsg->opc);
+                /* normalise to a negative errno whatever sign we were given,
+                 * matching the other error returns of this function */
+                if (rc > 0)
+                        rc = -rc;
+                ptlrpc_cleanup_request_buf(req);
+                up(&cli->cli_rpc_sem);
+                RETURN(rc);
+        }
+
+        CDEBUG(D_OTHER, "-- sleeping\n");
+        l_wait_event_killable(req->rq_wait_for_rep, ptlrpc_check_reply(req));
+        CDEBUG(D_OTHER, "-- done\n");
+
+        up(&cli->cli_rpc_sem);
+
+        if (!(req->rq_flags & PTL_RPC_FL_REPLIED)) {
                 CERROR("Unknown reason for wakeup\n");
                 /* XXX Phil - I end up here when I kill obdctl */
                 ptlrpc_abort(req);
                 GOTO(out, rc = -EINTR);
         }
-        rc = lustre_unpack_msg(req->rq_repbuf, req->rq_replen);
-        req->rq_repmsg = (struct lustre_msg *)req->rq_repbuf;
+        rc = lustre_unpack_msg(req->rq_repmsg, req->rq_replen);
         if (rc) {
                 CERROR("unpack_rep failed: %d\n", rc);
                 GOTO(out, rc);
         }
-        CDEBUG(D_NET, "got rep %d\n", req->rq_repmsg->xid);
+        CDEBUG(D_NET, "got rep %Ld\n", req->rq_xid);
         if (req->rq_repmsg->status == 0)
-                CDEBUG(D_NET, "--> buf %p len %d status %d\n", req->rq_repbuf,
+                CDEBUG(D_NET, "--> buf %p len %d status %d\n", req->rq_repmsg,
                        req->rq_replen, req->rq_repmsg->status);
+        else {
+                /* a replayed request must succeed; anything else is fatal */
+                CERROR("recovery failed: ");
+                CERROR("req %Ld opc %d level %d, conn level %d\n",
+                       req->rq_xid, req->rq_reqmsg->opc, req->rq_level,
+                       req->rq_connection->c_level);
+                LBUG();
+        }
-        EXIT;
  out:
-        return rc;
+        RETURN(rc);
 }