X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fptlrpc%2Frpc.c;h=ceefc3350c389396a7ac12dbbfb46120c4174d17;hb=15301bb6d49a4beedff490d9442c7f0d0e3db489;hp=f5292076fd2dc1cd29257ade82233efe1772837e;hpb=5bb3ae0f4161ba10a043a526a456889f6e44f39b;p=fs%2Flustre-release.git diff --git a/lustre/ptlrpc/rpc.c b/lustre/ptlrpc/rpc.c index f529207..ceefc33 100644 --- a/lustre/ptlrpc/rpc.c +++ b/lustre/ptlrpc/rpc.c @@ -21,224 +21,218 @@ */ #define EXPORT_SYMTAB +#define DEBUG_SUBSYSTEM S_RPC -#include #include -#include - #include -#include +#include +#include +#include +#include -static ptl_handle_eq_t req_eq; +extern int ptlrpc_init_portals(void); +extern void ptlrpc_exit_portals(void); -static int request_callback(ptl_event_t *ev, void *data) +int connmgr_setup(struct obd_device *obddev, obd_count len, void *buf) { - struct ptlrpc_request *rpc = ev->mem_desc.user_ptr; - + struct recovd_obd *recovd = &obddev->u.recovd; + int err; ENTRY; - if (ev->type == PTL_EVENT_SENT) { - kfree(ev->mem_desc.start); - } else if (ev->type == PTL_EVENT_PUT) { - rpc->rq_repbuf = ev->mem_desc.start + ev->offset; - wake_up_interruptible(&rpc->rq_wait_for_rep); - } - - EXIT; - return 1; -} - -static int incoming_callback(ptl_event_t *ev, void *data) -{ - struct ptlrpc_service *service = data; + MOD_INC_USE_COUNT; + memset(recovd, 0, sizeof(*recovd)); - ENTRY; - - if (ev->type == PTL_EVENT_PUT) { - wake_up(service->srv_wait_queue); - } else { - printk("Unexpected event type: %d\n", ev->type); + err = recovd_setup(recovd); + if (err) { + MOD_DEC_USE_COUNT; + RETURN(err); } - EXIT; - return 0; + RETURN(0); } -int ptl_send_buf(struct ptlrpc_request *request, struct lustre_peer *peer, - int portal, int is_request) +int connmgr_cleanup(struct obd_device *dev) { - int rc; - ptl_process_id_t remote_id; - ptl_handle_md_t md_h; - - if (is_request) { - request->rq_req_md.start = request->rq_reqbuf; - request->rq_req_md.length = request->rq_reqlen; - } else { - request->rq_req_md.start = request->rq_repbuf; - request->rq_req_md.length = request->rq_replen; - } - request->rq_req_md.threshold = PTL_MD_THRESH_INF; - request->rq_req_md.options = PTL_MD_OP_PUT; - request->rq_req_md.user_ptr = request; - request->rq_req_md.eventq = req_eq; - - rc = PtlMDBind(peer->peer_ni, request->rq_req_md, &md_h); - if (rc != 0) { - printk(__FUNCTION__ ": PtlMDBind failed: %d\n", rc); - return rc; - } + struct recovd_obd *recovd = &dev->u.recovd; + int err; - remote_id.addr_kind = PTL_ADDR_NID; - remote_id.nid = peer->peer_nid; - remote_id.pid = 0; + err = recovd_cleanup(recovd); + if (err) + LBUG(); - rc = PtlPut(md_h, PTL_NOACK_REQ, remote_id, portal, 0, request->rq_xid, - 0, 0); - if (rc != PTL_OK) { - printk(__FUNCTION__ ": PtlPut failed: %d\n", rc); - /* FIXME: tear down md */ - } - - return rc; + MOD_DEC_USE_COUNT; + RETURN(0); } -int ptl_send_rpc(struct ptlrpc_request *request, struct lustre_peer *peer) +int connmgr_iocontrol(long cmd, struct lustre_handle *hdl, int len, void *karg, + void *uarg) { - ptl_handle_md_t reply_md_h; - ptl_handle_me_t me_h; - ptl_process_id_t local_id; - int rc; + struct ptlrpc_connection *conn = NULL; + struct obd_device *obd = class_conn2obd(hdl); + struct recovd_obd *recovd = &obd->u.recovd; + struct obd_ioctl_data *data = karg; + struct list_head *tmp; + int rc = 0; ENTRY; - request->rq_repbuf = kmalloc(request->rq_replen, GFP_KERNEL); - if (!request->rq_repbuf) { - EXIT; - return -ENOMEM; - } + if (cmd != OBD_IOC_RECOVD_NEWCONN && cmd != OBD_IOC_RECOVD_FAILCONN) + RETURN(-EINVAL); /* XXX ENOSYS? */ + + /* Find the connection that's been rebuilt or has failed. */ + spin_lock(&recovd->recovd_lock); + list_for_each(tmp, &recovd->recovd_troubled_items) { + conn = list_entry(tmp, struct ptlrpc_connection, + c_recovd_data.rd_managed_chain); - local_id.addr_kind = PTL_ADDR_GID; - local_id.gid = PTL_ID_ANY; - local_id.rid = PTL_ID_ANY; + LASSERT(conn->c_recovd_data.rd_recovd == recovd); /* sanity */ - rc = PtlMEAttach(peer->peer_ni, request->rq_reply_portal, local_id, - request->rq_xid, 0, PTL_RETAIN, &me_h); - if (rc != PTL_OK) { - EXIT; - /* FIXME: tear down EQ, free reqbuf */ - return rc; + if (!strcmp(conn->c_remote_uuid, data->ioc_inlbuf1)) + break; + conn = NULL; } - request->rq_reply_md.start = request->rq_repbuf; - request->rq_reply_md.length = request->rq_replen; - request->rq_reply_md.threshold = PTL_MD_THRESH_INF; - request->rq_reply_md.options = PTL_MD_OP_PUT; - request->rq_reply_md.user_ptr = request; - request->rq_reply_md.eventq = req_eq; - - rc = PtlMDAttach(me_h, request->rq_reply_md, PTL_RETAIN, &reply_md_h); - if (rc != PTL_OK) { - EXIT; - return rc; + if (!conn) { + if (cmd == OBD_IOC_RECOVD_NEWCONN) + GOTO(out, rc = -EINVAL); + /* XXX macroize/inline and share with loop above */ + list_for_each(tmp, &recovd->recovd_managed_items) { + conn = list_entry(tmp, struct ptlrpc_connection, + c_recovd_data.rd_managed_chain); + + LASSERT(conn->c_recovd_data.rd_recovd == recovd); + + if (!strcmp(conn->c_remote_uuid, data->ioc_inlbuf1)) + break; + conn = NULL; + } + if (!conn) + GOTO(out, rc = -EINVAL); } - return ptl_send_buf(request, peer, request->rq_req_portal, 1); -} + if (cmd == OBD_IOC_RECOVD_FAILCONN) { + spin_unlock(&recovd->recovd_lock); + recovd_conn_fail(conn); + spin_lock(&recovd->recovd_lock); -int rpc_register_service(struct ptlrpc_service *service, char *uuid) -{ - struct lustre_peer peer; - int rc; - - rc = kportal_uuid_to_peer(uuid, &peer); - if (rc != 0) { - printk("Invalid uuid \"%s\"\n", uuid); - return -EINVAL; + /* Jump straight to the "failed" phase of recovery. */ + conn->c_recovd_data.rd_phase = RD_FAILED; + goto out; } - service->srv_buf = kmalloc(service->srv_buf_size, GFP_KERNEL); - if (service->srv_buf == NULL) { - printk(__FUNCTION__ ": no memory\n"); - return -ENOMEM; - } + /* else (NEWCONN) */ + if (conn->c_recovd_data.rd_phase != RD_PREPARING) + GOTO(out, rc = -EALREADY); - service->srv_id.addr_kind = PTL_ADDR_GID; - service->srv_id.gid = PTL_ID_ANY; - service->srv_id.rid = PTL_ID_ANY; - - rc = PtlMEAttach(peer.peer_ni, service->srv_portal, service->srv_id, - 0, ~0, PTL_RETAIN, &service->srv_me); - if (rc != PTL_OK) { - printk("PtlMEAttach failed: %d\n", rc); - return rc; - } - - rc = PtlEQAlloc(peer.peer_ni, 128, incoming_callback, service, - &service->srv_eq); - if (rc != PTL_OK) { - printk("PtlEQAlloc failed: %d\n", rc); - return rc; - } - - /* FIXME: Build an auto-unlinking MD and build a ring. */ - /* FIXME: Make sure that these are reachable by DMA on well-known - * addresses. */ - service->srv_md.start = service->srv_buf; - service->srv_md.length = service->srv_buf_size; - service->srv_md.threshold = PTL_MD_THRESH_INF; - service->srv_md.options = PTL_MD_OP_PUT; - service->srv_md.user_ptr = service; - service->srv_md.eventq = service->srv_eq; - - rc = PtlMDAttach(service->srv_me, service->srv_md, - PTL_RETAIN, &service->srv_md_h); - if (rc != PTL_OK) { - printk("PtlMDAttach failed: %d\n", rc); - /* FIXME: wow, we need to clean up. */ - return rc; + spin_lock(&conn->c_lock); + if (data->ioc_inllen2) { + CERROR("conn %p UUID change %s -> %s\n", + conn, conn->c_remote_uuid, data->ioc_inlbuf2); + strcpy(conn->c_remote_uuid, data->ioc_inlbuf2); + } else { + CERROR("conn %p UUID %s reconnected\n", conn, + conn->c_remote_uuid); } - - return 0; + ptlrpc_readdress_connection(conn, conn->c_remote_uuid); + spin_unlock(&conn->c_lock); + + conn->c_recovd_data.rd_phase = RD_PREPARED; + wake_up(&recovd->recovd_waitq); + out: + spin_unlock(&recovd->recovd_lock); + RETURN(rc); } -static int req_init_portals(void) -{ - int rc; - const ptl_handle_ni_t *nip; - ptl_handle_ni_t ni; - - nip = inter_module_get_request(LUSTRE_NAL "_ni", LUSTRE_NAL); - if (nip == NULL) { - printk("get_ni failed: is the NAL module loaded?\n"); - return -EIO; - } - ni = *nip; - rc = PtlEQAlloc(ni, 128, request_callback, NULL, &req_eq); - if (rc != PTL_OK) - printk("PtlEQAlloc failed: %d\n", rc); +/* use obd ops to offer management infrastructure */ +static struct obd_ops recovd_obd_ops = { + o_setup: connmgr_setup, + o_cleanup: connmgr_cleanup, + o_iocontrol: connmgr_iocontrol, + o_connect: class_connect, + o_disconnect: class_disconnect +}; - return rc; -} - -static int __init req_init(void) +static int __init ptlrpc_init(void) { - return req_init_portals(); + int rc; + rc = ptlrpc_init_portals(); + if (rc) + RETURN(rc); + ptlrpc_init_connection(); + class_register_type(&recovd_obd_ops, LUSTRE_HA_NAME); + return 0; } -static void __exit req_exit(void) +static void __exit ptlrpc_exit(void) { - PtlEQFree(req_eq); - - inter_module_put(LUSTRE_NAL "_ni"); - - return; + class_unregister_type(LUSTRE_HA_NAME); + ptlrpc_exit_portals(); + ptlrpc_cleanup_connection(); } -MODULE_AUTHOR("Peter J. Braam "); +/* recovd.c */ +EXPORT_SYMBOL(ptlrpc_recovd); +EXPORT_SYMBOL(recovd_conn_fail); +EXPORT_SYMBOL(recovd_conn_manage); +EXPORT_SYMBOL(recovd_conn_fixed); +EXPORT_SYMBOL(recovd_setup); +EXPORT_SYMBOL(recovd_cleanup); + +/* connection.c */ +EXPORT_SYMBOL(ptlrpc_readdress_connection); +EXPORT_SYMBOL(ptlrpc_get_connection); +EXPORT_SYMBOL(ptlrpc_put_connection); +EXPORT_SYMBOL(ptlrpc_connection_addref); +EXPORT_SYMBOL(ptlrpc_init_connection); +EXPORT_SYMBOL(ptlrpc_cleanup_connection); + +/* niobuf.c */ +EXPORT_SYMBOL(ptlrpc_send_bulk); +EXPORT_SYMBOL(ptlrpc_register_bulk); +EXPORT_SYMBOL(ptlrpc_abort_bulk); +EXPORT_SYMBOL(ptlrpc_reply); +EXPORT_SYMBOL(ptlrpc_error); +EXPORT_SYMBOL(ptlrpc_resend_req); +EXPORT_SYMBOL(ptl_send_rpc); +EXPORT_SYMBOL(ptlrpc_link_svc_me); + +/* client.c */ +EXPORT_SYMBOL(ptlrpc_init_client); +EXPORT_SYMBOL(ptlrpc_cleanup_client); +EXPORT_SYMBOL(ptlrpc_req_to_uuid); +EXPORT_SYMBOL(ptlrpc_uuid_to_connection); +EXPORT_SYMBOL(ptlrpc_queue_wait); +EXPORT_SYMBOL(ptlrpc_continue_req); +EXPORT_SYMBOL(ptlrpc_replay_req); +EXPORT_SYMBOL(ptlrpc_restart_req); +EXPORT_SYMBOL(ptlrpc_prep_req); +EXPORT_SYMBOL(ptlrpc_free_req); +EXPORT_SYMBOL(ptlrpc_req_finished); +EXPORT_SYMBOL(ptlrpc_prep_bulk); +EXPORT_SYMBOL(ptlrpc_free_bulk); +EXPORT_SYMBOL(ptlrpc_prep_bulk_page); +EXPORT_SYMBOL(ptlrpc_free_bulk_page); +EXPORT_SYMBOL(ptlrpc_check_status); + +/* service.c */ +EXPORT_SYMBOL(ptlrpc_init_svc); +EXPORT_SYMBOL(ptlrpc_stop_all_threads); +EXPORT_SYMBOL(ptlrpc_start_thread); +EXPORT_SYMBOL(ptlrpc_unregister_service); + +/* pack_generic.c */ +EXPORT_SYMBOL(lustre_pack_msg); +EXPORT_SYMBOL(lustre_msg_size); +EXPORT_SYMBOL(lustre_unpack_msg); +EXPORT_SYMBOL(lustre_msg_buf); + +EXPORT_SYMBOL(ll_recover); + + +MODULE_AUTHOR("Cluster File Systems, Inc "); MODULE_DESCRIPTION("Lustre Request Processor v1.0"); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL"); -module_init(req_init); -module_exit(req_exit); +module_init(ptlrpc_init); +module_exit(ptlrpc_exit);