X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lnet%2Fklnds%2Fo2iblnd%2Fo2iblnd.c;h=2d4ab79d6c3f871b2c3eecf4a7b3c5a9d4dde162;hp=85d37b7ba410eb2aa1d40a507182282cd9f1be13;hb=a7a889f77cec3ad44543fd0b33669521e612097d;hpb=86e192059905ac49406f7aad0fd552bd2038047f;ds=sidebyside diff --git a/lnet/klnds/o2iblnd/o2iblnd.c b/lnet/klnds/o2iblnd/o2iblnd.c index 85d37b7..2d4ab79 100644 --- a/lnet/klnds/o2iblnd/o2iblnd.c +++ b/lnet/klnds/o2iblnd/o2iblnd.c @@ -27,7 +27,6 @@ */ /* * This file is part of Lustre, http://www.lustre.org/ - * Lustre is a trademark of Sun Microsystems, Inc. * * lnet/klnds/o2iblnd/o2iblnd.c * @@ -526,7 +525,6 @@ kiblnd_get_conn_by_idx(struct lnet_ni *ni, int index) { struct kib_peer_ni *peer_ni; struct kib_conn *conn; - struct list_head *ctmp; int i; unsigned long flags; @@ -538,11 +536,11 @@ kiblnd_get_conn_by_idx(struct lnet_ni *ni, int index) if (peer_ni->ibp_ni != ni) continue; - list_for_each(ctmp, &peer_ni->ibp_conns) { + list_for_each_entry(conn, &peer_ni->ibp_conns, + ibc_list) { if (index-- > 0) continue; - conn = list_entry(ctmp, struct kib_conn, ibc_list); kiblnd_conn_addref(conn); read_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); @@ -1630,16 +1628,16 @@ out_fpo: static void kiblnd_fail_fmr_poolset(struct kib_fmr_poolset *fps, struct list_head *zombies) { + struct kib_fmr_pool *fpo; + if (fps->fps_net == NULL) /* intialized? */ return; spin_lock(&fps->fps_lock); - while (!list_empty(&fps->fps_pool_list)) { - struct kib_fmr_pool *fpo = list_entry(fps->fps_pool_list.next, - struct kib_fmr_pool, - fpo_list); - + while ((fpo = list_first_entry_or_null(&fps->fps_pool_list, + struct kib_fmr_pool, + fpo_list)) != NULL) { fpo->fpo_failed = 1; if (fpo->fpo_map_count == 0) list_move(&fpo->fpo_list, zombies); @@ -1753,10 +1751,11 @@ kiblnd_fmr_pool_unmap(struct kib_fmr *fmr, int status) if (frd) { frd->frd_valid = false; + frd->frd_posted = false; + fmr->fmr_frd = NULL; spin_lock(&fps->fps_lock); list_add_tail(&frd->frd_list, &fpo->fast_reg.fpo_pool_list); spin_unlock(&fps->fps_lock); - fmr->fmr_frd = NULL; } } fmr->fmr_pool = NULL; @@ -1923,6 +1922,7 @@ again: fmr->fmr_key = is_rx ? mr->rkey : mr->lkey; fmr->fmr_frd = frd; fmr->fmr_pool = fpo; + frd->frd_posted = false; return 0; } spin_unlock(&fps->fps_lock); @@ -2003,8 +2003,9 @@ kiblnd_destroy_pool_list(struct list_head *head) { struct kib_pool *pool; - while (!list_empty(head)) { - pool = list_entry(head->next, struct kib_pool, po_list); + while ((pool = list_first_entry_or_null(head, + struct kib_pool, + po_list)) != NULL) { list_del(&pool->po_list); LASSERT(pool->po_owner != NULL); @@ -2015,14 +2016,15 @@ kiblnd_destroy_pool_list(struct list_head *head) static void kiblnd_fail_poolset(struct kib_poolset *ps, struct list_head *zombies) { + struct kib_pool *po; + if (ps->ps_net == NULL) /* intialized? */ return; spin_lock(&ps->ps_lock); - while (!list_empty(&ps->ps_pool_list)) { - struct kib_pool *po = list_entry(ps->ps_pool_list.next, - struct kib_pool, po_list); - + while ((po = list_first_entry_or_null(&ps->ps_pool_list, + struct kib_pool, + po_list)) != NULL) { po->po_failed = 1; if (po->po_allocated == 0) list_move(&po->po_list, zombies); @@ -2703,6 +2705,21 @@ kiblnd_dummy_callback(struct rdma_cm_id *cmid, struct rdma_cm_event *event) return 0; } +static int kiblnd_get_link_status(struct net_device *dev) +{ + int ret = -1; + + LASSERT(dev); + + if (!netif_running(dev)) + ret = 0; + /* Some devices may not be providing link settings */ + else if (dev->ethtool_ops->get_link) + ret = dev->ethtool_ops->get_link(dev); + + return ret; +} + static int kiblnd_dev_need_failover(struct kib_dev *dev, struct net *ns) { @@ -2761,30 +2778,31 @@ kiblnd_dev_failover(struct kib_dev *dev, struct net *ns) LIST_HEAD(zombie_tpo); LIST_HEAD(zombie_ppo); LIST_HEAD(zombie_fpo); - struct rdma_cm_id *cmid = NULL; + struct rdma_cm_id *cmid = NULL; struct kib_hca_dev *hdev = NULL; struct kib_hca_dev *old; - struct ib_pd *pd; + struct ib_pd *pd; struct kib_net *net; - struct sockaddr_in addr; - unsigned long flags; - int rc = 0; + struct sockaddr_in addr; + struct net_device *netdev; + unsigned long flags; + int rc = 0; int i; - LASSERT (*kiblnd_tunables.kib_dev_failover > 1 || - dev->ibd_can_failover || - dev->ibd_hdev == NULL); + LASSERT(*kiblnd_tunables.kib_dev_failover > 1 || + dev->ibd_can_failover || + dev->ibd_hdev == NULL); rc = kiblnd_dev_need_failover(dev, ns); - if (rc <= 0) - goto out; + if (rc <= 0) + goto out; - if (dev->ibd_hdev != NULL && - dev->ibd_hdev->ibh_cmid != NULL) { - /* XXX it's not good to close old listener at here, - * because we can fail to create new listener. - * But we have to close it now, otherwise rdma_bind_addr - * will return EADDRINUSE... How crap! */ + if (dev->ibd_hdev != NULL && + dev->ibd_hdev->ibh_cmid != NULL) { + /* XXX it's not good to close old listener at here, + * because we can fail to create new listener. + * But we have to close it now, otherwise rdma_bind_addr + * will return EADDRINUSE... How crap! */ write_lock_irqsave(&kiblnd_data.kib_global_lock, flags); cmid = dev->ibd_hdev->ibh_cmid; @@ -2793,44 +2811,44 @@ kiblnd_dev_failover(struct kib_dev *dev, struct net *ns) dev->ibd_hdev->ibh_cmid = NULL; write_unlock_irqrestore(&kiblnd_data.kib_global_lock, flags); - rdma_destroy_id(cmid); - } + rdma_destroy_id(cmid); + } cmid = kiblnd_rdma_create_id(ns, kiblnd_cm_callback, dev, RDMA_PS_TCP, IB_QPT_RC); - if (IS_ERR(cmid)) { - rc = PTR_ERR(cmid); - CERROR("Failed to create cmid for failover: %d\n", rc); - goto out; - } + if (IS_ERR(cmid)) { + rc = PTR_ERR(cmid); + CERROR("Failed to create cmid for failover: %d\n", rc); + goto out; + } - memset(&addr, 0, sizeof(addr)); - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = (__force u32)htonl(dev->ibd_ifip); - addr.sin_port = htons(*kiblnd_tunables.kib_service); + memset(&addr, 0, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = (__force u32)htonl(dev->ibd_ifip); + addr.sin_port = htons(*kiblnd_tunables.kib_service); - /* Bind to failover device or port */ - rc = rdma_bind_addr(cmid, (struct sockaddr *)&addr); + /* Bind to failover device or port */ + rc = rdma_bind_addr(cmid, (struct sockaddr *)&addr); if (rc != 0 || cmid->device == NULL) { CERROR("Failed to bind %s:%pI4h to device(%p): %d\n", dev->ibd_ifname, &dev->ibd_ifip, cmid->device, rc); - rdma_destroy_id(cmid); - goto out; - } + rdma_destroy_id(cmid); + goto out; + } LIBCFS_ALLOC(hdev, sizeof(*hdev)); - if (hdev == NULL) { - CERROR("Failed to allocate kib_hca_dev\n"); - rdma_destroy_id(cmid); - rc = -ENOMEM; - goto out; - } + if (hdev == NULL) { + CERROR("Failed to allocate kib_hca_dev\n"); + rdma_destroy_id(cmid); + rc = -ENOMEM; + goto out; + } - atomic_set(&hdev->ibh_ref, 1); - hdev->ibh_dev = dev; - hdev->ibh_cmid = cmid; - hdev->ibh_ibdev = cmid->device; + atomic_set(&hdev->ibh_ref, 1); + hdev->ibh_dev = dev; + hdev->ibh_cmid = cmid; + hdev->ibh_ibdev = cmid->device; hdev->ibh_port = cmid->port_num; #ifdef HAVE_IB_ALLOC_PD_2ARGS @@ -2844,13 +2862,13 @@ kiblnd_dev_failover(struct kib_dev *dev, struct net *ns) goto out; } - hdev->ibh_pd = pd; + hdev->ibh_pd = pd; - rc = rdma_listen(cmid, 0); - if (rc != 0) { - CERROR("Can't start new listener: %d\n", rc); - goto out; - } + rc = rdma_listen(cmid, 0); + if (rc != 0) { + CERROR("Can't start new listener: %d\n", rc); + goto out; + } rc = kiblnd_hdev_get_attr(hdev); if (rc != 0) { @@ -2898,11 +2916,18 @@ kiblnd_dev_failover(struct kib_dev *dev, struct net *ns) if (hdev != NULL) kiblnd_hdev_decref(hdev); - if (rc != 0) + if (rc != 0) { dev->ibd_failed_failover++; - else + } else { dev->ibd_failed_failover = 0; + rcu_read_lock(); + netdev = dev_get_by_name_rcu(ns, dev->ibd_ifname); + if (netdev && (kiblnd_get_link_status(netdev) == 1)) + kiblnd_set_ni_fatal_on(dev->ibd_hdev, 0); + rcu_read_unlock(); + } + return rc; } @@ -2956,8 +2981,8 @@ kiblnd_base_shutdown(void) cfs_percpt_for_each(sched, i, kiblnd_data.kib_scheds) wake_up_all(&sched->ibs_waitq); - wake_up_all(&kiblnd_data.kib_connd_waitq); - wake_up_all(&kiblnd_data.kib_failover_waitq); + wake_up(&kiblnd_data.kib_connd_waitq); + wake_up(&kiblnd_data.kib_failover_waitq); wait_var_event_warning(&kiblnd_data.kib_nthreads, !atomic_read(&kiblnd_data.kib_nthreads), @@ -3157,12 +3182,11 @@ kiblnd_start_schedulers(struct kib_sched_info *sched) } for (i = 0; i < nthrs; i++) { - long id; - char name[20]; - id = KIB_THREAD_ID(sched->ibs_cpt, sched->ibs_nthreads + i); - snprintf(name, sizeof(name), "kiblnd_sd_%02ld_%02ld", - KIB_THREAD_CPT(id), KIB_THREAD_TID(id)); - rc = kiblnd_thread_start(kiblnd_scheduler, (void *)id, name); + long id = KIB_THREAD_ID(sched->ibs_cpt, sched->ibs_nthreads + i); + + rc = kiblnd_thread_start(kiblnd_scheduler, (void *)id, + "kiblnd_sd_%02ld_%02ld", + KIB_THREAD_CPT(id), KIB_THREAD_TID(id)); if (rc == 0) continue; @@ -3267,19 +3291,12 @@ kiblnd_startup(struct lnet_ni *ni) kiblnd_tunables_setup(ni); /* - * ni_interfaces is only to support legacy pre Multi-Rail - * tcp bonding for ksocklnd. Multi-Rail wants each secondary - * IP to be treated as an unique 'struct ni' interfaces instead. + * Multi-Rail wants each secondary + * IP to be treated as an unique 'struct ni' interface. */ - if (ni->ni_interfaces[0] != NULL) { + if (ni->ni_interface != NULL) { /* Use the IPoIB interface specified in 'networks=' */ - if (ni->ni_interfaces[1] != NULL) { - CERROR("ko2iblnd: Multiple interfaces not supported\n"); - rc = -EINVAL; - goto failed; - } - - ifname = ni->ni_interfaces[0]; + ifname = ni->ni_interface; } else { ifname = *kiblnd_tunables.kib_default_ipif; } @@ -3382,6 +3399,7 @@ static const struct lnet_lnd the_o2iblnd = { .lnd_ctl = kiblnd_ctl, .lnd_send = kiblnd_send, .lnd_recv = kiblnd_recv, + .lnd_get_dev_prio = kiblnd_get_dev_prio, }; static void ko2inlnd_assert_wire_constants(void)