+ if (!frd->frd_valid) {
+ struct ib_rdma_wr *inv_wr;
+ __u32 key = is_rx ? mr->rkey : mr->lkey;
+
+ inv_wr = &frd->frd_inv_wr;
+ memset(inv_wr, 0, sizeof(*inv_wr));
+
+ inv_wr->wr.opcode = IB_WR_LOCAL_INV;
+ inv_wr->wr.wr_id = IBLND_WID_MR;
+ inv_wr->wr.ex.invalidate_rkey = key;
+
+ /* Bump the key so the next registration of this MR uses a fresh rkey */
+ key = ib_inc_rkey(key);
+ ib_update_fast_reg_key(mr, key);
+ }
+
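+ /* Map the fragment scatterlist onto the MR; on kernels that
+  * provide ib_map_mr_sg() the core builds the page list itself */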
+#ifdef HAVE_IB_MAP_MR_SG
+#ifdef HAVE_IB_MAP_MR_SG_5ARGS
+ n = ib_map_mr_sg(mr, tx->tx_frags,
+ tx->tx_nfrags, NULL, PAGE_SIZE);
+#else
+ n = ib_map_mr_sg(mr, tx->tx_frags,
+ tx->tx_nfrags, PAGE_SIZE);
+#endif
+ if (unlikely(n != tx->tx_nfrags)) {
+ CERROR("Failed to map mr %d/%d "
+ "elements\n", n, tx->tx_nfrags);
+ return n < 0 ? n : -EINVAL;
+ }
+
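+ /* ib_map_mr_sg() picks its own iova; override it with the
+  * caller-supplied I/O virtual address for this RDMA descriptor */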
+ mr->iova = iov;
+
+ wr = &frd->frd_fastreg_wr;
+ memset(wr, 0, sizeof(*wr));
+
+ wr->wr.opcode = IB_WR_REG_MR;
+ wr->wr.wr_id = IBLND_WID_MR;
+ wr->wr.num_sge = 0;
+ wr->wr.send_flags = 0;
+ wr->mr = mr;
+ wr->key = is_rx ? mr->rkey : mr->lkey;
+ wr->access = (IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE);
+#else
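+ /* kernels without ib_map_mr_sg(): copy the page list into the
+  * fast_reg page list and post IB_WR_FAST_REG_MR instead */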
+ if (!tx_pages_mapped) {
+ npages = kiblnd_map_tx_pages(tx, rd);
+ tx_pages_mapped = 1;
+ }
+
+ LASSERT(npages <= frpl->max_page_list_len);
+ memcpy(frpl->page_list, pages,
+ sizeof(*pages) * npages);
+
+ /* Prepare FastReg WR */
+ wr = &frd->frd_fastreg_wr;
+ memset(wr, 0, sizeof(*wr));
+
+ wr->wr.opcode = IB_WR_FAST_REG_MR;
+ wr->wr.wr_id = IBLND_WID_MR;
+
+ wr->wr.wr.fast_reg.iova_start = iov;
+ wr->wr.wr.fast_reg.page_list = frpl;
+ wr->wr.wr.fast_reg.page_list_len = npages;
+ wr->wr.wr.fast_reg.page_shift = PAGE_SHIFT;
+ wr->wr.wr.fast_reg.length = nob;
+ wr->wr.wr.fast_reg.rkey =
+ is_rx ? mr->rkey : mr->lkey;
+ wr->wr.wr.fast_reg.access_flags =
+ (IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE);
+#endif
+
+ fmr->fmr_key = is_rx ? mr->rkey : mr->lkey;
+ fmr->fmr_frd = frd;
+ fmr->fmr_pfmr = NULL;
+ fmr->fmr_pool = fpo;
+ return 0;
+ }
+ spin_unlock(&fps->fps_lock);
+ rc = -EBUSY;
+ }
+
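+ /* this pool didn't yield a mapping: undo the map count bumped
+  * when the pool was selected, then bail out, rescan or grow */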
+ spin_lock(&fps->fps_lock);
+ fpo->fpo_map_count--;
+ if (rc != -EAGAIN) {
+ spin_unlock(&fps->fps_lock);
+ return rc;
+ }
+
+ /* -EAGAIN: restart the scan if the pool set changed while we were
+  * unlocked; otherwise fall through and try the next pool */
+ if (version != fps->fps_version) {
+ spin_unlock(&fps->fps_lock);
+ goto again;
+ }
+ }
+
+ if (fps->fps_increasing) {
+ spin_unlock(&fps->fps_lock);
+ CDEBUG(D_NET, "Another thread is allocating new "
+ "FMR pool, waiting for her to complete\n");
+ schedule();
+ goto again;
+
+ }
+
+ if (cfs_time_before(cfs_time_current(), fps->fps_next_retry)) {
+ /* someone failed recently */
+ spin_unlock(&fps->fps_lock);
+ return -EAGAIN;
+ }
+
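+ /* this thread grows the pool set; fps_increasing makes others
+  * wait instead of racing to create pools of their own */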
+ fps->fps_increasing = 1;
+ spin_unlock(&fps->fps_lock);
+
+ CDEBUG(D_NET, "Allocate new FMR pool\n");
+ rc = kiblnd_create_fmr_pool(fps, &fpo);
+ spin_lock(&fps->fps_lock);
+ fps->fps_increasing = 0;
+ if (rc == 0) {
+ fps->fps_version++;
+ list_add_tail(&fpo->fpo_list, &fps->fps_pool_list);
+ } else {
+ fps->fps_next_retry = cfs_time_shift(IBLND_POOL_RETRY);
+ }
+ spin_unlock(&fps->fps_lock);
+
+ goto again;
+}
+
+static void
+kiblnd_fini_pool(kib_pool_t *pool)
+{
+ LASSERT(list_empty(&pool->po_free_list));
+ LASSERT(pool->po_allocated == 0);
+
+ CDEBUG(D_NET, "Finalize %s pool\n", pool->po_owner->ps_name);
+}
+
+static void
+kiblnd_init_pool(kib_poolset_t *ps, kib_pool_t *pool, int size)
+{
+ CDEBUG(D_NET, "Initialize %s pool\n", ps->ps_name);
+
+ memset(pool, 0, sizeof(kib_pool_t));
+ INIT_LIST_HEAD(&pool->po_free_list);
+ pool->po_deadline = cfs_time_shift(IBLND_POOL_DEADLINE);
+ pool->po_owner = ps;
+ pool->po_size = size;
+}
+
+static void
+kiblnd_destroy_pool_list(struct list_head *head)
+{
+ kib_pool_t *pool;
+
+ while (!list_empty(head)) {
+ pool = list_entry(head->next, kib_pool_t, po_list);
+ list_del(&pool->po_list);
+
+ LASSERT(pool->po_owner != NULL);
+ pool->po_owner->ps_pool_destroy(pool);
+ }
+}
+
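+/* Fail every pool in the set: idle pools go onto @zombies for the
+ * caller to destroy, busy ones are parked on the failed list until
+ * their last allocated node is freed. */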
+static void
+kiblnd_fail_poolset(kib_poolset_t *ps, struct list_head *zombies)
+{
+ if (ps->ps_net == NULL) /* initialized? */
+ return;
+
+ spin_lock(&ps->ps_lock);
+ while (!list_empty(&ps->ps_pool_list)) {
+ kib_pool_t *po = list_entry(ps->ps_pool_list.next,
+ kib_pool_t, po_list);
+ po->po_failed = 1;
+ list_del(&po->po_list);
+ if (po->po_allocated == 0)
+ list_add(&po->po_list, zombies);
+ else
+ list_add(&po->po_list, &ps->ps_failed_pool_list);
+ }
+ spin_unlock(&ps->ps_lock);
+}
+
+static void
+kiblnd_fini_poolset(kib_poolset_t *ps)
+{
+ if (ps->ps_net != NULL) { /* initialized? */
+ kiblnd_destroy_pool_list(&ps->ps_failed_pool_list);
+ kiblnd_destroy_pool_list(&ps->ps_pool_list);
+ }
+}
+
+static int
+kiblnd_init_poolset(kib_poolset_t *ps, int cpt,
+ kib_net_t *net, char *name, int size,
+ kib_ps_pool_create_t po_create,
+ kib_ps_pool_destroy_t po_destroy,
+ kib_ps_node_init_t nd_init,
+ kib_ps_node_fini_t nd_fini)
+{
+ kib_pool_t *pool;
+ int rc;
+
+ memset(ps, 0, sizeof(kib_poolset_t));
+
+ ps->ps_cpt = cpt;
+ ps->ps_net = net;
+ ps->ps_pool_create = po_create;
+ ps->ps_pool_destroy = po_destroy;
+ ps->ps_node_init = nd_init;
+ ps->ps_node_fini = nd_fini;
+ ps->ps_pool_size = size;
+ if (strlcpy(ps->ps_name, name, sizeof(ps->ps_name))
+ >= sizeof(ps->ps_name))
+ return -E2BIG;
+ spin_lock_init(&ps->ps_lock);
+ INIT_LIST_HEAD(&ps->ps_pool_list);
+ INIT_LIST_HEAD(&ps->ps_failed_pool_list);
+
+ rc = ps->ps_pool_create(ps, size, &pool);
+ if (rc == 0)
+ list_add(&pool->po_list, &ps->ps_pool_list);
+ else
+ CERROR("Failed to create the first pool for %s\n", ps->ps_name);
+
+ return rc;
+}
+
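+/* a pool can be reclaimed once nothing is allocated from it and it
+ * has either failed or outlived its deadline */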
+static int
+kiblnd_pool_is_idle(kib_pool_t *pool, cfs_time_t now)
+{
+ if (pool->po_allocated != 0) /* still in use */
+ return 0;
+ if (pool->po_failed)
+ return 1;
+ return cfs_time_aftereq(now, pool->po_deadline);
+}
+
+void
+kiblnd_pool_free_node(kib_pool_t *pool, struct list_head *node)
+{
+ struct list_head zombies = LIST_HEAD_INIT(zombies);
+ kib_poolset_t *ps = pool->po_owner;
+ kib_pool_t *tmp;
+ cfs_time_t now = cfs_time_current();
+
+ spin_lock(&ps->ps_lock);
+
+ if (ps->ps_node_fini != NULL)
+ ps->ps_node_fini(pool, node);
+
+ LASSERT(pool->po_allocated > 0);
+ list_add(node, &pool->po_free_list);
+ pool->po_allocated--;
+
+ list_for_each_entry_safe(pool, tmp, &ps->ps_pool_list, po_list) {
+ /* the first pool is persistent */
+ if (ps->ps_pool_list.next == &pool->po_list)
+ continue;
+
+ if (kiblnd_pool_is_idle(pool, now))
+ list_move(&pool->po_list, &zombies);
+ }
+ spin_unlock(&ps->ps_lock);
+
+ if (!list_empty(&zombies))
+ kiblnd_destroy_pool_list(&zombies);
+}
+
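+/* Take a node from the first pool with a non-empty free list. If all
+ * pools are exhausted, wait with backoff for the thread that is
+ * already growing the set, or grow it here. */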
+struct list_head *
+kiblnd_pool_alloc_node(kib_poolset_t *ps)
+{
+ struct list_head *node;
+ kib_pool_t *pool;
+ int rc;
+ unsigned int interval = 1;
+ cfs_time_t time_before;
+ unsigned int trips = 0;
+
+again:
+ spin_lock(&ps->ps_lock);
+ list_for_each_entry(pool, &ps->ps_pool_list, po_list) {
+ if (list_empty(&pool->po_free_list))
+ continue;
+
+ pool->po_allocated++;
+ pool->po_deadline = cfs_time_shift(IBLND_POOL_DEADLINE);
+ node = pool->po_free_list.next;
+ list_del(node);
+
+ if (ps->ps_node_init != NULL) {
+ /* still hold the lock */
+ ps->ps_node_init(pool, node);
+ }
+ spin_unlock(&ps->ps_lock);
+ return node;
+ }
+
+ /* no free node in any pool; see if we can grow the pool set */
+ if (ps->ps_increasing) {
+ /* another thread is allocating a new pool */
+ spin_unlock(&ps->ps_lock);
+ trips++;
+ CDEBUG(D_NET, "Another thread is allocating new "
+ "%s pool, waiting %d HZs for her to complete."
+ "trips = %d\n",
+ ps->ps_name, interval, trips);
+
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule_timeout(interval);
+ if (interval < cfs_time_seconds(1))
+ interval *= 2;
+
+ goto again;