+/*
+ * Return the CPT (CPU partition) whose NUMA node holds the memory that
+ * @md describes at @offset, or CFS_CPT_ANY if no specific CPT can be
+ * determined.
+ */
+int
+lnet_cpt_of_md(struct lnet_libmd *md, unsigned int offset)
+{
+ int cpt = CFS_CPT_ANY;
+ unsigned int niov;
+
+ /*
+ * if the md_options has a bulk handle then we want to look at the
+ * bulk md because that's the data which we will be DMAing
+ */
+ if (md && (md->md_options & LNET_MD_BULK_HANDLE) != 0 &&
+ !LNetMDHandleIsInvalid(md->md_bulk_handle))
+ md = lnet_handle2md(&md->md_bulk_handle);
+
+ if (!md || md->md_niov == 0)
+ return CFS_CPT_ANY;
+
+ niov = md->md_niov;
+
+ /*
+ * There are three cases to handle:
+ * 1. The MD is using struct bio_vec
+ * 2. The MD is using struct kvec
+ * 3. Contiguous buffer allocated via vmalloc
+ *
+ * In case 2 we can use the virt_to_page() macro to get the page
+ * backing the memory the kvec describes.
+ *
+ * In case 3 use is_vmalloc_addr() and vmalloc_to_page();
+ * lnet_kvaddr_to_page() below handles both of these cases.
+ *
+ * The offset provided can be within the first iov/kiov entry or
+ * it could go beyond it. In that case we need to make sure to
+ * look at the page which actually contains the data that will be
+ * DMAed.
+ */
+ if ((md->md_options & LNET_MD_KIOV) != 0) {
+ struct bio_vec *kiov = md->md_iov.kiov;
+
+ while (offset >= kiov->bv_len) {
+ offset -= kiov->bv_len;
+ niov--;
+ kiov++;
+ if (niov == 0) {
+ CERROR("offset %d goes beyond kiov\n", offset);
+ goto out;
+ }
+ }
+
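+ /* pick the CPT that contains the NUMA node this page resides on */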
+ cpt = cfs_cpt_of_node(lnet_cpt_table(),
+ page_to_nid(kiov->bv_page));
+ } else {
+ struct kvec *iov = md->md_iov.iov;
+ unsigned long vaddr;
+ struct page *page;
+
+ while (offset >= iov->iov_len) {
+ offset -= iov->iov_len;
+ niov--;
+ iov++;
+ if (niov == 0) {
+ CERROR("offset %d goes beyond iov\n", offset);
+ goto out;
+ }
+ }
+
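+ /* resolve the address of the data at @offset to its backing page,
+ * then pick the CPT containing that page's NUMA node
+ */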
+ vaddr = ((unsigned long)iov->iov_base) + offset;
+ page = lnet_kvaddr_to_page(vaddr);
+ if (!page) {
+ CERROR("Couldn't resolve vaddr 0x%lx to page\n", vaddr);
+ goto out;
+ }
+ cpt = cfs_cpt_of_node(lnet_cpt_table(), page_to_nid(page));
+ }
+
+out:
+ return cpt;
+}
+
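+/*
+ * Initialize the library MD @lmd from the user-visible MD @umd,
+ * copying and validating its iovec/kiov descriptors.  Returns 0 on
+ * success or -EINVAL if the descriptors or max_size are invalid.
+ */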
+static int
+lnet_md_build(struct lnet_libmd *lmd, struct lnet_md *umd, int unlink)
+{
+ int i;
+ unsigned int niov;
+ int total_length = 0;
+
+ lmd->md_me = NULL;
+ lmd->md_start = umd->start;
+ lmd->md_offset = 0;
+ lmd->md_max_size = umd->max_size;
+ lmd->md_options = umd->options;
+ lmd->md_user_ptr = umd->user_ptr;
+ lmd->md_eq = NULL;
+ lmd->md_threshold = umd->threshold;
+ lmd->md_refcount = 0;
+ lmd->md_flags = (unlink == LNET_UNLINK) ? LNET_MD_FLAG_AUTO_UNLINK : 0;
+ lmd->md_bulk_handle = umd->bulk_handle;
+
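+ /* The umd describes its memory in one of three ways: an array of
+ * struct kvec (LNET_MD_IOVEC), an array of struct bio_vec
+ * (LNET_MD_KIOV), or a single contiguous buffer.
+ */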
+ if ((umd->options & LNET_MD_IOVEC) != 0) {
+ if ((umd->options & LNET_MD_KIOV) != 0) /* Can't specify both */
+ return -EINVAL;
+
+ lmd->md_niov = niov = umd->length;
+ memcpy(lmd->md_iov.iov, umd->start,
+ niov * sizeof(lmd->md_iov.iov[0]));
+
+ for (i = 0; i < (int)niov; i++) {
+ /* We take the base address on trust */
+ if (lmd->md_iov.iov[i].iov_len <= 0) /* invalid length */
+ return -EINVAL;
+
+ total_length += lmd->md_iov.iov[i].iov_len;
+ }
+
+ lmd->md_length = total_length;
+
+ if ((umd->options & LNET_MD_MAX_SIZE) != 0 && /* max size used */
+ (umd->max_size < 0 ||
+ umd->max_size > total_length)) /* illegal max_size */
+ return -EINVAL;
+
+ } else if ((umd->options & LNET_MD_KIOV) != 0) {
+ lmd->md_niov = niov = umd->length;
+ memcpy(lmd->md_iov.kiov, umd->start,
+ niov * sizeof(lmd->md_iov.kiov[0]));
+
+ for (i = 0; i < (int)niov; i++) {
+ /* We take the page pointer on trust */
+ if (lmd->md_iov.kiov[i].bv_offset +
+ lmd->md_iov.kiov[i].bv_len > PAGE_SIZE)
+ return -EINVAL; /* invalid length */
+
+ total_length += lmd->md_iov.kiov[i].bv_len;
+ }
+
+ lmd->md_length = total_length;
+
+ if ((umd->options & LNET_MD_MAX_SIZE) != 0 && /* max size used */
+ (umd->max_size < 0 ||
+ umd->max_size > total_length)) /* illegal max_size */
+ return -EINVAL;
+ } else { /* contiguous */
+ lmd->md_length = umd->length;
+ lmd->md_niov = niov = 1;
+ lmd->md_iov.iov[0].iov_base = umd->start;
+ lmd->md_iov.iov[0].iov_len = umd->length;
+
+ if ((umd->options & LNET_MD_MAX_SIZE) != 0 && /* max size used */
+ (umd->max_size < 0 ||
+ umd->max_size > (int)umd->length)) /* illegal max_size */
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* must be called with resource lock held */
+static int
+lnet_md_link(struct lnet_libmd *md, struct lnet_eq *eq, int cpt)
+{
+ struct lnet_res_container *container = the_lnet.ln_md_containers[cpt];
+
+ /* NB we are passed an allocated, but inactive md.
+ * If we return success, the caller may lnet_md_unlink() it;
+ * otherwise the caller may only lnet_md_free() it.
+ */
+ /* This implementation doesn't know how to create START events or
+ * disable END events. Best to LASSERT our caller is compliant so
+ * we find out quickly... */
+ /* TODO - reevaluate what should be here in light of
+ * the removal of the start and end events.
+ * Maybe we shouldn't even allow LNET_EQ_NONE!
+ * LASSERT(eq == NULL);
+ */
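+ /* an MD attached to an EQ takes a per-CPT reference on it */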
+ if (eq) {
+ md->md_eq = eq;
+ (*md->md_eq->eq_refs[cpt])++;
+ }
+
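+ /* assign the MD a handle and put it on this CPT's active list */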
+ lnet_res_lh_initialize(container, &md->md_lh);
+
+ LASSERT(list_empty(&md->md_list));
+ list_add(&md->md_list, &container->rec_active);
+
+ return 0;
+}
+
+/* must be called with lnet_res_lock held */