/* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * * Copyright (c) 2011, 2017, Intel Corporation. * * Copyright (c) 2018-2020 Data Direct Networks. * * This file is part of Lustre, https://wiki.whamcloud.com/ * * Portals is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. * * Portals is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * version 2 along with this program; If not, see * http://www.gnu.org/licenses/gpl-2.0.html * * Author: Sonia Sharma */ /* * Copyright (c) 2020, Whamcloud. * */ #include #include #include #include #include #include #include #include #include #include #include #include "liblnd.h" #include #include #include #include #include "liblnetconfig.h" static inline bool lnet_udsp_criteria_present(struct lnet_ud_nid_descr *descr) { return descr->ud_net_id.udn_net_type != 0; } struct lnet_udsp *lnet_udsp_alloc(void) { struct lnet_udsp *udsp; udsp = calloc(1, sizeof(*udsp)); if (!udsp) return NULL; INIT_LIST_HEAD(&udsp->udsp_on_list); INIT_LIST_HEAD(&udsp->udsp_src.ud_addr_range); INIT_LIST_HEAD(&udsp->udsp_src.ud_net_id.udn_net_num_range); INIT_LIST_HEAD(&udsp->udsp_dst.ud_addr_range); INIT_LIST_HEAD(&udsp->udsp_dst.ud_net_id.udn_net_num_range); INIT_LIST_HEAD(&udsp->udsp_rte.ud_addr_range); INIT_LIST_HEAD(&udsp->udsp_rte.ud_net_id.udn_net_num_range); return udsp; } static void lnet_udsp_nid_descr_free(struct lnet_ud_nid_descr *nid_descr, bool blk) { struct list_head *net_range = &nid_descr->ud_net_id.udn_net_num_range; if (!lnet_udsp_criteria_present(nid_descr)) return; /* memory management is a bit tricky here. When we allocate the * memory to store the NID descriptor we allocate a large buffer * for all the data, so we need to free the entire buffer at * once. If the net is present the net_range->next points to that * buffer otherwise if the ud_addr_range is present then it's the * ud_addr_range.next */ if (blk) { if (!list_empty(net_range)) free(net_range->next); else if (!list_empty(&nid_descr->ud_addr_range)) free(nid_descr->ud_addr_range.next); } else { cfs_expr_list_free_list(net_range); cfs_expr_list_free_list(&nid_descr->ud_addr_range); } } void lnet_udsp_free(struct lnet_udsp *udsp, bool blk) { lnet_udsp_nid_descr_free(&udsp->udsp_src, blk); lnet_udsp_nid_descr_free(&udsp->udsp_dst, blk); lnet_udsp_nid_descr_free(&udsp->udsp_rte, blk); free(udsp); } static void copy_range_info(void __user **bulk, void **buf, struct list_head *list, int count) { struct lnet_range_expr *range_expr; struct cfs_range_expr *range; struct cfs_expr_list *exprs; int range_count = count; int i; if (range_count == 0) return; if (range_count == -1) { struct lnet_expressions *e; e = *bulk; range_count = e->le_count; *bulk += sizeof(*e); } exprs = *buf; INIT_LIST_HEAD(&exprs->el_link); INIT_LIST_HEAD(&exprs->el_exprs); list_add_tail(&exprs->el_link, list); *buf += sizeof(*exprs); for (i = 0; i < range_count; i++) { range_expr = *bulk; range = *buf; INIT_LIST_HEAD(&range->re_link); range->re_lo = range_expr->re_lo; range->re_hi = range_expr->re_hi; range->re_stride = range_expr->re_stride; list_add_tail(&range->re_link, &exprs->el_exprs); *bulk += sizeof(*range_expr); *buf += sizeof(*range); } } static int copy_ioc_udsp_descr(struct lnet_ud_nid_descr *nid_descr, char *type, void **bulk, __u32 *bulk_size) { struct lnet_ioctl_udsp_descr *ioc_nid = *bulk; struct lnet_expressions *exprs; __u32 descr_type; int expr_count = 0; int range_count = 0; int i; __u32 size; int remaining_size = *bulk_size; void *tmp = *bulk; __u32 alloc_size; void *buf; size_t range_expr_s = sizeof(struct lnet_range_expr); size_t lnet_exprs_s = sizeof(struct lnet_expressions); /* criteria not present, skip over the static part of the * bulk, which is included for each NID descriptor */ if (ioc_nid->iud_net.ud_net_type == 0) { remaining_size -= sizeof(*ioc_nid); if (remaining_size < 0) return -EINVAL; *bulk += sizeof(*ioc_nid); *bulk_size = remaining_size; return 0; } descr_type = ioc_nid->iud_src_hdr.ud_descr_type; if (descr_type != *(__u32 *)type) return -EINVAL; /* calculate the total size to verify we have enough buffer. * Start of by finding how many ranges there are for the net * expression. */ range_count = ioc_nid->iud_net.ud_net_num_expr.le_count; size = sizeof(*ioc_nid) + (range_count * range_expr_s); remaining_size -= size; if (remaining_size < 0) return -EINVAL; /* the number of expressions for the NID. IE 4 for IP, 1 for GNI */ expr_count = ioc_nid->iud_src_hdr.ud_descr_count; /* point tmp to the beginning of the NID expressions */ tmp += size; for (i = 0; i < expr_count; i++) { /* get the number of ranges per expression */ exprs = tmp; range_count += exprs->le_count; size = (range_expr_s * exprs->le_count) + lnet_exprs_s; remaining_size -= size; if (remaining_size < 0) return -EINVAL; tmp += size; } *bulk_size = remaining_size; /* copy over the net type */ nid_descr->ud_net_id.udn_net_type = ioc_nid->iud_net.ud_net_type; /* allocate the total memory required to copy this NID descriptor */ alloc_size = (sizeof(struct cfs_expr_list) * (expr_count + 1)) + (sizeof(struct cfs_range_expr) * (range_count)); buf = calloc(alloc_size, 1); if (!buf) return -ENOMEM; /* copy over the net number range */ range_count = ioc_nid->iud_net.ud_net_num_expr.le_count; *bulk += sizeof(*ioc_nid); copy_range_info(bulk, &buf, &nid_descr->ud_net_id.udn_net_num_range, range_count); /* copy over the NID descriptor */ for (i = 0; i < expr_count; i++) copy_range_info(bulk, &buf, &nid_descr->ud_addr_range, -1); return 0; } struct lnet_udsp * lnet_udsp_demarshal(void *bulk, __u32 bulk_size) { struct lnet_ioctl_udsp *ioc_udsp; struct lnet_udsp *udsp; int rc = -ENOMEM; if (bulk_size < sizeof(*ioc_udsp)) return NULL; udsp = lnet_udsp_alloc(); if (!udsp) return NULL; ioc_udsp = bulk; udsp->udsp_action_type = ioc_udsp->iou_action_type; udsp->udsp_action.udsp_priority = ioc_udsp->iou_action.priority; udsp->udsp_idx = ioc_udsp->iou_idx; bulk = ioc_udsp->iou_bulk; bulk_size -= sizeof(*ioc_udsp); if (bulk_size != ioc_udsp->iou_bulk_size) goto failed; rc = copy_ioc_udsp_descr(&udsp->udsp_src, "SRC", &bulk, &bulk_size); if (rc < 0) goto failed; rc = copy_ioc_udsp_descr(&udsp->udsp_dst, "DST", &bulk, &bulk_size); if (rc < 0) goto failed; rc = copy_ioc_udsp_descr(&udsp->udsp_rte, "RTE", &bulk, &bulk_size); if (rc < 0) goto failed; return udsp; failed: lnet_udsp_free(udsp, true); return NULL; } static inline int lnet_get_list_len(struct list_head *list) { struct list_head *l; int count = 0; list_for_each(l, list) count++; return count; } static size_t lnet_size_marshaled_nid_descr(struct lnet_ud_nid_descr *descr) { struct cfs_expr_list *expr; int expr_count = 0; int range_count = 0; size_t size = sizeof(struct lnet_ioctl_udsp_descr); if (!lnet_udsp_criteria_present(descr)) return size; if (!list_empty(&descr->ud_net_id.udn_net_num_range)) { expr = list_entry(descr->ud_net_id.udn_net_num_range.next, struct cfs_expr_list, el_link); range_count = lnet_get_list_len(&expr->el_exprs); } /* count the number of cfs_range_expr in the address expressions */ list_for_each_entry(expr, &descr->ud_addr_range, el_link) { expr_count++; range_count += lnet_get_list_len(&expr->el_exprs); } size += (sizeof(struct lnet_expressions) * expr_count); size += (sizeof(struct lnet_range_expr) * range_count); return size; } size_t lnet_get_udsp_size(struct lnet_udsp *udsp) { size_t size = sizeof(struct lnet_ioctl_udsp); size += lnet_size_marshaled_nid_descr(&udsp->udsp_src); size += lnet_size_marshaled_nid_descr(&udsp->udsp_dst); size += lnet_size_marshaled_nid_descr(&udsp->udsp_rte); return size; } static void copy_exprs(struct cfs_expr_list *expr, void __user **bulk, __s32 *bulk_size) { struct cfs_range_expr *range; struct lnet_range_expr range_expr; /* copy over the net range expressions to the bulk */ list_for_each_entry(range, &expr->el_exprs, re_link) { range_expr.re_lo = range->re_lo; range_expr.re_hi = range->re_hi; range_expr.re_stride = range->re_stride; memcpy(*bulk, &range_expr, sizeof(range_expr)); *bulk += sizeof(range_expr); *bulk_size -= sizeof(range_expr); } } static int copy_nid_range(struct lnet_ud_nid_descr *nid_descr, char *type, void __user **bulk, __s32 *bulk_size) { struct lnet_ioctl_udsp_descr ioc_udsp_descr = { { 0 } }; struct cfs_expr_list *expr; struct lnet_expressions ioc_expr; int expr_count; int net_expr_count = 0; ioc_udsp_descr.iud_src_hdr.ud_descr_type = *(__u32 *)type; /* if criteria not present, copy over the static part of the NID * descriptor */ if (!lnet_udsp_criteria_present(nid_descr)) { memcpy(*bulk, &ioc_udsp_descr, sizeof(ioc_udsp_descr)); *bulk += sizeof(ioc_udsp_descr); *bulk_size -= sizeof(ioc_udsp_descr); return 0; } expr_count = lnet_get_list_len(&nid_descr->ud_addr_range); /* copy the net information */ if (!list_empty(&nid_descr->ud_net_id.udn_net_num_range)) { expr = list_entry(nid_descr->ud_net_id.udn_net_num_range.next, struct cfs_expr_list, el_link); net_expr_count = lnet_get_list_len(&expr->el_exprs); } else { net_expr_count = 0; } /* set the total expression count */ ioc_udsp_descr.iud_src_hdr.ud_descr_count = expr_count; ioc_udsp_descr.iud_net.ud_net_type = nid_descr->ud_net_id.udn_net_type; ioc_udsp_descr.iud_net.ud_net_num_expr.le_count = net_expr_count; /* copy over the header info to the bulk */ memcpy(*bulk, &ioc_udsp_descr, sizeof(ioc_udsp_descr)); *bulk += sizeof(ioc_udsp_descr); *bulk_size -= sizeof(ioc_udsp_descr); /* copy over the net num expression if it exists */ if (net_expr_count) copy_exprs(expr, bulk, bulk_size); /* copy the address range */ list_for_each_entry(expr, &nid_descr->ud_addr_range, el_link) { ioc_expr.le_count = lnet_get_list_len(&expr->el_exprs); memcpy(*bulk, &ioc_expr, sizeof(ioc_expr)); *bulk += sizeof(ioc_expr); *bulk_size -= sizeof(ioc_expr); copy_exprs(expr, bulk, bulk_size); } return 0; } static int lnet_udsp_marshal(struct lnet_udsp *udsp, void *bulk, __s32 bulk_size) { struct lnet_ioctl_udsp *ioc_udsp; int rc = -ENOMEM; /* make sure user space allocated enough buffer to marshal the * udsp */ if (bulk_size < lnet_get_udsp_size(udsp)) return -EINVAL; ioc_udsp = bulk; ioc_udsp->iou_idx = udsp->udsp_idx; ioc_udsp->iou_action_type = udsp->udsp_action_type; ioc_udsp->iou_action.priority = udsp->udsp_action.udsp_priority; bulk += sizeof(*ioc_udsp); bulk_size -= sizeof(*ioc_udsp); rc = copy_nid_range(&udsp->udsp_src, "SRC", &bulk, &bulk_size); if (rc != 0) return rc; rc = copy_nid_range(&udsp->udsp_dst, "DST", &bulk, &bulk_size); if (rc != 0) return rc; rc = copy_nid_range(&udsp->udsp_rte, "RTE", &bulk, &bulk_size); return rc; } static enum lnet_udsp_action_type lnet_str2udsp_action(char *type) { if (!type) return EN_LNET_UDSP_ACTION_NONE; if (!strncmp(type, "priority", strlen("priority"))) return EN_LNET_UDSP_ACTION_PRIORITY; if (!strncmp(type, "pref", strlen("pref"))) return EN_LNET_UDSP_ACTION_PREFERRED_LIST; return EN_LNET_UDSP_ACTION_NONE; } int lustre_lnet_add_udsp(char *src, char *dst, char *rte, char *type, union lnet_udsp_action *action, int idx, int seq_no, struct cYAML **err_rc) { struct lnet_udsp *udsp = NULL; struct lnet_ioctl_udsp *udsp_bulk; int rc = LUSTRE_CFG_RC_OUT_OF_MEM; void *bulk = NULL; __u32 bulk_size; char err_str[LNET_MAX_STR_LEN]; enum lnet_udsp_action_type action_type; snprintf(err_str, sizeof(err_str), "\"success\""); action_type = lnet_str2udsp_action(type); if (action_type == EN_LNET_UDSP_ACTION_NONE) { snprintf(err_str, sizeof(err_str), "\"bad action type specified: %s\"", type); rc = LUSTRE_CFG_RC_BAD_PARAM; goto out; } /* sanitize parameters: * src-dst can be simultaneously present * dst-rte can be simultaneously present */ if ((!src && !rte && !dst) || (src && rte && dst) || (src && rte && !dst)) { snprintf(err_str, sizeof(err_str), "\"The combination of src, dst and rte is not supported\""); rc = LUSTRE_CFG_RC_BAD_PARAM; goto out; } udsp = lnet_udsp_alloc(); if (!udsp) { snprintf(err_str, sizeof(err_str), "\"out of memory\""); goto out; } udsp->udsp_idx = idx; udsp->udsp_action_type = action_type; /* a priority of -1 will result in the lowest possible priority */ if (action_type == EN_LNET_UDSP_ACTION_PRIORITY) udsp->udsp_action.udsp_priority = action->udsp_priority; /* override with the default * if priority is expected, but not specified */ if (!rte && ((dst && !src) || (src && !dst)) && action_type != EN_LNET_UDSP_ACTION_PRIORITY) { udsp->udsp_action_type = EN_LNET_UDSP_ACTION_PRIORITY; udsp->udsp_action.udsp_priority = 0; } if (src) { rc = cfs_parse_nid_parts(src, &udsp->udsp_src.ud_addr_range, &udsp->udsp_src.ud_net_id.udn_net_num_range, &udsp->udsp_src.ud_net_id.udn_net_type); if (rc < 0) { snprintf(err_str, sizeof(err_str), "\failed to parse src parameter\""); goto out; } } if (dst) { rc = cfs_parse_nid_parts(dst, &udsp->udsp_dst.ud_addr_range, &udsp->udsp_dst.ud_net_id.udn_net_num_range, &udsp->udsp_dst.ud_net_id.udn_net_type); if (rc < 0) { snprintf(err_str, sizeof(err_str), "\failed to parse dst parameter\""); goto out; } } if (rte) { rc = cfs_parse_nid_parts(rte, &udsp->udsp_rte.ud_addr_range, &udsp->udsp_rte.ud_net_id.udn_net_num_range, &udsp->udsp_rte.ud_net_id.udn_net_type); if (rc < 0) { snprintf(err_str, sizeof(err_str), "\failed to parse rte parameter\""); goto out; } } bulk_size = lnet_get_udsp_size(udsp); bulk = calloc(1, bulk_size); if (!bulk) { rc = LUSTRE_CFG_RC_OUT_OF_MEM; snprintf(err_str, sizeof(err_str), "\"out of memory\""); goto out; } udsp_bulk = bulk; LIBCFS_IOC_INIT_V2(*udsp_bulk, iou_hdr); udsp_bulk->iou_hdr.ioc_len = bulk_size; udsp_bulk->iou_bulk_size = bulk_size - sizeof(*udsp_bulk); rc = lnet_udsp_marshal(udsp, bulk, bulk_size); if (rc != LUSTRE_CFG_RC_NO_ERR) { rc = LUSTRE_CFG_RC_MARSHAL_FAIL; snprintf(err_str, sizeof(err_str), "\"failed to marshal udsp\""); goto out; } udsp_bulk->iou_bulk = bulk + sizeof(*udsp_bulk); rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_ADD_UDSP, bulk); if (rc < 0) { rc = errno; snprintf(err_str, sizeof(err_str), "\"cannot add udsp: %s\"", strerror(errno)); goto out; } rc = LUSTRE_CFG_RC_NO_ERR; out: if (bulk) free(bulk); if (udsp) lnet_udsp_free(udsp, false); cYAML_build_error(rc, seq_no, ADD_CMD, "udsp", err_str, err_rc); return rc; }