int cfs_gettok(struct cfs_lstr *next, char delim, struct cfs_lstr *res);
int cfs_str2num_check(char *str, int nob, unsigned *num,
unsigned min, unsigned max);
+int cfs_expr2str(struct list_head *list, char *str, size_t size);
int cfs_expr_list_match(__u32 value, struct cfs_expr_list *expr_list);
int cfs_expr_list_print(char *buffer, int count,
struct cfs_expr_list *expr_list);
return rc;
}
+/**
+ * Append the textual form of a list of numeric expressions to \a str.
+ *
+ * For every range of every expression on \a list:
+ *  - re_lo == re_hi is emitted as "<lo>."
+ *  - re_lo <  re_hi is emitted as "<lo>-<hi>," or "<lo>-<hi>/<stride>,"
+ *    (stride only when re_stride > 1), prefixed with '[' when it is the
+ *    first range of its expression; once the expression is done the
+ *    trailing ',' is turned into "]."
+ * The final '.' is removed before returning.
+ *
+ * Output is appended with strncat(), so \a str must already contain a
+ * NUL-terminated string (normally an empty one). On failure \a str may
+ * hold partial output.
+ *
+ * NOTE(review): an expression mixing single values and ranges (e.g. 1 and
+ * 3-5) is rendered as "1.3-5]" by these rules -- confirm whether such lists
+ * can reach this function.
+ *
+ * \param[in]     list  list of struct cfs_expr_list linked via el_link
+ * \param[in,out] str   destination buffer
+ * \param[in]     size  bytes available in \a str, terminating NUL included
+ *
+ * \retval >= 0      remaining budget; len + 1 bytes are charged per token
+ *                   and 1 byte per closed bracket
+ * \retval -EINVAL   a range has re_lo > re_hi
+ * \retval -ENOBUFS  the output (or a single token) does not fit
+ */
+int
+cfs_expr2str(struct list_head *list, char *str, size_t size)
+{
+	struct cfs_expr_list *expr;
+	struct cfs_range_expr *range;
+	char tmp[LNET_NIDSTR_SIZE];
+	size_t len;
+	bool first;
+	bool bracket = false;
+	bool appended = false;
+	const char *format;
+	char *tmpc;
+	int rc;
+
+	list_for_each_entry(expr, list, el_link) {
+		first = true;
+		list_for_each_entry(range, &expr->el_exprs, re_link) {
+			if (range->re_lo == range->re_hi) {
+				rc = snprintf(tmp, sizeof(tmp),
+					      "%u.", range->re_lo);
+			} else if (range->re_lo < range->re_hi) {
+				if (range->re_stride > 1) {
+					format = first ? "[%u-%u/%u," :
+							 "%u-%u/%u,";
+					rc = snprintf(tmp, sizeof(tmp),
+						      format, range->re_lo,
+						      range->re_hi,
+						      range->re_stride);
+				} else {
+					format = first ? "[%u-%u," :
+							 "%u-%u,";
+					rc = snprintf(tmp, sizeof(tmp),
+						      format, range->re_lo,
+						      range->re_hi);
+				}
+				bracket = true;
+			} else {
+				return -EINVAL;
+			}
+
+			/* a truncated token would lose its separator */
+			if (rc < 0 || (size_t)rc >= sizeof(tmp))
+				return -ENOBUFS;
+
+			/*
+			 * size is unsigned: test before subtracting, a
+			 * "size < 0" check after the fact can never fire.
+			 */
+			len = strlen(tmp);
+			if (size < len + 1)
+				return -ENOBUFS;
+			size -= len + 1;
+
+			strncat(str, tmp, len);
+			appended = true;
+			first = false;
+		}
+		if (bracket) {
+			if (size < 1)
+				return -ENOBUFS;
+			size -= 1;
+
+			/* turn the trailing ',' into "]." and re-terminate */
+			tmpc = str + (strlen(str) - 1);
+			tmpc[0] = ']';
+			tmpc[1] = '.';
+			tmpc[2] = '\0';
+			bracket = false;
+		}
+	}
+
+	/*
+	 * get rid of the trailing '.' at the end of the string, but only
+	 * if something was actually appended; otherwise str may be empty
+	 * and we would write outside the array
+	 */
+	if (appended)
+		str[strlen(str) - 1] = '\0';
+
+	return (int)size;
+}
+
+
static int
libcfs_num_addr_range_expand(struct list_head *addrranges, __u32 *addrs,
int max_addrs)
* \retval 0 if \a str parsed to numeric address
* \retval errno otherwise
*/
-static int
+int
libcfs_num_parse(char *str, int len, struct list_head *list)
{
struct cfs_expr_list *el;
return 0;
}
+/*
+ * Return the length of the first nf_name in libcfs_netstrfns[] that is a
+ * prefix of \a str, or 0 when no entry matches.
+ */
+static __u32
+libcfs_net_str_len(const char *str)
+{
+	int idx = 0;
+
+	while (idx < libcfs_nnetstrfns) {
+		const char *name = libcfs_netstrfns[idx].nf_name;
+		size_t name_len = strlen(name);
+
+		if (strncmp(str, name, name_len) == 0)
+			return name_len;
+		idx++;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Parse the net part of a NID ("<type>", "<type><num>", "<type>*" or
+ * "<type>[<expr>]").
+ *
+ * The net type is stored in \a net_type; whatever follows the type name
+ * (or starts at the first '[') is handed to libcfs_num_parse() which fills
+ * \a net_num.
+ *
+ * Returns -EINVAL for a NULL \a str, an unknown net, or a '*' that appears
+ * before a '['; 0 when there is no net number; otherwise the result of
+ * libcfs_num_parse().
+ */
+int
+parse_net_range(char *str, __u32 len, struct list_head *net_num,
+		__u32 *net_type)
+{
+	struct cfs_lstr next;
+	__u32 net_type_len;
+	__u32 net;
+	char *bracket;
+	char *star;
+
+	if (str == NULL)
+		return -EINVAL;
+
+	net_type_len = libcfs_net_str_len(str);
+
+	if (net_type_len >= len) {
+		net = libcfs_str2net(str);
+	} else {
+		/* cut the string right after the type name for the lookup */
+		char saved = str[net_type_len];
+
+		str[net_type_len] = '\0';
+		net = libcfs_str2net(str);
+		str[net_type_len] = saved;
+	}
+
+	if (net == LNET_NIDNET(LNET_NID_ANY))
+		return -EINVAL;
+
+	*net_type = LNET_NETTYP(net);
+
+	/*
+	 * what follows the net type is an absolute number, a '*', or an
+	 * expression enclosed in []
+	 */
+	bracket = strchr(str, '[');
+	star = strchr(str, '*');
+
+	/* "*[" pattern not allowed */
+	if (bracket != NULL && star != NULL && star < bracket)
+		return -EINVAL;
+
+	next.ls_str = bracket != NULL ? bracket : str + net_type_len;
+	next.ls_len = strlen(next.ls_str);
+
+	/* no net number present: nothing more to parse */
+	if (next.ls_len == 0)
+		return 0;
+
+	return libcfs_num_parse(next.ls_str, next.ls_len, net_num);
+}
+
+
+/*
+ * Parse the address part \a src of a NID with the nf_parse_addrlist
+ * handler of the libcfs_netstrfns[] entry whose nf_type equals
+ * \a net_type. Returns the handler's result, or -EINVAL when no entry
+ * has that type.
+ */
+int
+parse_address(struct cfs_lstr *src, const __u32 net_type,
+	      struct list_head *addr)
+{
+	int idx;
+
+	for (idx = 0; idx < libcfs_nnetstrfns; idx++) {
+		struct netstrfns *fns = &libcfs_netstrfns[idx];
+
+		if (fns->nf_type != net_type)
+			continue;
+
+		return fns->nf_parse_addrlist(src->ls_str, src->ls_len,
+					      addr);
+	}
+
+	return -EINVAL;
+}
+
+
+/*
+ * Split \a str at '@' and parse both halves.
+ *
+ * The net half always goes through parse_net_range(), which fills
+ * \a net_num and \a net_type. When cfs_gettok() leaves a remainder after
+ * the '@' token, the token itself is parsed by parse_address() into
+ * \a addr; otherwise the whole string is treated as a net and \a addr is
+ * left untouched.
+ *
+ * Returns -EINVAL for a NULL \a str or when cfs_gettok() yields no token,
+ * otherwise the result of the last parser that ran.
+ */
+int
+cfs_parse_nid_parts(char *str, struct list_head *addr,
+		    struct list_head *net_num, __u32 *net_type)
+{
+	struct cfs_lstr next;
+	struct cfs_lstr addrrange;
+	bool have_addr;
+	int rc;
+
+	if (str == NULL)
+		return -EINVAL;
+
+	next.ls_str = str;
+	next.ls_len = strlen(str);
+
+	if (cfs_gettok(&next, '@', &addrrange) == 0)
+		return -EINVAL;
+
+	have_addr = next.ls_str != NULL;
+	if (!have_addr) {
+		/* only net is present */
+		next.ls_str = str;
+		next.ls_len = strlen(str);
+	}
+
+	rc = parse_net_range(next.ls_str, next.ls_len, net_num, net_type);
+
+	/* stop on a net parse failure, or when there is no address part */
+	if (rc != 0 || !have_addr)
+		return rc;
+
+	return parse_address(&addrrange, *net_type, addr);
+}
+
+
/**
* Frees addrrange structures of \a list.
*
return 0;
}
+/*
+ * Look up the libcfs_netstrfns[] entry whose nf_type equals \a net_type.
+ * Returns NULL when the type is not in the table.
+ */
+static struct netstrfns *
+type2net_info(__u32 net_type)
+{
+	int idx = 0;
+
+	while (idx < libcfs_nnetstrfns) {
+		struct netstrfns *entry = &libcfs_netstrfns[idx++];
+
+		if (entry->nf_type == net_type)
+			return entry;
+	}
+
+	return NULL;
+}
+
+
+/*
+ * Check whether net \a net_id is of type \a net_type and has a net number
+ * accepted by \a net_num_list.
+ *
+ * An empty \a net_num_list only matches net number 0. Returns 1 on a
+ * match, 0 otherwise (including a NULL \a net_num_list).
+ */
+int
+cfs_match_net(__u32 net_id, __u32 net_type, struct list_head *net_num_list)
+{
+	__u32 net_num;
+
+	if (net_num_list == NULL || net_type != LNET_NETTYP(net_id))
+		return 0;
+
+	net_num = LNET_NETNUM(net_id);
+
+	/* with no number expression only the unnumbered net matches */
+	if (list_empty(net_num_list))
+		return net_num == 0;
+
+	return libcfs_num_match(net_num, net_num_list) ? 1 : 0;
+}
+
+
+/*
+ * Check whether \a nid matches both the address expression \a addr and
+ * the net description (\a net_type, \a net_num_list).
+ *
+ * The address is compared with the nf_match_addr handler of the NID's own
+ * net type; the net is compared with cfs_match_net(). Returns 1 when both
+ * match, 0 otherwise (including NULL lists or an unknown net type).
+ */
+int
+cfs_match_nid_net(lnet_nid_t nid, __u32 net_type,
+		  struct list_head *net_num_list,
+		  struct list_head *addr)
+{
+	__u32 address;
+	struct netstrfns *fns;
+
+	if (addr == NULL || net_num_list == NULL)
+		return 0;
+
+	fns = type2net_info(LNET_NETTYP(LNET_NIDNET(nid)));
+	if (fns == NULL)
+		return 0;
+
+	address = LNET_NIDADDR(nid);
+
+	/* both the address and the net number have to match */
+	if (fns->nf_match_addr(address, addr) &&
+	    cfs_match_net(LNET_NIDNET(nid), net_type, net_num_list))
+		return 1;
+
+	return 0;
+}
/**
* Print the network part of the nidrange \a nr into the specified \a buffer.
*
api.h \
lib-lnet.h \
lib-types.h \
+ udsp.h \
socklnd.h
extern struct kmem_cache *lnet_mes_cachep; /* MEs kmem_cache */
extern struct kmem_cache *lnet_small_mds_cachep; /* <= LNET_SMALL_MD_SIZE bytes
* MDs kmem_cache */
+extern struct kmem_cache *lnet_udsp_cachep;
extern struct kmem_cache *lnet_rspt_cachep;
extern struct kmem_cache *lnet_msg_cachep;
struct lnet_ni *lnet_get_next_ni_locked(struct lnet_net *mynet,
struct lnet_ni *prev);
struct lnet_ni *lnet_get_ni_idx_locked(int idx);
+int lnet_get_net_healthv_locked(struct lnet_net *net);
extern int libcfs_ioctl_getdata(struct libcfs_ioctl_hdr **hdr_pp,
struct libcfs_ioctl_hdr __user *uparam);
struct lnet_process_id __user *ids);
extern void lnet_peer_ni_set_healthv(lnet_nid_t nid, int value, bool all);
extern void lnet_peer_ni_add_to_recoveryq_locked(struct lnet_peer_ni *lpni);
+extern int lnet_peer_add_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid);
+extern void lnet_peer_clr_pref_nids(struct lnet_peer_ni *lpni);
+extern int lnet_peer_del_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid);
+void lnet_peer_ni_set_selection_priority(struct lnet_peer_ni *lpni,
+ __u32 priority);
void lnet_router_debugfs_init(void);
void lnet_router_debugfs_fini(void);
int lnet_dyn_del_ni(struct lnet_ioctl_config_ni *conf);
int lnet_clear_lazy_portal(struct lnet_ni *ni, int portal, char *reason);
struct lnet_net *lnet_get_net_locked(__u32 net_id);
+void lnet_net_clr_pref_rtrs(struct lnet_net *net);
+int lnet_net_add_pref_rtr(struct lnet_net *net, lnet_nid_t gw_nid);
int lnet_islocalnid(lnet_nid_t nid);
int lnet_islocalnet(__u32 net);
void lnet_counters_get_common(struct lnet_counters_common *common);
int lnet_counters_get(struct lnet_counters *counters);
void lnet_counters_reset(void);
+/* Store \a priority as the selection priority of \a ni.
+ * NOTE(review): the _locked suffix suggests the caller must already hold
+ * the LNet net lock -- confirm against the callers.
+ */
+static inline void
+lnet_ni_set_sel_priority_locked(struct lnet_ni *ni, __u32 priority)
+{
+ ni->ni_sel_priority = priority;
+}
+
+/* Store \a priority as the selection priority of \a net.
+ * NOTE(review): the _locked suffix suggests the caller must already hold
+ * the LNet net lock -- confirm against the callers.
+ */
+static inline void
+lnet_net_set_sel_priority_locked(struct lnet_net *net, __u32 priority)
+{
+ net->net_sel_priority = priority;
+}
unsigned int lnet_iov_nob(unsigned int niov, struct kvec *iov);
unsigned int lnet_kiov_nob(unsigned int niov, struct bio_vec *iov);
struct lnet_peer_net *lnet_peer_get_net_locked(struct lnet_peer *peer,
__u32 net_id);
bool lnet_peer_is_pref_nid_locked(struct lnet_peer_ni *lpni, lnet_nid_t nid);
+int lnet_peer_add_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid);
+void lnet_peer_clr_pref_nids(struct lnet_peer_ni *lpni);
+bool lnet_peer_is_pref_rtr_locked(struct lnet_peer_ni *lpni, lnet_nid_t gw_nid);
+void lnet_peer_clr_pref_rtrs(struct lnet_peer_ni *lpni);
+int lnet_peer_add_pref_rtr(struct lnet_peer_ni *lpni, lnet_nid_t nid);
int lnet_peer_ni_set_non_mr_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid);
int lnet_add_peer_ni(lnet_nid_t key_nid, lnet_nid_t nid, bool mr);
int lnet_del_peer_ni(lnet_nid_t key_nid, lnet_nid_t nid);
__u32 *peer_tx_qnob);
int lnet_get_peer_ni_hstats(struct lnet_ioctl_peer_ni_hstats *stats);
+/* Store \a priority as the selection priority of peer net \a lpn.
+ * NOTE(review): the _locked suffix suggests the caller must already hold
+ * the LNet net lock -- confirm against the callers.
+ */
+static inline void
+lnet_peer_net_set_sel_priority_locked(struct lnet_peer_net *lpn, __u32 priority)
+{
+ lpn->lpn_sel_priority = priority;
+}
+
+
static inline struct lnet_peer_net *
lnet_find_peer_net_locked(struct lnet_peer *peer, __u32 net_id)
{
lnet_atomic_add_unless_max(healthv, value, LNET_MAX_HEALTH_VALUE);
}
+/* Count the entries on \a list by walking it once; 0 for an empty list. */
+static inline int
+lnet_get_list_len(struct list_head *list)
+{
+	int nentries = 0;
+	struct list_head *pos;
+
+	list_for_each(pos, list)
+		++nentries;
+
+	return nentries;
+}
+
void lnet_incr_stats(struct lnet_element_stats *stats,
enum lnet_msg_type msg_type,
enum lnet_stats_type stats_type);
* All local and peer NIs created have their health default to this value.
*/
#define LNET_MAX_HEALTH_VALUE 1000
+#define LNET_MAX_SELECTION_PRIORITY UINT_MAX
/* forward refs */
struct lnet_libmd;
* lnet/include/lnet/nidstr.h */
__u32 net_id;
- /* priority of the network */
- __u32 net_prio;
+ /* round robin selection */
+ __u32 net_seq;
/* total number of CPTs in the array */
__u32 net_ncpts;
/* cumulative CPTs of all NIs in this net */
__u32 *net_cpts;
+ /* relative net selection priority */
+ __u32 net_sel_priority;
+
/* network tunables */
struct lnet_ioctl_config_lnd_cmn_tunables net_tunables;
/* protects access to net_last_alive */
spinlock_t net_lock;
+
+ /* list of router nids preferred for this network */
+ struct list_head net_rtr_pref_nids;
};
struct lnet_ni {
*/
atomic_t ni_fatal_error_on;
+ /* the relative selection priority of this NI */
+ __u32 ni_sel_priority;
+
/*
* equivalent interfaces to use
* This is an array because socklnd bonding can still be configured
#define LNET_PING_INFO_TO_BUFFER(PINFO) \
container_of((PINFO), struct lnet_ping_buffer, pb_info)
+/* One NID kept on a linked list of NIDs (e.g. lnet_net::net_rtr_pref_nids) */
+struct lnet_nid_list {
+ /* chain on the owning list */
+ struct list_head nl_list;
+ /* the NID carried by this entry */
+ lnet_nid_t nl_nid;
+};
+
struct lnet_peer_ni {
/* chain on lpn_peer_nis */
struct list_head lpni_peer_nis;
/* preferred local nids: if only one, use lpni_pref.nid */
union lpni_pref {
lnet_nid_t nid;
- lnet_nid_t *nids;
+ struct list_head nids;
} lpni_pref;
+ /* list of router nids preferred for this peer NI */
+ struct list_head lpni_rtr_pref_nids;
+ /* The relative selection priority of this peer NI */
+ __u32 lpni_sel_priority;
/* number of preferred NIDs in lnpi_pref_nids */
__u32 lpni_pref_nnids;
};
/* selection sequence number */
__u32 lpn_seq;
+ /* relative peer net selection priority */
+ __u32 lpn_sel_priority;
+
/* reference count */
atomic_t lpn_refcount;
};
void **msc_resenders;
};
+/* These UDSP structures need to match the user space liblnetconfig structures
+ * in order for the marshall and unmarshall functions to be common.
+ */
+
+/* Net is described as a
+ * 1. net type
+ * 2. num range
+ */
+struct lnet_ud_net_descr {
+ /* net type */
+ __u32 udn_net_type;
+ /* net number range
+  * NOTE(review): presumably a list of struct cfs_expr_list -- confirm
+  */
+ struct list_head udn_net_num_range;
+};
+
+/* each NID range is defined as
+ * 1. net descriptor
+ * 2. address range descriptor
+ */
+struct lnet_ud_nid_descr {
+ /* net descriptor: net type plus net number range */
+ struct lnet_ud_net_descr ud_net_id;
+ /* address range descriptor
+  * NOTE(review): presumably a list of struct cfs_expr_list -- confirm
+  */
+ struct list_head ud_addr_range;
+ /* NOTE(review): looks like the memory size accounted to this
+  * descriptor -- confirm against the marshalling code
+  */
+ __u32 ud_mem_size;
+};
+
+/* a UDSP rule can have up to three user defined NID descriptors
+ * - src: defines the local NID range for the rule
+ * - dst: defines the peer NID range for the rule
+ * - rte: defines the router NID range for the rule
+ *
+ * An action union defines the action to take when the rule
+ * is matched
+ */
+struct lnet_udsp {
+ /* list linkage
+  * NOTE(review): presumably chains on the_lnet.ln_udsp_list -- confirm
+  */
+ struct list_head udsp_on_list;
+ /* index (position) of this policy */
+ __u32 udsp_idx;
+ /* src: local NID range for the rule */
+ struct lnet_ud_nid_descr udsp_src;
+ /* dst: peer NID range for the rule */
+ struct lnet_ud_nid_descr udsp_dst;
+ /* rte: router NID range for the rule */
+ struct lnet_ud_nid_descr udsp_rte;
+ /* selects which member of udsp_action applies */
+ enum lnet_udsp_action_type udsp_action_type;
+ /* action to take when the rule is matched */
+ union {
+ __u32 udsp_priority;
+ } udsp_action;
+};
+
/* Peer Discovery states */
#define LNET_DC_STATE_SHUTDOWN 0 /* not started */
#define LNET_DC_STATE_RUNNING 1 /* started up OK */
* work loops
*/
struct completion ln_started;
+ /* UDSP list */
+ struct list_head ln_udsp_list;
};
#endif
--- /dev/null
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2011, 2017, Intel Corporation.
+ *
+ * Copyright (c) 2018-2020 Data Direct Networks.
+ *
+ * This file is part of Lustre, https://wiki.whamcloud.com/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * Author: Amir Shehata
+ */
+
+#ifndef UDSP_H
+#define UDSP_H
+
+#include <lnet/lib-lnet.h>
+
+/**
+ * lnet_udsp_add_policy
+ * Add a policy \new in position \idx
+ * Must be called with api_mutex held
+ */
+int lnet_udsp_add_policy(struct lnet_udsp *new, int idx);
+
+/**
+ * lnet_udsp_get_policy
+ * get a policy in position \idx
+ * Must be called with api_mutex held
+ */
+struct lnet_udsp *lnet_udsp_get_policy(int idx);
+
+/**
+ * lnet_udsp_del_policy
+ * Delete a policy from position \idx
+ * Must be called with api_mutex held
+ */
+int lnet_udsp_del_policy(int idx);
+
+/**
+ * lnet_udsp_apply_policies
+ * apply all stored policies across the system
+ * Must be called with api_mutex held
+ * Must NOT be called with lnet_net_lock held
+ * udsp: NULL to apply on all existing udsps
+ * non-NULL to apply to specified udsp
+ * revert: true to revert policy application
+ */
+int lnet_udsp_apply_policies(struct lnet_udsp *udsp, bool revert);
+
+/**
+ * lnet_udsp_apply_policies_on_lpni
+ * apply all stored policies on specified \lpni
+ * Must be called with api_mutex held
+ * Must be called with LNET_LOCK_EX
+ */
+int lnet_udsp_apply_policies_on_lpni(struct lnet_peer_ni *lpni);
+
+/**
+ * lnet_udsp_apply_policies_on_lpn
+ * Must be called with api_mutex held
+ * apply all stored policies on specified \lpn
+ * Must be called with LNET_LOCK_EX
+ */
+int lnet_udsp_apply_policies_on_lpn(struct lnet_peer_net *lpn);
+
+/**
+ * lnet_udsp_apply_policies_on_ni
+ * apply all stored policies on specified \ni
+ * Must be called with api_mutex held
+ * Must be called with LNET_LOCK_EX
+ */
+int lnet_udsp_apply_policies_on_ni(struct lnet_ni *ni);
+
+/**
+ * lnet_udsp_apply_policies_on_net
+ * apply all stored policies on specified \net
+ * Must be called with api_mutex held
+ * Must be called with LNET_LOCK_EX
+ */
+int lnet_udsp_apply_policies_on_net(struct lnet_net *net);
+
+/**
+ * lnet_udsp_alloc
+ * Allocates a UDSP block and initializes it.
+ * Return NULL if allocation fails
+ * pointer to UDSP otherwise.
+ */
+struct lnet_udsp *lnet_udsp_alloc(void);
+
+/**
+ * lnet_udsp_free
+ * Free a UDSP and all its descriptors
+ */
+void lnet_udsp_free(struct lnet_udsp *udsp);
+
+/**
+ * lnet_udsp_destroy
+ * Free all the UDSPs
+ * shutdown: true to indicate shutdown in progress
+ */
+void lnet_udsp_destroy(bool shutdown);
+
+/**
+ * lnet_get_udsp_size
+ * Return the size needed to store the marshalled UDSP
+ */
+size_t lnet_get_udsp_size(struct lnet_udsp *udsp);
+
+/**
+ * lnet_udsp_marshal
+ * Marshal the udsp into the bulk memory provided.
+ * Return success/failure.
+ */
+int lnet_udsp_marshal(struct lnet_udsp *udsp,
+ struct lnet_ioctl_udsp *ioc_udsp);
+/**
+ * lnet_udsp_demarshal_add
+ * Given a bulk containing a single UDSP,
+ * demarshal and populate a udsp structure then add policy
+ */
+int lnet_udsp_demarshal_add(void *bulk, __u32 bulk_size);
+
+/**
+ * lnet_udsp_get_construct_info
+ * get information of how the UDSP policies impacted the given
+ * construct.
+ */
+void lnet_udsp_get_construct_info(struct lnet_ioctl_construct_udsp_info *info);
+
+#endif /* UDSP_H */
#define IOC_LIBCFS_SET_HEALHV _IOWR(IOC_LIBCFS_TYPE, 102, IOCTL_CONFIG_SIZE)
#define IOC_LIBCFS_GET_LOCAL_HSTATS _IOWR(IOC_LIBCFS_TYPE, 103, IOCTL_CONFIG_SIZE)
#define IOC_LIBCFS_GET_RECOVERY_QUEUE _IOWR(IOC_LIBCFS_TYPE, 104, IOCTL_CONFIG_SIZE)
-#define IOC_LIBCFS_MAX_NR 104
+#define IOC_LIBCFS_ADD_UDSP _IOWR(IOC_LIBCFS_TYPE, 105, IOCTL_CONFIG_SIZE)
+#define IOC_LIBCFS_DEL_UDSP _IOWR(IOC_LIBCFS_TYPE, 106, IOCTL_CONFIG_SIZE)
+#define IOC_LIBCFS_GET_UDSP_SIZE _IOWR(IOC_LIBCFS_TYPE, 107, IOCTL_CONFIG_SIZE)
+#define IOC_LIBCFS_GET_UDSP _IOWR(IOC_LIBCFS_TYPE, 108, IOCTL_CONFIG_SIZE)
+#define IOC_LIBCFS_GET_CONST_UDSP_INFO _IOWR(IOC_LIBCFS_TYPE, 109, IOCTL_CONFIG_SIZE)
+#define IOC_LIBCFS_MAX_NR 109
extern int libcfs_ioctl_data_adjust(struct libcfs_ioctl_data *data);
struct lnet_counters st_cntrs;
};
+/* An IP, numeric NID or a Net number is composed of 1 or more of these
+ * descriptor structures.
+ */
+/* NOTE(review): field names parallel re_lo/re_hi/re_stride of
+ * struct cfs_range_expr; presumably the same low/high/stride meaning --
+ * confirm.
+ */
+struct lnet_range_expr {
+ __u32 re_lo;
+ __u32 re_hi;
+ __u32 re_stride;
+};
+
+/* le_count identifies the number of lnet_range_expr in the bulk
+ * which follows
+ */
+struct lnet_expressions {
+ /* number of struct lnet_range_expr that follow in the bulk */
+ __u32 le_count;
+};
+
+/* A net descriptor has the net type, IE: O2IBLND, SOCKLND, etc and an
+ * expression describing a net number range.
+ */
+struct lnet_ioctl_udsp_net_descr {
+ /* net type, IE: O2IBLND, SOCKLND, etc */
+ __u32 ud_net_type;
+ /* expression describing the net number range */
+ struct lnet_expressions ud_net_num_expr;
+};
+
+/* The UDSP descriptor header contains the type of matching criteria, SRC,
+ * DST, RTE, etc and how many lnet_expressions compose the LNet portion of
+ * the LNet NID. For example an IP can be
+ * composed of 4 lnet_expressions , a gni can be composed of 1
+ */
+struct lnet_ioctl_udsp_descr_hdr {
+ /* The literals SRC, DST and RTE are encoded
+ * here.
+ */
+ __u32 ud_descr_type;
+ /* how many lnet_expressions compose the address portion of the NID */
+ __u32 ud_descr_count;
+};
+
+/* each matching expression in the UDSP is described with this.
+ * The bulk format is as follows:
+ * 1. 1x struct lnet_ioctl_udsp_net_descr
+ * -> the net part of the NID
+ * 2. >=0 struct lnet_expressions
+ * -> the address part of the NID
+ */
+struct lnet_ioctl_udsp_descr {
+ /* descriptor header: matching criteria type and expression count */
+ struct lnet_ioctl_udsp_descr_hdr iud_src_hdr;
+ /* the net part of the NID */
+ struct lnet_ioctl_udsp_net_descr iud_net;
+};
+
+/* The cumulative UDSP descriptor
+ * The bulk format is as follows:
+ * 1. >=1 struct lnet_ioctl_udsp_descr
+ *
+ * The size indicated in iou_hdr is the total size of the UDSP.
+ *
+ */
+struct lnet_ioctl_udsp {
+ /* common ioctl header; its size field is the total size of the UDSP */
+ struct libcfs_ioctl_hdr iou_hdr;
+ /* index of the UDSP; IOC_LIBCFS_GET_UDSP_SIZE overwrites it with the
+  * size of the UDSP found at that index
+  */
+ __s32 iou_idx;
+ /* NOTE(review): presumably an enum lnet_udsp_action_type value --
+  * confirm
+  */
+ __u32 iou_action_type;
+ /* NOTE(review): presumably the size of the buffer at iou_bulk --
+  * confirm
+  */
+ __u32 iou_bulk_size;
+ /* action payload */
+ union {
+ __u32 priority;
+ } iou_action;
+ /* user space pointer to the bulk holding the descriptors */
+ void __user *iou_bulk;
+};
+
+/* structure used to request udsp instantiation information on the
+ * specified construct.
+ * cud_nid: the NID of the local or remote NI to pull info on.
+ * cud_nid_priority: NID prio of the requested NID.
+ * cud_net_priority: net prio of network of the requested NID.
+ * cud_pref_nid: array of preferred NIDs if it exists.
+ */
+struct lnet_ioctl_construct_udsp_info {
+ /* common ioctl header */
+ struct libcfs_ioctl_hdr cud_hdr;
+ /* NOTE(review): presumably set when cud_nid names a peer NI rather
+  * than a local NI -- confirm
+  */
+ __u32 cud_peer:1;
+ /* the NID of the local or remote NI to pull info on */
+ lnet_nid_t cud_nid;
+ /* NID prio of the requested NID */
+ __u32 cud_nid_priority;
+ /* net prio of network of the requested NID */
+ __u32 cud_net_priority;
+ /* array of preferred NIDs if it exists */
+ lnet_nid_t cud_pref_nid[LNET_MAX_SHOW_NUM_NID];
+ /* NOTE(review): presumably the preferred router NIDs -- confirm */
+ lnet_nid_t cud_pref_rtr_nid[LNET_MAX_SHOW_NUM_NID];
+};
+
#endif /* _LNET_DLC_H_ */
/** Request that no acknowledgment should be generated. */
LNET_NOACK_REQ
};
+
+/**
+ * UDSP action types. There are two available actions:
+ * 1. PRIORITY - set priority of matching LNet constructs
+ * 2. PREFERRED LIST - set preferred list of matching LNet constructs
+ */
+enum lnet_udsp_action_type {
+ /** NOTE(review): presumably "no action selected" -- confirm */
+ EN_LNET_UDSP_ACTION_NONE = 0,
+ /** assign a priority to matching constructs */
+ EN_LNET_UDSP_ACTION_PRIORITY = 1,
+ /** assign a preferred list of NIDs to matching constructs */
+ EN_LNET_UDSP_ACTION_PREFERRED_LIST = 2,
+};
+
/** @} lnet_data */
/** @} lnet */
__u32 libcfs_str2net(const char *str);
lnet_nid_t libcfs_str2nid(const char *str);
int libcfs_str2anynid(lnet_nid_t *nid, const char *str);
+int libcfs_num_parse(char *str, int len, struct list_head *list);
char *libcfs_id2str(struct lnet_process_id id);
void cfs_free_nidlist(struct list_head *list);
int cfs_parse_nidlist(char *str, int len, struct list_head *list);
+int cfs_parse_nid_parts(char *str, struct list_head *addr,
+ struct list_head *net_num, __u32 *net_type);
int cfs_print_nidlist(char *buffer, int count, struct list_head *list);
int cfs_match_nid(lnet_nid_t nid, struct list_head *list);
int cfs_expand_nidlist(struct list_head *nidlist, lnet_nid_t *lnet_nidlist,
int max_nids);
+int cfs_match_nid_net(lnet_nid_t nid, __u32 net, struct list_head *net_num_list,
+ struct list_head *addr);
+int cfs_match_net(__u32 net_id, __u32 net_type,
+ struct list_head *net_num_list);
+
int cfs_ip_addr_parse(char *str, int len, struct list_head *list);
int cfs_ip_addr_match(__u32 addr, struct list_head *list);
int cfs_nidrange_find_min_max(struct list_head *nidlist, char *min_nid,
char *max_nid, __kernel_size_t nidstr_length);
+void cfs_expr_list_free_list(struct list_head *list);
struct netstrfns {
__u32 nf_type;
lnet-objs := api-ni.o config.o nidstrings.o
lnet-objs += lib-me.o lib-msg.o lib-md.o lib-ptl.o
lnet-objs += lib-socket.o lib-move.o module.o lo.o
-lnet-objs += router.o router_proc.o acceptor.o peer.o net_fault.o
+lnet-objs += router.o router_proc.o acceptor.o peer.o net_fault.o udsp.o
default: all
#include <linux/sched/signal.h>
#endif
+#include <lnet/udsp.h>
#include <lnet/lib-lnet.h>
#define D_LNI D_CONSOLE
struct kmem_cache *lnet_mes_cachep; /* MEs kmem_cache */
struct kmem_cache *lnet_small_mds_cachep; /* <= LNET_SMALL_MD_SIZE bytes
* MDs kmem_cache */
+struct kmem_cache *lnet_udsp_cachep; /* udsp cache */
struct kmem_cache *lnet_rspt_cachep; /* response tracker cache */
struct kmem_cache *lnet_msg_cachep;
if (!lnet_small_mds_cachep)
return -ENOMEM;
+ lnet_udsp_cachep = kmem_cache_create("lnet_udsp",
+ sizeof(struct lnet_udsp),
+ 0, 0, NULL);
+ if (!lnet_udsp_cachep)
+ return -ENOMEM;
+
lnet_rspt_cachep = kmem_cache_create("lnet_rspt", sizeof(struct lnet_rsp_tracker),
0, 0, NULL);
if (!lnet_rspt_cachep)
lnet_msg_cachep = NULL;
}
-
if (lnet_rspt_cachep) {
kmem_cache_destroy(lnet_rspt_cachep);
lnet_rspt_cachep = NULL;
}
+ if (lnet_udsp_cachep) {
+ kmem_cache_destroy(lnet_udsp_cachep);
+ lnet_udsp_cachep = NULL;
+ }
+
if (lnet_small_mds_cachep) {
kmem_cache_destroy(lnet_small_mds_cachep);
lnet_small_mds_cachep = NULL;
INIT_LIST_HEAD(&the_lnet.ln_dc_expired);
INIT_LIST_HEAD(&the_lnet.ln_mt_localNIRecovq);
INIT_LIST_HEAD(&the_lnet.ln_mt_peerNIRecovq);
+ INIT_LIST_HEAD(&the_lnet.ln_udsp_list);
init_waitqueue_head(&the_lnet.ln_dc_waitq);
the_lnet.ln_mt_handler = NULL;
init_completion(&the_lnet.ln_started);
the_lnet.ln_counters = NULL;
}
lnet_destroy_remote_nets_table();
+ lnet_udsp_destroy(true);
lnet_slab_cleanup();
return 0;
return NULL;
}
+/*
+ * Drop every entry of net->net_rtr_pref_nids.
+ *
+ * The list is detached under lnet_net_lock(LNET_LOCK_EX) and the entries
+ * are freed after the lock has been released.
+ */
+void
+lnet_net_clr_pref_rtrs(struct lnet_net *net)
+{
+	struct list_head zombies;
+	struct lnet_nid_list *entry;
+
+	INIT_LIST_HEAD(&zombies);
+
+	lnet_net_lock(LNET_LOCK_EX);
+	list_splice_init(&net->net_rtr_pref_nids, &zombies);
+	lnet_net_unlock(LNET_LOCK_EX);
+
+	while (!list_empty(&zombies)) {
+		entry = list_first_entry(&zombies, struct lnet_nid_list,
+					 nl_list);
+		list_del_init(&entry->nl_list);
+		LIBCFS_FREE(entry, sizeof(*entry));
+	}
+}
+
+
+/*
+ * Add \a gw_nid to the preferred router NIDs of \a net.
+ *
+ * Returns 0 on success, -EEXIST when the NID is already on the list and
+ * -ENOMEM when the new entry cannot be allocated.
+ */
+int
+lnet_net_add_pref_rtr(struct lnet_net *net,
+		      lnet_nid_t gw_nid)
+__must_hold(&the_lnet.ln_api_mutex)
+{
+	struct lnet_nid_list *entry;
+
+	/* The duplicate scan runs without the net lock: this function is
+	 * called with api_mutex held, and the list is only modified while
+	 * applying a UDSP, which also happens under api_mutex.
+	 */
+	list_for_each_entry(entry, &net->net_rtr_pref_nids, nl_list) {
+		if (entry->nl_nid != gw_nid)
+			continue;
+
+		return -EEXIST;
+	}
+
+	LIBCFS_ALLOC(entry, sizeof(*entry));
+	if (entry == NULL)
+		return -ENOMEM;
+
+	entry->nl_nid = gw_nid;
+
+	/* Take the net lock so the insertion does not race with the
+	 * checks done by the selection algorithm
+	 */
+	lnet_net_lock(LNET_LOCK_EX);
+	list_add(&entry->nl_list, &net->net_rtr_pref_nids);
+	lnet_net_unlock(LNET_LOCK_EX);
+
+	return 0;
+}
+
+
+/*
+ * Return true when \a rtr_nid is on the preferred router list of \a net,
+ * false otherwise (an empty list never matches).
+ */
+bool
+lnet_net_is_pref_rtr_locked(struct lnet_net *net, lnet_nid_t rtr_nid)
+{
+	struct lnet_nid_list *entry;
+	bool found = false;
+
+	CDEBUG(D_NET, "%s: rtr pref emtpy: %d\n",
+	       libcfs_net2str(net->net_id),
+	       list_empty(&net->net_rtr_pref_nids));
+
+	/* an empty list simply skips the loop */
+	list_for_each_entry(entry, &net->net_rtr_pref_nids, nl_list) {
+		CDEBUG(D_NET, "Comparing pref %s with gw %s\n",
+		       libcfs_nid2str(entry->nl_nid),
+		       libcfs_nid2str(rtr_nid));
+		if (rtr_nid == entry->nl_nid) {
+			found = true;
+			break;
+		}
+	}
+
+	return found;
+}
+
+
unsigned int
lnet_nid_cpt_hash(lnet_nid_t nid, unsigned int number)
{
return NULL;
}
+/*
+ * Return the highest ni_healthv among the NIs on net->net_ni_list, or 0
+ * when the list is empty or no NI has a positive health value.
+ */
+int lnet_get_net_healthv_locked(struct lnet_net *net)
+{
+	struct lnet_ni *ni;
+	int max_healthv = 0;
+
+	list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+		int cur_healthv = atomic_read(&ni->ni_healthv);
+
+		if (cur_healthv <= max_healthv)
+			continue;
+
+		max_healthv = cur_healthv;
+	}
+
+	return max_healthv;
+}
+
+
struct lnet_ni *
lnet_get_next_ni_locked(struct lnet_net *mynet, struct lnet_ni *prev)
{
static int lnet_add_net_common(struct lnet_net *net,
struct lnet_ioctl_config_lnd_tunables *tun)
{
- __u32 net_id;
+ struct lnet_handle_md ping_mdh;
struct lnet_ping_buffer *pbuf;
- struct lnet_handle_md ping_mdh;
- int rc;
struct lnet_remotenet *rnet;
- int net_ni_count;
+ struct lnet_ni *ni;
+ int net_ni_count;
+ __u32 net_id;
+ int rc;
lnet_net_lock(LNET_LOCK_EX);
rnet = lnet_find_rnet_locked(net->net_id);
lnet_net_lock(LNET_LOCK_EX);
net = lnet_get_net_locked(net_id);
- lnet_net_unlock(LNET_LOCK_EX);
-
LASSERT(net);
+ /* apply the UDSPs */
+ rc = lnet_udsp_apply_policies_on_net(net);
+ if (rc)
+ CERROR("Failed to apply UDSPs on local net %s\n",
+ libcfs_net2str(net->net_id));
+
+ /* At this point we lost track of which NI was just added, so we
+ * just re-apply the policies on all of the NIs on this net
+ */
+ list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+ rc = lnet_udsp_apply_policies_on_ni(ni);
+ if (rc)
+ CERROR("Failed to apply UDSPs on ni %s\n",
+ libcfs_nid2str(ni->ni_nid));
+ }
+ lnet_net_unlock(LNET_LOCK_EX);
+
/*
* Start the acceptor thread if this is the first network
* being added that requires the thread.
return 0;
}
+ case IOC_LIBCFS_ADD_UDSP: {
+ struct lnet_ioctl_udsp *ioc_udsp = arg;
+ __u32 bulk_size = ioc_udsp->iou_hdr.ioc_len;
+
+ mutex_lock(&the_lnet.ln_api_mutex);
+ rc = lnet_udsp_demarshal_add(arg, bulk_size);
+ if (!rc) {
+ rc = lnet_udsp_apply_policies(NULL, false);
+ CDEBUG(D_NET, "policy application returned %d\n", rc);
+ rc = 0;
+ }
+ mutex_unlock(&the_lnet.ln_api_mutex);
+
+ return rc;
+ }
+
+ case IOC_LIBCFS_DEL_UDSP: {
+ struct lnet_ioctl_udsp *ioc_udsp = arg;
+ int idx = ioc_udsp->iou_idx;
+
+ if (ioc_udsp->iou_hdr.ioc_len < sizeof(*ioc_udsp))
+ return -EINVAL;
+
+ mutex_lock(&the_lnet.ln_api_mutex);
+ rc = lnet_udsp_del_policy(idx);
+ if (!rc) {
+ rc = lnet_udsp_apply_policies(NULL, false);
+ CDEBUG(D_NET, "policy re-application returned %d\n",
+ rc);
+ rc = 0;
+ }
+ mutex_unlock(&the_lnet.ln_api_mutex);
+
+ return rc;
+ }
+
+ case IOC_LIBCFS_GET_UDSP_SIZE: {
+ struct lnet_ioctl_udsp *ioc_udsp = arg;
+ struct lnet_udsp *udsp;
+
+ if (ioc_udsp->iou_hdr.ioc_len < sizeof(*ioc_udsp))
+ return -EINVAL;
+
+ rc = 0;
+
+ mutex_lock(&the_lnet.ln_api_mutex);
+ udsp = lnet_udsp_get_policy(ioc_udsp->iou_idx);
+ if (!udsp) {
+ rc = -ENOENT;
+ } else {
+ /* coming in iou_idx will hold the idx of the udsp
+ * to get the size of. going out the iou_idx will
+ * hold the size of the UDSP found at the passed
+ * in index.
+ */
+ ioc_udsp->iou_idx = lnet_get_udsp_size(udsp);
+ if (ioc_udsp->iou_idx < 0)
+ rc = -EINVAL;
+ }
+ mutex_unlock(&the_lnet.ln_api_mutex);
+
+ return rc;
+ }
+
+ case IOC_LIBCFS_GET_UDSP: {
+ struct lnet_ioctl_udsp *ioc_udsp = arg;
+ struct lnet_udsp *udsp;
+
+ if (ioc_udsp->iou_hdr.ioc_len < sizeof(*ioc_udsp))
+ return -EINVAL;
+
+ rc = 0;
+
+ mutex_lock(&the_lnet.ln_api_mutex);
+ udsp = lnet_udsp_get_policy(ioc_udsp->iou_idx);
+ if (!udsp)
+ rc = -ENOENT;
+ else
+ rc = lnet_udsp_marshal(udsp, ioc_udsp);
+ mutex_unlock(&the_lnet.ln_api_mutex);
+
+ return rc;
+ }
+
+ case IOC_LIBCFS_GET_CONST_UDSP_INFO: {
+ struct lnet_ioctl_construct_udsp_info *info = arg;
+
+ if (info->cud_hdr.ioc_len < sizeof(*info))
+ return -EINVAL;
+
+ CDEBUG(D_NET, "GET_UDSP_INFO for %s\n",
+ libcfs_nid2str(info->cud_nid));
+
+ mutex_lock(&the_lnet.ln_api_mutex);
+ lnet_udsp_get_construct_info(info);
+ mutex_unlock(&the_lnet.ln_api_mutex);
+
+ return 0;
+ }
+
default:
ni = lnet_net2ni_addref(data->ioc_net);
if (ni == NULL)
INIT_LIST_HEAD(&net->net_ni_list);
INIT_LIST_HEAD(&net->net_ni_added);
INIT_LIST_HEAD(&net->net_ni_zombie);
+ INIT_LIST_HEAD(&net->net_rtr_pref_nids);
spin_lock_init(&net->net_lock);
net->net_id = net_id;
net->net_last_alive = ktime_get_real_seconds();
+ net->net_sel_priority = LNET_MAX_SELECTION_PRIORITY;
+
/* initialize global paramters to undefiend */
net->net_tunables.lct_peer_timeout = -1;
net->net_tunables.lct_max_tx_credits = -1;
ni->ni_net_ns = get_net(&init_net);
ni->ni_state = LNET_NI_STATE_INIT;
+ ni->ni_sel_priority = LNET_MAX_SELECTION_PRIORITY;
list_add_tail(&ni->ni_netlist, &net->net_ni_added);
/*
}
}
-static int
-lnet_compare_gw_lpnis(struct lnet_peer_ni *p1, struct lnet_peer_ni *p2)
-{
- if (p1->lpni_txqnob < p2->lpni_txqnob)
- return 1;
-
- if (p1->lpni_txqnob > p2->lpni_txqnob)
- return -1;
-
- if (p1->lpni_txcredits > p2->lpni_txcredits)
- return 1;
-
- if (p1->lpni_txcredits < p2->lpni_txcredits)
- return -1;
-
- return 0;
-}
-
static struct lnet_peer_ni *
lnet_select_peer_ni(struct lnet_ni *best_ni, lnet_nid_t dst_nid,
struct lnet_peer *peer,
INT_MIN;
int best_lpni_healthv = (best_lpni) ?
atomic_read(&best_lpni->lpni_healthv) : 0;
- bool preferred = false;
- bool ni_is_pref;
+ bool best_lpni_is_preferred = false;
+ bool lpni_is_preferred;
int lpni_healthv;
+ __u32 lpni_sel_prio;
+ __u32 best_sel_prio = LNET_MAX_SELECTION_PRIORITY;
while ((lpni = lnet_get_next_peer_ni_locked(peer, peer_net, lpni))) {
/*
* preferred, then let's use it
*/
if (best_ni) {
- ni_is_pref = lnet_peer_is_pref_nid_locked(lpni,
+ lpni_is_preferred = lnet_peer_is_pref_nid_locked(lpni,
best_ni->ni_nid);
- CDEBUG(D_NET, "%s ni_is_pref = %d\n",
- libcfs_nid2str(best_ni->ni_nid), ni_is_pref);
+ CDEBUG(D_NET, "%s lpni_is_preferred = %d\n",
+ libcfs_nid2str(best_ni->ni_nid),
+ lpni_is_preferred);
} else {
- ni_is_pref = false;
+ lpni_is_preferred = false;
}
lpni_healthv = atomic_read(&lpni->lpni_healthv);
+ lpni_sel_prio = lpni->lpni_sel_priority;
if (best_lpni)
- CDEBUG(D_NET, "%s c:[%d, %d], s:[%d, %d]\n",
+ CDEBUG(D_NET, "n:[%s, %s] h:[%d, %d] p:[%d, %d] c:[%d, %d] s:[%d, %d]\n",
libcfs_nid2str(lpni->lpni_nid),
+ libcfs_nid2str(best_lpni->lpni_nid),
+ lpni_healthv, best_lpni_healthv,
+ lpni_sel_prio, best_sel_prio,
lpni->lpni_txcredits, best_lpni_credits,
lpni->lpni_seq, best_lpni->lpni_seq);
+ else
+ goto select_lpni;
/* pick the healthiest peer ni */
- if (lpni_healthv < best_lpni_healthv) {
+ if (lpni_healthv < best_lpni_healthv)
continue;
- } else if (lpni_healthv > best_lpni_healthv) {
- best_lpni_healthv = lpni_healthv;
+ else if (lpni_healthv > best_lpni_healthv) {
+ if (best_lpni_is_preferred)
+ best_lpni_is_preferred = false;
+ goto select_lpni;
+ }
+
+ if (lpni_sel_prio > best_sel_prio)
+ continue;
+ else if (lpni_sel_prio < best_sel_prio) {
+ if (best_lpni_is_preferred)
+ best_lpni_is_preferred = false;
+ goto select_lpni;
+ }
+
/* if this is a preferred peer use it */
- } else if (!preferred && ni_is_pref) {
- preferred = true;
- } else if (preferred && !ni_is_pref) {
- /*
- * this is not the preferred peer so let's ignore
+ if (!best_lpni_is_preferred && lpni_is_preferred) {
+ best_lpni_is_preferred = true;
+ goto select_lpni;
+ } else if (best_lpni_is_preferred && !lpni_is_preferred) {
+ /* this is not the preferred peer so let's ignore
* it.
*/
continue;
- } else if (lpni->lpni_txcredits < best_lpni_credits) {
- /*
- * We already have a peer that has more credits
+ }
+
+ if (lpni->lpni_txcredits < best_lpni_credits)
+ /* We already have a peer that has more credits
* available than this one. No need to consider
* this peer further.
*/
continue;
- } else if (lpni->lpni_txcredits == best_lpni_credits) {
- /*
- * The best peer found so far and the current peer
- * have the same number of available credits let's
- * make sure to select between them using Round
- * Robin
- */
- if (best_lpni) {
- if (best_lpni->lpni_seq <= lpni->lpni_seq)
- continue;
- }
- }
+ else if (lpni->lpni_txcredits > best_lpni_credits)
+ goto select_lpni;
+ /* The best peer found so far and the current peer
+ * have the same number of available credits let's
+ * make sure to select between them using Round Robin
+ */
+ if (best_lpni && (best_lpni->lpni_seq <= lpni->lpni_seq))
+ continue;
+select_lpni:
+ best_lpni_is_preferred = lpni_is_preferred;
+ best_lpni_healthv = lpni_healthv;
+ best_sel_prio = lpni_sel_prio;
best_lpni = lpni;
best_lpni_credits = lpni->lpni_txcredits;
}
return NULL;
}
+/* Rank two gateway peer NIs against each other.
+ *
+ * The amount of data queued for transmit (lpni_txqnob) is compared first,
+ * the smaller value winning; when those are equal the available transmit
+ * credits (lpni_txcredits) decide, the larger value winning.
+ *
+ * \retval 1	\a lpni1 is the better choice
+ * \retval -1	\a lpni2 is the better choice
+ * \retval 0	both fields are equal
+ */
+static int
+lnet_compare_gw_lpnis(struct lnet_peer_ni *lpni1, struct lnet_peer_ni *lpni2)
+{
+	/* fewer bytes queued beats everything else */
+	if (lpni1->lpni_txqnob != lpni2->lpni_txqnob)
+		return lpni1->lpni_txqnob < lpni2->lpni_txqnob ? 1 : -1;
+
+	/* queues are level, so fall back on the credit count */
+	if (lpni1->lpni_txcredits != lpni2->lpni_txcredits)
+		return lpni1->lpni_txcredits > lpni2->lpni_txcredits ? 1 : -1;
+
+	return 0;
+}
+
/* Compare route priorities and hop counts */
static int
lnet_compare_routes(struct lnet_route *r1, struct lnet_route *r2)
static struct lnet_route *
lnet_find_route_locked(struct lnet_remotenet *rnet, __u32 src_net,
+ struct lnet_peer_ni *remote_lpni,
struct lnet_route **prev_route,
struct lnet_peer_ni **gwni)
{
struct lnet_route *last_route;
struct lnet_route *route;
int rc;
+ bool best_rte_is_preferred = false;
+ lnet_nid_t gw_pnid;
CDEBUG(D_NET, "Looking up a route to %s, from %s\n",
libcfs_net2str(rnet->lrn_net), libcfs_net2str(src_net));
list_for_each_entry(route, &rnet->lrn_routes, lr_list) {
if (!lnet_is_route_alive(route))
continue;
+ gw_pnid = route->lr_gateway->lp_primary_nid;
- /*
- * Restrict the selection of the router NI on the src_net
- * provided. If the src_net is LNET_NID_ANY, then select
- * the best interface available.
+ /* no protection on below fields, but it's harmless */
+ if (last_route && (last_route->lr_seq - route->lr_seq < 0))
+ last_route = route;
+
+ /* if the best route found is in the preferred list then
+ * tag it as preferred and use it later on. But if we
+ * didn't find any routes which are on the preferred list
+ * then just use the best route possible.
*/
- if (!best_route) {
+ rc = lnet_peer_is_pref_rtr_locked(remote_lpni, gw_pnid);
+
+ if (!best_route || (rc && !best_rte_is_preferred)) {
+ /* Restrict the selection of the router NI on the
+ * src_net provided. If the src_net is LNET_NID_ANY,
+ * then select the best interface available.
+ */
lpni = lnet_find_best_lpni(NULL, LNET_NID_ANY,
route->lr_gateway,
src_net);
- if (lpni) {
- best_route = last_route = route;
- best_gw_ni = lpni;
- } else {
- CDEBUG(D_NET, "Gateway %s does not have a peer NI on net %s\n",
- libcfs_nid2str(route->lr_gateway->lp_primary_nid),
+ if (!lpni) {
+ CDEBUG(D_NET,
+ "Gateway %s does not have a peer NI on net %s\n",
+ libcfs_nid2str(gw_pnid),
libcfs_net2str(src_net));
+ continue;
}
+ }
+
+ if (rc && !best_rte_is_preferred) {
+ /* This is the first preferred route we found,
+ * so it beats any route found previously
+ */
+ best_route = route;
+ if (!last_route)
+ last_route = route;
+ best_gw_ni = lpni;
+ best_rte_is_preferred = true;
+ CDEBUG(D_NET, "preferred gw = %s\n",
+ libcfs_nid2str(gw_pnid));
+ continue;
+ } else if ((!rc) && best_rte_is_preferred)
+ /* The best route we found so far is in the preferred
+ * list, so it beats any non-preferred route
+ */
+ continue;
+ if (!best_route) {
+ best_route = last_route = route;
+ best_gw_ni = lpni;
continue;
}
- /* no protection on below fields, but it's harmless */
- if (last_route->lr_seq - route->lr_seq < 0)
- last_route = route;
-
rc = lnet_compare_routes(route, best_route);
if (rc == -1)
continue;
+ /* Restrict the selection of the router NI on the
+ * src_net provided. If the src_net is LNET_NID_ANY,
+ * then select the best interface available.
+ */
lpni = lnet_find_best_lpni(NULL, LNET_NID_ANY,
route->lr_gateway,
src_net);
- /* restrict the lpni on the src_net if specified */
if (!lpni) {
- CDEBUG(D_NET, "Gateway %s does not have a peer NI on net %s\n",
- libcfs_nid2str(route->lr_gateway->lp_primary_nid),
+ CDEBUG(D_NET,
+ "Gateway %s does not have a peer NI on net %s\n",
+ libcfs_nid2str(gw_pnid),
libcfs_net2str(src_net));
continue;
}
unsigned int shortest_distance;
int best_credits;
int best_healthv;
+ __u32 best_sel_prio;
/*
* If there is no peer_ni that we can send to on this network,
return best_ni;
if (best_ni == NULL) {
+ best_sel_prio = LNET_MAX_SELECTION_PRIORITY;
shortest_distance = UINT_MAX;
best_credits = INT_MIN;
best_healthv = 0;
best_ni->ni_dev_cpt);
best_credits = atomic_read(&best_ni->ni_tx_credits);
best_healthv = atomic_read(&best_ni->ni_healthv);
+ best_sel_prio = best_ni->ni_sel_priority;
}
while ((ni = lnet_get_next_ni_locked(local_net, ni))) {
int ni_credits;
int ni_healthv;
int ni_fatal;
+ __u32 ni_sel_prio;
ni_credits = atomic_read(&ni->ni_tx_credits);
ni_healthv = atomic_read(&ni->ni_healthv);
ni_fatal = atomic_read(&ni->ni_fatal_error_on);
+ ni_sel_prio = ni->ni_sel_priority;
/*
* calculate the distance from the CPT on which
md_cpt,
ni->ni_dev_cpt);
- CDEBUG(D_NET, "compare ni %s [c:%d, d:%d, s:%d] with best_ni %s [c:%d, d:%d, s:%d]\n",
- libcfs_nid2str(ni->ni_nid), ni_credits, distance,
- ni->ni_seq, (best_ni) ? libcfs_nid2str(best_ni->ni_nid)
- : "not seleced", best_credits, shortest_distance,
- (best_ni) ? best_ni->ni_seq : 0);
-
/*
* All distances smaller than the NUMA range
* are treated equally.
* Select on health, shorter distance, available
* credits, then round-robin.
*/
- if (ni_fatal) {
+ if (ni_fatal)
continue;
- } else if (ni_healthv < best_healthv) {
+
+ if (best_ni)
+ CDEBUG(D_NET, "compare ni %s [c:%d, d:%d, s:%d, p:%u] with best_ni %s [c:%d, d:%d, s:%d, p:%u]\n",
+ libcfs_nid2str(ni->ni_nid), ni_credits, distance,
+ ni->ni_seq, ni_sel_prio,
+ (best_ni) ? libcfs_nid2str(best_ni->ni_nid)
+ : "not selected", best_credits, shortest_distance,
+ (best_ni) ? best_ni->ni_seq : 0,
+ best_sel_prio);
+ else
+ goto select_ni;
+
+ if (ni_healthv < best_healthv)
continue;
- } else if (ni_healthv > best_healthv) {
- best_healthv = ni_healthv;
- /*
- * If we're going to prefer this ni because it's
- * the healthiest, then we should set the
- * shortest_distance in the algorithm in case
- * there are multiple NIs with the same health but
- * different distances.
- */
- if (distance < shortest_distance)
- shortest_distance = distance;
- } else if (distance > shortest_distance) {
+ else if (ni_healthv > best_healthv)
+ goto select_ni;
+
+ if (ni_sel_prio > best_sel_prio)
continue;
- } else if (distance < shortest_distance) {
- shortest_distance = distance;
- } else if (ni_credits < best_credits) {
+ else if (ni_sel_prio < best_sel_prio)
+ goto select_ni;
+
+ if (distance > shortest_distance)
continue;
- } else if (ni_credits == best_credits) {
- if (best_ni && best_ni->ni_seq <= ni->ni_seq)
- continue;
- }
+ else if (distance < shortest_distance)
+ goto select_ni;
+
+ if (ni_credits < best_credits)
+ continue;
+ else if (ni_credits > best_credits)
+ goto select_ni;
+
+ if (best_ni && best_ni->ni_seq <= ni->ni_seq)
+ continue;
+
+select_ni:
+ best_sel_prio = ni_sel_prio;
+ shortest_distance = distance;
+ best_healthv = ni_healthv;
best_ni = ni;
best_credits = ni_credits;
}
__u32 routing = send_case & REMOTE_DST;
struct lnet_rsp_tracker *rspt;
- /*
- * Increment sequence number of the selected peer so that we
- * pick the next one in Round Robin.
+ /* Increment sequence number of the selected peer, peer net,
+ * local ni and local net so that we pick the next ones
+ * in Round Robin.
*/
best_lpni->lpni_seq++;
+ best_lpni->lpni_peer_net->lpn_seq++;
+ best_ni->ni_seq++;
+ best_ni->ni_net->net_seq++;
+
+ CDEBUG(D_NET, "%s NI seq info: [%d:%d:%d:%u] %s LPNI seq info [%d:%d:%d:%u]\n",
+ libcfs_nid2str(best_ni->ni_nid),
+ best_ni->ni_seq, best_ni->ni_net->net_seq,
+ atomic_read(&best_ni->ni_tx_credits),
+ best_ni->ni_sel_priority,
+ libcfs_nid2str(best_lpni->lpni_nid),
+ best_lpni->lpni_seq, best_lpni->lpni_peer_net->lpn_seq,
+ best_lpni->lpni_txcredits,
+ best_lpni->lpni_sel_priority);
/*
* grab a reference on the peer_ni so it sticks around even if
lnet_find_best_ni_on_spec_net(struct lnet_ni *cur_best_ni,
struct lnet_peer *peer,
struct lnet_peer_net *peer_net,
- int cpt,
- bool incr_seq)
+ int cpt)
{
struct lnet_net *local_net;
struct lnet_ni *best_ni;
best_ni = lnet_get_best_ni(local_net, cur_best_ni,
peer, peer_net, cpt);
- if (incr_seq && best_ni)
- best_ni->ni_seq++;
-
return best_ni;
}
lnet_nid_t src_nid = (sd->sd_src_nid != LNET_NID_ANY) ? sd->sd_src_nid :
(sd->sd_best_ni != NULL) ? sd->sd_best_ni->ni_nid :
LNET_NID_ANY;
+ int best_lpn_healthv = 0;
+ __u32 best_lpn_sel_prio = LNET_MAX_SELECTION_PRIORITY;
CDEBUG(D_NET, "using src nid %s for route restriction\n",
libcfs_nid2str(src_nid));
best_rnet = rnet;
}
- if (best_lpn->lpn_seq <= lpn->lpn_seq)
+ /* select the preferred peer net */
+ if (best_lpn_healthv > lpn->lpn_healthv)
continue;
+ else if (best_lpn_healthv < lpn->lpn_healthv)
+ goto use_lpn;
+ if (best_lpn_sel_prio < lpn->lpn_sel_priority)
+ continue;
+ else if (best_lpn_sel_prio > lpn->lpn_sel_priority)
+ goto use_lpn;
+
+ if (best_lpn->lpn_seq <= lpn->lpn_seq)
+ continue;
+use_lpn:
+ best_lpn_healthv = lpn->lpn_healthv;
+ best_lpn_sel_prio = lpn->lpn_sel_priority;
best_lpn = lpn;
best_rnet = rnet;
}
*/
best_route = lnet_find_route_locked(best_rnet,
LNET_NIDNET(src_nid),
+ sd->sd_best_lpni,
&last_route, &gwni);
if (!best_route) {
sd->sd_best_ni = lnet_find_best_ni_on_spec_net(NULL, gw,
lnet_peer_get_net_locked(gw,
local_lnet),
- sd->sd_md_cpt,
- true);
+ sd->sd_md_cpt);
if (!sd->sd_best_ni) {
CERROR("Internal Error. Expected local ni on %s but non found :%s\n",
lnet_find_best_ni_on_local_net(struct lnet_peer *peer, int md_cpt,
bool discovery)
{
- struct lnet_peer_net *peer_net = NULL;
+ struct lnet_peer_net *lpn = NULL;
+ struct lnet_peer_net *best_lpn = NULL;
+ struct lnet_net *net = NULL;
+ struct lnet_net *best_net = NULL;
struct lnet_ni *best_ni = NULL;
- int lpn_healthv = 0;
+ int best_lpn_healthv = 0;
+ int best_net_healthv = 0;
+ int net_healthv;
+ __u32 best_lpn_sel_prio = LNET_MAX_SELECTION_PRIORITY;
+ __u32 lpn_sel_prio;
+ __u32 best_net_sel_prio = LNET_MAX_SELECTION_PRIORITY;
+ __u32 net_sel_prio;
+ bool exit = false;
/*
* The peer can have multiple interfaces, some of them can be on
*/
/* go through all the peer nets and find the best_ni */
- list_for_each_entry(peer_net, &peer->lp_peer_nets, lpn_peer_nets) {
+ list_for_each_entry(lpn, &peer->lp_peer_nets, lpn_peer_nets) {
/*
* The peer's list of nets can contain non-local nets. We
* want to only examine the local ones.
*/
- if (!lnet_get_net_locked(peer_net->lpn_net_id))
+ net = lnet_get_net_locked(lpn->lpn_net_id);
+ if (!net)
continue;
- /* always select the lpn with the best health */
- if (lpn_healthv <= peer_net->lpn_healthv)
- lpn_healthv = peer_net->lpn_healthv;
- else
- continue;
-
- best_ni = lnet_find_best_ni_on_spec_net(best_ni, peer, peer_net,
- md_cpt, false);
+ lpn_sel_prio = lpn->lpn_sel_priority;
+ net_healthv = lnet_get_net_healthv_locked(net);
+ net_sel_prio = net->net_sel_priority;
/*
* if this is a discovery message and lp_disc_net_id is
* specified then use that net to send the discovery on.
*/
- if (peer->lp_disc_net_id == peer_net->lpn_net_id &&
- discovery)
+ if (peer->lp_disc_net_id == lpn->lpn_net_id &&
+ discovery) {
+ exit = true;
+ goto select_lpn;
+ }
+
+ if (!best_lpn)
+ goto select_lpn;
+
+ /* always select the lpn with the best health */
+ if (best_lpn_healthv > lpn->lpn_healthv)
+ continue;
+ else if (best_lpn_healthv < lpn->lpn_healthv)
+ goto select_lpn;
+
+ /* select the preferred peer and local nets */
+ if (best_lpn_sel_prio < lpn_sel_prio)
+ continue;
+ else if (best_lpn_sel_prio > lpn_sel_prio)
+ goto select_lpn;
+
+ if (best_net_healthv > net_healthv)
+ continue;
+ else if (best_net_healthv < net_healthv)
+ goto select_lpn;
+
+ if (best_net_sel_prio < net_sel_prio)
+ continue;
+ else if (best_net_sel_prio > net_sel_prio)
+ goto select_lpn;
+
+ if (best_lpn->lpn_seq < lpn->lpn_seq)
+ continue;
+ else if (best_lpn->lpn_seq > lpn->lpn_seq)
+ goto select_lpn;
+
+ /* round robin over the local networks */
+ if (best_net->net_seq <= net->net_seq)
+ continue;
+
+select_lpn:
+ best_net_healthv = net_healthv;
+ best_net_sel_prio = net_sel_prio;
+ best_lpn_healthv = lpn->lpn_healthv;
+ best_lpn_sel_prio = lpn_sel_prio;
+ best_lpn = lpn;
+ best_net = net;
+
+ if (exit)
break;
}
- if (best_ni)
- /* increment sequence number so we can round robin */
- best_ni->ni_seq++;
+ if (best_lpn) {
+ /* Select the best NI on the same net as best_lpn chosen
+ * above
+ */
+ best_ni = lnet_find_best_ni_on_spec_net(NULL, peer,
+ best_lpn, md_cpt);
+ }
return best_ni;
}
best_ni =
lnet_find_best_ni_on_spec_net(NULL, sd->sd_peer,
sd->sd_best_lpni->lpni_peer_net,
- sd->sd_md_cpt, true);
+ sd->sd_md_cpt);
/* If there is no best_ni we don't have a route */
if (!best_ni) {
CERROR("no path to %s from net %s\n",
sd->sd_best_ni = lnet_find_best_ni_on_spec_net(NULL,
sd->sd_peer,
sd->sd_best_lpni->lpni_peer_net,
- sd->sd_md_cpt,
- true);
+ sd->sd_md_cpt);
if (!sd->sd_best_ni) {
CERROR("Unable to forward message to %s. No local NI available\n",
libcfs_nid2str(sd->sd_dst_nid));
sd->sd_best_ni =
lnet_find_best_ni_on_spec_net(NULL, sd->sd_peer,
sd->sd_best_lpni->lpni_peer_net,
- sd->sd_md_cpt, true);
+ sd->sd_md_cpt);
if (!sd->sd_best_ni) {
/*
* \retval 0 if \a str parsed to numeric address
* \retval errno otherwise
*/
-static int
+int
libcfs_num_parse(char *str, int len, struct list_head *list)
{
struct cfs_expr_list *el;
static const size_t libcfs_nnetstrfns = ARRAY_SIZE(libcfs_netstrfns);
+/* Look up the libcfs_netstrfns entry whose nf_type equals \a net_type.
+ *
+ * \retval pointer to the matching table entry
+ * \retval NULL if no entry has that type
+ */
static struct netstrfns *
+type2net_info(__u32 net_type)
+{
+	size_t idx = 0;
+
+	while (idx < libcfs_nnetstrfns) {
+		struct netstrfns *nf = &libcfs_netstrfns[idx++];
+
+		if (nf->nf_type == net_type)
+			return nf;
+	}
+
+	return NULL;
+}
+
+/* Test a network id against a network type and a net-number expression
+ * list.
+ *
+ * \retval 1	the type of \a net_id equals \a net_type and its net number
+ *		is accepted by \a net_num_list
+ * \retval 0	otherwise, including when \a net_num_list is NULL
+ */
+int
+cfs_match_net(__u32 net_id, __u32 net_type, struct list_head *net_num_list)
+{
+	__u32 net_num;
+
+	if (!net_num_list || LNET_NETTYP(net_id) != net_type)
+		return 0;
+
+	net_num = LNET_NETNUM(net_id);
+
+	/* an empty expression list only matches net number 0; any other
+	 * net number needs an explicit expression to match against
+	 */
+	if (list_empty(net_num_list))
+		return !net_num;
+
+	return libcfs_num_match(net_num, net_num_list) ? 1 : 0;
+}
+
+/* Match a NID against a network type, a net-number expression list and
+ * an address expression list.
+ *
+ * \param[in] nid		NID to test
+ * \param[in] net_type		network type the NID's network must have
+ * \param[in] net_num_list	expression list of acceptable net numbers
+ * \param[in] addr		expression list of acceptable addresses
+ *
+ * \retval 1	both the address and the network of \a nid match
+ * \retval 0	no match, one of the list pointers is NULL, or the network
+ *		type of \a nid has no entry in libcfs_netstrfns
+ */
+int
+cfs_match_nid_net(lnet_nid_t nid, __u32 net_type,
+		  struct list_head *net_num_list,
+		  struct list_head *addr)
+{
+	__u32 address;
+	struct netstrfns *nf;
+
+	/* both lists are required; reject NULL once, up front */
+	if (!addr || !net_num_list)
+		return 0;
+
+	/* the address matcher is specific to the NID's own network type */
+	nf = type2net_info(LNET_NETTYP(LNET_NIDNET(nid)));
+	if (!nf)
+		return 0;
+
+	address = LNET_NIDADDR(nid);
+
+	/* if either the address or net number don't match then no match */
+	if (!nf->nf_match_addr(address, addr) ||
+	    !cfs_match_net(LNET_NIDNET(nid), net_type, net_num_list))
+		return 0;
+
+	return 1;
+}
+EXPORT_SYMBOL(cfs_match_nid_net);
+
+static struct netstrfns *
libcfs_lnd2netstrfns(__u32 lnd)
{
int i;
#endif
#include <linux/uaccess.h>
+#include <lnet/udsp.h>
#include <lnet/lib-lnet.h>
#include <uapi/linux/lnet/lnet-dlc.h>
INIT_LIST_HEAD(&lpni->lpni_peer_nis);
INIT_LIST_HEAD(&lpni->lpni_recovery);
INIT_LIST_HEAD(&lpni->lpni_on_remote_peer_ni_list);
+ INIT_LIST_HEAD(&lpni->lpni_rtr_pref_nids);
LNetInvalidateMDHandle(&lpni->lpni_recovery_ping_mdh);
atomic_set(&lpni->lpni_refcount, 1);
+ lpni->lpni_sel_priority = LNET_MAX_SELECTION_PRIORITY;
spin_lock_init(&lpni->lpni_lock);
INIT_LIST_HEAD(&lpn->lpn_peer_nets);
INIT_LIST_HEAD(&lpn->lpn_peer_nis);
lpn->lpn_net_id = net_id;
+ lpn->lpn_sel_priority = LNET_MAX_SELECTION_PRIORITY;
CDEBUG(D_NET, "%p net %s\n", lpn, libcfs_net2str(lpn->lpn_net_id));
wake_up(&the_lnet.ln_dc_waitq);
}
+/* Check whether a gateway is on the preferred-router list of a remote
+ * peer NI.
+ *
+ * \param[in] lpni	peer NI whose lpni_rtr_pref_nids list is searched
+ * \param[in] gw_nid	NID of the candidate gateway
+ *
+ * \retval true		\a gw_nid is found in the list
+ * \retval false	the list is empty, or it is not empty and \a gw_nid
+ *			is not in it
+ */
+bool
+lnet_peer_is_pref_rtr_locked(struct lnet_peer_ni *lpni,
+			     lnet_nid_t gw_nid)
+{
+	struct lnet_nid_list *ne;
+
+	CDEBUG(D_NET, "%s: rtr pref empty: %d\n",
+	       libcfs_nid2str(lpni->lpni_nid),
+	       list_empty(&lpni->lpni_rtr_pref_nids));
+
+	if (list_empty(&lpni->lpni_rtr_pref_nids))
+		return false;
+
+	/* iterate through all the preferred NIDs and see if any of them
+	 * matches the provided gw_nid
+	 */
+	list_for_each_entry(ne, &lpni->lpni_rtr_pref_nids, nl_list) {
+		CDEBUG(D_NET, "Comparing pref %s with gw %s\n",
+		       libcfs_nid2str(ne->nl_nid),
+		       libcfs_nid2str(gw_nid));
+		if (ne->nl_nid == gw_nid)
+			return true;
+	}
+
+	return false;
+}
+
+/* Remove and free every entry on the preferred-router list of \a lpni.
+ * The entries are detached from lpni_rtr_pref_nids while the net lock
+ * for the peer NI's CPT is held and are freed after it is released.
+ */
+void
+lnet_peer_clr_pref_rtrs(struct lnet_peer_ni *lpni)
+{
+ struct list_head zombies;
+ struct lnet_nid_list *ne;
+ struct lnet_nid_list *tmp;
+ int cpt = lpni->lpni_cpt;
+
+ INIT_LIST_HEAD(&zombies);
+
+ /* move the whole list onto a private head under the lock */
+ lnet_net_lock(cpt);
+ list_splice_init(&lpni->lpni_rtr_pref_nids, &zombies);
+ lnet_net_unlock(cpt);
+
+ /* zombies is local to this call, so it is walked without the lock */
+ list_for_each_entry_safe(ne, tmp, &zombies, nl_list) {
+ list_del(&ne->nl_list);
+ LIBCFS_FREE(ne, sizeof(*ne));
+ }
+}
+
+/* Add \a gw_nid to the preferred-router list of \a lpni.
+ *
+ * \retval 0		a new entry was added to lpni_rtr_pref_nids
+ * \retval -EEXIST	\a gw_nid is already on the list
+ * \retval -ENOMEM	the list entry could not be allocated
+ */
+int
+lnet_peer_add_pref_rtr(struct lnet_peer_ni *lpni,
+ lnet_nid_t gw_nid)
+{
+ int cpt = lpni->lpni_cpt;
+ struct lnet_nid_list *ne = NULL;
+
+ /* This function is called with api_mutex held. When the api_mutex
+ * is held the list can not be modified, as it is only modified as
+ * a result of applying a UDSP and that happens under api_mutex
+ * lock.
+ */
+ __must_hold(&the_lnet.ln_api_mutex);
+
+ /* duplicate scan runs without the net lock; see the note above */
+ list_for_each_entry(ne, &lpni->lpni_rtr_pref_nids, nl_list) {
+ if (ne->nl_nid == gw_nid)
+ return -EEXIST;
+ }
+
+ /* ne is reused here for the newly allocated entry */
+ LIBCFS_CPT_ALLOC(ne, lnet_cpt_table(), cpt, sizeof(*ne));
+ if (!ne)
+ return -ENOMEM;
+
+ ne->nl_nid = gw_nid;
+
+ /* Lock the cpt to protect against addition and checks in the
+ * selection algorithm
+ */
+ lnet_net_lock(cpt);
+ list_add(&ne->nl_list, &lpni->lpni_rtr_pref_nids);
+ lnet_net_unlock(cpt);
+
+ return 0;
+}
+
/*
* Test whether a ni is a preferred ni for this peer_ni, e.g, whether
* this is a preferred point-to-point path. Call with lnet_net_lock in
bool
lnet_peer_is_pref_nid_locked(struct lnet_peer_ni *lpni, lnet_nid_t nid)
{
- int i;
+ struct lnet_nid_list *ne;
if (lpni->lpni_pref_nnids == 0)
return false;
if (lpni->lpni_pref_nnids == 1)
return lpni->lpni_pref.nid == nid;
- for (i = 0; i < lpni->lpni_pref_nnids; i++) {
- if (lpni->lpni_pref.nids[i] == nid)
+ list_for_each_entry(ne, &lpni->lpni_pref.nids, nl_list) {
+ if (ne->nl_nid == nid)
return true;
}
return false;
return rc;
}
+/* Set the selection priority of \a lpni to \a priority.
+ * The value is stored directly in lpni_sel_priority; no lock is taken
+ * inside this function.
+ */
+void
+lnet_peer_ni_set_selection_priority(struct lnet_peer_ni *lpni, __u32 priority)
+{
+ lpni->lpni_sel_priority = priority;
+}
+
/*
* Clear the preferred NIDs from a non-multi-rail peer.
*/
int
lnet_peer_add_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid)
{
- lnet_nid_t *nids = NULL;
- lnet_nid_t *oldnids = NULL;
struct lnet_peer *lp = lpni->lpni_peer_net->lpn_peer;
- int size;
- int i;
+ struct lnet_nid_list *ne1 = NULL;
+ struct lnet_nid_list *ne2 = NULL;
+ lnet_nid_t tmp_nid = LNET_NID_ANY;
int rc = 0;
if (nid == LNET_NID_ANY) {
}
/* A non-MR node may have only one preferred NI per peer_ni */
- if (lpni->lpni_pref_nnids > 0) {
- if (!(lp->lp_state & LNET_PEER_MULTI_RAIL)) {
- rc = -EPERM;
- goto out;
- }
+ if (lpni->lpni_pref_nnids > 0 &&
+ !(lp->lp_state & LNET_PEER_MULTI_RAIL)) {
+ rc = -EPERM;
+ goto out;
}
+ /* add the new preferred nid to the list of preferred nids */
if (lpni->lpni_pref_nnids != 0) {
- size = sizeof(*nids) * (lpni->lpni_pref_nnids + 1);
- LIBCFS_CPT_ALLOC(nids, lnet_cpt_table(), lpni->lpni_cpt, size);
- if (!nids) {
+ size_t alloc_size = sizeof(*ne1);
+
+ if (lpni->lpni_pref_nnids == 1) {
+ tmp_nid = lpni->lpni_pref.nid;
+ INIT_LIST_HEAD(&lpni->lpni_pref.nids);
+ }
+
+ list_for_each_entry(ne1, &lpni->lpni_pref.nids, nl_list) {
+ if (ne1->nl_nid == nid) {
+ rc = -EEXIST;
+ goto out;
+ }
+ }
+
+ LIBCFS_CPT_ALLOC(ne1, lnet_cpt_table(), lpni->lpni_cpt,
+ alloc_size);
+ if (!ne1) {
rc = -ENOMEM;
goto out;
}
- for (i = 0; i < lpni->lpni_pref_nnids; i++) {
- if (lpni->lpni_pref.nids[i] == nid) {
- LIBCFS_FREE(nids, size);
- rc = -EEXIST;
+
+ /* move the originally stored nid to the list */
+ if (lpni->lpni_pref_nnids == 1) {
+ LIBCFS_CPT_ALLOC(ne2, lnet_cpt_table(),
+ lpni->lpni_cpt, alloc_size);
+ if (!ne2) {
+ rc = -ENOMEM;
goto out;
}
- nids[i] = lpni->lpni_pref.nids[i];
+ INIT_LIST_HEAD(&ne2->nl_list);
+ ne2->nl_nid = tmp_nid;
}
- nids[i] = nid;
+ ne1->nl_nid = nid;
}
lnet_net_lock(LNET_LOCK_EX);
if (lpni->lpni_pref_nnids == 0) {
lpni->lpni_pref.nid = nid;
} else {
- oldnids = lpni->lpni_pref.nids;
- lpni->lpni_pref.nids = nids;
+ if (ne2)
+ list_add_tail(&ne2->nl_list, &lpni->lpni_pref.nids);
+ list_add_tail(&ne1->nl_list, &lpni->lpni_pref.nids);
}
lpni->lpni_pref_nnids++;
lpni->lpni_state &= ~LNET_PEER_NI_NON_MR_PREF;
spin_unlock(&lpni->lpni_lock);
lnet_net_unlock(LNET_LOCK_EX);
- if (oldnids) {
- size = sizeof(*nids) * (lpni->lpni_pref_nnids - 1);
- CFS_FREE_PTR_ARRAY(oldnids, size);
- }
out:
if (rc == -EEXIST && (lpni->lpni_state & LNET_PEER_NI_NON_MR_PREF)) {
spin_lock(&lpni->lpni_lock);
int
lnet_peer_del_pref_nid(struct lnet_peer_ni *lpni, lnet_nid_t nid)
{
- lnet_nid_t *nids = NULL;
- lnet_nid_t *oldnids = NULL;
struct lnet_peer *lp = lpni->lpni_peer_net->lpn_peer;
- int size;
- int i, j;
+ struct lnet_nid_list *ne = NULL;
int rc = 0;
if (lpni->lpni_pref_nnids == 0) {
rc = -ENOENT;
goto out;
}
- } else if (lpni->lpni_pref_nnids == 2) {
- if (lpni->lpni_pref.nids[0] != nid &&
- lpni->lpni_pref.nids[1] != nid) {
- rc = -ENOENT;
- goto out;
- }
} else {
- size = sizeof(*nids) * (lpni->lpni_pref_nnids - 1);
- LIBCFS_CPT_ALLOC(nids, lnet_cpt_table(), lpni->lpni_cpt, size);
- if (!nids) {
- rc = -ENOMEM;
- goto out;
- }
- for (i = 0, j = 0; i < lpni->lpni_pref_nnids; i++) {
- if (lpni->lpni_pref.nids[i] != nid)
- continue;
- nids[j++] = lpni->lpni_pref.nids[i];
- }
- /* Check if we actually removed a nid. */
- if (j == lpni->lpni_pref_nnids) {
- LIBCFS_FREE(nids, size);
- rc = -ENOENT;
- goto out;
+ list_for_each_entry(ne, &lpni->lpni_pref.nids, nl_list) {
+ if (ne->nl_nid == nid)
+ goto remove_nid_entry;
}
+ rc = -ENOENT;
+ ne = NULL;
+ goto out;
}
+remove_nid_entry:
lnet_net_lock(LNET_LOCK_EX);
spin_lock(&lpni->lpni_lock);
- if (lpni->lpni_pref_nnids == 1) {
+ if (lpni->lpni_pref_nnids == 1)
lpni->lpni_pref.nid = LNET_NID_ANY;
- } else if (lpni->lpni_pref_nnids == 2) {
- oldnids = lpni->lpni_pref.nids;
- if (oldnids[0] == nid)
- lpni->lpni_pref.nid = oldnids[1];
- else
- lpni->lpni_pref.nid = oldnids[2];
- } else {
- oldnids = lpni->lpni_pref.nids;
- lpni->lpni_pref.nids = nids;
+ else {
+ list_del_init(&ne->nl_list);
+ if (lpni->lpni_pref_nnids == 2) {
+ struct lnet_nid_list *ne, *tmp;
+
+ list_for_each_entry_safe(ne, tmp,
+ &lpni->lpni_pref.nids,
+ nl_list) {
+ lpni->lpni_pref.nid = ne->nl_nid;
+ list_del_init(&ne->nl_list);
+ LIBCFS_FREE(ne, sizeof(*ne));
+ }
+ }
}
lpni->lpni_pref_nnids--;
lpni->lpni_state &= ~LNET_PEER_NI_NON_MR_PREF;
spin_unlock(&lpni->lpni_lock);
lnet_net_unlock(LNET_LOCK_EX);
- if (oldnids) {
- size = sizeof(*nids) * (lpni->lpni_pref_nnids + 1);
- CFS_FREE_PTR_ARRAY(oldnids, size);
- }
+ if (ne)
+ LIBCFS_FREE(ne, sizeof(*ne));
out:
CDEBUG(D_NET, "peer %s nid %s: %d\n",
libcfs_nid2str(lp->lp_primary_nid), libcfs_nid2str(nid), rc);
return rc;
}
+/* Forget all preferred NIDs recorded on \a lpni and reset
+ * lpni_pref_nnids to 0.
+ * With exactly one preferred NID, lpni_pref.nid is reset to LNET_NID_ANY.
+ * With more than one, the entries on lpni_pref.nids are detached under
+ * LNET_LOCK_EX and freed after the lock is released.
+ */
+void
+lnet_peer_clr_pref_nids(struct lnet_peer_ni *lpni)
+{
+ struct list_head zombies;
+ struct lnet_nid_list *ne;
+ struct lnet_nid_list *tmp;
+
+ INIT_LIST_HEAD(&zombies);
+
+ /* NOTE(review): the add/del paths also hold lpni_lock around their
+ * updates; it is not taken here -- confirm that is intended
+ */
+ lnet_net_lock(LNET_LOCK_EX);
+ if (lpni->lpni_pref_nnids == 1)
+ lpni->lpni_pref.nid = LNET_NID_ANY;
+ else if (lpni->lpni_pref_nnids > 1)
+ list_splice_init(&lpni->lpni_pref.nids, &zombies);
+ lpni->lpni_pref_nnids = 0;
+ lnet_net_unlock(LNET_LOCK_EX);
+
+ /* zombies stays empty unless the list was spliced above */
+ list_for_each_entry_safe(ne, tmp, &zombies, nl_list) {
+ list_del_init(&ne->nl_list);
+ LIBCFS_FREE(ne, sizeof(*ne));
+ }
+}
+
lnet_nid_t
lnet_peer_primary_nid_locked(lnet_nid_t nid)
{
unsigned flags)
{
struct lnet_peer_table *ptable;
+ bool new_lpn = false;
+ int rc;
/* Install the new peer_ni */
lnet_net_lock(LNET_LOCK_EX);
/* Add peer_net to peer */
if (!lpn->lpn_peer) {
+ new_lpn = true;
lpn->lpn_peer = lp;
list_add_tail(&lpn->lpn_peer_nets, &lp->lp_peer_nets);
lnet_peer_addref_locked(lp);
lp->lp_nnis++;
+ /* apply UDSPs */
+ if (new_lpn) {
+ rc = lnet_udsp_apply_policies_on_lpn(lpn);
+ if (rc)
+ CERROR("Failed to apply UDSPs on lpn %s\n",
+ libcfs_net2str(lpn->lpn_net_id));
+ }
+ rc = lnet_udsp_apply_policies_on_lpni(lpni);
+ if (rc)
+ CERROR("Failed to apply UDSPs on lpni %s\n",
+ libcfs_nid2str(lpni->lpni_nid));
+
CDEBUG(D_NET, "peer %s NID %s flags %#x\n",
libcfs_nid2str(lp->lp_primary_nid),
libcfs_nid2str(lpni->lpni_nid), flags);
spin_unlock(&ptable->pt_zombie_lock);
}
- if (lpni->lpni_pref_nnids > 1)
- CFS_FREE_PTR_ARRAY(lpni->lpni_pref.nids, lpni->lpni_pref_nnids);
+ if (lpni->lpni_pref_nnids > 1) {
+ struct lnet_nid_list *ne, *tmp;
+ list_for_each_entry_safe(ne, tmp, &lpni->lpni_pref.nids,
+ nl_list) {
+ list_del_init(&ne->nl_list);
+ LIBCFS_FREE(ne, sizeof(*ne));
+ }
+ }
LIBCFS_FREE(lpni, sizeof(*lpni));
if (lpn)
--- /dev/null
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2011, 2017, Intel Corporation.
+ *
+ * Copyright (c) 2018-2020 Data Direct Networks.
+ *
+ * This file is part of Lustre, https://wiki.whamcloud.com/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * lnet/lnet/udsp.c
+ *
+ * User Defined Selection Policies (UDSP) add the ability to exercise
+ * fine-grained traffic control. The policies are instantiated
+ * on LNet constructs and allow some constructs to be preferred
+ * over others as an extension of the selection algorithm.
+ * The order of operation is defined by the selection algorithm logical flow:
+ *
+ * 1. Iterate over all the networks that a peer can be reached on
+ * and select the best local network
+ * - The remote network with the highest priority is examined
+ * (Network Rule)
+ * - The local network with the highest priority is selected
+ * (Network Rule)
+ * - The local NI with the highest priority is selected
+ * (NID Rule)
+ * 2. If the peer is a remote peer and has no local networks,
+ * - then select the remote peer network with the highest priority
+ * (Network Rule)
+ * - Select the highest priority remote peer_ni on the network selected
+ * (NID Rule)
+ * - Now that the peer's network and NI are decided, select the router
+ * in round robin from the peer NI's preferred router list.
+ * (Router Rule)
+ * - Select the highest priority local NI on the local net of the
+ * selected route.
+ * (NID Rule)
+ * 3. Otherwise for local peers, select the peer_ni from the peer.
+ * - highest priority peer NI is selected
+ * (NID Rule)
+ * - Select the peer NI which has the local NI selected on its
+ * preferred list.
+ * (NID Pair Rule)
+ *
+ * Accordingly, the User Interface allows for the following:
+ * - Adding a local network udsp: if multiple local networks are
+ * available, each one can have a priority.
+ * - Adding a local NID udsp: after a local network is chosen,
+ * if there are multiple NIs, each one can have a priority.
+ * - Adding a remote NID udsp: assign priority to a peer NID.
+ * - Adding a NID pair udsp: allows specifying local NIDs
+ * to be added to the preferred list of the specified peer NIs.
+ * When selecting a peer NI, the one that has the
+ * local NID being used on its list is preferred.
+ * - Adding a Router udsp: similar to the NID pair udsp.
+ * Specified router NIDs are added on the list on the specified peer NIs.
+ * When sending to a remote peer, remote net is selected and the peer NID
+ * is selected. The router which has its nid on the peer NI list
+ * is preferred.
+ * - Deleting a udsp: use the specified policy index to remove it
+ * from the policy list.
+ *
+ * Generally, the syntax is as follows
+ * lnetctl policy <add | del | show>
+ * --src: ip2nets syntax specifying the local NID to match
+ * --dst: ip2nets syntax specifying the remote NID to match
+ * --rte: ip2nets syntax specifying the router NID to match
+ * --priority: Priority to apply to rule matches
+ * --idx: Index of where to insert or delete the rule
+ * By default add appends to the end of the rule list
+ *
+ * Author: Amir Shehata
+ */
+
+#include <linux/uaccess.h>
+
+#include <lnet/udsp.h>
+#include <libcfs/libcfs.h>
+
+/*
+ * Working context handed to the udsp_apply_rule callbacks. The entry
+ * points fill in the construct(s) being examined and
+ * lnet_udsp_apply_single_policy() fills in the rule-derived fields
+ * before invoking a callback.
+ */
+struct udsp_info {
+ /* peer NI currently being examined */
+ struct lnet_peer_ni *udi_lpni;
+ /* peer net currently being examined */
+ struct lnet_peer_net *udi_lpn;
+ /* local NI currently being examined */
+ struct lnet_ni *udi_ni;
+ /* local net currently being examined */
+ struct lnet_net *udi_net;
+ /* descriptor the examined construct is matched against */
+ struct lnet_ud_nid_descr *udi_match;
+ /* descriptor selecting the NIDs placed on a preferred list */
+ struct lnet_ud_nid_descr *udi_action;
+ /* priority assigned on a match when the action is a priority */
+ __u32 udi_priority;
+ /* action carried by the rule being applied */
+ enum lnet_udsp_action_type udi_type;
+ /* true: preferred list is built from local NIs; false: from routers */
+ bool udi_local;
+ /* true: undo the rule (priority -1, preferred lists cleared) */
+ bool udi_revert;
+};
+
+/* callback applying the rule described by a udsp_info to some construct */
+typedef int (*udsp_apply_rule)(struct udsp_info *);
+
+/*
+ * Slots of the callback table passed to lnet_udsp_apply_single_policy().
+ * A NULL slot means the caller is not interested in that class of rule.
+ */
+enum udsp_apply {
+ /* rules keyed on a destination (peer) descriptor */
+ UDSP_APPLY_ON_PEERS = 0,
+ /* source priority rules on local nets/NIs */
+ UDSP_APPLY_PRIO_ON_NIS = 1,
+ /* router rules on local nets */
+ UDSP_APPLY_RTE_ON_NETS = 2,
+ /* number of slots in the callback table */
+ UDSP_APPLY_MAX_ENUM = 3,
+};
+
+/*
+ * Returned by the net-level callbacks when the rule is not a net rule,
+ * so the caller should go on and examine the individual NIs.
+ */
+#define RULE_NOT_APPLICABLE -1
+
+/* A descriptor that carries no address ranges describes a whole network. */
+static inline bool
+lnet_udsp_is_net_rule(struct lnet_ud_nid_descr *match)
+{
+	struct list_head *addr_ranges = &match->ud_addr_range;
+
+	return list_empty(addr_ranges);
+}
+
+/*
+ * Compare two lists of cfs_range_expr element by element.
+ * Lists of different length are never equal.
+ */
+static bool
+lnet_udsp_range_exprs_equal(struct list_head *ranges_a,
+			    struct list_head *ranges_b)
+{
+	struct cfs_range_expr *ra;
+	struct cfs_range_expr *rb;
+
+	if (lnet_get_list_len(ranges_a) != lnet_get_list_len(ranges_b))
+		return false;
+
+	/* equal lengths: rb is only dereferenced while ra is valid */
+	rb = list_first_entry(ranges_b, struct cfs_range_expr, re_link);
+	list_for_each_entry(ra, ranges_a, re_link) {
+		if (ra->re_lo != rb->re_lo)
+			return false;
+		if (ra->re_hi != rb->re_hi)
+			return false;
+		if (ra->re_stride != rb->re_stride)
+			return false;
+		rb = list_next_entry(rb, re_link);
+	}
+
+	return true;
+}
+
+/*
+ * Return true when the expression lists \a e1 and \a e2 hold the same
+ * expressions, each with the same ranges, in the same order.
+ */
+static bool
+lnet_udsp_expr_list_equal(struct list_head *e1,
+			  struct list_head *e2)
+{
+	struct cfs_expr_list *cur1;
+	struct cfs_expr_list *cur2;
+
+	if (list_empty(e1) && list_empty(e2))
+		return true;
+
+	if (lnet_get_list_len(e1) != lnet_get_list_len(e2))
+		return false;
+
+	/* walk both lists in lockstep */
+	cur2 = list_first_entry(e2, struct cfs_expr_list, el_link);
+	list_for_each_entry(cur1, e1, el_link) {
+		if (!lnet_udsp_range_exprs_equal(&cur1->el_exprs,
+						 &cur2->el_exprs))
+			return false;
+		cur2 = list_next_entry(cur2, el_link);
+	}
+
+	return true;
+}
+
+/*
+ * Two NID descriptors are equal when their net type, net number ranges
+ * and address ranges all agree.
+ */
+static bool
+lnet_udsp_nid_descr_equal(struct lnet_ud_nid_descr *e1,
+			  struct lnet_ud_nid_descr *e2)
+{
+	if (e1->ud_net_id.udn_net_type != e2->ud_net_id.udn_net_type)
+		return false;
+
+	if (!lnet_udsp_expr_list_equal(&e1->ud_net_id.udn_net_num_range,
+				       &e2->ud_net_id.udn_net_num_range))
+		return false;
+
+	return lnet_udsp_expr_list_equal(&e1->ud_addr_range,
+					 &e2->ud_addr_range);
+}
+
+/*
+ * Compare the actions of two policies: the action types must agree and,
+ * for priority actions, so must the priority value.
+ */
+static bool
+lnet_udsp_action_equal(struct lnet_udsp *e1, struct lnet_udsp *e2)
+{
+	if (e1->udsp_action_type != e2->udsp_action_type)
+		return false;
+
+	/* only priority actions carry a value that needs comparing */
+	if (e1->udsp_action_type != EN_LNET_UDSP_ACTION_PRIORITY)
+		return true;
+
+	return e1->udsp_action.udsp_priority ==
+	       e2->udsp_action.udsp_priority;
+}
+
+/*
+ * Two policies are equal when their source, destination and router
+ * descriptors all match. The action is deliberately not compared here.
+ */
+static bool
+lnet_udsp_equal(struct lnet_udsp *e1, struct lnet_udsp *e2)
+{
+	return lnet_udsp_nid_descr_equal(&e1->udsp_src, &e2->udsp_src) &&
+	       lnet_udsp_nid_descr_equal(&e1->udsp_dst, &e2->udsp_dst) &&
+	       lnet_udsp_nid_descr_equal(&e1->udsp_rte, &e2->udsp_rte);
+}
+
+/* A descriptor is in use exactly when a net type has been set on it:
+ * whenever a criterion is specified, its net must be specified too.
+ */
+static inline bool
+lnet_udsp_criteria_present(struct lnet_ud_nid_descr *descr)
+{
+	if (descr->ud_net_id.udn_net_type == 0)
+		return false;
+
+	return true;
+}
+
+/*
+ * Apply a priority rule to the local NI in udi->udi_ni.
+ *
+ * If the NI's NID matches udi->udi_match its selection priority is set
+ * to udi->udi_priority, or to -1 when the rule is being reverted.
+ * Always returns 0.
+ */
+static int
+lnet_udsp_apply_rule_on_ni(struct udsp_info *udi)
+{
+	struct lnet_ud_nid_descr *descr = udi->udi_match;
+	struct lnet_ni *ni = udi->udi_ni;
+	__u32 prio;
+
+	if (!cfs_match_nid_net(ni->ni_nid,
+			       descr->ud_net_id.udn_net_type,
+			       &descr->ud_net_id.udn_net_num_range,
+			       &descr->ud_addr_range))
+		return 0;
+
+	CDEBUG(D_NET, "apply udsp on ni %s\n",
+	       libcfs_nid2str(ni->ni_nid));
+
+	if (udi->udi_revert)
+		prio = -1;
+	else
+		prio = udi->udi_priority;
+
+	/* Detected match. Set NIDs priority */
+	lnet_ni_set_sel_priority_locked(ni, prio);
+
+	return 0;
+}
+
+/*
+ * Rebuild (or, when \a revert is set, clear) the preferred router list of
+ * \a net from the gateways that match \a rte_action.
+ *
+ * Every route in the remote-nets hash is examined. A gateway matches when
+ * one of its NIs on a locally configured net, or failing that its primary
+ * NID, matches \a rte_action. The first match clears the existing
+ * preferred list; each match then adds the gateway's primary NID.
+ *
+ * LNET_LOCK_EX is released around the calls that clear and add preferred
+ * routers and is re-taken afterwards, so the lock is held on entry and on
+ * return.
+ * NOTE(review): the route lists keep being walked after the lock has been
+ * dropped - confirm nothing can modify them in that window.
+ *
+ * \retval 0 on success; a gateway that is already on the list (-EEXIST)
+ *         counts as success
+ * \retval -ve error returned by lnet_net_add_pref_rtr()
+ */
+static int
+lnet_udsp_apply_rte_list_on_net(struct lnet_net *net,
+				struct lnet_ud_nid_descr *rte_action,
+				bool revert)
+{
+	struct lnet_remotenet *rnet;
+	struct list_head *rn_list;
+	struct lnet_route *route;
+	struct lnet_peer_ni *lpni;
+	bool cleared = false;
+	lnet_nid_t gw_nid, gw_prim_nid;
+	int rc;
+	int i;
+
+	for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
+		rn_list = &the_lnet.ln_remote_nets_hash[i];
+		list_for_each_entry(rnet, rn_list, lrn_list) {
+			list_for_each_entry(route, &rnet->lrn_routes, lr_list) {
+				/* Start each route with a clean match state:
+				 * a result left over from the previous route
+				 * (e.g. -EEXIST) must not be taken for a match
+				 * when this gateway has no NI on a local net.
+				 */
+				rc = 0;
+				/* look if gw nid on the same net matches */
+				gw_prim_nid = route->lr_gateway->lp_primary_nid;
+				lpni = NULL;
+				while ((lpni = lnet_get_next_peer_ni_locked(route->lr_gateway,
+									    NULL,
+									    lpni)) != NULL) {
+					if (!lnet_get_net_locked(lpni->lpni_peer_net->lpn_net_id))
+						continue;
+					gw_nid = lpni->lpni_nid;
+					rc = cfs_match_nid_net(gw_nid,
+						rte_action->ud_net_id.udn_net_type,
+						&rte_action->ud_net_id.udn_net_num_range,
+						&rte_action->ud_addr_range);
+					if (rc)
+						break;
+				}
+				/* match gw primary nid on a remote network */
+				if (!rc) {
+					gw_nid = gw_prim_nid;
+					rc = cfs_match_nid_net(gw_nid,
+						rte_action->ud_net_id.udn_net_type,
+						&rte_action->ud_net_id.udn_net_num_range,
+						&rte_action->ud_addr_range);
+				}
+				if (!rc)
+					continue;
+				lnet_net_unlock(LNET_LOCK_EX);
+				if (!cleared || revert) {
+					lnet_net_clr_pref_rtrs(net);
+					cleared = true;
+					if (revert) {
+						lnet_net_lock(LNET_LOCK_EX);
+						continue;
+					}
+				}
+				/* match. Add to pref NIDs */
+				CDEBUG(D_NET, "udsp net->gw: %s->%s\n",
+				       libcfs_net2str(net->net_id),
+				       libcfs_nid2str(gw_prim_nid));
+				rc = lnet_net_add_pref_rtr(net, gw_prim_nid);
+				lnet_net_lock(LNET_LOCK_EX);
+				/* success if EEXIST return */
+				if (rc && rc != -EEXIST) {
+					CERROR("Failed to add %s to %s pref rtr list\n",
+					       libcfs_nid2str(gw_prim_nid),
+					       libcfs_net2str(net->net_id));
+					return rc;
+				}
+			}
+		}
+	}
+
+	/* whatever rc holds now is a match result or -EEXIST, not a failure */
+	return 0;
+}
+
+/*
+ * Apply a router rule to every local net that matches udi->udi_match.
+ * All nets are visited even if one fails; the last failure code seen is
+ * returned, or 0 if none failed.
+ */
+static int
+lnet_udsp_apply_rte_rule_on_nets(struct udsp_info *udi)
+{
+	struct lnet_ud_nid_descr *net_match = udi->udi_match;
+	struct lnet_ud_nid_descr *rtr_descr = udi->udi_action;
+	struct lnet_net *net;
+	int last_failure = 0;
+	int rc;
+
+	list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+		/* skip nets of another type or outside the net number range */
+		if (LNET_NETTYP(net->net_id) !=
+		    net_match->ud_net_id.udn_net_type ||
+		    !cfs_match_net(net->net_id,
+				   net_match->ud_net_id.udn_net_type,
+				   &net_match->ud_net_id.udn_net_num_range))
+			continue;
+
+		CDEBUG(D_NET, "apply rule on %s\n",
+		       libcfs_net2str(net->net_id));
+		rc = lnet_udsp_apply_rte_list_on_net(net, rtr_descr,
+						     udi->udi_revert);
+		if (rc != 0)
+			last_failure = rc;
+	}
+
+	return last_failure;
+}
+
+/*
+ * Apply a router rule to the single local net in udi->udi_net.
+ * Returns 0 when the net does not match udi->udi_match, otherwise the
+ * result of building the preferred router list.
+ */
+static int
+lnet_udsp_apply_rte_rule_on_net(struct udsp_info *udi)
+{
+	struct lnet_ud_nid_descr *net_match = udi->udi_match;
+	struct lnet_net *net = udi->udi_net;
+
+	if (!cfs_match_net(net->net_id,
+			   net_match->ud_net_id.udn_net_type,
+			   &net_match->ud_net_id.udn_net_num_range))
+		return 0;
+
+	CDEBUG(D_NET, "apply rule on %s\n",
+	       libcfs_net2str(net->net_id));
+
+	return lnet_udsp_apply_rte_list_on_net(net, udi->udi_action,
+					       udi->udi_revert);
+}
+
+/*
+ * Apply a priority rule to the local net in udi->udi_net.
+ *
+ * \retval RULE_NOT_APPLICABLE the rule carries address ranges, so it
+ *         targets individual NIs rather than the net
+ * \retval 0 the rule is a net rule (whether or not this net matched)
+ */
+static int
+lnet_udsp_apply_prio_rule_on_net(struct udsp_info *udi)
+{
+	struct lnet_ud_nid_descr *net_match = udi->udi_match;
+	struct lnet_net *net = udi->udi_net;
+	__u32 prio;
+
+	if (!lnet_udsp_is_net_rule(net_match))
+		return RULE_NOT_APPLICABLE;
+
+	if (!cfs_match_net(net->net_id,
+			   net_match->ud_net_id.udn_net_type,
+			   &net_match->ud_net_id.udn_net_num_range))
+		return 0;
+
+	CDEBUG(D_NET, "apply rule on %s\n",
+	       libcfs_net2str(net->net_id));
+
+	/* reverting a rule resets the priority to -1 */
+	if (udi->udi_revert)
+		prio = -1;
+	else
+		prio = udi->udi_priority;
+
+	lnet_net_set_sel_priority_locked(net, prio);
+
+	return 0;
+}
+
+/*
+ * Apply a source priority rule across all local nets of the matching
+ * type. Net rules are handled at net level; for NID rules every NI of
+ * the net is examined. Returns the last failure seen, or 0.
+ */
+static int
+lnet_udsp_apply_rule_on_nis(struct udsp_info *udi)
+{
+	struct lnet_ud_nid_descr *src_match = udi->udi_match;
+	struct lnet_net *net;
+	struct lnet_ni *ni;
+	int last_failure = 0;
+	int rc;
+
+	list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+		if (LNET_NETTYP(net->net_id) !=
+		    src_match->ud_net_id.udn_net_type)
+			continue;
+
+		udi->udi_net = net;
+		/* 0 means the rule was consumed at net level */
+		if (lnet_udsp_apply_prio_rule_on_net(udi) == 0)
+			continue;
+
+		list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+			udi->udi_ni = ni;
+			rc = lnet_udsp_apply_rule_on_ni(udi);
+			if (rc != 0)
+				last_failure = rc;
+		}
+	}
+
+	return last_failure;
+}
+
+/*
+ * Rebuild (or, when \a revert is set, clear) the preferred router list of
+ * \a lpni from the gateways whose primary NID matches \a rte_action.
+ *
+ * The first match clears the existing preferred router list; each match
+ * then adds the gateway's primary NID to it.
+ *
+ * LNET_LOCK_EX is released around the calls that clear and add preferred
+ * routers and is re-taken afterwards, so the lock is held on entry and on
+ * return.
+ *
+ * \retval 0 on success; a gateway that is already on the list (-EEXIST)
+ *         counts as success
+ * \retval -ve error returned by lnet_peer_add_pref_rtr()
+ */
+static int
+lnet_udsp_apply_rte_list_on_lpni(struct lnet_peer_ni *lpni,
+				 struct lnet_ud_nid_descr *rte_action,
+				 bool revert)
+{
+	struct lnet_remotenet *rnet;
+	struct list_head *rn_list;
+	struct lnet_route *route;
+	bool cleared = false;
+	lnet_nid_t gw_nid;
+	int rc;
+	int i;
+
+	for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
+		rn_list = &the_lnet.ln_remote_nets_hash[i];
+		list_for_each_entry(rnet, rn_list, lrn_list) {
+			list_for_each_entry(route, &rnet->lrn_routes, lr_list) {
+				gw_nid = route->lr_gateway->lp_primary_nid;
+				rc = cfs_match_nid_net(gw_nid,
+					rte_action->ud_net_id.udn_net_type,
+					&rte_action->ud_net_id.udn_net_num_range,
+					&rte_action->ud_addr_range);
+				if (!rc)
+					continue;
+				lnet_net_unlock(LNET_LOCK_EX);
+				if (!cleared || revert) {
+					CDEBUG(D_NET, "%spref rtr nids from lpni %s\n",
+					       (revert) ? "revert " : "clear ",
+					       libcfs_nid2str(lpni->lpni_nid));
+					lnet_peer_clr_pref_rtrs(lpni);
+					cleared = true;
+					if (revert) {
+						lnet_net_lock(LNET_LOCK_EX);
+						continue;
+					}
+				}
+				CDEBUG(D_NET, "add gw nid %s as preferred for peer %s\n",
+				       libcfs_nid2str(gw_nid),
+				       libcfs_nid2str(lpni->lpni_nid));
+				/* match. Add to pref NIDs */
+				rc = lnet_peer_add_pref_rtr(lpni, gw_nid);
+				lnet_net_lock(LNET_LOCK_EX);
+				/* success if EEXIST return */
+				if (rc && rc != -EEXIST) {
+					CERROR("Failed to add %s to %s pref rtr list\n",
+					       libcfs_nid2str(gw_nid),
+					       libcfs_nid2str(lpni->lpni_nid));
+					return rc;
+				}
+			}
+		}
+	}
+
+	/* whatever rc holds now is a match result or -EEXIST, not a failure */
+	return 0;
+}
+
+/*
+ * Rebuild (or, when \a revert is set, clear) the preferred NID list of
+ * \a lpni from the local NIs that match \a ni_action.
+ *
+ * The first match clears the existing preferred NID list; each match
+ * then adds the local NI's NID to it.
+ *
+ * LNET_LOCK_EX is released around the calls that clear and add preferred
+ * NIDs and is re-taken afterwards, so the lock is held on entry and on
+ * return.
+ *
+ * \retval 0 on success; a NID that is already on the list (-EEXIST)
+ *         counts as success
+ * \retval -ve error returned by lnet_peer_add_pref_nid()
+ */
+static int
+lnet_udsp_apply_ni_list(struct lnet_peer_ni *lpni,
+			struct lnet_ud_nid_descr *ni_action,
+			bool revert)
+{
+	int rc;
+	struct lnet_ni *ni;
+	struct lnet_net *net;
+	bool cleared = false;
+
+	list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
+		if (LNET_NETTYP(net->net_id) != ni_action->ud_net_id.udn_net_type)
+			continue;
+		list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
+			rc = cfs_match_nid_net(ni->ni_nid,
+				ni_action->ud_net_id.udn_net_type,
+				&ni_action->ud_net_id.udn_net_num_range,
+				&ni_action->ud_addr_range);
+			if (!rc)
+				continue;
+			lnet_net_unlock(LNET_LOCK_EX);
+			if (!cleared || revert) {
+				lnet_peer_clr_pref_nids(lpni);
+				CDEBUG(D_NET, "%spref nids from lpni %s\n",
+				       (revert) ? "revert " : "clear ",
+				       libcfs_nid2str(lpni->lpni_nid));
+				cleared = true;
+				if (revert) {
+					lnet_net_lock(LNET_LOCK_EX);
+					continue;
+				}
+			}
+			CDEBUG(D_NET, "add nid %s as preferred for peer %s\n",
+			       libcfs_nid2str(ni->ni_nid),
+			       libcfs_nid2str(lpni->lpni_nid));
+			/* match. Add to pref NIDs */
+			rc = lnet_peer_add_pref_nid(lpni, ni->ni_nid);
+			lnet_net_lock(LNET_LOCK_EX);
+			/* success if EEXIST return */
+			if (rc && rc != -EEXIST) {
+				CERROR("Failed to add %s to %s pref nid list\n",
+				       libcfs_nid2str(ni->ni_nid),
+				       libcfs_nid2str(lpni->lpni_nid));
+				return rc;
+			}
+		}
+	}
+
+	/* whatever rc holds now is a match result or -EEXIST, not a failure */
+	return 0;
+}
+
+/*
+ * Apply the rule described by \a udi to the peer NI in udi->udi_lpni.
+ *
+ * The peer NI qualifies when its NID matches udi->udi_match or, for a
+ * rule without address ranges, when its peer net matches. Depending on
+ * the action, a qualifying peer NI gets its preferred local NID list
+ * rebuilt, its preferred router list rebuilt, or its selection priority
+ * set (-1 on revert).
+ *
+ * The peer net is taken from udi->udi_lpn; callers that only provide the
+ * peer NI leave it NULL, so fall back to the peer NI's own peer net.
+ *
+ * \retval 0 no match, or rule applied
+ * \retval -ve failure while building a preferred list
+ */
+static int
+lnet_udsp_apply_rule_on_lpni(struct udsp_info *udi)
+{
+	int rc;
+	struct lnet_peer_ni *lpni = udi->udi_lpni;
+	struct lnet_peer_net *lpn = udi->udi_lpn;
+	struct lnet_ud_nid_descr *lp_match = udi->udi_match;
+	struct lnet_ud_nid_descr *action = udi->udi_action;
+	__u32 priority = (udi->udi_revert) ? -1 : udi->udi_priority;
+	bool local = udi->udi_local;
+	enum lnet_udsp_action_type type = udi->udi_type;
+
+	if (!lpn)
+		lpn = lpni->lpni_peer_net;
+
+	rc = cfs_match_nid_net(lpni->lpni_nid,
+			       lp_match->ud_net_id.udn_net_type,
+			       &lp_match->ud_net_id.udn_net_num_range,
+			       &lp_match->ud_addr_range);
+
+	/* check if looking for a net match; without a peer net there is
+	 * nothing to match the net rule against
+	 */
+	if (!rc &&
+	    (lnet_get_list_len(&lp_match->ud_addr_range) ||
+	     !lpn ||
+	     !cfs_match_net(lpn->lpn_net_id,
+			    lp_match->ud_net_id.udn_net_type,
+			    &lp_match->ud_net_id.udn_net_num_range))) {
+		return 0;
+	}
+
+	if (type == EN_LNET_UDSP_ACTION_PREFERRED_LIST && local) {
+		rc = lnet_udsp_apply_ni_list(lpni, action,
+					     udi->udi_revert);
+		if (rc)
+			return rc;
+	} else if (type == EN_LNET_UDSP_ACTION_PREFERRED_LIST &&
+		   !local) {
+		rc = lnet_udsp_apply_rte_list_on_lpni(lpni, action,
+						      udi->udi_revert);
+		if (rc)
+			return rc;
+	} else {
+		lnet_peer_ni_set_selection_priority(lpni, priority);
+	}
+
+	return 0;
+}
+
+/*
+ * Apply a priority rule to the peer net in udi->udi_lpn.
+ *
+ * \retval RULE_NOT_APPLICABLE the rule builds a preferred list or
+ *         carries address ranges, so it does not target the peer net
+ * \retval 0 the rule is a net priority rule (whether or not it matched)
+ */
+static int
+lnet_udsp_apply_rule_on_lpn(struct udsp_info *udi)
+{
+	struct lnet_ud_nid_descr *net_match = udi->udi_match;
+	struct lnet_peer_net *lpn = udi->udi_lpn;
+	__u32 prio;
+
+	if (udi->udi_type == EN_LNET_UDSP_ACTION_PREFERRED_LIST)
+		return RULE_NOT_APPLICABLE;
+
+	if (!lnet_udsp_is_net_rule(net_match))
+		return RULE_NOT_APPLICABLE;
+
+	if (!cfs_match_net(lpn->lpn_net_id,
+			   net_match->ud_net_id.udn_net_type,
+			   &net_match->ud_net_id.udn_net_num_range))
+		return 0;
+
+	CDEBUG(D_NET, "apply rule on lpn %s\n",
+	       libcfs_net2str(lpn->lpn_net_id));
+
+	/* reverting a rule resets the priority to -1 */
+	if (udi->udi_revert)
+		prio = -1;
+	else
+		prio = udi->udi_priority;
+
+	lnet_peer_net_set_sel_priority_locked(lpn, prio);
+
+	return 0;
+}
+
+/*
+ * Apply the rule in \a udi to every peer known to this node.
+ *
+ * Walks each per-CPT peer table, every peer on it and every peer net of
+ * matching net type. Net rules are consumed by
+ * lnet_udsp_apply_rule_on_lpn(); otherwise each peer NI of the peer net
+ * is handed to lnet_udsp_apply_rule_on_lpni().
+ *
+ * All peers are visited even if one fails; the last failure code seen is
+ * returned, or 0 if none failed.
+ */
+static int
+lnet_udsp_apply_rule_on_lpnis(struct udsp_info *udi)
+{
+ /* iterate over all the peers in the system and find if any of the
+ * peers match the criteria. If they do, clear the preferred list
+ * and add the new list
+ */
+ int lncpt = cfs_percpt_number(the_lnet.ln_peer_tables);
+ struct lnet_ud_nid_descr *lp_match = udi->udi_match;
+ struct lnet_peer_table *ptable;
+ struct lnet_peer_net *lpn;
+ struct lnet_peer_ni *lpni;
+ struct lnet_peer *lp;
+ int last_failure = 0;
+ int cpt;
+ int rc;
+
+ for (cpt = 0; cpt < lncpt; cpt++) {
+ ptable = the_lnet.ln_peer_tables[cpt];
+ list_for_each_entry(lp, &ptable->pt_peer_list, lp_peer_list) {
+ CDEBUG(D_NET, "udsp examining lp %s\n",
+ libcfs_nid2str(lp->lp_primary_nid));
+ list_for_each_entry(lpn,
+ &lp->lp_peer_nets,
+ lpn_peer_nets) {
+ CDEBUG(D_NET, "udsp examining lpn %s\n",
+ libcfs_net2str(lpn->lpn_net_id));
+
+ if (LNET_NETTYP(lpn->lpn_net_id) !=
+ lp_match->ud_net_id.udn_net_type)
+ continue;
+
+ udi->udi_lpn = lpn;
+
+ /* 0 means the rule was consumed at peer net level */
+ if (!lnet_udsp_apply_rule_on_lpn(udi))
+ continue;
+
+ list_for_each_entry(lpni,
+ &lpn->lpn_peer_nis,
+ lpni_peer_nis) {
+ CDEBUG(D_NET, "udsp examining lpni %s\n",
+ libcfs_nid2str(lpni->lpni_nid));
+ udi->udi_lpni = lpni;
+ /* NOTE(review): the callee may drop and re-take
+ * LNET_LOCK_EX while this list is being walked
+ */
+ rc = lnet_udsp_apply_rule_on_lpni(udi);
+ if (rc)
+ last_failure = rc;
+ }
+ }
+ }
+ }
+
+ return last_failure;
+}
+
+/*
+ * Classify \a udsp by which descriptors it carries and dispatch it to the
+ * matching callback in \a cbs after filling in \a udi:
+ *
+ *   dst + src  NID pair rule          cbs[UDSP_APPLY_ON_PEERS]
+ *   dst + rte  router rule            cbs[UDSP_APPLY_ON_PEERS]
+ *   dst        destination priority   cbs[UDSP_APPLY_ON_PEERS]
+ *   src        source priority        cbs[UDSP_APPLY_PRIO_ON_NIS]
+ *
+ * A NULL callback slot means the caller is not interested in that class
+ * of rule. A policy whose action type does not fit its class is logged
+ * and skipped.
+ *
+ * \retval 0 rule applied, skipped or malformed
+ * \retval non-zero value returned by a cbs[UDSP_APPLY_ON_PEERS] callback
+ */
+static int
+lnet_udsp_apply_single_policy(struct lnet_udsp *udsp, struct udsp_info *udi,
+			      udsp_apply_rule *cbs)
+{
+	if (lnet_udsp_criteria_present(&udsp->udsp_dst) &&
+	    lnet_udsp_criteria_present(&udsp->udsp_src)) {
+		/* NID Pair rule */
+		if (!cbs[UDSP_APPLY_ON_PEERS])
+			return 0;
+
+		if (udsp->udsp_action_type !=
+		    EN_LNET_UDSP_ACTION_PREFERRED_LIST) {
+			CERROR("Bad action type. Expected %d got %d\n",
+			       EN_LNET_UDSP_ACTION_PREFERRED_LIST,
+			       udsp->udsp_action_type);
+			return 0;
+		}
+		udi->udi_match = &udsp->udsp_dst;
+		udi->udi_action = &udsp->udsp_src;
+		udi->udi_type = EN_LNET_UDSP_ACTION_PREFERRED_LIST;
+		udi->udi_local = true;
+
+		CDEBUG(D_NET, "applying udsp (%p) dst->src\n",
+		       udsp);
+		return cbs[UDSP_APPLY_ON_PEERS](udi);
+	}
+
+	if (lnet_udsp_criteria_present(&udsp->udsp_dst) &&
+	    lnet_udsp_criteria_present(&udsp->udsp_rte)) {
+		/* Router rule. A src descriptor cannot be present here:
+		 * dst + src is handled as a NID pair rule above.
+		 */
+		if (!cbs[UDSP_APPLY_ON_PEERS])
+			return 0;
+
+		if (udsp->udsp_action_type !=
+		    EN_LNET_UDSP_ACTION_PREFERRED_LIST) {
+			CERROR("Bad action type. Expected %d got %d\n",
+			       EN_LNET_UDSP_ACTION_PREFERRED_LIST,
+			       udsp->udsp_action_type);
+			return 0;
+		}
+		udi->udi_match = &udsp->udsp_dst;
+		udi->udi_action = &udsp->udsp_rte;
+		udi->udi_type = EN_LNET_UDSP_ACTION_PREFERRED_LIST;
+		udi->udi_local = false;
+
+		CDEBUG(D_NET, "applying udsp (%p) dst->rte\n",
+		       udsp);
+		return cbs[UDSP_APPLY_ON_PEERS](udi);
+	}
+
+	if (lnet_udsp_criteria_present(&udsp->udsp_dst)) {
+		/* destination priority rule */
+		if (!cbs[UDSP_APPLY_ON_PEERS])
+			return 0;
+
+		if (udsp->udsp_action_type !=
+		    EN_LNET_UDSP_ACTION_PRIORITY) {
+			CERROR("Bad action type. Expected %d got %d\n",
+			       EN_LNET_UDSP_ACTION_PRIORITY,
+			       udsp->udsp_action_type);
+			return 0;
+		}
+		udi->udi_match = &udsp->udsp_dst;
+		udi->udi_type = EN_LNET_UDSP_ACTION_PRIORITY;
+		/* the action type was validated above */
+		udi->udi_priority = udsp->udsp_action.udsp_priority;
+		udi->udi_local = true;
+
+		CDEBUG(D_NET, "applying udsp (%p) on destination\n",
+		       udsp);
+		return cbs[UDSP_APPLY_ON_PEERS](udi);
+	}
+
+	if (lnet_udsp_criteria_present(&udsp->udsp_src)) {
+		/* source priority rule */
+		if (!cbs[UDSP_APPLY_PRIO_ON_NIS])
+			return 0;
+
+		if (udsp->udsp_action_type !=
+		    EN_LNET_UDSP_ACTION_PRIORITY) {
+			CERROR("Bad action type. Expected %d got %d\n",
+			       EN_LNET_UDSP_ACTION_PRIORITY,
+			       udsp->udsp_action_type);
+			return 0;
+		}
+		udi->udi_match = &udsp->udsp_src;
+		udi->udi_type = EN_LNET_UDSP_ACTION_PRIORITY;
+		/* the action type was validated above */
+		udi->udi_priority = udsp->udsp_action.udsp_priority;
+		udi->udi_local = true;
+
+		CDEBUG(D_NET, "applying udsp (%p) on source\n",
+		       udsp);
+		/* The result is not propagated: net-level callbacks in this
+		 * slot return RULE_NOT_APPLICABLE for NID rules.
+		 * NOTE(review): confirm real failures should be dropped too.
+		 */
+		(void)cbs[UDSP_APPLY_PRIO_ON_NIS](udi);
+		return 0;
+	}
+
+	CERROR("Bad UDSP policy\n");
+	return 0;
+}
+
+/*
+ * Apply either the single policy \a udsp or, when \a udsp is NULL, every
+ * policy on the global list, walking the list from its tail to its head.
+ * For the whole-list case all policies are applied and the last failure
+ * code seen is returned, or 0 if none failed.
+ */
+static int
+lnet_udsp_apply_policies_helper(struct lnet_udsp *udsp, struct udsp_info *udi,
+				udsp_apply_rule *cbs)
+{
+	struct lnet_udsp *cur;
+	int last_failure = 0;
+	int rc;
+
+	if (udsp != NULL)
+		return lnet_udsp_apply_single_policy(udsp, udi, cbs);
+
+	list_for_each_entry_reverse(cur,
+				    &the_lnet.ln_udsp_list,
+				    udsp_on_list) {
+		rc = lnet_udsp_apply_single_policy(cur, udi, cbs);
+		if (rc != 0)
+			last_failure = rc;
+	}
+
+	return last_failure;
+}
+
+/*
+ * Apply every configured policy to the single local NI \a ni.
+ * Only source priority rules are considered.
+ */
+int
+lnet_udsp_apply_policies_on_ni(struct lnet_ni *ni)
+{
+	struct udsp_info udi = {
+		.udi_ni = ni,
+	};
+	udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM] = {
+		[UDSP_APPLY_PRIO_ON_NIS] = lnet_udsp_apply_rule_on_ni,
+	};
+
+	return lnet_udsp_apply_policies_helper(NULL, &udi, cbs);
+}
+
+/*
+ * Apply every configured policy to the single local net \a net.
+ * Registers the net-level priority and router callbacks.
+ */
+int
+lnet_udsp_apply_policies_on_net(struct lnet_net *net)
+{
+	struct udsp_info udi = {
+		.udi_net = net,
+	};
+	udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM] = {
+		[UDSP_APPLY_PRIO_ON_NIS] = lnet_udsp_apply_prio_rule_on_net,
+		[UDSP_APPLY_RTE_ON_NETS] = lnet_udsp_apply_rte_rule_on_net,
+	};
+
+	return lnet_udsp_apply_policies_helper(NULL, &udi, cbs);
+}
+
+/*
+ * Apply every configured policy to the single peer NI \a lpni.
+ * Only rules keyed on a destination descriptor are considered.
+ */
+int
+lnet_udsp_apply_policies_on_lpni(struct lnet_peer_ni *lpni)
+{
+	struct udsp_info udi = {
+		.udi_lpni = lpni,
+	};
+	udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM] = {
+		[UDSP_APPLY_ON_PEERS] = lnet_udsp_apply_rule_on_lpni,
+	};
+
+	return lnet_udsp_apply_policies_helper(NULL, &udi, cbs);
+}
+
+/*
+ * Apply every configured policy to the single peer net \a lpn.
+ * Only rules keyed on a destination descriptor are considered.
+ */
+int
+lnet_udsp_apply_policies_on_lpn(struct lnet_peer_net *lpn)
+{
+	struct udsp_info udi = {
+		.udi_lpn = lpn,
+	};
+	udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM] = {
+		[UDSP_APPLY_ON_PEERS] = lnet_udsp_apply_rule_on_lpn,
+	};
+
+	return lnet_udsp_apply_policies_helper(NULL, &udi, cbs);
+}
+
+/*
+ * Apply (or, with \a revert set, undo) \a udsp on every local and peer
+ * construct in the system; a NULL \a udsp means all configured policies.
+ * Takes LNET_LOCK_EX for the duration of the call.
+ */
+int
+lnet_udsp_apply_policies(struct lnet_udsp *udsp, bool revert)
+{
+	struct udsp_info udi = {
+		.udi_revert = revert,
+	};
+	udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM] = {
+		[UDSP_APPLY_ON_PEERS] = lnet_udsp_apply_rule_on_lpnis,
+		[UDSP_APPLY_PRIO_ON_NIS] = lnet_udsp_apply_rule_on_nis,
+		[UDSP_APPLY_RTE_ON_NETS] = lnet_udsp_apply_rte_rule_on_nets,
+	};
+	int rc;
+
+	lnet_net_lock(LNET_LOCK_EX);
+	rc = lnet_udsp_apply_policies_helper(udsp, &udi, cbs);
+	lnet_net_unlock(LNET_LOCK_EX);
+
+	return rc;
+}
+
+/*
+ * Look up the policy at list position \a idx (0-based, counted from the
+ * head of the global policy list).
+ *
+ * \retval pointer to the policy
+ * \retval NULL if \a idx is negative or past the end of the list
+ */
+struct lnet_udsp *
+lnet_udsp_get_policy(int idx)
+{
+	int i = 0;
+	struct lnet_udsp *udsp;
+
+	CDEBUG(D_NET, "Get UDSP at idx = %d\n", idx);
+
+	if (idx < 0)
+		return NULL;
+
+	list_for_each_entry(udsp, &the_lnet.ln_udsp_list, udsp_on_list) {
+		CDEBUG(D_NET, "iterating over upsp %d:%d:%d\n",
+		       udsp->udsp_idx, i, idx);
+		if (i == idx) {
+			CDEBUG(D_NET, "Found UDSP (%p)\n", udsp);
+			return udsp;
+		}
+		i++;
+	}
+
+	/* after a full walk the cursor does not point at a real policy,
+	 * so do not log it
+	 */
+	CDEBUG(D_NET, "UDSP at idx = %d not found\n", idx);
+
+	return NULL;
+}
+
+/*
+ * Insert \a new into the global policy list at position \a idx, or append
+ * it when no existing entry sits at that position (which includes a
+ * negative \a idx).
+ *
+ * If a policy with identical src/dst/rte descriptors already exists:
+ *  - when both are priority actions with different priorities, the
+ *    existing policy's priority is updated in place and 0 is returned;
+ *    \a new is NOT linked into the list in that case
+ *    (NOTE(review): confirm the caller frees \a new on this path);
+ *  - otherwise -EALREADY is returned.
+ *
+ * \retval 0 policy inserted, appended, or existing priority updated
+ * \retval -EALREADY an equivalent policy is already on the list
+ */
+int
+lnet_udsp_add_policy(struct lnet_udsp *new, int idx)
+{
+ struct lnet_udsp *udsp;
+ struct lnet_udsp *insert = NULL;
+ int i = 0;
+
+ list_for_each_entry(udsp, &the_lnet.ln_udsp_list, udsp_on_list) {
+ CDEBUG(D_NET, "found udsp i = %d:%d, idx = %d\n",
+ i, udsp->udsp_idx, idx);
+ /* remember the entry currently occupying the requested slot */
+ if (i == idx) {
+ insert = udsp;
+ new->udsp_idx = idx;
+ }
+ i++;
+ /* keep scanning the whole list so duplicates are always caught */
+ if (lnet_udsp_equal(udsp, new)) {
+ if (!lnet_udsp_action_equal(udsp, new) &&
+ udsp->udsp_action_type == EN_LNET_UDSP_ACTION_PRIORITY &&
+ new->udsp_action_type == EN_LNET_UDSP_ACTION_PRIORITY) {
+ udsp->udsp_action.udsp_priority = new->udsp_action.udsp_priority;
+ CDEBUG(D_NET, "udsp: %p index %d updated priority to %d\n",
+ udsp,
+ udsp->udsp_idx,
+ udsp->udsp_action.udsp_priority);
+ return 0;
+ }
+ return -EALREADY;
+ }
+ }
+
+ if (insert) {
+ /* adding after insert's predecessor places new before insert */
+ list_add(&new->udsp_on_list, insert->udsp_on_list.prev);
+ i = 0;
+ /* entries after the new one move down by one position */
+ list_for_each_entry(udsp,
+ &the_lnet.ln_udsp_list,
+ udsp_on_list) {
+ if (i <= idx) {
+ i++;
+ continue;
+ }
+ udsp->udsp_idx++;
+ }
+ } else {
+ /* i now equals the number of entries already on the list */
+ list_add_tail(&new->udsp_on_list, &the_lnet.ln_udsp_list);
+ new->udsp_idx = i;
+ }
+
+ CDEBUG(D_NET, "udsp: %p added at index %d\n", new, new->udsp_idx);
+
+ CDEBUG(D_NET, "udsp list:\n");
+ list_for_each_entry(udsp, &the_lnet.ln_udsp_list, udsp_on_list)
+ CDEBUG(D_NET, "udsp %p:%d\n", udsp, udsp->udsp_idx);
+
+ return 0;
+}
+
+/*
+ * Delete the policy whose udsp_idx equals \a idx: unlink it, revert its
+ * effect on the system and free it. Policies after it have their index
+ * decremented. A negative \a idx deletes all policies.
+ *
+ * Always returns 0, including when no policy has the given index.
+ */
+int
+lnet_udsp_del_policy(int idx)
+{
+	struct lnet_udsp *cur;
+	struct lnet_udsp *next;
+	bool removed = false;
+
+	if (idx < 0) {
+		lnet_udsp_destroy(false);
+		return 0;
+	}
+
+	CDEBUG(D_NET, "del udsp at idx = %d\n", idx);
+
+	list_for_each_entry_safe(cur,
+				 next,
+				 &the_lnet.ln_udsp_list,
+				 udsp_on_list) {
+		/* everything behind the removed entry moves up by one */
+		if (removed) {
+			cur->udsp_idx--;
+			continue;
+		}
+
+		if (cur->udsp_idx != idx)
+			continue;
+
+		list_del_init(&cur->udsp_on_list);
+		lnet_udsp_apply_policies(cur, true);
+		lnet_udsp_free(cur);
+		removed = true;
+	}
+
+	return 0;
+}
+
+/*
+ * Fill \a info with the selection state of the local NI \a ni: its NID
+ * priority and, when the NI is attached to a net, the net priority and
+ * up to LNET_MAX_SHOW_NUM_NID of the net's preferred router NIDs.
+ */
+static void
+lnet_udsp_get_ni_info(struct lnet_ioctl_construct_udsp_info *info,
+		      struct lnet_ni *ni)
+{
+	struct lnet_nid_list *ne;
+	struct lnet_net *net;
+	int i = 0;
+
+	/* assert before the first dereference of ni, not after it */
+	LASSERT(ni);
+
+	net = ni->ni_net;
+
+	info->cud_nid_priority = ni->ni_sel_priority;
+	if (!net)
+		return;
+
+	info->cud_net_priority = net->net_sel_priority;
+	list_for_each_entry(ne, &net->net_rtr_pref_nids, nl_list) {
+		/* the reply array holds at most LNET_MAX_SHOW_NUM_NID NIDs */
+		if (i >= LNET_MAX_SHOW_NUM_NID)
+			break;
+		info->cud_pref_rtr_nid[i++] = ne->nl_nid;
+	}
+}
+
+/*
+ * Fill \a info with the selection state of the peer NI \a lpni: its NID
+ * priority, its peer net priority, and up to LNET_MAX_SHOW_NUM_NID each
+ * of its preferred local NIDs and preferred router NIDs.
+ */
+static void
+lnet_udsp_get_peer_info(struct lnet_ioctl_construct_udsp_info *info,
+			struct lnet_peer_ni *lpni)
+{
+	struct lnet_nid_list *entry;
+	int n;
+
+	/* peer tree structure needs to be in existence */
+	LASSERT(lpni && lpni->lpni_peer_net &&
+		lpni->lpni_peer_net->lpn_peer);
+
+	info->cud_nid_priority = lpni->lpni_sel_priority;
+	CDEBUG(D_NET, "lpni %s has %d pref nids\n",
+	       libcfs_nid2str(lpni->lpni_nid),
+	       lpni->lpni_pref_nnids);
+
+	/* a single preferred NID is stored inline, several on a list */
+	if (lpni->lpni_pref_nnids == 1) {
+		info->cud_pref_nid[0] = lpni->lpni_pref.nid;
+	} else if (lpni->lpni_pref_nnids > 1) {
+		n = 0;
+		list_for_each_entry(entry, &lpni->lpni_pref.nids, nl_list) {
+			if (n >= LNET_MAX_SHOW_NUM_NID)
+				break;
+			info->cud_pref_nid[n++] = entry->nl_nid;
+		}
+	}
+
+	n = 0;
+	list_for_each_entry(entry, &lpni->lpni_rtr_pref_nids, nl_list) {
+		if (n >= LNET_MAX_SHOW_NUM_NID)
+			break;
+		info->cud_pref_rtr_nid[n++] = entry->nl_nid;
+	}
+
+	info->cud_net_priority = lpni->lpni_peer_net->lpn_sel_priority;
+}
+
+/*
+ * Report the UDSP-related state of the construct identified by
+ * info->cud_nid: a peer NI when info->cud_peer is set, a local NI
+ * otherwise. \a info is left untouched if the NID is not found.
+ */
+void
+lnet_udsp_get_construct_info(struct lnet_ioctl_construct_udsp_info *info)
+{
+	struct lnet_peer_ni *lpni;
+	struct lnet_ni *ni;
+
+	lnet_net_lock(0);
+	if (info->cud_peer) {
+		lpni = lnet_find_peer_ni_locked(info->cud_nid);
+		if (lpni) {
+			lnet_udsp_get_peer_info(info, lpni);
+			lnet_peer_ni_decref_locked(lpni);
+		} else {
+			CDEBUG(D_NET, "nid %s is not found\n",
+			       libcfs_nid2str(info->cud_nid));
+		}
+	} else {
+		ni = lnet_nid2ni_locked(info->cud_nid, 0);
+		if (ni)
+			lnet_udsp_get_ni_info(info, ni);
+	}
+	lnet_net_unlock(0);
+}
+
+/*
+ * Allocate a zeroed policy from the UDSP slab cache and initialise all of
+ * its list heads. Returns NULL if the allocation fails.
+ */
+struct lnet_udsp *
+lnet_udsp_alloc(void)
+{
+	struct lnet_udsp *policy;
+
+	policy = kmem_cache_alloc(lnet_udsp_cachep, GFP_NOFS | __GFP_ZERO);
+	if (policy == NULL)
+		return NULL;
+
+	INIT_LIST_HEAD(&policy->udsp_on_list);
+
+	/* source descriptor */
+	INIT_LIST_HEAD(&policy->udsp_src.ud_addr_range);
+	INIT_LIST_HEAD(&policy->udsp_src.ud_net_id.udn_net_num_range);
+
+	/* destination descriptor */
+	INIT_LIST_HEAD(&policy->udsp_dst.ud_addr_range);
+	INIT_LIST_HEAD(&policy->udsp_dst.ud_net_id.udn_net_num_range);
+
+	/* router descriptor */
+	INIT_LIST_HEAD(&policy->udsp_rte.ud_addr_range);
+	INIT_LIST_HEAD(&policy->udsp_rte.ud_net_id.udn_net_num_range);
+
+	CDEBUG(D_MALLOC, "udsp alloc %p\n", policy);
+	return policy;
+}
+
+/*
+ * Release the expression storage attached to \a nid_descr. Nothing is
+ * done for a descriptor that is not in use (no net type set).
+ *
+ * Only the buffer is freed; the list heads inside \a nid_descr are not
+ * re-initialised, so the descriptor must not be walked afterwards.
+ */
+static void
+lnet_udsp_nid_descr_free(struct lnet_ud_nid_descr *nid_descr)
+{
+ struct list_head *net_range = &nid_descr->ud_net_id.udn_net_num_range;
+
+ if (!lnet_udsp_criteria_present(nid_descr))
+ return;
+
+ /* memory management is a bit tricky here. When we allocate the
+ * memory to store the NID descriptor we allocate a large buffer
+ * for all the data, so we need to free the entire buffer at
+ * once. If the net is present the net_range->next points to that
+ * buffer otherwise if the ud_addr_range is present then it's the
+ * ud_addr_range.next
+ */
+ /* NOTE(review): this relies on the first list entry sitting at the
+ * very start of the buffer - confirm against the allocation code
+ */
+ if (!list_empty(net_range))
+ LIBCFS_FREE(net_range->next, nid_descr->ud_mem_size);
+ else if (!list_empty(&nid_descr->ud_addr_range))
+ LIBCFS_FREE(nid_descr->ud_addr_range.next,
+ nid_descr->ud_mem_size);
+}
+
+/*
+ * Free \a udsp: release the storage of its source, destination and
+ * router descriptors, then return the policy to the UDSP slab cache.
+ */
+void
+lnet_udsp_free(struct lnet_udsp *udsp)
+{
+	struct lnet_ud_nid_descr *descrs[] = {
+		&udsp->udsp_src,
+		&udsp->udsp_dst,
+		&udsp->udsp_rte,
+	};
+	size_t i;
+
+	for (i = 0; i < sizeof(descrs) / sizeof(descrs[0]); i++)
+		lnet_udsp_nid_descr_free(descrs[i]);
+
+	CDEBUG(D_MALLOC, "udsp free %p\n", udsp);
+	kmem_cache_free(lnet_udsp_cachep, udsp);
+}
+
+/*
+ * Remove and free every policy on the global list. Unless the system is
+ * shutting down, the effect of each policy is reverted before it is
+ * freed.
+ */
+void
+lnet_udsp_destroy(bool shutdown)
+{
+	struct lnet_udsp *policy;
+
+	CDEBUG(D_NET, "Destroying UDSPs in the system\n");
+
+	/* pop policies off the head until the list is drained */
+	while (!list_empty(&the_lnet.ln_udsp_list)) {
+		policy = list_first_entry(&the_lnet.ln_udsp_list,
+					  struct lnet_udsp, udsp_on_list);
+		list_del(&policy->udsp_on_list);
+		if (!shutdown)
+			lnet_udsp_apply_policies(policy, true);
+		lnet_udsp_free(policy);
+	}
+}
+
+/*
+ * Compute how many bytes \a descr occupies once marshaled: the fixed
+ * descriptor header, one lnet_expressions per address expression and one
+ * lnet_range_expr per range (net ranges plus address ranges).
+ * An unused descriptor takes only the fixed header.
+ */
+static size_t
+lnet_size_marshaled_nid_descr(struct lnet_ud_nid_descr *descr)
+{
+	struct list_head *net_ranges = &descr->ud_net_id.udn_net_num_range;
+	size_t size = sizeof(struct lnet_ioctl_udsp_descr);
+	struct cfs_expr_list *expr;
+	int n_exprs = 0;
+	int n_ranges = 0;
+
+	if (!lnet_udsp_criteria_present(descr))
+		return size;
+
+	/* we always have one net expression; count the ranges inside it */
+	if (!list_empty(net_ranges)) {
+		expr = list_first_entry(net_ranges,
+					struct cfs_expr_list, el_link);
+		n_ranges = lnet_get_list_len(&expr->el_exprs);
+	}
+
+	/* add the address expressions and the ranges they contain */
+	list_for_each_entry(expr, &descr->ud_addr_range, el_link) {
+		n_exprs++;
+		n_ranges += lnet_get_list_len(&expr->el_exprs);
+	}
+
+	size += sizeof(struct lnet_expressions) * n_exprs;
+	size += sizeof(struct lnet_range_expr) * n_ranges;
+
+	return size;
+}
+
+/*
+ * Return the number of bytes needed to marshal \a udsp: the fixed ioctl
+ * header plus the marshaled size of its three NID descriptors.
+ */
+size_t
+lnet_get_udsp_size(struct lnet_udsp *udsp)
+{
+	size_t src_size = lnet_size_marshaled_nid_descr(&udsp->udsp_src);
+	size_t dst_size = lnet_size_marshaled_nid_descr(&udsp->udsp_dst);
+	size_t rte_size = lnet_size_marshaled_nid_descr(&udsp->udsp_rte);
+	size_t total;
+
+	total = sizeof(struct lnet_ioctl_udsp) + src_size + dst_size +
+		rte_size;
+
+	CDEBUG(D_NET, "get udsp (%p) size: %d\n", udsp, (int)total);
+
+	return total;
+}
+
+/*
+ * Copy every range of \a expr to the user buffer at *\a bulk as a
+ * sequence of struct lnet_range_expr, advancing *\a bulk and reducing
+ * *\a bulk_size by the bytes written.
+ *
+ * \retval 0 all ranges copied
+ * \retval -ENOSPC the remaining user buffer is too small for a range
+ * \retval -EFAULT the copy to user space failed
+ */
+static int
+copy_exprs(struct cfs_expr_list *expr, void __user **bulk,
+	   __u32 *bulk_size)
+{
+	struct cfs_range_expr *range;
+	struct lnet_range_expr range_expr;
+
+	/* copy over the net range expressions to the bulk */
+	list_for_each_entry(range, &expr->el_exprs, re_link) {
+		/* never write past the space the caller advertised, and
+		 * never let the unsigned remaining size wrap around
+		 */
+		if (*bulk_size < sizeof(range_expr)) {
+			CDEBUG(D_NET, "No space left to copy range_expr\n");
+			return -ENOSPC;
+		}
+		range_expr.re_lo = range->re_lo;
+		range_expr.re_hi = range->re_hi;
+		range_expr.re_stride = range->re_stride;
+		CDEBUG(D_NET, "Copy Range %u:%u:%u\n",
+		       range_expr.re_lo, range_expr.re_hi,
+		       range_expr.re_stride);
+		if (copy_to_user(*bulk, &range_expr, sizeof(range_expr))) {
+			CDEBUG(D_NET, "Failed to copy range_expr\n");
+			return -EFAULT;
+		}
+		*bulk += sizeof(range_expr);
+		*bulk_size -= sizeof(range_expr);
+	}
+
+	return 0;
+}
+
+/* Marshal one NID descriptor to the user-space bulk buffer.
+ *
+ * The output is a struct lnet_ioctl_udsp_descr, followed (when criteria
+ * are present) by the ranges of the first net number expression and then,
+ * for every address expression, a struct lnet_expressions header and its
+ * ranges.
+ *
+ * \param nid_descr	descriptor to marshal
+ * \param type		4-byte tag stored in ud_descr_type ("SRC", "DST",
+ *			"RTE"); read as a __u32, so it must be at least
+ *			four bytes long including the terminating NUL
+ * \param bulk		[IN/OUT] user-space write cursor
+ * \param bulk_size	[IN/OUT] byte counter, reduced by what was written
+ *
+ * \retval 0		success
+ * \retval -EFAULT	a copy_to_user() call failed
+ */
+static int
+copy_nid_range(struct lnet_ud_nid_descr *nid_descr, char *type,
+	       void __user **bulk, __u32 *bulk_size)
+{
+	struct lnet_ioctl_udsp_descr ioc_udsp_descr;
+	struct cfs_expr_list *expr;
+	struct lnet_expressions ioc_expr;
+	int expr_count;
+	int net_expr_count;
+	int rc;
+
+	memset(&ioc_udsp_descr, 0, sizeof(ioc_udsp_descr));
+	ioc_udsp_descr.iud_src_hdr.ud_descr_type = *(__u32 *)type;
+
+	/* if criteria not present, copy over the static part of the NID
+	 * descriptor
+	 */
+	if (!lnet_udsp_criteria_present(nid_descr)) {
+		CDEBUG(D_NET, "Descriptor %u:%u:%u:%u\n",
+		       ioc_udsp_descr.iud_src_hdr.ud_descr_type,
+		       ioc_udsp_descr.iud_src_hdr.ud_descr_count,
+		       ioc_udsp_descr.iud_net.ud_net_type,
+		       ioc_udsp_descr.iud_net.ud_net_num_expr.le_count);
+		if (copy_to_user(*bulk, &ioc_udsp_descr,
+				 sizeof(ioc_udsp_descr))) {
+			CDEBUG(D_NET, "failed to copy ioc_udsp_descr\n");
+			return -EFAULT;
+		}
+		*bulk += sizeof(ioc_udsp_descr);
+		*bulk_size -= sizeof(ioc_udsp_descr);
+		return 0;
+	}
+
+	expr_count = lnet_get_list_len(&nid_descr->ud_addr_range);
+
+	/* copy the net information */
+	if (!list_empty(&nid_descr->ud_net_id.udn_net_num_range)) {
+		expr = list_first_entry(&nid_descr->ud_net_id.udn_net_num_range,
+					struct cfs_expr_list, el_link);
+		net_expr_count = lnet_get_list_len(&expr->el_exprs);
+	} else {
+		net_expr_count = 0;
+	}
+
+	/* set the total expression count */
+	ioc_udsp_descr.iud_src_hdr.ud_descr_count = expr_count;
+	ioc_udsp_descr.iud_net.ud_net_type =
+		nid_descr->ud_net_id.udn_net_type;
+	ioc_udsp_descr.iud_net.ud_net_num_expr.le_count = net_expr_count;
+
+	CDEBUG(D_NET, "Descriptor %u:%u:%u:%u\n",
+	       ioc_udsp_descr.iud_src_hdr.ud_descr_type,
+	       ioc_udsp_descr.iud_src_hdr.ud_descr_count,
+	       ioc_udsp_descr.iud_net.ud_net_type,
+	       ioc_udsp_descr.iud_net.ud_net_num_expr.le_count);
+
+	/* copy over the header info to the bulk */
+	if (copy_to_user(*bulk, &ioc_udsp_descr, sizeof(ioc_udsp_descr))) {
+		CDEBUG(D_NET, "Failed to copy data\n");
+		return -EFAULT;
+	}
+	*bulk += sizeof(ioc_udsp_descr);
+	*bulk_size -= sizeof(ioc_udsp_descr);
+
+	/* copy over the net num expression if it exists; expr is only
+	 * valid here because net_expr_count != 0 implies the list above
+	 * was not empty
+	 */
+	if (net_expr_count) {
+		rc = copy_exprs(expr, bulk, bulk_size);
+		if (rc)
+			return rc;
+	}
+
+	/* copy the address range */
+	list_for_each_entry(expr, &nid_descr->ud_addr_range, el_link) {
+		ioc_expr.le_count = lnet_get_list_len(&expr->el_exprs);
+		if (copy_to_user(*bulk, &ioc_expr, sizeof(ioc_expr))) {
+			CDEBUG(D_NET, "failed to copy ioc_expr\n");
+			return -EFAULT;
+		}
+		*bulk += sizeof(ioc_expr);
+		*bulk_size -= sizeof(ioc_expr);
+
+		rc = copy_exprs(expr, bulk, bulk_size);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
+
+/* Marshal @udsp into the user-space buffer referenced by @ioc_udsp.
+ *
+ * The index, action type and priority are stored in @ioc_udsp itself;
+ * the SRC, DST and RTE descriptors are copied to ioc_udsp->iou_bulk.
+ *
+ * \retval 0		success
+ * \retval -EINVAL	@ioc_udsp is NULL
+ * \retval -ENOSPC	ioc_len + iou_bulk_size differs from
+ *			lnet_get_udsp_size(udsp)
+ * \retval -EFAULT	propagated from copy_nid_range()
+ */
+int
+lnet_udsp_marshal(struct lnet_udsp *udsp, struct lnet_ioctl_udsp *ioc_udsp)
+{
+ int rc = -ENOMEM;
+ void __user *bulk;
+ __u32 bulk_size;
+
+ if (!ioc_udsp)
+ return -EINVAL;
+
+ bulk = ioc_udsp->iou_bulk;
+ bulk_size = ioc_udsp->iou_hdr.ioc_len +
+ ioc_udsp->iou_bulk_size;
+
+ CDEBUG(D_NET, "marshal udsp (%p)\n", udsp);
+ CDEBUG(D_NET, "MEM -----> bulk: %p:0x%x\n", bulk, bulk_size);
+ /* make sure user space allocated enough buffer to marshal the
+ * udsp
+ */
+ if (bulk_size != lnet_get_udsp_size(udsp)) {
+ rc = -ENOSPC;
+ goto fail;
+ }
+
+ ioc_udsp->iou_idx = udsp->udsp_idx;
+ ioc_udsp->iou_action_type = udsp->udsp_action_type;
+ ioc_udsp->iou_action.priority = udsp->udsp_action.udsp_priority;
+
+ /* the fixed header is accounted for; what remains is the bulk part */
+ bulk_size -= sizeof(*ioc_udsp);
+
+ rc = copy_nid_range(&udsp->udsp_src, "SRC", &bulk, &bulk_size);
+ if (rc)
+ goto fail;
+
+ rc = copy_nid_range(&udsp->udsp_dst, "DST", &bulk, &bulk_size);
+ if (rc)
+ goto fail;
+
+ rc = copy_nid_range(&udsp->udsp_rte, "RTE", &bulk, &bulk_size);
+ if (rc)
+ goto fail;
+
+ CDEBUG(D_NET, "MEM <----- bulk: %p\n", bulk);
+
+ /* we should've consumed the entire buffer */
+ LASSERT(bulk_size == 0);
+ return 0;
+
+fail:
+ CERROR("Failed to marshal udsp: %d\n", rc);
+ return rc;
+}
+
+/* Build one cfs_expr_list, with its cfs_range_expr entries, inside the
+ * pre-allocated area at *buf from the marshaled ranges at *bulk, and
+ * append it to @list.
+ *
+ * \param bulk	[IN/OUT] read cursor into the marshaled data
+ * \param buf	[IN/OUT] cursor into the destination memory; advanced by
+ *		one cfs_expr_list plus one cfs_range_expr per range
+ * \param list	list the new expression is appended to
+ * \param count	number of ranges to read; 0 makes the call a no-op; -1
+ *		means a struct lnet_expressions header precedes the
+ *		ranges at *bulk and supplies the count
+ *
+ * No bounds are checked here; NOTE(review): the caller is expected to
+ * have validated the sizes of both areas - confirm.
+ */
+static void
+copy_range_info(void **bulk, void **buf, struct list_head *list,
+ int count)
+{
+ struct lnet_range_expr *range_expr;
+ struct cfs_range_expr *range;
+ struct cfs_expr_list *exprs;
+ int range_count = count;
+ int i;
+
+ if (range_count == 0)
+ return;
+
+ if (range_count == -1) {
+ struct lnet_expressions *e;
+
+ e = *bulk;
+ range_count = e->le_count;
+ *bulk += sizeof(*e);
+ }
+
+ /* the expression list header is carved out of *buf first */
+ exprs = *buf;
+ INIT_LIST_HEAD(&exprs->el_link);
+ INIT_LIST_HEAD(&exprs->el_exprs);
+ list_add_tail(&exprs->el_link, list);
+ *buf += sizeof(*exprs);
+
+ for (i = 0; i < range_count; i++) {
+ range_expr = *bulk;
+ range = *buf;
+ INIT_LIST_HEAD(&range->re_link);
+ range->re_lo = range_expr->re_lo;
+ range->re_hi = range_expr->re_hi;
+ range->re_stride = range_expr->re_stride;
+ CDEBUG(D_NET, "Copy Range %u:%u:%u\n",
+ range->re_lo,
+ range->re_hi,
+ range->re_stride);
+ list_add_tail(&range->re_link, &exprs->el_exprs);
+ *bulk += sizeof(*range_expr);
+ *buf += sizeof(*range);
+ }
+}
+
+/* Validate one marshaled NID descriptor at *bulk and, when it carries
+ * criteria, unpack it into @nid_descr.
+ *
+ * The descriptor is laid out as a struct lnet_ioctl_udsp_descr, followed
+ * by the net number ranges, followed by ud_descr_count address
+ * expressions, each a struct lnet_expressions header plus le_count
+ * struct lnet_range_expr entries. Every header is bounds-checked against
+ * the remaining buffer before any of its fields is read, and all sizes
+ * are computed in 64 bits so counts supplied by the caller cannot wrap
+ * the arithmetic.
+ *
+ * \param nid_descr	descriptor to fill in
+ * \param type		expected 4-byte descriptor tag ("SRC", "DST", "RTE")
+ * \param bulk		[IN/OUT] read cursor, advanced past the descriptor
+ * \param bulk_size	[IN/OUT] bytes left at *bulk
+ *
+ * \retval 0		success
+ * \retval -EINVAL	truncated buffer, oversized descriptor or wrong tag
+ * \retval -ENOMEM	allocation failure
+ */
+static int
+copy_ioc_udsp_descr(struct lnet_ud_nid_descr *nid_descr, char *type,
+		    void **bulk, __u32 *bulk_size)
+{
+	struct lnet_ioctl_udsp_descr *ioc_nid = *bulk;
+	struct lnet_expressions *exprs;
+	__u32 descr_type;
+	__u32 expr_count;
+	__u32 net_range_count;
+	__u32 remaining_size = *bulk_size;
+	__u32 alloc_size;
+	__u32 i;
+	__u64 range_count;
+	__u64 size;
+	__u64 total;
+	void *tmp = *bulk;
+	void *buf;
+	size_t range_expr_s = sizeof(struct lnet_range_expr);
+	size_t lnet_exprs_s = sizeof(struct lnet_expressions);
+
+	CDEBUG(D_NET, "%s: bulk = %p:%u\n", type, *bulk, *bulk_size);
+
+	/* the static part must fit before any field of it is read */
+	if (remaining_size < sizeof(*ioc_nid)) {
+		CERROR("Truncated userspace udsp buffer given\n");
+		return -EINVAL;
+	}
+
+	/* criteria not present, skip over the static part of the
+	 * bulk, which is included for each NID descriptor
+	 */
+	if (ioc_nid->iud_net.ud_net_type == 0) {
+		*bulk += sizeof(*ioc_nid);
+		*bulk_size = remaining_size - sizeof(*ioc_nid);
+		return 0;
+	}
+
+	descr_type = ioc_nid->iud_src_hdr.ud_descr_type;
+	if (descr_type != *(__u32 *)type) {
+		/* print the tag bytes in memory order, as they were built */
+		const char *given = (const char *)&descr_type;
+
+		CERROR("Bad NID descriptor type. Expected %s, given %c%c%c\n",
+		       type, given[0], given[1], given[2]);
+		return -EINVAL;
+	}
+
+	/* calculate the total size to verify we have enough buffer.
+	 * Start of by finding how many ranges there are for the net
+	 * expression.
+	 */
+	net_range_count = ioc_nid->iud_net.ud_net_num_expr.le_count;
+	size = sizeof(*ioc_nid) + ((__u64)net_range_count * range_expr_s);
+	if (size > remaining_size) {
+		CERROR("Truncated userspace udsp buffer given\n");
+		return -EINVAL;
+	}
+	remaining_size -= size;
+	range_count = net_range_count;
+
+	CDEBUG(D_NET, "Total net num ranges in %s: %d:%u\n", type,
+	       (int)range_count, (__u32)size);
+	/* the number of expressions for the NID. IE 4 for IP, 1 for GNI */
+	expr_count = ioc_nid->iud_src_hdr.ud_descr_count;
+	CDEBUG(D_NET, "addr as %d exprs\n", (int)expr_count);
+	/* point tmp to the beginning of the NID expressions */
+	tmp += size;
+	for (i = 0; i < expr_count; i++) {
+		/* the expression header must fit before le_count is read */
+		if (remaining_size < lnet_exprs_s) {
+			CERROR("Truncated userspace udsp buffer given\n");
+			return -EINVAL;
+		}
+		/* get the number of ranges per expression */
+		exprs = tmp;
+		size = ((__u64)range_expr_s * exprs->le_count) + lnet_exprs_s;
+		if (size > remaining_size) {
+			CERROR("Truncated userspace udsp buffer given\n");
+			return -EINVAL;
+		}
+		remaining_size -= size;
+		range_count += exprs->le_count;
+		CDEBUG(D_NET, "expr %d:%d:%u:%d:%d\n", (int)i,
+		       (int)exprs->le_count, (__u32)size,
+		       (int)remaining_size, (int)range_count);
+		tmp += size;
+	}
+
+	*bulk_size = remaining_size;
+
+	/* copy over the net type */
+	nid_descr->ud_net_id.udn_net_type = ioc_nid->iud_net.ud_net_type;
+
+	CDEBUG(D_NET, "%u\n", nid_descr->ud_net_id.udn_net_type);
+
+	/* Nothing would be linked into either list, so an allocation made
+	 * here could never be reached again to be freed.
+	 */
+	if (net_range_count == 0 && expr_count == 0) {
+		nid_descr->ud_mem_size = 0;
+		*bulk += sizeof(*ioc_nid);
+		return 0;
+	}
+
+	/* allocate the total memory required to copy this NID descriptor;
+	 * the in-memory structures are larger than the marshaled ones, so
+	 * make sure the total still fits the 32-bit size that is recorded
+	 */
+	total = ((__u64)sizeof(struct cfs_expr_list) *
+		 ((__u64)expr_count + 1)) +
+		((__u64)sizeof(struct cfs_range_expr) * range_count);
+	alloc_size = (__u32)total;
+	if (alloc_size != total) {
+		CERROR("Truncated userspace udsp buffer given\n");
+		return -EINVAL;
+	}
+	LIBCFS_ALLOC(buf, alloc_size);
+	if (!buf)
+		return -ENOMEM;
+
+	/* store the amount of memory allocated so we can free it later on */
+	nid_descr->ud_mem_size = alloc_size;
+
+	/* copy over the net number range */
+	*bulk += sizeof(*ioc_nid);
+	CDEBUG(D_NET, "bulk = %p\n", *bulk);
+	copy_range_info(bulk, &buf, &nid_descr->ud_net_id.udn_net_num_range,
+			net_range_count);
+	CDEBUG(D_NET, "bulk = %p\n", *bulk);
+
+	/* copy over the NID descriptor */
+	for (i = 0; i < expr_count; i++) {
+		copy_range_info(bulk, &buf, &nid_descr->ud_addr_range, -1);
+		CDEBUG(D_NET, "bulk = %p\n", *bulk);
+	}
+
+	return 0;
+}
+
+/* Build a UDSP from the marshaled buffer at @bulk and add it as a policy.
+ *
+ * \param bulk		marshaled struct lnet_ioctl_udsp followed by the
+ *			SRC, DST and RTE descriptors
+ * \param bulk_size	number of bytes available at @bulk
+ *
+ * \retval -ENOSPC	@bulk_size is smaller than struct lnet_ioctl_udsp
+ * \retval -ENOMEM	lnet_udsp_alloc() failed
+ * \retval negative	error from copy_ioc_udsp_descr()
+ * \retval other	whatever lnet_udsp_add_policy() returns
+ */
+int
+lnet_udsp_demarshal_add(void *bulk, __u32 bulk_size)
+{
+ struct lnet_ioctl_udsp *ioc_udsp;
+ struct lnet_udsp *udsp;
+ int rc = -ENOMEM;
+ int idx;
+
+ if (bulk_size < sizeof(*ioc_udsp))
+ return -ENOSPC;
+
+ udsp = lnet_udsp_alloc();
+ if (!udsp)
+ return rc;
+
+ ioc_udsp = bulk;
+
+ udsp->udsp_action_type = ioc_udsp->iou_action_type;
+ udsp->udsp_action.udsp_priority = ioc_udsp->iou_action.priority;
+ idx = ioc_udsp->iou_idx;
+
+ CDEBUG(D_NET, "demarshal descr %u:%u:%d:%u\n", udsp->udsp_action_type,
+ udsp->udsp_action.udsp_priority, idx, bulk_size);
+
+ /* step over the fixed header to the first NID descriptor */
+ bulk += sizeof(*ioc_udsp);
+ bulk_size -= sizeof(*ioc_udsp);
+
+ rc = copy_ioc_udsp_descr(&udsp->udsp_src, "SRC", &bulk, &bulk_size);
+ if (rc < 0)
+ goto free_udsp;
+
+ rc = copy_ioc_udsp_descr(&udsp->udsp_dst, "DST", &bulk, &bulk_size);
+ if (rc < 0)
+ goto free_udsp;
+
+ rc = copy_ioc_udsp_descr(&udsp->udsp_rte, "RTE", &bulk, &bulk_size);
+ if (rc < 0)
+ goto free_udsp;
+
+ /* NOTE(review): udsp is not freed here if lnet_udsp_add_policy()
+ * fails - confirm that it takes ownership on every path
+ */
+ return lnet_udsp_add_policy(udsp, idx);
+
+free_udsp:
+ lnet_udsp_free(udsp);
+ return rc;
+}
lib_LTLIBRARIES = liblnetconfig.la
liblnetconfig_la_SOURCES = liblnetconfig.c liblnetconfig.h \
- liblnetconfig_lnd.c liblnd.h cyaml.c cyaml.h
+ liblnetconfig_lnd.c liblnd.h cyaml.c cyaml.h \
+ liblnetconfig_udsp.c
liblnetconfig_la_CPPFLAGS = -D_LARGEFILE64_SOURCE=1 -D_FILE_OFFSET_BITS=64 \
-DLUSTRE_UTILS=1 -fPIC
liblnetconfig_la_LDFLAGS = -L$(top_builddir)/libcfs/libcfs -lyaml -lm \
#include <glob.h>
#include <libcfs/util/param.h>
-#define CONFIG_CMD "configure"
-#define UNCONFIG_CMD "unconfigure"
-#define ADD_CMD "add"
-#define DEL_CMD "del"
-#define SHOW_CMD "show"
-#define DBG_CMD "dbg"
-#define MANAGE_CMD "manage"
-
-#define MAX_NUM_IPS 128
-
-#define modparam_path "/sys/module/lnet/parameters/"
-#define o2ib_modparam_path "/sys/module/ko2iblnd/parameters/"
-#define gni_nid_path "/proc/cray_xt/"
-
#ifndef HAVE_USRSPC_RDMA_PS_TCP
#define RDMA_PS_TCP 0x0106
#endif
return NULL;
}
+/* Attach a "udsp info" object to @net_node describing @udsp_info.
+ *
+ * The object holds "net priority" and "nid priority" numbers and, when
+ * cud_pref_rtr_nid[0] is non-zero, a "Preferred gateway NIDs" object with
+ * one "NID-<i>" string per entry, up to the first zero entry or
+ * LNET_MAX_SHOW_NUM_NID entries.
+ *
+ * Returns LUSTRE_CFG_RC_NO_ERR on success, or LUSTRE_CFG_RC_OUT_OF_MEM
+ * when any cYAML_create_*() call returns NULL.
+ */
+static int
+create_local_udsp_info(struct lnet_ioctl_construct_udsp_info *udsp_info,
+ struct cYAML *net_node)
+{
+ char tmp[LNET_MAX_STR_LEN];
+ struct cYAML *udsp_net;
+ bool created = false;
+ struct cYAML *pref;
+ int i;
+
+ /* add the UDSP info */
+ udsp_net = cYAML_create_object(net_node, "udsp info");
+ if (!udsp_net)
+ return LUSTRE_CFG_RC_OUT_OF_MEM;
+
+ if (!cYAML_create_number(udsp_net, "net priority",
+ (int) udsp_info->cud_net_priority))
+ return LUSTRE_CFG_RC_OUT_OF_MEM;
+
+ if (!cYAML_create_number(udsp_net, "nid priority",
+ (int)udsp_info->cud_nid_priority))
+ return LUSTRE_CFG_RC_OUT_OF_MEM;
+
+ pref = udsp_net;
+
+ for (i = 0; i < LNET_MAX_SHOW_NUM_NID; i++) {
+ memset(tmp, 0, LNET_MAX_STR_LEN);
+ /* a zero NID terminates the list */
+ if (udsp_info->cud_pref_rtr_nid[i] == 0)
+ break;
+ /* the container object is only created once a NID exists */
+ if (!created) {
+ pref = cYAML_create_object(udsp_net,
+ "Preferred gateway NIDs");
+ if (!pref)
+ return LUSTRE_CFG_RC_OUT_OF_MEM;
+ created = true;
+ }
+ snprintf(tmp, sizeof(tmp), "NID-%d", i);
+ if (!cYAML_create_string(pref, tmp,
+ libcfs_nid2str(udsp_info->cud_pref_rtr_nid[i])))
+ return LUSTRE_CFG_RC_OUT_OF_MEM;
+ }
+
+ return LUSTRE_CFG_RC_NO_ERR;
+}
+
+/* Attach a "udsp info" object to @nid_node describing @udsp_info.
+ *
+ * The object holds "net priority" and "nid priority" numbers, plus two
+ * optional child objects, each created only when its first array entry
+ * is non-zero: "Preferred gateway NIDs" (from cud_pref_rtr_nid) and
+ * "Preferred source NIDs" (from cud_pref_nid). Each lists "NID-<i>"
+ * strings up to the first zero entry or LNET_MAX_SHOW_NUM_NID entries.
+ *
+ * Returns LUSTRE_CFG_RC_NO_ERR on success, or LUSTRE_CFG_RC_OUT_OF_MEM
+ * when any cYAML_create_*() call returns NULL.
+ */
+static int
+create_remote_udsp_info(struct lnet_ioctl_construct_udsp_info *udsp_info,
+ struct cYAML *nid_node)
+{
+ char tmp[LNET_MAX_STR_LEN];
+ struct cYAML *udsp_nid;
+ bool created = false;
+ struct cYAML *pref;
+ int i;
+
+ /* add the UDSP info */
+ udsp_nid = cYAML_create_object(nid_node, "udsp info");
+ if (!udsp_nid)
+ return LUSTRE_CFG_RC_OUT_OF_MEM;
+
+ if (!cYAML_create_number(udsp_nid, "net priority",
+ (int) udsp_info->cud_net_priority))
+ return LUSTRE_CFG_RC_OUT_OF_MEM;
+
+ if (!cYAML_create_number(udsp_nid, "nid priority",
+ (int) udsp_info->cud_nid_priority))
+ return LUSTRE_CFG_RC_OUT_OF_MEM;
+
+ /* first pass: preferred gateway (router) NIDs */
+ pref = udsp_nid;
+ for (i = 0; i < LNET_MAX_SHOW_NUM_NID; i++) {
+ memset(tmp, 0, LNET_MAX_STR_LEN);
+ if (udsp_info->cud_pref_rtr_nid[i] == 0)
+ break;
+ if (!created) {
+ pref = cYAML_create_object(udsp_nid,
+ "Preferred gateway NIDs");
+ if (!pref)
+ return LUSTRE_CFG_RC_OUT_OF_MEM;
+ created = true;
+ }
+ snprintf(tmp, sizeof(tmp), "NID-%d", i);
+ if (!cYAML_create_string(pref, tmp,
+ libcfs_nid2str(udsp_info->cud_pref_rtr_nid[i])))
+ return LUSTRE_CFG_RC_OUT_OF_MEM;
+ }
+
+ /* second pass: preferred source NIDs, same scheme */
+ pref = udsp_nid;
+ created = false;
+ for (i = 0; i < LNET_MAX_SHOW_NUM_NID; i++) {
+ memset(tmp, 0, LNET_MAX_STR_LEN);
+ if (udsp_info->cud_pref_nid[i] == 0)
+ break;
+ if (!created) {
+ pref = cYAML_create_object(udsp_nid,
+ "Preferred source NIDs");
+ if (!pref)
+ return LUSTRE_CFG_RC_OUT_OF_MEM;
+ created = true;
+ }
+ snprintf(tmp, sizeof(tmp), "NID-%d", i);
+ if (!cYAML_create_string(pref, tmp,
+ libcfs_nid2str(udsp_info->cud_pref_nid[i])))
+ return LUSTRE_CFG_RC_OUT_OF_MEM;
+ }
+
+ return LUSTRE_CFG_RC_NO_ERR;
+}
+
int lustre_lnet_show_net(char *nw, int detail, int seq_no,
struct cYAML **show_rc, struct cYAML **err_rc,
bool backup)
struct lnet_ioctl_element_stats *stats;
struct lnet_ioctl_element_msg_stats msg_stats;
struct lnet_ioctl_local_ni_hstats hstats;
+ struct lnet_ioctl_construct_udsp_info udsp_info;
__u32 net = LNET_NET_ANY;
__u32 prev_net = LNET_NET_ANY;
int rc = LUSTRE_CFG_RC_OUT_OF_MEM, i, j;
== NULL)
goto out;
+ if (detail < 4)
+ goto continue_without_udsp_info;
+
+ LIBCFS_IOC_INIT_V2(udsp_info, cud_hdr);
+ udsp_info.cud_nid = ni_data->lic_nid;
+ udsp_info.cud_peer = false;
+ rc = l_ioctl(LNET_DEV_ID,
+ IOC_LIBCFS_GET_CONST_UDSP_INFO,
+ &udsp_info);
+ if (rc != 0) {
+ l_errno = errno;
+ goto continue_without_udsp_info;
+ }
+
+ rc = create_local_udsp_info(&udsp_info, item);
+ if (rc) {
+ l_errno = errno;
+ goto out;
+ }
+
+continue_without_udsp_info:
if (detail < 2)
goto continue_without_msg_stats;
struct lnet_ioctl_element_stats *lpni_stats;
struct lnet_ioctl_element_msg_stats *msg_stats;
struct lnet_ioctl_peer_ni_hstats *hstats;
+ struct lnet_ioctl_construct_udsp_info udsp_info;
lnet_nid_t *nidp;
int rc = LUSTRE_CFG_RC_OUT_OF_MEM;
int i, j, k;
if (backup)
continue;
+ if (detail < 4)
+ goto continue_without_udsp_info;
+
+ LIBCFS_IOC_INIT_V2(udsp_info, cud_hdr);
+ udsp_info.cud_nid = *nidp;
+ udsp_info.cud_peer = true;
+ rc = l_ioctl(LNET_DEV_ID,
+ IOC_LIBCFS_GET_CONST_UDSP_INFO,
+ &udsp_info);
+ if (rc != 0) {
+ l_errno = errno;
+ goto continue_without_udsp_info;
+ }
+
+ rc = create_remote_udsp_info(&udsp_info, peer_ni);
+ if (rc) {
+ l_errno = errno;
+ goto out;
+ }
+
+continue_without_udsp_info:
if (cYAML_create_string(peer_ni, "state",
lpni_cri->cr_aliveness)
== NULL)
show_rc, err_rc);
}
+/* YAML "udsp" delete handler: looks up the optional "idx" and "seq_no"
+ * items of @tree, substitutes -1 for whichever is missing, and calls
+ * lustre_lnet_del_udsp(). @show_rc is not used.
+ */
+static int handle_yaml_del_udsp(struct cYAML *tree, struct cYAML **show_rc,
+				struct cYAML **err_rc)
+{
+	struct cYAML *seq_item = cYAML_get_object_item(tree, "seq_no");
+	struct cYAML *idx_item = cYAML_get_object_item(tree, "idx");
+
+	return lustre_lnet_del_udsp(idx_item ? idx_item->cy_valueint : -1,
+				    seq_item ? seq_item->cy_valueint : -1,
+				    err_rc);
+}
+
+/* YAML "udsp" configure handler: collects the optional "src", "dst",
+ * "rte", "priority", "idx" and "seq_no" items of @tree and calls
+ * lustre_lnet_add_udsp(). Missing strings are passed as NULL and missing
+ * numbers as -1; the action type is "priority" when a "priority" item
+ * exists and "" otherwise. @show_rc is not used.
+ */
+static int handle_yaml_config_udsp(struct cYAML *tree, struct cYAML **show_rc,
+				   struct cYAML **err_rc)
+{
+	struct cYAML *seq_item = cYAML_get_object_item(tree, "seq_no");
+	struct cYAML *src_item = cYAML_get_object_item(tree, "src");
+	struct cYAML *rte_item = cYAML_get_object_item(tree, "rte");
+	struct cYAML *dst_item = cYAML_get_object_item(tree, "dst");
+	struct cYAML *prio_item = cYAML_get_object_item(tree, "priority");
+	struct cYAML *idx_item = cYAML_get_object_item(tree, "idx");
+	union lnet_udsp_action action;
+	char *action_name = "";
+
+	action.udsp_priority = -1;
+	if (prio_item) {
+		action.udsp_priority = prio_item->cy_valueint;
+		action_name = "priority";
+	}
+
+	return lustre_lnet_add_udsp(src_item ? src_item->cy_valuestring : NULL,
+				    dst_item ? dst_item->cy_valuestring : NULL,
+				    rte_item ? rte_item->cy_valuestring : NULL,
+				    action_name,
+				    &action,
+				    idx_item ? idx_item->cy_valueint : -1,
+				    seq_item ? seq_item->cy_valueint : -1,
+				    err_rc);
+}
+
+/* YAML "udsp" show handler: looks up the optional "idx" and "seq_no"
+ * items of @tree, substitutes -1 for whichever is missing, and calls
+ * lustre_lnet_show_udsp().
+ */
+static int handle_yaml_show_udsp(struct cYAML *tree, struct cYAML **show_rc,
+				 struct cYAML **err_rc)
+{
+	struct cYAML *seq_item = cYAML_get_object_item(tree, "seq_no");
+	struct cYAML *idx_item = cYAML_get_object_item(tree, "idx");
+
+	return lustre_lnet_show_udsp(idx_item ? idx_item->cy_valueint : -1,
+				     seq_item ? seq_item->cy_valueint : -1,
+				     show_rc, err_rc);
+}
+
static int handle_yaml_config_global_settings(struct cYAML *tree,
struct cYAML **show_rc,
struct cYAML **err_rc)
{ .name = "numa", .cb = handle_yaml_config_numa },
{ .name = "ping", .cb = handle_yaml_no_op },
{ .name = "discover", .cb = handle_yaml_no_op },
+ { .name = "udsp", .cb = handle_yaml_config_udsp },
{ .name = NULL } };
static struct lookup_cmd_hdlr_tbl lookup_del_tbl[] = {
{ .name = "numa", .cb = handle_yaml_del_numa },
{ .name = "ping", .cb = handle_yaml_no_op },
{ .name = "discover", .cb = handle_yaml_no_op },
+ { .name = "udsp", .cb = handle_yaml_del_udsp },
{ .name = NULL } };
static struct lookup_cmd_hdlr_tbl lookup_show_tbl[] = {
{ .name = "numa", .cb = handle_yaml_show_numa },
{ .name = "ping", .cb = handle_yaml_no_op },
{ .name = "discover", .cb = handle_yaml_no_op },
+ { .name = "udsp", .cb = handle_yaml_show_udsp },
{ .name = NULL } };
static struct lookup_cmd_hdlr_tbl lookup_exec_tbl[] = {
#define LUSTRE_CFG_RC_MATCH -7
#define LUSTRE_CFG_RC_SKIP -8
#define LUSTRE_CFG_RC_LAST_ELEM -9
+#define LUSTRE_CFG_RC_MARSHAL_FAIL -10
+
+#define CONFIG_CMD "configure"
+#define UNCONFIG_CMD "unconfigure"
+#define ADD_CMD "add"
+#define DEL_CMD "del"
+#define SHOW_CMD "show"
+#define DBG_CMD "dbg"
+#define MANAGE_CMD "manage"
+
+#define MAX_NUM_IPS 128
+
+#define modparam_path "/sys/module/lnet/parameters/"
+#define o2ib_modparam_path "/sys/module/ko2iblnd/parameters/"
+#define gni_nid_path "/proc/cray_xt/"
enum lnetctl_cmd {
LNETCTL_CONFIG_CMD = 1,
struct cfs_expr_list *cpt_expr;
};
+/* These UDSP structures need to match the kernel space structures
+ * in order for the marshal and demarshal functions to be the same.
+ */
+
+/* Net is described as a
+ * 1. net type
+ * 2. num range
+ */
+struct lnet_ud_net_descr {
+ __u32 udn_net_type;
+ struct list_head udn_net_num_range;
+};
+
+/* each NID range is defined as
+ * 1. net descriptor
+ * 2. address range descriptor
+ */
+struct lnet_ud_nid_descr {
+ struct lnet_ud_net_descr ud_net_id;
+ struct list_head ud_addr_range;
+};
+
+/* a UDSP rule can have up to three user defined NID descriptors
+ * - src: defines the local NID range for the rule
+ * - dst: defines the peer NID range for the rule
+ * - rte: defines the router NID range for the rule
+ *
+ * An action union defines the action to take when the rule
+ * is matched
+ */
+struct lnet_udsp {
+ struct list_head udsp_on_list;
+ __u32 udsp_idx;
+ struct lnet_ud_nid_descr udsp_src;
+ struct lnet_ud_nid_descr udsp_dst;
+ struct lnet_ud_nid_descr udsp_rte;
+ enum lnet_udsp_action_type udsp_action_type;
+ union {
+ __u32 udsp_priority;
+ } udsp_action;
+};
+
+/* This union is passed from lnetctl to fill the action union in udsp
+ * structure
+ * TODO: The idea here is if we add extra actions, ex: drop, it can be
+ * added to the union
+ */
+union lnet_udsp_action {
+ int udsp_priority;
+};
+
/* forward declaration of the cYAML structure. */
struct cYAML;
int lustre_lnet_parse_nidstr(char *nidstr, lnet_nid_t *lnet_nidlist,
int max_nids, char *err_str);
+/* lustre_lnet_add_udsp
+ * Add a selection policy.
+ * src - source NID descriptor
+ * dst - destination NID descriptor
+ * rte - router NID descriptor
+ * type - action type
+ * action - union of the action
+ * idx - the index at which to add the policy
+ * seq_no - sequence number of the request
+ * err_rc - [OUT] struct cYAML tree describing the error. Freed by
+ * caller
+ */
+int lustre_lnet_add_udsp(char *src, char *dst, char *rte, char *type,
+ union lnet_udsp_action *action, int idx,
+ int seq_no, struct cYAML **err_rc);
+
+/* lustre_lnet_del_udsp
+ * Delete a net selection policy.
+ * idx - the index to delete
+ * seq_no - sequence number of the request
+ * err_rc - [OUT] struct cYAML tree describing the error. Freed by
+ * caller
+ */
+int lustre_lnet_del_udsp(unsigned int idx, int seq_no, struct cYAML **err_rc);
+
+/* lustre_lnet_show_udsp
+ * show selection policy.
+ * idx - the index to show. -1 to show all policies
+ * seq_no - sequence number of the request
+ * show_rc - [IN/OUT] struct cYAML tree containing udsp info
+ * err_rc - [OUT] struct cYAML tree describing the error. Freed by
+ * caller
+ */
+int lustre_lnet_show_udsp(int idx, int seq_no, struct cYAML **show_rc,
+ struct cYAML **err_rc);
+
#endif /* LIB_LNET_CONFIG_API_H */
--- /dev/null
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Copyright (c) 2011, 2017, Intel Corporation.
+ *
+ * Copyright (c) 2018-2020 Data Direct Networks.
+ *
+ * This file is part of Lustre, https://wiki.whamcloud.com/
+ *
+ * Portals is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Portals is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; If not, see
+ * http://www.gnu.org/licenses/gpl-2.0.html
+ *
+ * Author: Sonia Sharma
+ */
+/*
+ * Copyright (c) 2020, Whamcloud.
+ *
+ */
+
+#include <errno.h>
+#include <limits.h>
+#include <byteswap.h>
+#include <netdb.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <libcfs/util/ioctl.h>
+#include <linux/lnet/lnetctl.h>
+#include "liblnd.h"
+#include <sys/types.h>
+#include <fcntl.h>
+#include <ctype.h>
+#include <linux/lnet/lnet-dlc.h>
+#include "liblnetconfig.h"
+
+/* A NID descriptor carries criteria when its net type is non-zero. */
+static inline bool
+lnet_udsp_criteria_present(struct lnet_ud_nid_descr *descr)
+{
+ return descr->ud_net_id.udn_net_type != 0;
+}
+
+/* Allocate a zero-filled UDSP with every embedded list head initialized.
+ * Returns NULL when calloc() fails.
+ */
+struct lnet_udsp *lnet_udsp_alloc(void)
+{
+	struct lnet_udsp *rule = calloc(1, sizeof(*rule));
+
+	if (rule == NULL)
+		return NULL;
+
+	INIT_LIST_HEAD(&rule->udsp_on_list);
+
+	/* address range lists of the three NID descriptors */
+	INIT_LIST_HEAD(&rule->udsp_src.ud_addr_range);
+	INIT_LIST_HEAD(&rule->udsp_dst.ud_addr_range);
+	INIT_LIST_HEAD(&rule->udsp_rte.ud_addr_range);
+
+	/* net number range lists of the three NID descriptors */
+	INIT_LIST_HEAD(&rule->udsp_src.ud_net_id.udn_net_num_range);
+	INIT_LIST_HEAD(&rule->udsp_dst.ud_net_id.udn_net_num_range);
+	INIT_LIST_HEAD(&rule->udsp_rte.ud_net_id.udn_net_num_range);
+
+	return rule;
+}
+
+/* Free the expression memory attached to @nid_descr.
+ *
+ * \param nid_descr	descriptor to clean; nothing is done when it has
+ *			no criteria
+ * \param blk		true when the expressions live in one single
+ *			allocation (freed through the first list entry);
+ *			false when each list is released with
+ *			cfs_expr_list_free_list()
+ */
+static void
+lnet_udsp_nid_descr_free(struct lnet_ud_nid_descr *nid_descr, bool blk)
+{
+ struct list_head *net_range = &nid_descr->ud_net_id.udn_net_num_range;
+
+ if (!lnet_udsp_criteria_present(nid_descr))
+ return;
+
+ /* memory management is a bit tricky here. When we allocate the
+ * memory to store the NID descriptor we allocate a large buffer
+ * for all the data, so we need to free the entire buffer at
+ * once. If the net is present the net_range->next points to that
+ * buffer otherwise if the ud_addr_range is present then it's the
+ * ud_addr_range.next
+ */
+ if (blk) {
+ if (!list_empty(net_range))
+ free(net_range->next);
+ else if (!list_empty(&nid_descr->ud_addr_range))
+ free(nid_descr->ud_addr_range.next);
+ } else {
+ cfs_expr_list_free_list(net_range);
+ cfs_expr_list_free_list(&nid_descr->ud_addr_range);
+ }
+}
+
+/* Free @udsp together with its source, destination and router NID
+ * descriptors; @blk is forwarded to lnet_udsp_nid_descr_free() for each.
+ */
+void
+lnet_udsp_free(struct lnet_udsp *udsp, bool blk)
+{
+ lnet_udsp_nid_descr_free(&udsp->udsp_src, blk);
+ lnet_udsp_nid_descr_free(&udsp->udsp_dst, blk);
+ lnet_udsp_nid_descr_free(&udsp->udsp_rte, blk);
+
+ free(udsp);
+}
+
+/* Build one cfs_expr_list, with its cfs_range_expr entries, inside the
+ * pre-allocated area at *buf from the marshaled ranges at *bulk, and
+ * append it to @list.
+ *
+ * @count is the number of ranges to read: 0 makes the call a no-op and
+ * -1 means a struct lnet_expressions header precedes the ranges at *bulk
+ * and supplies the count. Both cursors are advanced past what was
+ * consumed. No bounds are checked here; NOTE(review): the caller is
+ * expected to have validated the sizes of both areas - confirm.
+ */
+static void
+copy_range_info(void __user **bulk, void **buf, struct list_head *list,
+ int count)
+{
+ struct lnet_range_expr *range_expr;
+ struct cfs_range_expr *range;
+ struct cfs_expr_list *exprs;
+ int range_count = count;
+ int i;
+
+ if (range_count == 0)
+ return;
+
+ if (range_count == -1) {
+ struct lnet_expressions *e;
+
+ e = *bulk;
+ range_count = e->le_count;
+ *bulk += sizeof(*e);
+ }
+
+ /* the expression list header is carved out of *buf first */
+ exprs = *buf;
+ INIT_LIST_HEAD(&exprs->el_link);
+ INIT_LIST_HEAD(&exprs->el_exprs);
+ list_add_tail(&exprs->el_link, list);
+ *buf += sizeof(*exprs);
+
+ for (i = 0; i < range_count; i++) {
+ range_expr = *bulk;
+ range = *buf;
+ INIT_LIST_HEAD(&range->re_link);
+ range->re_lo = range_expr->re_lo;
+ range->re_hi = range_expr->re_hi;
+ range->re_stride = range_expr->re_stride;
+ list_add_tail(&range->re_link, &exprs->el_exprs);
+ *bulk += sizeof(*range_expr);
+ *buf += sizeof(*range);
+ }
+}
+
+/* Validate one marshaled NID descriptor at *bulk and, when it carries
+ * criteria, unpack it into @nid_descr.
+ *
+ * The descriptor is laid out as a struct lnet_ioctl_udsp_descr, followed
+ * by the net number ranges, followed by ud_descr_count address
+ * expressions, each a struct lnet_expressions header plus le_count
+ * struct lnet_range_expr entries. Every header is bounds-checked against
+ * the remaining buffer before any of its fields is read, and all sizes
+ * are computed in 64 bits so the counts found in the buffer cannot wrap
+ * the arithmetic.
+ *
+ * \param nid_descr	descriptor to fill in
+ * \param type		expected 4-byte descriptor tag ("SRC", "DST", "RTE")
+ * \param bulk		[IN/OUT] read cursor, advanced past the descriptor
+ * \param bulk_size	[IN/OUT] bytes left at *bulk
+ *
+ * \retval 0		success
+ * \retval -EINVAL	truncated buffer, oversized descriptor or wrong tag
+ * \retval -ENOMEM	allocation failure
+ */
+static int
+copy_ioc_udsp_descr(struct lnet_ud_nid_descr *nid_descr, char *type,
+		    void **bulk, __u32 *bulk_size)
+{
+	struct lnet_ioctl_udsp_descr *ioc_nid = *bulk;
+	struct lnet_expressions *exprs;
+	__u32 expr_count;
+	__u32 net_range_count;
+	__u32 remaining_size = *bulk_size;
+	__u32 i;
+	__u64 range_count;
+	__u64 size;
+	__u64 total;
+	size_t alloc_size;
+	void *tmp = *bulk;
+	void *buf;
+	size_t range_expr_s = sizeof(struct lnet_range_expr);
+	size_t lnet_exprs_s = sizeof(struct lnet_expressions);
+
+	/* the static part must fit before any field of it is read */
+	if (remaining_size < sizeof(*ioc_nid))
+		return -EINVAL;
+
+	/* criteria not present, skip over the static part of the
+	 * bulk, which is included for each NID descriptor
+	 */
+	if (ioc_nid->iud_net.ud_net_type == 0) {
+		*bulk += sizeof(*ioc_nid);
+		*bulk_size = remaining_size - sizeof(*ioc_nid);
+		return 0;
+	}
+
+	if (ioc_nid->iud_src_hdr.ud_descr_type != *(__u32 *)type)
+		return -EINVAL;
+
+	/* calculate the total size to verify we have enough buffer.
+	 * Start of by finding how many ranges there are for the net
+	 * expression.
+	 */
+	net_range_count = ioc_nid->iud_net.ud_net_num_expr.le_count;
+	size = sizeof(*ioc_nid) + ((__u64)net_range_count * range_expr_s);
+	if (size > remaining_size)
+		return -EINVAL;
+	remaining_size -= size;
+	range_count = net_range_count;
+
+	/* the number of expressions for the NID. IE 4 for IP, 1 for GNI */
+	expr_count = ioc_nid->iud_src_hdr.ud_descr_count;
+	/* point tmp to the beginning of the NID expressions */
+	tmp += size;
+	for (i = 0; i < expr_count; i++) {
+		/* the expression header must fit before le_count is read */
+		if (remaining_size < lnet_exprs_s)
+			return -EINVAL;
+		/* get the number of ranges per expression */
+		exprs = tmp;
+		size = ((__u64)range_expr_s * exprs->le_count) + lnet_exprs_s;
+		if (size > remaining_size)
+			return -EINVAL;
+		remaining_size -= size;
+		range_count += exprs->le_count;
+		tmp += size;
+	}
+
+	*bulk_size = remaining_size;
+
+	/* copy over the net type */
+	nid_descr->ud_net_id.udn_net_type = ioc_nid->iud_net.ud_net_type;
+
+	/* Nothing would be linked into either list, so an allocation made
+	 * here could never be reached again to be freed.
+	 */
+	if (net_range_count == 0 && expr_count == 0) {
+		*bulk += sizeof(*ioc_nid);
+		return 0;
+	}
+
+	/* allocate the total memory required to copy this NID descriptor;
+	 * reject a total that size_t cannot represent
+	 */
+	total = ((__u64)sizeof(struct cfs_expr_list) *
+		 ((__u64)expr_count + 1)) +
+		((__u64)sizeof(struct cfs_range_expr) * range_count);
+	alloc_size = (size_t)total;
+	if (alloc_size != total)
+		return -EINVAL;
+	buf = calloc(alloc_size, 1);
+	if (!buf)
+		return -ENOMEM;
+
+	/* copy over the net number range */
+	*bulk += sizeof(*ioc_nid);
+	copy_range_info(bulk, &buf, &nid_descr->ud_net_id.udn_net_num_range,
+			net_range_count);
+
+	/* copy over the NID descriptor */
+	for (i = 0; i < expr_count; i++)
+		copy_range_info(bulk, &buf, &nid_descr->ud_addr_range, -1);
+
+	return 0;
+}
+
+/* Rebuild a struct lnet_udsp from a marshaled struct lnet_ioctl_udsp.
+ *
+ * \param bulk		start of the marshaled struct lnet_ioctl_udsp; its
+ *			iou_bulk member is used as the descriptor cursor
+ * \param bulk_size	total size: the header plus iou_bulk_size bytes
+ *
+ * Returns the new UDSP, or NULL when @bulk_size is too small or does not
+ * match iou_bulk_size, allocation fails, or a descriptor fails to
+ * demarshal. The specific error code is not reported to the caller.
+ */
+struct lnet_udsp *
+lnet_udsp_demarshal(void *bulk, __u32 bulk_size)
+{
+ struct lnet_ioctl_udsp *ioc_udsp;
+ struct lnet_udsp *udsp;
+ int rc = -ENOMEM;
+
+ if (bulk_size < sizeof(*ioc_udsp))
+ return NULL;
+
+ udsp = lnet_udsp_alloc();
+ if (!udsp)
+ return NULL;
+
+ ioc_udsp = bulk;
+
+ udsp->udsp_action_type = ioc_udsp->iou_action_type;
+ udsp->udsp_action.udsp_priority = ioc_udsp->iou_action.priority;
+ udsp->udsp_idx = ioc_udsp->iou_idx;
+
+ bulk = ioc_udsp->iou_bulk;
+ bulk_size -= sizeof(*ioc_udsp);
+
+ /* what follows the header must be exactly the advertised bulk */
+ if (bulk_size != ioc_udsp->iou_bulk_size)
+ goto failed;
+
+ rc = copy_ioc_udsp_descr(&udsp->udsp_src, "SRC", &bulk, &bulk_size);
+ if (rc < 0)
+ goto failed;
+
+ rc = copy_ioc_udsp_descr(&udsp->udsp_dst, "DST", &bulk, &bulk_size);
+ if (rc < 0)
+ goto failed;
+
+ rc = copy_ioc_udsp_descr(&udsp->udsp_rte, "RTE", &bulk, &bulk_size);
+ if (rc < 0)
+ goto failed;
+
+ return udsp;
+
+failed:
+ /* descriptors filled so far each own one block allocation */
+ lnet_udsp_free(udsp, true);
+ return NULL;
+}
+
+/* Count the entries linked on @list by walking it once. */
+static inline int
+lnet_get_list_len(struct list_head *list)
+{
+	struct list_head *pos;
+	int n = 0;
+
+	list_for_each(pos, list) {
+		n += 1;
+	}
+
+	return n;
+}
+
+/* Compute the number of bytes @descr occupies once marshaled: one
+ * struct lnet_ioctl_udsp_descr, plus one struct lnet_expressions per
+ * address expression, plus one struct lnet_range_expr per range (the
+ * ranges of the first net number expression included).
+ */
+static size_t
+lnet_size_marshaled_nid_descr(struct lnet_ud_nid_descr *descr)
+{
+	struct list_head *net_ranges = &descr->ud_net_id.udn_net_num_range;
+	struct cfs_expr_list *cur;
+	int nexprs = 0;
+	int nranges = 0;
+
+	/* without criteria only the fixed-size descriptor is marshaled */
+	if (!lnet_udsp_criteria_present(descr))
+		return sizeof(struct lnet_ioctl_udsp_descr);
+
+	/* only the first net expression on the list is counted */
+	if (!list_empty(net_ranges)) {
+		cur = list_entry(net_ranges->next, struct cfs_expr_list,
+				 el_link);
+		nranges = lnet_get_list_len(&cur->el_exprs);
+	}
+
+	/* one expression header and N ranges per address expression */
+	list_for_each_entry(cur, &descr->ud_addr_range, el_link) {
+		nexprs++;
+		nranges += lnet_get_list_len(&cur->el_exprs);
+	}
+
+	return sizeof(struct lnet_ioctl_udsp_descr) +
+	       (sizeof(struct lnet_expressions) * nexprs) +
+	       (sizeof(struct lnet_range_expr) * nranges);
+}
+
+/* Return the marshaled size of @udsp: a struct lnet_ioctl_udsp followed
+ * by the source, destination and router NID descriptors.
+ */
+size_t
+lnet_get_udsp_size(struct lnet_udsp *udsp)
+{
+	return sizeof(struct lnet_ioctl_udsp) +
+	       lnet_size_marshaled_nid_descr(&udsp->udsp_src) +
+	       lnet_size_marshaled_nid_descr(&udsp->udsp_dst) +
+	       lnet_size_marshaled_nid_descr(&udsp->udsp_rte);
+}
+
+/* Append every range of @expr to the buffer at *bulk as a
+ * struct lnet_range_expr, advancing *bulk and reducing *bulk_size by the
+ * bytes written. *bulk_size is not checked here; NOTE(review): relies on
+ * the caller having sized the buffer beforehand - confirm.
+ */
+static void
+copy_exprs(struct cfs_expr_list *expr, void __user **bulk,
+ __s32 *bulk_size)
+{
+ struct cfs_range_expr *range;
+ struct lnet_range_expr range_expr;
+
+ /* copy over the net range expressions to the bulk */
+ list_for_each_entry(range, &expr->el_exprs, re_link) {
+ range_expr.re_lo = range->re_lo;
+ range_expr.re_hi = range->re_hi;
+ range_expr.re_stride = range->re_stride;
+ memcpy(*bulk, &range_expr, sizeof(range_expr));
+ *bulk += sizeof(range_expr);
+ *bulk_size -= sizeof(range_expr);
+ }
+}
+
+/* Marshal one NID descriptor into the buffer at *bulk.
+ *
+ * The output is a struct lnet_ioctl_udsp_descr, followed (when criteria
+ * are present) by the ranges of the first net number expression and then,
+ * for every address expression, a struct lnet_expressions header and its
+ * ranges. @type is the 4-byte tag stored in ud_descr_type; it is read as
+ * a __u32, so it must be at least four bytes long including the NUL.
+ * *bulk is advanced and *bulk_size reduced by the bytes written.
+ *
+ * Always returns 0.
+ */
+static int
+copy_nid_range(struct lnet_ud_nid_descr *nid_descr, char *type,
+ void __user **bulk, __s32 *bulk_size)
+{
+ struct lnet_ioctl_udsp_descr ioc_udsp_descr = { { 0 } };
+ struct cfs_expr_list *expr;
+ struct lnet_expressions ioc_expr;
+ int expr_count;
+ int net_expr_count = 0;
+
+ ioc_udsp_descr.iud_src_hdr.ud_descr_type = *(__u32 *)type;
+
+ /* if criteria not present, copy over the static part of the NID
+ * descriptor
+ */
+ if (!lnet_udsp_criteria_present(nid_descr)) {
+ memcpy(*bulk, &ioc_udsp_descr,
+ sizeof(ioc_udsp_descr));
+ *bulk += sizeof(ioc_udsp_descr);
+ *bulk_size -= sizeof(ioc_udsp_descr);
+ return 0;
+ }
+
+ expr_count = lnet_get_list_len(&nid_descr->ud_addr_range);
+
+ /* copy the net information */
+ if (!list_empty(&nid_descr->ud_net_id.udn_net_num_range)) {
+ expr = list_entry(nid_descr->ud_net_id.udn_net_num_range.next,
+ struct cfs_expr_list, el_link);
+ net_expr_count = lnet_get_list_len(&expr->el_exprs);
+ } else {
+ net_expr_count = 0;
+ }
+
+ /* set the total expression count */
+ ioc_udsp_descr.iud_src_hdr.ud_descr_count = expr_count;
+ ioc_udsp_descr.iud_net.ud_net_type =
+ nid_descr->ud_net_id.udn_net_type;
+ ioc_udsp_descr.iud_net.ud_net_num_expr.le_count = net_expr_count;
+
+ /* copy over the header info to the bulk */
+ memcpy(*bulk, &ioc_udsp_descr, sizeof(ioc_udsp_descr));
+ *bulk += sizeof(ioc_udsp_descr);
+ *bulk_size -= sizeof(ioc_udsp_descr);
+
+ /* copy over the net num expression if it exists; expr is only valid
+ * here because net_expr_count != 0 implies the list was not empty
+ */
+ if (net_expr_count)
+ copy_exprs(expr, bulk, bulk_size);
+
+ /* copy the address range */
+ list_for_each_entry(expr, &nid_descr->ud_addr_range, el_link) {
+ ioc_expr.le_count = lnet_get_list_len(&expr->el_exprs);
+ memcpy(*bulk, &ioc_expr, sizeof(ioc_expr));
+ *bulk += sizeof(ioc_expr);
+ *bulk_size -= sizeof(ioc_expr);
+
+ copy_exprs(expr, bulk, bulk_size);
+ }
+
+ return 0;
+}
+
+/*
+ * Flatten a UDSP into the caller-supplied buffer: the fixed
+ * struct lnet_ioctl_udsp fields first, then the source, destination and
+ * router NID descriptors in that order.
+ *
+ * \param udsp		policy to marshal
+ * \param bulk		destination buffer
+ * \param bulk_size	size of \a bulk in bytes
+ *
+ * \retval -EINVAL	\a bulk_size is smaller than lnet_get_udsp_size()
+ * \retval other	result of the first failing copy_nid_range(), or of
+ *			the last one if all succeed
+ */
+static int
+lnet_udsp_marshal(struct lnet_udsp *udsp, void *bulk,
+		  __s32 bulk_size)
+{
+	struct lnet_ioctl_udsp *hdr;
+	int rc;
+
+	/* refuse to marshal into a buffer that cannot hold the udsp */
+	if (bulk_size < lnet_get_udsp_size(udsp))
+		return -EINVAL;
+
+	hdr = bulk;
+	hdr->iou_idx = udsp->udsp_idx;
+	hdr->iou_action_type = udsp->udsp_action_type;
+	hdr->iou_action.priority = udsp->udsp_action.udsp_priority;
+
+	/* the descriptors follow immediately after the fixed header */
+	bulk += sizeof(*hdr);
+	bulk_size -= sizeof(*hdr);
+
+	rc = copy_nid_range(&udsp->udsp_src, "SRC", &bulk, &bulk_size);
+	if (rc == 0)
+		rc = copy_nid_range(&udsp->udsp_dst, "DST", &bulk,
+				    &bulk_size);
+	if (rc == 0)
+		rc = copy_nid_range(&udsp->udsp_rte, "RTE", &bulk,
+				    &bulk_size);
+
+	return rc;
+}
+
+/*
+ * Translate an action name into its enum value.
+ *
+ * Matching is by prefix: any string starting with "priority" selects
+ * the priority action and any string starting with "pref" selects the
+ * preferred-list action. NULL or anything else yields
+ * EN_LNET_UDSP_ACTION_NONE.
+ */
+static enum lnet_udsp_action_type
+lnet_str2udsp_action(char *type)
+{
+	if (type != NULL) {
+		if (strncmp(type, "priority", strlen("priority")) == 0)
+			return EN_LNET_UDSP_ACTION_PRIORITY;
+
+		if (strncmp(type, "pref", strlen("pref")) == 0)
+			return EN_LNET_UDSP_ACTION_PREFERRED_LIST;
+	}
+
+	return EN_LNET_UDSP_ACTION_NONE;
+}
+
+/*
+ * Build a UDSP from its textual description, marshal it and hand it to
+ * the kernel through IOC_LIBCFS_ADD_UDSP.
+ *
+ * Accepted combinations of the NID arguments: any single one of
+ * \a src, \a dst or \a rte, or the pairs src+dst and dst+rte.
+ *
+ * \param src		local NID expression, or NULL
+ * \param dst		peer NID expression, or NULL
+ * \param rte		router NID expression, or NULL
+ * \param type		action name, see lnet_str2udsp_action()
+ * \param action	action value; only read when the action is a priority
+ * \param idx		index stored in the rule
+ * \param seq_no	sequence number passed to cYAML_build_error()
+ * \param err_rc	[out] YAML error block
+ *
+ * \retval LUSTRE_CFG_RC_NO_ERR on success
+ * \retval LUSTRE_CFG_RC_BAD_PARAM, LUSTRE_CFG_RC_OUT_OF_MEM,
+ *	   LUSTRE_CFG_RC_MARSHAL_FAIL, the negative value returned by
+ *	   cfs_parse_nid_parts(), or -errno from the ioctl on failure
+ */
+int lustre_lnet_add_udsp(char *src, char *dst, char *rte,
+			 char *type, union lnet_udsp_action *action,
+			 int idx, int seq_no, struct cYAML **err_rc)
+{
+	struct lnet_udsp *udsp = NULL;
+	struct lnet_ioctl_udsp *udsp_bulk;
+	int rc = LUSTRE_CFG_RC_OUT_OF_MEM;
+	void *bulk = NULL;
+	__u32 bulk_size;
+	char err_str[LNET_MAX_STR_LEN];
+	enum lnet_udsp_action_type action_type;
+
+	snprintf(err_str, sizeof(err_str), "\"success\"");
+
+	action_type = lnet_str2udsp_action(type);
+	if (action_type == EN_LNET_UDSP_ACTION_NONE) {
+		/* type may be NULL here; never hand NULL to %s */
+		snprintf(err_str, sizeof(err_str),
+			 "\"bad action type specified: %s\"",
+			 type ? type : "(null)");
+		rc = LUSTRE_CFG_RC_BAD_PARAM;
+		goto out;
+	}
+
+	/* sanitize parameters:
+	 * src-dst can be simultaneously present
+	 * dst-rte can be simultaneously present
+	 */
+	if ((!src && !rte && !dst) ||
+	    (src && rte && dst) ||
+	    (src && rte && !dst)) {
+		snprintf(err_str, sizeof(err_str),
+		  "\"The combination of src, dst and rte is not supported\"");
+		rc = LUSTRE_CFG_RC_BAD_PARAM;
+		goto out;
+	}
+
+	udsp = lnet_udsp_alloc();
+	if (!udsp) {
+		snprintf(err_str, sizeof(err_str), "\"out of memory\"");
+		goto out;
+	}
+
+	udsp->udsp_idx = idx;
+	udsp->udsp_action_type = action_type;
+
+	/* a priority of -1 will result in the lowest possible priority */
+	if (action_type == EN_LNET_UDSP_ACTION_PRIORITY)
+		udsp->udsp_action.udsp_priority = action->udsp_priority;
+
+	/* override with the default
+	 * if priority is expected, but not specified
+	 */
+	if (!rte && ((dst && !src) || (src && !dst)) &&
+	    action_type != EN_LNET_UDSP_ACTION_PRIORITY) {
+		udsp->udsp_action_type = EN_LNET_UDSP_ACTION_PRIORITY;
+		udsp->udsp_action.udsp_priority = 0;
+	}
+
+	if (src) {
+		rc = cfs_parse_nid_parts(src, &udsp->udsp_src.ud_addr_range,
+				&udsp->udsp_src.ud_net_id.udn_net_num_range,
+				&udsp->udsp_src.ud_net_id.udn_net_type);
+		if (rc < 0) {
+			snprintf(err_str,
+				 sizeof(err_str),
+				 "\"failed to parse src parameter\"");
+			goto out;
+		}
+	}
+	if (dst) {
+		rc = cfs_parse_nid_parts(dst, &udsp->udsp_dst.ud_addr_range,
+				&udsp->udsp_dst.ud_net_id.udn_net_num_range,
+				&udsp->udsp_dst.ud_net_id.udn_net_type);
+		if (rc < 0) {
+			snprintf(err_str,
+				 sizeof(err_str),
+				 "\"failed to parse dst parameter\"");
+			goto out;
+		}
+	}
+	if (rte) {
+		rc = cfs_parse_nid_parts(rte, &udsp->udsp_rte.ud_addr_range,
+				&udsp->udsp_rte.ud_net_id.udn_net_num_range,
+				&udsp->udsp_rte.ud_net_id.udn_net_type);
+		if (rc < 0) {
+			snprintf(err_str,
+				 sizeof(err_str),
+				 "\"failed to parse rte parameter\"");
+			goto out;
+		}
+	}
+
+	bulk_size = lnet_get_udsp_size(udsp);
+	bulk = calloc(1, bulk_size);
+	if (!bulk) {
+		rc = LUSTRE_CFG_RC_OUT_OF_MEM;
+		snprintf(err_str, sizeof(err_str), "\"out of memory\"");
+		goto out;
+	}
+
+	udsp_bulk = bulk;
+	LIBCFS_IOC_INIT_V2(*udsp_bulk, iou_hdr);
+	udsp_bulk->iou_hdr.ioc_len = bulk_size;
+	udsp_bulk->iou_bulk_size = bulk_size - sizeof(*udsp_bulk);
+
+	rc = lnet_udsp_marshal(udsp, bulk, bulk_size);
+	if (rc != LUSTRE_CFG_RC_NO_ERR) {
+		rc = LUSTRE_CFG_RC_MARSHAL_FAIL;
+		snprintf(err_str,
+			 sizeof(err_str),
+			 "\"failed to marshal udsp\"");
+		goto out;
+	}
+
+	/* the marshalled descriptors start right after the fixed header */
+	udsp_bulk->iou_bulk = bulk + sizeof(*udsp_bulk);
+
+	rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_ADD_UDSP, bulk);
+	if (rc < 0) {
+		/* capture errno before any further library call can
+		 * overwrite it, and report it negated like the del and
+		 * show paths do
+		 */
+		int saved_errno = errno;
+
+		rc = -saved_errno;
+		snprintf(err_str, sizeof(err_str),
+			 "\"cannot add udsp: %s\"", strerror(saved_errno));
+		goto out;
+	}
+
+	rc = LUSTRE_CFG_RC_NO_ERR;
+
+out:
+	free(bulk);
+	if (udsp)
+		lnet_udsp_free(udsp, false);
+	cYAML_build_error(rc, seq_no, ADD_CMD, "udsp", err_str, err_rc);
+	return rc;
+}
+
+/*
+ * Ask the kernel to delete the UDSP at index \a idx.
+ *
+ * \param idx		index of the policy to delete
+ * \param seq_no	sequence number passed to cYAML_build_error()
+ * \param err_rc	[out] YAML error block
+ *
+ * \retval the l_ioctl() result on success, -errno if the ioctl fails
+ */
+int lustre_lnet_del_udsp(unsigned int idx, int seq_no, struct cYAML **err_rc)
+{
+	int rc;
+	char err_str[LNET_MAX_STR_LEN];
+	struct lnet_ioctl_udsp udsp_bulk;
+
+	snprintf(err_str, sizeof(err_str), "\"success\"");
+
+	LIBCFS_IOC_INIT_V2(udsp_bulk, iou_hdr);
+	udsp_bulk.iou_idx = idx;
+
+	rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_DEL_UDSP, &udsp_bulk);
+	if (rc < 0) {
+		/* strerror() wants the positive errno value, not -errno */
+		int saved_errno = errno;
+
+		rc = -saved_errno;
+		snprintf(err_str, sizeof(err_str),
+			 "\"cannot del udsp: %s\"", strerror(saved_errno));
+	}
+
+	/* NOTE(review): the error block is tagged ADD_CMD although this is
+	 * a delete; confirm whether a delete tag should be used instead
+	 */
+	cYAML_build_error(rc, seq_no, ADD_CMD, "udsp", err_str, err_rc);
+	return rc;
+}
+
+/*
+ * Append a printable form of a NID descriptor to \a str.
+ *
+ * A descriptor whose net type is 0 is printed as "NA". Otherwise the
+ * address range (if any) is appended, followed by '@' when an address
+ * was written, the network name, and finally the net-number range.
+ *
+ * \param d	descriptor to print
+ * \param str	destination string; text is appended with strncat()
+ * \param size	space available in \a str
+ *
+ * \retval 0 on success
+ * \retval -ENOBUFS if the network name does not fit
+ * \retval negative value propagated from cfs_expr2str()
+ */
+int lustre_lnet_nid_descr2str(struct lnet_ud_nid_descr *d,
+			      char *str, size_t size)
+{
+	int remaining = size;
+	int needed;
+	char *net_name;
+	bool have_addr = false;
+
+	/* criteria not defined */
+	if (d->ud_net_id.udn_net_type == 0) {
+		strncat(str, "NA", remaining - 1);
+		return 0;
+	}
+
+	remaining = cfs_expr2str(&d->ud_addr_range, str, remaining);
+	if (remaining < 0)
+		return remaining;
+
+	net_name = libcfs_net2str(LNET_MKNET(d->ud_net_id.udn_net_type, 0));
+
+	/* space was consumed, so an address expression was written */
+	if (remaining < size) {
+		have_addr = true;
+		/* account for @ and NULL termination */
+		needed = strlen(net_name) + 2;
+	} else {
+		/* account for NULL termination */
+		needed = strlen(net_name) + 1;
+	}
+
+	if (remaining - needed < 0)
+		return -ENOBUFS;
+
+	if (have_addr) {
+		strncat(str, "@", remaining);
+		remaining -= 1;
+	}
+
+	strncat(str, net_name, remaining);
+	remaining -= strlen(net_name) + 1;
+
+	remaining = cfs_expr2str(&d->ud_net_id.udn_net_num_range, str,
+				 remaining);
+	if (remaining < 0)
+		return remaining;
+
+	return 0;
+}
+
+/*
+ * Attach the UDSP action to YAML node \a y.
+ *
+ * Only the priority action produces output: an "action" object holding
+ * a "priority" number. Every other action type adds nothing.
+ *
+ * \retval 0 on success or when there is nothing to add
+ * \retval -ENOMEM if a YAML node could not be created
+ */
+int yaml_add_udsp_action(struct cYAML *y, struct lnet_udsp *udsp)
+{
+	struct cYAML *action_node;
+
+	if (udsp->udsp_action_type != EN_LNET_UDSP_ACTION_PRIORITY)
+		return 0;
+
+	action_node = cYAML_create_object(y, "action");
+	if (action_node == NULL)
+		return -ENOMEM;
+
+	if (cYAML_create_number(action_node, "priority",
+				udsp->udsp_action.udsp_priority) == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/*
+ * Fetch UDSPs from the kernel and render them as a YAML "udsp" sequence.
+ *
+ * For each index the size is queried with IOC_LIBCFS_GET_UDSP_SIZE, a
+ * buffer of that size is filled with IOC_LIBCFS_GET_UDSP, and the result
+ * is demarshalled and added to the tree. With \a idx == -1 indices are
+ * walked from 0 until an ioctl fails; ENOENT then means "end of list".
+ *
+ * \param idx		index to show, or -1 for all
+ * \param seq_no	sequence number passed to cYAML_build_error()
+ * \param show_rc	[in,out] YAML tree to attach the result to; when
+ *			NULL the tree is printed and freed instead
+ * \param err_rc	[out] YAML error block
+ *
+ * \retval LUSTRE_CFG_RC_NO_ERR on success
+ * \retval negative value on failure
+ */
+int lustre_lnet_show_udsp(int idx, int seq_no, struct cYAML **show_rc,
+			  struct cYAML **err_rc)
+{
+	struct lnet_ioctl_udsp *data = NULL;
+	char *ioctl_buf = NULL;
+	struct lnet_ioctl_udsp get_size;
+	int rc = LUSTRE_CFG_RC_OUT_OF_MEM, i;
+	int l_errno = 0;
+	int use_idx = 0;
+	struct cYAML *root = NULL, *udsp_node = NULL,
+		     *first_seq = NULL;
+	struct cYAML *item = NULL;
+	char err_str[LNET_MAX_STR_LEN];
+	char tmp[LNET_MAX_STR_LEN];
+	struct lnet_udsp *udsp = NULL;
+	bool exist = false;
+
+	snprintf(err_str, sizeof(err_str), "\"out of memory\"");
+
+	root = cYAML_create_object(NULL, NULL);
+	if (!root)
+		goto out;
+
+	udsp_node = cYAML_create_seq(root, "udsp");
+	if (!udsp_node)
+		goto out;
+
+	for (i = 0;; i++) {
+		data = NULL;
+		ioctl_buf = NULL;
+		udsp = NULL;
+
+		LIBCFS_IOC_INIT_V2(get_size, iou_hdr);
+		if (idx != -1)
+			use_idx = idx;
+		else
+			use_idx = i;
+
+		get_size.iou_idx = use_idx;
+
+		/* on return iou_idx carries the buffer size to allocate */
+		rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_UDSP_SIZE, &get_size);
+		if (rc != 0) {
+			l_errno = errno;
+			break;
+		}
+
+		ioctl_buf = calloc(get_size.iou_idx, 1);
+		if (!ioctl_buf) {
+			l_errno = errno;
+			break;
+		}
+
+		data = (struct lnet_ioctl_udsp *)ioctl_buf;
+
+		LIBCFS_IOC_INIT_V2(*data, iou_hdr);
+		data->iou_bulk_size = get_size.iou_idx - sizeof(*data);
+		data->iou_bulk = ioctl_buf + sizeof(*data);
+		data->iou_idx = use_idx;
+
+		rc = l_ioctl(LNET_DEV_ID, IOC_LIBCFS_GET_UDSP, ioctl_buf);
+		if (rc != 0) {
+			l_errno = errno;
+			break;
+		}
+
+		udsp = lnet_udsp_demarshal(ioctl_buf,
+			data->iou_hdr.ioc_len + data->iou_bulk_size);
+		if (!udsp) {
+			/* l_errno holds positive errno values; it is
+			 * passed to strerror() and negated into rc below
+			 */
+			l_errno = EFAULT;
+			break;
+		}
+
+		rc = -EINVAL;
+		exist = true;
+
+		/* create the tree to be printed. */
+		item = cYAML_create_seq_item(udsp_node);
+		if (item == NULL)
+			goto out;
+
+		if (!first_seq)
+			first_seq = item;
+
+		if (cYAML_create_number(item, "idx",
+					udsp->udsp_idx) == NULL)
+			goto out;
+
+		memset(tmp, 0, LNET_MAX_STR_LEN);
+		rc = lustre_lnet_nid_descr2str(&udsp->udsp_src, tmp,
+					       LNET_MAX_STR_LEN);
+
+		if (rc)
+			goto out;
+
+		if (cYAML_create_string(item, "src", tmp) == NULL)
+			goto out;
+		memset(tmp, 0, LNET_MAX_STR_LEN);
+		rc = lustre_lnet_nid_descr2str(&udsp->udsp_dst, tmp,
+					       LNET_MAX_STR_LEN);
+
+		if (rc)
+			goto out;
+
+		if (cYAML_create_string(item, "dst", tmp) == NULL)
+			goto out;
+
+		memset(tmp, 0, LNET_MAX_STR_LEN);
+		rc = lustre_lnet_nid_descr2str(&udsp->udsp_rte, tmp,
+					       LNET_MAX_STR_LEN);
+
+		if (rc)
+			goto out;
+
+		if (cYAML_create_string(item, "rte", tmp) == NULL)
+			goto out;
+
+		if (yaml_add_udsp_action(item, udsp))
+			goto out;
+
+		/* release this iteration's resources and clear the
+		 * pointers so the cleanup at 'out' cannot free them a
+		 * second time when we break out below
+		 */
+		free(ioctl_buf);
+		ioctl_buf = NULL;
+		lnet_udsp_free(udsp, true);
+		udsp = NULL;
+
+		/* did we show the given index? */
+		if (idx != -1)
+			break;
+	}
+
+	/* Print out the net information only if show_rc is not provided */
+	if (show_rc == NULL)
+		cYAML_print_tree(root);
+
+	/* l_errno == 0: the requested index was shown successfully.
+	 * ENOENT: ran past the last udsp. Anything else is a failure.
+	 */
+	if (l_errno != 0 && l_errno != ENOENT) {
+		snprintf(err_str,
+			 sizeof(err_str),
+			 "\"cannot get udsp: %s\"",
+			 strerror(l_errno));
+		rc = -l_errno;
+		goto out;
+	}
+
+	rc = LUSTRE_CFG_RC_NO_ERR;
+	snprintf(err_str, sizeof(err_str), "\"success\"");
+out:
+	free(ioctl_buf);
+	if (udsp)
+		lnet_udsp_free(udsp, true);
+
+	if (show_rc == NULL || rc != LUSTRE_CFG_RC_NO_ERR || !exist) {
+		cYAML_free_tree(root);
+	} else if (show_rc != NULL && *show_rc != NULL) {
+		struct cYAML *show_node;
+		/* find the udsp node, if one doesn't exist
+		 * then insert one.  Otherwise add to the one there
+		 */
+		show_node = cYAML_get_object_item(*show_rc, "udsp");
+		if (show_node != NULL && cYAML_is_sequence(show_node)) {
+			cYAML_insert_child(show_node, first_seq);
+			free(udsp_node);
+			free(root);
+		} else if (show_node == NULL) {
+			cYAML_insert_sibling((*show_rc)->cy_child,
+						udsp_node);
+			free(root);
+		} else {
+			cYAML_free_tree(root);
+		}
+	} else {
+		*show_rc = root;
+	}
+
+	cYAML_build_error(rc, seq_no, SHOW_CMD, "udsp", err_str, err_rc);
+
+	return rc;
+}
+
static int jt_show_peer(int argc, char **argv);
static int jt_show_recovery(int argc, char **argv);
static int jt_show_global(int argc, char **argv);
+static int jt_show_udsp(int argc, char **argv);
static int jt_set_tiny(int argc, char **argv);
static int jt_set_small(int argc, char **argv);
static int jt_set_large(int argc, char **argv);
static int jt_set_discovery(int argc, char **argv);
static int jt_set_drop_asym_route(int argc, char **argv);
static int jt_list_peer(int argc, char **argv);
+static int jt_add_udsp(int argc, char **argv);
+static int jt_del_udsp(int argc, char **argv);
/*static int jt_show_peer(int argc, char **argv);*/
static int lnetctl_list_commands(int argc, char **argv);
static int jt_import(int argc, char **argv);
static int jt_calc_service_id(int argc, char **argv);
static int jt_set_response_tracking(int argc, char **argv);
static int jt_set_recovery_limit(int argc, char **argv);
+static int jt_udsp(int argc, char **argv);
command_t cmd_list[] = {
{"lnet", jt_lnet, 0, "lnet {configure | unconfigure} [--all]"},
{"ping", jt_ping, 0, "ping nid,[nid,...]"},
{"discover", jt_discover, 0, "discover nid[,nid,...]"},
{"service-id", jt_calc_service_id, 0, "Calculate IB Lustre service ID\n"},
+ {"udsp", jt_udsp, 0, "udsp {add | del | help}"},
{"help", Parser_help, 0, "help"},
{"exit", Parser_quit, 0, "quit"},
{"quit", Parser_quit, 0, "quit"},
{ 0, 0, 0, NULL }
};
+/*
+ * Subcommands of "udsp": add, del and show. Each entry gives the
+ * subcommand name, its handler and its help text; the table ends with
+ * an all-zero entry.
+ */
+command_t udsp_cmds[] = {
+ {"add", jt_add_udsp, 0, "add a udsp\n"
+ "\t--src: ip2nets syntax specifying the local NID to match\n"
+ "\t--dst: ip2nets syntax specifying the remote NID to match\n"
+ "\t--rte: ip2nets syntax specifying the router NID to match\n"
+ "\t--priority: priority value (0 - highest priority)\n"
+ "\t--idx: index of where to insert the rule.\n"
+ "\t By default, appends to the end of the rule list.\n"},
+ {"del", jt_del_udsp, 0, "delete a udsp\n"
+ "\t--idx: index of the Policy.\n"},
+ {"show", jt_show_udsp, 0, "show udsps\n"
+ "\t --idx: index of the policy to show.\n"},
+ { 0, 0, 0, NULL }
+};
+
static int jt_calc_service_id(int argc, char **argv)
{
int rc;
return rc;
}
+/*
+ * "udsp show" handler: parse the optional --idx argument, fetch the
+ * policies and print them as YAML, or print the error block to stderr
+ * on failure.
+ *
+ * Without --idx the index stays -1, which requests every policy.
+ */
+static int jt_show_udsp(int argc, char **argv)
+{
+	int idx = -1;
+	int rc, opt;
+	struct cYAML *err_rc = NULL, *show_rc = NULL;
+
+	const char *const short_options = "i:";
+	static const struct option long_options[] = {
+		{ .name = "idx", .has_arg = required_argument, .val = 'i' },
+		{ .name = NULL }
+	};
+
+	rc = check_cmd(udsp_cmds, "udsp", "show", 0, argc, argv);
+	if (rc)
+		return rc;
+
+	while ((opt = getopt_long(argc, argv, short_options,
+				  long_options, NULL)) != -1) {
+		switch (opt) {
+		case 'i':
+			idx = atoi(optarg);
+			break;
+		case '?':
+			/* show the help of this command, not "net show" */
+			print_help(udsp_cmds, "udsp", "show");
+			/* fallthrough */
+		default:
+			return 0;
+		}
+	}
+
+	rc = lustre_lnet_show_udsp(idx, -1, &show_rc, &err_rc);
+
+	if (rc != LUSTRE_CFG_RC_NO_ERR)
+		cYAML_print_tree2file(stderr, err_rc);
+	else if (show_rc)
+		cYAML_print_tree(show_rc);
+
+	cYAML_free_tree(err_rc);
+	cYAML_free_tree(show_rc);
+
+	return rc;
+}
+
static int jt_show_global(int argc, char **argv)
{
int rc;
return Parser_execarg(argc - 1, &argv[1], set_cmds);
}
+/*
+ * "udsp" handler: validate the command line with check_cmd(), then
+ * dispatch the remaining arguments to the matching udsp subcommand.
+ */
+static int jt_udsp(int argc, char **argv)
+{
+	int rc = check_cmd(udsp_cmds, "udsp", NULL, 2, argc, argv);
+
+	if (rc != 0)
+		return rc;
+
+	/* drop "udsp" itself before handing over to the subcommand table */
+	return Parser_execarg(argc - 1, &argv[1], udsp_cmds);
+}
+
static int jt_import(int argc, char **argv)
{
char *file = NULL;
err_rc = NULL;
}
+ rc = lustre_lnet_show_udsp(-1, -1, &show_rc, &err_rc);
+ if (rc != LUSTRE_CFG_RC_NO_ERR) {
+ cYAML_print_tree2file(stderr, err_rc);
+ cYAML_free_tree(err_rc);
+ err_rc = NULL;
+ }
+
if (show_rc != NULL) {
cYAML_print_tree2file(f, show_rc);
cYAML_free_tree(show_rc);
return rc;
}
+/*
+ * "udsp add" handler: collect --src/--dst/--rte/--priority/--idx and
+ * pass them to lustre_lnet_add_udsp(). The error block is printed to
+ * stderr on failure.
+ *
+ * The action defaults to "pref" and becomes "priority" once --priority
+ * is given. An unparsable priority is replaced by -1 and an unparsable
+ * index by 0.
+ */
+static int jt_add_udsp(int argc, char **argv)
+{
+	char *src = NULL, *dst = NULL, *rte = NULL;
+	struct cYAML *err_rc = NULL;
+	/* zeroed so the library never sees indeterminate bytes when
+	 * --priority is not given
+	 */
+	union lnet_udsp_action udsp_action = { 0 };
+	long int idx = -1, priority = -1;
+	int opt, rc = 0;
+	char *action_type = "pref";
+
+	const char *const short_options = "s:d:r:p:i:";
+	static const struct option long_options[] = {
+	{ .name = "src",	 .has_arg = required_argument, .val = 's' },
+	{ .name = "dst",	 .has_arg = required_argument, .val = 'd' },
+	{ .name = "rte",	 .has_arg = required_argument, .val = 'r' },
+	{ .name = "priority",	 .has_arg = required_argument, .val = 'p' },
+	{ .name = "idx",	 .has_arg = required_argument, .val = 'i' },
+	{ .name = NULL } };
+
+	rc = check_cmd(udsp_cmds, "udsp", "add", 0, argc, argv);
+	if (rc)
+		return rc;
+
+	while ((opt = getopt_long(argc, argv, short_options,
+				  long_options, NULL)) != -1) {
+		switch (opt) {
+		case 's':
+			src = optarg;
+			break;
+		case 'd':
+			dst = optarg;
+			break;
+		case 'r':
+			rte = optarg;
+			break;
+		case 'p':
+			rc = parse_long(optarg, &priority);
+			if (rc != 0)
+				priority = -1;
+			action_type = "priority";
+			udsp_action.udsp_priority = priority;
+			break;
+		case 'i':
+			rc = parse_long(optarg, &idx);
+			if (rc != 0)
+				idx = 0;
+			break;
+		case '?':
+			print_help(udsp_cmds, "udsp", "add");
+			/* fallthrough */
+		default:
+			return 0;
+		}
+	}
+
+	rc = lustre_lnet_add_udsp(src, dst, rte, action_type, &udsp_action,
+				  idx, -1, &err_rc);
+
+	if (rc != LUSTRE_CFG_RC_NO_ERR)
+		cYAML_print_tree2file(stderr, err_rc);
+
+	cYAML_free_tree(err_rc);
+
+	return rc;
+}
+
+/*
+ * "udsp del" handler: parse --idx and delete the policy at that index
+ * through lustre_lnet_del_udsp(). The error block is printed to stderr
+ * on failure.
+ *
+ * The index defaults to 0, and an unparsable value is also treated as 0.
+ */
+static int jt_del_udsp(int argc, char **argv)
+{
+	struct cYAML *err_rc = NULL;
+	long int idx = 0;
+	int opt, rc = 0;
+
+	const char *const short_options = "i:";
+	static const struct option long_options[] = {
+	{ .name = "idx",	.has_arg = required_argument, .val = 'i' },
+	{ .name = NULL } };
+
+	rc = check_cmd(udsp_cmds, "udsp", "del", 0, argc, argv);
+	if (rc)
+		return rc;
+
+	while ((opt = getopt_long(argc, argv, short_options,
+				  long_options, NULL)) != -1) {
+		switch (opt) {
+		case 'i':
+			rc = parse_long(optarg, &idx);
+			if (rc != 0)
+				idx = 0;
+			break;
+		case '?':
+			/* show the help of "del", not of "add" */
+			print_help(udsp_cmds, "udsp", "del");
+			/* fallthrough */
+		default:
+			return 0;
+		}
+	}
+
+	rc = lustre_lnet_del_udsp(idx, -1, &err_rc);
+	if (rc != LUSTRE_CFG_RC_NO_ERR)
+		cYAML_print_tree2file(stderr, err_rc);
+
+	cYAML_free_tree(err_rc);
+
+	return rc;
+}
+
static int lnetctl_list_commands(int argc, char **argv)
{
char buffer[81] = ""; /* 80 printable chars + terminating NUL */
.br
\-> Minimum router credits\.
.
+.SS "UDSP Configuration"
+.
+.TP
+\fBlnetctl udsp\fR add
+Add user-defined selection policy.
+.
+.br
+.
+.TP
+Adding a local network udsp.
+.
+.br
+If multiple local networks are available, each one can be assigned a priority\.
+The one with the highest priority is selected to send on\.
+NID and network matching uses NID-range syntax; please see the manual for more detail\.
+.
+.br
+\-\-src : network in NID-range syntax (e.g. tcp0 or tcp[1-3])
+.
+.br
+\-\-priority <priority value>: optional priority value in [0-255], with 0 being the highest priority
+.
+.br
+\-\-idx <index>: The index at which to insert the rule\. By default, the rule is appended to the end of the list\.
+.
+.br
+.
+.TP
+Adding a local NID udsp.
+.
+.br
+Assign priority to local NIDs\. After a local network is chosen, the NI with highest priority is selected\.
+.
+.br
+\-\-src: NID in NID-range syntax (e.g. 10.1.1.2@tcp or 10.1.1.*@tcp)
+.
+.br
+\-\-priority <priority value>: optional priority value in [0-255], with 0 being the highest priority
+.
+.br
+\-\-idx <index>: The index at which to insert the rule\. By default, the rule is appended to the end of the list\.
+.
+.br
+.
+.TP
+Adding a peer NID udsp.
+.
+.br
+Assign priority to peer NIDs. Peer NID with highest priority is selected to send to\.
+.
+.br
+\-\-dst: NID in NID-range syntax (e.g. 10.1.1.2@tcp)
+.
+.br
+\-\-priority <priority value>: optional priority value in [0-255], with 0 being the highest priority
+.
+.br
+\-\-idx <index>: The index at which to insert the rule\. By default, the rule is appended to the end of the list\.
+.
+.br
+.
+.TP
+Adding a NID pair udsp.
+.
+.br
+The local NIDs which match the rule are added on a list on the peer NIs matching the rule\.
+When selecting the peer NI, the one with the local NID being used on its list is preferred\.
+.
+.br
+\-\-dst: NID in NID-range syntax (e.g. 10.1.1.1@tcp)
+.
+.br
+\-\-src: NID in NID-range syntax (e.g. 10.1.1.2@tcp)
+.
+.br
+\-\-idx <index>: The index at which to insert the rule\. By default, the rule is appended to the end of the list\.
+.
+.br
+.
+.TP
+Adding a Peer Router udsp.
+.
+.br
+The router NIDs matching the rule are added on a list on the peer NIs matching the rule\.
+When sending to a remote peer, the router which has its nid on the peer NI list is preferred\.
+.
+.br
+\-\-dst: peer NID in NID-range syntax (e.g. 10.1.1.1@tcp)
+.
+.br
+\-\-rte: router NID in NID-range syntax (e.g. 10.1.2.1@tcp)
+.
+.br
+\-\-idx <index>: The index at which to insert the rule\. By default, the rule is appended to the end of the list\.
+.
+.br
+.
+.TP
+\fBlnetctl udsp\fR del
+Delete user-defined selection policy.
+.
+.br
+\-\-idx: The index of the rule to delete\.
+.
+.br
+.TP
+\fBlnetctl udsp\fR show
+Show all user-defined selection policies in the system\. The policies are dumped in YAML form\.
+.
+.br
+.
.SH "OPTIONS"
.TP
.B --list-commands
state: NA
.
.br
+.
+.SS "Adding a UDSP"
+.
+.IP "\(bu" 4
+lnetctl udsp add \-\-src tcp \-\-priority 1
+.
+.IP "" 0
+.
+.P
+.
+.SS "Deleting a UDSP"
+.
+.IP "\(bu" 4
+lnetctl udsp del \-\-idx 0
+.
+.IP "" 0
+.
+.P
+.SS "Show UDSPs"
+.
+.IP "\(bu" 4
+lnetctl udsp show
+.
+.IP "" 0
+.
+.P
+udsp:
+.
+.br
+ \- idx: 0
+.
+.br
+ src: tcp
+.
+.br
+ dst: NA
+.
+.br
+ rte: NA
+.
+.br
+ action:
+.
+.br
+ priority: 0
+.
+.br
.SH SEE ALSO
.BR lustre (7)