1 // SPDX-License-Identifier: GPL-2.0
3 /* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
5 * Copyright (c) 2011, 2017, Intel Corporation.
7 * Copyright (c) 2018-2020 Data Direct Networks.
10 /* This file is part of Lustre, http://www.lustre.org/
12 * User Defined Selection Policies (UDSP) are introduced to add
13 * ability of fine traffic control. The policies are instantiated
14 * on LNet constructs and allow preference of some constructs
15 * over others as an extension of the selection algorithm.
16 * The order of operation is defined by the selection algorithm logical flow:
18 * 1. Iterate over all the networks that a peer can be reached on
19 * and select the best local network
20 * - The remote network with the highest priority is examined
22 * - The local network with the highest priority is selected
24 * - The local NI with the highest priority is selected
26 * 2. If the peer is a remote peer and has no local networks,
27 * - then select the remote peer network with the highest priority
29 * - Select the highest priority remote peer_ni on the network selected
31 * - Now that the peer's network and NI are decided, select the router
32 * in round robin from the peer NI's preferred router list.
34 * - Select the highest priority local NI on the local net of the
37 * 3. Otherwise for local peers, select the peer_ni from the peer.
38 * - highest priority peer NI is selected
40 * - Select the peer NI which has the local NI selected on its
44 * Accordingly, the User Interface allows for the following:
45 * - Adding a local network udsp: if multiple local networks are
46 * available, each one can have a priority.
47 * - Adding a local NID udsp: after a local network is chosen,
48 * if there are multiple NIs, each one can have a priority.
49 * - Adding a remote NID udsp: assign priority to a peer NID.
50 * - Adding a NID pair udsp: allows specifying local NIDs
51 * to be added on the list on the specified peer NIs
52 * When selecting a peer NI, the one with the
53 * local NID being used on its list is preferred.
54 * - Adding a Router udsp: similar to the NID pair udsp.
55 * Specified router NIDs are added on the list on the specified peer NIs.
56 * When sending to a remote peer, remote net is selected and the peer NID
57 * is selected. The router which has its nid on the peer NI list
59 * - Deleting a udsp: use the specified policy index to remove it
60 * from the policy list.
62 * Generally, the syntax is as follows
63 * lnetctl udsp add: add a udsp
64 * --src: ip2nets syntax specifying the local NID to match
65 * --dst: ip2nets syntax specifying the remote NID to match
66 * --rte: ip2nets syntax specifying the router NID to match
67 * --priority: priority value (0 - highest priority)
68 * --idx: index of where to insert the rule.
69 * By default, appends to the end of the rule list.
70 * lnetctl udsp del: delete a udsp
71 * --idx: index of the Policy.
72 * lnetctl udsp show: show udsps
73 * --idx: index of the policy to show.
75 * Author: Amir Shehata
78 #include <linux/uaccess.h>
80 #include <lnet/udsp.h>
81 #include <libcfs/libcfs.h>
// Members of the context handed to every udsp_apply_rule callback (accessed
// as udi-> throughout this file). The first four pointers name the construct
// currently being examined: peer NI, peer net, local NI and local net.
84 struct lnet_peer_ni *udi_lpni;
85 struct lnet_peer_net *udi_lpn;
86 struct lnet_ni *udi_ni;
87 struct lnet_net *udi_net;
// descriptor the construct is matched against
88 struct lnet_ud_nid_descr *udi_match;
// descriptor supplying the preferred NID or router list for
// preferred-list actions
89 struct lnet_ud_nid_descr *udi_action;
// action type of the policy being applied
91 enum lnet_udsp_action_type udi_type;
// Callback that applies one policy, described by a udsp_info, to an LNet
// construct and returns an int status.
96 typedef int (*udsp_apply_rule)(struct udsp_info *);
// Slots of the callback table handed to the policy helpers; the last
// enumerator is used as the table size.
99 UDSP_APPLY_ON_PEERS = 0,
100 UDSP_APPLY_PRIO_ON_NIS = 1,
101 UDSP_APPLY_RTE_ON_NETS = 2,
102 UDSP_APPLY_MAX_ENUM = 3,
// Returned by the net-level and peer-net-level callbacks when the rule does
// not target the kind of construct they handle.
105 #define RULE_NOT_APPLICABLE -1
// Report whether a descriptor is a net-level rule: true when it carries no
// address range expressions.
108 lnet_udsp_is_net_rule(struct lnet_ud_nid_descr *match)
110 return list_empty(&match->ud_addr_range);
// Compare two lists of cfs_expr_list for structural equality.
// The lists are first tested for both being empty and for having the same
// length. e1 is then walked while expr2 is advanced in lockstep through e2:
// each pair of expression lists must hold the same number of ranges, and
// each pair of ranges must agree on lo, hi and stride.
// Presumably returns false on the first difference - TODO confirm.
114 lnet_udsp_expr_list_equal(struct list_head *e1,
115 struct list_head *e2)
117 struct cfs_expr_list *expr1;
118 struct cfs_expr_list *expr2;
119 struct cfs_range_expr *range1, *range2;
121 if (list_empty(e1) && list_empty(e2))
124 if (lnet_get_list_len(e1) != lnet_get_list_len(e2))
127 expr2 = list_first_entry(e2, struct cfs_expr_list, el_link);
129 list_for_each_entry(expr1, e1, el_link) {
130 if (lnet_get_list_len(&expr1->el_exprs) !=
131 lnet_get_list_len(&expr2->el_exprs))
134 range2 = list_first_entry(&expr2->el_exprs,
135 struct cfs_range_expr,
138 list_for_each_entry(range1, &expr1->el_exprs, re_link) {
139 if (range1->re_lo != range2->re_lo ||
140 range1->re_hi != range2->re_hi ||
141 range1->re_stride != range2->re_stride)
143 range2 = list_next_entry(range2, re_link);
145 expr2 = list_next_entry(expr2, el_link);
// Compare two NID descriptors: tests whether the net type, the net number
// range expressions or the address range expressions differ.
152 lnet_udsp_nid_descr_equal(struct lnet_ud_nid_descr *e1,
153 struct lnet_ud_nid_descr *e2)
155 if (e1->ud_net_id.udn_net_type != e2->ud_net_id.udn_net_type ||
156 !lnet_udsp_expr_list_equal(&e1->ud_net_id.udn_net_num_range,
157 &e2->ud_net_id.udn_net_num_range) ||
158 !lnet_udsp_expr_list_equal(&e1->ud_addr_range, &e2->ud_addr_range))
// Compare the actions of two policies: checks the action types and, when
// the action is a priority, the priority values as well.
165 lnet_udsp_action_equal(struct lnet_udsp *e1, struct lnet_udsp *e2)
167 if (e1->udsp_action_type != e2->udsp_action_type)
170 if (e1->udsp_action_type == EN_LNET_UDSP_ACTION_PRIORITY &&
171 e1->udsp_action.udsp_priority != e2->udsp_action.udsp_priority)
// Compare the matching criteria of two policies: the src, dst and rte NID
// descriptors are each compared. The action is not examined by the lines
// shown here; callers compare it separately with lnet_udsp_action_equal().
178 lnet_udsp_equal(struct lnet_udsp *e1, struct lnet_udsp *e2)
180 /* check each NID descr */
181 if (!lnet_udsp_nid_descr_equal(&e1->udsp_src, &e2->udsp_src) ||
182 !lnet_udsp_nid_descr_equal(&e1->udsp_dst, &e2->udsp_dst) ||
183 !lnet_udsp_nid_descr_equal(&e1->udsp_rte, &e2->udsp_rte))
189 /* it is enough to look at the net type of the descriptor. If the criteria
190 * is present the net must be specified
// Report whether a descriptor was specified at all: true when its net type
// is non-zero.
193 lnet_udsp_criteria_present(struct lnet_ud_nid_descr *descr)
195 return (descr->ud_net_id.udn_net_type != 0);
// Apply a priority rule to the local NI in udi->udi_ni.
// The NI is tested with cfs_match_nid_net() against the net type, net number
// range and address range of udi->udi_match; on a match the NI selection
// priority is set.
// When reverting, -1 is stored in the unsigned priority, which yields the
// largest __u32 value.
199 lnet_udsp_apply_rule_on_ni(struct udsp_info *udi)
202 struct lnet_ni *ni = udi->udi_ni;
203 struct lnet_ud_nid_descr *ni_match = udi->udi_match;
204 __u32 priority = (udi->udi_revert) ? -1 : udi->udi_priority;
206 rc = cfs_match_nid_net(
208 ni_match->ud_net_id.udn_net_type,
209 &ni_match->ud_net_id.udn_net_num_range,
210 &ni_match->ud_addr_range);
214 CDEBUG(D_NET, "apply udsp on ni %s\n",
215 libcfs_nidstr(&ni->ni_nid));
217 /* Detected match. Set NIDs priority */
218 lnet_ni_set_sel_priority_locked(ni, priority);
// Build the preferred router list of a local net from a router descriptor.
// Walks every bucket of the_lnet.ln_remote_nets_hash, every remote net in
// it and every route of that net. For each route the gateway peer NIs are
// matched against rte_action, and the gateway primary NID is matched as
// well. On a match the existing preferred routers of the net are cleared
// (guarded by the cleared flag and revert), and the gateway primary NID is
// added with lnet_net_add_pref_rtr(); -EEXIST is treated as success.
// The exclusive net lock is dropped around the clear and re-taken after the
// add. NOTE(review): presumably the lock is also released before the add -
// confirm against the full function.
224 lnet_udsp_apply_rte_list_on_net(struct lnet_net *net,
225 struct lnet_ud_nid_descr *rte_action,
228 struct lnet_remotenet *rnet;
229 struct list_head *rn_list;
230 struct lnet_route *route;
231 struct lnet_peer_ni *lpni;
232 bool cleared = false;
233 struct lnet_nid *gw_nid, *gw_prim_nid;
237 for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
238 rn_list = &the_lnet.ln_remote_nets_hash[i];
239 list_for_each_entry(rnet, rn_list, lrn_list) {
240 list_for_each_entry(route, &rnet->lrn_routes, lr_list) {
241 /* look if gw nid on the same net matches */
243 &route->lr_gateway->lp_primary_nid;
245 while ((lpni = lnet_get_next_peer_ni_locked(route->lr_gateway,
248 if (!lnet_get_net_locked(lpni->lpni_peer_net->lpn_net_id))
250 gw_nid = &lpni->lpni_nid;
251 rc = cfs_match_nid_net(
253 rte_action->ud_net_id.udn_net_type,
254 &rte_action->ud_net_id.udn_net_num_range,
255 &rte_action->ud_addr_range);
259 /* match gw primary nid on a remote network */
261 gw_nid = gw_prim_nid;
262 rc = cfs_match_nid_net(
264 rte_action->ud_net_id.udn_net_type,
265 &rte_action->ud_net_id.udn_net_num_range,
266 &rte_action->ud_addr_range);
270 lnet_net_unlock(LNET_LOCK_EX);
271 if (!cleared || revert) {
272 lnet_net_clr_pref_rtrs(net);
275 lnet_net_lock(LNET_LOCK_EX);
279 /* match. Add to pref NIDs */
280 CDEBUG(D_NET, "udsp net->gw: %s->%s\n",
281 libcfs_net2str(net->net_id),
282 libcfs_nidstr(gw_prim_nid));
283 rc = lnet_net_add_pref_rtr(net, gw_prim_nid);
284 lnet_net_lock(LNET_LOCK_EX);
285 /* success if EEXIST return */
286 if (rc && rc != -EEXIST) {
287 CERROR("Failed to add %s to %s pref rtr list\n",
288 libcfs_nidstr(gw_prim_nid),
289 libcfs_net2str(net->net_id));
// Apply a router rule to every local net: walks the_lnet.ln_nets, compares
// the type of each net with the one in udi->udi_match, matches the net id
// with cfs_match_net() and hands the net to
// lnet_udsp_apply_rte_list_on_net() together with the router descriptor in
// udi->udi_action.
// last_failure presumably records the most recent error so the walk can
// continue past a failing net - TODO confirm.
300 lnet_udsp_apply_rte_rule_on_nets(struct udsp_info *udi)
303 int last_failure = 0;
304 struct lnet_net *net;
305 struct lnet_ud_nid_descr *match = udi->udi_match;
306 struct lnet_ud_nid_descr *rte_action = udi->udi_action;
308 list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
309 if (LNET_NETTYP(net->net_id) != match->ud_net_id.udn_net_type)
312 rc = cfs_match_net(net->net_id,
313 match->ud_net_id.udn_net_type,
314 &match->ud_net_id.udn_net_num_range);
318 CDEBUG(D_NET, "apply rule on %s\n",
319 libcfs_net2str(net->net_id));
320 rc = lnet_udsp_apply_rte_list_on_net(net, rte_action,
// Single-net variant of the router rule: matches the net in udi->udi_net
// against udi->udi_match with cfs_match_net() and applies the router
// descriptor in udi->udi_action through lnet_udsp_apply_rte_list_on_net().
330 lnet_udsp_apply_rte_rule_on_net(struct udsp_info *udi)
333 struct lnet_net *net = udi->udi_net;
334 struct lnet_ud_nid_descr *match = udi->udi_match;
335 struct lnet_ud_nid_descr *rte_action = udi->udi_action;
337 rc = cfs_match_net(net->net_id,
338 match->ud_net_id.udn_net_type,
339 &match->ud_net_id.udn_net_num_range);
343 CDEBUG(D_NET, "apply rule on %s\n",
344 libcfs_net2str(net->net_id));
345 rc = lnet_udsp_apply_rte_list_on_net(net, rte_action,
// Apply a priority rule to the local net in udi->udi_net.
// Returns RULE_NOT_APPLICABLE unless the match descriptor is a net rule
// (no address range). Otherwise the net id is matched with cfs_match_net()
// and the net selection priority is set; reverting uses -1, which becomes
// the largest __u32 value.
352 lnet_udsp_apply_prio_rule_on_net(struct udsp_info *udi)
355 struct lnet_ud_nid_descr *match = udi->udi_match;
356 struct lnet_net *net = udi->udi_net;
357 __u32 priority = (udi->udi_revert) ? -1 : udi->udi_priority;
359 if (!lnet_udsp_is_net_rule(match))
360 return RULE_NOT_APPLICABLE;
362 rc = cfs_match_net(net->net_id,
363 match->ud_net_id.udn_net_type,
364 &match->ud_net_id.udn_net_num_range);
368 CDEBUG(D_NET, "apply rule on %s\n",
369 libcfs_net2str(net->net_id));
371 lnet_net_set_sel_priority_locked(net, priority);
// Apply a source rule to all local nets and NIs. For each net in
// the_lnet.ln_nets the net type is compared with the match descriptor, the
// net-level priority rule is tried first, and each NI on net_ni_list is
// then passed to lnet_udsp_apply_rule_on_ni().
// A zero return from the net-level rule is tested explicitly; presumably
// the NI walk is skipped in that case - TODO confirm.
377 lnet_udsp_apply_rule_on_nis(struct udsp_info *udi)
381 struct lnet_net *net;
382 struct lnet_ud_nid_descr *ni_match = udi->udi_match;
383 int last_failure = 0;
385 list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
386 if (LNET_NETTYP(net->net_id) != ni_match->ud_net_id.udn_net_type)
390 if (!lnet_udsp_apply_prio_rule_on_net(udi))
393 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
395 rc = lnet_udsp_apply_rule_on_ni(udi);
// Build the preferred router list of a peer NI from a router descriptor.
// Walks every route of every remote net in the_lnet.ln_remote_nets_hash and
// matches the gateway primary NID against rte_action. On a match the
// current preferred routers of the peer NI are cleared (guarded by the
// cleared flag and revert) and the gateway NID is added with
// lnet_peer_add_pref_rtr(); -EEXIST is treated as success.
// The exclusive net lock is dropped around the clear and re-taken after the
// add. NOTE(review): presumably it is also released before the add -
// confirm against the full function.
405 lnet_udsp_apply_rte_list_on_lpni(struct lnet_peer_ni *lpni,
406 struct lnet_ud_nid_descr *rte_action,
409 struct lnet_remotenet *rnet;
410 struct list_head *rn_list;
411 struct lnet_route *route;
412 bool cleared = false;
413 struct lnet_nid *gw_nid;
417 for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
418 rn_list = &the_lnet.ln_remote_nets_hash[i];
419 list_for_each_entry(rnet, rn_list, lrn_list) {
420 list_for_each_entry(route, &rnet->lrn_routes, lr_list) {
421 gw_nid = &route->lr_gateway->lp_primary_nid;
422 rc = cfs_match_nid_net(
424 rte_action->ud_net_id.udn_net_type,
425 &rte_action->ud_net_id.udn_net_num_range,
426 &rte_action->ud_addr_range);
429 lnet_net_unlock(LNET_LOCK_EX);
430 if (!cleared || revert) {
431 CDEBUG(D_NET, "%spref rtr nids from lpni %s\n",
432 (revert) ? "revert " : "clear ",
433 libcfs_nidstr(&lpni->lpni_nid));
434 lnet_peer_clr_pref_rtrs(lpni);
437 lnet_net_lock(LNET_LOCK_EX);
441 CDEBUG(D_NET, "add gw nid %s as preferred for peer %s\n",
442 libcfs_nidstr(gw_nid),
443 libcfs_nidstr(&lpni->lpni_nid));
444 /* match. Add to pref NIDs */
445 rc = lnet_peer_add_pref_rtr(lpni, gw_nid);
446 lnet_net_lock(LNET_LOCK_EX);
447 /* success if EEXIST return */
448 if (rc && rc != -EEXIST) {
449 CERROR("Failed to add %s to %s pref rtr list\n",
450 libcfs_nidstr(gw_nid),
451 libcfs_nidstr(&lpni->lpni_nid));
// Build the preferred local NID list of a peer NI from a NID descriptor.
// Walks the local nets whose type is compared with ni_action and, within
// them, every NI; each NI is matched against ni_action with
// cfs_match_nid_net(). On a match the current preferred NIDs of the peer NI
// are cleared (guarded by the cleared flag and revert) and the NI NID is
// added with lnet_peer_add_pref_nid(); -EEXIST is treated as success.
// The exclusive net lock is dropped around the clear and re-taken after the
// add. NOTE(review): presumably it is also released before the add -
// confirm against the full function.
462 lnet_udsp_apply_ni_list(struct lnet_peer_ni *lpni,
463 struct lnet_ud_nid_descr *ni_action,
468 struct lnet_net *net;
469 bool cleared = false;
471 list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
472 if (LNET_NETTYP(net->net_id) != ni_action->ud_net_id.udn_net_type)
474 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
475 rc = cfs_match_nid_net(
477 ni_action->ud_net_id.udn_net_type,
478 &ni_action->ud_net_id.udn_net_num_range,
479 &ni_action->ud_addr_range);
482 lnet_net_unlock(LNET_LOCK_EX);
483 if (!cleared || revert) {
484 lnet_peer_clr_pref_nids(lpni);
485 CDEBUG(D_NET, "%spref nids from lpni %s\n",
486 (revert) ? "revert " : "clear ",
487 libcfs_nidstr(&lpni->lpni_nid));
490 lnet_net_lock(LNET_LOCK_EX);
494 CDEBUG(D_NET, "add nid %s as preferred for peer %s\n",
495 libcfs_nidstr(&ni->ni_nid),
496 libcfs_nidstr(&lpni->lpni_nid));
497 /* match. Add to pref NIDs */
498 rc = lnet_peer_add_pref_nid(lpni, &ni->ni_nid);
499 lnet_net_lock(LNET_LOCK_EX);
500 /* success if EEXIST return */
501 if (rc && rc != -EEXIST) {
502 CERROR("Failed to add %s to %s pref nid list\n",
503 libcfs_nidstr(&ni->ni_nid),
504 libcfs_nidstr(&lpni->lpni_nid));
// Apply a rule to the peer NI in udi->udi_lpni.
// The peer NI is matched against udi->udi_match; a second test looks for a
// net-only match using the peer net id and the length of the address range.
// For a preferred-list action with udi->udi_local set, the action
// descriptor is applied as a preferred local NI list; the other
// preferred-list branch applies it as a preferred router list. The
// remaining path sets the peer NI selection priority (reverting uses -1,
// the largest __u32 value).
514 lnet_udsp_apply_rule_on_lpni(struct udsp_info *udi)
517 struct lnet_peer_ni *lpni = udi->udi_lpni;
518 struct lnet_ud_nid_descr *lp_match = udi->udi_match;
519 struct lnet_ud_nid_descr *action = udi->udi_action;
520 __u32 priority = (udi->udi_revert) ? -1 : udi->udi_priority;
521 bool local = udi->udi_local;
522 enum lnet_udsp_action_type type = udi->udi_type;
524 rc = cfs_match_nid_net(
526 lp_match->ud_net_id.udn_net_type,
527 &lp_match->ud_net_id.udn_net_num_range,
528 &lp_match->ud_addr_range);
530 /* check if looking for a net match */
533 lnet_get_list_len(&lp_match->ud_addr_range) ||
534 !cfs_match_net(udi->udi_lpn->lpn_net_id,
535 lp_match->ud_net_id.udn_net_type,
536 &lp_match->ud_net_id.udn_net_num_range))) {
540 if (type == EN_LNET_UDSP_ACTION_PREFERRED_LIST && local) {
541 rc = lnet_udsp_apply_ni_list(lpni, action,
545 } else if (type == EN_LNET_UDSP_ACTION_PREFERRED_LIST &&
547 rc = lnet_udsp_apply_rte_list_on_lpni(lpni, action,
552 lnet_peer_ni_set_selection_priority(lpni, priority);
// Apply a priority rule to the peer net in udi->udi_lpn.
// Returns RULE_NOT_APPLICABLE for preferred-list actions and for match
// descriptors that carry an address range. Otherwise the peer net id is
// matched with cfs_match_net() and the peer net selection priority is set;
// reverting uses -1, which becomes the largest __u32 value.
559 lnet_udsp_apply_rule_on_lpn(struct udsp_info *udi)
562 struct lnet_ud_nid_descr *match = udi->udi_match;
563 struct lnet_peer_net *lpn = udi->udi_lpn;
564 __u32 priority = (udi->udi_revert) ? -1 : udi->udi_priority;
566 if (udi->udi_type == EN_LNET_UDSP_ACTION_PREFERRED_LIST ||
567 !lnet_udsp_is_net_rule(match))
568 return RULE_NOT_APPLICABLE;
570 rc = cfs_match_net(lpn->lpn_net_id,
571 match->ud_net_id.udn_net_type,
572 &match->ud_net_id.udn_net_num_range);
576 CDEBUG(D_NET, "apply rule on lpn %s\n",
577 libcfs_net2str(lpn->lpn_net_id));
578 lnet_peer_net_set_sel_priority_locked(lpn, priority);
// Apply a rule to every known peer. For each per-CPT peer table, each peer
// on pt_peer_list and each of its peer nets, the peer net type is compared
// with the match descriptor, the peer-net rule is tried, and every peer NI
// is then stored in udi->udi_lpni and passed to
// lnet_udsp_apply_rule_on_lpni().
// last_failure presumably keeps the latest error while the walk continues -
// TODO confirm.
584 lnet_udsp_apply_rule_on_lpnis(struct udsp_info *udi)
586 /* iterate over all the peers in the system and find if any of the
587 * peers match the criteria. If they do, clear the preferred list
588 * and add the new list
590 int lncpt = cfs_percpt_number(the_lnet.ln_peer_tables);
591 struct lnet_ud_nid_descr *lp_match = udi->udi_match;
592 struct lnet_peer_table *ptable;
593 struct lnet_peer_net *lpn;
594 struct lnet_peer_ni *lpni;
595 struct lnet_peer *lp;
596 int last_failure = 0;
600 for (cpt = 0; cpt < lncpt; cpt++) {
601 ptable = the_lnet.ln_peer_tables[cpt];
602 list_for_each_entry(lp, &ptable->pt_peer_list, lp_peer_list) {
603 CDEBUG(D_NET, "udsp examining lp %s\n",
604 libcfs_nidstr(&lp->lp_primary_nid));
605 list_for_each_entry(lpn,
608 CDEBUG(D_NET, "udsp examining lpn %s\n",
609 libcfs_net2str(lpn->lpn_net_id));
611 if (LNET_NETTYP(lpn->lpn_net_id) !=
612 lp_match->ud_net_id.udn_net_type)
617 if (!lnet_udsp_apply_rule_on_lpn(udi))
620 list_for_each_entry(lpni,
623 CDEBUG(D_NET, "udsp examining lpni %s\n",
624 libcfs_nidstr(&lpni->lpni_nid));
625 udi->udi_lpni = lpni;
626 rc = lnet_udsp_apply_rule_on_lpni(udi);
// Classify one policy by which descriptors are present and dispatch it to
// the matching callback in cbs:
//   dst and src - NID pair rule, preferred-list action, peers callback
//   dst and rte - router rule, preferred-list action, peers callback
//   dst only    - destination priority rule, peers callback
//   src only    - source priority rule, NIs callback
// Anything else is reported as a bad policy. Each branch checks that its
// callback slot is populated and that the action type is the expected one
// before filling in udi.
// NOTE(review): in the dst+rte branch the src-present test cannot be true,
// because dst+src is already taken by the first branch; its message also
// names src and dst rather than rte - confirm intent.
// NOTE(review): both priority branches test the action type a second time
// when setting udi_priority; if the first test bails out, the zero
// assignment is dead code - confirm.
638 lnet_udsp_apply_single_policy(struct lnet_udsp *udsp, struct udsp_info *udi,
639 udsp_apply_rule *cbs)
643 if (lnet_udsp_criteria_present(&udsp->udsp_dst) &&
644 lnet_udsp_criteria_present(&udsp->udsp_src)) {
646 if (!cbs[UDSP_APPLY_ON_PEERS])
649 if (udsp->udsp_action_type !=
650 EN_LNET_UDSP_ACTION_PREFERRED_LIST) {
651 CERROR("Bad action type. Expected %d got %d\n",
652 EN_LNET_UDSP_ACTION_PREFERRED_LIST,
653 udsp->udsp_action_type);
656 udi->udi_match = &udsp->udsp_dst;
657 udi->udi_action = &udsp->udsp_src;
658 udi->udi_type = EN_LNET_UDSP_ACTION_PREFERRED_LIST;
659 udi->udi_local = true;
661 CDEBUG(D_NET, "applying udsp (%p) dst->src\n",
663 rc = cbs[UDSP_APPLY_ON_PEERS](udi);
666 } else if (lnet_udsp_criteria_present(&udsp->udsp_dst) &&
667 lnet_udsp_criteria_present(&udsp->udsp_rte)) {
669 if (!cbs[UDSP_APPLY_ON_PEERS])
672 if (udsp->udsp_action_type !=
673 EN_LNET_UDSP_ACTION_PREFERRED_LIST) {
674 CERROR("Bad action type. Expected %d got %d\n",
675 EN_LNET_UDSP_ACTION_PREFERRED_LIST,
676 udsp->udsp_action_type);
680 if (lnet_udsp_criteria_present(&udsp->udsp_src)) {
681 CERROR("only one of src or dst can be specified\n");
684 udi->udi_match = &udsp->udsp_dst;
685 udi->udi_action = &udsp->udsp_rte;
686 udi->udi_type = EN_LNET_UDSP_ACTION_PREFERRED_LIST;
687 udi->udi_local = false;
689 CDEBUG(D_NET, "applying udsp (%p) dst->rte\n",
691 rc = cbs[UDSP_APPLY_ON_PEERS](udi);
694 } else if (lnet_udsp_criteria_present(&udsp->udsp_dst)) {
695 /* destination priority rule */
696 if (!cbs[UDSP_APPLY_ON_PEERS])
699 if (udsp->udsp_action_type !=
700 EN_LNET_UDSP_ACTION_PRIORITY) {
701 CERROR("Bad action type. Expected %d got %d\n",
702 EN_LNET_UDSP_ACTION_PRIORITY,
703 udsp->udsp_action_type);
706 udi->udi_match = &udsp->udsp_dst;
707 udi->udi_type = EN_LNET_UDSP_ACTION_PRIORITY;
708 if (udsp->udsp_action_type !=
709 EN_LNET_UDSP_ACTION_PRIORITY) {
710 udi->udi_priority = 0;
712 udi->udi_priority = udsp->udsp_action.udsp_priority;
714 udi->udi_local = true;
716 CDEBUG(D_NET, "applying udsp (%p) on destination\n",
718 rc = cbs[UDSP_APPLY_ON_PEERS](udi);
721 } else if (lnet_udsp_criteria_present(&udsp->udsp_src)) {
722 /* source priority rule */
723 if (!cbs[UDSP_APPLY_PRIO_ON_NIS])
726 if (udsp->udsp_action_type !=
727 EN_LNET_UDSP_ACTION_PRIORITY) {
728 CERROR("Bad action type. Expected %d got %d\n",
729 EN_LNET_UDSP_ACTION_PRIORITY,
730 udsp->udsp_action_type);
733 udi->udi_match = &udsp->udsp_src;
734 udi->udi_type = EN_LNET_UDSP_ACTION_PRIORITY;
735 if (udsp->udsp_action_type !=
736 EN_LNET_UDSP_ACTION_PRIORITY) {
737 udi->udi_priority = 0;
739 udi->udi_priority = udsp->udsp_action.udsp_priority;
741 udi->udi_local = true;
743 CDEBUG(D_NET, "applying udsp (%p) on source\n",
745 rc = cbs[UDSP_APPLY_PRIO_ON_NIS](udi);
747 CERROR("Bad UDSP policy\n");
// Apply either one policy or the whole policy list through the callback
// table cbs. One path applies only the udsp passed in (presumably when it
// is non-NULL - TODO confirm); the other walks the_lnet.ln_udsp_list in
// reverse order and applies every policy. The per-construct wrappers in
// this file pass a NULL udsp.
755 lnet_udsp_apply_policies_helper(struct lnet_udsp *udsp, struct udsp_info *udi,
756 udsp_apply_rule *cbs)
759 int last_failure = 0;
762 return lnet_udsp_apply_single_policy(udsp, udi, cbs);
764 list_for_each_entry_reverse(udsp,
765 &the_lnet.ln_udsp_list,
767 rc = lnet_udsp_apply_single_policy(udsp, udi, cbs);
// Apply the policy list to one local NI: starts from a zeroed udsp_info and
// a callback table holding only the NI priority callback, then runs the
// helper with a NULL udsp.
776 lnet_udsp_apply_policies_on_ni(struct lnet_ni *ni)
778 struct udsp_info udi;
779 udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM] = {NULL};
781 memset(&udi, 0, sizeof(udi));
785 cbs[UDSP_APPLY_PRIO_ON_NIS] = lnet_udsp_apply_rule_on_ni;
787 return lnet_udsp_apply_policies_helper(NULL, &udi, cbs);
// Apply the policy list to one local net: starts from a zeroed udsp_info,
// registers the net priority callback and the single-net router callback,
// then runs the helper with a NULL udsp.
791 lnet_udsp_apply_policies_on_net(struct lnet_net *net)
793 struct udsp_info udi;
794 udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM] = {NULL};
796 memset(&udi, 0, sizeof(udi));
800 cbs[UDSP_APPLY_PRIO_ON_NIS] = lnet_udsp_apply_prio_rule_on_net;
801 cbs[UDSP_APPLY_RTE_ON_NETS] = lnet_udsp_apply_rte_rule_on_net;
803 return lnet_udsp_apply_policies_helper(NULL, &udi, cbs);
// Apply the policy list to one peer NI: starts from a zeroed udsp_info and
// a callback table holding only the peer NI callback, then runs the helper
// with a NULL udsp.
807 lnet_udsp_apply_policies_on_lpni(struct lnet_peer_ni *lpni)
809 struct udsp_info udi;
810 udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM] = {NULL};
812 memset(&udi, 0, sizeof(udi));
816 cbs[UDSP_APPLY_ON_PEERS] = lnet_udsp_apply_rule_on_lpni;
818 return lnet_udsp_apply_policies_helper(NULL, &udi, cbs);
// Apply the policy list to one peer net: starts from a zeroed udsp_info and
// a callback table holding only the peer net callback, then runs the helper
// with a NULL udsp.
822 lnet_udsp_apply_policies_on_lpn(struct lnet_peer_net *lpn)
824 struct udsp_info udi;
825 udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM] = {NULL};
827 memset(&udi, 0, sizeof(udi));
831 cbs[UDSP_APPLY_ON_PEERS] = lnet_udsp_apply_rule_on_lpn;
833 return lnet_udsp_apply_policies_helper(NULL, &udi, cbs);
// Apply (or, with revert set, undo) a policy across the whole system.
// Registers the system-wide callbacks for peers, local NIs and router
// rules, records revert in the udsp_info, and runs the helper while holding
// the net lock in exclusive mode.
837 lnet_udsp_apply_policies(struct lnet_udsp *udsp, bool revert)
840 struct udsp_info udi;
841 udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM] = {NULL};
843 memset(&udi, 0, sizeof(udi));
845 cbs[UDSP_APPLY_ON_PEERS] = lnet_udsp_apply_rule_on_lpnis;
846 cbs[UDSP_APPLY_PRIO_ON_NIS] = lnet_udsp_apply_rule_on_nis;
847 cbs[UDSP_APPLY_RTE_ON_NETS] = lnet_udsp_apply_rte_rule_on_nets;
849 udi.udi_revert = revert;
851 lnet_net_lock(LNET_LOCK_EX);
852 rc = lnet_udsp_apply_policies_helper(udsp, &udi, cbs);
853 lnet_net_unlock(LNET_LOCK_EX);
// Look up a policy by position: walks the_lnet.ln_udsp_list, logging the
// stored index of each entry next to the loop counter and the requested
// idx, and logs the policy that was found.
// NOTE(review): presumably returns NULL when idx is out of range - confirm.
859 lnet_udsp_get_policy(int idx)
862 struct lnet_udsp *udsp = NULL;
865 CDEBUG(D_NET, "Get UDSP at idx = %d\n", idx);
870 list_for_each_entry(udsp, &the_lnet.ln_udsp_list, udsp_on_list) {
871 CDEBUG(D_NET, "iterating over upsp %d:%d:%d\n",
872 udsp->udsp_idx, i, idx);
880 CDEBUG(D_NET, "Found UDSP (%p)\n", udsp);
// Insert a policy into the_lnet.ln_udsp_list.
// While scanning the list, an existing policy whose src, dst and rte
// descriptors equal those of new is detected; if both are priority actions
// with different values, the existing entry takes over the new priority.
// Presumably the duplicate is then not inserted - TODO confirm.
// Otherwise new is linked immediately before the entry chosen as the
// insertion point, or appended at the tail of the list, and the resulting
// list is logged.
889 lnet_udsp_add_policy(struct lnet_udsp *new, int idx)
891 struct lnet_udsp *udsp;
892 struct lnet_udsp *insert = NULL;
895 list_for_each_entry(udsp, &the_lnet.ln_udsp_list, udsp_on_list) {
896 CDEBUG(D_NET, "found udsp i = %d:%d, idx = %d\n",
897 i, udsp->udsp_idx, idx);
903 if (lnet_udsp_equal(udsp, new)) {
904 if (!lnet_udsp_action_equal(udsp, new) &&
905 udsp->udsp_action_type == EN_LNET_UDSP_ACTION_PRIORITY &&
906 new->udsp_action_type == EN_LNET_UDSP_ACTION_PRIORITY) {
907 udsp->udsp_action.udsp_priority = new->udsp_action.udsp_priority;
908 CDEBUG(D_NET, "udsp: %p index %d updated priority to %d\n",
911 udsp->udsp_action.udsp_priority);
// list_add() links new right after the given node, so passing the
// predecessor of insert places new directly before insert
919 list_add(&new->udsp_on_list, insert->udsp_on_list.prev);
921 list_for_each_entry(udsp,
922 &the_lnet.ln_udsp_list,
931 list_add_tail(&new->udsp_on_list, &the_lnet.ln_udsp_list);
935 CDEBUG(D_NET, "udsp: %p added at index %d\n", new, new->udsp_idx);
937 CDEBUG(D_NET, "udsp list:\n");
938 list_for_each_entry(udsp, &the_lnet.ln_udsp_list, udsp_on_list)
939 CDEBUG(D_NET, "udsp %p:%d\n", udsp, udsp->udsp_idx);
// Delete the policy stored at index idx.
// One path tears down every policy through lnet_udsp_destroy(false);
// presumably this is selected by a special idx value - TODO confirm.
// Otherwise the list is walked with the safe iterator and the first entry
// whose udsp_idx equals idx is unlinked, reverted system-wide with
// lnet_udsp_apply_policies(udsp, true) and freed. The removed flag keeps
// later entries from being deleted as well.
945 lnet_udsp_del_policy(int idx)
947 struct lnet_udsp *udsp;
948 struct lnet_udsp *tmp;
949 bool removed = false;
952 lnet_udsp_destroy(false);
956 CDEBUG(D_NET, "del udsp at idx = %d\n", idx);
958 list_for_each_entry_safe(udsp,
960 &the_lnet.ln_udsp_list,
964 if (udsp->udsp_idx == idx && !removed) {
965 list_del_init(&udsp->udsp_on_list);
966 lnet_udsp_apply_policies(udsp, true);
967 lnet_udsp_free(udsp);
// Fill info with the UDSP state of a local NI: the NI selection priority,
// the selection priority of its net and the preferred router NIDs of that
// net, converted with lnet_nid_to_nid4(). Entries are stored only while the
// index is below LNET_MAX_SHOW_NUM_NID.
979 lnet_udsp_get_ni_info(struct lnet_ioctl_construct_udsp_info *info,
982 struct lnet_nid_list *ne;
983 struct lnet_net *net = ni->ni_net;
988 info->cud_nid_priority = ni->ni_sel_priority;
990 info->cud_net_priority = ni->ni_net->net_sel_priority;
991 list_for_each_entry(ne, &net->net_rtr_pref_nids, nl_list) {
992 if (i < LNET_MAX_SHOW_NUM_NID)
993 info->cud_pref_rtr_nid[i] =
994 lnet_nid_to_nid4(&ne->nl_nid);
// Fill info with the UDSP state of a peer NI: its selection priority, its
// preferred local NIDs, its preferred router NIDs and the selection
// priority of its peer net. NIDs are converted with lnet_nid_to_nid4() and
// stored only while the index is below LNET_MAX_SHOW_NUM_NID.
// A single preferred NID is read from lpni_pref.nid; more than one are read
// from the lpni_pref.nids list.
// Asserts that lpni, its peer net and the owning peer all exist.
1003 lnet_udsp_get_peer_info(struct lnet_ioctl_construct_udsp_info *info,
1004 struct lnet_peer_ni *lpni)
1006 struct lnet_nid_list *ne;
1009 /* peer tree structure needs to be in existence */
1010 LASSERT(lpni && lpni->lpni_peer_net &&
1011 lpni->lpni_peer_net->lpn_peer);
1013 info->cud_nid_priority = lpni->lpni_sel_priority;
1014 CDEBUG(D_NET, "lpni %s has %d pref nids\n",
1015 libcfs_nidstr(&lpni->lpni_nid),
1016 lpni->lpni_pref_nnids);
1017 if (lpni->lpni_pref_nnids == 1) {
1018 info->cud_pref_nid[0] = lnet_nid_to_nid4(&lpni->lpni_pref.nid);
1019 } else if (lpni->lpni_pref_nnids > 1) {
1020 struct list_head *list = &lpni->lpni_pref.nids;
1022 list_for_each_entry(ne, list, nl_list) {
1023 if (i < LNET_MAX_SHOW_NUM_NID)
1024 info->cud_pref_nid[i] =
1025 lnet_nid_to_nid4(&ne->nl_nid);
1033 list_for_each_entry(ne, &lpni->lpni_rtr_pref_nids, nl_list) {
1034 if (i < LNET_MAX_SHOW_NUM_NID)
1035 info->cud_pref_rtr_nid[i] =
1036 lnet_nid_to_nid4(&ne->nl_nid);
1042 info->cud_net_priority = lpni->lpni_peer_net->lpn_sel_priority;
// Report the UDSP state of the construct identified by nid.
// When info->cud_peer is clear, nid is looked up as a local NI and reported
// through lnet_udsp_get_ni_info(). Otherwise it is looked up as a peer NI,
// reported through lnet_udsp_get_peer_info(), and the reference taken by
// the lookup is dropped. A peer NID that is not found is only logged.
1046 lnet_udsp_get_construct_info(struct lnet_ioctl_construct_udsp_info *info,
1047 struct lnet_nid *nid)
1050 struct lnet_peer_ni *lpni;
1052 if (!info->cud_peer) {
1053 ni = lnet_nid_to_ni_locked(nid, 0);
1055 lnet_udsp_get_ni_info(info, ni);
1057 lpni = lnet_peer_ni_find_locked(nid);
1059 CDEBUG(D_NET, "nid %s is not found\n",
1060 libcfs_nidstr(nid));
1062 lnet_udsp_get_peer_info(info, lpni);
1063 lnet_peer_ni_decref_locked(lpni);
// Allocate a zeroed policy from lnet_udsp_cachep and initialise its list
// linkage plus the address range and net number range list heads of the
// src, dst and rte descriptors. The allocation is reported to the libcfs
// memory accounting through LIBCFS_ALLOC_POST.
1069 lnet_udsp_alloc(void)
1071 struct lnet_udsp *udsp;
1073 udsp = kmem_cache_alloc(lnet_udsp_cachep, GFP_NOFS | __GFP_ZERO);
1078 INIT_LIST_HEAD(&udsp->udsp_on_list);
1079 INIT_LIST_HEAD(&udsp->udsp_src.ud_addr_range);
1080 INIT_LIST_HEAD(&udsp->udsp_src.ud_net_id.udn_net_num_range);
1081 INIT_LIST_HEAD(&udsp->udsp_dst.ud_addr_range);
1082 INIT_LIST_HEAD(&udsp->udsp_dst.ud_net_id.udn_net_num_range);
1083 INIT_LIST_HEAD(&udsp->udsp_rte.ud_addr_range);
1084 INIT_LIST_HEAD(&udsp->udsp_rte.ud_net_id.udn_net_num_range);
1086 LIBCFS_ALLOC_POST(udsp, sizeof(*udsp), "alloc");
// Release the expression buffer owned by a NID descriptor.
// Descriptors without criteria are tested for first. Otherwise a single
// LIBCFS_FREE of ud_mem_size bytes is issued, starting at the first node of
// the net number range list when that list is non-empty, else at the first
// node of the address range list.
1091 lnet_udsp_nid_descr_free(struct lnet_ud_nid_descr *nid_descr)
1093 struct list_head *net_range = &nid_descr->ud_net_id.udn_net_num_range;
1095 if (!lnet_udsp_criteria_present(nid_descr))
1098 /* memory management is a bit tricky here. When we allocate the
1099 * memory to store the NID descriptor we allocate a large buffer
1100 * for all the data, so we need to free the entire buffer at
1101 * once. If the net is present the net_range->next points to that
1102 * buffer otherwise if the ud_addr_range is present then it's the
1103 * ud_addr_range.next
1105 if (!list_empty(net_range))
1106 LIBCFS_FREE(net_range->next, nid_descr->ud_mem_size);
1107 else if (!list_empty(&nid_descr->ud_addr_range))
1108 LIBCFS_FREE(nid_descr->ud_addr_range.next,
1109 nid_descr->ud_mem_size);
// Free a policy: releases the buffers of its src, dst and rte descriptors,
// reports the release to the libcfs memory accounting and returns the
// object to lnet_udsp_cachep.
1113 lnet_udsp_free(struct lnet_udsp *udsp)
1115 lnet_udsp_nid_descr_free(&udsp->udsp_src);
1116 lnet_udsp_nid_descr_free(&udsp->udsp_dst);
1117 lnet_udsp_nid_descr_free(&udsp->udsp_rte);
1119 LIBCFS_FREE_PRE(udsp, sizeof(*udsp), "kfreed");
1120 kmem_cache_free(lnet_udsp_cachep, udsp);
// Remove every policy from the_lnet.ln_udsp_list: each entry is unlinked,
// reverted with lnet_udsp_apply_policies(udsp, true) and freed.
// NOTE(review): the revert is presumably skipped when shutdown is true -
// confirm against the full function.
1124 lnet_udsp_destroy(bool shutdown)
1126 struct lnet_udsp *udsp, *tmp;
1128 CDEBUG(D_NET, "Destroying UDSPs in the system\n");
1130 list_for_each_entry_safe(udsp, tmp, &the_lnet.ln_udsp_list,
1132 list_del(&udsp->udsp_on_list);
1134 lnet_udsp_apply_policies(udsp, true);
1135 lnet_udsp_free(udsp);
// Compute the number of bytes one NID descriptor occupies once marshaled.
// The size starts at one lnet_ioctl_udsp_descr header. For descriptors with
// criteria, the ranges of the first net number expression and of every
// address expression are counted, and the total grows by one
// lnet_expressions per counted expression plus one lnet_range_expr per
// range.
1140 lnet_size_marshaled_nid_descr(struct lnet_ud_nid_descr *descr)
1142 struct cfs_expr_list *expr;
1144 int range_count = 0;
1145 size_t size = sizeof(struct lnet_ioctl_udsp_descr);
1147 if (!lnet_udsp_criteria_present(descr))
1150 /* we always have one net expression */
1151 if (!list_empty(&descr->ud_net_id.udn_net_num_range)) {
1152 expr = list_first_entry(&descr->ud_net_id.udn_net_num_range,
1153 struct cfs_expr_list, el_link);
1155 /* count the number of cfs_range_expr in the net expression */
1156 range_count = lnet_get_list_len(&expr->el_exprs);
1159 /* count the number of cfs_range_expr in the address expressions */
1160 list_for_each_entry(expr, &descr->ud_addr_range, el_link) {
1162 range_count += lnet_get_list_len(&expr->el_exprs);
1165 size += (sizeof(struct lnet_expressions) * expr_count);
1166 size += (sizeof(struct lnet_range_expr) * range_count);
// Compute the marshaled size of a whole policy: one lnet_ioctl_udsp header
// plus the marshaled sizes of the src, dst and rte descriptors. The result
// is logged.
1172 lnet_get_udsp_size(struct lnet_udsp *udsp)
1174 size_t size = sizeof(struct lnet_ioctl_udsp);
1176 size += lnet_size_marshaled_nid_descr(&udsp->udsp_src);
1177 size += lnet_size_marshaled_nid_descr(&udsp->udsp_dst);
1178 size += lnet_size_marshaled_nid_descr(&udsp->udsp_rte);
1180 CDEBUG(D_NET, "get udsp (%p) size: %d\n", udsp, (int)size);
// Copy every range of one expression list to the user bulk buffer.
// Each cfs_range_expr is converted to an lnet_range_expr (lo, hi, stride)
// and written with copy_to_user(); after each write the bulk cursor is
// advanced and the remaining size reduced by the size of one range.
// A failed copy is logged.
1186 copy_exprs(struct cfs_expr_list *expr, void __user **bulk,
1189 struct cfs_range_expr *range;
1190 struct lnet_range_expr range_expr;
1192 /* copy over the net range expressions to the bulk */
1193 list_for_each_entry(range, &expr->el_exprs, re_link) {
1194 range_expr.re_lo = range->re_lo;
1195 range_expr.re_hi = range->re_hi;
1196 range_expr.re_stride = range->re_stride;
1197 CDEBUG(D_NET, "Copy Range %u:%u:%u\n",
1198 range_expr.re_lo, range_expr.re_hi,
1199 range_expr.re_stride);
1200 if (copy_to_user(*bulk, &range_expr, sizeof(range_expr))) {
1201 CDEBUG(D_NET, "Failed to copy range_expr\n");
1204 *bulk += sizeof(range_expr);
1205 *bulk_size -= sizeof(range_expr);
// Marshal one NID descriptor into the user bulk buffer.
// The header is zeroed and tagged with the first four bytes of the type
// string read as a __u32. When the descriptor has no criteria only that
// header is written. Otherwise the header also carries the address
// expression count, the net type and the number of net ranges; it is
// followed by the net number ranges and then, for each address expression,
// an lnet_expressions count and its ranges.
// bulk and bulk_size are advanced and reduced as data is written.
1212 copy_nid_range(struct lnet_ud_nid_descr *nid_descr, char *type,
1213 void __user **bulk, __u32 *bulk_size)
1215 struct lnet_ioctl_udsp_descr ioc_udsp_descr;
1216 struct cfs_expr_list *expr;
1217 struct lnet_expressions ioc_expr;
1222 memset(&ioc_udsp_descr, 0, sizeof(ioc_udsp_descr));
1223 ioc_udsp_descr.iud_src_hdr.ud_descr_type = *(__u32 *)type;
1225 /* if criteria not present, copy over the static part of the NID
1228 if (!lnet_udsp_criteria_present(nid_descr)) {
1229 CDEBUG(D_NET, "Descriptor %u:%u:%u:%u\n",
1230 ioc_udsp_descr.iud_src_hdr.ud_descr_type,
1231 ioc_udsp_descr.iud_src_hdr.ud_descr_count,
1232 ioc_udsp_descr.iud_net.ud_net_type,
1233 ioc_udsp_descr.iud_net.ud_net_num_expr.le_count);
1234 if (copy_to_user(*bulk, &ioc_udsp_descr,
1235 sizeof(ioc_udsp_descr))) {
1236 CDEBUG(D_NET, "failed to copy ioc_udsp_descr\n");
1239 *bulk += sizeof(ioc_udsp_descr);
1240 *bulk_size -= sizeof(ioc_udsp_descr);
1244 expr_count = lnet_get_list_len(&nid_descr->ud_addr_range);
1246 /* copy the net information */
1247 if (!list_empty(&nid_descr->ud_net_id.udn_net_num_range)) {
1248 expr = list_first_entry(&nid_descr->ud_net_id.udn_net_num_range,
1249 struct cfs_expr_list, el_link);
1250 net_expr_count = lnet_get_list_len(&expr->el_exprs);
1255 /* set the total expression count */
1256 ioc_udsp_descr.iud_src_hdr.ud_descr_count = expr_count;
1257 ioc_udsp_descr.iud_net.ud_net_type =
1258 nid_descr->ud_net_id.udn_net_type;
1259 ioc_udsp_descr.iud_net.ud_net_num_expr.le_count = net_expr_count;
1261 CDEBUG(D_NET, "Descriptor %u:%u:%u:%u\n",
1262 ioc_udsp_descr.iud_src_hdr.ud_descr_type,
1263 ioc_udsp_descr.iud_src_hdr.ud_descr_count,
1264 ioc_udsp_descr.iud_net.ud_net_type,
1265 ioc_udsp_descr.iud_net.ud_net_num_expr.le_count);
1267 /* copy over the header info to the bulk */
1268 if (copy_to_user(*bulk, &ioc_udsp_descr, sizeof(ioc_udsp_descr))) {
1269 CDEBUG(D_NET, "Failed to copy data\n");
1272 *bulk += sizeof(ioc_udsp_descr);
1273 *bulk_size -= sizeof(ioc_udsp_descr);
1275 /* copy over the net num expression if it exists */
1276 if (net_expr_count) {
1277 rc = copy_exprs(expr, bulk, bulk_size);
1282 /* copy the address range */
1283 list_for_each_entry(expr, &nid_descr->ud_addr_range, el_link) {
1284 ioc_expr.le_count = lnet_get_list_len(&expr->el_exprs);
1285 if (copy_to_user(*bulk, &ioc_expr, sizeof(ioc_expr))) {
1286 CDEBUG(D_NET, "failex to copy ioc_expr\n");
1289 *bulk += sizeof(ioc_expr);
1290 *bulk_size -= sizeof(ioc_expr);
1292 rc = copy_exprs(expr, bulk, bulk_size);
// Marshal a policy into the ioctl structure supplied by user space.
// The available size is taken as ioc_len plus iou_bulk_size and compared
// for inequality with lnet_get_udsp_size(udsp). The index, action type and
// priority are stored in the ioctl header, then the src, dst and rte
// descriptors are written to the bulk area tagged SRC, DST and RTE.
// After a successful pass the remaining size is asserted to be zero; a
// failure is logged with its return code.
1301 lnet_udsp_marshal(struct lnet_udsp *udsp, struct lnet_ioctl_udsp *ioc_udsp)
1310 bulk = ioc_udsp->iou_bulk;
1311 bulk_size = ioc_udsp->iou_hdr.ioc_len +
1312 ioc_udsp->iou_bulk_size;
1314 CDEBUG(D_NET, "marshal udsp (%p)\n", udsp);
1315 CDEBUG(D_NET, "MEM -----> bulk: %p:0x%x\n", bulk, bulk_size);
1316 /* make sure user space allocated enough buffer to marshal the
1319 if (bulk_size != lnet_get_udsp_size(udsp)) {
1324 ioc_udsp->iou_idx = udsp->udsp_idx;
1325 ioc_udsp->iou_action_type = udsp->udsp_action_type;
1326 ioc_udsp->iou_action.priority = udsp->udsp_action.udsp_priority;
1328 bulk_size -= sizeof(*ioc_udsp);
1330 rc = copy_nid_range(&udsp->udsp_src, "SRC", &bulk, &bulk_size);
1334 rc = copy_nid_range(&udsp->udsp_dst, "DST", &bulk, &bulk_size);
1338 rc = copy_nid_range(&udsp->udsp_rte, "RTE", &bulk, &bulk_size);
1342 CDEBUG(D_NET, "MEM <----- bulk: %p\n", bulk);
1344 /* we should've consumed the entire buffer */
1345 LASSERT(bulk_size == 0);
1349 CERROR("Failed to marshal udsp: %d\n", rc);
// Rebuild one cfs_expr_list with its ranges from marshaled data.
// count gives the number of ranges; a count of 0 is tested for up front,
// and a count of -1 means the number is taken from an lnet_expressions
// header in the bulk, which is then stepped over. A new expression list is
// initialised, linked at the tail of list and accounted for in buf; each
// range is then filled with lo, hi and stride from the marshaled range and
// appended, with the bulk and buf cursors advanced per range.
// NOTE(review): presumably exprs and range are carved out of buf and
// range_expr is read from bulk - confirm against the full function.
1354 copy_range_info(void **bulk, void **buf, struct list_head *list,
1357 struct lnet_range_expr *range_expr;
1358 struct cfs_range_expr *range;
1359 struct cfs_expr_list *exprs;
1360 int range_count = count;
1363 if (range_count == 0)
1366 if (range_count == -1) {
1367 struct lnet_expressions *e;
1370 range_count = e->le_count;
1371 *bulk += sizeof(*e);
1375 INIT_LIST_HEAD(&exprs->el_link);
1376 INIT_LIST_HEAD(&exprs->el_exprs);
1377 list_add_tail(&exprs->el_link, list);
1378 *buf += sizeof(*exprs);
1380 for (i = 0; i < range_count; i++) {
1383 INIT_LIST_HEAD(&range->re_link);
1384 range->re_lo = range_expr->re_lo;
1385 range->re_hi = range_expr->re_hi;
1386 range->re_stride = range_expr->re_stride;
1387 CDEBUG(D_NET, "Copy Range %u:%u:%u\n",
1391 list_add_tail(&range->re_link, &exprs->el_exprs);
1392 *bulk += sizeof(*range_expr);
1393 *buf += sizeof(*range);
1398 copy_ioc_udsp_descr(struct lnet_ud_nid_descr *nid_descr, char *type,
1399 void **bulk, __u32 *bulk_size)
1401 struct lnet_ioctl_udsp_descr *ioc_nid = *bulk;
1402 struct lnet_expressions *exprs;
1405 int range_count = 0;
1408 int remaining_size = *bulk_size;
1412 size_t range_expr_s = sizeof(struct lnet_range_expr);
1413 size_t lnet_exprs_s = sizeof(struct lnet_expressions);
1415 CDEBUG(D_NET, "%s: bulk = %p:%u\n", type, *bulk, *bulk_size);
1417 /* criteria not present, skip over the static part of the
1418 * bulk, which is included for each NID descriptor
1420 if (ioc_nid->iud_net.ud_net_type == 0) {
1421 remaining_size -= sizeof(*ioc_nid);
1422 if (remaining_size < 0) {
1423 CERROR("Truncated userspace udsp buffer given\n");
1426 *bulk += sizeof(*ioc_nid);
1427 *bulk_size = remaining_size;
1431 descr_type = ioc_nid->iud_src_hdr.ud_descr_type;
1432 if (descr_type != *(__u32 *)type) {
1433 CERROR("Bad NID descriptor type. Expected %s, given %c%c%c\n",
1434 type, (__u8)descr_type, (__u8)(descr_type << 4),
1435 (__u8)(descr_type << 8));
1439 /* calculate the total size to verify we have enough buffer.
1440 * Start of by finding how many ranges there are for the net
1443 range_count = ioc_nid->iud_net.ud_net_num_expr.le_count;
1444 size = sizeof(*ioc_nid) + (range_count * range_expr_s);
1445 remaining_size -= size;
1446 if (remaining_size < 0) {
1447 CERROR("Truncated userspace udsp buffer given\n");
1451 CDEBUG(D_NET, "Total net num ranges in %s: %d:%u\n", type,
1453 /* the number of expressions for the NID. IE 4 for IP, 1 for GNI */
1454 expr_count = ioc_nid->iud_src_hdr.ud_descr_count;
1455 CDEBUG(D_NET, "addr as %d exprs\n", expr_count);
1456 /* point tmp to the beginning of the NID expressions */
1458 for (i = 0; i < expr_count; i++) {
1459 /* get the number of ranges per expression */
1461 range_count += exprs->le_count;
1462 size = (range_expr_s * exprs->le_count) + lnet_exprs_s;
1463 remaining_size -= size;
1464 CDEBUG(D_NET, "expr %d:%d:%u:%d:%d\n", i, exprs->le_count,
1465 size, remaining_size, range_count);
1466 if (remaining_size < 0) {
1467 CERROR("Truncated userspace udsp buffer given\n");
1473 *bulk_size = remaining_size;
1475 /* copy over the net type */
1476 nid_descr->ud_net_id.udn_net_type = ioc_nid->iud_net.ud_net_type;
1478 CDEBUG(D_NET, "%u\n", nid_descr->ud_net_id.udn_net_type);
1480 /* allocate the total memory required to copy this NID descriptor */
1481 if (ioc_nid->iud_net.ud_net_num_expr.le_count) {
1482 if (ioc_nid->iud_net.ud_net_num_expr.le_count != 1) {
1483 CERROR("Unexpected number of net numeric ranges \"%u\". Cannot add UDSP rule.\n",
1484 ioc_nid->iud_net.ud_net_num_expr.le_count);
1487 alloc_size = (sizeof(struct cfs_expr_list) * (expr_count + 1)) +
1488 (sizeof(struct cfs_range_expr) * (range_count));
1490 alloc_size = (sizeof(struct cfs_expr_list) * (expr_count)) +
1491 (sizeof(struct cfs_range_expr) * (range_count));
1494 LIBCFS_ALLOC(buf, alloc_size);
1498 /* store the amount of memory allocated so we can free it later on */
1499 nid_descr->ud_mem_size = alloc_size;
1501 /* copy over the net number range */
1502 range_count = ioc_nid->iud_net.ud_net_num_expr.le_count;
1503 *bulk += sizeof(*ioc_nid);
1504 CDEBUG(D_NET, "bulk = %p\n", *bulk);
1505 copy_range_info(bulk, &buf, &nid_descr->ud_net_id.udn_net_num_range,
1507 CDEBUG(D_NET, "bulk = %p\n", *bulk);
1509 /* copy over the NID descriptor */
1510 for (i = 0; i < expr_count; i++) {
1511 copy_range_info(bulk, &buf, &nid_descr->ud_addr_range, -1);
1512 CDEBUG(D_NET, "bulk = %p\n", *bulk);
1519 lnet_udsp_demarshal_add(void *bulk, __u32 bulk_size)
1521 struct lnet_ioctl_udsp *ioc_udsp;
1522 struct lnet_udsp *udsp;
1526 if (bulk_size < sizeof(*ioc_udsp))
1529 udsp = lnet_udsp_alloc();
1535 udsp->udsp_action_type = ioc_udsp->iou_action_type;
1536 udsp->udsp_action.udsp_priority = ioc_udsp->iou_action.priority;
1537 idx = ioc_udsp->iou_idx;
1539 CDEBUG(D_NET, "demarshal descr %u:%u:%d:%u\n", udsp->udsp_action_type,
1540 udsp->udsp_action.udsp_priority, idx, bulk_size);
1542 bulk += sizeof(*ioc_udsp);
1543 bulk_size -= sizeof(*ioc_udsp);
1545 rc = copy_ioc_udsp_descr(&udsp->udsp_src, "SRC", &bulk, &bulk_size);
1549 rc = copy_ioc_udsp_descr(&udsp->udsp_dst, "DST", &bulk, &bulk_size);
1553 rc = copy_ioc_udsp_descr(&udsp->udsp_rte, "RTE", &bulk, &bulk_size);
1557 return lnet_udsp_add_policy(udsp, idx);
1560 lnet_udsp_free(udsp);