2 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
4 * Copyright (c) 2011, 2017, Intel Corporation.
6 * Copyright (c) 2018-2020 Data Direct Networks.
8 * This file is part of Lustre, https://wiki.whamcloud.com/
10 * Portals is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Portals is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * version 2 along with this program; If not, see
21 * http://www.gnu.org/licenses/gpl-2.0.html
25 * User Defined Selection Policies (UDSP) are introduced to add
26 * ability of fine traffic control. The policies are instantiated
27 * on LNet constructs and allow preference of some constructs
28 * over others as an extension of the selection algorithm.
29 * The order of operation is defined by the selection algorithm logical flow:
31 * 1. Iterate over all the networks that a peer can be reached on
32 * and select the best local network
33 * - The remote network with the highest priority is examined
35 * - The local network with the highest priority is selected
37 * - The local NI with the highest priority is selected
39 * 2. If the peer is a remote peer and has no local networks,
40 * - then select the remote peer network with the highest priority
42 * - Select the highest priority remote peer_ni on the network selected
44 * - Now that the peer's network and NI are decided, select the router
45 * in round robin from the peer NI's preferred router list.
47 * - Select the highest priority local NI on the local net of the
50 * 3. Otherwise for local peers, select the peer_ni from the peer.
51 * - highest priority peer NI is selected
53 * - Select the peer NI which has the local NI selected on its
57 * Accordingly, the User Interface allows for the following:
58 * - Adding a local network udsp: if multiple local networks are
59 * available, each one can have a priority.
60 * - Adding a local NID udsp: after a local network is chosen,
61 * if there are multiple NIs, each one can have a priority.
62 * - Adding a remote NID udsp: assign priority to a peer NID.
63 * - Adding a NID pair udsp: allows specifying local NIDs
64 * to be added to the preferred list of the specified peer NIs.
65 * When selecting a peer NI, the one with the
66 * local NID being used on its list is preferred.
67 * - Adding a Router udsp: similar to the NID pair udsp.
68 * Specified router NIDs are added to the preferred router list of the specified peer NIs.
69 * When sending to a remote peer, the remote net is selected and then the peer NID
70 * is selected. The router which has its NID on the peer NI's list is preferred.
72 * - Deleting a udsp: use the specified policy index to remove it
73 * from the policy list.
75 * Generally, the syntax is as follows
76 * lnetctl policy <add | del | show>
77 * --src: ip2nets syntax specifying the local NID to match
78 * --dst: ip2nets syntax specifying the remote NID to match
79 * --rte: ip2nets syntax specifying the router NID to match
80 * --priority: Priority to apply to rule matches
81 * --idx: Index of where to insert or delete the rule
82 * By default add appends to the end of the rule list
84 * Author: Amir Shehata
87 #include <linux/uaccess.h>
89 #include <lnet/udsp.h>
90 #include <libcfs/libcfs.h>
/*
 * Members of the per-call rule application context that is handed to every
 * udsp_apply_rule callback (referred to below as struct udsp_info).
 * NOTE(review): the opening/closing lines of the declaration and some members
 * are missing from this listing; udi_priority, udi_local and udi_revert are
 * read further down but are not declared in the visible lines.
 *
 * udi_lpni, udi_lpn - peer NI / peer net currently being examined
 * udi_ni, udi_net   - local NI / local net currently being examined
 * udi_match         - NID descriptor that constructs are matched against
 * udi_action        - NID descriptor naming the preferred NIDs or routers
 * udi_type          - action type of the rule being applied
 */
93 struct lnet_peer_ni *udi_lpni;
94 struct lnet_peer_net *udi_lpn;
95 struct lnet_ni *udi_ni;
96 struct lnet_net *udi_net;
97 struct lnet_ud_nid_descr *udi_match;
98 struct lnet_ud_nid_descr *udi_action;
100 enum lnet_udsp_action_type udi_type;
/*
 * Callback signature used to apply one rule: receives the udsp_info context
 * and returns an int status.
 */
105 typedef int (*udsp_apply_rule)(struct udsp_info *);
/*
 * Slots of the callback table; tables are declared with UDSP_APPLY_MAX_ENUM
 * entries.
 *   UDSP_APPLY_ON_PEERS    - invoked for every rule that has a dst descriptor
 *   UDSP_APPLY_PRIO_ON_NIS - invoked for source (local) priority rules
 *   UDSP_APPLY_RTE_ON_NETS - filled in by the net-level entry points; its
 *                            invocation is not visible in this listing
 */
108 UDSP_APPLY_ON_PEERS = 0,
109 UDSP_APPLY_PRIO_ON_NIS = 1,
110 UDSP_APPLY_RTE_ON_NETS = 2,
111 UDSP_APPLY_MAX_ENUM = 3,
/*
 * Returned by the net-level and peer-net-level helpers when the rule does not
 * target that kind of construct (for example it carries address ranges).
 */
114 #define RULE_NOT_APPLICABLE -1
/*
 * A descriptor with an empty address range list is treated as a rule on a
 * whole network rather than on individual NIDs.
 */
117 lnet_udsp_is_net_rule(struct lnet_ud_nid_descr *match)
119 return list_empty(&match->ud_addr_range);
/*
 * Compare two lists of cfs_expr_list entries position by position.
 * The checks visible here are: both lists empty; same number of expressions;
 * for each pair of expressions the same number of range expressions; and for
 * each pair of ranges identical re_lo, re_hi and re_stride.  e2 is walked in
 * lock-step with e1 using list_first_entry()/list_next_entry().
 * NOTE(review): the statements executed on each outcome (presumably the
 * true/false returns) are not visible in this listing.
 */
123 lnet_udsp_expr_list_equal(struct list_head *e1,
124 struct list_head *e2)
126 struct cfs_expr_list *expr1;
127 struct cfs_expr_list *expr2;
128 struct cfs_range_expr *range1, *range2;
130 if (list_empty(e1) && list_empty(e2))
133 if (lnet_get_list_len(e1) != lnet_get_list_len(e2))
136 expr2 = list_first_entry(e2, struct cfs_expr_list, el_link);
138 list_for_each_entry(expr1, e1, el_link) {
139 if (lnet_get_list_len(&expr1->el_exprs) !=
140 lnet_get_list_len(&expr2->el_exprs))
143 range2 = list_first_entry(&expr2->el_exprs,
144 struct cfs_range_expr,
147 list_for_each_entry(range1, &expr1->el_exprs, re_link) {
148 if (range1->re_lo != range2->re_lo ||
149 range1->re_hi != range2->re_hi ||
150 range1->re_stride != range2->re_stride)
152 range2 = list_next_entry(range2, re_link);
154 expr2 = list_next_entry(expr2, el_link);
/*
 * Compare two NID descriptors on their net type, their net number range
 * list and their address range list.
 * NOTE(review): the values returned for the equal / not-equal outcomes are
 * not visible in this listing.
 */
161 lnet_udsp_nid_descr_equal(struct lnet_ud_nid_descr *e1,
162 struct lnet_ud_nid_descr *e2)
164 if (e1->ud_net_id.udn_net_type != e2->ud_net_id.udn_net_type ||
165 !lnet_udsp_expr_list_equal(&e1->ud_net_id.udn_net_num_range,
166 &e2->ud_net_id.udn_net_num_range) ||
167 !lnet_udsp_expr_list_equal(&e1->ud_addr_range, &e2->ud_addr_range))
/*
 * Compare the actions of two policies: the action types must agree, and for
 * EN_LNET_UDSP_ACTION_PRIORITY actions the priority values are compared too.
 * NOTE(review): the returned values are not visible in this listing.
 */
174 lnet_udsp_action_equal(struct lnet_udsp *e1, struct lnet_udsp *e2)
176 if (e1->udsp_action_type != e2->udsp_action_type)
179 if (e1->udsp_action_type == EN_LNET_UDSP_ACTION_PRIORITY &&
180 e1->udsp_action.udsp_priority != e2->udsp_action.udsp_priority)
/*
 * Compare the matching criteria of two policies: the src, dst and rte NID
 * descriptors.  The action is not examined in the visible lines; see
 * lnet_udsp_action_equal() for that comparison.
 */
187 lnet_udsp_equal(struct lnet_udsp *e1, struct lnet_udsp *e2)
189 /* check each NID descr */
190 if (!lnet_udsp_nid_descr_equal(&e1->udsp_src, &e2->udsp_src) ||
191 !lnet_udsp_nid_descr_equal(&e1->udsp_dst, &e2->udsp_dst) ||
192 !lnet_udsp_nid_descr_equal(&e1->udsp_rte, &e2->udsp_rte))
198 /* it is enough to look at the net type of the descriptor. If the criteria
199 * is present the net must be specified
/* A descriptor counts as present when its net type is non-zero. */
202 lnet_udsp_criteria_present(struct lnet_ud_nid_descr *descr)
204 return (descr->ud_net_id.udn_net_type != 0);
/*
 * Apply a priority rule to the single local NI in udi->udi_ni.
 * The NI is matched against udi->udi_match with cfs_match_nid_net(); on a
 * match its selection priority is set with lnet_ni_set_sel_priority_locked().
 * The priority used is udi->udi_priority, or -1 stored in a __u32 when the
 * rule is being reverted (udi->udi_revert).
 * NOTE(review): the first argument of cfs_match_nid_net(), the test on its
 * result and the return statements are not visible in this listing.
 */
208 lnet_udsp_apply_rule_on_ni(struct udsp_info *udi)
211 struct lnet_ni *ni = udi->udi_ni;
212 struct lnet_ud_nid_descr *ni_match = udi->udi_match;
213 __u32 priority = (udi->udi_revert) ? -1 : udi->udi_priority;
215 rc = cfs_match_nid_net(
217 ni_match->ud_net_id.udn_net_type,
218 &ni_match->ud_net_id.udn_net_num_range,
219 &ni_match->ud_addr_range);
223 CDEBUG(D_NET, "apply udsp on ni %s\n",
224 libcfs_nidstr(&ni->ni_nid));
226 /* Detected match. Set NIDs priority */
227 lnet_ni_set_sel_priority_locked(ni, priority);
/*
 * Build the preferred router list of a local net from a router descriptor.
 *
 * Every route of every remote net in the_lnet.ln_remote_nets_hash is visited.
 * For each route's gateway, the gateway peer NIs are walked with
 * lnet_get_next_peer_ni_locked() and tested with lnet_get_net_locked() and
 * cfs_match_nid_net() against rte_action; the gateway's primary NID is
 * matched against rte_action as well.
 *
 * On a match the net's preferred router list is cleared with
 * lnet_net_clr_pref_rtrs() (guarded by "!cleared || revert") and the
 * gateway's primary NID is added with lnet_net_add_pref_rtr().  -EEXIST from
 * the add is treated as success; other errors are logged with CERROR.
 *
 * LNET_LOCK_EX is released before the list is modified and re-acquired
 * afterwards, so the function is entered with that lock held.
 * NOTE(review): the branch/continue/return statements linking these steps,
 * and the handling of revert after the clear, are not visible in this listing.
 */
233 lnet_udsp_apply_rte_list_on_net(struct lnet_net *net,
234 struct lnet_ud_nid_descr *rte_action,
237 struct lnet_remotenet *rnet;
238 struct list_head *rn_list;
239 struct lnet_route *route;
240 struct lnet_peer_ni *lpni;
241 bool cleared = false;
242 struct lnet_nid *gw_nid, *gw_prim_nid;
246 for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
247 rn_list = &the_lnet.ln_remote_nets_hash[i];
248 list_for_each_entry(rnet, rn_list, lrn_list) {
249 list_for_each_entry(route, &rnet->lrn_routes, lr_list) {
250 /* look if gw nid on the same net matches */
252 &route->lr_gateway->lp_primary_nid;
254 while ((lpni = lnet_get_next_peer_ni_locked(route->lr_gateway,
257 if (!lnet_get_net_locked(lpni->lpni_peer_net->lpn_net_id))
259 gw_nid = &lpni->lpni_nid;
260 rc = cfs_match_nid_net(
262 rte_action->ud_net_id.udn_net_type,
263 &rte_action->ud_net_id.udn_net_num_range,
264 &rte_action->ud_addr_range);
268 /* match gw primary nid on a remote network */
270 gw_nid = gw_prim_nid;
271 rc = cfs_match_nid_net(
273 rte_action->ud_net_id.udn_net_type,
274 &rte_action->ud_net_id.udn_net_num_range,
275 &rte_action->ud_addr_range);
279 lnet_net_unlock(LNET_LOCK_EX);
280 if (!cleared || revert) {
281 lnet_net_clr_pref_rtrs(net);
284 lnet_net_lock(LNET_LOCK_EX);
288 /* match. Add to pref NIDs */
289 CDEBUG(D_NET, "udsp net->gw: %s->%s\n",
290 libcfs_net2str(net->net_id),
291 libcfs_nidstr(gw_prim_nid));
292 rc = lnet_net_add_pref_rtr(net, gw_prim_nid);
293 lnet_net_lock(LNET_LOCK_EX);
294 /* success if EEXIST return */
295 if (rc && rc != -EEXIST) {
296 CERROR("Failed to add %s to %s pref rtr list\n",
297 libcfs_nidstr(gw_prim_nid),
298 libcfs_net2str(net->net_id));
/*
 * Apply a router rule to the local nets on the_lnet.ln_nets.
 * Each net's type is compared with the match descriptor's net type, the net
 * id is matched with cfs_match_net(), and the router descriptor in
 * udi->udi_action is applied with lnet_udsp_apply_rte_list_on_net().
 * NOTE(review): last_failure is declared here but how it is updated and
 * returned is not visible in this listing; presumably it records the most
 * recent error so the walk can continue.
 */
309 lnet_udsp_apply_rte_rule_on_nets(struct udsp_info *udi)
312 int last_failure = 0;
313 struct lnet_net *net;
314 struct lnet_ud_nid_descr *match = udi->udi_match;
315 struct lnet_ud_nid_descr *rte_action = udi->udi_action;
317 list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
318 if (LNET_NETTYP(net->net_id) != match->ud_net_id.udn_net_type)
321 rc = cfs_match_net(net->net_id,
322 match->ud_net_id.udn_net_type,
323 &match->ud_net_id.udn_net_num_range);
327 CDEBUG(D_NET, "apply rule on %s\n",
328 libcfs_net2str(net->net_id));
329 rc = lnet_udsp_apply_rte_list_on_net(net, rte_action,
/*
 * Single-net variant of the router rule: matches udi->udi_net against
 * udi->udi_match with cfs_match_net() and applies the router descriptor in
 * udi->udi_action with lnet_udsp_apply_rte_list_on_net().
 * NOTE(review): the test on the match result and the return statements are
 * not visible in this listing.
 */
339 lnet_udsp_apply_rte_rule_on_net(struct udsp_info *udi)
342 struct lnet_net *net = udi->udi_net;
343 struct lnet_ud_nid_descr *match = udi->udi_match;
344 struct lnet_ud_nid_descr *rte_action = udi->udi_action;
346 rc = cfs_match_net(net->net_id,
347 match->ud_net_id.udn_net_type,
348 &match->ud_net_id.udn_net_num_range);
352 CDEBUG(D_NET, "apply rule on %s\n",
353 libcfs_net2str(net->net_id));
354 rc = lnet_udsp_apply_rte_list_on_net(net, rte_action,
/*
 * Apply a priority rule to the local net in udi->udi_net.
 * Returns RULE_NOT_APPLICABLE when the match descriptor carries address
 * ranges (i.e. it is not a net rule).  Otherwise the net id is matched with
 * cfs_match_net() and the net's selection priority is set with
 * lnet_net_set_sel_priority_locked() to udi->udi_priority, or to -1 stored
 * in a __u32 when reverting.
 * NOTE(review): the test on the match result and the remaining return
 * statements are not visible in this listing.
 */
361 lnet_udsp_apply_prio_rule_on_net(struct udsp_info *udi)
364 struct lnet_ud_nid_descr *match = udi->udi_match;
365 struct lnet_net *net = udi->udi_net;
366 __u32 priority = (udi->udi_revert) ? -1 : udi->udi_priority;
368 if (!lnet_udsp_is_net_rule(match))
369 return RULE_NOT_APPLICABLE;
371 rc = cfs_match_net(net->net_id,
372 match->ud_net_id.udn_net_type,
373 &match->ud_net_id.udn_net_num_range);
377 CDEBUG(D_NET, "apply rule on %s\n",
378 libcfs_net2str(net->net_id));
380 lnet_net_set_sel_priority_locked(net, priority);
/*
 * Apply a source priority rule across the local nets on the_lnet.ln_nets.
 * Each net's type is compared with the match descriptor's net type.  The
 * net-level rule is tried first with lnet_udsp_apply_prio_rule_on_net(); a
 * zero result from it is tested before the NIs are examined.  The NIs on the
 * net's net_ni_list are then passed to lnet_udsp_apply_rule_on_ni().
 * NOTE(review): the assignments of udi->udi_net / udi->udi_ni, the statements
 * following each test and the use of last_failure are not visible in this
 * listing.
 */
386 lnet_udsp_apply_rule_on_nis(struct udsp_info *udi)
390 struct lnet_net *net;
391 struct lnet_ud_nid_descr *ni_match = udi->udi_match;
392 int last_failure = 0;
394 list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
395 if (LNET_NETTYP(net->net_id) != ni_match->ud_net_id.udn_net_type)
399 if (!lnet_udsp_apply_prio_rule_on_net(udi))
402 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
404 rc = lnet_udsp_apply_rule_on_ni(udi);
/*
 * Build the preferred router list of a peer NI from a router descriptor.
 *
 * Every route of every remote net in the_lnet.ln_remote_nets_hash is visited
 * and the primary NID of the route's gateway is matched against rte_action
 * with cfs_match_nid_net().  On a match the peer NI's preferred router list
 * is cleared with lnet_peer_clr_pref_rtrs() (guarded by "!cleared || revert")
 * and the gateway NID is added with lnet_peer_add_pref_rtr().  -EEXIST from
 * the add is treated as success; other errors are logged with CERROR.
 *
 * LNET_LOCK_EX is released before the list is modified and re-acquired
 * afterwards, so the function is entered with that lock held.
 * NOTE(review): the branch/continue/return statements linking these steps
 * are not visible in this listing.
 */
414 lnet_udsp_apply_rte_list_on_lpni(struct lnet_peer_ni *lpni,
415 struct lnet_ud_nid_descr *rte_action,
418 struct lnet_remotenet *rnet;
419 struct list_head *rn_list;
420 struct lnet_route *route;
421 bool cleared = false;
422 struct lnet_nid *gw_nid;
426 for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
427 rn_list = &the_lnet.ln_remote_nets_hash[i];
428 list_for_each_entry(rnet, rn_list, lrn_list) {
429 list_for_each_entry(route, &rnet->lrn_routes, lr_list) {
430 gw_nid = &route->lr_gateway->lp_primary_nid;
431 rc = cfs_match_nid_net(
433 rte_action->ud_net_id.udn_net_type,
434 &rte_action->ud_net_id.udn_net_num_range,
435 &rte_action->ud_addr_range);
438 lnet_net_unlock(LNET_LOCK_EX);
439 if (!cleared || revert) {
440 CDEBUG(D_NET, "%spref rtr nids from lpni %s\n",
441 (revert) ? "revert " : "clear ",
442 libcfs_nidstr(&lpni->lpni_nid));
443 lnet_peer_clr_pref_rtrs(lpni);
446 lnet_net_lock(LNET_LOCK_EX);
450 CDEBUG(D_NET, "add gw nid %s as preferred for peer %s\n",
451 libcfs_nidstr(gw_nid),
452 libcfs_nidstr(&lpni->lpni_nid));
453 /* match. Add to pref NIDs */
454 rc = lnet_peer_add_pref_rtr(lpni, gw_nid);
455 lnet_net_lock(LNET_LOCK_EX);
456 /* success if EEXIST return */
457 if (rc && rc != -EEXIST) {
458 CERROR("Failed to add %s to %s pref rtr list\n",
459 libcfs_nidstr(gw_nid),
460 libcfs_nidstr(&lpni->lpni_nid));
/*
 * Build the preferred local NID list of a peer NI from a NID descriptor.
 *
 * The local nets on the_lnet.ln_nets are compared by type with ni_action and
 * their NIs are matched against it with cfs_match_nid_net().  On a match the
 * peer NI's preferred NID list is cleared with lnet_peer_clr_pref_nids()
 * (guarded by "!cleared || revert") and the local NI's NID is added with
 * lnet_peer_add_pref_nid().  -EEXIST from the add is treated as success;
 * other errors are logged with CERROR.
 *
 * LNET_LOCK_EX is released before the list is modified and re-acquired
 * afterwards, so the function is entered with that lock held.
 * NOTE(review): the branch/continue/return statements linking these steps
 * are not visible in this listing.
 */
471 lnet_udsp_apply_ni_list(struct lnet_peer_ni *lpni,
472 struct lnet_ud_nid_descr *ni_action,
477 struct lnet_net *net;
478 bool cleared = false;
480 list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
481 if (LNET_NETTYP(net->net_id) != ni_action->ud_net_id.udn_net_type)
483 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
484 rc = cfs_match_nid_net(
486 ni_action->ud_net_id.udn_net_type,
487 &ni_action->ud_net_id.udn_net_num_range,
488 &ni_action->ud_addr_range);
491 lnet_net_unlock(LNET_LOCK_EX);
492 if (!cleared || revert) {
493 lnet_peer_clr_pref_nids(lpni);
494 CDEBUG(D_NET, "%spref nids from lpni %s\n",
495 (revert) ? "revert " : "clear ",
496 libcfs_nidstr(&lpni->lpni_nid));
499 lnet_net_lock(LNET_LOCK_EX);
503 CDEBUG(D_NET, "add nid %s as preferred for peer %s\n",
504 libcfs_nidstr(&ni->ni_nid),
505 libcfs_nidstr(&lpni->lpni_nid));
506 /* match. Add to pref NIDs */
507 rc = lnet_peer_add_pref_nid(lpni, &ni->ni_nid);
508 lnet_net_lock(LNET_LOCK_EX);
509 /* success if EEXIST return */
510 if (rc && rc != -EEXIST) {
511 CERROR("Failed to add %s to %s pref nid list\n",
512 libcfs_nidstr(&ni->ni_nid),
513 libcfs_nidstr(&lpni->lpni_nid));
/*
 * Apply a rule to the single peer NI in udi->udi_lpni.
 * The peer NI is matched against udi->udi_match with cfs_match_nid_net(); a
 * further test involving the descriptor's address range length and
 * cfs_match_net() on udi->udi_lpn covers net-only matches.
 * Depending on the rule:
 *   - PREFERRED_LIST with udi_local set: lnet_udsp_apply_ni_list()
 *   - PREFERRED_LIST otherwise:          lnet_udsp_apply_rte_list_on_lpni()
 *   - remaining case:                    lnet_peer_ni_set_selection_priority()
 *     with udi->udi_priority, or -1 stored in a __u32 when reverting.
 * NOTE(review): parts of the conditions and all return statements are not
 * visible in this listing, so the exact branch structure should be confirmed
 * against the full file.
 */
523 lnet_udsp_apply_rule_on_lpni(struct udsp_info *udi)
526 struct lnet_peer_ni *lpni = udi->udi_lpni;
527 struct lnet_ud_nid_descr *lp_match = udi->udi_match;
528 struct lnet_ud_nid_descr *action = udi->udi_action;
529 __u32 priority = (udi->udi_revert) ? -1 : udi->udi_priority;
530 bool local = udi->udi_local;
531 enum lnet_udsp_action_type type = udi->udi_type;
533 rc = cfs_match_nid_net(
535 lp_match->ud_net_id.udn_net_type,
536 &lp_match->ud_net_id.udn_net_num_range,
537 &lp_match->ud_addr_range);
539 /* check if looking for a net match */
541 (lnet_get_list_len(&lp_match->ud_addr_range) ||
542 !cfs_match_net(udi->udi_lpn->lpn_net_id,
543 lp_match->ud_net_id.udn_net_type,
544 &lp_match->ud_net_id.udn_net_num_range))) {
548 if (type == EN_LNET_UDSP_ACTION_PREFERRED_LIST && local) {
549 rc = lnet_udsp_apply_ni_list(lpni, action,
553 } else if (type == EN_LNET_UDSP_ACTION_PREFERRED_LIST &&
555 rc = lnet_udsp_apply_rte_list_on_lpni(lpni, action,
560 lnet_peer_ni_set_selection_priority(lpni, priority);
/*
 * Apply a priority rule to the peer net in udi->udi_lpn.
 * Returns RULE_NOT_APPLICABLE for preferred-list rules and for descriptors
 * that carry address ranges.  Otherwise the peer net id is matched with
 * cfs_match_net() and its selection priority is set with
 * lnet_peer_net_set_sel_priority_locked() to udi->udi_priority, or to -1
 * stored in a __u32 when reverting.
 * NOTE(review): the test on the match result and the remaining return
 * statements are not visible in this listing.
 */
567 lnet_udsp_apply_rule_on_lpn(struct udsp_info *udi)
570 struct lnet_ud_nid_descr *match = udi->udi_match;
571 struct lnet_peer_net *lpn = udi->udi_lpn;
572 __u32 priority = (udi->udi_revert) ? -1 : udi->udi_priority;
574 if (udi->udi_type == EN_LNET_UDSP_ACTION_PREFERRED_LIST ||
575 !lnet_udsp_is_net_rule(match))
576 return RULE_NOT_APPLICABLE;
578 rc = cfs_match_net(lpn->lpn_net_id,
579 match->ud_net_id.udn_net_type,
580 &match->ud_net_id.udn_net_num_range);
584 CDEBUG(D_NET, "apply rule on lpn %s\n",
585 libcfs_net2str(lpn->lpn_net_id));
586 lnet_peer_net_set_sel_priority_locked(lpn, priority);
/*
 * Apply a destination rule to every peer known to the system.
 * The per-CPT peer tables in the_lnet.ln_peer_tables are walked; for each
 * peer, each peer net is compared by type with the match descriptor, the
 * peer-net rule is tried with lnet_udsp_apply_rule_on_lpn() (a zero result is
 * tested before the peer NIs are examined), and then each peer NI is stored
 * in udi->udi_lpni and handed to lnet_udsp_apply_rule_on_lpni().
 * NOTE(review): the list heads used by the inner loops, the assignment of
 * udi->udi_lpn, the statements following each test and the use of
 * last_failure are not visible in this listing.
 */
592 lnet_udsp_apply_rule_on_lpnis(struct udsp_info *udi)
594 /* iterate over all the peers in the system and find if any of the
595 * peers match the criteria. If they do, clear the preferred list
596 * and add the new list
598 int lncpt = cfs_percpt_number(the_lnet.ln_peer_tables);
599 struct lnet_ud_nid_descr *lp_match = udi->udi_match;
600 struct lnet_peer_table *ptable;
601 struct lnet_peer_net *lpn;
602 struct lnet_peer_ni *lpni;
603 struct lnet_peer *lp;
604 int last_failure = 0;
608 for (cpt = 0; cpt < lncpt; cpt++) {
609 ptable = the_lnet.ln_peer_tables[cpt];
610 list_for_each_entry(lp, &ptable->pt_peer_list, lp_peer_list) {
611 CDEBUG(D_NET, "udsp examining lp %s\n",
612 libcfs_nidstr(&lp->lp_primary_nid));
613 list_for_each_entry(lpn,
616 CDEBUG(D_NET, "udsp examining lpn %s\n",
617 libcfs_net2str(lpn->lpn_net_id));
619 if (LNET_NETTYP(lpn->lpn_net_id) !=
620 lp_match->ud_net_id.udn_net_type)
625 if (!lnet_udsp_apply_rule_on_lpn(udi))
628 list_for_each_entry(lpni,
631 CDEBUG(D_NET, "udsp examining lpni %s\n",
632 libcfs_nidstr(&lpni->lpni_nid));
633 udi->udi_lpni = lpni;
634 rc = lnet_udsp_apply_rule_on_lpni(udi);
/*
 * Classify one policy by which NID descriptors it carries, fill in the
 * udsp_info context accordingly and invoke the matching callback from cbs.
 *
 *   dst + src : NID pair rule. match = dst, action = src, type =
 *               PREFERRED_LIST, udi_local = true, cbs[UDSP_APPLY_ON_PEERS].
 *   dst + rte : router rule. match = dst, action = rte, type =
 *               PREFERRED_LIST, udi_local = false, cbs[UDSP_APPLY_ON_PEERS].
 *   dst only  : destination priority rule. match = dst, type = PRIORITY,
 *               udi_local = true, cbs[UDSP_APPLY_ON_PEERS].
 *   src only  : source priority rule. match = src, type = PRIORITY,
 *               udi_local = true, cbs[UDSP_APPLY_PRIO_ON_NIS].
 *   otherwise : "Bad UDSP policy" is logged.
 *
 * Each branch first tests whether the needed callback slot is NULL and
 * checks that the policy's action type is the one the branch expects,
 * logging "Bad action type" otherwise.
 *
 * NOTE(review): inside the dst + rte branch the src-present check cannot be
 * true, because a policy with both dst and src is taken by the first branch;
 * its message also names "src or dst" although rte is involved.
 * NOTE(review): the statements executed after each failed check and the
 * return statements are not visible in this listing.  If the action-type
 * check returns an error, the later "priority = 0" fallback in the two
 * priority branches would be unreachable - confirm against the full file.
 */
646 lnet_udsp_apply_single_policy(struct lnet_udsp *udsp, struct udsp_info *udi,
647 udsp_apply_rule *cbs)
651 if (lnet_udsp_criteria_present(&udsp->udsp_dst) &&
652 lnet_udsp_criteria_present(&udsp->udsp_src)) {
654 if (!cbs[UDSP_APPLY_ON_PEERS])
657 if (udsp->udsp_action_type !=
658 EN_LNET_UDSP_ACTION_PREFERRED_LIST) {
659 CERROR("Bad action type. Expected %d got %d\n",
660 EN_LNET_UDSP_ACTION_PREFERRED_LIST,
661 udsp->udsp_action_type);
664 udi->udi_match = &udsp->udsp_dst;
665 udi->udi_action = &udsp->udsp_src;
666 udi->udi_type = EN_LNET_UDSP_ACTION_PREFERRED_LIST;
667 udi->udi_local = true;
669 CDEBUG(D_NET, "applying udsp (%p) dst->src\n",
671 rc = cbs[UDSP_APPLY_ON_PEERS](udi);
674 } else if (lnet_udsp_criteria_present(&udsp->udsp_dst) &&
675 lnet_udsp_criteria_present(&udsp->udsp_rte)) {
677 if (!cbs[UDSP_APPLY_ON_PEERS])
680 if (udsp->udsp_action_type !=
681 EN_LNET_UDSP_ACTION_PREFERRED_LIST) {
682 CERROR("Bad action type. Expected %d got %d\n",
683 EN_LNET_UDSP_ACTION_PREFERRED_LIST,
684 udsp->udsp_action_type);
688 if (lnet_udsp_criteria_present(&udsp->udsp_src)) {
689 CERROR("only one of src or dst can be specified\n");
692 udi->udi_match = &udsp->udsp_dst;
693 udi->udi_action = &udsp->udsp_rte;
694 udi->udi_type = EN_LNET_UDSP_ACTION_PREFERRED_LIST;
695 udi->udi_local = false;
697 CDEBUG(D_NET, "applying udsp (%p) dst->rte\n",
699 rc = cbs[UDSP_APPLY_ON_PEERS](udi);
702 } else if (lnet_udsp_criteria_present(&udsp->udsp_dst)) {
703 /* destination priority rule */
704 if (!cbs[UDSP_APPLY_ON_PEERS])
707 if (udsp->udsp_action_type !=
708 EN_LNET_UDSP_ACTION_PRIORITY) {
709 CERROR("Bad action type. Expected %d got %d\n",
710 EN_LNET_UDSP_ACTION_PRIORITY,
711 udsp->udsp_action_type);
714 udi->udi_match = &udsp->udsp_dst;
715 udi->udi_type = EN_LNET_UDSP_ACTION_PRIORITY;
716 if (udsp->udsp_action_type !=
717 EN_LNET_UDSP_ACTION_PRIORITY) {
718 udi->udi_priority = 0;
720 udi->udi_priority = udsp->udsp_action.udsp_priority;
722 udi->udi_local = true;
724 CDEBUG(D_NET, "applying udsp (%p) on destination\n",
726 rc = cbs[UDSP_APPLY_ON_PEERS](udi);
729 } else if (lnet_udsp_criteria_present(&udsp->udsp_src)) {
730 /* source priority rule */
731 if (!cbs[UDSP_APPLY_PRIO_ON_NIS])
734 if (udsp->udsp_action_type !=
735 EN_LNET_UDSP_ACTION_PRIORITY) {
736 CERROR("Bad action type. Expected %d got %d\n",
737 EN_LNET_UDSP_ACTION_PRIORITY,
738 udsp->udsp_action_type);
741 udi->udi_match = &udsp->udsp_src;
742 udi->udi_type = EN_LNET_UDSP_ACTION_PRIORITY;
743 if (udsp->udsp_action_type !=
744 EN_LNET_UDSP_ACTION_PRIORITY) {
745 udi->udi_priority = 0;
747 udi->udi_priority = udsp->udsp_action.udsp_priority;
749 udi->udi_local = true;
751 CDEBUG(D_NET, "applying udsp (%p) on source\n",
753 rc = cbs[UDSP_APPLY_PRIO_ON_NIS](udi);
755 CERROR("Bad UDSP policy\n");
/*
 * Apply either one policy or the whole policy list.
 * One path returns lnet_udsp_apply_single_policy() for the udsp passed in;
 * the other walks the_lnet.ln_udsp_list in reverse order and applies every
 * entry.  The per-construct entry points in this file pass udsp == NULL,
 * so presumably NULL selects the list walk.
 * NOTE(review): the condition choosing between the two paths and the use of
 * last_failure are not visible in this listing.
 */
763 lnet_udsp_apply_policies_helper(struct lnet_udsp *udsp, struct udsp_info *udi,
764 udsp_apply_rule *cbs)
767 int last_failure = 0;
770 return lnet_udsp_apply_single_policy(udsp, udi, cbs);
772 list_for_each_entry_reverse(udsp,
773 &the_lnet.ln_udsp_list,
775 rc = lnet_udsp_apply_single_policy(udsp, udi, cbs);
/*
 * Apply all configured policies to one local NI.
 * A zeroed udsp_info is prepared, only the UDSP_APPLY_PRIO_ON_NIS slot is
 * filled (with lnet_udsp_apply_rule_on_ni) and the helper is called with a
 * NULL policy.
 * NOTE(review): the lines storing ni into the context are not visible in
 * this listing.
 */
784 lnet_udsp_apply_policies_on_ni(struct lnet_ni *ni)
786 struct udsp_info udi;
787 udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM] = {NULL};
789 memset(&udi, 0, sizeof(udi));
793 cbs[UDSP_APPLY_PRIO_ON_NIS] = lnet_udsp_apply_rule_on_ni;
795 return lnet_udsp_apply_policies_helper(NULL, &udi, cbs);
/*
 * Apply all configured policies to one local net.
 * A zeroed udsp_info is prepared, the UDSP_APPLY_PRIO_ON_NIS and
 * UDSP_APPLY_RTE_ON_NETS slots are filled with the single-net helpers and
 * the helper is called with a NULL policy.
 * NOTE(review): the lines storing net into the context are not visible in
 * this listing.
 */
799 lnet_udsp_apply_policies_on_net(struct lnet_net *net)
801 struct udsp_info udi;
802 udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM] = {NULL};
804 memset(&udi, 0, sizeof(udi));
808 cbs[UDSP_APPLY_PRIO_ON_NIS] = lnet_udsp_apply_prio_rule_on_net;
809 cbs[UDSP_APPLY_RTE_ON_NETS] = lnet_udsp_apply_rte_rule_on_net;
811 return lnet_udsp_apply_policies_helper(NULL, &udi, cbs);
/*
 * Apply all configured policies to one peer NI.
 * A zeroed udsp_info is prepared, only the UDSP_APPLY_ON_PEERS slot is
 * filled (with lnet_udsp_apply_rule_on_lpni) and the helper is called with a
 * NULL policy.
 * NOTE(review): the lines storing lpni (and its peer net) into the context
 * are not visible in this listing.
 */
815 lnet_udsp_apply_policies_on_lpni(struct lnet_peer_ni *lpni)
817 struct udsp_info udi;
818 udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM] = {NULL};
820 memset(&udi, 0, sizeof(udi));
824 cbs[UDSP_APPLY_ON_PEERS] = lnet_udsp_apply_rule_on_lpni;
826 return lnet_udsp_apply_policies_helper(NULL, &udi, cbs);
/*
 * Apply all configured policies to one peer net.
 * A zeroed udsp_info is prepared, only the UDSP_APPLY_ON_PEERS slot is
 * filled (with lnet_udsp_apply_rule_on_lpn) and the helper is called with a
 * NULL policy.
 * NOTE(review): the lines storing lpn into the context are not visible in
 * this listing.
 */
830 lnet_udsp_apply_policies_on_lpn(struct lnet_peer_net *lpn)
832 struct udsp_info udi;
833 udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM] = {NULL};
835 memset(&udi, 0, sizeof(udi));
839 cbs[UDSP_APPLY_ON_PEERS] = lnet_udsp_apply_rule_on_lpn;
841 return lnet_udsp_apply_policies_helper(NULL, &udi, cbs);
/*
 * Apply (or, when revert is true, undo) a policy across the whole system.
 * All three callback slots are filled with the list-walking variants, the
 * revert flag is recorded in the context and the helper is run while
 * holding LNET_LOCK_EX.  udsp is passed straight to the helper.
 */
845 lnet_udsp_apply_policies(struct lnet_udsp *udsp, bool revert)
848 struct udsp_info udi;
849 udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM] = {NULL};
851 memset(&udi, 0, sizeof(udi));
853 cbs[UDSP_APPLY_ON_PEERS] = lnet_udsp_apply_rule_on_lpnis;
854 cbs[UDSP_APPLY_PRIO_ON_NIS] = lnet_udsp_apply_rule_on_nis;
855 cbs[UDSP_APPLY_RTE_ON_NETS] = lnet_udsp_apply_rte_rule_on_nets;
857 udi.udi_revert = revert;
859 lnet_net_lock(LNET_LOCK_EX);
860 rc = lnet_udsp_apply_policies_helper(udsp, &udi, cbs);
861 lnet_net_unlock(LNET_LOCK_EX);
/*
 * Look up a policy on the_lnet.ln_udsp_list for the requested index,
 * logging every entry visited.
 * NOTE(review): the index comparison and the return statements are not
 * visible in this listing.  "upsp" in the debug message looks like a typo
 * for "udsp".
 */
867 lnet_udsp_get_policy(int idx)
870 struct lnet_udsp *udsp = NULL;
873 CDEBUG(D_NET, "Get UDSP at idx = %d\n", idx);
878 list_for_each_entry(udsp, &the_lnet.ln_udsp_list, udsp_on_list) {
879 CDEBUG(D_NET, "iterating over upsp %d:%d:%d\n",
880 udsp->udsp_idx, i, idx);
888 CDEBUG(D_NET, "Found UDSP (%p)\n", udsp);
/*
 * Insert a new policy into the_lnet.ln_udsp_list.
 *
 * The existing list is scanned first.  When an entry has the same src/dst/rte
 * descriptors as the new policy, and both are priority actions with differing
 * priorities, the existing entry's priority is updated in place.
 *
 * For insertion, list_add() on insert->udsp_on_list.prev places the new
 * policy immediately before the entry recorded in "insert"; the other
 * visible path appends it with list_add_tail().  The final loop dumps the
 * resulting list to the debug log.
 * NOTE(review): how "insert" is chosen from idx, what the second list walk
 * does (presumably index renumbering) and the return statements are not
 * visible in this listing.
 */
897 lnet_udsp_add_policy(struct lnet_udsp *new, int idx)
899 struct lnet_udsp *udsp;
900 struct lnet_udsp *insert = NULL;
903 list_for_each_entry(udsp, &the_lnet.ln_udsp_list, udsp_on_list) {
904 CDEBUG(D_NET, "found udsp i = %d:%d, idx = %d\n",
905 i, udsp->udsp_idx, idx);
911 if (lnet_udsp_equal(udsp, new)) {
912 if (!lnet_udsp_action_equal(udsp, new) &&
913 udsp->udsp_action_type == EN_LNET_UDSP_ACTION_PRIORITY &&
914 new->udsp_action_type == EN_LNET_UDSP_ACTION_PRIORITY) {
915 udsp->udsp_action.udsp_priority = new->udsp_action.udsp_priority;
916 CDEBUG(D_NET, "udsp: %p index %d updated priority to %d\n",
919 udsp->udsp_action.udsp_priority);
927 list_add(&new->udsp_on_list, insert->udsp_on_list.prev);
929 list_for_each_entry(udsp,
930 &the_lnet.ln_udsp_list,
939 list_add_tail(&new->udsp_on_list, &the_lnet.ln_udsp_list);
943 CDEBUG(D_NET, "udsp: %p added at index %d\n", new, new->udsp_idx);
945 CDEBUG(D_NET, "udsp list:\n");
946 list_for_each_entry(udsp, &the_lnet.ln_udsp_list, udsp_on_list)
947 CDEBUG(D_NET, "udsp %p:%d\n", udsp, udsp->udsp_idx);
/*
 * Delete the policy at index idx.
 * The list is walked with the _safe iterator; the first entry whose udsp_idx
 * equals idx is unlinked, its effects are reverted with
 * lnet_udsp_apply_policies(udsp, true) and it is freed.
 * NOTE(review): the condition guarding the lnet_udsp_destroy(false) call
 * (presumably a "delete everything" index), the handling of the remaining
 * entries and the return statements are not visible in this listing.
 */
953 lnet_udsp_del_policy(int idx)
955 struct lnet_udsp *udsp;
956 struct lnet_udsp *tmp;
957 bool removed = false;
960 lnet_udsp_destroy(false);
964 CDEBUG(D_NET, "del udsp at idx = %d\n", idx);
966 list_for_each_entry_safe(udsp,
968 &the_lnet.ln_udsp_list,
972 if (udsp->udsp_idx == idx && !removed) {
973 list_del_init(&udsp->udsp_on_list);
974 lnet_udsp_apply_policies(udsp, true);
975 lnet_udsp_free(udsp);
/*
 * Fill the ioctl info structure from a local NI: its selection priority, the
 * selection priority of its net, and the net's preferred router NIDs
 * (converted with lnet_nid_to_nid4()).  Entries are stored only while the
 * index is below LNET_MAX_SHOW_NUM_NID.
 * NOTE(review): the second parameter line and the index handling are not
 * visible in this listing.
 */
984 lnet_udsp_get_ni_info(struct lnet_ioctl_construct_udsp_info *info,
987 struct lnet_nid_list *ne;
988 struct lnet_net *net = ni->ni_net;
993 info->cud_nid_priority = ni->ni_sel_priority;
995 info->cud_net_priority = ni->ni_net->net_sel_priority;
996 list_for_each_entry(ne, &net->net_rtr_pref_nids, nl_list) {
997 if (i < LNET_MAX_SHOW_NUM_NID)
998 info->cud_pref_rtr_nid[i] =
999 lnet_nid_to_nid4(&ne->nl_nid);
/*
 * Fill the ioctl info structure from a peer NI: its selection priority, its
 * preferred local NIDs, its preferred router NIDs and the selection priority
 * of its peer net.
 * A single preferred NID is read from lpni_pref.nid; more than one are read
 * from the lpni_pref.nids list.  NIDs are converted with lnet_nid_to_nid4()
 * and stored only while the index is below LNET_MAX_SHOW_NUM_NID.
 * The peer NI must be attached to a peer net and peer (asserted).
 * NOTE(review): the index handling is not visible in this listing.
 */
1008 lnet_udsp_get_peer_info(struct lnet_ioctl_construct_udsp_info *info,
1009 struct lnet_peer_ni *lpni)
1011 struct lnet_nid_list *ne;
1014 /* peer tree structure needs to be in existence */
1015 LASSERT(lpni && lpni->lpni_peer_net &&
1016 lpni->lpni_peer_net->lpn_peer);
1018 info->cud_nid_priority = lpni->lpni_sel_priority;
1019 CDEBUG(D_NET, "lpni %s has %d pref nids\n",
1020 libcfs_nidstr(&lpni->lpni_nid),
1021 lpni->lpni_pref_nnids);
1022 if (lpni->lpni_pref_nnids == 1) {
1023 info->cud_pref_nid[0] = lnet_nid_to_nid4(&lpni->lpni_pref.nid);
1024 } else if (lpni->lpni_pref_nnids > 1) {
1025 struct list_head *list = &lpni->lpni_pref.nids;
1027 list_for_each_entry(ne, list, nl_list) {
1028 if (i < LNET_MAX_SHOW_NUM_NID)
1029 info->cud_pref_nid[i] =
1030 lnet_nid_to_nid4(&ne->nl_nid);
1038 list_for_each_entry(ne, &lpni->lpni_rtr_pref_nids, nl_list) {
1039 if (i < LNET_MAX_SHOW_NUM_NID)
1040 info->cud_pref_rtr_nid[i] =
1041 lnet_nid_to_nid4(&ne->nl_nid);
1047 info->cud_net_priority = lpni->lpni_peer_net->lpn_sel_priority;
/*
 * Report the UDSP-derived state of the construct named by info->cud_nid.
 * When info->cud_peer is clear the NID is looked up as a local NI with
 * lnet_nid2ni_locked(); otherwise as a peer NI with
 * lnet_find_peer_ni_locked(), whose reference is dropped again after the
 * info has been copied.
 * NOTE(review): the not-found checks, locking and return statements are not
 * visible in this listing.
 */
1051 lnet_udsp_get_construct_info(struct lnet_ioctl_construct_udsp_info *info)
1054 struct lnet_peer_ni *lpni;
1057 if (!info->cud_peer) {
1058 ni = lnet_nid2ni_locked(info->cud_nid, 0);
1060 lnet_udsp_get_ni_info(info, ni);
1062 lpni = lnet_find_peer_ni_locked(info->cud_nid);
1064 CDEBUG(D_NET, "nid %s is not found\n",
1065 libcfs_nid2str(info->cud_nid));
1067 lnet_udsp_get_peer_info(info, lpni);
1068 lnet_peer_ni_decref_locked(lpni);
/*
 * Allocate a zeroed policy from lnet_udsp_cachep (GFP_NOFS) and initialise
 * its list linkage plus the address-range and net-number-range list heads of
 * the src, dst and rte descriptors.
 * NOTE(review): the allocation-failure check and the return statement are
 * not visible in this listing.
 */
1075 lnet_udsp_alloc(void)
1077 struct lnet_udsp *udsp;
1079 udsp = kmem_cache_alloc(lnet_udsp_cachep, GFP_NOFS | __GFP_ZERO);
1084 INIT_LIST_HEAD(&udsp->udsp_on_list);
1085 INIT_LIST_HEAD(&udsp->udsp_src.ud_addr_range);
1086 INIT_LIST_HEAD(&udsp->udsp_src.ud_net_id.udn_net_num_range);
1087 INIT_LIST_HEAD(&udsp->udsp_dst.ud_addr_range);
1088 INIT_LIST_HEAD(&udsp->udsp_dst.ud_net_id.udn_net_num_range);
1089 INIT_LIST_HEAD(&udsp->udsp_rte.ud_addr_range);
1090 INIT_LIST_HEAD(&udsp->udsp_rte.ud_net_id.udn_net_num_range);
1092 CDEBUG(D_MALLOC, "udsp alloc %p\n", udsp);
/*
 * Release the single buffer backing a NID descriptor's expression lists.
 * The buffer is freed through whichever list is non-empty: the first entry
 * of the net number range, otherwise the first entry of the address range;
 * ud_mem_size is the size passed to LIBCFS_FREE in both cases.
 * NOTE(review): the statement following the criteria-present test
 * (presumably an early return) is not visible in this listing.
 */
1097 lnet_udsp_nid_descr_free(struct lnet_ud_nid_descr *nid_descr)
1099 struct list_head *net_range = &nid_descr->ud_net_id.udn_net_num_range;
1101 if (!lnet_udsp_criteria_present(nid_descr))
1104 /* memory management is a bit tricky here. When we allocate the
1105 * memory to store the NID descriptor we allocate a large buffer
1106 * for all the data, so we need to free the entire buffer at
1107 * once. If the net is present the net_range->next points to that
1108 * buffer otherwise if the ud_addr_range is present then it's the
1109 * ud_addr_range.next
1111 if (!list_empty(net_range))
1112 LIBCFS_FREE(net_range->next, nid_descr->ud_mem_size);
1113 else if (!list_empty(&nid_descr->ud_addr_range))
1114 LIBCFS_FREE(nid_descr->ud_addr_range.next,
1115 nid_descr->ud_mem_size);
/*
 * Free a policy: release the src, dst and rte descriptor buffers, then
 * return the policy itself to lnet_udsp_cachep.
 */
1119 lnet_udsp_free(struct lnet_udsp *udsp)
1121 lnet_udsp_nid_descr_free(&udsp->udsp_src);
1122 lnet_udsp_nid_descr_free(&udsp->udsp_dst);
1123 lnet_udsp_nid_descr_free(&udsp->udsp_rte);
1125 CDEBUG(D_MALLOC, "udsp free %p\n", udsp);
1126 kmem_cache_free(lnet_udsp_cachep, udsp);
/*
 * Remove every policy from the_lnet.ln_udsp_list.  Each entry is unlinked,
 * its effects are reverted with lnet_udsp_apply_policies(udsp, true) and it
 * is freed.
 * NOTE(review): how the shutdown argument influences this (presumably it
 * skips the revert step) is not visible in this listing.
 */
1130 lnet_udsp_destroy(bool shutdown)
1132 struct lnet_udsp *udsp, *tmp;
1134 CDEBUG(D_NET, "Destroying UDSPs in the system\n");
1136 list_for_each_entry_safe(udsp, tmp, &the_lnet.ln_udsp_list,
1138 list_del(&udsp->udsp_on_list);
1140 lnet_udsp_apply_policies(udsp, true);
1141 lnet_udsp_free(udsp);
/*
 * Compute the number of bytes one NID descriptor occupies when marshalled.
 * The size starts at sizeof(struct lnet_ioctl_udsp_descr) and grows by one
 * struct lnet_expressions per counted expression and one struct
 * lnet_range_expr per range, counting the ranges of the first net number
 * expression and of every address expression.
 * NOTE(review): the declaration and updates of expr_count, the early exit
 * for an absent descriptor and the return statement are not visible in this
 * listing.
 */
1146 lnet_size_marshaled_nid_descr(struct lnet_ud_nid_descr *descr)
1148 struct cfs_expr_list *expr;
1150 int range_count = 0;
1151 size_t size = sizeof(struct lnet_ioctl_udsp_descr);
1153 if (!lnet_udsp_criteria_present(descr))
1156 /* we always have one net expression */
1157 if (!list_empty(&descr->ud_net_id.udn_net_num_range)) {
1158 expr = list_first_entry(&descr->ud_net_id.udn_net_num_range,
1159 struct cfs_expr_list, el_link);
1161 /* count the number of cfs_range_expr in the net expression */
1162 range_count = lnet_get_list_len(&expr->el_exprs);
1165 /* count the number of cfs_range_expr in the address expressions */
1166 list_for_each_entry(expr, &descr->ud_addr_range, el_link) {
1168 range_count += lnet_get_list_len(&expr->el_exprs);
1171 size += (sizeof(struct lnet_expressions) * expr_count);
1172 size += (sizeof(struct lnet_range_expr) * range_count);
/*
 * Total marshalled size of a policy: sizeof(struct lnet_ioctl_udsp) plus the
 * marshalled sizes of its src, dst and rte descriptors.
 */
1178 lnet_get_udsp_size(struct lnet_udsp *udsp)
1180 size_t size = sizeof(struct lnet_ioctl_udsp);
1182 size += lnet_size_marshaled_nid_descr(&udsp->udsp_src);
1183 size += lnet_size_marshaled_nid_descr(&udsp->udsp_dst);
1184 size += lnet_size_marshaled_nid_descr(&udsp->udsp_rte);
1186 CDEBUG(D_NET, "get udsp (%p) size: %d\n", udsp, (int)size);
/*
 * Copy every range of one expression to the user buffer.
 * Each cfs_range_expr is converted to a struct lnet_range_expr (lo, hi,
 * stride) and written with copy_to_user(); after each range *bulk is
 * advanced and *bulk_size reduced by the size written.
 * NOTE(review): the value returned when copy_to_user() fails and the final
 * return are not visible in this listing.
 */
1192 copy_exprs(struct cfs_expr_list *expr, void __user **bulk,
1195 struct cfs_range_expr *range;
1196 struct lnet_range_expr range_expr;
1198 /* copy over the net range expressions to the bulk */
1199 list_for_each_entry(range, &expr->el_exprs, re_link) {
1200 range_expr.re_lo = range->re_lo;
1201 range_expr.re_hi = range->re_hi;
1202 range_expr.re_stride = range->re_stride;
1203 CDEBUG(D_NET, "Copy Range %u:%u:%u\n",
1204 range_expr.re_lo, range_expr.re_hi,
1205 range_expr.re_stride);
1206 if (copy_to_user(*bulk, &range_expr, sizeof(range_expr))) {
1207 CDEBUG(D_NET, "Failed to copy range_expr\n");
1210 *bulk += sizeof(range_expr);
1211 *bulk_size -= sizeof(range_expr);
/*
 * Marshal one NID descriptor into the user buffer.
 *
 * The descriptor type tag is taken from the first four bytes of the "type"
 * string (callers in this file pass 3-character literals, so the terminating
 * NUL is part of the tag).  When the descriptor is absent only the zeroed
 * static header (with the tag) is written.  Otherwise the header carries the
 * address expression count, the net type and the number of net ranges,
 * followed by the net ranges (if any) and then, for each address expression,
 * its range count and its ranges.
 *
 * *bulk is advanced and *bulk_size reduced after every successful
 * copy_to_user().
 * NOTE(review): the declarations of expr_count / net_expr_count / rc, the
 * error codes returned on copy failures and the return statements are not
 * visible in this listing.  "failex" in one debug message looks like a typo
 * for "failed".
 */
1218 copy_nid_range(struct lnet_ud_nid_descr *nid_descr, char *type,
1219 void __user **bulk, __u32 *bulk_size)
1221 struct lnet_ioctl_udsp_descr ioc_udsp_descr;
1222 struct cfs_expr_list *expr;
1223 struct lnet_expressions ioc_expr;
1228 memset(&ioc_udsp_descr, 0, sizeof(ioc_udsp_descr));
1229 ioc_udsp_descr.iud_src_hdr.ud_descr_type = *(__u32 *)type;
1231 /* if criteria not present, copy over the static part of the NID
1234 if (!lnet_udsp_criteria_present(nid_descr)) {
1235 CDEBUG(D_NET, "Descriptor %u:%u:%u:%u\n",
1236 ioc_udsp_descr.iud_src_hdr.ud_descr_type,
1237 ioc_udsp_descr.iud_src_hdr.ud_descr_count,
1238 ioc_udsp_descr.iud_net.ud_net_type,
1239 ioc_udsp_descr.iud_net.ud_net_num_expr.le_count);
1240 if (copy_to_user(*bulk, &ioc_udsp_descr,
1241 sizeof(ioc_udsp_descr))) {
1242 CDEBUG(D_NET, "failed to copy ioc_udsp_descr\n");
1245 *bulk += sizeof(ioc_udsp_descr);
1246 *bulk_size -= sizeof(ioc_udsp_descr);
1250 expr_count = lnet_get_list_len(&nid_descr->ud_addr_range);
1252 /* copy the net information */
1253 if (!list_empty(&nid_descr->ud_net_id.udn_net_num_range)) {
1254 expr = list_first_entry(&nid_descr->ud_net_id.udn_net_num_range,
1255 struct cfs_expr_list, el_link);
1256 net_expr_count = lnet_get_list_len(&expr->el_exprs);
1261 /* set the total expression count */
1262 ioc_udsp_descr.iud_src_hdr.ud_descr_count = expr_count;
1263 ioc_udsp_descr.iud_net.ud_net_type =
1264 nid_descr->ud_net_id.udn_net_type;
1265 ioc_udsp_descr.iud_net.ud_net_num_expr.le_count = net_expr_count;
1267 CDEBUG(D_NET, "Descriptor %u:%u:%u:%u\n",
1268 ioc_udsp_descr.iud_src_hdr.ud_descr_type,
1269 ioc_udsp_descr.iud_src_hdr.ud_descr_count,
1270 ioc_udsp_descr.iud_net.ud_net_type,
1271 ioc_udsp_descr.iud_net.ud_net_num_expr.le_count);
1273 /* copy over the header info to the bulk */
1274 if (copy_to_user(*bulk, &ioc_udsp_descr, sizeof(ioc_udsp_descr))) {
1275 CDEBUG(D_NET, "Failed to copy data\n");
1278 *bulk += sizeof(ioc_udsp_descr);
1279 *bulk_size -= sizeof(ioc_udsp_descr);
1281 /* copy over the net num expression if it exists */
1282 if (net_expr_count) {
1283 rc = copy_exprs(expr, bulk, bulk_size);
1288 /* copy the address range */
1289 list_for_each_entry(expr, &nid_descr->ud_addr_range, el_link) {
1290 ioc_expr.le_count = lnet_get_list_len(&expr->el_exprs);
1291 if (copy_to_user(*bulk, &ioc_expr, sizeof(ioc_expr))) {
1292 CDEBUG(D_NET, "failex to copy ioc_expr\n");
1295 *bulk += sizeof(ioc_expr);
1296 *bulk_size -= sizeof(ioc_expr);
1298 rc = copy_exprs(expr, bulk, bulk_size);
/*
 * Marshal a policy into the ioctl structure supplied by user space.
 * The available size is ioc_len plus iou_bulk_size and is compared with
 * lnet_get_udsp_size().  The index, action type and priority are stored in
 * the ioctl header, sizeof(*ioc_udsp) is deducted, and the src, dst and rte
 * descriptors are written to the bulk area in that order, tagged "SRC",
 * "DST" and "RTE".  The whole buffer is expected to be consumed (asserted).
 * NOTE(review): the declarations of bulk / bulk_size / rc, the statement
 * executed on a size mismatch, the checks after each copy_nid_range() call
 * and the return statements are not visible in this listing.
 */
1307 lnet_udsp_marshal(struct lnet_udsp *udsp, struct lnet_ioctl_udsp *ioc_udsp)
1316 bulk = ioc_udsp->iou_bulk;
1317 bulk_size = ioc_udsp->iou_hdr.ioc_len +
1318 ioc_udsp->iou_bulk_size;
1320 CDEBUG(D_NET, "marshal udsp (%p)\n", udsp);
1321 CDEBUG(D_NET, "MEM -----> bulk: %p:0x%x\n", bulk, bulk_size);
1322 /* make sure user space allocated enough buffer to marshal the
1325 if (bulk_size != lnet_get_udsp_size(udsp)) {
1330 ioc_udsp->iou_idx = udsp->udsp_idx;
1331 ioc_udsp->iou_action_type = udsp->udsp_action_type;
1332 ioc_udsp->iou_action.priority = udsp->udsp_action.udsp_priority;
1334 bulk_size -= sizeof(*ioc_udsp);
1336 rc = copy_nid_range(&udsp->udsp_src, "SRC", &bulk, &bulk_size);
1340 rc = copy_nid_range(&udsp->udsp_dst, "DST", &bulk, &bulk_size);
1344 rc = copy_nid_range(&udsp->udsp_rte, "RTE", &bulk, &bulk_size);
1348 CDEBUG(D_NET, "MEM <----- bulk: %p\n", bulk);
1350 /* we should've consumed the entire buffer */
1351 LASSERT(bulk_size == 0);
1355 CERROR("Failed to marshal udsp: %d\n", rc);
/*
 * Unmarshal one expression and its ranges from *bulk into memory at *buf.
 * A count of 0 is tested first (presumably an early exit); a count of -1
 * means the range count is read from a struct lnet_expressions header in
 * the bulk.  A cfs_expr_list is initialised, linked onto "list" and *buf is
 * advanced past it; each range then has lo/hi/stride copied from the bulk,
 * is linked onto the expression, and both *bulk and *buf are advanced.
 * NOTE(review): the lines that point e, exprs, range and range_expr into
 * *bulk / *buf are not visible in this listing.
 */
1360 copy_range_info(void **bulk, void **buf, struct list_head *list,
1363 struct lnet_range_expr *range_expr;
1364 struct cfs_range_expr *range;
1365 struct cfs_expr_list *exprs;
1366 int range_count = count;
1369 if (range_count == 0)
1372 if (range_count == -1) {
1373 struct lnet_expressions *e;
1376 range_count = e->le_count;
1377 *bulk += sizeof(*e);
1381 INIT_LIST_HEAD(&exprs->el_link);
1382 INIT_LIST_HEAD(&exprs->el_exprs);
1383 list_add_tail(&exprs->el_link, list);
1384 *buf += sizeof(*exprs);
1386 for (i = 0; i < range_count; i++) {
1389 INIT_LIST_HEAD(&range->re_link);
1390 range->re_lo = range_expr->re_lo;
1391 range->re_hi = range_expr->re_hi;
1392 range->re_stride = range_expr->re_stride;
1393 CDEBUG(D_NET, "Copy Range %u:%u:%u\n",
1397 list_add_tail(&range->re_link, &exprs->el_exprs);
1398 *bulk += sizeof(*range_expr);
1399 *buf += sizeof(*range);
/*
 * copy_ioc_udsp_descr() - demarshal one NID descriptor from an ioctl bulk.
 *
 * @nid_descr: destination descriptor. Receives the net type, the net number
 *             range list, the address range lists and ud_mem_size.
 * @type:      expected descriptor tag. Callers in this file pass "SRC",
 *             "DST" or "RTE". Its first four bytes are compared, as a raw
 *             __u32, with iud_src_hdr.ud_descr_type.
 * @bulk:      in/out cursor into the marshalled data.
 * @bulk_size: in/out number of bytes left at @bulk.
 *
 * Input layout, as walked by this function: a struct lnet_ioctl_udsp_descr,
 * then iud_net.ud_net_num_expr.le_count lnet_range_expr records for the net
 * number, then iud_src_hdr.ud_descr_count groups, each made of a struct
 * lnet_expressions header followed by le_count lnet_range_expr records.
 *
 * A net type of 0 means the descriptor is absent: only the fixed-size
 * header is skipped. Otherwise a first pass adds up the sizes and the total
 * number of ranges, then one buffer is allocated for all list nodes, and a
 * second pass fills it through copy_range_info().
 *
 * NOTE(review): iud_net.ud_net_type is read before remaining_size has been
 * compared with sizeof(*ioc_nid), and the le_count of each expression
 * header is read before the size of that header has been deducted. Confirm
 * that callers guarantee those bytes are present.
 *
 * NOTE(review): in the "Bad NID descriptor type" message,
 * (__u8)(descr_type << 8) is always 0 and (__u8)(descr_type << 4) is the low
 * nibble moved up, so the "given" part does not show the other tag bytes.
 * Right shifts by 8 and 16 were presumably intended.
 *
 * NOTE(review): remaining_size is a signed int, while the section sizes are
 * products of counts read from the buffer and are logged with %u. A very
 * large count could wrap and leave remaining_size non-negative. Confirm the
 * counts are bounded elsewhere.
 *
 * NOTE(review): the statements that follow each CERROR, the check of the
 * LIBCFS_ALLOC result and the final return are not visible in this listing.
 */
1404 copy_ioc_udsp_descr(struct lnet_ud_nid_descr *nid_descr, char *type,
1405 void **bulk, __u32 *bulk_size)
1407 struct lnet_ioctl_udsp_descr *ioc_nid = *bulk;
1408 struct lnet_expressions *exprs;
1411 int range_count = 0;
1414 int remaining_size = *bulk_size;
1418 size_t range_expr_s = sizeof(struct lnet_range_expr);
1419 size_t lnet_exprs_s = sizeof(struct lnet_expressions);
1421 CDEBUG(D_NET, "%s: bulk = %p:%u\n", type, *bulk, *bulk_size);
1423 /* criteria not present, skip over the static part of the
1424 * bulk, which is included for each NID descriptor
1426 if (ioc_nid->iud_net.ud_net_type == 0) {
1427 remaining_size -= sizeof(*ioc_nid);
1428 if (remaining_size < 0) {
1429 CERROR("Truncated userspace udsp buffer given\n");
1432 *bulk += sizeof(*ioc_nid);
1433 *bulk_size = remaining_size;
1437 descr_type = ioc_nid->iud_src_hdr.ud_descr_type;
1438 if (descr_type != *(__u32 *)type) {
1439 CERROR("Bad NID descriptor type. Expected %s, given %c%c%c\n",
1440 type, (__u8)descr_type, (__u8)(descr_type << 4),
1441 (__u8)(descr_type << 8));
1445 /* calculate the total size to verify we have enough buffer.
1446 * Start of by finding how many ranges there are for the net
1449 range_count = ioc_nid->iud_net.ud_net_num_expr.le_count;
1450 size = sizeof(*ioc_nid) + (range_count * range_expr_s);
1451 remaining_size -= size;
1452 if (remaining_size < 0) {
1453 CERROR("Truncated userspace udsp buffer given\n");
1457 CDEBUG(D_NET, "Total net num ranges in %s: %d:%u\n", type,
1459 /* the number of expressions for the NID. IE 4 for IP, 1 for GNI */
1460 expr_count = ioc_nid->iud_src_hdr.ud_descr_count;
1461 CDEBUG(D_NET, "addr as %d exprs\n", expr_count);
1462 /* point tmp to the beginning of the NID expressions */
/*
 * First pass: for each address expression add its ranges to range_count and
 * deduct header plus ranges from remaining_size.
 */
1464 for (i = 0; i < expr_count; i++) {
1465 /* get the number of ranges per expression */
1467 range_count += exprs->le_count;
1468 size = (range_expr_s * exprs->le_count) + lnet_exprs_s;
1469 remaining_size -= size;
1470 CDEBUG(D_NET, "expr %d:%d:%u:%d:%d\n", i, exprs->le_count,
1471 size, remaining_size, range_count);
1472 if (remaining_size < 0) {
1473 CERROR("Truncated userspace udsp buffer given\n");
/* report back how many bytes are left once this descriptor is consumed */
1479 *bulk_size = remaining_size;
1481 /* copy over the net type */
1482 nid_descr->ud_net_id.udn_net_type = ioc_nid->iud_net.ud_net_type;
1484 CDEBUG(D_NET, "%u\n", nid_descr->ud_net_id.udn_net_type);
1486 /* allocate the total memory required to copy this NID descriptor */
/*
 * One cfs_expr_list per address expression plus one for the net number
 * list, and one cfs_range_expr per range counted in the first pass.
 */
1487 alloc_size = (sizeof(struct cfs_expr_list) * (expr_count + 1)) +
1488 (sizeof(struct cfs_range_expr) * (range_count));
1489 LIBCFS_ALLOC(buf, alloc_size);
1493 /* store the amount of memory allocated so we can free it later on */
1494 nid_descr->ud_mem_size = alloc_size;
1496 /* copy over the net number range */
/* range_count held the grand total; reset it to the net number count */
1497 range_count = ioc_nid->iud_net.ud_net_num_expr.le_count;
1498 *bulk += sizeof(*ioc_nid);
1499 CDEBUG(D_NET, "bulk = %p\n", *bulk);
1500 copy_range_info(bulk, &buf, &nid_descr->ud_net_id.udn_net_num_range,
1502 CDEBUG(D_NET, "bulk = %p\n", *bulk);
1504 /* copy over the NID descriptor */
1505 for (i = 0; i < expr_count; i++) {
/* -1: copy_range_info() takes the range count from the in-band header */
1506 copy_range_info(bulk, &buf, &nid_descr->ud_addr_range, -1);
1507 CDEBUG(D_NET, "bulk = %p\n", *bulk);
/*
 * lnet_udsp_demarshal_add() - build a UDSP from a marshalled buffer and add
 * it to the policy list.
 *
 * @bulk:      marshalled policy: a struct lnet_ioctl_udsp followed by the
 *             SRC, DST and RTE NID descriptors, in that order.
 * @bulk_size: number of bytes available at @bulk.
 *
 * A new policy is obtained from lnet_udsp_alloc(). The action type,
 * priority and index are taken from the header, and the three descriptors
 * are parsed with copy_ioc_udsp_descr(), which advances the cursor and
 * shrinks the remaining size. The finished policy is passed to
 * lnet_udsp_add_policy() together with the requested index, and that call's
 * result is returned.
 *
 * NOTE(review): the assignment of ioc_udsp, the checks on the allocation
 * and on rc, and the end of the function are not visible in this listing.
 */
1514 lnet_udsp_demarshal_add(void *bulk, __u32 bulk_size)
1516 struct lnet_ioctl_udsp *ioc_udsp;
1517 struct lnet_udsp *udsp;
/* the buffer must be able to hold at least the fixed-size header */
1521 if (bulk_size < sizeof(*ioc_udsp))
1524 udsp = lnet_udsp_alloc();
1530 udsp->udsp_action_type = ioc_udsp->iou_action_type;
1531 udsp->udsp_action.udsp_priority = ioc_udsp->iou_action.priority;
1532 idx = ioc_udsp->iou_idx;
1534 CDEBUG(D_NET, "demarshal descr %u:%u:%d:%u\n", udsp->udsp_action_type,
1535 udsp->udsp_action.udsp_priority, idx, bulk_size);
/* step over the header; the NID descriptors are parsed from what follows */
1537 bulk += sizeof(*ioc_udsp);
1538 bulk_size -= sizeof(*ioc_udsp);
1540 rc = copy_ioc_udsp_descr(&udsp->udsp_src, "SRC", &bulk, &bulk_size);
1544 rc = copy_ioc_udsp_descr(&udsp->udsp_dst, "DST", &bulk, &bulk_size);
1548 rc = copy_ioc_udsp_descr(&udsp->udsp_rte, "RTE", &bulk, &bulk_size);
/* hand the policy over for insertion at the requested index */
1552 return lnet_udsp_add_policy(udsp, idx);
/* NOTE(review): presumably the failure path; its label is not shown here */
1555 lnet_udsp_free(udsp);