2 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
4 * Copyright (c) 2011, 2017, Intel Corporation.
6 * Copyright (c) 2018-2020 Data Direct Networks.
8 * This file is part of Lustre, https://wiki.whamcloud.com/
10 * Portals is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Portals is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * version 2 along with this program; If not, see
21 * http://www.gnu.org/licenses/gpl-2.0.html
25 * User Defined Selection Policies (UDSP) are introduced to add
26 * ability of fine traffic control. The policies are instantiated
27 * on LNet constructs and allow preference of some constructs
28 * over others as an extension of the selection algorithm.
29 * The order of operation is defined by the selection algorithm logical flow:
31 * 1. Iterate over all the networks that a peer can be reached on
32 * and select the best local network
33 * - The remote network with the highest priority is examined
35 * - The local network with the highest priority is selected
37 * - The local NI with the highest priority is selected
39 * 2. If the peer is a remote peer and has no local networks,
40 * - then select the remote peer network with the highest priority
42 * - Select the highest priority remote peer_ni on the network selected
44 * - Now that the peer's network and NI are decided, select the router
45 * in round robin from the peer NI's preferred router list.
47 * - Select the highest priority local NI on the local net of the
50 * 3. Otherwise for local peers, select the peer_ni from the peer.
51 * - highest priority peer NI is selected
53 * - Select the peer NI which has the local NI selected on its
57 * Accordingly, the User Interface allows for the following:
58 * - Adding a local network udsp: if multiple local networks are
59 * available, each one can have a priority.
60 * - Adding a local NID udsp: after a local network is chosen,
61 * if there are multiple NIs, each one can have a priority.
62 * - Adding a remote NID udsp: assign priority to a peer NID.
63 * - Adding a NID pair udsp: allows specifying local NIDs
64 * to be added on the list on the specified peer NIs
65 * When selecting a peer NI, the one with the
66 * local NID being used on its list is preferred.
67 * - Adding a Router udsp: similar to the NID pair udsp.
68 * Specified router NIDs are added on the list on the specified peer NIs.
69 * When sending to a remote peer, remote net is selected and the peer NID
70 * is selected. The router which has its nid on the peer NI list
72 * - Deleting a udsp: use the specified policy index to remove it
73 * from the policy list.
75 * Generally, the syntax is as follows
76 * lnetctl policy <add | del | show>
77 * --src: ip2nets syntax specifying the local NID to match
78 * --dst: ip2nets syntax specifying the remote NID to match
79 * --rte: ip2nets syntax specifying the router NID to match
80 * --priority: Priority to apply to rule matches
81 * --idx: Index of where to insert or delete the rule
82 * By default add appends to the end of the rule list
84 * Author: Amir Shehata
87 #include <linux/uaccess.h>
89 #include <lnet/udsp.h>
90 #include <libcfs/libcfs.h>
/*
 * Members of the context handed to every rule-application callback in this
 * file (see the udsp_apply_rule signature, which takes a struct udsp_info *).
 * They carry the construct under examination (peer NI, peer net, local NI,
 * local net), the NID descriptor to match against, the NID descriptor used
 * as the action, and the action type.
 * NOTE(review): udi_priority, udi_local and udi_revert are read through this
 * context elsewhere in the file, but their declarations and the struct
 * opener are not visible in this excerpt.
 */
93 struct lnet_peer_ni *udi_lpni;
94 struct lnet_peer_net *udi_lpn;
95 struct lnet_ni *udi_ni;
96 struct lnet_net *udi_net;
97 struct lnet_ud_nid_descr *udi_match;
98 struct lnet_ud_nid_descr *udi_action;
100 enum lnet_udsp_action_type udi_type;
/* Common signature of the rule-application callbacks: context in, int out. */
105 typedef int (*udsp_apply_rule)(struct udsp_info *);
/*
 * Slot indices into the callback table (the cbs[] arrays declared with
 * UDSP_APPLY_MAX_ENUM entries further down in this file).
 */
108 UDSP_APPLY_ON_PEERS = 0,
109 UDSP_APPLY_PRIO_ON_NIS = 1,
110 UDSP_APPLY_RTE_ON_NETS = 2,
111 UDSP_APPLY_MAX_ENUM = 3,
/* Returned by the net-level helpers when the rule does not apply to them. */
114 #define RULE_NOT_APPLICABLE -1
/*
 * Report whether @match addresses a whole network rather than specific
 * NIDs: the result is simply whether its address-range list is empty.
 */
117 lnet_udsp_is_net_rule(struct lnet_ud_nid_descr *match)
119 return list_empty(&match->ud_addr_range);
/*
 * Compare two lists of cfs_expr_list entries (@e1 and @e2).
 * The lists are walked in lockstep: for every pair of expressions the number
 * of ranges is compared, then each pair of ranges is compared on re_lo,
 * re_hi and re_stride.
 * NOTE(review): the statements executed on each comparison outcome (and the
 * final return) are not visible in this excerpt.
 */
123 lnet_udsp_expr_list_equal(struct list_head *e1,
124 struct list_head *e2)
126 struct cfs_expr_list *expr1;
127 struct cfs_expr_list *expr2;
128 struct cfs_range_expr *range1, *range2;
/* both lists empty */
130 if (list_empty(e1) && list_empty(e2))
/* lists of different length */
133 if (lnet_get_list_len(e1) != lnet_get_list_len(e2))
/* expr2 tracks the entry of e2 paired with the current entry of e1 */
136 expr2 = list_first_entry(e2, struct cfs_expr_list, el_link);
138 list_for_each_entry(expr1, e1, el_link) {
139 if (lnet_get_list_len(&expr1->el_exprs) !=
140 lnet_get_list_len(&expr2->el_exprs))
/* range2 tracks the range paired with the current range1 */
143 range2 = list_first_entry(&expr2->el_exprs,
144 struct cfs_range_expr,
147 list_for_each_entry(range1, &expr1->el_exprs, re_link) {
148 if (range1->re_lo != range2->re_lo ||
149 range1->re_hi != range2->re_hi ||
150 range1->re_stride != range2->re_stride)
152 range2 = list_next_entry(range2, re_link);
154 expr2 = list_next_entry(expr2, el_link);
/*
 * Compare two NID descriptors on three attributes: the net type, the
 * net-number range list and the address range list. The two list
 * comparisons are delegated to lnet_udsp_expr_list_equal().
 */
161 lnet_udsp_nid_descr_equal(struct lnet_ud_nid_descr *e1,
162 struct lnet_ud_nid_descr *e2)
164 if (e1->ud_net_id.udn_net_type != e2->ud_net_id.udn_net_type ||
165 !lnet_udsp_expr_list_equal(&e1->ud_net_id.udn_net_num_range,
166 &e2->ud_net_id.udn_net_num_range) ||
167 !lnet_udsp_expr_list_equal(&e1->ud_addr_range, &e2->ud_addr_range))
/*
 * Compare the actions of two policies: the action types are compared first
 * and, for priority actions, the priority values as well.
 */
174 lnet_udsp_action_equal(struct lnet_udsp *e1, struct lnet_udsp *e2)
176 if (e1->udsp_action_type != e2->udsp_action_type)
/* same type; for priority actions the priority value must match too */
179 if (e1->udsp_action_type == EN_LNET_UDSP_ACTION_PRIORITY &&
180 e1->udsp_action.udsp_priority != e2->udsp_action.udsp_priority)
/*
 * Compare the matching criteria of two policies: the source, destination
 * and router NID descriptors are each compared with
 * lnet_udsp_nid_descr_equal(). The action is not examined here.
 */
187 lnet_udsp_equal(struct lnet_udsp *e1, struct lnet_udsp *e2)
189 /* check each NID descr */
190 if (!lnet_udsp_nid_descr_equal(&e1->udsp_src, &e2->udsp_src) ||
191 !lnet_udsp_nid_descr_equal(&e1->udsp_dst, &e2->udsp_dst) ||
192 !lnet_udsp_nid_descr_equal(&e1->udsp_rte, &e2->udsp_rte))
198 /* It is enough to look at the net type of the descriptor. If the criterion
199 * is present, the net must be specified.
/* A descriptor counts as present when its net type is non-zero. */
202 lnet_udsp_criteria_present(struct lnet_ud_nid_descr *descr)
204 return (descr->ud_net_id.udn_net_type != 0);
/*
 * Apply a priority rule to the single local NI in udi->udi_ni.
 * The NI's NID is matched against udi->udi_match (net type, net-number
 * range, address range) and the NI selection priority is set.
 * NOTE(review): the test on the match result is not visible in this excerpt.
 */
208 lnet_udsp_apply_rule_on_ni(struct udsp_info *udi)
211 struct lnet_ni *ni = udi->udi_ni;
212 struct lnet_ud_nid_descr *ni_match = udi->udi_match;
/* when reverting, the priority written is -1 converted to __u32 */
213 __u32 priority = (udi->udi_revert) ? -1 : udi->udi_priority;
215 rc = cfs_match_nid_net(
216 lnet_nid_to_nid4(&ni->ni_nid),
217 ni_match->ud_net_id.udn_net_type,
218 &ni_match->ud_net_id.udn_net_num_range,
219 &ni_match->ud_addr_range);
223 CDEBUG(D_NET, "apply udsp on ni %s\n",
224 libcfs_nidstr(&ni->ni_nid));
226 /* Detected match. Set NIDs priority */
227 lnet_ni_set_sel_priority_locked(ni, priority);
/*
 * Build the preferred-router list of local network @net from @rte_action.
 * Every route of every remote net in the_lnet.ln_remote_nets_hash is
 * visited; the gateway's peer NIs and then its primary NID are matched
 * against @rte_action, and the gateway's primary NID is added to @net's
 * preferred-router list.
 * NOTE(review): the tests on each match result and the return paths are not
 * visible in this excerpt.
 */
233 lnet_udsp_apply_rte_list_on_net(struct lnet_net *net,
234 struct lnet_ud_nid_descr *rte_action,
237 struct lnet_remotenet *rnet;
238 struct list_head *rn_list;
239 struct lnet_route *route;
240 struct lnet_peer_ni *lpni;
/* tracks whether the existing preferred-router list was already cleared */
241 bool cleared = false;
242 lnet_nid_t gw_nid, gw_prim_nid;
246 for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
247 rn_list = &the_lnet.ln_remote_nets_hash[i];
248 list_for_each_entry(rnet, rn_list, lrn_list) {
249 list_for_each_entry(route, &rnet->lrn_routes, lr_list) {
250 /* look if gw nid on the same net matches */
251 gw_prim_nid = lnet_nid_to_nid4(
252 &route->lr_gateway->lp_primary_nid);
254 while ((lpni = lnet_get_next_peer_ni_locked(route->lr_gateway,
/* gateway NIs whose net is not found by lnet_get_net_locked() are tested here */
257 if (!lnet_get_net_locked(lpni->lpni_peer_net->lpn_net_id))
259 gw_nid = lnet_nid_to_nid4(&lpni->lpni_nid);
260 rc = cfs_match_nid_net(gw_nid,
261 rte_action->ud_net_id.udn_net_type,
262 &rte_action->ud_net_id.udn_net_num_range,
263 &rte_action->ud_addr_range);
267 /* match gw primary nid on a remote network */
269 gw_nid = gw_prim_nid;
270 rc = cfs_match_nid_net(gw_nid,
271 rte_action->ud_net_id.udn_net_type,
272 &rte_action->ud_net_id.udn_net_num_range,
273 &rte_action->ud_addr_range);
/*
 * The exclusive net lock is released around the preferred-router list
 * updates below and re-acquired afterwards.
 */
277 lnet_net_unlock(LNET_LOCK_EX);
278 if (!cleared || revert) {
279 lnet_net_clr_pref_rtrs(net);
282 lnet_net_lock(LNET_LOCK_EX);
286 /* match. Add to pref NIDs */
287 CDEBUG(D_NET, "udsp net->gw: %s->%s\n",
288 libcfs_net2str(net->net_id),
289 libcfs_nid2str(gw_prim_nid));
290 rc = lnet_net_add_pref_rtr(net, gw_prim_nid);
291 lnet_net_lock(LNET_LOCK_EX);
292 /* success if EEXIST return */
293 if (rc && rc != -EEXIST) {
294 CERROR("Failed to add %s to %s pref rtr list\n",
295 libcfs_nid2str(gw_prim_nid),
296 libcfs_net2str(net->net_id));
/*
 * Apply a router rule to the local networks in the_lnet.ln_nets.
 * Each net is first compared on net type with udi->udi_match, then matched
 * with cfs_match_net(); lnet_udsp_apply_rte_list_on_net() is called with
 * udi->udi_action to build the preferred-router list.
 * NOTE(review): how rc and last_failure are combined is not visible here.
 */
307 lnet_udsp_apply_rte_rule_on_nets(struct udsp_info *udi)
310 int last_failure = 0;
311 struct lnet_net *net;
312 struct lnet_ud_nid_descr *match = udi->udi_match;
313 struct lnet_ud_nid_descr *rte_action = udi->udi_action;
315 list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
/* cheap pre-filter on net type before the full net match */
316 if (LNET_NETTYP(net->net_id) != match->ud_net_id.udn_net_type)
319 rc = cfs_match_net(net->net_id,
320 match->ud_net_id.udn_net_type,
321 &match->ud_net_id.udn_net_num_range);
325 CDEBUG(D_NET, "apply rule on %s\n",
326 libcfs_net2str(net->net_id));
327 rc = lnet_udsp_apply_rte_list_on_net(net, rte_action,
/*
 * Single-net variant of the router rule: works on the one local network in
 * udi->udi_net instead of walking the_lnet.ln_nets. The net is matched
 * against udi->udi_match and lnet_udsp_apply_rte_list_on_net() is called
 * with udi->udi_action.
 */
337 lnet_udsp_apply_rte_rule_on_net(struct udsp_info *udi)
340 struct lnet_net *net = udi->udi_net;
341 struct lnet_ud_nid_descr *match = udi->udi_match;
342 struct lnet_ud_nid_descr *rte_action = udi->udi_action;
344 rc = cfs_match_net(net->net_id,
345 match->ud_net_id.udn_net_type,
346 &match->ud_net_id.udn_net_num_range);
350 CDEBUG(D_NET, "apply rule on %s\n",
351 libcfs_net2str(net->net_id));
352 rc = lnet_udsp_apply_rte_list_on_net(net, rte_action,
/*
 * Apply a priority rule to the local network in udi->udi_net.
 * Returns RULE_NOT_APPLICABLE when udi->udi_match is not a net-level rule
 * (it carries an address range). Otherwise the net is matched with
 * cfs_match_net() and its selection priority is set.
 */
359 lnet_udsp_apply_prio_rule_on_net(struct udsp_info *udi)
362 struct lnet_ud_nid_descr *match = udi->udi_match;
363 struct lnet_net *net = udi->udi_net;
/* when reverting, the priority written is -1 converted to __u32 */
364 __u32 priority = (udi->udi_revert) ? -1 : udi->udi_priority;
366 if (!lnet_udsp_is_net_rule(match))
367 return RULE_NOT_APPLICABLE;
369 rc = cfs_match_net(net->net_id,
370 match->ud_net_id.udn_net_type,
371 &match->ud_net_id.udn_net_num_range);
375 CDEBUG(D_NET, "apply rule on %s\n",
376 libcfs_net2str(net->net_id));
378 lnet_net_set_sel_priority_locked(net, priority);
/*
 * Apply a source priority rule across the local networks in
 * the_lnet.ln_nets and their NIs.
 * For each net of the matching type the net-level rule is tried first
 * through lnet_udsp_apply_prio_rule_on_net(); the NIs on net_ni_list are
 * then handed to lnet_udsp_apply_rule_on_ni().
 * NOTE(review): the assignments of udi->udi_net / udi->udi_ni and the
 * handling of rc / last_failure are not visible in this excerpt.
 */
384 lnet_udsp_apply_rule_on_nis(struct udsp_info *udi)
388 struct lnet_net *net;
389 struct lnet_ud_nid_descr *ni_match = udi->udi_match;
390 int last_failure = 0;
392 list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
393 if (LNET_NETTYP(net->net_id) != ni_match->ud_net_id.udn_net_type)
/* a zero return from the net-level helper is tested here */
397 if (!lnet_udsp_apply_prio_rule_on_net(udi))
400 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
402 rc = lnet_udsp_apply_rule_on_ni(udi);
/*
 * Build the preferred-router list of peer NI @lpni from @rte_action.
 * Every route of every remote net in the_lnet.ln_remote_nets_hash is
 * visited; the gateway's primary NID is matched against @rte_action and
 * added to @lpni's preferred-router list.
 * NOTE(review): the test on the match result and the return paths are not
 * visible in this excerpt.
 */
412 lnet_udsp_apply_rte_list_on_lpni(struct lnet_peer_ni *lpni,
413 struct lnet_ud_nid_descr *rte_action,
416 struct lnet_remotenet *rnet;
417 struct list_head *rn_list;
418 struct lnet_route *route;
/* tracks whether the existing preferred-router list was already cleared */
419 bool cleared = false;
424 for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
425 rn_list = &the_lnet.ln_remote_nets_hash[i];
426 list_for_each_entry(rnet, rn_list, lrn_list) {
427 list_for_each_entry(route, &rnet->lrn_routes, lr_list) {
428 gw_nid = lnet_nid_to_nid4(
429 &route->lr_gateway->lp_primary_nid);
430 rc = cfs_match_nid_net(gw_nid,
431 rte_action->ud_net_id.udn_net_type,
432 &rte_action->ud_net_id.udn_net_num_range,
433 &rte_action->ud_addr_range);
/*
 * The exclusive net lock is released around the preferred-router list
 * updates below and re-acquired afterwards.
 */
436 lnet_net_unlock(LNET_LOCK_EX);
437 if (!cleared || revert) {
438 CDEBUG(D_NET, "%spref rtr nids from lpni %s\n",
439 (revert) ? "revert " : "clear ",
440 libcfs_nidstr(&lpni->lpni_nid));
441 lnet_peer_clr_pref_rtrs(lpni);
444 lnet_net_lock(LNET_LOCK_EX);
448 CDEBUG(D_NET, "add gw nid %s as preferred for peer %s\n",
449 libcfs_nid2str(gw_nid),
450 libcfs_nidstr(&lpni->lpni_nid));
451 /* match. Add to pref NIDs */
452 rc = lnet_peer_add_pref_rtr(lpni, gw_nid);
453 lnet_net_lock(LNET_LOCK_EX);
454 /* success if EEXIST return */
455 if (rc && rc != -EEXIST) {
456 CERROR("Failed to add %s to %s pref rtr list\n",
457 libcfs_nid2str(gw_nid),
458 libcfs_nidstr(&lpni->lpni_nid));
/*
 * Build the preferred local-NID list of peer NI @lpni from @ni_action.
 * Local nets of the same type as @ni_action are scanned; each NI's NID is
 * matched against the descriptor and added to @lpni's preferred NID list.
 * NOTE(review): the test on the match result and the return paths are not
 * visible in this excerpt.
 */
469 lnet_udsp_apply_ni_list(struct lnet_peer_ni *lpni,
470 struct lnet_ud_nid_descr *ni_action,
475 struct lnet_net *net;
/* tracks whether the existing preferred NID list was already cleared */
476 bool cleared = false;
478 list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
479 if (LNET_NETTYP(net->net_id) != ni_action->ud_net_id.udn_net_type)
481 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
482 rc = cfs_match_nid_net(
483 lnet_nid_to_nid4(&ni->ni_nid),
484 ni_action->ud_net_id.udn_net_type,
485 &ni_action->ud_net_id.udn_net_num_range,
486 &ni_action->ud_addr_range);
/*
 * The exclusive net lock is released around the preferred NID list
 * updates below and re-acquired afterwards.
 */
489 lnet_net_unlock(LNET_LOCK_EX);
490 if (!cleared || revert) {
491 lnet_peer_clr_pref_nids(lpni);
492 CDEBUG(D_NET, "%spref nids from lpni %s\n",
493 (revert) ? "revert " : "clear ",
494 libcfs_nidstr(&lpni->lpni_nid));
497 lnet_net_lock(LNET_LOCK_EX);
501 CDEBUG(D_NET, "add nid %s as preferred for peer %s\n",
502 libcfs_nidstr(&ni->ni_nid),
503 libcfs_nidstr(&lpni->lpni_nid));
504 /* match. Add to pref NIDs */
505 rc = lnet_peer_add_pref_nid(
506 lpni, lnet_nid_to_nid4(&ni->ni_nid));
507 lnet_net_lock(LNET_LOCK_EX);
508 /* success if EEXIST return */
509 if (rc && rc != -EEXIST) {
510 CERROR("Failed to add %s to %s pref nid list\n",
511 libcfs_nidstr(&ni->ni_nid),
512 libcfs_nidstr(&lpni->lpni_nid));
/*
 * Apply one rule to the peer NI in udi->udi_lpni.
 * The peer NI's NID is matched against udi->udi_match. Depending on the
 * action type and udi->udi_local the function either builds the preferred
 * local-NID list, builds the preferred-router list, or sets the peer NI
 * selection priority.
 * NOTE(review): parts of the conditions and the return paths are not
 * visible in this excerpt.
 */
522 lnet_udsp_apply_rule_on_lpni(struct udsp_info *udi)
525 struct lnet_peer_ni *lpni = udi->udi_lpni;
526 struct lnet_ud_nid_descr *lp_match = udi->udi_match;
527 struct lnet_ud_nid_descr *action = udi->udi_action;
/* when reverting, the priority written is -1 converted to __u32 */
528 __u32 priority = (udi->udi_revert) ? -1 : udi->udi_priority;
529 bool local = udi->udi_local;
530 enum lnet_udsp_action_type type = udi->udi_type;
532 rc = cfs_match_nid_net(lnet_nid_to_nid4(&lpni->lpni_nid),
533 lp_match->ud_net_id.udn_net_type,
534 &lp_match->ud_net_id.udn_net_num_range,
535 &lp_match->ud_addr_range);
537 /* check if looking for a net match */
539 (lnet_get_list_len(&lp_match->ud_addr_range) ||
540 !cfs_match_net(udi->udi_lpn->lpn_net_id,
541 lp_match->ud_net_id.udn_net_type,
542 &lp_match->ud_net_id.udn_net_num_range))) {
/* preferred-list action with a local (source NID) action descriptor */
546 if (type == EN_LNET_UDSP_ACTION_PREFERRED_LIST && local) {
547 rc = lnet_udsp_apply_ni_list(lpni, action,
/* preferred-list action applied as a router list */
551 } else if (type == EN_LNET_UDSP_ACTION_PREFERRED_LIST &&
553 rc = lnet_udsp_apply_rte_list_on_lpni(lpni, action,
558 lnet_peer_ni_set_selection_priority(lpni, priority);
/*
 * Apply a priority rule to the peer net in udi->udi_lpn.
 * Returns RULE_NOT_APPLICABLE for preferred-list actions and for rules that
 * carry an address range (not net-level). Otherwise the peer net is matched
 * with cfs_match_net() and its selection priority is set.
 */
565 lnet_udsp_apply_rule_on_lpn(struct udsp_info *udi)
568 struct lnet_ud_nid_descr *match = udi->udi_match;
569 struct lnet_peer_net *lpn = udi->udi_lpn;
/* when reverting, the priority written is -1 converted to __u32 */
570 __u32 priority = (udi->udi_revert) ? -1 : udi->udi_priority;
572 if (udi->udi_type == EN_LNET_UDSP_ACTION_PREFERRED_LIST ||
573 !lnet_udsp_is_net_rule(match))
574 return RULE_NOT_APPLICABLE;
576 rc = cfs_match_net(lpn->lpn_net_id,
577 match->ud_net_id.udn_net_type,
578 &match->ud_net_id.udn_net_num_range);
582 CDEBUG(D_NET, "apply rule on lpn %s\n",
583 libcfs_net2str(lpn->lpn_net_id));
584 lnet_peer_net_set_sel_priority_locked(lpn, priority);
/*
 * Apply a peer rule across the peers of every per-CPT peer table in
 * the_lnet.ln_peer_tables. For each peer, the peer nets whose net type
 * equals that of udi->udi_match are examined: the net-level rule is tried
 * through lnet_udsp_apply_rule_on_lpn(), then each peer NI is stored in
 * udi->udi_lpni and handed to lnet_udsp_apply_rule_on_lpni().
 * NOTE(review): the list heads of the two inner loops, the assignment of
 * udi->udi_lpn and the handling of rc / last_failure are not visible in
 * this excerpt.
 */
590 lnet_udsp_apply_rule_on_lpnis(struct udsp_info *udi)
592 /* iterate over all the peers in the system and find if any of the
593 * peers match the criteria. If they do, clear the preferred list
594 * and add the new list
596 int lncpt = cfs_percpt_number(the_lnet.ln_peer_tables);
597 struct lnet_ud_nid_descr *lp_match = udi->udi_match;
598 struct lnet_peer_table *ptable;
599 struct lnet_peer_net *lpn;
600 struct lnet_peer_ni *lpni;
601 struct lnet_peer *lp;
602 int last_failure = 0;
606 for (cpt = 0; cpt < lncpt; cpt++) {
607 ptable = the_lnet.ln_peer_tables[cpt];
608 list_for_each_entry(lp, &ptable->pt_peer_list, lp_peer_list) {
609 CDEBUG(D_NET, "udsp examining lp %s\n",
610 libcfs_nidstr(&lp->lp_primary_nid));
611 list_for_each_entry(lpn,
614 CDEBUG(D_NET, "udsp examining lpn %s\n",
615 libcfs_net2str(lpn->lpn_net_id));
617 if (LNET_NETTYP(lpn->lpn_net_id) !=
618 lp_match->ud_net_id.udn_net_type)
623 if (!lnet_udsp_apply_rule_on_lpn(udi))
626 list_for_each_entry(lpni,
629 CDEBUG(D_NET, "udsp examining lpni %s\n",
630 libcfs_nidstr(&lpni->lpni_nid));
631 udi->udi_lpni = lpni;
632 rc = lnet_udsp_apply_rule_on_lpni(udi);
/*
 * Classify one policy by which NID descriptors it carries and dispatch it
 * to the matching callback in @cbs, after filling in @udi:
 *   dst + src  -> preferred-list rule, match = dst, action = src, local
 *   dst + rte  -> preferred-list rule, match = dst, action = rte, not local
 *   dst only   -> priority rule on peers, match = dst
 *   src only   -> priority rule on local NIs, match = src
 * Any other combination is logged as "Bad UDSP policy".
 * Each branch first checks that the required callback slot is populated and
 * that the policy's action type is the one the branch expects.
 * NOTE(review): the statements following each check (and the final return)
 * are not visible in this excerpt.
 */
644 lnet_udsp_apply_single_policy(struct lnet_udsp *udsp, struct udsp_info *udi,
645 udsp_apply_rule *cbs)
649 if (lnet_udsp_criteria_present(&udsp->udsp_dst) &&
650 lnet_udsp_criteria_present(&udsp->udsp_src)) {
652 if (!cbs[UDSP_APPLY_ON_PEERS])
655 if (udsp->udsp_action_type !=
656 EN_LNET_UDSP_ACTION_PREFERRED_LIST) {
657 CERROR("Bad action type. Expected %d got %d\n",
658 EN_LNET_UDSP_ACTION_PREFERRED_LIST,
659 udsp->udsp_action_type);
662 udi->udi_match = &udsp->udsp_dst;
663 udi->udi_action = &udsp->udsp_src;
664 udi->udi_type = EN_LNET_UDSP_ACTION_PREFERRED_LIST;
665 udi->udi_local = true;
667 CDEBUG(D_NET, "applying udsp (%p) dst->src\n",
669 rc = cbs[UDSP_APPLY_ON_PEERS](udi);
672 } else if (lnet_udsp_criteria_present(&udsp->udsp_dst) &&
673 lnet_udsp_criteria_present(&udsp->udsp_rte)) {
675 if (!cbs[UDSP_APPLY_ON_PEERS])
678 if (udsp->udsp_action_type !=
679 EN_LNET_UDSP_ACTION_PREFERRED_LIST) {
680 CERROR("Bad action type. Expected %d got %d\n",
681 EN_LNET_UDSP_ACTION_PREFERRED_LIST,
682 udsp->udsp_action_type);
/*
 * NOTE(review): this branch is only reached when the dst+src test above
 * failed while dst is present, so src cannot be present here and the check
 * below looks unreachable. Its message also says "src or dst" although the
 * conflict being guarded against is src versus rte.
 */
686 if (lnet_udsp_criteria_present(&udsp->udsp_src)) {
687 CERROR("only one of src or dst can be specified\n");
690 udi->udi_match = &udsp->udsp_dst;
691 udi->udi_action = &udsp->udsp_rte;
692 udi->udi_type = EN_LNET_UDSP_ACTION_PREFERRED_LIST;
693 udi->udi_local = false;
695 CDEBUG(D_NET, "applying udsp (%p) dst->rte\n",
697 rc = cbs[UDSP_APPLY_ON_PEERS](udi);
700 } else if (lnet_udsp_criteria_present(&udsp->udsp_dst)) {
701 /* destination priority rule */
702 if (!cbs[UDSP_APPLY_ON_PEERS])
705 if (udsp->udsp_action_type !=
706 EN_LNET_UDSP_ACTION_PRIORITY) {
707 CERROR("Bad action type. Expected %d got %d\n",
708 EN_LNET_UDSP_ACTION_PRIORITY,
709 udsp->udsp_action_type);
712 udi->udi_match = &udsp->udsp_dst;
713 udi->udi_type = EN_LNET_UDSP_ACTION_PRIORITY;
/*
 * NOTE(review): the action type was already tested a few lines above; if
 * that test bails out, the priority = 0 arm here is dead. Confirm.
 */
714 if (udsp->udsp_action_type !=
715 EN_LNET_UDSP_ACTION_PRIORITY) {
716 udi->udi_priority = 0;
718 udi->udi_priority = udsp->udsp_action.udsp_priority;
720 udi->udi_local = true;
722 CDEBUG(D_NET, "applying udsp (%p) on destination\n",
724 rc = cbs[UDSP_APPLY_ON_PEERS](udi);
727 } else if (lnet_udsp_criteria_present(&udsp->udsp_src)) {
728 /* source priority rule */
729 if (!cbs[UDSP_APPLY_PRIO_ON_NIS])
732 if (udsp->udsp_action_type !=
733 EN_LNET_UDSP_ACTION_PRIORITY) {
734 CERROR("Bad action type. Expected %d got %d\n",
735 EN_LNET_UDSP_ACTION_PRIORITY,
736 udsp->udsp_action_type);
739 udi->udi_match = &udsp->udsp_src;
740 udi->udi_type = EN_LNET_UDSP_ACTION_PRIORITY;
/* NOTE(review): same repeated action-type test as in the dst-only branch. */
741 if (udsp->udsp_action_type !=
742 EN_LNET_UDSP_ACTION_PRIORITY) {
743 udi->udi_priority = 0;
745 udi->udi_priority = udsp->udsp_action.udsp_priority;
747 udi->udi_local = true;
749 CDEBUG(D_NET, "applying udsp (%p) on source\n",
751 rc = cbs[UDSP_APPLY_PRIO_ON_NIS](udi);
753 CERROR("Bad UDSP policy\n");
/*
 * Apply either one policy or the whole policy list.
 * One path returns lnet_udsp_apply_single_policy() for the @udsp given by
 * the caller; the other walks the_lnet.ln_udsp_list in reverse order and
 * applies every policy on it.
 * NOTE(review): the condition choosing between the two paths (presumably
 * whether @udsp is NULL) and the handling of rc / last_failure are not
 * visible in this excerpt.
 */
761 lnet_udsp_apply_policies_helper(struct lnet_udsp *udsp, struct udsp_info *udi,
762 udsp_apply_rule *cbs)
765 int last_failure = 0;
768 return lnet_udsp_apply_single_policy(udsp, udi, cbs);
770 list_for_each_entry_reverse(udsp,
771 &the_lnet.ln_udsp_list,
773 rc = lnet_udsp_apply_single_policy(udsp, udi, cbs);
/*
 * Run the policy helper for a single local NI: only the
 * UDSP_APPLY_PRIO_ON_NIS slot is populated (with the per-NI handler) and no
 * specific policy is passed to the helper.
 * NOTE(review): the statements that store @ni in the context are not
 * visible in this excerpt.
 */
782 lnet_udsp_apply_policies_on_ni(struct lnet_ni *ni)
784 struct udsp_info udi;
785 udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM] = {NULL};
787 memset(&udi, 0, sizeof(udi));
791 cbs[UDSP_APPLY_PRIO_ON_NIS] = lnet_udsp_apply_rule_on_ni;
793 return lnet_udsp_apply_policies_helper(NULL, &udi, cbs);
/*
 * Run the policy helper for a single local network: the priority slot and
 * the router slot are populated with the single-net handlers and no
 * specific policy is passed to the helper.
 * NOTE(review): the statements that store @net in the context are not
 * visible in this excerpt.
 */
797 lnet_udsp_apply_policies_on_net(struct lnet_net *net)
799 struct udsp_info udi;
800 udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM] = {NULL};
802 memset(&udi, 0, sizeof(udi));
806 cbs[UDSP_APPLY_PRIO_ON_NIS] = lnet_udsp_apply_prio_rule_on_net;
807 cbs[UDSP_APPLY_RTE_ON_NETS] = lnet_udsp_apply_rte_rule_on_net;
809 return lnet_udsp_apply_policies_helper(NULL, &udi, cbs);
/*
 * Run the policy helper for a single peer NI: only the UDSP_APPLY_ON_PEERS
 * slot is populated (with the per-peer-NI handler) and no specific policy
 * is passed to the helper.
 * NOTE(review): the statements that store @lpni in the context are not
 * visible in this excerpt.
 */
813 lnet_udsp_apply_policies_on_lpni(struct lnet_peer_ni *lpni)
815 struct udsp_info udi;
816 udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM] = {NULL};
818 memset(&udi, 0, sizeof(udi));
822 cbs[UDSP_APPLY_ON_PEERS] = lnet_udsp_apply_rule_on_lpni;
824 return lnet_udsp_apply_policies_helper(NULL, &udi, cbs);
/*
 * Run the policy helper for a single peer net: only the UDSP_APPLY_ON_PEERS
 * slot is populated (with the per-peer-net handler) and no specific policy
 * is passed to the helper.
 * NOTE(review): the statements that store @lpn in the context are not
 * visible in this excerpt.
 */
828 lnet_udsp_apply_policies_on_lpn(struct lnet_peer_net *lpn)
830 struct udsp_info udi;
831 udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM] = {NULL};
833 memset(&udi, 0, sizeof(udi));
837 cbs[UDSP_APPLY_ON_PEERS] = lnet_udsp_apply_rule_on_lpn;
839 return lnet_udsp_apply_policies_helper(NULL, &udi, cbs);
/*
 * System-wide entry point: apply @udsp through the policy helper using the
 * handlers that walk all peers, all local NIs and all local nets.
 * @revert is copied into the context so the handlers undo the policy
 * instead of applying it. The helper runs with the exclusive net lock held.
 */
843 lnet_udsp_apply_policies(struct lnet_udsp *udsp, bool revert)
846 struct udsp_info udi;
847 udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM] = {NULL};
849 memset(&udi, 0, sizeof(udi));
851 cbs[UDSP_APPLY_ON_PEERS] = lnet_udsp_apply_rule_on_lpnis;
852 cbs[UDSP_APPLY_PRIO_ON_NIS] = lnet_udsp_apply_rule_on_nis;
853 cbs[UDSP_APPLY_RTE_ON_NETS] = lnet_udsp_apply_rte_rule_on_nets;
855 udi.udi_revert = revert;
857 lnet_net_lock(LNET_LOCK_EX);
858 rc = lnet_udsp_apply_policies_helper(udsp, &udi, cbs);
859 lnet_net_unlock(LNET_LOCK_EX);
/*
 * Look up a policy by position @idx by walking the_lnet.ln_udsp_list.
 * NOTE(review): the comparison that selects the entry and the return
 * statements are not visible in this excerpt. The debug message inside the
 * loop spells "upsp", which looks like a typo for "udsp".
 */
865 lnet_udsp_get_policy(int idx)
868 struct lnet_udsp *udsp = NULL;
871 CDEBUG(D_NET, "Get UDSP at idx = %d\n", idx);
876 list_for_each_entry(udsp, &the_lnet.ln_udsp_list, udsp_on_list) {
877 CDEBUG(D_NET, "iterating over upsp %d:%d:%d\n",
878 udsp->udsp_idx, i, idx);
886 CDEBUG(D_NET, "Found UDSP (%p)\n", udsp);
/*
 * Insert policy @new into the_lnet.ln_udsp_list, using @idx as the
 * requested position.
 * While scanning the list, an existing policy with the same matching
 * criteria as @new is detected; when both are priority actions whose
 * actions differ, the existing policy's priority is updated in place.
 * Otherwise @new is linked either in front of the entry remembered in
 * 'insert' or at the tail of the list, and the resulting list is dumped to
 * the debug log.
 * NOTE(review): how 'insert' is chosen, the index renumbering loop body and
 * the return paths are not visible in this excerpt.
 */
895 lnet_udsp_add_policy(struct lnet_udsp *new, int idx)
897 struct lnet_udsp *udsp;
898 struct lnet_udsp *insert = NULL;
901 list_for_each_entry(udsp, &the_lnet.ln_udsp_list, udsp_on_list) {
902 CDEBUG(D_NET, "found udsp i = %d:%d, idx = %d\n",
903 i, udsp->udsp_idx, idx);
909 if (lnet_udsp_equal(udsp, new)) {
910 if (!lnet_udsp_action_equal(udsp, new) &&
911 udsp->udsp_action_type == EN_LNET_UDSP_ACTION_PRIORITY &&
912 new->udsp_action_type == EN_LNET_UDSP_ACTION_PRIORITY) {
913 udsp->udsp_action.udsp_priority = new->udsp_action.udsp_priority;
914 CDEBUG(D_NET, "udsp: %p index %d updated priority to %d\n",
917 udsp->udsp_action.udsp_priority);
/* list_add() after insert's predecessor places new directly before insert */
925 list_add(&new->udsp_on_list, insert->udsp_on_list.prev);
927 list_for_each_entry(udsp,
928 &the_lnet.ln_udsp_list,
937 list_add_tail(&new->udsp_on_list, &the_lnet.ln_udsp_list);
941 CDEBUG(D_NET, "udsp: %p added at index %d\n", new, new->udsp_idx);
943 CDEBUG(D_NET, "udsp list:\n");
944 list_for_each_entry(udsp, &the_lnet.ln_udsp_list, udsp_on_list)
945 CDEBUG(D_NET, "udsp %p:%d\n", udsp, udsp->udsp_idx);
/*
 * Delete the policy whose udsp_idx equals @idx.
 * The first matching entry is unlinked from the_lnet.ln_udsp_list, its
 * effects are reverted through lnet_udsp_apply_policies(udsp, true) and it
 * is freed. One path instead destroys every policy via
 * lnet_udsp_destroy(false).
 * NOTE(review): the condition guarding the destroy-all path, the handling
 * of the remaining entries and the return value are not visible here.
 */
951 lnet_udsp_del_policy(int idx)
953 struct lnet_udsp *udsp;
954 struct lnet_udsp *tmp;
/* ensures at most one entry is removed per call */
955 bool removed = false;
958 lnet_udsp_destroy(false);
962 CDEBUG(D_NET, "del udsp at idx = %d\n", idx);
964 list_for_each_entry_safe(udsp,
966 &the_lnet.ln_udsp_list,
970 if (udsp->udsp_idx == idx && !removed) {
971 list_del_init(&udsp->udsp_on_list);
972 lnet_udsp_apply_policies(udsp, true);
973 lnet_udsp_free(udsp);
/*
 * Fill @info for a local NI: the NI selection priority, the selection
 * priority of the NI's net, and the net's preferred router NIDs. Router
 * NIDs are stored only while the index stays below LNET_MAX_SHOW_NUM_NID.
 * NOTE(review): the index initialisation/increment is not visible here.
 */
982 lnet_udsp_get_ni_info(struct lnet_ioctl_construct_udsp_info *info,
985 struct lnet_nid_list *ne;
986 struct lnet_net *net = ni->ni_net;
991 info->cud_nid_priority = ni->ni_sel_priority;
993 info->cud_net_priority = ni->ni_net->net_sel_priority;
994 list_for_each_entry(ne, &net->net_rtr_pref_nids, nl_list) {
995 if (i < LNET_MAX_SHOW_NUM_NID)
996 info->cud_pref_rtr_nid[i] = ne->nl_nid;
/*
 * Fill @info for a peer NI: its selection priority, its preferred local
 * NIDs, its preferred router NIDs and the selection priority of its peer
 * net. Both NID arrays are written only while the index stays below
 * LNET_MAX_SHOW_NUM_NID.
 * NOTE(review): the index initialisation/increment is not visible here.
 */
1005 lnet_udsp_get_peer_info(struct lnet_ioctl_construct_udsp_info *info,
1006 struct lnet_peer_ni *lpni)
1008 struct lnet_nid_list *ne;
1011 /* peer tree structure needs to be in existence */
1012 LASSERT(lpni && lpni->lpni_peer_net &&
1013 lpni->lpni_peer_net->lpn_peer);
1015 info->cud_nid_priority = lpni->lpni_sel_priority;
1016 CDEBUG(D_NET, "lpni %s has %d pref nids\n",
1017 libcfs_nidstr(&lpni->lpni_nid),
1018 lpni->lpni_pref_nnids);
/* a single preferred NID is read from lpni_pref.nid, several from the lpni_pref.nids list */
1019 if (lpni->lpni_pref_nnids == 1) {
1020 info->cud_pref_nid[0] = lpni->lpni_pref.nid;
1021 } else if (lpni->lpni_pref_nnids > 1) {
1022 struct list_head *list = &lpni->lpni_pref.nids;
1024 list_for_each_entry(ne, list, nl_list) {
1025 if (i < LNET_MAX_SHOW_NUM_NID)
1026 info->cud_pref_nid[i] = ne->nl_nid;
1034 list_for_each_entry(ne, &lpni->lpni_rtr_pref_nids, nl_list) {
1035 if (i < LNET_MAX_SHOW_NUM_NID)
1036 info->cud_pref_rtr_nid[i] = ne->nl_nid;
1042 info->cud_net_priority = lpni->lpni_peer_net->lpn_sel_priority;
/*
 * Fill @info for the construct identified by info->cud_nid.
 * When info->cud_peer is not set, the NID is looked up as a local NI and
 * reported through lnet_udsp_get_ni_info(). Otherwise it is looked up as a
 * peer NI, reported through lnet_udsp_get_peer_info(), and the reference
 * taken by the lookup is dropped afterwards.
 * NOTE(review): the NULL checks on both lookups are not visible here.
 */
1046 lnet_udsp_get_construct_info(struct lnet_ioctl_construct_udsp_info *info)
1049 struct lnet_peer_ni *lpni;
1052 if (!info->cud_peer) {
1053 ni = lnet_nid2ni_locked(info->cud_nid, 0);
1055 lnet_udsp_get_ni_info(info, ni);
1057 lpni = lnet_find_peer_ni_locked(info->cud_nid);
1059 CDEBUG(D_NET, "nid %s is not found\n",
1060 libcfs_nid2str(info->cud_nid));
1062 lnet_udsp_get_peer_info(info, lpni);
1063 lnet_peer_ni_decref_locked(lpni);
/*
 * Allocate a zeroed policy from lnet_udsp_cachep (GFP_NOFS) and initialise
 * its list linkage plus the address-range and net-number-range list heads
 * of the src, dst and rte descriptors.
 * NOTE(review): the allocation-failure check and the return statement are
 * not visible in this excerpt.
 */
1070 lnet_udsp_alloc(void)
1072 struct lnet_udsp *udsp;
1074 udsp = kmem_cache_alloc(lnet_udsp_cachep, GFP_NOFS | __GFP_ZERO);
1079 INIT_LIST_HEAD(&udsp->udsp_on_list);
1080 INIT_LIST_HEAD(&udsp->udsp_src.ud_addr_range);
1081 INIT_LIST_HEAD(&udsp->udsp_src.ud_net_id.udn_net_num_range);
1082 INIT_LIST_HEAD(&udsp->udsp_dst.ud_addr_range);
1083 INIT_LIST_HEAD(&udsp->udsp_dst.ud_net_id.udn_net_num_range);
1084 INIT_LIST_HEAD(&udsp->udsp_rte.ud_addr_range);
1085 INIT_LIST_HEAD(&udsp->udsp_rte.ud_net_id.udn_net_num_range);
1087 CDEBUG(D_MALLOC, "udsp alloc %p\n", udsp);
/*
 * Release the buffer backing one NID descriptor.
 * The descriptor is checked with lnet_udsp_criteria_present() first. The
 * buffer of nid_descr->ud_mem_size bytes is then freed through the first
 * entry of the net-number range list when that list is non-empty, or else
 * through the first entry of the address range list.
 */
1092 lnet_udsp_nid_descr_free(struct lnet_ud_nid_descr *nid_descr)
1094 struct list_head *net_range = &nid_descr->ud_net_id.udn_net_num_range;
1096 if (!lnet_udsp_criteria_present(nid_descr))
1099 /* memory management is a bit tricky here. When we allocate the
1100 * memory to store the NID descriptor we allocate a large buffer
1101 * for all the data, so we need to free the entire buffer at
1102 * once. If the net is present the net_range->next points to that
1103 * buffer otherwise if the ud_addr_range is present then it's the
1104 * ud_addr_range.next
1106 if (!list_empty(net_range))
1107 LIBCFS_FREE(net_range->next, nid_descr->ud_mem_size);
1108 else if (!list_empty(&nid_descr->ud_addr_range))
1109 LIBCFS_FREE(nid_descr->ud_addr_range.next,
1110 nid_descr->ud_mem_size);
/*
 * Free a policy: release the buffers of its src, dst and rte descriptors,
 * then return the policy itself to lnet_udsp_cachep.
 */
1114 lnet_udsp_free(struct lnet_udsp *udsp)
1116 lnet_udsp_nid_descr_free(&udsp->udsp_src);
1117 lnet_udsp_nid_descr_free(&udsp->udsp_dst);
1118 lnet_udsp_nid_descr_free(&udsp->udsp_rte);
1120 CDEBUG(D_MALLOC, "udsp free %p\n", udsp);
1121 kmem_cache_free(lnet_udsp_cachep, udsp);
/*
 * Remove every policy from the_lnet.ln_udsp_list: each entry is unlinked,
 * reverted through lnet_udsp_apply_policies(udsp, true) and freed. The
 * _safe iterator is used because entries are deleted while walking.
 * NOTE(review): how @shutdown influences the revert call is not visible in
 * this excerpt.
 */
1125 lnet_udsp_destroy(bool shutdown)
1127 struct lnet_udsp *udsp, *tmp;
1129 CDEBUG(D_NET, "Destroying UDSPs in the system\n");
1131 list_for_each_entry_safe(udsp, tmp, &the_lnet.ln_udsp_list,
1133 list_del(&udsp->udsp_on_list);
1135 lnet_udsp_apply_policies(udsp, true);
1136 lnet_udsp_free(udsp);
/*
 * Compute the marshaled size of one NID descriptor: the fixed
 * struct lnet_ioctl_udsp_descr header, plus one struct lnet_expressions per
 * counted expression, plus one struct lnet_range_expr per range in the net
 * expression and in every address expression.
 * NOTE(review): the declaration/update of expr_count and the return
 * statements are not visible in this excerpt.
 */
1141 lnet_size_marshaled_nid_descr(struct lnet_ud_nid_descr *descr)
1143 struct cfs_expr_list *expr;
1145 int range_count = 0;
1146 size_t size = sizeof(struct lnet_ioctl_udsp_descr);
1148 if (!lnet_udsp_criteria_present(descr))
1151 /* we always have one net expression */
1152 if (!list_empty(&descr->ud_net_id.udn_net_num_range)) {
1153 expr = list_first_entry(&descr->ud_net_id.udn_net_num_range,
1154 struct cfs_expr_list, el_link);
1156 /* count the number of cfs_range_expr in the net expression */
1157 range_count = lnet_get_list_len(&expr->el_exprs);
1160 /* count the number of cfs_range_expr in the address expressions */
1161 list_for_each_entry(expr, &descr->ud_addr_range, el_link) {
1163 range_count += lnet_get_list_len(&expr->el_exprs);
1166 size += (sizeof(struct lnet_expressions) * expr_count);
1167 size += (sizeof(struct lnet_range_expr) * range_count);
/*
 * Compute the marshaled size of a whole policy: the fixed
 * struct lnet_ioctl_udsp plus the marshaled sizes of its src, dst and rte
 * descriptors.
 */
1173 lnet_get_udsp_size(struct lnet_udsp *udsp)
1175 size_t size = sizeof(struct lnet_ioctl_udsp);
1177 size += lnet_size_marshaled_nid_descr(&udsp->udsp_src);
1178 size += lnet_size_marshaled_nid_descr(&udsp->udsp_dst);
1179 size += lnet_size_marshaled_nid_descr(&udsp->udsp_rte);
1181 CDEBUG(D_NET, "get udsp (%p) size: %d\n", udsp, (int)size);
/*
 * Copy every range of @expr to the user buffer at *bulk as a
 * struct lnet_range_expr. After each successful copy *bulk is advanced and
 * *bulk_size is reduced by the size of one range record.
 * NOTE(review): the error return on copy_to_user() failure and the final
 * return are not visible in this excerpt.
 */
1187 copy_exprs(struct cfs_expr_list *expr, void __user **bulk,
1190 struct cfs_range_expr *range;
1191 struct lnet_range_expr range_expr;
1193 /* copy over the net range expressions to the bulk */
1194 list_for_each_entry(range, &expr->el_exprs, re_link) {
1195 range_expr.re_lo = range->re_lo;
1196 range_expr.re_hi = range->re_hi;
1197 range_expr.re_stride = range->re_stride;
1198 CDEBUG(D_NET, "Copy Range %u:%u:%u\n",
1199 range_expr.re_lo, range_expr.re_hi,
1200 range_expr.re_stride);
1201 if (copy_to_user(*bulk, &range_expr, sizeof(range_expr))) {
1202 CDEBUG(D_NET, "Failed to copy range_expr\n");
1205 *bulk += sizeof(range_expr);
1206 *bulk_size -= sizeof(range_expr);
/*
 * Marshal one NID descriptor into the user buffer at *bulk.
 * A struct lnet_ioctl_udsp_descr header is always written. When the
 * descriptor is present, the header carries the address-expression count,
 * the net type and the number of net-number ranges, and is followed by the
 * net-number ranges and then, per address expression, a
 * struct lnet_expressions count record and that expression's ranges.
 * *bulk and *bulk_size are advanced/reduced after every successful copy.
 * @type is read as a 32-bit tag through a __u32 pointer; the callers in
 * this file pass the three-character literals "SRC", "DST" and "RTE".
 * NOTE(review): the return statements are not visible in this excerpt. The
 * message "failex to copy ioc_expr" looks like a typo for "failed".
 */
1213 copy_nid_range(struct lnet_ud_nid_descr *nid_descr, char *type,
1214 void __user **bulk, __u32 *bulk_size)
1216 struct lnet_ioctl_udsp_descr ioc_udsp_descr;
1217 struct cfs_expr_list *expr;
1218 struct lnet_expressions ioc_expr;
1223 memset(&ioc_udsp_descr, 0, sizeof(ioc_udsp_descr));
1224 ioc_udsp_descr.iud_src_hdr.ud_descr_type = *(__u32 *)type;
1226 /* if criteria not present, copy over the static part of the NID
1229 if (!lnet_udsp_criteria_present(nid_descr)) {
1230 CDEBUG(D_NET, "Descriptor %u:%u:%u:%u\n",
1231 ioc_udsp_descr.iud_src_hdr.ud_descr_type,
1232 ioc_udsp_descr.iud_src_hdr.ud_descr_count,
1233 ioc_udsp_descr.iud_net.ud_net_type,
1234 ioc_udsp_descr.iud_net.ud_net_num_expr.le_count);
1235 if (copy_to_user(*bulk, &ioc_udsp_descr,
1236 sizeof(ioc_udsp_descr))) {
1237 CDEBUG(D_NET, "failed to copy ioc_udsp_descr\n");
1240 *bulk += sizeof(ioc_udsp_descr);
1241 *bulk_size -= sizeof(ioc_udsp_descr);
1245 expr_count = lnet_get_list_len(&nid_descr->ud_addr_range);
1247 /* copy the net information */
1248 if (!list_empty(&nid_descr->ud_net_id.udn_net_num_range)) {
1249 expr = list_first_entry(&nid_descr->ud_net_id.udn_net_num_range,
1250 struct cfs_expr_list, el_link);
1251 net_expr_count = lnet_get_list_len(&expr->el_exprs);
1256 /* set the total expression count */
1257 ioc_udsp_descr.iud_src_hdr.ud_descr_count = expr_count;
1258 ioc_udsp_descr.iud_net.ud_net_type =
1259 nid_descr->ud_net_id.udn_net_type;
1260 ioc_udsp_descr.iud_net.ud_net_num_expr.le_count = net_expr_count;
1262 CDEBUG(D_NET, "Descriptor %u:%u:%u:%u\n",
1263 ioc_udsp_descr.iud_src_hdr.ud_descr_type,
1264 ioc_udsp_descr.iud_src_hdr.ud_descr_count,
1265 ioc_udsp_descr.iud_net.ud_net_type,
1266 ioc_udsp_descr.iud_net.ud_net_num_expr.le_count);
1268 /* copy over the header info to the bulk */
1269 if (copy_to_user(*bulk, &ioc_udsp_descr, sizeof(ioc_udsp_descr))) {
1270 CDEBUG(D_NET, "Failed to copy data\n");
1273 *bulk += sizeof(ioc_udsp_descr);
1274 *bulk_size -= sizeof(ioc_udsp_descr);
1276 /* copy over the net num expression if it exists */
1277 if (net_expr_count) {
1278 rc = copy_exprs(expr, bulk, bulk_size);
1283 /* copy the address range */
1284 list_for_each_entry(expr, &nid_descr->ud_addr_range, el_link) {
1285 ioc_expr.le_count = lnet_get_list_len(&expr->el_exprs);
1286 if (copy_to_user(*bulk, &ioc_expr, sizeof(ioc_expr))) {
1287 CDEBUG(D_NET, "failex to copy ioc_expr\n");
1290 *bulk += sizeof(ioc_expr);
1291 *bulk_size -= sizeof(ioc_expr);
1293 rc = copy_exprs(expr, bulk, bulk_size);
/*
 * Marshal policy @udsp into the ioctl structure @ioc_udsp and its bulk
 * buffer.
 * The available size is taken as ioc_len + iou_bulk_size and compared for
 * equality with lnet_get_udsp_size(). The index, action type and priority
 * are stored in @ioc_udsp, sizeof(*ioc_udsp) is deducted from the remaining
 * size, and the src, dst and rte descriptors are written to the bulk buffer
 * in that order, tagged "SRC", "DST" and "RTE". The function asserts that
 * the whole buffer was consumed.
 * NOTE(review): the error paths after each step are not visible in this
 * excerpt.
 */
1302 lnet_udsp_marshal(struct lnet_udsp *udsp, struct lnet_ioctl_udsp *ioc_udsp)
1311 bulk = ioc_udsp->iou_bulk;
1312 bulk_size = ioc_udsp->iou_hdr.ioc_len +
1313 ioc_udsp->iou_bulk_size;
1315 CDEBUG(D_NET, "marshal udsp (%p)\n", udsp);
1316 CDEBUG(D_NET, "MEM -----> bulk: %p:0x%x\n", bulk, bulk_size);
1317 /* make sure user space allocated enough buffer to marshal the
1320 if (bulk_size != lnet_get_udsp_size(udsp)) {
1325 ioc_udsp->iou_idx = udsp->udsp_idx;
1326 ioc_udsp->iou_action_type = udsp->udsp_action_type;
1327 ioc_udsp->iou_action.priority = udsp->udsp_action.udsp_priority;
1329 bulk_size -= sizeof(*ioc_udsp);
1331 rc = copy_nid_range(&udsp->udsp_src, "SRC", &bulk, &bulk_size);
1335 rc = copy_nid_range(&udsp->udsp_dst, "DST", &bulk, &bulk_size);
1339 rc = copy_nid_range(&udsp->udsp_rte, "RTE", &bulk, &bulk_size);
1343 CDEBUG(D_NET, "MEM <----- bulk: %p\n", bulk);
1345 /* we should've consumed the entire buffer */
1346 LASSERT(bulk_size == 0);
1350 CERROR("Failed to marshal udsp: %d\n", rc);
/*
 * Unmarshal one expression from *bulk into kernel structures carved out of
 * *buf, and append it to @list.
 * A count of 0 is tested first. A count of -1 means the range count is
 * taken from a struct lnet_expressions record (its le_count), which is then
 * skipped in *bulk. One cfs_expr_list is linked onto @list, followed by
 * range_count cfs_range_expr entries copied field by field (lo, hi, stride)
 * from the lnet_range_expr records; *bulk and *buf are advanced after each
 * element.
 * NOTE(review): the assignments of e, exprs, range and range_expr from
 * *bulk / *buf, and the return statements, are not visible in this excerpt.
 */
1355 copy_range_info(void **bulk, void **buf, struct list_head *list,
1358 struct lnet_range_expr *range_expr;
1359 struct cfs_range_expr *range;
1360 struct cfs_expr_list *exprs;
1361 int range_count = count;
1364 if (range_count == 0)
1367 if (range_count == -1) {
1368 struct lnet_expressions *e;
1371 range_count = e->le_count;
1372 *bulk += sizeof(*e);
1376 INIT_LIST_HEAD(&exprs->el_link);
1377 INIT_LIST_HEAD(&exprs->el_exprs);
1378 list_add_tail(&exprs->el_link, list);
1379 *buf += sizeof(*exprs);
1381 for (i = 0; i < range_count; i++) {
1384 INIT_LIST_HEAD(&range->re_link);
1385 range->re_lo = range_expr->re_lo;
1386 range->re_hi = range_expr->re_hi;
1387 range->re_stride = range_expr->re_stride;
1388 CDEBUG(D_NET, "Copy Range %u:%u:%u\n",
1392 list_add_tail(&range->re_link, &exprs->el_exprs);
1393 *bulk += sizeof(*range_expr);
1394 *buf += sizeof(*range);
/*
 * copy_ioc_udsp_descr - demarshal one NID descriptor from *bulk into
 * @nid_descr.
 *
 * @nid_descr: descriptor to populate (net type, net number range, address
 *             range and ud_mem_size are written here)
 * @type:      descriptor tag; its first four bytes, read as a __u32, are
 *             compared with the ud_descr_type found in the buffer
 * @bulk:      cursor into the marshalled data, advanced as it is consumed
 * @bulk_size: bytes left at *bulk; updated to the amount remaining
 *
 * Steps visible in this excerpt:
 *  - when iud_net.ud_net_type is 0 only the fixed
 *    struct lnet_ioctl_udsp_descr is skipped over;
 *  - otherwise the descriptor type is checked and the space needed (fixed
 *    part, net number ranges, then one struct lnet_expressions header plus
 *    its ranges per address expression) is subtracted from a signed copy
 *    of *bulk_size, with a negative result reported as a truncated buffer;
 *  - a single buffer sized for (expr_count + 1) struct cfs_expr_list and
 *    range_count struct cfs_range_expr is obtained with LIBCFS_ALLOC() and
 *    its size is remembered in ud_mem_size;
 *  - copy_range_info() then fills the net number range followed by each
 *    address expression.
 *
 * NOTE(review): the bad-type CERROR() prints (__u8)descr_type,
 * (__u8)(descr_type << 4) and (__u8)(descr_type << 8). With left shifts
 * the third value is always 0 and the second is the first byte's low
 * nibble moved up; right shifts by 8 and 16 look like what was intended
 * for printing the tag characters - confirm and fix.
 *
 * NOTE(review): le_count values taken from the buffer are multiplied by
 * sizeof(struct lnet_range_expr) and accumulated into int variables before
 * any range check; if the buffer contents are untrusted, verify that these
 * computations cannot overflow or wrap past the remaining_size checks.
 *
 * NOTE(review): return statements, the declarations of size, tmp, buf and
 * i, and the allocation-failure handling are not visible in this excerpt.
 */
1399 copy_ioc_udsp_descr(struct lnet_ud_nid_descr *nid_descr, char *type,
1400 void **bulk, __u32 *bulk_size)
1402 struct lnet_ioctl_udsp_descr *ioc_nid = *bulk;
1403 struct lnet_expressions *exprs;
1406 int range_count = 0;
/* signed working copy so that over-consumption shows up as a negative value */
1409 int remaining_size = *bulk_size;
1413 size_t range_expr_s = sizeof(struct lnet_range_expr);
1414 size_t lnet_exprs_s = sizeof(struct lnet_expressions);
1416 CDEBUG(D_NET, "%s: bulk = %p:%u\n", type, *bulk, *bulk_size);
1418 /* criteria not present, skip over the static part of the
1419 * bulk, which is included for each NID descriptor
1421 if (ioc_nid->iud_net.ud_net_type == 0) {
1422 remaining_size -= sizeof(*ioc_nid);
1423 if (remaining_size < 0) {
1424 CERROR("Truncated userspace udsp buffer given\n");
1427 *bulk += sizeof(*ioc_nid);
1428 *bulk_size = remaining_size;
1432 descr_type = ioc_nid->iud_src_hdr.ud_descr_type;
1433 if (descr_type != *(__u32 *)type) {
1434 CERROR("Bad NID descriptor type. Expected %s, given %c%c%c\n",
1435 type, (__u8)descr_type, (__u8)(descr_type << 4),
1436 (__u8)(descr_type << 8));
1440 /* calculate the total size to verify we have enough buffer.
1441 * Start of by finding how many ranges there are for the net
1444 range_count = ioc_nid->iud_net.ud_net_num_expr.le_count;
1445 size = sizeof(*ioc_nid) + (range_count * range_expr_s);
1446 remaining_size -= size;
1447 if (remaining_size < 0) {
1448 CERROR("Truncated userspace udsp buffer given\n");
1452 CDEBUG(D_NET, "Total net num ranges in %s: %d:%u\n", type,
1454 /* the number of expressions for the NID. IE 4 for IP, 1 for GNI */
1455 expr_count = ioc_nid->iud_src_hdr.ud_descr_count;
1456 CDEBUG(D_NET, "addr as %d exprs\n", expr_count);
1457 /* point tmp to the beginning of the NID expressions */
1459 for (i = 0; i < expr_count; i++) {
1460 /* get the number of ranges per expression */
/* NOTE(review): le_count is read here before the remaining_size check
 * further down; the assignment of exprs is not visible in this excerpt, so
 * confirm it cannot point past the end of the supplied buffer.
 */
1462 range_count += exprs->le_count;
1463 size = (range_expr_s * exprs->le_count) + lnet_exprs_s;
1464 remaining_size -= size;
1465 CDEBUG(D_NET, "expr %d:%d:%u:%d:%d\n", i, exprs->le_count,
1466 size, remaining_size, range_count);
1467 if (remaining_size < 0) {
1468 CERROR("Truncated userspace udsp buffer given\n");
/* report the bytes left after this descriptor back to the caller */
1474 *bulk_size = remaining_size;
1476 /* copy over the net type */
1477 nid_descr->ud_net_id.udn_net_type = ioc_nid->iud_net.ud_net_type;
1479 CDEBUG(D_NET, "%u\n", nid_descr->ud_net_id.udn_net_type);
1481 /* allocate the total memory required to copy this NID descriptor */
/* expr_count + 1 lists: presumably one extra for the net number range */
1482 alloc_size = (sizeof(struct cfs_expr_list) * (expr_count + 1)) +
1483 (sizeof(struct cfs_range_expr) * (range_count));
1484 LIBCFS_ALLOC(buf, alloc_size);
1488 /* store the amount of memory allocated so we can free it later on */
1489 nid_descr->ud_mem_size = alloc_size;
1491 /* copy over the net number range */
/* range_count was used as a running total above; reset it to the net number count */
1492 range_count = ioc_nid->iud_net.ud_net_num_expr.le_count;
1493 *bulk += sizeof(*ioc_nid);
1494 CDEBUG(D_NET, "bulk = %p\n", *bulk);
1495 copy_range_info(bulk, &buf, &nid_descr->ud_net_id.udn_net_num_range,
1497 CDEBUG(D_NET, "bulk = %p\n", *bulk);
1499 /* copy over the NID descriptor */
1500 for (i = 0; i < expr_count; i++) {
/* -1 makes copy_range_info() take the count from the lnet_expressions header in the bulk */
1501 copy_range_info(bulk, &buf, &nid_descr->ud_addr_range, -1);
1502 CDEBUG(D_NET, "bulk = %p\n", *bulk);
/*
 * lnet_udsp_demarshal_add - build a UDSP from a marshalled buffer and add
 * it as a policy.
 *
 * @bulk:      start of the marshalled data, beginning with a
 *             struct lnet_ioctl_udsp
 * @bulk_size: number of bytes available at @bulk
 *
 * A buffer smaller than struct lnet_ioctl_udsp is tested for first. A
 * policy is then obtained from lnet_udsp_alloc(), its action type and
 * priority are taken from the header together with the index, and the
 * "SRC", "DST" and "RTE" descriptors that follow the header are
 * demarshalled in that order with copy_ioc_udsp_descr(). The result of
 * lnet_udsp_add_policy(udsp, idx) is returned.
 *
 * NOTE(review): the assignment of ioc_udsp, the error returns, the checks
 * on each rc and the end of the function are not visible in this excerpt;
 * presumably any failure ends at the lnet_udsp_free() call - confirm
 * against the complete function.
 */
1509 lnet_udsp_demarshal_add(void *bulk, __u32 bulk_size)
1511 struct lnet_ioctl_udsp *ioc_udsp;
1512 struct lnet_udsp *udsp;
/* the buffer must at least hold the fixed-size header */
1516 if (bulk_size < sizeof(*ioc_udsp))
1519 udsp = lnet_udsp_alloc();
1525 udsp->udsp_action_type = ioc_udsp->iou_action_type;
1526 udsp->udsp_action.udsp_priority = ioc_udsp->iou_action.priority;
1527 idx = ioc_udsp->iou_idx;
1529 CDEBUG(D_NET, "demarshal descr %u:%u:%d:%u\n", udsp->udsp_action_type,
1530 udsp->udsp_action.udsp_priority, idx, bulk_size);
/* step over the header; the three NID descriptors follow it */
1532 bulk += sizeof(*ioc_udsp);
1533 bulk_size -= sizeof(*ioc_udsp);
1535 rc = copy_ioc_udsp_descr(&udsp->udsp_src, "SRC", &bulk, &bulk_size);
1539 rc = copy_ioc_udsp_descr(&udsp->udsp_dst, "DST", &bulk, &bulk_size);
1543 rc = copy_ioc_udsp_descr(&udsp->udsp_rte, "RTE", &bulk, &bulk_size);
/* hand the populated policy over together with the requested index */
1547 return lnet_udsp_add_policy(udsp, idx);
/* NOTE(review): presumably the error exit - releases the udsp allocated above */
1550 lnet_udsp_free(udsp);