2 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
4 * Copyright (c) 2011, 2017, Intel Corporation.
6 * Copyright (c) 2018-2020 Data Direct Networks.
8 * This file is part of Lustre, https://wiki.whamcloud.com/
10 * Portals is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Portals is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * version 2 along with this program; If not, see
21 * http://www.gnu.org/licenses/gpl-2.0.html
25 * User Defined Selection Policies (UDSP) are introduced to add
26 * ability of fine traffic control. The policies are instantiated
27 * on LNet constructs and allow preference of some constructs
28 * over others as an extension of the selection algorithm.
29 * The order of operation is defined by the selection algorithm logical flow:
31 * 1. Iterate over all the networks that a peer can be reached on
32 * and select the best local network
33 * - The remote network with the highest priority is examined
35 * - The local network with the highest priority is selected
37 * - The local NI with the highest priority is selected
39 * 2. If the peer is a remote peer and has no local networks,
40 * - then select the remote peer network with the highest priority
42 * - Select the highest priority remote peer_ni on the network selected
44 * - Now that the peer's network and NI are decided, select the router
45 * in round robin from the peer NI's preferred router list.
47 * - Select the highest priority local NI on the local net of the selected router
50 * 3. Otherwise for local peers, select the peer_ni from the peer.
51 * - highest priority peer NI is selected
53 * - Select the peer NI which has the local NI selected on its preferred list
57 * Accordingly, the User Interface allows for the following:
58 * - Adding a local network udsp: if multiple local networks are
59 * available, each one can have a priority.
60 * - Adding a local NID udsp: after a local network is chosen,
61 * if there are multiple NIs, each one can have a priority.
62 * - Adding a remote NID udsp: assign priority to a peer NID.
63 * - Adding a NID pair udsp: allows specifying local NIDs
64 * to be added to the list of the specified peer NIs
65 * When selecting a peer NI, the one with the
66 * local NID being used on its list is preferred.
67 * - Adding a Router udsp: similar to the NID pair udsp.
68 * Specified router NIDs are added on the list on the specified peer NIs.
69 * When sending to a remote peer, remote net is selected and the peer NID
70 * is selected. The router which has its nid on the peer NI list is preferred.
72 * - Deleting a udsp: use the specified policy index to remove it
73 * from the policy list.
75 * Generally, the syntax is as follows
76 * lnetctl policy <add | del | show>
77 * --src: ip2nets syntax specifying the local NID to match
78 * --dst: ip2nets syntax specifying the remote NID to match
79 * --rte: ip2nets syntax specifying the router NID to match
80 * --priority: Priority to apply to rule matches
81 * --idx: Index of where to insert or delete the rule
82 * By default add appends to the end of the rule list
84 * Author: Amir Shehata
87 #include <linux/uaccess.h>
89 #include <lnet/udsp.h>
90 #include <libcfs/libcfs.h>
/* Fields of struct udsp_info, the context the apply callbacks read through
 * their udi argument.
 * NOTE(review): the struct header and the udi_priority, udi_revert and
 * udi_local members used further down are not visible in this view.
 */
/* peer NI a rule is evaluated against */
93 struct lnet_peer_ni *udi_lpni;
/* peer net a rule is evaluated against */
94 struct lnet_peer_net *udi_lpn;
/* local NI a rule is evaluated against */
95 struct lnet_ni *udi_ni;
/* local net a rule is evaluated against */
96 struct lnet_net *udi_net;
/* NID descriptor the construct is matched against */
97 struct lnet_ud_nid_descr *udi_match;
/* NID descriptor supplying the preferred NIDs/routers to apply */
98 struct lnet_ud_nid_descr *udi_action;
/* action type of the rule being applied */
100 enum lnet_udsp_action_type udi_type;
/* Callback that applies one rule, described by a struct udsp_info, and
 * reports the outcome as an int.
 */
105 typedef int (*udsp_apply_rule)(struct udsp_info *);
/* Slots of the udsp_apply_rule callback arrays (cbs[]) built by the
 * lnet_udsp_apply_policies*() entry points. UDSP_APPLY_MAX_ENUM is used as
 * the size of those arrays.
 */
108 UDSP_APPLY_ON_PEERS = 0,
109 UDSP_APPLY_PRIO_ON_NIS = 1,
110 UDSP_APPLY_RTE_ON_NETS = 2,
111 UDSP_APPLY_MAX_ENUM = 3,
/* Returned by lnet_udsp_apply_prio_rule_on_net() and
 * lnet_udsp_apply_rule_on_lpn() when the rule is not one they handle.
 */
114 #define RULE_NOT_APPLICABLE -1
/* Tell whether a match descriptor is a net rule, i.e. it carries no
 * address-range expressions. Returns the result of list_empty() on
 * match->ud_addr_range.
 */
117 lnet_udsp_is_net_rule(struct lnet_ud_nid_descr *match)
119 return list_empty(&match->ud_addr_range);
/* Compare two lists of cfs_expr_list, element by element.
 *
 * e1 is walked with list_for_each_entry() while expr2 and range2 are
 * advanced by hand in lockstep, which is why the list lengths are compared
 * first at each level. Two ranges are the same when re_lo, re_hi and
 * re_stride all agree.
 * NOTE(review): the values returned for each outcome are on lines not
 * visible in this view.
 */
123 lnet_udsp_expr_list_equal(struct list_head *e1,
124 struct list_head *e2)
126 struct cfs_expr_list *expr1;
127 struct cfs_expr_list *expr2;
128 struct cfs_range_expr *range1, *range2;
/* both lists empty */
130 if (list_empty(e1) && list_empty(e2))
/* different number of expressions */
133 if (lnet_get_list_len(e1) != lnet_get_list_len(e2))
/* start of the second list; advanced once per e1 entry below */
136 expr2 = list_first_entry(e2, struct cfs_expr_list, el_link);
138 list_for_each_entry(expr1, e1, el_link) {
/* the paired expressions must hold the same number of ranges */
139 if (lnet_get_list_len(&expr1->el_exprs) !=
140 lnet_get_list_len(&expr2->el_exprs))
143 range2 = list_first_entry(&expr2->el_exprs,
144 struct cfs_range_expr,
147 list_for_each_entry(range1, &expr1->el_exprs, re_link) {
/* any differing bound or stride makes the ranges differ */
148 if (range1->re_lo != range2->re_lo ||
149 range1->re_hi != range2->re_hi ||
150 range1->re_stride != range2->re_stride)
152 range2 = list_next_entry(range2, re_link);
154 expr2 = list_next_entry(expr2, el_link);
/* Compare two NID descriptors: net type, net-number range list and address
 * range list. The condition below is true as soon as any of the three
 * differs.
 */
161 lnet_udsp_nid_descr_equal(struct lnet_ud_nid_descr *e1,
162 struct lnet_ud_nid_descr *e2)
164 if (e1->ud_net_id.udn_net_type != e2->ud_net_id.udn_net_type ||
165 !lnet_udsp_expr_list_equal(&e1->ud_net_id.udn_net_num_range,
166 &e2->ud_net_id.udn_net_num_range) ||
167 !lnet_udsp_expr_list_equal(&e1->ud_addr_range, &e2->ud_addr_range))
/* Compare the action part of two policies: the action types and, for
 * EN_LNET_UDSP_ACTION_PRIORITY actions, the priority value as well.
 */
174 lnet_udsp_action_equal(struct lnet_udsp *e1, struct lnet_udsp *e2)
/* different kinds of action */
176 if (e1->udsp_action_type != e2->udsp_action_type)
/* same kind; priority actions must also carry the same priority */
179 if (e1->udsp_action_type == EN_LNET_UDSP_ACTION_PRIORITY &&
180 e1->udsp_action.udsp_priority != e2->udsp_action.udsp_priority)
/* Compare the src, dst and rte NID descriptors of two policies. The action
 * is not examined here; lnet_udsp_add_policy() checks it separately with
 * lnet_udsp_action_equal().
 */
187 lnet_udsp_equal(struct lnet_udsp *e1, struct lnet_udsp *e2)
189 /* check each NID descr */
190 if (!lnet_udsp_nid_descr_equal(&e1->udsp_src, &e2->udsp_src) ||
191 !lnet_udsp_nid_descr_equal(&e1->udsp_dst, &e2->udsp_dst) ||
192 !lnet_udsp_nid_descr_equal(&e1->udsp_rte, &e2->udsp_rte))
/* Tell whether a NID descriptor carries any criteria: the result is true
 * when the descriptor's net type is non-zero.
 */
198 /* it is enough to look at the net type of the descriptor. If the criteria
199 * is present the net must be specified
202 lnet_udsp_criteria_present(struct lnet_ud_nid_descr *descr)
204 return (descr->ud_net_id.udn_net_type != 0);
/* Apply a priority rule to the local NI in udi->udi_ni.
 *
 * The NI's NID is matched against udi->udi_match with cfs_match_nid_net().
 * On the match path the NI selection priority is set to udi->udi_priority,
 * or to -1 converted to __u32 when the rule is being reverted.
 * NOTE(review): the test on rc and the return statements are on lines not
 * visible in this view.
 */
208 lnet_udsp_apply_rule_on_ni(struct udsp_info *udi)
211 struct lnet_ni *ni = udi->udi_ni;
212 struct lnet_ud_nid_descr *ni_match = udi->udi_match;
/* a revert stores -1 (converted to __u32) instead of the rule's priority */
213 __u32 priority = (udi->udi_revert) ? -1 : udi->udi_priority;
215 rc = cfs_match_nid_net(ni->ni_nid,
216 ni_match->ud_net_id.udn_net_type,
217 &ni_match->ud_net_id.udn_net_num_range,
218 &ni_match->ud_addr_range);
222 CDEBUG(D_NET, "apply udsp on ni %s\n",
223 libcfs_nid2str(ni->ni_nid));
225 /* Detected match. Set NIDs priority */
226 lnet_ni_set_sel_priority_locked(ni, priority);
/* Update the preferred-router list of a local net from a router descriptor.
 *
 * Walks every route of every remote net in the_lnet.ln_remote_nets_hash and
 * matches the route's gateway against rte_action: the gateway's peer NIs
 * are tried in the while loop, then the gateway's primary NID. For a match
 * the LNET_LOCK_EX net lock is released around clearing the current list
 * (lnet_net_clr_pref_rtrs) and adding the gateway's primary NID
 * (lnet_net_add_pref_rtr), and taken again afterwards. -EEXIST from the add
 * is not treated as a failure.
 *
 * net		local net whose preferred routers are updated
 * rte_action	descriptor of the router NIDs to prefer
 *
 * NOTE(review): the third parameter (used below as "revert"), the tests on
 * rc and the return statements are on lines not visible in this view.
 */
232 lnet_udsp_apply_rte_list_on_net(struct lnet_net *net,
233 struct lnet_ud_nid_descr *rte_action,
236 struct lnet_remotenet *rnet;
237 struct list_head *rn_list;
238 struct lnet_route *route;
239 struct lnet_peer_ni *lpni;
/* set once the existing preferred list has been cleared */
240 bool cleared = false;
241 lnet_nid_t gw_nid, gw_prim_nid;
245 for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
246 rn_list = &the_lnet.ln_remote_nets_hash[i];
247 list_for_each_entry(rnet, rn_list, lrn_list) {
248 list_for_each_entry(route, &rnet->lrn_routes, lr_list) {
249 /* look if gw nid on the same net matches */
250 gw_prim_nid = route->lr_gateway->lp_primary_nid;
252 while ((lpni = lnet_get_next_peer_ni_locked(route->lr_gateway,
/* lnet_get_net_locked() looks the gateway NI's net id up among local nets */
255 if (!lnet_get_net_locked(lpni->lpni_peer_net->lpn_net_id))
257 gw_nid = lpni->lpni_nid;
258 rc = cfs_match_nid_net(gw_nid,
259 rte_action->ud_net_id.udn_net_type,
260 &rte_action->ud_net_id.udn_net_num_range,
261 &rte_action->ud_addr_range);
265 /* match gw primary nid on a remote network */
267 gw_nid = gw_prim_nid;
268 rc = cfs_match_nid_net(gw_nid,
269 rte_action->ud_net_id.udn_net_type,
270 &rte_action->ud_net_id.udn_net_num_range,
271 &rte_action->ud_addr_range);
/* the list updates below run with the net lock released */
275 lnet_net_unlock(LNET_LOCK_EX);
/* clear the old list on the first match, and on every match of a revert */
276 if (!cleared || revert) {
277 lnet_net_clr_pref_rtrs(net);
280 lnet_net_lock(LNET_LOCK_EX);
284 /* match. Add to pref NIDs */
285 CDEBUG(D_NET, "udsp net->gw: %s->%s\n",
286 libcfs_net2str(net->net_id),
287 libcfs_nid2str(gw_prim_nid));
/* the primary NID is what gets recorded, whichever gateway NID matched */
288 rc = lnet_net_add_pref_rtr(net, gw_prim_nid);
289 lnet_net_lock(LNET_LOCK_EX);
290 /* success if EEXIST return */
291 if (rc && rc != -EEXIST) {
292 CERROR("Failed to add %s to %s pref rtr list\n",
293 libcfs_nid2str(gw_prim_nid),
294 libcfs_net2str(net->net_id));
/* Apply a router rule across the local nets on the_lnet.ln_nets.
 *
 * Each net is compared with udi->udi_match, first by net type and then with
 * cfs_match_net(); lnet_udsp_apply_rte_list_on_net() is called with
 * udi->udi_action for the nets that get that far.
 * NOTE(review): how rc and last_failure are combined into the return value
 * is on lines not visible in this view.
 */
305 lnet_udsp_apply_rte_rule_on_nets(struct udsp_info *udi)
308 int last_failure = 0;
309 struct lnet_net *net;
310 struct lnet_ud_nid_descr *match = udi->udi_match;
311 struct lnet_ud_nid_descr *rte_action = udi->udi_action;
313 list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
/* cheap net-type comparison before the full net match */
314 if (LNET_NETTYP(net->net_id) != match->ud_net_id.udn_net_type)
317 rc = cfs_match_net(net->net_id,
318 match->ud_net_id.udn_net_type,
319 &match->ud_net_id.udn_net_num_range);
323 CDEBUG(D_NET, "apply rule on %s\n",
324 libcfs_net2str(net->net_id));
325 rc = lnet_udsp_apply_rte_list_on_net(net, rte_action,
/* Apply a router rule to the single local net in udi->udi_net.
 *
 * The net id is matched against udi->udi_match with cfs_match_net(), and
 * lnet_udsp_apply_rte_list_on_net() is called with udi->udi_action.
 * NOTE(review): the test on rc between the two calls is on a line not
 * visible in this view.
 */
335 lnet_udsp_apply_rte_rule_on_net(struct udsp_info *udi)
338 struct lnet_net *net = udi->udi_net;
339 struct lnet_ud_nid_descr *match = udi->udi_match;
340 struct lnet_ud_nid_descr *rte_action = udi->udi_action;
342 rc = cfs_match_net(net->net_id,
343 match->ud_net_id.udn_net_type,
344 &match->ud_net_id.udn_net_num_range);
348 CDEBUG(D_NET, "apply rule on %s\n",
349 libcfs_net2str(net->net_id));
350 rc = lnet_udsp_apply_rte_list_on_net(net, rte_action,
/* Apply a priority rule to the local net in udi->udi_net.
 *
 * Returns RULE_NOT_APPLICABLE when udi->udi_match is not a net rule (it has
 * address ranges). Otherwise the net id is matched with cfs_match_net() and
 * the net selection priority is set to udi->udi_priority, or to -1
 * converted to __u32 when the rule is being reverted.
 * NOTE(review): the test on rc and the remaining return statements are on
 * lines not visible in this view.
 */
357 lnet_udsp_apply_prio_rule_on_net(struct udsp_info *udi)
360 struct lnet_ud_nid_descr *match = udi->udi_match;
361 struct lnet_net *net = udi->udi_net;
362 __u32 priority = (udi->udi_revert) ? -1 : udi->udi_priority;
/* a descriptor with address ranges targets NIs, not the whole net */
364 if (!lnet_udsp_is_net_rule(match))
365 return RULE_NOT_APPLICABLE;
367 rc = cfs_match_net(net->net_id,
368 match->ud_net_id.udn_net_type,
369 &match->ud_net_id.udn_net_num_range);
373 CDEBUG(D_NET, "apply rule on %s\n",
374 libcfs_net2str(net->net_id));
376 lnet_net_set_sel_priority_locked(net, priority);
/* Apply a source priority rule across the local nets and their NIs.
 *
 * For each net on the_lnet.ln_nets whose type passes the net-type
 * comparison, the net-level rule is tried with
 * lnet_udsp_apply_prio_rule_on_net(); the NIs on net->net_ni_list are then
 * handed to lnet_udsp_apply_rule_on_ni().
 * NOTE(review): the lines storing net/ni into udi, the action taken when
 * the net-level call returns 0, and the rc/last_failure handling are not
 * visible in this view.
 */
382 lnet_udsp_apply_rule_on_nis(struct udsp_info *udi)
386 struct lnet_net *net;
387 struct lnet_ud_nid_descr *ni_match = udi->udi_match;
388 int last_failure = 0;
390 list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
391 if (LNET_NETTYP(net->net_id) != ni_match->ud_net_id.udn_net_type)
/* RULE_NOT_APPLICABLE is non-zero, so it does not satisfy this test */
395 if (!lnet_udsp_apply_prio_rule_on_net(udi))
398 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
400 rc = lnet_udsp_apply_rule_on_ni(udi);
/* Update the preferred-router list of a peer NI from a router descriptor.
 *
 * Walks every route of every remote net in the_lnet.ln_remote_nets_hash and
 * matches the primary NID of the route's gateway against rte_action. For a
 * match the LNET_LOCK_EX net lock is released around clearing the current
 * list (lnet_peer_clr_pref_rtrs) and adding the gateway NID
 * (lnet_peer_add_pref_rtr), and taken again afterwards. -EEXIST from the
 * add is not treated as a failure.
 *
 * lpni		peer NI whose preferred routers are updated
 * rte_action	descriptor of the router NIDs to prefer
 *
 * NOTE(review): the third parameter (used below as "revert"), the tests on
 * rc and the return statements are on lines not visible in this view.
 */
410 lnet_udsp_apply_rte_list_on_lpni(struct lnet_peer_ni *lpni,
411 struct lnet_ud_nid_descr *rte_action,
414 struct lnet_remotenet *rnet;
415 struct list_head *rn_list;
416 struct lnet_route *route;
/* set once the existing preferred list has been cleared */
417 bool cleared = false;
422 for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
423 rn_list = &the_lnet.ln_remote_nets_hash[i];
424 list_for_each_entry(rnet, rn_list, lrn_list) {
425 list_for_each_entry(route, &rnet->lrn_routes, lr_list) {
/* only the gateway's primary NID is matched here */
426 gw_nid = route->lr_gateway->lp_primary_nid;
427 rc = cfs_match_nid_net(gw_nid,
428 rte_action->ud_net_id.udn_net_type,
429 &rte_action->ud_net_id.udn_net_num_range,
430 &rte_action->ud_addr_range);
/* the list updates below run with the net lock released */
433 lnet_net_unlock(LNET_LOCK_EX);
/* clear the old list on the first match, and on every match of a revert */
434 if (!cleared || revert) {
435 CDEBUG(D_NET, "%spref rtr nids from lpni %s\n",
436 (revert) ? "revert " : "clear ",
437 libcfs_nid2str(lpni->lpni_nid));
438 lnet_peer_clr_pref_rtrs(lpni);
441 lnet_net_lock(LNET_LOCK_EX);
445 CDEBUG(D_NET, "add gw nid %s as preferred for peer %s\n",
446 libcfs_nid2str(gw_nid),
447 libcfs_nid2str(lpni->lpni_nid));
448 /* match. Add to pref NIDs */
449 rc = lnet_peer_add_pref_rtr(lpni, gw_nid);
450 lnet_net_lock(LNET_LOCK_EX);
451 /* success if EEXIST return */
452 if (rc && rc != -EEXIST) {
453 CERROR("Failed to add %s to %s pref rtr list\n",
454 libcfs_nid2str(gw_nid),
455 libcfs_nid2str(lpni->lpni_nid));
/* Update the preferred local NID list of a peer NI from a NID descriptor.
 *
 * Walks the NIs of every local net on the_lnet.ln_nets and matches each
 * NI's NID against ni_action. For a match the LNET_LOCK_EX net lock is
 * released around clearing the current list (lnet_peer_clr_pref_nids) and
 * adding the NI's NID (lnet_peer_add_pref_nid), and taken again afterwards.
 * -EEXIST from the add is not treated as a failure.
 *
 * lpni		peer NI whose preferred NIDs are updated
 * ni_action	descriptor of the local NIDs to prefer
 *
 * NOTE(review): the third parameter (used below as "revert"), the tests on
 * rc and the return statements are on lines not visible in this view.
 */
466 lnet_udsp_apply_ni_list(struct lnet_peer_ni *lpni,
467 struct lnet_ud_nid_descr *ni_action,
472 struct lnet_net *net;
/* set once the existing preferred list has been cleared */
473 bool cleared = false;
475 list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
/* cheap net-type comparison before matching individual NIDs */
476 if (LNET_NETTYP(net->net_id) != ni_action->ud_net_id.udn_net_type)
478 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
479 rc = cfs_match_nid_net(ni->ni_nid,
480 ni_action->ud_net_id.udn_net_type,
481 &ni_action->ud_net_id.udn_net_num_range,
482 &ni_action->ud_addr_range);
/* the list updates below run with the net lock released */
485 lnet_net_unlock(LNET_LOCK_EX);
/* clear the old list on the first match, and on every match of a revert */
486 if (!cleared || revert) {
487 lnet_peer_clr_pref_nids(lpni);
488 CDEBUG(D_NET, "%spref nids from lpni %s\n",
489 (revert) ? "revert " : "clear ",
490 libcfs_nid2str(lpni->lpni_nid));
493 lnet_net_lock(LNET_LOCK_EX);
497 CDEBUG(D_NET, "add nid %s as preferred for peer %s\n",
498 libcfs_nid2str(ni->ni_nid),
499 libcfs_nid2str(lpni->lpni_nid));
500 /* match. Add to pref NIDs */
501 rc = lnet_peer_add_pref_nid(lpni, ni->ni_nid);
502 lnet_net_lock(LNET_LOCK_EX);
503 /* success if EEXIST return */
504 if (rc && rc != -EEXIST) {
505 CERROR("Failed to add %s to %s pref nid list\n",
506 libcfs_nid2str(ni->ni_nid),
507 libcfs_nid2str(lpni->lpni_nid));
/* Apply a rule to the peer NI in udi->udi_lpni.
 *
 * The peer NID is matched against udi->udi_match. Depending on
 * udi->udi_type and udi->udi_local the action is one of:
 *  - preferred list, local: lnet_udsp_apply_ni_list()
 *  - preferred list, otherwise: lnet_udsp_apply_rte_list_on_lpni()
 *  - else: set the peer NI selection priority (-1 converted to __u32 on
 *    revert)
 * NOTE(review): the start of the net-match condition, the second half of
 * the else-if condition and the return statements are on lines not visible
 * in this view.
 */
517 lnet_udsp_apply_rule_on_lpni(struct udsp_info *udi)
520 struct lnet_peer_ni *lpni = udi->udi_lpni;
521 struct lnet_ud_nid_descr *lp_match = udi->udi_match;
522 struct lnet_ud_nid_descr *action = udi->udi_action;
523 __u32 priority = (udi->udi_revert) ? -1 : udi->udi_priority;
524 bool local = udi->udi_local;
525 enum lnet_udsp_action_type type = udi->udi_type;
527 rc = cfs_match_nid_net(lpni->lpni_nid,
528 lp_match->ud_net_id.udn_net_type,
529 &lp_match->ud_net_id.udn_net_num_range,
530 &lp_match->ud_addr_range);
532 /* check if looking for a net match */
534 (lnet_get_list_len(&lp_match->ud_addr_range) ||
535 !cfs_match_net(udi->udi_lpn->lpn_net_id,
536 lp_match->ud_net_id.udn_net_type,
537 &lp_match->ud_net_id.udn_net_num_range))) {
/* NID pair rule: prefer the matching local NIDs for this peer NI */
541 if (type == EN_LNET_UDSP_ACTION_PREFERRED_LIST && local) {
542 rc = lnet_udsp_apply_ni_list(lpni, action,
/* router rule: prefer the matching gateways for this peer NI */
546 } else if (type == EN_LNET_UDSP_ACTION_PREFERRED_LIST &&
548 rc = lnet_udsp_apply_rte_list_on_lpni(lpni, action,
553 lnet_peer_ni_set_selection_priority(lpni, priority);
/* Apply a priority rule to the peer net in udi->udi_lpn.
 *
 * Returns RULE_NOT_APPLICABLE for preferred-list actions and for match
 * descriptors that are not net rules. Otherwise the peer net id is matched
 * with cfs_match_net() and the peer net selection priority is set to
 * udi->udi_priority, or to -1 converted to __u32 on revert.
 * NOTE(review): the test on rc and the remaining return statements are on
 * lines not visible in this view.
 */
560 lnet_udsp_apply_rule_on_lpn(struct udsp_info *udi)
563 struct lnet_ud_nid_descr *match = udi->udi_match;
564 struct lnet_peer_net *lpn = udi->udi_lpn;
565 __u32 priority = (udi->udi_revert) ? -1 : udi->udi_priority;
/* only priority actions on whole nets are handled at this level */
567 if (udi->udi_type == EN_LNET_UDSP_ACTION_PREFERRED_LIST ||
568 !lnet_udsp_is_net_rule(match))
569 return RULE_NOT_APPLICABLE;
571 rc = cfs_match_net(lpn->lpn_net_id,
572 match->ud_net_id.udn_net_type,
573 &match->ud_net_id.udn_net_num_range);
577 CDEBUG(D_NET, "apply rule on lpn %s\n",
578 libcfs_net2str(lpn->lpn_net_id));
579 lnet_peer_net_set_sel_priority_locked(lpn, priority);
/* Apply a rule to every peer known to the system.
 *
 * Iterates the per-CPT peer tables in the_lnet.ln_peer_tables, each peer on
 * pt_peer_list, each of its peer nets and each peer NI. Peer nets are
 * compared with udi->udi_match by net type, the net-level rule is tried
 * with lnet_udsp_apply_rule_on_lpn(), and each peer NI is stored in
 * udi->udi_lpni and handed to lnet_udsp_apply_rule_on_lpni().
 * NOTE(review): the list heads of the two inner loops, the store of lpn
 * into udi, the action taken when the net-level call returns 0 (note that
 * RULE_NOT_APPLICABLE is non-zero) and the rc/last_failure handling are on
 * lines not visible in this view.
 */
585 lnet_udsp_apply_rule_on_lpnis(struct udsp_info *udi)
587 /* iterate over all the peers in the system and find if any of the
588 * peers match the criteria. If they do, clear the preferred list
589 * and add the new list
591 int lncpt = cfs_percpt_number(the_lnet.ln_peer_tables);
592 struct lnet_ud_nid_descr *lp_match = udi->udi_match;
593 struct lnet_peer_table *ptable;
594 struct lnet_peer_net *lpn;
595 struct lnet_peer_ni *lpni;
596 struct lnet_peer *lp;
597 int last_failure = 0;
601 for (cpt = 0; cpt < lncpt; cpt++) {
602 ptable = the_lnet.ln_peer_tables[cpt];
603 list_for_each_entry(lp, &ptable->pt_peer_list, lp_peer_list) {
604 CDEBUG(D_NET, "udsp examining lp %s\n",
605 libcfs_nid2str(lp->lp_primary_nid));
606 list_for_each_entry(lpn,
609 CDEBUG(D_NET, "udsp examining lpn %s\n",
610 libcfs_net2str(lpn->lpn_net_id));
612 if (LNET_NETTYP(lpn->lpn_net_id) !=
613 lp_match->ud_net_id.udn_net_type)
618 if (!lnet_udsp_apply_rule_on_lpn(udi))
621 list_for_each_entry(lpni,
624 CDEBUG(D_NET, "udsp examining lpni %s\n",
625 libcfs_nid2str(lpni->lpni_nid));
626 udi->udi_lpni = lpni;
627 rc = lnet_udsp_apply_rule_on_lpni(udi);
/* Classify one policy by which of its descriptors carry criteria and run
 * the matching callback from cbs.
 *
 *  dst + src  NID pair rule; preferred-list action; match = dst,
 *             action = src; runs cbs[UDSP_APPLY_ON_PEERS]
 *  dst + rte  router rule; preferred-list action; match = dst,
 *             action = rte; runs cbs[UDSP_APPLY_ON_PEERS]
 *  dst only   destination priority rule; runs cbs[UDSP_APPLY_ON_PEERS]
 *  src only   source priority rule; runs cbs[UDSP_APPLY_PRIO_ON_NIS]
 *  otherwise  "Bad UDSP policy" is logged
 *
 * udsp	policy to apply
 * udi	context filled in here (match, action, type, priority, local)
 * cbs	callbacks indexed by the UDSP_APPLY_* values; a NULL slot is
 *	tested for before use
 *
 * NOTE(review): what is returned for a NULL slot, a bad action type and the
 * bad-policy case is on lines not visible in this view.
 */
639 lnet_udsp_apply_single_policy(struct lnet_udsp *udsp, struct udsp_info *udi,
640 udsp_apply_rule *cbs)
/* NID pair rule: destination and source both given */
644 if (lnet_udsp_criteria_present(&udsp->udsp_dst) &&
645 lnet_udsp_criteria_present(&udsp->udsp_src)) {
647 if (!cbs[UDSP_APPLY_ON_PEERS])
650 if (udsp->udsp_action_type !=
651 EN_LNET_UDSP_ACTION_PREFERRED_LIST) {
652 CERROR("Bad action type. Expected %d got %d\n",
653 EN_LNET_UDSP_ACTION_PREFERRED_LIST,
654 udsp->udsp_action_type);
/* peers are matched on dst; src supplies the local NIDs to prefer */
657 udi->udi_match = &udsp->udsp_dst;
658 udi->udi_action = &udsp->udsp_src;
659 udi->udi_type = EN_LNET_UDSP_ACTION_PREFERRED_LIST;
660 udi->udi_local = true;
662 CDEBUG(D_NET, "applying udsp (%p) dst->src\n",
664 rc = cbs[UDSP_APPLY_ON_PEERS](udi);
/* router rule: destination and router both given */
667 } else if (lnet_udsp_criteria_present(&udsp->udsp_dst) &&
668 lnet_udsp_criteria_present(&udsp->udsp_rte)) {
670 if (!cbs[UDSP_APPLY_ON_PEERS])
673 if (udsp->udsp_action_type !=
674 EN_LNET_UDSP_ACTION_PREFERRED_LIST) {
675 CERROR("Bad action type. Expected %d got %d\n",
676 EN_LNET_UDSP_ACTION_PREFERRED_LIST,
677 udsp->udsp_action_type);
/* NOTE(review): this branch is only reached when dst and src are not both
 * present, and dst is present here, so this test looks unreachable; the
 * message also names "src or dst" in a dst/rte branch - confirm.
 */
681 if (lnet_udsp_criteria_present(&udsp->udsp_src)) {
682 CERROR("only one of src or dst can be specified\n");
/* peers are matched on dst; rte supplies the routers to prefer */
685 udi->udi_match = &udsp->udsp_dst;
686 udi->udi_action = &udsp->udsp_rte;
687 udi->udi_type = EN_LNET_UDSP_ACTION_PREFERRED_LIST;
688 udi->udi_local = false;
690 CDEBUG(D_NET, "applying udsp (%p) dst->rte\n",
692 rc = cbs[UDSP_APPLY_ON_PEERS](udi);
695 } else if (lnet_udsp_criteria_present(&udsp->udsp_dst)) {
696 /* destination priority rule */
697 if (!cbs[UDSP_APPLY_ON_PEERS])
700 if (udsp->udsp_action_type !=
701 EN_LNET_UDSP_ACTION_PRIORITY) {
702 CERROR("Bad action type. Expected %d got %d\n",
703 EN_LNET_UDSP_ACTION_PRIORITY,
704 udsp->udsp_action_type);
707 udi->udi_match = &udsp->udsp_dst;
708 udi->udi_type = EN_LNET_UDSP_ACTION_PRIORITY;
/* NOTE(review): same condition as the action-type check just above; if
 * that check returns, the priority = 0 arm cannot be reached - confirm.
 */
709 if (udsp->udsp_action_type !=
710 EN_LNET_UDSP_ACTION_PRIORITY) {
711 udi->udi_priority = 0;
713 udi->udi_priority = udsp->udsp_action.udsp_priority;
715 udi->udi_local = true;
717 CDEBUG(D_NET, "applying udsp (%p) on destination\n",
719 rc = cbs[UDSP_APPLY_ON_PEERS](udi);
722 } else if (lnet_udsp_criteria_present(&udsp->udsp_src)) {
723 /* source priority rule */
724 if (!cbs[UDSP_APPLY_PRIO_ON_NIS])
727 if (udsp->udsp_action_type !=
728 EN_LNET_UDSP_ACTION_PRIORITY) {
729 CERROR("Bad action type. Expected %d got %d\n",
730 EN_LNET_UDSP_ACTION_PRIORITY,
731 udsp->udsp_action_type);
734 udi->udi_match = &udsp->udsp_src;
735 udi->udi_type = EN_LNET_UDSP_ACTION_PRIORITY;
/* NOTE(review): same condition as the action-type check just above; if
 * that check returns, the priority = 0 arm cannot be reached - confirm.
 */
736 if (udsp->udsp_action_type !=
737 EN_LNET_UDSP_ACTION_PRIORITY) {
738 udi->udi_priority = 0;
740 udi->udi_priority = udsp->udsp_action.udsp_priority;
742 udi->udi_local = true;
744 CDEBUG(D_NET, "applying udsp (%p) on source\n",
746 rc = cbs[UDSP_APPLY_PRIO_ON_NIS](udi);
748 CERROR("Bad UDSP policy\n");
/* Apply either one policy or every policy on the_lnet.ln_udsp_list through
 * lnet_udsp_apply_single_policy().
 *
 * The list is walked in reverse order (list_for_each_entry_reverse).
 * NOTE(review): the condition guarding the single-policy return and the
 * rc/last_failure handling are on lines not visible in this view; the
 * lnet_udsp_apply_policies_on_*() wrappers pass udsp == NULL.
 */
756 lnet_udsp_apply_policies_helper(struct lnet_udsp *udsp, struct udsp_info *udi,
757 udsp_apply_rule *cbs)
760 int last_failure = 0;
763 return lnet_udsp_apply_single_policy(udsp, udi, cbs);
765 list_for_each_entry_reverse(udsp,
766 &the_lnet.ln_udsp_list,
768 rc = lnet_udsp_apply_single_policy(udsp, udi, cbs);
/* Apply the policies to one local NI.
 *
 * Uses a zeroed udsp_info and a callback array in which only the
 * UDSP_APPLY_PRIO_ON_NIS slot is set (lnet_udsp_apply_rule_on_ni), then
 * calls the helper with udsp == NULL.
 * NOTE(review): the lines storing ni into udi are not visible in this view.
 */
777 lnet_udsp_apply_policies_on_ni(struct lnet_ni *ni)
779 struct udsp_info udi;
780 udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM] = {NULL};
782 memset(&udi, 0, sizeof(udi));
786 cbs[UDSP_APPLY_PRIO_ON_NIS] = lnet_udsp_apply_rule_on_ni;
788 return lnet_udsp_apply_policies_helper(NULL, &udi, cbs);
/* Apply the policies to one local net.
 *
 * Uses a zeroed udsp_info and a callback array with the net-level priority
 * callback in the UDSP_APPLY_PRIO_ON_NIS slot and the single-net router
 * callback in the UDSP_APPLY_RTE_ON_NETS slot, then calls the helper with
 * udsp == NULL.
 * NOTE(review): the lines storing net into udi are not visible in this
 * view.
 */
792 lnet_udsp_apply_policies_on_net(struct lnet_net *net)
794 struct udsp_info udi;
795 udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM] = {NULL};
797 memset(&udi, 0, sizeof(udi));
801 cbs[UDSP_APPLY_PRIO_ON_NIS] = lnet_udsp_apply_prio_rule_on_net;
802 cbs[UDSP_APPLY_RTE_ON_NETS] = lnet_udsp_apply_rte_rule_on_net;
804 return lnet_udsp_apply_policies_helper(NULL, &udi, cbs);
/* Apply the policies to one peer NI.
 *
 * Uses a zeroed udsp_info and a callback array in which only the
 * UDSP_APPLY_ON_PEERS slot is set (lnet_udsp_apply_rule_on_lpni), then
 * calls the helper with udsp == NULL.
 * NOTE(review): the lines storing lpni (and its peer net) into udi are not
 * visible in this view.
 */
808 lnet_udsp_apply_policies_on_lpni(struct lnet_peer_ni *lpni)
810 struct udsp_info udi;
811 udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM] = {NULL};
813 memset(&udi, 0, sizeof(udi));
817 cbs[UDSP_APPLY_ON_PEERS] = lnet_udsp_apply_rule_on_lpni;
819 return lnet_udsp_apply_policies_helper(NULL, &udi, cbs);
/* Apply the policies to one peer net.
 *
 * Uses a zeroed udsp_info and a callback array in which only the
 * UDSP_APPLY_ON_PEERS slot is set (lnet_udsp_apply_rule_on_lpn), then calls
 * the helper with udsp == NULL.
 * NOTE(review): the lines storing lpn into udi are not visible in this
 * view.
 */
823 lnet_udsp_apply_policies_on_lpn(struct lnet_peer_net *lpn)
825 struct udsp_info udi;
826 udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM] = {NULL};
828 memset(&udi, 0, sizeof(udi));
832 cbs[UDSP_APPLY_ON_PEERS] = lnet_udsp_apply_rule_on_lpn;
834 return lnet_udsp_apply_policies_helper(NULL, &udi, cbs);
/* Apply (or revert) policies against all constructs in the system.
 *
 * All three callback slots are filled with the system-wide walkers, the
 * revert flag is recorded in the udsp_info, and the helper runs with the
 * net lock held in LNET_LOCK_EX mode.
 *
 * udsp		policy handed to the helper
 * revert	stored in udi.udi_revert; the apply callbacks use it to
 *		pick -1 as the priority and to clear preferred lists
 */
838 lnet_udsp_apply_policies(struct lnet_udsp *udsp, bool revert)
841 struct udsp_info udi;
842 udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM] = {NULL};
844 memset(&udi, 0, sizeof(udi));
846 cbs[UDSP_APPLY_ON_PEERS] = lnet_udsp_apply_rule_on_lpnis;
847 cbs[UDSP_APPLY_PRIO_ON_NIS] = lnet_udsp_apply_rule_on_nis;
848 cbs[UDSP_APPLY_RTE_ON_NETS] = lnet_udsp_apply_rte_rule_on_nets;
850 udi.udi_revert = revert;
852 lnet_net_lock(LNET_LOCK_EX);
853 rc = lnet_udsp_apply_policies_helper(udsp, &udi, cbs);
854 lnet_net_unlock(LNET_LOCK_EX);
/* Look up a policy on the_lnet.ln_udsp_list for the requested index.
 *
 * NOTE(review): the comparison that selects the entry and the return
 * statements are on lines not visible in this view; only the list walk and
 * the debug tracing can be seen here.
 */
860 lnet_udsp_get_policy(int idx)
863 struct lnet_udsp *udsp = NULL;
866 CDEBUG(D_NET, "Get UDSP at idx = %d\n", idx);
871 list_for_each_entry(udsp, &the_lnet.ln_udsp_list, udsp_on_list) {
872 CDEBUG(D_NET, "iterating over upsp %d:%d:%d\n",
873 udsp->udsp_idx, i, idx);
881 CDEBUG(D_NET, "Found UDSP (%p)\n", udsp);
/* Add a policy to the_lnet.ln_udsp_list.
 *
 * The list is scanned for an entry whose NID descriptors equal those of the
 * new policy. When both are priority actions that differ, the existing
 * entry's priority is overwritten with the new one. Otherwise the new
 * policy is linked either in front of the entry recorded in "insert" or at
 * the tail of the list. The resulting list is dumped with CDEBUG.
 * NOTE(review): how "insert" is chosen from idx, how udsp_idx values are
 * renumbered and the return statements are on lines not visible in this
 * view.
 */
890 lnet_udsp_add_policy(struct lnet_udsp *new, int idx)
892 struct lnet_udsp *udsp;
/* entry the new policy is placed in front of, when one is selected */
893 struct lnet_udsp *insert = NULL;
896 list_for_each_entry(udsp, &the_lnet.ln_udsp_list, udsp_on_list) {
897 CDEBUG(D_NET, "found udsp i = %d:%d, idx = %d\n",
898 i, udsp->udsp_idx, idx);
/* same src/dst/rte descriptors as an existing policy */
904 if (lnet_udsp_equal(udsp, new)) {
/* two priority actions that differ: update the priority in place */
905 if (!lnet_udsp_action_equal(udsp, new) &&
906 udsp->udsp_action_type == EN_LNET_UDSP_ACTION_PRIORITY &&
907 new->udsp_action_type == EN_LNET_UDSP_ACTION_PRIORITY) {
908 udsp->udsp_action.udsp_priority = new->udsp_action.udsp_priority;
909 CDEBUG(D_NET, "udsp: %p index %d updated priority to %d\n",
912 udsp->udsp_action.udsp_priority);
/* list_add() after insert's predecessor places new right before insert */
920 list_add(&new->udsp_on_list, insert->udsp_on_list.prev);
922 list_for_each_entry(udsp,
923 &the_lnet.ln_udsp_list,
932 list_add_tail(&new->udsp_on_list, &the_lnet.ln_udsp_list);
936 CDEBUG(D_NET, "udsp: %p added at index %d\n", new, new->udsp_idx);
938 CDEBUG(D_NET, "udsp list:\n");
939 list_for_each_entry(udsp, &the_lnet.ln_udsp_list, udsp_on_list)
940 CDEBUG(D_NET, "udsp %p:%d\n", udsp, udsp->udsp_idx);
/* Delete the policy with the given index from the_lnet.ln_udsp_list.
 *
 * The first entry whose udsp_idx equals idx is unlinked, its effects are
 * reverted with lnet_udsp_apply_policies(udsp, true), and it is freed.
 * NOTE(review): the condition under which lnet_udsp_destroy(false) is
 * called instead, the handling of the remaining entries and the return
 * statements are on lines not visible in this view.
 */
946 lnet_udsp_del_policy(int idx)
948 struct lnet_udsp *udsp;
949 struct lnet_udsp *tmp;
/* makes sure only one entry is removed */
950 bool removed = false;
953 lnet_udsp_destroy(false);
957 CDEBUG(D_NET, "del udsp at idx = %d\n", idx);
959 list_for_each_entry_safe(udsp,
961 &the_lnet.ln_udsp_list,
965 if (udsp->udsp_idx == idx && !removed) {
/* unlink first so the policy is off the list while it is reverted */
966 list_del_init(&udsp->udsp_on_list);
967 lnet_udsp_apply_policies(udsp, true);
968 lnet_udsp_free(udsp);
/* Fill info with the UDSP state of a local NI: the NI selection priority,
 * its net's selection priority, and preferred router NIDs taken from
 * net->net_rtr_pref_nids. Entries are written to info->cud_pref_rtr_nid[]
 * only while the index is below LNET_MAX_SHOW_NUM_NID.
 * NOTE(review): the second parameter (used below as "ni") and the index
 * handling are on lines not visible in this view.
 */
977 lnet_udsp_get_ni_info(struct lnet_ioctl_construct_udsp_info *info,
980 struct lnet_nid_list *ne;
981 struct lnet_net *net = ni->ni_net;
986 info->cud_nid_priority = ni->ni_sel_priority;
988 info->cud_net_priority = ni->ni_net->net_sel_priority;
989 list_for_each_entry(ne, &net->net_rtr_pref_nids, nl_list) {
/* never write past the fixed-size output array */
990 if (i < LNET_MAX_SHOW_NUM_NID)
991 info->cud_pref_rtr_nid[i] = ne->nl_nid;
/* Fill info with the UDSP state of a peer NI: its selection priority, its
 * preferred local NIDs, its preferred router NIDs and the selection
 * priority of its peer net.
 *
 * A single preferred NID is read from lpni->lpni_pref.nid; more than one
 * are read from the list lpni->lpni_pref.nids. Entries are written to the
 * output arrays only while the index is below LNET_MAX_SHOW_NUM_NID.
 * NOTE(review): the index handling is on lines not visible in this view.
 */
1000 lnet_udsp_get_peer_info(struct lnet_ioctl_construct_udsp_info *info,
1001 struct lnet_peer_ni *lpni)
1003 struct lnet_nid_list *ne;
1006 /* peer tree structure needs to be in existence */
1007 LASSERT(lpni && lpni->lpni_peer_net &&
1008 lpni->lpni_peer_net->lpn_peer);
1010 info->cud_nid_priority = lpni->lpni_sel_priority;
1011 CDEBUG(D_NET, "lpni %s has %d pref nids\n",
1012 libcfs_nid2str(lpni->lpni_nid),
1013 lpni->lpni_pref_nnids);
/* one preferred NID is stored directly, several are kept on a list */
1014 if (lpni->lpni_pref_nnids == 1) {
1015 info->cud_pref_nid[0] = lpni->lpni_pref.nid;
1016 } else if (lpni->lpni_pref_nnids > 1) {
1017 struct list_head *list = &lpni->lpni_pref.nids;
1019 list_for_each_entry(ne, list, nl_list) {
1020 if (i < LNET_MAX_SHOW_NUM_NID)
1021 info->cud_pref_nid[i] = ne->nl_nid;
1029 list_for_each_entry(ne, &lpni->lpni_rtr_pref_nids, nl_list) {
1030 if (i < LNET_MAX_SHOW_NUM_NID)
1031 info->cud_pref_rtr_nid[i] = ne->nl_nid;
1037 info->cud_net_priority = lpni->lpni_peer_net->lpn_sel_priority;
/* Report the UDSP state of the construct named by info->cud_nid.
 *
 * When info->cud_peer is not set the NID is looked up as a local NI and
 * lnet_udsp_get_ni_info() fills info. Otherwise it is looked up as a peer
 * NI, lnet_udsp_get_peer_info() fills info, and the peer NI reference is
 * dropped with lnet_peer_ni_decref_locked().
 * NOTE(review): the locking, the NULL checks on the lookups and the return
 * statement are on lines not visible in this view.
 */
1041 lnet_udsp_get_construct_info(struct lnet_ioctl_construct_udsp_info *info)
1044 struct lnet_peer_ni *lpni;
1047 if (!info->cud_peer) {
1048 ni = lnet_nid2ni_locked(info->cud_nid, 0);
1050 lnet_udsp_get_ni_info(info, ni);
1052 lpni = lnet_find_peer_ni_locked(info->cud_nid);
1054 CDEBUG(D_NET, "nid %s is not found\n",
1055 libcfs_nid2str(info->cud_nid));
1057 lnet_udsp_get_peer_info(info, lpni);
1058 lnet_peer_ni_decref_locked(lpni);
/* Allocate a zeroed policy from lnet_udsp_cachep (GFP_NOFS | __GFP_ZERO)
 * and initialise its list heads: the udsp_on_list link plus the address
 * range and net-number range lists of the src, dst and rte descriptors.
 * NOTE(review): the allocation-failure check and the return statement are
 * on lines not visible in this view.
 */
1065 lnet_udsp_alloc(void)
1067 struct lnet_udsp *udsp;
1069 udsp = kmem_cache_alloc(lnet_udsp_cachep, GFP_NOFS | __GFP_ZERO);
1074 INIT_LIST_HEAD(&udsp->udsp_on_list);
1075 INIT_LIST_HEAD(&udsp->udsp_src.ud_addr_range);
1076 INIT_LIST_HEAD(&udsp->udsp_src.ud_net_id.udn_net_num_range);
1077 INIT_LIST_HEAD(&udsp->udsp_dst.ud_addr_range);
1078 INIT_LIST_HEAD(&udsp->udsp_dst.ud_net_id.udn_net_num_range);
1079 INIT_LIST_HEAD(&udsp->udsp_rte.ud_addr_range);
1080 INIT_LIST_HEAD(&udsp->udsp_rte.ud_net_id.udn_net_num_range);
1082 CDEBUG(D_MALLOC, "udsp alloc %p\n", udsp);
/* Release the memory backing one NID descriptor.
 *
 * Nothing is freed for a descriptor without criteria. Otherwise a single
 * buffer of nid_descr->ud_mem_size bytes is freed with LIBCFS_FREE, located
 * through the first entry of the net-number range list when that list is
 * not empty, else through the first entry of the address range list.
 */
1087 lnet_udsp_nid_descr_free(struct lnet_ud_nid_descr *nid_descr)
1089 struct list_head *net_range = &nid_descr->ud_net_id.udn_net_num_range;
1091 if (!lnet_udsp_criteria_present(nid_descr))
1094 /* memory management is a bit tricky here. When we allocate the
1095 * memory to store the NID descriptor we allocate a large buffer
1096 * for all the data, so we need to free the entire buffer at
1097 * once. If the net is present the net_range->next points to that
1098 * buffer otherwise if the ud_addr_range is present then it's the
1099 * ud_addr_range.next
1101 if (!list_empty(net_range))
1102 LIBCFS_FREE(net_range->next, nid_descr->ud_mem_size);
1103 else if (!list_empty(&nid_descr->ud_addr_range))
1104 LIBCFS_FREE(nid_descr->ud_addr_range.next,
1105 nid_descr->ud_mem_size);
/* Free a policy: release the src, dst and rte NID descriptors, then return
 * the policy itself to lnet_udsp_cachep.
 */
1109 lnet_udsp_free(struct lnet_udsp *udsp)
1111 lnet_udsp_nid_descr_free(&udsp->udsp_src);
1112 lnet_udsp_nid_descr_free(&udsp->udsp_dst);
1113 lnet_udsp_nid_descr_free(&udsp->udsp_rte);
1115 CDEBUG(D_MALLOC, "udsp free %p\n", udsp);
1116 kmem_cache_free(lnet_udsp_cachep, udsp);
/* Remove every policy from the_lnet.ln_udsp_list: each entry is unlinked,
 * reverted with lnet_udsp_apply_policies(udsp, true) and freed.
 * NOTE(review): how the shutdown argument affects the revert step is on a
 * line not visible in this view.
 */
1120 lnet_udsp_destroy(bool shutdown)
1122 struct lnet_udsp *udsp, *tmp;
1124 CDEBUG(D_NET, "Destroying UDSPs in the system\n");
/* the _safe iterator is needed because entries are unlinked in the loop */
1126 list_for_each_entry_safe(udsp, tmp, &the_lnet.ln_udsp_list,
1128 list_del(&udsp->udsp_on_list);
1130 lnet_udsp_apply_policies(udsp, true);
1131 lnet_udsp_free(udsp);
/* Compute the number of bytes a NID descriptor occupies once marshalled.
 *
 * The size starts at sizeof(struct lnet_ioctl_udsp_descr). For a descriptor
 * with criteria it grows by one struct lnet_expressions per counted
 * expression and one struct lnet_range_expr per range, counting the ranges
 * of the first net-number expression and of every address expression.
 * NOTE(review): the declaration and updates of expr_count and the return
 * statements are on lines not visible in this view.
 */
1136 lnet_size_marshaled_nid_descr(struct lnet_ud_nid_descr *descr)
1138 struct cfs_expr_list *expr;
1140 int range_count = 0;
1141 size_t size = sizeof(struct lnet_ioctl_udsp_descr);
1143 if (!lnet_udsp_criteria_present(descr))
1146 /* we always have one net expression */
1147 if (!list_empty(&descr->ud_net_id.udn_net_num_range)) {
1148 expr = list_first_entry(&descr->ud_net_id.udn_net_num_range,
1149 struct cfs_expr_list, el_link);
1151 /* count the number of cfs_range_expr in the net expression */
1152 range_count = lnet_get_list_len(&expr->el_exprs);
1155 /* count the number of cfs_range_expr in the address expressions */
1156 list_for_each_entry(expr, &descr->ud_addr_range, el_link) {
1158 range_count += lnet_get_list_len(&expr->el_exprs);
1161 size += (sizeof(struct lnet_expressions) * expr_count);
1162 size += (sizeof(struct lnet_range_expr) * range_count);
/* Compute the marshalled size of a whole policy:
 * sizeof(struct lnet_ioctl_udsp) plus the marshalled sizes of the src, dst
 * and rte NID descriptors.
 */
1168 lnet_get_udsp_size(struct lnet_udsp *udsp)
1170 size_t size = sizeof(struct lnet_ioctl_udsp);
1172 size += lnet_size_marshaled_nid_descr(&udsp->udsp_src);
1173 size += lnet_size_marshaled_nid_descr(&udsp->udsp_dst);
1174 size += lnet_size_marshaled_nid_descr(&udsp->udsp_rte);
1176 CDEBUG(D_NET, "get udsp (%p) size: %d\n", udsp, (int)size);
/* Copy the ranges of one expression list to user space.
 *
 * Each cfs_range_expr on expr->el_exprs is converted to a struct
 * lnet_range_expr (lo, hi, stride) and written with copy_to_user() at
 * *bulk. After each write *bulk is advanced and *bulk_size reduced by the
 * size of the record.
 * NOTE(review): the bulk_size parameter declaration and the return values
 * are on lines not visible in this view.
 */
1182 copy_exprs(struct cfs_expr_list *expr, void __user **bulk,
1185 struct cfs_range_expr *range;
1186 struct lnet_range_expr range_expr;
1188 /* copy over the net range expressions to the bulk */
1189 list_for_each_entry(range, &expr->el_exprs, re_link) {
1190 range_expr.re_lo = range->re_lo;
1191 range_expr.re_hi = range->re_hi;
1192 range_expr.re_stride = range->re_stride;
1193 CDEBUG(D_NET, "Copy Range %u:%u:%u\n",
1194 range_expr.re_lo, range_expr.re_hi,
1195 range_expr.re_stride);
1196 if (copy_to_user(*bulk, &range_expr, sizeof(range_expr))) {
1197 CDEBUG(D_NET, "Failed to copy range_expr\n");
/* step the output cursor past the record just written */
1200 *bulk += sizeof(range_expr);
1201 *bulk_size -= sizeof(range_expr);
/* Marshal one NID descriptor to user space at *bulk.
 *
 * Layout written: a struct lnet_ioctl_udsp_descr header, then the ranges of
 * the net-number expression when there are any, then for each address
 * expression a struct lnet_expressions count followed by its ranges. A
 * descriptor without criteria is written as the header alone. *bulk is
 * advanced and *bulk_size reduced after every write.
 *
 * nid_descr	descriptor to marshal
 * type		tag whose first four bytes are stored, read as a __u32, in
 *		the header's ud_descr_type
 * bulk		in/out user-space output cursor
 * bulk_size	in/out count of bytes left in the output buffer
 *
 * NOTE(review): the return values on the failure and success paths are on
 * lines not visible in this view.
 */
1208 copy_nid_range(struct lnet_ud_nid_descr *nid_descr, char *type,
1209 void __user **bulk, __u32 *bulk_size)
1211 struct lnet_ioctl_udsp_descr ioc_udsp_descr;
1212 struct cfs_expr_list *expr;
1213 struct lnet_expressions ioc_expr;
1218 memset(&ioc_udsp_descr, 0, sizeof(ioc_udsp_descr));
1219 ioc_udsp_descr.iud_src_hdr.ud_descr_type = *(__u32 *)type;
1221 /* if criteria not present, copy over the static part of the NID
1224 if (!lnet_udsp_criteria_present(nid_descr)) {
1225 CDEBUG(D_NET, "Descriptor %u:%u:%u:%u\n",
1226 ioc_udsp_descr.iud_src_hdr.ud_descr_type,
1227 ioc_udsp_descr.iud_src_hdr.ud_descr_count,
1228 ioc_udsp_descr.iud_net.ud_net_type,
1229 ioc_udsp_descr.iud_net.ud_net_num_expr.le_count);
1230 if (copy_to_user(*bulk, &ioc_udsp_descr,
1231 sizeof(ioc_udsp_descr))) {
1232 CDEBUG(D_NET, "failed to copy ioc_udsp_descr\n");
1235 *bulk += sizeof(ioc_udsp_descr);
1236 *bulk_size -= sizeof(ioc_udsp_descr);
1240 expr_count = lnet_get_list_len(&nid_descr->ud_addr_range);
1242 /* copy the net information */
1243 if (!list_empty(&nid_descr->ud_net_id.udn_net_num_range)) {
1244 expr = list_first_entry(&nid_descr->ud_net_id.udn_net_num_range,
1245 struct cfs_expr_list, el_link);
1246 net_expr_count = lnet_get_list_len(&expr->el_exprs);
1251 /* set the total expression count */
1252 ioc_udsp_descr.iud_src_hdr.ud_descr_count = expr_count;
1253 ioc_udsp_descr.iud_net.ud_net_type =
1254 nid_descr->ud_net_id.udn_net_type;
1255 ioc_udsp_descr.iud_net.ud_net_num_expr.le_count = net_expr_count;
1257 CDEBUG(D_NET, "Descriptor %u:%u:%u:%u\n",
1258 ioc_udsp_descr.iud_src_hdr.ud_descr_type,
1259 ioc_udsp_descr.iud_src_hdr.ud_descr_count,
1260 ioc_udsp_descr.iud_net.ud_net_type,
1261 ioc_udsp_descr.iud_net.ud_net_num_expr.le_count);
1263 /* copy over the header info to the bulk */
1264 if (copy_to_user(*bulk, &ioc_udsp_descr, sizeof(ioc_udsp_descr))) {
1265 CDEBUG(D_NET, "Failed to copy data\n");
1268 *bulk += sizeof(ioc_udsp_descr);
1269 *bulk_size -= sizeof(ioc_udsp_descr);
1271 /* copy over the net num expression if it exists */
1272 if (net_expr_count) {
1273 rc = copy_exprs(expr, bulk, bulk_size);
1278 /* copy the address range */
1279 list_for_each_entry(expr, &nid_descr->ud_addr_range, el_link) {
/* each address expression is preceded by its own range count */
1280 ioc_expr.le_count = lnet_get_list_len(&expr->el_exprs);
1281 if (copy_to_user(*bulk, &ioc_expr, sizeof(ioc_expr))) {
1282 CDEBUG(D_NET, "failex to copy ioc_expr\n");
1285 *bulk += sizeof(ioc_expr);
1286 *bulk_size -= sizeof(ioc_expr);
1288 rc = copy_exprs(expr, bulk, bulk_size);
/* Marshal a policy into the user-supplied ioctl structure.
 *
 * The available size is taken as iou_hdr.ioc_len + iou_bulk_size and must
 * equal lnet_get_udsp_size(udsp). The index, action type and priority are
 * stored in ioc_udsp, and the src, dst and rte descriptors are then written
 * to the bulk buffer in that order, tagged "SRC", "DST" and "RTE". The
 * function asserts that bulk_size has reached zero afterwards.
 * NOTE(review): the error codes returned and the tests on rc between the
 * copy_nid_range() calls are on lines not visible in this view.
 */
1297 lnet_udsp_marshal(struct lnet_udsp *udsp, struct lnet_ioctl_udsp *ioc_udsp)
1306 bulk = ioc_udsp->iou_bulk;
1307 bulk_size = ioc_udsp->iou_hdr.ioc_len +
1308 ioc_udsp->iou_bulk_size;
1310 CDEBUG(D_NET, "marshal udsp (%p)\n", udsp);
1311 CDEBUG(D_NET, "MEM -----> bulk: %p:0x%x\n", bulk, bulk_size);
1312 /* make sure user space allocated enough buffer to marshal the
1315 if (bulk_size != lnet_get_udsp_size(udsp)) {
1320 ioc_udsp->iou_idx = udsp->udsp_idx;
1321 ioc_udsp->iou_action_type = udsp->udsp_action_type;
1322 ioc_udsp->iou_action.priority = udsp->udsp_action.udsp_priority;
1324 bulk_size -= sizeof(*ioc_udsp);
1326 rc = copy_nid_range(&udsp->udsp_src, "SRC", &bulk, &bulk_size);
1330 rc = copy_nid_range(&udsp->udsp_dst, "DST", &bulk, &bulk_size);
1334 rc = copy_nid_range(&udsp->udsp_rte, "RTE", &bulk, &bulk_size);
1338 CDEBUG(D_NET, "MEM <----- bulk: %p\n", bulk);
1340 /* we should've consumed the entire buffer */
1341 LASSERT(bulk_size == 0);
1345 CERROR("Failed to marshal udsp: %d\n", rc);
/* Rebuild one cfs_expr_list, with its ranges, from a marshalled buffer.
 *
 * A cfs_expr_list is initialised at *buf and appended to list; each of the
 * range_count range records read at *bulk is copied into a cfs_range_expr
 * placed at *buf and appended to the expression's el_exprs. Both cursors
 * are advanced past what was consumed or produced.
 *
 * A count of 0 is tested for first. A count of -1 means the range count is
 * read from a struct lnet_expressions record at the current position, which
 * is then stepped over.
 * NOTE(review): the last parameter (used below as "count"), the assignments
 * that point e, exprs, range_expr and range at *bulk and *buf, and the
 * return statements are on lines not visible in this view.
 */
1350 copy_range_info(void **bulk, void **buf, struct list_head *list,
1353 struct lnet_range_expr *range_expr;
1354 struct cfs_range_expr *range;
1355 struct cfs_expr_list *exprs;
1356 int range_count = count;
1359 if (range_count == 0)
/* -1: the number of ranges is stored in the buffer itself */
1362 if (range_count == -1) {
1363 struct lnet_expressions *e;
1366 range_count = e->le_count;
1367 *bulk += sizeof(*e);
1371 INIT_LIST_HEAD(&exprs->el_link);
1372 INIT_LIST_HEAD(&exprs->el_exprs);
1373 list_add_tail(&exprs->el_link, list);
1374 *buf += sizeof(*exprs);
1376 for (i = 0; i < range_count; i++) {
1379 INIT_LIST_HEAD(&range->re_link);
1380 range->re_lo = range_expr->re_lo;
1381 range->re_hi = range_expr->re_hi;
1382 range->re_stride = range_expr->re_stride;
1383 CDEBUG(D_NET, "Copy Range %u:%u:%u\n",
1387 list_add_tail(&range->re_link, &exprs->el_exprs);
/* advance the input and output cursors by one record each */
1388 *bulk += sizeof(*range_expr);
1389 *buf += sizeof(*range);
/*
 * Demarshal one NID descriptor from the bulk buffer into @nid_descr.
 *
 * @nid_descr: descriptor to fill in
 * @type:      descriptor tag ("SRC", "DST" or "RTE" at the visible call
 *             sites); it is read as a __u32 and compared with the
 *             ud_descr_type found in the buffer
 * @bulk:      cursor into the buffer; *bulk is interpreted as a
 *             struct lnet_ioctl_udsp_descr and advanced as data is consumed
 * @bulk_size: bytes left in the buffer; updated with what remains
 *
 * Flow visible in this excerpt:
 *  - a ud_net_type of 0 means the criterion is absent: only the fixed-size
 *    descriptor is skipped over
 *  - otherwise the sizes of the net-number ranges and of each address
 *    expression are subtracted from a signed running total, and a
 *    "Truncated userspace udsp buffer given" error is logged if the total
 *    goes negative
 *  - one allocation is made for all expression lists and ranges; its size
 *    is recorded in ud_mem_size
 *  - copy_range_info() is called once for the net-number ranges and once
 *    per address expression
 *
 * NOTE(review): the error message prints (__u8)(descr_type << 4) and
 * (__u8)(descr_type << 8).  Assuming __u8 is 8 bits wide, the second of
 * these is always 0; to print the second and third bytes of the tag,
 * right shifts by 8 and 16 look intended -- confirm.
 * NOTE(review): le_count and ud_descr_count are read from the supplied
 * buffer and feed the size arithmetic; confirm that large values cannot
 * wrap the computed sizes before the "remaining_size < 0" checks.
 * NOTE(review): return statements and several declarations are not
 * visible in this excerpt.
 */
1394 copy_ioc_udsp_descr(struct lnet_ud_nid_descr *nid_descr, char *type,
1395 void **bulk, __u32 *bulk_size)
1397 struct lnet_ioctl_udsp_descr *ioc_nid = *bulk;
1398 struct lnet_expressions *exprs;
1401 int range_count = 0;
/* kept signed so that running out of buffer shows up as a negative value */
1404 int remaining_size = *bulk_size;
1408 size_t range_expr_s = sizeof(struct lnet_range_expr);
1409 size_t lnet_exprs_s = sizeof(struct lnet_expressions);
1411 CDEBUG(D_NET, "%s: bulk = %p:%u\n", type, *bulk, *bulk_size);
1413 /* criteria not present, skip over the static part of the
1414 * bulk, which is included for each NID descriptor
1416 if (ioc_nid->iud_net.ud_net_type == 0) {
1417 remaining_size -= sizeof(*ioc_nid);
1418 if (remaining_size < 0) {
1419 CERROR("Truncated userspace udsp buffer given\n");
1422 *bulk += sizeof(*ioc_nid);
1423 *bulk_size = remaining_size;
1427 descr_type = ioc_nid->iud_src_hdr.ud_descr_type;
1428 if (descr_type != *(__u32 *)type) {
1429 CERROR("Bad NID descriptor type. Expected %s, given %c%c%c\n",
1430 type, (__u8)descr_type, (__u8)(descr_type << 4),
1431 (__u8)(descr_type << 8));
1435 /* calculate the total size to verify we have enough buffer.
1436 * Start off by finding how many ranges there are for the net
1439 range_count = ioc_nid->iud_net.ud_net_num_expr.le_count;
1440 size = sizeof(*ioc_nid) + (range_count * range_expr_s);
1441 remaining_size -= size;
1442 if (remaining_size < 0) {
1443 CERROR("Truncated userspace udsp buffer given\n");
1447 CDEBUG(D_NET, "Total net num ranges in %s: %d:%u\n", type,
1449 /* the number of expressions for the NID. IE 4 for IP, 1 for GNI */
1450 expr_count = ioc_nid->iud_src_hdr.ud_descr_count;
1451 CDEBUG(D_NET, "addr as %d exprs\n", expr_count);
1452 /* point tmp to the beginning of the NID expressions */
1454 for (i = 0; i < expr_count; i++) {
1455 /* get the number of ranges per expression */
1457 range_count += exprs->le_count;
1458 size = (range_expr_s * exprs->le_count) + lnet_exprs_s;
1459 remaining_size -= size;
1460 CDEBUG(D_NET, "expr %d:%d:%u:%d:%d\n", i, exprs->le_count,
1461 size, remaining_size, range_count);
1462 if (remaining_size < 0) {
1463 CERROR("Truncated userspace udsp buffer given\n");
/* hand the number of unconsumed bytes back to the caller */
1469 *bulk_size = remaining_size;
1471 /* copy over the net type */
1472 nid_descr->ud_net_id.udn_net_type = ioc_nid->iud_net.ud_net_type;
1474 CDEBUG(D_NET, "%u\n", nid_descr->ud_net_id.udn_net_type);
1476 /* allocate the total memory required to copy this NID descriptor */
/* range_count now holds the net-number ranges plus every address range;
 * the "+ 1" is presumably the expression list for the net-number ranges,
 * on top of one list per address expression -- confirm
 */
1477 alloc_size = (sizeof(struct cfs_expr_list) * (expr_count + 1)) +
1478 (sizeof(struct cfs_range_expr) * (range_count));
1479 LIBCFS_ALLOC(buf, alloc_size);
1483 /* store the amount of memory allocated so we can free it later on */
1484 nid_descr->ud_mem_size = alloc_size;
1486 /* copy over the net number range */
1487 range_count = ioc_nid->iud_net.ud_net_num_expr.le_count;
1488 *bulk += sizeof(*ioc_nid);
1489 CDEBUG(D_NET, "bulk = %p\n", *bulk);
1490 copy_range_info(bulk, &buf, &nid_descr->ud_net_id.udn_net_num_range,
1492 CDEBUG(D_NET, "bulk = %p\n", *bulk);
1494 /* copy over the NID descriptor */
1495 for (i = 0; i < expr_count; i++) {
/* a count of -1 makes copy_range_info() take the number of ranges from
 * the lnet_expressions header found at *bulk
 */
1496 copy_range_info(bulk, &buf, &nid_descr->ud_addr_range, -1);
1497 CDEBUG(D_NET, "bulk = %p\n", *bulk);
1504 lnet_udsp_demarshal_add(void *bulk, __u32 bulk_size)
1506 struct lnet_ioctl_udsp *ioc_udsp;
1507 struct lnet_udsp *udsp;
1511 if (bulk_size < sizeof(*ioc_udsp))
1514 udsp = lnet_udsp_alloc();
1520 udsp->udsp_action_type = ioc_udsp->iou_action_type;
1521 udsp->udsp_action.udsp_priority = ioc_udsp->iou_action.priority;
1522 idx = ioc_udsp->iou_idx;
1524 CDEBUG(D_NET, "demarshal descr %u:%u:%d:%u\n", udsp->udsp_action_type,
1525 udsp->udsp_action.udsp_priority, idx, bulk_size);
1527 bulk += sizeof(*ioc_udsp);
1528 bulk_size -= sizeof(*ioc_udsp);
1530 rc = copy_ioc_udsp_descr(&udsp->udsp_src, "SRC", &bulk, &bulk_size);
1534 rc = copy_ioc_udsp_descr(&udsp->udsp_dst, "DST", &bulk, &bulk_size);
1538 rc = copy_ioc_udsp_descr(&udsp->udsp_rte, "RTE", &bulk, &bulk_size);
1542 return lnet_udsp_add_policy(udsp, idx);
1545 lnet_udsp_free(udsp);