2 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
4 * Copyright (c) 2011, 2017, Intel Corporation.
6 * Copyright (c) 2018-2020 Data Direct Networks.
8 * This file is part of Lustre, https://wiki.whamcloud.com/
10 * Portals is free software; you can redistribute it and/or
11 * modify it under the terms of version 2 of the GNU General Public
12 * License as published by the Free Software Foundation.
14 * Portals is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * version 2 along with this program; If not, see
21 * http://www.gnu.org/licenses/gpl-2.0.html
25 * User Defined Selection Policies (UDSP) are introduced to add
26 * the ability to finely control traffic. The policies are instantiated
27 * on LNet constructs and allow preference of some constructs
28 * over others as an extension of the selection algorithm.
29 * The order of operation is defined by the selection algorithm logical flow:
31 * 1. Iterate over all the networks that a peer can be reached on
32 * and select the best local network
33 * - The remote network with the highest priority is examined
35 * - The local network with the highest priority is selected
37 * - The local NI with the highest priority is selected
39 * 2. If the peer is a remote peer and has no local networks,
40 * - then select the remote peer network with the highest priority
42 * - Select the highest priority remote peer_ni on the network selected
44 * - Now that the peer's network and NI are decided, select the router
45 * in round robin from the peer NI's preferred router list.
47 * - Select the highest priority local NI on the local net of the
50 * 3. Otherwise for local peers, select the peer_ni from the peer.
51 * - highest priority peer NI is selected
53 * - Select the peer NI which has the local NI selected on its
57 * Accordingly, the User Interface allows for the following:
58 * - Adding a local network udsp: if multiple local networks are
59 * available, each one can have a priority.
60 * - Adding a local NID udsp: after a local network is chosen,
61 * if there are multiple NIs, each one can have a priority.
62 * - Adding a remote NID udsp: assign priority to a peer NID.
63 * - Adding a NID pair udsp: allows specifying local NIDs
64 * to be added to the list on the specified peer NIs.
65 * When selecting a peer NI, the one with the
66 * local NID being used on its list is preferred.
67 * - Adding a Router udsp: similar to the NID pair udsp.
68 * Specified router NIDs are added on the list on the specified peer NIs.
69 * When sending to a remote peer, remote net is selected and the peer NID
70 * is selected. The router which has its nid on the peer NI list
72 * - Deleting a udsp: use the specified policy index to remove it
73 * from the policy list.
75 * Generally, the syntax is as follows
76 * lnetctl policy <add | del | show>
77 * --src: ip2nets syntax specifying the local NID to match
78 * --dst: ip2nets syntax specifying the remote NID to match
79 * --rte: ip2nets syntax specifying the router NID to match
80 * --priority: Priority to apply to rule matches
81 * --idx: Index of where to insert or delete the rule
82 * By default add appends to the end of the rule list
84 * Author: Amir Shehata
87 #include <linux/uaccess.h>
89 #include <lnet/udsp.h>
90 #include <libcfs/libcfs.h>
/*
 * Fields of the context handed to the rule-application callbacks; the
 * functions below read them through a struct udsp_info pointer (udi).
 */
/* peer NI currently being examined */
93 struct lnet_peer_ni *udi_lpni;
/* peer net currently being examined */
94 struct lnet_peer_net *udi_lpn;
/* local NI the rule is applied to */
95 struct lnet_ni *udi_ni;
/* local net the rule is applied to */
96 struct lnet_net *udi_net;
/* NID descriptor the construct is matched against */
97 struct lnet_ud_nid_descr *udi_match;
/* NID descriptor supplying the preferred list, for list actions */
98 struct lnet_ud_nid_descr *udi_action;
/* action type of the rule being applied */
100 enum lnet_udsp_action_type udi_type;
/*
 * Signature shared by the rule-application callbacks: each takes the
 * udsp_info context and returns an int status.
 */
105 typedef int (*udsp_apply_rule)(struct udsp_info *);
/*
 * Slots of the callback arrays (udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM])
 * consulted by lnet_udsp_apply_single_policy().
 */
108 UDSP_APPLY_ON_PEERS = 0,
109 UDSP_APPLY_PRIO_ON_NIS = 1,
110 UDSP_APPLY_RTE_ON_NETS = 2,
/* number of slots; used to size the callback arrays */
111 UDSP_APPLY_MAX_ENUM = 3,
/*
 * Returned by the net-level priority callbacks when the rule cannot be
 * applied at net granularity (e.g. the descriptor has address ranges).
 */
114 #define RULE_NOT_APPLICABLE -1
/*
 * A descriptor with no address ranges only constrains the network, so it
 * is treated as a net-level rule.
 */
117 lnet_udsp_is_net_rule(struct lnet_ud_nid_descr *match)
119 return list_empty(&match->ud_addr_range);
/*
 * Compare two lists of cfs_expr_list entries. The checks are, in order:
 * both lists empty, equal list lengths, equal number of ranges in each
 * paired expression, and equal lo/hi/stride for each paired range.
 * e2 is walked in lockstep with e1.
 */
123 lnet_udsp_expr_list_equal(struct list_head *e1,
124 struct list_head *e2)
126 struct cfs_expr_list *expr1;
127 struct cfs_expr_list *expr2;
128 struct cfs_range_expr *range1, *range2;
130 if (list_empty(e1) && list_empty(e2))
133 if (lnet_get_list_len(e1) != lnet_get_list_len(e2))
136 expr2 = list_first_entry(e2, struct cfs_expr_list, el_link);
138 list_for_each_entry(expr1, e1, el_link) {
139 if (lnet_get_list_len(&expr1->el_exprs) !=
140 lnet_get_list_len(&expr2->el_exprs))
143 range2 = list_first_entry(&expr2->el_exprs,
144 struct cfs_range_expr,
147 list_for_each_entry(range1, &expr1->el_exprs, re_link) {
148 if (range1->re_lo != range2->re_lo ||
149 range1->re_hi != range2->re_hi ||
150 range1->re_stride != range2->re_stride)
/* advance the cursor into expr2's ranges alongside range1 */
152 range2 = list_next_entry(range2, re_link);
/* advance the cursor into e2 alongside expr1 */
154 expr2 = list_next_entry(expr2, el_link);
/*
 * Compare two NID descriptors field by field: net type, net number range
 * list and address range list.
 */
161 lnet_udsp_nid_descr_equal(struct lnet_ud_nid_descr *e1,
162 struct lnet_ud_nid_descr *e2)
164 if (e1->ud_net_id.udn_net_type != e2->ud_net_id.udn_net_type ||
165 !lnet_udsp_expr_list_equal(&e1->ud_net_id.udn_net_num_range,
166 &e2->ud_net_id.udn_net_num_range) ||
167 !lnet_udsp_expr_list_equal(&e1->ud_addr_range, &e2->ud_addr_range))
/*
 * Compare the actions of two policies: the action types are compared
 * first, and for priority actions the priority values as well.
 */
174 lnet_udsp_action_equal(struct lnet_udsp *e1, struct lnet_udsp *e2)
176 if (e1->udsp_action_type != e2->udsp_action_type)
179 if (e1->udsp_action_type == EN_LNET_UDSP_ACTION_PRIORITY &&
180 e1->udsp_action.udsp_priority != e2->udsp_action.udsp_priority)
/*
 * Compare the matching criteria of two policies (src, dst and rte
 * descriptors). lnet_udsp_add_policy() checks the action separately
 * with lnet_udsp_action_equal().
 */
187 lnet_udsp_equal(struct lnet_udsp *e1, struct lnet_udsp *e2)
189 /* check each NID descr */
190 if (!lnet_udsp_nid_descr_equal(&e1->udsp_src, &e2->udsp_src) ||
191 !lnet_udsp_nid_descr_equal(&e1->udsp_dst, &e2->udsp_dst) ||
192 !lnet_udsp_nid_descr_equal(&e1->udsp_rte, &e2->udsp_rte))
/* Report whether a NID descriptor was filled in, judged by its net type. */
198 /* it is enough to look at the net type of the descriptor. If the criteria
199 * is present the net must be specified
202 lnet_udsp_criteria_present(struct lnet_ud_nid_descr *descr)
204 return (descr->ud_net_id.udn_net_type != 0);
/*
 * Try to apply a priority rule to a single local NI (udi->udi_ni): the
 * NI's NID is matched against udi->udi_match and
 * lnet_ni_set_sel_priority_locked() is called for the matching case.
 */
208 lnet_udsp_apply_rule_on_ni(struct udsp_info *udi)
211 struct lnet_ni *ni = udi->udi_ni;
212 struct lnet_ud_nid_descr *ni_match = udi->udi_match;
/* a revert uses -1, which becomes the largest __u32 value */
213 __u32 priority = (udi->udi_revert) ? -1 : udi->udi_priority;
215 rc = cfs_match_nid_net(
216 lnet_nid_to_nid4(&ni->ni_nid),
217 ni_match->ud_net_id.udn_net_type,
218 &ni_match->ud_net_id.udn_net_num_range,
219 &ni_match->ud_addr_range);
223 CDEBUG(D_NET, "apply udsp on ni %s\n",
224 libcfs_nidstr(&ni->ni_nid));
226 /* Detected match. Set NIDs priority */
227 lnet_ni_set_sel_priority_locked(ni, priority);
/*
 * Build the preferred-router list of a local net. Every route found in
 * the_lnet.ln_remote_nets_hash is visited; the gateway's peer NIs, and
 * its primary NID, are matched against rte_action, and a matching
 * gateway's primary NID is added with lnet_net_add_pref_rtr().
 * LNET_LOCK_EX is released before the list is cleared or extended and
 * taken again afterwards, so the caller presumably holds it on entry -
 * TODO confirm.
 */
233 lnet_udsp_apply_rte_list_on_net(struct lnet_net *net,
234 struct lnet_ud_nid_descr *rte_action,
237 struct lnet_remotenet *rnet;
238 struct list_head *rn_list;
239 struct lnet_route *route;
240 struct lnet_peer_ni *lpni;
241 bool cleared = false;
242 lnet_nid_t gw_nid, gw_prim_nid;
246 for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
247 rn_list = &the_lnet.ln_remote_nets_hash[i];
248 list_for_each_entry(rnet, rn_list, lrn_list) {
249 list_for_each_entry(route, &rnet->lrn_routes, lr_list) {
250 /* look if gw nid on the same net matches */
251 gw_prim_nid = route->lr_gateway->lp_primary_nid;
253 while ((lpni = lnet_get_next_peer_ni_locked(route->lr_gateway,
256 if (!lnet_get_net_locked(lpni->lpni_peer_net->lpn_net_id))
258 gw_nid = lnet_nid_to_nid4(&lpni->lpni_nid);
259 rc = cfs_match_nid_net(gw_nid,
260 rte_action->ud_net_id.udn_net_type,
261 &rte_action->ud_net_id.udn_net_num_range,
262 &rte_action->ud_addr_range);
266 /* match gw primary nid on a remote network */
268 gw_nid = gw_prim_nid;
269 rc = cfs_match_nid_net(gw_nid,
270 rte_action->ud_net_id.udn_net_type,
271 &rte_action->ud_net_id.udn_net_num_range,
272 &rte_action->ud_addr_range);
276 lnet_net_unlock(LNET_LOCK_EX);
/* drop the old list if it has not been cleared yet, or when reverting */
277 if (!cleared || revert) {
278 lnet_net_clr_pref_rtrs(net);
281 lnet_net_lock(LNET_LOCK_EX);
285 /* match. Add to pref NIDs */
286 CDEBUG(D_NET, "udsp net->gw: %s->%s\n",
287 libcfs_net2str(net->net_id),
288 libcfs_nid2str(gw_prim_nid));
289 rc = lnet_net_add_pref_rtr(net, gw_prim_nid);
290 lnet_net_lock(LNET_LOCK_EX);
291 /* success if EEXIST return */
292 if (rc && rc != -EEXIST) {
293 CERROR("Failed to add %s to %s pref rtr list\n",
294 libcfs_nid2str(gw_prim_nid),
295 libcfs_net2str(net->net_id));
/*
 * Apply a router rule to the local nets on the_lnet.ln_nets: nets are
 * filtered by net type, then by cfs_match_net() against udi->udi_match,
 * and udi->udi_action is handed to lnet_udsp_apply_rte_list_on_net().
 */
306 lnet_udsp_apply_rte_rule_on_nets(struct udsp_info *udi)
309 int last_failure = 0;
310 struct lnet_net *net;
311 struct lnet_ud_nid_descr *match = udi->udi_match;
312 struct lnet_ud_nid_descr *rte_action = udi->udi_action;
314 list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
315 if (LNET_NETTYP(net->net_id) != match->ud_net_id.udn_net_type)
318 rc = cfs_match_net(net->net_id,
319 match->ud_net_id.udn_net_type,
320 &match->ud_net_id.udn_net_num_range);
324 CDEBUG(D_NET, "apply rule on %s\n",
325 libcfs_net2str(net->net_id));
326 rc = lnet_udsp_apply_rte_list_on_net(net, rte_action,
/*
 * Single-net variant of the router rule: udi->udi_net is matched against
 * udi->udi_match with cfs_match_net() and udi->udi_action is handed to
 * lnet_udsp_apply_rte_list_on_net().
 */
336 lnet_udsp_apply_rte_rule_on_net(struct udsp_info *udi)
339 struct lnet_net *net = udi->udi_net;
340 struct lnet_ud_nid_descr *match = udi->udi_match;
341 struct lnet_ud_nid_descr *rte_action = udi->udi_action;
343 rc = cfs_match_net(net->net_id,
344 match->ud_net_id.udn_net_type,
345 &match->ud_net_id.udn_net_num_range);
349 CDEBUG(D_NET, "apply rule on %s\n",
350 libcfs_net2str(net->net_id));
351 rc = lnet_udsp_apply_rte_list_on_net(net, rte_action,
/*
 * Apply a priority rule to the local net udi->udi_net. Descriptors that
 * carry address ranges are rejected with RULE_NOT_APPLICABLE; otherwise
 * the net id is matched and lnet_net_set_sel_priority_locked() sets the
 * priority (-1 when reverting).
 */
358 lnet_udsp_apply_prio_rule_on_net(struct udsp_info *udi)
361 struct lnet_ud_nid_descr *match = udi->udi_match;
362 struct lnet_net *net = udi->udi_net;
363 __u32 priority = (udi->udi_revert) ? -1 : udi->udi_priority;
365 if (!lnet_udsp_is_net_rule(match))
366 return RULE_NOT_APPLICABLE;
368 rc = cfs_match_net(net->net_id,
369 match->ud_net_id.udn_net_type,
370 &match->ud_net_id.udn_net_num_range);
374 CDEBUG(D_NET, "apply rule on %s\n",
375 libcfs_net2str(net->net_id));
377 lnet_net_set_sel_priority_locked(net, priority);
/*
 * Apply a priority rule to the local nets and their NIs. Nets whose type
 * differs from the descriptor's net type are filtered out; the net-level
 * rule is tried first, and the NI-level rule is run over the net's NI
 * list.
 */
383 lnet_udsp_apply_rule_on_nis(struct udsp_info *udi)
387 struct lnet_net *net;
388 struct lnet_ud_nid_descr *ni_match = udi->udi_match;
389 int last_failure = 0;
391 list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
392 if (LNET_NETTYP(net->net_id) != ni_match->ud_net_id.udn_net_type)
396 if (!lnet_udsp_apply_prio_rule_on_net(udi))
399 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
401 rc = lnet_udsp_apply_rule_on_ni(udi);
/*
 * Build the preferred-router list of a peer NI. For every route in
 * the_lnet.ln_remote_nets_hash the gateway's primary NID is matched
 * against rte_action and, on a match, added with
 * lnet_peer_add_pref_rtr(). LNET_LOCK_EX is released around the
 * clear/add calls and taken again afterwards.
 */
411 lnet_udsp_apply_rte_list_on_lpni(struct lnet_peer_ni *lpni,
412 struct lnet_ud_nid_descr *rte_action,
415 struct lnet_remotenet *rnet;
416 struct list_head *rn_list;
417 struct lnet_route *route;
418 bool cleared = false;
423 for (i = 0; i < LNET_REMOTE_NETS_HASH_SIZE; i++) {
424 rn_list = &the_lnet.ln_remote_nets_hash[i];
425 list_for_each_entry(rnet, rn_list, lrn_list) {
426 list_for_each_entry(route, &rnet->lrn_routes, lr_list) {
427 gw_nid = route->lr_gateway->lp_primary_nid;
428 rc = cfs_match_nid_net(gw_nid,
429 rte_action->ud_net_id.udn_net_type,
430 &rte_action->ud_net_id.udn_net_num_range,
431 &rte_action->ud_addr_range);
434 lnet_net_unlock(LNET_LOCK_EX);
/* drop the old list if it has not been cleared yet, or when reverting */
435 if (!cleared || revert) {
436 CDEBUG(D_NET, "%spref rtr nids from lpni %s\n",
437 (revert) ? "revert " : "clear ",
438 libcfs_nidstr(&lpni->lpni_nid));
439 lnet_peer_clr_pref_rtrs(lpni);
442 lnet_net_lock(LNET_LOCK_EX);
446 CDEBUG(D_NET, "add gw nid %s as preferred for peer %s\n",
447 libcfs_nid2str(gw_nid),
448 libcfs_nidstr(&lpni->lpni_nid));
449 /* match. Add to pref NIDs */
450 rc = lnet_peer_add_pref_rtr(lpni, gw_nid);
451 lnet_net_lock(LNET_LOCK_EX);
452 /* success if EEXIST return */
453 if (rc && rc != -EEXIST) {
454 CERROR("Failed to add %s to %s pref rtr list\n",
455 libcfs_nid2str(gw_nid),
456 libcfs_nidstr(&lpni->lpni_nid));
/*
 * Build the preferred local NID list of a peer NI: every local NI on a
 * net of the descriptor's net type is matched against ni_action and
 * matching NIDs are added with lnet_peer_add_pref_nid(). LNET_LOCK_EX
 * is released around the clear/add calls and taken again afterwards.
 */
467 lnet_udsp_apply_ni_list(struct lnet_peer_ni *lpni,
468 struct lnet_ud_nid_descr *ni_action,
473 struct lnet_net *net;
474 bool cleared = false;
476 list_for_each_entry(net, &the_lnet.ln_nets, net_list) {
477 if (LNET_NETTYP(net->net_id) != ni_action->ud_net_id.udn_net_type)
479 list_for_each_entry(ni, &net->net_ni_list, ni_netlist) {
480 rc = cfs_match_nid_net(
481 lnet_nid_to_nid4(&ni->ni_nid),
482 ni_action->ud_net_id.udn_net_type,
483 &ni_action->ud_net_id.udn_net_num_range,
484 &ni_action->ud_addr_range);
487 lnet_net_unlock(LNET_LOCK_EX);
/* drop the old list if it has not been cleared yet, or when reverting */
488 if (!cleared || revert) {
489 lnet_peer_clr_pref_nids(lpni);
490 CDEBUG(D_NET, "%spref nids from lpni %s\n",
491 (revert) ? "revert " : "clear ",
492 libcfs_nidstr(&lpni->lpni_nid));
495 lnet_net_lock(LNET_LOCK_EX);
499 CDEBUG(D_NET, "add nid %s as preferred for peer %s\n",
500 libcfs_nidstr(&ni->ni_nid),
501 libcfs_nidstr(&lpni->lpni_nid));
502 /* match. Add to pref NIDs */
503 rc = lnet_peer_add_pref_nid(
504 lpni, lnet_nid_to_nid4(&ni->ni_nid));
505 lnet_net_lock(LNET_LOCK_EX);
506 /* success if EEXIST return */
507 if (rc && rc != -EEXIST) {
508 CERROR("Failed to add %s to %s pref nid list\n",
509 libcfs_nidstr(&ni->ni_nid),
510 libcfs_nidstr(&lpni->lpni_nid));
/*
 * Apply a rule to one peer NI (udi->udi_lpni). The NI's NID is matched
 * against udi->udi_match; the peer net id is additionally checked with
 * cfs_match_net(), which is only evaluated when the descriptor has no
 * address ranges. The action is then chosen from udi_type and
 * udi_local: a local preferred list goes through
 * lnet_udsp_apply_ni_list(), the other preferred-list case through
 * lnet_udsp_apply_rte_list_on_lpni(), and
 * lnet_peer_ni_set_selection_priority() is the remaining action.
 */
520 lnet_udsp_apply_rule_on_lpni(struct udsp_info *udi)
523 struct lnet_peer_ni *lpni = udi->udi_lpni;
524 struct lnet_ud_nid_descr *lp_match = udi->udi_match;
525 struct lnet_ud_nid_descr *action = udi->udi_action;
/* a revert uses -1, which becomes the largest __u32 value */
526 __u32 priority = (udi->udi_revert) ? -1 : udi->udi_priority;
527 bool local = udi->udi_local;
528 enum lnet_udsp_action_type type = udi->udi_type;
530 rc = cfs_match_nid_net(lnet_nid_to_nid4(&lpni->lpni_nid),
531 lp_match->ud_net_id.udn_net_type,
532 &lp_match->ud_net_id.udn_net_num_range,
533 &lp_match->ud_addr_range);
535 /* check if looking for a net match */
537 (lnet_get_list_len(&lp_match->ud_addr_range) ||
538 !cfs_match_net(udi->udi_lpn->lpn_net_id,
539 lp_match->ud_net_id.udn_net_type,
540 &lp_match->ud_net_id.udn_net_num_range))) {
544 if (type == EN_LNET_UDSP_ACTION_PREFERRED_LIST && local) {
545 rc = lnet_udsp_apply_ni_list(lpni, action,
549 } else if (type == EN_LNET_UDSP_ACTION_PREFERRED_LIST &&
551 rc = lnet_udsp_apply_rte_list_on_lpni(lpni, action,
556 lnet_peer_ni_set_selection_priority(lpni, priority);
/*
 * Apply a priority rule to the peer net udi->udi_lpn. Preferred-list
 * actions and descriptors with address ranges yield
 * RULE_NOT_APPLICABLE; otherwise the peer net id is matched and
 * lnet_peer_net_set_sel_priority_locked() sets the priority (-1 when
 * reverting).
 */
563 lnet_udsp_apply_rule_on_lpn(struct udsp_info *udi)
566 struct lnet_ud_nid_descr *match = udi->udi_match;
567 struct lnet_peer_net *lpn = udi->udi_lpn;
568 __u32 priority = (udi->udi_revert) ? -1 : udi->udi_priority;
570 if (udi->udi_type == EN_LNET_UDSP_ACTION_PREFERRED_LIST ||
571 !lnet_udsp_is_net_rule(match))
572 return RULE_NOT_APPLICABLE;
574 rc = cfs_match_net(lpn->lpn_net_id,
575 match->ud_net_id.udn_net_type,
576 &match->ud_net_id.udn_net_num_range);
580 CDEBUG(D_NET, "apply rule on lpn %s\n",
581 libcfs_net2str(lpn->lpn_net_id));
582 lnet_peer_net_set_sel_priority_locked(lpn, priority);
/*
 * Apply a rule to the peers known to the system by walking every peer
 * table in the_lnet.ln_peer_tables, then each peer, its peer nets and
 * their peer NIs. Peer nets of a different net type than the descriptor
 * are filtered, the peer-net rule is tried first, and
 * lnet_udsp_apply_rule_on_lpni() is run for each peer NI.
 */
588 lnet_udsp_apply_rule_on_lpnis(struct udsp_info *udi)
590 /* iterate over all the peers in the system and find if any of the
591 * peers match the criteria. If they do, clear the preferred list
592 * and add the new list
594 int lncpt = cfs_percpt_number(the_lnet.ln_peer_tables);
595 struct lnet_ud_nid_descr *lp_match = udi->udi_match;
596 struct lnet_peer_table *ptable;
597 struct lnet_peer_net *lpn;
598 struct lnet_peer_ni *lpni;
599 struct lnet_peer *lp;
600 int last_failure = 0;
604 for (cpt = 0; cpt < lncpt; cpt++) {
605 ptable = the_lnet.ln_peer_tables[cpt];
606 list_for_each_entry(lp, &ptable->pt_peer_list, lp_peer_list) {
607 CDEBUG(D_NET, "udsp examining lp %s\n",
608 libcfs_nid2str(lp->lp_primary_nid));
609 list_for_each_entry(lpn,
612 CDEBUG(D_NET, "udsp examining lpn %s\n",
613 libcfs_net2str(lpn->lpn_net_id));
615 if (LNET_NETTYP(lpn->lpn_net_id) !=
616 lp_match->ud_net_id.udn_net_type)
621 if (!lnet_udsp_apply_rule_on_lpn(udi))
624 list_for_each_entry(lpni,
627 CDEBUG(D_NET, "udsp examining lpni %s\n",
628 libcfs_nidstr(&lpni->lpni_nid));
629 udi->udi_lpni = lpni;
630 rc = lnet_udsp_apply_rule_on_lpni(udi);
/*
 * Classify one policy by which descriptors it carries and dispatch it to
 * the matching callback in cbs[]:
 *   dst + src : preferred local NID list -> cbs[UDSP_APPLY_ON_PEERS]
 *   dst + rte : preferred router list    -> cbs[UDSP_APPLY_ON_PEERS]
 *   dst only  : peer priority            -> cbs[UDSP_APPLY_ON_PEERS]
 *   src only  : local priority           -> cbs[UDSP_APPLY_PRIO_ON_NIS]
 * Anything else is logged as "Bad UDSP policy". The two list rules
 * expect EN_LNET_UDSP_ACTION_PREFERRED_LIST and the two priority rules
 * EN_LNET_UDSP_ACTION_PRIORITY. udi is filled in (match, action, type,
 * priority, local) before the callback runs.
 * NOTE(review): none of these branches uses the UDSP_APPLY_RTE_ON_NETS
 * slot - confirm whether that is intended.
 */
642 lnet_udsp_apply_single_policy(struct lnet_udsp *udsp, struct udsp_info *udi,
643 udsp_apply_rule *cbs)
647 if (lnet_udsp_criteria_present(&udsp->udsp_dst) &&
648 lnet_udsp_criteria_present(&udsp->udsp_src)) {
650 if (!cbs[UDSP_APPLY_ON_PEERS])
653 if (udsp->udsp_action_type !=
654 EN_LNET_UDSP_ACTION_PREFERRED_LIST) {
655 CERROR("Bad action type. Expected %d got %d\n",
656 EN_LNET_UDSP_ACTION_PREFERRED_LIST,
657 udsp->udsp_action_type);
/* peers matching dst get the local NIDs matching src as preferred */
660 udi->udi_match = &udsp->udsp_dst;
661 udi->udi_action = &udsp->udsp_src;
662 udi->udi_type = EN_LNET_UDSP_ACTION_PREFERRED_LIST;
663 udi->udi_local = true;
665 CDEBUG(D_NET, "applying udsp (%p) dst->src\n",
667 rc = cbs[UDSP_APPLY_ON_PEERS](udi);
670 } else if (lnet_udsp_criteria_present(&udsp->udsp_dst) &&
671 lnet_udsp_criteria_present(&udsp->udsp_rte)) {
673 if (!cbs[UDSP_APPLY_ON_PEERS])
676 if (udsp->udsp_action_type !=
677 EN_LNET_UDSP_ACTION_PREFERRED_LIST) {
678 CERROR("Bad action type. Expected %d got %d\n",
679 EN_LNET_UDSP_ACTION_PREFERRED_LIST,
680 udsp->udsp_action_type);
/*
 * NOTE(review): this branch is only reached when dst is present and
 * the dst + src test above failed, so src cannot be present here; the
 * message also names dst although the conflict would be src vs rte.
 */
684 if (lnet_udsp_criteria_present(&udsp->udsp_src)) {
685 CERROR("only one of src or dst can be specified\n");
/* peers matching dst get the routers matching rte as preferred */
688 udi->udi_match = &udsp->udsp_dst;
689 udi->udi_action = &udsp->udsp_rte;
690 udi->udi_type = EN_LNET_UDSP_ACTION_PREFERRED_LIST;
691 udi->udi_local = false;
693 CDEBUG(D_NET, "applying udsp (%p) dst->rte\n",
695 rc = cbs[UDSP_APPLY_ON_PEERS](udi);
698 } else if (lnet_udsp_criteria_present(&udsp->udsp_dst)) {
699 /* destination priority rule */
700 if (!cbs[UDSP_APPLY_ON_PEERS])
703 if (udsp->udsp_action_type !=
704 EN_LNET_UDSP_ACTION_PRIORITY) {
705 CERROR("Bad action type. Expected %d got %d\n",
706 EN_LNET_UDSP_ACTION_PRIORITY,
707 udsp->udsp_action_type);
710 udi->udi_match = &udsp->udsp_dst;
711 udi->udi_type = EN_LNET_UDSP_ACTION_PRIORITY;
/*
 * NOTE(review): the action type was already tested against
 * EN_LNET_UDSP_ACTION_PRIORITY a few lines up; if that test bails out
 * on a mismatch this re-test can never be true - confirm.
 */
712 if (udsp->udsp_action_type !=
713 EN_LNET_UDSP_ACTION_PRIORITY) {
714 udi->udi_priority = 0;
716 udi->udi_priority = udsp->udsp_action.udsp_priority;
718 udi->udi_local = true;
720 CDEBUG(D_NET, "applying udsp (%p) on destination\n",
722 rc = cbs[UDSP_APPLY_ON_PEERS](udi);
725 } else if (lnet_udsp_criteria_present(&udsp->udsp_src)) {
726 /* source priority rule */
727 if (!cbs[UDSP_APPLY_PRIO_ON_NIS])
730 if (udsp->udsp_action_type !=
731 EN_LNET_UDSP_ACTION_PRIORITY) {
732 CERROR("Bad action type. Expected %d got %d\n",
733 EN_LNET_UDSP_ACTION_PRIORITY,
734 udsp->udsp_action_type);
737 udi->udi_match = &udsp->udsp_src;
738 udi->udi_type = EN_LNET_UDSP_ACTION_PRIORITY;
/* NOTE(review): same repeated action-type test as the destination branch */
739 if (udsp->udsp_action_type !=
740 EN_LNET_UDSP_ACTION_PRIORITY) {
741 udi->udi_priority = 0;
743 udi->udi_priority = udsp->udsp_action.udsp_priority;
745 udi->udi_local = true;
747 CDEBUG(D_NET, "applying udsp (%p) on source\n",
749 rc = cbs[UDSP_APPLY_PRIO_ON_NIS](udi);
751 CERROR("Bad UDSP policy\n");
/*
 * Apply either one policy or all of them. The first call handles the
 * single-policy case (presumably when udsp is non-NULL - the per-construct
 * callers pass NULL); otherwise the_lnet.ln_udsp_list is walked in
 * reverse and each policy is applied in turn.
 */
759 lnet_udsp_apply_policies_helper(struct lnet_udsp *udsp, struct udsp_info *udi,
760 udsp_apply_rule *cbs)
763 int last_failure = 0;
766 return lnet_udsp_apply_single_policy(udsp, udi, cbs);
768 list_for_each_entry_reverse(udsp,
769 &the_lnet.ln_udsp_list,
771 rc = lnet_udsp_apply_single_policy(udsp, udi, cbs);
/*
 * Apply policies on behalf of a single local NI. The context is zeroed,
 * only the UDSP_APPLY_PRIO_ON_NIS slot is populated (with the per-NI
 * callback), and the helper is called with a NULL udsp.
 */
780 lnet_udsp_apply_policies_on_ni(struct lnet_ni *ni)
782 struct udsp_info udi;
783 udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM] = {NULL};
785 memset(&udi, 0, sizeof(udi));
789 cbs[UDSP_APPLY_PRIO_ON_NIS] = lnet_udsp_apply_rule_on_ni;
791 return lnet_udsp_apply_policies_helper(NULL, &udi, cbs);
/*
 * Apply policies on behalf of a single local net. The context is zeroed,
 * the net-level priority and router callbacks are installed in the
 * UDSP_APPLY_PRIO_ON_NIS and UDSP_APPLY_RTE_ON_NETS slots, and the
 * helper is called with a NULL udsp.
 */
795 lnet_udsp_apply_policies_on_net(struct lnet_net *net)
797 struct udsp_info udi;
798 udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM] = {NULL};
800 memset(&udi, 0, sizeof(udi));
804 cbs[UDSP_APPLY_PRIO_ON_NIS] = lnet_udsp_apply_prio_rule_on_net;
805 cbs[UDSP_APPLY_RTE_ON_NETS] = lnet_udsp_apply_rte_rule_on_net;
807 return lnet_udsp_apply_policies_helper(NULL, &udi, cbs);
/*
 * Apply policies on behalf of a single peer NI. The context is zeroed,
 * only the UDSP_APPLY_ON_PEERS slot is populated (with the per-peer-NI
 * callback), and the helper is called with a NULL udsp.
 */
811 lnet_udsp_apply_policies_on_lpni(struct lnet_peer_ni *lpni)
813 struct udsp_info udi;
814 udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM] = {NULL};
816 memset(&udi, 0, sizeof(udi));
820 cbs[UDSP_APPLY_ON_PEERS] = lnet_udsp_apply_rule_on_lpni;
822 return lnet_udsp_apply_policies_helper(NULL, &udi, cbs);
/*
 * Apply policies on behalf of a single peer net. The context is zeroed,
 * only the UDSP_APPLY_ON_PEERS slot is populated (with the per-peer-net
 * callback), and the helper is called with a NULL udsp.
 */
826 lnet_udsp_apply_policies_on_lpn(struct lnet_peer_net *lpn)
828 struct udsp_info udi;
829 udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM] = {NULL};
831 memset(&udi, 0, sizeof(udi));
835 cbs[UDSP_APPLY_ON_PEERS] = lnet_udsp_apply_rule_on_lpn;
837 return lnet_udsp_apply_policies_helper(NULL, &udi, cbs);
/*
 * Apply (or, with revert set, undo) policies across all constructs. The
 * list-walking callbacks are installed in all three slots and the
 * helper runs with LNET_LOCK_EX held.
 */
841 lnet_udsp_apply_policies(struct lnet_udsp *udsp, bool revert)
844 struct udsp_info udi;
845 udsp_apply_rule cbs[UDSP_APPLY_MAX_ENUM] = {NULL};
847 memset(&udi, 0, sizeof(udi));
849 cbs[UDSP_APPLY_ON_PEERS] = lnet_udsp_apply_rule_on_lpnis;
850 cbs[UDSP_APPLY_PRIO_ON_NIS] = lnet_udsp_apply_rule_on_nis;
851 cbs[UDSP_APPLY_RTE_ON_NETS] = lnet_udsp_apply_rte_rule_on_nets;
853 udi.udi_revert = revert;
855 lnet_net_lock(LNET_LOCK_EX);
856 rc = lnet_udsp_apply_policies_helper(udsp, &udi, cbs);
857 lnet_net_unlock(LNET_LOCK_EX);
/*
 * Search the_lnet.ln_udsp_list for the policy at position idx, logging
 * each entry visited and the entry found.
 */
863 lnet_udsp_get_policy(int idx)
866 struct lnet_udsp *udsp = NULL;
869 CDEBUG(D_NET, "Get UDSP at idx = %d\n", idx);
874 list_for_each_entry(udsp, &the_lnet.ln_udsp_list, udsp_on_list) {
875 CDEBUG(D_NET, "iterating over upsp %d:%d:%d\n",
876 udsp->udsp_idx, i, idx);
884 CDEBUG(D_NET, "Found UDSP (%p)\n", udsp);
/*
 * Add a policy to the_lnet.ln_udsp_list. While scanning the list, an
 * existing entry with the same criteria whose action differs only in
 * priority has its priority updated in place. Otherwise the new policy
 * is linked in front of the chosen insertion point, or appended at the
 * tail, and the resulting list is dumped to the debug log.
 */
893 lnet_udsp_add_policy(struct lnet_udsp *new, int idx)
895 struct lnet_udsp *udsp;
896 struct lnet_udsp *insert = NULL;
899 list_for_each_entry(udsp, &the_lnet.ln_udsp_list, udsp_on_list) {
900 CDEBUG(D_NET, "found udsp i = %d:%d, idx = %d\n",
901 i, udsp->udsp_idx, idx);
907 if (lnet_udsp_equal(udsp, new)) {
908 if (!lnet_udsp_action_equal(udsp, new) &&
909 udsp->udsp_action_type == EN_LNET_UDSP_ACTION_PRIORITY &&
910 new->udsp_action_type == EN_LNET_UDSP_ACTION_PRIORITY) {
911 udsp->udsp_action.udsp_priority = new->udsp_action.udsp_priority;
912 CDEBUG(D_NET, "udsp: %p index %d updated priority to %d\n",
915 udsp->udsp_action.udsp_priority);
/* adding after insert's predecessor places new right before insert */
923 list_add(&new->udsp_on_list, insert->udsp_on_list.prev);
925 list_for_each_entry(udsp,
926 &the_lnet.ln_udsp_list,
935 list_add_tail(&new->udsp_on_list, &the_lnet.ln_udsp_list);
939 CDEBUG(D_NET, "udsp: %p added at index %d\n", new, new->udsp_idx);
941 CDEBUG(D_NET, "udsp list:\n");
942 list_for_each_entry(udsp, &the_lnet.ln_udsp_list, udsp_on_list)
943 CDEBUG(D_NET, "udsp %p:%d\n", udsp, udsp->udsp_idx);
/*
 * Delete the policy at index idx: the first list entry whose udsp_idx
 * equals idx is unlinked, its effects are reverted with
 * lnet_udsp_apply_policies(udsp, true), and it is freed.
 * lnet_udsp_destroy(false) is also called from here, presumably for a
 * "delete everything" index value - TODO confirm.
 */
949 lnet_udsp_del_policy(int idx)
951 struct lnet_udsp *udsp;
952 struct lnet_udsp *tmp;
953 bool removed = false;
956 lnet_udsp_destroy(false);
960 CDEBUG(D_NET, "del udsp at idx = %d\n", idx);
962 list_for_each_entry_safe(udsp,
964 &the_lnet.ln_udsp_list,
968 if (udsp->udsp_idx == idx && !removed) {
969 list_del_init(&udsp->udsp_on_list);
970 lnet_udsp_apply_policies(udsp, true);
971 lnet_udsp_free(udsp);
/*
 * Fill info with the selection state of a local NI: its priority, its
 * net's priority and the net's preferred router NIDs, storing entries
 * only while the index is below LNET_MAX_SHOW_NUM_NID.
 */
980 lnet_udsp_get_ni_info(struct lnet_ioctl_construct_udsp_info *info,
983 struct lnet_nid_list *ne;
984 struct lnet_net *net = ni->ni_net;
989 info->cud_nid_priority = ni->ni_sel_priority;
991 info->cud_net_priority = ni->ni_net->net_sel_priority;
992 list_for_each_entry(ne, &net->net_rtr_pref_nids, nl_list) {
993 if (i < LNET_MAX_SHOW_NUM_NID)
994 info->cud_pref_rtr_nid[i] = ne->nl_nid;
/*
 * Fill info with the selection state of a peer NI: its priority, its
 * preferred NIDs (held inline when there is exactly one, on a list when
 * there are more), its preferred router NIDs and its peer net's
 * priority. Array entries are stored only while the index is below
 * LNET_MAX_SHOW_NUM_NID.
 */
1003 lnet_udsp_get_peer_info(struct lnet_ioctl_construct_udsp_info *info,
1004 struct lnet_peer_ni *lpni)
1006 struct lnet_nid_list *ne;
1009 /* peer tree structure needs to be in existence */
1010 LASSERT(lpni && lpni->lpni_peer_net &&
1011 lpni->lpni_peer_net->lpn_peer);
1013 info->cud_nid_priority = lpni->lpni_sel_priority;
1014 CDEBUG(D_NET, "lpni %s has %d pref nids\n",
1015 libcfs_nidstr(&lpni->lpni_nid),
1016 lpni->lpni_pref_nnids);
1017 if (lpni->lpni_pref_nnids == 1) {
1018 info->cud_pref_nid[0] = lpni->lpni_pref.nid;
1019 } else if (lpni->lpni_pref_nnids > 1) {
1020 struct list_head *list = &lpni->lpni_pref.nids;
1022 list_for_each_entry(ne, list, nl_list) {
1023 if (i < LNET_MAX_SHOW_NUM_NID)
1024 info->cud_pref_nid[i] = ne->nl_nid;
1032 list_for_each_entry(ne, &lpni->lpni_rtr_pref_nids, nl_list) {
1033 if (i < LNET_MAX_SHOW_NUM_NID)
1034 info->cud_pref_rtr_nid[i] = ne->nl_nid;
1040 info->cud_net_priority = lpni->lpni_peer_net->lpn_sel_priority;
/*
 * Look up the construct named by info->cud_nid and report its UDSP
 * state: a local NI when cud_peer is clear, a peer NI otherwise. For a
 * peer NI, lnet_peer_ni_decref_locked() is called once the information
 * has been copied.
 */
1044 lnet_udsp_get_construct_info(struct lnet_ioctl_construct_udsp_info *info)
1047 struct lnet_peer_ni *lpni;
1050 if (!info->cud_peer) {
1051 ni = lnet_nid2ni_locked(info->cud_nid, 0);
1053 lnet_udsp_get_ni_info(info, ni);
1055 lpni = lnet_find_peer_ni_locked(info->cud_nid);
1057 CDEBUG(D_NET, "nid %s is not found\n",
1058 libcfs_nid2str(info->cud_nid));
1060 lnet_udsp_get_peer_info(info, lpni);
1061 lnet_peer_ni_decref_locked(lpni);
/*
 * Allocate a zeroed policy from lnet_udsp_cachep and initialise its list
 * linkage and the range lists of the src, dst and rte descriptors.
 */
1068 lnet_udsp_alloc(void)
1070 struct lnet_udsp *udsp;
1072 udsp = kmem_cache_alloc(lnet_udsp_cachep, GFP_NOFS | __GFP_ZERO);
1077 INIT_LIST_HEAD(&udsp->udsp_on_list);
1078 INIT_LIST_HEAD(&udsp->udsp_src.ud_addr_range);
1079 INIT_LIST_HEAD(&udsp->udsp_src.ud_net_id.udn_net_num_range);
1080 INIT_LIST_HEAD(&udsp->udsp_dst.ud_addr_range);
1081 INIT_LIST_HEAD(&udsp->udsp_dst.ud_net_id.udn_net_num_range);
1082 INIT_LIST_HEAD(&udsp->udsp_rte.ud_addr_range);
1083 INIT_LIST_HEAD(&udsp->udsp_rte.ud_net_id.udn_net_num_range);
1085 CDEBUG(D_MALLOC, "udsp alloc %p\n", udsp);
/*
 * Release the single buffer backing a NID descriptor (ud_mem_size
 * bytes). The buffer is reached through the first entry of the net
 * number range list when that list is non-empty, otherwise through the
 * first entry of the address range list.
 */
1090 lnet_udsp_nid_descr_free(struct lnet_ud_nid_descr *nid_descr)
1092 struct list_head *net_range = &nid_descr->ud_net_id.udn_net_num_range;
1094 if (!lnet_udsp_criteria_present(nid_descr))
1097 /* memory management is a bit tricky here. When we allocate the
1098 * memory to store the NID descriptor we allocate a large buffer
1099 * for all the data, so we need to free the entire buffer at
1100 * once. If the net is present the net_range->next points to that
1101 * buffer otherwise if the ud_addr_range is present then it's the
1102 * ud_addr_range.next
1104 if (!list_empty(net_range))
1105 LIBCFS_FREE(net_range->next, nid_descr->ud_mem_size);
1106 else if (!list_empty(&nid_descr->ud_addr_range))
1107 LIBCFS_FREE(nid_descr->ud_addr_range.next,
1108 nid_descr->ud_mem_size);
/*
 * Free the src, dst and rte NID descriptors of a policy, then return the
 * policy itself to lnet_udsp_cachep.
 */
1112 lnet_udsp_free(struct lnet_udsp *udsp)
1114 lnet_udsp_nid_descr_free(&udsp->udsp_src);
1115 lnet_udsp_nid_descr_free(&udsp->udsp_dst);
1116 lnet_udsp_nid_descr_free(&udsp->udsp_rte);
1118 CDEBUG(D_MALLOC, "udsp free %p\n", udsp);
1119 kmem_cache_free(lnet_udsp_cachep, udsp);
/*
 * Remove every policy from the_lnet.ln_udsp_list and free it. Each
 * policy's effects are reverted with lnet_udsp_apply_policies(udsp,
 * true); that call is presumably conditional on the shutdown argument -
 * TODO confirm.
 */
1123 lnet_udsp_destroy(bool shutdown)
1125 struct lnet_udsp *udsp, *tmp;
1127 CDEBUG(D_NET, "Destroying UDSPs in the system\n");
1129 list_for_each_entry_safe(udsp, tmp, &the_lnet.ln_udsp_list,
1131 list_del(&udsp->udsp_on_list);
1133 lnet_udsp_apply_policies(udsp, true);
1134 lnet_udsp_free(udsp);
/*
 * Compute how many bytes a NID descriptor occupies once marshaled: the
 * fixed lnet_ioctl_udsp_descr header, one lnet_expressions per counted
 * expression (expr_count) and one lnet_range_expr per range found in
 * the net and address expressions (range_count).
 */
1139 lnet_size_marshaled_nid_descr(struct lnet_ud_nid_descr *descr)
1141 struct cfs_expr_list *expr;
1143 int range_count = 0;
1144 size_t size = sizeof(struct lnet_ioctl_udsp_descr);
1146 if (!lnet_udsp_criteria_present(descr))
1149 /* we always have one net expression */
1150 if (!list_empty(&descr->ud_net_id.udn_net_num_range)) {
1151 expr = list_first_entry(&descr->ud_net_id.udn_net_num_range,
1152 struct cfs_expr_list, el_link);
1154 /* count the number of cfs_range_expr in the net expression */
1155 range_count = lnet_get_list_len(&expr->el_exprs);
1158 /* count the number of cfs_range_expr in the address expressions */
1159 list_for_each_entry(expr, &descr->ud_addr_range, el_link) {
1161 range_count += lnet_get_list_len(&expr->el_exprs);
1164 size += (sizeof(struct lnet_expressions) * expr_count);
1165 size += (sizeof(struct lnet_range_expr) * range_count);
/*
 * Total marshaled size of a policy: the lnet_ioctl_udsp header plus the
 * marshaled size of its src, dst and rte descriptors.
 */
1171 lnet_get_udsp_size(struct lnet_udsp *udsp)
1173 size_t size = sizeof(struct lnet_ioctl_udsp);
1175 size += lnet_size_marshaled_nid_descr(&udsp->udsp_src);
1176 size += lnet_size_marshaled_nid_descr(&udsp->udsp_dst);
1177 size += lnet_size_marshaled_nid_descr(&udsp->udsp_rte);
1179 CDEBUG(D_NET, "get udsp (%p) size: %d\n", udsp, (int)size);
/*
 * Copy every range of one expression to the user buffer as
 * lnet_range_expr records, advancing *bulk and shrinking *bulk_size by
 * the size of each record written.
 */
1185 copy_exprs(struct cfs_expr_list *expr, void __user **bulk,
1188 struct cfs_range_expr *range;
1189 struct lnet_range_expr range_expr;
1191 /* copy over the net range expressions to the bulk */
1192 list_for_each_entry(range, &expr->el_exprs, re_link) {
1193 range_expr.re_lo = range->re_lo;
1194 range_expr.re_hi = range->re_hi;
1195 range_expr.re_stride = range->re_stride;
1196 CDEBUG(D_NET, "Copy Range %u:%u:%u\n",
1197 range_expr.re_lo, range_expr.re_hi,
1198 range_expr.re_stride);
1199 if (copy_to_user(*bulk, &range_expr, sizeof(range_expr))) {
1200 CDEBUG(D_NET, "Failed to copy range_expr\n");
1203 *bulk += sizeof(range_expr);
1204 *bulk_size -= sizeof(range_expr);
/*
 * Marshal one NID descriptor into the user buffer: a
 * lnet_ioctl_udsp_descr header tagged with type, then the net number
 * ranges, then one lnet_expressions count plus its ranges per address
 * expression. A descriptor without criteria is written as the header
 * alone, carrying only the type tag. *bulk and *bulk_size track the
 * write position and the remaining space.
 */
1211 copy_nid_range(struct lnet_ud_nid_descr *nid_descr, char *type,
1212 void __user **bulk, __u32 *bulk_size)
1214 struct lnet_ioctl_udsp_descr ioc_udsp_descr;
1215 struct cfs_expr_list *expr;
1216 struct lnet_expressions ioc_expr;
1221 memset(&ioc_udsp_descr, 0, sizeof(ioc_udsp_descr));
/*
 * lnet_udsp_marshal() passes 3-character tags ("SRC", "DST", "RTE");
 * with the terminating NUL they are read here as one __u32
 */
1222 ioc_udsp_descr.iud_src_hdr.ud_descr_type = *(__u32 *)type;
1224 /* if criteria not present, copy over the static part of the NID
1227 if (!lnet_udsp_criteria_present(nid_descr)) {
1228 CDEBUG(D_NET, "Descriptor %u:%u:%u:%u\n",
1229 ioc_udsp_descr.iud_src_hdr.ud_descr_type,
1230 ioc_udsp_descr.iud_src_hdr.ud_descr_count,
1231 ioc_udsp_descr.iud_net.ud_net_type,
1232 ioc_udsp_descr.iud_net.ud_net_num_expr.le_count);
1233 if (copy_to_user(*bulk, &ioc_udsp_descr,
1234 sizeof(ioc_udsp_descr))) {
1235 CDEBUG(D_NET, "failed to copy ioc_udsp_descr\n");
1238 *bulk += sizeof(ioc_udsp_descr);
1239 *bulk_size -= sizeof(ioc_udsp_descr);
1243 expr_count = lnet_get_list_len(&nid_descr->ud_addr_range);
1245 /* copy the net information */
1246 if (!list_empty(&nid_descr->ud_net_id.udn_net_num_range)) {
1247 expr = list_first_entry(&nid_descr->ud_net_id.udn_net_num_range,
1248 struct cfs_expr_list, el_link);
1249 net_expr_count = lnet_get_list_len(&expr->el_exprs);
1254 /* set the total expression count */
1255 ioc_udsp_descr.iud_src_hdr.ud_descr_count = expr_count;
1256 ioc_udsp_descr.iud_net.ud_net_type =
1257 nid_descr->ud_net_id.udn_net_type;
1258 ioc_udsp_descr.iud_net.ud_net_num_expr.le_count = net_expr_count;
1260 CDEBUG(D_NET, "Descriptor %u:%u:%u:%u\n",
1261 ioc_udsp_descr.iud_src_hdr.ud_descr_type,
1262 ioc_udsp_descr.iud_src_hdr.ud_descr_count,
1263 ioc_udsp_descr.iud_net.ud_net_type,
1264 ioc_udsp_descr.iud_net.ud_net_num_expr.le_count);
1266 /* copy over the header info to the bulk */
1267 if (copy_to_user(*bulk, &ioc_udsp_descr, sizeof(ioc_udsp_descr))) {
1268 CDEBUG(D_NET, "Failed to copy data\n");
1271 *bulk += sizeof(ioc_udsp_descr);
1272 *bulk_size -= sizeof(ioc_udsp_descr);
1274 /* copy over the net num expression if it exists */
1275 if (net_expr_count) {
1276 rc = copy_exprs(expr, bulk, bulk_size);
1281 /* copy the address range */
1282 list_for_each_entry(expr, &nid_descr->ud_addr_range, el_link) {
/* each address expression is preceded by its range count */
1283 ioc_expr.le_count = lnet_get_list_len(&expr->el_exprs);
1284 if (copy_to_user(*bulk, &ioc_expr, sizeof(ioc_expr))) {
1285 CDEBUG(D_NET, "failex to copy ioc_expr\n");
1288 *bulk += sizeof(ioc_expr);
1289 *bulk_size -= sizeof(ioc_expr);
1291 rc = copy_exprs(expr, bulk, bulk_size);
/*
 * Marshal a policy for user space. The buffer size supplied by the
 * caller (ioc_len plus iou_bulk_size) is compared with
 * lnet_get_udsp_size(); the index, action type and priority go into
 * ioc_udsp itself, and the src, dst and rte descriptors are written to
 * the bulk area in that order, after which the remaining size is
 * asserted to be zero.
 */
1300 lnet_udsp_marshal(struct lnet_udsp *udsp, struct lnet_ioctl_udsp *ioc_udsp)
1309 bulk = ioc_udsp->iou_bulk;
1310 bulk_size = ioc_udsp->iou_hdr.ioc_len +
1311 ioc_udsp->iou_bulk_size;
1313 CDEBUG(D_NET, "marshal udsp (%p)\n", udsp);
1314 CDEBUG(D_NET, "MEM -----> bulk: %p:0x%x\n", bulk, bulk_size);
1315 /* make sure user space allocated enough buffer to marshal the
1318 if (bulk_size != lnet_get_udsp_size(udsp)) {
1323 ioc_udsp->iou_idx = udsp->udsp_idx;
1324 ioc_udsp->iou_action_type = udsp->udsp_action_type;
1325 ioc_udsp->iou_action.priority = udsp->udsp_action.udsp_priority;
1327 bulk_size -= sizeof(*ioc_udsp);
1329 rc = copy_nid_range(&udsp->udsp_src, "SRC", &bulk, &bulk_size);
1333 rc = copy_nid_range(&udsp->udsp_dst, "DST", &bulk, &bulk_size);
1337 rc = copy_nid_range(&udsp->udsp_rte, "RTE", &bulk, &bulk_size);
1341 CDEBUG(D_NET, "MEM <----- bulk: %p\n", bulk);
1343 /* we should've consumed the entire buffer */
1344 LASSERT(bulk_size == 0);
1348 CERROR("Failed to marshal udsp: %d\n", rc);
/*
 * Rebuild one cfs_expr_list from marshaled data. A count of 0 is tested
 * first; a count of -1 means the count is taken from a lnet_expressions
 * record (e->le_count), which *bulk is then advanced past. The
 * expression is placed in *buf and appended to list, then range_count
 * lnet_range_expr records are converted into cfs_range_expr entries,
 * with *bulk and *buf advanced past each item consumed or produced.
 */
1353 copy_range_info(void **bulk, void **buf, struct list_head *list,
1356 struct lnet_range_expr *range_expr;
1357 struct cfs_range_expr *range;
1358 struct cfs_expr_list *exprs;
1359 int range_count = count;
1362 if (range_count == 0)
1365 if (range_count == -1) {
1366 struct lnet_expressions *e;
1369 range_count = e->le_count;
1370 *bulk += sizeof(*e);
1374 INIT_LIST_HEAD(&exprs->el_link);
1375 INIT_LIST_HEAD(&exprs->el_exprs);
1376 list_add_tail(&exprs->el_link, list);
1377 *buf += sizeof(*exprs);
1379 for (i = 0; i < range_count; i++) {
1382 INIT_LIST_HEAD(&range->re_link);
1383 range->re_lo = range_expr->re_lo;
1384 range->re_hi = range_expr->re_hi;
1385 range->re_stride = range_expr->re_stride;
1386 CDEBUG(D_NET, "Copy Range %u:%u:%u\n",
1390 list_add_tail(&range->re_link, &exprs->el_exprs);
1391 *bulk += sizeof(*range_expr);
1392 *buf += sizeof(*range);
/*
 * Demarshal one NID descriptor from the ioctl bulk buffer into nid_descr.
 *
 * type is the expected descriptor tag (the callers visible in this file
 * pass SRC, DST and RTE). Its first four bytes, read as a __u32, are
 * compared with the ud_descr_type stored in the buffer.
 *
 * A descriptor whose net type is 0 carries no criteria: only its fixed
 * size part is stepped over. Otherwise the function works in two passes.
 * The first pass walks the net number ranges and every address expression
 * to check that they fit in *bulk_size and to count the ranges. The second
 * pass allocates one buffer sized for expr_count + 1 cfs_expr_list entries
 * plus range_count cfs_range_expr entries, records that size in
 * ud_mem_size, and fills it through copy_range_info().
 *
 * *bulk is advanced past the descriptor and *bulk_size is reduced to the
 * space left after it.
 *
 * NOTE(review): in the bad descriptor type message the second and third
 * characters are produced with left shifts (by 4 and by 8) followed by a
 * cast to __u8, so the low byte of the shift by 8 is always zero and the
 * tag is not printed as intended. Right shifts by 8 and 16 look like the
 * intent - confirm and fix.
 * NOTE(review): the le_count values are read from the caller buffer and
 * the size arithmetic on them has no visible overflow check - verify.
 * NOTE(review): error branches, return statements and the assignment of
 * exprs are not visible in this excerpt.
 */
1397 copy_ioc_udsp_descr(struct lnet_ud_nid_descr *nid_descr, char *type,
1398 void **bulk, __u32 *bulk_size)
1400 struct lnet_ioctl_udsp_descr *ioc_nid = *bulk;
1401 struct lnet_expressions *exprs;
1404 int range_count = 0;
1407 int remaining_size = *bulk_size;
1411 size_t range_expr_s = sizeof(struct lnet_range_expr);
1412 size_t lnet_exprs_s = sizeof(struct lnet_expressions);
1414 CDEBUG(D_NET, "%s: bulk = %p:%u\n", type, *bulk, *bulk_size);
1416 /* criteria not present, skip over the static part of the
1417 * bulk, which is included for each NID descriptor
1419 if (ioc_nid->iud_net.ud_net_type == 0) {
1420 remaining_size -= sizeof(*ioc_nid);
1421 if (remaining_size < 0) {
1422 CERROR("Truncated userspace udsp buffer given\n");
1425 *bulk += sizeof(*ioc_nid);
1426 *bulk_size = remaining_size;
1430 descr_type = ioc_nid->iud_src_hdr.ud_descr_type;
1431 if (descr_type != *(__u32 *)type) {
1432 CERROR("Bad NID descriptor type. Expected %s, given %c%c%c\n",
1433 type, (__u8)descr_type, (__u8)(descr_type << 4),
1434 (__u8)(descr_type << 8));
1438 /* calculate the total size to verify we have enough buffer.
1439 * Start off by finding how many ranges there are for the net
1442 range_count = ioc_nid->iud_net.ud_net_num_expr.le_count;
1443 size = sizeof(*ioc_nid) + (range_count * range_expr_s);
1444 remaining_size -= size;
1445 if (remaining_size < 0) {
1446 CERROR("Truncated userspace udsp buffer given\n");
1450 CDEBUG(D_NET, "Total net num ranges in %s: %d:%u\n", type,
1452 /* the number of expressions for the NID, e.g. 4 for IP, 1 for GNI */
1453 expr_count = ioc_nid->iud_src_hdr.ud_descr_count;
1454 CDEBUG(D_NET, "addr as %d exprs\n", expr_count);
1455 /* point tmp to the beginning of the NID expressions */
/* sizing pass: account for each address expression header and its ranges */
1457 for (i = 0; i < expr_count; i++) {
1458 /* get the number of ranges per expression */
1460 range_count += exprs->le_count;
1461 size = (range_expr_s * exprs->le_count) + lnet_exprs_s;
1462 remaining_size -= size;
1463 CDEBUG(D_NET, "expr %d:%d:%u:%d:%d\n", i, exprs->le_count,
1464 size, remaining_size, range_count);
1465 if (remaining_size < 0) {
1466 CERROR("Truncated userspace udsp buffer given\n");
/* report the space that is left after this descriptor */
1472 *bulk_size = remaining_size;
1474 /* copy over the net type */
1475 nid_descr->ud_net_id.udn_net_type = ioc_nid->iud_net.ud_net_type;
1477 CDEBUG(D_NET, "%u\n", nid_descr->ud_net_id.udn_net_type);
1479 /* allocate the total memory required to copy this NID descriptor */
1480 alloc_size = (sizeof(struct cfs_expr_list) * (expr_count + 1)) +
1481 (sizeof(struct cfs_range_expr) * (range_count));
1482 LIBCFS_ALLOC(buf, alloc_size);
1486 /* store the amount of memory allocated so we can free it later on */
1487 nid_descr->ud_mem_size = alloc_size;
1489 /* copy over the net number range */
1490 range_count = ioc_nid->iud_net.ud_net_num_expr.le_count;
1491 *bulk += sizeof(*ioc_nid);
1492 CDEBUG(D_NET, "bulk = %p\n", *bulk);
1493 copy_range_info(bulk, &buf, &nid_descr->ud_net_id.udn_net_num_range,
1495 CDEBUG(D_NET, "bulk = %p\n", *bulk);
1497 /* copy over the NID descriptor */
1498 for (i = 0; i < expr_count; i++) {
1499 copy_range_info(bulk, &buf, &nid_descr->ud_addr_range, -1);
1500 CDEBUG(D_NET, "bulk = %p\n", *bulk);
/*
 * Demarshal a UDSP from an ioctl bulk buffer and add it as a policy.
 *
 * The buffer must hold at least one struct lnet_ioctl_udsp. A new udsp is
 * obtained from lnet_udsp_alloc() and its action type and priority are
 * taken from the ioctl header, together with the index the policy is to be
 * added at. The cursor is then moved past the header and the SRC, DST and
 * RTE descriptors are demarshalled in that order by copy_ioc_udsp_descr().
 * The result of lnet_udsp_add_policy(udsp, idx) is returned.
 *
 * NOTE(review): the error branches are not visible in this excerpt. The
 * trailing lnet_udsp_free() is presumably the failure path that releases
 * the partially built udsp - confirm against the full source.
 */
1507 lnet_udsp_demarshal_add(void *bulk, __u32 bulk_size)
1509 struct lnet_ioctl_udsp *ioc_udsp;
1510 struct lnet_udsp *udsp;
/* the buffer must at least hold the fixed size ioctl header */
1514 if (bulk_size < sizeof(*ioc_udsp))
1517 udsp = lnet_udsp_alloc();
1523 udsp->udsp_action_type = ioc_udsp->iou_action_type;
1524 udsp->udsp_action.udsp_priority = ioc_udsp->iou_action.priority;
1525 idx = ioc_udsp->iou_idx;
1527 CDEBUG(D_NET, "demarshal descr %u:%u:%d:%u\n", udsp->udsp_action_type,
1528 udsp->udsp_action.udsp_priority, idx, bulk_size);
/* step over the header; the NID descriptors follow it in the buffer */
1530 bulk += sizeof(*ioc_udsp);
1531 bulk_size -= sizeof(*ioc_udsp);
1533 rc = copy_ioc_udsp_descr(&udsp->udsp_src, "SRC", &bulk, &bulk_size);
1537 rc = copy_ioc_udsp_descr(&udsp->udsp_dst, "DST", &bulk, &bulk_size);
1541 rc = copy_ioc_udsp_descr(&udsp->udsp_rte, "RTE", &bulk, &bulk_size);
1545 return lnet_udsp_add_policy(udsp, idx);
1548 lnet_udsp_free(udsp);