Whamcloud - gitweb
LU-8726 osd-ldiskfs: bypass read for benchmarking
[fs/lustre-release.git] / lustre / ptlrpc / nrs_tbf.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License version 2 for more details.  A copy is
14  * included in the COPYING file that accompanied this code.
15
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (C) 2013 DataDirect Networks, Inc.
24  *
25  * Copyright (c) 2014, 2016, Intel Corporation.
26  */
27 /*
28  * lustre/ptlrpc/nrs_tbf.c
29  *
30  * Network Request Scheduler (NRS) Token Bucket Filter(TBF) policy
31  *
32  */
33
34 #ifdef HAVE_SERVER_SUPPORT
35
36 /**
37  * \addtogroup nrs
38  * @{
39  */
40
41 #define DEBUG_SUBSYSTEM S_RPC
42 #include <obd_support.h>
43 #include <obd_class.h>
44 #include <libcfs/libcfs.h>
45 #include "ptlrpc_internal.h"
46
47 /**
48  * \name tbf
49  *
50  * Token Bucket Filter over client NIDs
51  *
52  * @{
53  */
54
/* Name string of the Token Bucket Filter (TBF) NRS policy */
#define NRS_POL_NAME_TBF	"tbf"
56
57 static int tbf_jobid_cache_size = 8192;
58 module_param(tbf_jobid_cache_size, int, 0644);
59 MODULE_PARM_DESC(tbf_jobid_cache_size, "The size of jobid cache");
60
61 static int tbf_rate = 10000;
62 module_param(tbf_rate, int, 0644);
63 MODULE_PARM_DESC(tbf_rate, "Default rate limit in RPCs/s");
64
65 static int tbf_depth = 3;
66 module_param(tbf_depth, int, 0644);
67 MODULE_PARM_DESC(tbf_depth, "How many tokens that a client can save up");
68
69 static enum hrtimer_restart nrs_tbf_timer_cb(struct hrtimer *timer)
70 {
71         struct nrs_tbf_head *head = container_of(timer, struct nrs_tbf_head,
72                                                  th_timer);
73         struct ptlrpc_nrs   *nrs = head->th_res.res_policy->pol_nrs;
74         struct ptlrpc_service_part *svcpt = nrs->nrs_svcpt;
75
76         nrs->nrs_throttling = 0;
77         wake_up(&svcpt->scp_waitq);
78
79         return HRTIMER_NORESTART;
80 }
81
/*
 * Name of the default rule: nrs_tbf_jobid_startup() creates a rule with this
 * name and nrs_tbf_rule_stop() refuses to stop it.
 */
#define NRS_TBF_DEFAULT_RULE	"default"
83
84 static void nrs_tbf_rule_fini(struct nrs_tbf_rule *rule)
85 {
86         LASSERT(atomic_read(&rule->tr_ref) == 0);
87         LASSERT(list_empty(&rule->tr_cli_list));
88         LASSERT(list_empty(&rule->tr_linkage));
89
90         rule->tr_head->th_ops->o_rule_fini(rule);
91         OBD_FREE_PTR(rule);
92 }
93
94 /**
95  * Decreases the rule's usage reference count, and stops the rule in case it
96  * was already stopping and have no more outstanding usage references (which
97  * indicates it has no more queued or started requests, and can be safely
98  * stopped).
99  */
100 static void nrs_tbf_rule_put(struct nrs_tbf_rule *rule)
101 {
102         if (atomic_dec_and_test(&rule->tr_ref))
103                 nrs_tbf_rule_fini(rule);
104 }
105
106 /**
107  * Increases the rule's usage reference count.
108  */
109 static inline void nrs_tbf_rule_get(struct nrs_tbf_rule *rule)
110 {
111         atomic_inc(&rule->tr_ref);
112 }
113
114 static void
115 nrs_tbf_cli_rule_put(struct nrs_tbf_client *cli)
116 {
117         LASSERT(!list_empty(&cli->tc_linkage));
118         LASSERT(cli->tc_rule);
119         spin_lock(&cli->tc_rule->tr_rule_lock);
120         list_del_init(&cli->tc_linkage);
121         spin_unlock(&cli->tc_rule->tr_rule_lock);
122         nrs_tbf_rule_put(cli->tc_rule);
123         cli->tc_rule = NULL;
124 }
125
126 static void
127 nrs_tbf_cli_reset_value(struct nrs_tbf_head *head,
128                         struct nrs_tbf_client *cli)
129
130 {
131         struct nrs_tbf_rule *rule = cli->tc_rule;
132
133         cli->tc_rpc_rate = rule->tr_rpc_rate;
134         cli->tc_nsecs = rule->tr_nsecs;
135         cli->tc_depth = rule->tr_depth;
136         cli->tc_ntoken = rule->tr_depth;
137         cli->tc_check_time = ktime_to_ns(ktime_get());
138         cli->tc_rule_sequence = atomic_read(&head->th_rule_sequence);
139         cli->tc_rule_generation = rule->tr_generation;
140
141         if (cli->tc_in_heap)
142                 cfs_binheap_relocate(head->th_binheap,
143                                      &cli->tc_node);
144 }
145
146 static void
147 nrs_tbf_cli_reset(struct nrs_tbf_head *head,
148                   struct nrs_tbf_rule *rule,
149                   struct nrs_tbf_client *cli)
150 {
151         spin_lock(&cli->tc_rule_lock);
152         if (cli->tc_rule != NULL && !list_empty(&cli->tc_linkage)) {
153                 LASSERT(rule != cli->tc_rule);
154                 nrs_tbf_cli_rule_put(cli);
155         }
156         LASSERT(cli->tc_rule == NULL);
157         LASSERT(list_empty(&cli->tc_linkage));
158         /* Rule's ref is added before called */
159         cli->tc_rule = rule;
160         spin_lock(&rule->tr_rule_lock);
161         list_add_tail(&cli->tc_linkage, &rule->tr_cli_list);
162         spin_unlock(&rule->tr_rule_lock);
163         spin_unlock(&cli->tc_rule_lock);
164         nrs_tbf_cli_reset_value(head, cli);
165 }
166
167 static int
168 nrs_tbf_rule_dump(struct nrs_tbf_rule *rule, struct seq_file *m)
169 {
170         return rule->tr_head->th_ops->o_rule_dump(rule, m);
171 }
172
173 static int
174 nrs_tbf_rule_dump_all(struct nrs_tbf_head *head, struct seq_file *m)
175 {
176         struct nrs_tbf_rule *rule;
177         int rc = 0;
178
179         LASSERT(head != NULL);
180         spin_lock(&head->th_rule_lock);
181         /* List the rules from newest to oldest */
182         list_for_each_entry(rule, &head->th_list, tr_linkage) {
183                 LASSERT((rule->tr_flags & NTRS_STOPPING) == 0);
184                 rc = nrs_tbf_rule_dump(rule, m);
185                 if (rc) {
186                         rc = -ENOSPC;
187                         break;
188                 }
189         }
190         spin_unlock(&head->th_rule_lock);
191
192         return rc;
193 }
194
195 static struct nrs_tbf_rule *
196 nrs_tbf_rule_find_nolock(struct nrs_tbf_head *head,
197                          const char *name)
198 {
199         struct nrs_tbf_rule *rule;
200
201         LASSERT(head != NULL);
202         list_for_each_entry(rule, &head->th_list, tr_linkage) {
203                 LASSERT((rule->tr_flags & NTRS_STOPPING) == 0);
204                 if (strcmp(rule->tr_name, name) == 0) {
205                         nrs_tbf_rule_get(rule);
206                         return rule;
207                 }
208         }
209         return NULL;
210 }
211
212 static struct nrs_tbf_rule *
213 nrs_tbf_rule_find(struct nrs_tbf_head *head,
214                   const char *name)
215 {
216         struct nrs_tbf_rule *rule;
217
218         LASSERT(head != NULL);
219         spin_lock(&head->th_rule_lock);
220         rule = nrs_tbf_rule_find_nolock(head, name);
221         spin_unlock(&head->th_rule_lock);
222         return rule;
223 }
224
225 static struct nrs_tbf_rule *
226 nrs_tbf_rule_match(struct nrs_tbf_head *head,
227                    struct nrs_tbf_client *cli)
228 {
229         struct nrs_tbf_rule *rule = NULL;
230         struct nrs_tbf_rule *tmp_rule;
231
232         spin_lock(&head->th_rule_lock);
233         /* Match the newest rule in the list */
234         list_for_each_entry(tmp_rule, &head->th_list, tr_linkage) {
235                 LASSERT((tmp_rule->tr_flags & NTRS_STOPPING) == 0);
236                 if (head->th_ops->o_rule_match(tmp_rule, cli)) {
237                         rule = tmp_rule;
238                         break;
239                 }
240         }
241
242         if (rule == NULL)
243                 rule = head->th_rule;
244
245         nrs_tbf_rule_get(rule);
246         spin_unlock(&head->th_rule_lock);
247         return rule;
248 }
249
250 static void
251 nrs_tbf_cli_init(struct nrs_tbf_head *head,
252                  struct nrs_tbf_client *cli,
253                  struct ptlrpc_request *req)
254 {
255         struct nrs_tbf_rule *rule;
256
257         memset(cli, 0, sizeof(*cli));
258         cli->tc_in_heap = false;
259         head->th_ops->o_cli_init(cli, req);
260         INIT_LIST_HEAD(&cli->tc_list);
261         INIT_LIST_HEAD(&cli->tc_linkage);
262         spin_lock_init(&cli->tc_rule_lock);
263         atomic_set(&cli->tc_ref, 1);
264         rule = nrs_tbf_rule_match(head, cli);
265         nrs_tbf_cli_reset(head, rule, cli);
266 }
267
268 static void
269 nrs_tbf_cli_fini(struct nrs_tbf_client *cli)
270 {
271         LASSERT(list_empty(&cli->tc_list));
272         LASSERT(!cli->tc_in_heap);
273         LASSERT(atomic_read(&cli->tc_ref) == 0);
274         spin_lock(&cli->tc_rule_lock);
275         nrs_tbf_cli_rule_put(cli);
276         spin_unlock(&cli->tc_rule_lock);
277         OBD_FREE_PTR(cli);
278 }
279
280 static int
281 nrs_tbf_rule_start(struct ptlrpc_nrs_policy *policy,
282                    struct nrs_tbf_head *head,
283                    struct nrs_tbf_cmd *start)
284 {
285         struct nrs_tbf_rule     *rule;
286         struct nrs_tbf_rule     *tmp_rule;
287         struct nrs_tbf_rule     *next_rule;
288         char                    *next_name = start->u.tc_start.ts_next_name;
289         int                      rc;
290
291         rule = nrs_tbf_rule_find(head, start->tc_name);
292         if (rule) {
293                 nrs_tbf_rule_put(rule);
294                 return -EEXIST;
295         }
296
297         OBD_CPT_ALLOC_PTR(rule, nrs_pol2cptab(policy), nrs_pol2cptid(policy));
298         if (rule == NULL)
299                 return -ENOMEM;
300
301         memcpy(rule->tr_name, start->tc_name, strlen(start->tc_name));
302         rule->tr_rpc_rate = start->u.tc_start.ts_rpc_rate;
303         rule->tr_nsecs = NSEC_PER_SEC;
304         do_div(rule->tr_nsecs, rule->tr_rpc_rate);
305         rule->tr_depth = tbf_depth;
306         atomic_set(&rule->tr_ref, 1);
307         INIT_LIST_HEAD(&rule->tr_cli_list);
308         INIT_LIST_HEAD(&rule->tr_nids);
309         INIT_LIST_HEAD(&rule->tr_linkage);
310         spin_lock_init(&rule->tr_rule_lock);
311         rule->tr_head = head;
312
313         rc = head->th_ops->o_rule_init(policy, rule, start);
314         if (rc) {
315                 OBD_FREE_PTR(rule);
316                 return rc;
317         }
318
319         /* Add as the newest rule */
320         spin_lock(&head->th_rule_lock);
321         tmp_rule = nrs_tbf_rule_find_nolock(head, start->tc_name);
322         if (tmp_rule) {
323                 spin_unlock(&head->th_rule_lock);
324                 nrs_tbf_rule_put(tmp_rule);
325                 nrs_tbf_rule_put(rule);
326                 return -EEXIST;
327         }
328
329         if (next_name) {
330                 next_rule = nrs_tbf_rule_find_nolock(head, next_name);
331                 if (!next_rule) {
332                         spin_unlock(&head->th_rule_lock);
333                         nrs_tbf_rule_put(rule);
334                         return -ENOENT;
335                 }
336
337                 list_add(&rule->tr_linkage, next_rule->tr_linkage.prev);
338                 nrs_tbf_rule_put(next_rule);
339         } else {
340                 /* Add on the top of the rule list */
341                 list_add(&rule->tr_linkage, &head->th_list);
342         }
343         spin_unlock(&head->th_rule_lock);
344         atomic_inc(&head->th_rule_sequence);
345         if (start->u.tc_start.ts_rule_flags & NTRS_DEFAULT) {
346                 rule->tr_flags |= NTRS_DEFAULT;
347                 LASSERT(head->th_rule == NULL);
348                 head->th_rule = rule;
349         }
350
351         return 0;
352 }
353
354 /**
355  * Change the rank of a rule in the rule list
356  *
357  * The matched rule will be moved to the position right before another
358  * given rule.
359  *
360  * \param[in] policy    the policy instance
361  * \param[in] head      the TBF policy instance
362  * \param[in] name      the rule name to be moved
363  * \param[in] next_name the rule name before which the matched rule will be
364  *                      moved
365  *
366  */
367 static int
368 nrs_tbf_rule_change_rank(struct ptlrpc_nrs_policy *policy,
369                          struct nrs_tbf_head *head,
370                          char *name,
371                          char *next_name)
372 {
373         struct nrs_tbf_rule     *rule = NULL;
374         struct nrs_tbf_rule     *next_rule = NULL;
375         int                      rc = 0;
376
377         LASSERT(head != NULL);
378
379         spin_lock(&head->th_rule_lock);
380         rule = nrs_tbf_rule_find_nolock(head, name);
381         if (!rule)
382                 GOTO(out, rc = -ENOENT);
383
384         if (strcmp(name, next_name) == 0)
385                 GOTO(out_put, rc);
386
387         next_rule = nrs_tbf_rule_find_nolock(head, next_name);
388         if (!next_rule)
389                 GOTO(out_put, rc = -ENOENT);
390
391         list_move(&rule->tr_linkage, next_rule->tr_linkage.prev);
392         nrs_tbf_rule_put(next_rule);
393 out_put:
394         nrs_tbf_rule_put(rule);
395 out:
396         spin_unlock(&head->th_rule_lock);
397         return rc;
398 }
399
400 static int
401 nrs_tbf_rule_change_rate(struct ptlrpc_nrs_policy *policy,
402                          struct nrs_tbf_head *head,
403                          char *name,
404                          __u64 rate)
405 {
406         struct nrs_tbf_rule *rule;
407
408         assert_spin_locked(&policy->pol_nrs->nrs_lock);
409
410         rule = nrs_tbf_rule_find(head, name);
411         if (rule == NULL)
412                 return -ENOENT;
413
414         rule->tr_rpc_rate = rate;
415         rule->tr_nsecs = NSEC_PER_SEC;
416         do_div(rule->tr_nsecs, rule->tr_rpc_rate);
417         rule->tr_generation++;
418         nrs_tbf_rule_put(rule);
419
420         return 0;
421 }
422
423 static int
424 nrs_tbf_rule_change(struct ptlrpc_nrs_policy *policy,
425                     struct nrs_tbf_head *head,
426                     struct nrs_tbf_cmd *change)
427 {
428         __u64    rate = change->u.tc_change.tc_rpc_rate;
429         char    *next_name = change->u.tc_change.tc_next_name;
430         int      rc;
431
432         if (rate != 0) {
433                 rc = nrs_tbf_rule_change_rate(policy, head, change->tc_name,
434                                               rate);
435                 if (rc)
436                         return rc;
437         }
438
439         if (next_name) {
440                 rc = nrs_tbf_rule_change_rank(policy, head, change->tc_name,
441                                               next_name);
442                 if (rc)
443                         return rc;
444         }
445
446         return 0;
447 }
448
449 static int
450 nrs_tbf_rule_stop(struct ptlrpc_nrs_policy *policy,
451                   struct nrs_tbf_head *head,
452                   struct nrs_tbf_cmd *stop)
453 {
454         struct nrs_tbf_rule *rule;
455
456         assert_spin_locked(&policy->pol_nrs->nrs_lock);
457
458         if (strcmp(stop->tc_name, NRS_TBF_DEFAULT_RULE) == 0)
459                 return -EPERM;
460
461         rule = nrs_tbf_rule_find(head, stop->tc_name);
462         if (rule == NULL)
463                 return -ENOENT;
464
465         list_del_init(&rule->tr_linkage);
466         rule->tr_flags |= NTRS_STOPPING;
467         nrs_tbf_rule_put(rule);
468         nrs_tbf_rule_put(rule);
469
470         return 0;
471 }
472
473 static int
474 nrs_tbf_command(struct ptlrpc_nrs_policy *policy,
475                 struct nrs_tbf_head *head,
476                 struct nrs_tbf_cmd *cmd)
477 {
478         int rc;
479
480         assert_spin_locked(&policy->pol_nrs->nrs_lock);
481
482         switch (cmd->tc_cmd) {
483         case NRS_CTL_TBF_START_RULE:
484                 if (cmd->u.tc_start.ts_valid_type != head->th_type_flag)
485                         return -EINVAL;
486
487                 spin_unlock(&policy->pol_nrs->nrs_lock);
488                 rc = nrs_tbf_rule_start(policy, head, cmd);
489                 spin_lock(&policy->pol_nrs->nrs_lock);
490                 return rc;
491         case NRS_CTL_TBF_CHANGE_RULE:
492                 rc = nrs_tbf_rule_change(policy, head, cmd);
493                 return rc;
494         case NRS_CTL_TBF_STOP_RULE:
495                 rc = nrs_tbf_rule_stop(policy, head, cmd);
496                 /* Take it as a success, if not exists at all */
497                 return rc == -ENOENT ? 0 : rc;
498         default:
499                 return -EFAULT;
500         }
501 }
502
503 /**
504  * Binary heap predicate.
505  *
506  * \param[in] e1 the first binheap node to compare
507  * \param[in] e2 the second binheap node to compare
508  *
509  * \retval 0 e1 > e2
510  * \retval 1 e1 < e2
511  */
512 static int
513 tbf_cli_compare(struct cfs_binheap_node *e1, struct cfs_binheap_node *e2)
514 {
515         struct nrs_tbf_client *cli1;
516         struct nrs_tbf_client *cli2;
517
518         cli1 = container_of(e1, struct nrs_tbf_client, tc_node);
519         cli2 = container_of(e2, struct nrs_tbf_client, tc_node);
520
521         if (cli1->tc_check_time + cli1->tc_nsecs <
522             cli2->tc_check_time + cli2->tc_nsecs)
523                 return 1;
524         else if (cli1->tc_check_time + cli1->tc_nsecs >
525                  cli2->tc_check_time + cli2->tc_nsecs)
526                 return 0;
527
528         if (cli1->tc_check_time < cli2->tc_check_time)
529                 return 1;
530         else if (cli1->tc_check_time > cli2->tc_check_time)
531                 return 0;
532
533         /* Maybe need more comparasion, e.g. request number in the rules */
534         return 1;
535 }
536
537 /**
538  * TBF binary heap operations
539  */
540 static struct cfs_binheap_ops nrs_tbf_heap_ops = {
541         .hop_enter      = NULL,
542         .hop_exit       = NULL,
543         .hop_compare    = tbf_cli_compare,
544 };
545
/**
 * Hash operation of the jobid client hash: hashes the NUL terminated jobid
 * string \a key with cfs_hash_djb2_hash().
 *
 * \param[in] hs	the hash table (unused)
 * \param[in] key	the jobid string
 * \param[in] mask	passed through to cfs_hash_djb2_hash()
 */
static unsigned nrs_tbf_jobid_hop_hash(struct cfs_hash *hs, const void *key,
				  unsigned mask)
{
	const char *jobid = key;

	return cfs_hash_djb2_hash(jobid, strlen(jobid), mask);
}
551
552 static int nrs_tbf_jobid_hop_keycmp(const void *key, struct hlist_node *hnode)
553 {
554         struct nrs_tbf_client *cli = hlist_entry(hnode,
555                                                      struct nrs_tbf_client,
556                                                      tc_hnode);
557
558         return (strcmp(cli->tc_jobid, key) == 0);
559 }
560
561 static void *nrs_tbf_jobid_hop_key(struct hlist_node *hnode)
562 {
563         struct nrs_tbf_client *cli = hlist_entry(hnode,
564                                                      struct nrs_tbf_client,
565                                                      tc_hnode);
566
567         return cli->tc_jobid;
568 }
569
570 static void *nrs_tbf_jobid_hop_object(struct hlist_node *hnode)
571 {
572         return hlist_entry(hnode, struct nrs_tbf_client, tc_hnode);
573 }
574
575 static void nrs_tbf_jobid_hop_get(struct cfs_hash *hs, struct hlist_node *hnode)
576 {
577         struct nrs_tbf_client *cli = hlist_entry(hnode,
578                                                      struct nrs_tbf_client,
579                                                      tc_hnode);
580
581         atomic_inc(&cli->tc_ref);
582 }
583
584 static void nrs_tbf_jobid_hop_put(struct cfs_hash *hs, struct hlist_node *hnode)
585 {
586         struct nrs_tbf_client *cli = hlist_entry(hnode,
587                                                      struct nrs_tbf_client,
588                                                      tc_hnode);
589
590         atomic_dec(&cli->tc_ref);
591 }
592
593 static void
594 nrs_tbf_jobid_hop_exit(struct cfs_hash *hs, struct hlist_node *hnode)
595
596 {
597         struct nrs_tbf_client *cli = hlist_entry(hnode,
598                                                  struct nrs_tbf_client,
599                                                  tc_hnode);
600
601         LASSERT(atomic_read(&cli->tc_ref) == 0);
602         nrs_tbf_cli_fini(cli);
603 }
604
605 static struct cfs_hash_ops nrs_tbf_jobid_hash_ops = {
606         .hs_hash        = nrs_tbf_jobid_hop_hash,
607         .hs_keycmp      = nrs_tbf_jobid_hop_keycmp,
608         .hs_key         = nrs_tbf_jobid_hop_key,
609         .hs_object      = nrs_tbf_jobid_hop_object,
610         .hs_get         = nrs_tbf_jobid_hop_get,
611         .hs_put         = nrs_tbf_jobid_hop_put,
612         .hs_put_locked  = nrs_tbf_jobid_hop_put,
613         .hs_exit        = nrs_tbf_jobid_hop_exit,
614 };
615
/* Flags nrs_tbf_jobid_startup() passes to cfs_hash_create() */
#define NRS_TBF_JOBID_HASH_FLAGS	\
	(CFS_HASH_DEPTH | CFS_HASH_NO_ITEMREF | CFS_HASH_SPIN_BKTLOCK)
619
620 static struct nrs_tbf_client *
621 nrs_tbf_jobid_hash_lookup(struct cfs_hash *hs,
622                           struct cfs_hash_bd *bd,
623                           const char *jobid)
624 {
625         struct hlist_node *hnode;
626         struct nrs_tbf_client *cli;
627
628         /* cfs_hash_bd_peek_locked is a somehow "internal" function
629          * of cfs_hash, it doesn't add refcount on object. */
630         hnode = cfs_hash_bd_peek_locked(hs, bd, (void *)jobid);
631         if (hnode == NULL)
632                 return NULL;
633
634         cfs_hash_get(hs, hnode);
635         cli = container_of0(hnode, struct nrs_tbf_client, tc_hnode);
636         if (!list_empty(&cli->tc_lru))
637                 list_del_init(&cli->tc_lru);
638         return cli;
639 }
640
/* Jobid used for requests whose message carries no jobid */
#define NRS_TBF_JOBID_NULL	""
642
643 static struct nrs_tbf_client *
644 nrs_tbf_jobid_cli_find(struct nrs_tbf_head *head,
645                        struct ptlrpc_request *req)
646 {
647         const char              *jobid;
648         struct nrs_tbf_client   *cli;
649         struct cfs_hash         *hs = head->th_cli_hash;
650         struct cfs_hash_bd               bd;
651
652         jobid = lustre_msg_get_jobid(req->rq_reqmsg);
653         if (jobid == NULL)
654                 jobid = NRS_TBF_JOBID_NULL;
655         cfs_hash_bd_get_and_lock(hs, (void *)jobid, &bd, 1);
656         cli = nrs_tbf_jobid_hash_lookup(hs, &bd, jobid);
657         cfs_hash_bd_unlock(hs, &bd, 1);
658
659         return cli;
660 }
661
662 static struct nrs_tbf_client *
663 nrs_tbf_jobid_cli_findadd(struct nrs_tbf_head *head,
664                           struct nrs_tbf_client *cli)
665 {
666         const char              *jobid;
667         struct nrs_tbf_client   *ret;
668         struct cfs_hash         *hs = head->th_cli_hash;
669         struct cfs_hash_bd               bd;
670
671         jobid = cli->tc_jobid;
672         cfs_hash_bd_get_and_lock(hs, (void *)jobid, &bd, 1);
673         ret = nrs_tbf_jobid_hash_lookup(hs, &bd, jobid);
674         if (ret == NULL) {
675                 cfs_hash_bd_add_locked(hs, &bd, &cli->tc_hnode);
676                 ret = cli;
677         }
678         cfs_hash_bd_unlock(hs, &bd, 1);
679
680         return ret;
681 }
682
683 static void
684 nrs_tbf_jobid_cli_put(struct nrs_tbf_head *head,
685                       struct nrs_tbf_client *cli)
686 {
687         struct cfs_hash_bd               bd;
688         struct cfs_hash         *hs = head->th_cli_hash;
689         struct nrs_tbf_bucket   *bkt;
690         int                      hw;
691         struct list_head        zombies;
692
693         INIT_LIST_HEAD(&zombies);
694         cfs_hash_bd_get(hs, &cli->tc_jobid, &bd);
695         bkt = cfs_hash_bd_extra_get(hs, &bd);
696         if (!cfs_hash_bd_dec_and_lock(hs, &bd, &cli->tc_ref))
697                 return;
698         LASSERT(list_empty(&cli->tc_lru));
699         list_add_tail(&cli->tc_lru, &bkt->ntb_lru);
700
701         /*
702          * Check and purge the LRU, there is at least one client in the LRU.
703          */
704         hw = tbf_jobid_cache_size >>
705              (hs->hs_cur_bits - hs->hs_bkt_bits);
706         while (cfs_hash_bd_count_get(&bd) > hw) {
707                 if (unlikely(list_empty(&bkt->ntb_lru)))
708                         break;
709                 cli = list_entry(bkt->ntb_lru.next,
710                                      struct nrs_tbf_client,
711                                      tc_lru);
712                 LASSERT(atomic_read(&cli->tc_ref) == 0);
713                 cfs_hash_bd_del_locked(hs, &bd, &cli->tc_hnode);
714                 list_move(&cli->tc_lru, &zombies);
715         }
716         cfs_hash_bd_unlock(head->th_cli_hash, &bd, 1);
717
718         while (!list_empty(&zombies)) {
719                 cli = container_of0(zombies.next,
720                                     struct nrs_tbf_client, tc_lru);
721                 list_del_init(&cli->tc_lru);
722                 nrs_tbf_cli_fini(cli);
723         }
724 }
725
726 static void
727 nrs_tbf_jobid_cli_init(struct nrs_tbf_client *cli,
728                        struct ptlrpc_request *req)
729 {
730         char *jobid = lustre_msg_get_jobid(req->rq_reqmsg);
731
732         if (jobid == NULL)
733                 jobid = NRS_TBF_JOBID_NULL;
734         LASSERT(strlen(jobid) < LUSTRE_JOBID_SIZE);
735         INIT_LIST_HEAD(&cli->tc_lru);
736         memcpy(cli->tc_jobid, jobid, strlen(jobid));
737 }
738
739 static int nrs_tbf_jobid_hash_order(void)
740 {
741         int bits;
742
743         for (bits = 1; (1 << bits) < tbf_jobid_cache_size; ++bits)
744                 ;
745
746         return bits;
747 }
748
/*
 * Bucket bits handed to cfs_hash_create() for the jobid client hash; also
 * the lower bound of the hash bits used by nrs_tbf_jobid_startup().
 */
#define NRS_TBF_JOBID_BKT_BITS	10
750
751 static int
752 nrs_tbf_jobid_startup(struct ptlrpc_nrs_policy *policy,
753                       struct nrs_tbf_head *head)
754 {
755         struct nrs_tbf_cmd       start;
756         struct nrs_tbf_bucket   *bkt;
757         int                      bits;
758         int                      i;
759         int                      rc;
760         struct cfs_hash_bd       bd;
761
762         bits = nrs_tbf_jobid_hash_order();
763         if (bits < NRS_TBF_JOBID_BKT_BITS)
764                 bits = NRS_TBF_JOBID_BKT_BITS;
765         head->th_cli_hash = cfs_hash_create("nrs_tbf_hash",
766                                             bits,
767                                             bits,
768                                             NRS_TBF_JOBID_BKT_BITS,
769                                             sizeof(*bkt),
770                                             0,
771                                             0,
772                                             &nrs_tbf_jobid_hash_ops,
773                                             NRS_TBF_JOBID_HASH_FLAGS);
774         if (head->th_cli_hash == NULL)
775                 return -ENOMEM;
776
777         cfs_hash_for_each_bucket(head->th_cli_hash, &bd, i) {
778                 bkt = cfs_hash_bd_extra_get(head->th_cli_hash, &bd);
779                 INIT_LIST_HEAD(&bkt->ntb_lru);
780         }
781
782         memset(&start, 0, sizeof(start));
783         start.u.tc_start.ts_jobids_str = "*";
784
785         start.u.tc_start.ts_rpc_rate = tbf_rate;
786         start.u.tc_start.ts_rule_flags = NTRS_DEFAULT;
787         start.tc_name = NRS_TBF_DEFAULT_RULE;
788         INIT_LIST_HEAD(&start.u.tc_start.ts_jobids);
789         rc = nrs_tbf_rule_start(policy, head, &start);
790         if (rc) {
791                 cfs_hash_putref(head->th_cli_hash);
792                 head->th_cli_hash = NULL;
793         }
794
795         return rc;
796 }
797
798 /**
799  * Frees jobid of \a list.
800  *
801  */
802 static void
803 nrs_tbf_jobid_list_free(struct list_head *jobid_list)
804 {
805         struct nrs_tbf_jobid *jobid, *n;
806
807         list_for_each_entry_safe(jobid, n, jobid_list, tj_linkage) {
808                 OBD_FREE(jobid->tj_id, strlen(jobid->tj_id) + 1);
809                 list_del(&jobid->tj_linkage);
810                 OBD_FREE(jobid, sizeof(struct nrs_tbf_jobid));
811         }
812 }
813
814 static int
815 nrs_tbf_jobid_list_add(const struct cfs_lstr *id, struct list_head *jobid_list)
816 {
817         struct nrs_tbf_jobid *jobid;
818
819         OBD_ALLOC(jobid, sizeof(struct nrs_tbf_jobid));
820         if (jobid == NULL)
821                 return -ENOMEM;
822
823         OBD_ALLOC(jobid->tj_id, id->ls_len + 1);
824         if (jobid->tj_id == NULL) {
825                 OBD_FREE(jobid, sizeof(struct nrs_tbf_jobid));
826                 return -ENOMEM;
827         }
828
829         memcpy(jobid->tj_id, id->ls_str, id->ls_len);
830         list_add_tail(&jobid->tj_linkage, jobid_list);
831         return 0;
832 }
833
834 static int
835 nrs_tbf_jobid_list_match(struct list_head *jobid_list, char *id)
836 {
837         struct nrs_tbf_jobid *jobid;
838
839         list_for_each_entry(jobid, jobid_list, tj_linkage) {
840                 if (strcmp(id, jobid->tj_id) == 0)
841                         return 1;
842         }
843         return 0;
844 }
845
846 static int
847 nrs_tbf_jobid_list_parse(char *str, int len, struct list_head *jobid_list)
848 {
849         struct cfs_lstr src;
850         struct cfs_lstr res;
851         int rc = 0;
852         ENTRY;
853
854         src.ls_str = str;
855         src.ls_len = len;
856         INIT_LIST_HEAD(jobid_list);
857         while (src.ls_str) {
858                 rc = cfs_gettok(&src, ' ', &res);
859                 if (rc == 0) {
860                         rc = -EINVAL;
861                         break;
862                 }
863                 rc = nrs_tbf_jobid_list_add(&res, jobid_list);
864                 if (rc)
865                         break;
866         }
867         if (rc)
868                 nrs_tbf_jobid_list_free(jobid_list);
869         RETURN(rc);
870 }
871
872 static void nrs_tbf_jobid_cmd_fini(struct nrs_tbf_cmd *cmd)
873 {
874         if (!list_empty(&cmd->u.tc_start.ts_jobids))
875                 nrs_tbf_jobid_list_free(&cmd->u.tc_start.ts_jobids);
876         if (cmd->u.tc_start.ts_jobids_str)
877                 OBD_FREE(cmd->u.tc_start.ts_jobids_str,
878                          strlen(cmd->u.tc_start.ts_jobids_str) + 1);
879 }
880
881 static int nrs_tbf_check_id_value(struct cfs_lstr *src, char *key)
882 {
883         struct cfs_lstr res;
884         int keylen = strlen(key);
885         int rc;
886
887         rc = cfs_gettok(src, '=', &res);
888         if (rc == 0 || res.ls_len != keylen ||
889             strncmp(res.ls_str, key, keylen) != 0 ||
890             src->ls_len <= 2 || src->ls_str[0] != '{' ||
891             src->ls_str[src->ls_len - 1] != '}')
892                 return -EINVAL;
893
894         /* Skip '{' and '}' */
895         src->ls_str++;
896         src->ls_len -= 2;
897         return 0;
898 }
899
900 static int nrs_tbf_jobid_parse(struct nrs_tbf_cmd *cmd, char *id)
901 {
902         struct cfs_lstr src;
903         int rc;
904
905         src.ls_str = id;
906         src.ls_len = strlen(id);
907         rc = nrs_tbf_check_id_value(&src, "jobid");
908         if (rc)
909                 return rc;
910
911         OBD_ALLOC(cmd->u.tc_start.ts_jobids_str, src.ls_len + 1);
912         if (cmd->u.tc_start.ts_jobids_str == NULL)
913                 return -ENOMEM;
914
915         memcpy(cmd->u.tc_start.ts_jobids_str, src.ls_str, src.ls_len);
916
917         /* parse jobid list */
918         rc = nrs_tbf_jobid_list_parse(cmd->u.tc_start.ts_jobids_str,
919                                       strlen(cmd->u.tc_start.ts_jobids_str),
920                                       &cmd->u.tc_start.ts_jobids);
921         if (rc)
922                 nrs_tbf_jobid_cmd_fini(cmd);
923
924         return rc;
925 }
926
927 static int nrs_tbf_jobid_rule_init(struct ptlrpc_nrs_policy *policy,
928                                    struct nrs_tbf_rule *rule,
929                                    struct nrs_tbf_cmd *start)
930 {
931         int rc = 0;
932
933         LASSERT(start->u.tc_start.ts_jobids_str);
934         OBD_ALLOC(rule->tr_jobids_str,
935                   strlen(start->u.tc_start.ts_jobids_str) + 1);
936         if (rule->tr_jobids_str == NULL)
937                 return -ENOMEM;
938
939         memcpy(rule->tr_jobids_str,
940                start->u.tc_start.ts_jobids_str,
941                strlen(start->u.tc_start.ts_jobids_str));
942
943         INIT_LIST_HEAD(&rule->tr_jobids);
944         if (!list_empty(&start->u.tc_start.ts_jobids)) {
945                 rc = nrs_tbf_jobid_list_parse(rule->tr_jobids_str,
946                                               strlen(rule->tr_jobids_str),
947                                               &rule->tr_jobids);
948                 if (rc)
949                         CERROR("jobids {%s} illegal\n", rule->tr_jobids_str);
950         }
951         if (rc)
952                 OBD_FREE(rule->tr_jobids_str,
953                          strlen(start->u.tc_start.ts_jobids_str) + 1);
954         return rc;
955 }
956
957 static int
958 nrs_tbf_jobid_rule_dump(struct nrs_tbf_rule *rule, struct seq_file *m)
959 {
960         seq_printf(m, "%s {%s} %llu, ref %d\n", rule->tr_name,
961                    rule->tr_jobids_str, rule->tr_rpc_rate,
962                    atomic_read(&rule->tr_ref) - 1);
963         return 0;
964 }
965
966 static int
967 nrs_tbf_jobid_rule_match(struct nrs_tbf_rule *rule,
968                          struct nrs_tbf_client *cli)
969 {
970         return nrs_tbf_jobid_list_match(&rule->tr_jobids, cli->tc_jobid);
971 }
972
973 static void nrs_tbf_jobid_rule_fini(struct nrs_tbf_rule *rule)
974 {
975         if (!list_empty(&rule->tr_jobids))
976                 nrs_tbf_jobid_list_free(&rule->tr_jobids);
977         LASSERT(rule->tr_jobids_str != NULL);
978         OBD_FREE(rule->tr_jobids_str, strlen(rule->tr_jobids_str) + 1);
979 }
980
981 static struct nrs_tbf_ops nrs_tbf_jobid_ops = {
982         .o_name = NRS_TBF_TYPE_JOBID,
983         .o_startup = nrs_tbf_jobid_startup,
984         .o_cli_find = nrs_tbf_jobid_cli_find,
985         .o_cli_findadd = nrs_tbf_jobid_cli_findadd,
986         .o_cli_put = nrs_tbf_jobid_cli_put,
987         .o_cli_init = nrs_tbf_jobid_cli_init,
988         .o_rule_init = nrs_tbf_jobid_rule_init,
989         .o_rule_dump = nrs_tbf_jobid_rule_dump,
990         .o_rule_match = nrs_tbf_jobid_rule_match,
991         .o_rule_fini = nrs_tbf_jobid_rule_fini,
992 };
993
/**
 * libcfs_hash operations for nrs_tbf_head::th_cli_hash of the NID TBF type
 *
 * This uses ptlrpc_request::rq_peer.nid as its key, in order to hash
 * nrs_tbf_client objects.
 */
/* bucket bits handed to cfs_hash_create() by the NID and opcode start-up */
#define NRS_TBF_NID_BKT_BITS    8
/* used as both the current and the maximum bits for cfs_hash_create() */
#define NRS_TBF_NID_BITS        16
1002
1003 static unsigned nrs_tbf_nid_hop_hash(struct cfs_hash *hs, const void *key,
1004                                   unsigned mask)
1005 {
1006         return cfs_hash_djb2_hash(key, sizeof(lnet_nid_t), mask);
1007 }
1008
1009 static int nrs_tbf_nid_hop_keycmp(const void *key, struct hlist_node *hnode)
1010 {
1011         lnet_nid_t            *nid = (lnet_nid_t *)key;
1012         struct nrs_tbf_client *cli = hlist_entry(hnode,
1013                                                      struct nrs_tbf_client,
1014                                                      tc_hnode);
1015
1016         return *nid == cli->tc_nid;
1017 }
1018
1019 static void *nrs_tbf_nid_hop_key(struct hlist_node *hnode)
1020 {
1021         struct nrs_tbf_client *cli = hlist_entry(hnode,
1022                                                      struct nrs_tbf_client,
1023                                                      tc_hnode);
1024
1025         return &cli->tc_nid;
1026 }
1027
1028 static void *nrs_tbf_nid_hop_object(struct hlist_node *hnode)
1029 {
1030         return hlist_entry(hnode, struct nrs_tbf_client, tc_hnode);
1031 }
1032
1033 static void nrs_tbf_nid_hop_get(struct cfs_hash *hs, struct hlist_node *hnode)
1034 {
1035         struct nrs_tbf_client *cli = hlist_entry(hnode,
1036                                                      struct nrs_tbf_client,
1037                                                      tc_hnode);
1038
1039         atomic_inc(&cli->tc_ref);
1040 }
1041
1042 static void nrs_tbf_nid_hop_put(struct cfs_hash *hs, struct hlist_node *hnode)
1043 {
1044         struct nrs_tbf_client *cli = hlist_entry(hnode,
1045                                                      struct nrs_tbf_client,
1046                                                      tc_hnode);
1047
1048         atomic_dec(&cli->tc_ref);
1049 }
1050
1051 static void nrs_tbf_nid_hop_exit(struct cfs_hash *hs, struct hlist_node *hnode)
1052 {
1053         struct nrs_tbf_client *cli = hlist_entry(hnode,
1054                                                      struct nrs_tbf_client,
1055                                                      tc_hnode);
1056
1057         LASSERTF(atomic_read(&cli->tc_ref) == 0,
1058                  "Busy TBF object from client with NID %s, with %d refs\n",
1059                  libcfs_nid2str(cli->tc_nid), atomic_read(&cli->tc_ref));
1060
1061         nrs_tbf_cli_fini(cli);
1062 }
1063
1064 static struct cfs_hash_ops nrs_tbf_nid_hash_ops = {
1065         .hs_hash        = nrs_tbf_nid_hop_hash,
1066         .hs_keycmp      = nrs_tbf_nid_hop_keycmp,
1067         .hs_key         = nrs_tbf_nid_hop_key,
1068         .hs_object      = nrs_tbf_nid_hop_object,
1069         .hs_get         = nrs_tbf_nid_hop_get,
1070         .hs_put         = nrs_tbf_nid_hop_put,
1071         .hs_put_locked  = nrs_tbf_nid_hop_put,
1072         .hs_exit        = nrs_tbf_nid_hop_exit,
1073 };
1074
1075 static struct nrs_tbf_client *
1076 nrs_tbf_nid_cli_find(struct nrs_tbf_head *head,
1077                      struct ptlrpc_request *req)
1078 {
1079         return cfs_hash_lookup(head->th_cli_hash, &req->rq_peer.nid);
1080 }
1081
1082 static struct nrs_tbf_client *
1083 nrs_tbf_nid_cli_findadd(struct nrs_tbf_head *head,
1084                         struct nrs_tbf_client *cli)
1085 {
1086         return cfs_hash_findadd_unique(head->th_cli_hash, &cli->tc_nid,
1087                                        &cli->tc_hnode);
1088 }
1089
1090 static void
1091 nrs_tbf_nid_cli_put(struct nrs_tbf_head *head,
1092                       struct nrs_tbf_client *cli)
1093 {
1094         cfs_hash_put(head->th_cli_hash, &cli->tc_hnode);
1095 }
1096
1097 static int
1098 nrs_tbf_nid_startup(struct ptlrpc_nrs_policy *policy,
1099                     struct nrs_tbf_head *head)
1100 {
1101         struct nrs_tbf_cmd      start;
1102         int rc;
1103
1104         head->th_cli_hash = cfs_hash_create("nrs_tbf_hash",
1105                                             NRS_TBF_NID_BITS,
1106                                             NRS_TBF_NID_BITS,
1107                                             NRS_TBF_NID_BKT_BITS, 0,
1108                                             CFS_HASH_MIN_THETA,
1109                                             CFS_HASH_MAX_THETA,
1110                                             &nrs_tbf_nid_hash_ops,
1111                                             CFS_HASH_RW_BKTLOCK);
1112         if (head->th_cli_hash == NULL)
1113                 return -ENOMEM;
1114
1115         memset(&start, 0, sizeof(start));
1116         start.u.tc_start.ts_nids_str = "*";
1117
1118         start.u.tc_start.ts_rpc_rate = tbf_rate;
1119         start.u.tc_start.ts_rule_flags = NTRS_DEFAULT;
1120         start.tc_name = NRS_TBF_DEFAULT_RULE;
1121         INIT_LIST_HEAD(&start.u.tc_start.ts_nids);
1122         rc = nrs_tbf_rule_start(policy, head, &start);
1123         if (rc) {
1124                 cfs_hash_putref(head->th_cli_hash);
1125                 head->th_cli_hash = NULL;
1126         }
1127
1128         return rc;
1129 }
1130
1131 static void
1132 nrs_tbf_nid_cli_init(struct nrs_tbf_client *cli,
1133                              struct ptlrpc_request *req)
1134 {
1135         cli->tc_nid = req->rq_peer.nid;
1136 }
1137
1138 static int nrs_tbf_nid_rule_init(struct ptlrpc_nrs_policy *policy,
1139                                  struct nrs_tbf_rule *rule,
1140                                  struct nrs_tbf_cmd *start)
1141 {
1142         LASSERT(start->u.tc_start.ts_nids_str);
1143         OBD_ALLOC(rule->tr_nids_str,
1144                   strlen(start->u.tc_start.ts_nids_str) + 1);
1145         if (rule->tr_nids_str == NULL)
1146                 return -ENOMEM;
1147
1148         memcpy(rule->tr_nids_str,
1149                start->u.tc_start.ts_nids_str,
1150                strlen(start->u.tc_start.ts_nids_str));
1151
1152         INIT_LIST_HEAD(&rule->tr_nids);
1153         if (!list_empty(&start->u.tc_start.ts_nids)) {
1154                 if (cfs_parse_nidlist(rule->tr_nids_str,
1155                                       strlen(rule->tr_nids_str),
1156                                       &rule->tr_nids) <= 0) {
1157                         CERROR("nids {%s} illegal\n",
1158                                rule->tr_nids_str);
1159                         OBD_FREE(rule->tr_nids_str,
1160                                  strlen(start->u.tc_start.ts_nids_str) + 1);
1161                         return -EINVAL;
1162                 }
1163         }
1164         return 0;
1165 }
1166
1167 static int
1168 nrs_tbf_nid_rule_dump(struct nrs_tbf_rule *rule, struct seq_file *m)
1169 {
1170         seq_printf(m, "%s {%s} %llu, ref %d\n", rule->tr_name,
1171                    rule->tr_nids_str, rule->tr_rpc_rate,
1172                    atomic_read(&rule->tr_ref) - 1);
1173         return 0;
1174 }
1175
1176 static int
1177 nrs_tbf_nid_rule_match(struct nrs_tbf_rule *rule,
1178                        struct nrs_tbf_client *cli)
1179 {
1180         return cfs_match_nid(cli->tc_nid, &rule->tr_nids);
1181 }
1182
1183 static void nrs_tbf_nid_rule_fini(struct nrs_tbf_rule *rule)
1184 {
1185         if (!list_empty(&rule->tr_nids))
1186                 cfs_free_nidlist(&rule->tr_nids);
1187         LASSERT(rule->tr_nids_str != NULL);
1188         OBD_FREE(rule->tr_nids_str, strlen(rule->tr_nids_str) + 1);
1189 }
1190
1191 static void nrs_tbf_nid_cmd_fini(struct nrs_tbf_cmd *cmd)
1192 {
1193         if (!list_empty(&cmd->u.tc_start.ts_nids))
1194                 cfs_free_nidlist(&cmd->u.tc_start.ts_nids);
1195         if (cmd->u.tc_start.ts_nids_str)
1196                 OBD_FREE(cmd->u.tc_start.ts_nids_str,
1197                          strlen(cmd->u.tc_start.ts_nids_str) + 1);
1198 }
1199
/**
 * Parse a "nid={...}" expression into a TBF start command.
 *
 * On success the command owns a copy of the text between the braces in
 * ts_nids_str and the NID list parsed from it in ts_nids.
 *
 * \param[in,out] cmd the command to fill in
 * \param[in]     id  the NUL-terminated expression to parse
 *
 * \retval 0       success
 * \retval -ENOMEM the string copy could not be allocated
 * \retval -EINVAL cfs_parse_nidlist() rejected the NID list
 * \retval -ve     error returned by nrs_tbf_check_id_value()
 */
static int nrs_tbf_nid_parse(struct cfs_lstr_unused_guard_never_defined *unused);
1227
1228 static struct nrs_tbf_ops nrs_tbf_nid_ops = {
1229         .o_name = NRS_TBF_TYPE_NID,
1230         .o_startup = nrs_tbf_nid_startup,
1231         .o_cli_find = nrs_tbf_nid_cli_find,
1232         .o_cli_findadd = nrs_tbf_nid_cli_findadd,
1233         .o_cli_put = nrs_tbf_nid_cli_put,
1234         .o_cli_init = nrs_tbf_nid_cli_init,
1235         .o_rule_init = nrs_tbf_nid_rule_init,
1236         .o_rule_dump = nrs_tbf_nid_rule_dump,
1237         .o_rule_match = nrs_tbf_nid_rule_match,
1238         .o_rule_fini = nrs_tbf_nid_rule_fini,
1239 };
1240
1241 static void nrs_tbf_opcode_rule_fini(struct nrs_tbf_rule *rule)
1242 {
1243         if (rule->tr_opcodes != NULL)
1244                 CFS_FREE_BITMAP(rule->tr_opcodes);
1245
1246         LASSERT(rule->tr_opcodes_str != NULL);
1247         OBD_FREE(rule->tr_opcodes_str, strlen(rule->tr_opcodes_str) + 1);
1248 }
1249
1250 static unsigned nrs_tbf_opcode_hop_hash(struct cfs_hash *hs, const void *key,
1251                                         unsigned mask)
1252 {
1253         return cfs_hash_djb2_hash(key, sizeof(__u32), mask);
1254 }
1255
1256 static int nrs_tbf_opcode_hop_keycmp(const void *key, struct hlist_node *hnode)
1257 {
1258         const __u32     *opc = key;
1259         struct nrs_tbf_client *cli = hlist_entry(hnode,
1260                                                  struct nrs_tbf_client,
1261                                                  tc_hnode);
1262
1263         return *opc == cli->tc_opcode;
1264 }
1265
1266 static void *nrs_tbf_opcode_hop_key(struct hlist_node *hnode)
1267 {
1268         struct nrs_tbf_client *cli = hlist_entry(hnode,
1269                                                  struct nrs_tbf_client,
1270                                                  tc_hnode);
1271
1272         return &cli->tc_opcode;
1273 }
1274
1275 static void *nrs_tbf_opcode_hop_object(struct hlist_node *hnode)
1276 {
1277         return hlist_entry(hnode, struct nrs_tbf_client, tc_hnode);
1278 }
1279
1280 static void nrs_tbf_opcode_hop_get(struct cfs_hash *hs,
1281                                    struct hlist_node *hnode)
1282 {
1283         struct nrs_tbf_client *cli = hlist_entry(hnode,
1284                                                  struct nrs_tbf_client,
1285                                                  tc_hnode);
1286
1287         atomic_inc(&cli->tc_ref);
1288 }
1289
1290 static void nrs_tbf_opcode_hop_put(struct cfs_hash *hs,
1291                                    struct hlist_node *hnode)
1292 {
1293         struct nrs_tbf_client *cli = hlist_entry(hnode,
1294                                                  struct nrs_tbf_client,
1295                                                  tc_hnode);
1296
1297         atomic_dec(&cli->tc_ref);
1298 }
1299
1300 static void nrs_tbf_opcode_hop_exit(struct cfs_hash *hs,
1301                                     struct hlist_node *hnode)
1302 {
1303         struct nrs_tbf_client *cli = hlist_entry(hnode,
1304                                                  struct nrs_tbf_client,
1305                                                  tc_hnode);
1306
1307         LASSERTF(atomic_read(&cli->tc_ref) == 0,
1308                  "Busy TBF object from client with opcode %s, with %d refs\n",
1309                  ll_opcode2str(cli->tc_opcode),
1310                  atomic_read(&cli->tc_ref));
1311
1312         nrs_tbf_cli_fini(cli);
1313 }
1314 static struct cfs_hash_ops nrs_tbf_opcode_hash_ops = {
1315         .hs_hash        = nrs_tbf_opcode_hop_hash,
1316         .hs_keycmp      = nrs_tbf_opcode_hop_keycmp,
1317         .hs_key         = nrs_tbf_opcode_hop_key,
1318         .hs_object      = nrs_tbf_opcode_hop_object,
1319         .hs_get         = nrs_tbf_opcode_hop_get,
1320         .hs_put         = nrs_tbf_opcode_hop_put,
1321         .hs_put_locked  = nrs_tbf_opcode_hop_put,
1322         .hs_exit        = nrs_tbf_opcode_hop_exit,
1323 };
1324
1325 static int
1326 nrs_tbf_opcode_startup(struct ptlrpc_nrs_policy *policy,
1327                     struct nrs_tbf_head *head)
1328 {
1329         struct nrs_tbf_cmd      start = { 0 };
1330         int rc;
1331
1332         head->th_cli_hash = cfs_hash_create("nrs_tbf_hash",
1333                                             NRS_TBF_NID_BITS,
1334                                             NRS_TBF_NID_BITS,
1335                                             NRS_TBF_NID_BKT_BITS, 0,
1336                                             CFS_HASH_MIN_THETA,
1337                                             CFS_HASH_MAX_THETA,
1338                                             &nrs_tbf_opcode_hash_ops,
1339                                             CFS_HASH_RW_BKTLOCK);
1340         if (head->th_cli_hash == NULL)
1341                 return -ENOMEM;
1342
1343         start.u.tc_start.ts_opcodes = NULL;
1344         start.u.tc_start.ts_opcodes_str = "*";
1345
1346         start.u.tc_start.ts_rpc_rate = tbf_rate;
1347         start.u.tc_start.ts_rule_flags = NTRS_DEFAULT;
1348         start.tc_name = NRS_TBF_DEFAULT_RULE;
1349         rc = nrs_tbf_rule_start(policy, head, &start);
1350
1351         return rc;
1352 }
1353
1354 static struct nrs_tbf_client *
1355 nrs_tbf_opcode_cli_find(struct nrs_tbf_head *head,
1356                         struct ptlrpc_request *req)
1357 {
1358         __u32 opc;
1359
1360         opc = lustre_msg_get_opc(req->rq_reqmsg);
1361         return cfs_hash_lookup(head->th_cli_hash, &opc);
1362 }
1363
1364 static struct nrs_tbf_client *
1365 nrs_tbf_opcode_cli_findadd(struct nrs_tbf_head *head,
1366                            struct nrs_tbf_client *cli)
1367 {
1368         return cfs_hash_findadd_unique(head->th_cli_hash, &cli->tc_opcode,
1369                                        &cli->tc_hnode);
1370 }
1371
1372 static void
1373 nrs_tbf_opcode_cli_init(struct nrs_tbf_client *cli,
1374                         struct ptlrpc_request *req)
1375 {
1376         cli->tc_opcode = lustre_msg_get_opc(req->rq_reqmsg);
1377 }
1378
1379 #define MAX_OPCODE_LEN  32
1380 static int
1381 nrs_tbf_opcode_set_bit(const struct cfs_lstr *id, struct cfs_bitmap *opcodes)
1382 {
1383         int     op = 0;
1384         char    opcode_str[MAX_OPCODE_LEN];
1385
1386         if (id->ls_len + 1 > MAX_OPCODE_LEN)
1387                 return -EINVAL;
1388
1389         memcpy(opcode_str, id->ls_str, id->ls_len);
1390         opcode_str[id->ls_len] = '\0';
1391
1392         op = ll_str2opcode(opcode_str);
1393         if (op < 0)
1394                 return -EINVAL;
1395
1396         cfs_bitmap_set(opcodes, op);
1397         return 0;
1398 }
1399
1400 static int
1401 nrs_tbf_opcode_list_parse(char *str, int len, struct cfs_bitmap *opcodes)
1402 {
1403         struct cfs_lstr src;
1404         struct cfs_lstr res;
1405         int rc = 0;
1406
1407         ENTRY;
1408
1409         src.ls_str = str;
1410         src.ls_len = len;
1411         while (src.ls_str) {
1412                 rc = cfs_gettok(&src, ' ', &res);
1413                 if (rc == 0) {
1414                         rc = -EINVAL;
1415                         break;
1416                 }
1417                 rc = nrs_tbf_opcode_set_bit(&res, opcodes);
1418                 if (rc)
1419                         break;
1420         }
1421
1422         RETURN(rc);
1423 }
1424
1425 static void nrs_tbf_opcode_cmd_fini(struct nrs_tbf_cmd *cmd)
1426 {
1427         if (cmd->u.tc_start.ts_opcodes)
1428                 CFS_FREE_BITMAP(cmd->u.tc_start.ts_opcodes);
1429
1430         if (cmd->u.tc_start.ts_opcodes_str)
1431                 OBD_FREE(cmd->u.tc_start.ts_opcodes_str,
1432                          strlen(cmd->u.tc_start.ts_opcodes_str) + 1);
1433
1434 }
1435
1436 static int nrs_tbf_opcode_parse(struct nrs_tbf_cmd *cmd, char *id)
1437 {
1438         struct cfs_lstr src;
1439         int rc;
1440
1441         cmd->u.tc_start.ts_opcodes = CFS_ALLOCATE_BITMAP(LUSTRE_MAX_OPCODES);
1442         if (cmd->u.tc_start.ts_opcodes == NULL)
1443                 return -ENOMEM;
1444
1445         src.ls_str = id;
1446         src.ls_len = strlen(id);
1447         rc = nrs_tbf_check_id_value(&src, "opcode");
1448         if (rc)
1449                 GOTO(out, rc);
1450
1451         OBD_ALLOC(cmd->u.tc_start.ts_opcodes_str, src.ls_len + 1);
1452         if (cmd->u.tc_start.ts_opcodes_str == NULL)
1453                 GOTO(out, rc = -ENOMEM);
1454
1455         memcpy(cmd->u.tc_start.ts_opcodes_str, src.ls_str, src.ls_len);
1456
1457         /* parse opcode list */
1458         rc = nrs_tbf_opcode_list_parse(cmd->u.tc_start.ts_opcodes_str,
1459                                        strlen(cmd->u.tc_start.ts_opcodes_str),
1460                                        cmd->u.tc_start.ts_opcodes);
1461 out:
1462         if (rc != 0)
1463                 nrs_tbf_opcode_cmd_fini(cmd);
1464
1465         return rc;
1466 }
1467
1468 static int
1469 nrs_tbf_opcode_rule_match(struct nrs_tbf_rule *rule,
1470                           struct nrs_tbf_client *cli)
1471 {
1472         if (rule->tr_opcodes == NULL)
1473                 return 0;
1474
1475         return cfs_bitmap_check(rule->tr_opcodes, cli->tc_opcode);
1476 }
1477
1478 static int nrs_tbf_opcode_rule_init(struct ptlrpc_nrs_policy *policy,
1479                                     struct nrs_tbf_rule *rule,
1480                                     struct nrs_tbf_cmd *start)
1481 {
1482         LASSERT(start->u.tc_start.ts_opcodes_str != NULL);
1483         OBD_ALLOC(rule->tr_opcodes_str,
1484                   strlen(start->u.tc_start.ts_opcodes_str) + 1);
1485         if (rule->tr_opcodes_str == NULL)
1486                 return -ENOMEM;
1487
1488         strncpy(rule->tr_opcodes_str, start->u.tc_start.ts_opcodes_str,
1489                 strlen(start->u.tc_start.ts_opcodes_str) + 1);
1490
1491         if (start->u.tc_start.ts_opcodes == NULL)
1492                 return 0;
1493
1494         rule->tr_opcodes = CFS_ALLOCATE_BITMAP(LUSTRE_MAX_OPCODES);
1495         if (rule->tr_opcodes == NULL) {
1496                 OBD_FREE(rule->tr_opcodes_str,
1497                          strlen(start->u.tc_start.ts_opcodes_str) + 1);
1498                 return -ENOMEM;
1499         }
1500
1501         cfs_bitmap_copy(rule->tr_opcodes, start->u.tc_start.ts_opcodes);
1502
1503         return 0;
1504 }
1505
1506 static int
1507 nrs_tbf_opcode_rule_dump(struct nrs_tbf_rule *rule, struct seq_file *m)
1508 {
1509         seq_printf(m, "%s {%s} %llu, ref %d\n", rule->tr_name,
1510                    rule->tr_opcodes_str, rule->tr_rpc_rate,
1511                    atomic_read(&rule->tr_ref) - 1);
1512         return 0;
1513 }
1514
1515
1516 struct nrs_tbf_ops nrs_tbf_opcode_ops = {
1517         .o_name = NRS_TBF_TYPE_OPCODE,
1518         .o_startup = nrs_tbf_opcode_startup,
1519         .o_cli_find = nrs_tbf_opcode_cli_find,
1520         .o_cli_findadd = nrs_tbf_opcode_cli_findadd,
1521         .o_cli_put = nrs_tbf_nid_cli_put,
1522         .o_cli_init = nrs_tbf_opcode_cli_init,
1523         .o_rule_init = nrs_tbf_opcode_rule_init,
1524         .o_rule_dump = nrs_tbf_opcode_rule_dump,
1525         .o_rule_match = nrs_tbf_opcode_rule_match,
1526         .o_rule_fini = nrs_tbf_opcode_rule_fini,
1527 };
1528
1529 static struct nrs_tbf_type nrs_tbf_types[] = {
1530         {
1531                 .ntt_name = NRS_TBF_TYPE_JOBID,
1532                 .ntt_flag = NRS_TBF_FLAG_JOBID,
1533                 .ntt_ops = &nrs_tbf_jobid_ops,
1534         },
1535         {
1536                 .ntt_name = NRS_TBF_TYPE_NID,
1537                 .ntt_flag = NRS_TBF_FLAG_NID,
1538                 .ntt_ops = &nrs_tbf_nid_ops,
1539         },
1540         {
1541                 .ntt_name = NRS_TBF_TYPE_OPCODE,
1542                 .ntt_flag = NRS_TBF_FLAG_OPCODE,
1543                 .ntt_ops = &nrs_tbf_opcode_ops,
1544         },
1545 };
1546
1547 /**
1548  * Is called before the policy transitions into
1549  * ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED; allocates and initializes a
1550  * policy-specific private data structure.
1551  *
1552  * \param[in] policy The policy to start
1553  *
1554  * \retval -ENOMEM OOM error
1555  * \retval  0      success
1556  *
1557  * \see nrs_policy_register()
1558  * \see nrs_policy_ctl()
1559  */
1560 static int nrs_tbf_start(struct ptlrpc_nrs_policy *policy, char *arg)
1561 {
1562         struct nrs_tbf_head     *head;
1563         struct nrs_tbf_ops      *ops;
1564         __u32                    type;
1565         int found = 0;
1566         int i;
1567         int rc = 0;
1568
1569         if (arg == NULL || strlen(arg) > NRS_TBF_TYPE_MAX_LEN)
1570                 GOTO(out, rc = -EINVAL);
1571
1572         for (i = 0; i < ARRAY_SIZE(nrs_tbf_types); i++) {
1573                 if (strcmp(arg, nrs_tbf_types[i].ntt_name) == 0) {
1574                         ops = nrs_tbf_types[i].ntt_ops;
1575                         type = nrs_tbf_types[i].ntt_flag;
1576                         found = 1;
1577                         break;
1578                 }
1579         }
1580         if (found == 0)
1581                 GOTO(out, rc = -ENOTSUPP);
1582
1583         OBD_CPT_ALLOC_PTR(head, nrs_pol2cptab(policy), nrs_pol2cptid(policy));
1584         if (head == NULL)
1585                 GOTO(out, rc = -ENOMEM);
1586
1587         memcpy(head->th_type, arg, strlen(arg));
1588         head->th_type[strlen(arg)] = '\0';
1589         head->th_ops = ops;
1590         head->th_type_flag = type;
1591
1592         head->th_binheap = cfs_binheap_create(&nrs_tbf_heap_ops,
1593                                               CBH_FLAG_ATOMIC_GROW, 4096, NULL,
1594                                               nrs_pol2cptab(policy),
1595                                               nrs_pol2cptid(policy));
1596         if (head->th_binheap == NULL)
1597                 GOTO(out_free_head, rc = -ENOMEM);
1598
1599         atomic_set(&head->th_rule_sequence, 0);
1600         spin_lock_init(&head->th_rule_lock);
1601         INIT_LIST_HEAD(&head->th_list);
1602         hrtimer_init(&head->th_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1603         head->th_timer.function = nrs_tbf_timer_cb;
1604         rc = head->th_ops->o_startup(policy, head);
1605         if (rc)
1606                 GOTO(out_free_heap, rc);
1607
1608         policy->pol_private = head;
1609         return 0;
1610 out_free_heap:
1611         cfs_binheap_destroy(head->th_binheap);
1612 out_free_head:
1613         OBD_FREE_PTR(head);
1614 out:
1615         return rc;
1616 }
1617
1618 /**
1619  * Is called before the policy transitions into
1620  * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPED; deallocates the policy-specific
1621  * private data structure.
1622  *
1623  * \param[in] policy The policy to stop
1624  *
1625  * \see nrs_policy_stop0()
1626  */
1627 static void nrs_tbf_stop(struct ptlrpc_nrs_policy *policy)
1628 {
1629         struct nrs_tbf_head *head = policy->pol_private;
1630         struct ptlrpc_nrs *nrs = policy->pol_nrs;
1631         struct nrs_tbf_rule *rule, *n;
1632
1633         LASSERT(head != NULL);
1634         LASSERT(head->th_cli_hash != NULL);
1635         hrtimer_cancel(&head->th_timer);
1636         /* Should cleanup hash first before free rules */
1637         cfs_hash_putref(head->th_cli_hash);
1638         list_for_each_entry_safe(rule, n, &head->th_list, tr_linkage) {
1639                 list_del_init(&rule->tr_linkage);
1640                 nrs_tbf_rule_put(rule);
1641         }
1642         LASSERT(list_empty(&head->th_list));
1643         LASSERT(head->th_binheap != NULL);
1644         LASSERT(cfs_binheap_is_empty(head->th_binheap));
1645         cfs_binheap_destroy(head->th_binheap);
1646         OBD_FREE_PTR(head);
1647         nrs->nrs_throttling = 0;
1648         wake_up(&policy->pol_nrs->nrs_svcpt->scp_waitq);
1649 }
1650
1651 /**
1652  * Performs a policy-specific ctl function on TBF policy instances; similar
1653  * to ioctl.
1654  *
1655  * \param[in]     policy the policy instance
1656  * \param[in]     opc    the opcode
1657  * \param[in,out] arg    used for passing parameters and information
1658  *
1659  * \pre assert_spin_locked(&policy->pol_nrs->->nrs_lock)
1660  * \post assert_spin_locked(&policy->pol_nrs->->nrs_lock)
1661  *
1662  * \retval 0   operation carried out successfully
1663  * \retval -ve error
1664  */
1665 static int nrs_tbf_ctl(struct ptlrpc_nrs_policy *policy,
1666                        enum ptlrpc_nrs_ctl opc,
1667                        void *arg)
1668 {
1669         int rc = 0;
1670         ENTRY;
1671
1672         assert_spin_locked(&policy->pol_nrs->nrs_lock);
1673
1674         switch ((enum nrs_ctl_tbf)opc) {
1675         default:
1676                 RETURN(-EINVAL);
1677
1678         /**
1679          * Read RPC rate size of a policy instance.
1680          */
1681         case NRS_CTL_TBF_RD_RULE: {
1682                 struct nrs_tbf_head *head = policy->pol_private;
1683                 struct seq_file *m = (struct seq_file *) arg;
1684                 struct ptlrpc_service_part *svcpt;
1685
1686                 svcpt = policy->pol_nrs->nrs_svcpt;
1687                 seq_printf(m, "CPT %d:\n", svcpt->scp_cpt);
1688
1689                 rc = nrs_tbf_rule_dump_all(head, m);
1690                 }
1691                 break;
1692
1693         /**
1694          * Write RPC rate of a policy instance.
1695          */
1696         case NRS_CTL_TBF_WR_RULE: {
1697                 struct nrs_tbf_head *head = policy->pol_private;
1698                 struct nrs_tbf_cmd *cmd;
1699
1700                 cmd = (struct nrs_tbf_cmd *)arg;
1701                 rc = nrs_tbf_command(policy,
1702                                      head,
1703                                      cmd);
1704                 }
1705                 break;
1706         /**
1707          * Read the TBF policy type of a policy instance.
1708          */
1709         case NRS_CTL_TBF_RD_TYPE_FLAG: {
1710                 struct nrs_tbf_head *head = policy->pol_private;
1711
1712                 *(__u32 *)arg = head->th_type_flag;
1713                 }
1714                 break;
1715         }
1716
1717         RETURN(rc);
1718 }
1719
1720 /**
1721  * Is called for obtaining a TBF policy resource.
1722  *
1723  * \param[in]  policy     The policy on which the request is being asked for
1724  * \param[in]  nrq        The request for which resources are being taken
1725  * \param[in]  parent     Parent resource, unused in this policy
1726  * \param[out] resp       Resources references are placed in this array
1727  * \param[in]  moving_req Signifies limited caller context; unused in this
1728  *                        policy
1729  *
1730  *
1731  * \see nrs_resource_get_safe()
1732  */
1733 static int nrs_tbf_res_get(struct ptlrpc_nrs_policy *policy,
1734                            struct ptlrpc_nrs_request *nrq,
1735                            const struct ptlrpc_nrs_resource *parent,
1736                            struct ptlrpc_nrs_resource **resp,
1737                            bool moving_req)
1738 {
1739         struct nrs_tbf_head   *head;
1740         struct nrs_tbf_client *cli;
1741         struct nrs_tbf_client *tmp;
1742         struct ptlrpc_request *req;
1743
1744         if (parent == NULL) {
1745                 *resp = &((struct nrs_tbf_head *)policy->pol_private)->th_res;
1746                 return 0;
1747         }
1748
1749         head = container_of(parent, struct nrs_tbf_head, th_res);
1750         req = container_of(nrq, struct ptlrpc_request, rq_nrq);
1751         cli = head->th_ops->o_cli_find(head, req);
1752         if (cli != NULL) {
1753                 spin_lock(&policy->pol_nrs->nrs_svcpt->scp_req_lock);
1754                 LASSERT(cli->tc_rule);
1755                 if (cli->tc_rule_sequence !=
1756                     atomic_read(&head->th_rule_sequence) ||
1757                     cli->tc_rule->tr_flags & NTRS_STOPPING) {
1758                         struct nrs_tbf_rule *rule;
1759
1760                         rule = nrs_tbf_rule_match(head, cli);
1761                         if (rule != cli->tc_rule)
1762                                 nrs_tbf_cli_reset(head, rule, cli);
1763                         else
1764                                 nrs_tbf_rule_put(rule);
1765                 } else if (cli->tc_rule_generation !=
1766                            cli->tc_rule->tr_generation) {
1767                         nrs_tbf_cli_reset_value(head, cli);
1768                 }
1769                 spin_unlock(&policy->pol_nrs->nrs_svcpt->scp_req_lock);
1770                 goto out;
1771         }
1772
1773         OBD_CPT_ALLOC_GFP(cli, nrs_pol2cptab(policy), nrs_pol2cptid(policy),
1774                           sizeof(*cli), moving_req ? GFP_ATOMIC : __GFP_IO);
1775         if (cli == NULL)
1776                 return -ENOMEM;
1777
1778         nrs_tbf_cli_init(head, cli, req);
1779         tmp = head->th_ops->o_cli_findadd(head, cli);
1780         if (tmp != cli) {
1781                 atomic_dec(&cli->tc_ref);
1782                 nrs_tbf_cli_fini(cli);
1783                 cli = tmp;
1784         }
1785 out:
1786         *resp = &cli->tc_res;
1787
1788         return 1;
1789 }
1790
1791 /**
1792  * Called when releasing references to the resource hierachy obtained for a
1793  * request for scheduling using the TBF policy.
1794  *
1795  * \param[in] policy   the policy the resource belongs to
1796  * \param[in] res      the resource to be released
1797  */
1798 static void nrs_tbf_res_put(struct ptlrpc_nrs_policy *policy,
1799                             const struct ptlrpc_nrs_resource *res)
1800 {
1801         struct nrs_tbf_head   *head;
1802         struct nrs_tbf_client *cli;
1803
1804         /**
1805          * Do nothing for freeing parent, nrs_tbf_net resources
1806          */
1807         if (res->res_parent == NULL)
1808                 return;
1809
1810         cli = container_of(res, struct nrs_tbf_client, tc_res);
1811         head = container_of(res->res_parent, struct nrs_tbf_head, th_res);
1812
1813         head->th_ops->o_cli_put(head, cli);
1814 }
1815
1816 /**
1817  * Called when getting a request from the TBF policy for handling, or just
1818  * peeking; removes the request from the policy when it is to be handled.
1819  *
1820  * \param[in] policy The policy
1821  * \param[in] peek   When set, signifies that we just want to examine the
1822  *                   request, and not handle it, so the request is not removed
1823  *                   from the policy.
1824  * \param[in] force  Force the policy to return a request; unused in this
1825  *                   policy
1826  *
1827  * \retval The request to be handled; this is the next request in the TBF
1828  *         rule
1829  *
1830  * \see ptlrpc_nrs_req_get_nolock()
1831  * \see nrs_request_get()
1832  */
1833 static
1834 struct ptlrpc_nrs_request *nrs_tbf_req_get(struct ptlrpc_nrs_policy *policy,
1835                                            bool peek, bool force)
1836 {
1837         struct nrs_tbf_head       *head = policy->pol_private;
1838         struct ptlrpc_nrs_request *nrq = NULL;
1839         struct nrs_tbf_client     *cli;
1840         struct cfs_binheap_node   *node;
1841
1842         assert_spin_locked(&policy->pol_nrs->nrs_svcpt->scp_req_lock);
1843
1844         if (!peek && policy->pol_nrs->nrs_throttling)
1845                 return NULL;
1846
1847         node = cfs_binheap_root(head->th_binheap);
1848         if (unlikely(node == NULL))
1849                 return NULL;
1850
1851         cli = container_of(node, struct nrs_tbf_client, tc_node);
1852         LASSERT(cli->tc_in_heap);
1853         if (peek) {
1854                 nrq = list_entry(cli->tc_list.next,
1855                                      struct ptlrpc_nrs_request,
1856                                      nr_u.tbf.tr_list);
1857         } else {
1858                 __u64 now = ktime_to_ns(ktime_get());
1859                 __u64 passed;
1860                 __u64 ntoken;
1861                 __u64 deadline;
1862
1863                 deadline = cli->tc_check_time +
1864                           cli->tc_nsecs;
1865                 LASSERT(now >= cli->tc_check_time);
1866                 passed = now - cli->tc_check_time;
1867                 ntoken = passed * cli->tc_rpc_rate;
1868                 do_div(ntoken, NSEC_PER_SEC);
1869                 ntoken += cli->tc_ntoken;
1870                 if (ntoken > cli->tc_depth)
1871                         ntoken = cli->tc_depth;
1872                 if (ntoken > 0) {
1873                         struct ptlrpc_request *req;
1874                         nrq = list_entry(cli->tc_list.next,
1875                                              struct ptlrpc_nrs_request,
1876                                              nr_u.tbf.tr_list);
1877                         req = container_of(nrq,
1878                                            struct ptlrpc_request,
1879                                            rq_nrq);
1880                         ntoken--;
1881                         cli->tc_ntoken = ntoken;
1882                         cli->tc_check_time = now;
1883                         list_del_init(&nrq->nr_u.tbf.tr_list);
1884                         if (list_empty(&cli->tc_list)) {
1885                                 cfs_binheap_remove(head->th_binheap,
1886                                                    &cli->tc_node);
1887                                 cli->tc_in_heap = false;
1888                         } else {
1889                                 cfs_binheap_relocate(head->th_binheap,
1890                                                      &cli->tc_node);
1891                         }
1892                         CDEBUG(D_RPCTRACE,
1893                                "NRS start %s request from %s, "
1894                                "seq: %llu\n",
1895                                policy->pol_desc->pd_name,
1896                                libcfs_id2str(req->rq_peer),
1897                                nrq->nr_u.tbf.tr_sequence);
1898                 } else {
1899                         ktime_t time;
1900
1901                         policy->pol_nrs->nrs_throttling = 1;
1902                         head->th_deadline = deadline;
1903                         time = ktime_set(0, 0);
1904                         time = ktime_add_ns(time, deadline);
1905                         hrtimer_start(&head->th_timer, time, HRTIMER_MODE_ABS);
1906                 }
1907         }
1908
1909         return nrq;
1910 }
1911
1912 /**
1913  * Adds request \a nrq to \a policy's list of queued requests
1914  *
1915  * \param[in] policy The policy
1916  * \param[in] nrq    The request to add
1917  *
1918  * \retval 0 success; nrs_request_enqueue() assumes this function will always
1919  *                    succeed
1920  */
1921 static int nrs_tbf_req_add(struct ptlrpc_nrs_policy *policy,
1922                            struct ptlrpc_nrs_request *nrq)
1923 {
1924         struct nrs_tbf_head   *head;
1925         struct nrs_tbf_client *cli;
1926         int                    rc = 0;
1927
1928         assert_spin_locked(&policy->pol_nrs->nrs_svcpt->scp_req_lock);
1929
1930         cli = container_of(nrs_request_resource(nrq),
1931                            struct nrs_tbf_client, tc_res);
1932         head = container_of(nrs_request_resource(nrq)->res_parent,
1933                             struct nrs_tbf_head, th_res);
1934         if (list_empty(&cli->tc_list)) {
1935                 LASSERT(!cli->tc_in_heap);
1936                 rc = cfs_binheap_insert(head->th_binheap, &cli->tc_node);
1937                 if (rc == 0) {
1938                         cli->tc_in_heap = true;
1939                         nrq->nr_u.tbf.tr_sequence = head->th_sequence++;
1940                         list_add_tail(&nrq->nr_u.tbf.tr_list,
1941                                           &cli->tc_list);
1942                         if (policy->pol_nrs->nrs_throttling) {
1943                                 __u64 deadline = cli->tc_check_time +
1944                                                  cli->tc_nsecs;
1945                                 if ((head->th_deadline > deadline) &&
1946                                     (hrtimer_try_to_cancel(&head->th_timer)
1947                                      >= 0)) {
1948                                         ktime_t time;
1949                                         head->th_deadline = deadline;
1950                                         time = ktime_set(0, 0);
1951                                         time = ktime_add_ns(time, deadline);
1952                                         hrtimer_start(&head->th_timer, time,
1953                                                       HRTIMER_MODE_ABS);
1954                                 }
1955                         }
1956                 }
1957         } else {
1958                 LASSERT(cli->tc_in_heap);
1959                 nrq->nr_u.tbf.tr_sequence = head->th_sequence++;
1960                 list_add_tail(&nrq->nr_u.tbf.tr_list,
1961                                   &cli->tc_list);
1962         }
1963         return rc;
1964 }
1965
1966 /**
1967  * Removes request \a nrq from \a policy's list of queued requests.
1968  *
1969  * \param[in] policy The policy
1970  * \param[in] nrq    The request to remove
1971  */
1972 static void nrs_tbf_req_del(struct ptlrpc_nrs_policy *policy,
1973                              struct ptlrpc_nrs_request *nrq)
1974 {
1975         struct nrs_tbf_head   *head;
1976         struct nrs_tbf_client *cli;
1977
1978         assert_spin_locked(&policy->pol_nrs->nrs_svcpt->scp_req_lock);
1979
1980         cli = container_of(nrs_request_resource(nrq),
1981                            struct nrs_tbf_client, tc_res);
1982         head = container_of(nrs_request_resource(nrq)->res_parent,
1983                             struct nrs_tbf_head, th_res);
1984
1985         LASSERT(!list_empty(&nrq->nr_u.tbf.tr_list));
1986         list_del_init(&nrq->nr_u.tbf.tr_list);
1987         if (list_empty(&cli->tc_list)) {
1988                 cfs_binheap_remove(head->th_binheap,
1989                                    &cli->tc_node);
1990                 cli->tc_in_heap = false;
1991         } else {
1992                 cfs_binheap_relocate(head->th_binheap,
1993                                      &cli->tc_node);
1994         }
1995 }
1996
1997 /**
1998  * Prints a debug statement right before the request \a nrq stops being
1999  * handled.
2000  *
2001  * \param[in] policy The policy handling the request
2002  * \param[in] nrq    The request being handled
2003  *
2004  * \see ptlrpc_server_finish_request()
2005  * \see ptlrpc_nrs_req_stop_nolock()
2006  */
2007 static void nrs_tbf_req_stop(struct ptlrpc_nrs_policy *policy,
2008                               struct ptlrpc_nrs_request *nrq)
2009 {
2010         struct ptlrpc_request *req = container_of(nrq, struct ptlrpc_request,
2011                                                   rq_nrq);
2012
2013         assert_spin_locked(&policy->pol_nrs->nrs_svcpt->scp_req_lock);
2014
2015         CDEBUG(D_RPCTRACE, "NRS stop %s request from %s, seq: %llu\n",
2016                policy->pol_desc->pd_name, libcfs_id2str(req->rq_peer),
2017                nrq->nr_u.tbf.tr_sequence);
2018 }
2019
2020 #ifdef CONFIG_PROC_FS
2021
2022 /**
2023  * lprocfs interface
2024  */
2025
/**
 * Upper bound for the RPC rate accepted through lprocfs. The bound is
 * exclusive: nrs_tbf_parse_value_pair() rejects any rate that is zero or
 * greater than or equal to this value.
 */
#define LPROCFS_NRS_RATE_MAX            65535
2030
2031 static int
2032 ptlrpc_lprocfs_nrs_tbf_rule_seq_show(struct seq_file *m, void *data)
2033 {
2034         struct ptlrpc_service       *svc = m->private;
2035         int                          rc;
2036
2037         seq_printf(m, "regular_requests:\n");
2038         /**
2039          * Perform two separate calls to this as only one of the NRS heads'
2040          * policies may be in the ptlrpc_nrs_pol_state::NRS_POL_STATE_STARTED or
2041          * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPING state.
2042          */
2043         rc = ptlrpc_nrs_policy_control(svc, PTLRPC_NRS_QUEUE_REG,
2044                                        NRS_POL_NAME_TBF,
2045                                        NRS_CTL_TBF_RD_RULE,
2046                                        false, m);
2047         if (rc == 0) {
2048                 /**
2049                  * -ENOSPC means buf in the parameter m is overflow, return 0
2050                  * here to let upper layer function seq_read alloc a larger
2051                  * memory area and do this process again.
2052                  */
2053         } else if (rc == -ENOSPC) {
2054                 return 0;
2055
2056                 /**
2057                  * Ignore -ENODEV as the regular NRS head's policy may be in the
2058                  * ptlrpc_nrs_pol_state::NRS_POL_STATE_STOPPED state.
2059                  */
2060         } else if (rc != -ENODEV) {
2061                 return rc;
2062         }
2063
2064         if (!nrs_svc_has_hp(svc))
2065                 goto no_hp;
2066
2067         seq_printf(m, "high_priority_requests:\n");
2068         rc = ptlrpc_nrs_policy_control(svc, PTLRPC_NRS_QUEUE_HP,
2069                                        NRS_POL_NAME_TBF,
2070                                        NRS_CTL_TBF_RD_RULE,
2071                                        false, m);
2072         if (rc == 0) {
2073                 /**
2074                  * -ENOSPC means buf in the parameter m is overflow, return 0
2075                  * here to let upper layer function seq_read alloc a larger
2076                  * memory area and do this process again.
2077                  */
2078         } else if (rc == -ENOSPC) {
2079                 return 0;
2080         }
2081
2082 no_hp:
2083
2084         return rc;
2085 }
2086
2087 static int nrs_tbf_id_parse(struct nrs_tbf_cmd *cmd, char *token)
2088 {
2089         int rc;
2090
2091         switch (cmd->u.tc_start.ts_valid_type) {
2092         case NRS_TBF_FLAG_JOBID:
2093                 rc = nrs_tbf_jobid_parse(cmd, token);
2094                 break;
2095         case NRS_TBF_FLAG_NID:
2096                 rc = nrs_tbf_nid_parse(cmd, token);
2097                 break;
2098         case NRS_TBF_FLAG_OPCODE:
2099                 rc = nrs_tbf_opcode_parse(cmd, token);
2100                 break;
2101         default:
2102                 RETURN(-EINVAL);
2103         }
2104
2105         return rc;
2106 }
2107
2108 static void nrs_tbf_cmd_fini(struct nrs_tbf_cmd *cmd)
2109 {
2110         if (cmd->tc_cmd == NRS_CTL_TBF_START_RULE) {
2111                 if (cmd->u.tc_start.ts_valid_type == NRS_TBF_FLAG_JOBID)
2112                         nrs_tbf_jobid_cmd_fini(cmd);
2113                 else if (cmd->u.tc_start.ts_valid_type == NRS_TBF_FLAG_NID)
2114                         nrs_tbf_nid_cmd_fini(cmd);
2115                 else if (cmd->u.tc_start.ts_valid_type == NRS_TBF_FLAG_OPCODE)
2116                         nrs_tbf_opcode_cmd_fini(cmd);
2117         }
2118 }
2119
/**
 * Checks that \a name consists only of alphanumeric characters and '_'.
 *
 * The string is walked once up to its terminating NUL rather than calling
 * strlen() on every iteration, and each character is converted to
 * unsigned char before being classified. An empty string is accepted, as
 * there is no character to reject.
 *
 * \param[in] name NUL-terminated string to check; must not be NULL
 *
 * \retval true  every character is alphanumeric or '_'
 * \retval false at least one other character is present
 */
static bool name_is_valid(const char *name)
{
	const char *p;

	for (p = name; *p != '\0'; p++) {
		if (!isalnum((unsigned char)*p) && *p != '_')
			return false;
	}
	return true;
}
2131
2132 static int
2133 nrs_tbf_parse_value_pair(struct nrs_tbf_cmd *cmd, char *buffer)
2134 {
2135         char    *key;
2136         char    *val;
2137         int      rc;
2138         __u64    rate;
2139
2140         val = buffer;
2141         key = strsep(&val, "=");
2142         if (val == NULL || strlen(val) == 0)
2143                 return -EINVAL;
2144
2145         /* Key of the value pair */
2146         if (strcmp(key, "rate") == 0) {
2147                 rc = kstrtoull(val, 10, &rate);
2148                 if (rc)
2149                         return rc;
2150
2151                 if (rate <= 0 || rate >= LPROCFS_NRS_RATE_MAX)
2152                         return -EINVAL;
2153
2154                 if (cmd->tc_cmd == NRS_CTL_TBF_START_RULE)
2155                         cmd->u.tc_start.ts_rpc_rate = rate;
2156                 else if (cmd->tc_cmd == NRS_CTL_TBF_CHANGE_RULE)
2157                         cmd->u.tc_change.tc_rpc_rate = rate;
2158                 else
2159                         return -EINVAL;
2160         }  else if (strcmp(key, "rank") == 0) {
2161                 if (!name_is_valid(val))
2162                         return -EINVAL;
2163
2164                 if (cmd->tc_cmd == NRS_CTL_TBF_START_RULE)
2165                         cmd->u.tc_start.ts_next_name = val;
2166                 else if (cmd->tc_cmd == NRS_CTL_TBF_CHANGE_RULE)
2167                         cmd->u.tc_change.tc_next_name = val;
2168                 else
2169                         return -EINVAL;
2170         } else {
2171                 return -EINVAL;
2172         }
2173         return 0;
2174 }
2175
2176 static int
2177 nrs_tbf_parse_value_pairs(struct nrs_tbf_cmd *cmd, char *buffer)
2178 {
2179         char    *val;
2180         char    *token;
2181         int      rc;
2182
2183         val = buffer;
2184         while (val != NULL && strlen(val) != 0) {
2185                 token = strsep(&val, " ");
2186                 rc = nrs_tbf_parse_value_pair(cmd, token);
2187                 if (rc)
2188                         return rc;
2189         }
2190
2191         switch (cmd->tc_cmd) {
2192         case NRS_CTL_TBF_START_RULE:
2193                 if (cmd->u.tc_start.ts_rpc_rate == 0)
2194                         cmd->u.tc_start.ts_rpc_rate = tbf_rate;
2195                 break;
2196         case NRS_CTL_TBF_CHANGE_RULE:
2197                 if (cmd->u.tc_change.tc_rpc_rate == 0 &&
2198                     cmd->u.tc_change.tc_next_name == NULL)
2199                         return -EINVAL;
2200                 break;
2201         case NRS_CTL_TBF_STOP_RULE:
2202                 break;
2203         default:
2204                 return -EINVAL;
2205         }
2206         return 0;
2207 }
2208
2209 static struct nrs_tbf_cmd *
2210 nrs_tbf_parse_cmd(char *buffer, unsigned long count, __u32 type_flag)
2211 {
2212         static struct nrs_tbf_cmd       *cmd;
2213         char                            *token;
2214         char                            *val;
2215         int                              rc = 0;
2216
2217         OBD_ALLOC_PTR(cmd);
2218         if (cmd == NULL)
2219                 GOTO(out, rc = -ENOMEM);
2220         memset(cmd, 0, sizeof(*cmd));
2221
2222         val = buffer;
2223         token = strsep(&val, " ");
2224         if (val == NULL || strlen(val) == 0)
2225                 GOTO(out_free_cmd, rc = -EINVAL);
2226
2227         /* Type of the command */
2228         if (strcmp(token, "start") == 0) {
2229                 cmd->tc_cmd = NRS_CTL_TBF_START_RULE;
2230                 cmd->u.tc_start.ts_valid_type = type_flag;
2231         } else if (strcmp(token, "stop") == 0)
2232                 cmd->tc_cmd = NRS_CTL_TBF_STOP_RULE;
2233         else if (strcmp(token, "change") == 0)
2234                 cmd->tc_cmd = NRS_CTL_TBF_CHANGE_RULE;
2235         else
2236                 GOTO(out_free_cmd, rc = -EINVAL);
2237
2238         /* Name of the rule */
2239         token = strsep(&val, " ");
2240         if ((val == NULL && cmd->tc_cmd != NRS_CTL_TBF_STOP_RULE) ||
2241             !name_is_valid(token))
2242                 GOTO(out_free_cmd, rc = -EINVAL);
2243         cmd->tc_name = token;
2244
2245         if (cmd->tc_cmd == NRS_CTL_TBF_START_RULE) {
2246                 /* List of ID */
2247                 LASSERT(val);
2248                 token = val;
2249                 val = strrchr(token, '}');
2250                 if (!val)
2251                         GOTO(out_free_cmd, rc = -EINVAL);
2252
2253                 /* Skip '}' */
2254                 val++;
2255                 if (*val == '\0') {
2256                         val = NULL;
2257                 } else if (*val == ' ') {
2258                         *val = '\0';
2259                         val++;
2260                 } else
2261                         GOTO(out_free_cmd, rc = -EINVAL);
2262
2263                 rc = nrs_tbf_id_parse(cmd, token);
2264                 if (rc)
2265                         GOTO(out_free_cmd, rc);
2266         }
2267
2268         rc = nrs_tbf_parse_value_pairs(cmd, val);
2269         if (rc)
2270                 GOTO(out_cmd_fini, rc = -EINVAL);
2271         goto out;
2272 out_cmd_fini:
2273         nrs_tbf_cmd_fini(cmd);
2274 out_free_cmd:
2275         OBD_FREE_PTR(cmd);
2276 out:
2277         if (rc)
2278                 cmd = ERR_PTR(rc);
2279         return cmd;
2280 }
2281
2282 /**
2283  * Get the TBF policy type (nid, jobid, etc) preset by
2284  * proc entry 'nrs_policies' for command buffer parsing.
2285  *
2286  * \param[in] svc the PTLRPC service
2287  * \param[in] queue the NRS queue type
2288  *
2289  * \retval the preset TBF policy type flag
2290  */
2291 static __u32
2292 nrs_tbf_type_flag(struct ptlrpc_service *svc, enum ptlrpc_nrs_queue_type queue)
2293 {
2294         __u32   type;
2295         int     rc;
2296
2297         rc = ptlrpc_nrs_policy_control(svc, queue,
2298                                        NRS_POL_NAME_TBF,
2299                                        NRS_CTL_TBF_RD_TYPE_FLAG,
2300                                        true, &type);
2301         if (rc != 0)
2302                 type = NRS_TBF_FLAG_INVALID;
2303
2304         return type;
2305 }
2306
2307 extern struct nrs_core nrs_core;
2308 #define LPROCFS_WR_NRS_TBF_MAX_CMD (4096)
2309 static ssize_t
2310 ptlrpc_lprocfs_nrs_tbf_rule_seq_write(struct file *file,
2311                                       const char __user *buffer,
2312                                       size_t count, loff_t *off)
2313 {
2314         struct seq_file           *m = file->private_data;
2315         struct ptlrpc_service     *svc = m->private;
2316         char                      *kernbuf;
2317         char                      *val;
2318         int                        rc;
2319         static struct nrs_tbf_cmd *cmd;
2320         enum ptlrpc_nrs_queue_type queue = PTLRPC_NRS_QUEUE_BOTH;
2321         unsigned long              length;
2322         char                      *token;
2323
2324         OBD_ALLOC(kernbuf, LPROCFS_WR_NRS_TBF_MAX_CMD);
2325         if (kernbuf == NULL)
2326                 GOTO(out, rc = -ENOMEM);
2327
2328         if (count > LPROCFS_WR_NRS_TBF_MAX_CMD - 1)
2329                 GOTO(out_free_kernbuff, rc = -EINVAL);
2330
2331         if (copy_from_user(kernbuf, buffer, count))
2332                 GOTO(out_free_kernbuff, rc = -EFAULT);
2333
2334         val = kernbuf;
2335         token = strsep(&val, " ");
2336         if (val == NULL)
2337                 GOTO(out_free_kernbuff, rc = -EINVAL);
2338
2339         if (strcmp(token, "reg") == 0) {
2340                 queue = PTLRPC_NRS_QUEUE_REG;
2341         } else if (strcmp(token, "hp") == 0) {
2342                 queue = PTLRPC_NRS_QUEUE_HP;
2343         } else {
2344                 kernbuf[strlen(token)] = ' ';
2345                 val = kernbuf;
2346         }
2347         length = strlen(val);
2348
2349         if (length == 0)
2350                 GOTO(out_free_kernbuff, rc = -EINVAL);
2351
2352         if (queue == PTLRPC_NRS_QUEUE_HP && !nrs_svc_has_hp(svc))
2353                 GOTO(out_free_kernbuff, rc = -ENODEV);
2354         else if (queue == PTLRPC_NRS_QUEUE_BOTH && !nrs_svc_has_hp(svc))
2355                 queue = PTLRPC_NRS_QUEUE_REG;
2356
2357         cmd = nrs_tbf_parse_cmd(val, length, nrs_tbf_type_flag(svc, queue));
2358         if (IS_ERR(cmd))
2359                 GOTO(out_free_kernbuff, rc = PTR_ERR(cmd));
2360
2361         /**
2362          * Serialize NRS core lprocfs operations with policy registration/
2363          * unregistration.
2364          */
2365         mutex_lock(&nrs_core.nrs_mutex);
2366         rc = ptlrpc_nrs_policy_control(svc, queue,
2367                                        NRS_POL_NAME_TBF,
2368                                        NRS_CTL_TBF_WR_RULE,
2369                                        false, cmd);
2370         mutex_unlock(&nrs_core.nrs_mutex);
2371
2372         nrs_tbf_cmd_fini(cmd);
2373         OBD_FREE_PTR(cmd);
2374 out_free_kernbuff:
2375         OBD_FREE(kernbuf, LPROCFS_WR_NRS_TBF_MAX_CMD);
2376 out:
2377         return rc ? rc : count;
2378 }
2379 LPROC_SEQ_FOPS(ptlrpc_lprocfs_nrs_tbf_rule);
2380
2381 /**
2382  * Initializes a TBF policy's lprocfs interface for service \a svc
2383  *
2384  * \param[in] svc the service
2385  *
2386  * \retval 0    success
2387  * \retval != 0 error
2388  */
2389 static int nrs_tbf_lprocfs_init(struct ptlrpc_service *svc)
2390 {
2391         struct lprocfs_vars nrs_tbf_lprocfs_vars[] = {
2392                 { .name         = "nrs_tbf_rule",
2393                   .fops         = &ptlrpc_lprocfs_nrs_tbf_rule_fops,
2394                   .data = svc },
2395                 { NULL }
2396         };
2397
2398         if (svc->srv_procroot == NULL)
2399                 return 0;
2400
2401         return lprocfs_add_vars(svc->srv_procroot, nrs_tbf_lprocfs_vars, NULL);
2402 }
2403
2404 /**
2405  * Cleans up a TBF policy's lprocfs interface for service \a svc
2406  *
2407  * \param[in] svc the service
2408  */
2409 static void nrs_tbf_lprocfs_fini(struct ptlrpc_service *svc)
2410 {
2411         if (svc->srv_procroot == NULL)
2412                 return;
2413
2414         lprocfs_remove_proc_entry("nrs_tbf_rule", svc->srv_procroot);
2415 }
2416
2417 #endif /* CONFIG_PROC_FS */
2418
2419 /**
2420  * TBF policy operations
2421  */
2422 static const struct ptlrpc_nrs_pol_ops nrs_tbf_ops = {
2423         .op_policy_start        = nrs_tbf_start,
2424         .op_policy_stop         = nrs_tbf_stop,
2425         .op_policy_ctl          = nrs_tbf_ctl,
2426         .op_res_get             = nrs_tbf_res_get,
2427         .op_res_put             = nrs_tbf_res_put,
2428         .op_req_get             = nrs_tbf_req_get,
2429         .op_req_enqueue         = nrs_tbf_req_add,
2430         .op_req_dequeue         = nrs_tbf_req_del,
2431         .op_req_stop            = nrs_tbf_req_stop,
2432 #ifdef CONFIG_PROC_FS
2433         .op_lprocfs_init        = nrs_tbf_lprocfs_init,
2434         .op_lprocfs_fini        = nrs_tbf_lprocfs_fini,
2435 #endif
2436 };
2437
2438 /**
2439  * TBF policy configuration
2440  */
2441 struct ptlrpc_nrs_pol_conf nrs_conf_tbf = {
2442         .nc_name                = NRS_POL_NAME_TBF,
2443         .nc_ops                 = &nrs_tbf_ops,
2444         .nc_compat              = nrs_policy_compat_all,
2445 };
2446
2447 /** @} tbf */
2448
2449 /** @} nrs */
2450
2451 #endif /* HAVE_SERVER_SUPPORT */