/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.gnu.org/licenses/gpl-2.0.html
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2013, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 *
 * libcfs/libcfs/workitem.c
 *
 * Author: Isaac Huang <isaac@clusterfs.com>
 *         Liang Zhen  <zhen.liang@sun.com>
 */

#define DEBUG_SUBSYSTEM S_LNET

#include <linux/kthread.h>
#include <libcfs/libcfs.h>

#define CFS_WS_NAME_LEN         16

struct cfs_wi_sched {
        struct list_head                ws_list;        /* chain on global list */
        /** serialised workitems */
        spinlock_t                      ws_lock;
        /** where schedulers sleep */
        wait_queue_head_t               ws_waitq;
        /** concurrent workitems */
        struct list_head                ws_runq;
        /** rescheduled running-workitems: a workitem can be rescheduled
         * while running in wi_action(), but we don't want to execute it
         * again until it returns from wi_action(), so we put it on
         * ws_rerunq while rescheduling, and move it back to ws_runq
         * after wi_action() returns */
        struct list_head                ws_rerunq;
        /** CPT-table for this scheduler */
        struct cfs_cpt_table    *ws_cptab;
        /** CPT id for affinity */
        int                     ws_cpt;
        /** number of scheduled workitems */
        int                     ws_nscheduled;
        /** number of started scheduler threads, protected by cfs_wi_data::wi_glock */
        unsigned int            ws_nthreads:30;
        /** shutting down, protected by cfs_wi_data::wi_glock */
        unsigned int            ws_stopping:1;
        /** serialize starting thread, protected by cfs_wi_data::wi_glock */
        unsigned int            ws_starting:1;
        /** scheduler name */
        char                    ws_name[CFS_WS_NAME_LEN];
};

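/*
 * Lifecycle note: a workitem's state is tracked by its wi_scheduled and
 * wi_running flags plus its list membership, all under ws_lock.  It is
 * either idle (on no list), queued on ws_runq waiting for a scheduler
 * thread, or running inside wi_action(); scheduling a running workitem
 * parks it on ws_rerunq until the action returns.
 */
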
static struct cfs_workitem_data {
        /** serialize */
        spinlock_t              wi_glock;
        /** list of all schedulers */
        struct list_head        wi_scheds;
        /** WI module is initialized */
        int                     wi_init;
        /** shutting down the whole WI module */
        int                     wi_stopping;
} cfs_wi_data;

static inline int
cfs_wi_sched_cansleep(struct cfs_wi_sched *sched)
{
        spin_lock(&sched->ws_lock);
        if (sched->ws_stopping) {
                spin_unlock(&sched->ws_lock);
                return 0;
        }

        if (!list_empty(&sched->ws_runq)) {
                spin_unlock(&sched->ws_lock);
                return 0;
        }
        spin_unlock(&sched->ws_lock);
        return 1;
}

/* XXX:
 * 0. this only works when called from inside wi->wi_action.
 * 1. once it returns, nobody may try to schedule the workitem again.
 */
void
cfs_wi_exit(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
{
        LASSERT(!in_interrupt()); /* because we use plain spinlock */
        LASSERT(!sched->ws_stopping);

        spin_lock(&sched->ws_lock);

        LASSERT(wi->wi_running);

        if (wi->wi_scheduled) { /* cancel pending schedules */
                LASSERT(!list_empty(&wi->wi_list));
                list_del_init(&wi->wi_list);

                LASSERT(sched->ws_nscheduled > 0);
                sched->ws_nscheduled--;
        }

        LASSERT(list_empty(&wi->wi_list));

        wi->wi_scheduled = 1; /* LBUG any future schedule attempt */
        spin_unlock(&sched->ws_lock);
}
EXPORT_SYMBOL(cfs_wi_exit);

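/*
 * Illustrative sketch (not compiled): the teardown pattern implied by the
 * XXX note above.  "struct my_item", "mi_wi" and "my_sched" are
 * hypothetical names; the scheduler loop below treats a non-zero return
 * from wi_action() as "this workitem is dead, possibly already freed".
 */
#if 0
extern struct cfs_wi_sched *my_sched;   /* hypothetical scheduler */

static int my_final_action(struct cfs_workitem *wi)
{
        struct my_item *item = container_of(wi, struct my_item, mi_wi);

        cfs_wi_exit(my_sched, wi);      /* cancel pending schedules; any
                                         * later schedule attempt LBUGs */
        LIBCFS_FREE(item, sizeof(*item));
        return 1;                       /* non-zero: the scheduler must not
                                         * touch wi again */
}
#endif
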
/**
 * cancel schedule request of workitem \a wi
 */
int
cfs_wi_deschedule(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
{
        int     rc;

        LASSERT(!in_interrupt()); /* because we use plain spinlock */
        LASSERT(!sched->ws_stopping);

        /*
         * return 0 if it's already running, otherwise return 1, which
         * means the workitem will not be scheduled again and cannot
         * race with wi_action.
         */
        spin_lock(&sched->ws_lock);

        rc = !(wi->wi_running);

        if (wi->wi_scheduled) { /* cancel pending schedules */
                LASSERT(!list_empty(&wi->wi_list));
                list_del_init(&wi->wi_list);

                LASSERT(sched->ws_nscheduled > 0);
                sched->ws_nscheduled--;

                wi->wi_scheduled = 0;
        }

        LASSERT(list_empty(&wi->wi_list));

        spin_unlock(&sched->ws_lock);
        return rc;
}
EXPORT_SYMBOL(cfs_wi_deschedule);

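/*
 * Illustrative sketch (not compiled): acting on cfs_wi_deschedule()'s
 * return value.  "struct my_item", "mi_wi" and "my_sched" are
 * hypothetical names.
 */
#if 0
static void my_item_cancel(struct my_item *item)
{
        if (cfs_wi_deschedule(my_sched, &item->mi_wi)) {
                /* rc == 1: the workitem is off the queues and cannot race
                 * with wi_action(), so freeing it here is safe */
                LIBCFS_FREE(item, sizeof(*item));
        } else {
                /* rc == 0: wi_action() is running right now; teardown must
                 * be finished by the action itself, e.g. with the
                 * cfs_wi_exit() pattern shown above */
        }
}
#endif
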
/*
 * Workitem scheduled with (serial == 1) is strictly serialised not only with
 * itself, but also with others scheduled this way.
 *
 * Now there's only one static serialised queue, but in the future more might
 * be added, and even dynamic creation of serialised queues might be supported.
 */
void
cfs_wi_schedule(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
{
        LASSERT(!in_interrupt()); /* because we use plain spinlock */
        LASSERT(!sched->ws_stopping);

        spin_lock(&sched->ws_lock);

        if (!wi->wi_scheduled) {
                LASSERT(list_empty(&wi->wi_list));

                wi->wi_scheduled = 1;
                sched->ws_nscheduled++;
                if (!wi->wi_running) {
                        list_add_tail(&wi->wi_list, &sched->ws_runq);
                        wake_up(&sched->ws_waitq);
                } else {
                        list_add(&wi->wi_list, &sched->ws_rerunq);
                }
        }

        LASSERT(!list_empty(&wi->wi_list));
        spin_unlock(&sched->ws_lock);
}
EXPORT_SYMBOL(cfs_wi_schedule);

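/*
 * Illustrative sketch (not compiled): preparing and scheduling a workitem.
 * Real users initialise workitems through the libcfs workitem header; only
 * the fields this file touches are shown, and "my_action"/"my_sched" are
 * hypothetical names.
 */
#if 0
static int my_action(struct cfs_workitem *wi);  /* hypothetical action */

static void my_item_start(struct cfs_workitem *wi)
{
        INIT_LIST_HEAD(&wi->wi_list);
        wi->wi_action    = my_action;
        wi->wi_running   = 0;
        wi->wi_scheduled = 0;

        /* a second call while wi is still scheduled would be a no-op,
         * thanks to the wi_scheduled check above */
        cfs_wi_schedule(my_sched, wi);
}
#endif
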
static int
cfs_wi_scheduler(void *arg)
{
        struct cfs_wi_sched *sched = (struct cfs_wi_sched *)arg;

        /* CPT affinity scheduler? */
        if (sched->ws_cptab != NULL)
                if (cfs_cpt_bind(sched->ws_cptab, sched->ws_cpt) != 0)
                        CWARN("Unable to bind %s on CPU partition %d\n",
                                sched->ws_name, sched->ws_cpt);

        spin_lock(&cfs_wi_data.wi_glock);

        LASSERT(sched->ws_starting == 1);
        sched->ws_starting--;
        sched->ws_nthreads++;

        spin_unlock(&cfs_wi_data.wi_glock);

        spin_lock(&sched->ws_lock);

        while (!sched->ws_stopping) {
                int             nloops = 0;
                int             rc;
                struct cfs_workitem *wi;

                while (!list_empty(&sched->ws_runq) &&
                       nloops < CFS_WI_RESCHED) {
                        wi = list_entry(sched->ws_runq.next,
                                        struct cfs_workitem, wi_list);
                        LASSERT(wi->wi_scheduled && !wi->wi_running);

                        list_del_init(&wi->wi_list);

                        LASSERT(sched->ws_nscheduled > 0);
                        sched->ws_nscheduled--;

                        wi->wi_running   = 1;
                        wi->wi_scheduled = 0;

                        spin_unlock(&sched->ws_lock);
                        nloops++;

                        rc = (*wi->wi_action)(wi);

                        spin_lock(&sched->ws_lock);
                        if (rc != 0) /* the WI is dead, possibly even freed */
                                continue;

                        wi->wi_running = 0;
                        if (list_empty(&wi->wi_list))
                                continue;

                        LASSERT(wi->wi_scheduled);
                        /* wi was rescheduled while running, so it sits on
                         * ws_rerunq; move it to ws_runq so its action can
                         * run again */
                        list_move_tail(&wi->wi_list, &sched->ws_runq);
                }

                if (!list_empty(&sched->ws_runq)) {
                        spin_unlock(&sched->ws_lock);
                        /* don't sleep: some workitems still expect this
                         * thread to come back soon */
                        cond_resched();
                        spin_lock(&sched->ws_lock);
                        continue;
                }

                spin_unlock(&sched->ws_lock);
                rc = wait_event_interruptible_exclusive(sched->ws_waitq,
                                !cfs_wi_sched_cansleep(sched));
                spin_lock(&sched->ws_lock);
        }

        spin_unlock(&sched->ws_lock);

        spin_lock(&cfs_wi_data.wi_glock);
        sched->ws_nthreads--;
        spin_unlock(&cfs_wi_data.wi_glock);

        return 0;
}

void
cfs_wi_sched_destroy(struct cfs_wi_sched *sched)
{
        LASSERT(cfs_wi_data.wi_init);
        LASSERT(!cfs_wi_data.wi_stopping);

        spin_lock(&cfs_wi_data.wi_glock);
        if (sched->ws_stopping) {
                CDEBUG(D_INFO, "%s is already stopping\n",
                       sched->ws_name);
                spin_unlock(&cfs_wi_data.wi_glock);
                return;
        }

        LASSERT(!list_empty(&sched->ws_list));
        sched->ws_stopping = 1;

        spin_unlock(&cfs_wi_data.wi_glock);

        wake_up_all(&sched->ws_waitq);

        spin_lock(&cfs_wi_data.wi_glock);
        {
                int i = 2;

                while (sched->ws_nthreads > 0) {
                        CDEBUG(is_power_of_2(++i / 20) ? D_WARNING : D_NET,
                               "waiting %us for %d %s worker threads to exit\n",
                               i / 20, sched->ws_nthreads, sched->ws_name);

                        spin_unlock(&cfs_wi_data.wi_glock);
                        schedule_timeout_uninterruptible(cfs_time_seconds(1)
                                                         / 20);
                        spin_lock(&cfs_wi_data.wi_glock);
                }
        }

        list_del(&sched->ws_list);

        spin_unlock(&cfs_wi_data.wi_glock);

        LASSERT(sched->ws_nscheduled == 0);

        LIBCFS_FREE(sched, sizeof(*sched));
}
EXPORT_SYMBOL(cfs_wi_sched_destroy);

int
cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab,
                    int cpt, int nthrs, struct cfs_wi_sched **sched_pp)
{
        struct cfs_wi_sched     *sched;

        LASSERT(cfs_wi_data.wi_init);
        LASSERT(!cfs_wi_data.wi_stopping);
        LASSERT(cptab == NULL || cpt == CFS_CPT_ANY ||
                (cpt >= 0 && cpt < cfs_cpt_number(cptab)));

        LIBCFS_ALLOC(sched, sizeof(*sched));
        if (sched == NULL)
                return -ENOMEM;

        if (strlen(name) > sizeof(sched->ws_name) - 1) {
                LIBCFS_FREE(sched, sizeof(*sched));
                return -E2BIG;
        }
        strlcpy(sched->ws_name, name, sizeof(sched->ws_name));

        sched->ws_cptab = cptab;
        sched->ws_cpt = cpt;

        spin_lock_init(&sched->ws_lock);
        init_waitqueue_head(&sched->ws_waitq);

        INIT_LIST_HEAD(&sched->ws_runq);
        INIT_LIST_HEAD(&sched->ws_rerunq);
        INIT_LIST_HEAD(&sched->ws_list);

        for (; nthrs > 0; nthrs--) {
                char                    name[16];
                struct task_struct      *task;

                spin_lock(&cfs_wi_data.wi_glock);
                while (sched->ws_starting > 0) {
                        spin_unlock(&cfs_wi_data.wi_glock);
                        schedule();
                        spin_lock(&cfs_wi_data.wi_glock);
                }

                sched->ws_starting++;
                spin_unlock(&cfs_wi_data.wi_glock);

                if (sched->ws_cptab != NULL && sched->ws_cpt >= 0) {
                        snprintf(name, sizeof(name), "%s_%02d_%02d",
                                 sched->ws_name, sched->ws_cpt,
                                 sched->ws_nthreads);
                } else {
                        snprintf(name, sizeof(name), "%s_%02d",
                                 sched->ws_name, sched->ws_nthreads);
                }

                task = kthread_run(cfs_wi_scheduler, sched, "%s", name);
                if (IS_ERR(task)) {
                        int rc = PTR_ERR(task);

                        CERROR("Failed to create thread for WI scheduler %s: %d\n",
                               name, rc);

                        spin_lock(&cfs_wi_data.wi_glock);

                        /* make up for cfs_wi_sched_destroy */
                        list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
                        sched->ws_starting--;

                        spin_unlock(&cfs_wi_data.wi_glock);

                        cfs_wi_sched_destroy(sched);
                        return rc;
                }
        }

        spin_lock(&cfs_wi_data.wi_glock);
        list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
        spin_unlock(&cfs_wi_data.wi_glock);

        *sched_pp = sched;
        return 0;
}
EXPORT_SYMBOL(cfs_wi_sched_create);

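/*
 * Illustrative sketch (not compiled): creating a two-thread scheduler with
 * no CPT affinity, then tearing it down at module unload.  "test_wi",
 * "my_sched" and the module hooks are hypothetical names.
 */
#if 0
static struct cfs_wi_sched *my_sched;

static int my_module_init(void)
{
        int rc;

        rc = cfs_wi_sched_create("test_wi", NULL, CFS_CPT_ANY, 2, &my_sched);
        if (rc != 0)
                return rc;

        /* ... cfs_wi_schedule() workitems on my_sched ... */
        return 0;
}

static void my_module_exit(void)
{
        /* blocks until every scheduler thread has exited */
        cfs_wi_sched_destroy(my_sched);
}
#endif
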
int
cfs_wi_startup(void)
{
        memset(&cfs_wi_data, 0, sizeof(struct cfs_workitem_data));

        spin_lock_init(&cfs_wi_data.wi_glock);
        INIT_LIST_HEAD(&cfs_wi_data.wi_scheds);
        cfs_wi_data.wi_init = 1;

        return 0;
}

void
cfs_wi_shutdown(void)
{
        struct cfs_wi_sched     *sched;

        spin_lock(&cfs_wi_data.wi_glock);
        cfs_wi_data.wi_stopping = 1;
        spin_unlock(&cfs_wi_data.wi_glock);

        /* nobody should contend on this list */
        list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
                sched->ws_stopping = 1;
                wake_up_all(&sched->ws_waitq);
        }

        list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
                spin_lock(&cfs_wi_data.wi_glock);

                while (sched->ws_nthreads != 0) {
                        spin_unlock(&cfs_wi_data.wi_glock);
                        schedule_timeout_uninterruptible(cfs_time_seconds(1)
                                                         / 20);
                        spin_lock(&cfs_wi_data.wi_glock);
                }
                spin_unlock(&cfs_wi_data.wi_glock);
        }

        while (!list_empty(&cfs_wi_data.wi_scheds)) {
                sched = list_entry(cfs_wi_data.wi_scheds.next,
                                   struct cfs_wi_sched, ws_list);
                list_del(&sched->ws_list);
                LIBCFS_FREE(sched, sizeof(*sched));
        }

        cfs_wi_data.wi_stopping = 0;
        cfs_wi_data.wi_init = 0;
}