Whamcloud - gitweb
LU-6245 libcfs: remove typedefs in libcfs source code
[fs/lustre-release.git] / libcfs / libcfs / workitem.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  *
30  * Copyright (c) 2011, 2014, Intel Corporation.
31  */
32 /*
33  * This file is part of Lustre, http://www.lustre.org/
34  * Lustre is a trademark of Sun Microsystems, Inc.
35  *
36  * libcfs/libcfs/workitem.c
37  *
38  * Author: Isaac Huang <isaac@clusterfs.com>
39  *         Liang Zhen  <zhen.liang@sun.com>
40  */
41
42 #define DEBUG_SUBSYSTEM S_LNET
43
44 #include <linux/kthread.h>
45 #include <libcfs/libcfs.h>
46
47 #define CFS_WS_NAME_LEN         16
48
/** A workitem scheduler: a pool of service threads draining a queue of
 * workitems, optionally bound to one CPT for affinity. */
struct cfs_wi_sched {
        struct list_head                ws_list;        /* chain on global list */
        /** serialised workitems */
        spinlock_t                      ws_lock;
        /** where schedulers sleep */
        wait_queue_head_t               ws_waitq;
        /** concurrent workitems */
        struct list_head                ws_runq;
        /** rescheduled running-workitems, a workitem can be rescheduled
         * while running in wi_action(), but we don't want to execute it again
         * unless it returns from wi_action(), so we put it on ws_rerunq
         * while rescheduling, and move it to runq after it returns
         * from wi_action() */
        struct list_head                ws_rerunq;
        /** CPT-table for this scheduler */
        struct cfs_cpt_table    *ws_cptab;
        /** CPT id for affinity */
        int                     ws_cpt;
        /** number of scheduled workitems */
        int                     ws_nscheduled;
        /** started scheduler threads, protected by cfs_wi_data::wi_glock */
        unsigned int            ws_nthreads:30;
        /** shutting down, protected by cfs_wi_data::wi_glock */
        unsigned int            ws_stopping:1;
        /** serialize starting thread, protected by cfs_wi_data::wi_glock */
        unsigned int            ws_starting:1;
        /** scheduler name */
        char                    ws_name[CFS_WS_NAME_LEN];
};
78
/** Global state of the workitem module: the registry of all schedulers. */
static struct cfs_workitem_data {
        /** serialize */
        spinlock_t              wi_glock;
        /** list of all schedulers */
        struct list_head        wi_scheds;
        /** WI module is initialized */
        int                     wi_init;
        /** shutting down the whole WI module */
        int                     wi_stopping;
} cfs_wi_data;
89
/** Lock the scheduler's workitem queues (ws_runq/ws_rerunq). */
static inline void
cfs_wi_sched_lock(struct cfs_wi_sched *sched)
{
        spin_lock(&sched->ws_lock);
}
95
/** Unlock the scheduler's workitem queues. */
static inline void
cfs_wi_sched_unlock(struct cfs_wi_sched *sched)
{
        spin_unlock(&sched->ws_lock);
}
101
102 static inline int
103 cfs_wi_sched_cansleep(struct cfs_wi_sched *sched)
104 {
105         cfs_wi_sched_lock(sched);
106         if (sched->ws_stopping) {
107                 cfs_wi_sched_unlock(sched);
108                 return 0;
109         }
110
111         if (!list_empty(&sched->ws_runq)) {
112                 cfs_wi_sched_unlock(sched);
113                 return 0;
114         }
115         cfs_wi_sched_unlock(sched);
116         return 1;
117 }
118
119 /* XXX:
120  * 0. it only works when called from wi->wi_action.
121  * 1. when it returns no one shall try to schedule the workitem.
122  */
123 void
124 cfs_wi_exit(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
125 {
126         LASSERT(!in_interrupt()); /* because we use plain spinlock */
127         LASSERT(!sched->ws_stopping);
128
129         cfs_wi_sched_lock(sched);
130
131         LASSERT(wi->wi_running);
132
133         if (wi->wi_scheduled) { /* cancel pending schedules */
134                 LASSERT(!list_empty(&wi->wi_list));
135                 list_del_init(&wi->wi_list);
136
137                 LASSERT(sched->ws_nscheduled > 0);
138                 sched->ws_nscheduled--;
139         }
140
141         LASSERT(list_empty(&wi->wi_list));
142
143         wi->wi_scheduled = 1; /* LBUG future schedule attempts */
144         cfs_wi_sched_unlock(sched);
145
146         return;
147 }
148 EXPORT_SYMBOL(cfs_wi_exit);
149
150 /**
151  * cancel schedule request of workitem \a wi
152  */
153 int
154 cfs_wi_deschedule(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
155 {
156         int     rc;
157
158         LASSERT(!in_interrupt()); /* because we use plain spinlock */
159         LASSERT(!sched->ws_stopping);
160
161         /*
162          * return 0 if it's running already, otherwise return 1, which
163          * means the workitem will not be scheduled and will not have
164          * any race with wi_action.
165          */
166         cfs_wi_sched_lock(sched);
167
168         rc = !(wi->wi_running);
169
170         if (wi->wi_scheduled) { /* cancel pending schedules */
171                 LASSERT(!list_empty(&wi->wi_list));
172                 list_del_init(&wi->wi_list);
173
174                 LASSERT(sched->ws_nscheduled > 0);
175                 sched->ws_nscheduled--;
176
177                 wi->wi_scheduled = 0;
178         }
179
180         LASSERT (list_empty(&wi->wi_list));
181
182         cfs_wi_sched_unlock(sched);
183         return rc;
184 }
185 EXPORT_SYMBOL(cfs_wi_deschedule);
186
187 /*
188  * Workitem scheduled with (serial == 1) is strictly serialised not only with
189  * itself, but also with others scheduled this way.
190  *
191  * Now there's only one static serialised queue, but in the future more might
192  * be added, and even dynamic creation of serialised queues might be supported.
193  */
194 void
195 cfs_wi_schedule(struct cfs_wi_sched *sched, struct cfs_workitem *wi)
196 {
197         LASSERT(!in_interrupt()); /* because we use plain spinlock */
198         LASSERT(!sched->ws_stopping);
199
200         cfs_wi_sched_lock(sched);
201
202         if (!wi->wi_scheduled) {
203                 LASSERT (list_empty(&wi->wi_list));
204
205                 wi->wi_scheduled = 1;
206                 sched->ws_nscheduled++;
207                 if (!wi->wi_running) {
208                         list_add_tail(&wi->wi_list, &sched->ws_runq);
209                         wake_up(&sched->ws_waitq);
210                 } else {
211                         list_add(&wi->wi_list, &sched->ws_rerunq);
212                 }
213         }
214
215         LASSERT (!list_empty(&wi->wi_list));
216         cfs_wi_sched_unlock(sched);
217         return;
218 }
219 EXPORT_SYMBOL(cfs_wi_schedule);
220
/* Scheduler service-thread body: drain ws_runq in batches of up to
 * CFS_WI_RESCHED workitems, yield between batches, and sleep on
 * ws_waitq when the run queue is empty; exits when ws_stopping is set. */
static int
cfs_wi_scheduler(void *arg)
{
        struct cfs_wi_sched *sched = (struct cfs_wi_sched *)arg;

        cfs_block_allsigs();

        /* CPT affinity scheduler? */
        if (sched->ws_cptab != NULL)
                if (cfs_cpt_bind(sched->ws_cptab, sched->ws_cpt) != 0)
                        CWARN("Failed to bind %s on CPT %d\n",
                                sched->ws_name, sched->ws_cpt);

        spin_lock(&cfs_wi_data.wi_glock);

        /* announce startup: cfs_wi_sched_create() waits for ws_starting
         * to drop back to zero before spawning the next thread */
        LASSERT(sched->ws_starting == 1);
        sched->ws_starting--;
        sched->ws_nthreads++;

        spin_unlock(&cfs_wi_data.wi_glock);

        cfs_wi_sched_lock(sched);

        while (!sched->ws_stopping) {
                int             nloops = 0;
                int             rc;
                struct cfs_workitem *wi;

                while (!list_empty(&sched->ws_runq) &&
                       nloops < CFS_WI_RESCHED) {
                        wi = list_entry(sched->ws_runq.next,
                                        struct cfs_workitem, wi_list);
                        LASSERT(wi->wi_scheduled && !wi->wi_running);

                        list_del_init(&wi->wi_list);

                        LASSERT(sched->ws_nscheduled > 0);
                        sched->ws_nscheduled--;

                        wi->wi_running   = 1;
                        wi->wi_scheduled = 0;

                        /* drop the lock: wi_action may block or call back
                         * into cfs_wi_schedule()/cfs_wi_exit() */
                        cfs_wi_sched_unlock(sched);
                        nloops++;

                        rc = (*wi->wi_action) (wi);

                        cfs_wi_sched_lock(sched);
                        if (rc != 0) /* WI should be dead, even be freed! */
                                continue;

                        wi->wi_running = 0;
                        if (list_empty(&wi->wi_list))
                                continue;

                        LASSERT(wi->wi_scheduled);
                        /* wi is rescheduled, should be on rerunq now, we
                         * move it to runq so it can run action now */
                        list_move_tail(&wi->wi_list, &sched->ws_runq);
                }

                if (!list_empty(&sched->ws_runq)) {
                        cfs_wi_sched_unlock(sched);
                        /* don't sleep because some workitems still
                         * expect me to come back soon */
                        cond_resched();
                        cfs_wi_sched_lock(sched);
                        continue;
                }

                cfs_wi_sched_unlock(sched);
                rc = wait_event_interruptible_exclusive(sched->ws_waitq,
                                !cfs_wi_sched_cansleep(sched));
                cfs_wi_sched_lock(sched);
        }

        cfs_wi_sched_unlock(sched);

        /* deregister: destroy/shutdown paths wait for ws_nthreads == 0 */
        spin_lock(&cfs_wi_data.wi_glock);
        sched->ws_nthreads--;
        spin_unlock(&cfs_wi_data.wi_glock);

        return 0;
}
306
/**
 * Tear down scheduler \a sched: mark it stopping, wake all its service
 * threads, wait for them to exit, then unlink it from the global list
 * and free it.  If another caller already started the teardown, return
 * without doing anything.
 */
void
cfs_wi_sched_destroy(struct cfs_wi_sched *sched)
{
        LASSERT(cfs_wi_data.wi_init);
        LASSERT(!cfs_wi_data.wi_stopping);

        spin_lock(&cfs_wi_data.wi_glock);
        if (sched->ws_stopping) {
                CDEBUG(D_INFO, "%s is in progress of stopping\n",
                       sched->ws_name);
                spin_unlock(&cfs_wi_data.wi_glock);
                return;
        }

        LASSERT(!list_empty(&sched->ws_list));
        sched->ws_stopping = 1;

        spin_unlock(&cfs_wi_data.wi_glock);

        wake_up_all(&sched->ws_waitq);

        spin_lock(&cfs_wi_data.wi_glock);
        {
                int i = 2;

                /* poll for thread exit; IS_PO2(++i) throttles the
                 * warning to exponentially-spaced iterations */
                while (sched->ws_nthreads > 0) {
                        CDEBUG(IS_PO2(++i) ? D_WARNING : D_NET,
                               "waiting for %d threads of WI sched[%s] to "
                               "terminate\n", sched->ws_nthreads,
                               sched->ws_name);

                        spin_unlock(&cfs_wi_data.wi_glock);
                        set_current_state(TASK_UNINTERRUPTIBLE);
                        schedule_timeout(cfs_time_seconds(1) / 20);
                        spin_lock(&cfs_wi_data.wi_glock);
                }
        }

        list_del(&sched->ws_list);

        spin_unlock(&cfs_wi_data.wi_glock);

        /* all threads have exited, so nothing may remain queued */
        LASSERT(sched->ws_nscheduled == 0);

        LIBCFS_FREE(sched, sizeof(*sched));
}
EXPORT_SYMBOL(cfs_wi_sched_destroy);
354
355 int
356 cfs_wi_sched_create(char *name, struct cfs_cpt_table *cptab,
357                     int cpt, int nthrs, struct cfs_wi_sched **sched_pp)
358 {
359         struct cfs_wi_sched     *sched;
360
361         LASSERT(cfs_wi_data.wi_init);
362         LASSERT(!cfs_wi_data.wi_stopping);
363         LASSERT(cptab == NULL || cpt == CFS_CPT_ANY ||
364                 (cpt >= 0 && cpt < cfs_cpt_number(cptab)));
365
366         LIBCFS_ALLOC(sched, sizeof(*sched));
367         if (sched == NULL)
368                 return -ENOMEM;
369
370         if (strlen(name) > sizeof(sched->ws_name)-1) {
371                 LIBCFS_FREE(sched, sizeof(*sched));
372                 return -E2BIG;
373         }
374         strlcpy(sched->ws_name, name, sizeof(sched->ws_name));
375
376         sched->ws_cptab = cptab;
377         sched->ws_cpt = cpt;
378
379         spin_lock_init(&sched->ws_lock);
380         init_waitqueue_head(&sched->ws_waitq);
381
382         INIT_LIST_HEAD(&sched->ws_runq);
383         INIT_LIST_HEAD(&sched->ws_rerunq);
384         INIT_LIST_HEAD(&sched->ws_list);
385
386         for (; nthrs > 0; nthrs--)  {
387                 char                    name[16];
388                 struct task_struct      *task;
389
390                 spin_lock(&cfs_wi_data.wi_glock);
391                 while (sched->ws_starting > 0) {
392                         spin_unlock(&cfs_wi_data.wi_glock);
393                         schedule();
394                         spin_lock(&cfs_wi_data.wi_glock);
395                 }
396
397                 sched->ws_starting++;
398                 spin_unlock(&cfs_wi_data.wi_glock);
399
400                 if (sched->ws_cptab != NULL && sched->ws_cpt >= 0) {
401                         snprintf(name, sizeof(name), "%s_%02d_%02d",
402                                  sched->ws_name, sched->ws_cpt,
403                                  sched->ws_nthreads);
404                 } else {
405                         snprintf(name, sizeof(name), "%s_%02d",
406                                  sched->ws_name, sched->ws_nthreads);
407                 }
408
409                 task = kthread_run(cfs_wi_scheduler, sched, name);
410                 if (IS_ERR(task)) {
411                         int rc = PTR_ERR(task);
412
413                         CERROR("Failed to create thread for "
414                                 "WI scheduler %s: %d\n", name, rc);
415
416                         spin_lock(&cfs_wi_data.wi_glock);
417
418                         /* make up for cfs_wi_sched_destroy */
419                         list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
420                         sched->ws_starting--;
421
422                         spin_unlock(&cfs_wi_data.wi_glock);
423
424                         cfs_wi_sched_destroy(sched);
425                         return rc;
426                 }
427         }
428
429         spin_lock(&cfs_wi_data.wi_glock);
430         list_add(&sched->ws_list, &cfs_wi_data.wi_scheds);
431         spin_unlock(&cfs_wi_data.wi_glock);
432
433         *sched_pp = sched;
434         return 0;
435 }
436 EXPORT_SYMBOL(cfs_wi_sched_create);
437
438 int
439 cfs_wi_startup(void)
440 {
441         memset(&cfs_wi_data, 0, sizeof(struct cfs_workitem_data));
442
443         spin_lock_init(&cfs_wi_data.wi_glock);
444         INIT_LIST_HEAD(&cfs_wi_data.wi_scheds);
445         cfs_wi_data.wi_init = 1;
446
447         return 0;
448 }
449
/**
 * Shut down the workitem module: stop every registered scheduler, wait
 * for all service threads to exit, then free the scheduler structures.
 */
void
cfs_wi_shutdown (void)
{
        struct cfs_wi_sched     *sched;

        spin_lock(&cfs_wi_data.wi_glock);
        cfs_wi_data.wi_stopping = 1;
        spin_unlock(&cfs_wi_data.wi_glock);

        /* nobody should contend on this list */
        list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
                sched->ws_stopping = 1;
                wake_up_all(&sched->ws_waitq);
        }

        /* wait until every scheduler's threads have deregistered */
        list_for_each_entry(sched, &cfs_wi_data.wi_scheds, ws_list) {
                spin_lock(&cfs_wi_data.wi_glock);

                while (sched->ws_nthreads != 0) {
                        spin_unlock(&cfs_wi_data.wi_glock);
                        set_current_state(TASK_UNINTERRUPTIBLE);
                        schedule_timeout(cfs_time_seconds(1) / 20);
                        spin_lock(&cfs_wi_data.wi_glock);
                }
                spin_unlock(&cfs_wi_data.wi_glock);
        }

        /* all threads are gone: free the scheduler structures */
        while (!list_empty(&cfs_wi_data.wi_scheds)) {
                sched = list_entry(cfs_wi_data.wi_scheds.next,
                                       struct cfs_wi_sched, ws_list);
                list_del(&sched->ws_list);
                LIBCFS_FREE(sched, sizeof(*sched));
        }

        cfs_wi_data.wi_stopping = 0;
        cfs_wi_data.wi_init = 0;
}