LU-506 kernel: FC15 - fix GCC 'set-but-unused' warnings
libcfs/libcfs/workitem.c
/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * libcfs/libcfs/workitem.c
 *
 * Author: Isaac Huang <isaac@clusterfs.com>
 *         Liang Zhen  <zhen.liang@sun.com>
 */

#define DEBUG_SUBSYSTEM S_LNET

#include <libcfs/libcfs.h>

typedef struct cfs_wi_sched {
#ifdef __KERNEL__
        /** serialised workitems */
        cfs_spinlock_t  ws_lock;
        /** where schedulers sleep */
        cfs_waitq_t     ws_waitq;
#endif
        /** concurrent workitems */
        cfs_list_t      ws_runq;
        /** rescheduled running-workitems */
        cfs_list_t      ws_rerunq;
        /** shutting down */
        int             ws_shuttingdown;
} cfs_wi_sched_t;

#ifdef __KERNEL__
/**
 * we have 2 cfs_wi_sched_t so far:
 * one for CFS_WI_SCHED_ANY, another for CFS_WI_SCHED_SERIAL
 * per-cpu implementation will be added for SMP scalability
 */

#define CFS_WI_NSCHED   2
#else
/** always 2 for userspace */
#define CFS_WI_NSCHED   2
#endif /* __KERNEL__ */

struct cfs_workitem_data {
        /** serialize */
        cfs_spinlock_t  wi_glock;
        /** number of cfs_wi_sched_t */
        int             wi_nsched;
        /** number of threads (all schedulers) */
        int             wi_nthreads;
        /** array of schedulers; the last one is the serial scheduler */
        cfs_wi_sched_t *wi_scheds;
} cfs_wi_data;

static inline cfs_wi_sched_t *
cfs_wi_to_sched(cfs_workitem_t *wi)
{
        LASSERT(wi->wi_sched_id == CFS_WI_SCHED_ANY ||
                wi->wi_sched_id == CFS_WI_SCHED_SERIAL ||
                (wi->wi_sched_id >= 0 &&
                 wi->wi_sched_id < cfs_wi_data.wi_nsched));

        if (wi->wi_sched_id == CFS_WI_SCHED_ANY)
                return &cfs_wi_data.wi_scheds[0];
        if (wi->wi_sched_id == CFS_WI_SCHED_SERIAL)
                return &cfs_wi_data.wi_scheds[cfs_wi_data.wi_nsched - 1];

        return &cfs_wi_data.wi_scheds[wi->wi_sched_id];
}
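
/*
 * Mapping note (a sketch of the current layout, with wi_nsched == 2):
 * CFS_WI_SCHED_ANY and an explicit id of 0 both resolve to wi_scheds[0],
 * while CFS_WI_SCHED_SERIAL resolves to wi_scheds[1], the serial scheduler.
 */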

#ifdef __KERNEL__
static inline void
cfs_wi_sched_lock(cfs_wi_sched_t *sched)
{
        cfs_spin_lock(&sched->ws_lock);
}

static inline void
cfs_wi_sched_unlock(cfs_wi_sched_t *sched)
{
        cfs_spin_unlock(&sched->ws_lock);
}

static inline int
cfs_wi_sched_cansleep(cfs_wi_sched_t *sched)
{
        cfs_wi_sched_lock(sched);
        if (sched->ws_shuttingdown) {
                cfs_wi_sched_unlock(sched);
                return 0;
        }

        if (!cfs_list_empty(&sched->ws_runq)) {
                cfs_wi_sched_unlock(sched);
                return 0;
        }
        cfs_wi_sched_unlock(sched);
        return 1;
}

#else

static inline void
cfs_wi_sched_lock(cfs_wi_sched_t *sched)
{
        cfs_spin_lock(&cfs_wi_data.wi_glock);
}

static inline void
cfs_wi_sched_unlock(cfs_wi_sched_t *sched)
{
        cfs_spin_unlock(&cfs_wi_data.wi_glock);
}

#endif

/* XXX:
 * 0. it only works when called from inside wi->wi_action.
 * 1. after it returns, no one shall try to schedule the workitem again.
 */
void
cfs_wi_exit(cfs_workitem_t *wi)
{
        cfs_wi_sched_t *sched = cfs_wi_to_sched(wi);

        LASSERT (!cfs_in_interrupt()); /* because we use plain spinlock */
        LASSERT (!sched->ws_shuttingdown);

        cfs_wi_sched_lock(sched);

#ifdef __KERNEL__
        LASSERT (wi->wi_running);
#endif
        if (wi->wi_scheduled) { /* cancel pending schedules */
                LASSERT (!cfs_list_empty(&wi->wi_list));
                cfs_list_del_init(&wi->wi_list);
        }

        LASSERT (cfs_list_empty(&wi->wi_list));
        wi->wi_scheduled = 1; /* LBUG any future schedule attempts */

        cfs_wi_sched_unlock(sched);
        return;
}
CFS_EXPORT_SYMBOL(cfs_wi_exit);
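
/*
 * Usage sketch for cfs_wi_exit() (illustrative only: "my_obj", "mo_wi",
 * "my_obj_free" and the action below are hypothetical names, not part of
 * this file). cfs_wi_exit() may only be called from inside wi_action;
 * once it returns the scheduler will never touch the workitem again, so
 * the embedding object can be freed and the action should return non-zero
 * to tell the scheduler the workitem is dead:
 *
 *      static int
 *      my_wi_action(cfs_workitem_t *wi)
 *      {
 *              struct my_obj *obj = container_of(wi, struct my_obj, mo_wi);
 *
 *              if (obj->mo_done) {
 *                      cfs_wi_exit(wi);
 *                      my_obj_free(obj);
 *                      return 1;
 *              }
 *
 *              do_some_bounded_work(obj);
 *              return 0;
 *      }
 */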

/**
 * cancel a workitem:
 * returns 1 if the workitem is not running (any pending schedule is
 * cancelled, so wi_action cannot race with the caller), or 0 if wi_action
 * is already running and cannot be cancelled.
 */
int
cfs_wi_cancel (cfs_workitem_t *wi)
{
        cfs_wi_sched_t *sched = cfs_wi_to_sched(wi);
        int             rc;

        LASSERT (!cfs_in_interrupt()); /* because we use plain spinlock */
        LASSERT (!sched->ws_shuttingdown);

        cfs_wi_sched_lock(sched);
        /*
         * return 0 if it's running already, otherwise return 1, which
         * means the workitem will not be scheduled and will not have
         * any race with wi_action.
         */
        rc = !(wi->wi_running);

        if (wi->wi_scheduled) { /* cancel pending schedules */
                LASSERT (!cfs_list_empty(&wi->wi_list));
                cfs_list_del_init(&wi->wi_list);
                wi->wi_scheduled = 0;
        }

        LASSERT (cfs_list_empty(&wi->wi_list));

        cfs_wi_sched_unlock(sched);
        return rc;
}

CFS_EXPORT_SYMBOL(cfs_wi_cancel);
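
/*
 * Caller-side sketch (illustrative; the names and the completion
 * handshake are assumptions, not an API of this file). Because
 * cfs_wi_cancel() cannot stop an action that is already running, a caller
 * tearing down the embedding object must handle both outcomes:
 *
 *      if (cfs_wi_cancel(&obj->mo_wi)) {
 *              my_obj_free(obj);
 *      } else {
 *              wait_for_my_action_to_finish(obj);
 *              my_obj_free(obj);
 *      }
 */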

/*
 * A workitem scheduled on the serial scheduler (wi_sched_id ==
 * CFS_WI_SCHED_SERIAL) is strictly serialised, not only with itself but
 * also with every other workitem scheduled that way.
 *
 * For now there is only one static serialised queue, but more might be
 * added in the future, and dynamic creation of serialised queues might be
 * supported as well.
 */
void
cfs_wi_schedule(cfs_workitem_t *wi)
{
        cfs_wi_sched_t *sched = cfs_wi_to_sched(wi);

        LASSERT (!cfs_in_interrupt()); /* because we use plain spinlock */
        LASSERT (!sched->ws_shuttingdown);

        cfs_wi_sched_lock(sched);

        if (!wi->wi_scheduled) {
                LASSERT (cfs_list_empty(&wi->wi_list));

                wi->wi_scheduled = 1;
                if (!wi->wi_running) {
                        cfs_list_add_tail(&wi->wi_list, &sched->ws_runq);
#ifdef __KERNEL__
                        cfs_waitq_signal(&sched->ws_waitq);
#endif
                } else {
                        /* running now: park it on the rerunq, the
                         * scheduler will move it back to the runq when
                         * wi_action returns */
                        cfs_list_add(&wi->wi_list, &sched->ws_rerunq);
                }
        }

        LASSERT (!cfs_list_empty(&wi->wi_list));
        cfs_wi_sched_unlock(sched);
        return;
}

CFS_EXPORT_SYMBOL(cfs_wi_schedule);
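
/*
 * Putting it together (illustrative sketch: "my_obj" and my_wi_action()
 * are hypothetical, and the wi_* fields are set directly here; a
 * dedicated initialiser may exist in the companion workitem header):
 *
 *      struct my_obj {
 *              cfs_workitem_t  mo_wi;
 *              int             mo_done;
 *      };
 *
 *      static void
 *      my_obj_start(struct my_obj *obj)
 *      {
 *              cfs_workitem_t *wi = &obj->mo_wi;
 *
 *              CFS_INIT_LIST_HEAD(&wi->wi_list);
 *              wi->wi_action    = my_wi_action;
 *              wi->wi_sched_id  = CFS_WI_SCHED_ANY;
 *              wi->wi_running   = 0;
 *              wi->wi_scheduled = 0;
 *
 *              cfs_wi_schedule(wi);
 *      }
 *
 * Rescheduling an already-running workitem is legal: it lands on the
 * rerunq and runs again after the current wi_action returns.
 */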

#ifdef __KERNEL__

static int
cfs_wi_scheduler (void *arg)
{
        int             id     = (int)(long_ptr_t) arg;
        int             serial = (id == -1);
        char            name[24];
        cfs_wi_sched_t *sched;

        if (serial) {
                sched = &cfs_wi_data.wi_scheds[cfs_wi_data.wi_nsched - 1];
                cfs_daemonize("wi_serial_sd");
        } else {
                /* will be sched = &cfs_wi_data.wi_scheds[id] in the future */
                sched = &cfs_wi_data.wi_scheds[0];
                snprintf(name, sizeof(name), "cfs_wi_sd%03d", id);
                cfs_daemonize(name);
        }

        cfs_block_allsigs();

        cfs_wi_sched_lock(sched);

        while (!sched->ws_shuttingdown) {
                int             nloops = 0;
                int             rc;
                cfs_workitem_t *wi;

                while (!cfs_list_empty(&sched->ws_runq) &&
                       nloops < CFS_WI_RESCHED) {
                        wi = cfs_list_entry(sched->ws_runq.next,
                                            cfs_workitem_t, wi_list);
                        LASSERT (wi->wi_scheduled && !wi->wi_running);

                        cfs_list_del_init(&wi->wi_list);

                        wi->wi_running   = 1;
                        wi->wi_scheduled = 0;
                        cfs_wi_sched_unlock(sched);
                        nloops++;

                        rc = (*wi->wi_action) (wi);

                        cfs_wi_sched_lock(sched);
                        if (rc != 0) /* WI should be dead; it may even have
                                      * been freed, so don't touch it again */
                                continue;

                        wi->wi_running = 0;
                        if (cfs_list_empty(&wi->wi_list))
                                continue;

                        LASSERT (wi->wi_scheduled);
                        /* wi was rescheduled while it was running: it is
                         * on the rerunq now, so move it to the runq to let
                         * its action run again */
                        cfs_list_move_tail(&wi->wi_list, &sched->ws_runq);
                }

                if (!cfs_list_empty(&sched->ws_runq)) {
                        cfs_wi_sched_unlock(sched);
                        /* don't sleep: some workitems still expect this
                         * thread to come back soon */
                        cfs_cond_resched();
                        cfs_wi_sched_lock(sched);
                        continue;
                }

                cfs_wi_sched_unlock(sched);
                cfs_wait_event_interruptible_exclusive(sched->ws_waitq,
                                !cfs_wi_sched_cansleep(sched), rc);
                cfs_wi_sched_lock(sched);
        }

        cfs_wi_sched_unlock(sched);

        cfs_spin_lock(&cfs_wi_data.wi_glock);
        cfs_wi_data.wi_nthreads--;
        cfs_spin_unlock(&cfs_wi_data.wi_glock);
        return 0;
}

static int
cfs_wi_start_thread (int (*func) (void*), void *arg)
{
        long pid;

        pid = cfs_create_thread(func, arg, 0);
        if (pid < 0)
                return (int)pid;

        cfs_spin_lock(&cfs_wi_data.wi_glock);
        cfs_wi_data.wi_nthreads++;
        cfs_spin_unlock(&cfs_wi_data.wi_glock);
        return 0;
}

#else /* __KERNEL__ */

int
cfs_wi_check_events (void)
{
        int               n = 0;
        cfs_workitem_t   *wi;
        cfs_list_t       *q;

        cfs_spin_lock(&cfs_wi_data.wi_glock);

        for (;;) {
                /** rerunq is always empty for userspace */
                if (!cfs_list_empty(&cfs_wi_data.wi_scheds[1].ws_runq))
                        q = &cfs_wi_data.wi_scheds[1].ws_runq;
                else if (!cfs_list_empty(&cfs_wi_data.wi_scheds[0].ws_runq))
                        q = &cfs_wi_data.wi_scheds[0].ws_runq;
                else
                        break;

                wi = cfs_list_entry(q->next, cfs_workitem_t, wi_list);
                cfs_list_del_init(&wi->wi_list);

                LASSERT (wi->wi_scheduled);
                wi->wi_scheduled = 0;
                cfs_spin_unlock(&cfs_wi_data.wi_glock);

                n++;
                (*wi->wi_action) (wi);

                cfs_spin_lock(&cfs_wi_data.wi_glock);
        }

        cfs_spin_unlock(&cfs_wi_data.wi_glock);
        return n;
}
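
/*
 * Userspace sketch (illustrative; the event-loop helper is an
 * assumption): with no scheduler threads, the application's own event
 * loop is expected to drain scheduled workitems by polling, e.g.:
 *
 *      for (;;) {
 *              handle_network_and_timer_events();
 *              cfs_wi_check_events();
 *      }
 *
 * The return value is the number of workitem actions that were run.
 */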

#endif

static void
cfs_wi_sched_init(cfs_wi_sched_t *sched)
{
        sched->ws_shuttingdown = 0;
#ifdef __KERNEL__
        cfs_spin_lock_init(&sched->ws_lock);
        cfs_waitq_init(&sched->ws_waitq);
#endif
        CFS_INIT_LIST_HEAD(&sched->ws_runq);
        CFS_INIT_LIST_HEAD(&sched->ws_rerunq);
}

static void
cfs_wi_sched_shutdown(cfs_wi_sched_t *sched)
{
        cfs_wi_sched_lock(sched);

        LASSERT(cfs_list_empty(&sched->ws_runq));
        LASSERT(cfs_list_empty(&sched->ws_rerunq));

        sched->ws_shuttingdown = 1;

#ifdef __KERNEL__
        cfs_waitq_broadcast(&sched->ws_waitq);
#endif
        cfs_wi_sched_unlock(sched);
}

int
cfs_wi_startup (void)
{
        int i;
        int n, rc;

        cfs_wi_data.wi_nthreads = 0;
        cfs_wi_data.wi_nsched   = CFS_WI_NSCHED;
        LIBCFS_ALLOC(cfs_wi_data.wi_scheds,
                     cfs_wi_data.wi_nsched * sizeof(cfs_wi_sched_t));
        if (cfs_wi_data.wi_scheds == NULL)
                return -ENOMEM;

        cfs_spin_lock_init(&cfs_wi_data.wi_glock);
        for (i = 0; i < cfs_wi_data.wi_nsched; i++)
                cfs_wi_sched_init(&cfs_wi_data.wi_scheds[i]);

#ifdef __KERNEL__
        /* one scheduler thread per online CPU, plus one thread (id == -1)
         * for the serial scheduler */
        n = cfs_num_online_cpus();
        for (i = 0; i <= n; i++) {
                rc = cfs_wi_start_thread(cfs_wi_scheduler,
                                         (void *)(long_ptr_t)(i == n ? -1 : i));
                if (rc != 0) {
                        CERROR ("Can't spawn workitem scheduler: %d\n", rc);
                        cfs_wi_shutdown();
                        return rc;
                }
        }
#else
        SET_BUT_UNUSED(rc);
        SET_BUT_UNUSED(n);
#endif

        return 0;
}

void
cfs_wi_shutdown (void)
{
        int i;

        if (cfs_wi_data.wi_scheds == NULL)
                return;

        for (i = 0; i < cfs_wi_data.wi_nsched; i++)
                cfs_wi_sched_shutdown(&cfs_wi_data.wi_scheds[i]);

#ifdef __KERNEL__
        cfs_spin_lock(&cfs_wi_data.wi_glock);
        i = 2;
        while (cfs_wi_data.wi_nthreads != 0) {
                /* escalate to a console warning only when the wait count
                 * hits a power of two, to avoid flooding the log */
                CDEBUG(IS_PO2(++i) ? D_WARNING : D_NET,
                       "waiting for %d threads to terminate\n",
                       cfs_wi_data.wi_nthreads);
                cfs_spin_unlock(&cfs_wi_data.wi_glock);

                cfs_pause(cfs_time_seconds(1));

                cfs_spin_lock(&cfs_wi_data.wi_glock);
        }
        cfs_spin_unlock(&cfs_wi_data.wi_glock);
#endif
        LIBCFS_FREE(cfs_wi_data.wi_scheds,
                    cfs_wi_data.wi_nsched * sizeof(cfs_wi_sched_t));
        return;
}