/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, Whamcloud, Inc.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * libcfs/libcfs/workitem.c
 *
 * Author: Isaac Huang <isaac@clusterfs.com>
 *         Liang Zhen  <zhen.liang@sun.com>
 */

#define DEBUG_SUBSYSTEM S_LNET

#include <libcfs/libcfs.h>

typedef struct cfs_wi_sched {
#ifdef __KERNEL__
        /** serialises access to this scheduler's queues */
        cfs_spinlock_t  ws_lock;
        /** where scheduler threads sleep */
        cfs_waitq_t     ws_waitq;
#endif
        /** queued workitems, which may run concurrently */
        cfs_list_t      ws_runq;
        /** workitems rescheduled while they were running */
        cfs_list_t      ws_rerunq;
        /** shutting down */
        int             ws_shuttingdown;
} cfs_wi_sched_t;

#ifdef __KERNEL__
/**
 * We have two cfs_wi_sched_t so far:
 * one for CFS_WI_SCHED_ANY and one for CFS_WI_SCHED_SERIAL.
 * A per-CPU implementation will be added for SMP scalability.
 */

#define CFS_WI_NSCHED   2
#else
/** always 2 for userspace */
#define CFS_WI_NSCHED   2
#endif /* __KERNEL__ */

struct cfs_workitem_data {
        /** serialises access to the fields below */
        cfs_spinlock_t  wi_glock;
        /** number of cfs_wi_sched_t */
        int             wi_nsched;
        /** number of threads (all schedulers) */
        int             wi_nthreads;
        /** array of wi_nsched schedulers */
        cfs_wi_sched_t *wi_scheds;
} cfs_wi_data;
static inline cfs_wi_sched_t *
cfs_wi_to_sched(cfs_workitem_t *wi)
{
        LASSERT(wi->wi_sched_id == CFS_WI_SCHED_ANY ||
                wi->wi_sched_id == CFS_WI_SCHED_SERIAL ||
                (wi->wi_sched_id >= 0 &&
                 wi->wi_sched_id < cfs_wi_data.wi_nsched));

        if (wi->wi_sched_id == CFS_WI_SCHED_ANY)
                return &cfs_wi_data.wi_scheds[0];
        if (wi->wi_sched_id == CFS_WI_SCHED_SERIAL)
                return &cfs_wi_data.wi_scheds[cfs_wi_data.wi_nsched - 1];

        return &cfs_wi_data.wi_scheds[wi->wi_sched_id];
}
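
/*
 * Worked example of the mapping above (with the current CFS_WI_NSCHED == 2):
 *
 *      wi_sched_id == CFS_WI_SCHED_ANY    -> wi_scheds[0]
 *      wi_sched_id == CFS_WI_SCHED_SERIAL -> wi_scheds[1] (the last one)
 *      wi_sched_id == 0 or 1              -> wi_scheds[wi_sched_id]
 */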

#ifdef __KERNEL__
static inline void
cfs_wi_sched_lock(cfs_wi_sched_t *sched)
{
        cfs_spin_lock(&sched->ws_lock);
}

static inline void
cfs_wi_sched_unlock(cfs_wi_sched_t *sched)
{
        cfs_spin_unlock(&sched->ws_lock);
}

static inline int
cfs_wi_sched_cansleep(cfs_wi_sched_t *sched)
{
        cfs_wi_sched_lock(sched);
        if (sched->ws_shuttingdown) {
                cfs_wi_sched_unlock(sched);
                return 0;
        }

        if (!cfs_list_empty(&sched->ws_runq)) {
                cfs_wi_sched_unlock(sched);
                return 0;
        }
        cfs_wi_sched_unlock(sched);
        return 1;
}

#else

static inline void
cfs_wi_sched_lock(cfs_wi_sched_t *sched)
{
        cfs_spin_lock(&cfs_wi_data.wi_glock);
}

static inline void
cfs_wi_sched_unlock(cfs_wi_sched_t *sched)
{
        cfs_spin_unlock(&cfs_wi_data.wi_glock);
}

#endif

/* XXX:
 * 1. this only works when called from inside wi->wi_action.
 * 2. after it returns, nobody may try to schedule the workitem again.
 */
void
cfs_wi_exit(cfs_workitem_t *wi)
{
        cfs_wi_sched_t *sched = cfs_wi_to_sched(wi);

        LASSERT (!cfs_in_interrupt()); /* because we use plain spinlock */
        LASSERT (!sched->ws_shuttingdown);

        cfs_wi_sched_lock(sched);

#ifdef __KERNEL__
        LASSERT (wi->wi_running);
#endif
        if (wi->wi_scheduled) { /* cancel pending schedules */
                LASSERT (!cfs_list_empty(&wi->wi_list));
                cfs_list_del_init(&wi->wi_list);
        }

        LASSERT (cfs_list_empty(&wi->wi_list));
        wi->wi_scheduled = 1; /* so any future schedule attempt LBUGs */

        cfs_wi_sched_unlock(sched);
        return;
}
CFS_EXPORT_SYMBOL(cfs_wi_exit);
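
/*
 * Example (sketch): a one-shot wi_action that tears down its own workitem.
 * cfs_wi_exit() must run inside wi_action, and the action must return
 * non-zero afterwards so the scheduler never touches the (possibly freed)
 * workitem again.  "struct my_obj", do_work() and my_obj_free() are
 * illustrative names, not part of libcfs:
 *
 *      static int
 *      my_oneshot_action(cfs_workitem_t *wi)
 *      {
 *              struct my_obj *obj = wi->wi_data;
 *
 *              do_work(obj);
 *              cfs_wi_exit(wi);
 *              my_obj_free(obj);
 *              return 1;
 *      }
 */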

/**
 * Cancel a workitem.
 */
int
cfs_wi_cancel (cfs_workitem_t *wi)
{
        cfs_wi_sched_t *sched = cfs_wi_to_sched(wi);
        int             rc;

        LASSERT (!cfs_in_interrupt()); /* because we use plain spinlock */
        LASSERT (!sched->ws_shuttingdown);

        cfs_wi_sched_lock(sched);
        /*
         * return 0 if the workitem is running already; otherwise return 1,
         * which means the workitem will not be scheduled again and cannot
         * race with wi_action.
         */
        rc = !(wi->wi_running);

        if (wi->wi_scheduled) { /* cancel pending schedules */
                LASSERT (!cfs_list_empty(&wi->wi_list));
                cfs_list_del_init(&wi->wi_list);
                wi->wi_scheduled = 0;
        }

        LASSERT (cfs_list_empty(&wi->wi_list));

        cfs_wi_sched_unlock(sched);
        return rc;
}

CFS_EXPORT_SYMBOL(cfs_wi_cancel);
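
/*
 * Example (sketch) of acting on the return value:
 *
 *      if (cfs_wi_cancel(wi)) {
 *              // wi was not running: it is descheduled now and
 *              // wi_action will not be called for this schedule
 *      } else {
 *              // wi_action is running or about to run; the caller
 *              // must synchronise with it by other means
 *      }
 */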

/*
 * A workitem scheduled with (serial == 1) is strictly serialised, not only
 * with itself but also with all other workitems scheduled this way.
 *
 * For now there is only one static serialised queue; in the future more may
 * be added, and dynamic creation of serialised queues may be supported.
 */
void
cfs_wi_schedule(cfs_workitem_t *wi)
{
        cfs_wi_sched_t *sched = cfs_wi_to_sched(wi);

        LASSERT (!cfs_in_interrupt()); /* because we use plain spinlock */
        LASSERT (!sched->ws_shuttingdown);

        cfs_wi_sched_lock(sched);

        if (!wi->wi_scheduled) {
                LASSERT (cfs_list_empty(&wi->wi_list));

                wi->wi_scheduled = 1;
                if (!wi->wi_running) {
                        cfs_list_add_tail(&wi->wi_list, &sched->ws_runq);
#ifdef __KERNEL__
                        cfs_waitq_signal(&sched->ws_waitq);
#endif
                } else {
                        cfs_list_add(&wi->wi_list, &sched->ws_rerunq);
                }
        }

        LASSERT (!cfs_list_empty(&wi->wi_list));
        cfs_wi_sched_unlock(sched);
        return;
}

CFS_EXPORT_SYMBOL(cfs_wi_schedule);
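
/*
 * Example (sketch) of the typical lifecycle.  This assumes the cfs_wi_init()
 * helper declared in libcfs_workitem.h, which records the data pointer, the
 * action callback and the scheduler id; my_action and my_data are
 * illustrative names:
 *
 *      cfs_workitem_t wi;
 *
 *      cfs_wi_init(&wi, &my_data, my_action, CFS_WI_SCHED_ANY);
 *      cfs_wi_schedule(&wi);
 *
 * If my_action() reschedules wi while it is still running, wi is parked on
 * ws_rerunq and moved back to ws_runq after the current invocation returns.
 */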

#ifdef __KERNEL__

static int
cfs_wi_scheduler (void *arg)
{
        int             id     = (int)(long_ptr_t) arg;
        int             serial = (id == -1);
        char            name[24];
        cfs_wi_sched_t *sched;

        if (serial) {
                sched = &cfs_wi_data.wi_scheds[cfs_wi_data.wi_nsched - 1];
                cfs_daemonize("wi_serial_sd");
        } else {
                /* will be sched = &cfs_wi_data.wi_scheds[id] in the future */
                sched = &cfs_wi_data.wi_scheds[0];
                snprintf(name, sizeof(name), "cfs_wi_sd%03d", id);
                cfs_daemonize(name);
        }

        cfs_block_allsigs();

        cfs_wi_sched_lock(sched);

        while (!sched->ws_shuttingdown) {
                int             nloops = 0;
                int             rc;
                cfs_workitem_t *wi;

                while (!cfs_list_empty(&sched->ws_runq) &&
                       nloops < CFS_WI_RESCHED) {
                        wi = cfs_list_entry(sched->ws_runq.next,
                                            cfs_workitem_t, wi_list);
                        LASSERT (wi->wi_scheduled && !wi->wi_running);

                        cfs_list_del_init(&wi->wi_list);

                        wi->wi_running   = 1;
                        wi->wi_scheduled = 0;
                        cfs_wi_sched_unlock(sched);
                        nloops++;

                        rc = (*wi->wi_action) (wi);

                        cfs_wi_sched_lock(sched);
                        if (rc != 0) /* workitem is dead, may even be freed */
                                continue;

                        wi->wi_running = 0;
                        if (cfs_list_empty(&wi->wi_list))
                                continue;

                        LASSERT (wi->wi_scheduled);
                        /* wi was rescheduled while it was running, so it
                         * sits on the rerunq; move it to the runq so its
                         * action can run again */
                        cfs_list_move_tail(&wi->wi_list, &sched->ws_runq);
                }

                if (!cfs_list_empty(&sched->ws_runq)) {
                        cfs_wi_sched_unlock(sched);
                        /* don't sleep: some workitems still expect this
                         * thread to come back soon */
                        cfs_cond_resched();
                        cfs_wi_sched_lock(sched);
                        continue;
                }

                cfs_wi_sched_unlock(sched);
                cfs_wait_event_interruptible_exclusive(sched->ws_waitq,
                                !cfs_wi_sched_cansleep(sched), rc);
                cfs_wi_sched_lock(sched);
        }

        cfs_wi_sched_unlock(sched);

        cfs_spin_lock(&cfs_wi_data.wi_glock);
        cfs_wi_data.wi_nthreads--;
        cfs_spin_unlock(&cfs_wi_data.wi_glock);
        return 0;
}

static int
cfs_wi_start_thread (int (*func) (void*), void *arg)
{
        long pid;

        pid = cfs_create_thread(func, arg, 0);
        if (pid < 0)
                return (int)pid;

        cfs_spin_lock(&cfs_wi_data.wi_glock);
        cfs_wi_data.wi_nthreads++;
        cfs_spin_unlock(&cfs_wi_data.wi_glock);
        return 0;
}

#else /* __KERNEL__ */

int
cfs_wi_check_events (void)
{
        int               n = 0;
        cfs_workitem_t   *wi;
        cfs_list_t       *q;

        cfs_spin_lock(&cfs_wi_data.wi_glock);

        for (;;) {
                /** rerunq is always empty for userspace */
                if (!cfs_list_empty(&cfs_wi_data.wi_scheds[1].ws_runq))
                        q = &cfs_wi_data.wi_scheds[1].ws_runq;
                else if (!cfs_list_empty(&cfs_wi_data.wi_scheds[0].ws_runq))
                        q = &cfs_wi_data.wi_scheds[0].ws_runq;
                else
                        break;

                wi = cfs_list_entry(q->next, cfs_workitem_t, wi_list);
                cfs_list_del_init(&wi->wi_list);

                LASSERT (wi->wi_scheduled);
                wi->wi_scheduled = 0;
                cfs_spin_unlock(&cfs_wi_data.wi_glock);

                n++;
                (*wi->wi_action) (wi);

                cfs_spin_lock(&cfs_wi_data.wi_glock);
        }

        cfs_spin_unlock(&cfs_wi_data.wi_glock);
        return n;
}
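
/*
 * Example (sketch): userspace has no scheduler threads, so an event loop
 * must drain pending workitems by polling.  wait_for_more_work() is an
 * illustrative placeholder, not a libcfs API:
 *
 *      for (;;) {
 *              if (cfs_wi_check_events() == 0)
 *                      wait_for_more_work();
 *      }
 */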

#endif

static void
cfs_wi_sched_init(cfs_wi_sched_t *sched)
{
        sched->ws_shuttingdown = 0;
#ifdef __KERNEL__
        cfs_spin_lock_init(&sched->ws_lock);
        cfs_waitq_init(&sched->ws_waitq);
#endif
        CFS_INIT_LIST_HEAD(&sched->ws_runq);
        CFS_INIT_LIST_HEAD(&sched->ws_rerunq);
}

static void
cfs_wi_sched_shutdown(cfs_wi_sched_t *sched)
{
        cfs_wi_sched_lock(sched);

        LASSERT(cfs_list_empty(&sched->ws_runq));
        LASSERT(cfs_list_empty(&sched->ws_rerunq));

        sched->ws_shuttingdown = 1;

#ifdef __KERNEL__
        cfs_waitq_broadcast(&sched->ws_waitq);
#endif
        cfs_wi_sched_unlock(sched);
}


int
cfs_wi_startup (void)
{
        int i;
        int n, rc;

        cfs_wi_data.wi_nthreads = 0;
        cfs_wi_data.wi_nsched   = CFS_WI_NSCHED;
        LIBCFS_ALLOC(cfs_wi_data.wi_scheds,
                     cfs_wi_data.wi_nsched * sizeof(cfs_wi_sched_t));
        if (cfs_wi_data.wi_scheds == NULL)
                return -ENOMEM;

        cfs_spin_lock_init(&cfs_wi_data.wi_glock);
        for (i = 0; i < cfs_wi_data.wi_nsched; i++)
                cfs_wi_sched_init(&cfs_wi_data.wi_scheds[i]);

#ifdef __KERNEL__
        n = cfs_num_online_cpus();
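        /* one scheduler thread per online CPU, plus one serial thread */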
        for (i = 0; i <= n; i++) {
                rc = cfs_wi_start_thread(cfs_wi_scheduler,
                                         (void *)(long_ptr_t)(i == n ? -1 : i));
                if (rc != 0) {
                        CERROR ("Can't spawn workitem scheduler: %d\n", rc);
                        cfs_wi_shutdown();
                        return rc;
                }
        }
#else
        SET_BUT_UNUSED(rc);
        SET_BUT_UNUSED(n);
#endif

        return 0;
}

void
cfs_wi_shutdown (void)
{
        int i;

        if (cfs_wi_data.wi_scheds == NULL)
                return;

        for (i = 0; i < cfs_wi_data.wi_nsched; i++)
                cfs_wi_sched_shutdown(&cfs_wi_data.wi_scheds[i]);

#ifdef __KERNEL__
        cfs_spin_lock(&cfs_wi_data.wi_glock);
        i = 2;
        while (cfs_wi_data.wi_nthreads != 0) {
                CDEBUG(IS_PO2(++i) ? D_WARNING : D_NET,
                       "waiting for %d threads to terminate\n",
                       cfs_wi_data.wi_nthreads);
                cfs_spin_unlock(&cfs_wi_data.wi_glock);

                cfs_pause(cfs_time_seconds(1));

                cfs_spin_lock(&cfs_wi_data.wi_glock);
        }
        cfs_spin_unlock(&cfs_wi_data.wi_glock);
#endif
        LIBCFS_FREE(cfs_wi_data.wi_scheds,
                    cfs_wi_data.wi_nsched * sizeof(cfs_wi_sched_t));
        return;
}