/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 * vim:expandtab:shiftwidth=8:tabstop=8:
 *
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, Whamcloud, Inc.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * libcfs/libcfs/workitem.c
 *
 * Author: Isaac Huang <isaac@clusterfs.com>
 *         Liang Zhen  <zhen.liang@sun.com>
 */
#define DEBUG_SUBSYSTEM S_LNET

#include <libcfs/libcfs.h>

typedef struct cfs_wi_sched {
#ifdef __KERNEL__
        /** serialised workitems */
        cfs_spinlock_t  ws_lock;
        /** where schedulers sleep */
        cfs_waitq_t     ws_waitq;
#endif
        /** concurrent workitems */
        cfs_list_t      ws_runq;
        /** rescheduled running-workitems */
        cfs_list_t      ws_rerunq;
        /** shutting down */
        int             ws_shuttingdown;
} cfs_wi_sched_t;

#ifdef __KERNEL__
/**
 * we have 2 cfs_wi_sched_t so far:
 * one for CFS_WI_SCHED_ANY, another for CFS_WI_SCHED_SERIAL;
 * a per-CPU implementation will be added for SMP scalability.
 */
#define CFS_WI_NSCHED   2
#else
/** always 2 for userspace */
#define CFS_WI_NSCHED   2
#endif /* __KERNEL__ */

struct cfs_workitem_data {
        /** serialize */
        cfs_spinlock_t  wi_glock;
        /** number of cfs_wi_sched_t */
        int             wi_nsched;
        /** number of threads (all schedulers) */
        int             wi_nthreads;
        /** array of wi_nsched schedulers */
        cfs_wi_sched_t *wi_scheds;
} cfs_wi_data;

static inline cfs_wi_sched_t *
cfs_wi_to_sched(cfs_workitem_t *wi)
{
        LASSERT(wi->wi_sched_id == CFS_WI_SCHED_ANY ||
                wi->wi_sched_id == CFS_WI_SCHED_SERIAL ||
                (wi->wi_sched_id >= 0 &&
                 wi->wi_sched_id < cfs_wi_data.wi_nsched));

        if (wi->wi_sched_id == CFS_WI_SCHED_ANY)
                return &cfs_wi_data.wi_scheds[0];
        if (wi->wi_sched_id == CFS_WI_SCHED_SERIAL)
                return &cfs_wi_data.wi_scheds[cfs_wi_data.wi_nsched - 1];

        return &cfs_wi_data.wi_scheds[wi->wi_sched_id];
}
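
/*
 * Worked example of the mapping above with wi_nsched == CFS_WI_NSCHED == 2:
 *
 *      wi_sched_id == CFS_WI_SCHED_ANY    -> wi_scheds[0]
 *      wi_sched_id == CFS_WI_SCHED_SERIAL -> wi_scheds[1]  (wi_nsched - 1)
 *      wi_sched_id == 0 or 1              -> wi_scheds[wi_sched_id]
 *
 * i.e. today the serial queue is simply the last scheduler in the array.
 */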

#ifdef __KERNEL__

static inline void
cfs_wi_sched_lock(cfs_wi_sched_t *sched)
{
        cfs_spin_lock(&sched->ws_lock);
}

static inline void
cfs_wi_sched_unlock(cfs_wi_sched_t *sched)
{
        cfs_spin_unlock(&sched->ws_lock);
}

static inline int
cfs_wi_sched_cansleep(cfs_wi_sched_t *sched)
{
        cfs_wi_sched_lock(sched);
        if (sched->ws_shuttingdown) {
                cfs_wi_sched_unlock(sched);
                return 0;
        }

        if (!cfs_list_empty(&sched->ws_runq)) {
                cfs_wi_sched_unlock(sched);
                return 0;
        }
        cfs_wi_sched_unlock(sched);
        return 1;
}

#else /* !__KERNEL__ */

static inline void
cfs_wi_sched_lock(cfs_wi_sched_t *sched)
{
        cfs_spin_lock(&cfs_wi_data.wi_glock);
}

static inline void
cfs_wi_sched_unlock(cfs_wi_sched_t *sched)
{
        cfs_spin_unlock(&cfs_wi_data.wi_glock);
}

#endif /* __KERNEL__ */

/* XXX:
 * 0. this only works when called from inside wi->wi_action.
 * 1. once it returns, no one shall try to schedule the workitem again.
 */
void
cfs_wi_exit(cfs_workitem_t *wi)
{
        cfs_wi_sched_t *sched = cfs_wi_to_sched(wi);

        LASSERT (!cfs_in_interrupt()); /* because we use plain spinlock */
        LASSERT (!sched->ws_shuttingdown);

        cfs_wi_sched_lock(sched);

#ifdef __KERNEL__
        LASSERT (wi->wi_running);
#endif
        if (wi->wi_scheduled) { /* cancel pending schedules */
                LASSERT (!cfs_list_empty(&wi->wi_list));
                cfs_list_del_init(&wi->wi_list);
        }

        LASSERT (cfs_list_empty(&wi->wi_list));
        wi->wi_scheduled = 1; /* LBUG future schedule attempts */

        cfs_wi_sched_unlock(sched);
        return;
}
CFS_EXPORT_SYMBOL(cfs_wi_exit);
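
/*
 * Usage sketch (illustrative only, not part of the original code):
 * cfs_wi_exit() is only safe from inside wi->wi_action, typically right
 * before the owner frees the structure embedding the workitem.  The names
 * struct my_obj, my_obj_done() and my_wi_action() are hypothetical, and the
 * wi_data back-pointer is assumed to have been set with cfs_wi_init() from
 * the workitem header:
 *
 *      static int
 *      my_wi_action(cfs_workitem_t *wi)
 *      {
 *              struct my_obj *obj = wi->wi_data;
 *
 *              if (my_obj_done(obj)) {
 *                      cfs_wi_exit(wi);
 *                      LIBCFS_FREE(obj, sizeof(*obj));
 *                      return 1;   (non-zero: the scheduler must not touch wi again)
 *              }
 *              return 0;
 *      }
 */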

int
cfs_wi_cancel (cfs_workitem_t *wi)
{
        cfs_wi_sched_t *sched = cfs_wi_to_sched(wi);
        int             rc;

        LASSERT (!cfs_in_interrupt()); /* because we use plain spinlock */
        LASSERT (!sched->ws_shuttingdown);

        cfs_wi_sched_lock(sched);
        /*
         * Return 0 if the workitem is already running; otherwise return 1,
         * which means the workitem has been descheduled and can no longer
         * race with wi_action.
         */
        rc = !(wi->wi_running);

        if (wi->wi_scheduled) { /* cancel pending schedules */
                LASSERT (!cfs_list_empty(&wi->wi_list));
                cfs_list_del_init(&wi->wi_list);
                wi->wi_scheduled = 0;
        }

        LASSERT (cfs_list_empty(&wi->wi_list));

        cfs_wi_sched_unlock(sched);
        return rc;
}
CFS_EXPORT_SYMBOL(cfs_wi_cancel);
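
/*
 * Usage sketch (illustrative only): the return value tells the caller whether
 * it has descheduled the workitem.  "obj" and obj_wi are hypothetical names:
 *
 *      if (cfs_wi_cancel(&obj->obj_wi)) {
 *              ...   wi_action will not run for this schedule; safe to proceed
 *      } else {
 *              ...   wi_action is running right now; synchronise with it
 *                    (e.g. retry or wait) before tearing "obj" down
 *      }
 */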

/*
 * A workitem scheduled on the serial queue (wi_sched_id == CFS_WI_SCHED_SERIAL)
 * is strictly serialised not only with itself, but also with every other
 * workitem scheduled that way.
 *
 * For now there is only one static serialised queue, but in the future more
 * may be added, and dynamic creation of serialised queues may be supported.
 */
void
cfs_wi_schedule(cfs_workitem_t *wi)
{
        cfs_wi_sched_t *sched = cfs_wi_to_sched(wi);

        LASSERT (!cfs_in_interrupt()); /* because we use plain spinlock */
        LASSERT (!sched->ws_shuttingdown);

        cfs_wi_sched_lock(sched);

        if (!wi->wi_scheduled) {
                LASSERT (cfs_list_empty(&wi->wi_list));

                wi->wi_scheduled = 1;
                if (!wi->wi_running) {
                        cfs_list_add_tail(&wi->wi_list, &sched->ws_runq);
#ifdef __KERNEL__
                        cfs_waitq_signal(&sched->ws_waitq);
#endif
                } else {
                        cfs_list_add(&wi->wi_list, &sched->ws_rerunq);
                }
        }

        LASSERT (!cfs_list_empty(&wi->wi_list));
        cfs_wi_sched_unlock(sched);
        return;
}
CFS_EXPORT_SYMBOL(cfs_wi_schedule);
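
/*
 * Usage sketch (illustrative only): a caller embeds a cfs_workitem_t in its
 * own structure, initialises it (cfs_wi_init() from the workitem header is
 * assumed here), optionally directs it to the serial queue, then schedules it.
 * Scheduling an already-scheduled item is a cheap no-op; scheduling a running
 * item parks it on ws_rerunq until its action returns.
 *
 *      struct my_obj {
 *              cfs_workitem_t  obj_wi;
 *              ...
 *      };
 *
 *      cfs_wi_init(&obj->obj_wi, obj, my_wi_action);
 *      obj->obj_wi.wi_sched_id = CFS_WI_SCHED_SERIAL;   (strictly serialised)
 *      cfs_wi_schedule(&obj->obj_wi);
 */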

#ifdef __KERNEL__

static int
cfs_wi_scheduler (void *arg)
{
        int             id     = (int)(long_ptr_t) arg;
        int             serial = (id == -1);
        char            name[16];
        cfs_wi_sched_t *sched;

        if (serial) {
                sched = &cfs_wi_data.wi_scheds[cfs_wi_data.wi_nsched - 1];
                cfs_daemonize("wi_serial_sd");
        } else {
                /* will be sched = &cfs_wi_data.wi_scheds[id] in the future */
                sched = &cfs_wi_data.wi_scheds[0];
                snprintf(name, sizeof(name), "cfs_wi_sd%03d", id);
                cfs_daemonize(name);
        }

        cfs_block_allsigs();

        cfs_wi_sched_lock(sched);

        while (!sched->ws_shuttingdown) {
                int             nloops = 0;
                int             rc;
                cfs_workitem_t *wi;

                while (!cfs_list_empty(&sched->ws_runq) &&
                       nloops < CFS_WI_RESCHED) {
                        wi = cfs_list_entry(sched->ws_runq.next,
                                            cfs_workitem_t, wi_list);
                        LASSERT (wi->wi_scheduled && !wi->wi_running);

                        cfs_list_del_init(&wi->wi_list);

                        wi->wi_running   = 1;
                        wi->wi_scheduled = 0;
                        cfs_wi_sched_unlock(sched);
                        nloops++;

                        rc = (*wi->wi_action) (wi);

                        cfs_wi_sched_lock(sched);
                        if (rc != 0) /* WI should be dead, even be freed! */
                                continue;

                        wi->wi_running = 0;
                        if (cfs_list_empty(&wi->wi_list))
                                continue;

                        LASSERT (wi->wi_scheduled);
                        /* wi is rescheduled, should be on rerunq now, we
                         * move it to runq so it can run action now */
                        cfs_list_move_tail(&wi->wi_list, &sched->ws_runq);
                }

                if (!cfs_list_empty(&sched->ws_runq)) {
                        cfs_wi_sched_unlock(sched);
                        /* don't sleep because some workitems still
                         * expect me to come back soon */
                        cfs_cond_resched();
                        cfs_wi_sched_lock(sched);
                        continue;
                }

                cfs_wi_sched_unlock(sched);
                cfs_wait_event_interruptible_exclusive(sched->ws_waitq,
                                !cfs_wi_sched_cansleep(sched), rc);
                cfs_wi_sched_lock(sched);
        }

        cfs_wi_sched_unlock(sched);

        cfs_spin_lock(&cfs_wi_data.wi_glock);
        cfs_wi_data.wi_nthreads--;
        cfs_spin_unlock(&cfs_wi_data.wi_glock);
        return 0;
}
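
/*
 * Behaviour sketch (illustrative only): because a workitem rescheduled while
 * running is parked on ws_rerunq and moved back to ws_runq once its action
 * returns 0, an action can keep itself alive by rescheduling.  The names
 * more_work_pending() and my_poll_action() are hypothetical:
 *
 *      static int
 *      my_poll_action(cfs_workitem_t *wi)
 *      {
 *              if (more_work_pending(wi->wi_data))
 *                      cfs_wi_schedule(wi);    (will run again after return)
 *              return 0;                       (wi still owned by its creator)
 *      }
 */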

static int
cfs_wi_start_thread (int (*func) (void*), void *arg)
{
        long pid;

        pid = cfs_create_thread(func, arg, 0);
        if (pid < 0)
                return (int)pid;

        cfs_spin_lock(&cfs_wi_data.wi_glock);
        cfs_wi_data.wi_nthreads++;
        cfs_spin_unlock(&cfs_wi_data.wi_glock);
        return 0;
}

#else /* __KERNEL__ */

int
cfs_wi_check_events (void)
{
        int               n = 0;
        cfs_workitem_t   *wi;
        cfs_list_t       *q;

        cfs_spin_lock(&cfs_wi_data.wi_glock);

        for (;;) {
                /** rerunq is always empty for userspace */
                if (!cfs_list_empty(&cfs_wi_data.wi_scheds[1].ws_runq))
                        q = &cfs_wi_data.wi_scheds[1].ws_runq;
                else if (!cfs_list_empty(&cfs_wi_data.wi_scheds[0].ws_runq))
                        q = &cfs_wi_data.wi_scheds[0].ws_runq;
                else
                        break;

                wi = cfs_list_entry(q->next, cfs_workitem_t, wi_list);
                cfs_list_del_init(&wi->wi_list);

                LASSERT (wi->wi_scheduled);
                wi->wi_scheduled = 0;
                cfs_spin_unlock(&cfs_wi_data.wi_glock);

                n++;
                (*wi->wi_action) (wi);

                cfs_spin_lock(&cfs_wi_data.wi_glock);
        }

        cfs_spin_unlock(&cfs_wi_data.wi_glock);
        return n;
}
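
/*
 * Usage sketch (illustrative only): with no scheduler threads in userspace,
 * an event loop is expected to poll cfs_wi_check_events(), which runs every
 * queued action and returns how many were run.  handle_other_events() and
 * the "running" flag are hypothetical:
 *
 *      while (running) {
 *              handle_other_events();
 *              cfs_wi_check_events();
 *      }
 */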

#endif /* !__KERNEL__ */

static void
cfs_wi_sched_init(cfs_wi_sched_t *sched)
{
        sched->ws_shuttingdown = 0;
#ifdef __KERNEL__
        cfs_spin_lock_init(&sched->ws_lock);
        cfs_waitq_init(&sched->ws_waitq);
#endif
        CFS_INIT_LIST_HEAD(&sched->ws_runq);
        CFS_INIT_LIST_HEAD(&sched->ws_rerunq);
}

static void
cfs_wi_sched_shutdown(cfs_wi_sched_t *sched)
{
        cfs_wi_sched_lock(sched);

        LASSERT(cfs_list_empty(&sched->ws_runq));
        LASSERT(cfs_list_empty(&sched->ws_rerunq));

        sched->ws_shuttingdown = 1;

#ifdef __KERNEL__
        cfs_waitq_broadcast(&sched->ws_waitq);
#endif
        cfs_wi_sched_unlock(sched);
}

int
cfs_wi_startup (void)
{
        int i;
        int n, rc;

        cfs_wi_data.wi_nthreads = 0;
        cfs_wi_data.wi_nsched   = CFS_WI_NSCHED;
        LIBCFS_ALLOC(cfs_wi_data.wi_scheds,
                     cfs_wi_data.wi_nsched * sizeof(cfs_wi_sched_t));
        if (cfs_wi_data.wi_scheds == NULL)
                return -ENOMEM;

        cfs_spin_lock_init(&cfs_wi_data.wi_glock);
        for (i = 0; i < cfs_wi_data.wi_nsched; i++)
                cfs_wi_sched_init(&cfs_wi_data.wi_scheds[i]);

#ifdef __KERNEL__
        /* one scheduler thread per online CPU, plus one for the serial queue */
        n = cfs_num_online_cpus();
        for (i = 0; i <= n; i++) {
                rc = cfs_wi_start_thread(cfs_wi_scheduler,
                                         (void *)(long_ptr_t)(i == n ? -1 : i));
                if (rc != 0) {
                        CERROR ("Can't spawn workitem scheduler: %d\n", rc);
                        cfs_wi_shutdown();
                        return rc;
                }
        }
#else
        SET_BUT_UNUSED(rc);
        SET_BUT_UNUSED(n);
#endif
        return 0;
}

void
cfs_wi_shutdown (void)
{
        int i;

        if (cfs_wi_data.wi_scheds == NULL)
                return;

        for (i = 0; i < cfs_wi_data.wi_nsched; i++)
                cfs_wi_sched_shutdown(&cfs_wi_data.wi_scheds[i]);

#ifdef __KERNEL__
        cfs_spin_lock(&cfs_wi_data.wi_glock);
        i = 2;
        while (cfs_wi_data.wi_nthreads != 0) {
                CDEBUG(IS_PO2(++i) ? D_WARNING : D_NET,
                       "waiting for %d threads to terminate\n",
                       cfs_wi_data.wi_nthreads);
                cfs_spin_unlock(&cfs_wi_data.wi_glock);

                cfs_pause(cfs_time_seconds(1));

                cfs_spin_lock(&cfs_wi_data.wi_glock);
        }
        cfs_spin_unlock(&cfs_wi_data.wi_glock);
#endif
        LIBCFS_FREE(cfs_wi_data.wi_scheds,
                    cfs_wi_data.wi_nsched * sizeof(cfs_wi_sched_t));
}
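
/*
 * Usage sketch (illustrative only): cfs_wi_startup() and cfs_wi_shutdown()
 * are expected to bracket the lifetime of the code using workitems, e.g.
 * from the caller's own init/fini paths:
 *
 *      rc = cfs_wi_startup();
 *      if (rc != 0)
 *              return rc;
 *      ...
 *      cfs_wi_shutdown();
 */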